diff --git a/src/markup/engine/__tests__/remarkup/table-with-direct-content.txt b/src/markup/engine/__tests__/remarkup/table-with-direct-content.txt
new file mode 100644
index 0000000..eab88de
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/table-with-direct-content.txt
@@ -0,0 +1,5 @@
+
+~~~~~~~~~~
+<table>quack</table>
+~~~~~~~~~~
+
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
index ea99a60..57f2fe1 100644
--- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
@@ -1,138 +1,142 @@
/i', $lines[$cursor])) {
$num_lines++;
$cursor++;
while (isset($lines[$cursor])) {
$num_lines++;
if (preg_match('@\s*$@i', $lines[$cursor])) {
break;
}
$cursor++;
}
}
return $num_lines;
}
/**
 * Render a block of text that is expected to hold exactly one table.
 *
 * The text is parsed as HTML and its top-level children are inspected.
 * Whitespace-only content is skipped. Any other content outside a table, or
 * a second table, causes the original text to be returned unchanged.
 *
 * @param string $text Raw block text.
 * @param wild $children Not used by this rule.
 * @return wild The original text on failure; otherwise the rendered table,
 *   joined as a plain string in text mode or as HTML otherwise.
 */
public function markupText($text, $children) {
  $parser = new PhutilHTMLParser();
  $document = $parser->parseDocument($text);

  $tables = array();
  foreach ($document->selectChildrenWithTags(array('table')) as $child) {
    if (!$child->isContentNode()) {
      // Only a single table per block is supported; a second one means we
      // give up and emit the raw text.
      if ($tables) {
        return $text;
      }

      $tables[] = $this->newTable($child);
      continue;
    }

    // Whitespace around the table is fine. Anything else (for example,
    // text between two table tags on one line) fails the rule.
    if (strlen(trim($child->getContent())) > 0) {
      return $text;
    }
  }

  $is_text_mode = $this->getEngine()->isTextMode();

  return $is_text_mode
    ? implode('', $tables)
    : phutil_implode_html('', $tables);
}
/**
 * Build the rendered form of one parsed table node.
 *
 * Only "colgroup" and "tr" children are accepted directly inside the table,
 * and only "td" and "th" children inside each row. Whitespace between them
 * is ignored. Any other non-whitespace content, a second "<colgroup />", or
 * a "<colgroup />" which follows a row makes the whole table fall back to
 * its raw source text.
 *
 * @param PhutilDOMNode $table Table node to render.
 * @return wild Result of renderRemarkupTable() for a well-formed table,
 *   otherwise the string from $table->newRawString().
 */
private function newTable(PhutilDOMNode $table) {
  $nodes = $table->selectChildrenWithTags(
    array(
      'colgroup',
      'tr',
    ));

  // The colgroup is only tracked to validate the table structure here.
  $colgroup = null;
  $rows = array();

  foreach ($nodes as $node) {
    if ($node->isContentNode()) {
      $content = $node->getContent();

      // If this is whitespace, ignore it.
      if (!strlen(trim($content))) {
        continue;
      }

      // If we have nonempty content between the rows, this isn't a valid
      // table. We can't really do anything reasonable with this, so just
      // fail out and render the raw text.
      return $table->newRawString();
    }

    if ($node->getTagName() === 'colgroup') {
      // This table has multiple "<colgroup />" tags. Just bail out.
      if ($colgroup !== null) {
        return $table->newRawString();
      }

      // This table has a "<colgroup />" after a "<tr />". We could parse
      // this, but just reject it out of an abundance of caution.
      if ($rows) {
        return $table->newRawString();
      }

      $colgroup = $node;
      continue;
    }

    $rows[] = $node;
  }

  $row_specs = array();

  foreach ($rows as $row) {
    $cells = $row->selectChildrenWithTags(array('td', 'th'));

    $cell_specs = array();
    foreach ($cells as $cell) {
      if ($cell->isContentNode()) {
        // Inspect the cell-level content node itself. Reading the variable
        // left over from the row loop above would miss stray text between
        // cells.
        $content = $cell->getContent();

        // Whitespace between cells is ignored.
        if (!strlen(trim($content))) {
          continue;
        }

        // Nonempty content between cells: not a valid table, render raw.
        return $table->newRawString();
      }

      // Cell bodies are themselves remarkup, so run the inline rules on
      // the raw cell content.
      $content = $cell->newRawContentString();
      $content = $this->applyRules($content);

      $cell_specs[] = array(
        'type' => $cell->getTagName(),
        'content' => $content,
      );
    }

    $row_specs[] = array(
      'type' => 'tr',
      'content' => $cell_specs,
    );
  }

  return $this->renderRemarkupTable($row_specs);
}
}
diff --git a/src/parser/html/PhutilDOMNode.php b/src/parser/html/PhutilDOMNode.php
index 84781c7..469e0b1 100644
--- a/src/parser/html/PhutilDOMNode.php
+++ b/src/parser/html/PhutilDOMNode.php
@@ -1,208 +1,208 @@
content = $content;
return $this;
}
/**
 * Get the content stored on this node.
 *
 * @return wild Value of the "content" property. isContentNode() treats a
 *   non-null value as marking a content node.
 */
public function getContent() {
return $this->content;
}
/**
 * Test whether this node is a content node.
 *
 * @return bool True if the "content" property is not null.
 */
public function isContentNode() {
return ($this->content !== null);
}
/**
 * Set the tag name of this node.
 *
 * @param wild $tag_name New tag name.
 * @return this
 */
public function setTagName($tag_name) {
$this->tagName = $tag_name;
return $this;
}
/**
 * Get the tag name of this node.
 *
 * @return wild Value of the "tagName" property.
 */
public function getTagName() {
return $this->tagName;
}
/**
 * Append a node to the end of this node's child list.
 *
 * The appended node's parent pointer is updated to reference this node.
 *
 * @param PhutilDOMNode $node Node to append.
 * @return this
 */
public function appendChild(PhutilDOMNode $node) {
// Keep the child's back-reference in sync with the child list.
$node->parentNode = $this;
$this->children[] = $node;
return $this;
}
/**
 * Get the children of this node.
 *
 * @return wild Value of the "children" property.
 */
public function getChildren() {
return $this->children;
}
/**
 * Get the parent of this node.
 *
 * @return wild Value of the "parentNode" property, which appendChild() and
 *   mergeContent() assign when attaching children.
 */
public function getParentNode() {
return $this->parentNode;
}
/**
 * Replace the attributes of this node.
 *
 * @param array $attributes New attributes.
 * @return this
 */
public function setAttributes(array $attributes) {
$this->attributes = $attributes;
return $this;
}
/**
 * Get the attributes of this node.
 *
 * @return wild Value of the "attributes" property.
 */
public function getAttributes() {
return $this->attributes;
}
/**
 * Set the raw head text of this node.
 *
 * The raw head is emitted before the children when the raw string for the
 * node is assembled.
 *
 * @param wild $raw_string Raw head text.
 * @return this
 */
public function setRawHead($raw_string) {
$this->rawHead = $raw_string;
return $this;
}
/**
 * Set the raw tail text of this node.
 *
 * The raw tail is emitted after the children when the raw string for the
 * node is assembled.
 *
 * @param wild $raw_tail Raw tail text.
 * @return this
 */
public function setRawTail($raw_tail) {
$this->rawTail = $raw_tail;
return $this;
}
/**
 * Assemble the raw text for this node and everything beneath it.
 *
 * The result is this node's raw head, followed by the raw string of each
 * child in order, followed by this node's raw tail.
 *
 * @return string Concatenated raw text.
 */
public function newRawString() {
  $raw_text = (string)$this->rawHead;

  foreach ($this->getChildren() as $child_node) {
    $raw_text .= $child_node->newRawString();
  }

  $raw_text .= $this->rawTail;

  return $raw_text;
}
/**
 * Convert this node and its descendants into nested arrays.
 *
 * @return map<string, wild> For a content node, a map with a single
 *   "content" key. Otherwise a map with "tag", "attributes", and
 *   "children" keys, where "children" is a list of child dictionaries.
 */
public function toDictionary() {
  // Content nodes are leaves: they carry only their content.
  if ($this->isContentNode()) {
    return array(
      'content' => $this->content,
    );
  }

  $child_dictionaries = array();
  foreach ($this->getChildren() as $child_node) {
    $child_dictionaries[] = $child_node->toDictionary();
  }

  return array(
    'tag' => $this->getTagName(),
    'attributes' => $this->getAttributes(),
    'children' => $child_dictionaries,
  );
}
/**
 * List the children of this node, keeping only the listed tags as tags.
 *
 * Content nodes and children whose tag name appears in the list are passed
 * through untouched. Every other child is replaced by a new content node
 * holding that child's raw string. Adjacent content nodes in the result are
 * then merged.
 *
 * @param list<string> $tag_list Tag names to keep as real nodes.
 * @return list<PhutilDOMNode> Selected and merged nodes.
 */
public function selectChildrenWithTags(array $tag_list) {
  $allowed = array_fuse($tag_list);

  $selected = array();
  foreach ($this->getChildren() as $child) {
    // Existing content nodes and explicitly allowed tags are kept as-is.
    $keep = $child->isContentNode()
      || isset($allowed[$child->getTagName()]);

    if ($keep) {
      $selected[] = $child;
      continue;
    }

    // Any other tag is demoted to a content node carrying its raw text.
    $raw_string = $child->newRawString();

    $replacement = new self();
    $replacement->setContent($raw_string);
    $replacement->setRawHead($raw_string);

    $selected[] = $replacement;
  }

  return $this->mergeContentNodes($selected);
}
/**
 * Get the raw text of everything inside this node.
 *
 * All children are selected with an empty tag list, which turns every
 * child into content and merges the results. The raw string of the first
 * resulting node is returned.
 *
 * @return string Raw inner text, or the empty string if there is none.
 */
public function newRawContentString() {
  $merged_nodes = $this->selectChildrenWithTags(array());

  if ($merged_nodes) {
    return head($merged_nodes)->newRawString();
  }

  return '';
}
/**
 * Merge adjacent content nodes throughout the tree rooted at this node.
 *
 * This node's child list is replaced by its merged form. Each resulting
 * child is then re-parented to this node and merged recursively.
 *
 * @return this
 */
public function mergeContent() {
  $merged_children = $this->mergeContentNodes($this->children);
  $this->children = $merged_children;

  foreach ($this->getChildren() as $descendant) {
    // Merging creates new nodes, so parent pointers must be reassigned.
    $descendant->parentNode = $this;
    $descendant->mergeContent();
  }

  return $this;
}
/**
 * Collapse each run of adjacent content nodes in a list into one node.
 *
 * Non-content nodes are passed through in their original positions. Each
 * run of content nodes is replaced by a single new content node whose
 * content and raw head are the concatenated raw strings of the run. Runs
 * which concatenate to the empty string are dropped.
 *
 * @param list<PhutilDOMNode> $nodes Nodes to merge.
 * @return list<PhutilDOMNode> Merged node list.
 */
private function mergeContentNodes(array $nodes) {
  $ordered = array_values($nodes);
  $total = count($ordered);

  $results = array();
  $pending = array();

  // Walk one step past the end so the final run of content is flushed by
  // the same code path as runs in the middle of the list.
  for ($index = 0; $index <= $total; $index++) {
    $at_end = ($index === $total);

    if (!$at_end && $ordered[$index]->isContentNode()) {
      $pending[] = $ordered[$index]->newRawString();
      continue;
    }

    // A tag (or the end of the list) terminates the current run.
    $merged = implode('', $pending);
    $pending = array();

    if (strlen($merged)) {
      $combined = new self();
      $combined->setContent($merged);
      $combined->setRawHead($merged);
      $results[] = $combined;
    }

    if (!$at_end) {
      $results[] = $ordered[$index];
    }
  }

  return $results;
}
}