diff --git a/src/markup/engine/__tests__/remarkup/table-with-direct-content.txt b/src/markup/engine/__tests__/remarkup/table-with-direct-content.txt new file mode 100644 index 0000000..eab88de --- /dev/null +++ b/src/markup/engine/__tests__/remarkup/table-with-direct-content.txt @@ -0,0 +1,5 @@ +quack
+~~~~~~~~~~ +<table>quack</table> +~~~~~~~~~~ +quack
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
index ea99a60..57f2fe1 100644
--- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
@@ -1,138 +1,143 @@
     /i', $lines[$cursor])) {
       $num_lines++;
       $cursor++;
       while (isset($lines[$cursor])) {
         $num_lines++;
         if (preg_match('@\s*$@i', $lines[$cursor])) {
           break;
         }
         $cursor++;
       }
     }
     return $num_lines;
   }
   public function markupText($text, $children) {
     $root = id(new PhutilHTMLParser())
       ->parseDocument($text);
     $nodes = $root->selectChildrenWithTags(array('table'));
     $out = array();
     $seen_table = false;
     foreach ($nodes as $node) {
       if ($node->isContentNode()) {
         $content = $node->getContent();
         if (!strlen(trim($content))) {
           // Ignore whitespace.
           continue;
         }
         // If we find other content, fail the rule. This can happen if the
         // input is two consecutive table tags on one line with some text
         // in between them, which we currently forbid.
         return $text;
       } else {
         // If we have multiple table tags, just return the raw text.
         if ($seen_table) {
           return $text;
         }
         $seen_table = true;
         $out[] = $this->newTable($node);
       }
     }
-    return phutil_implode_html('', $out);
+    if ($this->getEngine()->isTextMode()) {
+      return implode('', $out);
+    } else {
+      return phutil_implode_html('', $out);
+    }
   }
   private function newTable(PhutilDOMNode $table) {
     $nodes = $table->selectChildrenWithTags(
       array(
         'colgroup',
         'tr',
       ));
     $colgroup = null;
     $rows = array();
     foreach ($nodes as $node) {
       if ($node->isContentNode()) {
         $content = $node->getContent();
         // If this is whitespace, ignore it.
         if (!strlen(trim($content))) {
           continue;
         }
         // If we have nonempty content between the rows, this isn't a valid
         // table. We can't really do anything reasonable with this, so just
         // fail out and render the raw text.
         return $table->newRawString();
       }
       if ($node->getTagName() === 'colgroup') {
         // This table has multiple "<colgroup>" tags. Just bail out.
         if ($colgroup !== null) {
           return $table->newRawString();
         }
         // This table has a "<colgroup>" after a "<tr>". We could parse
         // this, but just reject it out of an abundance of caution.
         if ($rows) {
           return $table->newRawString();
         }
         $colgroup = $node;
         continue;
       }
       $rows[] = $node;
     }
     $row_specs = array();
     foreach ($rows as $row) {
       $cells = $row->selectChildrenWithTags(array('td', 'th'));
       $cell_specs = array();
       foreach ($cells as $cell) {
         if ($cell->isContentNode()) {
-          $content = $node->getContent();
+          // Inspect the cell itself; "$node" is stale from the loop above.
+          $content = $cell->getContent();
           if (!strlen(trim($content))) {
             continue;
           }
           return $table->newRawString();
         }
-        $content = $cell->getRawContentString();
+        $content = $cell->newRawContentString();
         $content = $this->applyRules($content);
         $cell_specs[] = array(
           'type' => $cell->getTagName(),
           'content' => $content,
         );
       }
       $row_specs[] = array(
         'type' => 'tr',
         'content' => $cell_specs,
       );
     }
     return $this->renderRemarkupTable($row_specs);
   }
 }
diff --git a/src/parser/html/PhutilDOMNode.php b/src/parser/html/PhutilDOMNode.php
index 84781c7..469e0b1 100644
--- a/src/parser/html/PhutilDOMNode.php
+++ b/src/parser/html/PhutilDOMNode.php
@@ -1,208 +1,208 @@
     content = $content;
     return $this;
   }
   public function getContent() {
     return $this->content;
   }
   public function isContentNode() {
     return ($this->content !== null);
   }
   public function setTagName($tag_name) {
     $this->tagName = $tag_name;
     return $this;
   }
   public function getTagName() {
     return $this->tagName;
   }
   public function appendChild(PhutilDOMNode $node) {
     $node->parentNode = $this;
     $this->children[] = $node;
     return $this;
   }
   public function getChildren() {
     return $this->children;
   }
   public function getParentNode() {
     return $this->parentNode;
   }
   public function setAttributes(array $attributes) {
     $this->attributes = $attributes;
     return $this;
   }
   public function getAttributes() {
     return $this->attributes;
   }
   public function setRawHead($raw_string) {
     $this->rawHead = $raw_string;
     return $this;
   }
   public function setRawTail($raw_tail) {
     $this->rawTail = $raw_tail;
     return $this;
   }
-  public function getRawString() {
+  public function newRawString() {
     $raw = array();
     $raw[] = $this->rawHead;
     foreach ($this->getChildren() as $child) {
-      $raw[] = $child->getRawString();
+      $raw[] = $child->newRawString();
     }
     $raw[] = $this->rawTail;
     return implode('', $raw);
   }
   public function toDictionary() {
     if ($this->isContentNode()) {
       return array(
         'content' => $this->content,
       );
     } else {
       $children = array();
       foreach ($this->getChildren() as $child) {
         $children[] = $child->toDictionary();
       }
       return array(
         'tag' => $this->getTagName(),
         'attributes' => $this->getAttributes(),
         'children' => $children,
       );
     }
   }
   /**
    * Get a list of the children of a given DOM node, treating unexpected
    * tags as if they were raw content.
    */
   public function selectChildrenWithTags(array $tag_list) {
     $tag_map = array_fuse($tag_list);
     $nodes = array();
     foreach ($this->getChildren() as $child) {
       // If this is already a content node, just keep it as-is.
       if ($child->isContentNode()) {
         $nodes[] = $child;
         continue;
       }
       $tag_name = $child->getTagName();
       // If this is a tag that we're allowing, keep it as-is.
       if (isset($tag_map[$tag_name])) {
         $nodes[] = $child;
         continue;
       }
       // Otherwise, this is some other tag. Convert it into a content
       // node.
-      $raw_string = $child->getRawString();
+      $raw_string = $child->newRawString();
       $nodes[] = id(new self())
         ->setContent($raw_string)
         ->setRawHead($raw_string);
     }
     return $this->mergeContentNodes($nodes);
   }
-  public function getRawContentString() {
+  public function newRawContentString() {
     $content_node = $this->selectChildrenWithTags(array());
     if (!$content_node) {
       return '';
     }
-    return head($content_node)->getRawString();
+    return head($content_node)->newRawString();
   }
   public function mergeContent() {
     $this->children = $this->mergeContentNodes($this->children);
     foreach ($this->getChildren() as $child) {
       $child->parentNode = $this;
       $child->mergeContent();
     }
     return $this;
   }
   /**
    * Given a list of nodes, combine sequences of multiple adjacent content
    * nodes into single nodes.
    */
   private function mergeContentNodes(array $nodes) {
     $list = array();
     $content_block = array();
     foreach ($nodes as $node) {
       if ($node->isContentNode()) {
         $content_block[] = $node;
         continue;
       }
       $list[] = $content_block;
       $content_block = array();
       $list[] = $node;
     }
     $list[] = $content_block;
     $results = array();
     foreach ($list as $item) {
       if (!is_array($item)) {
         $results[] = $item;
         continue;
       }
       if (!$item) {
         continue;
       }
       $parts = array();
       foreach ($item as $content_node) {
-        $parts[] = $content_node->getRawString();
+        $parts[] = $content_node->newRawString();
       }
       $parts = implode('', $parts);
       if (!strlen($parts)) {
         continue;
       }
       $results[] = id(new self())
         ->setContent($parts)
         ->setRawHead($parts);
     }
     return $results;
   }
 }