diff --git a/src/parser/html/PhutilDOMNode.php b/src/parser/html/PhutilDOMNode.php --- a/src/parser/html/PhutilDOMNode.php +++ b/src/parser/html/PhutilDOMNode.php @@ -7,7 +7,8 @@ private $children = array(); private $attributes = array(); private $parentNode; - private $rawString; + private $rawHead; + private $rawTail; public function setContent($content) { $this->content = $content; @@ -54,13 +55,27 @@ return $this->attributes; } - public function setRawString($raw_string) { - $this->rawString = $raw_string; + public function setRawHead($raw_string) { + $this->rawHead = $raw_string; + return $this; + } + + public function setRawTail($raw_tail) { + $this->rawTail = $raw_tail; return $this; } public function getRawString() { - return $this->rawString; + $raw = array(); + $raw[] = $this->rawHead; + + foreach ($this->getChildren() as $child) { + $raw[] = $child->getRawString(); + } + + $raw[] = $this->rawTail; + + return implode('', $raw); } public function toDictionary() { @@ -109,11 +124,11 @@ // Otherwise, this is some other tag. Convert it into a content // node. - $raw_content = $child->getRawString(); + $raw_string = $child->getRawString(); $nodes[] = id(new self()) - ->setContent($raw_content) - ->setRawContent($raw_content); + ->setContent($raw_string) + ->setRawHead($raw_string); } return $this->mergeContentNodes($nodes); @@ -147,16 +162,16 @@ private function mergeContentNodes(array $nodes) { $list = array(); $content_block = array(); - foreach ($this->getChildren() as $child) { - if ($child->isContentNode()) { - $content_block[] = $child; + foreach ($nodes as $node) { + if ($node->isContentNode()) { + $content_block[] = $node; continue; } $list[] = $content_block; $content_block = array(); - $list[] = $child; + $list[] = $node; } $list[] = $content_block; @@ -184,7 +199,7 @@ $results[] = id(new self()) ->setContent($parts) - ->setRawString($parts); + ->setRawHead($parts); } return $results; diff --git a/src/parser/html/PhutilHTMLParser.php b/src/parser/html/PhutilHTMLParser.php --- a/src/parser/html/PhutilHTMLParser.php +++ b/src/parser/html/PhutilHTMLParser.php @@ -104,7 +104,7 @@ $node = id(new PhutilDOMNode()) ->setContent($content) - ->setRawString($content); + ->setRawHead($content); $this->getCursor()->appendChild($node); } @@ -182,8 +182,12 @@ while ($cursor) { if ($cursor->getTagName() === $tag_name) { + // Add this raw content to the raw content of the tag we're closing. + $cursor->setRawTail('<'.$raw_content.'>'); + $parent = $cursor->getParentNode(); $this->setCursor($parent); + return true; } $cursor = $cursor->getParentNode(); @@ -205,7 +209,7 @@ $node = id(new PhutilDOMNode()) ->setTagName($tag_name) ->setAttributes($attribute_map) - ->setRawString('<'.$raw_content.'>'); + ->setRawHead('<'.$raw_content.'>'); $cursor = $this->getCursor(); $cursor->appendChild($node); diff --git a/src/parser/html/__tests__/PhutilHTMLParserTestCase.php b/src/parser/html/__tests__/PhutilHTMLParserTestCase.php --- a/src/parser/html/__tests__/PhutilHTMLParserTestCase.php +++ b/src/parser/html/__tests__/PhutilHTMLParserTestCase.php @@ -42,4 +42,63 @@ } } + public function testSelectChildrenWithTags() { + $input = 'x'; + $document = id(new PhutilHTMLParser()) + ->parseDocument($input); + + $children = $document->selectChildrenWithTags(array('a')); + + $list = array(); + foreach ($children as $child) { + $list[] = $child->toDictionary(); + } + + $this->assertEqual( + array( + array( + 'tag' => 'a', + 'attributes' => array(), + 'children' => array(), + ), + array( + 'content' => '', + ), + array( + 'tag' => 'a', + 'attributes' => array(), + 'children' => array(), + ), + array( + 'content' => '', + ), + array( + 'tag' => 'a', + 'attributes' => array(), + 'children' => array(), + ), + array( + 'content' => '', + ), + array( + 'tag' => 'a', + 'attributes' => array(), + 'children' => array(), + ), + array( + 'content' => 'x', + ), + array( + 'tag' => 'a', + 'attributes' => array(), + 'children' => array(), + ), + array( + 'content' => '', + ), + ), + $list, + pht('Child selection of: %s.', $input)); + } + }