diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -203,6 +203,7 @@
     'PhutilCowsayTestCase' => 'utils/__tests__/PhutilCowsayTestCase.php',
     'PhutilCsprintfTestCase' => 'xsprintf/__tests__/PhutilCsprintfTestCase.php',
     'PhutilCzechLocale' => 'internationalization/locales/PhutilCzechLocale.php',
+    'PhutilDOMNode' => 'parser/html/PhutilDOMNode.php',
     'PhutilDaemon' => 'daemon/PhutilDaemon.php',
     'PhutilDaemonHandle' => 'daemon/PhutilDaemonHandle.php',
     'PhutilDaemonOverseer' => 'daemon/PhutilDaemonOverseer.php',
@@ -260,6 +261,8 @@
     'PhutilGitURI' => 'parser/PhutilGitURI.php',
     'PhutilGitURITestCase' => 'parser/__tests__/PhutilGitURITestCase.php',
     'PhutilGoogleAuthAdapter' => 'auth/PhutilGoogleAuthAdapter.php',
+    'PhutilHTMLParser' => 'parser/html/PhutilHTMLParser.php',
+    'PhutilHTMLParserTestCase' => 'parser/html/__tests__/PhutilHTMLParserTestCase.php',
     'PhutilHTTPEngineExtension' => 'future/http/PhutilHTTPEngineExtension.php',
     'PhutilHTTPResponse' => 'parser/http/PhutilHTTPResponse.php',
     'PhutilHTTPResponseParser' => 'parser/http/PhutilHTTPResponseParser.php',
@@ -859,6 +862,7 @@
     'PhutilCowsayTestCase' => 'PhutilTestCase',
     'PhutilCsprintfTestCase' => 'PhutilTestCase',
     'PhutilCzechLocale' => 'PhutilLocale',
+    'PhutilDOMNode' => 'Phobject',
     'PhutilDaemon' => 'Phobject',
     'PhutilDaemonHandle' => 'Phobject',
     'PhutilDaemonOverseer' => 'Phobject',
@@ -916,6 +920,8 @@
     'PhutilGitURI' => 'Phobject',
     'PhutilGitURITestCase' => 'PhutilTestCase',
     'PhutilGoogleAuthAdapter' => 'PhutilOAuthAuthAdapter',
+    'PhutilHTMLParser' => 'Phobject',
+    'PhutilHTMLParserTestCase' => 'PhutilTestCase',
     'PhutilHTTPEngineExtension' => 'Phobject',
     'PhutilHTTPResponse' => 'Phobject',
     'PhutilHTTPResponseParser' => 'Phobject',
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
--- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
@@ -11,7 +11,7 @@
 
       while (isset($lines[$cursor])) {
         $num_lines++;
-        if (preg_match('@</table>$@i', $lines[$cursor])) {
+        if (preg_match('@</table>\s*$@i', $lines[$cursor])) {
           break;
         }
         $cursor++;
@@ -22,86 +22,141 @@
   }
 
+  /**
+   * Render a block of text which is expected to contain one "<table>".
+   *
+   * The text is parsed with PhutilHTMLParser. If non-whitespace content
+   * appears outside the table, or more than one table is present, the
+   * original text is returned unchanged.
+   *
+   * @param string Raw block text.
+   * @param wild Not used by this rule.
+   * @return wild Rendered table, or the original text on failure.
+   */
   public function markupText($text, $children) {
-    $matches = array();
+    $root = id(new PhutilHTMLParser())
+      ->parseDocument($text);
 
-    if (!preg_match('@^\s*<table>(.*)</table>$@si', $text, $matches)) {
-      return $this->fail(
-        $text,
-        pht('Bad table (expected %s)', '<table>...</table>'));
-    }
+    $nodes = $root->selectChildrenWithTags(array('table'));
 
-    $body = $matches[1];
+    $out = array();
+    $seen_table = false;
+    foreach ($nodes as $node) {
+      if ($node->isContentNode()) {
+        $content = $node->getContent();
 
-    $row_fragment = '(?:\s*<tr>(.*)</tr>\s*)';
-    $cell_fragment = '(?:\s*<(td|th)>(.*)</(?:td|th)>\s*)';
+        if (!strlen(trim($content))) {
+          // Ignore whitespace.
+          continue;
+        }
 
-    // Test that the body contains only valid rows.
-    if (!preg_match('@^'.$row_fragment.'+$@Usi', $body)) {
-      return $this->fail(
-        $body,
-        pht('Bad table syntax (expected rows %s)', '<tr>...</tr>'));
-    }
+        // If we find other content, fail the rule. This can happen if the
+        // input is two consecutive table tags on one line with some text
+        // in between them, which we currently forbid.
+        return $text;
+      } else {
+        // If we have multiple table tags, just return the raw text.
+        if ($seen_table) {
+          return $text;
+        }
+        $seen_table = true;
 
-    // Capture the rows.
-    $row_regex = '@'.$row_fragment.'@Usi';
-    if (!preg_match_all($row_regex, $body, $matches, PREG_SET_ORDER)) {
-      throw new Exception(
-        pht('Bug in Remarkup tables, parsing fails for input: %s', $text));
+        $out[] = $this->newTable($node);
+      }
     }
 
-    $out_rows = array();
+    return phutil_implode_html('', $out);
+  }
 
-    $rows = $matches;
-    foreach ($rows as $row) {
-      $content = $row[1];
+  /**
+   * Build the rendered table for a parsed "<table>" node.
+   *
+   * Only "<colgroup>" and "<tr>" children, and "<td>" or "<th>" cells inside
+   * rows, are accepted. Other non-whitespace content, a second "<colgroup>",
+   * or a "<colgroup>" after a row makes this return the raw table string.
+   *
+   * NOTE(review): this calls PhutilDOMNode::newRawString(), which is not one
+   * of the methods PhutilDOMNode gains in this change; confirm it exists.
+   *
+   * @param PhutilDOMNode Parsed table node.
+   * @return wild Result of renderRemarkupTable(), or the raw table string.
+   */
+  private function newTable(PhutilDOMNode $table) {
+    $nodes = $table->selectChildrenWithTags(
+      array(
+        'colgroup',
+        'tr',
+      ));
 
-      // Test that the row contains only valid cells.
-      if (!preg_match('@^'.$cell_fragment.'+$@Usi', $content)) {
-        return $this->fail(
-          $content,
-          pht('Bad table syntax (expected cells %s)', '<td>...</td>'));
-      }
+    $colgroup = null;
+    $rows = array();
+
+    foreach ($nodes as $node) {
+      if ($node->isContentNode()) {
+        $content = $node->getContent();
+
+        // If this is whitespace, ignore it.
+        if (!strlen(trim($content))) {
+          continue;
+        }
 
-      // Capture the cells.
-      $cell_regex = '@'.$cell_fragment.'@Usi';
-      if (!preg_match_all($cell_regex, $content, $matches, PREG_SET_ORDER)) {
-        throw new Exception(
-          pht('Bug in Remarkup tables, parsing fails for input: %s', $text));
+        // If we have nonempty content between the rows, this isn't a valid
+        // table. We can't really do anything reasonable with this, so just
+        // fail out and render the raw text.
+        return $table->newRawString();
       }
 
-      $out_cells = array();
-      foreach ($matches as $cell) {
-        $cell_type = $cell[1];
-        $cell_content = $cell[2];
+      if ($node->getTagName() === 'colgroup') {
+        // This table has multiple "<colgroup />" tags. Just bail out.
+        if ($colgroup !== null) {
+          return $table->newRawString();
+        }
 
-        $out_cells[] = array(
-          'type'      => $cell_type,
-          'content'   => $this->applyRules($cell_content),
-        );
+        // This table has a "<colgroup />" after a "<tr />". We could parse
+        // this, but just reject it out of an abundance of caution.
+        if ($rows) {
+          return $table->newRawString();
+        }
+
+        $colgroup = $node;
+        continue;
       }
 
-      $out_rows[] = array(
-        'type'    => 'tr',
-        'content' => $out_cells,
-      );
+      $rows[] = $node;
     }
 
-    return $this->renderRemarkupTable($out_rows);
-  }
+    $row_specs = array();
 
-  private function fail($near, $message) {
-    $message = sprintf(
-      '%s near: %s',
-      $message,
-      id(new PhutilUTF8StringTruncator())
-      ->setMaximumGlyphs(32000)
-      ->truncateString($near));
+    foreach ($rows as $row) {
+      $cells = $row->selectChildrenWithTags(array('td', 'th'));
 
-    if ($this->getEngine()->isTextMode()) {
-      return '('.$message.')';
+      $cell_specs = array();
+      foreach ($cells as $cell) {
+        if ($cell->isContentNode()) {
+          $content = $cell->getContent();
+
+          if (!strlen(trim($content))) {
+            continue;
+          }
+
+          return $table->newRawString();
+        }
+
+        $content = $cell->getRawContentString();
+        $content = $this->applyRules($content);
+
+        $cell_specs[] = array(
+          'type' => $cell->getTagName(),
+          'content' => $content,
+        );
+      }
+
+      $row_specs[] = array(
+        'type' => 'tr',
+        'content' => $cell_specs,
+      );
     }
 
-    return hsprintf('<div style="color: red;">%s</div>', $message);
+    return $this->renderRemarkupTable($row_specs);
   }
 
 }
diff --git a/src/parser/html/PhutilDOMNode.php b/src/parser/html/PhutilDOMNode.php
new file mode 100644
--- /dev/null
+++ b/src/parser/html/PhutilDOMNode.php
@@ -0,0 +1,227 @@
+<?php
+
+/**
+ * A node in the tree built by PhutilHTMLParser.
+ *
+ * A node is either a content node (it has a non-null content string) or a
+ * tag node (it has a tag name, an attribute map, and a list of children).
+ */
+final class PhutilDOMNode extends Phobject {
+
+  private $content;
+  private $tagName;
+  private $children = array();
+  private $attributes = array();
+  private $parentNode;
+  private $rawString;
+
+  public function setContent($content) {
+    $this->content = $content;
+    return $this;
+  }
+
+  public function getContent() {
+    return $this->content;
+  }
+
+  /**
+   * Test if this is a content node. Any node which has had content set,
+   * including the empty string, is a content node.
+   */
+  public function isContentNode() {
+    return ($this->content !== null);
+  }
+
+  public function setTagName($tag_name) {
+    $this->tagName = $tag_name;
+    return $this;
+  }
+
+  public function getTagName() {
+    return $this->tagName;
+  }
+
+  /**
+   * Append a child node and record this node as its parent.
+   */
+  public function appendChild(PhutilDOMNode $node) {
+    $node->parentNode = $this;
+    $this->children[] = $node;
+    return $this;
+  }
+
+  public function getChildren() {
+    return $this->children;
+  }
+
+  public function getParentNode() {
+    return $this->parentNode;
+  }
+
+  public function setAttributes(array $attributes) {
+    $this->attributes = $attributes;
+    return $this;
+  }
+
+  public function getAttributes() {
+    return $this->attributes;
+  }
+
+  public function setRawString($raw_string) {
+    $this->rawString = $raw_string;
+    return $this;
+  }
+
+  public function getRawString() {
+    return $this->rawString;
+  }
+
+  /**
+   * Convert this subtree into nested arrays: content nodes become
+   * `array('content' => ...)`, tag nodes become an array with "tag",
+   * "attributes" and "children" keys.
+   */
+  public function toDictionary() {
+    if ($this->isContentNode()) {
+      return array(
+        'content' => $this->content,
+      );
+    } else {
+      $children = array();
+
+      foreach ($this->getChildren() as $child) {
+        $children[] = $child->toDictionary();
+      }
+
+      return array(
+        'tag' => $this->getTagName(),
+        'attributes' => $this->getAttributes(),
+        'children' => $children,
+      );
+    }
+  }
+
+  /**
+   * Get a list of the children of a given DOM node, treating unexpected
+   * tags as if they were raw content.
+   *
+   * Adjacent content nodes in the result are merged into a single node.
+   *
+   * @param list<string> Tag names to keep as tag nodes.
+   * @return list<PhutilDOMNode> Selected child nodes.
+   */
+  public function selectChildrenWithTags(array $tag_list) {
+    $tag_map = array_fuse($tag_list);
+
+    $nodes = array();
+    foreach ($this->getChildren() as $child) {
+      // If this is already a content node, just keep it as-is.
+      if ($child->isContentNode()) {
+        $nodes[] = $child;
+        continue;
+      }
+
+      $tag_name = $child->getTagName();
+
+      // If this is a tag that we're allowing, keep it as-is.
+      if (isset($tag_map[$tag_name])) {
+        $nodes[] = $child;
+        continue;
+      }
+
+      // Otherwise, this is some other tag. Convert it into a content
+      // node. Only the tag's own raw string is used here; its children are
+      // not included in the converted content.
+
+      $raw_content = $child->getRawString();
+
+      $nodes[] = id(new self())
+        ->setContent($raw_content)
+        ->setRawString($raw_content);
+    }
+
+    return $this->mergeContentNodes($nodes);
+  }
+
+  /**
+   * Get the raw string for this node's children, treating every child tag
+   * as content. Returns the empty string if there is no content.
+   */
+  public function getRawContentString() {
+    $content_node = $this->selectChildrenWithTags(array());
+
+    if (!$content_node) {
+      return '';
+    }
+
+    return head($content_node)->getRawString();
+  }
+
+  /**
+   * Recursively merge adjacent content nodes throughout this subtree.
+   */
+  public function mergeContent() {
+    $this->children = $this->mergeContentNodes($this->children);
+
+    foreach ($this->getChildren() as $child) {
+      $child->parentNode = $this;
+      $child->mergeContent();
+    }
+
+    return $this;
+  }
+
+  /**
+   * Given a list of nodes, combine sequences of multiple adjacent content
+   * nodes into single nodes. Empty content is dropped.
+   *
+   * @param list<PhutilDOMNode> Nodes to merge.
+   * @return list<PhutilDOMNode> Nodes with content runs collapsed.
+   */
+  private function mergeContentNodes(array $nodes) {
+    $list = array();
+    $content_block = array();
+    foreach ($nodes as $node) {
+      if ($node->isContentNode()) {
+        $content_block[] = $node;
+        continue;
+      }
+
+      $list[] = $content_block;
+      $content_block = array();
+
+      $list[] = $node;
+    }
+
+    $list[] = $content_block;
+
+    $results = array();
+    foreach ($list as $item) {
+      if (!is_array($item)) {
+        $results[] = $item;
+        continue;
+      }
+
+      if (!$item) {
+        continue;
+      }
+
+      $parts = array();
+      foreach ($item as $content_node) {
+        $parts[] = $content_node->getRawString();
+      }
+      $parts = implode('', $parts);
+
+      if (!strlen($parts)) {
+        continue;
+      }
+
+      $results[] = id(new self())
+        ->setContent($parts)
+        ->setRawString($parts);
+    }
+
+    return $results;
+  }
+
+}
diff --git a/src/parser/html/PhutilHTMLParser.php b/src/parser/html/PhutilHTMLParser.php
new file mode 100644
--- /dev/null
+++ b/src/parser/html/PhutilHTMLParser.php
@@ -0,0 +1,452 @@
+<?php
+
+/**
+ * Lenient parser which turns text containing HTML-like tags into a tree of
+ * PhutilDOMNode objects. Anything which can not be parsed as a tag is kept
+ * as content.
+ */
+final class PhutilHTMLParser extends Phobject {
+
+  private $cursor;
+
+  /**
+   * Parse a string into a DOM tree.
+   *
+   * @param string Text to parse.
+   * @return PhutilDOMNode Root node; parsed nodes are its descendants.
+   */
+  public function parseDocument($corpus) {
+    // Divide the block into sequences of "tag" and "non-tag" content. Tag
+    // content is anything between angle brackets ("<" and ">"). Non-tag
+    // content is anything else.
+
+    $segment_pos = 0;
+    $segments = array();
+    $in_tag = false;
+    $corpus_length = strlen($corpus);
+
+    for ($ii = 0; $ii < $corpus_length; $ii++) {
+      $c = $corpus[$ii];
+
+      if ($in_tag && ($c === '>')) {
+        if ($segment_pos !== null) {
+          $segments[] = array(
+            'tag' => $in_tag,
+            'pos' => $segment_pos,
+            'end' => $ii + 1,
+          );
+        }
+
+        $segment_pos = $ii + 1;
+        $in_tag = false;
+        continue;
+      }
+
+      if (!$in_tag && ($c === '<')) {
+        $segments[] = array(
+          'tag' => $in_tag,
+          'pos' => $segment_pos,
+          'end' => $ii,
+        );
+
+        $segment_pos = $ii;
+        $in_tag = true;
+        continue;
+      }
+    }
+
+    // Add whatever content was left at the end of the string. If we were in
+    // a tag but did not find a closing ">", we treat this as normal content.
+    $segments[] = array(
+      'tag' => false,
+      'pos' => $segment_pos,
+      'end' => $ii,
+    );
+
+    // Slice the marked segments out of the raw corpus so we get a list of
+    // "tag" strings and a list of "non-tag" strings.
+
+    $parts = array();
+    foreach ($segments as $segment) {
+      $tag = $segment['tag'];
+      $pos = $segment['pos'];
+      $len = $segment['end'] - $pos;
+
+      // If this is a tag, we'll drop the "<" at the beginning and the ">"
+      // at the end here.
+      if ($tag) {
+        $slice_pos = $pos + 1;
+        $slice_len = $len - 2;
+      } else {
+        $slice_pos = $pos;
+        $slice_len = $len;
+      }
+
+      if (($slice_pos < $corpus_length) && ($slice_len > 0)) {
+        $content = substr($corpus, $slice_pos, $slice_len);
+      } else {
+        $content = '';
+      }
+
+      $parts[] = array(
+        'tag' => $tag,
+        'pos' => $pos,
+        'len' => $len,
+        'content' => $content,
+      );
+    }
+
+    $root = new PhutilDOMNode();
+    $this->setCursor($root);
+
+    foreach ($parts as $part) {
+      $tag = $this->newTagDOMNode($part);
+
+      if ($tag !== null) {
+        continue;
+      }
+
+      $content = $part['content'];
+
+      // If this part is a tag, restore the angle brackets.
+      if ($part['tag']) {
+        $content = '<'.$content.'>';
+      }
+
+      $node = id(new PhutilDOMNode())
+        ->setContent($content)
+        ->setRawString($content);
+
+      $this->getCursor()->appendChild($node);
+    }
+
+    $root->mergeContent();
+
+    return $root;
+  }
+
+  /**
+   * Try to interpret one part of the document as a tag and apply it to the
+   * tree at the current cursor.
+   *
+   * @param map<string, wild> Part with "tag" and "content" keys.
+   * @return PhutilDOMNode|bool|null New node for an opening tag, true for a
+   *   matched closing tag, or null if the part should be treated as content.
+   */
+  private function newTagDOMNode(array $part) {
+    if (!$part['tag']) {
+      return null;
+    }
+
+    $raw_content = $part['content'];
+    $content = $raw_content;
+
+    $content = trim($content);
+    $content_len = strlen($content);
+
+    // If the tag content begins with "/", like "</td>", strip the slash
+    // off and mark this as a closing tag.
+    $is_close = false;
+    if ($content_len > 0 && $content[0] === '/') {
+      $is_close = true;
+      $content = substr($content, 1);
+      $content = trim($content);
+      $content_len = strlen($content);
+    }
+
+    // If the tag content ends with "/", like "<td />", strip the slash off
+    // and mark this as self-closing.
+    $self_close = false;
+    if ($content_len > 0 && $content[$content_len - 1] === '/') {
+      $self_close = true;
+      $content = substr($content, 0, $content_len - 1);
+      $content = trim($content);
+      $content_len = strlen($content);
+    }
+
+    // If this tag is both a closing tag and a self-closing tag, it is
+    // not formatted correctly. Treat it as content.
+    if ($self_close && $is_close) {
+      return null;
+    }
+
+    // Now, split the rest of the tag into the tag name and tag attributes.
+    $pieces = preg_split('/\s+/', $content, 2);
+    $tag_name = $pieces[0];
+
+    if (count($pieces) > 1) {
+      $attributes = $pieces[1];
+    } else {
+      $attributes = '';
+    }
+
+    // If there's no tag name, this tag is not valid. Treat it as content.
+    if (!strlen($tag_name)) {
+      return null;
+    }
+
+    // If this is a closing tag with attributes, it's not valid. Treat it
+    // as content.
+    if ($is_close && strlen($attributes)) {
+      return null;
+    }
+
+    $tag_name = phutil_utf8_strtolower($tag_name);
+
+    // If we find a valid closing tag, try to find a matching tag on the stack.
+    // If we find a matching tag, close it.
+    // If we do not find a matching tag, treat the closing tag as content.
+    if ($is_close) {
+      $cursor = $this->getCursor();
+
+      while ($cursor) {
+        if ($cursor->getTagName() === $tag_name) {
+          $parent = $cursor->getParentNode();
+          $this->setCursor($parent);
+          return true;
+        }
+        $cursor = $cursor->getParentNode();
+      }
+
+      return null;
+    }
+
+    if (strlen($attributes)) {
+      $attribute_map = $this->parseAttributes($attributes);
+      // If the attributes can't be parsed, treat the tag as content.
+      if ($attribute_map === null) {
+        return null;
+      }
+    } else {
+      $attribute_map = array();
+    }
+
+    $node = id(new PhutilDOMNode())
+      ->setTagName($tag_name)
+      ->setAttributes($attribute_map)
+      ->setRawString('<'.$raw_content.'>');
+
+    $cursor = $this->getCursor();
+    $cursor->appendChild($node);
+
+    if (!$self_close) {
+      $this->setCursor($node);
+    }
+
+    return $node;
+  }
+
+  private function setCursor(PhutilDOMNode $cursor) {
+    $this->cursor = $cursor;
+    return $this;
+  }
+
+  private function getCursor() {
+    return $this->cursor;
+  }
+
+  /**
+   * Parse the attribute portion of a tag with a small state machine.
+   *
+   * @param string Attribute text, like `a=1 b c="d"`.
+   * @return map<string, wild>|null Map of lowercased names to string values
+   *   (or true for valueless attributes), or null if the text is invalid.
+   */
+  private function parseAttributes($attributes) {
+    $state = 'key';
+
+    $whitespace = array(
+      ' ' => true,
+      "\n" => true,
+      "\t" => true,
+      "\r" => true,
+    );
+
+    $map = array();
+    $len = strlen($attributes);
+    for ($ii = 0; $ii < $len; $ii++) {
+      $c = $attributes[$ii];
+      $is_space = isset($whitespace[$c]);
+
+      switch ($state) {
+        case 'key':
+          // We're looking for the start of an attribute name.
+
+          // Skip over any whitespace.
+          if ($is_space) {
+            break;
+          }
+
+          // If we see "<tag =...", that isn't valid. Treat this tag as
+          // content.
+          if ($c === '=') {
+            return null;
+          }
+
+          // If we see a quotation mark with no attribute name, that isn't
+          // valid. Treat this tag as content.
+          if ($c === '"') {
+            return null;
+          }
+
+          // Any other character marks the beginning of an attribute name.
+          // Switch the parser state to "name" to parse the name.
+          $name_pos = $ii;
+          $state = 'name';
+          break;
+        case 'name':
+          // We're looking for the end of an attribute name.
+
+          // Finding a "=" or a space character ends the attribute name.
+          // Save it, then figure out what to do with the parser state.
+          if ($c === '=' || $is_space) {
+            $name_value = substr($attributes, $name_pos, $ii - $name_pos);
+            $name_value = phutil_utf8_strtolower($name_value);
+
+            // If this attribute already exists, the tag is invalid. This means
+            // the input is something like "<tag a=1 a=2>".
+            if (isset($map[$name_value])) {
+              return null;
+            }
+          }
+
+          // If we find an "=", that's the end of the name. Next, we're going
+          // to parse a value.
+          if ($c === '=') {
+            $state = 'value';
+            break;
+          }
+
+          // If we find whitespace, that's the end of the name. We're going
+          // to look for an "=".
+          if ($is_space) {
+            $state = 'equals';
+            break;
+          }
+
+          break;
+        case 'equals':
+          // We've parsed the name of an attribute and are looking for an
+          // "=" character.
+
+          // Skip over any whitespace.
+          if ($is_space) {
+            break;
+          }
+
+          // This is the "=" we're looking for, so we're good to go.
+          if ($c === '=') {
+            $state = 'value';
+            break;
+          }
+
+          // If this is anything else, this is an attribute name with no
+          // value. Treat it as "true" and move on. This corresponds to an
+          // input like "<input disabled>".
+          $map[$name_value] = true;
+          $name_pos = $ii;
+          $state = 'name';
+          break;
+        case 'value':
+          // We've parsed an "=" and are looking for the start of a value.
+
+          // Skip over any whitespace.
+          if ($is_space) {
+            break;
+          }
+
+          // Don't accept "<tag a==" to mean that key "a" has a value of
+          // "=", since this is silly. To specify a value beginning with "=",
+          // you have to quote it.
+          if ($c === '=') {
+            return null;
+          }
+
+          // Anything else is a value.
+          $value_pos = $ii;
+
+          // This is a quotation mark, so parse a quoted value.
+          if ($c === '"') {
+            $value_pos = $value_pos + 1;
+            $state = 'quoted';
+          } else {
+            $state = 'unquoted';
+          }
+          break;
+        case 'quoted':
+          // We've started parsing a quoted value, so look for the closing
+          // quote.
+
+          // We found the closing quote, so pull out the actual value.
+          if ($c === '"') {
+            $attr_value = substr($attributes, $value_pos, $ii - $value_pos);
+
+            $map[$name_value] = $attr_value;
+            $state = 'key';
+            break;
+          }
+
+          // Anything else is more text in the quoted value.
+          break;
+        case 'unquoted':
+          // We've started parsing an unquoted value, so look for terminating
+          // whitespace.
+
+          // We've found some whitespace, so pull out the actual value.
+          if ($is_space) {
+            $attr_value = substr($attributes, $value_pos, $ii - $value_pos);
+
+            $map[$name_value] = $attr_value;
+            $state = 'key';
+            break;
+          }
+
+          // Anything else is more text in the unquoted value.
+          break;
+      }
+    }
+
+    switch ($state) {
+      case 'key':
+        // We were looking for the start of an attribute name, so there's
+        // nothing to clean up.
+        break;
+      case 'name':
+        // We were looking for the end of an attribute name. Treat whatever
+        // we found as a name.
+        $name_value = substr($attributes, $name_pos, $len - $name_pos);
+        $name_value = phutil_utf8_strtolower($name_value);
+
+        if (isset($map[$name_value])) {
+          return null;
+        }
+
+        $map[$name_value] = true;
+        break;
+      case 'equals':
+      case 'value':
+        // We found an attribute name followed by whitespace or an "=". Treat
+        // whatever we found as a valid attribute name with no value.
+
+        if (isset($map[$name_value])) {
+          return null;
+        }
+
+        $map[$name_value] = true;
+        break;
+      case 'quoted':
+      case 'unquoted':
+        // We were parsing a value but ran out of characters before we found
+        // the delimiter or closing quote. Treat whatever we found as a quoted
+        // value.
+
+        $attr_value = substr($attributes, $value_pos, $len - $value_pos);
+
+        $map[$name_value] = $attr_value;
+        break;
+    }
+
+    return $map;
+  }
+
+}
diff --git a/src/parser/html/__tests__/PhutilHTMLParserTestCase.php b/src/parser/html/__tests__/PhutilHTMLParserTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/html/__tests__/PhutilHTMLParserTestCase.php
@@ -0,0 +1,48 @@
+<?php
+
+final class PhutilHTMLParserTestCase
+  extends PhutilTestCase {
+
+  /**
+   * Run every fixture in "data/": each file holds parser input and the
+   * expected JSON tree, separated by a "~~~~~~~~~~" line.
+   */
+  public function testHTMLParser() {
+
+    $root = dirname(__FILE__).'/data/';
+    $tests = Filesystem::listDirectory($root, $include_hidden = false);
+
+    foreach ($tests as $test) {
+      $path = $root.$test;
+      $data = Filesystem::readFile($path);
+
+      $parts = explode("\n~~~~~~~~~~\n", $data);
+      if (count($parts) !== 2) {
+        throw new Exception(
+          pht(
+            'Expected "~~~~~~~~~~" delimiter in test "%s".',
+            $test));
+      }
+
+      $input = $parts[0];
+
+      $expect = phutil_json_decode($parts[1]);
+
+      $document = id(new PhutilHTMLParser())
+        ->parseDocument($input);
+
+      // We're just testing the child list of the root node since this
+      // reduces the amount of boilerplate in the test cases.
+      $list = array();
+      foreach ($document->getChildren() as $child) {
+        $list[] = $child->toDictionary();
+      }
+
+      $this->assertEqual(
+        $expect,
+        $list,
+        pht('DOM tree for "%s".', $test));
+    }
+  }
+
+}
diff --git a/src/parser/html/__tests__/data/attributes-basic.txt b/src/parser/html/__tests__/data/attributes-basic.txt
new file mode 100644
--- /dev/null
+++ b/src/parser/html/__tests__/data/attributes-basic.txt
@@ -0,0 +1,13 @@
+<a b=1 c d="e" />
+~~~~~~~~~~
+[
+  {
+    "tag": "a",
+    "attributes": {
+      "b": "1",
+      "c": true,
+      "d": "e"
+    },
+    "children": []
+  }
+]
diff --git a/src/parser/html/__tests__/data/content-angle.txt b/src/parser/html/__tests__/data/content-angle.txt
new file mode 100644
--- /dev/null
+++ b/src/parser/html/__tests__/data/content-angle.txt
@@ -0,0 +1,7 @@
+o< quack
+~~~~~~~~~~
+[
+  {
+    "content": "o< quack"
+  }
+]
diff --git a/src/parser/html/__tests__/data/content-simple.txt b/src/parser/html/__tests__/data/content-simple.txt
new file mode 100644
--- /dev/null
+++ b/src/parser/html/__tests__/data/content-simple.txt
@@ -0,0 +1,7 @@
+quack
+~~~~~~~~~~
+[
+  {
+    "content": "quack"
+  }
+]
diff --git a/src/parser/html/__tests__/data/tag-mismatch.txt b/src/parser/html/__tests__/data/tag-mismatch.txt
new file mode 100644
--- /dev/null
+++ b/src/parser/html/__tests__/data/tag-mismatch.txt
@@ -0,0 +1,21 @@
+<a><b><c></b></a>
+~~~~~~~~~~
+[
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": [
+      {
+        "tag": "b",
+        "attributes": {},
+        "children": [
+          {
+            "tag": "c",
+            "attributes": {},
+            "children": []
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/src/parser/html/__tests__/data/tag-simple.txt b/src/parser/html/__tests__/data/tag-simple.txt
new file mode 100644
--- /dev/null
+++ b/src/parser/html/__tests__/data/tag-simple.txt
@@ -0,0 +1,135 @@
+<a/>
+<a/ >
+< a/>
+<a />
+<a / >
+< a />
+< a / >
+<a></a>
+<a ></a >
+< a>< /a>
+< a >< /a >
+<a></ a>
+<a ></ a >
+< a>< / a>
+< a >< / a >
+~~~~~~~~~~
+[
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  },
+  {
+    "content": "\n"
+  },
+  {
+    "tag": "a",
+    "attributes": {},
+    "children": []
+  }
+]
diff --git a/src/parser/html/__tests__/data/tag-table.txt b/src/parser/html/__tests__/data/tag-table.txt
new file mode 100644
--- /dev/null
+++ b/src/parser/html/__tests__/data/tag-table.txt
@@ -0,0 +1,39 @@
+<table><tr><td>a</td><td>b</td><td /></tr></table>
+~~~~~~~~~~
+[
+  {
+    "tag": "table",
+    "attributes": {},
+    "children": [
+      {
+        "tag": "tr",
+        "attributes": {},
+        "children": [
+          {
+            "tag": "td",
+            "attributes": {},
+            "children": [
+              {
+                "content": "a"
+              }
+            ]
+          },
+          {
+            "tag": "td",
+            "attributes": {},
+            "children": [
+              {
+                "content": "b"
+              }
+            ]
+          },
+          {
+            "tag": "td",
+            "attributes": {},
+            "children": []
+          }
+        ]
+      }
+    ]
+  }
+]