Changeset View
Changeset View
Standalone View
Standalone View
src/infrastructure/markup/blockrule/PhutilRemarkupListBlockRule.php
- This file was added.
<?php | |||||
final class PhutilRemarkupListBlockRule extends PhutilRemarkupBlockRule { | |||||
/** | |||||
* This rule must apply before the Code block rule because it needs to | |||||
* win blocks which begin ` - Lorem ipsum`. | |||||
*/ | |||||
public function getPriority() { | |||||
return 400; | |||||
} | |||||
public function getMatchingLineCount(array $lines, $cursor) { | |||||
$num_lines = 0; | |||||
$first_line = $cursor; | |||||
$is_one_line = false; | |||||
while (isset($lines[$cursor])) { | |||||
if (!$num_lines) { | |||||
if (preg_match(self::START_BLOCK_PATTERN, $lines[$cursor])) { | |||||
$num_lines++; | |||||
$cursor++; | |||||
$is_one_line = true; | |||||
continue; | |||||
} | |||||
} else { | |||||
if (preg_match(self::CONT_BLOCK_PATTERN, $lines[$cursor])) { | |||||
$num_lines++; | |||||
$cursor++; | |||||
$is_one_line = false; | |||||
continue; | |||||
} | |||||
// Allow lists to continue across multiple paragraphs, as long as lines | |||||
// are indented or a single empty line separates indented lines. | |||||
$this_empty = !strlen(trim($lines[$cursor])); | |||||
$this_indented = preg_match('/^ /', $lines[$cursor]); | |||||
$next_empty = true; | |||||
$next_indented = false; | |||||
if (isset($lines[$cursor + 1])) { | |||||
$next_empty = !strlen(trim($lines[$cursor + 1])); | |||||
$next_indented = preg_match('/^ /', $lines[$cursor + 1]); | |||||
} | |||||
if ($this_empty || $this_indented) { | |||||
if (($this_indented && !$this_empty) || | |||||
($next_indented && !$next_empty)) { | |||||
$num_lines++; | |||||
$cursor++; | |||||
continue; | |||||
} | |||||
} | |||||
if ($this_empty) { | |||||
$num_lines++; | |||||
} | |||||
} | |||||
break; | |||||
} | |||||
// If this list only has one item in it, and the list marker is "#", and | |||||
// it's not the last line in the input, parse it as a header instead of a | |||||
// list. This produces better behavior for alternate Markdown headers. | |||||
if ($is_one_line) { | |||||
if (($first_line + $num_lines) < count($lines)) { | |||||
if (strncmp($lines[$first_line], '#', 1) === 0) { | |||||
return 0; | |||||
} | |||||
} | |||||
} | |||||
return $num_lines; | |||||
} | |||||
/** | |||||
* The maximum sub-list depth you can nest to. Avoids silliness and blowing | |||||
* the stack. | |||||
*/ | |||||
const MAXIMUM_LIST_NESTING_DEPTH = 12; | |||||
const START_BLOCK_PATTERN = '@^\s*(?:[-*#]+|([1-9][0-9]*)[.)]|\[\D?\])\s+@'; | |||||
const CONT_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)]|\[\D?\])\s+@'; | |||||
const STRIP_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)])\s*@'; | |||||
public function markupText($text, $children) { | |||||
$items = array(); | |||||
$lines = explode("\n", $text); | |||||
// We allow users to delimit lists using either differing indentation | |||||
// levels: | |||||
// | |||||
// - a | |||||
// - b | |||||
// | |||||
// ...or differing numbers of item-delimiter characters: | |||||
// | |||||
// - a | |||||
// -- b | |||||
// | |||||
// If they use the second style but block-indent the whole list, we'll | |||||
// get the depth counts wrong for the first item. To prevent this, | |||||
// un-indent every item by the minimum indentation level for the whole | |||||
// block before we begin parsing. | |||||
$regex = self::START_BLOCK_PATTERN; | |||||
$min_space = PHP_INT_MAX; | |||||
foreach ($lines as $ii => $line) { | |||||
$matches = null; | |||||
if (preg_match($regex, $line)) { | |||||
$regex = self::CONT_BLOCK_PATTERN; | |||||
if (preg_match('/^(\s+)/', $line, $matches)) { | |||||
$space = strlen($matches[1]); | |||||
} else { | |||||
$space = 0; | |||||
} | |||||
$min_space = min($min_space, $space); | |||||
} | |||||
} | |||||
$regex = self::START_BLOCK_PATTERN; | |||||
if ($min_space) { | |||||
foreach ($lines as $key => $line) { | |||||
if (preg_match($regex, $line)) { | |||||
$regex = self::CONT_BLOCK_PATTERN; | |||||
$lines[$key] = substr($line, $min_space); | |||||
} | |||||
} | |||||
} | |||||
// The input text may have linewraps in it, like this: | |||||
// | |||||
// - derp derp derp derp | |||||
// derp derp derp derp | |||||
// - blarp blarp blarp blarp | |||||
// | |||||
// Group text lines together into list items, stored in $items. So the | |||||
// result in the above case will be: | |||||
// | |||||
// array( | |||||
// array( | |||||
// "- derp derp derp derp", | |||||
// " derp derp derp derp", | |||||
// ), | |||||
// array( | |||||
// "- blarp blarp blarp blarp", | |||||
// ), | |||||
// ); | |||||
$item = array(); | |||||
$starts_at = null; | |||||
$regex = self::START_BLOCK_PATTERN; | |||||
foreach ($lines as $line) { | |||||
$match = null; | |||||
if (preg_match($regex, $line, $match)) { | |||||
if (!$starts_at && !empty($match[1])) { | |||||
$starts_at = $match[1]; | |||||
} | |||||
$regex = self::CONT_BLOCK_PATTERN; | |||||
if ($item) { | |||||
$items[] = $item; | |||||
$item = array(); | |||||
} | |||||
} | |||||
$item[] = $line; | |||||
} | |||||
if ($item) { | |||||
$items[] = $item; | |||||
} | |||||
if (!$starts_at) { | |||||
$starts_at = 1; | |||||
} | |||||
// Process each item to normalize the text, remove line wrapping, and | |||||
// determine its depth (indentation level) and style (ordered vs unordered). | |||||
// | |||||
// We preserve consecutive linebreaks and interpret them as paragraph | |||||
// breaks. | |||||
// | |||||
// Given the above example, the processed array will look like: | |||||
// | |||||
// array( | |||||
// array( | |||||
// 'text' => 'derp derp derp derp derp derp derp derp', | |||||
// 'depth' => 0, | |||||
// 'style' => '-', | |||||
// ), | |||||
// array( | |||||
// 'text' => 'blarp blarp blarp blarp', | |||||
// 'depth' => 0, | |||||
// 'style' => '-', | |||||
// ), | |||||
// ); | |||||
$has_marks = false; | |||||
foreach ($items as $key => $item) { | |||||
// Trim space around newlines, to strip trailing whitespace and formatting | |||||
// indentation. | |||||
$item = preg_replace('/ *(\n+) */', '\1', implode("\n", $item)); | |||||
// Replace single newlines with a space. Preserve multiple newlines as | |||||
// paragraph breaks. | |||||
$item = preg_replace('/(?<!\n)\n(?!\n)/', ' ', $item); | |||||
$item = rtrim($item); | |||||
if (!strlen($item)) { | |||||
unset($items[$key]); | |||||
continue; | |||||
} | |||||
$matches = null; | |||||
if (preg_match('/^\s*([-*#]{2,})/', $item, $matches)) { | |||||
// Alternate-style indents; use number of list item symbols. | |||||
$depth = strlen($matches[1]) - 1; | |||||
} else if (preg_match('/^(\s+)/', $item, $matches)) { | |||||
// Markdown-style indents; use indent depth. | |||||
$depth = strlen($matches[1]); | |||||
} else { | |||||
$depth = 0; | |||||
} | |||||
if (preg_match('/^\s*(?:#|[0-9])/', $item)) { | |||||
$style = '#'; | |||||
} else { | |||||
$style = '-'; | |||||
} | |||||
// Strip leading indicators off the item. | |||||
$text = preg_replace(self::STRIP_BLOCK_PATTERN, '', $item); | |||||
// Look for "[]", "[ ]", "[*]", "[x]", etc., which we render as a | |||||
// checkbox. We don't render [1], [2], etc., as checkboxes, as these | |||||
// are often used as footnotes. | |||||
$mark = null; | |||||
$matches = null; | |||||
if (preg_match('/^\s*\[(\D?)\]\s*/', $text, $matches)) { | |||||
if (strlen(trim($matches[1]))) { | |||||
$mark = true; | |||||
} else { | |||||
$mark = false; | |||||
} | |||||
$has_marks = true; | |||||
$text = substr($text, strlen($matches[0])); | |||||
} | |||||
$items[$key] = array( | |||||
'text' => $text, | |||||
'depth' => $depth, | |||||
'style' => $style, | |||||
'mark' => $mark, | |||||
); | |||||
} | |||||
$items = array_values($items); | |||||
// Users can create a sub-list by indenting any deeper amount than the | |||||
// previous list, so these are both valid: | |||||
// | |||||
// - a | |||||
// - b | |||||
// | |||||
// - a | |||||
// - b | |||||
// | |||||
// In the former case, we'll have depths (0, 2). In the latter case, depths | |||||
// (0, 4). We don't actually care about how many spaces there are, only | |||||
// how many list indentation levels (that is, we want to map both of | |||||
// those cases to (0, 1), indicating "outermost list" and "first sublist"). | |||||
// | |||||
// This is made more complicated because lists at two different indentation | |||||
// levels might be at the same list level: | |||||
// | |||||
// - a | |||||
// - b | |||||
// - c | |||||
// - d | |||||
// | |||||
// Here, 'b' and 'd' are at the same list level (2) but different indent | |||||
// levels (2, 4). | |||||
// | |||||
// Users can also create "staircases" like this: | |||||
// | |||||
// - a | |||||
// - b | |||||
// # c | |||||
// | |||||
// While this is silly, we'd like to render it as faithfully as possible. | |||||
// | |||||
// In order to do this, we convert the list of nodes into a tree, | |||||
// normalizing indentation levels and inserting dummy nodes as necessary to | |||||
// make the tree well-formed. See additional notes at buildTree(). | |||||
// | |||||
// In the case above, the result is a tree like this: | |||||
// | |||||
// - <null> | |||||
// - <null> | |||||
// - a | |||||
// - b | |||||
// # c | |||||
$l = 0; | |||||
$r = count($items); | |||||
$tree = $this->buildTree($items, $l, $r, $cur_level = 0); | |||||
// We may need to open a list on a <null> node, but they do not have | |||||
// list style information yet. We need to propagate list style information | |||||
// backward through the tree. In the above example, the tree now looks | |||||
// like this: | |||||
// | |||||
// - <null (style=#)> | |||||
// - <null (style=-)> | |||||
// - a | |||||
// - b | |||||
// # c | |||||
$this->adjustTreeStyleInformation($tree); | |||||
// Finally, we have enough information to render the tree. | |||||
$out = $this->renderTree($tree, 0, $has_marks, $starts_at); | |||||
if ($this->getEngine()->isTextMode()) { | |||||
$out = implode('', $out); | |||||
$out = rtrim($out, "\n"); | |||||
$out = preg_replace('/ +$/m', '', $out); | |||||
return $out; | |||||
} | |||||
return phutil_implode_html('', $out); | |||||
} | |||||
/** | |||||
* See additional notes in @{method:markupText}. | |||||
*/ | |||||
private function buildTree(array $items, $l, $r, $cur_level) { | |||||
if ($l == $r) { | |||||
return array(); | |||||
} | |||||
if ($cur_level > self::MAXIMUM_LIST_NESTING_DEPTH) { | |||||
// This algorithm is recursive and we don't need you blowing the stack | |||||
// with your oh-so-clever 50,000-item-deep list. Cap indentation levels | |||||
// at a reasonable number and just shove everything deeper up to this | |||||
// level. | |||||
$nodes = array(); | |||||
for ($ii = $l; $ii < $r; $ii++) { | |||||
$nodes[] = array( | |||||
'level' => $cur_level, | |||||
'items' => array(), | |||||
) + $items[$ii]; | |||||
} | |||||
return $nodes; | |||||
} | |||||
$min = $l; | |||||
for ($ii = $r - 1; $ii >= $l; $ii--) { | |||||
if ($items[$ii]['depth'] <= $items[$min]['depth']) { | |||||
$min = $ii; | |||||
} | |||||
} | |||||
$min_depth = $items[$min]['depth']; | |||||
$nodes = array(); | |||||
if ($min != $l) { | |||||
$nodes[] = array( | |||||
'text' => null, | |||||
'level' => $cur_level, | |||||
'style' => null, | |||||
'mark' => null, | |||||
'items' => $this->buildTree($items, $l, $min, $cur_level + 1), | |||||
); | |||||
} | |||||
$last = $min; | |||||
for ($ii = $last + 1; $ii < $r; $ii++) { | |||||
if ($items[$ii]['depth'] == $min_depth) { | |||||
$nodes[] = array( | |||||
'level' => $cur_level, | |||||
'items' => $this->buildTree($items, $last + 1, $ii, $cur_level + 1), | |||||
) + $items[$last]; | |||||
$last = $ii; | |||||
} | |||||
} | |||||
$nodes[] = array( | |||||
'level' => $cur_level, | |||||
'items' => $this->buildTree($items, $last + 1, $r, $cur_level + 1), | |||||
) + $items[$last]; | |||||
return $nodes; | |||||
} | |||||
/** | |||||
* See additional notes in @{method:markupText}. | |||||
*/ | |||||
private function adjustTreeStyleInformation(array &$tree) { | |||||
// The effect here is just to walk backward through the nodes at this level | |||||
// and apply the first style in the list to any empty nodes we inserted | |||||
// before it. As we go, also recurse down the tree. | |||||
$style = '-'; | |||||
for ($ii = count($tree) - 1; $ii >= 0; $ii--) { | |||||
if ($tree[$ii]['style'] !== null) { | |||||
// This is the earliest node we've seen with style, so set the | |||||
// style to its style. | |||||
$style = $tree[$ii]['style']; | |||||
} else { | |||||
// This node has no style, so apply the current style. | |||||
$tree[$ii]['style'] = $style; | |||||
} | |||||
if ($tree[$ii]['items']) { | |||||
$this->adjustTreeStyleInformation($tree[$ii]['items']); | |||||
} | |||||
} | |||||
} | |||||
/** | |||||
* See additional notes in @{method:markupText}. | |||||
*/ | |||||
private function renderTree( | |||||
array $tree, | |||||
$level, | |||||
$has_marks, | |||||
$starts_at = 1) { | |||||
$style = idx(head($tree), 'style'); | |||||
$out = array(); | |||||
if (!$this->getEngine()->isTextMode()) { | |||||
switch ($style) { | |||||
case '#': | |||||
$tag = 'ol'; | |||||
break; | |||||
case '-': | |||||
$tag = 'ul'; | |||||
break; | |||||
} | |||||
$start_attr = null; | |||||
if (ctype_digit($starts_at) && $starts_at > 1) { | |||||
$start_attr = hsprintf(' start="%d"', $starts_at); | |||||
} | |||||
if ($has_marks) { | |||||
$out[] = hsprintf( | |||||
'<%s class="remarkup-list remarkup-list-with-checkmarks"%s>', | |||||
$tag, | |||||
$start_attr); | |||||
} else { | |||||
$out[] = hsprintf( | |||||
'<%s class="remarkup-list"%s>', | |||||
$tag, | |||||
$start_attr); | |||||
} | |||||
$out[] = "\n"; | |||||
} | |||||
$number = $starts_at; | |||||
foreach ($tree as $item) { | |||||
if ($this->getEngine()->isTextMode()) { | |||||
if ($item['text'] === null) { | |||||
// Don't render anything. | |||||
} else { | |||||
$indent = str_repeat(' ', 2 * $level); | |||||
$out[] = $indent; | |||||
if ($item['mark'] !== null) { | |||||
if ($item['mark']) { | |||||
$out[] = '[X] '; | |||||
} else { | |||||
$out[] = '[ ] '; | |||||
} | |||||
} else { | |||||
switch ($style) { | |||||
case '#': | |||||
$out[] = $number.'. '; | |||||
$number++; | |||||
break; | |||||
case '-': | |||||
$out[] = '- '; | |||||
break; | |||||
} | |||||
} | |||||
$parts = preg_split('/\n{2,}/', $item['text']); | |||||
foreach ($parts as $key => $part) { | |||||
if ($key != 0) { | |||||
$out[] = "\n\n ".$indent; | |||||
} | |||||
$out[] = $this->applyRules($part); | |||||
} | |||||
$out[] = "\n"; | |||||
} | |||||
} else { | |||||
if ($item['text'] === null) { | |||||
$out[] = hsprintf('<li class="remarkup-list-item phantom-item">'); | |||||
} else { | |||||
if ($item['mark'] !== null) { | |||||
if ($item['mark'] == true) { | |||||
$out[] = hsprintf( | |||||
'<li class="remarkup-list-item remarkup-checked-item">'); | |||||
} else { | |||||
$out[] = hsprintf( | |||||
'<li class="remarkup-list-item remarkup-unchecked-item">'); | |||||
} | |||||
$out[] = phutil_tag( | |||||
'input', | |||||
array( | |||||
'type' => 'checkbox', | |||||
'checked' => ($item['mark'] ? 'checked' : null), | |||||
'disabled' => 'disabled', | |||||
)); | |||||
$out[] = ' '; | |||||
} else { | |||||
$out[] = hsprintf('<li class="remarkup-list-item">'); | |||||
} | |||||
$parts = preg_split('/\n{2,}/', $item['text']); | |||||
foreach ($parts as $key => $part) { | |||||
if ($key != 0) { | |||||
$out[] = array( | |||||
"\n", | |||||
phutil_tag('br'), | |||||
phutil_tag('br'), | |||||
"\n", | |||||
); | |||||
} | |||||
$out[] = $this->applyRules($part); | |||||
} | |||||
} | |||||
} | |||||
if ($item['items']) { | |||||
$subitems = $this->renderTree($item['items'], $level + 1, $has_marks); | |||||
foreach ($subitems as $i) { | |||||
$out[] = $i; | |||||
} | |||||
} | |||||
if (!$this->getEngine()->isTextMode()) { | |||||
$out[] = hsprintf("</li>\n"); | |||||
} | |||||
} | |||||
if (!$this->getEngine()->isTextMode()) { | |||||
switch ($style) { | |||||
case '#': | |||||
$out[] = hsprintf('</ol>'); | |||||
break; | |||||
case '-': | |||||
$out[] = hsprintf('</ul>'); | |||||
break; | |||||
} | |||||
} | |||||
return $out; | |||||
} | |||||
} |