Changeset View
Changeset View
Standalone View
Standalone View
src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php
Show First 20 Lines • Show All 147 Lines • ▼ Show 20 Lines | final class PhutilRemarkupEngine extends PhutilMarkupEngine { | ||||
private function splitTextIntoBlocks($text, $depth = 0) { | private function splitTextIntoBlocks($text, $depth = 0) { | ||||
// Apply basic block and paragraph normalization to the text. NOTE: We don't | // Apply basic block and paragraph normalization to the text. NOTE: We don't | ||||
// strip trailing whitespace because it is semantic in some contexts, | // strip trailing whitespace because it is semantic in some contexts, | ||||
// notably inlined diffs that the author intends to show as a code block. | // notably inlined diffs that the author intends to show as a code block. | ||||
$text = phutil_split_lines($text, true); | $text = phutil_split_lines($text, true); | ||||
$block_rules = $this->blockRules; | $block_rules = $this->blockRules; | ||||
$blocks = array(); | $blocks = array(); | ||||
$cursor = 0; | $cursor = 0; | ||||
$prev_block = array(); | |||||
$can_merge = array(); | |||||
foreach ($block_rules as $key => $block_rule) { | |||||
if ($block_rule instanceof PhutilRemarkupDefaultBlockRule) { | |||||
$can_merge[$key] = true; | |||||
} | |||||
} | |||||
$last_block = null; | |||||
$last_block_key = -1; | |||||
// See T13487. For very large inputs, block separation can dominate | |||||
// runtime. This is written somewhat clumsily to attempt to handle | |||||
// very large inputs as gracefully as is practical. | |||||
while (isset($text[$cursor])) { | while (isset($text[$cursor])) { | ||||
$starting_cursor = $cursor; | $starting_cursor = $cursor; | ||||
foreach ($block_rules as $block_rule) { | foreach ($block_rules as $block_key => $block_rule) { | ||||
$num_lines = $block_rule->getMatchingLineCount($text, $cursor); | $num_lines = $block_rule->getMatchingLineCount($text, $cursor); | ||||
if ($num_lines) { | if ($num_lines) { | ||||
if ($blocks) { | $current_block = array( | ||||
$prev_block = last($blocks); | |||||
} | |||||
$curr_block = array( | |||||
'start' => $cursor, | 'start' => $cursor, | ||||
'num_lines' => $num_lines, | 'num_lines' => $num_lines, | ||||
'rule' => $block_rule, | 'rule' => $block_rule, | ||||
'is_empty' => self::isEmptyBlock($text, $cursor, $num_lines), | 'empty' => self::isEmptyBlock($text, $cursor, $num_lines), | ||||
'children' => array(), | 'children' => array(), | ||||
'merge' => isset($can_merge[$block_key]), | |||||
); | ); | ||||
if ($prev_block | $should_merge = self::shouldMergeParagraphBlocks( | ||||
&& self::shouldMergeBlocks($text, $prev_block, $curr_block)) { | $text, | ||||
$blocks[last_key($blocks)]['num_lines'] += $curr_block['num_lines']; | $last_block, | ||||
$blocks[last_key($blocks)]['is_empty'] = | $current_block); | ||||
$blocks[last_key($blocks)]['is_empty'] && $curr_block['is_empty']; | |||||
if ($should_merge) { | |||||
$last_block['num_lines'] = | |||||
($last_block['num_lines'] + $current_block['num_lines']); | |||||
$last_block['empty'] = | |||||
($last_block['empty'] && $current_block['empty']); | |||||
$blocks[$last_block_key] = $last_block; | |||||
} else { | } else { | ||||
$blocks[] = $curr_block; | $blocks[] = $current_block; | ||||
$last_block = $current_block; | |||||
$last_block_key++; | |||||
} | } | ||||
$cursor += $num_lines; | $cursor += $num_lines; | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
if ($starting_cursor === $cursor) { | if ($starting_cursor === $cursor) { | ||||
throw new Exception(pht('Block in text did not match any block rule.')); | throw new Exception(pht('Block in text did not match any block rule.')); | ||||
} | } | ||||
} | } | ||||
// See T13487. It's common for blocks to be small, and this loop seems to | |||||
// measure as faster if we manually concatenate blocks than if we | |||||
// "array_slice()" and "implode()" blocks. This is a bit muddy. | |||||
foreach ($blocks as $key => $block) { | foreach ($blocks as $key => $block) { | ||||
$lines = array_slice($text, $block['start'], $block['num_lines']); | $min = $block['start']; | ||||
$blocks[$key]['text'] = implode('', $lines); | $max = $min + $block['num_lines']; | ||||
$lines = ''; | |||||
for ($ii = $min; $ii < $max; $ii++) { | |||||
$lines .= $text[$ii]; | |||||
} | |||||
$blocks[$key]['text'] = $lines; | |||||
} | } | ||||
// Stop splitting child blocks apart if we get too deep. This arrests | // Stop splitting child blocks apart if we get too deep. This arrests | ||||
// any blocks which have looping child rules, and stops the stack from | // any blocks which have looping child rules, and stops the stack from | ||||
// exploding if someone writes a hilarious comment with 5,000 levels of | // exploding if someone writes a hilarious comment with 5,000 levels of | ||||
// quoted text. | // quoted text. | ||||
if ($depth < self::MAX_CHILD_DEPTH) { | if ($depth < self::MAX_CHILD_DEPTH) { | ||||
Show All 35 Lines | if ($this->isTextMode()) { | ||||
$output = implode("\n\n", $output)."\n"; | $output = implode("\n\n", $output)."\n"; | ||||
} else { | } else { | ||||
$output = phutil_implode_html("\n\n", $output); | $output = phutil_implode_html("\n\n", $output); | ||||
} | } | ||||
return $output; | return $output; | ||||
} | } | ||||
private static function shouldMergeBlocks($text, $prev_block, $curr_block) { | private static function shouldMergeParagraphBlocks( | ||||
$block_rules = ipull(array($prev_block, $curr_block), 'rule'); | $text, | ||||
$last_block, | |||||
$current_block) { | |||||
$default_rule = 'PhutilRemarkupDefaultBlockRule'; | // If we're at the beginning of the input, we can't merge. | ||||
try { | if ($last_block === null) { | ||||
assert_instances_of($block_rules, $default_rule); | return false; | ||||
epriestley: This piece was pretty wild. It meant "Are both blocks default blocks (plain old paragraphs)?" but tested that in a very convoluted way. | |||||
} | |||||
// If the last block was empty keep merging | // If the previous block wasn't a default block, we can't merge. | ||||
if ($prev_block['is_empty']) { | if (!$last_block['merge']) { | ||||
return true; | return false; | ||||
} | } | ||||
// If this line is blank keep merging | // If the current block isn't a default block, we can't merge. | ||||
if ($curr_block['is_empty']) { | if (!$current_block['merge']) { | ||||
return false; | |||||
} | |||||
// If the last block was empty, we definitely want to merge. | |||||
if ($last_block['empty']) { | |||||
return true; | return true; | ||||
} | } | ||||
// If the current line and the last line have content, keep merging | // If this block is empty, we definitely want to merge. | ||||
if (strlen(trim($text[$curr_block['start'] - 1]))) { | if ($current_block['empty']) { | ||||
if (strlen(trim($text[$curr_block['start']]))) { | |||||
return true; | return true; | ||||
} | } | ||||
// Check if the last line of the previous block or the first line of this | |||||
// block have any non-whitespace text. If they both do, we're going to | |||||
// merge. | |||||
// If either of them are a blank line or a line with only whitespace, we | |||||
// do not merge: this means we've found a paragraph break. | |||||
$tail = $text[$current_block['start'] - 1]; | |||||
$head = $text[$current_block['start']]; | |||||
if (strlen(trim($tail)) && strlen(trim($head))) { | |||||
return true; | |||||
} | } | ||||
} catch (Exception $e) {} | |||||
return false; | return false; | ||||
} | } | ||||
private static function isEmptyBlock($text, $start, $num_lines) { | private static function isEmptyBlock($text, $start, $num_lines) { | ||||
for ($cursor = $start; $cursor < $start + $num_lines; $cursor++) { | for ($cursor = $start; $cursor < $start + $num_lines; $cursor++) { | ||||
if (strlen(trim($text[$cursor]))) { | if (strlen(trim($text[$cursor]))) { | ||||
return false; | return false; | ||||
Show All 26 Lines |
This piece was pretty wild. It meant "Are both blocks default blocks (plain old paragraphs)?" but tested that in a very convoluted way.