Differential D20968 Diff 49962 src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php

Changeset View

Standalone View

src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php

Show First 20 Lines • Show All 147 Lines • ▼ Show 20 Lines	final class PhutilRemarkupEngine extends PhutilMarkupEngine {
private function splitTextIntoBlocks($text, $depth = 0) {		private function splitTextIntoBlocks($text, $depth = 0) {
// Apply basic block and paragraph normalization to the text. NOTE: We don't		// Apply basic block and paragraph normalization to the text. NOTE: We don't
// strip trailing whitespace because it is semantic in some contexts,		// strip trailing whitespace because it is semantic in some contexts,
// notably inlined diffs that the author intends to show as a code block.		// notably inlined diffs that the author intends to show as a code block.
$text = phutil_split_lines($text, true);		$text = phutil_split_lines($text, true);
$block_rules = $this->blockRules;		$block_rules = $this->blockRules;
$blocks = array();		$blocks = array();
$cursor = 0;		$cursor = 0;
$prev_block = array();
		$can_merge = array();
		foreach ($block_rules as $key => $block_rule) {
		if ($block_rule instanceof PhutilRemarkupDefaultBlockRule) {
		$can_merge[$key] = true;
		}
		}

		$last_block = null;
		$last_block_key = -1;

		// See T13487. For very large inputs, block separation can dominate
		// runtime. This is written somewhat clumsily to attempt to handle
		// very large inputs as gracefully as is practical.

while (isset($text[$cursor])) {		while (isset($text[$cursor])) {
$starting_cursor = $cursor;		$starting_cursor = $cursor;
foreach ($block_rules as $block_rule) {		foreach ($block_rules as $block_key => $block_rule) {
$num_lines = $block_rule->getMatchingLineCount($text, $cursor);		$num_lines = $block_rule->getMatchingLineCount($text, $cursor);

if ($num_lines) {		if ($num_lines) {
if ($blocks) {		$current_block = array(
$prev_block = last($blocks);
}

$curr_block = array(
'start' => $cursor,		'start' => $cursor,
'num_lines' => $num_lines,		'num_lines' => $num_lines,
'rule' => $block_rule,		'rule' => $block_rule,
'is_empty' => self::isEmptyBlock($text, $cursor, $num_lines),		'empty' => self::isEmptyBlock($text, $cursor, $num_lines),
'children' => array(),		'children' => array(),
		'merge' => isset($can_merge[$block_key]),
);		);

if ($prev_block		$should_merge = self::shouldMergeParagraphBlocks(
&& self::shouldMergeBlocks($text, $prev_block, $curr_block)) {		$text,
$blocks[last_key($blocks)]['num_lines'] += $curr_block['num_lines'];		$last_block,
$blocks[last_key($blocks)]['is_empty'] =		$current_block);
$blocks[last_key($blocks)]['is_empty'] && $curr_block['is_empty'];
		if ($should_merge) {
		$last_block['num_lines'] =
		($last_block['num_lines'] + $current_block['num_lines']);

		$last_block['empty'] =
		($last_block['empty'] && $current_block['empty']);

		$blocks[$last_block_key] = $last_block;
} else {		} else {
$blocks[] = $curr_block;		$blocks[] = $current_block;

		$last_block = $current_block;
		$last_block_key++;
}		}

$cursor += $num_lines;		$cursor += $num_lines;
break;		break;
}		}
}		}

if ($starting_cursor === $cursor) {		if ($starting_cursor === $cursor) {
throw new Exception(pht('Block in text did not match any block rule.'));		throw new Exception(pht('Block in text did not match any block rule.'));
}		}
}		}

		// See T13487. It's common for blocks to be small, and this loop seems to
		// measure as faster if we manually concatenate blocks than if we
		// "array_slice()" and "implode()" blocks. This is a bit muddy.

foreach ($blocks as $key => $block) {		foreach ($blocks as $key => $block) {
$lines = array_slice($text, $block['start'], $block['num_lines']);		$min = $block['start'];
$blocks[$key]['text'] = implode('', $lines);		$max = $min + $block['num_lines'];

		$lines = '';
		for ($ii = $min; $ii < $max; $ii++) {
		$lines .= $text[$ii];
		}

		$blocks[$key]['text'] = $lines;
}		}

// Stop splitting child blocks apart if we get too deep. This arrests		// Stop splitting child blocks apart if we get too deep. This arrests
// any blocks which have looping child rules, and stops the stack from		// any blocks which have looping child rules, and stops the stack from
// exploding if someone writes a hilarious comment with 5,000 levels of		// exploding if someone writes a hilarious comment with 5,000 levels of
// quoted text.		// quoted text.

if ($depth < self::MAX_CHILD_DEPTH) {		if ($depth < self::MAX_CHILD_DEPTH) {
Show All 35 Lines	if ($this->isTextMode()) {
$output = implode("\n\n", $output)."\n";		$output = implode("\n\n", $output)."\n";
} else {		} else {
$output = phutil_implode_html("\n\n", $output);		$output = phutil_implode_html("\n\n", $output);
}		}

return $output;		return $output;
}		}

private static function shouldMergeBlocks($text, $prev_block, $curr_block) {		private static function shouldMergeParagraphBlocks(
$block_rules = ipull(array($prev_block, $curr_block), 'rule');		$text,
		$last_block,
		$current_block) {

$default_rule = 'PhutilRemarkupDefaultBlockRule';		// If we're at the beginning of the input, we can't merge.
try {		if ($last_block === null) {
assert_instances_of($block_rules, $default_rule);		return false;
epriestley (Author; unsubmitted inline comment, marked Done): This piece was pretty wild. It meant "Are both blocks default blocks (plain old paragraphs)?" but tested that in a very convoluted way.
		}

// If the last block was empty keep merging		// If the previous block wasn't a default block, we can't merge.
if ($prev_block['is_empty']) {		if (!$last_block['merge']) {
return true;		return false;
}		}

// If this line is blank keep merging		// If the current block isn't a default block, we can't merge.
if ($curr_block['is_empty']) {		if (!$current_block['merge']) {
		return false;
		}

		// If the last block was empty, we definitely want to merge.
		if ($last_block['empty']) {
return true;		return true;
}		}

// If the current line and the last line have content, keep merging		// If this block is empty, we definitely want to merge.
if (strlen(trim($text[$curr_block['start'] - 1]))) {		if ($current_block['empty']) {
if (strlen(trim($text[$curr_block['start']]))) {
return true;		return true;
}		}

		// Check if the last line of the previous block or the first line of this
		// block have any non-whitespace text. If they both do, we're going to
		// merge.

		// If either of them are a blank line or a line with only whitespace, we
		// do not merge: this means we've found a paragraph break.

		$tail = $text[$current_block['start'] - 1];
		$head = $text[$current_block['start']];
		if (strlen(trim($tail)) && strlen(trim($head))) {
		return true;
}		}
} catch (Exception $e) {}

return false;		return false;
}		}

private static function isEmptyBlock($text, $start, $num_lines) {		private static function isEmptyBlock($text, $start, $num_lines) {
for ($cursor = $start; $cursor < $start + $num_lines; $cursor++) {		for ($cursor = $start; $cursor < $start + $num_lines; $cursor++) {
if (strlen(trim($text[$cursor]))) {		if (strlen(trim($text[$cursor]))) {
return false;		return false;
Show All 26 Lines