D20968.diff
No One · Temporary
Actions

Size

9 KB

Referenced Files

None

Subscribers

None

D20968.diff
View Options

	diff --git a/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php b/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php
	--- a/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php
	+++ b/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php
	@@ -100,22 +100,28 @@
	}

	private function getRegEx() {
	- $words = array(
	- 'NOTE',
	- 'IMPORTANT',
	- 'WARNING',
	- );
	-
	- foreach ($words as $k => $word) {
	- $words[$k] = preg_quote($word, '/');
	+ static $regex;
	+
	+ if ($regex === null) {
	+ $words = array(
	+ 'NOTE',
	+ 'IMPORTANT',
	+ 'WARNING',
	+ );
	+
	+ foreach ($words as $k => $word) {
	+ $words[$k] = preg_quote($word, '/');
	+ }
	+ $words = implode('|', $words);
	+
	+ $regex =
	+ '/^(?:'.
	+ '(?:\((?P<hideword>'.$words.')\))'.
	+ '|'.
	+ '(?:(?P<showword>'.$words.'):))\s*'.
	+ '/';
	}
	- $words = implode('|', $words);
	-
	- return
	- '/^(?:'.
	- '(?:\((?P<hideword>'.$words.')\))'.
	- '|'.
	- '(?:(?P<showword>'.$words.'):))\s*'.
	- '/';
	+
	+ return $regex;
	}
	}
	diff --git a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php
	--- a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php
	+++ b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php
	@@ -153,33 +153,54 @@
	$block_rules = $this->blockRules;
	$blocks = array();
	$cursor = 0;
	- $prev_block = array();
	+
	+ $can_merge = array();
	+ foreach ($block_rules as $key => $block_rule) {
	+ if ($block_rule instanceof PhutilRemarkupDefaultBlockRule) {
	+ $can_merge[$key] = true;
	+ }
	+ }
	+
	+ $last_block = null;
	+ $last_block_key = -1;
	+
	+ // See T13487. For very large inputs, block separation can dominate
	+ // runtime. This is written somewhat clumsily to attempt to handle
	+ // very large inputs as gracefully as is practical.

	while (isset($text[$cursor])) {
	$starting_cursor = $cursor;
	- foreach ($block_rules as $block_rule) {
	+ foreach ($block_rules as $block_key => $block_rule) {
	$num_lines = $block_rule->getMatchingLineCount($text, $cursor);

	if ($num_lines) {
	- if ($blocks) {
	- $prev_block = last($blocks);
	- }
	-
	- $curr_block = array(
	+ $current_block = array(
	'start' => $cursor,
	'num_lines' => $num_lines,
	'rule' => $block_rule,
	- 'is_empty' => self::isEmptyBlock($text, $cursor, $num_lines),
	+ 'empty' => self::isEmptyBlock($text, $cursor, $num_lines),
	'children' => array(),
	+ 'merge' => isset($can_merge[$block_key]),
	);

	- if ($prev_block
	- && self::shouldMergeBlocks($text, $prev_block, $curr_block)) {
	- $blocks[last_key($blocks)]['num_lines'] += $curr_block['num_lines'];
	- $blocks[last_key($blocks)]['is_empty'] =
	- $blocks[last_key($blocks)]['is_empty'] && $curr_block['is_empty'];
	+ $should_merge = self::shouldMergeParagraphBlocks(
	+ $text,
	+ $last_block,
	+ $current_block);
	+
	+ if ($should_merge) {
	+ $last_block['num_lines'] =
	+ ($last_block['num_lines'] + $current_block['num_lines']);
	+
	+ $last_block['empty'] =
	+ ($last_block['empty'] && $current_block['empty']);
	+
	+ $blocks[$last_block_key] = $last_block;
	} else {
	- $blocks[] = $curr_block;
	+ $blocks[] = $current_block;
	+
	+ $last_block = $current_block;
	+ $last_block_key++;
	}

	$cursor += $num_lines;
	@@ -192,9 +213,20 @@
	}
	}

	+ // See T13487. It's common for blocks to be small, and this loop seems to
	+ // measure as faster if we manually concatenate blocks than if we
	+ // "array_slice()" and "implode()" blocks. This is a bit muddy.
	+
	foreach ($blocks as $key => $block) {
	- $lines = array_slice($text, $block['start'], $block['num_lines']);
	- $blocks[$key]['text'] = implode('', $lines);
	+ $min = $block['start'];
	+ $max = $min + $block['num_lines'];
	+
	+ $lines = '';
	+ for ($ii = $min; $ii < $max; $ii++) {
	+ $lines .= $text[$ii];
	+ }
	+
	+ $blocks[$key]['text'] = $lines;
	}

	// Stop splitting child blocks apart if we get too deep. This arrests
	@@ -246,30 +278,48 @@
	return $output;
	}

	- private static function shouldMergeBlocks($text, $prev_block, $curr_block) {
	- $block_rules = ipull(array($prev_block, $curr_block), 'rule');
	+ private static function shouldMergeParagraphBlocks(
	+ $text,
	+ $last_block,
	+ $current_block) {

	- $default_rule = 'PhutilRemarkupDefaultBlockRule';
	- try {
	- assert_instances_of($block_rules, $default_rule);
	+ // If we're at the beginning of the input, we can't merge.
	+ if ($last_block === null) {
	+ return false;
	+ }

	- // If the last block was empty keep merging
	- if ($prev_block['is_empty']) {
	- return true;
	- }
	+ // If the previous block wasn't a default block, we can't merge.
	+ if (!$last_block['merge']) {
	+ return false;
	+ }

	- // If this line is blank keep merging
	- if ($curr_block['is_empty']) {
	- return true;
	- }
	+ // If the current block isn't a default block, we can't merge.
	+ if (!$current_block['merge']) {
	+ return false;
	+ }

	- // If the current line and the last line have content, keep merging
	- if (strlen(trim($text[$curr_block['start'] - 1]))) {
	- if (strlen(trim($text[$curr_block['start']]))) {
	- return true;
	- }
	- }
	- } catch (Exception $e) {}
	+ // If the last block was empty, we definitely want to merge.
	+ if ($last_block['empty']) {
	+ return true;
	+ }
	+
	+ // If this block is empty, we definitely want to merge.
	+ if ($current_block['empty']) {
	+ return true;
	+ }
	+
	+ // Check if the last line of the previous block or the first line of this
	+ // block have any non-whitespace text. If they both do, we're going to
	+ // merge.
	+
	+ // If either of them are a blank line or a line with only whitespace, we
	+ // do not merge: this means we've found a paragraph break.
	+
	+ $tail = $text[$current_block['start'] - 1];
	+ $head = $text[$current_block['start']];
	+ if (strlen(trim($tail)) && strlen(trim($head))) {
	+ return true;
	+ }

	return false;
	}
	diff --git a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
	--- a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
	+++ b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
	@@ -2,6 +2,9 @@

	abstract class PhabricatorObjectRemarkupRule extends PhutilRemarkupRule {

	+ private $referencePattern;
	+ private $embedPattern;
	+
	const KEY_RULE_OBJECT = 'rule.object';
	const KEY_MENTIONED_OBJECTS = 'rule.object.mentioned';

	@@ -192,38 +195,48 @@
	}

	private function getObjectEmbedPattern() {
	- $prefix = $this->getObjectNamePrefix();
	- $prefix = preg_quote($prefix);
	- $id = $this->getObjectIDPattern();
	+ if ($this->embedPattern === null) {
	+ $prefix = $this->getObjectNamePrefix();
	+ $prefix = preg_quote($prefix);
	+ $id = $this->getObjectIDPattern();

	- return '(\B{'.$prefix.'('.$id.')([,\s](?:[^}\\\\]|\\\\.)*)?}\B)u';
	+ $this->embedPattern =
	+ '(\B{'.$prefix.'('.$id.')([,\s](?:[^}\\\\]|\\\\.)*)?}\B)u';
	+ }
	+
	+ return $this->embedPattern;
	}

	private function getObjectReferencePattern() {
	- $prefix = $this->getObjectNamePrefix();
	- $prefix = preg_quote($prefix);
	-
	- $id = $this->getObjectIDPattern();
	-
	- // If the prefix starts with a word character (like "D"), we want to
	- // require a word boundary so that we don't match "XD1" as "D1". If the
	- // prefix does not start with a word character, we want to require no word
	- // boundary for the same reasons. Test if the prefix starts with a word
	- // character.
	- if ($this->getObjectNamePrefixBeginsWithWordCharacter()) {
	- $boundary = '\\b';
	- } else {
	- $boundary = '\\B';
	- }
	+ if ($this->referencePattern === null) {
	+ $prefix = $this->getObjectNamePrefix();
	+ $prefix = preg_quote($prefix);
	+
	+ $id = $this->getObjectIDPattern();
	+
	+ // If the prefix starts with a word character (like "D"), we want to
	+ // require a word boundary so that we don't match "XD1" as "D1". If the
	+ // prefix does not start with a word character, we want to require no word
	+ // boundary for the same reasons. Test if the prefix starts with a word
	+ // character.
	+ if ($this->getObjectNamePrefixBeginsWithWordCharacter()) {
	+ $boundary = '\\b';
	+ } else {
	+ $boundary = '\\B';
	+ }

	- // The "(?<![#@-])" prevents us from linking "#abcdef" or similar, and
	- // "ABC-T1" (see T5714), and from matching "@T1" as a task (it is a user)
	- // (see T9479).
	+ // The "(?<![#@-])" prevents us from linking "#abcdef" or similar, and
	+ // "ABC-T1" (see T5714), and from matching "@T1" as a task (it is a user)
	+ // (see T9479).

	- // The "\b" allows us to link "(abcdef)" or similar without linking things
	- // in the middle of words.
	+ // The "\b" allows us to link "(abcdef)" or similar without linking things
	+ // in the middle of words.
	+
	+ $this->referencePattern =
	+ '((?<![#@-])'.$boundary.$prefix.'('.$id.')(?:#([-\w\d]+))?(?!\w))u';
	+ }

	- return '((?<![#@-])'.$boundary.$prefix.'('.$id.')(?:#([-\w\d]+))?(?!\w))u';
	+ return $this->referencePattern;
	}

File Metadata

Mime Type: text/plain
Expires: May 22 2024, 8:53 PM (4 w, 6 d ago)
Storage Engine: blob
Storage Format: Encrypted (AES-256-CBC)
Storage Handle: 6306252
Default Alt Text: D20968.diff (9 KB)

D20968.diff · No One · Temporary · Actions

D20968.diff · View Options

File Metadata

Event Timeline

D20968.diff
No One · Temporary
Actions

D20968.diff
View Options