Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15390887
D20968.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Referenced Files
None
Subscribers
None
D20968.diff
View Options
diff --git a/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php b/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php
--- a/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php
+++ b/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php
@@ -100,22 +100,28 @@
}
private function getRegEx() {
- $words = array(
- 'NOTE',
- 'IMPORTANT',
- 'WARNING',
- );
-
- foreach ($words as $k => $word) {
- $words[$k] = preg_quote($word, '/');
+ static $regex;
+
+ if ($regex === null) {
+ $words = array(
+ 'NOTE',
+ 'IMPORTANT',
+ 'WARNING',
+ );
+
+ foreach ($words as $k => $word) {
+ $words[$k] = preg_quote($word, '/');
+ }
+ $words = implode('|', $words);
+
+ $regex =
+ '/^(?:'.
+ '(?:\((?P<hideword>'.$words.')\))'.
+ '|'.
+ '(?:(?P<showword>'.$words.'):))\s*'.
+ '/';
}
- $words = implode('|', $words);
-
- return
- '/^(?:'.
- '(?:\((?P<hideword>'.$words.')\))'.
- '|'.
- '(?:(?P<showword>'.$words.'):))\s*'.
- '/';
+
+ return $regex;
}
}
diff --git a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php
--- a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php
+++ b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php
@@ -153,33 +153,54 @@
$block_rules = $this->blockRules;
$blocks = array();
$cursor = 0;
- $prev_block = array();
+
+ $can_merge = array();
+ foreach ($block_rules as $key => $block_rule) {
+ if ($block_rule instanceof PhutilRemarkupDefaultBlockRule) {
+ $can_merge[$key] = true;
+ }
+ }
+
+ $last_block = null;
+ $last_block_key = -1;
+
+ // See T13487. For very large inputs, block separation can dominate
+ // runtime. This is written somewhat clumsily to attempt to handle
+ // very large inputs as gracefully as is practical.
while (isset($text[$cursor])) {
$starting_cursor = $cursor;
- foreach ($block_rules as $block_rule) {
+ foreach ($block_rules as $block_key => $block_rule) {
$num_lines = $block_rule->getMatchingLineCount($text, $cursor);
if ($num_lines) {
- if ($blocks) {
- $prev_block = last($blocks);
- }
-
- $curr_block = array(
+ $current_block = array(
'start' => $cursor,
'num_lines' => $num_lines,
'rule' => $block_rule,
- 'is_empty' => self::isEmptyBlock($text, $cursor, $num_lines),
+ 'empty' => self::isEmptyBlock($text, $cursor, $num_lines),
'children' => array(),
+ 'merge' => isset($can_merge[$block_key]),
);
- if ($prev_block
- && self::shouldMergeBlocks($text, $prev_block, $curr_block)) {
- $blocks[last_key($blocks)]['num_lines'] += $curr_block['num_lines'];
- $blocks[last_key($blocks)]['is_empty'] =
- $blocks[last_key($blocks)]['is_empty'] && $curr_block['is_empty'];
+ $should_merge = self::shouldMergeParagraphBlocks(
+ $text,
+ $last_block,
+ $current_block);
+
+ if ($should_merge) {
+ $last_block['num_lines'] =
+ ($last_block['num_lines'] + $current_block['num_lines']);
+
+ $last_block['empty'] =
+ ($last_block['empty'] && $current_block['empty']);
+
+ $blocks[$last_block_key] = $last_block;
} else {
- $blocks[] = $curr_block;
+ $blocks[] = $current_block;
+
+ $last_block = $current_block;
+ $last_block_key++;
}
$cursor += $num_lines;
@@ -192,9 +213,20 @@
}
}
+ // See T13487. It's common for blocks to be small, and this loop seems to
+ // measure as faster if we manually concatenate blocks than if we
+ // "array_slice()" and "implode()" blocks. This is a bit muddy.
+
foreach ($blocks as $key => $block) {
- $lines = array_slice($text, $block['start'], $block['num_lines']);
- $blocks[$key]['text'] = implode('', $lines);
+ $min = $block['start'];
+ $max = $min + $block['num_lines'];
+
+ $lines = '';
+ for ($ii = $min; $ii < $max; $ii++) {
+ $lines .= $text[$ii];
+ }
+
+ $blocks[$key]['text'] = $lines;
}
// Stop splitting child blocks apart if we get too deep. This arrests
@@ -246,30 +278,48 @@
return $output;
}
- private static function shouldMergeBlocks($text, $prev_block, $curr_block) {
- $block_rules = ipull(array($prev_block, $curr_block), 'rule');
+ private static function shouldMergeParagraphBlocks(
+ $text,
+ $last_block,
+ $current_block) {
- $default_rule = 'PhutilRemarkupDefaultBlockRule';
- try {
- assert_instances_of($block_rules, $default_rule);
+ // If we're at the beginning of the input, we can't merge.
+ if ($last_block === null) {
+ return false;
+ }
- // If the last block was empty keep merging
- if ($prev_block['is_empty']) {
- return true;
- }
+ // If the previous block wasn't a default block, we can't merge.
+ if (!$last_block['merge']) {
+ return false;
+ }
- // If this line is blank keep merging
- if ($curr_block['is_empty']) {
- return true;
- }
+ // If the current block isn't a default block, we can't merge.
+ if (!$current_block['merge']) {
+ return false;
+ }
- // If the current line and the last line have content, keep merging
- if (strlen(trim($text[$curr_block['start'] - 1]))) {
- if (strlen(trim($text[$curr_block['start']]))) {
- return true;
- }
- }
- } catch (Exception $e) {}
+ // If the last block was empty, we definitely want to merge.
+ if ($last_block['empty']) {
+ return true;
+ }
+
+ // If this block is empty, we definitely want to merge.
+ if ($current_block['empty']) {
+ return true;
+ }
+
+ // Check if the last line of the previous block or the first line of this
+ // block have any non-whitespace text. If they both do, we're going to
+ // merge.
+
+ // If either of them are a blank line or a line with only whitespace, we
+ // do not merge: this means we've found a paragraph break.
+
+ $tail = $text[$current_block['start'] - 1];
+ $head = $text[$current_block['start']];
+ if (strlen(trim($tail)) && strlen(trim($head))) {
+ return true;
+ }
return false;
}
diff --git a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
--- a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
+++ b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
@@ -2,6 +2,9 @@
abstract class PhabricatorObjectRemarkupRule extends PhutilRemarkupRule {
+ private $referencePattern;
+ private $embedPattern;
+
const KEY_RULE_OBJECT = 'rule.object';
const KEY_MENTIONED_OBJECTS = 'rule.object.mentioned';
@@ -192,38 +195,48 @@
}
private function getObjectEmbedPattern() {
- $prefix = $this->getObjectNamePrefix();
- $prefix = preg_quote($prefix);
- $id = $this->getObjectIDPattern();
+ if ($this->embedPattern === null) {
+ $prefix = $this->getObjectNamePrefix();
+ $prefix = preg_quote($prefix);
+ $id = $this->getObjectIDPattern();
- return '(\B{'.$prefix.'('.$id.')([,\s](?:[^}\\\\]|\\\\.)*)?}\B)u';
+ $this->embedPattern =
+ '(\B{'.$prefix.'('.$id.')([,\s](?:[^}\\\\]|\\\\.)*)?}\B)u';
+ }
+
+ return $this->embedPattern;
}
private function getObjectReferencePattern() {
- $prefix = $this->getObjectNamePrefix();
- $prefix = preg_quote($prefix);
-
- $id = $this->getObjectIDPattern();
-
- // If the prefix starts with a word character (like "D"), we want to
- // require a word boundary so that we don't match "XD1" as "D1". If the
- // prefix does not start with a word character, we want to require no word
- // boundary for the same reasons. Test if the prefix starts with a word
- // character.
- if ($this->getObjectNamePrefixBeginsWithWordCharacter()) {
- $boundary = '\\b';
- } else {
- $boundary = '\\B';
- }
+ if ($this->referencePattern === null) {
+ $prefix = $this->getObjectNamePrefix();
+ $prefix = preg_quote($prefix);
+
+ $id = $this->getObjectIDPattern();
+
+ // If the prefix starts with a word character (like "D"), we want to
+ // require a word boundary so that we don't match "XD1" as "D1". If the
+ // prefix does not start with a word character, we want to require no word
+ // boundary for the same reasons. Test if the prefix starts with a word
+ // character.
+ if ($this->getObjectNamePrefixBeginsWithWordCharacter()) {
+ $boundary = '\\b';
+ } else {
+ $boundary = '\\B';
+ }
- // The "(?<![#@-])" prevents us from linking "#abcdef" or similar, and
- // "ABC-T1" (see T5714), and from matching "@T1" as a task (it is a user)
- // (see T9479).
+ // The "(?<![#@-])" prevents us from linking "#abcdef" or similar, and
+ // "ABC-T1" (see T5714), and from matching "@T1" as a task (it is a user)
+ // (see T9479).
- // The "\b" allows us to link "(abcdef)" or similar without linking things
- // in the middle of words.
+ // The "\b" allows us to link "(abcdef)" or similar without linking things
+ // in the middle of words.
+
+ $this->referencePattern =
+ '((?<![#@-])'.$boundary.$prefix.'('.$id.')(?:#([-\w\d]+))?(?!\w))u';
+ }
- return '((?<![#@-])'.$boundary.$prefix.'('.$id.')(?:#([-\w\d]+))?(?!\w))u';
+ return $this->referencePattern;
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Mar 16, 7:12 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7341739
Default Alt Text
D20968.diff (9 KB)
Attached To
Mode
D20968: Improve Remarkup parsing performance for certain large input blocks
Attached
Detach File
Event Timeline
Log In to Comment