diff --git a/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php b/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php --- a/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php +++ b/src/infrastructure/markup/blockrule/PhutilRemarkupNoteBlockRule.php @@ -100,22 +100,28 @@ } private function getRegEx() { - $words = array( - 'NOTE', - 'IMPORTANT', - 'WARNING', - ); - - foreach ($words as $k => $word) { - $words[$k] = preg_quote($word, '/'); + static $regex; + + if ($regex === null) { + $words = array( + 'NOTE', + 'IMPORTANT', + 'WARNING', + ); + + foreach ($words as $k => $word) { + $words[$k] = preg_quote($word, '/'); + } + $words = implode('|', $words); + + $regex = + '/^(?:'. + '(?:\((?P'.$words.')\))'. + '|'. + '(?:(?P'.$words.'):))\s*'. + '/'; } - $words = implode('|', $words); - - return - '/^(?:'. - '(?:\((?P'.$words.')\))'. - '|'. - '(?:(?P'.$words.'):))\s*'. - '/'; + + return $regex; } } diff --git a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php --- a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php +++ b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php @@ -153,33 +153,54 @@ $block_rules = $this->blockRules; $blocks = array(); $cursor = 0; - $prev_block = array(); + + $can_merge = array(); + foreach ($block_rules as $key => $block_rule) { + if ($block_rule instanceof PhutilRemarkupDefaultBlockRule) { + $can_merge[$key] = true; + } + } + + $last_block = null; + $last_block_key = -1; + + // See T13487. For very large inputs, block separation can dominate + // runtime. This is written somewhat clumsily to attempt to handle + // very large inputs as gracefully as is practical. while (isset($text[$cursor])) { $starting_cursor = $cursor; - foreach ($block_rules as $block_rule) { + foreach ($block_rules as $block_key => $block_rule) { $num_lines = $block_rule->getMatchingLineCount($text, $cursor); if ($num_lines) { - if ($blocks) { - $prev_block = last($blocks); - } - - $curr_block = array( + $current_block = array( 'start' => $cursor, 'num_lines' => $num_lines, 'rule' => $block_rule, - 'is_empty' => self::isEmptyBlock($text, $cursor, $num_lines), + 'empty' => self::isEmptyBlock($text, $cursor, $num_lines), 'children' => array(), + 'merge' => isset($can_merge[$block_key]), ); - if ($prev_block - && self::shouldMergeBlocks($text, $prev_block, $curr_block)) { - $blocks[last_key($blocks)]['num_lines'] += $curr_block['num_lines']; - $blocks[last_key($blocks)]['is_empty'] = - $blocks[last_key($blocks)]['is_empty'] && $curr_block['is_empty']; + $should_merge = self::shouldMergeParagraphBlocks( + $text, + $last_block, + $current_block); + + if ($should_merge) { + $last_block['num_lines'] = + ($last_block['num_lines'] + $current_block['num_lines']); + + $last_block['empty'] = + ($last_block['empty'] && $current_block['empty']); + + $blocks[$last_block_key] = $last_block; } else { - $blocks[] = $curr_block; + $blocks[] = $current_block; + + $last_block = $current_block; + $last_block_key++; } $cursor += $num_lines; @@ -192,9 +213,20 @@ } } + // See T13487. It's common for blocks to be small, and this loop seems to + // measure as faster if we manually concatenate blocks than if we + // "array_slice()" and "implode()" blocks. This is a bit muddy. + foreach ($blocks as $key => $block) { - $lines = array_slice($text, $block['start'], $block['num_lines']); - $blocks[$key]['text'] = implode('', $lines); + $min = $block['start']; + $max = $min + $block['num_lines']; + + $lines = ''; + for ($ii = $min; $ii < $max; $ii++) { + $lines .= $text[$ii]; + } + + $blocks[$key]['text'] = $lines; } // Stop splitting child blocks apart if we get too deep. This arrests @@ -246,30 +278,48 @@ return $output; } - private static function shouldMergeBlocks($text, $prev_block, $curr_block) { - $block_rules = ipull(array($prev_block, $curr_block), 'rule'); + private static function shouldMergeParagraphBlocks( + $text, + $last_block, + $current_block) { - $default_rule = 'PhutilRemarkupDefaultBlockRule'; - try { - assert_instances_of($block_rules, $default_rule); + // If we're at the beginning of the input, we can't merge. + if ($last_block === null) { + return false; + } - // If the last block was empty keep merging - if ($prev_block['is_empty']) { - return true; - } + // If the previous block wasn't a default block, we can't merge. + if (!$last_block['merge']) { + return false; + } - // If this line is blank keep merging - if ($curr_block['is_empty']) { - return true; - } + // If the current block isn't a default block, we can't merge. + if (!$current_block['merge']) { + return false; + } - // If the current line and the last line have content, keep merging - if (strlen(trim($text[$curr_block['start'] - 1]))) { - if (strlen(trim($text[$curr_block['start']]))) { - return true; - } - } - } catch (Exception $e) {} + // If the last block was empty, we definitely want to merge. + if ($last_block['empty']) { + return true; + } + + // If this block is empty, we definitely want to merge. + if ($current_block['empty']) { + return true; + } + + // Check if the last line of the previous block or the first line of this + // block have any non-whitespace text. If they both do, we're going to + // merge. + + // If either of them are a blank line or a line with only whitespace, we + // do not merge: this means we've found a paragraph break. + + $tail = $text[$current_block['start'] - 1]; + $head = $text[$current_block['start']]; + if (strlen(trim($tail)) && strlen(trim($head))) { + return true; + } return false; } diff --git a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php --- a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php +++ b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php @@ -2,6 +2,9 @@ abstract class PhabricatorObjectRemarkupRule extends PhutilRemarkupRule { + private $referencePattern; + private $embedPattern; + const KEY_RULE_OBJECT = 'rule.object'; const KEY_MENTIONED_OBJECTS = 'rule.object.mentioned'; @@ -192,38 +195,48 @@ } private function getObjectEmbedPattern() { - $prefix = $this->getObjectNamePrefix(); - $prefix = preg_quote($prefix); - $id = $this->getObjectIDPattern(); + if ($this->embedPattern === null) { + $prefix = $this->getObjectNamePrefix(); + $prefix = preg_quote($prefix); + $id = $this->getObjectIDPattern(); - return '(\B{'.$prefix.'('.$id.')([,\s](?:[^}\\\\]|\\\\.)*)?}\B)u'; + $this->embedPattern = + '(\B{'.$prefix.'('.$id.')([,\s](?:[^}\\\\]|\\\\.)*)?}\B)u'; + } + + return $this->embedPattern; } private function getObjectReferencePattern() { - $prefix = $this->getObjectNamePrefix(); - $prefix = preg_quote($prefix); - - $id = $this->getObjectIDPattern(); - - // If the prefix starts with a word character (like "D"), we want to - // require a word boundary so that we don't match "XD1" as "D1". If the - // prefix does not start with a word character, we want to require no word - // boundary for the same reasons. Test if the prefix starts with a word - // character. - if ($this->getObjectNamePrefixBeginsWithWordCharacter()) { - $boundary = '\\b'; - } else { - $boundary = '\\B'; - } + if ($this->referencePattern === null) { + $prefix = $this->getObjectNamePrefix(); + $prefix = preg_quote($prefix); + + $id = $this->getObjectIDPattern(); + + // If the prefix starts with a word character (like "D"), we want to + // require a word boundary so that we don't match "XD1" as "D1". If the + // prefix does not start with a word character, we want to require no word + // boundary for the same reasons. Test if the prefix starts with a word + // character. + if ($this->getObjectNamePrefixBeginsWithWordCharacter()) { + $boundary = '\\b'; + } else { + $boundary = '\\B'; + } - // The "(?referencePattern = + '((?referencePattern; }