diff --git a/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php index 93d1c94..2b97c13 100644 --- a/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php +++ b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php @@ -1,77 +1,176 @@ ". The first + * byte, "<0x01>" is a single byte with value 1 that marks a token. If this is + * token ID "444", the text may now look like this: * - * //\11Z// + * //<0x01>444Z// * - * Now the italics match and are replaced: + * Now the italics match and are replaced, using the next token ID: * - * \12Z + * <0x01>445Z * - * When processing completes, all the tokens are replaced again in reverse - * order: + * When processing completes, all the tokens are replaced with their final + * equivalents. For example, token 444 is evaluated to: * - * \11Z + * ... * - * Then: + * Then token 445 is evaluated: + * + * <0x01>444Z + * + * ...and all tokens it contains are replaced: * * ... * * If we didn't do this, the italics rule could match the "//" in "http://", * or any other number of processing mistakes could occur, some of which create * security risks. * * This class generates keys, and stores the map of keys to replacement text. 
*/
final class PhutilRemarkupBlockStorage extends Phobject {

  /**
   * Single byte with value 1 which prefixes every generated token key.
   */
  const MAGIC_BYTE = "\1";

  // Map of token key => replacement text, in the order tokens were stored.
  private $map = array();

  // Counter used to build the numeric part of the next token key.
  private $index = 0;

  /**
   * Store a piece of replacement text and return the token which stands in
   * for it.
   *
   * The token has the form MAGIC_BYTE, a decimal ID, then a literal "Z".
   *
   * @param wild Replacement text to associate with the new token.
   * @return string Newly generated token key.
   */
  public function store($text) {
    // Normally the counter alone yields a fresh key. After setMap() has
    // installed an existing map the counter may lag behind keys which are
    // already in use, so skip forward instead of silently overwriting a
    // stored token (which would also leave the new text at the old token's
    // position and break the ordering restore() relies on).
    do {
      $key = self::MAGIC_BYTE.(++$this->index).'Z';
    } while (array_key_exists($key, $this->map));

    $this->map[$key] = $text;
    return $key;
  }

  /**
   * Replace every token in a corpus with its stored text, expanding tokens
   * nested inside other tokens' text along the way.
   *
   * When `$text_mode` is false (the default), each stored value and the
   * corpus are passed through `phutil_escape_html()` before replacement and
   * the final result is passed through `phutil_safe_html()`. When it is true,
   * values and corpus are used as-is and the result is returned directly.
   *
   * @param wild Corpus containing tokens to expand.
   * @param bool True to skip the HTML escaping and wrapping steps.
   * @return wild Corpus with all tokens expanded.
   * @throws Exception If the corpus or a stored value references a token
   *   which is not in the map, or which was stored later than the value
   *   which references it.
   */
  public function restore($corpus, $text_mode = false) {
    $map = $this->map;

    if (!$text_mode) {
      foreach ($map as $key => $content) {
        $map[$key] = phutil_escape_html($content);
      }
      $corpus = phutil_escape_html($corpus);
    }

    // NOTE: Tokens may contain other tokens: for example, a table may have
    // links inside it. So we can't do a single simple find/replace, because
    // we need to find and replace child tokens inside the content of parent
    // tokens.

    // However, we know that rules which have child tokens must always store
    // all their child tokens first, before they store their parent token: you
    // have to pass the "store(text)" API a block of text with tokens already
    // in it, so you must have created child tokens already.

    // Thus, all child tokens will appear in the list before parent tokens, so
    // if we start at the beginning of the list and replace all the tokens we
    // find in each piece of content, we'll end up expanding all subtokens
    // correctly.

    // The corpus is appended as the final entry so that it is expanded by the
    // same loop, after every stored token has been expanded.
    $map[] = $corpus;
    $seen = array();
    foreach ($map as $key => $content) {
      $seen[$key] = true;

      // If the content contains no token magic, we don't need to replace
      // anything.
      if (strpos($content, self::MAGIC_BYTE) === false) {
        continue;
      }

      $matches = null;
      preg_match_all(
        '/'.self::MAGIC_BYTE.'\d+Z/',
        $content,
        $matches,
        PREG_OFFSET_CAPTURE);

      // With PREG_OFFSET_CAPTURE, each match is a (text, byte offset) pair.
      $matches = $matches[0];

      // See PHI1114. We're replacing all the matches in one pass because this
      // is significantly faster than doing "substr_replace()" in a loop if the
      // corpus is large and we have a large number of matches.

      // Build a list of string pieces in "$parts" by interleaving the
      // plain strings between each token and the replacement token text, then
      // implode the whole thing when we're done.

      $parts = array();
      $pos = 0;
      foreach ($matches as $next) {
        $subkey = $next[0];

        // Every token we match must already have been processed by this
        // loop. A token which is missing from the map, or which is stored
        // later than the content referencing it, violates the
        // child-before-parent ordering described above, so raise an error
        // instead of emitting a raw or unexpanded token.
        if (!isset($seen[$subkey])) {
          if (!isset($map[$subkey])) {
            throw new Exception(
              pht(
                'Matched token key "%s" while processing remarkup block, but '.
                'this token does not exist in the token map.',
                $subkey));
          } else {
            throw new Exception(
              pht(
                'Matched token key "%s" while processing remarkup block, but '.
                'this token appears later in the list than the key being '.
                'processed ("%s").',
                $subkey,
                $key));
          }
        }

        $subpos = $next[1];

        // If there were any non-token bytes since the last token, add them.
        if ($subpos > $pos) {
          $parts[] = substr($content, $pos, $subpos - $pos);
        }

        // Add the token replacement text.
        $parts[] = $map[$subkey];

        // Move the non-token cursor forward over the token.
        $pos = $subpos + strlen($subkey);
      }

      // Add any leftover non-token bytes after the last token.
      $parts[] = substr($content, $pos);

      $content = implode('', $parts);

      // Write the expanded text back so later entries which reference this
      // token pick up the fully expanded version.
      $map[$key] = $content;
    }

    // The corpus was appended last, so it is the final entry in the map.
    $corpus = last($map);

    if (!$text_mode) {
      $corpus = phutil_safe_html($corpus);
    }

    return $corpus;
  }

  /**
   * Replace the text stored under a token key.
   *
   * @param string Token key to assign.
   * @param wild New replacement text for the key.
   * @return this
   */
  public function overwrite($key, $new_text) {
    $this->map[$key] = $new_text;
    return $this;
  }

  /**
   * Get the full map of token keys to replacement text.
   *
   * @return map<string, wild> Map of token key to stored text.
   */
  public function getMap() {
    return $this->map;
  }

  /**
   * Replace the full map of token keys to replacement text.
   *
   * This does not change the key counter; store() skips any keys which are
   * already present in the map.
   *
   * @param map<string, wild> Map of token key to stored text.
   * @return this
   */
  public function setMap(array $map) {
    $this->map = $map;
    return $this;
  }

}