/**
 * Expand every storage token found in a corpus back into its stored content.
 *
 * Stored content may itself contain tokens (for example, a table with links
 * inside it), so a single flat find/replace is not enough: child tokens must
 * be expanded inside the content of their parent tokens as well.
 *
 * This relies on the invariant that the children of a token always appear in
 * the map before the token itself. Walking the map from the beginning
 * therefore guarantees that, by the time a parent is processed, each of its
 * children already holds fully expanded content.
 *
 * @param  string $corpus     Text containing storage tokens.
 * @param  bool   $text_mode  When false (the default), every stored block and
 *                            the corpus are passed through
 *                            phutil_escape_html() before expansion and the
 *                            result is passed through phutil_safe_html().
 *                            When true, everything is used verbatim.
 * @return mixed  The expanded corpus; in text mode a plain string, otherwise
 *                whatever phutil_safe_html() returns for it.
 */
public function restore($corpus, $text_mode = false) {
  $html_mode = !$text_mode;

  // Work on a local copy so the stored map itself is never modified.
  $blocks = $this->map;

  if ($html_mode) {
    foreach ($blocks as $token => $stored) {
      $blocks[$token] = phutil_escape_html($stored);
    }
    $corpus = phutil_escape_html($corpus);
  }

  // Treat the corpus as one final entry which may reference any token. It is
  // appended last, so everything it refers to is expanded before it is.
  $blocks[] = $corpus;

  $token_pattern = '/'.self::MAGIC_BYTE.'\d+Z/';

  foreach ($blocks as $slot => $text) {
    // Cheap rejection: without the magic byte there can be no tokens.
    if (strpos($text, self::MAGIC_BYTE) === false) {
      continue;
    }

    $found = null;
    preg_match_all($token_pattern, $text, $found, PREG_OFFSET_CAPTURE);

    // See PHI1114. All matches are spliced in a single pass: collect the
    // literal text between tokens interleaved with the replacement text,
    // then join the pieces once. This is significantly faster than calling
    // "substr_replace()" repeatedly on a large corpus with many matches.
    $pieces = array();
    $cursor = 0;
    foreach ($found[0] as $hit) {
      list($token, $offset) = $hit;

      // Something that looks like a token but has no stored content is left
      // untouched: the cursor is not advanced, so the bytes are emitted as
      // part of the next literal run. This should not be possible, and
      // should perhaps be an error.
      if (!isset($blocks[$token])) {
        continue;
      }

      // Emit any literal bytes between the previous token and this one.
      if ($offset > $cursor) {
        $pieces[] = substr($text, $cursor, $offset - $cursor);
      }

      // Emit the (already expanded) content for this token.
      $pieces[] = $blocks[$token];

      // Step the cursor past the token we just replaced.
      $cursor = $offset + strlen($token);
    }

    // Emit whatever literal bytes remain after the final token.
    $pieces[] = substr($text, $cursor);

    $blocks[$slot] = implode('', $pieces);
  }

  // The corpus was appended last, so the final entry is the expanded corpus.
  $restored = last($blocks);

  return $html_mode ? phutil_safe_html($restored) : $restored;
}