Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15391639
D20522.id48951.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Referenced Files
None
Subscribers
None
D20522.id48951.diff
View Options
diff --git a/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php
--- a/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php
+++ b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php
@@ -7,20 +7,27 @@
*
* //D12//
*
- * It is processed in several stages. First the "D12" matches and is replaced:
+ * It is processed in several stages. First the "D12" matches and is replaced
+ * with a token, in the form of "<0x01><ID number><literal "Z">". The first
+ * byte, "<0x01>", is a single byte with value 1 that marks a token. If this is
+ * token ID "444", the text may now look like this:
*
- * //\11Z//
+ * //<0x01>444Z//
*
- * Now the italics match and are replaced:
+ * Now the italics match and are replaced, using the next token ID:
*
- * \12Z
+ * <0x01>445Z
*
- * When processing completes, all the tokens are replaced again in reverse
- * order:
+ * When processing completes, all the tokens are replaced with their final
+ * equivalents. For example, token 444 is evaluated to:
*
- * <em>\11Z</em>
+ * <a href="http://...">...</a>
*
- * Then:
+ * Then token 445 is evaluated:
+ *
+ * <em><0x01>444Z</em>
+ *
+ * ...and all tokens it contains are replaced:
*
* <em><a href="http://...">...</a></em>
*
@@ -35,7 +42,7 @@
const MAGIC_BYTE = "\1";
private $map = array();
- private $index;
+ private $index = 0;
public function store($text) {
$key = self::MAGIC_BYTE.(++$this->index).'Z';
@@ -44,19 +51,111 @@
}
public function restore($corpus, $text_mode = false) {
- if ($this->map) {
- if ($text_mode) {
- $corpus = str_replace(
- array_reverse(array_keys($this->map)),
- array_reverse($this->map),
- $corpus);
- } else {
- $corpus = phutil_safe_html(str_replace(
- array_reverse(array_keys($this->map)),
- array_map('phutil_escape_html', array_reverse($this->map)),
- phutil_escape_html($corpus)));
+ $map = $this->map;
+
+ if (!$text_mode) {
+ foreach ($map as $key => $content) {
+ $map[$key] = phutil_escape_html($content);
}
+ $corpus = phutil_escape_html($corpus);
}
+
+ // NOTE: Tokens may contain other tokens: for example, a table may have
+ // links inside it. So we can't do a single simple find/replace, because
+ // we need to find and replace child tokens inside the content of parent
+ // tokens.
+
+ // However, we know that rules which have child tokens must always store
+ // all their child tokens first, before they store their parent token: you
+ // have to pass the "store(text)" API a block of text with tokens already
+ // in it, so you must have created child tokens already.
+
+ // Thus, all child tokens will appear in the list before parent tokens, so
+ // if we start at the beginning of the list and replace all the tokens we
+ // find in each piece of content, we'll end up expanding all subtokens
+ // correctly.
+
+ $map[] = $corpus;
+ $seen = array();
+ foreach ($map as $key => $content) {
+ $seen[$key] = true;
+
+ // If the content contains no token magic, we don't need to replace
+ // anything.
+ if (strpos($content, self::MAGIC_BYTE) === false) {
+ continue;
+ }
+
+ $matches = null;
+ preg_match_all(
+ '/'.self::MAGIC_BYTE.'\d+Z/',
+ $content,
+ $matches,
+ PREG_OFFSET_CAPTURE);
+
+ $matches = $matches[0];
+
+ // See PHI1114. We're replacing all the matches in one pass because this
+ // is significantly faster than doing "substr_replace()" in a loop if the
+ // corpus is large and we have a large number of matches.
+
+ // Build a list of string pieces in "$parts" by interleaving the
+ // plain strings between each token and the replacement token text, then
+ // implode the whole thing when we're done.
+
+ $parts = array();
+ $pos = 0;
+ foreach ($matches as $next) {
+ $subkey = $next[0];
+
+ // If we've matched a token pattern but haven't already processed the
+ // corresponding token, fail loudly: either the token does not exist in the
+ // map, or it appears later in the list than the key being processed.
+ if (!isset($seen[$subkey])) {
+ if (!isset($map[$subkey])) {
+ throw new Exception(
+ pht(
+ 'Matched token key "%s" while processing remarkup block, but '.
+ 'this token does not exist in the token map.',
+ $subkey));
+ } else {
+ throw new Exception(
+ pht(
+ 'Matched token key "%s" while processing remarkup block, but '.
+ 'this token appears later in the list than the key being '.
+ 'processed ("%s").',
+ $subkey,
+ $key));
+ }
+ }
+
+ $subpos = $next[1];
+
+ // If there were any non-token bytes since the last token, add them.
+ if ($subpos > $pos) {
+ $parts[] = substr($content, $pos, $subpos - $pos);
+ }
+
+ // Add the token replacement text.
+ $parts[] = $map[$subkey];
+
+ // Move the non-token cursor forward over the token.
+ $pos = $subpos + strlen($subkey);
+ }
+
+ // Add any leftover non-token bytes after the last token.
+ $parts[] = substr($content, $pos);
+
+ $content = implode('', $parts);
+
+ $map[$key] = $content;
+ }
+ $corpus = last($map);
+
+ if (!$text_mode) {
+ $corpus = phutil_safe_html($corpus);
+ }
+
return $corpus;
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Mar 16, 10:48 AM (5 d, 18 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7398685
Default Alt Text
D20522.id48951.diff (5 KB)
Attached To
Mode
D20522: Improve the performance of large remarkup documents with many complex rules
Attached
Detach File
Event Timeline
Log In to Comment