Page MenuHomePhabricator

D16683.id40166.diff
No OneTemporary

D16683.id40166.diff

diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php
--- a/src/difference/ArcanistDiffUtils.php
+++ b/src/difference/ArcanistDiffUtils.php
@@ -56,7 +56,30 @@
);
}
- return self::computeIntralineEdits($o, $n);
+ // Do a fast check for certainly-too-long inputs before splitting the
+ // lines. Inputs take ~200x more memory to represent as lists than as
+ // strings, so we can run out of memory quickly if we try to split huge
+ // inputs. See T11744.
+ $ol = strlen($o);
+ $nl = strlen($n);
+
+ $max_glyphs = 80;
+
+ // This has some wiggle room for multi-byte UTF8 characters, and the
+ // fact that we're testing the sum of the lengths of both strings. It can
+ // still generate false positives for, say, Chinese text liberally
+ // slathered with combining characters, but this kind of text should be
+ // vitually nonexistent in real data.
+ $too_many_bytes = (16 * $max_glyphs);
+
+ if ($ol + $nl > $too_many_bytes) {
+ return array(
+ array(array(1, $ol)),
+ array(array(1, $nl)),
+ );
+ }
+
+ return self::computeIntralineEdits($o, $n, $max_glyphs);
}
public static function applyIntralineDiff($str, $intra_stack) {
@@ -155,7 +178,7 @@
->getEditString();
}
- public static function computeIntralineEdits($o, $n) {
+ private static function computeIntralineEdits($o, $n, $max_glyphs) {
if (preg_match('/[\x80-\xFF]/', $o.$n)) {
$ov = phutil_utf8v_combined($o);
$nv = phutil_utf8v_combined($n);
@@ -166,7 +189,7 @@
$multibyte = false;
}
- $result = self::generateEditString($ov, $nv);
+ $result = self::generateEditString($ov, $nv, $max_glyphs);
// Now we have a character-based description of the edit. We need to
// convert into a byte-based description. Walk through the edit string and

File Metadata

Mime Type
text/plain
Expires
Sun, Apr 6, 12:02 AM (19 h, 20 m ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7598522
Default Alt Text
D16683.id40166.diff (1 KB)

Event Timeline