Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15470643
D16683.id40166.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
1 KB
Referenced Files
None
Subscribers
None
D16683.id40166.diff
View Options
diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php
--- a/src/difference/ArcanistDiffUtils.php
+++ b/src/difference/ArcanistDiffUtils.php
@@ -56,7 +56,30 @@
);
}
- return self::computeIntralineEdits($o, $n);
+ // Do a fast check for certainly-too-long inputs before splitting the
+ // lines. Inputs take ~200x more memory to represent as lists than as
+ // strings, so we can run out of memory quickly if we try to split huge
+ // inputs. See T11744.
+ $ol = strlen($o);
+ $nl = strlen($n);
+
+ $max_glyphs = 80;
+
+ // This has some wiggle room for multi-byte UTF8 characters, and the
+ // fact that we're testing the sum of the lengths of both strings. It can
+ // still generate false positives for, say, Chinese text liberally
+ // slathered with combining characters, but this kind of text should be
+ // virtually nonexistent in real data.
+ $too_many_bytes = (16 * $max_glyphs);
+
+ if ($ol + $nl > $too_many_bytes) {
+ return array(
+ array(array(1, $ol)),
+ array(array(1, $nl)),
+ );
+ }
+
+ return self::computeIntralineEdits($o, $n, $max_glyphs);
}
public static function applyIntralineDiff($str, $intra_stack) {
@@ -155,7 +178,7 @@
->getEditString();
}
- public static function computeIntralineEdits($o, $n) {
+ private static function computeIntralineEdits($o, $n, $max_glyphs) {
if (preg_match('/[\x80-\xFF]/', $o.$n)) {
$ov = phutil_utf8v_combined($o);
$nv = phutil_utf8v_combined($n);
@@ -166,7 +189,7 @@
$multibyte = false;
}
- $result = self::generateEditString($ov, $nv);
+ $result = self::generateEditString($ov, $nv, $max_glyphs);
// Now we have a character-based description of the edit. We need to
// convert into a byte-based description. Walk through the edit string and
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Apr 6, 12:02 AM (19 h, 20 m ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7598522
Default Alt Text
D16683.id40166.diff (1 KB)
Attached To
Mode
D16683: Don't compute intraline diffs if the input fails a coarse check for being huge
Attached
Detach File
Event Timeline
Log In to Comment