Changeset View
Changeset View
Standalone View
Standalone View
src/difference/ArcanistDiffUtils.php
| Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | public static function generateIntralineDiff($o, $n) { | ||||
| if (($o === $n) || !$ol || !$nl) { | if (($o === $n) || !$ol || !$nl) { | ||||
| return array( | return array( | ||||
| array(array(0, $ol)), | array(array(0, $ol)), | ||||
| array(array(0, $nl)), | array(array(0, $nl)), | ||||
| ); | ); | ||||
| } | } | ||||
| return self::computeIntralineEdits($o, $n); | // Do a fast check for certainly-too-long inputs before splitting the | ||||
| // lines. Inputs take ~200x more memory to represent as lists than as | |||||
| // strings, so we can run out of memory quickly if we try to split huge | |||||
| // inputs. See T11744. | |||||
| $ol = strlen($o); | |||||
| $nl = strlen($n); | |||||
| $max_glyphs = 80; | |||||
| // This has some wiggle room for multi-byte UTF8 characters, and the | |||||
| // fact that we're testing the sum of the lengths of both strings. It can | |||||
| // still generate false positives for, say, Chinese text liberally | |||||
| // slathered with combining characters, but this kind of text should be | |||||
| // virtually nonexistent in real data. | |||||
| $too_many_bytes = (16 * $max_glyphs); | |||||
| if ($ol + $nl > $too_many_bytes) { | |||||
| return array( | |||||
| array(array(1, $ol)), | |||||
| array(array(1, $nl)), | |||||
| ); | |||||
| } | |||||
| return self::computeIntralineEdits($o, $n, $max_glyphs); | |||||
| } | } | ||||
| public static function applyIntralineDiff($str, $intra_stack) { | public static function applyIntralineDiff($str, $intra_stack) { | ||||
| $buf = ''; | $buf = ''; | ||||
| $p = $s = $e = 0; // position, start, end | $p = $s = $e = 0; // position, start, end | ||||
| $highlight = $tag = $ent = false; | $highlight = $tag = $ent = false; | ||||
| $highlight_o = '<span class="bright">'; | $highlight_o = '<span class="bright">'; | ||||
| $highlight_c = '</span>'; | $highlight_c = '</span>'; | ||||
| ▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | return id(new PhutilEditDistanceMatrix()) | ||||
| ->setAlterCost(1 / ($max * 2)) | ->setAlterCost(1 / ($max * 2)) | ||||
| ->setReplaceCost(2) | ->setReplaceCost(2) | ||||
| ->setMaximumLength($max) | ->setMaximumLength($max) | ||||
| ->setSequences($ov, $nv) | ->setSequences($ov, $nv) | ||||
| ->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_INTERNAL) | ->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_INTERNAL) | ||||
| ->getEditString(); | ->getEditString(); | ||||
| } | } | ||||
| public static function computeIntralineEdits($o, $n) { | private static function computeIntralineEdits($o, $n, $max_glyphs) { | ||||
| if (preg_match('/[\x80-\xFF]/', $o.$n)) { | if (preg_match('/[\x80-\xFF]/', $o.$n)) { | ||||
| $ov = phutil_utf8v_combined($o); | $ov = phutil_utf8v_combined($o); | ||||
| $nv = phutil_utf8v_combined($n); | $nv = phutil_utf8v_combined($n); | ||||
| $multibyte = true; | $multibyte = true; | ||||
| } else { | } else { | ||||
| $ov = str_split($o); | $ov = str_split($o); | ||||
| $nv = str_split($n); | $nv = str_split($n); | ||||
| $multibyte = false; | $multibyte = false; | ||||
| } | } | ||||
| $result = self::generateEditString($ov, $nv); | $result = self::generateEditString($ov, $nv, $max_glyphs); | ||||
| // Now we have a character-based description of the edit. We need to | // Now we have a character-based description of the edit. We need to | ||||
| // convert into a byte-based description. Walk through the edit string and | // convert into a byte-based description. Walk through the edit string and | ||||
| // adjust each operation to reflect the number of bytes in the underlying | // adjust each operation to reflect the number of bytes in the underlying | ||||
| // character. | // character. | ||||
| $o_pos = 0; | $o_pos = 0; | ||||
| $n_pos = 0; | $n_pos = 0; | ||||
| ▲ Show 20 Lines • Show All 59 Lines • Show Last 20 Lines | |||||