diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php index 0f798c9e..36b939d0 100644 --- a/src/difference/ArcanistDiffUtils.php +++ b/src/difference/ArcanistDiffUtils.php @@ -1,274 +1,185 @@ $too_many_bytes) { return array( array(array(1, $ol)), array(array(1, $nl)), ); } return self::computeIntralineEdits($o, $n, $max_glyphs); } - public static function applyIntralineDiff($str, $intra_stack) { - $buf = ''; - $p = $s = $e = 0; // position, start, end - $highlight = $tag = $ent = false; - $highlight_o = ''; - $highlight_c = ''; - - $depth_in = ''; - $depth_out = ''; - - $is_html = false; - if ($str instanceof PhutilSafeHTML) { - $is_html = true; - $str = $str->getHTMLContent(); - } - - $n = strlen($str); - for ($i = 0; $i < $n; $i++) { - - if ($p == $e) { - do { - if (empty($intra_stack)) { - $buf .= substr($str, $i); - break 2; - } - $stack = array_shift($intra_stack); - $s = $e; - $e += $stack[1]; - } while ($stack[0] === 0); - - switch ($stack[0]) { - case '>': - $open_tag = $depth_in; - break; - case '<': - $open_tag = $depth_out; - break; - default: - $open_tag = $highlight_o; - break; - } - } - - if (!$highlight && !$tag && !$ent && $p == $s) { - $buf .= $open_tag; - $highlight = true; - } - - if ($str[$i] == '<') { - $tag = true; - if ($highlight) { - $buf .= $highlight_c; - } - } - - if (!$tag) { - if ($str[$i] == '&') { - $ent = true; - } - if ($ent && $str[$i] == ';') { - $ent = false; - } - if (!$ent) { - $p++; - } - } - - $buf .= $str[$i]; - - if ($tag && $str[$i] == '>') { - $tag = false; - if ($highlight) { - $buf .= $open_tag; - } - } - - if ($highlight && ($p == $e || $i == $n - 1)) { - $buf .= $highlight_c; - $highlight = false; - } - } - - if ($is_html) { - return phutil_safe_html($buf); - } - - return $buf; - } - private static function collapseIntralineRuns($runs) { $count = count($runs); for ($ii = 0; $ii < $count - 1; $ii++) { if ($runs[$ii][0] == $runs[$ii + 1][0]) { $runs[$ii + 1][1] += $runs[$ii][1]; 
unset($runs[$ii]);
      }
    }

    // Entries merged into their successor were unset above, so re-index the
    // list before returning it.
    return array_values($runs);
  }

  /**
   * Compute an edit string describing how to turn one sequence into another.
   *
   * Configures a PhutilEditDistanceMatrix with the given sequences and
   * returns the string produced by its getEditString() method.
   *
   * @param list  $ov   Elements of the old sequence.
   * @param list  $nv   Elements of the new sequence.
   * @param int   $max  Value passed to setMaximumLength(); it also scales the
   *                    alter cost, which is set to `1 / ($max * 2)`. Must be
   *                    nonzero.
   * @return string Edit string returned by the matrix.
   */
  public static function generateEditString(array $ov, array $nv, $max = 80) {
    return id(new PhutilEditDistanceMatrix())
      ->setComputeString(true)
      ->setAlterCost(1 / ($max * 2))
      ->setReplaceCost(2)
      ->setMaximumLength($max)
      ->setSequences($ov, $nv)
      ->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_INTERNAL)
      ->getEditString();
  }

  /**
   * Compute byte-based intraline edit runs for an old and a new line.
   *
   * The lines are split into characters, an edit string is generated for the
   * two character sequences, and each operation in that string is converted
   * into a run measured in bytes.
   *
   * @param string $o           Old line.
   * @param string $n           New line.
   * @param int    $max_glyphs  Passed through to generateEditString() as the
   *                            maximum length.
   * @return array Pair `array($o_run, $n_run)`. Each element is a list of
   *               `array($marker, $byte_length)` runs, where `$marker` is 0
   *               for bytes covered by an 's' operation and 1 for bytes
   *               covered by any other operation. Adjacent runs with the same
   *               marker are merged.
   */
  private static function computeIntralineEdits($o, $n, $max_glyphs) {
    // If either line contains a byte outside the 7-bit range, split with the
    // UTF-8 aware helper; otherwise every character is exactly one byte.
    if (preg_match('/[\x80-\xFF]/', $o.$n)) {
      $ov = phutil_utf8v_combined($o);
      $nv = phutil_utf8v_combined($n);
      $multibyte = true;
    } else {
      $ov = str_split($o);
      $nv = str_split($n);
      $multibyte = false;
    }

    $result = self::generateEditString($ov, $nv, $max_glyphs);

    // Now we have a character-based description of the edit. We need to
    // convert into a byte-based description. Walk through the edit string and
    // adjust each operation to reflect the number of bytes in the underlying
    // character.

    $o_pos = 0;
    $n_pos = 0;
    $result_len = strlen($result);
    $o_run = array();
    $n_run = array();

    $old_char_len = 1;
    $new_char_len = 1;

    for ($ii = 0; $ii < $result_len; $ii++) {
      $c = $result[$ii];

      if ($multibyte) {
        // One side may already be exhausted (for example, trailing inserts
        // after the old line has been fully consumed). Guard the lookups so
        // we never read an undefined offset or hand null to strlen(); the
        // length of a missing character is treated as 0.
        $old_char_len = isset($ov[$o_pos]) ? strlen($ov[$o_pos]) : 0;
        $new_char_len = isset($nv[$n_pos]) ? strlen($nv[$n_pos]) : 0;
      }

      switch ($c) {
        case 's':
        case 'x':
          // Consumes one character from both the old and the new line.
          $byte_o = $old_char_len;
          $byte_n = $new_char_len;
          $o_pos++;
          $n_pos++;
          break;
        case 'i':
          // Consumes one character from the new line only.
          $byte_o = 0;
          $byte_n = $new_char_len;
          $n_pos++;
          break;
        case 'd':
          // Consumes one character from the old line only.
          $byte_o = $old_char_len;
          $byte_n = 0;
          $o_pos++;
          break;
        default:
          // Unrecognized operation: emit nothing rather than reusing the
          // byte counts left over from the previous iteration.
          $byte_o = 0;
          $byte_n = 0;
          break;
      }

      // Only 's' operations are recorded with marker 0; everything else is
      // recorded with marker 1.
      $marker = ($c == 's') ? 0 : 1;

      if ($byte_o) {
        $o_run[] = array($marker, $byte_o);
      }

      if ($byte_n) {
        $n_run[] = array($marker, $byte_n);
      }
    }

    $o_run = self::collapseIntralineRuns($o_run);
    $n_run = self::collapseIntralineRuns($n_run);

    return array($o_run, $n_run);
  }

}