diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php
index 0f798c9e..36b939d0 100644
--- a/src/difference/ArcanistDiffUtils.php
+++ b/src/difference/ArcanistDiffUtils.php
@@ -1,274 +1,185 @@
$too_many_bytes) {
return array(
array(array(1, $ol)),
array(array(1, $nl)),
);
}
return self::computeIntralineEdits($o, $n, $max_glyphs);
}
- public static function applyIntralineDiff($str, $intra_stack) {
- $buf = '';
- $p = $s = $e = 0; // position, start, end
- $highlight = $tag = $ent = false;
- $highlight_o = '';
- $highlight_c = '';
-
- $depth_in = '';
- $depth_out = '';
-
- $is_html = false;
- if ($str instanceof PhutilSafeHTML) {
- $is_html = true;
- $str = $str->getHTMLContent();
- }
-
- $n = strlen($str);
- for ($i = 0; $i < $n; $i++) {
-
- if ($p == $e) {
- do {
- if (empty($intra_stack)) {
- $buf .= substr($str, $i);
- break 2;
- }
- $stack = array_shift($intra_stack);
- $s = $e;
- $e += $stack[1];
- } while ($stack[0] === 0);
-
- switch ($stack[0]) {
- case '>':
- $open_tag = $depth_in;
- break;
- case '<':
- $open_tag = $depth_out;
- break;
- default:
- $open_tag = $highlight_o;
- break;
- }
- }
-
- if (!$highlight && !$tag && !$ent && $p == $s) {
- $buf .= $open_tag;
- $highlight = true;
- }
-
- if ($str[$i] == '<') {
- $tag = true;
- if ($highlight) {
- $buf .= $highlight_c;
- }
- }
-
- if (!$tag) {
- if ($str[$i] == '&') {
- $ent = true;
- }
- if ($ent && $str[$i] == ';') {
- $ent = false;
- }
- if (!$ent) {
- $p++;
- }
- }
-
- $buf .= $str[$i];
-
- if ($tag && $str[$i] == '>') {
- $tag = false;
- if ($highlight) {
- $buf .= $open_tag;
- }
- }
-
- if ($highlight && ($p == $e || $i == $n - 1)) {
- $buf .= $highlight_c;
- $highlight = false;
- }
- }
-
- if ($is_html) {
- return phutil_safe_html($buf);
- }
-
- return $buf;
- }
-
/**
 * Merge neighbouring runs which carry the same type marker.
 *
 * Each run is a pair of the form `array($type, $length)`. Whenever two
 * adjacent runs have loosely-equal types, the earlier run's length is folded
 * into the later run and the earlier run is dropped, so a sequence of
 * same-typed runs ends up as a single run holding the summed length.
 *
 * @param list<array> $runs List of `array($type, $length)` pairs.
 * @return list<array> Re-indexed list in which no two adjacent runs share
 *                     a type.
 */
private static function collapseIntralineRuns($runs) {
  $total = count($runs);

  for ($next = 1; $next < $total; $next++) {
    $prev = $next - 1;

    // Runs of differing types stay separate.
    if ($runs[$prev][0] != $runs[$next][0]) {
      continue;
    }

    // Push the accumulated length forward and discard the earlier run; the
    // later run then carries the total into the following comparison.
    $runs[$next][1] += $runs[$prev][1];
    unset($runs[$prev]);
  }

  // Removing entries leaves gaps in the keys, so renumber from zero.
  return array_values($runs);
}
/**
 * Build the edit string describing how to turn one sequence into another.
 *
 * This configures a @{class:PhutilEditDistanceMatrix} with the two sequences
 * and returns whatever its `getEditString()` produces. Elsewhere in this
 * class the result is consumed one character at a time as a series of
 * `s`, `x`, `i` and `d` operations.
 *
 * @param array $ov Old sequence.
 * @param array $nv New sequence.
 * @param int $max Value passed to `setMaximumLength()`; also used to derive
 *                 the alter cost.
 * @return string Edit string computed by the matrix.
 */
public static function generateEditString(array $ov, array $nv, $max = 80) {
  // NOTE(review): presumably kept fractional so that alter costs only break
  // ties between otherwise equal edits -- confirm against
  // PhutilEditDistanceMatrix.
  $alter_cost = 1 / ($max * 2);

  $matrix = id(new PhutilEditDistanceMatrix())
    ->setComputeString(true)
    ->setAlterCost($alter_cost)
    ->setReplaceCost(2)
    ->setMaximumLength($max)
    ->setSequences($ov, $nv)
    ->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_INTERNAL);

  return $matrix->getEditString();
}
/**
 * Compute byte-based intraline edit runs for an old and a new line.
 *
 * Both lines are split into character vectors: with
 * `phutil_utf8v_combined()` when either line contains a byte in the range
 * 0x80-0xFF, and with `str_split()` (one byte per element) otherwise. An
 * edit string is generated over those vectors and then translated from
 * character counts into byte counts.
 *
 * @param string $o Old line.
 * @param string $n New line.
 * @param int $max_glyphs Limit forwarded to `generateEditString()`.
 * @return array Pair `array($o_run, $n_run)`. Each element is a list of
 *               `array($type, $byte_length)` runs, where `$type` is `0` for
 *               an `s` operation and `1` for any other operation. Adjacent
 *               runs of the same type are collapsed.
 */
private static function computeIntralineEdits($o, $n, $max_glyphs) {
  if (preg_match('/[\x80-\xFF]/', $o.$n)) {
    $ov = phutil_utf8v_combined($o);
    $nv = phutil_utf8v_combined($n);
    $multibyte = true;
  } else {
    $ov = str_split($o);
    $nv = str_split($n);
    $multibyte = false;
  }

  $result = self::generateEditString($ov, $nv, $max_glyphs);

  // Now we have a character-based description of the edit. We need to
  // convert into a byte-based description. Walk through the edit string and
  // adjust each operation to reflect the number of bytes in the underlying
  // character.

  $o_pos = 0;
  $n_pos = 0;
  $result_len = strlen($result);

  $o_run = array();
  $n_run = array();

  for ($ii = 0; $ii < $result_len; $ii++) {
    $c = $result[$ii];

    // "s" and "x" consume one character from both sides, "d" only from the
    // old side and "i" only from the new side.
    $uses_old = ($c === 's' || $c === 'x' || $c === 'd');
    $uses_new = ($c === 's' || $c === 'x' || $c === 'i');

    // Only measure a character on a side the operation actually consumes.
    // Measuring both sides unconditionally reads past the end of a vector
    // once that side is exhausted (for example, on trailing insertions).
    // Resetting the counts on every pass also keeps an unrecognized
    // operation from reusing the previous operation's byte counts.
    $byte_o = 0;
    if ($uses_old) {
      $byte_o = $multibyte ? strlen($ov[$o_pos]) : 1;
      $o_pos++;
    }

    $byte_n = 0;
    if ($uses_new) {
      $byte_n = $multibyte ? strlen($nv[$n_pos]) : 1;
      $n_pos++;
    }

    // Type 0 marks an "s" operation; every other operation is type 1.
    $type = ($c === 's') ? 0 : 1;

    if ($byte_o) {
      $o_run[] = array($type, $byte_o);
    }

    if ($byte_n) {
      $n_run[] = array($type, $byte_n);
    }
  }

  $o_run = self::collapseIntralineRuns($o_run);
  $n_run = self::collapseIntralineRuns($n_run);

  return array($o_run, $n_run);
}
}