Page MenuHomePhabricator

D16071.id38671.diff
No OneTemporary

D16071.id38671.diff

diff --git a/src/utils/PhutilEditDistanceMatrix.php b/src/utils/PhutilEditDistanceMatrix.php
--- a/src/utils/PhutilEditDistanceMatrix.php
+++ b/src/utils/PhutilEditDistanceMatrix.php
@@ -288,11 +288,15 @@
$str = strrev($str);
+ // We pad the edit string before smoothing it, so ranges of similar
+ // characters at the beginning or end of the string can also be smoothed.
+ $str = $this->padEditString($str);
+
if ($this->getApplySmoothing()) {
$str = $this->applySmoothing($str);
}
- return $this->padEditString($str);
+ return $str;
}
private function padEditString($str) {
@@ -512,9 +516,9 @@
// since there are fewer choppy runs of short added and removed substrings.
do {
$original = $result;
- $result = preg_replace('/([xdi])(s{3})([xdi])/', '$1xxx$3', $result);
- $result = preg_replace('/([xdi])(s{2})([xdi])/', '$1xx$3', $result);
- $result = preg_replace('/([xdi])(s{1})([xdi])/', '$1x$3', $result);
+ $result = preg_replace('/(^|[xdi])(s{3})([xdi]|\z)/', '$1xxx$3', $result);
+ $result = preg_replace('/(^|[xdi])(s{2})([xdi]|\z)/', '$1xx$3', $result);
+ $result = preg_replace('/(^|[xdi])(s{1})([xdi]|\z)/', '$1x$3', $result);
} while ($result != $original);
return $result;
diff --git a/src/utils/PhutilProseDiff.php b/src/utils/PhutilProseDiff.php
--- a/src/utils/PhutilProseDiff.php
+++ b/src/utils/PhutilProseDiff.php
@@ -20,6 +20,7 @@
// Reorder sequences of removed and added sections to put all the "-"
// parts together first, then all the "+" parts together. This produces
// a more human-readable result than intermingling them.
+
$o_run = array();
$n_run = array();
$result = array();
@@ -33,24 +34,22 @@
$n_run[] = $part;
break;
default:
- foreach ($o_run as $o) {
- $result[] = $o;
- }
- foreach ($n_run as $n) {
- $result[] = $n;
+ if ($o_run || $n_run) {
+ foreach ($this->combineRuns($o_run, $n_run) as $merged_part) {
+ $result[] = $merged_part;
+ }
+ $o_run = array();
+ $n_run = array();
}
$result[] = $part;
- $o_run = array();
- $n_run = array();
break;
}
}
- foreach ($o_run as $o) {
- $result[] = $o;
- }
- foreach ($n_run as $n) {
- $result[] = $n;
+ if ($o_run || $n_run) {
+ foreach ($this->combineRuns($o_run, $n_run) as $part) {
+ $result[] = $part;
+ }
}
// Now, combine consecuitive runs of the same type of change (like a
@@ -88,4 +87,109 @@
return $this;
}
+ private function combineRuns($o_run, $n_run) {
+ $o_merge = $this->mergeParts($o_run);
+ $n_merge = $this->mergeParts($n_run);
+
+ // When removed and added blocks share a prefix or suffix, we sometimes
+ // want to count it as unchanged (for example, if it is whitespace) but
+ // sometimes want to count it as changed (for example, if it is a word
+ // suffix like "ing"). Find common prefixes and suffixes of these layout
+ // characters and emit them as "=" (unchanged) blocks.
+
+ $layout_characters = array(
+ ' ' => true,
+ "\n" => true,
+ '.' => true,
+ '!' => true,
+ ',' => true,
+ '?' => true,
+ );
+
+ $o_text = $o_merge['text'];
+ $n_text = $n_merge['text'];
+ $o_len = strlen($o_text);
+ $n_len = strlen($n_text);
+ $min_len = min($o_len, $n_len);
+
+ $prefix_len = 0;
+ for ($pos = 0; $pos < $min_len; $pos++) {
+ $o = $o_text[$pos];
+ $n = $n_text[$pos];
+ if ($o !== $n) {
+ break;
+ }
+ if (empty($layout_characters[$o])) {
+ break;
+ }
+ $prefix_len++;
+ }
+
+ $suffix_len = 0;
+ for ($pos = 1; $pos <= $min_len; $pos++) {
+ $o = $o_text[$o_len - $pos];
+ $n = $n_text[$n_len - $pos];
+ if ($o !== $n) {
+ break;
+ }
+ if (empty($layout_characters[$o])) {
+ break;
+ }
+ $suffix_len++;
+ }
+
+ $results = array();
+
+ if ($prefix_len) {
+ $results[] = array(
+ 'type' => '=',
+ 'text' => substr($o_text, 0, $prefix_len),
+ );
+ }
+
+ if ($prefix_len < $o_len) {
+ $results[] = array(
+ 'type' => '-',
+ 'text' => substr($o_text, $prefix_len, $o_len - $suffix_len),
+ );
+ }
+
+ if ($prefix_len < $n_len) {
+ $results[] = array(
+ 'type' => '+',
+ 'text' => substr($n_text, $prefix_len, $n_len - $suffix_len),
+ );
+ }
+
+ if ($suffix_len) {
+ $results[] = array(
+ 'type' => '=',
+ 'text' => substr($o_text, -$suffix_len),
+ );
+ }
+
+ return $results;
+ }
+
+ private function mergeParts(array $parts) {
+ $text = '';
+ $type = null;
+ foreach ($parts as $part) {
+ $part_type = $part['type'];
+ if ($type === null) {
+ $type = $part_type;
+ }
+ if ($type !== $part_type) {
+ throw new Exception(pht('Can not merge parts of dissimilar types!'));
+ }
+ $text .= $part['text'];
+ }
+
+ return array(
+ 'type' => $type,
+ 'text' => $text,
+ );
+ }
+
+
}
diff --git a/src/utils/__tests__/PhutilProseDiffTestCase.php b/src/utils/__tests__/PhutilProseDiffTestCase.php
--- a/src/utils/__tests__/PhutilProseDiffTestCase.php
+++ b/src/utils/__tests__/PhutilProseDiffTestCase.php
@@ -57,6 +57,35 @@
),
pht('"Says/remarks" word edit smoothenss.'));
+ $this->assertProseParts(
+ 'See screenshots',
+ 'Viewed video files',
+ array(
+ '- See screenshots',
+ '+ Viewed video files',
+ ),
+ pht('Complete paragraph rewrite.'));
+
+ $this->assertProseParts(
+ 'xaaax',
+ 'xbbbx',
+ array(
+ '- xaaax',
+ '+ xbbbx',
+ ),
+ pht('Whole word rewrite with common prefix and suffix.'));
+
+ $this->assertProseParts(
+ ' aaa ',
+ ' bbb ',
+ array(
+ '= ',
+ '- aaa',
+ '+ bbb',
+ '= ',
+ ),
+ pht('Whole word rewrite with whitespace prefix and suffix.'));
+
}
private function assertProseParts($old, $new, array $expect_parts, $label) {

File Metadata

Mime Type
text/plain
Expires
Mon, Apr 7, 12:44 PM (1 w, 11 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7223283
Default Alt Text
D16071.id38671.diff (6 KB)

Event Timeline