Page MenuHomePhabricator

D16068.id38660.diff
No OneTemporary

D16068.id38660.diff

diff --git a/src/utils/PhutilEditDistanceMatrix.php b/src/utils/PhutilEditDistanceMatrix.php
--- a/src/utils/PhutilEditDistanceMatrix.php
+++ b/src/utils/PhutilEditDistanceMatrix.php
@@ -54,6 +54,7 @@
private $alterCost = 0;
private $maximumLength;
private $computeString;
+ private $applySmoothing;
private $x;
private $y;
@@ -126,6 +127,15 @@
return $this->alterCost;
}
+ public function setApplySmoothing($apply_smoothing) { // Stores the flag which decides whether the computed edit string is smoothed.
+ $this->applySmoothing = $apply_smoothing;
+ return $this; // Returns this object so setter calls can be chained.
+ }
+
+ public function getApplySmoothing() { // Returns the flag stored by setApplySmoothing().
+ return $this->applySmoothing;
+ }
+
public function setSequences(array $x, array $y) {
// NOTE: We strip common prefixes and suffixes from the inputs because
@@ -276,7 +286,13 @@
}
}
- return $this->padEditString(strrev($str));
+ $str = strrev($str);
+
+ if ($this->getApplySmoothing()) {
+ $str = $this->applySmoothing($str);
+ }
+
+ return $this->padEditString($str);
}
private function padEditString($str) {
@@ -488,4 +504,20 @@
}
}
+  /**
+   * Smooth an edit string so it reads less choppily.
+   *
+   * Every run of one to three "s" operations that has an "x", "d" or "i"
+   * operation on both sides is rewritten as a run of "x" operations of the
+   * same length. Longer runs, and runs touching either end of the string,
+   * are left alone. This makes the result more readable to humans, since
+   * there are fewer choppy runs of short added and removed substrings.
+   *
+   * @param string Edit string to smooth.
+   * @return string Smoothed edit string, the same length as the input.
+   */
+  private function applySmoothing($str) {
+    // Applied in this order on every pass: runs of three, then two, then one.
+    $patterns = array(
+      '/([xdi])s{3}([xdi])/',
+      '/([xdi])s{2}([xdi])/',
+      '/([xdi])s{1}([xdi])/',
+    );
+    $replacements = array(
+      '$1xxx$2',
+      '$1xx$2',
+      '$1x$2',
+    );
+
+    $result = $str;
+
+    // A match consumes its trailing boundary character, so a neighboring run
+    // which shares that boundary (as in "xsxsx") is only picked up on a later
+    // pass. Repeat until a pass changes nothing.
+    do {
+      $original = $result;
+      $result = preg_replace($patterns, $replacements, $original);
+
+      // preg_replace() returns null if PCRE fails; keep the last good string
+      // rather than handing null back to the caller.
+      if ($result === null) {
+        return $original;
+      }
+    } while ($result !== $original);
+
+    return $result;
+  }
+
}
diff --git a/src/utils/PhutilProseDifferenceEngine.php b/src/utils/PhutilProseDifferenceEngine.php
--- a/src/utils/PhutilProseDifferenceEngine.php
+++ b/src/utils/PhutilProseDifferenceEngine.php
@@ -25,6 +25,12 @@
->setSequences($u_parts, $v_parts)
->setComputeString(true);
+ // For word-level and character-level changes, smooth the output string
+ // to reduce the choppiness of the diff.
+ if ($level > 1) {
+ $matrix->setApplySmoothing(true);
+ }
+
$u_pos = 0;
$v_pos = 0;
diff --git a/src/utils/__tests__/PhutilProseDiffTestCase.php b/src/utils/__tests__/PhutilProseDiffTestCase.php
--- a/src/utils/__tests__/PhutilProseDiffTestCase.php
+++ b/src/utils/__tests__/PhutilProseDiffTestCase.php
@@ -28,6 +28,35 @@
'= yyy',
),
pht('Remove Paragraph'));
+
+
+ // Without smoothing, the algorithm identifies that "shark" and "cat"
+ // both contain the letter "a" and tries to express this as a very
+ // fine-grained edit which replaces "sh" with "c" and then "rk" with "t".
+ // This is technically correct, but it is much easier for human viewers to
+ // parse if we smooth this into a single removal and a single addition.
+
+ $this->assertProseParts(
+ 'They say the shark has nine lives.',
+ 'They say the cat has nine lives.',
+ array(
+ '= They say the ',
+ '- shark',
+ '+ cat',
+ '= has nine lives.',
+ ),
+ pht('"Shark/cat" word edit smoothenss.'));
+
+ $this->assertProseParts(
+ 'Rising quickly, she says',
+ 'Rising quickly, she remarks:',
+ array(
+ '= Rising quickly, she ',
+ '- says',
+ '+ remarks:',
+ ),
+ pht('"Says/remarks" word edit smoothenss.'));
+
}
private function assertProseParts($old, $new, array $expect_parts, $label) {

File Metadata

Mime Type
text/plain
Expires
Thu, May 16, 4:38 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6297310
Default Alt Text
D16068.id38660.diff (3 KB)

Event Timeline