Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15426729
D16068.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Referenced Files
None
Subscribers
None
D16068.diff
View Options
diff --git a/src/utils/PhutilEditDistanceMatrix.php b/src/utils/PhutilEditDistanceMatrix.php
--- a/src/utils/PhutilEditDistanceMatrix.php
+++ b/src/utils/PhutilEditDistanceMatrix.php
@@ -54,6 +54,7 @@
private $alterCost = 0;
private $maximumLength;
private $computeString;
+ private $applySmoothing;
private $x;
private $y;
@@ -126,6 +127,15 @@
return $this->alterCost;
}
+ public function setApplySmoothing($apply_smoothing) {
+ $this->applySmoothing = $apply_smoothing;
+ return $this;
+ }
+
+ public function getApplySmoothing() {
+ return $this->applySmoothing;
+ }
+
public function setSequences(array $x, array $y) {
// NOTE: We strip common prefixes and suffixes from the inputs because
@@ -276,7 +286,13 @@
}
}
- return $this->padEditString(strrev($str));
+ $str = strrev($str);
+
+ if ($this->getApplySmoothing()) {
+ $str = $this->applySmoothing($str);
+ }
+
+ return $this->padEditString($str);
}
private function padEditString($str) {
@@ -488,4 +504,20 @@
}
}
+ private function applySmoothing($str) {
+ $result = $str;
+
+ // Smooth the string out, by replacing short runs of similar characters
+ // with 'x' operations. This makes the result more readable to humans,
+ // since there are fewer choppy runs of short added and removed substrings.
+ do {
+ $original = $result;
+ $result = preg_replace('/([xdi])(s{3})([xdi])/', '$1xxx$3', $result);
+ $result = preg_replace('/([xdi])(s{2})([xdi])/', '$1xx$3', $result);
+ $result = preg_replace('/([xdi])(s{1})([xdi])/', '$1x$3', $result);
+ } while ($result != $original);
+
+ return $result;
+ }
+
}
diff --git a/src/utils/PhutilProseDifferenceEngine.php b/src/utils/PhutilProseDifferenceEngine.php
--- a/src/utils/PhutilProseDifferenceEngine.php
+++ b/src/utils/PhutilProseDifferenceEngine.php
@@ -25,6 +25,12 @@
->setSequences($u_parts, $v_parts)
->setComputeString(true);
+ // For word-level and character-level changes, smooth the output string
+ // to reduce the choppiness of the diff.
+ if ($level > 1) {
+ $matrix->setApplySmoothing(true);
+ }
+
$u_pos = 0;
$v_pos = 0;
diff --git a/src/utils/__tests__/PhutilProseDiffTestCase.php b/src/utils/__tests__/PhutilProseDiffTestCase.php
--- a/src/utils/__tests__/PhutilProseDiffTestCase.php
+++ b/src/utils/__tests__/PhutilProseDiffTestCase.php
@@ -28,6 +28,35 @@
'= yyy',
),
pht('Remove Paragraph'));
+
+
+ // Without smoothing, the algorithm identifies that "shark" and "cat"
+ // both contain the letter "a" and tries to express this as a very
+ // fine-grained edit which replaces "sh" with "c" and then "rk" with "t".
+ // This is technically correct, but it is much easier for human viewers to
+ // parse if we smooth this into a single removal and a single addition.
+
+ $this->assertProseParts(
+ 'They say the shark has nine lives.',
+ 'They say the cat has nine lives.',
+ array(
+ '= They say the ',
+ '- shark',
+ '+ cat',
+ '= has nine lives.',
+ ),
+ pht('"Shark/cat" word edit smoothenss.'));
+
+ $this->assertProseParts(
+ 'Rising quickly, she says',
+ 'Rising quickly, she remarks:',
+ array(
+ '= Rising quickly, she ',
+ '- says',
+ '+ remarks:',
+ ),
+ pht('"Says/remarks" word edit smoothenss.'));
+
}
private function assertProseParts($old, $new, array $expect_parts, $label) {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Mar 24, 10:37 AM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7220280
Default Alt Text
D16068.diff (3 KB)
Attached To
Mode
D16068: Apply edit smoothing to prose diffs
Attached
Detach File
Event Timeline
Log In to Comment