Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F13970401
D16839.id40550.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Referenced Files
None
Subscribers
None
D16839.id40550.diff
View Options
diff --git a/src/utils/PhutilProseDiff.php b/src/utils/PhutilProseDiff.php
--- a/src/utils/PhutilProseDiff.php
+++ b/src/utils/PhutilProseDiff.php
@@ -16,6 +16,11 @@
return $this->parts;
}
+ public function clearParts() {
+ $this->parts = array();
+ return $this;
+ }
+
/**
* Get diff parts, but replace large blocks of unchanged text with "."
* parts representing missing context.
diff --git a/src/utils/PhutilProseDifferenceEngine.php b/src/utils/PhutilProseDifferenceEngine.php
--- a/src/utils/PhutilProseDifferenceEngine.php
+++ b/src/utils/PhutilProseDifferenceEngine.php
@@ -3,21 +3,10 @@
final class PhutilProseDifferenceEngine extends Phobject {
public function getDiff($u, $v) {
- $diff = id(new PhutilProseDiff());
-
- $this->buildDiff($diff, $u, $v, 1);
- $diff->reorderParts();
-
- return $diff;
+ return $this->buildDiff($u, $v, 1);
}
- private function buildDiff(PhutilProseDiff $diff, $u, $v, $level) {
- if ($level == 4) {
- $diff->addPart('-', $u);
- $diff->addPart('+', $v);
- return;
- }
-
+ private function buildDiff($u, $v, $level) {
$u_parts = $this->splitCorpus($u, $level);
$v_parts = $this->splitCorpus($v, $level);
@@ -38,6 +27,7 @@
$edits = $matrix->getEditString();
$edits_length = strlen($edits);
+ $diff = new PhutilProseDiff();
for ($ii = 0; $ii < $edits_length; $ii++) {
$c = $edits[$ii];
if ($c == 's') {
@@ -51,7 +41,8 @@
$diff->addPart('+', $v_parts[$v_pos]);
$v_pos++;
} else if ($c == 'x') {
- $this->buildDiff($diff, $u_parts[$u_pos], $v_parts[$v_pos], $level + 1);
+ $diff->addPart('-', $u_parts[$u_pos]);
+ $diff->addPart('+', $v_parts[$v_pos]);
$u_pos++;
$v_pos++;
} else {
@@ -61,6 +52,88 @@
$c));
}
}
+
+ $diff->reorderParts();
+
+ // If we just built a character-level diff, we're all done and do not
+ // need to go any deeper.
+ if ($level == 3) {
+ return $diff;
+ }
+
+ $blocks = array();
+ $block = null;
+ foreach ($diff->getParts() as $part) {
+ $type = $part['type'];
+ $text = $part['text'];
+ switch ($type) {
+ case '=':
+ if ($block) {
+ $blocks[] = $block;
+ $block = null;
+ }
+ $blocks[] = array(
+ 'type' => $type,
+ 'text' => $text,
+ );
+ break;
+ case '-':
+ if (!$block) {
+ $block = array(
+ 'type' => '!',
+ 'old' => '',
+ 'new' => '',
+ );
+ }
+ $block['old'] .= $text;
+ break;
+ case '+':
+ if (!$block) {
+ $block = array(
+ 'type' => '!',
+ 'old' => '',
+ 'new' => '',
+ );
+ }
+ $block['new'] .= $text;
+ break;
+ }
+ }
+
+ if ($block) {
+ $blocks[] = $block;
+ }
+
+ $result = new PhutilProseDiff();
+ foreach ($blocks as $block) {
+ $type = $block['type'];
+ if ($type == '=') {
+ $result->addPart('=', $block['text']);
+ } else {
+ $old = $block['old'];
+ $new = $block['new'];
+ if (!strlen($old) && !strlen($new)) {
+ // Nothing to do.
+ } else if (!strlen($old)) {
+ $result->addPart('+', $new);
+ } else if (!strlen($new)) {
+ $result->addPart('-', $old);
+ } else {
+ $subdiff = $this->buildDiff(
+ $old,
+ $new,
+ $level + 1);
+
+ foreach ($subdiff->getParts() as $part) {
+ $result->addPart($part['type'], $part['text']);
+ }
+ }
+ }
+ }
+
+ $result->reorderParts();
+
+ return $result;
}
private function splitCorpus($corpus, $level) {
diff --git a/src/utils/__tests__/PhutilProseDiffTestCase.php b/src/utils/__tests__/PhutilProseDiffTestCase.php
--- a/src/utils/__tests__/PhutilProseDiffTestCase.php
+++ b/src/utils/__tests__/PhutilProseDiffTestCase.php
@@ -130,6 +130,28 @@
"+ \n\n- ccc ccc ccc",
),
pht('Diff with new trailing content.'));
+
+ $this->assertProseParts(
+ 'aaa aaa aaa aaa, bbb bbb bbb bbb.',
+ 'aaa aaa aaa aaa bbb bbb bbb bbb.',
+ array(
+ '= aaa aaa aaa aaa',
+ '- ,',
+ '= bbb bbb bbb bbb.',
+ ),
+ pht('Diff with a removed comma.'));
+
+ $this->assertProseParts(
+ 'aaa aaa aaa aaa, bbb bbb bbb bbb.',
+ "aaa aaa aaa aaa bbb bbb bbb bbb.\n\n- ccc ccc ccc!",
+ array(
+ '= aaa aaa aaa aaa',
+ '- ,',
+ '= bbb bbb bbb bbb.',
+ "+ \n\n- ccc ccc ccc!",
+ ),
+ pht('Diff with a removed comma and new trailing content.'));
+
}
private function assertProseParts($old, $new, array $expect_parts, $label) {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Oct 18, 7:29 AM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6716470
Default Alt Text
D16839.id40550.diff (4 KB)
Attached To
Mode
D16839: Make prose diff algorithm more iterative, to improve prose diffs for (among other things) removed commas
Attached
Detach File
Event Timeline
Log In to Comment