Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F14000563
D16881.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Referenced Files
None
Subscribers
None
D16881.diff
View Options
diff --git a/src/utils/PhutilEditDistanceMatrix.php b/src/utils/PhutilEditDistanceMatrix.php
--- a/src/utils/PhutilEditDistanceMatrix.php
+++ b/src/utils/PhutilEditDistanceMatrix.php
@@ -55,6 +55,7 @@
private $maximumLength;
private $computeString;
private $applySmoothing = self::SMOOTHING_NONE;
+ private $reachedMaximumLength;
private $x;
private $y;
@@ -77,6 +78,10 @@
return coalesce($this->maximumLength, $this->getInfinity());
}
+ public function didReachMaximumLength() {
+ return $this->reachedMaximumLength;
+ }
+
public function setComputeString($compute_string) {
$this->computeString = $compute_string;
return $this;
@@ -199,6 +204,7 @@
$max = $this->getMaximumLength();
if (count($x) > $max || count($y) > $max) {
+ $this->reachedMaximumLength = true;
return ($this->insertCost * count($y)) + ($this->deleteCost * count($x));
}
@@ -245,6 +251,7 @@
$max = $this->getMaximumLength();
if (count($x) > $max || count($y) > $max) {
+ $this->reachedMaximumLength = true;
return $this->padEditString(
str_repeat('d', count($x)).
str_repeat('i', count($y)));
diff --git a/src/utils/PhutilProseDifferenceEngine.php b/src/utils/PhutilProseDifferenceEngine.php
--- a/src/utils/PhutilProseDifferenceEngine.php
+++ b/src/utils/PhutilProseDifferenceEngine.php
@@ -3,7 +3,7 @@
final class PhutilProseDifferenceEngine extends Phobject {
public function getDiff($u, $v) {
- return $this->buildDiff($u, $v, 1);
+ return $this->buildDiff($u, $v, 0);
}
private function buildDiff($u, $v, $level) {
@@ -119,13 +119,19 @@
} else if (!strlen($new)) {
$result->addPart('-', $old);
} else {
- $subdiff = $this->buildDiff(
- $old,
- $new,
- $level + 1);
-
- foreach ($subdiff->getParts() as $part) {
- $result->addPart($part['type'], $part['text']);
+ if ($matrix->didReachMaximumLength()) {
+ // If this text was too big to diff, don't try to subdivide it.
+ $result->addPart('-', $old);
+ $result->addPart('+', $new);
+ } else {
+ $subdiff = $this->buildDiff(
+ $old,
+ $new,
+ $level + 1);
+
+ foreach ($subdiff->getParts() as $part) {
+ $result->addPart($part['type'], $part['text']);
+ }
}
}
}
@@ -138,6 +144,10 @@
private function splitCorpus($corpus, $level) {
switch ($level) {
+ case 0:
+ // Level 0: Split into paragraphs.
+ $expr = '/([\n]+)/';
+ break;
case 1:
// Level 1: Split into sentences.
$expr = '/([\n,!;?\.]+)/';
@@ -164,7 +174,7 @@
$result .= $pieces[$ii + 1];
}
- if ($level == 1) {
+ if ($level < 2) {
// Split pieces into separate text and whitespace sections: make one
// piece out of all the whitespace at the beginning, one piece out of
// all the actual text in the middle, and one piece out of all the
diff --git a/src/utils/__tests__/PhutilProseDiffTestCase.php b/src/utils/__tests__/PhutilProseDiffTestCase.php
--- a/src/utils/__tests__/PhutilProseDiffTestCase.php
+++ b/src/utils/__tests__/PhutilProseDiffTestCase.php
@@ -173,6 +173,26 @@
'= ]]',
),
pht('Diff changing a remarkup wiki link target.'));
+
+ // Create a large corpus with many sentences and paragraphs.
+ $large_paragraph = 'xyz. ';
+ $large_paragraph = str_repeat($large_paragraph, 50);
+ $large_paragraph = rtrim($large_paragraph);
+
+ $large_corpus = $large_paragraph."\n\n";
+ $large_corpus = str_repeat($large_corpus, 50);
+ $large_corpus = rtrim($large_corpus);
+
+ $this->assertProseParts(
+ $large_corpus,
+ "aaa\n\n".$large_corpus."\n\nzzz",
+ array(
+ "+ aaa\n\n",
+ '= '.$large_corpus,
+ "+ \n\nzzz",
+ ),
+ pht('Adding initial and final lines to a large corpus.'));
+
}
private function assertProseParts($old, $new, array $expect_parts, $label) {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Oct 25, 10:54 PM (3 w, 13 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6749362
Default Alt Text
D16881.diff (4 KB)
Attached To
Mode
D16881: Improve prose diffs for changes spanning very large blocks of intermediate text
Attached
Detach File
Event Timeline
Log In to Comment