Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15434734
D20187.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Referenced Files
None
Subscribers
None
D20187.diff
View Options
diff --git a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php
--- a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php
+++ b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php
@@ -14,6 +14,10 @@
}
$data = Filesystem::readFile($dir.$file);
+ // Strip trailing "~" characters from inputs so they may contain
+ // trailing whitespace.
+ $data = preg_replace('/~$/m', '', $data);
+
$opt_file = $dir.$file.'.options';
if (Filesystem::pathExists($opt_file)) {
$options = Filesystem::readFile($opt_file);
diff --git a/src/applications/differential/__tests__/data/generated.diff b/src/applications/differential/__tests__/data/generated.diff
--- a/src/applications/differential/__tests__/data/generated.diff
+++ b/src/applications/differential/__tests__/data/generated.diff
@@ -4,7 +4,7 @@
+++ b/GENERATED
@@ -1,4 +1,4 @@
@generated
-
+ ~
-This is a generated file.
+This is a generated file, full of generated code.
diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php
--- a/src/applications/differential/parser/DifferentialChangesetParser.php
+++ b/src/applications/differential/parser/DifferentialChangesetParser.php
@@ -643,6 +643,9 @@
$hunk_parser = new DifferentialHunkParser();
$hunk_parser->parseHunksForLineData($changeset->getHunks());
+
+ $this->realignDiff($changeset, $hunk_parser);
+
$hunk_parser->reparseHunksForSpecialAttributes();
$unchanged = false;
@@ -1366,4 +1369,51 @@
return $key;
}
+ private function realignDiff(
+ DifferentialChangeset $changeset,
+ DifferentialHunkParser $hunk_parser) {
+ // Normalizing and realigning the diff depends on rediffing the files, and
+ // we currently need complete representations of both files to do anything
+ // reasonable. If we only have parts of the files, skip realignment.
+
+ // We have more than one hunk, so we're definitely missing part of the file.
+ $hunks = $changeset->getHunks();
+ if (count($hunks) !== 1) {
+ return null;
+ }
+
+ // The first hunk doesn't start at the beginning of the file, so we're
+ // missing some context.
+ $first_hunk = head($hunks);
+ if ($first_hunk->getOldOffset() != 1 || $first_hunk->getNewOffset() != 1) {
+ return null;
+ }
+
+ $old_file = $changeset->makeOldFile();
+ $new_file = $changeset->makeNewFile();
+ if ($old_file === $new_file) {
+ // If the old and new files are exactly identical, the synthetic
+ // diff below will give us nonsense and whitespace modes are
+ // irrelevant anyway. This occurs when you, e.g., copy a file onto
+ // itself in Subversion (see T271).
+ return null;
+ }
+
+
+ $engine = id(new PhabricatorDifferenceEngine())
+ ->setNormalize(true);
+
+ $normalized_changeset = $engine->generateChangesetFromFileContent(
+ $old_file,
+ $new_file);
+
+ $type_parser = new DifferentialHunkParser();
+ $type_parser->parseHunksForLineData($normalized_changeset->getHunks());
+
+ $hunk_parser->setNormalized(true);
+ $hunk_parser->setOldLineTypeMap($type_parser->getOldLineTypeMap());
+ $hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap());
+ }
+
+
}
diff --git a/src/applications/differential/parser/DifferentialHunkParser.php b/src/applications/differential/parser/DifferentialHunkParser.php
--- a/src/applications/differential/parser/DifferentialHunkParser.php
+++ b/src/applications/differential/parser/DifferentialHunkParser.php
@@ -7,6 +7,7 @@
private $intraLineDiffs;
private $depthOnlyLines;
private $visibleLinesMask;
+ private $normalized;
/**
* Get a map of lines on which hunks start, other than line 1. This
@@ -124,6 +125,15 @@
return $this->depthOnlyLines;
}
+ public function setNormalized($normalized) {
+ $this->normalized = $normalized;
+ return $this;
+ }
+
+ public function getNormalized() {
+ return $this->normalized;
+ }
+
public function getIsDeleted() {
foreach ($this->getNewLines() as $line) {
if ($line) {
@@ -252,6 +262,8 @@
$this->setOldLines($rebuild_old);
$this->setNewLines($rebuild_new);
+ $this->updateChangeTypesForNormalization();
+
return $this;
}
@@ -753,4 +765,55 @@
return $character_depth;
}
+ private function updateChangeTypesForNormalization() {
+ if (!$this->getNormalized()) {
+ return;
+ }
+
+ // If we've parsed based on a normalized diff alignment, we may currently
+ // believe some lines are unchanged when they have actually changed. This
+ // happens when:
+ //
+ // - a line changes;
+ // - the change is a kind of change we normalize away when aligning the
+ // diff, like an indentation change;
+ // - we normalize the change away to align the diff; and so
+ // - the old and new copies of the line are now aligned in the new
+ // normalized diff.
+ //
+ // Then we end up with an alignment where the two lines that differ only
+ // in some some trivial way are aligned. This is great, and exactly what
+ // we're trying to accomplish by doing all this alignment stuff in the
+ // first place.
+ //
+ // However, in this case the correctly-aligned lines will be incorrectly
+ // marked as unchanged because the diff alorithm was fed normalized copies
+ // of the lines, and these copies truly weren't any different.
+ //
+ // When lines are aligned and marked identical, but they're not actually
+ // identcal, we now mark them as changed. The rest of the processing will
+ // figure out how to render them appropritely.
+
+ $new = $this->getNewLines();
+ $old = $this->getOldLines();
+ foreach ($old as $key => $o) {
+ $n = $new[$key];
+
+ if (!$o || !$n) {
+ continue;
+ }
+
+ if ($o['type'] === null && $n['type'] === null) {
+ if ($o['text'] !== $n['text']) {
+ $old[$key]['type'] = '-';
+ $new[$key]['type'] = '+';
+ }
+ }
+ }
+
+ $this->setOldLines($old);
+ $this->setNewLines($new);
+ }
+
+
}
diff --git a/src/infrastructure/diff/PhabricatorDifferenceEngine.php b/src/infrastructure/diff/PhabricatorDifferenceEngine.php
--- a/src/infrastructure/diff/PhabricatorDifferenceEngine.php
+++ b/src/infrastructure/diff/PhabricatorDifferenceEngine.php
@@ -12,6 +12,7 @@
private $oldName;
private $newName;
+ private $normalize;
/* -( Configuring the Engine )--------------------------------------------- */
@@ -43,6 +44,16 @@
}
+ public function setNormalize($normalize) {
+ $this->normalize = $normalize;
+ return $this;
+ }
+
+ public function getNormalize() {
+ return $this->normalize;
+ }
+
+
/* -( Generating Diffs )--------------------------------------------------- */
@@ -71,6 +82,12 @@
$options[] = '-L';
$options[] = $new_name;
+ $normalize = $this->getNormalize();
+ if ($normalize) {
+ $old = $this->normalizeFile($old);
+ $new = $this->normalizeFile($new);
+ }
+
$old_tmp = new TempFile();
$new_tmp = new TempFile();
@@ -129,4 +146,27 @@
return head($diff->getChangesets());
}
+ private function normalizeFile($corpus) {
+ // We can freely apply any other transformations we want to here: we have
+ // no constraints on what we need to preserve. If we normalize every line
+ // to "cat", the diff will still work, the alignment of the "-" / "+"
+ // lines will just be very hard to read.
+
+ // In general, we'll make the diff better if we normalize two lines that
+ // humans think are the same.
+
+ // We'll make the diff worse if we normalize two lines that humans think
+ // are different.
+
+
+ // Strip all whitespace present anywhere in the diff, since humans never
+ // consider whitespace changes to alter the line into a "different line"
+ // even when they're semantic (e.g., in a string constant). This covers
+ // indentation changes, trailing whitepspace, and formatting changes
+ // like "+/-".
+ $corpus = preg_replace('/[ \t]/', '', $corpus);
+
+ return $corpus;
+ }
+
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Mar 26, 5:04 AM (1 w, 4 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7401749
Default Alt Text
D20187.diff (8 KB)
Attached To
Mode
D20187: Put some whitespace behaviors back, but only for "diff alignment", not display
Attached
Detach File
Event Timeline
Log In to Comment