Page MenuHomePhabricator

D20187.diff
No OneTemporary

D20187.diff

diff --git a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php
--- a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php
+++ b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php
@@ -14,6 +14,10 @@
}
$data = Filesystem::readFile($dir.$file);
+ // Strip trailing "~" characters from inputs so they may contain
+ // trailing whitespace.
+ $data = preg_replace('/~$/m', '', $data);
+
$opt_file = $dir.$file.'.options';
if (Filesystem::pathExists($opt_file)) {
$options = Filesystem::readFile($opt_file);
diff --git a/src/applications/differential/__tests__/data/generated.diff b/src/applications/differential/__tests__/data/generated.diff
--- a/src/applications/differential/__tests__/data/generated.diff
+++ b/src/applications/differential/__tests__/data/generated.diff
@@ -4,7 +4,7 @@
+++ b/GENERATED
@@ -1,4 +1,4 @@
@generated
-
+ ~
-This is a generated file.
+This is a generated file, full of generated code.
diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php
--- a/src/applications/differential/parser/DifferentialChangesetParser.php
+++ b/src/applications/differential/parser/DifferentialChangesetParser.php
@@ -643,6 +643,9 @@
$hunk_parser = new DifferentialHunkParser();
$hunk_parser->parseHunksForLineData($changeset->getHunks());
+
+ $this->realignDiff($changeset, $hunk_parser);
+
$hunk_parser->reparseHunksForSpecialAttributes();
$unchanged = false;
@@ -1366,4 +1369,51 @@
return $key;
}
+ private function realignDiff(
+ DifferentialChangeset $changeset,
+ DifferentialHunkParser $hunk_parser) {
+ // Normalizing and realigning the diff depends on rediffing the files, and
+ // we currently need complete representations of both files to do anything
+ // reasonable. If we only have parts of the files, skip realignment.
+
+ // We have more than one hunk, so we're definitely missing part of the file.
+ $hunks = $changeset->getHunks();
+ if (count($hunks) !== 1) {
+ return null;
+ }
+
+ // The first hunk doesn't start at the beginning of the file, so we're
+ // missing some context.
+ $first_hunk = head($hunks);
+ if ($first_hunk->getOldOffset() != 1 || $first_hunk->getNewOffset() != 1) {
+ return null;
+ }
+
+ $old_file = $changeset->makeOldFile();
+ $new_file = $changeset->makeNewFile();
+ if ($old_file === $new_file) {
+ // If the old and new files are exactly identical, the synthetic
+ // diff below will give us nonsense and whitespace modes are
+ // irrelevant anyway. This occurs when you, e.g., copy a file onto
+ // itself in Subversion (see T271).
+ return null;
+ }
+
+
+ $engine = id(new PhabricatorDifferenceEngine())
+ ->setNormalize(true);
+
+ $normalized_changeset = $engine->generateChangesetFromFileContent(
+ $old_file,
+ $new_file);
+
+ $type_parser = new DifferentialHunkParser();
+ $type_parser->parseHunksForLineData($normalized_changeset->getHunks());
+
+ $hunk_parser->setNormalized(true);
+ $hunk_parser->setOldLineTypeMap($type_parser->getOldLineTypeMap());
+ $hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap());
+ }
+
+
}
diff --git a/src/applications/differential/parser/DifferentialHunkParser.php b/src/applications/differential/parser/DifferentialHunkParser.php
--- a/src/applications/differential/parser/DifferentialHunkParser.php
+++ b/src/applications/differential/parser/DifferentialHunkParser.php
@@ -7,6 +7,7 @@
private $intraLineDiffs;
private $depthOnlyLines;
private $visibleLinesMask;
+ private $normalized;
/**
* Get a map of lines on which hunks start, other than line 1. This
@@ -124,6 +125,15 @@
return $this->depthOnlyLines;
}
+ public function setNormalized($normalized) {
+ $this->normalized = $normalized;
+ return $this;
+ }
+
+ public function getNormalized() {
+ return $this->normalized;
+ }
+
public function getIsDeleted() {
foreach ($this->getNewLines() as $line) {
if ($line) {
@@ -252,6 +262,8 @@
$this->setOldLines($rebuild_old);
$this->setNewLines($rebuild_new);
+ $this->updateChangeTypesForNormalization();
+
return $this;
}
@@ -753,4 +765,55 @@
return $character_depth;
}
+ private function updateChangeTypesForNormalization() {
+ if (!$this->getNormalized()) {
+ return;
+ }
+
+ // If we've parsed based on a normalized diff alignment, we may currently
+ // believe some lines are unchanged when they have actually changed. This
+ // happens when:
+ //
+ // - a line changes;
+ // - the change is a kind of change we normalize away when aligning the
+ // diff, like an indentation change;
+ // - we normalize the change away to align the diff; and so
+ // - the old and new copies of the line are now aligned in the new
+ // normalized diff.
+ //
+ // Then we end up with an alignment where the two lines that differ only
+ // in some some trivial way are aligned. This is great, and exactly what
+ // we're trying to accomplish by doing all this alignment stuff in the
+ // first place.
+ //
+ // However, in this case the correctly-aligned lines will be incorrectly
+ // marked as unchanged because the diff alorithm was fed normalized copies
+ // of the lines, and these copies truly weren't any different.
+ //
+ // When lines are aligned and marked identical, but they're not actually
+ // identcal, we now mark them as changed. The rest of the processing will
+ // figure out how to render them appropritely.
+
+ $new = $this->getNewLines();
+ $old = $this->getOldLines();
+ foreach ($old as $key => $o) {
+ $n = $new[$key];
+
+ if (!$o || !$n) {
+ continue;
+ }
+
+ if ($o['type'] === null && $n['type'] === null) {
+ if ($o['text'] !== $n['text']) {
+ $old[$key]['type'] = '-';
+ $new[$key]['type'] = '+';
+ }
+ }
+ }
+
+ $this->setOldLines($old);
+ $this->setNewLines($new);
+ }
+
+
}
diff --git a/src/infrastructure/diff/PhabricatorDifferenceEngine.php b/src/infrastructure/diff/PhabricatorDifferenceEngine.php
--- a/src/infrastructure/diff/PhabricatorDifferenceEngine.php
+++ b/src/infrastructure/diff/PhabricatorDifferenceEngine.php
@@ -12,6 +12,7 @@
private $oldName;
private $newName;
+ private $normalize;
/* -( Configuring the Engine )--------------------------------------------- */
@@ -43,6 +44,16 @@
}
+ public function setNormalize($normalize) {
+ $this->normalize = $normalize;
+ return $this;
+ }
+
+ public function getNormalize() {
+ return $this->normalize;
+ }
+
+
/* -( Generating Diffs )--------------------------------------------------- */
@@ -71,6 +82,12 @@
$options[] = '-L';
$options[] = $new_name;
+ $normalize = $this->getNormalize();
+ if ($normalize) {
+ $old = $this->normalizeFile($old);
+ $new = $this->normalizeFile($new);
+ }
+
$old_tmp = new TempFile();
$new_tmp = new TempFile();
@@ -129,4 +146,27 @@
return head($diff->getChangesets());
}
+ private function normalizeFile($corpus) {
+ // We can freely apply any other transformations we want to here: we have
+ // no constraints on what we need to preserve. If we normalize every line
+ // to "cat", the diff will still work, the alignment of the "-" / "+"
+ // lines will just be very hard to read.
+
+ // In general, we'll make the diff better if we normalize two lines that
+ // humans think are the same.
+
+ // We'll make the diff worse if we normalize two lines that humans think
+ // are different.
+
+
+ // Strip all whitespace present anywhere in the diff, since humans never
+ // consider whitespace changes to alter the line into a "different line"
+ // even when they're semantic (e.g., in a string constant). This covers
+ // indentation changes, trailing whitepspace, and formatting changes
+ // like "+/-".
+ $corpus = preg_replace('/[ \t]/', '', $corpus);
+
+ return $corpus;
+ }
+
}

File Metadata

Mime Type
text/plain
Expires
Wed, Mar 26, 5:04 AM (1 w, 4 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7401749
Default Alt Text
D20187.diff (8 KB)

Event Timeline