diff --git a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php --- a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php +++ b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php @@ -14,6 +14,10 @@ } $data = Filesystem::readFile($dir.$file); + // Strip trailing "~" characters from inputs so they may contain + // trailing whitespace. + $data = preg_replace('/~$/m', '', $data); + $opt_file = $dir.$file.'.options'; if (Filesystem::pathExists($opt_file)) { $options = Filesystem::readFile($opt_file); diff --git a/src/applications/differential/__tests__/data/generated.diff b/src/applications/differential/__tests__/data/generated.diff --- a/src/applications/differential/__tests__/data/generated.diff +++ b/src/applications/differential/__tests__/data/generated.diff @@ -4,7 +4,7 @@ +++ b/GENERATED @@ -1,4 +1,4 @@ @generated - + ~ -This is a generated file. +This is a generated file, full of generated code. diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php --- a/src/applications/differential/parser/DifferentialChangesetParser.php +++ b/src/applications/differential/parser/DifferentialChangesetParser.php @@ -643,6 +643,9 @@ $hunk_parser = new DifferentialHunkParser(); $hunk_parser->parseHunksForLineData($changeset->getHunks()); + + $this->realignDiff($changeset, $hunk_parser); + $hunk_parser->reparseHunksForSpecialAttributes(); $unchanged = false; @@ -1366,4 +1369,51 @@ return $key; } + private function realignDiff( + DifferentialChangeset $changeset, + DifferentialHunkParser $hunk_parser) { + // Normalizing and realigning the diff depends on rediffing the files, and + // we currently need complete representations of both files to do anything + // reasonable. If we only have parts of the files, skip realignment. + + // We have more than one hunk, so we're definitely missing part of the file. + $hunks = $changeset->getHunks(); + if (count($hunks) !== 1) { + return null; + } + + // The first hunk doesn't start at the beginning of the file, so we're + // missing some context. + $first_hunk = head($hunks); + if ($first_hunk->getOldOffset() != 1 || $first_hunk->getNewOffset() != 1) { + return null; + } + + $old_file = $changeset->makeOldFile(); + $new_file = $changeset->makeNewFile(); + if ($old_file === $new_file) { + // If the old and new files are exactly identical, the synthetic + // diff below will give us nonsense and whitespace modes are + // irrelevant anyway. This occurs when you, e.g., copy a file onto + // itself in Subversion (see T271). + return null; + } + + + $engine = id(new PhabricatorDifferenceEngine()) + ->setNormalize(true); + + $normalized_changeset = $engine->generateChangesetFromFileContent( + $old_file, + $new_file); + + $type_parser = new DifferentialHunkParser(); + $type_parser->parseHunksForLineData($normalized_changeset->getHunks()); + + $hunk_parser->setNormalized(true); + $hunk_parser->setOldLineTypeMap($type_parser->getOldLineTypeMap()); + $hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap()); + } + + } diff --git a/src/applications/differential/parser/DifferentialHunkParser.php b/src/applications/differential/parser/DifferentialHunkParser.php --- a/src/applications/differential/parser/DifferentialHunkParser.php +++ b/src/applications/differential/parser/DifferentialHunkParser.php @@ -7,6 +7,7 @@ private $intraLineDiffs; private $depthOnlyLines; private $visibleLinesMask; + private $normalized; /** * Get a map of lines on which hunks start, other than line 1. This @@ -124,6 +125,15 @@ return $this->depthOnlyLines; } + public function setNormalized($normalized) { + $this->normalized = $normalized; + return $this; + } + + public function getNormalized() { + return $this->normalized; + } + public function getIsDeleted() { foreach ($this->getNewLines() as $line) { if ($line) { @@ -252,6 +262,8 @@ $this->setOldLines($rebuild_old); $this->setNewLines($rebuild_new); + $this->updateChangeTypesForNormalization(); + return $this; } @@ -753,4 +765,55 @@ return $character_depth; } + private function updateChangeTypesForNormalization() { + if (!$this->getNormalized()) { + return; + } + + // If we've parsed based on a normalized diff alignment, we may currently + // believe some lines are unchanged when they have actually changed. This + // happens when: + // + // - a line changes; + // - the change is a kind of change we normalize away when aligning the + // diff, like an indentation change; + // - we normalize the change away to align the diff; and so + // - the old and new copies of the line are now aligned in the new + // normalized diff. + // + // Then we end up with an alignment where the two lines that differ only + // in some some trivial way are aligned. This is great, and exactly what + // we're trying to accomplish by doing all this alignment stuff in the + // first place. + // + // However, in this case the correctly-aligned lines will be incorrectly + // marked as unchanged because the diff alorithm was fed normalized copies + // of the lines, and these copies truly weren't any different. + // + // When lines are aligned and marked identical, but they're not actually + // identcal, we now mark them as changed. The rest of the processing will + // figure out how to render them appropritely. + + $new = $this->getNewLines(); + $old = $this->getOldLines(); + foreach ($old as $key => $o) { + $n = $new[$key]; + + if (!$o || !$n) { + continue; + } + + if ($o['type'] === null && $n['type'] === null) { + if ($o['text'] !== $n['text']) { + $old[$key]['type'] = '-'; + $new[$key]['type'] = '+'; + } + } + } + + $this->setOldLines($old); + $this->setNewLines($new); + } + + } diff --git a/src/infrastructure/diff/PhabricatorDifferenceEngine.php b/src/infrastructure/diff/PhabricatorDifferenceEngine.php --- a/src/infrastructure/diff/PhabricatorDifferenceEngine.php +++ b/src/infrastructure/diff/PhabricatorDifferenceEngine.php @@ -12,6 +12,7 @@ private $oldName; private $newName; + private $normalize; /* -( Configuring the Engine )--------------------------------------------- */ @@ -43,6 +44,16 @@ } + public function setNormalize($normalize) { + $this->normalize = $normalize; + return $this; + } + + public function getNormalize() { + return $this->normalize; + } + + /* -( Generating Diffs )--------------------------------------------------- */ @@ -71,6 +82,12 @@ $options[] = '-L'; $options[] = $new_name; + $normalize = $this->getNormalize(); + if ($normalize) { + $old = $this->normalizeFile($old); + $new = $this->normalizeFile($new); + } + $old_tmp = new TempFile(); $new_tmp = new TempFile(); @@ -129,4 +146,27 @@ return head($diff->getChangesets()); } + private function normalizeFile($corpus) { + // We can freely apply any other transformations we want to here: we have + // no constraints on what we need to preserve. If we normalize every line + // to "cat", the diff will still work, the alignment of the "-" / "+" + // lines will just be very hard to read. + + // In general, we'll make the diff better if we normalize two lines that + // humans think are the same. + + // We'll make the diff worse if we normalize two lines that humans think + // are different. + + + // Strip all whitespace present anywhere in the diff, since humans never + // consider whitespace changes to alter the line into a "different line" + // even when they're semantic (e.g., in a string constant). This covers + // indentation changes, trailing whitepspace, and formatting changes + // like "+/-". + $corpus = preg_replace('/[ \t]/', '', $corpus); + + return $corpus; + } + }