Page MenuHomePhabricator

D12145.id29194.diff
No OneTemporary

D12145.id29194.diff

diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php
--- a/src/applications/differential/parser/DifferentialChangesetParser.php
+++ b/src/applications/differential/parser/DifferentialChangesetParser.php
@@ -1324,17 +1324,20 @@
foreach ($changesets as $changeset) {
$file = $changeset->getFilename();
foreach ($changeset->getHunks() as $hunk) {
- $line = $hunk->getOldOffset();
- foreach (explode("\n", $hunk->getChanges()) as $code) {
- $type = (isset($code[0]) ? $code[0] : '');
- if ($type == '-' || $type == ' ') {
- $code = trim(substr($code, 1));
- $files[$file][$line] = $code;
- $types[$file][$line] = $type;
- if (strlen($code) >= $min_width) {
- $map[$code][] = array($file, $line);
- }
- $line++;
+ $lines = $hunk->getStructuredOldFile();
+ foreach ($lines as $line => $info) {
+ $type = $info['type'];
+ if ($type == '\\') {
+ continue;
+ }
+ $types[$file][$line] = $type;
+
+ $text = $info['text'];
+ $text = trim($text);
+ $files[$file][$line] = $text;
+
+ if (strlen($text) >= $min_width) {
+ $map[$text][] = array($file, $line);
}
}
}
@@ -1343,57 +1346,118 @@
foreach ($changesets as $changeset) {
$copies = array();
foreach ($changeset->getHunks() as $hunk) {
- $added = array_map('trim', $hunk->getAddedLines());
- for (reset($added); list($line, $code) = each($added); ) {
- if (isset($map[$code])) { // We found a long matching line.
-
- if (count($map[$code]) > 16) {
- // If there are a large number of identical lines in this diff,
- // don't try to figure out where this block came from: the
- // analysis is O(N^2), since we need to compare every line
- // against every other line. Even if we arrive at a result, it
- // is unlikely to be meaningful. See T5041.
- continue 2;
- }
+ $added = $hunk->getStructuredNewFile();
+
+ foreach ($added as $line => $info) {
+ if ($info['type'] != '+') {
+ unset($added[$line]);
+ continue;
+ }
+ $added[$line] = trim($info['text']);
+ }
- $best_length = 0;
- foreach ($map[$code] as $val) { // Explore all candidates.
- list($file, $orig_line) = $val;
- $length = 1;
- // Search also backwards for short lines.
- foreach (array(-1, 1) as $direction) {
- $offset = $direction;
- while (!isset($copies[$line + $offset]) &&
- isset($added[$line + $offset]) &&
- idx($files[$file], $orig_line + $offset) ===
- $added[$line + $offset]) {
- $length++;
- $offset += $direction;
+ $skip_lines = 0;
+ foreach ($added as $line => $code) {
+ if ($skip_lines) {
+ // We're skipping lines that we already processed because we
+ // extended a block above them downward to include them.
+ $skip_lines--;
+ continue;
+ }
+
+ if (empty($map[$code])) {
+ // This line was too short to trigger copy/move detection.
+ continue;
+ }
+
+ if (count($map[$code]) > 16) {
+ // If there are a large number of identical lines in this diff,
+ // don't try to figure out where this block came from: the analysis
+ // is O(N^2), since we need to compare every line against every
+ // other line. Even if we arrive at a result, it is unlikely to be
+ // meaningful. See T5041.
+ continue;
+ }
+
+ $best_length = 0;
+
+ // Explore all candidates.
+ foreach ($map[$code] as $val) {
+ list($file, $orig_line) = $val;
+ $length = 1;
+
+ // Search backward and forward to find all of the adjacent lines
+ // which match.
+ foreach (array(-1, 1) as $direction) {
+ $offset = $direction;
+ while (true) {
+ if (isset($copies[$line + $offset])) {
+ // If we run into a block above us which we've already
+ // attributed to a move or copy from elsewhere, stop
+ // looking.
+ break;
}
- }
- if ($length > $best_length ||
- ($length == $best_length && // Prefer moves.
- idx($types[$file], $orig_line) == '-')) {
- $best_length = $length;
- // ($offset - 1) contains number of forward matching lines.
- $best_offset = $offset - 1;
- $best_file = $file;
- $best_line = $orig_line;
+
+ if (!isset($added[$line + $offset])) {
+ // If we've run off the beginning or end of the new file,
+ // stop looking.
+ break;
+ }
+
+ if (!isset($files[$file][$orig_line + $offset])) {
+ // If we've run off the beginning or end of the original
+ // file, we also stop looking.
+ break;
+ }
+
+ $old = $files[$file][$orig_line + $offset];
+ $new = $added[$line + $offset];
+ if ($old !== $new) {
+ // If the old line doesn't match the new line, stop
+ // looking.
+ break;
+ }
+
+ $length++;
+ $offset += $direction;
}
}
- $file = ($best_file == $changeset->getFilename() ? '' : $best_file);
- for ($i = $best_length; $i--; ) {
- $type = idx($types[$best_file], $best_line + $best_offset - $i);
- $copies[$line + $best_offset - $i] = ($best_length < $min_lines
- ? array() // Ignore short blocks.
- : array($file, $best_line + $best_offset - $i, $type));
+
+ if ($length < $best_length) {
+ // If we already know of a better source (more matching lines)
+ // for this move/copy, stick with that one. We prefer long
+ // copies/moves which match a lot of context over short ones.
+ continue;
}
- for ($i = 0; $i < $best_offset; $i++) {
- next($added);
+
+ if ($length == $best_length) {
+ if (idx($types[$file], $orig_line) != '-') {
+ // If we already know of an equally good source (same number
+ // of matching lines) and this isn't a move, stick with the
+ // other one. We prefer moves over copies.
+ continue;
+ }
}
+
+ $best_length = $length;
+ // ($offset - 1) contains number of forward matching lines.
+ $best_offset = $offset - 1;
+ $best_file = $file;
+ $best_line = $orig_line;
}
+
+ $file = ($best_file == $changeset->getFilename() ? '' : $best_file);
+ for ($i = $best_length; $i--; ) {
+ $type = idx($types[$best_file], $best_line + $best_offset - $i);
+ $copies[$line + $best_offset - $i] = ($best_length < $min_lines
+ ? array() // Ignore short blocks.
+ : array($file, $best_line + $best_offset - $i, $type));
+ }
+
+ $skip_lines = $best_offset;
}
}
+
$copies = array_filter($copies);
if ($copies) {
$metadata = $changeset->getMetadata();

File Metadata

Mime Type
text/plain
Expires
Oct 16 2024, 8:36 AM (4 w, 5 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6716767
Default Alt Text
D12145.id29194.diff (7 KB)

Event Timeline