Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F13962178
D12145.id29194.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Referenced Files
None
Subscribers
None
D12145.id29194.diff
View Options
diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php
--- a/src/applications/differential/parser/DifferentialChangesetParser.php
+++ b/src/applications/differential/parser/DifferentialChangesetParser.php
@@ -1324,17 +1324,20 @@
foreach ($changesets as $changeset) {
$file = $changeset->getFilename();
foreach ($changeset->getHunks() as $hunk) {
- $line = $hunk->getOldOffset();
- foreach (explode("\n", $hunk->getChanges()) as $code) {
- $type = (isset($code[0]) ? $code[0] : '');
- if ($type == '-' || $type == ' ') {
- $code = trim(substr($code, 1));
- $files[$file][$line] = $code;
- $types[$file][$line] = $type;
- if (strlen($code) >= $min_width) {
- $map[$code][] = array($file, $line);
- }
- $line++;
+ $lines = $hunk->getStructuredOldFile();
+ foreach ($lines as $line => $info) {
+ $type = $info['type'];
+ if ($type == '\\') {
+ continue;
+ }
+ $types[$file][$line] = $type;
+
+ $text = $info['text'];
+ $text = trim($text);
+ $files[$file][$line] = $text;
+
+ if (strlen($text) >= $min_width) {
+ $map[$text][] = array($file, $line);
}
}
}
@@ -1343,57 +1346,118 @@
foreach ($changesets as $changeset) {
$copies = array();
foreach ($changeset->getHunks() as $hunk) {
- $added = array_map('trim', $hunk->getAddedLines());
- for (reset($added); list($line, $code) = each($added); ) {
- if (isset($map[$code])) { // We found a long matching line.
-
- if (count($map[$code]) > 16) {
- // If there are a large number of identical lines in this diff,
- // don't try to figure out where this block came from: the
- // analysis is O(N^2), since we need to compare every line
- // against every other line. Even if we arrive at a result, it
- // is unlikely to be meaningful. See T5041.
- continue 2;
- }
+ $added = $hunk->getStructuredNewFile();
+
+ foreach ($added as $line => $info) {
+ if ($info['type'] != '+') {
+ unset($added[$line]);
+ continue;
+ }
+ $added[$line] = trim($info['text']);
+ }
- $best_length = 0;
- foreach ($map[$code] as $val) { // Explore all candidates.
- list($file, $orig_line) = $val;
- $length = 1;
- // Search also backwards for short lines.
- foreach (array(-1, 1) as $direction) {
- $offset = $direction;
- while (!isset($copies[$line + $offset]) &&
- isset($added[$line + $offset]) &&
- idx($files[$file], $orig_line + $offset) ===
- $added[$line + $offset]) {
- $length++;
- $offset += $direction;
+ $skip_lines = 0;
+ foreach ($added as $line => $code) {
+ if ($skip_lines) {
+ // We're skipping lines that we already processed because we
+ // extended a block above them downward to include them.
+ $skip_lines--;
+ continue;
+ }
+
+ if (empty($map[$code])) {
+ // This line was too short to trigger copy/move detection.
+ continue;
+ }
+
+ if (count($map[$code]) > 16) {
+ // If there are a large number of identical lines in this diff,
+ // don't try to figure out where this block came from: the analysis
+ // is O(N^2), since we need to compare every line against every
+ // other line. Even if we arrive at a result, it is unlikely to be
+ // meaningful. See T5041.
+ continue;
+ }
+
+ $best_length = 0;
+
+ // Explore all candidates.
+ foreach ($map[$code] as $val) {
+ list($file, $orig_line) = $val;
+ $length = 1;
+
+ // Search backward and forward to find all of the adjacent lines
+ // which match.
+ foreach (array(-1, 1) as $direction) {
+ $offset = $direction;
+ while (true) {
+ if (isset($copies[$line + $offset])) {
+ // If we run into a block above us which we've already
+ // attributed to a move or copy from elsewhere, stop
+ // looking.
+ break;
}
- }
- if ($length > $best_length ||
- ($length == $best_length && // Prefer moves.
- idx($types[$file], $orig_line) == '-')) {
- $best_length = $length;
- // ($offset - 1) contains number of forward matching lines.
- $best_offset = $offset - 1;
- $best_file = $file;
- $best_line = $orig_line;
+
+ if (!isset($added[$line + $offset])) {
+ // If we've run off the beginning or end of the new file,
+ // stop looking.
+ break;
+ }
+
+ if (!isset($files[$file][$orig_line + $offset])) {
+ // If we've run off the beginning or end of the original
+ // file, we also stop looking.
+ break;
+ }
+
+ $old = $files[$file][$orig_line + $offset];
+ $new = $added[$line + $offset];
+ if ($old !== $new) {
+ // If the old line doesn't match the new line, stop
+ // looking.
+ break;
+ }
+
+ $length++;
+ $offset += $direction;
}
}
- $file = ($best_file == $changeset->getFilename() ? '' : $best_file);
- for ($i = $best_length; $i--; ) {
- $type = idx($types[$best_file], $best_line + $best_offset - $i);
- $copies[$line + $best_offset - $i] = ($best_length < $min_lines
- ? array() // Ignore short blocks.
- : array($file, $best_line + $best_offset - $i, $type));
+
+ if ($length < $best_length) {
+ // If we already know of a better source (more matching lines)
+ // for this move/copy, stick with that one. We prefer long
+ // copies/moves which match a lot of context over short ones.
+ continue;
}
- for ($i = 0; $i < $best_offset; $i++) {
- next($added);
+
+ if ($length == $best_length) {
+ if (idx($types[$file], $orig_line) != '-') {
+ // If we already know of an equally good source (same number
+ // of matching lines) and this isn't a move, stick with the
+ // other one. We prefer moves over copies.
+ continue;
+ }
}
+
+ $best_length = $length;
+ // ($offset - 1) contains number of forward matching lines.
+ $best_offset = $offset - 1;
+ $best_file = $file;
+ $best_line = $orig_line;
}
+
+ $file = ($best_file == $changeset->getFilename() ? '' : $best_file);
+ for ($i = $best_length; $i--; ) {
+ $type = idx($types[$best_file], $best_line + $best_offset - $i);
+ $copies[$line + $best_offset - $i] = ($best_length < $min_lines
+ ? array() // Ignore short blocks.
+ : array($file, $best_line + $best_offset - $i, $type));
+ }
+
+ $skip_lines = $best_offset;
}
}
+
$copies = array_filter($copies);
if ($copies) {
$metadata = $changeset->getMetadata();
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Oct 16 2024, 8:36 AM (4 w, 5 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6716767
Default Alt Text
D12145.id29194.diff (7 KB)
Attached To
Mode
D12145: Clean up copy detection code a bit
Attached
Detach File
Event Timeline
Log In to Comment