Changeset View
Changeset View
Standalone View
Standalone View
src/applications/differential/engine/DifferentialChangesetEngine.php
- This file was added.
<?php
/**
 * Computes derived information for a list of Differential changesets.
 *
 * For each changeset this engine flags files which look like generated code,
 * and then records which added lines appear to have been copied or moved
 * from old-file lines elsewhere in the same list of changesets.
 */
final class DifferentialChangesetEngine extends Phobject {

  /**
   * Rebuild derived data for a list of changesets.
   *
   * Runs generated-code detection on each changeset individually, then runs
   * copy/move detection across the whole list. Results are written to the
   * changesets through their setters.
   *
   * @param list<DifferentialChangeset> Changesets to update.
   * @return void
   */
  public function rebuildChangesets(array $changesets) {
    assert_instances_of($changesets, 'DifferentialChangeset');

    foreach ($changesets as $changeset) {
      $this->detectGeneratedCode($changeset);
    }

    $this->detectCopiedCode($changesets);
  }


/* -( Generated Code )----------------------------------------------------- */


  /**
   * Mark a changeset as generated code if either detection rule matches.
   *
   * The "trusted" and "untrusted" results are stored separately, and an
   * attribute is only written when the corresponding check is positive.
   *
   * @param DifferentialChangeset Changeset to examine and update.
   * @return void
   */
  private function detectGeneratedCode(DifferentialChangeset $changeset) {
    $is_generated_trusted = $this->isTrustedGeneratedCode($changeset);
    if ($is_generated_trusted) {
      $changeset->setTrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_trusted);
    }

    $is_generated_untrusted = $this->isUntrustedGeneratedCode($changeset);
    if ($is_generated_untrusted) {
      $changeset->setUntrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_untrusted);
    }
  }

  /**
   * Test whether the changeset's filename matches a configured pattern.
   *
   * The patterns are regular expressions read from the
   * `differential.generated-paths` configuration option, so this check
   * depends on configuration rather than on the content of the diff.
   *
   * @param DifferentialChangeset Changeset to examine.
   * @return bool True if any configured pattern matches the filename.
   */
  private function isTrustedGeneratedCode(DifferentialChangeset $changeset) {
    $filename = $changeset->getFilename();

    $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths');
    foreach ($paths as $regexp) {
      if (preg_match($regexp, $filename)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Test whether the new file content carries a generated-code marker.
   *
   * Changesets with no hunks are never considered generated by this check.
   *
   * @param DifferentialChangeset Changeset to examine.
   * @return bool True if the new file text contains the marker.
   */
  private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) {
    if ($changeset->getHunks()) {
      $new_data = $changeset->makeNewFile();

      // The marker is assembled by concatenation, presumably so that this
      // source file does not itself contain the literal marker text.
      if (strpos($new_data, '@'.'generated') !== false) {
        return true;
      }
    }

    return false;
  }


/* -( Copied Code )-------------------------------------------------------- */


  /**
   * Detect added lines which were copied or moved from old-file lines.
   *
   * First, every old-file line in every changeset is indexed by its trimmed
   * text. Then, for each added ("+") line in each changeset, the candidates
   * with identical text are extended backward and forward to find the
   * longest run of matching adjacent lines. Runs of at least `$min_lines`
   * lines are recorded in the changeset metadata under `copy:lines`, keyed
   * by new-file line number, as `array($file, $old_line, $old_type)`. The
   * file is the empty string when the source is the changeset's own file.
   *
   * @param list<DifferentialChangeset> Changesets to examine and update.
   * @return void
   */
  private function detectCopiedCode(array $changesets) {
    // Old-file lines shorter than this (after trimming) are not indexed as
    // potential sources.
    $min_width = 30;

    // Matching blocks shorter than this many lines are not reported.
    $min_lines = 3;

    // Added lines with more identical candidates than this are skipped.
    $max_candidates = 16;

    // $map:   trimmed text => list of array($file, $line) where it occurs.
    // $files: $file => $line => trimmed text of the old-file line.
    // $types: $file => $line => type of the old-file line.
    $map = array();
    $files = array();
    $types = array();
    foreach ($changesets as $changeset) {
      $file = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        $lines = $hunk->getStructuredOldFile();
        foreach ($lines as $line => $info) {
          $type = $info['type'];
          if ($type == '\\') {
            // Presumably a "\ No newline at end of file" marker rather than
            // real content; it is not indexed.
            continue;
          }
          $types[$file][$line] = $type;

          $text = $info['text'];
          $text = trim($text);
          $files[$file][$line] = $text;

          if (strlen($text) >= $min_width) {
            $map[$text][] = array($file, $line);
          }
        }
      }
    }

    foreach ($changesets as $changeset) {
      $copies = array();
      foreach ($changeset->getHunks() as $hunk) {
        $added = $hunk->getStructuredNewFile();
        $atype = array();
        foreach ($added as $line => $info) {
          $atype[$line] = $info['type'];
          $added[$line] = trim($info['text']);
        }

        $skip_lines = 0;
        foreach ($added as $line => $code) {
          if ($skip_lines) {
            // We're skipping lines that we already processed because we
            // extended a block above them downward to include them.
            $skip_lines--;
            continue;
          }

          if ($atype[$line] !== '+') {
            // This line hasn't been changed in the new file, so don't try
            // to figure out where it came from.
            continue;
          }

          if (empty($map[$code])) {
            // No indexed old-file line has this text: either the line is
            // too short to trigger copy/move detection, or it does not
            // appear in any old file.
            continue;
          }

          if (count($map[$code]) > $max_candidates) {
            // If there are a large number of identical lines in this diff,
            // don't try to figure out where this block came from: the
            // analysis is O(N^2), since we need to compare every line
            // against every other line. Even if we arrive at a result, it
            // is unlikely to be meaningful. See T5041.
            continue;
          }

          // The first candidate always replaces these, because every
          // candidate has a length of at least 1.
          $best_length = 0;
          $best_offset = 0;
          $best_file = null;
          $best_line = null;

          // Explore all candidates.
          foreach ($map[$code] as $val) {
            list($file, $orig_line) = $val;
            $length = 1;

            // Search backward and forward to find all of the adjacent lines
            // which match.
            foreach (array(-1, 1) as $direction) {
              $offset = $direction;
              while (true) {
                if (isset($copies[$line + $offset])) {
                  // If we run into a block above us which we've already
                  // attributed to a move or copy from elsewhere, stop
                  // looking.
                  break;
                }

                if (!isset($added[$line + $offset])) {
                  // If we've run off the beginning or end of the new file,
                  // stop looking.
                  break;
                }

                if (!isset($files[$file][$orig_line + $offset])) {
                  // If we've run off the beginning or end of the original
                  // file, we also stop looking.
                  break;
                }

                $old = $files[$file][$orig_line + $offset];
                $new = $added[$line + $offset];
                if ($old !== $new) {
                  // If the old line doesn't match the new line, stop
                  // looking.
                  break;
                }

                $length++;
                $offset += $direction;
              }
            }

            if ($length < $best_length) {
              // If we already know of a better source (more matching lines)
              // for this move/copy, stick with that one. We prefer long
              // copies/moves which match a lot of context over short ones.
              continue;
            }

            if ($length == $best_length) {
              if (idx($types[$file], $orig_line) != '-') {
                // If we already know of an equally good source (same number
                // of matching lines) and this isn't a move, stick with the
                // other one. We prefer moves over copies.
                continue;
              }
            }

            $best_length = $length;
            // ($offset - 1) contains number of forward matching lines.
            $best_offset = $offset - 1;
            $best_file = $file;
            $best_line = $orig_line;
          }

          // Use a strict comparison: with "==", PHP compares numeric-looking
          // strings as numbers, so distinct filenames like "1e3" and "1000"
          // would be treated as the same file.
          $copy_file = ($best_file === $changeset->getFilename()
            ? ''
            : $best_file);

          // Walk the matched block from its first line to its last line,
          // recording the source of each new-file line.
          for ($i = $best_length; $i--; ) {
            $type = idx($types[$best_file], $best_line + $best_offset - $i);
            $copies[$line + $best_offset - $i] = ($best_length < $min_lines
              ? array() // Ignore short blocks.
              : array($copy_file, $best_line + $best_offset - $i, $type));
          }

          $skip_lines = $best_offset;
        }
      }

      // Drop the empty placeholder entries left behind by short blocks.
      $copies = array_filter($copies);
      if ($copies) {
        $metadata = $changeset->getMetadata();
        $metadata['copy:lines'] = $copies;
        $changeset->setMetadata($metadata);
      }
    }
  }

}