diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -432,6 +432,7 @@ 'DifferentialChangesSinceLastUpdateField' => 'applications/differential/customfield/DifferentialChangesSinceLastUpdateField.php', 'DifferentialChangeset' => 'applications/differential/storage/DifferentialChangeset.php', 'DifferentialChangesetDetailView' => 'applications/differential/view/DifferentialChangesetDetailView.php', + 'DifferentialChangesetEngine' => 'applications/differential/engine/DifferentialChangesetEngine.php', 'DifferentialChangesetFileTreeSideNavBuilder' => 'applications/differential/view/DifferentialChangesetFileTreeSideNavBuilder.php', 'DifferentialChangesetHTMLRenderer' => 'applications/differential/render/DifferentialChangesetHTMLRenderer.php', 'DifferentialChangesetListController' => 'applications/differential/controller/DifferentialChangesetListController.php', @@ -2869,6 +2870,7 @@ 'PhabricatorDifferentialExtractWorkflow' => 'applications/differential/management/PhabricatorDifferentialExtractWorkflow.php', 'PhabricatorDifferentialManagementWorkflow' => 'applications/differential/management/PhabricatorDifferentialManagementWorkflow.php', 'PhabricatorDifferentialMigrateHunkWorkflow' => 'applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php', + 'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php', 'PhabricatorDifferentialRevisionTestDataGenerator' => 'applications/differential/lipsum/PhabricatorDifferentialRevisionTestDataGenerator.php', 'PhabricatorDiffusionApplication' => 'applications/diffusion/application/PhabricatorDiffusionApplication.php', 'PhabricatorDiffusionBlameSetting' => 'applications/settings/setting/PhabricatorDiffusionBlameSetting.php', @@ -5729,6 +5731,7 @@ 'PhabricatorDestructibleInterface', ), 'DifferentialChangesetDetailView' => 'AphrontView', + 'DifferentialChangesetEngine' => 'Phobject', 'DifferentialChangesetFileTreeSideNavBuilder' => 'Phobject', 'DifferentialChangesetHTMLRenderer' => 'DifferentialChangesetRenderer', 'DifferentialChangesetListController' => 'DifferentialController', @@ -8530,6 +8533,7 @@ 'PhabricatorDifferentialExtractWorkflow' => 'PhabricatorDifferentialManagementWorkflow', 'PhabricatorDifferentialManagementWorkflow' => 'PhabricatorManagementWorkflow', 'PhabricatorDifferentialMigrateHunkWorkflow' => 'PhabricatorDifferentialManagementWorkflow', + 'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'PhabricatorDifferentialManagementWorkflow', 'PhabricatorDifferentialRevisionTestDataGenerator' => 'PhabricatorTestDataGenerator', 'PhabricatorDiffusionApplication' => 'PhabricatorApplication', 'PhabricatorDiffusionBlameSetting' => 'PhabricatorInternalSetting', diff --git a/src/applications/differential/engine/DifferentialChangesetEngine.php b/src/applications/differential/engine/DifferentialChangesetEngine.php new file mode 100644 --- /dev/null +++ b/src/applications/differential/engine/DifferentialChangesetEngine.php @@ -0,0 +1,223 @@ +detectGeneratedCode($changeset); + } + + $this->detectCopiedCode($changesets); + } + + +/* -( Generated Code )----------------------------------------------------- */ + + + private function detectGeneratedCode(DifferentialChangeset $changeset) { + $is_generated_trusted = $this->isTrustedGeneratedCode($changeset); + if ($is_generated_trusted) { + $changeset->setTrustedChangesetAttribute( + DifferentialChangeset::ATTRIBUTE_GENERATED, + $is_generated_trusted); + } + + $is_generated_untrusted = $this->isUntrustedGeneratedCode($changeset); + if ($is_generated_untrusted) { + $changeset->setUntrustedChangesetAttribute( + DifferentialChangeset::ATTRIBUTE_GENERATED, + $is_generated_untrusted); + } + } + + private function isTrustedGeneratedCode(DifferentialChangeset $changeset) { + + $filename = $changeset->getFilename(); + + $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths'); + foreach ($paths as $regexp) { + if (preg_match($regexp, $filename)) { + return true; + } + } + + return false; + } + + private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) { + + if ($changeset->getHunks()) { + $new_data = $changeset->makeNewFile(); + if (strpos($new_data, '@'.'generated') !== false) { + return true; + } + } + + return false; + } + + +/* -( Copied Code )-------------------------------------------------------- */ + + + private function detectCopiedCode(array $changesets) { + $min_width = 30; + $min_lines = 3; + + $map = array(); + $files = array(); + $types = array(); + foreach ($changesets as $changeset) { + $file = $changeset->getFilename(); + foreach ($changeset->getHunks() as $hunk) { + $lines = $hunk->getStructuredOldFile(); + foreach ($lines as $line => $info) { + $type = $info['type']; + if ($type == '\\') { + continue; + } + $types[$file][$line] = $type; + + $text = $info['text']; + $text = trim($text); + $files[$file][$line] = $text; + + if (strlen($text) >= $min_width) { + $map[$text][] = array($file, $line); + } + } + } + } + + foreach ($changesets as $changeset) { + $copies = array(); + foreach ($changeset->getHunks() as $hunk) { + $added = $hunk->getStructuredNewFile(); + $atype = array(); + + foreach ($added as $line => $info) { + $atype[$line] = $info['type']; + $added[$line] = trim($info['text']); + } + + $skip_lines = 0; + foreach ($added as $line => $code) { + if ($skip_lines) { + // We're skipping lines that we already processed because we + // extended a block above them downward to include them. + $skip_lines--; + continue; + } + + if ($atype[$line] !== '+') { + // This line hasn't been changed in the new file, so don't try + // to figure out where it came from. + continue; + } + + if (empty($map[$code])) { + // This line was too short to trigger copy/move detection. + continue; + } + + if (count($map[$code]) > 16) { + // If there are a large number of identical lines in this diff, + // don't try to figure out where this block came from: the analysis + // is O(N^2), since we need to compare every line against every + // other line. Even if we arrive at a result, it is unlikely to be + // meaningful. See T5041. + continue; + } + + $best_length = 0; + + // Explore all candidates. + foreach ($map[$code] as $val) { + list($file, $orig_line) = $val; + $length = 1; + + // Search backward and forward to find all of the adjacent lines + // which match. + foreach (array(-1, 1) as $direction) { + $offset = $direction; + while (true) { + if (isset($copies[$line + $offset])) { + // If we run into a block above us which we've already + // attributed to a move or copy from elsewhere, stop + // looking. + break; + } + + if (!isset($added[$line + $offset])) { + // If we've run off the beginning or end of the new file, + // stop looking. + break; + } + + if (!isset($files[$file][$orig_line + $offset])) { + // If we've run off the beginning or end of the original + // file, we also stop looking. + break; + } + + $old = $files[$file][$orig_line + $offset]; + $new = $added[$line + $offset]; + if ($old !== $new) { + // If the old line doesn't match the new line, stop + // looking. + break; + } + + $length++; + $offset += $direction; + } + } + + if ($length < $best_length) { + // If we already know of a better source (more matching lines) + // for this move/copy, stick with that one. We prefer long + // copies/moves which match a lot of context over short ones. + continue; + } + + if ($length == $best_length) { + if (idx($types[$file], $orig_line) != '-') { + // If we already know of an equally good source (same number + // of matching lines) and this isn't a move, stick with the + // other one. We prefer moves over copies. + continue; + } + } + + $best_length = $length; + // ($offset - 1) contains number of forward matching lines. + $best_offset = $offset - 1; + $best_file = $file; + $best_line = $orig_line; + } + + $file = ($best_file == $changeset->getFilename() ? '' : $best_file); + for ($i = $best_length; $i--; ) { + $type = idx($types[$best_file], $best_line + $best_offset - $i); + $copies[$line + $best_offset - $i] = ($best_length < $min_lines + ? array() // Ignore short blocks. + : array($file, $best_line + $best_offset - $i, $type)); + } + + $skip_lines = $best_offset; + } + } + + $copies = array_filter($copies); + if ($copies) { + $metadata = $changeset->getMetadata(); + $metadata['copy:lines'] = $copies; + $changeset->setMetadata($metadata); + } + } + + } + +} diff --git a/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php b/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php new file mode 100644 --- /dev/null +++ b/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php @@ -0,0 +1,92 @@ +setName('rebuild-changesets') + ->setExamples('**rebuild-changesets** --revision __revision__') + ->setSynopsis(pht('Rebuild changesets for a revision.')) + ->setArguments( + array( + array( + 'name' => 'revision', + 'param' => 'revision', + 'help' => pht('Revision to rebuild changesets for.'), + ), + )); + } + + public function execute(PhutilArgumentParser $args) { + $viewer = $this->getViewer(); + + $revision_identifier = $args->getArg('revision'); + if (!$revision_identifier) { + throw new PhutilArgumentUsageException( + pht('Specify a revision to rebuild changesets for with "--revision".')); + } + + $revision = id(new PhabricatorObjectQuery()) + ->setViewer($viewer) + ->withNames(array($revision_identifier)) + ->executeOne(); + if ($revision) { + if (!($revision instanceof DifferentialRevision)) { + throw new PhutilArgumentUsageException( + pht( + 'Object "%s" specified by "--revision" must be a Differential '. + 'revision.')); + } + } else { + $revision = id(new DifferentialRevisionQuery()) + ->setViewer($viewer) + ->withIDs(array($revision_identifier)) + ->executeOne(); + } + + if (!$revision) { + throw new PhutilArgumentUsageException( + pht( + 'No revision "%s" exists.', + $revision_identifier)); + } + + $diffs = id(new DifferentialDiffQuery()) + ->setViewer($viewer) + ->withRevisionIDs(array($revision->getID())) + ->execute(); + + $changesets = id(new DifferentialChangesetQuery()) + ->setViewer($viewer) + ->withDiffs($diffs) + ->needHunks(true) + ->execute(); + + $changeset_groups = mgroup($changesets, 'getDiffID'); + + foreach ($changeset_groups as $diff_id => $changesets) { + echo tsprintf( + "%s\n", + pht( + 'Rebuilding %s changeset(s) for diff ID %d.', + phutil_count($changesets), + $diff_id)); + + foreach ($changesets as $changeset) { + echo tsprintf( + " %s\n", + $changeset->getFilename()); + } + + id(new DifferentialChangesetEngine()) + ->rebuildChangesets($changesets); + + echo tsprintf( + "%s\n", + pht('Done.')); + } + } + + +} diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php --- a/src/applications/differential/parser/DifferentialChangesetParser.php +++ b/src/applications/differential/parser/DifferentialChangesetParser.php @@ -1419,167 +1419,6 @@ return sprintf('%d%%', 100 * ($covered / ($covered + $not_covered))); } - public function detectCopiedCode( - array $changesets, - $min_width = 30, - $min_lines = 3) { - - assert_instances_of($changesets, 'DifferentialChangeset'); - - $map = array(); - $files = array(); - $types = array(); - foreach ($changesets as $changeset) { - $file = $changeset->getFilename(); - foreach ($changeset->getHunks() as $hunk) { - $lines = $hunk->getStructuredOldFile(); - foreach ($lines as $line => $info) { - $type = $info['type']; - if ($type == '\\') { - continue; - } - $types[$file][$line] = $type; - - $text = $info['text']; - $text = trim($text); - $files[$file][$line] = $text; - - if (strlen($text) >= $min_width) { - $map[$text][] = array($file, $line); - } - } - } - } - - foreach ($changesets as $changeset) { - $copies = array(); - foreach ($changeset->getHunks() as $hunk) { - $added = $hunk->getStructuredNewFile(); - $atype = array(); - - foreach ($added as $line => $info) { - $atype[$line] = $info['type']; - $added[$line] = trim($info['text']); - } - - $skip_lines = 0; - foreach ($added as $line => $code) { - if ($skip_lines) { - // We're skipping lines that we already processed because we - // extended a block above them downward to include them. - $skip_lines--; - continue; - } - - if ($atype[$line] !== '+') { - // This line hasn't been changed in the new file, so don't try - // to figure out where it came from. - continue; - } - - if (empty($map[$code])) { - // This line was too short to trigger copy/move detection. - continue; - } - - if (count($map[$code]) > 16) { - // If there are a large number of identical lines in this diff, - // don't try to figure out where this block came from: the analysis - // is O(N^2), since we need to compare every line against every - // other line. Even if we arrive at a result, it is unlikely to be - // meaningful. See T5041. - continue; - } - - $best_length = 0; - - // Explore all candidates. - foreach ($map[$code] as $val) { - list($file, $orig_line) = $val; - $length = 1; - - // Search backward and forward to find all of the adjacent lines - // which match. - foreach (array(-1, 1) as $direction) { - $offset = $direction; - while (true) { - if (isset($copies[$line + $offset])) { - // If we run into a block above us which we've already - // attributed to a move or copy from elsewhere, stop - // looking. - break; - } - - if (!isset($added[$line + $offset])) { - // If we've run off the beginning or end of the new file, - // stop looking. - break; - } - - if (!isset($files[$file][$orig_line + $offset])) { - // If we've run off the beginning or end of the original - // file, we also stop looking. - break; - } - - $old = $files[$file][$orig_line + $offset]; - $new = $added[$line + $offset]; - if ($old !== $new) { - // If the old line doesn't match the new line, stop - // looking. - break; - } - - $length++; - $offset += $direction; - } - } - - if ($length < $best_length) { - // If we already know of a better source (more matching lines) - // for this move/copy, stick with that one. We prefer long - // copies/moves which match a lot of context over short ones. - continue; - } - - if ($length == $best_length) { - if (idx($types[$file], $orig_line) != '-') { - // If we already know of an equally good source (same number - // of matching lines) and this isn't a move, stick with the - // other one. We prefer moves over copies. - continue; - } - } - - $best_length = $length; - // ($offset - 1) contains number of forward matching lines. - $best_offset = $offset - 1; - $best_file = $file; - $best_line = $orig_line; - } - - $file = ($best_file == $changeset->getFilename() ? '' : $best_file); - for ($i = $best_length; $i--; ) { - $type = idx($types[$best_file], $best_line + $best_offset - $i); - $copies[$line + $best_offset - $i] = ($best_length < $min_lines - ? array() // Ignore short blocks. - : array($file, $best_line + $best_offset - $i, $type)); - } - - $skip_lines = $best_offset; - } - } - - $copies = array_filter($copies); - if ($copies) { - $metadata = $changeset->getMetadata(); - $metadata['copy:lines'] = $copies; - $changeset->setMetadata($metadata); - } - } - return $changesets; - } - /** * Build maps from lines comments appear on to actual lines. */ diff --git a/src/applications/differential/storage/DifferentialDiff.php b/src/applications/differential/storage/DifferentialDiff.php --- a/src/applications/differential/storage/DifferentialDiff.php +++ b/src/applications/differential/storage/DifferentialDiff.php @@ -226,23 +226,18 @@ $changeset->setAddLines($add_lines); $changeset->setDelLines($del_lines); - self::detectGeneratedCode($changeset); - $diff->addUnsavedChangeset($changeset); } $diff->setLineCount($lines); - $parser = new DifferentialChangesetParser(); - $changesets = $parser->detectCopiedCode( - $diff->getChangesets(), - $min_width = 30, - $min_lines = 3); - $diff->attachChangesets($changesets); + $changesets = $diff->getChangesets(); + + id(new DifferentialChangesetEngine()) + ->rebuildChangesets($changesets); return $diff; } - public function getDiffDict() { $dict = array( 'id' => $this->getID(), @@ -823,50 +818,4 @@ ); } - private static function detectGeneratedCode( - DifferentialChangeset $changeset) { - - $is_generated_trusted = self::isTrustedGeneratedCode($changeset); - if ($is_generated_trusted) { - $changeset->setTrustedChangesetAttribute( - DifferentialChangeset::ATTRIBUTE_GENERATED, - $is_generated_trusted); - } - - $is_generated_untrusted = self::isUntrustedGeneratedCode($changeset); - if ($is_generated_untrusted) { - $changeset->setUntrustedChangesetAttribute( - DifferentialChangeset::ATTRIBUTE_GENERATED, - $is_generated_untrusted); - } - } - - private static function isTrustedGeneratedCode( - DifferentialChangeset $changeset) { - - $filename = $changeset->getFilename(); - - $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths'); - foreach ($paths as $regexp) { - if (preg_match($regexp, $filename)) { - return true; - } - } - - return false; - } - - private static function isUntrustedGeneratedCode( - DifferentialChangeset $changeset) { - - if ($changeset->getHunks()) { - $new_data = $changeset->makeNewFile(); - if (strpos($new_data, '@'.'generated') !== false) { - return true; - } - } - - return false; - } - }