diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php --- a/src/applications/differential/parser/DifferentialChangesetParser.php +++ b/src/applications/differential/parser/DifferentialChangesetParser.php @@ -1176,6 +1176,16 @@ $added = array_map('trim', $hunk->getAddedLines()); for (reset($added); list($line, $code) = each($added); ) { if (isset($map[$code])) { // We found a long matching line. + + if (count($map[$code]) > 16) { + // If there are a large number of identical lines in this diff, + // don't try to figure out where this block came from: the + // analysis is O(N^2), since we need to compare every line + // against every other line. Even if we arrive at a result, it + // is unlikely to be meaningful. See T5041. + continue 2; + } + $best_length = 0; foreach ($map[$code] as $val) { // Explore all candidates. list($file, $orig_line) = $val; diff --git a/src/applications/differential/storage/__tests__/DifferentialDiffTestCase.php b/src/applications/differential/storage/__tests__/DifferentialDiffTestCase.php --- a/src/applications/differential/storage/__tests__/DifferentialDiffTestCase.php +++ b/src/applications/differential/storage/__tests__/DifferentialDiffTestCase.php @@ -15,4 +15,41 @@ ipull($copies, 1)); } + public function testDetectSlowCopiedCode() { + // This tests that the detector has a reasonable runtime when a diff + // contains a very large number of identical lines. See T5041. + + $parser = new ArcanistDiffParser(); + + $line = str_repeat('x', 60); + $oline = '-'.$line."\n"; + $nline = '+'.$line."\n"; + + $n = 1000; + $oblock = str_repeat($oline, $n); + $nblock = str_repeat($nline, $n); + + $raw_diff = <<parseDiff($raw_diff)); + + $this->assertTrue(true); + } + + }