diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -12397,7 +12397,7 @@ 'PhutilPHPCodeSnippetContextFreeGrammar' => 'PhutilCLikeCodeSnippetContextFreeGrammar', 'PhutilPhabricatorAuthAdapter' => 'PhutilOAuthAuthAdapter', 'PhutilProseDiff' => 'Phobject', - 'PhutilProseDiffTestCase' => 'PhutilTestCase', + 'PhutilProseDiffTestCase' => 'PhabricatorTestCase', 'PhutilProseDifferenceEngine' => 'Phobject', 'PhutilQueryString' => 'Phobject', 'PhutilRealNameContextFreeGrammar' => 'PhutilContextFreeGrammar', diff --git a/src/applications/files/diff/PhabricatorDocumentEngineBlocks.php b/src/applications/files/diff/PhabricatorDocumentEngineBlocks.php --- a/src/applications/files/diff/PhabricatorDocumentEngineBlocks.php +++ b/src/applications/files/diff/PhabricatorDocumentEngineBlocks.php @@ -50,20 +50,35 @@ if ($old_line) { $old_hash = rtrim($old_line['text'], "\n"); - $old_block = array_shift($old_map[$old_hash]); - $old_block->setDifferenceType($old_line['type']); + if (!strlen($old_hash)) { + // This can happen when one of the sources has no blocks. + $old_block = null; + } else { + $old_block = array_shift($old_map[$old_hash]); + $old_block->setDifferenceType($old_line['type']); + } } else { $old_block = null; } if ($new_line) { $new_hash = rtrim($new_line['text'], "\n"); - $new_block = array_shift($new_map[$new_hash]); - $new_block->setDifferenceType($new_line['type']); + if (!strlen($new_hash)) { + $new_block = null; + } else { + $new_block = array_shift($new_map[$new_hash]); + $new_block->setDifferenceType($new_line['type']); + } } else { $new_block = null; } + // If both lists are empty, we may generate a row which has two empty + // blocks. + if (!$old_block && !$new_block) { + continue; + } + $rows[] = array( $old_block, $new_block, diff --git a/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php b/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php --- a/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php +++ b/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php @@ -10,47 +10,14 @@ $u_parts = $this->splitCorpus($u, $level); $v_parts = $this->splitCorpus($v, $level); - $matrix = id(new PhutilEditDistanceMatrix()) - ->setMaximumLength(128) - ->setSequences($u_parts, $v_parts) - ->setComputeString(true); - - // For word-level and character-level changes, smooth the output string - // to reduce the choppiness of the diff. - if ($level > 1) { - $matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL); - } - - $u_pos = 0; - $v_pos = 0; - - $edits = $matrix->getEditString(); - $edits_length = strlen($edits); - - $diff = new PhutilProseDiff(); - for ($ii = 0; $ii < $edits_length; $ii++) { - $c = $edits[$ii]; - if ($c == 's') { - $diff->addPart('=', $u_parts[$u_pos]); - $u_pos++; - $v_pos++; - } else if ($c == 'd') { - $diff->addPart('-', $u_parts[$u_pos]); - $u_pos++; - } else if ($c == 'i') { - $diff->addPart('+', $v_parts[$v_pos]); - $v_pos++; - } else if ($c == 'x') { - $diff->addPart('-', $u_parts[$u_pos]); - $diff->addPart('+', $v_parts[$v_pos]); - $u_pos++; - $v_pos++; - } else { - throw new Exception( - pht( - 'Unexpected character ("%s") in edit string.', - $c)); - } + if ($level === 0) { + $diff = $this->newHashDiff($u_parts, $v_parts); + $too_large = false; + } else { + list($diff, $too_large) = $this->newEditDistanceMatrixDiff( + $u_parts, + $v_parts, + $level); } $diff->reorderParts(); @@ -119,7 +86,7 @@ } else if (!strlen($new)) { $result->addPart('-', $old); } else { - if ($matrix->didReachMaximumLength()) { + if ($too_large) { // If this text was too big to diff, don't try to subdivide it. $result->addPart('-', $old); $result->addPart('+', $new); @@ -206,4 +173,103 @@ return $results; } + private function newEditDistanceMatrixDiff( + array $u_parts, + array $v_parts, + $level) { + + $matrix = id(new PhutilEditDistanceMatrix()) + ->setMaximumLength(128) + ->setSequences($u_parts, $v_parts) + ->setComputeString(true); + + // For word-level and character-level changes, smooth the output string + // to reduce the choppiness of the diff. + if ($level > 1) { + $matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL); + } + + $u_pos = 0; + $v_pos = 0; + + $edits = $matrix->getEditString(); + $edits_length = strlen($edits); + + $diff = new PhutilProseDiff(); + for ($ii = 0; $ii < $edits_length; $ii++) { + $c = $edits[$ii]; + if ($c == 's') { + $diff->addPart('=', $u_parts[$u_pos]); + $u_pos++; + $v_pos++; + } else if ($c == 'd') { + $diff->addPart('-', $u_parts[$u_pos]); + $u_pos++; + } else if ($c == 'i') { + $diff->addPart('+', $v_parts[$v_pos]); + $v_pos++; + } else if ($c == 'x') { + $diff->addPart('-', $u_parts[$u_pos]); + $diff->addPart('+', $v_parts[$v_pos]); + $u_pos++; + $v_pos++; + } else { + throw new Exception( + pht( + 'Unexpected character ("%s") in edit string.', + $c)); + } + } + + return array($diff, $matrix->didReachMaximumLength()); + } + + private function newHashDiff(array $u_parts, array $v_parts) { + + $u_ref = new PhabricatorDocumentRef(); + $v_ref = new PhabricatorDocumentRef(); + + $u_blocks = $this->newDocumentEngineBlocks($u_parts); + $v_blocks = $this->newDocumentEngineBlocks($v_parts); + + $rows = id(new PhabricatorDocumentEngineBlocks()) + ->addBlockList($u_ref, $u_blocks) + ->addBlockList($v_ref, $v_blocks) + ->newTwoUpLayout(); + + $diff = new PhutilProseDiff(); + foreach ($rows as $row) { + list($u_block, $v_block) = $row; + + if ($u_block && $v_block) { + if ($u_block->getDifferenceType() === '-') { + $diff->addPart('-', $u_block->getContent()); + $diff->addPart('+', $v_block->getContent()); + } else { + $diff->addPart('=', $u_block->getContent()); + } + } else if ($u_block) { + $diff->addPart('-', $u_block->getContent()); + } else { + $diff->addPart('+', $v_block->getContent()); + } + } + + return $diff; + } + + private function newDocumentEngineBlocks(array $parts) { + $blocks = array(); + + foreach ($parts as $part) { + $hash = PhabricatorHash::digestForIndex($part); + + $blocks[] = id(new PhabricatorDocumentEngineBlock()) + ->setContent($part) + ->setDifferenceHash($hash); + } + + return $blocks; + } + } diff --git a/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php b/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php --- a/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php +++ b/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php @@ -1,6 +1,7 @@ assertProseParts(