diff --git a/src/utils/PhutilProseDiff.php b/src/utils/PhutilProseDiff.php index 5507e52..df34d64 100644 --- a/src/utils/PhutilProseDiff.php +++ b/src/utils/PhutilProseDiff.php @@ -1,286 +1,292 @@ parts[] = array( 'type' => $type, 'text' => $text, ); return $this; } public function getParts() { return $this->parts; } /** * Get diff parts, but replace large blocks of unchanged text with "." * parts representing missing context. */ public function getSummaryParts() { $parts = $this->getParts(); $head_key = head_key($parts); $last_key = last_key($parts); $results = array(); foreach ($parts as $key => $part) { $is_head = ($key == $head_key); $is_last = ($key == $last_key); switch ($part['type']) { case '=': $pieces = $this->splitTextForSummary($part['text']); if ($is_head || $is_last) { $need = 2; } else { $need = 3; } // We don't have enough pieces to omit anything, so just continue. if (count($pieces) < $need) { $results[] = $part; break; } if (!$is_head) { $results[] = array( 'type' => '=', 'text' => head($pieces), ); } $results[] = array( 'type' => '.', 'text' => null, ); if (!$is_last) { $results[] = array( 'type' => '=', 'text' => last($pieces), ); } break; default: $results[] = $part; break; } } return $results; } public function reorderParts() { // Reorder sequences of removed and added sections to put all the "-" // parts together first, then all the "+" parts together. This produces // a more human-readable result than intermingling them. $o_run = array(); $n_run = array(); $result = array(); foreach ($this->parts as $part) { $type = $part['type']; switch ($type) { case '-': $o_run[] = $part; break; case '+': $n_run[] = $part; break; default: if ($o_run || $n_run) { foreach ($this->combineRuns($o_run, $n_run) as $merged_part) { $result[] = $merged_part; } $o_run = array(); $n_run = array(); } $result[] = $part; break; } } if ($o_run || $n_run) { foreach ($this->combineRuns($o_run, $n_run) as $part) { $result[] = $part; } } // Now, combine consecuitive runs of the same type of change (like a // series of "-" parts) into a single run. $combined = array(); $last = null; $last_text = null; foreach ($result as $part) { $type = $part['type']; if ($last !== $type) { if ($last !== null) { $combined[] = array( 'type' => $last, 'text' => $last_text, ); } $last_text = null; $last = $type; } $last_text .= $part['text']; } if ($last_text !== null) { $combined[] = array( 'type' => $last, 'text' => $last_text, ); } $this->parts = $combined; return $this; } private function combineRuns($o_run, $n_run) { $o_merge = $this->mergeParts($o_run); $n_merge = $this->mergeParts($n_run); // When removed and added blocks share a prefix or suffix, we sometimes // want to count it as unchanged (for example, if it is whitespace) but // sometimes want to count it as changed (for example, if it is a word // suffix like "ing"). Find common prefixes and suffixes of these layout // characters and emit them as "=" (unchanged) blocks. $layout_characters = array( ' ' => true, "\n" => true, '.' => true, '!' => true, ',' => true, '?' => true, + ']' => true, + '[' => true, + '(' => true, + ')' => true, + '<' => true, + '>' => true, ); $o_text = $o_merge['text']; $n_text = $n_merge['text']; $o_len = strlen($o_text); $n_len = strlen($n_text); $min_len = min($o_len, $n_len); $prefix_len = 0; for ($pos = 0; $pos < $min_len; $pos++) { $o = $o_text[$pos]; $n = $n_text[$pos]; if ($o !== $n) { break; } if (empty($layout_characters[$o])) { break; } $prefix_len++; } $suffix_len = 0; for ($pos = 0; $pos < ($min_len - $prefix_len); $pos++) { $o = $o_text[$o_len - ($pos + 1)]; $n = $n_text[$n_len - ($pos + 1)]; if ($o !== $n) { break; } if (empty($layout_characters[$o])) { break; } $suffix_len++; } $results = array(); if ($prefix_len) { $results[] = array( 'type' => '=', 'text' => substr($o_text, 0, $prefix_len), ); } if ($prefix_len < $o_len) { $results[] = array( 'type' => '-', 'text' => substr( $o_text, $prefix_len, $o_len - $prefix_len - $suffix_len), ); } if ($prefix_len < $n_len) { $results[] = array( 'type' => '+', 'text' => substr( $n_text, $prefix_len, $n_len - $prefix_len - $suffix_len), ); } if ($suffix_len) { $results[] = array( 'type' => '=', 'text' => substr($o_text, -$suffix_len), ); } return $results; } private function mergeParts(array $parts) { $text = ''; $type = null; foreach ($parts as $part) { $part_type = $part['type']; if ($type === null) { $type = $part_type; } if ($type !== $part_type) { throw new Exception(pht('Can not merge parts of dissimilar types!')); } $text .= $part['text']; } return array( 'type' => $type, 'text' => $text, ); } private function splitTextForSummary($text) { $matches = null; $ok = preg_match('/^(\n*[^\n]+)\n/', $text, $matches); if (!$ok) { return array($text); } $head = $matches[1]; $text = substr($text, strlen($head)); $ok = preg_match('/\n([^\n]+\n*)\z/', $text, $matches); if (!$ok) { return array($text); } $last = $matches[1]; $text = substr($text, 0, -strlen($last)); if (!strlen(trim($text))) { return array($head, $last); } else { return array($head, $text, $last); } } } diff --git a/src/utils/__tests__/PhutilProseDiffTestCase.php b/src/utils/__tests__/PhutilProseDiffTestCase.php index 8d40484..bddebd3 100644 --- a/src/utils/__tests__/PhutilProseDiffTestCase.php +++ b/src/utils/__tests__/PhutilProseDiffTestCase.php @@ -1,204 +1,225 @@ assertProseParts( '', '', array(), pht('Empty')); $this->assertProseParts( "xxx\nyyy", "xxx\nzzz\nyyy", array( "= xxx\n", "+ zzz\n", '= yyy', ), pht('Add Paragraph')); $this->assertProseParts( "xxx\nzzz\nyyy", "xxx\nyyy", array( "= xxx\n", "- zzz\n", '= yyy', ), pht('Remove Paragraph')); // Without smoothing, the alogorithm identifies that "shark" and "cat" // both contain the letter "a" and tries to express this as a very // fine-grained edit which replaces "sh" with "c" and then "rk" with "t". // This is technically correct, but it is much easier for human viewers to // parse if we smooth this into a single removal and a single addition. $this->assertProseParts( 'They say the shark has nine lives.', 'They say the cat has nine lives.', array( '= They say the ', '- shark', '+ cat', '= has nine lives.', ), pht('"Shark/cat" word edit smoothenss.')); $this->assertProseParts( 'Rising quickly, she says', 'Rising quickly, she remarks:', array( '= Rising quickly, she ', '- says', '+ remarks:', ), pht('"Says/remarks" word edit smoothenss.')); $this->assertProseParts( 'See screenshots', 'Viewed video files', array( '- See screenshots', '+ Viewed video files', ), pht('Complete paragraph rewrite.')); $this->assertProseParts( 'xaaax', 'xbbbx', array( '- xaaax', '+ xbbbx', ), pht('Whole word rewrite with common prefix and suffix.')); $this->assertProseParts( ' aaa ', ' bbb ', array( '= ', '- aaa', '+ bbb', '= ', ), pht('Whole word rewrite with whitespace prefix and suffix.')); $this->assertSummaryProseParts( "a\nb\nc\nd\ne\nf\ng\nh\n", "a\nb\nc\nd\nX\nf\ng\nh\n", array( '.', "= d\n", '- e', '+ X', "= \nf", '.', ), pht('Summary diff with middle change.')); $this->assertSummaryProseParts( "a\nb\nc\nd\ne\nf\ng\nh\n", "X\nb\nc\nd\ne\nf\ng\nh\n", array( '- a', '+ X', "= \nb", '.', ), pht('Summary diff with head change.')); $this->assertSummaryProseParts( "a\nb\nc\nd\ne\nf\ng\nh\n", "a\nb\nc\nd\ne\nf\ng\nX\n", array( '.', "= g\n", '- h', '+ X', "= \n", ), pht('Summary diff with last change.')); $this->assertProseParts( 'aaa aaa aaa aaa, bbb bbb bbb bbb.', "aaa aaa aaa aaa, bbb bbb bbb bbb.\n\n- ccc ccc ccc", array( '= aaa aaa aaa aaa, bbb bbb bbb bbb.', "+ \n\n- ccc ccc ccc", ), pht('Diff with new trailing content.')); $this->assertProseParts( 'aaa aaa aaa aaa, bbb bbb bbb bbb.', 'aaa aaa aaa aaa bbb bbb bbb bbb.', array( '= aaa aaa aaa aaa', '- ,', '= bbb bbb bbb bbb.', ), pht('Diff with a removed comma.')); $this->assertProseParts( 'aaa aaa aaa aaa, bbb bbb bbb bbb.', "aaa aaa aaa aaa bbb bbb bbb bbb.\n\n- ccc ccc ccc!", array( '= aaa aaa aaa aaa', '- ,', '= bbb bbb bbb bbb.', "+ \n\n- ccc ccc ccc!", ), pht('Diff with a removed comma and new trailing content.')); + $this->assertProseParts( + '[ ] Walnuts', + '[X] Walnuts', + array( + '= [', + '- ', + '+ X', + '= ] Walnuts', + ), + pht('Diff adding a tickmark to a checkbox list.')); + + $this->assertProseParts( + '[[ ./week49 ]]', + '[[ ./week50 ]]', + array( + '= [[ ./week', + '- 49', + '+ 50', + '= ]]', + ), + pht('Diff changing a remarkup wiki link target.')); } private function assertProseParts($old, $new, array $expect_parts, $label) { $engine = new PhutilProseDifferenceEngine(); $diff = $engine->getDiff($old, $new); $parts = $diff->getParts(); $this->assertParts($expect_parts, $parts, $label); } private function assertSummaryProseParts( $old, $new, array $expect_parts, $label) { $engine = new PhutilProseDifferenceEngine(); $diff = $engine->getDiff($old, $new); $parts = $diff->getSummaryParts(); $this->assertParts($expect_parts, $parts, $label); } private function assertParts( array $expect, array $actual_parts, $label) { $actual = array(); foreach ($actual_parts as $actual_part) { $type = $actual_part['type']; $text = $actual_part['text']; switch ($type) { case '.': $actual[] = $type; break; default: $actual[] = "{$type} {$text}"; break; } } $this->assertEqual($expect, $actual, $label); } }