Page Menu | Home | Phabricator

D16055.id38631.diff
No One | Temporary

D16055.id38631.diff

diff --git a/scripts/utils/prosediff.php b/scripts/utils/prosediff.php
new file mode 100755
--- /dev/null
+++ b/scripts/utils/prosediff.php
@@ -0,0 +1,50 @@
+#!/usr/bin/env php
+<?php
+
+// Command-line utility: print a colorized prose diff of two text files.
+
+require_once dirname(__FILE__).'/../__init_script__.php';
+
+$args = new PhutilArgumentParser($argv);
+$args->setTagline(pht('show prose differences'));
+$args->setSynopsis(<<<EOHELP
+**prosediff.php** __old__ __new__ [__options__]
+ Diff two prose files.
+
+EOHELP
+);
+
+$args->parseStandardArguments();
+
+// Declare a single wildcard argument named "files".
+$file_spec = array(
+  'name' => 'files',
+  'wildcard' => true,
+);
+$args->parse(array($file_spec));
+
+// Exactly two paths are required: the old file, then the new file.
+$paths = $args->getArg('files');
+if (count($paths) !== 2) {
+  $args->printHelpAndExit();
+}
+
+$old_text = Filesystem::readFile(head($paths));
+$new_text = Filesystem::readFile(last($paths));
+
+$differ = new PhutilProseDifferenceEngine();
+$parts = $differ->getDiff($old_text, $new_text)->getParts();
+
+// "-" parts use the <bg:red> format, "+" parts use <bg:green>, and "="
+// parts are printed without markup. Parts of any other type print nothing.
+foreach ($parts as $part) {
+  $kind = $part['type'];
+  $text = $part['text'];
+  if ($kind == '-') {
+    echo tsprintf('<bg:red>%B</bg>', $text);
+  } else if ($kind == '+') {
+    echo tsprintf('<bg:green>%B</bg>', $text);
+  } else if ($kind == '=') {
+    echo tsprintf('%B', $text);
+  }
+}
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -298,6 +298,8 @@
'PhutilPortuguesePortugalLocale' => 'internationalization/locales/PhutilPortuguesePortugalLocale.php',
'PhutilPregsprintfTestCase' => 'xsprintf/__tests__/PhutilPregsprintfTestCase.php',
'PhutilProcessGroupDaemon' => 'daemon/torture/PhutilProcessGroupDaemon.php',
+ 'PhutilProseDiff' => 'utils/PhutilProseDiff.php',
+ 'PhutilProseDifferenceEngine' => 'utils/PhutilProseDifferenceEngine.php',
'PhutilProtocolChannel' => 'channel/PhutilProtocolChannel.php',
'PhutilProxyException' => 'error/PhutilProxyException.php',
'PhutilProxyIterator' => 'utils/PhutilProxyIterator.php',
@@ -859,6 +861,8 @@
'PhutilPortuguesePortugalLocale' => 'PhutilLocale',
'PhutilPregsprintfTestCase' => 'PhutilTestCase',
'PhutilProcessGroupDaemon' => 'PhutilTortureTestDaemon',
+ 'PhutilProseDiff' => 'Phobject',
+ 'PhutilProseDifferenceEngine' => 'Phobject',
'PhutilProtocolChannel' => 'PhutilChannelChannel',
'PhutilProxyException' => 'Exception',
'PhutilProxyIterator' => array(
diff --git a/src/utils/PhutilProseDiff.php b/src/utils/PhutilProseDiff.php
new file mode 100644
--- /dev/null
+++ b/src/utils/PhutilProseDiff.php
@@ -0,0 +1,124 @@
+<?php
+
+/**
+ * Ordered list of the parts that make up a prose diff.
+ *
+ * Each part is a dictionary with a "type" key and a "text" key. The types
+ * "-" and "+" are grouped together when the parts are reordered; a part of
+ * any other type ends the current run of changes.
+ */
+final class PhutilProseDiff extends Phobject {
+
+  private $parts = array();
+
+  /**
+   * Append a part to the end of the diff.
+   *
+   * @param string Part type, for example "-", "+" or "=".
+   * @param string Text covered by the part.
+   * @return this
+   */
+  public function addPart($type, $text) {
+    $this->parts[] = array(
+      'type' => $type,
+      'text' => $text,
+    );
+    return $this;
+  }
+
+  /**
+   * Get the parts of this diff, in order.
+   *
+   * @return list<dict> Dictionaries with "type" and "text" keys.
+   */
+  public function getParts() {
+    return $this->parts;
+  }
+
+  /**
+   * Regroup and merge the parts so the diff is easier to read.
+   *
+   * Within each run of changes, all "-" parts are moved ahead of all "+"
+   * parts. Adjacent parts of the same type are then merged into one part.
+   *
+   * @return this
+   */
+  public function reorderParts() {
+    // Reorder sequences of removed and added sections to put all the "-"
+    // parts together first, then all the "+" parts together. This produces
+    // a more human-readable result than intermingling them.
+    $o_run = array();
+    $n_run = array();
+    $result = array();
+    foreach ($this->parts as $part) {
+      $type = $part['type'];
+      switch ($type) {
+        case '-':
+          $o_run[] = $part;
+          break;
+        case '+':
+          $n_run[] = $part;
+          break;
+        default:
+          // Any other part ends the run: flush the removals, then the
+          // additions, then the part itself.
+          foreach ($o_run as $o) {
+            $result[] = $o;
+          }
+          foreach ($n_run as $n) {
+            $result[] = $n;
+          }
+          $result[] = $part;
+          $o_run = array();
+          $n_run = array();
+          break;
+      }
+    }
+
+    // Flush whatever run is still pending at the end of the input.
+    foreach ($o_run as $o) {
+      $result[] = $o;
+    }
+    foreach ($n_run as $n) {
+      $result[] = $n;
+    }
+
+    // Now, combine consecutive runs of the same type of change (like a
+    // series of "-" parts) into a single run.
+    $combined = array();
+
+    $last = null;
+    $last_text = null;
+    foreach ($result as $part) {
+      $type = $part['type'];
+
+      if ($last !== $type) {
+        // Only emit the previous run if one has been started. Before the
+        // first part there is no run, and emitting here would add a bogus
+        // leading part with a null type and null text.
+        if ($last_text !== null) {
+          $combined[] = array(
+            'type' => $last,
+            'text' => $last_text,
+          );
+        }
+        $last_text = null;
+        $last = $type;
+      }
+
+      $last_text .= $part['text'];
+    }
+
+    if ($last_text !== null) {
+      $combined[] = array(
+        'type' => $last,
+        'text' => $last_text,
+      );
+    }
+
+    $this->parts = $combined;
+
+    return $this;
+  }
+
+}
diff --git a/src/utils/PhutilProseDifferenceEngine.php b/src/utils/PhutilProseDifferenceEngine.php
new file mode 100644
--- /dev/null
+++ b/src/utils/PhutilProseDifferenceEngine.php
@@ -0,0 +1,143 @@
+<?php
+
+/**
+ * Computes a prose diff between two blocks of text.
+ *
+ * Text is split into coarse pieces and compared piece by piece. Pieces which
+ * were replaced are split more finely and compared again, down to single
+ * characters.
+ */
+final class PhutilProseDifferenceEngine extends Phobject {
+
+  /**
+   * Build a diff describing how one text was changed into another.
+   *
+   * @param string Old text.
+   * @param string New text.
+   * @return PhutilProseDiff Diff, after its parts have been reordered.
+   */
+  public function getDiff($u, $v) {
+    $diff = id(new PhutilProseDiff());
+
+    $this->buildDiff($diff, $u, $v, 1);
+    $diff->reorderParts();
+
+    return $diff;
+  }
+
+  /**
+   * Append the differences between two pieces of text to a diff.
+   *
+   * @param PhutilProseDiff Diff to append parts to.
+   * @param string Old text.
+   * @param string New text.
+   * @param int Split level; 1 is the coarsest and 3 is the finest.
+   * @return void
+   */
+  private function buildDiff(PhutilProseDiff $diff, $u, $v, $level) {
+    // Level 3 is the finest split, so at level 4 the texts are not split
+    // again: the old text is removed and the new text is added.
+    if ($level == 4) {
+      $diff->addPart('-', $u);
+      $diff->addPart('+', $v);
+      return;
+    }
+
+    $u_parts = $this->splitCorpus($u, $level);
+    $v_parts = $this->splitCorpus($v, $level);
+
+    $matrix = id(new PhutilEditDistanceMatrix())
+      ->setSequences($u_parts, $v_parts)
+      ->setComputeString(true);
+
+    $u_pos = 0;
+    $v_pos = 0;
+
+    $edits = $matrix->getEditString();
+    $edits_length = strlen($edits);
+
+    // Each character of the edit string consumes a piece from one or both
+    // sides.
+    for ($ii = 0; $ii < $edits_length; $ii++) {
+      $c = $edits[$ii];
+      if ($c == 's') {
+        $diff->addPart('=', $u_parts[$u_pos]);
+        $u_pos++;
+        $v_pos++;
+      } else if ($c == 'd') {
+        $diff->addPart('-', $u_parts[$u_pos]);
+        $u_pos++;
+      } else if ($c == 'i') {
+        $diff->addPart('+', $v_parts[$v_pos]);
+        $v_pos++;
+      } else if ($c == 'x') {
+        // A replaced piece is diffed again at the next, finer level.
+        $this->buildDiff($diff, $u_parts[$u_pos], $v_parts[$v_pos], $level + 1);
+        $u_pos++;
+        $v_pos++;
+      } else {
+        throw new Exception(
+          pht(
+            'Unexpected character ("%s") in edit string.',
+            $c));
+      }
+    }
+  }
+
+  /**
+   * Split text into pieces at the given level.
+   *
+   * @param string Text to split.
+   * @param int Split level: 1, 2 or 3.
+   * @return list<string> Pieces of the text, in order.
+   */
+  private function splitCorpus($corpus, $level) {
+    switch ($level) {
+      case 1:
+        // Level 1: Split into sentences.
+        $expr = '/([\n,!;?\.]+)/';
+        break;
+      case 2:
+        // Level 2: Split into words.
+        $expr = '/(\s+)/';
+        break;
+      case 3:
+        // Level 3: Split into characters.
+        return phutil_utf8v_combined($corpus);
+      default:
+        // Fail loudly rather than calling preg_split() with an undefined
+        // pattern.
+        throw new Exception(
+          pht(
+            'Unexpected prose split level ("%s").',
+            $level));
+    }
+
+    $pieces = preg_split($expr, $corpus, -1, PREG_SPLIT_DELIM_CAPTURE);
+    return $this->stitchPieces($pieces);
+  }
+
+  /**
+   * Join each piece of text with the delimiter which follows it.
+   *
+   * The input alternates between text and the delimiter captured after it.
+   *
+   * @param list<string> Alternating text and delimiter pieces.
+   * @return list<string> Text pieces with trailing delimiters attached.
+   */
+  private function stitchPieces(array $pieces) {
+    $results = array();
+    $count = count($pieces);
+    for ($ii = 0; $ii < $count; $ii += 2) {
+      $result = $pieces[$ii];
+      // The final text piece has no delimiter after it.
+      if ($ii + 1 < $count) {
+        $result .= $pieces[$ii + 1];
+      }
+      $results[] = $result;
+    }
+
+    return $results;
+  }
+
+}

File Metadata

Mime Type
text/plain
Expires
Wed, May 15, 11:58 AM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6285144
Default Alt Text
D16055.id38631.diff (6 KB)

Event Timeline