diff --git a/src/utils/PhutilEditDistanceMatrix.php b/src/utils/PhutilEditDistanceMatrix.php --- a/src/utils/PhutilEditDistanceMatrix.php +++ b/src/utils/PhutilEditDistanceMatrix.php @@ -54,7 +54,7 @@ private $alterCost = 0; private $maximumLength; private $computeString; - private $applySmoothing; + private $applySmoothing = self::SMOOTHING_NONE; private $x; private $y; @@ -64,6 +64,10 @@ private $distanceMatrix = null; private $typeMatrix = null; + const SMOOTHING_NONE = 'none'; + const SMOOTHING_INTERNAL = 'internal'; + const SMOOTHING_FULL = 'full'; + public function setMaximumLength($maximum_length) { $this->maximumLength = $maximum_length; return $this; @@ -288,12 +292,30 @@ $str = strrev($str); - // We pad the edit string before smoothing it, so ranges of similar - // characters at the beginning or end of the string can also be smoothed. - $str = $this->padEditString($str); + // For full smoothing, we pad the edit string before smoothing it, so + // ranges of similar characters at the beginning or end of the string can + // also be smoothed. + + // For internal smoothing, we only smooth ranges within the change itself. - if ($this->getApplySmoothing()) { - $str = $this->applySmoothing($str); + $smoothing = $this->getApplySmoothing(); + switch ($smoothing) { + case self::SMOOTHING_FULL: + $str = $this->padEditString($str); + $str = $this->applySmoothing($str, true); + break; + case self::SMOOTHING_INTERNAL: + $str = $this->applySmoothing($str, false); + $str = $this->padEditString($str); + break; + case self::SMOOTHING_NONE: + $str = $this->padEditString($str); + break; + default: + throw new Exception( + pht( + 'Unknown smoothing type "%s".', + $smoothing)); } return $str; @@ -508,20 +530,26 @@ } } - private function applySmoothing($str) { - $result = $str; + private function applySmoothing($str, $full) { + if ($full) { + $prefix = '(^|[xdi])'; + $suffix = '([xdi]|\z)'; + } else { + $prefix = '([xdi])'; + $suffix = '([xdi])'; + } // Smooth the string out, by replacing short runs of similar characters // with 'x' operations. This makes the result more readable to humans, // since there are fewer choppy runs of short added and removed substrings. do { - $original = $result; - $result = preg_replace('/(^|[xdi])(s{3})([xdi]|\z)/', '$1xxx$3', $result); - $result = preg_replace('/(^|[xdi])(s{2})([xdi]|\z)/', '$1xx$3', $result); - $result = preg_replace('/(^|[xdi])(s{1})([xdi]|\z)/', '$1x$3', $result); - } while ($result != $original); + $original = $str; + $str = preg_replace('/'.$prefix.'(s{3})'.$suffix.'/', '$1xxx$3', $str); + $str = preg_replace('/'.$prefix.'(s{2})'.$suffix.'/', '$1xx$3', $str); + $str = preg_replace('/'.$prefix.'(s{1})'.$suffix.'/', '$1x$3', $str); + } while ($str != $original); - return $result; + return $str; } } diff --git a/src/utils/PhutilProseDifferenceEngine.php b/src/utils/PhutilProseDifferenceEngine.php --- a/src/utils/PhutilProseDifferenceEngine.php +++ b/src/utils/PhutilProseDifferenceEngine.php @@ -28,7 +28,7 @@ // For word-level and character-level changes, smooth the output string // to reduce the choppiness of the diff. if ($level > 1) { - $matrix->setApplySmoothing(true); + $matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL); } $u_pos = 0;