diff --git a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php index 259bb671de..ce93bbe65a 100644 --- a/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php +++ b/src/applications/differential/__tests__/DifferentialParseRenderTestCase.php @@ -1,110 +1,114 @@ getTestDataDirectory(); foreach (Filesystem::listDirectory($dir, $show_hidden = false) as $file) { if (!preg_match('/\.diff$/', $file)) { continue; } $data = Filesystem::readFile($dir.$file); + // Strip trailing "~" characters from inputs so they may contain + // trailing whitespace. + $data = preg_replace('/~$/m', '', $data); + $opt_file = $dir.$file.'.options'; if (Filesystem::pathExists($opt_file)) { $options = Filesystem::readFile($opt_file); try { $options = phutil_json_decode($options); } catch (PhutilJSONParserException $ex) { throw new PhutilProxyException( pht('Invalid options file: %s.', $opt_file), $ex); } } else { $options = array(); } foreach (array('one', 'two') as $type) { $this->runParser($type, $data, $file, 'expect'); $this->runParser($type, $data, $file, 'unshielded'); } } } private function runParser($type, $data, $file, $extension) { $dir = $this->getTestDataDirectory(); $test_file = $dir.$file.'.'.$type.'.'.$extension; if (!Filesystem::pathExists($test_file)) { return; } $unshielded = false; switch ($extension) { case 'unshielded': $unshielded = true; break; } $parsers = $this->buildChangesetParsers($type, $data, $file); $actual = $this->renderParsers($parsers, $unshielded); $expect = Filesystem::readFile($test_file); $this->assertEqual($expect, $actual, basename($test_file)); } private function renderParsers(array $parsers, $unshield) { $result = array(); foreach ($parsers as $parser) { if ($unshield) { $s_range = 0; $e_range = 0xFFFF; } else { $s_range = null; $e_range = null; } $result[] = $parser->render($s_range, $e_range, array()); } return 
implode(str_repeat('~', 80)."\n", $result); } private function buildChangesetParsers($type, $data, $file) { $parser = new ArcanistDiffParser(); $changes = $parser->parseDiff($data); $diff = DifferentialDiff::newFromRawChanges( PhabricatorUser::getOmnipotentUser(), $changes); $changesets = $diff->getChangesets(); $engine = new PhabricatorMarkupEngine(); $engine->setViewer(new PhabricatorUser()); $parsers = array(); foreach ($changesets as $changeset) { $cparser = new DifferentialChangesetParser(); $cparser->setUser(new PhabricatorUser()); $cparser->setDisableCache(true); $cparser->setChangeset($changeset); $cparser->setMarkupEngine($engine); if ($type == 'one') { $cparser->setRenderer(new DifferentialChangesetOneUpTestRenderer()); } else if ($type == 'two') { $cparser->setRenderer(new DifferentialChangesetTwoUpTestRenderer()); } else { throw new Exception(pht('Unknown renderer type "%s"!', $type)); } $parsers[] = $cparser; } return $parsers; } } diff --git a/src/applications/differential/__tests__/data/generated.diff b/src/applications/differential/__tests__/data/generated.diff index 7846c9a494..c130993cf7 100644 --- a/src/applications/differential/__tests__/data/generated.diff +++ b/src/applications/differential/__tests__/data/generated.diff @@ -1,10 +1,10 @@ diff --git a/GENERATED b/GENERATED index 5dcff7f..eff82ef 100644 --- a/GENERATED +++ b/GENERATED @@ -1,4 +1,4 @@ @generated - + ~ -This is a generated file. +This is a generated file, full of generated code. 
diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php index caa7463672..f0f952328a 100644 --- a/src/applications/differential/parser/DifferentialChangesetParser.php +++ b/src/applications/differential/parser/DifferentialChangesetParser.php @@ -1,1369 +1,1419 @@ rangeStart = $start; $this->rangeEnd = $end; return $this; } public function setMask(array $mask) { $this->mask = $mask; return $this; } public function renderChangeset() { return $this->render($this->rangeStart, $this->rangeEnd, $this->mask); } public function setShowEditAndReplyLinks($bool) { $this->showEditAndReplyLinks = $bool; return $this; } public function getShowEditAndReplyLinks() { return $this->showEditAndReplyLinks; } public function setHighlightAs($highlight_as) { $this->highlightAs = $highlight_as; return $this; } public function getHighlightAs() { return $this->highlightAs; } public function setCharacterEncoding($character_encoding) { $this->characterEncoding = $character_encoding; return $this; } public function getCharacterEncoding() { return $this->characterEncoding; } public function setRenderer(DifferentialChangesetRenderer $renderer) { $this->renderer = $renderer; return $this; } public function getRenderer() { if (!$this->renderer) { return new DifferentialChangesetTwoUpRenderer(); } return $this->renderer; } public function setDisableCache($disable_cache) { $this->disableCache = $disable_cache; return $this; } public function getDisableCache() { return $this->disableCache; } public function setCanMarkDone($can_mark_done) { $this->canMarkDone = $can_mark_done; return $this; } public function getCanMarkDone() { return $this->canMarkDone; } public function setObjectOwnerPHID($phid) { $this->objectOwnerPHID = $phid; return $this; } public function getObjectOwnerPHID() { return $this->objectOwnerPHID; } public function setOffsetMode($offset_mode) { $this->offsetMode = $offset_mode; return 
$this; } public function getOffsetMode() { return $this->offsetMode; } public static function getDefaultRendererForViewer(PhabricatorUser $viewer) { $is_unified = $viewer->compareUserSetting( PhabricatorUnifiedDiffsSetting::SETTINGKEY, PhabricatorUnifiedDiffsSetting::VALUE_ALWAYS_UNIFIED); if ($is_unified) { return '1up'; } return null; } public function readParametersFromRequest(AphrontRequest $request) { $this->setCharacterEncoding($request->getStr('encoding')); $this->setHighlightAs($request->getStr('highlight')); $renderer = null; // If the viewer prefers unified diffs, always set the renderer to unified. // Otherwise, we leave it unspecified and the client will choose a // renderer based on the screen size. if ($request->getStr('renderer')) { $renderer = $request->getStr('renderer'); } else { $renderer = self::getDefaultRendererForViewer($request->getViewer()); } switch ($renderer) { case '1up': $this->setRenderer(new DifferentialChangesetOneUpRenderer()); break; default: $this->setRenderer(new DifferentialChangesetTwoUpRenderer()); break; } return $this; } const CACHE_VERSION = 13; const CACHE_MAX_SIZE = 8e6; const ATTR_GENERATED = 'attr:generated'; const ATTR_DELETED = 'attr:deleted'; const ATTR_UNCHANGED = 'attr:unchanged'; const ATTR_MOVEAWAY = 'attr:moveaway'; public function setOldLines(array $lines) { $this->old = $lines; return $this; } public function setNewLines(array $lines) { $this->new = $lines; return $this; } public function setSpecialAttributes(array $attributes) { $this->specialAttributes = $attributes; return $this; } public function setIntraLineDiffs(array $diffs) { $this->intra = $diffs; return $this; } public function setDepthOnlyLines(array $lines) { $this->depthOnlyLines = $lines; return $this; } public function getDepthOnlyLines() { return $this->depthOnlyLines; } public function setVisibileLinesMask(array $mask) { $this->visible = $mask; return $this; } public function setLinesOfContext($lines_of_context) { $this->linesOfContext = 
$lines_of_context; return $this; } public function getLinesOfContext() { return $this->linesOfContext; } /** * Configure which Changeset comments added to the right side of the visible * diff will be attached to. The ID must be the ID of a real Differential * Changeset. * * The complexity here is that we may show an arbitrary side of an arbitrary * changeset as either the left or right part of a diff. This method allows * the left and right halves of the displayed diff to be correctly mapped to * storage changesets. * * @param id The Differential Changeset ID that comments added to the right * side of the visible diff should be attached to. * @param bool If true, attach new comments to the right side of the storage * changeset. Note that this may be false, if the left side of * some storage changeset is being shown as the right side of * a display diff. * @return this */ public function setRightSideCommentMapping($id, $is_new) { $this->rightSideChangesetID = $id; $this->rightSideAttachesToNewFile = $is_new; return $this; } /** * See setRightSideCommentMapping(), but this sets information for the left * side of the display diff. */ public function setLeftSideCommentMapping($id, $is_new) { $this->leftSideChangesetID = $id; $this->leftSideAttachesToNewFile = $is_new; return $this; } public function setOriginals( DifferentialChangeset $left, DifferentialChangeset $right) { $this->originalLeft = $left; $this->originalRight = $right; return $this; } public function diffOriginals() { $engine = new PhabricatorDifferenceEngine(); $changeset = $engine->generateChangesetFromFileContent( implode('', mpull($this->originalLeft->getHunks(), 'getChanges')), implode('', mpull($this->originalRight->getHunks(), 'getChanges'))); $parser = new DifferentialHunkParser(); return $parser->parseHunksForHighlightMasks( $changeset->getHunks(), $this->originalLeft->getHunks(), $this->originalRight->getHunks()); } /** * Set a key for identifying this changeset in the render cache. 
If set, the * parser will attempt to use the changeset render cache, which can improve * performance for frequently-viewed changesets. * * By default, there is no render cache key and parsers do not use the cache. * This is appropriate for rarely-viewed changesets. * * NOTE: Currently, this key must be a valid Differential Changeset ID. * * @param string Key for identifying this changeset in the render cache. * @return this */ public function setRenderCacheKey($key) { $this->renderCacheKey = $key; return $this; } private function getRenderCacheKey() { return $this->renderCacheKey; } public function setChangeset(DifferentialChangeset $changeset) { $this->changeset = $changeset; $this->setFilename($changeset->getFilename()); return $this; } public function setRenderingReference($ref) { $this->renderingReference = $ref; return $this; } private function getRenderingReference() { return $this->renderingReference; } public function getChangeset() { return $this->changeset; } public function setFilename($filename) { $this->filename = $filename; return $this; } public function setHandles(array $handles) { assert_instances_of($handles, 'PhabricatorObjectHandle'); $this->handles = $handles; return $this; } public function setMarkupEngine(PhabricatorMarkupEngine $engine) { $this->markupEngine = $engine; return $this; } public function setUser(PhabricatorUser $user) { $this->user = $user; return $this; } public function getUser() { return $this->user; } public function setCoverage($coverage) { $this->coverage = $coverage; return $this; } private function getCoverage() { return $this->coverage; } public function parseInlineComment( PhabricatorInlineCommentInterface $comment) { // Parse only comments which are actually visible. 
if ($this->isCommentVisibleOnRenderedDiff($comment)) { $this->comments[] = $comment; } return $this; } private function loadCache() { $render_cache_key = $this->getRenderCacheKey(); if (!$render_cache_key) { return false; } $data = null; $changeset = new DifferentialChangeset(); $conn_r = $changeset->establishConnection('r'); $data = queryfx_one( $conn_r, 'SELECT * FROM %T WHERE id = %d', $changeset->getTableName().'_parse_cache', $render_cache_key); if (!$data) { return false; } if ($data['cache'][0] == '{') { // This is likely an old-style JSON cache which we will not be able to // deserialize. return false; } $data = unserialize($data['cache']); if (!is_array($data) || !$data) { return false; } foreach (self::getCacheableProperties() as $cache_key) { if (!array_key_exists($cache_key, $data)) { // If we're missing a cache key, assume we're looking at an old cache // and ignore it. return false; } } if ($data['cacheVersion'] !== self::CACHE_VERSION) { return false; } // Someone displays contents of a partially cached shielded file. 
if (!isset($data['newRender']) && (!$this->isTopLevel || $this->comments)) { return false; } unset($data['cacheVersion'], $data['cacheHost']); $cache_prop = array_select_keys($data, self::getCacheableProperties()); foreach ($cache_prop as $cache_key => $v) { $this->$cache_key = $v; } return true; } protected static function getCacheableProperties() { return array( 'visible', 'new', 'old', 'intra', 'depthOnlyLines', 'newRender', 'oldRender', 'specialAttributes', 'hunkStartLines', 'cacheVersion', 'cacheHost', 'highlightingDisabled', ); } public function saveCache() { if (PhabricatorEnv::isReadOnly()) { return false; } if ($this->highlightErrors) { return false; } $render_cache_key = $this->getRenderCacheKey(); if (!$render_cache_key) { return false; } $cache = array(); foreach (self::getCacheableProperties() as $cache_key) { switch ($cache_key) { case 'cacheVersion': $cache[$cache_key] = self::CACHE_VERSION; break; case 'cacheHost': $cache[$cache_key] = php_uname('n'); break; default: $cache[$cache_key] = $this->$cache_key; break; } } $cache = serialize($cache); // We don't want to waste too much space by a single changeset. if (strlen($cache) > self::CACHE_MAX_SIZE) { return; } $changeset = new DifferentialChangeset(); $conn_w = $changeset->establishConnection('w'); $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); try { queryfx( $conn_w, 'INSERT INTO %T (id, cache, dateCreated) VALUES (%d, %B, %d) ON DUPLICATE KEY UPDATE cache = VALUES(cache)', DifferentialChangeset::TABLE_CACHE, $render_cache_key, $cache, time()); } catch (AphrontQueryException $ex) { // Ignore these exceptions. A common cause is that the cache is // larger than 'max_allowed_packet', in which case we're better off // not writing it. // TODO: It would be nice to tailor this more narrowly. 
} unset($unguarded); } private function markGenerated($new_corpus_block = '') { $generated_guess = (strpos($new_corpus_block, '@'.'generated') !== false); if (!$generated_guess) { $generated_path_regexps = PhabricatorEnv::getEnvConfig( 'differential.generated-paths'); foreach ($generated_path_regexps as $regexp) { if (preg_match($regexp, $this->changeset->getFilename())) { $generated_guess = true; break; } } } $event = new PhabricatorEvent( PhabricatorEventType::TYPE_DIFFERENTIAL_WILLMARKGENERATED, array( 'corpus' => $new_corpus_block, 'is_generated' => $generated_guess, ) ); PhutilEventEngine::dispatchEvent($event); $generated = $event->getValue('is_generated'); $attribute = $this->changeset->isGeneratedChangeset(); if ($attribute) { $generated = true; } $this->specialAttributes[self::ATTR_GENERATED] = $generated; } public function isGenerated() { return idx($this->specialAttributes, self::ATTR_GENERATED, false); } public function isDeleted() { return idx($this->specialAttributes, self::ATTR_DELETED, false); } public function isUnchanged() { return idx($this->specialAttributes, self::ATTR_UNCHANGED, false); } public function isMoveAway() { return idx($this->specialAttributes, self::ATTR_MOVEAWAY, false); } private function applyIntraline(&$render, $intra, $corpus) { foreach ($render as $key => $text) { if (isset($intra[$key])) { $render[$key] = ArcanistDiffUtils::applyIntralineDiff( $text, $intra[$key]); } } } private function getHighlightFuture($corpus) { $language = $this->highlightAs; if (!$language) { $language = $this->highlightEngine->getLanguageFromFilename( $this->filename); if (($language != 'txt') && (strlen($corpus) > self::HIGHLIGHT_BYTE_LIMIT)) { $this->highlightingDisabled = true; $language = 'txt'; } } return $this->highlightEngine->getHighlightFuture( $language, $corpus); } protected function processHighlightedSource($data, $result) { $result_lines = phutil_split_lines($result); foreach ($data as $key => $info) { if (!$info) { 
unset($result_lines[$key]); } } return $result_lines; } private function tryCacheStuff() { $skip_cache = false; if ($this->disableCache) { $skip_cache = true; } if ($this->characterEncoding) { $skip_cache = true; } if ($this->highlightAs) { $skip_cache = true; } $changeset = $this->changeset; if ($changeset->getFileType() != DifferentialChangeType::FILE_TEXT && $changeset->getFileType() != DifferentialChangeType::FILE_SYMLINK) { $this->markGenerated(); } else { if ($skip_cache || !$this->loadCache()) { $this->process(); if (!$skip_cache) { $this->saveCache(); } } } } private function process() { $changeset = $this->changeset; $hunk_parser = new DifferentialHunkParser(); $hunk_parser->parseHunksForLineData($changeset->getHunks()); + + $this->realignDiff($changeset, $hunk_parser); + $hunk_parser->reparseHunksForSpecialAttributes(); $unchanged = false; if (!$hunk_parser->getHasAnyChanges()) { $filetype = $this->changeset->getFileType(); if ($filetype == DifferentialChangeType::FILE_TEXT || $filetype == DifferentialChangeType::FILE_SYMLINK) { $unchanged = true; } } $moveaway = false; $changetype = $this->changeset->getChangeType(); if ($changetype == DifferentialChangeType::TYPE_MOVE_AWAY) { $moveaway = true; } $this->setSpecialAttributes(array( self::ATTR_UNCHANGED => $unchanged, self::ATTR_DELETED => $hunk_parser->getIsDeleted(), self::ATTR_MOVEAWAY => $moveaway, )); $lines_context = $this->getLinesOfContext(); $hunk_parser->generateIntraLineDiffs(); $hunk_parser->generateVisibileLinesMask($lines_context); $this->setOldLines($hunk_parser->getOldLines()); $this->setNewLines($hunk_parser->getNewLines()); $this->setIntraLineDiffs($hunk_parser->getIntraLineDiffs()); $this->setDepthOnlyLines($hunk_parser->getDepthOnlyLines()); $this->setVisibileLinesMask($hunk_parser->getVisibleLinesMask()); $this->hunkStartLines = $hunk_parser->getHunkStartLines( $changeset->getHunks()); $new_corpus = $hunk_parser->getNewCorpus(); $new_corpus_block = implode('', $new_corpus); 
$this->markGenerated($new_corpus_block); if ($this->isTopLevel && !$this->comments && ($this->isGenerated() || $this->isUnchanged() || $this->isDeleted())) { return; } $old_corpus = $hunk_parser->getOldCorpus(); $old_corpus_block = implode('', $old_corpus); $old_future = $this->getHighlightFuture($old_corpus_block); $new_future = $this->getHighlightFuture($new_corpus_block); $futures = array( 'old' => $old_future, 'new' => $new_future, ); $corpus_blocks = array( 'old' => $old_corpus_block, 'new' => $new_corpus_block, ); $this->highlightErrors = false; foreach (new FutureIterator($futures) as $key => $future) { try { try { $highlighted = $future->resolve(); } catch (PhutilSyntaxHighlighterException $ex) { $this->highlightErrors = true; $highlighted = id(new PhutilDefaultSyntaxHighlighter()) ->getHighlightFuture($corpus_blocks[$key]) ->resolve(); } switch ($key) { case 'old': $this->oldRender = $this->processHighlightedSource( $this->old, $highlighted); break; case 'new': $this->newRender = $this->processHighlightedSource( $this->new, $highlighted); break; } } catch (Exception $ex) { phlog($ex); throw $ex; } } $this->applyIntraline( $this->oldRender, ipull($this->intra, 0), $old_corpus); $this->applyIntraline( $this->newRender, ipull($this->intra, 1), $new_corpus); } private function shouldRenderPropertyChangeHeader($changeset) { if (!$this->isTopLevel) { // We render properties only at top level; otherwise we get multiple // copies of them when a user clicks "Show More". return false; } return true; } public function render( $range_start = null, $range_len = null, $mask_force = array()) { // "Top level" renders are initial requests for the whole file, versus // requests for a specific range generated by clicking "show more". We // generate property changes and "shield" UI elements only for toplevel // requests. 
$this->isTopLevel = (($range_start === null) && ($range_len === null)); $this->highlightEngine = PhabricatorSyntaxHighlighter::newEngine(); $encoding = null; if ($this->characterEncoding) { // We are forcing this changeset to be interpreted with a specific // character encoding, so force all the hunks into that encoding and // propagate it to the renderer. $encoding = $this->characterEncoding; foreach ($this->changeset->getHunks() as $hunk) { $hunk->forceEncoding($this->characterEncoding); } } else { // We're just using the default, so tell the renderer what that is // (by reading the encoding from the first hunk). foreach ($this->changeset->getHunks() as $hunk) { $encoding = $hunk->getDataEncoding(); break; } } $this->tryCacheStuff(); // If we're rendering in an offset mode, treat the range numbers as line // numbers instead of rendering offsets. $offset_mode = $this->getOffsetMode(); if ($offset_mode) { if ($offset_mode == 'new') { $offset_map = $this->new; } else { $offset_map = $this->old; } // NOTE: Inline comments use zero-based lengths. For example, a comment // that starts and ends on line 123 has length 0. Rendering considers // this range to have length 1. Probably both should agree, but that // ship likely sailed long ago. Tweak things here to get the two systems // to agree. See PHI985, where this affected mail rendering of inline // comments left on the final line of a file. 
$range_end = $this->getOffset($offset_map, $range_start + $range_len); $range_start = $this->getOffset($offset_map, $range_start); $range_len = ($range_end - $range_start) + 1; } $render_pch = $this->shouldRenderPropertyChangeHeader($this->changeset); $rows = max( count($this->old), count($this->new)); $renderer = $this->getRenderer() ->setUser($this->getUser()) ->setChangeset($this->changeset) ->setRenderPropertyChangeHeader($render_pch) ->setIsTopLevel($this->isTopLevel) ->setOldRender($this->oldRender) ->setNewRender($this->newRender) ->setHunkStartLines($this->hunkStartLines) ->setOldChangesetID($this->leftSideChangesetID) ->setNewChangesetID($this->rightSideChangesetID) ->setOldAttachesToNewFile($this->leftSideAttachesToNewFile) ->setNewAttachesToNewFile($this->rightSideAttachesToNewFile) ->setCodeCoverage($this->getCoverage()) ->setRenderingReference($this->getRenderingReference()) ->setMarkupEngine($this->markupEngine) ->setHandles($this->handles) ->setOldLines($this->old) ->setNewLines($this->new) ->setOriginalCharacterEncoding($encoding) ->setShowEditAndReplyLinks($this->getShowEditAndReplyLinks()) ->setCanMarkDone($this->getCanMarkDone()) ->setObjectOwnerPHID($this->getObjectOwnerPHID()) ->setHighlightingDisabled($this->highlightingDisabled) ->setDepthOnlyLines($this->getDepthOnlyLines()); $shield = null; if ($this->isTopLevel && !$this->comments) { if ($this->isGenerated()) { $shield = $renderer->renderShield( pht( 'This file contains generated code, which does not normally '. 'need to be reviewed.')); } else if ($this->isMoveAway()) { // We put an empty shield on these files. Normally, they do not have // any diff content anyway. However, if they come through `arc`, they // may have content. We don't want to show it (it's not useful) and // we bailed out of fully processing it earlier anyway. // We could show a message like "this file was moved", but we show // that as a change header anyway, so it would be redundant. 
Instead, // just render an empty shield to skip rendering the diff body. $shield = ''; } else if ($this->isUnchanged()) { $type = 'text'; if (!$rows) { // NOTE: Normally, diffs which don't change files do not include // file content (for example, if you "chmod +x" a file and then // run "git show", the file content is not available). Similarly, // if you move a file from A to B without changing it, diffs normally // do not show the file content. In some cases `arc` is able to // synthetically generate content for these diffs, but for raw diffs // we'll never have it so we need to be prepared to not render a link. $type = 'none'; } $type_add = DifferentialChangeType::TYPE_ADD; if ($this->changeset->getChangeType() == $type_add) { // Although the generic message is sort of accurate in a technical // sense, this more-tailored message is less confusing. $shield = $renderer->renderShield( pht('This is an empty file.'), $type); } else { $shield = $renderer->renderShield( pht('The contents of this file were not changed.'), $type); } } else if ($this->isDeleted()) { $shield = $renderer->renderShield( pht('This file was completely deleted.')); } else if ($this->changeset->getAffectedLineCount() > 2500) { $shield = $renderer->renderShield( pht( 'This file has a very large number of changes (%s lines).', new PhutilNumber($this->changeset->getAffectedLineCount()))); } } if ($shield !== null) { return $renderer->renderChangesetTable($shield); } // This request should render the "undershield" headers if it's a top-level // request which made it this far (indicating the changeset has no shield) // or it's a request with no mask information (indicating it's the request // that removes the rendering shield). Possibly, this second class of // request might need to be made more explicit. 
$is_undershield = (empty($mask_force) || $this->isTopLevel); $renderer->setIsUndershield($is_undershield); $old_comments = array(); $new_comments = array(); $old_mask = array(); $new_mask = array(); $feedback_mask = array(); $lines_context = $this->getLinesOfContext(); if ($this->comments) { // If there are any comments which appear in sections of the file which // we don't have, we're going to move them backwards to the closest // earlier line. Two cases where this may happen are: // // - Porting ghost comments forward into a file which was mostly // deleted. // - Porting ghost comments forward from a full-context diff to a // partial-context diff. list($old_backmap, $new_backmap) = $this->buildLineBackmaps(); foreach ($this->comments as $comment) { $new_side = $this->isCommentOnRightSideWhenDisplayed($comment); $line = $comment->getLineNumber(); if ($new_side) { $back_line = $new_backmap[$line]; } else { $back_line = $old_backmap[$line]; } if ($back_line != $line) { // TODO: This should probably be cleaner, but just be simple and // obvious for now. $ghost = $comment->getIsGhost(); if ($ghost) { $moved = pht( 'This comment originally appeared on line %s, but that line '. 'does not exist in this version of the diff. It has been '. 
'moved backward to the nearest line.', new PhutilNumber($line)); $ghost['reason'] = $ghost['reason']."\n\n".$moved; $comment->setIsGhost($ghost); } $comment->setLineNumber($back_line); $comment->setLineLength(0); } $start = max($comment->getLineNumber() - $lines_context, 0); $end = $comment->getLineNumber() + $comment->getLineLength() + $lines_context; for ($ii = $start; $ii <= $end; $ii++) { if ($new_side) { $new_mask[$ii] = true; } else { $old_mask[$ii] = true; } } } foreach ($this->old as $ii => $old) { if (isset($old['line']) && isset($old_mask[$old['line']])) { $feedback_mask[$ii] = true; } } foreach ($this->new as $ii => $new) { if (isset($new['line']) && isset($new_mask[$new['line']])) { $feedback_mask[$ii] = true; } } $this->comments = id(new PHUIDiffInlineThreader()) ->reorderAndThreadCommments($this->comments); foreach ($this->comments as $comment) { $final = $comment->getLineNumber() + $comment->getLineLength(); $final = max(1, $final); if ($this->isCommentOnRightSideWhenDisplayed($comment)) { $new_comments[$final][] = $comment; } else { $old_comments[$final][] = $comment; } } } $renderer ->setOldComments($old_comments) ->setNewComments($new_comments); switch ($this->changeset->getFileType()) { case DifferentialChangeType::FILE_IMAGE: $old = null; $new = null; // TODO: Improve the architectural issue as discussed in D955 // https://secure.phabricator.com/D955 $reference = $this->getRenderingReference(); $parts = explode('/', $reference); if (count($parts) == 2) { list($id, $vs) = $parts; } else { $id = $parts[0]; $vs = 0; } $id = (int)$id; $vs = (int)$vs; if (!$vs) { $metadata = $this->changeset->getMetadata(); $data = idx($metadata, 'attachment-data'); $old_phid = idx($metadata, 'old:binary-phid'); $new_phid = idx($metadata, 'new:binary-phid'); } else { $vs_changeset = id(new DifferentialChangeset())->load($vs); $old_phid = null; $new_phid = null; // TODO: This is spooky, see D6851 if ($vs_changeset) { $vs_metadata = $vs_changeset->getMetadata(); 
$old_phid = idx($vs_metadata, 'new:binary-phid'); } $changeset = id(new DifferentialChangeset())->load($id); if ($changeset) { $metadata = $changeset->getMetadata(); $new_phid = idx($metadata, 'new:binary-phid'); } } if ($old_phid || $new_phid) { // grab the files, (micro) optimization for 1 query not 2 $file_phids = array(); if ($old_phid) { $file_phids[] = $old_phid; } if ($new_phid) { $file_phids[] = $new_phid; } $files = id(new PhabricatorFileQuery()) ->setViewer($this->getUser()) ->withPHIDs($file_phids) ->execute(); foreach ($files as $file) { if (empty($file)) { continue; } if ($file->getPHID() == $old_phid) { $old = $file; } else if ($file->getPHID() == $new_phid) { $new = $file; } } } $renderer->attachOldFile($old); $renderer->attachNewFile($new); return $renderer->renderFileChange($old, $new, $id, $vs); case DifferentialChangeType::FILE_DIRECTORY: case DifferentialChangeType::FILE_BINARY: $output = $renderer->renderChangesetTable(null); return $output; } if ($this->originalLeft && $this->originalRight) { list($highlight_old, $highlight_new) = $this->diffOriginals(); $highlight_old = array_flip($highlight_old); $highlight_new = array_flip($highlight_new); $renderer ->setHighlightOld($highlight_old) ->setHighlightNew($highlight_new); } $renderer ->setOriginalOld($this->originalLeft) ->setOriginalNew($this->originalRight); if ($range_start === null) { $range_start = 0; } if ($range_len === null) { $range_len = $rows; } $range_len = min($range_len, $rows - $range_start); list($gaps, $mask) = $this->calculateGapsAndMask( $mask_force, $feedback_mask, $range_start, $range_len); $renderer ->setGaps($gaps) ->setMask($mask); $html = $renderer->renderTextChange( $range_start, $range_len, $rows); return $renderer->renderChangesetTable($html); } /** * This function calculates a lot of stuff we need to know to display * the diff: * * Gaps - compute gaps in the visible display diff, where we will render * "Show more context" spacers. 
If a gap is no longer than the context size,
   * it is simply displayed. Otherwise it is recorded in $gaps and rendered as
   * a "show more context" element in place of the diff text. Each entry of
   * $gaps is a tuple of $gap_line_number_start and $gap_length, and the list
   * is returned in reverse order (last gap first).
   *
   * Mask - compute the actual lines that need to be shown (because they
   * are near changed lines, near inline comments, or the request has
   * explicitly asked for them, i.e. resulting from the user clicking
   * "show more"). The $mask returned is a sparsely populated dictionary
   * of $visible_line_number => true.
   *
   * @return array($gaps, $mask)
   */
  private function calculateGapsAndMask(
    $mask_force,
    $feedback_mask,
    $range_start,
    $range_len) {

    $context = $this->getLinesOfContext();

    // Array union: for a line present in several masks, the entry from the
    // leftmost mask is the one that is kept.
    $mask = $this->visible + $mask_force + $feedback_mask;

    // Force the line just past the range to be "visible" so that a gap which
    // runs up to the end of the range is always closed by the loop below.
    $mask[$range_start + $range_len] = true;

    $gaps = array();

    // First line of the gap currently being scanned, or null when the scan
    // is not inside a gap.
    $gap_start = null;

    for ($line = $range_start; $line <= $range_start + $range_len; $line++) {
      if (!isset($mask[$line])) {
        // Hidden line: open a gap here unless one is already open.
        if ($gap_start === null) {
          $gap_start = $line;
        }
        continue;
      }

      if ($gap_start === null) {
        // Visible line following another visible line; nothing to close.
        continue;
      }

      // Visible line which terminates the open gap.
      $length = $line - $gap_start;
      if ($length > $context) {
        $gaps[] = array($gap_start, $length);
      } else {
        // Small gap: reveal every line in it rather than rendering a spacer.
        for ($fill = $gap_start; $fill <= $gap_start + $length; $fill++) {
          $mask[$fill] = true;
        }
      }

      $gap_start = null;
    }

    return array(array_reverse($gaps), $mask);
  }

  /**
   * Determine if an inline comment will appear on the rendered diff,
   * taking into consideration which halves of which changesets will actually
   * be shown.
   *
   * @param PhabricatorInlineCommentInterface Comment to test for visibility.
   * @return bool True if the comment is visible on the rendered diff.
*/
  private function isCommentVisibleOnRenderedDiff(
    PhabricatorInlineCommentInterface $comment) {

    $comment_changeset = $comment->getChangesetID();
    $comment_is_new = $comment->getIsNewFile();

    // The comment is visible when it matches either displayed half: the
    // right-side mapping is tested first, then the left-side mapping.
    return
      (($comment_changeset == $this->rightSideChangesetID) &&
       ($comment_is_new == $this->rightSideAttachesToNewFile)) ||
      (($comment_changeset == $this->leftSideChangesetID) &&
       ($comment_is_new == $this->leftSideAttachesToNewFile));
  }

  /**
   * Decide whether a comment belongs to the right-hand side of the displayed
   * diff. The comment has to be visible somewhere on the rendered changeset
   * (see isCommentVisibleOnRenderedDiff()); otherwise an exception is thrown.
   *
   * @param PhabricatorInlineCommentInterface Comment to test for display
   *                                          location.
   * @return bool True for right, false for left.
   */
  private function isCommentOnRightSideWhenDisplayed(
    PhabricatorInlineCommentInterface $comment) {

    $is_visible = $this->isCommentVisibleOnRenderedDiff($comment);
    if (!$is_visible) {
      throw new Exception(pht('Comment is not visible on changeset!'));
    }

    $comment_changeset = $comment->getChangesetID();
    $comment_is_new = $comment->getIsNewFile();

    // A visible comment which does not match the right-side mapping must be
    // on the left.
    return
      ($comment_changeset == $this->rightSideChangesetID) &&
      ($comment_is_new == $this->rightSideAttachesToNewFile);
  }

  /**
   * Parse the 'range' specification that this class and the client-side JS
   * emit to indicate that a user clicked "Show more..." on a diff. Generally,
   * use is something like this:
   *
   *   $spec = $request->getStr('range');
   *   $parsed = DifferentialChangesetParser::parseRangeSpecification($spec);
   *   list($start, $end, $mask) = $parsed;
   *   $parser->render($start, $end, $mask);
   *
   * @param string Range specification, indicating the range of the diff that
   *               should be rendered.
   * @return tuple List suitable for passing to
   *               @{method:render}.
*/ public static function parseRangeSpecification($spec) { $range_s = null; $range_e = null; $mask = array(); if ($spec) { $match = null; if (preg_match('@^(\d+)-(\d+)(?:/(\d+)-(\d+))?$@', $spec, $match)) { $range_s = (int)$match[1]; $range_e = (int)$match[2]; if (count($match) > 3) { $start = (int)$match[3]; $len = (int)$match[4]; for ($ii = $start; $ii < $start + $len; $ii++) { $mask[$ii] = true; } } } } return array($range_s, $range_e, $mask); } /** * Render "modified coverage" information; test coverage on modified lines. * This synthesizes diff information with unit test information into a useful * indicator of how well tested a change is. */ public function renderModifiedCoverage() { $na = phutil_tag('em', array(), '-'); $coverage = $this->getCoverage(); if (!$coverage) { return $na; } $covered = 0; $not_covered = 0; foreach ($this->new as $k => $new) { if (!$new['line']) { continue; } if (!$new['type']) { continue; } if (empty($coverage[$new['line'] - 1])) { continue; } switch ($coverage[$new['line'] - 1]) { case 'C': $covered++; break; case 'U': $not_covered++; break; } } if (!$covered && !$not_covered) { return $na; } return sprintf('%d%%', 100 * ($covered / ($covered + $not_covered))); } /** * Build maps from lines comments appear on to actual lines. 
*/ private function buildLineBackmaps() { $old_back = array(); $new_back = array(); foreach ($this->old as $ii => $old) { $old_back[$old['line']] = $old['line']; } foreach ($this->new as $ii => $new) { $new_back[$new['line']] = $new['line']; } $max_old_line = 0; $max_new_line = 0; foreach ($this->comments as $comment) { if ($this->isCommentOnRightSideWhenDisplayed($comment)) { $max_new_line = max($max_new_line, $comment->getLineNumber()); } else { $max_old_line = max($max_old_line, $comment->getLineNumber()); } } $cursor = 1; for ($ii = 1; $ii <= $max_old_line; $ii++) { if (empty($old_back[$ii])) { $old_back[$ii] = $cursor; } else { $cursor = $old_back[$ii]; } } $cursor = 1; for ($ii = 1; $ii <= $max_new_line; $ii++) { if (empty($new_back[$ii])) { $new_back[$ii] = $cursor; } else { $cursor = $new_back[$ii]; } } return array($old_back, $new_back); } private function getOffset(array $map, $line) { if (!$map) { return null; } $line = (int)$line; foreach ($map as $key => $spec) { if ($spec && isset($spec['line'])) { if ((int)$spec['line'] >= $line) { return $key; } } } return $key; } + private function realignDiff( + DifferentialChangeset $changeset, + DifferentialHunkParser $hunk_parser) { + // Normalizing and realigning the diff depends on rediffing the files, and + // we currently need complete representations of both files to do anything + // reasonable. If we only have parts of the files, skip realignment. + + // We have more than one hunk, so we're definitely missing part of the file. + $hunks = $changeset->getHunks(); + if (count($hunks) !== 1) { + return null; + } + + // The first hunk doesn't start at the beginning of the file, so we're + // missing some context. 
+ $first_hunk = head($hunks); + if ($first_hunk->getOldOffset() != 1 || $first_hunk->getNewOffset() != 1) { + return null; + } + + $old_file = $changeset->makeOldFile(); + $new_file = $changeset->makeNewFile(); + if ($old_file === $new_file) { + // If the old and new files are exactly identical, the synthetic + // diff below will give us nonsense and whitespace modes are + // irrelevant anyway. This occurs when you, e.g., copy a file onto + // itself in Subversion (see T271). + return null; + } + + + $engine = id(new PhabricatorDifferenceEngine()) + ->setNormalize(true); + + $normalized_changeset = $engine->generateChangesetFromFileContent( + $old_file, + $new_file); + + $type_parser = new DifferentialHunkParser(); + $type_parser->parseHunksForLineData($normalized_changeset->getHunks()); + + $hunk_parser->setNormalized(true); + $hunk_parser->setOldLineTypeMap($type_parser->getOldLineTypeMap()); + $hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap()); + } + + } diff --git a/src/applications/differential/parser/DifferentialHunkParser.php b/src/applications/differential/parser/DifferentialHunkParser.php index d89089a7e7..d59358bc82 100644 --- a/src/applications/differential/parser/DifferentialHunkParser.php +++ b/src/applications/differential/parser/DifferentialHunkParser.php @@ -1,756 +1,819 @@ Map of lines where hunks start, other than line 1. 
*/ public function getHunkStartLines(array $hunks) { assert_instances_of($hunks, 'DifferentialHunk'); $map = array(); foreach ($hunks as $hunk) { $line = $hunk->getOldOffset(); if ($line > 1) { $map[$line] = true; } } return $map; } private function setVisibleLinesMask($mask) { $this->visibleLinesMask = $mask; return $this; } public function getVisibleLinesMask() { if ($this->visibleLinesMask === null) { throw new PhutilInvalidStateException('generateVisibileLinesMask'); } return $this->visibleLinesMask; } private function setIntraLineDiffs($intra_line_diffs) { $this->intraLineDiffs = $intra_line_diffs; return $this; } public function getIntraLineDiffs() { if ($this->intraLineDiffs === null) { throw new PhutilInvalidStateException('generateIntraLineDiffs'); } return $this->intraLineDiffs; } private function setNewLines($new_lines) { $this->newLines = $new_lines; return $this; } public function getNewLines() { if ($this->newLines === null) { throw new PhutilInvalidStateException('parseHunksForLineData'); } return $this->newLines; } private function setOldLines($old_lines) { $this->oldLines = $old_lines; return $this; } public function getOldLines() { if ($this->oldLines === null) { throw new PhutilInvalidStateException('parseHunksForLineData'); } return $this->oldLines; } public function getOldLineTypeMap() { $map = array(); $old = $this->getOldLines(); foreach ($old as $o) { if (!$o) { continue; } $map[$o['line']] = $o['type']; } return $map; } public function setOldLineTypeMap(array $map) { $lines = $this->getOldLines(); foreach ($lines as $key => $data) { $lines[$key]['type'] = idx($map, $data['line']); } $this->oldLines = $lines; return $this; } public function getNewLineTypeMap() { $map = array(); $new = $this->getNewLines(); foreach ($new as $n) { if (!$n) { continue; } $map[$n['line']] = $n['type']; } return $map; } public function setNewLineTypeMap(array $map) { $lines = $this->getNewLines(); foreach ($lines as $key => $data) { $lines[$key]['type'] = 
idx($map, $data['line']); } $this->newLines = $lines; return $this; } public function setDepthOnlyLines(array $map) { $this->depthOnlyLines = $map; return $this; } public function getDepthOnlyLines() { return $this->depthOnlyLines; } + public function setNormalized($normalized) { + $this->normalized = $normalized; + return $this; + } + + public function getNormalized() { + return $this->normalized; + } + public function getIsDeleted() { foreach ($this->getNewLines() as $line) { if ($line) { // At least one new line, so the entire file wasn't deleted. return false; } } foreach ($this->getOldLines() as $line) { if ($line) { // No new lines, at least one old line; the entire file was deleted. return true; } } // This is an empty file. return false; } /** * Returns true if the hunks change anything, including whitespace. */ public function getHasAnyChanges() { return $this->getHasChanges('any'); } private function getHasChanges($filter) { if ($filter !== 'any' && $filter !== 'text') { throw new Exception(pht("Unknown change filter '%s'.", $filter)); } $old = $this->getOldLines(); $new = $this->getNewLines(); $is_any = ($filter === 'any'); foreach ($old as $key => $o) { $n = $new[$key]; if ($o === null || $n === null) { // One side is missing, and it's impossible for both sides to be null, // so the other side must have something, and thus the two sides are // different and the file has been changed under any type of filter. return true; } if ($o['type'] !== $n['type']) { return true; } if ($o['text'] !== $n['text']) { if ($is_any) { // The text is different, so there's a change. return true; } else if (trim($o['text']) !== trim($n['text'])) { return true; } } } // No changes anywhere in the file. return false; } /** * This function takes advantage of the parsing work done in * @{method:parseHunksForLineData} and continues the struggle to hammer this * data into something we can display to a user. 
* * In particular, this function re-parses the hunks to make them equivalent * in length for easy rendering, adding `null` as necessary to pad the * length. * * Anyhoo, this function is not particularly well-named but I try. * * NOTE: this function must be called after * @{method:parseHunksForLineData}. */ public function reparseHunksForSpecialAttributes() { $rebuild_old = array(); $rebuild_new = array(); $old_lines = array_reverse($this->getOldLines()); $new_lines = array_reverse($this->getNewLines()); while (count($old_lines) || count($new_lines)) { $old_line_data = array_pop($old_lines); $new_line_data = array_pop($new_lines); if ($old_line_data) { $o_type = $old_line_data['type']; } else { $o_type = null; } if ($new_line_data) { $n_type = $new_line_data['type']; } else { $n_type = null; } // This line does not exist in the new file. if (($o_type != null) && ($n_type == null)) { $rebuild_old[] = $old_line_data; $rebuild_new[] = null; if ($new_line_data) { array_push($new_lines, $new_line_data); } continue; } // This line does not exist in the old file. 
if (($n_type != null) && ($o_type == null)) { $rebuild_old[] = null; $rebuild_new[] = $new_line_data; if ($old_line_data) { array_push($old_lines, $old_line_data); } continue; } $rebuild_old[] = $old_line_data; $rebuild_new[] = $new_line_data; } $this->setOldLines($rebuild_old); $this->setNewLines($rebuild_new); + $this->updateChangeTypesForNormalization(); + return $this; } public function generateIntraLineDiffs() { $old = $this->getOldLines(); $new = $this->getNewLines(); $diffs = array(); $depth_only = array(); foreach ($old as $key => $o) { $n = $new[$key]; if (!$o || !$n) { continue; } if ($o['type'] != $n['type']) { $o_segments = array(); $n_segments = array(); $tab_width = 2; $o_text = $o['text']; $n_text = $n['text']; if ($o_text !== $n_text) { $o_depth = $this->getIndentDepth($o_text, $tab_width); $n_depth = $this->getIndentDepth($n_text, $tab_width); if ($o_depth < $n_depth) { $segment_type = '>'; $segment_width = $this->getCharacterCountForVisualWhitespace( $n_text, ($n_depth - $o_depth), $tab_width); if ($segment_width) { $n_text = substr($n_text, $segment_width); $n_segments[] = array( $segment_type, $segment_width, ); } } else if ($o_depth > $n_depth) { $segment_type = '<'; $segment_width = $this->getCharacterCountForVisualWhitespace( $o_text, ($o_depth - $n_depth), $tab_width); if ($segment_width) { $o_text = substr($o_text, $segment_width); $o_segments[] = array( $segment_type, $segment_width, ); } } // If there are no remaining changes to this line after we've marked // off the indent depth changes, this line was only modified by // changing the indent depth. Mark it for later so we can change how // it is displayed. 
if ($o_text === $n_text) { $depth_only[$key] = $segment_type; } } $intraline_segments = ArcanistDiffUtils::generateIntralineDiff( $o_text, $n_text); foreach ($intraline_segments[0] as $o_segment) { $o_segments[] = $o_segment; } foreach ($intraline_segments[1] as $n_segment) { $n_segments[] = $n_segment; } $diffs[$key] = array( $o_segments, $n_segments, ); } } $this->setIntraLineDiffs($diffs); $this->setDepthOnlyLines($depth_only); return $this; } public function generateVisibileLinesMask($lines_context) { $old = $this->getOldLines(); $new = $this->getNewLines(); $max_length = max(count($old), count($new)); $visible = false; $last = 0; $mask = array(); for ($cursor = -$lines_context; $cursor < $max_length; $cursor++) { $offset = $cursor + $lines_context; if ((isset($old[$offset]) && $old[$offset]['type']) || (isset($new[$offset]) && $new[$offset]['type'])) { $visible = true; $last = $offset; } else if ($cursor > $last + $lines_context) { $visible = false; } if ($visible && $cursor > 0) { $mask[$cursor] = 1; } } $this->setVisibleLinesMask($mask); return $this; } public function getOldCorpus() { return $this->getCorpus($this->getOldLines()); } public function getNewCorpus() { return $this->getCorpus($this->getNewLines()); } private function getCorpus(array $lines) { $corpus = array(); foreach ($lines as $l) { if ($l['type'] != '\\') { if ($l['text'] === null) { // There's no text on this side of the diff, but insert a placeholder // newline so the highlighted line numbers match up. 
$corpus[] = "\n"; } else { $corpus[] = $l['text']; } } } return $corpus; } public function parseHunksForLineData(array $hunks) { assert_instances_of($hunks, 'DifferentialHunk'); $old_lines = array(); $new_lines = array(); foreach ($hunks as $hunk) { $lines = $hunk->getSplitLines(); $line_type_map = array(); $line_text = array(); foreach ($lines as $line_index => $line) { if (isset($line[0])) { $char = $line[0]; switch ($char) { case ' ': $line_type_map[$line_index] = null; $line_text[$line_index] = substr($line, 1); break; case "\r": case "\n": // NOTE: Normally, the first character is a space, plus, minus or // backslash, but it may be a newline if it used to be a space and // trailing whitespace has been stripped via email transmission or // some similar mechanism. In these cases, we essentially pretend // the missing space is still there. $line_type_map[$line_index] = null; $line_text[$line_index] = $line; break; case '+': case '-': case '\\': $line_type_map[$line_index] = $char; $line_text[$line_index] = substr($line, 1); break; default: throw new Exception( pht( 'Unexpected leading character "%s" at line index %s!', $char, $line_index)); } } else { $line_type_map[$line_index] = null; $line_text[$line_index] = ''; } } $old_line = $hunk->getOldOffset(); $new_line = $hunk->getNewOffset(); $num_lines = count($lines); for ($cursor = 0; $cursor < $num_lines; $cursor++) { $type = $line_type_map[$cursor]; $data = array( 'type' => $type, 'text' => $line_text[$cursor], 'line' => $new_line, ); if ($type == '\\') { $type = $line_type_map[$cursor - 1]; $data['text'] = ltrim($data['text']); } switch ($type) { case '+': $new_lines[] = $data; ++$new_line; break; case '-': $data['line'] = $old_line; $old_lines[] = $data; ++$old_line; break; default: $new_lines[] = $data; $data['line'] = $old_line; $old_lines[] = $data; ++$new_line; ++$old_line; break; } } } $this->setOldLines($old_lines); $this->setNewLines($new_lines); return $this; } public function 
parseHunksForHighlightMasks( array $changeset_hunks, array $old_hunks, array $new_hunks) { assert_instances_of($changeset_hunks, 'DifferentialHunk'); assert_instances_of($old_hunks, 'DifferentialHunk'); assert_instances_of($new_hunks, 'DifferentialHunk'); // Put changes side by side. $olds = array(); $news = array(); $olds_cursor = -1; $news_cursor = -1; foreach ($changeset_hunks as $hunk) { $n_old = $hunk->getOldOffset(); $n_new = $hunk->getNewOffset(); $changes = $hunk->getSplitLines(); foreach ($changes as $line) { $diff_type = $line[0]; // Change type in diff of diffs. $orig_type = $line[1]; // Change type in the original diff. if ($diff_type == ' ') { // Use the same key for lines that are next to each other. if ($olds_cursor > $news_cursor) { $key = $olds_cursor + 1; } else { $key = $news_cursor + 1; } $olds[$key] = null; $news[$key] = null; $olds_cursor = $key; $news_cursor = $key; } else if ($diff_type == '-') { $olds[] = array($n_old, $orig_type); $olds_cursor++; } else if ($diff_type == '+') { $news[] = array($n_new, $orig_type); $news_cursor++; } if (($diff_type == '-' || $diff_type == ' ') && $orig_type != '-') { $n_old++; } if (($diff_type == '+' || $diff_type == ' ') && $orig_type != '-') { $n_new++; } } } $offsets_old = $this->computeOffsets($old_hunks); $offsets_new = $this->computeOffsets($new_hunks); // Highlight lines that were added on each side or removed on the other // side. 
$highlight_old = array(); $highlight_new = array(); $last = max(last_key($olds), last_key($news)); for ($i = 0; $i <= $last; $i++) { if (isset($olds[$i])) { list($n, $type) = $olds[$i]; if ($type == '+' || ($type == ' ' && isset($news[$i]) && $news[$i][1] != ' ')) { $highlight_old[] = $offsets_old[$n]; } } if (isset($news[$i])) { list($n, $type) = $news[$i]; if ($type == '+' || ($type == ' ' && isset($olds[$i]) && $olds[$i][1] != ' ')) { $highlight_new[] = $offsets_new[$n]; } } } return array($highlight_old, $highlight_new); } public function makeContextDiff( array $hunks, $is_new, $line_number, $line_length, $add_context) { assert_instances_of($hunks, 'DifferentialHunk'); $context = array(); if ($is_new) { $prefix = '+'; } else { $prefix = '-'; } foreach ($hunks as $hunk) { if ($is_new) { $offset = $hunk->getNewOffset(); $length = $hunk->getNewLen(); } else { $offset = $hunk->getOldOffset(); $length = $hunk->getOldLen(); } $start = $line_number - $offset; $end = $start + $line_length; // We need to go in if $start == $length, because the last line // might be a "\No newline at end of file" marker, which we want // to show if the additional context is > 0. 
if ($start <= $length && $end >= 0) { $start = $start - $add_context; $end = $end + $add_context; $hunk_content = array(); $hunk_pos = array('-' => 0, '+' => 0); $hunk_offset = array('-' => null, '+' => null); $hunk_last = array('-' => null, '+' => null); foreach (explode("\n", $hunk->getChanges()) as $line) { $in_common = strncmp($line, ' ', 1) === 0; $in_old = strncmp($line, '-', 1) === 0 || $in_common; $in_new = strncmp($line, '+', 1) === 0 || $in_common; $in_selected = strncmp($line, $prefix, 1) === 0; $skip = !$in_selected && !$in_common; if ($hunk_pos[$prefix] <= $end) { if ($start <= $hunk_pos[$prefix]) { if (!$skip || ($hunk_pos[$prefix] != $start && $hunk_pos[$prefix] != $end)) { if ($in_old) { if ($hunk_offset['-'] === null) { $hunk_offset['-'] = $hunk_pos['-']; } $hunk_last['-'] = $hunk_pos['-']; } if ($in_new) { if ($hunk_offset['+'] === null) { $hunk_offset['+'] = $hunk_pos['+']; } $hunk_last['+'] = $hunk_pos['+']; } $hunk_content[] = $line; } } if ($in_old) { ++$hunk_pos['-']; } if ($in_new) { ++$hunk_pos['+']; } } } if ($hunk_offset['-'] !== null || $hunk_offset['+'] !== null) { $header = '@@'; if ($hunk_offset['-'] !== null) { $header .= ' -'.($hunk->getOldOffset() + $hunk_offset['-']). ','.($hunk_last['-'] - $hunk_offset['-'] + 1); } if ($hunk_offset['+'] !== null) { $header .= ' +'.($hunk->getNewOffset() + $hunk_offset['+']). 
','.($hunk_last['+'] - $hunk_offset['+'] + 1); } $header .= ' @@'; $context[] = $header; $context[] = implode("\n", $hunk_content); } } } return implode("\n", $context); } private function computeOffsets(array $hunks) { assert_instances_of($hunks, 'DifferentialHunk'); $offsets = array(); $n = 1; foreach ($hunks as $hunk) { $new_length = $hunk->getNewLen(); $new_offset = $hunk->getNewOffset(); for ($i = 0; $i < $new_length; $i++) { $offsets[$n] = $new_offset + $i; $n++; } } return $offsets; } private function getIndentDepth($text, $tab_width) { $len = strlen($text); $depth = 0; for ($ii = 0; $ii < $len; $ii++) { $c = $text[$ii]; // If this is a space, increase the indent depth by 1. if ($c == ' ') { $depth++; continue; } // If this is a tab, increase the indent depth to the next tabstop. // For example, if the tab width is 4, these sequences both lead us to // a visual width of 8, i.e. the cursor will be in the 8th column: // // [tab][tab] // [space][tab][space][space][space][tab] if ($c == "\t") { $depth = ($depth + $tab_width); $depth = $depth - ($depth % $tab_width); continue; } break; } return $depth; } private function getCharacterCountForVisualWhitespace( $text, $depth, $tab_width) { // Here, we know the visual indent depth of a line has been increased by // some amount (for example, 6 characters). // We want to find the largest whitespace prefix of the string we can // which still fits into that amount of visual space. // In most cases, this is very easy. For example, if the string has been // indented by two characters and the string begins with two spaces, that's // a perfect match. // However, if the string has been indented by 7 characters, the tab width // is 8, and the string begins with "[space][space][tab]", we can only // mark the two spaces as an indent change. These cases are unusual.
$character_depth = 0; $visual_depth = 0; $len = strlen($text); for ($ii = 0; $ii < $len; $ii++) { if ($visual_depth >= $depth) { break; } $c = $text[$ii]; if ($c == ' ') { $character_depth++; $visual_depth++; continue; } if ($c == "\t") { // Figure out how many visual spaces we have until the next tabstop. $tab_visual = ($visual_depth + $tab_width); $tab_visual = $tab_visual - ($tab_visual % $tab_width); $tab_visual = ($tab_visual - $visual_depth); // If this tab would take us over the limit, we're all done. $remaining_depth = ($depth - $visual_depth); if ($remaining_depth < $tab_visual) { break; } $character_depth++; $visual_depth += $tab_visual; continue; } break; } return $character_depth; } + private function updateChangeTypesForNormalization() { + if (!$this->getNormalized()) { + return; + } + + // If we've parsed based on a normalized diff alignment, we may currently + // believe some lines are unchanged when they have actually changed. This + // happens when: + // + // - a line changes; + // - the change is a kind of change we normalize away when aligning the + // diff, like an indentation change; + // - we normalize the change away to align the diff; and so + // - the old and new copies of the line are now aligned in the new + // normalized diff. + // + // Then we end up with an alignment where the two lines that differ only + // in some trivial way are aligned. This is great, and exactly what + // we're trying to accomplish by doing all this alignment stuff in the + // first place. + // + // However, in this case the correctly-aligned lines will be incorrectly + // marked as unchanged because the diff algorithm was fed normalized copies + // of the lines, and these copies truly weren't any different. + // + // When lines are aligned and marked identical, but they're not actually + // identical, we now mark them as changed. The rest of the processing will + // figure out how to render them appropriately.
+ + $new = $this->getNewLines(); + $old = $this->getOldLines(); + foreach ($old as $key => $o) { + $n = $new[$key]; + + if (!$o || !$n) { + continue; + } + + if ($o['type'] === null && $n['type'] === null) { + if ($o['text'] !== $n['text']) { + $old[$key]['type'] = '-'; + $new[$key]['type'] = '+'; + } + } + } + + $this->setOldLines($old); + $this->setNewLines($new); + } + + } diff --git a/src/infrastructure/diff/PhabricatorDifferenceEngine.php b/src/infrastructure/diff/PhabricatorDifferenceEngine.php index 90f1d9b10e..84e88ceaaa 100644 --- a/src/infrastructure/diff/PhabricatorDifferenceEngine.php +++ b/src/infrastructure/diff/PhabricatorDifferenceEngine.php @@ -1,132 +1,172 @@ oldName = $old_name; return $this; } /** * Set the name to identify the new file with. Primarily cosmetic. * * @param string New file name. * @return this * @task config */ public function setNewName($new_name) { $this->newName = $new_name; return $this; } + public function setNormalize($normalize) { + $this->normalize = $normalize; + return $this; + } + + public function getNormalize() { + return $this->normalize; + } + + /* -( Generating Diffs )--------------------------------------------------- */ /** * Generate a raw diff from two raw files. This is a lower-level API than * @{method:generateChangesetFromFileContent}, but may be useful if you need * to use a custom parser configuration, as with Diffusion. * * @param string Entire previous file content. * @param string Entire current file content. * @return string Raw diff between the two files. * @task diff */ public function generateRawDiffFromFileContent($old, $new) { $options = array(); // Generate diffs with full context. $options[] = '-U65535'; $old_name = nonempty($this->oldName, '/dev/universe').' 9999-99-99'; $new_name = nonempty($this->newName, '/dev/universe').' 
9999-99-99'; $options[] = '-L'; $options[] = $old_name; $options[] = '-L'; $options[] = $new_name; + $normalize = $this->getNormalize(); + if ($normalize) { + $old = $this->normalizeFile($old); + $new = $this->normalizeFile($new); + } + $old_tmp = new TempFile(); $new_tmp = new TempFile(); Filesystem::writeFile($old_tmp, $old); Filesystem::writeFile($new_tmp, $new); list($err, $diff) = exec_manual( 'diff %Ls %s %s', $options, $old_tmp, $new_tmp); if (!$err) { // This indicates that the two files are the same. Build a synthetic, // changeless diff so that we can still render the raw, unchanged file // instead of being forced to just say "this file didn't change" since we // don't have the content. $entire_file = explode("\n", $old); foreach ($entire_file as $k => $line) { $entire_file[$k] = ' '.$line; } $len = count($entire_file); $entire_file = implode("\n", $entire_file); // TODO: If both files were identical but missing newlines, we probably // get this wrong. Unclear if it ever matters. // This is a bit hacky but the diff parser can handle it. $diff = "--- {$old_name}\n". "+++ {$new_name}\n". "@@ -1,{$len} +1,{$len} @@\n". $entire_file."\n"; } return $diff; } /** * Generate an @{class:DifferentialChangeset} from two raw files. This is * principally useful because you can feed the output to * @{class:DifferentialChangesetParser} in order to render it. * * @param string Entire previous file content. * @param string Entire current file content. * @return @{class:DifferentialChangeset} Synthetic changeset. 
* @task diff */ public function generateChangesetFromFileContent($old, $new) { $diff = $this->generateRawDiffFromFileContent($old, $new); $changes = id(new ArcanistDiffParser())->parseDiff($diff); $diff = DifferentialDiff::newEphemeralFromRawChanges( $changes); return head($diff->getChangesets()); } + private function normalizeFile($corpus) { + // We can freely apply any other transformations we want to here: we have + // no constraints on what we need to preserve. If we normalize every line + // to "cat", the diff will still work, the alignment of the "-" / "+" + // lines will just be very hard to read. + + // In general, we'll make the diff better if we normalize two lines that + // humans think are the same. + + // We'll make the diff worse if we normalize two lines that humans think + // are different. + + + // Strip all whitespace present anywhere in the diff, since humans never + // consider whitespace changes to alter the line into a "different line" + // even when they're semantic (e.g., in a string constant). This covers + // indentation changes, trailing whitespace, and formatting changes + // like "+/-". + $corpus = preg_replace('/[ \t]/', '', $corpus); + + return $corpus; + } + }