diff --git a/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php b/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php index 18afd3f359..0db158e171 100644 --- a/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php +++ b/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php @@ -1,128 +1,212 @@ setName('migrate-hunk') ->setExamples( "**migrate-hunk** --id __hunk__ --to __storage__\n". "**migrate-hunk** --all") ->setSynopsis(pht('Migrate storage engines for a hunk.')) ->setArguments( array( array( 'name' => 'id', 'param' => 'id', 'help' => pht('Hunk ID to migrate.'), ), array( 'name' => 'to', 'param' => 'storage', 'help' => pht('Storage engine to migrate to.'), ), array( 'name' => 'all', 'help' => pht('Migrate all hunks.'), ), + array( + 'name' => 'auto', + 'help' => pht('Select storage format automatically.'), + ), + array( + 'name' => 'dry-run', + 'help' => pht('Show planned writes but do not perform them.'), + ), )); } public function execute(PhutilArgumentParser $args) { + $is_dry_run = $args->getArg('dry-run'); + $id = $args->getArg('id'); $is_all = $args->getArg('all'); if ($is_all && $id) { throw new PhutilArgumentUsageException( pht( 'Options "--all" (to migrate all hunks) and "--id" (to migrate a '. 'specific hunk) are mutually exclusive.')); } else if (!$is_all && !$id) { throw new PhutilArgumentUsageException( pht( 'Specify a hunk to migrate with "--id", or migrate all hunks '. 'with "--all".')); } + $is_auto = $args->getArg('auto'); $storage = $args->getArg('to'); - switch ($storage) { - case DifferentialHunk::DATATYPE_TEXT: - case DifferentialHunk::DATATYPE_FILE: - break; - default: + if ($is_auto && $storage) { + throw new PhutilArgumentUsageException( + pht( + 'Options "--to" (to choose a specific storage format) and "--auto" '. + '(to select a storage format automatically) are mutually '. 
+ 'exclusive.')); + } else if (!$is_auto && !$storage) { + throw new PhutilArgumentUsageException( + pht( + 'Use "--to" to choose a storage format, or "--auto" to select a '. + 'format automatically.')); + } + + $types = array( + DifferentialHunk::DATATYPE_TEXT, + DifferentialHunk::DATATYPE_FILE, + ); + $types = array_fuse($types); + if (strlen($storage)) { + if (!isset($types[$storage])) { throw new PhutilArgumentUsageException( - pht('Specify a hunk storage engine with --to.')); + pht( + 'Storage type "%s" is unknown. Supported types are: %s.', + $storage, + implode(', ', array_keys($types)))); + } } if ($id) { $hunk = $this->loadHunk($id); $hunks = array($hunk); } else { $hunks = new LiskMigrationIterator(new DifferentialHunk()); } foreach ($hunks as $hunk) { try { - $this->migrateHunk($hunk, $storage); + $this->migrateHunk($hunk, $storage, $is_auto, $is_dry_run); } catch (Exception $ex) { // If we're migrating a single hunk, just throw the exception. If // we're migrating multiple hunks, warn but continue. if ($id) { throw $ex; } $this->logWarn( pht('WARN'), pht( 'Failed to migrate hunk %d: %s', $hunk->getID(), $ex->getMessage())); } } return 0; } private function loadHunk($id) { $hunk = id(new DifferentialHunk())->load($id); if (!$hunk) { throw new PhutilArgumentUsageException( pht( 'No hunk exists with ID "%s".', $id)); } return $hunk; } - private function migrateHunk(DifferentialHunk $hunk, $format) { + private function migrateHunk( + DifferentialHunk $hunk, + $type, + $is_auto, + $is_dry_run) { + + $old_type = $hunk->getDataType(); + + if ($is_auto) { + // By default, we're just going to keep hunks in the same storage + // engine. In the future, we could perhaps select large hunks stored in + // text engine and move them into file storage. + $new_type = $old_type; + } else { + $new_type = $type; + } + + // Figure out if the storage format (e.g., plain text vs compressed) + // would change if we wrote this hunk anew today. 
+ $old_format = $hunk->getDataFormat(); + $new_format = $hunk->getAutomaticDataFormat(); + + $same_type = ($old_type === $new_type); + $same_format = ($old_format === $new_format); + + // If we aren't going to change the storage engine and aren't going to + // change the storage format, just bail out. + if ($same_type && $same_format) { + $this->logInfo( + pht('SKIP'), + pht( + 'Hunk %d is already stored in the preferred engine ("%s") '. + 'with the preferred format ("%s").', + $hunk->getID(), + $new_type, + $new_format)); + return; + } + + if ($is_dry_run) { + $this->logOkay( + pht('DRY RUN'), + pht( + 'Hunk %d would be rewritten (storage: "%s" -> "%s"; '. + 'format: "%s" -> "%s").', + $hunk->getID(), + $old_type, + $new_type, + $old_format, + $new_format)); + return; + } + $old_data = $hunk->getChanges(); - switch ($format) { + switch ($new_type) { case DifferentialHunk::DATATYPE_TEXT: $hunk->saveAsText(); - $this->logOkay( - pht('TEXT'), - pht('Converted hunk to text storage.')); break; case DifferentialHunk::DATATYPE_FILE: $hunk->saveAsFile(); - $this->logOkay( - pht('FILE'), - pht('Converted hunk to file storage.')); break; } + $this->logOkay( + pht('MIGRATE'), + pht( + 'Converted hunk %d to "%s" storage (with format "%s").', + $hunk->getID(), + $new_type, + $hunk->getDataFormat())); + $hunk = $this->loadHunk($hunk->getID()); $new_data = $hunk->getChanges(); if ($old_data !== $new_data) { throw new Exception( pht( - 'Integrity check failed: new file data differs fom old data!')); + 'Integrity check failed: new file data differs from old data!')); } } } diff --git a/src/applications/differential/storage/DifferentialHunk.php b/src/applications/differential/storage/DifferentialHunk.php index 3defb3566b..6bfb38b789 100644 --- a/src/applications/differential/storage/DifferentialHunk.php +++ b/src/applications/differential/storage/DifferentialHunk.php @@ -1,479 +1,485 @@ array( 'data' => true, ), self::CONFIG_COLUMN_SCHEMA => array( 'dataType' => 'bytes4', 
'dataEncoding' => 'text16?', 'dataFormat' => 'bytes4', 'oldOffset' => 'uint32', 'oldLen' => 'uint32', 'newOffset' => 'uint32', 'newLen' => 'uint32', ), self::CONFIG_KEY_SCHEMA => array( 'key_changeset' => array( 'columns' => array('changesetID'), ), 'key_created' => array( 'columns' => array('dateCreated'), ), ), ) + parent::getConfiguration(); } public function getAddedLines() { return $this->makeContent($include = '+'); } public function getRemovedLines() { return $this->makeContent($include = '-'); } public function makeNewFile() { return implode('', $this->makeContent($include = ' +')); } public function makeOldFile() { return implode('', $this->makeContent($include = ' -')); } public function makeChanges() { return implode('', $this->makeContent($include = '-+')); } public function getStructuredOldFile() { return $this->getStructuredFile('-'); } public function getStructuredNewFile() { return $this->getStructuredFile('+'); } private function getStructuredFile($kind) { if ($kind !== '+' && $kind !== '-') { throw new Exception( pht( 'Structured file kind should be "+" or "-", got "%s".', $kind)); } if (!isset($this->structuredFiles[$kind])) { if ($kind == '+') { $number = $this->newOffset; } else { $number = $this->oldOffset; } $lines = $this->getStructuredLines(); // NOTE: We keep the "\ No newline at end of file" line if it appears // after a line which is not excluded. For example, if we're constructing // the "+" side of the diff, we want to ignore this one since it's // relevant only to the "-" side of the diff: // // - x // \ No newline at end of file // + x // // ...but we want to keep this one: // // - x // + x // \ No newline at end of file $file = array(); $keep = true; foreach ($lines as $line) { switch ($line['type']) { case ' ': case $kind: $file[$number++] = $line; $keep = true; break; case '\\': if ($keep) { // Strip the actual newline off the line's text. 
$text = $file[$number - 1]['text']; $text = rtrim($text, "\r\n"); $file[$number - 1]['text'] = $text; $file[$number++] = $line; $keep = false; } break; default: $keep = false; break; } } $this->structuredFiles[$kind] = $file; } return $this->structuredFiles[$kind]; } public function getSplitLines() { if ($this->splitLines === null) { $this->splitLines = phutil_split_lines($this->getChanges()); } return $this->splitLines; } public function getStructuredLines() { if ($this->structuredLines === null) { $lines = $this->getSplitLines(); $structured = array(); foreach ($lines as $line) { if (empty($line[0])) { // TODO: Can we just get rid of this? continue; } $structured[] = array( 'type' => $line[0], 'text' => substr($line, 1), ); } $this->structuredLines = $structured; } return $this->structuredLines; } public function getContentWithMask($mask) { $include = array(); if (($mask & self::FLAG_LINES_ADDED)) { $include[] = '+'; } if (($mask & self::FLAG_LINES_REMOVED)) { $include[] = '-'; } if (($mask & self::FLAG_LINES_STABLE)) { $include[] = ' '; } $include = implode('', $include); return implode('', $this->makeContent($include)); } final private function makeContent($include) { $lines = $this->getSplitLines(); $results = array(); $include_map = array(); for ($ii = 0; $ii < strlen($include); $ii++) { $include_map[$include[$ii]] = true; } if (isset($include_map['+'])) { $n = $this->newOffset; } else { $n = $this->oldOffset; } $use_next_newline = false; foreach ($lines as $line) { if (!isset($line[0])) { continue; } if ($line[0] == '\\') { if ($use_next_newline) { $results[last_key($results)] = rtrim(end($results), "\n"); } } else if (empty($include_map[$line[0]])) { $use_next_newline = false; } else { $use_next_newline = true; $results[$n] = substr($line, 1); } if ($line[0] == ' ' || isset($include_map[$line[0]])) { $n++; } } return $results; } public function getChangeset() { return $this->assertAttached($this->changeset); } public function 
attachChangeset(DifferentialChangeset $changeset) { $this->changeset = $changeset; return $this; } /* -( Storage )------------------------------------------------------------ */ public function setChanges($text) { $this->rawData = $text; $this->dataEncoding = $this->detectEncodingForStorage($text); $this->dataType = self::DATATYPE_TEXT; list($format, $data) = $this->formatDataForStorage($text); $this->dataFormat = $format; $this->data = $data; return $this; } public function getChanges() { return $this->getUTF8StringFromStorage( $this->getRawData(), nonempty($this->forcedEncoding, $this->getDataEncoding())); } public function forceEncoding($encoding) { $this->forcedEncoding = $encoding; return $this; } private function formatDataForStorage($data) { $deflated = PhabricatorCaches::maybeDeflateData($data); if ($deflated !== null) { return array(self::DATAFORMAT_DEFLATED, $deflated); } return array(self::DATAFORMAT_RAW, $data); } + public function getAutomaticDataFormat() { + // If the hunk is already stored deflated, just keep it deflated. This is + // mostly a performance improvement for "bin/differential migrate-hunk" so + // that we don't have to recompress all the stored hunks when looking for + // stray uncompressed hunks. 
+ if ($this->dataFormat === self::DATAFORMAT_DEFLATED) { + return self::DATAFORMAT_DEFLATED; + } + + list($format) = $this->formatDataForStorage($this->getRawData()); + + return $format; + } + public function saveAsText() { $old_type = $this->getDataType(); $old_data = $this->getData(); - if ($old_type == self::DATATYPE_TEXT) { - return $this; - } - $raw_data = $this->getRawData(); $this->setDataType(self::DATATYPE_TEXT); list($format, $data) = $this->formatDataForStorage($raw_data); $this->setDataFormat($format); $this->setData($data); $result = $this->save(); $this->destroyData($old_type, $old_data); return $result; } public function saveAsFile() { $old_type = $this->getDataType(); $old_data = $this->getData(); - if ($old_type == self::DATATYPE_FILE) { - return $this; - } - $raw_data = $this->getRawData(); list($format, $data) = $this->formatDataForStorage($raw_data); $this->setDataFormat($format); $file = PhabricatorFile::newFromFileData( $data, array( 'name' => 'differential-hunk', 'mime-type' => 'application/octet-stream', 'viewPolicy' => PhabricatorPolicies::POLICY_NOONE, )); $this->setDataType(self::DATATYPE_FILE); $this->setData($file->getPHID()); // NOTE: Because hunks don't have a PHID and we just load hunk data with // the omnipotent viewer, we do not need to attach the file to anything. $result = $this->save(); $this->destroyData($old_type, $old_data); return $result; } private function getRawData() { if ($this->rawData === null) { $type = $this->getDataType(); $data = $this->getData(); switch ($type) { case self::DATATYPE_TEXT: // In this storage type, the changes are stored on the object. $data = $data; break; case self::DATATYPE_FILE: $data = $this->loadFileData(); break; default: throw new Exception( pht('Hunk has unsupported data type "%s"!', $type)); } $format = $this->getDataFormat(); switch ($format) { case self::DATAFORMAT_RAW: // In this format, the changes are stored as-is. 
$data = $data; break; case self::DATAFORMAT_DEFLATED: $data = PhabricatorCaches::inflateData($data); break; default: /* Report the unrecognized data format itself; the storage type was already validated by the switch above. */ throw new Exception( pht('Hunk has unsupported data encoding "%s"!', $format)); } $this->rawData = $data; } return $this->rawData; }

  /**
   * Load the stored bytes for a hunk which uses file storage.
   *
   * The loaded data is kept on the object in `fileData`, so the file is
   * read at most once per hunk object.
   *
   * @return wild Whatever the file object's `loadFileData()` returns; no
   *   format decoding is applied here.
   * @throws Exception If this hunk's data type is not `DATATYPE_FILE`.
   */
  private function loadFileData() {
    if ($this->fileData === null) {
      $type = $this->getDataType();
      if ($type !== self::DATATYPE_FILE) {
        throw new Exception(
          pht(
            'Unable to load file data for hunk with wrong data type ("%s").',
            $type));
      }

      // For file storage, the "data" column holds the PHID of the file.
      $file_phid = $this->getData();
      $file = $this->loadRawFile($file_phid);
      $data = $file->loadFileData();

      $this->fileData = $data;
    }

    return $this->fileData;
  }

  /**
   * Load the file object with the given PHID.
   *
   * The query runs as the omnipotent user.
   *
   * @param string PHID of the file to load.
   * @return wild The first result of the file query.
   * @throws Exception If the query returns no files.
   */
  private function loadRawFile($file_phid) {
    $viewer = PhabricatorUser::getOmnipotentUser();

    $files = id(new PhabricatorFileQuery())
      ->setViewer($viewer)
      ->withPHIDs(array($file_phid))
      ->execute();
    if (!$files) {
      throw new Exception(
        pht(
          'Failed to load file ("%s") with hunk data.',
          $file_phid));
    }

    $file = head($files);

    return $file;
  }

  /**
   * Destroy external data backing a hunk.
   *
   * Only file storage is handled here: the referenced file is loaded and
   * handed to the destruction engine. Any other type falls through the
   * switch and nothing is destroyed.
   *
   * @param string Data type the hunk data was stored with.
   * @param string Stored data value; for file storage, the file PHID.
   * @param PhabricatorDestructionEngine|null Engine to destroy with. A new
   *   engine is constructed if none is provided.
   * @return void
   * @throws Exception If the type is file storage and the file can not be
   *   loaded.
   */
  private function destroyData(
    $type,
    $data,
    PhabricatorDestructionEngine $engine = null) {

    if (!$engine) {
      $engine = new PhabricatorDestructionEngine();
    }

    switch ($type) {
      case self::DATATYPE_FILE:
        $file = $this->loadRawFile($data);
        $engine->destroyObject($file);
        break;
    }
  }


/* -( PhabricatorPolicyInterface )----------------------------------------- */


  /**
   * List the capabilities this object has policies for: only "can view".
   *
   * @return list<string> Capability constants.
   */
  public function getCapabilities() {
    return array(
      PhabricatorPolicyCapability::CAN_VIEW,
    );
  }

  /**
   * Get the policy for a capability by delegating to the attached changeset.
   *
   * @param string Capability constant.
   * @return wild The changeset's policy for the capability.
   */
  public function getPolicy($capability) {
    return $this->getChangeset()->getPolicy($capability);
  }

  /**
   * Test for an automatic capability by delegating to the attached
   * changeset.
   *
   * @param string Capability constant.
   * @param PhabricatorUser Viewer to test.
   * @return wild The changeset's answer for this capability and viewer.
   */
  public function hasAutomaticCapability($capability, PhabricatorUser $viewer) {
    return $this->getChangeset()->hasAutomaticCapability($capability, $viewer);
  }


/* -( PhabricatorDestructibleInterface )----------------------------------- */


  /**
   * Permanently destroy this hunk.
   *
   * The data backing the hunk is destroyed first (via @{method:destroyData},
   * using the provided engine), then the hunk itself is deleted.
   *
   * @param PhabricatorDestructionEngine Engine performing the destruction.
   * @return void
   */
  public function destroyObjectPermanently(
    PhabricatorDestructionEngine $engine) {

    $type = $this->getDataType();
    $data = $this->getData();

    $this->destroyData($type, $data, $engine);

    $this->delete();
  }

}