diff --git a/src/applications/files/management/PhabricatorFilesManagementMigrateWorkflow.php b/src/applications/files/management/PhabricatorFilesManagementMigrateWorkflow.php
index 909e45c31f..58d5155aed 100644
--- a/src/applications/files/management/PhabricatorFilesManagementMigrateWorkflow.php
+++ b/src/applications/files/management/PhabricatorFilesManagementMigrateWorkflow.php
@@ -1,247 +1,281 @@
       setName('migrate')
       ->setSynopsis(pht('Migrate files between storage engines.'))
       ->setArguments(
         array(
           array(
             'name' => 'engine',
             'param' => 'storage_engine',
             'help' => pht('Migrate to the named storage engine.'),
           ),
           array(
             'name' => 'dry-run',
             'help' => pht('Show what would be migrated.'),
           ),
           array(
             'name' => 'min-size',
             'param' => 'bytes',
             'help' => pht(
               'Do not migrate data for files which are smaller than a given '.
               'filesize.'),
           ),
           array(
             'name' => 'max-size',
             'param' => 'bytes',
             'help' => pht(
               'Do not migrate data for files which are larger than a given '.
               'filesize.'),
           ),
           array(
             'name' => 'all',
             'help' => pht('Migrate all files.'),
           ),
           array(
             'name' => 'copy',
             'help' => pht(
               'Copy file data instead of moving it: after migrating, do not '.
               'remove the old data even if it is no longer referenced.'),
           ),
           array(
             'name' => 'names',
             'wildcard' => true,
           ),
+          array(
+            'name' => 'from-engine',
+            'param' => 'engine',
+            'help' => pht('Migrate files from the named storage engine.'),
+          ),
+          array(
+            'name' => 'local-disk-source',
+            'param' => 'path',
+            'help' => pht(
+              'When migrating from a local disk source, use the specified '.
+              'path as the root directory.'),
+          ),
         ));
   }
 
   public function execute(PhutilArgumentParser $args) {
+
+    // See T13306. This flag allows you to import files from a backup of
+    // local disk storage into some other engine. When the caller provides
+    // the flag, we override the local disk engine configuration and treat
+    // it as though it is configured to use the specified location.
+
+    $local_disk_source = $args->getArg('local-disk-source');
+    if (strlen($local_disk_source)) {
+      $path = Filesystem::resolvePath($local_disk_source);
+      try {
+        Filesystem::assertIsDirectory($path);
+      } catch (FilesystemException $ex) {
+        throw new PhutilArgumentUsageException(
+          pht(
+            'The "--local-disk-source" argument must point to a valid, '.
+            'readable directory on local disk.'));
+      }
+
+      $env = PhabricatorEnv::beginScopedEnv();
+      $env->overrideEnvConfig('storage.local-disk.path', $path);
+    }
+
     $target_key = $args->getArg('engine');
     if (!$target_key) {
       throw new PhutilArgumentUsageException(
         pht(
           'Specify an engine to migrate to with `%s`. '.
           'Use `%s` to get a list of engines.',
           '--engine',
           'files engines'));
     }
 
     $target_engine = PhabricatorFile::buildEngine($target_key);
 
     $iterator = $this->buildIterator($args);
     if (!$iterator) {
       throw new PhutilArgumentUsageException(
         pht(
           'Either specify a list of files to migrate, or use `%s` '.
           'to migrate all files.',
           '--all'));
     }
 
     $is_dry_run = $args->getArg('dry-run');
 
     $min_size = (int)$args->getArg('min-size');
     $max_size = (int)$args->getArg('max-size');
 
     $is_copy = $args->getArg('copy');
 
     $failed = array();
     $engines = PhabricatorFileStorageEngine::loadAllEngines();
     $total_bytes = 0;
     $total_files = 0;
     foreach ($iterator as $file) {
       $monogram = $file->getMonogram();
 
       // See T7148. When we export data for an instance, we copy all the data
       // for Files from S3 into the database dump so that the database dump is
       // a complete, standalone archive of all the data. In the general case,
       // installs may have a similar process using "--copy" to create a more
       // complete backup.
 
       // When doing this, we may run into temporary files which have been
       // deleted between the time we took the original dump and the current
       // timestamp. These files can't be copied since the data no longer
       // exists: the daemons on the live install already deleted it.
 
       // Simply avoid this whole mess by declining to migrate expired temporary
       // files. They're as good as dead anyway.
 
       $ttl = $file->getTTL();
       if ($ttl) {
         if ($ttl < PhabricatorTime::getNow()) {
           echo tsprintf(
             "%s\n",
             pht(
               '%s: Skipping expired temporary file.',
               $monogram));
           continue;
         }
       }
 
       $engine_key = $file->getStorageEngine();
       $engine = idx($engines, $engine_key);
 
       if (!$engine) {
         echo tsprintf(
           "%s\n",
           pht(
             '%s: Uses unknown storage engine "%s".',
             $monogram,
             $engine_key));
         $failed[] = $file;
         continue;
       }
 
       if ($engine->isChunkEngine()) {
         echo tsprintf(
           "%s\n",
           pht(
             '%s: Stored as chunks, no data to migrate directly.',
             $monogram));
         continue;
       }
 
       if ($engine_key === $target_key) {
         echo tsprintf(
           "%s\n",
           pht(
             '%s: Already stored in engine "%s".',
             $monogram,
             $target_key));
         continue;
       }
 
       $byte_size = $file->getByteSize();
 
       if ($min_size && ($byte_size < $min_size)) {
         echo tsprintf(
           "%s\n",
           pht(
             '%s: File size (%s) is smaller than minimum size (%s).',
             $monogram,
             phutil_format_bytes($byte_size),
             phutil_format_bytes($min_size)));
         continue;
       }
 
       if ($max_size && ($byte_size > $max_size)) {
         echo tsprintf(
           "%s\n",
           pht(
             '%s: File size (%s) is larger than maximum size (%s).',
             $monogram,
             phutil_format_bytes($byte_size),
             phutil_format_bytes($max_size)));
         continue;
       }
 
       if ($is_dry_run) {
         echo tsprintf(
           "%s\n",
           pht(
             '%s: (%s) Would migrate from "%s" to "%s" (dry run)...',
             $monogram,
             phutil_format_bytes($byte_size),
             $engine_key,
             $target_key));
       } else {
         echo tsprintf(
           "%s\n",
           pht(
             '%s: (%s) Migrating from "%s" to "%s"...',
             $monogram,
             phutil_format_bytes($byte_size),
             $engine_key,
             $target_key));
       }
 
       try {
         if ($is_dry_run) {
           // Do nothing, this is a dry run.
         } else {
           $file->migrateToEngine($target_engine, $is_copy);
         }
 
         $total_files += 1;
         $total_bytes += $byte_size;
 
         echo tsprintf(
           "%s\n",
           pht('Done.'));
 
       } catch (Exception $ex) {
         echo tsprintf(
           "%s\n",
           pht('Failed! %s', (string)$ex));
         $failed[] = $file;
 
         throw $ex;
       }
     }
 
     echo tsprintf(
       "%s\n",
       pht(
         'Total Migrated Files: %s',
         new PhutilNumber($total_files)));
 
     echo tsprintf(
       "%s\n",
       pht(
         'Total Migrated Bytes: %s',
         phutil_format_bytes($total_bytes)));
 
     if ($is_dry_run) {
       echo tsprintf(
         "%s\n",
         pht(
           'This was a dry run, so no real migrations were performed.'));
     }
 
     if ($failed) {
       $monograms = mpull($failed, 'getMonogram');
 
       echo tsprintf(
         "%s\n",
         pht('Failures: %s.', implode(', ', $monograms)));
 
       return 1;
     }
 
     return 0;
   }
 
 }
diff --git a/src/applications/files/management/PhabricatorFilesManagementWorkflow.php b/src/applications/files/management/PhabricatorFilesManagementWorkflow.php
index e94fa1d96a..44d43dc66a 100644
--- a/src/applications/files/management/PhabricatorFilesManagementWorkflow.php
+++ b/src/applications/files/management/PhabricatorFilesManagementWorkflow.php
@@ -1,47 +1,72 @@
     getViewer();
     $names = $args->getArg('names');
 
-    if ($args->getArg('all')) {
-      if ($names) {
-        throw new PhutilArgumentUsageException(
-          pht(
-            'Specify either a list of files or `%s`, but not both.',
-            '--all'));
-      }
-      return new LiskMigrationIterator(new PhabricatorFile());
+    $is_all = $args->getArg('all');
+    $from_engine = $args->getArg('from-engine');
+
+    $any_constraint = ($from_engine || $names);
+
+    if (!$is_all && !$any_constraint) {
+      throw new PhutilArgumentUsageException(
+        pht(
+          'Use "--all" to migrate all files, or choose files to migrate '.
+          'with "--names" or "--from-engine".'));
+    }
+
+    if ($is_all && $any_constraint) {
+      throw new PhutilArgumentUsageException(
+        pht(
+          'You can not migrate all files with "--all" and also migrate only '.
+          'a subset of files with "--from-engine" or "--names".'));
     }
 
+    // If we're migrating specific named files, convert the names into IDs
+    // first.
+    $ids = null;
     if ($names) {
-      return $this->loadFilesWithNames($names);
+      $files = $this->loadFilesWithNames($names);
+      $ids = mpull($files, 'getID');
+    }
+
+    $query = id(new PhabricatorFileQuery())
+      ->setViewer($viewer);
+
+    if ($ids) {
+      $query->withIDs($ids);
+    }
+
+    if ($from_engine) {
+      $query->withStorageEngines(array($from_engine));
     }
 
-    return null;
+    return new PhabricatorQueryIterator($query);
   }
 
   protected function loadFilesWithNames(array $names) {
     $query = id(new PhabricatorObjectQuery())
       ->setViewer($this->getViewer())
       ->withNames($names)
       ->withTypes(array(PhabricatorFileFilePHIDType::TYPECONST));
 
     $query->execute();
     $files = $query->getNamedResults();
 
     foreach ($names as $name) {
       if (empty($files[$name])) {
         throw new PhutilArgumentUsageException(
           pht(
-            "No file '%s' exists!",
+            'No file "%s" exists.',
             $name));
       }
     }
 
     return array_values($files);
   }
 
 }
diff --git a/src/applications/files/query/PhabricatorFileQuery.php b/src/applications/files/query/PhabricatorFileQuery.php
index 4205ab5c5d..c19574acaa 100644
--- a/src/applications/files/query/PhabricatorFileQuery.php
+++ b/src/applications/files/query/PhabricatorFileQuery.php
@@ -1,483 +1,496 @@
     ids = $ids;
     return $this;
   }
 
   public function withPHIDs(array $phids) {
     $this->phids = $phids;
     return $this;
   }
 
   public function withAuthorPHIDs(array $phids) {
     $this->authorPHIDs = $phids;
     return $this;
   }
 
   public function withDateCreatedBefore($date_created_before) {
     $this->dateCreatedBefore = $date_created_before;
     return $this;
   }
 
   public function withDateCreatedAfter($date_created_after) {
     $this->dateCreatedAfter = $date_created_after;
     return $this;
   }
 
   public function withContentHashes(array $content_hashes) {
     $this->contentHashes = $content_hashes;
     return $this;
   }
 
   public function withBuiltinKeys(array $keys) {
     $this->builtinKeys = $keys;
     return $this;
   }
 
   public function withIsBuiltin($is_builtin) {
     $this->isBuiltin = $is_builtin;
     return $this;
   }
 
   /**
    * Select files which are transformations of some other file. For example,
    * you can use this query to find previously generated thumbnails of an image
    * file.
    *
    * As a parameter, provide a list of transformation specifications. Each
    * specification is a dictionary with the keys `originalPHID` and `transform`.
    * The `originalPHID` is the PHID of the original file (the file which was
    * transformed) and the `transform` is the name of the transform to query
    * for. If you pass `true` as the `transform`, all transformations of the
    * file will be selected.
    *
    * For example:
    *
    *   array(
    *     array(
    *       'originalPHID' => 'PHID-FILE-aaaa',
    *       'transform' => 'sepia',
    *     ),
    *     array(
    *       'originalPHID' => 'PHID-FILE-bbbb',
    *       'transform' => true,
    *     ),
    *   )
    *
    * This selects the `"sepia"` transformation of the file with PHID
    * `PHID-FILE-aaaa` and all transformations of the file with PHID
    * `PHID-FILE-bbbb`.
    *
    * @param list List of transform specifications, described above.
    * @return this
    */
   public function withTransforms(array $specs) {
     foreach ($specs as $spec) {
       if (!is_array($spec) ||
           empty($spec['originalPHID']) ||
           empty($spec['transform'])) {
         throw new Exception(
           pht(
             "Transform specification must be a dictionary with keys ".
             "'%s' and '%s'!",
             'originalPHID',
             'transform'));
       }
     }
 
     $this->transforms = $specs;
     return $this;
   }
 
   public function withLengthBetween($min, $max) {
     $this->minLength = $min;
     $this->maxLength = $max;
     return $this;
   }
 
   public function withNames(array $names) {
     $this->names = $names;
     return $this;
   }
 
   public function withIsPartial($partial) {
     $this->isPartial = $partial;
     return $this;
   }
 
   public function withIsDeleted($deleted) {
     $this->isDeleted = $deleted;
     return $this;
   }
 
   public function withNameNgrams($ngrams) {
     return $this->withNgramsConstraint(
       id(new PhabricatorFileNameNgrams()),
       $ngrams);
   }
 
+  public function withStorageEngines(array $engines) {
+    $this->storageEngines = $engines;
+    return $this;
+  }
+
   public function showOnlyExplicitUploads($explicit_uploads) {
     $this->explicitUploads = $explicit_uploads;
     return $this;
   }
 
   public function needTransforms(array $transforms) {
     $this->needTransforms = $transforms;
     return $this;
   }
 
   public function newResultObject() {
     return new PhabricatorFile();
   }
 
   protected function loadPage() {
     $files = $this->loadStandardPage($this->newResultObject());
 
     if (!$files) {
       return $files;
     }
 
     // Figure out which files we need to load attached objects for. In most
     // cases, we need to load attached objects to perform policy checks for
     // files.
 
     // However, in some special cases where we know files will always be
     // visible, we skip this. See T8478 and T13106.
     $need_objects = array();
     $need_xforms = array();
     foreach ($files as $file) {
       $always_visible = false;
 
       if ($file->getIsProfileImage()) {
         $always_visible = true;
       }
 
       if ($file->isBuiltin()) {
         $always_visible = true;
       }
 
       if ($always_visible) {
         // We just treat these files as though they aren't attached to
         // anything. This saves a query in common cases when we're loading
         // profile images or builtins. We could be slightly more nuanced
         // about this and distinguish between "not attached to anything" and
         // "might be attached but policy checks don't need to care".
         $file->attachObjectPHIDs(array());
         continue;
       }
 
       $need_objects[] = $file;
       $need_xforms[] = $file;
     }
 
     $viewer = $this->getViewer();
     $is_omnipotent = $viewer->isOmnipotent();
 
     // If we have any files left which do need objects, load the edges now.
     $object_phids = array();
     if ($need_objects) {
       $edge_type = PhabricatorFileHasObjectEdgeType::EDGECONST;
       $file_phids = mpull($need_objects, 'getPHID');
 
       $edges = id(new PhabricatorEdgeQuery())
         ->withSourcePHIDs($file_phids)
         ->withEdgeTypes(array($edge_type))
         ->execute();
 
       foreach ($need_objects as $file) {
         $phids = array_keys($edges[$file->getPHID()][$edge_type]);
         $file->attachObjectPHIDs($phids);
 
         if ($is_omnipotent) {
           // If the viewer is omnipotent, we don't need to load the associated
           // objects either since the viewer can certainly see the object.
           // Skipping this can improve performance and prevent cycles. This
           // could possibly become part of the profile/builtin code above which
           // short circuits attacment policy checks in cases where we know them
           // to be unnecessary.
           continue;
         }
 
         foreach ($phids as $phid) {
           $object_phids[$phid] = true;
         }
       }
     }
 
     // If this file is a transform of another file, load that file too. If you
     // can see the original file, you can see the thumbnail.
 
     // TODO: It might be nice to put this directly on PhabricatorFile and
     // remove the PhabricatorTransformedFile table, which would be a little
     // simpler.
 
     if ($need_xforms) {
       $xforms = id(new PhabricatorTransformedFile())->loadAllWhere(
         'transformedPHID IN (%Ls)',
         mpull($need_xforms, 'getPHID'));
       $xform_phids = mpull($xforms, 'getOriginalPHID', 'getTransformedPHID');
       foreach ($xform_phids as $derived_phid => $original_phid) {
         $object_phids[$original_phid] = true;
       }
     } else {
       $xform_phids = array();
     }
 
     $object_phids = array_keys($object_phids);
 
     // Now, load the objects.
 
     $objects = array();
     if ($object_phids) {
       // NOTE: We're explicitly turning policy exceptions off, since the rule
       // here is "you can see the file if you can see ANY associated object".
       // Without this explicit flag, we'll incorrectly throw unless you can
       // see ALL associated objects.
 
       $objects = id(new PhabricatorObjectQuery())
         ->setParentQuery($this)
         ->setViewer($this->getViewer())
         ->withPHIDs($object_phids)
         ->setRaisePolicyExceptions(false)
         ->execute();
       $objects = mpull($objects, null, 'getPHID');
     }
 
     foreach ($files as $file) {
       $file_objects = array_select_keys($objects, $file->getObjectPHIDs());
       $file->attachObjects($file_objects);
     }
 
     foreach ($files as $key => $file) {
       $original_phid = idx($xform_phids, $file->getPHID());
       if ($original_phid == PhabricatorPHIDConstants::PHID_VOID) {
         // This is a special case for builtin files, which are handled
         // oddly.
         $original = null;
       } else if ($original_phid) {
         $original = idx($objects, $original_phid);
         if (!$original) {
           // If the viewer can't see the original file, also prevent them from
           // seeing the transformed file.
           $this->didRejectResult($file);
           unset($files[$key]);
           continue;
         }
       } else {
         $original = null;
       }
       $file->attachOriginalFile($original);
     }
 
     return $files;
   }
 
   protected function didFilterPage(array $files) {
     $xform_keys = $this->needTransforms;
     if ($xform_keys !== null) {
       $xforms = id(new PhabricatorTransformedFile())->loadAllWhere(
         'originalPHID IN (%Ls) AND transform IN (%Ls)',
         mpull($files, 'getPHID'),
         $xform_keys);
 
       if ($xforms) {
         $xfiles = id(new PhabricatorFile())->loadAllWhere(
           'phid IN (%Ls)',
           mpull($xforms, 'getTransformedPHID'));
         $xfiles = mpull($xfiles, null, 'getPHID');
       }
 
       $xform_map = array();
       foreach ($xforms as $xform) {
         $xfile = idx($xfiles, $xform->getTransformedPHID());
         if (!$xfile) {
           continue;
         }
         $original_phid = $xform->getOriginalPHID();
         $xform_key = $xform->getTransform();
         $xform_map[$original_phid][$xform_key] = $xfile;
       }
 
       $default_xforms = array_fill_keys($xform_keys, null);
 
       foreach ($files as $file) {
         $file_xforms = idx($xform_map, $file->getPHID(), array());
         $file_xforms += $default_xforms;
         $file->attachTransforms($file_xforms);
       }
     }
 
     return $files;
   }
 
   protected function buildJoinClauseParts(AphrontDatabaseConnection $conn) {
     $joins = parent::buildJoinClauseParts($conn);
 
     if ($this->transforms) {
       $joins[] = qsprintf(
         $conn,
         'JOIN %T t ON t.transformedPHID = f.phid',
         id(new PhabricatorTransformedFile())->getTableName());
     }
 
     return $joins;
   }
 
   protected function buildWhereClauseParts(AphrontDatabaseConnection $conn) {
     $where = parent::buildWhereClauseParts($conn);
 
     if ($this->ids !== null) {
       $where[] = qsprintf(
         $conn,
         'f.id IN (%Ld)',
         $this->ids);
     }
 
     if ($this->phids !== null) {
       $where[] = qsprintf(
         $conn,
         'f.phid IN (%Ls)',
         $this->phids);
     }
 
     if ($this->authorPHIDs !== null) {
       $where[] = qsprintf(
         $conn,
         'f.authorPHID IN (%Ls)',
         $this->authorPHIDs);
     }
 
     if ($this->explicitUploads !== null) {
       $where[] = qsprintf(
         $conn,
         'f.isExplicitUpload = %d',
         (int)$this->explicitUploads);
     }
 
     if ($this->transforms !== null) {
       $clauses = array();
       foreach ($this->transforms as $transform) {
         if ($transform['transform'] === true) {
           $clauses[] = qsprintf(
             $conn,
             '(t.originalPHID = %s)',
             $transform['originalPHID']);
         } else {
           $clauses[] = qsprintf(
             $conn,
             '(t.originalPHID = %s AND t.transform = %s)',
             $transform['originalPHID'],
             $transform['transform']);
         }
       }
       $where[] = qsprintf($conn, '%LO', $clauses);
     }
 
     if ($this->dateCreatedAfter !== null) {
       $where[] = qsprintf(
         $conn,
         'f.dateCreated >= %d',
         $this->dateCreatedAfter);
     }
 
     if ($this->dateCreatedBefore !== null) {
       $where[] = qsprintf(
         $conn,
         'f.dateCreated <= %d',
         $this->dateCreatedBefore);
     }
 
     if ($this->contentHashes !== null) {
       $where[] = qsprintf(
         $conn,
         'f.contentHash IN (%Ls)',
         $this->contentHashes);
     }
 
     if ($this->minLength !== null) {
       $where[] = qsprintf(
         $conn,
         'byteSize >= %d',
         $this->minLength);
     }
 
     if ($this->maxLength !== null) {
       $where[] = qsprintf(
         $conn,
         'byteSize <= %d',
         $this->maxLength);
     }
 
     if ($this->names !== null) {
       $where[] = qsprintf(
         $conn,
         'name in (%Ls)',
         $this->names);
     }
 
     if ($this->isPartial !== null) {
       $where[] = qsprintf(
         $conn,
         'isPartial = %d',
         (int)$this->isPartial);
     }
 
     if ($this->isDeleted !== null) {
       $where[] = qsprintf(
         $conn,
         'isDeleted = %d',
         (int)$this->isDeleted);
     }
 
     if ($this->builtinKeys !== null) {
       $where[] = qsprintf(
         $conn,
         'builtinKey IN (%Ls)',
         $this->builtinKeys);
     }
 
     if ($this->isBuiltin !== null) {
       if ($this->isBuiltin) {
         $where[] = qsprintf(
           $conn,
           'builtinKey IS NOT NULL');
       } else {
         $where[] = qsprintf(
           $conn,
           'builtinKey IS NULL');
       }
     }
 
+    if ($this->storageEngines !== null) {
+      $where[] = qsprintf(
+        $conn,
+        'storageEngine IN (%Ls)',
+        $this->storageEngines);
+    }
+
     return $where;
   }
 
   protected function getPrimaryTableAlias() {
     return 'f';
   }
 
   public function getQueryApplicationClass() {
     return 'PhabricatorFilesApplication';
   }
 
 }