diff --git a/resources/sql/autopatches/20210216.index.01.version.sql b/resources/sql/autopatches/20210216.index.01.version.sql
new file mode 100644
--- /dev/null
+++ b/resources/sql/autopatches/20210216.index.01.version.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_search.search_indexversion
+  ADD indexVersion BINARY(12) NOT NULL;
diff --git a/resources/sql/autopatches/20210216.index.02.epoch.sql b/resources/sql/autopatches/20210216.index.02.epoch.sql
new file mode 100644
--- /dev/null
+++ b/resources/sql/autopatches/20210216.index.02.epoch.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_search.search_indexversion
+  ADD indexEpoch INT UNSIGNED NOT NULL;
diff --git a/src/applications/search/index/PhabricatorIndexEngine.php b/src/applications/search/index/PhabricatorIndexEngine.php
--- a/src/applications/search/index/PhabricatorIndexEngine.php
+++ b/src/applications/search/index/PhabricatorIndexEngine.php
@@ -109,8 +109,10 @@
 
     $rows = queryfx_all(
       $conn_r,
-      'SELECT * FROM %T WHERE objectPHID = %s AND extensionKey IN (%Ls)',
-      $table->getTableName(),
+      'SELECT version, extensionKey
+        FROM %R
+        WHERE objectPHID = %s AND extensionKey IN (%Ls)',
+      $table,
       $object_phid,
       $extension_keys);
 
@@ -128,22 +130,35 @@
     $table = new PhabricatorSearchIndexVersion();
     $conn_w = $table->establishConnection('w');
 
+    $now = PhabricatorTime::getNow();
+
+    // See T13587. For now, this is just a marker to make it easy to reindex
+    // documents if some version of the indexing code is later discovered to
+    // be questionable.
+    $index_version = '2021-02-16-A';
+
     $sql = array();
     foreach ($versions as $key => $version) {
       $sql[] = qsprintf(
         $conn_w,
-        '(%s, %s, %s)',
+        '(%s, %s, %s, %s, %d)',
         $object_phid,
         $key,
-        $version);
+        $version,
+        $index_version,
+        $now);
     }
 
     queryfx(
       $conn_w,
-      'INSERT INTO %T (objectPHID, extensionKey, version)
+      'INSERT INTO %R (objectPHID, extensionKey, version,
+          indexVersion, indexEpoch)
         VALUES %LQ
-        ON DUPLICATE KEY UPDATE version = VALUES(version)',
-      $table->getTableName(),
+        ON DUPLICATE KEY UPDATE
+          version = VALUES(version),
+          indexVersion = VALUES(indexVersion),
+          indexEpoch = VALUES(indexEpoch)',
+      $table,
       $sql);
   }
 
diff --git a/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
--- a/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
+++ b/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
@@ -8,9 +8,13 @@
       ->setName('index')
       ->setSynopsis(pht('Build or rebuild search indexes.'))
       ->setExamples(
-        "**index** D123\n".
-        "**index** --type task\n".
-        "**index** --all")
+        implode(
+          "\n",
+          array(
+            '**index** D123',
+            '**index** --all',
+            '**index** [--type __task__] [--version __version__] ...',
+          )))
       ->setArguments(
         array(
           array(
@@ -20,6 +24,7 @@
           array(
             'name' => 'type',
             'param' => 'type',
+            'repeat' => true,
             'help' => pht(
               'Object types to reindex, like "task", "commit" or "revision".'),
           ),
@@ -37,6 +42,28 @@
               'Force a complete rebuild of the entire index instead of an '.
               'incremental update.'),
           ),
+          array(
+            'name' => 'version',
+            'param' => 'version',
+            'repeat' => true,
+            'help' => pht(
+              'Reindex objects previously indexed with a particular '.
+              'version of the indexer.'),
+          ),
+          array(
+            'name' => 'min-index-date',
+            'param' => 'date',
+            'help' => pht(
+              'Reindex objects previously indexed on or after a '.
+              'given date.'),
+          ),
+          array(
+            'name' => 'max-index-date',
+            'param' => 'date',
+            'help' => pht(
+              'Reindex objects previously indexed on or before a '.
+              'given date.'),
+          ),
           array(
             'name' => 'objects',
             'wildcard' => true,
@@ -47,37 +74,46 @@
   public function execute(PhutilArgumentParser $args) {
     $this->validateClusterSearchConfig();
 
-    $console = PhutilConsole::getConsole();
-
     $is_all = $args->getArg('all');
-    $is_type = $args->getArg('type');
     $is_force = $args->getArg('force');
 
-    $obj_names = $args->getArg('objects');
+    $object_types = $args->getArg('type');
+    $index_versions = $args->getArg('version');
 
-    if ($obj_names && ($is_all || $is_type)) {
-      throw new PhutilArgumentUsageException(
-        pht(
-          "You can not name objects to index alongside the '%s' or '%s' flags.",
-          '--all',
-          '--type'));
-    } else if (!$obj_names && !($is_all || $is_type)) {
+    $min_epoch = $args->getArg('min-index-date');
+    if ($min_epoch !== null) {
+      $min_epoch = $this->parseTimeArgument($min_epoch);
+    }
+
+    $max_epoch = $args->getArg('max-index-date');
+    if ($max_epoch !== null) {
+      $max_epoch = $this->parseTimeArgument($max_epoch);
+    }
+
+    $object_names = $args->getArg('objects');
+
+    $any_constraints =
+      ($object_names) ||
+      ($object_types) ||
+      ($index_versions) ||
+      ($min_epoch) ||
+      ($max_epoch);
+
+    if ($is_all && $any_constraints) {
       throw new PhutilArgumentUsageException(
         pht(
-          "Provide one of '%s', '%s' or a list of object names.",
-          '--all',
-          '--type'));
+          'You can not use query constraint flags (like "--version", '.
+          '"--type", or a list of specific objects) with "--all".'));
     }
 
-    if ($obj_names) {
-      $phids = $this->loadPHIDsByNames($obj_names);
-    } else {
-      $phids = $this->loadPHIDsByTypes($is_type);
+    if (!$is_all && !$any_constraints) {
+      throw new PhutilArgumentUsageException(
+        pht(
+          'Provide a list of objects to index (like "D123"), or a set of '.
+          'query constraint flags (like "--type"), or "--all" to index '.
+          'all objects.'));
     }
 
-    if (!$phids) {
-      throw new PhutilArgumentUsageException(pht('Nothing to index!'));
-    }
 
     if ($args->getArg('background')) {
       $is_background = true;
@@ -87,21 +123,80 @@
     }
 
     if (!$is_background) {
-      echo tsprintf(
-        "** %s ** %s\n",
+      $this->logInfo(
         pht('NOTE'),
         pht(
-          'Run this workflow with "%s" to queue tasks for the daemon workers.',
-          '--background'));
+          'Run this workflow with "--background" to queue tasks for the '.
+          'daemon workers.'));
+    }
+
+    $this->logInfo(
+      pht('SELECT'),
+      pht('Selecting objects to index...'));
+
+    $object_phids = null;
+    if ($object_names) {
+      $object_phids = $this->loadPHIDsByNames($object_names);
+      $object_phids = array_fuse($object_phids);
     }
 
-    $groups = phid_group_by_type($phids);
-    foreach ($groups as $group_type => $group) {
-      $console->writeOut(
-        "%s\n",
-        pht('Indexing %d object(s) of type %s.', count($group), $group_type));
+    $type_phids = null;
+    if ($is_all || $object_types) {
+      $object_map = $this->getIndexableObjectsByTypes($object_types);
+      $type_phids = array();
+      foreach ($object_map as $object) {
+        $iterator = new LiskMigrationIterator($object);
+        foreach ($iterator as $o) {
+          $type_phids[] = $o->getPHID();
+        }
+      }
+      $type_phids = array_fuse($type_phids);
+    }
+
+    $index_phids = null;
+    if ($index_versions || $min_epoch || $max_epoch) {
+      $index_phids = $this->loadPHIDsByIndexConstraints(
+        $index_versions,
+        $min_epoch,
+        $max_epoch);
+      $index_phids = array_fuse($index_phids);
+    }
+
+    $working_set = null;
+    $filter_sets = array(
+      $object_phids,
+      $type_phids,
+      $index_phids,
+    );
+
+    foreach ($filter_sets as $filter_set) {
+      if ($filter_set === null) {
+        continue;
+      }
+
+      if ($working_set === null) {
+        $working_set = $filter_set;
+        continue;
+      }
+
+      $working_set = array_intersect_key($working_set, $filter_set);
+    }
+
+    $phids = array_keys($working_set);
+
+    if (!$phids) {
+      $this->logWarn(
+        pht('NO OBJECTS'),
+        pht('No objects selected to index.'));
+      return 0;
     }
 
+    $this->logInfo(
+      pht('INDEXING'),
+      pht(
+        'Indexing %s object(s).',
+        phutil_count($phids)));
+
     $bar = id(new PhutilConsoleProgressBar())
       ->setTotal(count($phids));
 
@@ -166,8 +261,7 @@
 
     if ($track_skips) {
       if ($count_updated) {
-        echo tsprintf(
-          "** %s ** %s\n",
+        $this->logOkay(
           pht('DONE'),
           pht(
             'Updated search indexes for %s document(s).',
@@ -175,29 +269,25 @@
       }
 
       if ($count_skipped) {
-        echo tsprintf(
-          "** %s ** %s\n",
+        $this->logWarn(
           pht('SKIP'),
           pht(
             'Skipped %s documents(s) which have not updated since they were '.
             'last indexed.',
             new PhutilNumber($count_skipped)));
-        echo tsprintf(
-          "** %s ** %s\n",
+        $this->logInfo(
           pht('NOTE'),
           pht(
             'Use "--force" to force the index to update these documents.'));
       }
     } else if ($is_background) {
-      echo tsprintf(
-        "** %s ** %s\n",
+      $this->logOkay(
         pht('DONE'),
         pht(
           'Queued %s document(s) for background indexing.',
           new PhutilNumber(count($phids))));
     } else {
-      echo tsprintf(
-        "** %s ** %s\n",
+      $this->logOkay(
         pht('DONE'),
         pht(
           'Forced search index updates for %s document(s).',
@@ -224,62 +314,121 @@
     return mpull($objects, 'getPHID');
   }
 
-  private function loadPHIDsByTypes($type) {
+  /**
+   * Select indexable objects by class name using a list of type selectors.
+   *
+   * Selectors and class names are compared after lowercasing. A selector
+   * which is exactly the name of a class selects only that class; any other
+   * selector must be a substring of exactly one class name.
+   *
+   * @param list<string> Type selectors. An empty list selects every class.
+   * @return map<string, object> Map from class name to indexable object.
+   */
+  private function getIndexableObjectsByTypes(array $types) {
     $objects = id(new PhutilClassMapQuery())
       ->setAncestorClass('PhabricatorIndexableInterface')
       ->execute();
 
-    $normalized_type = phutil_utf8_strtolower($type);
+    $type_map = array();
+    $normal_map = array();
+    foreach ($types as $type) {
+      $normalized_type = phutil_utf8_strtolower($type);
+      $type_map[$type] = $normalized_type;
+
+      if (isset($normal_map[$normalized_type])) {
+        $old_type = $normal_map[$normalized_type];
+        throw new PhutilArgumentUsageException(
+          pht(
+            'Type specification "%s" duplicates type specification "%s". '.
+            'Specify each type only once.',
+            $type,
+            $old_type));
+      }
 
-    $matches = array();
+      $normal_map[$normalized_type] = $type;
+    }
+
+    $object_matches = array();
+
+    $matches_map = array();
+    $exact_map = array();
     foreach ($objects as $object) {
       $object_class = get_class($object);
-      $normalized_class = phutil_utf8_strtolower($object_class);
 
-      if ($normalized_class === $normalized_type) {
-        $matches = array($object_class => $object);
-        break;
+      if (!$types) {
+        $object_matches[$object_class] = $object;
+        continue;
       }
 
-      if (!strlen($type) ||
-          strpos($normalized_class, $normalized_type) !== false) {
-        $matches[$object_class] = $object;
+      $normalized_class = phutil_utf8_strtolower($object_class);
+      // If a specified type is exactly the name of this class, match it.
+      if (isset($normal_map[$normalized_class])) {
+        $object_matches[$object_class] = $object;
+        $matching_type = $normal_map[$normalized_class];
+        $matches_map[$matching_type] = array($object_class);
+        $exact_map[$matching_type] = true;
+        continue;
       }
-    }
 
-    if (!$matches) {
-      $all_types = array();
-      foreach ($objects as $object) {
-        $all_types[] = get_class($object);
-      }
-      sort($all_types);
+      foreach ($type_map as $type => $normalized_type) {
+        // If we already have an exact match for this type, don't match it
+        // as a substring. An indexable "MothObject" should be selectable
+        // exactly without also selecting "MammothObject".
+        if (isset($exact_map[$type])) {
+          continue;
+        }
 
-      throw new PhutilArgumentUsageException(
-        pht(
-          'Type "%s" matches no indexable objects. Supported types are: %s.',
-          $type,
-          implode(', ', $all_types)));
+        // If the selector isn't a substring of the class name, continue.
+        if (strpos($normalized_class, $normalized_type) === false) {
+          continue;
+        }
+
+        $matches_map[$type][] = $object_class;
+        $object_matches[$object_class] = $object;
+      }
     }
 
-    if ((count($matches) > 1) && strlen($type)) {
-      throw new PhutilArgumentUsageException(
-        pht(
-          'Type "%s" matches multiple indexable objects. Use a more '.
-          'specific string. Matching object types are: %s.',
-          $type,
-          implode(', ', array_keys($matches))));
+    $all_types = array();
+    foreach ($objects as $object) {
+      $all_types[] = get_class($object);
     }
+    sort($all_types);
+    $type_list = implode(', ', $all_types);
 
-    $phids = array();
-    foreach ($matches as $match) {
-      $iterator = new LiskMigrationIterator($match);
-      foreach ($iterator as $object) {
-        $phids[] = $object->getPHID();
+    foreach ($type_map as $type => $normalized_type) {
+      $matches = idx($matches_map, $type);
+      if (!$matches) {
+        throw new PhutilArgumentUsageException(
+          pht(
+            'Type "%s" matches no indexable objects. '.
+            'Supported types are: %s.',
+            $type,
+            $type_list));
+      }
+
+      if (count($matches) > 1) {
+        throw new PhutilArgumentUsageException(
+          pht(
+            'Type "%s" matches multiple indexable objects. Use a more '.
+            'specific string. Matching objects are: %s.',
+            $type,
+            implode(', ', $matches)));
       }
     }
 
-    return $phids;
+    // An exact match replaces the substring matches recorded earlier for the
+    // same selector, but the classes from those earlier matches are still
+    // in the result list. Keep only classes a selector finally resolved to.
+    if ($type_map) {
+      $selected = array();
+      foreach ($matches_map as $classes) {
+        $selected += array_fill_keys($classes, true);
+      }
+      $object_matches = array_intersect_key($object_matches, $selected);
+    }
+
+    return $object_matches;
   }
 
   private function loadIndexVersions($phid) {
@@ -294,4 +443,55 @@
       $phid);
   }
 
+  /**
+   * Load the PHIDs of objects which have index version rows matching a set
+   * of constraints.
+   *
+   * The query has no other conditions, so callers are expected to pass at
+   * least one constraint.
+   *
+   * @param list<string> Index versions to select, or an empty list.
+   * @param int|null Select rows with "indexEpoch" at or after this epoch.
+   * @param int|null Select rows with "indexEpoch" at or before this epoch.
+   * @return list<string> Distinct object PHIDs.
+   */
+  private function loadPHIDsByIndexConstraints(
+    array $index_versions,
+    $min_date,
+    $max_date) {
+
+    $table = new PhabricatorSearchIndexVersion();
+    $conn = $table->establishConnection('r');
+
+    $where = array();
+    if ($index_versions) {
+      $where[] = qsprintf(
+        $conn,
+        'indexVersion IN (%Ls)',
+        $index_versions);
+    }
+
+    if ($min_date !== null) {
+      $where[] = qsprintf(
+        $conn,
+        'indexEpoch >= %d',
+        $min_date);
+    }
+
+    if ($max_date !== null) {
+      $where[] = qsprintf(
+        $conn,
+        'indexEpoch <= %d',
+        $max_date);
+    }
+
+    $rows = queryfx_all(
+      $conn,
+      'SELECT DISTINCT objectPHID FROM %R WHERE %LA',
+      $table,
+      $where);
+
+    return ipull($rows, 'objectPHID');
+  }
+
 }
diff --git a/src/applications/search/storage/PhabricatorSearchIndexVersion.php b/src/applications/search/storage/PhabricatorSearchIndexVersion.php
--- a/src/applications/search/storage/PhabricatorSearchIndexVersion.php
+++ b/src/applications/search/storage/PhabricatorSearchIndexVersion.php
@@ -6,6 +6,8 @@
   protected $objectPHID;
   protected $extensionKey;
   protected $version;
+  protected $indexVersion;
+  protected $indexEpoch;
 
   protected function getConfiguration() {
     return array(
@@ -13,12 +15,18 @@
       self::CONFIG_COLUMN_SCHEMA => array(
         'extensionKey' => 'text64',
         'version' => 'text128',
+        'indexVersion' => 'bytes12',
+        'indexEpoch' => 'epoch',
       ),
       self::CONFIG_KEY_SCHEMA => array(
         'key_object' => array(
           'columns' => array('objectPHID', 'extensionKey'),
           'unique' => true,
         ),
+
+        // NOTE: "bin/search index" may query this table by "indexVersion" or
+        // "indexEpoch", but this is rare and scanning the table seems fine.
+
       ),
     ) + parent::getConfiguration();
   }