Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15285761
D21560.id51326.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Referenced Files
None
Subscribers
None
D21560.id51326.diff
View Options
diff --git a/resources/sql/autopatches/20210216.index.01.version.sql b/resources/sql/autopatches/20210216.index.01.version.sql
new file mode 100644
--- /dev/null
+++ b/resources/sql/autopatches/20210216.index.01.version.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_search.search_indexversion
+ ADD indexVersion BINARY(12) NOT NULL;
diff --git a/resources/sql/autopatches/20210216.index.02.epoch.sql b/resources/sql/autopatches/20210216.index.02.epoch.sql
new file mode 100644
--- /dev/null
+++ b/resources/sql/autopatches/20210216.index.02.epoch.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_search.search_indexversion
+ ADD indexEpoch INT UNSIGNED NOT NULL;
diff --git a/src/applications/search/index/PhabricatorIndexEngine.php b/src/applications/search/index/PhabricatorIndexEngine.php
--- a/src/applications/search/index/PhabricatorIndexEngine.php
+++ b/src/applications/search/index/PhabricatorIndexEngine.php
@@ -109,8 +109,10 @@
$rows = queryfx_all(
$conn_r,
- 'SELECT * FROM %T WHERE objectPHID = %s AND extensionKey IN (%Ls)',
- $table->getTableName(),
+ 'SELECT version, extensionKey
+ FROM %R
+ WHERE objectPHID = %s AND extensionKey IN (%Ls)',
+ $table,
$object_phid,
$extension_keys);
@@ -128,22 +130,35 @@
$table = new PhabricatorSearchIndexVersion();
$conn_w = $table->establishConnection('w');
+ $now = PhabricatorTime::getNow();
+
+ // See T13587. For now, this is just a marker to make it easy to reindex
+ // documents if some version of the indexing code is later discovered to
+ // be questionable.
+ $index_version = '2021-02-16-A';
+
$sql = array();
foreach ($versions as $key => $version) {
$sql[] = qsprintf(
$conn_w,
- '(%s, %s, %s)',
+ '(%s, %s, %s, %s, %d)',
$object_phid,
$key,
- $version);
+ $version,
+ $index_version,
+ $now);
}
queryfx(
$conn_w,
- 'INSERT INTO %T (objectPHID, extensionKey, version)
+ 'INSERT INTO %R (objectPHID, extensionKey, version,
+ indexVersion, indexEpoch)
VALUES %LQ
- ON DUPLICATE KEY UPDATE version = VALUES(version)',
- $table->getTableName(),
+ ON DUPLICATE KEY UPDATE
+ version = VALUES(version),
+ indexVersion = VALUES(indexVersion),
+ indexEpoch = VALUES(indexEpoch)',
+ $table,
$sql);
}
diff --git a/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
--- a/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
+++ b/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
@@ -8,9 +8,13 @@
->setName('index')
->setSynopsis(pht('Build or rebuild search indexes.'))
->setExamples(
- "**index** D123\n".
- "**index** --type task\n".
- "**index** --all")
+ implode(
+ "\n",
+ array(
+ '**index** D123',
+ '**index** --all',
+ '**index** [--type __task__] [--version __version__] ...',
+ )))
->setArguments(
array(
array(
@@ -20,6 +24,7 @@
array(
'name' => 'type',
'param' => 'type',
+ 'repeat' => true,
'help' => pht(
'Object types to reindex, like "task", "commit" or "revision".'),
),
@@ -37,6 +42,28 @@
'Force a complete rebuild of the entire index instead of an '.
'incremental update.'),
),
+ array(
+ 'name' => 'version',
+ 'param' => 'version',
+ 'repeat' => true,
+ 'help' => pht(
+ 'Reindex objects previously indexed with a particular '.
+ 'version of the indexer.'),
+ ),
+ array(
+ 'name' => 'min-index-date',
+ 'param' => 'date',
+ 'help' => pht(
+ 'Reindex objects previously indexed on or after a '.
+ 'given date.'),
+ ),
+ array(
+ 'name' => 'max-index-date',
+ 'param' => 'date',
+ 'help' => pht(
+ 'Reindex objects previously indexed on or before a '.
+ 'given date.'),
+ ),
array(
'name' => 'objects',
'wildcard' => true,
@@ -47,37 +74,46 @@
public function execute(PhutilArgumentParser $args) {
$this->validateClusterSearchConfig();
- $console = PhutilConsole::getConsole();
-
$is_all = $args->getArg('all');
- $is_type = $args->getArg('type');
$is_force = $args->getArg('force');
- $obj_names = $args->getArg('objects');
+ $object_types = $args->getArg('type');
+ $index_versions = $args->getArg('version');
- if ($obj_names && ($is_all || $is_type)) {
- throw new PhutilArgumentUsageException(
- pht(
- "You can not name objects to index alongside the '%s' or '%s' flags.",
- '--all',
- '--type'));
- } else if (!$obj_names && !($is_all || $is_type)) {
+ $min_epoch = $args->getArg('min-index-date');
+ if ($min_epoch !== null) {
+ $min_epoch = $this->parseTimeArgument($min_epoch);
+ }
+
+ $max_epoch = $args->getArg('max-index-date');
+ if ($max_epoch !== null) {
+ $max_epoch = $this->parseTimeArgument($max_epoch);
+ }
+
+ $object_names = $args->getArg('objects');
+
+ $any_constraints =
+ ($object_names) ||
+ ($object_types) ||
+ ($index_versions) ||
+ ($min_epoch) ||
+ ($max_epoch);
+
+ if ($is_all && $any_constraints) {
throw new PhutilArgumentUsageException(
pht(
- "Provide one of '%s', '%s' or a list of object names.",
- '--all',
- '--type'));
+ 'You can not use query constraint flags (like "--version", '.
+ '"--type", or a list of specific objects) with "--all".'));
}
- if ($obj_names) {
- $phids = $this->loadPHIDsByNames($obj_names);
- } else {
- $phids = $this->loadPHIDsByTypes($is_type);
+ if (!$is_all && !$any_constraints) {
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Provide a list of objects to index (like "D123"), or a set of '.
+ 'query constraint flags (like "--type"), or "--all" to index '.
+ 'all objects.'));
}
- if (!$phids) {
- throw new PhutilArgumentUsageException(pht('Nothing to index!'));
- }
if ($args->getArg('background')) {
$is_background = true;
@@ -87,21 +123,80 @@
}
if (!$is_background) {
- echo tsprintf(
- "**<bg:blue> %s </bg>** %s\n",
+ $this->logInfo(
pht('NOTE'),
pht(
- 'Run this workflow with "%s" to queue tasks for the daemon workers.',
- '--background'));
+ 'Run this workflow with "--background" to queue tasks for the '.
+ 'daemon workers.'));
+ }
+
+ $this->logInfo(
+ pht('SELECT'),
+ pht('Selecting objects to index...'));
+
+ $object_phids = null;
+ if ($object_names) {
+ $object_phids = $this->loadPHIDsByNames($object_names);
+ $object_phids = array_fuse($object_phids);
}
- $groups = phid_group_by_type($phids);
- foreach ($groups as $group_type => $group) {
- $console->writeOut(
- "%s\n",
- pht('Indexing %d object(s) of type %s.', count($group), $group_type));
+ $type_phids = null;
+ if ($is_all || $object_types) {
+ $object_map = $this->getIndexableObjectsByTypes($object_types);
+ $type_phids = array();
+ foreach ($object_map as $object) {
+ $iterator = new LiskMigrationIterator($object);
+ foreach ($iterator as $o) {
+ $type_phids[] = $o->getPHID();
+ }
+ }
+ $type_phids = array_fuse($type_phids);
+ }
+
+ $index_phids = null;
+ if ($index_versions || $min_epoch || $max_epoch) {
+ $index_phids = $this->loadPHIDsByIndexConstraints(
+ $index_versions,
+ $min_epoch,
+ $max_epoch);
+ $index_phids = array_fuse($index_phids);
+ }
+
+ $working_set = null;
+ $filter_sets = array(
+ $object_phids,
+ $type_phids,
+ $index_phids,
+ );
+
+ foreach ($filter_sets as $filter_set) {
+ if ($filter_set === null) {
+ continue;
+ }
+
+ if ($working_set === null) {
+ $working_set = $filter_set;
+ continue;
+ }
+
+ $working_set = array_intersect_key($working_set, $filter_set);
+ }
+
+ $phids = array_keys($working_set);
+
+ if (!$phids) {
+ $this->logWarn(
+ pht('NO OBJECTS'),
+ pht('No objects selected to index.'));
+ return 0;
}
+ $this->logInfo(
+ pht('INDEXING'),
+ pht(
+ 'Indexing %s object(s).',
+ phutil_count($phids)));
+
$bar = id(new PhutilConsoleProgressBar())
->setTotal(count($phids));
@@ -166,8 +261,7 @@
if ($track_skips) {
if ($count_updated) {
- echo tsprintf(
- "**<bg:green> %s </bg>** %s\n",
+ $this->logOkay(
pht('DONE'),
pht(
'Updated search indexes for %s document(s).',
@@ -175,29 +269,25 @@
}
if ($count_skipped) {
- echo tsprintf(
- "**<bg:yellow> %s </bg>** %s\n",
+ $this->logWarn(
pht('SKIP'),
pht(
'Skipped %s documents(s) which have not updated since they were '.
'last indexed.',
new PhutilNumber($count_skipped)));
- echo tsprintf(
- "**<bg:blue> %s </bg>** %s\n",
+ $this->logInfo(
pht('NOTE'),
pht(
'Use "--force" to force the index to update these documents.'));
}
} else if ($is_background) {
- echo tsprintf(
- "**<bg:green> %s </bg>** %s\n",
+ $this->logOkay(
pht('DONE'),
pht(
'Queued %s document(s) for background indexing.',
new PhutilNumber(count($phids))));
} else {
- echo tsprintf(
- "**<bg:green> %s </bg>** %s\n",
+ $this->logOkay(
pht('DONE'),
pht(
'Forced search index updates for %s document(s).',
@@ -224,62 +314,100 @@
return mpull($objects, 'getPHID');
}
- private function loadPHIDsByTypes($type) {
+ /**
+ * Select indexable object prototypes using a list of type specifications.
+ *
+ * Candidates are the classes returned by a PhutilClassMapQuery for
+ * descendants of PhabricatorIndexableInterface. Each specification is
+ * compared case-insensitively: it either equals a class name exactly, or
+ * is a substring of a class name. When no specifications are given, every
+ * candidate class is selected.
+ *
+ * @param list<string> Type specifications, like "task" or a class name.
+ * @return map<string, object> Selected objects, keyed by class name.
+ * @throws PhutilArgumentUsageException If two specifications normalize to
+ * the same string, or a specification matches no class, or a
+ * specification matches more than one class.
+ */
+ private function getIndexableObjectsByTypes(array $types) {
$objects = id(new PhutilClassMapQuery())
->setAncestorClass('PhabricatorIndexableInterface')
->execute();
- $normalized_type = phutil_utf8_strtolower($type);
+ $type_map = array();
+ $normal_map = array();
+ foreach ($types as $type) {
+ $normalized_type = phutil_utf8_strtolower($type);
+ $type_map[$type] = $normalized_type;
+
+ if (isset($normal_map[$normalized_type])) {
+ $old_type = $normal_map[$normalized_type];
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Type specification "%s" duplicates type specification "%s". '.
+ 'Specify each type only once.',
+ $type,
+ $old_type));
+ }
- $matches = array();
+ $normal_map[$normalized_type] = $type;
+ }
+
+ $object_matches = array();
+
+ $matches_map = array();
+ $exact_map = array();
foreach ($objects as $object) {
$object_class = get_class($object);
- $normalized_class = phutil_utf8_strtolower($object_class);
- if ($normalized_class === $normalized_type) {
- $matches = array($object_class => $object);
- break;
+ if (!$types) {
+ $object_matches[$object_class] = $object;
+ continue;
}
- if (!strlen($type) ||
- strpos($normalized_class, $normalized_type) !== false) {
- $matches[$object_class] = $object;
+ $normalized_class = phutil_utf8_strtolower($object_class);
+ // If a specified type is exactly the name of this class, match it.
+ // This replaces any substring matches recorded so far for the type.
+ if (isset($normal_map[$normalized_class])) {
+ $object_matches[$object_class] = $object;
+ $matching_type = $normal_map[$normalized_class];
+ $matches_map[$matching_type] = array($object_class);
+ $exact_map[$matching_type] = true;
+ continue;
}
- }
- if (!$matches) {
- $all_types = array();
- foreach ($objects as $object) {
- $all_types[] = get_class($object);
- }
- sort($all_types);
+ foreach ($type_map as $type => $normalized_type) {
+ // If we already have an exact match for this type, don't match it
+ // as a substring. An indexable "MothObject" should be selectable
+ // exactly without also selecting "MammothObject".
+ if (isset($exact_map[$type])) {
+ continue;
+ }
- throw new PhutilArgumentUsageException(
- pht(
- 'Type "%s" matches no indexable objects. Supported types are: %s.',
- $type,
- implode(', ', $all_types)));
+ // If the selector isn't a substring of the class name, continue.
+ if (strpos($normalized_class, $normalized_type) === false) {
+ continue;
+ }
+
+ $matches_map[$type][] = $object_class;
+ $object_matches[$object_class] = $object;
+ }
}
- if ((count($matches) > 1) && strlen($type)) {
- throw new PhutilArgumentUsageException(
- pht(
- 'Type "%s" matches multiple indexable objects. Use a more '.
- 'specific string. Matching object types are: %s.',
- $type,
- implode(', ', array_keys($matches))));
+ // An exact match may only be found after earlier classes were already
+ // accepted as substring matches for the same type (for example,
+ // "MammothObject" is visited before "MothObject"). The exact match
+ // resets the type's entry in "$matches_map", but the earlier classes
+ // are still present in "$object_matches". Keep only the classes which
+ // remain in the final match lists.
+ if ($types) {
+ $final_classes = array();
+ foreach ($matches_map as $class_list) {
+ foreach ($class_list as $class_name) {
+ $final_classes[$class_name] = true;
+ }
+ }
+ $object_matches = array_intersect_key($object_matches, $final_classes);
+ }
+
+ $all_types = array();
+ foreach ($objects as $object) {
+ $all_types[] = get_class($object);
}
+ sort($all_types);
+ $type_list = implode(', ', $all_types);
- $phids = array();
- foreach ($matches as $match) {
- $iterator = new LiskMigrationIterator($match);
- foreach ($iterator as $object) {
- $phids[] = $object->getPHID();
+ foreach ($type_map as $type => $normalized_type) {
+ $matches = idx($matches_map, $type);
+ if (!$matches) {
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Type "%s" matches no indexable objects. '.
+ 'Supported types are: %s.',
+ $type,
+ $type_list));
+ }
+
+ if (count($matches) > 1) {
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Type "%s" matches multiple indexable objects. Use a more '.
+ 'specific string. Matching objects are: %s.',
+ $type,
+ implode(', ', $matches)));
}
}
- return $phids;
+ return $object_matches;
}
private function loadIndexVersions($phid) {
@@ -294,4 +422,43 @@
$phid);
}
+ /**
+ * Load the PHIDs of objects which have index version rows matching a set
+ * of constraints.
+ *
+ * Every supplied constraint must hold for a row to be selected; the
+ * clauses are joined with the "%LA" conversion.
+ *
+ * NOTE(review): this expects at least one constraint to be supplied. With
+ * no constraints the clause list passed to "%LA" is empty; confirm how the
+ * query formatter handles that before calling this without constraints.
+ *
+ * @param list<string> Indexer versions to select, or an empty list.
+ * @param int|null Select rows with "indexEpoch" at or after this value.
+ * @param int|null Select rows with "indexEpoch" at or before this value.
+ * @return list<string> Distinct object PHIDs from the matching rows.
+ */
+ private function loadPHIDsByIndexConstraints(
+ array $index_versions,
+ $min_date,
+ $max_date) {
+
+ $version_table = new PhabricatorSearchIndexVersion();
+ $conn_r = $version_table->establishConnection('r');
+
+ $clauses = array();
+
+ // Restrict to rows written by particular versions of the indexer.
+ if ($index_versions) {
+ $clauses[] = qsprintf(
+ $conn_r,
+ 'indexVersion IN (%Ls)',
+ $index_versions);
+ }
+
+ // Lower bound on when the row was written, inclusive.
+ if ($min_date !== null) {
+ $clauses[] = qsprintf($conn_r, 'indexEpoch >= %d', $min_date);
+ }
+
+ // Upper bound on when the row was written, inclusive.
+ if ($max_date !== null) {
+ $clauses[] = qsprintf($conn_r, 'indexEpoch <= %d', $max_date);
+ }
+
+ $matching_rows = queryfx_all(
+ $conn_r,
+ 'SELECT DISTINCT objectPHID FROM %R WHERE %LA',
+ $version_table,
+ $clauses);
+
+ return ipull($matching_rows, 'objectPHID');
+ }
+
}
diff --git a/src/applications/search/storage/PhabricatorSearchIndexVersion.php b/src/applications/search/storage/PhabricatorSearchIndexVersion.php
--- a/src/applications/search/storage/PhabricatorSearchIndexVersion.php
+++ b/src/applications/search/storage/PhabricatorSearchIndexVersion.php
@@ -6,6 +6,8 @@
protected $objectPHID;
protected $extensionKey;
protected $version;
+ protected $indexVersion;
+ protected $indexEpoch;
protected function getConfiguration() {
return array(
@@ -13,12 +15,18 @@
self::CONFIG_COLUMN_SCHEMA => array(
'extensionKey' => 'text64',
'version' => 'text128',
+ 'indexVersion' => 'bytes12',
+ 'indexEpoch' => 'epoch',
),
self::CONFIG_KEY_SCHEMA => array(
'key_object' => array(
'columns' => array('objectPHID', 'extensionKey'),
'unique' => true,
),
+
+ // NOTE: "bin/search index" may query this table by "indexVersion" or
+ // "indexEpoch", but this is rare and scanning the table seems fine.
+
),
) + parent::getConfiguration();
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Mar 5, 3:56 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7225934
Default Alt Text
D21560.id51326.diff (16 KB)
Attached To
Mode
D21560: When documents are indexed, record the indexer version (versus the object version) and index epoch
Attached
Detach File
Event Timeline
Log In to Comment