diff --git a/resources/sql/autopatches/20151221.search.2.ownersngrams.sql b/resources/sql/autopatches/20151221.search.2.ownersngrams.sql new file mode 100644 --- /dev/null +++ b/resources/sql/autopatches/20151221.search.2.ownersngrams.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_owners.owners_name_ngrams ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + objectID INT UNSIGNED NOT NULL, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + KEY `key_object` (objectID), + KEY `key_ngram` (ngram, objectID) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20151221.search.3.reindex.php b/resources/sql/autopatches/20151221.search.3.reindex.php new file mode 100644 --- /dev/null +++ b/resources/sql/autopatches/20151221.search.3.reindex.php @@ -0,0 +1,11 @@ +getPHID(), + array( + 'force' => true, + )); +} diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -2548,6 +2548,8 @@ 'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php', 'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php', 'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php', + 'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php', + 'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php', 'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php', 'PhabricatorNotificationClearController' => 'applications/notification/controller/PhabricatorNotificationClearController.php', 'PhabricatorNotificationClient' => 'applications/notification/client/PhabricatorNotificationClient.php', @@ -2636,7 +2638,9 @@ 'PhabricatorOwnersPackage' => 
'applications/owners/storage/PhabricatorOwnersPackage.php', 'PhabricatorOwnersPackageDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php', 'PhabricatorOwnersPackageEditEngine' => 'applications/owners/editor/PhabricatorOwnersPackageEditEngine.php', + 'PhabricatorOwnersPackageFulltextEngine' => 'applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php', 'PhabricatorOwnersPackageFunctionDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageFunctionDatasource.php', + 'PhabricatorOwnersPackageNameNgrams' => 'applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php', 'PhabricatorOwnersPackageOwnerDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php', 'PhabricatorOwnersPackagePHIDType' => 'applications/owners/phid/PhabricatorOwnersPackagePHIDType.php', 'PhabricatorOwnersPackageQuery' => 'applications/owners/query/PhabricatorOwnersPackageQuery.php', @@ -3047,6 +3051,8 @@ 'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php', 'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php', 'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php', + 'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php', + 'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php', 'PhabricatorSearchOrderController' => 'applications/search/controller/PhabricatorSearchOrderController.php', 'PhabricatorSearchOrderField' => 'applications/search/field/PhabricatorSearchOrderField.php', 'PhabricatorSearchPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorSearchPreferencesSettingsPanel.php', @@ -6802,6 +6808,7 @@ 'PhabricatorNamedQueryQuery' => 
'PhabricatorCursorPagedPolicyAwareQuery', 'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule', 'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock', + 'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension', 'PhabricatorNotificationBuilder' => 'Phobject', 'PhabricatorNotificationClearController' => 'PhabricatorNotificationController', 'PhabricatorNotificationClient' => 'Phobject', @@ -6907,10 +6914,14 @@ 'PhabricatorCustomFieldInterface', 'PhabricatorDestructibleInterface', 'PhabricatorConduitResultInterface', + 'PhabricatorFulltextInterface', + 'PhabricatorNgramsInterface', ), 'PhabricatorOwnersPackageDatasource' => 'PhabricatorTypeaheadDatasource', 'PhabricatorOwnersPackageEditEngine' => 'PhabricatorEditEngine', + 'PhabricatorOwnersPackageFulltextEngine' => 'PhabricatorFulltextEngine', 'PhabricatorOwnersPackageFunctionDatasource' => 'PhabricatorTypeaheadCompositeDatasource', + 'PhabricatorOwnersPackageNameNgrams' => 'PhabricatorSearchNgrams', 'PhabricatorOwnersPackageOwnerDatasource' => 'PhabricatorTypeaheadCompositeDatasource', 'PhabricatorOwnersPackagePHIDType' => 'PhabricatorPHIDType', 'PhabricatorOwnersPackageQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', @@ -7414,6 +7425,8 @@ 'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow', 'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow', 'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow', + 'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO', + 'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension', 'PhabricatorSearchOrderController' => 'PhabricatorSearchBaseController', 'PhabricatorSearchOrderField' => 'PhabricatorSearchField', 'PhabricatorSearchPreferencesSettingsPanel' => 'PhabricatorSettingsPanel', diff --git a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php --- 
a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php +++ b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php @@ -201,7 +201,8 @@ $is_binary = ($this->getUTF8Charset() == 'binary'); $matches = null; - if (preg_match('/^(fulltext|sort|text)(\d+)?\z/', $data_type, $matches)) { + $pattern = '/^(fulltext|sort|text|char)(\d+)?\z/'; + if (preg_match($pattern, $data_type, $matches)) { // Limit the permitted column lengths under the theory that it would // be nice to eventually reduce this to a small set of standard lengths. @@ -220,6 +221,7 @@ 'text8' => true, 'text4' => true, 'text' => true, + 'char3' => true, 'sort255' => true, 'sort128' => true, 'sort64' => true, @@ -266,10 +268,14 @@ // the majority of cases. $column_type = 'longtext'; break; + case 'char': + $column_type = 'char('.$size.')'; + break; } switch ($type) { case 'text': + case 'char': if ($is_binary) { // We leave collation and character set unspecified in order to // generate valid SQL. diff --git a/src/applications/owners/editor/PhabricatorOwnersPackageTransactionEditor.php b/src/applications/owners/editor/PhabricatorOwnersPackageTransactionEditor.php --- a/src/applications/owners/editor/PhabricatorOwnersPackageTransactionEditor.php +++ b/src/applications/owners/editor/PhabricatorOwnersPackageTransactionEditor.php @@ -334,4 +334,8 @@ return $body; } + protected function supportsSearch() { + return true; + } + } diff --git a/src/applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php b/src/applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php new file mode 100644 --- /dev/null +++ b/src/applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php @@ -0,0 +1,26 @@ +setDocumentTitle($package->getName()); + + // TODO: These are bogus, but not currently stored on packages. 
+ $document->setDocumentCreated(PhabricatorTime::getNow()); + $document->setDocumentModified(PhabricatorTime::getNow()); + + $document->addRelationship( + $package->isArchived() + ? PhabricatorSearchRelationship::RELATIONSHIP_CLOSED + : PhabricatorSearchRelationship::RELATIONSHIP_OPEN, + $package->getPHID(), + PhabricatorOwnersPackagePHIDType::TYPECONST, + PhabricatorTime::getNow()); + } + +} diff --git a/src/applications/owners/query/PhabricatorOwnersPackageQuery.php b/src/applications/owners/query/PhabricatorOwnersPackageQuery.php --- a/src/applications/owners/query/PhabricatorOwnersPackageQuery.php +++ b/src/applications/owners/query/PhabricatorOwnersPackageQuery.php @@ -9,7 +9,6 @@ private $authorityPHIDs; private $repositoryPHIDs; private $paths; - private $namePrefix; private $statuses; private $controlMap = array(); @@ -78,9 +77,10 @@ return $this; } - public function withNamePrefix($prefix) { - $this->namePrefix = $prefix; - return $this; + public function withNameNgrams($ngrams) { + return $this->withNgramsConstraint( + new PhabricatorOwnersPackageNameNgrams(), + $ngrams); } public function needPaths($need_paths) { @@ -208,15 +208,6 @@ $this->statuses); } - if (strlen($this->namePrefix)) { - // NOTE: This is a hacky mess, but this column is currently case - // sensitive and unique. 
- $where[] = qsprintf( - $conn, - 'LOWER(p.name) LIKE %>', - phutil_utf8_strtolower($this->namePrefix)); - } - if ($this->controlMap) { $clauses = array(); foreach ($this->controlMap as $repository_phid => $paths) { diff --git a/src/applications/owners/query/PhabricatorOwnersPackageSearchEngine.php b/src/applications/owners/query/PhabricatorOwnersPackageSearchEngine.php --- a/src/applications/owners/query/PhabricatorOwnersPackageSearchEngine.php +++ b/src/applications/owners/query/PhabricatorOwnersPackageSearchEngine.php @@ -25,6 +25,10 @@ ->setDescription( pht('Search for packages with specific owners.')) ->setDatasource(new PhabricatorProjectOrUserDatasource()), + id(new PhabricatorSearchTextField()) + ->setLabel(pht('Name Contains')) + ->setKey('name') + ->setDescription(pht('Search for packages by name substrings.')), id(new PhabricatorSearchDatasourceField()) ->setLabel(pht('Repositories')) ->setKey('repositoryPHIDs') @@ -69,6 +73,10 @@ $query->withStatuses($map['statuses']); } + if (strlen($map['name'])) { + $query->withNameNgrams($map['name']); + } + return $query; } diff --git a/src/applications/owners/storage/PhabricatorOwnersPackage.php b/src/applications/owners/storage/PhabricatorOwnersPackage.php --- a/src/applications/owners/storage/PhabricatorOwnersPackage.php +++ b/src/applications/owners/storage/PhabricatorOwnersPackage.php @@ -7,7 +7,9 @@ PhabricatorApplicationTransactionInterface, PhabricatorCustomFieldInterface, PhabricatorDestructibleInterface, - PhabricatorConduitResultInterface { + PhabricatorConduitResultInterface, + PhabricatorFulltextInterface, + PhabricatorNgramsInterface { protected $name; protected $originalName; @@ -46,7 +48,7 @@ self::CONFIG_TIMESTAMPS => false, self::CONFIG_AUX_PHID => true, self::CONFIG_COLUMN_SCHEMA => array( - 'name' => 'text128', + 'name' => 'sort128', 'originalName' => 'text255', 'description' => 'text', 'primaryOwnerPHID' => 'phid?', @@ -54,17 +56,6 @@ 'mailKey' => 'bytes20', 'status' => 'text32', ), - 
self::CONFIG_KEY_SCHEMA => array( - 'key_phid' => null, - 'phid' => array( - 'columns' => array('phid'), - 'unique' => true, - ), - 'name' => array( - 'columns' => array('name'), - 'unique' => true, - ), - ), ) + parent::getConfiguration(); } @@ -433,4 +424,23 @@ ); } + +/* -( PhabricatorFulltextInterface )--------------------------------------- */ + + + public function newFulltextEngine() { + return new PhabricatorOwnersPackageFulltextEngine(); + } + + +/* -( PhabricatorNgramInterface )------------------------------------------ */ + + + public function newNgrams() { + return array( + id(new PhabricatorOwnersPackageNameNgrams()) + ->setValue($this->getName()), + ); + } + } diff --git a/src/applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php b/src/applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php new file mode 100644 --- /dev/null +++ b/src/applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php @@ -0,0 +1,18 @@ +getTransactionType()) { case self::TYPE_OWNERS: + if (!is_array($old)) { + $old = array(); + } + + if (!is_array($new)) { + $new = array(); + } + $add = array_diff($new, $old); foreach ($add as $phid) { $phids[] = $phid; diff --git a/src/applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php b/src/applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php --- a/src/applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php +++ b/src/applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php @@ -22,7 +22,7 @@ $results = array(); $query = id(new PhabricatorOwnersPackageQuery()) - ->withNamePrefix($raw_query) + ->withNameNgrams($raw_query) ->setOrder('name'); $packages = $this->executeQuery($query); diff --git a/src/applications/search/engineextension/PhabricatorFulltextIndexEngineExtension.php b/src/applications/search/engineextension/PhabricatorFulltextIndexEngineExtension.php --- a/src/applications/search/engineextension/PhabricatorFulltextIndexEngineExtension.php +++ 
b/src/applications/search/engineextension/PhabricatorFulltextIndexEngineExtension.php @@ -65,6 +65,9 @@ try { $comment = $xaction->getApplicationTransactionCommentObject(); + if (!$comment) { + return 'none'; + } } catch (Exception $ex) { return 'none'; } diff --git a/src/applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php b/src/applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php new file mode 100644 --- /dev/null +++ b/src/applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php @@ -0,0 +1,34 @@ +newNgrams(); + $map = mpull($ngrams, 'getValue', 'getNgramKey'); + ksort($map); + $serialized = serialize($map); + + return PhabricatorHash::digestForIndex($serialized); + } + + public function shouldIndexObject($object) { + return ($object instanceof PhabricatorNgramsInterface); + } + + public function indexObject( + PhabricatorIndexEngine $engine, + $object) { + + foreach ($object->newNgrams() as $ngram) { + $ngram->writeNgram($object->getID()); + } + } + +} diff --git a/src/applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php b/src/applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php new file mode 100644 --- /dev/null +++ b/src/applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php @@ -0,0 +1,31 @@ +newNgrams() as $ngram) { + queryfx( + $ngram->establishConnection('w'), + 'DELETE FROM %T WHERE objectID = %d', + $ngram->getTableName(), + $object->getID()); + } + } + +} diff --git a/src/applications/search/interface/PhabricatorNgramsInterface.php b/src/applications/search/interface/PhabricatorNgramsInterface.php new file mode 100644 --- /dev/null +++ b/src/applications/search/interface/PhabricatorNgramsInterface.php @@ -0,0 +1,7 @@ +value = $value; + return $this; + } + + final public function getValue() { + return $this->value; + } + + protected function getConfiguration() { + return array( 
+      self::CONFIG_TIMESTAMPS => false,
+      self::CONFIG_COLUMN_SCHEMA => array(
+        'objectID' => 'uint32',
+        'ngram' => 'char3',
+      ),
+      self::CONFIG_KEY_SCHEMA => array(
+        'key_ngram' => array(
+          'columns' => array('ngram', 'objectID'),
+        ),
+        'key_object' => array(
+          'columns' => array('objectID'),
+        ),
+      ),
+    ) + parent::getConfiguration();
+  }
+
+  /**
+   * Get the name of the table which stores ngrams for this index.
+   *
+   * @return string Name in the form `<application>_<ngram key>_ngrams`.
+   */
+  public function getTableName() {
+    $application = $this->getApplicationName();
+    $key = $this->getNgramKey();
+    return "{$application}_{$key}_ngrams";
+  }
+
+  /**
+   * Split a string into tokens on runs of spaces.
+   *
+   * Leading and trailing spaces are removed first. Only the space character
+   * separates tokens; other whitespace stays inside a token.
+   *
+   * @param string String to tokenize.
+   * @return list<string> Tokens. An empty input yields one empty token.
+   */
+  final public function tokenizeString($value) {
+    $value = trim($value, ' ');
+    $value = preg_split('/ +/', $value);
+    return $value;
+  }
+
+  /**
+   * Extract the unique three-character ngrams from a string.
+   *
+   * The string is tokenized and each token is lowercased. The mode controls
+   * how a token is padded before it is cut into ngrams:
+   *
+   *   - `index`: a space is added on both sides of the token.
+   *   - `prefix`: a space is added before the token only.
+   *   - `query`: the token is used as-is. Any unrecognized mode behaves
+   *     the same way.
+   *
+   * Tokens shorter than three characters after padding produce no ngrams.
+   *
+   * @param string String to extract ngrams from.
+   * @param string One of `index`, `prefix` or `query`.
+   * @return list<string> Unique ngrams, sorted as strings.
+   */
+  final public function getNgramsFromString($value, $mode) {
+    $tokens = $this->tokenizeString($value);
+
+    $ngrams = array();
+    foreach ($tokens as $token) {
+      $token = phutil_utf8_strtolower($token);
+
+      switch ($mode) {
+        case 'query':
+          break;
+        case 'index':
+          $token = ' '.$token.' ';
+          break;
+        case 'prefix':
+          $token = ' '.$token;
+          break;
+      }
+
+      // Cut ngrams by character rather than by byte. The query side counts
+      // characters to choose a strategy and the `ngram` column is declared
+      // as three characters wide, so byte offsets would split multibyte
+      // UTF-8 sequences.
+      $token_v = phutil_utf8v($token);
+      $len = (count($token_v) - 2);
+      for ($ii = 0; $ii < $len; $ii++) {
+        $ngram = implode('', array_slice($token_v, $ii, 3));
+        $ngrams[$ngram] = $ngram;
+      }
+    }
+
+    // PHP casts numeric-looking keys like "123" to integers, so sort the keys
+    // as strings and return the stored values to keep every ngram a string.
+    ksort($ngrams, SORT_STRING);
+
+    return array_values($ngrams);
+  }
+
+  /**
+   * Replace the stored ngrams for an object with the ngrams of the current
+   * value.
+   *
+   * All existing rows for the object are deleted, then one row per ngram
+   * is inserted. If the value produces no ngrams, only the delete runs.
+   *
+   * @param int ID of the object being indexed.
+   * @return this
+   */
+  final public function writeNgram($object_id) {
+    $ngrams = $this->getNgramsFromString($this->getValue(), 'index');
+    $conn_w = $this->establishConnection('w');
+
+    $sql = array();
+    foreach ($ngrams as $ngram) {
+      $sql[] = qsprintf(
+        $conn_w,
+        '(%d, %s)',
+        $object_id,
+        $ngram);
+    }
+
+    queryfx(
+      $conn_w,
+      'DELETE FROM %T WHERE objectID = %d',
+      $this->getTableName(),
+      $object_id);
+
+    if ($sql) {
+      queryfx(
+        $conn_w,
+        'INSERT INTO %T (objectID, ngram) VALUES %Q',
+        $this->getTableName(),
+        implode(', ', $sql));
+    }
+
+    return $this;
+  }
+
+}
diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
--- 
a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
+++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
@@ -26,6 +26,7 @@
   private $edgeLogicConstraintsAreValid = false;
   private $spacePHIDs;
   private $spaceIsArchived;
+  private $ngrams = array();
 
   protected function getPageCursors(array $page) {
     return array(
@@ -253,6 +254,7 @@
     $joins = array();
     $joins[] = $this->buildEdgeLogicJoinClause($conn);
     $joins[] = $this->buildApplicationSearchJoinClause($conn);
+    $joins[] = $this->buildNgramsJoinClause($conn);
 
     return $joins;
   }
@@ -274,6 +276,7 @@
     $where[] = $this->buildPagingClause($conn);
     $where[] = $this->buildEdgeLogicWhereClause($conn);
     $where[] = $this->buildSpacesWhereClause($conn);
+    $where[] = $this->buildNgramsWhereClause($conn);
 
     return $where;
   }
@@ -324,6 +327,10 @@
       return true;
     }
 
+    if ($this->shouldGroupNgramResultRows()) {
+      return true;
+    }
+
     return false;
   }
 
@@ -1345,6 +1352,177 @@
   }
 
 
+/* -( Ngrams )------------------------------------------------------------- */
+
+
+  /**
+   * Constrain results to objects matched by an ngram index.
+   *
+   * Empty values are ignored. The length of the value in characters is
+   * stored so the join builder can pick a strategy for short queries.
+   *
+   * @param PhabricatorSearchNgrams Ngram index to search.
+   * @param string String to search for.
+   * @return this
+   */
+  protected function withNgramsConstraint(
+    PhabricatorSearchNgrams $index,
+    $value) {
+
+    if (strlen($value)) {
+      $this->ngrams[] = array(
+        'index' => $index,
+        'value' => $value,
+        'length' => count(phutil_utf8v($value)),
+      );
+    }
+
+    return $this;
+  }
+
+
+  /**
+   * Build a JOIN against the ngram table for each ngram in the constraints.
+   *
+   * Values of three or more characters join on the exact ngrams of their
+   * tokens. Two-character values use `prefix` mode ngrams. One-character
+   * values prefix-match ngrams that begin with a space and the character.
+   *
+   * @param AphrontDatabaseConnection Connection used to build the clauses.
+   * @return list JOIN clauses, at most 16, possibly none.
+   */
+  protected function buildNgramsJoinClause(AphrontDatabaseConnection $conn) {
+    $flat = array();
+    foreach ($this->ngrams as $spec) {
+      $index = $spec['index'];
+      $value = $spec['value'];
+      $length = $spec['length'];
+
+      if ($length >= 3) {
+        $ngrams = $index->getNgramsFromString($value, 'query');
+        $prefix = false;
+      } else if ($length == 2) {
+        $ngrams = $index->getNgramsFromString($value, 'prefix');
+        $prefix = false;
+      } else {
+        // getNgramsFromString() lowercases tokens but this branch bypasses it,
+        // so normalize the value here to match what was indexed.
+        $ngrams = array(' '.phutil_utf8_strtolower($value));
+        $prefix = true;
+      }
+
+      foreach ($ngrams as $ngram) {
+        $flat[] = array(
+          'table' => $index->getTableName(),
+          'ngram' => $ngram,
+          'prefix' => $prefix,
+        );
+      }
+    }
+
+    // MySQL only allows us to join a maximum of 61 tables per query. Each
+    // ngram is going to cost us a join toward that limit, so if the user
+    // specified a very long query string, just pick 16 of the ngrams
+    // at random.
+    if (count($flat) > 16) {
+      shuffle($flat);
+      $flat = array_slice($flat, 0, 16);
+    }
+
+    $primary_alias = $this->getPrimaryTableAlias();
+    if ($primary_alias) {
+      $id_column = qsprintf($conn, '%T.%T', $primary_alias, 'id');
+    } else {
+      $id_column = qsprintf($conn, '%T', 'id');
+    }
+
+    $idx = 1;
+    $joins = array();
+    foreach ($flat as $spec) {
+      $table = $spec['table'];
+      $ngram = $spec['ngram'];
+      $prefix = $spec['prefix'];
+
+      $alias = 'ngm'.$idx++;
+
+      if ($prefix) {
+        $joins[] = qsprintf(
+          $conn,
+          'JOIN %T %T ON %T.objectID = %Q AND %T.ngram LIKE %>',
+          $table,
+          $alias,
+          $alias,
+          $id_column,
+          $alias,
+          $ngram);
+      } else {
+        $joins[] = qsprintf(
+          $conn,
+          'JOIN %T %T ON %T.objectID = %Q AND %T.ngram = %s',
+          $table,
+          $alias,
+          $alias,
+          $id_column,
+          $alias,
+          $ngram);
+      }
+    }
+
+    return $joins;
+  }
+
+
+  /**
+   * Build WHERE clauses which require the indexed column to contain every
+   * token of each ngram constraint.
+   *
+   * Sharing ngrams does not prove a row contains the token itself, so the
+   * joins only narrow the candidates and these clauses do the final check.
+   *
+   * @param AphrontDatabaseConnection Connection used to build the clauses.
+   * @return list LIKE clauses, one per token.
+   */
+  protected function buildNgramsWhereClause(AphrontDatabaseConnection $conn) {
+    $where = array();
+
+    foreach ($this->ngrams as $ngram) {
+      $index = $ngram['index'];
+      $value = $ngram['value'];
+
+      $column = $index->getColumnName();
+      $alias = $this->getPrimaryTableAlias();
+      if ($alias) {
+        $column = qsprintf($conn, '%T.%T', $alias, $column);
+      } else {
+        $column = qsprintf($conn, '%T', $column);
+      }
+
+      $tokens = $index->tokenizeString($value);
+      foreach ($tokens as $token) {
+        $where[] = qsprintf(
+          $conn,
+          '%Q LIKE %~',
+          $column,
+          $token);
+      }
+    }
+
+    return $where;
+  }
+
+
+  /**
+   * Test whether ngram constraints require result rows to be grouped.
+   *
+   * A prefix join can match several ngram rows for the same object.
+   *
+   * @return bool True if any ngram constraint has been added.
+   */
+  protected function shouldGroupNgramResultRows() {
+    return (bool)$this->ngrams;
+  }
+
+
 /* -( Edge Logic )--------------------------------------------------------- */
 
 