diff --git a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php index 8c75c17e36..f6aead3759 100644 --- a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php @@ -1,579 +1,543 @@ service = $service; $config = $service->getConfig(); $index = idx($config, 'path', '/phabricator'); $this->index = str_replace('/', '', $index); $this->timeout = idx($config, 'timeout', 15); $this->version = (int)idx($config, 'version', 5); return $this; } public function getEngineIdentifier() { return 'elasticsearch'; } public function getTimestampField() { return $this->version < 2 ? '_timestamp' : 'lastModified'; } public function getTextFieldType() { return $this->version >= 5 ? 'text' : 'string'; } public function getHostType() { return new PhabricatorElasticsearchHost($this); } public function getHostForRead() { return $this->getService()->getAnyHostForRole('read'); } public function getHostForWrite() { return $this->getService()->getAnyHostForRole('write'); } public function setTimeout($timeout) { $this->timeout = $timeout; return $this; } public function getTimeout() { return $this->timeout; } public function getTypeConstants($class) { $relationship_class = new ReflectionClass($class); $typeconstants = $relationship_class->getConstants(); return array_unique(array_values($typeconstants)); } public function reindexAbstractDocument( PhabricatorSearchAbstractDocument $doc) { $host = $this->getHostForWrite(); $type = $doc->getDocumentType(); $phid = $doc->getPHID(); $handle = id(new PhabricatorHandleQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withPHIDs(array($phid)) ->executeOne(); $timestamp_key = $this->getTimestampField(); $spec = array( 'title' => $doc->getDocumentTitle(), 'dateCreated' => $doc->getDocumentCreated(), 
$timestamp_key => $doc->getDocumentModified(), ); foreach ($doc->getFieldData() as $field) { list($field_name, $corpus, $aux) = $field; if (!isset($spec[$field_name])) { $spec[$field_name] = array($corpus); } else { $spec[$field_name][] = $corpus; } if ($aux != null) { $spec[$field_name][] = $aux; } } foreach ($doc->getRelationshipData() as $field) { list($field_name, $related_phid, $rtype, $time) = $field; if (!isset($spec[$field_name])) { $spec[$field_name] = array($related_phid); } else { $spec[$field_name][] = $related_phid; } if ($time) { $spec[$field_name.'_ts'] = $time; } } $this->executeRequest($host, "/{$type}/{$phid}/", $spec, 'PUT'); } - public function reconstructDocument($phid) { - $type = phid_get_type($phid); - $host = $this->getHostForRead(); - $response = $this->executeRequest($host, "/{$type}/{$phid}", array()); - - if (empty($response['exists'])) { - return null; - } - - $hit = $response['_source']; - - $doc = new PhabricatorSearchAbstractDocument(); - $doc->setPHID($phid); - $doc->setDocumentType($response['_type']); - $doc->setDocumentTitle($hit['title']); - $doc->setDocumentCreated($hit['dateCreated']); - $doc->setDocumentModified($hit[$this->getTimestampField()]); - - foreach ($hit['field'] as $fdef) { - $field_type = $fdef['type']; - $doc->addField($field_type, $hit[$field_type], $fdef['aux']); - } - - foreach ($hit['relationship'] as $rtype => $rships) { - foreach ($rships as $rship) { - $doc->addRelationship( - $rtype, - $rship['phid'], - $rship['phidType'], - $rship['when']); - } - } - - return $doc; - } - private function buildSpec(PhabricatorSavedQuery $query) { $q = new PhabricatorElasticsearchQueryBuilder('bool'); $query_string = $query->getParameter('query'); if (strlen($query_string)) { $fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType'); // Build a simple_query_string query over all fields that must match all // of the words in the search string. 
$q->addMustClause(array( 'simple_query_string' => array( 'query' => $query_string, 'fields' => array( PhabricatorSearchDocumentFieldType::FIELD_TITLE.'.*', PhabricatorSearchDocumentFieldType::FIELD_BODY.'.*', PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'.*', ), 'default_operator' => 'AND', ), )); // This second query clause is "SHOULD' so it only affects ranking of // documents which already matched the Must clause. This amplifies the // score of documents which have an exact match on title, body // or comments. $q->addShouldClause(array( 'simple_query_string' => array( 'query' => $query_string, 'fields' => array( '*.raw', PhabricatorSearchDocumentFieldType::FIELD_TITLE.'^4', PhabricatorSearchDocumentFieldType::FIELD_BODY.'^3', PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'^1.2', ), 'analyzer' => 'english_exact', 'default_operator' => 'and', ), )); } $exclude = $query->getParameter('exclude'); if ($exclude) { $q->addFilterClause(array( 'not' => array( 'ids' => array( 'values' => array($exclude), ), ), )); } $relationship_map = array( PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR => $query->getParameter('authorPHIDs', array()), PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER => $query->getParameter('subscriberPHIDs', array()), PhabricatorSearchRelationship::RELATIONSHIP_PROJECT => $query->getParameter('projectPHIDs', array()), PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY => $query->getParameter('repositoryPHIDs', array()), ); $statuses = $query->getParameter('statuses', array()); $statuses = array_fuse($statuses); $rel_open = PhabricatorSearchRelationship::RELATIONSHIP_OPEN; $rel_closed = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED; $rel_unowned = PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED; $include_open = !empty($statuses[$rel_open]); $include_closed = !empty($statuses[$rel_closed]); if ($include_open && !$include_closed) { $q->addExistsClause($rel_open); } else if (!$include_open && $include_closed) { 
$q->addExistsClause($rel_closed); } if ($query->getParameter('withUnowned')) { $q->addExistsClause($rel_unowned); } $rel_owner = PhabricatorSearchRelationship::RELATIONSHIP_OWNER; if ($query->getParameter('withAnyOwner')) { $q->addExistsClause($rel_owner); } else { $owner_phids = $query->getParameter('ownerPHIDs', array()); if (count($owner_phids)) { $q->addTermsClause($rel_owner, $owner_phids); } } foreach ($relationship_map as $field => $phids) { if (is_array($phids) && !empty($phids)) { $q->addTermsClause($field, $phids); } } if (!$q->getClauseCount('must')) { $q->addMustClause(array('match_all' => array('boost' => 1 ))); } $spec = array( '_source' => false, 'query' => array( 'bool' => $q->toArray(), ), ); if (!$query->getParameter('query')) { $spec['sort'] = array( array('dateCreated' => 'desc'), ); } $offset = (int)$query->getParameter('offset', 0); $limit = (int)$query->getParameter('limit', 101); if ($offset + $limit > 10000) { throw new Exception(pht( 'Query offset is too large. offset+limit=%s (max=%s)', $offset + $limit, 10000)); } $spec['from'] = $offset; $spec['size'] = $limit; return $spec; } public function executeSearch(PhabricatorSavedQuery $query) { $types = $query->getParameter('types'); if (!$types) { $types = array_keys( PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes()); } // Don't use '/_search' for the case that there is something // else in the index (for example if 'phabricator' is only an alias to // some bigger index). Use '/$types/_search' instead. 
$uri = '/'.implode(',', $types).'/_search';

    $spec = $this->buildSpec($query);
    $exceptions = array();

    foreach ($this->service->getAllHostsForRole('read') as $host) {
      try {
        $response = $this->executeRequest($host, $uri, $spec);
        $phids = ipull($response['hits']['hits'], '_id');
        return $phids;
      } catch (Exception $e) {
        $exceptions[] = $e;
      }
    }

    throw new PhutilAggregateException(pht('All Fulltext Search hosts failed:'),
      $exceptions);
  }

  /**
   * Test whether the configured index exists on a host.
   *
   * For version 5 and newer, this requests '/_stats/' and looks for the
   * configured index name (`$this->index`) among the returned indices. For
   * older versions, it requests '' (version 2 and newer) or '/_status/' and
   * treats any truthy response as "exists".
   *
   * @param PhabricatorElasticsearchHost|null Host to query. When omitted,
   *   the host returned by getHostForRead() is used.
   * @return bool True if the index appears to exist. An HTTP 404 response is
   *   reported as false.
   * @throws HTTPFutureHTTPResponseStatus Rethrown for any HTTP status other
   *   than 404.
   */
  public function indexExists(PhabricatorElasticsearchHost $host = null) {
    if (!$host) {
      $host = $this->getHostForRead();
    }
    try {
      if ($this->version >= 5) {
        $uri = '/_stats/';
        $res = $this->executeRequest($host, $uri, array());
        // Look up the configured index name rather than a hard-coded
        // 'phabricator', so installs which configure a different index path
        // are detected correctly. This matches indexIsSane() and
        // getIndexStats(), which also key on $this->index.
        return isset($res['indices'][$this->index]);
      } else if ($this->version >= 2) {
        $uri = '';
      } else {
        $uri = '/_status/';
      }
      return (bool)$this->executeRequest($host, $uri, array());
    } catch (HTTPFutureHTTPResponseStatus $e) {
      if ($e->getStatusCode() == 404) {
        return false;
      }
      throw $e;
    }
  }

  private function getIndexConfiguration() {
    $data = array();
    $data['settings'] = array(
      'index' => array(
        'auto_expand_replicas' => '0-2',
        'analysis' => array(
          'filter' => array(
            'english_stop' => array(
              'type' => 'stop',
              'stopwords' => '_english_',
            ),
            'english_stemmer' => array(
              'type' => 'stemmer',
              'language' => 'english',
            ),
            'english_possessive_stemmer' => array(
              'type' => 'stemmer',
              'language' => 'possessive_english',
            ),
          ),
          'analyzer' => array(
            'english_exact' => array(
              'tokenizer' => 'standard',
              'filter' => array('lowercase'),
            ),
            'letter_stop' => array(
              'tokenizer' => 'letter',
              'filter' => array('lowercase', 'english_stop'),
            ),
            'english_stem' => array(
              'tokenizer' => 'standard',
              'filter' => array(
                'english_possessive_stemmer',
                'lowercase',
                'english_stop',
                'english_stemmer',
              ),
            ),
          ),
        ),
      ),
    );

    $fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
    $relationships = $this->getTypeConstants('PhabricatorSearchRelationship');

    $doc_types = array_keys(
      PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes());
$text_type = $this->getTextFieldType(); foreach ($doc_types as $type) { $properties = array(); foreach ($fields as $field) { // Use the custom analyzer for the corpus of text $properties[$field] = array( 'type' => $text_type, 'fields' => array( 'raw' => array( 'type' => $text_type, 'analyzer' => 'english_exact', 'search_analyzer' => 'english', 'search_quote_analyzer' => 'english_exact', ), 'keywords' => array( 'type' => $text_type, 'analyzer' => 'letter_stop', ), 'stems' => array( 'type' => $text_type, 'analyzer' => 'english_stem', ), ), ); } if ($this->version < 5) { foreach ($relationships as $rel) { $properties[$rel] = array( 'type' => 'string', 'index' => 'not_analyzed', 'include_in_all' => false, ); $properties[$rel.'_ts'] = array( 'type' => 'date', 'include_in_all' => false, ); } } else { foreach ($relationships as $rel) { $properties[$rel] = array( 'type' => 'keyword', 'include_in_all' => false, 'doc_values' => false, ); $properties[$rel.'_ts'] = array( 'type' => 'date', 'include_in_all' => false, ); } } // Ensure we have dateCreated since the default query requires it $properties['dateCreated']['type'] = 'date'; $properties['lastModified']['type'] = 'date'; $data['mappings'][$type]['properties'] = $properties; } return $data; } public function indexIsSane(PhabricatorElasticsearchHost $host = null) { if (!$host) { $host = $this->getHostForRead(); } if (!$this->indexExists($host)) { return false; } $cur_mapping = $this->executeRequest($host, '/_mapping/', array()); $cur_settings = $this->executeRequest($host, '/_settings/', array()); $actual = array_merge($cur_settings[$this->index], $cur_mapping[$this->index]); $res = $this->check($actual, $this->getIndexConfiguration()); return $res; } /** * Recursively check if two Elasticsearch configuration arrays are equal * * @param $actual * @param $required array * @return bool */ private function check($actual, $required, $path = '') { foreach ($required as $key => $value) { if (!array_key_exists($key, $actual)) { 
if ($key === '_all') {
          // The _all field never comes back so we just have to assume it
          // is set correctly.
          continue;
        }
        return false;
      }
      if (is_array($value)) {
        if (!is_array($actual[$key])) {
          return false;
        }
        if (!$this->check($actual[$key], $value, $path.'.'.$key)) {
          return false;
        }
        continue;
      }

      $actual[$key] = self::normalizeConfigValue($actual[$key]);
      $value = self::normalizeConfigValue($value);
      if ($actual[$key] != $value) {
        return false;
      }
    }
    return true;
  }

  /**
   * Normalize a config value for comparison. Elasticsearch accepts all kinds
   * of config values but it tends to throw back 'true' for true and 'false' for
   * false so we normalize everything. Sometimes, oddly, it'll throw back false
   * for false....
   *
   * @param mixed $value config value
   * @return mixed value normalized
   */
  private static function normalizeConfigValue($value) {
    if ($value === true) {
      return 'true';
    } else if ($value === false) {
      return 'false';
    }
    return $value;
  }

  /**
   * (Re)create the search index on the write host.
   *
   * If the index already exists on the write host, a DELETE request for '/'
   * is sent first. A PUT request for '/' carrying the result of
   * getIndexConfiguration() is then sent to the same host.
   *
   * NOTE(review): the '/' path is resolved by the host's getURI();
   * presumably that prefixes the configured index path - confirm against
   * PhabricatorElasticsearchHost.
   *
   * @return void
   */
  public function initIndex() {
    $host = $this->getHostForWrite();
    // Check for existence on the same host we are about to modify. Calling
    // indexExists() with no argument would consult an arbitrary read host,
    // which may not be the host receiving the DELETE and PUT below.
    if ($this->indexExists($host)) {
      $this->executeRequest($host, '/', array(), 'DELETE');
    }
    $data = $this->getIndexConfiguration();
    $this->executeRequest($host, '/', $data, 'PUT');
  }

  public function getIndexStats(PhabricatorElasticsearchHost $host = null) {
    if ($this->version < 2) {
      return false;
    }
    if (!$host) {
      $host = $this->getHostForRead();
    }
    $uri = '/_stats/';

    $res = $this->executeRequest($host, $uri, array());
    $stats = $res['indices'][$this->index];

    return array(
      pht('Queries') =>
        idxv($stats, array('primaries', 'search', 'query_total')),
      pht('Documents') =>
        idxv($stats, array('total', 'docs', 'count')),
      pht('Deleted') =>
        idxv($stats, array('total', 'docs', 'deleted')),
      pht('Storage Used') =>
        phutil_format_bytes(idxv($stats,
          array('total', 'store', 'size_in_bytes'))),
    );
  }

  private function executeRequest(PhabricatorElasticsearchHost $host, $path,
    array $data, $method = 'GET') {

    $uri = $host->getURI($path);
    $data = phutil_json_encode($data);
    $future = new HTTPSFuture($uri,
$data); $future->addHeader('Content-Type', 'application/json'); if ($method != 'GET') { $future->setMethod($method); } if ($this->getTimeout()) { $future->setTimeout($this->getTimeout()); } try { list($body) = $future->resolvex(); } catch (HTTPFutureResponseStatus $ex) { if ($ex->isTimeout() || (int)$ex->getStatusCode() > 499) { $host->didHealthCheck(false); } throw $ex; } if ($method != 'GET') { return null; } try { $data = phutil_json_decode($body); $host->didHealthCheck(true); return $data; } catch (PhutilJSONParserException $ex) { $host->didHealthCheck(false); throw new PhutilProxyException( pht('Elasticsearch server returned invalid JSON!'), $ex); } } } diff --git a/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php index 588ccc3e5e..ba019ea593 100644 --- a/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php @@ -1,108 +1,99 @@ service->getHosts(); } public function setService(PhabricatorSearchService $service) { $this->service = $service; return $this; } /** * @return PhabricatorSearchService */ public function getService() { return $this->service; } /** * Implementations must return a prototype host instance which is cloned * by the PhabricatorSearchService infrastructure to configure each engine. * @return PhabricatorSearchHost */ abstract public function getHostType(); /* -( Engine Metadata )---------------------------------------------------- */ /** * Return a unique, nonempty string which identifies this storage engine. * * @return string Unique string for this engine, max length 32. * @task meta */ abstract public function getEngineIdentifier(); /* -( Managing Documents )------------------------------------------------- */ /** * Update the index for an abstract document. * * @param PhabricatorSearchAbstractDocument Document to update. 
* @return void */ abstract public function reindexAbstractDocument( PhabricatorSearchAbstractDocument $document); - /** - * Reconstruct the document for a given PHID. This is used for debugging - * and does not need to be perfect if it is unreasonable to implement it. - * - * @param phid Document PHID to reconstruct. - * @return PhabricatorSearchAbstractDocument Abstract document. - */ - abstract public function reconstructDocument($phid); - /** * Execute a search query. * * @param PhabricatorSavedQuery A query to execute. * @return list A list of matching PHIDs. */ abstract public function executeSearch(PhabricatorSavedQuery $query); /** * Does the search index exist? * * @return bool */ abstract public function indexExists(); /** * Implementations should override this method to return a dictionary of * stats which are suitable for display in the admin UI. */ abstract public function getIndexStats(); /** * Is the index in a usable state? * * @return bool */ public function indexIsSane() { return $this->indexExists(); } /** * Do any sort of setup for the search index. 
* * @return void */ public function initIndex() {} public function getFulltextTokens() { return array(); } } diff --git a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php index fe526a8133..c2e38d2db7 100644 --- a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php @@ -1,571 +1,504 @@ getPHID(); if (!$phid) { throw new Exception(pht('Document has no PHID!')); } $store = new PhabricatorSearchDocument(); $store->setPHID($doc->getPHID()); $store->setDocumentType($doc->getDocumentType()); $store->setDocumentTitle($doc->getDocumentTitle()); $store->setDocumentCreated($doc->getDocumentCreated()); $store->setDocumentModified($doc->getDocumentModified()); $store->replace(); $conn_w = $store->establishConnection('w'); $stemmer = new PhutilSearchStemmer(); $field_dao = new PhabricatorSearchDocumentField(); queryfx( $conn_w, 'DELETE FROM %T WHERE phid = %s', $field_dao->getTableName(), $phid); foreach ($doc->getFieldData() as $field) { list($ftype, $corpus, $aux_phid) = $field; $stemmed_corpus = $stemmer->stemCorpus($corpus); queryfx( $conn_w, 'INSERT INTO %T (phid, phidType, field, auxPHID, corpus, stemmedCorpus) '. 'VALUES (%s, %s, %s, %ns, %s, %s)', $field_dao->getTableName(), $phid, $doc->getDocumentType(), $ftype, $aux_phid, $corpus, $stemmed_corpus); } $sql = array(); foreach ($doc->getRelationshipData() as $relationship) { list($rtype, $to_phid, $to_type, $time) = $relationship; $sql[] = qsprintf( $conn_w, '(%s, %s, %s, %s, %d)', $phid, $to_phid, $rtype, $to_type, $time); } $rship_dao = new PhabricatorSearchDocumentRelationship(); queryfx( $conn_w, 'DELETE FROM %T WHERE phid = %s', $rship_dao->getTableName(), $phid); if ($sql) { queryfx( $conn_w, 'INSERT INTO %T '. '(phid, relatedPHID, relation, relatedType, relatedTime) '. 
'VALUES %Q', $rship_dao->getTableName(), implode(', ', $sql)); } } - /** - * Rebuild the PhabricatorSearchAbstractDocument that was used to index - * an object out of the index itself. This is primarily useful for debugging, - * as it allows you to inspect the search index representation of a - * document. - * - * @param phid PHID of a document which exists in the search index. - * @return null|PhabricatorSearchAbstractDocument Abstract document object - * which corresponds to the original abstract document used to - * build the document index. - */ - public function reconstructDocument($phid) { - $dao_doc = new PhabricatorSearchDocument(); - $dao_field = new PhabricatorSearchDocumentField(); - $dao_relationship = new PhabricatorSearchDocumentRelationship(); - - $t_doc = $dao_doc->getTableName(); - $t_field = $dao_field->getTableName(); - $t_relationship = $dao_relationship->getTableName(); - - $doc = queryfx_one( - $dao_doc->establishConnection('r'), - 'SELECT * FROM %T WHERE phid = %s', - $t_doc, - $phid); - - if (!$doc) { - return null; - } - - $fields = queryfx_all( - $dao_field->establishConnection('r'), - 'SELECT * FROM %T WHERE phid = %s', - $t_field, - $phid); - - $relationships = queryfx_all( - $dao_relationship->establishConnection('r'), - 'SELECT * FROM %T WHERE phid = %s', - $t_relationship, - $phid); - - $adoc = id(new PhabricatorSearchAbstractDocument()) - ->setPHID($phid) - ->setDocumentType($doc['documentType']) - ->setDocumentTitle($doc['documentTitle']) - ->setDocumentCreated($doc['documentCreated']) - ->setDocumentModified($doc['documentModified']); - - foreach ($fields as $field) { - $adoc->addField( - $field['field'], - $field['corpus'], - $field['auxPHID']); - } - - foreach ($relationships as $relationship) { - $adoc->addRelationship( - $relationship['relation'], - $relationship['relatedPHID'], - $relationship['relatedType'], - $relationship['relatedTime']); - } - - return $adoc; - } - public function executeSearch(PhabricatorSavedQuery 
$query) { $table = new PhabricatorSearchDocument(); $document_table = $table->getTableName(); $conn = $table->establishConnection('r'); $subquery = $this->newFulltextSubquery($query, $conn); $offset = (int)$query->getParameter('offset', 0); $limit = (int)$query->getParameter('limit', 25); // NOTE: We must JOIN the subquery in order to apply a limit. $results = queryfx_all( $conn, 'SELECT documentPHID, MAX(fieldScore) AS documentScore FROM (%Q) query JOIN %T root ON query.documentPHID = root.phid GROUP BY documentPHID ORDER BY documentScore DESC LIMIT %d, %d', $subquery, $document_table, $offset, $limit); return ipull($results, 'documentPHID'); } private function newFulltextSubquery( PhabricatorSavedQuery $query, AphrontDatabaseConnection $conn) { $field = new PhabricatorSearchDocumentField(); $field_table = $field->getTableName(); $document = new PhabricatorSearchDocument(); $document_table = $document->getTableName(); $select = array(); $select[] = 'document.phid AS documentPHID'; $join = array(); $where = array(); $title_field = PhabricatorSearchDocumentFieldType::FIELD_TITLE; $title_boost = 1024; $stemmer = new PhutilSearchStemmer(); $raw_query = $query->getParameter('query'); $raw_query = trim($raw_query); if (strlen($raw_query)) { $compiler = PhabricatorSearchDocument::newQueryCompiler() ->setStemmer($stemmer); $tokens = $compiler->newTokens($raw_query); list($min_length, $stopword_list) = $this->getEngineLimits($conn); // Process all the parts of the user's query so we can show them which // parts we searched for and which ones we ignored. $fulltext_tokens = array(); foreach ($tokens as $key => $token) { $fulltext_token = id(new PhabricatorFulltextToken()) ->setToken($token); $fulltext_tokens[$key] = $fulltext_token; $value = $token->getValue(); // If the value is unquoted, we'll stem it in the query, so stem it // here before performing filtering tests. See T12596. 
if (!$token->isQuoted()) {
          $value = $stemmer->stemToken($value);
        }

        if ($this->isShortToken($value, $min_length)) {
          $fulltext_token->setIsShort(true);
          continue;
        }

        if (isset($stopword_list[phutil_utf8_strtolower($value)])) {
          $fulltext_token->setIsStopword(true);
          continue;
        }
      }
      $this->fulltextTokens = $fulltext_tokens;

      // Remove tokens which aren't queryable from the query. This is mostly
      // a workaround for the peculiar behaviors described in T12137.
      foreach ($this->fulltextTokens as $key => $fulltext_token) {
        if (!$fulltext_token->isQueryable()) {
          unset($tokens[$key]);
        }
      }

      if (!$tokens) {
        throw new PhutilSearchQueryCompilerSyntaxException(
          pht(
            'All of your search terms are too short or too common to '.
            'appear in the search index. Search for longer or more '.
            'distinctive terms.'));
      }

      $queries = array();
      $queries[] = $compiler->compileLiteralQuery($tokens);
      $queries[] = $compiler->compileStemmedQuery($tokens);
      $compiled_query = implode(' ', array_filter($queries));
    } else {
      $compiled_query = null;
    }

    if (strlen($compiled_query)) {
      // Score each matching field row: title matches get a fixed boost on
      // top of the engine's boolean-mode relevance score.
      $select[] = qsprintf(
        $conn,
        'IF(field.field = %s, %d, 0) + MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE) AS fieldScore',
        $title_field,
        $title_boost,
        $compiled_query);

      $join[] = qsprintf(
        $conn,
        '%T field ON field.phid = document.phid',
        $field_table);

      $where[] = qsprintf(
        $conn,
        'MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)',
        $compiled_query);

      // Optionally restrict matches to a single field type. The value must
      // come from the query parameter: the local $field in this method is
      // the PhabricatorSearchDocumentField DAO used to look up the table
      // name, not a field name, so it must not be passed to "%s".
      $restrict_field = $query->getParameter('field');
      if ($restrict_field) {
        $where[] = qsprintf(
          $conn,
          'field.field = %s',
          $restrict_field);
      }
    } else {
      // With no text query there is nothing to score, so the creation date
      // stands in as the score.
      $select[] = qsprintf(
        $conn,
        'document.documentCreated AS fieldScore');
    }

    $exclude = $query->getParameter('exclude');
    if ($exclude) {
      $where[] = qsprintf(
        $conn,
        'document.phid != %s',
        $exclude);
    }

    $types = $query->getParameter('types');
    if ($types) {
      if (strlen($compiled_query)) {
        $where[] = qsprintf(
          $conn,
          'field.phidType IN (%Ls)',
          $types);
      }

      $where[] = qsprintf(
        $conn,
        'document.documentType IN (%Ls)',
        $types);
    }

    $join[] = $this->joinRelationship(
      $conn,
      $query,
'authorPHIDs', PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR); $statuses = $query->getParameter('statuses', array()); $statuses = array_fuse($statuses); $open_rel = PhabricatorSearchRelationship::RELATIONSHIP_OPEN; $closed_rel = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED; $include_open = !empty($statuses[$open_rel]); $include_closed = !empty($statuses[$closed_rel]); if ($include_open && !$include_closed) { $join[] = $this->joinRelationship( $conn, $query, 'statuses', $open_rel, true); } else if ($include_closed && !$include_open) { $join[] = $this->joinRelationship( $conn, $query, 'statuses', $closed_rel, true); } if ($query->getParameter('withAnyOwner')) { $join[] = $this->joinRelationship( $conn, $query, 'withAnyOwner', PhabricatorSearchRelationship::RELATIONSHIP_OWNER, true); } else if ($query->getParameter('withUnowned')) { $join[] = $this->joinRelationship( $conn, $query, 'withUnowned', PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED, true); } else { $join[] = $this->joinRelationship( $conn, $query, 'ownerPHIDs', PhabricatorSearchRelationship::RELATIONSHIP_OWNER); } $join[] = $this->joinRelationship( $conn, $query, 'subscriberPHIDs', PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER); $join[] = $this->joinRelationship( $conn, $query, 'projectPHIDs', PhabricatorSearchRelationship::RELATIONSHIP_PROJECT); $join[] = $this->joinRelationship( $conn, $query, 'repository', PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY); $select = implode(', ', $select); $join = array_filter($join); foreach ($join as $key => $clause) { $join[$key] = ' JOIN '.$clause; } $join = implode(' ', $join); if ($where) { $where = 'WHERE '.implode(' AND ', $where); } else { $where = ''; } if (strlen($compiled_query)) { $order = ''; } else { // When not executing a query, order by document creation date. This // is the default view in object browser dialogs, like "Close Duplicate". 
$order = qsprintf( $conn, 'ORDER BY document.documentCreated DESC'); } return qsprintf( $conn, 'SELECT %Q FROM %T document %Q %Q %Q LIMIT 1000', $select, $document_table, $join, $where, $order); } protected function joinRelationship( AphrontDatabaseConnection $conn, PhabricatorSavedQuery $query, $field, $type, $is_existence = false) { $sql = qsprintf( $conn, '%T AS %C ON %C.phid = document.phid AND %C.relation = %s', id(new PhabricatorSearchDocumentRelationship())->getTableName(), $field, $field, $field, $type); if (!$is_existence) { $phids = $query->getParameter($field, array()); if (!$phids) { return null; } $sql .= qsprintf( $conn, ' AND %C.relatedPHID in (%Ls)', $field, $phids); } return $sql; } public function indexExists() { return true; } public function getIndexStats() { return false; } public function getFulltextTokens() { return $this->fulltextTokens; } private function getEngineLimits(AphrontDatabaseConnection $conn) { if ($this->engineLimits === null) { $this->engineLimits = $this->newEngineLimits($conn); } return $this->engineLimits; } private function newEngineLimits(AphrontDatabaseConnection $conn) { // First, try InnoDB. Some database may not have both table engines, so // selecting variables from missing table engines can fail and throw. try { $result = queryfx_one( $conn, 'SELECT @@innodb_ft_min_token_size innodb_max, @@innodb_ft_server_stopword_table innodb_stopword_config'); } catch (AphrontQueryException $ex) { $result = null; } if ($result) { $min_len = $result['innodb_max']; $stopword_config = $result['innodb_stopword_config']; if (preg_match('(/)', $stopword_config)) { // If the setting is nonempty and contains a slash, query the // table the user has configured. $parts = explode('/', $stopword_config); list($stopword_database, $stopword_table) = $parts; } else { // Otherwise, query the InnoDB default stopword table. 
$stopword_database = 'INFORMATION_SCHEMA'; $stopword_table = 'INNODB_FT_DEFAULT_STOPWORD'; } $stopwords = queryfx_all( $conn, 'SELECT * FROM %T.%T', $stopword_database, $stopword_table); $stopwords = ipull($stopwords, 'value'); $stopwords = array_fuse($stopwords); return array($min_len, $stopwords); } // If InnoDB fails, try MyISAM. $result = queryfx_one( $conn, 'SELECT @@ft_min_word_len myisam_max, @@ft_stopword_file myisam_stopwords'); $min_len = $result['myisam_max']; $file = $result['myisam_stopwords']; if (preg_match('(/resources/sql/stopwords\.txt\z)', $file)) { // If this is set to something that looks like the Phabricator // stopword file, read that. $file = 'stopwords.txt'; } else { // Otherwise, just use the default stopwords. This might be wrong // but we can't read the actual value dynamically and reading // whatever file the variable is set to could be a big headache // to get right from a security perspective. $file = 'stopwords_myisam.txt'; } $root = dirname(phutil_get_library_root('phabricator')); $data = Filesystem::readFile($root.'/resources/sql/'.$file); $stopwords = explode("\n", $data); $stopwords = array_filter($stopwords); $stopwords = array_fuse($stopwords); return array($min_len, $stopwords); } private function isShortToken($value, $min_length) { // NOTE: The engine tokenizes internally on periods, so terms in the form // "ab.cd", where short substrings are separated by periods, do not produce // any queryable tokens. These terms are meaningful if at least one // substring is longer than the minimum length, like "example.py". See // T12928. This also applies to words with intermediate apostrophes, like // "to's". $parts = preg_split('/[.\']+/', $value); foreach ($parts as $part) { if (phutil_utf8_strlen($part) >= $min_length) { return false; } } return true; } }