Differential D18500 Diff 44446 src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
Changeset View
Changeset View
Standalone View
Standalone View
src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
| Show First 20 Lines • Show All 1,403 Lines • ▼ Show 20 Lines | public function withFerretConstraint( | ||||
| return $this; | return $this; | ||||
| } | } | ||||
| protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) { | protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) { | ||||
| if (!$this->ferretEngine) { | if (!$this->ferretEngine) { | ||||
| return array(); | return array(); | ||||
| } | } | ||||
| $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING; | |||||
| $engine = $this->ferretEngine; | $engine = $this->ferretEngine; | ||||
| $ngram_engine = new PhabricatorNgramEngine(); | $ngram_engine = new PhabricatorNgramEngine(); | ||||
| $stemmer = new PhutilSearchStemmer(); | |||||
| $ngram_table = $engine->newNgramsObject(); | $ngram_table = $engine->newNgramsObject(); | ||||
| $ngram_table_name = $ngram_table->getTableName(); | $ngram_table_name = $ngram_table->getTableName(); | ||||
| $flat = array(); | $flat = array(); | ||||
| foreach ($this->ferretTokens as $fulltext_token) { | foreach ($this->ferretTokens as $fulltext_token) { | ||||
| $raw_token = $fulltext_token->getToken(); | $raw_token = $fulltext_token->getToken(); | ||||
| $value = $raw_token->getValue(); | $value = $raw_token->getValue(); | ||||
| $length = count(phutil_utf8v($value)); | $length = count(phutil_utf8v($value)); | ||||
| if ($length >= 3) { | if ($raw_token->getOperator() == $op_sub) { | ||||
| $is_substring = true; | |||||
| } else { | |||||
| $is_substring = false; | |||||
| } | |||||
| // If the user specified a substring query for a substring which is | |||||
| // shorter than the ngram length, we can't use the ngram index, so | |||||
| // don't do a join. We'll fall back to just doing LIKE on the full | |||||
| // corpus. | |||||
| if ($is_substring) { | |||||
| if ($length < 3) { | |||||
| continue; | |||||
| } | |||||
| } | |||||
| if ($raw_token->isQuoted()) { | |||||
| $is_stemmed = false; | |||||
| } else { | |||||
| $is_stemmed = true; | |||||
| } | |||||
| if ($is_substring) { | |||||
| $ngrams = $ngram_engine->getNgramsFromString($value, 'query'); | $ngrams = $ngram_engine->getNgramsFromString($value, 'query'); | ||||
| $prefix = false; | |||||
| } else if ($length == 2) { | |||||
| $ngrams = $ngram_engine->getNgramsFromString($value, 'prefix'); | |||||
| $prefix = false; | |||||
| } else { | } else { | ||||
| $ngrams = array(' '.$value); | $ngrams = $ngram_engine->getNgramsFromString($value, 'index'); | ||||
| $prefix = true; | |||||
| // If this is a stemmed term, only look for ngrams present in both the | |||||
| // unstemmed and stemmed variations. | |||||
| if ($is_stemmed) { | |||||
| $stem_value = $stemmer->stemToken($value); | |||||
| $stem_ngrams = $ngram_engine->getNgramsFromString( | |||||
| $stem_value, | |||||
| 'index'); | |||||
| $ngrams = array_intersect($ngrams, $stem_ngrams); | |||||
| } | |||||
| } | } | ||||
| foreach ($ngrams as $ngram) { | foreach ($ngrams as $ngram) { | ||||
| $flat[] = array( | $flat[] = array( | ||||
| 'table' => $ngram_table_name, | 'table' => $ngram_table_name, | ||||
| 'ngram' => $ngram, | 'ngram' => $ngram, | ||||
| 'prefix' => $prefix, | |||||
| ); | ); | ||||
| } | } | ||||
| } | } | ||||
| // MySQL only allows us to join a maximum of 61 tables per query. Each | // MySQL only allows us to join a maximum of 61 tables per query. Each | ||||
| // ngram is going to cost us a join toward that limit, so if the user | // ngram is going to cost us a join toward that limit, so if the user | ||||
| // specified a very long query string, just pick 16 of the ngrams | // specified a very long query string, just pick 16 of the ngrams | ||||
| // at random. | // at random. | ||||
| Show All 18 Lines | $joins[] = qsprintf( | ||||
| 'JOIN %T ftdoc ON ftdoc.objectPHID = %Q', | 'JOIN %T ftdoc ON ftdoc.objectPHID = %Q', | ||||
| $document_table->getTableName(), | $document_table->getTableName(), | ||||
| $phid_column); | $phid_column); | ||||
| $idx = 1; | $idx = 1; | ||||
| foreach ($flat as $spec) { | foreach ($flat as $spec) { | ||||
| $table = $spec['table']; | $table = $spec['table']; | ||||
| $ngram = $spec['ngram']; | $ngram = $spec['ngram']; | ||||
| $prefix = $spec['prefix']; | |||||
| $alias = 'ft'.$idx++; | $alias = 'ft'.$idx++; | ||||
| if ($prefix) { | |||||
| $joins[] = qsprintf( | |||||
| $conn, | |||||
| 'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram LIKE %>', | |||||
| $table, | |||||
| $alias, | |||||
| $alias, | |||||
| $alias, | |||||
| $ngram); | |||||
| } else { | |||||
| $joins[] = qsprintf( | $joins[] = qsprintf( | ||||
| $conn, | $conn, | ||||
| 'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram = %s', | 'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram = %s', | ||||
| $table, | $table, | ||||
| $alias, | $alias, | ||||
| $alias, | $alias, | ||||
| $alias, | $alias, | ||||
| $ngram); | $ngram); | ||||
| } | } | ||||
| } | |||||
| $joins[] = qsprintf( | $joins[] = qsprintf( | ||||
| $conn, | $conn, | ||||
| 'JOIN %T ftfield ON ftdoc.id = ftfield.documentID', | 'JOIN %T ftfield ON ftdoc.id = ftfield.documentID', | ||||
| $field_table->getTableName()); | $field_table->getTableName()); | ||||
| return $joins; | return $joins; | ||||
| } | } | ||||
/**
 * Build the WHERE constraints for the active Ferret fulltext tokens.
 *
 * Each token contributes exactly one parenthesized constraint:
 *
 *   - Substring tokens are matched directly against the raw corpus.
 *   - Quoted tokens must appear in the raw corpus and also match the term
 *     corpus as a whole term.
 *   - All other tokens match either the term corpus, or the normal corpus
 *     using the stemmed form of the token.
 *
 * @param AphrontDatabaseConnection Connection used to escape the clauses.
 * @return list<string> One constraint per token; an empty list when no
 *   Ferret engine is configured.
 */
protected function buildFerretWhereClause(AphrontDatabaseConnection $conn) {
  if (!$this->ferretEngine) {
    return array();
  }

  $ngram_engine = new PhabricatorNgramEngine();
  $stemmer = new PhutilSearchStemmer();
  $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;

  $clauses = array();
  foreach ($this->ferretTokens as $fulltext_token) {
    $token = $fulltext_token->getToken();
    $text = $token->getValue();

    // Substring search only needs a LIKE against the raw corpus; no term
    // matching applies, so emit the constraint and move to the next token.
    if ($token->getOperator() == $op_sub) {
      $clauses[] = qsprintf(
        $conn,
        '(ftfield.rawCorpus LIKE %~)',
        $text);
      continue;
    }

    // Quoted tokens are matched literally and are never stemmed.
    $quoted = (bool)$token->isQuoted();

    // Pad the term with spaces so the LIKE only matches whole terms: a
    // search for "raw" must not find "strawberry".
    $term_text = ' '.$ngram_engine->newTermsCorpus($text).' ';
    $alternatives = array(
      qsprintf(
        $conn,
        '(ftfield.termCorpus LIKE %~)',
        $term_text),
    );

    if (!$quoted) {
      // Unquoted tokens may also match via their stem in the normal corpus.
      $stem_text = $stemmer->stemToken($text);
      $stem_text = ' '.$ngram_engine->newTermsCorpus($stem_text).' ';
      $alternatives[] = qsprintf(
        $conn,
        '(ftfield.normalCorpus LIKE %~)',
        $stem_text);
    }

    $any_term = implode(' OR ', $alternatives);

    if ($quoted) {
      $clauses[] = qsprintf(
        $conn,
        '(ftfield.rawCorpus LIKE %~ AND (%Q))',
        $text,
        $any_term);
    } else {
      $clauses[] = qsprintf(
        $conn,
        '(%Q)',
        $any_term);
    }
  }

  return $clauses;
}
/**
 * Report whether any Ferret tokens are set on this query.
 *
 * NOTE(review): presumably callers use this to decide whether result rows
 * need grouping once the Ferret tables are joined; confirm against callers.
 *
 * @return bool True when the Ferret token list is non-empty.
 */
protected function shouldGroupFerretResultRows() {
  if ($this->ferretTokens) {
    return true;
  }

  return false;
}
| ▲ Show 20 Lines • Show All 754 Lines • Show Last 20 Lines | |||||