Page Menu | Home | Phabricator

D18500.id44446.diff
No One | Temporary

D18500.id44446.diff

diff --git a/src/applications/maniphest/query/ManiphestTaskSearchEngine.php b/src/applications/maniphest/query/ManiphestTaskSearchEngine.php
--- a/src/applications/maniphest/query/ManiphestTaskSearchEngine.php
+++ b/src/applications/maniphest/query/ManiphestTaskSearchEngine.php
@@ -92,8 +92,8 @@
->setLabel(pht('Contains Words'))
->setKey('fulltext'),
id(new PhabricatorSearchTextField())
- ->setLabel(pht('Matches (Prototype)'))
- ->setKey('ferret')
+ ->setLabel(pht('Query (Prototype)'))
+ ->setKey('query')
->setIsHidden($hide_ferret),
id(new PhabricatorSearchThreeStateField())
->setLabel(pht('Open Parents'))
@@ -150,8 +150,8 @@
'statuses',
'priorities',
'subtypes',
+ 'query',
'fulltext',
- 'ferret',
'hasParents',
'hasSubtasks',
'parentIDs',
@@ -231,8 +231,8 @@
$query->withFullTextSearch($map['fulltext']);
}
- if (strlen($map['ferret'])) {
- $raw_query = $map['ferret'];
+ if (strlen($map['query'])) {
+ $raw_query = $map['query'];
$compiler = id(new PhutilSearchQueryCompiler())
->setEnableFunctions(true);
diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
--- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
+++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
@@ -50,9 +50,11 @@
continue;
}
- $normal_corpus = $stemmer->stemCorpus($raw_corpus);
$term_corpus = $ngram_engine->newTermsCorpus($raw_corpus);
+ $normal_corpus = $stemmer->stemCorpus($raw_corpus);
+ $normal_corpus = $ngram_engine->newTermsCorpus($normal_corpus);
+
if (!isset($ferret_corpus_map[$key])) {
$ferret_corpus_map[$key] = $empty_template;
}
@@ -67,16 +69,23 @@
}
$ferret_fields = array();
+ $ngrams_source = array();
foreach ($ferret_corpus_map as $key => $fields) {
$raw_corpus = $fields['raw'];
$raw_corpus = implode("\n", $raw_corpus);
+ $ngrams_source[] = $raw_corpus;
$normal_corpus = $fields['normal'];
- $normal_corpus = implode("\n", $normal_corpus);
+ $normal_corpus = implode(' ', $normal_corpus);
+ if (strlen($normal_corpus)) {
+ $ngrams_source[] = $normal_corpus;
+ $normal_corpus = ' '.$normal_corpus.' ';
+ }
$term_corpus = $fields['term'];
$term_corpus = implode(' ', $term_corpus);
if (strlen($term_corpus)) {
+ $ngrams_source[] = $term_corpus;
$term_corpus = ' '.$term_corpus.' ';
}
@@ -86,9 +95,7 @@
->setTermCorpus($term_corpus)
->setNormalCorpus($normal_corpus);
}
-
- $ngrams_source = $ferret_corpus_map[$key_all]['raw'];
- $ngrams_source = implode("\n", $ngrams_source);
+ $ngrams_source = implode(' ', $ngrams_source);
$ngrams = $ngram_engine->getNgramsFromString($ngrams_source, 'index');
diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
--- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
+++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
@@ -1409,8 +1409,11 @@
return array();
}
+ $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
+
$engine = $this->ferretEngine;
$ngram_engine = new PhabricatorNgramEngine();
+ $stemmer = new PhutilSearchStemmer();
$ngram_table = $engine->newNgramsObject();
$ngram_table_name = $ngram_table->getTableName();
@@ -1422,22 +1425,49 @@
$length = count(phutil_utf8v($value));
- if ($length >= 3) {
+ if ($raw_token->getOperator() == $op_sub) {
+ $is_substring = true;
+ } else {
+ $is_substring = false;
+ }
+
+ // If the user specified a substring query for a substring which is
+ // shorter than the ngram length, we can't use the ngram index, so
+ // don't do a join. We'll fall back to just doing LIKE on the full
+ // corpus.
+ if ($is_substring) {
+ if ($length < 3) {
+ continue;
+ }
+ }
+
+ if ($raw_token->isQuoted()) {
+ $is_stemmed = false;
+ } else {
+ $is_stemmed = true;
+ }
+
+ if ($is_substring) {
$ngrams = $ngram_engine->getNgramsFromString($value, 'query');
- $prefix = false;
- } else if ($length == 2) {
- $ngrams = $ngram_engine->getNgramsFromString($value, 'prefix');
- $prefix = false;
} else {
- $ngrams = array(' '.$value);
- $prefix = true;
+ $ngrams = $ngram_engine->getNgramsFromString($value, 'index');
+
+ // If this is a stemmed term, only look for ngrams present in both the
+ // unstemmed and stemmed variations.
+ if ($is_stemmed) {
+ $stem_value = $stemmer->stemToken($value);
+ $stem_ngrams = $ngram_engine->getNgramsFromString(
+ $stem_value,
+ 'index');
+
+ $ngrams = array_intersect($ngrams, $stem_ngrams);
+ }
}
foreach ($ngrams as $ngram) {
$flat[] = array(
'table' => $ngram_table_name,
'ngram' => $ngram,
- 'prefix' => $prefix,
);
}
}
@@ -1472,29 +1502,17 @@
foreach ($flat as $spec) {
$table = $spec['table'];
$ngram = $spec['ngram'];
- $prefix = $spec['prefix'];
$alias = 'ft'.$idx++;
- if ($prefix) {
- $joins[] = qsprintf(
- $conn,
- 'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram LIKE %>',
- $table,
- $alias,
- $alias,
- $alias,
- $ngram);
- } else {
- $joins[] = qsprintf(
- $conn,
- 'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram = %s',
- $table,
- $alias,
- $alias,
- $alias,
- $ngram);
- }
+ $joins[] = qsprintf(
+ $conn,
+ 'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram = %s',
+ $table,
+ $alias,
+ $alias,
+ $alias,
+ $ngram);
}
$joins[] = qsprintf(
@@ -1510,16 +1528,72 @@
return array();
}
+ $ngram_engine = new PhabricatorNgramEngine();
+ $stemmer = new PhutilSearchStemmer();
+ $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
+
$where = array();
foreach ($this->ferretTokens as $fulltext_token) {
$raw_token = $fulltext_token->getToken();
$value = $raw_token->getValue();
- $where[] = qsprintf(
+ if ($raw_token->getOperator() == $op_sub) {
+ $is_substring = true;
+ } else {
+ $is_substring = false;
+ }
+
+ // If we're doing substring search, we just match against the raw corpus
+ // and we're done.
+ if ($is_substring) {
+ $where[] = qsprintf(
+ $conn,
+ '(ftfield.rawCorpus LIKE %~)',
+ $value);
+ continue;
+ }
+
+ // Otherwise, we need to match against the term corpus and the normal
+ // corpus, so that searching for "raw" does not find "strawberry".
+ if ($raw_token->isQuoted()) {
+ $is_quoted = true;
+ $is_stemmed = false;
+ } else {
+ $is_quoted = false;
+ $is_stemmed = true;
+ }
+
+ $term_constraints = array();
+
+ $term_value = ' '.$ngram_engine->newTermsCorpus($value).' ';
+ $term_constraints[] = qsprintf(
$conn,
- '(ftfield.rawCorpus LIKE %~ OR ftfield.normalCorpus LIKE %~)',
- $value,
- $value);
+ '(ftfield.termCorpus LIKE %~)',
+ $term_value);
+
+ if ($is_stemmed) {
+ $stem_value = $stemmer->stemToken($value);
+ $stem_value = $ngram_engine->newTermsCorpus($stem_value);
+ $stem_value = ' '.$stem_value.' ';
+
+ $term_constraints[] = qsprintf(
+ $conn,
+ '(ftfield.normalCorpus LIKE %~)',
+ $stem_value);
+ }
+
+ if ($is_quoted) {
+ $where[] = qsprintf(
+ $conn,
+ '(ftfield.rawCorpus LIKE %~ AND (%Q))',
+ $value,
+ implode(' OR ', $term_constraints));
+ } else {
+ $where[] = qsprintf(
+ $conn,
+ '(%Q)',
+ implode(' OR ', $term_constraints));
+ }
}
return $where;

File Metadata

Mime Type
text/plain
Expires
Thu, Mar 27, 11:34 AM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7712139
Default Alt Text
D18500.id44446.diff (8 KB)

Event Timeline