diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php --- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php +++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php @@ -71,7 +71,7 @@ $term_corpus = $engine->newTermsCorpus($raw_corpus); $normal_corpus = $stemmer->stemCorpus($raw_corpus); - $normal_coprus = $engine->newTermsCorpus($normal_corpus); + $normal_corpus = $engine->newTermsCorpus($normal_corpus); if (!isset($ferret_corpus_map[$key])) { $ferret_corpus_map[$key] = $empty_template; @@ -91,20 +91,20 @@ foreach ($ferret_corpus_map as $key => $fields) { $raw_corpus = $fields['raw']; $raw_corpus = implode("\n", $raw_corpus); - $ngrams_source[] = $raw_corpus; + if (strlen($raw_corpus)) { + $ngrams_source[] = $raw_corpus; + } $normal_corpus = $fields['normal']; - $normal_corpus = implode(' ', $normal_corpus); + $normal_corpus = implode("\n", $normal_corpus); if (strlen($normal_corpus)) { $ngrams_source[] = $normal_corpus; - $normal_corpus = ' '.$normal_corpus.' '; } $term_corpus = $fields['term']; - $term_corpus = implode(' ', $term_corpus); + $term_corpus = implode("\n", $term_corpus); if (strlen($term_corpus)) { $ngrams_source[] = $term_corpus; - $term_corpus = ' '.$term_corpus.' '; } $ferret_fields[] = $engine->newFieldObject() @@ -113,9 +113,9 @@ ->setTermCorpus($term_corpus) ->setNormalCorpus($normal_corpus); } - $ngrams_source = implode(' ', $ngrams_source); + $ngrams_source = implode("\n", $ngrams_source); - $ngrams = $engine->getNgramsFromString($ngrams_source, 'index'); + $ngrams = $engine->getTermNgramsFromString($ngrams_source); $ferret_document->openTransaction(); diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php --- a/src/applications/search/ferret/PhabricatorFerretEngine.php +++ b/src/applications/search/ferret/PhabricatorFerretEngine.php @@ -16,22 +16,23 @@ return $value; } - public function getNgramsFromString($value, $mode) { + public function getTermNgramsFromString($string) { + return $this->getNgramsFromString($string, true); + } + + public function getSubstringNgramsFromString($string) { + return $this->getNgramsFromString($string, false); + } + + private function getNgramsFromString($value, $as_term) { $tokens = $this->tokenizeString($value); $ngrams = array(); foreach ($tokens as $token) { $token = phutil_utf8_strtolower($token); - switch ($mode) { - case 'query': - break; - case 'index': - $token = ' '.$token.' '; - break; - case 'prefix': - $token = ' '.$token; - break; + if ($as_term) { + $token = ' '.$token.' '; } $token_v = phutil_utf8v($token); @@ -96,6 +97,10 @@ $term_corpus = preg_replace('/\s+/u', ' ', $term_corpus); $term_corpus = trim($term_corpus, ' '); + if (strlen($term_corpus)) { + $term_corpus = ' '.$term_corpus.' '; + } + return $term_corpus; } diff --git a/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php b/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php --- a/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php +++ b/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php @@ -5,11 +5,11 @@ public function testTermsCorpus() { $map = array( - 'Hear ye, hear ye!' => 'Hear ye hear ye', - "Thou whom'st've art worthy." => "Thou whom'st've art worthy", - 'Guaranteed to contain "food".' => 'Guaranteed to contain food', + 'Hear ye, hear ye!' => ' Hear ye hear ye ', + "Thou whom'st've art worthy." => " Thou whom'st've art worthy ", + 'Guaranteed to contain "food".' => ' Guaranteed to contain food ', 'http://example.org/path/to/file.jpg' => - 'http example org path to file jpg', + ' http example org path to file jpg ', ); $engine = new ManiphestTaskFerretEngine(); diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php --- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php +++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php @@ -1497,18 +1497,15 @@ } if ($is_substring) { - $ngrams = $engine->getNgramsFromString($value, 'query'); + $ngrams = $engine->getSubstringNgramsFromString($value); } else { - $ngrams = $engine->getNgramsFromString($value, 'index'); + $ngrams = $engine->getTermNgramsFromString($value); // If this is a stemmed term, only look for ngrams present in both the // unstemmed and stemmed variations. if ($is_stemmed) { $stem_value = $stemmer->stemToken($value); - $stem_ngrams = $engine->getNgramsFromString( - $stem_value, - 'index'); - + $stem_ngrams = $engine->getTermNgramsFromString($stem_value); $ngrams = array_intersect($ngrams, $stem_ngrams); } } @@ -1652,7 +1649,7 @@ $term_constraints = array(); - $term_value = ' '.$engine->newTermsCorpus($value).' '; + $term_value = $engine->newTermsCorpus($value); if ($is_not) { $term_constraints[] = qsprintf( $conn, @@ -1670,7 +1667,6 @@ if ($is_stemmed) { $stem_value = $stemmer->stemToken($value); $stem_value = $engine->newTermsCorpus($stem_value); - $stem_value = ' '.$stem_value.' '; $term_constraints[] = qsprintf( $conn,