Page MenuHomePhabricator

D18533.diff
No OneTemporary

D18533.diff

diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -2834,6 +2834,7 @@
'PhabricatorFeedStoryReference' => 'applications/feed/storage/PhabricatorFeedStoryReference.php',
'PhabricatorFerretDocument' => 'applications/search/ferret/PhabricatorFerretDocument.php',
'PhabricatorFerretEngine' => 'applications/search/ferret/PhabricatorFerretEngine.php',
+ 'PhabricatorFerretEngineTestCase' => 'applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php',
'PhabricatorFerretField' => 'applications/search/ferret/PhabricatorFerretField.php',
'PhabricatorFerretFulltextEngineExtension' => 'applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php',
'PhabricatorFerretInterface' => 'applications/search/ferret/PhabricatorFerretInterface.php',
@@ -3205,8 +3206,6 @@
'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php',
'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php',
'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php',
- 'PhabricatorNgramEngine' => 'applications/search/ngrams/PhabricatorNgramEngine.php',
- 'PhabricatorNgramEngineTestCase' => 'applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php',
'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php',
'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php',
'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php',
@@ -8166,6 +8165,7 @@
'PhabricatorFeedStoryReference' => 'PhabricatorFeedDAO',
'PhabricatorFerretDocument' => 'PhabricatorSearchDAO',
'PhabricatorFerretEngine' => 'Phobject',
+ 'PhabricatorFerretEngineTestCase' => 'PhabricatorTestCase',
'PhabricatorFerretField' => 'PhabricatorSearchDAO',
'PhabricatorFerretFulltextEngineExtension' => 'PhabricatorFulltextEngineExtension',
'PhabricatorFerretNgrams' => 'PhabricatorSearchDAO',
@@ -8587,8 +8587,6 @@
'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule',
'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock',
- 'PhabricatorNgramEngine' => 'Phobject',
- 'PhabricatorNgramEngineTestCase' => 'PhabricatorTestCase',
'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension',
'PhabricatorNgramsInterface' => 'PhabricatorIndexableInterface',
'PhabricatorNotificationBuilder' => 'Phobject',
diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
--- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
+++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
@@ -29,8 +29,7 @@
->setEpochCreated(0)
->setEpochModified(0);
- $stemmer = new PhutilSearchStemmer();
- $ngram_engine = id(new PhabricatorNgramEngine());
+ $stemmer = $engine->newStemmer();
// Copy all of the "title" and "body" fields to create new "core" fields.
// This allows users to search "in title or body" with the "core:" prefix.
@@ -69,10 +68,10 @@
continue;
}
- $term_corpus = $ngram_engine->newTermsCorpus($raw_corpus);
+ $term_corpus = $engine->newTermsCorpus($raw_corpus);
$normal_corpus = $stemmer->stemCorpus($raw_corpus);
- $normal_coprus = $ngram_engine->newTermsCorpus($normal_corpus);
+ $normal_corpus = $engine->newTermsCorpus($normal_corpus);
if (!isset($ferret_corpus_map[$key])) {
$ferret_corpus_map[$key] = $empty_template;
@@ -116,7 +115,7 @@
}
$ngrams_source = implode(' ', $ngrams_source);
- $ngrams = $ngram_engine->getNgramsFromString($ngrams_source, 'index');
+ $ngrams = $engine->getNgramsFromString($ngrams_source, 'index');
$ferret_document->openTransaction();
diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php
--- a/src/applications/search/ferret/PhabricatorFerretEngine.php
+++ b/src/applications/search/ferret/PhabricatorFerretEngine.php
@@ -6,4 +6,137 @@
abstract public function newDocumentObject();
abstract public function newFieldObject();
+ /**
+ * Build the stemmer used for corpus text and query terms.
+ *
+ * @return PhutilSearchStemmer A new stemmer instance.
+ */
+ public function newStemmer() {
+ return new PhutilSearchStemmer();
+ }
+
+ /**
+ * Split a string into tokens on runs of spaces.
+ *
+ * Leading and trailing spaces are trimmed first. Only the space character
+ * separates tokens, and an empty string yields a single empty token.
+ *
+ * @param string $value Text to tokenize.
+ * @return list<string> Tokens in input order.
+ */
+ public function tokenizeString($value) {
+ $value = trim($value, ' ');
+ $value = preg_split('/ +/', $value);
+ return $value;
+ }
+
+ /**
+ * Extract the unique three-element ngrams from a string.
+ *
+ * Each token is passed through phutil_utf8_strtolower() and padded by mode:
+ * "index" wraps it in spaces, "prefix" adds a leading space, and "query"
+ * (or any other value) leaves it alone. Ngrams are windows of three
+ * consecutive phutil_utf8v() elements, so shorter tokens produce none.
+ *
+ * @param string $value Text to extract ngrams from.
+ * @param string $mode One of "query", "index", or "prefix".
+ * @return list<string> Unique ngrams in ksort() key order.
+ */
+ public function getNgramsFromString($value, $mode) {
+ $tokens = $this->tokenizeString($value);
+
+ $ngrams = array();
+ foreach ($tokens as $token) {
+ $token = phutil_utf8_strtolower($token);
+
+ switch ($mode) {
+ case 'query':
+ break;
+ case 'index':
+ $token = ' '.$token.' ';
+ break;
+ case 'prefix':
+ $token = ' '.$token;
+ break;
+ }
+
+ $token_v = phutil_utf8v($token);
+ // A sequence of N elements has N - 2 windows of length three.
+ $len = (count($token_v) - 2);
+ for ($ii = 0; $ii < $len; $ii++) {
+ $ngram = array_slice($token_v, $ii, 3);
+ $ngram = implode('', $ngram);
+ $ngrams[$ngram] = $ngram;
+ }
+ }
+
+ ksort($ngrams);
+
+ // Return the stored values rather than the keys: PHP casts all-digit
+ // keys like "201" to integers, so array_keys() would yield mixed types.
+ return array_values($ngrams);
+ }
+
+ /**
+ * Reduce raw text to a space-separated corpus of terms.
+ *
+ * The listed ASCII punctuation, newlines, carriage returns, and tabs become
+ * spaces. Single quotes are stripped only where they touch a space or the
+ * start/end of the text. Whitespace runs then collapse to one space and
+ * surrounding spaces are trimmed.
+ *
+ * @param string $raw_corpus Text to normalize.
+ * @return string Normalized corpus.
+ */
+ public function newTermsCorpus($raw_corpus) {
+ $term_corpus = strtr(
+ $raw_corpus,
+ array(
+ '!' => ' ',
+ '"' => ' ',
+ '#' => ' ',
+ '$' => ' ',
+ '%' => ' ',
+ '&' => ' ',
+ '(' => ' ',
+ ')' => ' ',
+ '*' => ' ',
+ '+' => ' ',
+ ',' => ' ',
+ '-' => ' ',
+ '/' => ' ',
+ ':' => ' ',
+ ';' => ' ',
+ '<' => ' ',
+ '=' => ' ',
+ '>' => ' ',
+ '?' => ' ',
+ '@' => ' ',
+ '[' => ' ',
+ '\\' => ' ',
+ ']' => ' ',
+ '^' => ' ',
+ '`' => ' ',
+ '{' => ' ',
+ '|' => ' ',
+ '}' => ' ',
+ '~' => ' ',
+ '.' => ' ',
+ '_' => ' ',
+ "\n" => ' ',
+ "\r" => ' ',
+ "\t" => ' ',
+ ));
+
+ // NOTE: Single quotes divide terms only if they're at a word boundary.
+ // In contractions, like "whom'st've", the entire word is a single term.
+ $term_corpus = preg_replace('/(^| )[\']+/', ' ', $term_corpus);
+ $term_corpus = preg_replace('/[\']+( |$)/', ' ', $term_corpus);
+
+ $term_corpus = preg_replace('/\s+/u', ' ', $term_corpus);
+ $term_corpus = trim($term_corpus, ' ');
+
+ return $term_corpus;
+ }
+
}
diff --git a/src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php b/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php
rename from src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php
rename to src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php
--- a/src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php
+++ b/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php
@@ -1,6 +1,6 @@
<?php
-final class PhabricatorNgramEngineTestCase
+final class PhabricatorFerretEngineTestCase
extends PhabricatorTestCase {
public function testTermsCorpus() {
@@ -12,7 +12,8 @@
'http example org path to file jpg',
);
- $engine = new PhabricatorNgramEngine();
+ $engine = new ManiphestTaskFerretEngine();
+
foreach ($map as $input => $expect) {
$actual = $engine->newTermsCorpus($input);
diff --git a/src/applications/search/ngrams/PhabricatorNgramEngine.php b/src/applications/search/ngrams/PhabricatorNgramEngine.php
deleted file mode 100644
--- a/src/applications/search/ngrams/PhabricatorNgramEngine.php
+++ /dev/null
@@ -1,95 +0,0 @@
-<?php
-
-final class PhabricatorNgramEngine extends Phobject {
-
- public function tokenizeString($value) {
- $value = trim($value, ' ');
- $value = preg_split('/ +/', $value);
- return $value;
- }
-
- public function getNgramsFromString($value, $mode) {
- $tokens = $this->tokenizeString($value);
-
- $ngrams = array();
- foreach ($tokens as $token) {
- $token = phutil_utf8_strtolower($token);
-
- switch ($mode) {
- case 'query':
- break;
- case 'index':
- $token = ' '.$token.' ';
- break;
- case 'prefix':
- $token = ' '.$token;
- break;
- }
-
- $token_v = phutil_utf8v($token);
- $len = (count($token_v) - 2);
- for ($ii = 0; $ii < $len; $ii++) {
- $ngram = array_slice($token_v, $ii, 3);
- $ngram = implode('', $ngram);
- $ngrams[$ngram] = $ngram;
- }
- }
-
- ksort($ngrams);
-
- return array_keys($ngrams);
- }
-
- public function newTermsCorpus($raw_corpus) {
- $term_corpus = strtr(
- $raw_corpus,
- array(
- '!' => ' ',
- '"' => ' ',
- '#' => ' ',
- '$' => ' ',
- '%' => ' ',
- '&' => ' ',
- '(' => ' ',
- ')' => ' ',
- '*' => ' ',
- '+' => ' ',
- ',' => ' ',
- '-' => ' ',
- '/' => ' ',
- ':' => ' ',
- ';' => ' ',
- '<' => ' ',
- '=' => ' ',
- '>' => ' ',
- '?' => ' ',
- '@' => ' ',
- '[' => ' ',
- '\\' => ' ',
- ']' => ' ',
- '^' => ' ',
- '`' => ' ',
- '{' => ' ',
- '|' => ' ',
- '}' => ' ',
- '~' => ' ',
- '.' => ' ',
- '_' => ' ',
- "\n" => ' ',
- "\r" => ' ',
- "\t" => ' ',
- ));
-
- // NOTE: Single quotes divide terms only if they're at a word boundary.
- // In contractions, like "whom'st've", the entire word is a single term.
- $term_corpus = preg_replace('/(^| )[\']+/', ' ', $term_corpus);
- $term_corpus = preg_replace('/[\']+( |$)/', ' ', $term_corpus);
-
- $term_corpus = preg_replace('/\s+/u', ' ', $term_corpus);
- $term_corpus = trim($term_corpus, ' ');
-
- return $term_corpus;
- }
-
-
-}
diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
--- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
+++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
@@ -1453,8 +1453,7 @@
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
$engine = $this->ferretEngine;
- $ngram_engine = new PhabricatorNgramEngine();
- $stemmer = new PhutilSearchStemmer();
+ $stemmer = $engine->newStemmer();
$ngram_table = $engine->newNgramsObject();
$ngram_table_name = $ngram_table->getTableName();
@@ -1498,15 +1497,15 @@
}
if ($is_substring) {
- $ngrams = $ngram_engine->getNgramsFromString($value, 'query');
+ $ngrams = $engine->getNgramsFromString($value, 'query');
} else {
- $ngrams = $ngram_engine->getNgramsFromString($value, 'index');
+ $ngrams = $engine->getNgramsFromString($value, 'index');
// If this is a stemmed term, only look for ngrams present in both the
// unstemmed and stemmed variations.
if ($is_stemmed) {
$stem_value = $stemmer->stemToken($value);
- $stem_ngrams = $ngram_engine->getNgramsFromString(
+ $stem_ngrams = $engine->getNgramsFromString(
$stem_value,
'index');
@@ -1587,8 +1586,8 @@
return array();
}
- $ngram_engine = new PhabricatorNgramEngine();
- $stemmer = new PhutilSearchStemmer();
+ $engine = $this->ferretEngine;
+ $stemmer = $engine->newStemmer();
$table_map = $this->ferretTables;
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
@@ -1653,7 +1652,7 @@
$term_constraints = array();
- $term_value = ' '.$ngram_engine->newTermsCorpus($value).' ';
+ $term_value = ' '.$engine->newTermsCorpus($value).' ';
if ($is_not) {
$term_constraints[] = qsprintf(
$conn,
@@ -1670,7 +1669,7 @@
if ($is_stemmed) {
$stem_value = $stemmer->stemToken($value);
- $stem_value = $ngram_engine->newTermsCorpus($stem_value);
+ $stem_value = $engine->newTermsCorpus($stem_value);
$stem_value = ' '.$stem_value.' ';
$term_constraints[] = qsprintf(

File Metadata

Mime Type
text/plain
Expires
Tue, Mar 25, 9:23 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7718202
Default Alt Text
D18533.diff (12 KB)

Event Timeline