Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15433101
D18533.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Referenced Files
None
Subscribers
None
D18533.diff
View Options
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -2834,6 +2834,7 @@
'PhabricatorFeedStoryReference' => 'applications/feed/storage/PhabricatorFeedStoryReference.php',
'PhabricatorFerretDocument' => 'applications/search/ferret/PhabricatorFerretDocument.php',
'PhabricatorFerretEngine' => 'applications/search/ferret/PhabricatorFerretEngine.php',
+ 'PhabricatorFerretEngineTestCase' => 'applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php',
'PhabricatorFerretField' => 'applications/search/ferret/PhabricatorFerretField.php',
'PhabricatorFerretFulltextEngineExtension' => 'applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php',
'PhabricatorFerretInterface' => 'applications/search/ferret/PhabricatorFerretInterface.php',
@@ -3205,8 +3206,6 @@
'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php',
'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php',
'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php',
- 'PhabricatorNgramEngine' => 'applications/search/ngrams/PhabricatorNgramEngine.php',
- 'PhabricatorNgramEngineTestCase' => 'applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php',
'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php',
'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php',
'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php',
@@ -8166,6 +8165,7 @@
'PhabricatorFeedStoryReference' => 'PhabricatorFeedDAO',
'PhabricatorFerretDocument' => 'PhabricatorSearchDAO',
'PhabricatorFerretEngine' => 'Phobject',
+ 'PhabricatorFerretEngineTestCase' => 'PhabricatorTestCase',
'PhabricatorFerretField' => 'PhabricatorSearchDAO',
'PhabricatorFerretFulltextEngineExtension' => 'PhabricatorFulltextEngineExtension',
'PhabricatorFerretNgrams' => 'PhabricatorSearchDAO',
@@ -8587,8 +8587,6 @@
'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule',
'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock',
- 'PhabricatorNgramEngine' => 'Phobject',
- 'PhabricatorNgramEngineTestCase' => 'PhabricatorTestCase',
'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension',
'PhabricatorNgramsInterface' => 'PhabricatorIndexableInterface',
'PhabricatorNotificationBuilder' => 'Phobject',
diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
--- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
+++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
@@ -29,8 +29,7 @@
->setEpochCreated(0)
->setEpochModified(0);
- $stemmer = new PhutilSearchStemmer();
- $ngram_engine = id(new PhabricatorNgramEngine());
+ $stemmer = $engine->newStemmer();
// Copy all of the "title" and "body" fields to create new "core" fields.
// This allows users to search "in title or body" with the "core:" prefix.
@@ -69,10 +68,10 @@
continue;
}
- $term_corpus = $ngram_engine->newTermsCorpus($raw_corpus);
+ $term_corpus = $engine->newTermsCorpus($raw_corpus);
$normal_corpus = $stemmer->stemCorpus($raw_corpus);
- $normal_coprus = $ngram_engine->newTermsCorpus($normal_corpus);
+ $normal_coprus = $engine->newTermsCorpus($normal_corpus);
if (!isset($ferret_corpus_map[$key])) {
$ferret_corpus_map[$key] = $empty_template;
@@ -116,7 +115,7 @@
}
$ngrams_source = implode(' ', $ngrams_source);
- $ngrams = $ngram_engine->getNgramsFromString($ngrams_source, 'index');
+ $ngrams = $engine->getNgramsFromString($ngrams_source, 'index');
$ferret_document->openTransaction();
diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php
--- a/src/applications/search/ferret/PhabricatorFerretEngine.php
+++ b/src/applications/search/ferret/PhabricatorFerretEngine.php
@@ -6,4 +6,97 @@
abstract public function newDocumentObject();
abstract public function newFieldObject();
+ public function newStemmer() {
+ return new PhutilSearchStemmer();
+ }
+
+ public function tokenizeString($value) {
+ $value = trim($value, ' ');
+ $value = preg_split('/ +/', $value);
+ return $value;
+ }
+
+ public function getNgramsFromString($value, $mode) {
+ $tokens = $this->tokenizeString($value);
+
+ $ngrams = array();
+ foreach ($tokens as $token) {
+ $token = phutil_utf8_strtolower($token);
+
+ switch ($mode) {
+ case 'query':
+ break;
+ case 'index':
+ $token = ' '.$token.' ';
+ break;
+ case 'prefix':
+ $token = ' '.$token;
+ break;
+ }
+
+ $token_v = phutil_utf8v($token);
+ $len = (count($token_v) - 2);
+ for ($ii = 0; $ii < $len; $ii++) {
+ $ngram = array_slice($token_v, $ii, 3);
+ $ngram = implode('', $ngram);
+ $ngrams[$ngram] = $ngram;
+ }
+ }
+
+ ksort($ngrams);
+
+ return array_keys($ngrams);
+ }
+
+ public function newTermsCorpus($raw_corpus) {
+ $term_corpus = strtr(
+ $raw_corpus,
+ array(
+ '!' => ' ',
+ '"' => ' ',
+ '#' => ' ',
+ '$' => ' ',
+ '%' => ' ',
+ '&' => ' ',
+ '(' => ' ',
+ ')' => ' ',
+ '*' => ' ',
+ '+' => ' ',
+ ',' => ' ',
+ '-' => ' ',
+ '/' => ' ',
+ ':' => ' ',
+ ';' => ' ',
+ '<' => ' ',
+ '=' => ' ',
+ '>' => ' ',
+ '?' => ' ',
+ '@' => ' ',
+ '[' => ' ',
+ '\\' => ' ',
+ ']' => ' ',
+ '^' => ' ',
+ '`' => ' ',
+ '{' => ' ',
+ '|' => ' ',
+ '}' => ' ',
+ '~' => ' ',
+ '.' => ' ',
+ '_' => ' ',
+ "\n" => ' ',
+ "\r" => ' ',
+ "\t" => ' ',
+ ));
+
+ // NOTE: Single quotes divide terms only if they're at a word boundary.
+ // In contractions, like "whom'st've", the entire word is a single term.
+ $term_corpus = preg_replace('/(^| )[\']+/', ' ', $term_corpus);
+ $term_corpus = preg_replace('/[\']+( |$)/', ' ', $term_corpus);
+
+ $term_corpus = preg_replace('/\s+/u', ' ', $term_corpus);
+ $term_corpus = trim($term_corpus, ' ');
+
+ return $term_corpus;
+ }
+
}
diff --git a/src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php b/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php
rename from src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php
rename to src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php
--- a/src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php
+++ b/src/applications/search/ferret/__tests__/PhabricatorFerretEngineTestCase.php
@@ -1,6 +1,6 @@
<?php
-final class PhabricatorNgramEngineTestCase
+final class PhabricatorFerretEngineTestCase
extends PhabricatorTestCase {
public function testTermsCorpus() {
@@ -12,7 +12,8 @@
'http example org path to file jpg',
);
- $engine = new PhabricatorNgramEngine();
+ $engine = new ManiphestTaskFerretEngine();
+
foreach ($map as $input => $expect) {
$actual = $engine->newTermsCorpus($input);
diff --git a/src/applications/search/ngrams/PhabricatorNgramEngine.php b/src/applications/search/ngrams/PhabricatorNgramEngine.php
deleted file mode 100644
--- a/src/applications/search/ngrams/PhabricatorNgramEngine.php
+++ /dev/null
@@ -1,95 +0,0 @@
-<?php
-
-final class PhabricatorNgramEngine extends Phobject {
-
- public function tokenizeString($value) {
- $value = trim($value, ' ');
- $value = preg_split('/ +/', $value);
- return $value;
- }
-
- public function getNgramsFromString($value, $mode) {
- $tokens = $this->tokenizeString($value);
-
- $ngrams = array();
- foreach ($tokens as $token) {
- $token = phutil_utf8_strtolower($token);
-
- switch ($mode) {
- case 'query':
- break;
- case 'index':
- $token = ' '.$token.' ';
- break;
- case 'prefix':
- $token = ' '.$token;
- break;
- }
-
- $token_v = phutil_utf8v($token);
- $len = (count($token_v) - 2);
- for ($ii = 0; $ii < $len; $ii++) {
- $ngram = array_slice($token_v, $ii, 3);
- $ngram = implode('', $ngram);
- $ngrams[$ngram] = $ngram;
- }
- }
-
- ksort($ngrams);
-
- return array_keys($ngrams);
- }
-
- public function newTermsCorpus($raw_corpus) {
- $term_corpus = strtr(
- $raw_corpus,
- array(
- '!' => ' ',
- '"' => ' ',
- '#' => ' ',
- '$' => ' ',
- '%' => ' ',
- '&' => ' ',
- '(' => ' ',
- ')' => ' ',
- '*' => ' ',
- '+' => ' ',
- ',' => ' ',
- '-' => ' ',
- '/' => ' ',
- ':' => ' ',
- ';' => ' ',
- '<' => ' ',
- '=' => ' ',
- '>' => ' ',
- '?' => ' ',
- '@' => ' ',
- '[' => ' ',
- '\\' => ' ',
- ']' => ' ',
- '^' => ' ',
- '`' => ' ',
- '{' => ' ',
- '|' => ' ',
- '}' => ' ',
- '~' => ' ',
- '.' => ' ',
- '_' => ' ',
- "\n" => ' ',
- "\r" => ' ',
- "\t" => ' ',
- ));
-
- // NOTE: Single quotes divide terms only if they're at a word boundary.
- // In contractions, like "whom'st've", the entire word is a single term.
- $term_corpus = preg_replace('/(^| )[\']+/', ' ', $term_corpus);
- $term_corpus = preg_replace('/[\']+( |$)/', ' ', $term_corpus);
-
- $term_corpus = preg_replace('/\s+/u', ' ', $term_corpus);
- $term_corpus = trim($term_corpus, ' ');
-
- return $term_corpus;
- }
-
-
-}
diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
--- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
+++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
@@ -1453,8 +1453,7 @@
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
$engine = $this->ferretEngine;
- $ngram_engine = new PhabricatorNgramEngine();
- $stemmer = new PhutilSearchStemmer();
+ $stemmer = $engine->newStemmer();
$ngram_table = $engine->newNgramsObject();
$ngram_table_name = $ngram_table->getTableName();
@@ -1498,15 +1497,15 @@
}
if ($is_substring) {
- $ngrams = $ngram_engine->getNgramsFromString($value, 'query');
+ $ngrams = $engine->getNgramsFromString($value, 'query');
} else {
- $ngrams = $ngram_engine->getNgramsFromString($value, 'index');
+ $ngrams = $engine->getNgramsFromString($value, 'index');
// If this is a stemmed term, only look for ngrams present in both the
// unstemmed and stemmed variations.
if ($is_stemmed) {
$stem_value = $stemmer->stemToken($value);
- $stem_ngrams = $ngram_engine->getNgramsFromString(
+ $stem_ngrams = $engine->getNgramsFromString(
$stem_value,
'index');
@@ -1587,8 +1586,8 @@
return array();
}
- $ngram_engine = new PhabricatorNgramEngine();
- $stemmer = new PhutilSearchStemmer();
+ $engine = $this->ferretEngine;
+ $stemmer = $engine->newStemmer();
$table_map = $this->ferretTables;
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
@@ -1653,7 +1652,7 @@
$term_constraints = array();
- $term_value = ' '.$ngram_engine->newTermsCorpus($value).' ';
+ $term_value = ' '.$engine->newTermsCorpus($value).' ';
if ($is_not) {
$term_constraints[] = qsprintf(
$conn,
@@ -1670,7 +1669,7 @@
if ($is_stemmed) {
$stem_value = $stemmer->stemToken($value);
- $stem_value = $ngram_engine->newTermsCorpus($stem_value);
+ $stem_value = $engine->newTermsCorpus($stem_value);
$stem_value = ' '.$stem_value.' ';
$term_constraints[] = qsprintf(
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Mar 25, 9:23 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7718202
Default Alt Text
D18533.diff (12 KB)
Attached To
Mode
D18533: Consolidate more Ferret engine code into FerretEngine
Attached
Detach File
Event Timeline
Log In to Comment