Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F13960058
D18498.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Referenced Files
None
Subscribers
None
D18498.diff
View Options
diff --git a/resources/sql/autopatches/20170830.ferret.02.term.sql b/resources/sql/autopatches/20170830.ferret.02.term.sql
new file mode 100644
--- /dev/null
+++ b/resources/sql/autopatches/20170830.ferret.02.term.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_maniphest.maniphest_task_ffield
+ ADD termCorpus LONGTEXT NOT NULL COLLATE {$COLLATE_SORT};
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -3206,6 +3206,7 @@
'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php',
'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php',
'PhabricatorNgramEngine' => 'applications/search/ngrams/PhabricatorNgramEngine.php',
+ 'PhabricatorNgramEngineTestCase' => 'applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php',
'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php',
'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php',
'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php',
@@ -8587,6 +8588,7 @@
'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule',
'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock',
'PhabricatorNgramEngine' => 'Phobject',
+ 'PhabricatorNgramEngineTestCase' => 'PhabricatorTestCase',
'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension',
'PhabricatorNgramsInterface' => 'PhabricatorIndexableInterface',
'PhabricatorNotificationBuilder' => 'Phobject',
diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
--- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
+++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
@@ -30,11 +30,13 @@
->setEpochModified(0);
$stemmer = new PhutilSearchStemmer();
+ $ngram_engine = id(new PhabricatorNgramEngine());
$key_all = PhabricatorSearchDocumentFieldType::FIELD_ALL;
$empty_template = array(
'raw' => array(),
+ 'term' => array(),
'normal' => array(),
);
@@ -49,15 +51,18 @@
}
$normal_corpus = $stemmer->stemCorpus($raw_corpus);
+ $term_corpus = $ngram_engine->newTermsCorpus($raw_corpus);
if (!isset($ferret_corpus_map[$key])) {
$ferret_corpus_map[$key] = $empty_template;
}
$ferret_corpus_map[$key]['raw'][] = $raw_corpus;
+ $ferret_corpus_map[$key]['term'][] = $term_corpus;
$ferret_corpus_map[$key]['normal'][] = $normal_corpus;
$ferret_corpus_map[$key_all]['raw'][] = $raw_corpus;
+ $ferret_corpus_map[$key_all]['term'][] = $term_corpus;
$ferret_corpus_map[$key_all]['normal'][] = $normal_corpus;
}
@@ -69,17 +74,23 @@
$normal_corpus = $fields['normal'];
$normal_corpus = implode("\n", $normal_corpus);
+ $term_corpus = $fields['term'];
+ $term_corpus = implode(' ', $term_corpus);
+ if (strlen($term_corpus)) {
+ $term_corpus = ' '.$term_corpus.' ';
+ }
+
$ferret_fields[] = $engine->newFieldObject()
->setFieldKey($key)
->setRawCorpus($raw_corpus)
+ ->setTermCorpus($term_corpus)
->setNormalCorpus($normal_corpus);
}
$ngrams_source = $ferret_corpus_map[$key_all]['raw'];
$ngrams_source = implode("\n", $ngrams_source);
- $ngrams = id(new PhabricatorNgramEngine())
- ->getNgramsFromString($ngrams_source, 'index');
+ $ngrams = $ngram_engine->getNgramsFromString($ngrams_source, 'index');
$ferret_document->openTransaction();
diff --git a/src/applications/search/ferret/PhabricatorFerretField.php b/src/applications/search/ferret/PhabricatorFerretField.php
--- a/src/applications/search/ferret/PhabricatorFerretField.php
+++ b/src/applications/search/ferret/PhabricatorFerretField.php
@@ -6,6 +6,7 @@
protected $documentID;
protected $fieldKey;
protected $rawCorpus;
+ protected $termCorpus;
protected $normalCorpus;
abstract public function getIndexKey();
@@ -17,6 +18,7 @@
'documentID' => 'uint32',
'fieldKey' => 'text4',
'rawCorpus' => 'sort',
+ 'termCorpus' => 'sort',
'normalCorpus' => 'sort',
),
self::CONFIG_KEY_SCHEMA => array(
diff --git a/src/applications/search/ngrams/PhabricatorNgramEngine.php b/src/applications/search/ngrams/PhabricatorNgramEngine.php
--- a/src/applications/search/ngrams/PhabricatorNgramEngine.php
+++ b/src/applications/search/ngrams/PhabricatorNgramEngine.php
@@ -40,4 +40,56 @@
return array_keys($ngrams);
}
+  /**
+   * Build a "terms" corpus from a raw corpus.
+   *
+   * Punctuation and whitespace are replaced with spaces so that the result
+   * is a list of terms separated by single spaces, with no leading or
+   * trailing space. Single quotes are kept when they appear inside a word.
+   *
+   * @param string Raw corpus text.
+   * @return string Space-separated terms. This is the empty string if the
+   *   whitespace pattern can not be applied to the input (for example,
+   *   because the input is not valid UTF-8).
+   */
+  public function newTermsCorpus($raw_corpus) {
+    $term_corpus = strtr(
+      $raw_corpus,
+      array(
+        '!' => ' ',
+        '"' => ' ',
+        '#' => ' ',
+        '$' => ' ',
+        '%' => ' ',
+        '&' => ' ',
+        '(' => ' ',
+        ')' => ' ',
+        '*' => ' ',
+        '+' => ' ',
+        ',' => ' ',
+        '-' => ' ',
+        '/' => ' ',
+        ':' => ' ',
+        ';' => ' ',
+        '<' => ' ',
+        '=' => ' ',
+        '>' => ' ',
+        '?' => ' ',
+        '@' => ' ',
+        '[' => ' ',
+        '\\' => ' ',
+        ']' => ' ',
+        '^' => ' ',
+        '`' => ' ',
+        '{' => ' ',
+        '|' => ' ',
+        '}' => ' ',
+        '~' => ' ',
+        '.' => ' ',
+        '_' => ' ',
+        "\n" => ' ',
+        "\r" => ' ',
+        "\t" => ' ',
+      ));
+
+    // Normalize every remaining run of whitespace to a single space BEFORE
+    // looking at quotes. The quote rules below only treat a literal space as
+    // a word boundary, so whitespace which is not in the map above (such as
+    // a form feed) must already be a space by the time they run.
+    $term_corpus = preg_replace('/\s+/u', ' ', $term_corpus);
+    if ($term_corpus === null) {
+      // preg_replace() returns null on error. Return the empty corpus
+      // explicitly rather than passing null through the string functions
+      // below.
+      return '';
+    }
+
+    // NOTE: Single quotes divide terms only if they're at a word boundary.
+    // In contractions, like "whom'st've", the entire word is a single term.
+    $term_corpus = preg_replace('/(^| )[\']+/', ' ', $term_corpus);
+    $term_corpus = preg_replace('/[\']+( |$)/', ' ', $term_corpus);
+
+    // Removing quotes may leave runs of spaces behind; collapse them again.
+    $term_corpus = preg_replace('/ +/', ' ', $term_corpus);
+    $term_corpus = trim($term_corpus, ' ');
+
+    return $term_corpus;
+  }
+
+
}
diff --git a/src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php b/src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/ngrams/__tests__/PhabricatorNgramEngineTestCase.php
@@ -0,0 +1,26 @@
+<?php
+
+/**
+ * Unit tests for PhabricatorNgramEngine.
+ */
+final class PhabricatorNgramEngineTestCase
+  extends PhabricatorTestCase {
+
+  /**
+   * Check that newTermsCorpus() turns raw text into space-separated terms.
+   *
+   * Each key of the map is a raw corpus and each value is the terms corpus
+   * expected for it.
+   */
+  public function testTermsCorpus() {
+    $map = array(
+      'Hear ye, hear ye!' => 'Hear ye hear ye',
+      "Thou whom'st've art worthy." => "Thou whom'st've art worthy",
+      'Guaranteed to contain "food".' => 'Guaranteed to contain food',
+      'http://example.org/path/to/file.jpg' =>
+        'http example org path to file jpg',
+
+      // Single quotes at a word boundary are separators, not part of a term.
+      "The 'quoted' word" => 'The quoted word',
+      "'Leading and trailing'" => 'Leading and trailing',
+
+      // Runs of whitespace collapse, and the result is trimmed.
+      "  tabs\tand\nnewlines  " => 'tabs and newlines',
+
+      // Input with no terms produces an empty corpus.
+      '' => '',
+      '...' => '',
+    );
+
+    $engine = new PhabricatorNgramEngine();
+    foreach ($map as $input => $expect) {
+      $actual = $engine->newTermsCorpus($input);
+
+      $this->assertEqual(
+        $expect,
+        $actual,
+        pht('Terms corpus for: %s', $input));
+    }
+  }
+
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Oct 15 2024, 9:48 PM (4 w, 4 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6714571
Default Alt Text
D18498.diff (7 KB)
Attached To
Mode
D18498: Add a "terms" corpus to Ferret fields
Attached
Detach File
Event Timeline
Log In to Comment