Page Menu | Home | Phabricator

D18647.diff
No One | Temporary

D18647.diff

diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php
--- a/src/applications/search/ferret/PhabricatorFerretEngine.php
+++ b/src/applications/search/ferret/PhabricatorFerretEngine.php
@@ -88,16 +88,23 @@
}
private function getNgramsFromString($value, $as_term) {
+ $value = phutil_utf8_strtolower($value);
$tokens = $this->tokenizeString($value);
- $ngrams = array();
+ // First, extract unique tokens from the string. This reduces the number
+ // of `phutil_utf8v()` calls we need to make if we are indexing a large
+ // corpus with redundant terms.
+ $unique_tokens = array();
foreach ($tokens as $token) {
- $token = phutil_utf8_strtolower($token);
-
if ($as_term) {
$token = ' '.$token.' ';
}
+ $unique_tokens[$token] = true;
+ }
+
+ $ngrams = array();
+ foreach ($unique_tokens as $token => $ignored) {
$token_v = phutil_utf8v($token);
$len = (count($token_v) - 2);
for ($ii = 0; $ii < $len; $ii++) {

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 8, 5:35 AM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6714073
Default Alt Text
D18647.diff (1 KB)

Event Timeline