Page Menu | Home | Phabricator

D17001.id40901.diff
No One | Temporary

D17001.id40901.diff

diff --git a/src/search/PhutilSearchStemmer.php b/src/search/PhutilSearchStemmer.php
--- a/src/search/PhutilSearchStemmer.php
+++ b/src/search/PhutilSearchStemmer.php
@@ -45,7 +45,16 @@
$loaded = true;
}
- return Porter::stem($normalized_token);
+ $stem = Porter::stem($normalized_token);
+
+ // If the stem is too short, it won't be a candidate for indexing. These
+ // tokens are also likely to be acronyms (like "DNS") rather than real
+ // English words.
+ if (strlen($stem) < 3) {
+ return $normalized_token;
+ }
+
+ return $stem;
}
}
diff --git a/src/search/__tests__/PhutilSearchStemmerTestCase.php b/src/search/__tests__/PhutilSearchStemmerTestCase.php
--- a/src/search/__tests__/PhutilSearchStemmerTestCase.php
+++ b/src/search/__tests__/PhutilSearchStemmerTestCase.php
@@ -30,6 +30,11 @@
// Stems should be normalized.
'DOG' => 'dog',
+
+ // If stemming would bring a token under 3 characters, it should not
+ // be stemmed.
+ 'dns' => 'dns',
+ 'nis' => 'nis',
);
$stemmer = new PhutilSearchStemmer();

File Metadata

Mime Type
text/plain
Expires
Sun, Mar 9, 10:19 AM (3 w, 4 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7388230
Default Alt Text
D17001.id40901.diff (1 KB)

Event Timeline