Page Menu | Home | Phabricator

D17330.id.diff
No One | Temporary

D17330.id.diff

diff --git a/src/search/PhutilSearchStemmer.php b/src/search/PhutilSearchStemmer.php
--- a/src/search/PhutilSearchStemmer.php
+++ b/src/search/PhutilSearchStemmer.php
@@ -9,10 +9,12 @@
}
public function stemCorpus($corpus) {
- $tokens = preg_split('/[^a-zA-Z0-9\x7F-\xFF]+/', $corpus);
+ $tokens = preg_split('/[^a-zA-Z0-9\x7F-\xFF._]+/', $corpus);
$words = array();
foreach ($tokens as $key => $token) {
+ $token = trim($token, '._');
+
if (strlen($token) < 3) {
continue;
}
@@ -37,6 +39,13 @@
* @phutil-external-symbol class Porter
*/
private function applyStemmer($normalized_token) {
+ // If the token has internal punctuation, handle it literally. This
+ // deals with things like domain names, Conduit API methods, and other
+ // sorts of informal tokens.
+ if (preg_match('/[._]/', $normalized_token)) {
+ return $normalized_token;
+ }
+
static $loaded;
if ($loaded === null) {
@@ -45,6 +54,7 @@
$loaded = true;
}
+
$stem = Porter::stem($normalized_token);
// If the stem is too short, it won't be a candidate for indexing. These
diff --git a/src/search/__tests__/PhutilSearchStemmerTestCase.php b/src/search/__tests__/PhutilSearchStemmerTestCase.php
--- a/src/search/__tests__/PhutilSearchStemmerTestCase.php
+++ b/src/search/__tests__/PhutilSearchStemmerTestCase.php
@@ -35,6 +35,14 @@
// be stemmed.
'dns' => 'dns',
'nis' => 'nis',
+
+ // Complex tokens with internal punctuation should be left untouched;
+ // these are usually things like domain names, API calls, informal tags,
+ // etc.
+ 'apples' => 'appl',
+ 'bananas' => 'banana',
+ 'apples_bananas' => 'apples_bananas',
+ 'apples_bananas.apples_bananas' => 'apples_bananas.apples_bananas',
);
$stemmer = new PhutilSearchStemmer();
@@ -57,6 +65,10 @@
'in a 1-meter radius around a target.' =>
'firebal seventh level spell which deal 2d16 point damag meter '.
'radiu around target',
+ 'apples-bananas' => 'appl banana',
+ 'apples_bananas' => 'apples_bananas',
+ 'apples.bananas' => 'apples.bananas',
+ 'oddly-proportioned' => 'oddli proport',
);
$stemmer = new PhutilSearchStemmer();

File Metadata

Mime Type
text/plain
Expires
Fri, Jan 31, 5:39 AM (11 h, 52 m)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7077526
Default Alt Text
D17330.id.diff (2 KB)

Event Timeline