Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15380402
D18648.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
1023 B
Referenced Files
None
Subscribers
None
D18648.diff
View Options
diff --git a/src/search/PhutilSearchStemmer.php b/src/search/PhutilSearchStemmer.php
--- a/src/search/PhutilSearchStemmer.php
+++ b/src/search/PhutilSearchStemmer.php
@@ -9,6 +9,7 @@
}
public function stemCorpus($corpus) {
+ $corpus = $this->normalizeCorpus($corpus);
$tokens = preg_split('/[^a-zA-Z0-9\x7F-\xFF._]+/', $corpus);
$words = array();
@@ -19,13 +20,12 @@
continue;
}
- $normal_word = $this->normalizeToken($token);
- $words[$normal_word] = $normal_word;
+ $words[$token] = $token;
}
$stems = array();
- foreach ($words as $normal_word) {
- $stems[] = $this->applyStemmer($normal_word);
+ foreach ($words as $word) {
+ $stems[] = $this->applyStemmer($word);
}
return implode(' ', $stems);
@@ -35,6 +35,10 @@
return phutil_utf8_strtolower($token);
}
+ private function normalizeCorpus($corpus) {
+ return phutil_utf8_strtolower($corpus);
+ }
+
/**
* @phutil-external-symbol class Porter
*/
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Mar 15, 2:51 AM (10 h, 9 m)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7681300
Default Alt Text
D18648.diff (1023 B)
Attached To
Mode
D18648: Improve search stemmer performance for large inputs
Attached
Detach File
Event Timeline
Log In to Comment