diff --git a/src/search/PhutilSearchStemmer.php b/src/search/PhutilSearchStemmer.php --- a/src/search/PhutilSearchStemmer.php +++ b/src/search/PhutilSearchStemmer.php @@ -45,7 +45,16 @@ $loaded = true; } - return Porter::stem($normalized_token); + $stem = Porter::stem($normalized_token); + + // If the stem is too short, it won't be a candidate for indexing. These + // tokens are also likely to be acronyms (like "DNS") rather than real + // English words. + if (strlen($stem) < 3) { + return $normalized_token; + } + + return $stem; } } diff --git a/src/search/__tests__/PhutilSearchStemmerTestCase.php b/src/search/__tests__/PhutilSearchStemmerTestCase.php --- a/src/search/__tests__/PhutilSearchStemmerTestCase.php +++ b/src/search/__tests__/PhutilSearchStemmerTestCase.php @@ -30,6 +30,11 @@ // Stems should be normalized. 'DOG' => 'dog', + + // If stemming would bring a token under 3 characters, it should not + // be stemmed. + 'dns' => 'dns', + 'nis' => 'nis', ); $stemmer = new PhutilSearchStemmer();