diff --git a/externals/porter-stemmer/LICENSE b/externals/porter-stemmer/LICENSE deleted file mode 100644 --- a/externals/porter-stemmer/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2005-2016 Richard Heyes (http://www.phpguru.org/) - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/externals/porter-stemmer/README.md b/externals/porter-stemmer/README.md deleted file mode 100644 --- a/externals/porter-stemmer/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# Porter Stemmer by Richard Heyes - -# Installation (with composer) - -```json -{ - "require": { - "camspiers/porter-stemmer": "1.0.0" - } -} -``` - - $ composer install - -# Usage - -```php -$stem = Porter::Stem($word); -``` - -# License - -The MIT License (MIT) - -Copyright (c) 2005-2016 Richard Heyes (http://www.phpguru.org/) - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/externals/porter-stemmer/src/Porter.php b/externals/porter-stemmer/src/Porter.php deleted file mode 100644 --- a/externals/porter-stemmer/src/Porter.php +++ /dev/null @@ -1,426 +0,0 @@ - - * - * Originally available under the GPL 2 or greater. Relicensed with permission - * of original authors under the MIT License in 2016. - * - * All rights reserved. - * - * @package PorterStemmer - * @author Richard Heyes - * @author Jon Abernathy - * @copyright 2005-2016 Richard Heyes (http://www.phpguru.org/) - * @license http://www.opensource.org/licenses/mit-license.html MIT License - */ - -/** - * PHP 5 Implementation of the Porter Stemmer algorithm. Certain elements - * were borrowed from the (broken) implementation by Jon Abernathy. - * - * See http://tartarus.org/~martin/PorterStemmer/ for a description of the - * algorithm. - * - * Usage: - * - * $stem = PorterStemmer::Stem($word); - * - * How easy is that? - * - * @package PorterStemmer - * @author Richard Heyes - * @author Jon Abernathy - * @copyright 2005-2016 Richard Heyes (http://www.phpguru.org/) - * @license http://www.opensource.org/licenses/mit-license.html MIT License - */ -class Porter -{ - /** - * Regex for matching a consonant - * - * @var string - */ - private static $regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)'; - - /** - * Regex for matching a vowel - * - * @var string - */ - private static $regex_vowel = '(?:[aeiou]|(? 1) { - self::replace($word, 'e', ''); - - } elseif (self::m(substr($word, 0, -1)) == 1) { - - if (!self::cvc(substr($word, 0, -1))) { - self::replace($word, 'e', ''); - } - } - } - - // Part b - if (self::m($word) > 1 AND self::doubleConsonant($word) AND substr($word, -1) == 'l') { - $word = substr($word, 0, -1); - } - - return $word; - } - - /** - * Replaces the first string with the second, at the end of the string - * - * If third arg is given, then the preceding string must match that m - * count at least. - * - * @param string $str String to check - * @param string $check Ending to check for - * @param string $repl Replacement string - * @param int $m Optional minimum number of m() to meet - * - * @return bool Whether the $check string was at the end of the $str - * string. True does not necessarily mean that it was - * replaced. - */ - private static function replace(&$str, $check, $repl, $m = null) - { - $len = 0 - strlen($check); - - if (substr($str, $len) == $check) { - $substr = substr($str, 0, $len); - if (is_null($m) OR self::m($substr) > $m) { - $str = $substr . $repl; - } - - return true; - } - - return false; - } - - /** - * What, you mean it's not obvious from the name? - * - * m() measures the number of consonant sequences in $str. if c is - * a consonant sequence and v a vowel sequence, and <..> indicates arbitrary - * presence, - * - * gives 0 - * vc gives 1 - * vcvc gives 2 - * vcvcvc gives 3 - * - * @param string $str The string to return the m count for - * - * @return int The m count - */ - private static function m($str) - { - $c = self::$regex_consonant; - $v = self::$regex_vowel; - - $str = preg_replace("#^$c+#", '', $str); - $str = preg_replace("#$v+$#", '', $str); - - preg_match_all("#($v+$c+)#", $str, $matches); - - return count($matches[1]); - } - - /** - * Returns true/false as to whether the given string contains two - * of the same consonant next to each other at the end of the string. - * - * @param string $str String to check - * - * @return bool Result - */ - private static function doubleConsonant($str) - { - $c = self::$regex_consonant; - - return preg_match("#$c{2}$#", $str, $matches) AND $matches[0]{0} == $matches[0]{1}; - } - - /** - * Checks for ending CVC sequence where second C is not W, X or Y - * - * @param string $str String to check - * - * @return bool Result - */ - private static function cvc($str) - { - $c = self::$regex_consonant; - $v = self::$regex_vowel; - - return preg_match("#($c$v$c)$#", $str, $matches) - AND strlen($matches[1]) == 3 - AND $matches[1]{2} != 'w' - AND $matches[1]{2} != 'x' - AND $matches[1]{2} != 'y'; - } -} diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -317,12 +317,6 @@ 'PhutilSafeHTMLProducerInterface' => 'markup/PhutilSafeHTMLProducerInterface.php', 'PhutilSafeHTMLTestCase' => 'markup/__tests__/PhutilSafeHTMLTestCase.php', 'PhutilSaturateStdoutDaemon' => 'daemon/torture/PhutilSaturateStdoutDaemon.php', - 'PhutilSearchQueryCompiler' => 'search/PhutilSearchQueryCompiler.php', - 'PhutilSearchQueryCompilerSyntaxException' => 'search/PhutilSearchQueryCompilerSyntaxException.php', - 'PhutilSearchQueryCompilerTestCase' => 'search/__tests__/PhutilSearchQueryCompilerTestCase.php', - 'PhutilSearchQueryToken' => 'search/PhutilSearchQueryToken.php', - 'PhutilSearchStemmer' => 'search/PhutilSearchStemmer.php', - 'PhutilSearchStemmerTestCase' => 'search/__tests__/PhutilSearchStemmerTestCase.php', 'PhutilServiceProfiler' => 'serviceprofiler/PhutilServiceProfiler.php', 'PhutilShellLexer' => 'lexer/PhutilShellLexer.php', 'PhutilShellLexerTestCase' => 'lexer/__tests__/PhutilShellLexerTestCase.php', @@ -860,12 +854,6 @@ 'PhutilSafeHTML' => 'Phobject', 'PhutilSafeHTMLTestCase' => 'PhutilTestCase', 'PhutilSaturateStdoutDaemon' => 'PhutilTortureTestDaemon', - 'PhutilSearchQueryCompiler' => 'Phobject', - 'PhutilSearchQueryCompilerSyntaxException' => 'Exception', - 'PhutilSearchQueryCompilerTestCase' => 'PhutilTestCase', - 'PhutilSearchQueryToken' => 'Phobject', - 'PhutilSearchStemmer' => 'Phobject', - 'PhutilSearchStemmerTestCase' => 'PhutilTestCase', 'PhutilServiceProfiler' => 'Phobject', 'PhutilShellLexer' => 'PhutilLexer', 'PhutilShellLexerTestCase' => 'PhutilTestCase', diff --git a/src/search/PhutilSearchQueryCompiler.php b/src/search/PhutilSearchQueryCompiler.php deleted file mode 100644 --- a/src/search/PhutilSearchQueryCompiler.php +++ /dev/null @@ -1,374 +0,0 @@ -<()~*:""&|'; - private $query; - private $stemmer; - private $enableFunctions = false; - - const OPERATOR_NOT = 'not'; - const OPERATOR_AND = 'and'; - const OPERATOR_SUBSTRING = 'sub'; - const OPERATOR_EXACT = 'exact'; - - public function setOperators($operators) { - $this->operators = $operators; - return $this; - } - - public function getOperators() { - return $this->operators; - } - - public function setStemmer(PhutilSearchStemmer $stemmer) { - $this->stemmer = $stemmer; - return $this; - } - - public function getStemmer() { - return $this->stemmer; - } - - public function setEnableFunctions($enable_functions) { - $this->enableFunctions = $enable_functions; - return $this; - } - - public function getEnableFunctions() { - return $this->enableFunctions; - } - - public function compileQuery(array $tokens) { - assert_instances_of($tokens, 'PhutilSearchQueryToken'); - - $result = array(); - foreach ($tokens as $token) { - $result[] = $this->renderToken($token); - } - - return $this->compileRenderedTokens($result); - } - - public function compileLiteralQuery(array $tokens) { - assert_instances_of($tokens, 'PhutilSearchQueryToken'); - - $result = array(); - foreach ($tokens as $token) { - if (!$token->isQuoted()) { - continue; - } - $result[] = $this->renderToken($token); - } - - return $this->compileRenderedTokens($result); - } - - public function compileStemmedQuery(array $tokens) { - assert_instances_of($tokens, 'PhutilSearchQueryToken'); - - $result = array(); - foreach ($tokens as $token) { - if ($token->isQuoted()) { - continue; - } - $result[] = $this->renderToken($token, $this->getStemmer()); - } - - return $this->compileRenderedTokens($result); - } - - private function compileRenderedTokens(array $list) { - if (!$list) { - return null; - } - - $list = array_unique($list); - return implode(' ', $list); - } - - public function newTokens($query) { - $results = $this->tokenizeQuery($query); - - $tokens = array(); - foreach ($results as $result) { - $tokens[] = PhutilSearchQueryToken::newFromDictionary($result); - } - - return $tokens; - } - - private function tokenizeQuery($query) { - $maximum_bytes = 1024; - - $query_bytes = strlen($query); - if ($query_bytes > $maximum_bytes) { - throw new PhutilSearchQueryCompilerSyntaxException( - pht( - 'Query is too long (%s bytes, maximum is %s bytes).', - new PhutilNumber($query_bytes), - new PhutilNumber($maximum_bytes))); - } - - $query = phutil_utf8v($query); - $length = count($query); - - $enable_functions = $this->getEnableFunctions(); - - $mode = 'scan'; - $current_operator = array(); - $current_token = array(); - $current_function = null; - $is_quoted = false; - $tokens = array(); - - if ($enable_functions) { - $operator_characters = '[~=+-]'; - } else { - $operator_characters = '[+-]'; - } - - for ($ii = 0; $ii < $length; $ii++) { - $character = $query[$ii]; - - if ($mode == 'scan') { - if (preg_match('/^\s\z/u', $character)) { - continue; - } - - $mode = 'function'; - } - - if ($mode == 'function') { - $mode = 'operator'; - - if ($enable_functions) { - $found = false; - for ($jj = $ii; $jj < $length; $jj++) { - if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) { - continue; - } - if ($query[$jj] == ':') { - $found = $jj; - } - break; - } - - if ($found !== false) { - $function = array_slice($query, $ii, ($jj - $ii)); - $current_function = implode('', $function); - - if (!strlen($current_function)) { - $current_function = null; - } - - $ii = $jj; - continue; - } - } - } - - if ($mode == 'operator') { - if (preg_match('/^\s\z/u', $character)) { - continue; - } - - if (preg_match('/^'.$operator_characters.'\z/', $character)) { - $current_operator[] = $character; - continue; - } - - $mode = 'quote'; - } - - if ($mode == 'quote') { - if (preg_match('/^"\z/', $character)) { - $is_quoted = true; - $mode = 'token'; - continue; - } - - $mode = 'token'; - } - - if ($mode == 'token') { - $capture = false; - $was_quoted = $is_quoted; - if ($is_quoted) { - if (preg_match('/^"\z/', $character)) { - $capture = true; - $mode = 'scan'; - $is_quoted = false; - } - } else { - if (preg_match('/^\s\z/u', $character)) { - $capture = true; - $mode = 'scan'; - } - - if (preg_match('/^"\z/', $character)) { - $capture = true; - $mode = 'token'; - $is_quoted = true; - } - } - - if ($capture) { - $token = array( - 'operator' => $current_operator, - 'quoted' => $was_quoted, - 'value' => $current_token, - ); - - if ($enable_functions) { - $token['function'] = $current_function; - } - - $tokens[] = $token; - - $current_operator = array(); - $current_token = array(); - $current_function = null; - continue; - } else { - $current_token[] = $character; - } - } - } - - if ($is_quoted) { - throw new PhutilSearchQueryCompilerSyntaxException( - pht( - 'Query contains unmatched double quotes.')); - } - - if ($mode == 'operator') { - throw new PhutilSearchQueryCompilerSyntaxException( - pht( - 'Query contains operator ("%s") with no search term.', - implode('', $current_operator))); - } - - $token = array( - 'operator' => $current_operator, - 'quoted' => false, - 'value' => $current_token, - ); - - if ($enable_functions) { - $token['function'] = $current_function; - } - - $tokens[] = $token; - - $results = array(); - foreach ($tokens as $token) { - $value = implode('', $token['value']); - $operator_string = implode('', $token['operator']); - - if (!strlen($value)) { - continue; - } - - $is_quoted = $token['quoted']; - - switch ($operator_string) { - case '-': - $operator = self::OPERATOR_NOT; - break; - case '~': - $operator = self::OPERATOR_SUBSTRING; - break; - case '=': - $operator = self::OPERATOR_EXACT; - break; - case '+': - $operator = self::OPERATOR_AND; - break; - case '': - // See T12995. If this query term contains Chinese, Japanese or - // Korean characters, treat the term as a substring term by default. - // These languages do not separate words with spaces, so the term - // search mode is normally useless. - if ($enable_functions && !$is_quoted && phutil_utf8_is_cjk($value)) { - $operator = self::OPERATOR_SUBSTRING; - } else { - $operator = self::OPERATOR_AND; - } - break; - default: - throw new PhutilSearchQueryCompilerSyntaxException( - pht( - 'Query has an invalid sequence of operators ("%s").', - $operator_string)); - } - - $result = array( - 'operator' => $operator, - 'quoted' => $is_quoted, - 'value' => $value, - ); - - if ($enable_functions) { - $result['function'] = $token['function']; - } - - $results[] = $result; - } - - return $results; - } - - private function renderToken( - PhutilSearchQueryToken $token, - PhutilSearchStemmer $stemmer = null) { - $value = $token->getValue(); - - if ($stemmer) { - $value = $stemmer->stemToken($value); - } - - $value = $this->quoteToken($value); - $operator = $token->getOperator(); - $prefix = $this->getOperatorPrefix($operator); - - $value = $prefix.$value; - - return $value; - } - - private function getOperatorPrefix($operator) { - $operators = $this->operators; - - switch ($operator) { - case self::OPERATOR_AND: - $prefix = $operators[0]; - break; - case self::OPERATOR_NOT: - $prefix = $operators[2]; - break; - default: - throw new PhutilSearchQueryCompilerSyntaxException( - pht( - 'Unsupported operator prefix "%s".', - $operator)); - } - - if ($prefix == ' ') { - $prefix = null; - } - - return $prefix; - } - - private function quoteToken($value) { - $operators = $this->operators; - - $open_quote = $this->operators[10]; - $close_quote = $this->operators[11]; - - return $open_quote.$value.$close_quote; - } - -} diff --git a/src/search/PhutilSearchQueryCompilerSyntaxException.php b/src/search/PhutilSearchQueryCompilerSyntaxException.php deleted file mode 100644 --- a/src/search/PhutilSearchQueryCompilerSyntaxException.php +++ /dev/null @@ -1,4 +0,0 @@ -isQuoted = $dictionary['quoted']; - $token->operator = $dictionary['operator']; - $token->value = $dictionary['value']; - $token->function = idx($dictionary, 'function'); - - return $token; - } - - public function isQuoted() { - return $this->isQuoted; - } - - public function getValue() { - return $this->value; - } - - public function getOperator() { - return $this->operator; - } - - public function getFunction() { - return $this->function; - } - -} diff --git a/src/search/PhutilSearchStemmer.php b/src/search/PhutilSearchStemmer.php deleted file mode 100644 --- a/src/search/PhutilSearchStemmer.php +++ /dev/null @@ -1,74 +0,0 @@ -normalizeToken($token); - return $this->applyStemmer($token); - } - - public function stemCorpus($corpus) { - $corpus = $this->normalizeCorpus($corpus); - $tokens = preg_split('/[^a-zA-Z0-9\x7F-\xFF._]+/', $corpus); - - $words = array(); - foreach ($tokens as $key => $token) { - $token = trim($token, '._'); - - if (strlen($token) < 3) { - continue; - } - - $words[$token] = $token; - } - - $stems = array(); - foreach ($words as $word) { - $stems[] = $this->applyStemmer($word); - } - - return implode(' ', $stems); - } - - private function normalizeToken($token) { - return phutil_utf8_strtolower($token); - } - - private function normalizeCorpus($corpus) { - return phutil_utf8_strtolower($corpus); - } - - /** - * @phutil-external-symbol class Porter - */ - private function applyStemmer($normalized_token) { - // If the token has internal punctuation, handle it literally. This - // deals with things like domain names, Conduit API methods, and other - // sorts of informal tokens. - if (preg_match('/[._]/', $normalized_token)) { - return $normalized_token; - } - - static $loaded; - - if ($loaded === null) { - $root = dirname(phutil_get_library_root('phutil')); - require_once $root.'/externals/porter-stemmer/src/Porter.php'; - $loaded = true; - } - - - $stem = Porter::stem($normalized_token); - - // If the stem is too short, it won't be a candidate for indexing. These - // tokens are also likely to be acronyms (like "DNS") rather than real - // English words. - if (strlen($stem) < 3) { - return $normalized_token; - } - - return $stem; - } - -} diff --git a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php deleted file mode 100644 --- a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php +++ /dev/null @@ -1,220 +0,0 @@ - null, - 'cat dog' => '+"cat" +"dog"', - 'cat -dog' => '+"cat" -"dog"', - 'cat-dog' => '+"cat-dog"', - - // If there are spaces after an operator, the operator applies to the - // next search term. - 'cat - dog' => '+"cat" -"dog"', - - // Double quotes serve as delimiters even if there is no whitespace - // between terms. - '"cat"dog' => '+"cat" +"dog"', - - // This query is too long. - str_repeat('x', 2048) => false, - - // Multiple operators are not permitted. - '++cat' => false, - '+-cat' => false, - '--cat' => false, - - // Stray operators are not permitted. - '+' => false, - 'cat +' => false, - - // Double quotes must be paired. - '"' => false, - 'cat "' => false, - '"cat' => false, - 'A"' => false, - 'A"B"' => '+"A" +"B"', - ); - - $this->assertCompileQueries($tests); - - // Test that we compile queries correctly if the operators have been - // swapped to use "AND" by default. - $operator_tests = array( - 'cat dog' => '"cat" "dog"', - 'cat -dog' => '"cat" -"dog"', - ); - $this->assertCompileQueries($operator_tests, ' |-><()~*:""&\''); - - - // Test that we compile queries correctly if the quote operators have - // been swapped to differ. - $quote_tests = array( - 'cat dog' => '+[cat] +[dog]', - 'cat -dog' => '+[cat] -[dog]', - ); - $this->assertCompileQueries($quote_tests, '+ -><()~*:[]&|'); - - } - - public function testCompileQueriesWithStemming() { - $stemming_tests = array( - 'cat dog' => array( - null, - '+"cat" +"dog"', - ), - 'cats dogs' => array( - null, - '+"cat" +"dog"', - ), - 'cats "dogs"' => array( - '+"dogs"', - '+"cat"', - ), - '"blessed blade" of the windseeker' => array( - '+"blessed blade"', - '+"of" +"the" +"windseek"', - ), - 'mailing users for mentions on tasks' => array( - null, - '+"mail" +"user" +"for" +"mention" +"on" +"task"', - ), - ); - - $stemmer = new PhutilSearchStemmer(); - $this->assertCompileQueries($stemming_tests, null, $stemmer); - } - - public function testCompileQueriesWithFunctions() { - $op_and = PhutilSearchQueryCompiler::OPERATOR_AND; - $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING; - $op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT; - - $mao = "\xE7\x8C\xAB"; - - $function_tests = array( - 'cat' => array( - array(null, $op_and, 'cat'), - ), - ':cat' => array( - array(null, $op_and, 'cat'), - ), - 'title:cat' => array( - array('title', $op_and, 'cat'), - ), - 'title:cat:dog' => array( - array('title', $op_and, 'cat:dog'), - ), - 'title:~cat' => array( - array('title', $op_sub, 'cat'), - ), - 'cat title:="Meow Meow"' => array( - array(null, $op_and, 'cat'), - array('title', $op_exact, 'Meow Meow'), - ), - 'title:cat title:dog' => array( - array('title', $op_and, 'cat'), - array('title', $op_and, 'dog'), - ), - '~"core and seven years ag"' => array( - array(null, $op_sub, 'core and seven years ag'), - ), - $mao => array( - array(null, $op_sub, $mao), - ), - '+'.$mao => array( - array(null, $op_and, $mao), - ), - '~'.$mao => array( - array(null, $op_sub, $mao), - ), - '"'.$mao.'"' => array( - array(null, $op_and, $mao), - ), - ); - - $this->assertCompileFunctionQueries($function_tests); - } - - private function assertCompileQueries( - array $tests, - $operators = null, - PhutilSearchStemmer $stemmer = null) { - foreach ($tests as $input => $expect) { - $caught = null; - - $query = null; - $literal_query = null; - $stemmed_query = null; - - try { - $compiler = new PhutilSearchQueryCompiler(); - - if ($operators !== null) { - $compiler->setOperators($operators); - } - - if ($stemmer !== null) { - $compiler->setStemmer($stemmer); - } - - $tokens = $compiler->newTokens($input); - - if ($stemmer) { - $literal_query = $compiler->compileLiteralQuery($tokens); - $stemmed_query = $compiler->compileStemmedQuery($tokens); - } else { - $query = $compiler->compileQuery($tokens); - } - } catch (PhutilSearchQueryCompilerSyntaxException $ex) { - $caught = $ex; - } - - if ($caught !== null) { - $query = false; - $literal_query = false; - $stemmed_query = false; - } - - if (!$stemmer) { - $this->assertEqual( - $expect, - $query, - pht('Compilation of query: %s', $input)); - } else { - $this->assertEqual( - $expect, - ($literal_query === false) - ? false - : array($literal_query, $stemmed_query), - pht('Stemmed compilation of query: %s', $input)); - } - } - } - - private function assertCompileFunctionQueries(array $tests) { - foreach ($tests as $input => $expect) { - $compiler = id(new PhutilSearchQueryCompiler()) - ->setEnableFunctions(true); - - $tokens = $compiler->newTokens($input); - - $result = array(); - foreach ($tokens as $token) { - $result[] = array( - $token->getFunction(), - $token->getOperator(), - $token->getValue(), - ); - } - - $this->assertEqual( - $expect, - $result, - pht('Function compilation of query: %s', $input)); - } - } - -} diff --git a/src/search/__tests__/PhutilSearchStemmerTestCase.php b/src/search/__tests__/PhutilSearchStemmerTestCase.php deleted file mode 100644 --- a/src/search/__tests__/PhutilSearchStemmerTestCase.php +++ /dev/null @@ -1,85 +0,0 @@ - 'token', - 'panels' => 'panel', - - 'renames' => 'renam', - 'rename' => 'renam', - - 'components' => 'compon', - 'component' => 'compon', - - 'implementation' => 'implement', - 'implements' => 'implement', - 'implementing' => 'implement', - 'implementer' => 'implement', - - 'deleting' => 'delet', - 'deletion' => 'delet', - 'delete' => 'delet', - - 'erratically' => 'errat', - 'erratic' => 'errat', - - // Stems should be normalized. - 'DOG' => 'dog', - - // If stemming would bring a token under 3 characters, it should not - // be stemmed. - 'dns' => 'dns', - 'nis' => 'nis', - - // Complex tokens with internal punctuation should be left untouched; - // these are usually things like domain names, API calls, informal tags, - // etc. - 'apples' => 'appl', - 'bananas' => 'banana', - 'apples_bananas' => 'apples_bananas', - 'apples_bananas.apples_bananas' => 'apples_bananas.apples_bananas', - ); - - $stemmer = new PhutilSearchStemmer(); - foreach ($tests as $input => $expect) { - $stem = $stemmer->stemToken($input); - $this->assertEqual( - $expect, - $stem, - pht('Token stem of "%s".', $input)); - } - } - - public function testStemDocuments() { - $tests = array( - 'The wild boar meandered erratically.' => - 'the wild boar meander errat', - 'Fool me onc, shame on you. Fool me twice, shame on me.' => - 'fool onc shame you twice', - 'Fireball is a seventh-level spell which deals 2d16 points of damage '. - 'in a 1-meter radius around a target.' => - 'firebal seventh level spell which deal 2d16 point damag meter '. - 'radiu around target', - 'apples-bananas' => 'appl banana', - 'apples_bananas' => 'apples_bananas', - 'apples.bananas' => 'apples.bananas', - 'oddly-proportioned' => 'oddli proport', - ); - - $stemmer = new PhutilSearchStemmer(); - foreach ($tests as $input => $expect) { - $stem = $stemmer->stemCorpus($input); - $this->assertEqual( - $expect, - $stem, - pht('Corpus stem of: %s', $input)); - } - } - - -}