Page MenuHomePhabricator

D20940.id49894.diff
No OneTemporary

D20940.id49894.diff

diff --git a/externals/porter-stemmer/LICENSE b/externals/porter-stemmer/LICENSE
deleted file mode 100644
--- a/externals/porter-stemmer/LICENSE
+++ /dev/null
@@ -1,20 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2005-2016 Richard Heyes (http://www.phpguru.org/)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/externals/porter-stemmer/README.md b/externals/porter-stemmer/README.md
deleted file mode 100644
--- a/externals/porter-stemmer/README.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Porter Stemmer by Richard Heyes
-
-# Installation (with composer)
-
-```json
-{
- "require": {
- "camspiers/porter-stemmer": "1.0.0"
- }
-}
-```
-
- $ composer install
-
-# Usage
-
-```php
-$stem = Porter::Stem($word);
-```
-
-# License
-
-The MIT License (MIT)
-
-Copyright (c) 2005-2016 Richard Heyes (http://www.phpguru.org/)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/externals/porter-stemmer/src/Porter.php b/externals/porter-stemmer/src/Porter.php
deleted file mode 100644
--- a/externals/porter-stemmer/src/Porter.php
+++ /dev/null
@@ -1,426 +0,0 @@
-<?php
-
-# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
-
-/**
- * Copyright (c) 2005-2016 Richard Heyes (http://www.phpguru.org/)
- *
- * Portions Copyright 2003-2007 Jon Abernathy <jon@chuggnutt.com>
- *
- * Originally available under the GPL 2 or greater. Relicensed with permission
- * of original authors under the MIT License in 2016.
- *
- * All rights reserved.
- *
- * @package PorterStemmer
- * @author Richard Heyes
- * @author Jon Abernathy <jon@chuggnutt.com>
- * @copyright 2005-2016 Richard Heyes (http://www.phpguru.org/)
- * @license http://www.opensource.org/licenses/mit-license.html MIT License
- */
-
-/**
- * PHP 5 Implementation of the Porter Stemmer algorithm. Certain elements
- * were borrowed from the (broken) implementation by Jon Abernathy.
- *
- * See http://tartarus.org/~martin/PorterStemmer/ for a description of the
- * algorithm.
- *
- * Usage:
- *
- * $stem = PorterStemmer::Stem($word);
- *
- * How easy is that?
- *
- * @package PorterStemmer
- * @author Richard Heyes
- * @author Jon Abernathy <jon@chuggnutt.com>
- * @copyright 2005-2016 Richard Heyes (http://www.phpguru.org/)
- * @license http://www.opensource.org/licenses/mit-license.html MIT License
- */
-class Porter
-{
- /**
- * Regex for matching a consonant
- *
- * @var string
- */
- private static $regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';
-
- /**
- * Regex for matching a vowel
- *
- * @var string
- */
- private static $regex_vowel = '(?:[aeiou]|(?<![aeiou])y)';
-
- /**
- * Stems a word. Simple huh?
- *
- * @param string $word Word to stem
- *
- * @return string Stemmed word
- */
- public static function Stem($word)
- {
- if (strlen($word) <= 2) {
- return $word;
- }
-
- $word = self::step1ab($word);
- $word = self::step1c($word);
- $word = self::step2($word);
- $word = self::step3($word);
- $word = self::step4($word);
- $word = self::step5($word);
-
- return $word;
- }
-
- /**
- * Step 1
- */
- private static function step1ab($word)
- {
- // Part a
- if (substr($word, -1) == 's') {
-
- self::replace($word, 'sses', 'ss')
- OR self::replace($word, 'ies', 'i')
- OR self::replace($word, 'ss', 'ss')
- OR self::replace($word, 's', '');
- }
-
- // Part b
- if (substr($word, -2, 1) != 'e' OR !self::replace($word, 'eed', 'ee', 0)) { // First rule
- $v = self::$regex_vowel;
-
- // ing and ed
- if ( preg_match("#$v+#", substr($word, 0, -3)) && self::replace($word, 'ing', '')
- OR preg_match("#$v+#", substr($word, 0, -2)) && self::replace($word, 'ed', '')) { // Note use of && and OR, for precedence reasons
-
- // If one of above two test successful
- if ( !self::replace($word, 'at', 'ate')
- AND !self::replace($word, 'bl', 'ble')
- AND !self::replace($word, 'iz', 'ize')) {
-
- // Double consonant ending
- if ( self::doubleConsonant($word)
- AND substr($word, -2) != 'll'
- AND substr($word, -2) != 'ss'
- AND substr($word, -2) != 'zz') {
-
- $word = substr($word, 0, -1);
-
- } elseif (self::m($word) == 1 AND self::cvc($word)) {
- $word .= 'e';
- }
- }
- }
- }
-
- return $word;
- }
-
- /**
- * Step 1c
- *
- * @param string $word Word to stem
- */
- private static function step1c($word)
- {
- $v = self::$regex_vowel;
-
- if (substr($word, -1) == 'y' && preg_match("#$v+#", substr($word, 0, -1))) {
- self::replace($word, 'y', 'i');
- }
-
- return $word;
- }
-
- /**
- * Step 2
- *
- * @param string $word Word to stem
- */
- private static function step2($word)
- {
- switch (substr($word, -2, 1)) {
- case 'a':
- self::replace($word, 'ational', 'ate', 0)
- OR self::replace($word, 'tional', 'tion', 0);
- break;
-
- case 'c':
- self::replace($word, 'enci', 'ence', 0)
- OR self::replace($word, 'anci', 'ance', 0);
- break;
-
- case 'e':
- self::replace($word, 'izer', 'ize', 0);
- break;
-
- case 'g':
- self::replace($word, 'logi', 'log', 0);
- break;
-
- case 'l':
- self::replace($word, 'entli', 'ent', 0)
- OR self::replace($word, 'ousli', 'ous', 0)
- OR self::replace($word, 'alli', 'al', 0)
- OR self::replace($word, 'bli', 'ble', 0)
- OR self::replace($word, 'eli', 'e', 0);
- break;
-
- case 'o':
- self::replace($word, 'ization', 'ize', 0)
- OR self::replace($word, 'ation', 'ate', 0)
- OR self::replace($word, 'ator', 'ate', 0);
- break;
-
- case 's':
- self::replace($word, 'iveness', 'ive', 0)
- OR self::replace($word, 'fulness', 'ful', 0)
- OR self::replace($word, 'ousness', 'ous', 0)
- OR self::replace($word, 'alism', 'al', 0);
- break;
-
- case 't':
- self::replace($word, 'biliti', 'ble', 0)
- OR self::replace($word, 'aliti', 'al', 0)
- OR self::replace($word, 'iviti', 'ive', 0);
- break;
- }
-
- return $word;
- }
-
- /**
- * Step 3
- *
- * @param string $word String to stem
- */
- private static function step3($word)
- {
- switch (substr($word, -2, 1)) {
- case 'a':
- self::replace($word, 'ical', 'ic', 0);
- break;
-
- case 's':
- self::replace($word, 'ness', '', 0);
- break;
-
- case 't':
- self::replace($word, 'icate', 'ic', 0)
- OR self::replace($word, 'iciti', 'ic', 0);
- break;
-
- case 'u':
- self::replace($word, 'ful', '', 0);
- break;
-
- case 'v':
- self::replace($word, 'ative', '', 0);
- break;
-
- case 'z':
- self::replace($word, 'alize', 'al', 0);
- break;
- }
-
- return $word;
- }
-
- /**
- * Step 4
- *
- * @param string $word Word to stem
- */
- private static function step4($word)
- {
- switch (substr($word, -2, 1)) {
- case 'a':
- self::replace($word, 'al', '', 1);
- break;
-
- case 'c':
- self::replace($word, 'ance', '', 1)
- OR self::replace($word, 'ence', '', 1);
- break;
-
- case 'e':
- self::replace($word, 'er', '', 1);
- break;
-
- case 'i':
- self::replace($word, 'ic', '', 1);
- break;
-
- case 'l':
- self::replace($word, 'able', '', 1)
- OR self::replace($word, 'ible', '', 1);
- break;
-
- case 'n':
- self::replace($word, 'ant', '', 1)
- OR self::replace($word, 'ement', '', 1)
- OR self::replace($word, 'ment', '', 1)
- OR self::replace($word, 'ent', '', 1);
- break;
-
- case 'o':
- if (substr($word, -4) == 'tion' OR substr($word, -4) == 'sion') {
- self::replace($word, 'ion', '', 1);
- } else {
- self::replace($word, 'ou', '', 1);
- }
- break;
-
- case 's':
- self::replace($word, 'ism', '', 1);
- break;
-
- case 't':
- self::replace($word, 'ate', '', 1)
- OR self::replace($word, 'iti', '', 1);
- break;
-
- case 'u':
- self::replace($word, 'ous', '', 1);
- break;
-
- case 'v':
- self::replace($word, 'ive', '', 1);
- break;
-
- case 'z':
- self::replace($word, 'ize', '', 1);
- break;
- }
-
- return $word;
- }
-
- /**
- * Step 5
- *
- * @param string $word Word to stem
- */
- private static function step5($word)
- {
- // Part a
- if (substr($word, -1) == 'e') {
- if (self::m(substr($word, 0, -1)) > 1) {
- self::replace($word, 'e', '');
-
- } elseif (self::m(substr($word, 0, -1)) == 1) {
-
- if (!self::cvc(substr($word, 0, -1))) {
- self::replace($word, 'e', '');
- }
- }
- }
-
- // Part b
- if (self::m($word) > 1 AND self::doubleConsonant($word) AND substr($word, -1) == 'l') {
- $word = substr($word, 0, -1);
- }
-
- return $word;
- }
-
- /**
- * Replaces the first string with the second, at the end of the string
- *
- * If third arg is given, then the preceding string must match that m
- * count at least.
- *
- * @param string $str String to check
- * @param string $check Ending to check for
- * @param string $repl Replacement string
- * @param int $m Optional minimum number of m() to meet
- *
- * @return bool Whether the $check string was at the end of the $str
- * string. True does not necessarily mean that it was
- * replaced.
- */
- private static function replace(&$str, $check, $repl, $m = null)
- {
- $len = 0 - strlen($check);
-
- if (substr($str, $len) == $check) {
- $substr = substr($str, 0, $len);
- if (is_null($m) OR self::m($substr) > $m) {
- $str = $substr . $repl;
- }
-
- return true;
- }
-
- return false;
- }
-
- /**
- * What, you mean it's not obvious from the name?
- *
- * m() measures the number of consonant sequences in $str. if c is
- * a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
- * presence,
- *
- * <c><v> gives 0
- * <c>vc<v> gives 1
- * <c>vcvc<v> gives 2
- * <c>vcvcvc<v> gives 3
- *
- * @param string $str The string to return the m count for
- *
- * @return int The m count
- */
- private static function m($str)
- {
- $c = self::$regex_consonant;
- $v = self::$regex_vowel;
-
- $str = preg_replace("#^$c+#", '', $str);
- $str = preg_replace("#$v+$#", '', $str);
-
- preg_match_all("#($v+$c+)#", $str, $matches);
-
- return count($matches[1]);
- }
-
- /**
- * Returns true/false as to whether the given string contains two
- * of the same consonant next to each other at the end of the string.
- *
- * @param string $str String to check
- *
- * @return bool Result
- */
- private static function doubleConsonant($str)
- {
- $c = self::$regex_consonant;
-
- return preg_match("#$c{2}$#", $str, $matches) AND $matches[0]{0} == $matches[0]{1};
- }
-
- /**
- * Checks for ending CVC sequence where second C is not W, X or Y
- *
- * @param string $str String to check
- *
- * @return bool Result
- */
- private static function cvc($str)
- {
- $c = self::$regex_consonant;
- $v = self::$regex_vowel;
-
- return preg_match("#($c$v$c)$#", $str, $matches)
- AND strlen($matches[1]) == 3
- AND $matches[1]{2} != 'w'
- AND $matches[1]{2} != 'x'
- AND $matches[1]{2} != 'y';
- }
-}
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -317,12 +317,6 @@
'PhutilSafeHTMLProducerInterface' => 'markup/PhutilSafeHTMLProducerInterface.php',
'PhutilSafeHTMLTestCase' => 'markup/__tests__/PhutilSafeHTMLTestCase.php',
'PhutilSaturateStdoutDaemon' => 'daemon/torture/PhutilSaturateStdoutDaemon.php',
- 'PhutilSearchQueryCompiler' => 'search/PhutilSearchQueryCompiler.php',
- 'PhutilSearchQueryCompilerSyntaxException' => 'search/PhutilSearchQueryCompilerSyntaxException.php',
- 'PhutilSearchQueryCompilerTestCase' => 'search/__tests__/PhutilSearchQueryCompilerTestCase.php',
- 'PhutilSearchQueryToken' => 'search/PhutilSearchQueryToken.php',
- 'PhutilSearchStemmer' => 'search/PhutilSearchStemmer.php',
- 'PhutilSearchStemmerTestCase' => 'search/__tests__/PhutilSearchStemmerTestCase.php',
'PhutilServiceProfiler' => 'serviceprofiler/PhutilServiceProfiler.php',
'PhutilShellLexer' => 'lexer/PhutilShellLexer.php',
'PhutilShellLexerTestCase' => 'lexer/__tests__/PhutilShellLexerTestCase.php',
@@ -860,12 +854,6 @@
'PhutilSafeHTML' => 'Phobject',
'PhutilSafeHTMLTestCase' => 'PhutilTestCase',
'PhutilSaturateStdoutDaemon' => 'PhutilTortureTestDaemon',
- 'PhutilSearchQueryCompiler' => 'Phobject',
- 'PhutilSearchQueryCompilerSyntaxException' => 'Exception',
- 'PhutilSearchQueryCompilerTestCase' => 'PhutilTestCase',
- 'PhutilSearchQueryToken' => 'Phobject',
- 'PhutilSearchStemmer' => 'Phobject',
- 'PhutilSearchStemmerTestCase' => 'PhutilTestCase',
'PhutilServiceProfiler' => 'Phobject',
'PhutilShellLexer' => 'PhutilLexer',
'PhutilShellLexerTestCase' => 'PhutilTestCase',
diff --git a/src/search/PhutilSearchQueryCompiler.php b/src/search/PhutilSearchQueryCompiler.php
deleted file mode 100644
--- a/src/search/PhutilSearchQueryCompiler.php
+++ /dev/null
@@ -1,374 +0,0 @@
-<?php
-
-final class PhutilSearchQueryCompiler
- extends Phobject {
-
- private $operators = '+ -><()~*:""&|';
- private $query;
- private $stemmer;
- private $enableFunctions = false;
-
- const OPERATOR_NOT = 'not';
- const OPERATOR_AND = 'and';
- const OPERATOR_SUBSTRING = 'sub';
- const OPERATOR_EXACT = 'exact';
-
- public function setOperators($operators) {
- $this->operators = $operators;
- return $this;
- }
-
- public function getOperators() {
- return $this->operators;
- }
-
- public function setStemmer(PhutilSearchStemmer $stemmer) {
- $this->stemmer = $stemmer;
- return $this;
- }
-
- public function getStemmer() {
- return $this->stemmer;
- }
-
- public function setEnableFunctions($enable_functions) {
- $this->enableFunctions = $enable_functions;
- return $this;
- }
-
- public function getEnableFunctions() {
- return $this->enableFunctions;
- }
-
- public function compileQuery(array $tokens) {
- assert_instances_of($tokens, 'PhutilSearchQueryToken');
-
- $result = array();
- foreach ($tokens as $token) {
- $result[] = $this->renderToken($token);
- }
-
- return $this->compileRenderedTokens($result);
- }
-
- public function compileLiteralQuery(array $tokens) {
- assert_instances_of($tokens, 'PhutilSearchQueryToken');
-
- $result = array();
- foreach ($tokens as $token) {
- if (!$token->isQuoted()) {
- continue;
- }
- $result[] = $this->renderToken($token);
- }
-
- return $this->compileRenderedTokens($result);
- }
-
- public function compileStemmedQuery(array $tokens) {
- assert_instances_of($tokens, 'PhutilSearchQueryToken');
-
- $result = array();
- foreach ($tokens as $token) {
- if ($token->isQuoted()) {
- continue;
- }
- $result[] = $this->renderToken($token, $this->getStemmer());
- }
-
- return $this->compileRenderedTokens($result);
- }
-
- private function compileRenderedTokens(array $list) {
- if (!$list) {
- return null;
- }
-
- $list = array_unique($list);
- return implode(' ', $list);
- }
-
- public function newTokens($query) {
- $results = $this->tokenizeQuery($query);
-
- $tokens = array();
- foreach ($results as $result) {
- $tokens[] = PhutilSearchQueryToken::newFromDictionary($result);
- }
-
- return $tokens;
- }
-
- private function tokenizeQuery($query) {
- $maximum_bytes = 1024;
-
- $query_bytes = strlen($query);
- if ($query_bytes > $maximum_bytes) {
- throw new PhutilSearchQueryCompilerSyntaxException(
- pht(
- 'Query is too long (%s bytes, maximum is %s bytes).',
- new PhutilNumber($query_bytes),
- new PhutilNumber($maximum_bytes)));
- }
-
- $query = phutil_utf8v($query);
- $length = count($query);
-
- $enable_functions = $this->getEnableFunctions();
-
- $mode = 'scan';
- $current_operator = array();
- $current_token = array();
- $current_function = null;
- $is_quoted = false;
- $tokens = array();
-
- if ($enable_functions) {
- $operator_characters = '[~=+-]';
- } else {
- $operator_characters = '[+-]';
- }
-
- for ($ii = 0; $ii < $length; $ii++) {
- $character = $query[$ii];
-
- if ($mode == 'scan') {
- if (preg_match('/^\s\z/u', $character)) {
- continue;
- }
-
- $mode = 'function';
- }
-
- if ($mode == 'function') {
- $mode = 'operator';
-
- if ($enable_functions) {
- $found = false;
- for ($jj = $ii; $jj < $length; $jj++) {
- if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) {
- continue;
- }
- if ($query[$jj] == ':') {
- $found = $jj;
- }
- break;
- }
-
- if ($found !== false) {
- $function = array_slice($query, $ii, ($jj - $ii));
- $current_function = implode('', $function);
-
- if (!strlen($current_function)) {
- $current_function = null;
- }
-
- $ii = $jj;
- continue;
- }
- }
- }
-
- if ($mode == 'operator') {
- if (preg_match('/^\s\z/u', $character)) {
- continue;
- }
-
- if (preg_match('/^'.$operator_characters.'\z/', $character)) {
- $current_operator[] = $character;
- continue;
- }
-
- $mode = 'quote';
- }
-
- if ($mode == 'quote') {
- if (preg_match('/^"\z/', $character)) {
- $is_quoted = true;
- $mode = 'token';
- continue;
- }
-
- $mode = 'token';
- }
-
- if ($mode == 'token') {
- $capture = false;
- $was_quoted = $is_quoted;
- if ($is_quoted) {
- if (preg_match('/^"\z/', $character)) {
- $capture = true;
- $mode = 'scan';
- $is_quoted = false;
- }
- } else {
- if (preg_match('/^\s\z/u', $character)) {
- $capture = true;
- $mode = 'scan';
- }
-
- if (preg_match('/^"\z/', $character)) {
- $capture = true;
- $mode = 'token';
- $is_quoted = true;
- }
- }
-
- if ($capture) {
- $token = array(
- 'operator' => $current_operator,
- 'quoted' => $was_quoted,
- 'value' => $current_token,
- );
-
- if ($enable_functions) {
- $token['function'] = $current_function;
- }
-
- $tokens[] = $token;
-
- $current_operator = array();
- $current_token = array();
- $current_function = null;
- continue;
- } else {
- $current_token[] = $character;
- }
- }
- }
-
- if ($is_quoted) {
- throw new PhutilSearchQueryCompilerSyntaxException(
- pht(
- 'Query contains unmatched double quotes.'));
- }
-
- if ($mode == 'operator') {
- throw new PhutilSearchQueryCompilerSyntaxException(
- pht(
- 'Query contains operator ("%s") with no search term.',
- implode('', $current_operator)));
- }
-
- $token = array(
- 'operator' => $current_operator,
- 'quoted' => false,
- 'value' => $current_token,
- );
-
- if ($enable_functions) {
- $token['function'] = $current_function;
- }
-
- $tokens[] = $token;
-
- $results = array();
- foreach ($tokens as $token) {
- $value = implode('', $token['value']);
- $operator_string = implode('', $token['operator']);
-
- if (!strlen($value)) {
- continue;
- }
-
- $is_quoted = $token['quoted'];
-
- switch ($operator_string) {
- case '-':
- $operator = self::OPERATOR_NOT;
- break;
- case '~':
- $operator = self::OPERATOR_SUBSTRING;
- break;
- case '=':
- $operator = self::OPERATOR_EXACT;
- break;
- case '+':
- $operator = self::OPERATOR_AND;
- break;
- case '':
- // See T12995. If this query term contains Chinese, Japanese or
- // Korean characters, treat the term as a substring term by default.
- // These languages do not separate words with spaces, so the term
- // search mode is normally useless.
- if ($enable_functions && !$is_quoted && phutil_utf8_is_cjk($value)) {
- $operator = self::OPERATOR_SUBSTRING;
- } else {
- $operator = self::OPERATOR_AND;
- }
- break;
- default:
- throw new PhutilSearchQueryCompilerSyntaxException(
- pht(
- 'Query has an invalid sequence of operators ("%s").',
- $operator_string));
- }
-
- $result = array(
- 'operator' => $operator,
- 'quoted' => $is_quoted,
- 'value' => $value,
- );
-
- if ($enable_functions) {
- $result['function'] = $token['function'];
- }
-
- $results[] = $result;
- }
-
- return $results;
- }
-
- private function renderToken(
- PhutilSearchQueryToken $token,
- PhutilSearchStemmer $stemmer = null) {
- $value = $token->getValue();
-
- if ($stemmer) {
- $value = $stemmer->stemToken($value);
- }
-
- $value = $this->quoteToken($value);
- $operator = $token->getOperator();
- $prefix = $this->getOperatorPrefix($operator);
-
- $value = $prefix.$value;
-
- return $value;
- }
-
- private function getOperatorPrefix($operator) {
- $operators = $this->operators;
-
- switch ($operator) {
- case self::OPERATOR_AND:
- $prefix = $operators[0];
- break;
- case self::OPERATOR_NOT:
- $prefix = $operators[2];
- break;
- default:
- throw new PhutilSearchQueryCompilerSyntaxException(
- pht(
- 'Unsupported operator prefix "%s".',
- $operator));
- }
-
- if ($prefix == ' ') {
- $prefix = null;
- }
-
- return $prefix;
- }
-
- private function quoteToken($value) {
- $operators = $this->operators;
-
- $open_quote = $this->operators[10];
- $close_quote = $this->operators[11];
-
- return $open_quote.$value.$close_quote;
- }
-
-}
diff --git a/src/search/PhutilSearchQueryCompilerSyntaxException.php b/src/search/PhutilSearchQueryCompilerSyntaxException.php
deleted file mode 100644
--- a/src/search/PhutilSearchQueryCompilerSyntaxException.php
+++ /dev/null
@@ -1,4 +0,0 @@
-<?php
-
-final class PhutilSearchQueryCompilerSyntaxException
- extends Exception {}
diff --git a/src/search/PhutilSearchQueryToken.php b/src/search/PhutilSearchQueryToken.php
deleted file mode 100644
--- a/src/search/PhutilSearchQueryToken.php
+++ /dev/null
@@ -1,37 +0,0 @@
-<?php
-
-final class PhutilSearchQueryToken extends Phobject {
-
- private $isQuoted;
- private $value;
- private $operator;
- private $function;
-
- public static function newFromDictionary(array $dictionary) {
- $token = new self();
-
- $token->isQuoted = $dictionary['quoted'];
- $token->operator = $dictionary['operator'];
- $token->value = $dictionary['value'];
- $token->function = idx($dictionary, 'function');
-
- return $token;
- }
-
- public function isQuoted() {
- return $this->isQuoted;
- }
-
- public function getValue() {
- return $this->value;
- }
-
- public function getOperator() {
- return $this->operator;
- }
-
- public function getFunction() {
- return $this->function;
- }
-
-}
diff --git a/src/search/PhutilSearchStemmer.php b/src/search/PhutilSearchStemmer.php
deleted file mode 100644
--- a/src/search/PhutilSearchStemmer.php
+++ /dev/null
@@ -1,74 +0,0 @@
-<?php
-
-final class PhutilSearchStemmer
- extends Phobject {
-
- public function stemToken($token) {
- $token = $this->normalizeToken($token);
- return $this->applyStemmer($token);
- }
-
- public function stemCorpus($corpus) {
- $corpus = $this->normalizeCorpus($corpus);
- $tokens = preg_split('/[^a-zA-Z0-9\x7F-\xFF._]+/', $corpus);
-
- $words = array();
- foreach ($tokens as $key => $token) {
- $token = trim($token, '._');
-
- if (strlen($token) < 3) {
- continue;
- }
-
- $words[$token] = $token;
- }
-
- $stems = array();
- foreach ($words as $word) {
- $stems[] = $this->applyStemmer($word);
- }
-
- return implode(' ', $stems);
- }
-
- private function normalizeToken($token) {
- return phutil_utf8_strtolower($token);
- }
-
- private function normalizeCorpus($corpus) {
- return phutil_utf8_strtolower($corpus);
- }
-
- /**
- * @phutil-external-symbol class Porter
- */
- private function applyStemmer($normalized_token) {
- // If the token has internal punctuation, handle it literally. This
- // deals with things like domain names, Conduit API methods, and other
- // sorts of informal tokens.
- if (preg_match('/[._]/', $normalized_token)) {
- return $normalized_token;
- }
-
- static $loaded;
-
- if ($loaded === null) {
- $root = dirname(phutil_get_library_root('phutil'));
- require_once $root.'/externals/porter-stemmer/src/Porter.php';
- $loaded = true;
- }
-
-
- $stem = Porter::stem($normalized_token);
-
- // If the stem is too short, it won't be a candidate for indexing. These
- // tokens are also likely to be acronyms (like "DNS") rather than real
- // English words.
- if (strlen($stem) < 3) {
- return $normalized_token;
- }
-
- return $stem;
- }
-
-}
diff --git a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php
deleted file mode 100644
--- a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php
+++ /dev/null
@@ -1,220 +0,0 @@
-<?php
-
-final class PhutilSearchQueryCompilerTestCase
- extends PhutilTestCase {
-
- public function testCompileQueries() {
- $tests = array(
- '' => null,
- 'cat dog' => '+"cat" +"dog"',
- 'cat -dog' => '+"cat" -"dog"',
- 'cat-dog' => '+"cat-dog"',
-
- // If there are spaces after an operator, the operator applies to the
- // next search term.
- 'cat - dog' => '+"cat" -"dog"',
-
- // Double quotes serve as delimiters even if there is no whitespace
- // between terms.
- '"cat"dog' => '+"cat" +"dog"',
-
- // This query is too long.
- str_repeat('x', 2048) => false,
-
- // Multiple operators are not permitted.
- '++cat' => false,
- '+-cat' => false,
- '--cat' => false,
-
- // Stray operators are not permitted.
- '+' => false,
- 'cat +' => false,
-
- // Double quotes must be paired.
- '"' => false,
- 'cat "' => false,
- '"cat' => false,
- 'A"' => false,
- 'A"B"' => '+"A" +"B"',
- );
-
- $this->assertCompileQueries($tests);
-
- // Test that we compile queries correctly if the operators have been
- // swapped to use "AND" by default.
- $operator_tests = array(
- 'cat dog' => '"cat" "dog"',
- 'cat -dog' => '"cat" -"dog"',
- );
- $this->assertCompileQueries($operator_tests, ' |-><()~*:""&\'');
-
-
- // Test that we compile queries correctly if the quote operators have
- // been swapped to differ.
- $quote_tests = array(
- 'cat dog' => '+[cat] +[dog]',
- 'cat -dog' => '+[cat] -[dog]',
- );
- $this->assertCompileQueries($quote_tests, '+ -><()~*:[]&|');
-
- }
-
- public function testCompileQueriesWithStemming() {
- $stemming_tests = array(
- 'cat dog' => array(
- null,
- '+"cat" +"dog"',
- ),
- 'cats dogs' => array(
- null,
- '+"cat" +"dog"',
- ),
- 'cats "dogs"' => array(
- '+"dogs"',
- '+"cat"',
- ),
- '"blessed blade" of the windseeker' => array(
- '+"blessed blade"',
- '+"of" +"the" +"windseek"',
- ),
- 'mailing users for mentions on tasks' => array(
- null,
- '+"mail" +"user" +"for" +"mention" +"on" +"task"',
- ),
- );
-
- $stemmer = new PhutilSearchStemmer();
- $this->assertCompileQueries($stemming_tests, null, $stemmer);
- }
-
- public function testCompileQueriesWithFunctions() {
- $op_and = PhutilSearchQueryCompiler::OPERATOR_AND;
- $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
- $op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
-
- $mao = "\xE7\x8C\xAB";
-
- $function_tests = array(
- 'cat' => array(
- array(null, $op_and, 'cat'),
- ),
- ':cat' => array(
- array(null, $op_and, 'cat'),
- ),
- 'title:cat' => array(
- array('title', $op_and, 'cat'),
- ),
- 'title:cat:dog' => array(
- array('title', $op_and, 'cat:dog'),
- ),
- 'title:~cat' => array(
- array('title', $op_sub, 'cat'),
- ),
- 'cat title:="Meow Meow"' => array(
- array(null, $op_and, 'cat'),
- array('title', $op_exact, 'Meow Meow'),
- ),
- 'title:cat title:dog' => array(
- array('title', $op_and, 'cat'),
- array('title', $op_and, 'dog'),
- ),
- '~"core and seven years ag"' => array(
- array(null, $op_sub, 'core and seven years ag'),
- ),
- $mao => array(
- array(null, $op_sub, $mao),
- ),
- '+'.$mao => array(
- array(null, $op_and, $mao),
- ),
- '~'.$mao => array(
- array(null, $op_sub, $mao),
- ),
- '"'.$mao.'"' => array(
- array(null, $op_and, $mao),
- ),
- );
-
- $this->assertCompileFunctionQueries($function_tests);
- }
-
- private function assertCompileQueries(
- array $tests,
- $operators = null,
- PhutilSearchStemmer $stemmer = null) {
- foreach ($tests as $input => $expect) {
- $caught = null;
-
- $query = null;
- $literal_query = null;
- $stemmed_query = null;
-
- try {
- $compiler = new PhutilSearchQueryCompiler();
-
- if ($operators !== null) {
- $compiler->setOperators($operators);
- }
-
- if ($stemmer !== null) {
- $compiler->setStemmer($stemmer);
- }
-
- $tokens = $compiler->newTokens($input);
-
- if ($stemmer) {
- $literal_query = $compiler->compileLiteralQuery($tokens);
- $stemmed_query = $compiler->compileStemmedQuery($tokens);
- } else {
- $query = $compiler->compileQuery($tokens);
- }
- } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
- $caught = $ex;
- }
-
- if ($caught !== null) {
- $query = false;
- $literal_query = false;
- $stemmed_query = false;
- }
-
- if (!$stemmer) {
- $this->assertEqual(
- $expect,
- $query,
- pht('Compilation of query: %s', $input));
- } else {
- $this->assertEqual(
- $expect,
- ($literal_query === false)
- ? false
- : array($literal_query, $stemmed_query),
- pht('Stemmed compilation of query: %s', $input));
- }
- }
- }
-
- private function assertCompileFunctionQueries(array $tests) {
- foreach ($tests as $input => $expect) {
- $compiler = id(new PhutilSearchQueryCompiler())
- ->setEnableFunctions(true);
-
- $tokens = $compiler->newTokens($input);
-
- $result = array();
- foreach ($tokens as $token) {
- $result[] = array(
- $token->getFunction(),
- $token->getOperator(),
- $token->getValue(),
- );
- }
-
- $this->assertEqual(
- $expect,
- $result,
- pht('Function compilation of query: %s', $input));
- }
- }
-
-}
diff --git a/src/search/__tests__/PhutilSearchStemmerTestCase.php b/src/search/__tests__/PhutilSearchStemmerTestCase.php
deleted file mode 100644
--- a/src/search/__tests__/PhutilSearchStemmerTestCase.php
+++ /dev/null
@@ -1,85 +0,0 @@
-<?php
-
-final class PhutilSearchStemmerTestCase
- extends PhutilTestCase {
-
- public function testStemTokens() {
- $tests = array(
- // Various real-world cases collected from users before we implemented
- // stemming.
- 'tokens' => 'token',
- 'panels' => 'panel',
-
- 'renames' => 'renam',
- 'rename' => 'renam',
-
- 'components' => 'compon',
- 'component' => 'compon',
-
- 'implementation' => 'implement',
- 'implements' => 'implement',
- 'implementing' => 'implement',
- 'implementer' => 'implement',
-
- 'deleting' => 'delet',
- 'deletion' => 'delet',
- 'delete' => 'delet',
-
- 'erratically' => 'errat',
- 'erratic' => 'errat',
-
- // Stems should be normalized.
- 'DOG' => 'dog',
-
- // If stemming would bring a token under 3 characters, it should not
- // be stemmed.
- 'dns' => 'dns',
- 'nis' => 'nis',
-
- // Complex tokens with internal punctuation should be left untouched;
- // these are usually things like domain names, API calls, informal tags,
- // etc.
- 'apples' => 'appl',
- 'bananas' => 'banana',
- 'apples_bananas' => 'apples_bananas',
- 'apples_bananas.apples_bananas' => 'apples_bananas.apples_bananas',
- );
-
- $stemmer = new PhutilSearchStemmer();
- foreach ($tests as $input => $expect) {
- $stem = $stemmer->stemToken($input);
- $this->assertEqual(
- $expect,
- $stem,
- pht('Token stem of "%s".', $input));
- }
- }
-
- public function testStemDocuments() {
- $tests = array(
- 'The wild boar meandered erratically.' =>
- 'the wild boar meander errat',
- 'Fool me onc, shame on you. Fool me twice, shame on me.' =>
- 'fool onc shame you twice',
- 'Fireball is a seventh-level spell which deals 2d16 points of damage '.
- 'in a 1-meter radius around a target.' =>
- 'firebal seventh level spell which deal 2d16 point damag meter '.
- 'radiu around target',
- 'apples-bananas' => 'appl banana',
- 'apples_bananas' => 'apples_bananas',
- 'apples.bananas' => 'apples.bananas',
- 'oddly-proportioned' => 'oddli proport',
- );
-
- $stemmer = new PhutilSearchStemmer();
- foreach ($tests as $input => $expect) {
- $stem = $stemmer->stemCorpus($input);
- $this->assertEqual(
- $expect,
- $stem,
- pht('Corpus stem of: %s', $input));
- }
- }
-
-
-}

File Metadata

Mime Type
text/plain
Expires
Mon, Nov 25, 12:11 PM (21 h, 14 m)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6785857
Default Alt Text
D20940.id49894.diff (37 KB)

Event Timeline