Changeset View
Changeset View
Standalone View
Standalone View
src/search/PhutilSearchQueryCompiler.php
| <?php | <?php | ||||
| final class PhutilSearchQueryCompiler | final class PhutilSearchQueryCompiler | ||||
| extends Phobject { | extends Phobject { | ||||
| private $operators = '+ -><()~*:""&|'; | private $operators = '+ -><()~*:""&|'; | ||||
| private $query; | private $query; | ||||
| private $stemmer; | private $stemmer; | ||||
| private $enableFunctions = false; | |||||
| const OPERATOR_NOT = 'not'; | const OPERATOR_NOT = 'not'; | ||||
| const OPERATOR_AND = 'and'; | const OPERATOR_AND = 'and'; | ||||
| const OPERATOR_SUBSTRING = 'sub'; | |||||
| const OPERATOR_EXACT = 'exact'; | |||||
| public function setOperators($operators) { | public function setOperators($operators) { | ||||
| $this->operators = $operators; | $this->operators = $operators; | ||||
| return $this; | return $this; | ||||
| } | } | ||||
| public function getOperators() { | public function getOperators() { | ||||
| return $this->operators; | return $this->operators; | ||||
| } | } | ||||
| public function setStemmer(PhutilSearchStemmer $stemmer) { | public function setStemmer(PhutilSearchStemmer $stemmer) { | ||||
| $this->stemmer = $stemmer; | $this->stemmer = $stemmer; | ||||
| return $this; | return $this; | ||||
| } | } | ||||
| public function getStemmer() { | public function getStemmer() { | ||||
| return $this->stemmer; | return $this->stemmer; | ||||
| } | } | ||||
| public function setEnableFunctions($enable_functions) { | |||||
| $this->enableFunctions = $enable_functions; | |||||
| return $this; | |||||
| } | |||||
| public function getEnableFunctions() { | |||||
| return $this->enableFunctions; | |||||
| } | |||||
| public function compileQuery(array $tokens) { | public function compileQuery(array $tokens) { | ||||
| assert_instances_of($tokens, 'PhutilSearchQueryToken'); | assert_instances_of($tokens, 'PhutilSearchQueryToken'); | ||||
| $result = array(); | $result = array(); | ||||
| foreach ($tokens as $token) { | foreach ($tokens as $token) { | ||||
| $result[] = $this->renderToken($token); | $result[] = $this->renderToken($token); | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | if ($query_bytes > $maximum_bytes) { | ||||
| 'Query is too long (%s bytes, maximum is %s bytes).', | 'Query is too long (%s bytes, maximum is %s bytes).', | ||||
| new PhutilNumber($query_bytes), | new PhutilNumber($query_bytes), | ||||
| new PhutilNumber($maximum_bytes))); | new PhutilNumber($maximum_bytes))); | ||||
| } | } | ||||
| $query = phutil_utf8v($query); | $query = phutil_utf8v($query); | ||||
| $length = count($query); | $length = count($query); | ||||
| $enable_functions = $this->getEnableFunctions(); | |||||
| $mode = 'scan'; | $mode = 'scan'; | ||||
| $current_operator = array(); | $current_operator = array(); | ||||
| $current_token = array(); | $current_token = array(); | ||||
| $current_function = null; | |||||
| $is_quoted = false; | $is_quoted = false; | ||||
| $tokens = array(); | $tokens = array(); | ||||
| if ($enable_functions) { | |||||
| $operator_characters = '[~=+-]'; | |||||
| } else { | |||||
| $operator_characters = '[+-]'; | |||||
| } | |||||
| for ($ii = 0; $ii < $length; $ii++) { | for ($ii = 0; $ii < $length; $ii++) { | ||||
| $character = $query[$ii]; | $character = $query[$ii]; | ||||
| if ($mode == 'scan') { | if ($mode == 'scan') { | ||||
| if (preg_match('/^\s\z/u', $character)) { | if (preg_match('/^\s\z/u', $character)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| $mode = 'function'; | |||||
| } | |||||
| if ($mode == 'function') { | |||||
| $mode = 'operator'; | $mode = 'operator'; | ||||
| if ($enable_functions) { | |||||
| $found = false; | |||||
| for ($jj = $ii; $jj < $length; $jj++) { | |||||
| if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) { | |||||
| continue; | |||||
| } | |||||
| if ($query[$jj] == ':') { | |||||
| $found = $jj; | |||||
| } | |||||
| break; | |||||
| } | |||||
| if ($found !== false) { | |||||
| $function = array_slice($query, $ii, ($jj - $ii)); | |||||
| $current_function = implode('', $function); | |||||
| if (!strlen($current_function)) { | |||||
| $current_function = null; | |||||
| } | |||||
| $ii = $jj; | |||||
| continue; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| if ($mode == 'operator') { | if ($mode == 'operator') { | ||||
| if (preg_match('/^\s\z/u', $character)) { | if (preg_match('/^\s\z/u', $character)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| if (preg_match('/^[+-]\z/', $character)) { | if (preg_match('/^'.$operator_characters.'\z/', $character)) { | ||||
| $current_operator[] = $character; | $current_operator[] = $character; | ||||
| continue; | continue; | ||||
| } | } | ||||
| $mode = 'quote'; | $mode = 'quote'; | ||||
| } | } | ||||
| if ($mode == 'quote') { | if ($mode == 'quote') { | ||||
| Show All 24 Lines | for ($ii = 0; $ii < $length; $ii++) { | ||||
| if (preg_match('/^"\z/', $character)) { | if (preg_match('/^"\z/', $character)) { | ||||
| $capture = true; | $capture = true; | ||||
| $mode = 'token'; | $mode = 'token'; | ||||
| $is_quoted = true; | $is_quoted = true; | ||||
| } | } | ||||
| } | } | ||||
| if ($capture) { | if ($capture) { | ||||
| $tokens[] = array( | $token = array( | ||||
| 'operator' => $current_operator, | 'operator' => $current_operator, | ||||
| 'quoted' => $was_quoted, | 'quoted' => $was_quoted, | ||||
| 'value' => $current_token, | 'value' => $current_token, | ||||
| ); | ); | ||||
| if ($enable_functions) { | |||||
| $token['function'] = $current_function; | |||||
| } | |||||
| $tokens[] = $token; | |||||
| $current_operator = array(); | $current_operator = array(); | ||||
| $current_token = array(); | $current_token = array(); | ||||
| $current_function = null; | |||||
| continue; | continue; | ||||
| } else { | } else { | ||||
| $current_token[] = $character; | $current_token[] = $character; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| if ($is_quoted) { | if ($is_quoted) { | ||||
| throw new PhutilSearchQueryCompilerSyntaxException( | throw new PhutilSearchQueryCompilerSyntaxException( | ||||
| pht( | pht( | ||||
| 'Query contains unmatched double quotes.')); | 'Query contains unmatched double quotes.')); | ||||
| } | } | ||||
| if ($mode == 'operator') { | if ($mode == 'operator') { | ||||
| throw new PhutilSearchQueryCompilerSyntaxException( | throw new PhutilSearchQueryCompilerSyntaxException( | ||||
| pht( | pht( | ||||
| 'Query contains operator ("%s") with no search term.', | 'Query contains operator ("%s") with no search term.', | ||||
| implode('', $current_operator))); | implode('', $current_operator))); | ||||
| } | } | ||||
| $tokens[] = array( | $token = array( | ||||
| 'operator' => $current_operator, | 'operator' => $current_operator, | ||||
| 'quoted' => false, | 'quoted' => false, | ||||
| 'value' => $current_token, | 'value' => $current_token, | ||||
| ); | ); | ||||
| if ($enable_functions) { | |||||
| $token['function'] = $current_function; | |||||
| } | |||||
| $tokens[] = $token; | |||||
| $results = array(); | $results = array(); | ||||
| foreach ($tokens as $token) { | foreach ($tokens as $token) { | ||||
| $value = implode('', $token['value']); | $value = implode('', $token['value']); | ||||
| $operator_string = implode('', $token['operator']); | $operator_string = implode('', $token['operator']); | ||||
| if (!strlen($value)) { | if (!strlen($value)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| switch ($operator_string) { | switch ($operator_string) { | ||||
| case '-': | case '-': | ||||
| $operator = self::OPERATOR_NOT; | $operator = self::OPERATOR_NOT; | ||||
| break; | break; | ||||
| case '~': | |||||
| $operator = self::OPERATOR_SUBSTRING; | |||||
| break; | |||||
| case '=': | |||||
| $operator = self::OPERATOR_EXACT; | |||||
| break; | |||||
| case '': | case '': | ||||
| case '+': | case '+': | ||||
| $operator = self::OPERATOR_AND; | $operator = self::OPERATOR_AND; | ||||
| break; | break; | ||||
| default: | default: | ||||
| throw new PhutilSearchQueryCompilerSyntaxException( | throw new PhutilSearchQueryCompilerSyntaxException( | ||||
| pht( | pht( | ||||
| 'Query has an invalid sequence of operators ("%s").', | 'Query has an invalid sequence of operators ("%s").', | ||||
| $operator_string)); | $operator_string)); | ||||
| } | } | ||||
| $results[] = array( | $result = array( | ||||
| 'operator' => $operator, | 'operator' => $operator, | ||||
| 'quoted' => $token['quoted'], | 'quoted' => $token['quoted'], | ||||
| 'value' => $value, | 'value' => $value, | ||||
| ); | ); | ||||
| if ($enable_functions) { | |||||
| $result['function'] = $token['function']; | |||||
| } | |||||
| $results[] = $result; | |||||
| } | } | ||||
| return $results; | return $results; | ||||
| } | } | ||||
| private function renderToken( | private function renderToken( | ||||
| PhutilSearchQueryToken $token, | PhutilSearchQueryToken $token, | ||||
| PhutilSearchStemmer $stemmer = null) { | PhutilSearchStemmer $stemmer = null) { | ||||
| ▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines | |||||