diff --git a/src/search/PhutilSearchQueryCompiler.php b/src/search/PhutilSearchQueryCompiler.php index 5ddb0db..930db22 100644 --- a/src/search/PhutilSearchQueryCompiler.php +++ b/src/search/PhutilSearchQueryCompiler.php @@ -1,285 +1,362 @@ <()~*:""&|'; private $query; private $stemmer; + private $enableFunctions = false; const OPERATOR_NOT = 'not'; const OPERATOR_AND = 'and'; + const OPERATOR_SUBSTRING = 'sub'; + const OPERATOR_EXACT = 'exact'; public function setOperators($operators) { $this->operators = $operators; return $this; } public function getOperators() { return $this->operators; } public function setStemmer(PhutilSearchStemmer $stemmer) { $this->stemmer = $stemmer; return $this; } public function getStemmer() { return $this->stemmer; } + public function setEnableFunctions($enable_functions) { + $this->enableFunctions = $enable_functions; + return $this; + } + + public function getEnableFunctions() { + return $this->enableFunctions; + } + public function compileQuery(array $tokens) { assert_instances_of($tokens, 'PhutilSearchQueryToken'); $result = array(); foreach ($tokens as $token) { $result[] = $this->renderToken($token); } return $this->compileRenderedTokens($result); } public function compileLiteralQuery(array $tokens) { assert_instances_of($tokens, 'PhutilSearchQueryToken'); $result = array(); foreach ($tokens as $token) { if (!$token->isQuoted()) { continue; } $result[] = $this->renderToken($token); } return $this->compileRenderedTokens($result); } public function compileStemmedQuery(array $tokens) { assert_instances_of($tokens, 'PhutilSearchQueryToken'); $result = array(); foreach ($tokens as $token) { if ($token->isQuoted()) { continue; } $result[] = $this->renderToken($token, $this->getStemmer()); } return $this->compileRenderedTokens($result); } private function compileRenderedTokens(array $list) { if (!$list) { return null; } $list = array_unique($list); return implode(' ', $list); } public function newTokens($query) { $results = $this->tokenizeQuery($query); $tokens = array(); foreach ($results as $result) { $tokens[] = PhutilSearchQueryToken::newFromDictionary($result); } return $tokens; } private function tokenizeQuery($query) { $maximum_bytes = 1024; $query_bytes = strlen($query); if ($query_bytes > $maximum_bytes) { throw new PhutilSearchQueryCompilerSyntaxException( pht( 'Query is too long (%s bytes, maximum is %s bytes).', new PhutilNumber($query_bytes), new PhutilNumber($maximum_bytes))); } $query = phutil_utf8v($query); $length = count($query); + $enable_functions = $this->getEnableFunctions(); + $mode = 'scan'; $current_operator = array(); $current_token = array(); + $current_function = null; $is_quoted = false; $tokens = array(); + + if ($enable_functions) { + $operator_characters = '[~=+-]'; + } else { + $operator_characters = '[+-]'; + } + for ($ii = 0; $ii < $length; $ii++) { $character = $query[$ii]; if ($mode == 'scan') { if (preg_match('/^\s\z/u', $character)) { continue; } + $mode = 'function'; + } + + if ($mode == 'function') { $mode = 'operator'; + + if ($enable_functions) { + $found = false; + for ($jj = $ii; $jj < $length; $jj++) { + if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) { + continue; + } + if ($query[$jj] == ':') { + $found = $jj; + } + break; + } + + if ($found !== false) { + $function = array_slice($query, $ii, ($jj - $ii)); + $current_function = implode('', $function); + + if (!strlen($current_function)) { + $current_function = null; + } + + $ii = $jj; + continue; + } + } } if ($mode == 'operator') { if (preg_match('/^\s\z/u', $character)) { continue; } - if (preg_match('/^[+-]\z/', $character)) { + if (preg_match('/^'.$operator_characters.'\z/', $character)) { $current_operator[] = $character; continue; } $mode = 'quote'; } if ($mode == 'quote') { if (preg_match('/^"\z/', $character)) { $is_quoted = true; $mode = 'token'; continue; } $mode = 'token'; } if ($mode == 'token') { $capture = false; $was_quoted = $is_quoted; if ($is_quoted) { if (preg_match('/^"\z/', $character)) { $capture = true; $mode = 'scan'; $is_quoted = false; } } else { if (preg_match('/^\s\z/u', $character)) { $capture = true; $mode = 'scan'; } if (preg_match('/^"\z/', $character)) { $capture = true; $mode = 'token'; $is_quoted = true; } } if ($capture) { - $tokens[] = array( + $token = array( 'operator' => $current_operator, 'quoted' => $was_quoted, 'value' => $current_token, ); + + if ($enable_functions) { + $token['function'] = $current_function; + } + + $tokens[] = $token; + $current_operator = array(); $current_token = array(); + $current_function = null; continue; } else { $current_token[] = $character; } } } if ($is_quoted) { throw new PhutilSearchQueryCompilerSyntaxException( pht( 'Query contains unmatched double quotes.')); } if ($mode == 'operator') { throw new PhutilSearchQueryCompilerSyntaxException( pht( 'Query contains operator ("%s") with no search term.', implode('', $current_operator))); } - $tokens[] = array( + $token = array( 'operator' => $current_operator, 'quoted' => false, 'value' => $current_token, ); + if ($enable_functions) { + $token['function'] = $current_function; + } + + $tokens[] = $token; + $results = array(); foreach ($tokens as $token) { $value = implode('', $token['value']); $operator_string = implode('', $token['operator']); if (!strlen($value)) { continue; } switch ($operator_string) { case '-': $operator = self::OPERATOR_NOT; break; + case '~': + $operator = self::OPERATOR_SUBSTRING; + break; + case '=': + $operator = self::OPERATOR_EXACT; + break; case '': case '+': $operator = self::OPERATOR_AND; break; default: throw new PhutilSearchQueryCompilerSyntaxException( pht( 'Query has an invalid sequence of operators ("%s").', $operator_string)); } - $results[] = array( + $result = array( 'operator' => $operator, 'quoted' => $token['quoted'], 'value' => $value, ); + + if ($enable_functions) { + $result['function'] = $token['function']; + } + + $results[] = $result; } return $results; } private function renderToken( PhutilSearchQueryToken $token, PhutilSearchStemmer $stemmer = null) { $value = $token->getValue(); if ($stemmer) { $value = $stemmer->stemToken($value); } $value = $this->quoteToken($value); $operator = $token->getOperator(); $prefix = $this->getOperatorPrefix($operator); $value = $prefix.$value; return $value; } private function getOperatorPrefix($operator) { $operators = $this->operators; switch ($operator) { case self::OPERATOR_AND: $prefix = $operators[0]; break; case self::OPERATOR_NOT: $prefix = $operators[2]; break; default: throw new PhutilSearchQueryCompilerSyntaxException( pht( 'Unsupported operator prefix "%s".', $operator)); } if ($prefix == ' ') { $prefix = null; } return $prefix; } private function quoteToken($value) { $operators = $this->operators; $open_quote = $this->operators[10]; $close_quote = $this->operators[11]; return $open_quote.$value.$close_quote; } } diff --git a/src/search/PhutilSearchQueryToken.php b/src/search/PhutilSearchQueryToken.php index 9eb7430..b634271 100644 --- a/src/search/PhutilSearchQueryToken.php +++ b/src/search/PhutilSearchQueryToken.php @@ -1,31 +1,37 @@ isQuoted = $dictionary['quoted']; $token->operator = $dictionary['operator']; $token->value = $dictionary['value']; + $token->function = idx($dictionary, 'function'); return $token; } public function isQuoted() { return $this->isQuoted; } public function getValue() { return $this->value; } public function getOperator() { return $this->operator; } + public function getFunction() { + return $this->function; + } + } diff --git a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php index 712427c..dc6c9d1 100644 --- a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php +++ b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php @@ -1,146 +1,206 @@ null, 'cat dog' => '+"cat" +"dog"', 'cat -dog' => '+"cat" -"dog"', 'cat-dog' => '+"cat-dog"', // If there are spaces after an operator, the operator applies to the // next search term. 'cat - dog' => '+"cat" -"dog"', // Double quotes serve as delimiters even if there is no whitespace // between terms. '"cat"dog' => '+"cat" +"dog"', // This query is too long. str_repeat('x', 2048) => false, // Multiple operators are not permitted. '++cat' => false, '+-cat' => false, '--cat' => false, // Stray operators are not permitted. '+' => false, 'cat +' => false, // Double quotes must be paired. '"' => false, 'cat "' => false, '"cat' => false, 'A"' => false, 'A"B"' => '+"A" +"B"', ); $this->assertCompileQueries($tests); // Test that we compile queries correctly if the operators have been // swapped to use "AND" by default. $operator_tests = array( 'cat dog' => '"cat" "dog"', 'cat -dog' => '"cat" -"dog"', ); $this->assertCompileQueries($operator_tests, ' |-><()~*:""&\''); // Test that we compile queries correctly if the quote operators have // been swapped to differ. $quote_tests = array( 'cat dog' => '+[cat] +[dog]', 'cat -dog' => '+[cat] -[dog]', ); $this->assertCompileQueries($quote_tests, '+ -><()~*:[]&|'); } public function testCompileQueriesWithStemming() { $stemming_tests = array( 'cat dog' => array( null, '+"cat" +"dog"', ), 'cats dogs' => array( null, '+"cat" +"dog"', ), 'cats "dogs"' => array( '+"dogs"', '+"cat"', ), '"blessed blade" of the windseeker' => array( '+"blessed blade"', '+"of" +"the" +"windseek"', ), 'mailing users for mentions on tasks' => array( null, '+"mail" +"user" +"for" +"mention" +"on" +"task"', ), ); $stemmer = new PhutilSearchStemmer(); $this->assertCompileQueries($stemming_tests, null, $stemmer); } + public function testCompileQueriesWithFunctions() { + $op_and = PhutilSearchQueryCompiler::OPERATOR_AND; + $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING; + $op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT; + + $function_tests = array( + 'cat' => array( + array(null, $op_and, 'cat'), + ), + ':cat' => array( + array(null, $op_and, 'cat'), + ), + 'title:cat' => array( + array('title', $op_and, 'cat'), + ), + 'title:cat:dog' => array( + array('title', $op_and, 'cat:dog'), + ), + 'title:~cat' => array( + array('title', $op_sub, 'cat'), + ), + 'cat title:="Meow Meow"' => array( + array(null, $op_and, 'cat'), + array('title', $op_exact, 'Meow Meow'), + ), + 'title:cat title:dog' => array( + array('title', $op_and, 'cat'), + array('title', $op_and, 'dog'), + ), + '~"core and seven years ag"' => array( + array(null, $op_sub, 'core and seven years ag'), + ), + ); + + $this->assertCompileFunctionQueries($function_tests); + } + private function assertCompileQueries( array $tests, $operators = null, PhutilSearchStemmer $stemmer = null) { foreach ($tests as $input => $expect) { $caught = null; $query = null; $literal_query = null; $stemmed_query = null; try { $compiler = new PhutilSearchQueryCompiler(); if ($operators !== null) { $compiler->setOperators($operators); } if ($stemmer !== null) { $compiler->setStemmer($stemmer); } $tokens = $compiler->newTokens($input); if ($stemmer) { $literal_query = $compiler->compileLiteralQuery($tokens); $stemmed_query = $compiler->compileStemmedQuery($tokens); } else { $query = $compiler->compileQuery($tokens); } } catch (PhutilSearchQueryCompilerSyntaxException $ex) { $caught = $ex; } if ($caught !== null) { $query = false; $literal_query = false; $stemmed_query = false; } if (!$stemmer) { $this->assertEqual( $expect, $query, pht('Compilation of query: %s', $input)); } else { $this->assertEqual( $expect, ($literal_query === false) ? false : array($literal_query, $stemmed_query), pht('Stemmed compilation of query: %s', $input)); } } } + private function assertCompileFunctionQueries(array $tests) { + foreach ($tests as $input => $expect) { + $compiler = id(new PhutilSearchQueryCompiler()) + ->setEnableFunctions(true); + + $tokens = $compiler->newTokens($input); + + $result = array(); + foreach ($tokens as $token) { + $result[] = array( + $token->getFunction(), + $token->getOperator(), + $token->getValue(), + ); + } + + $this->assertEqual( + $expect, + $result, + pht('Function compilation of query: %s', $input)); + } + } + }