Page MenuHomePhabricator

D16938.id40771.diff
No OneTemporary

D16938.id40771.diff

diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -380,6 +380,9 @@
'PhutilSafeHTMLProducerInterface' => 'markup/PhutilSafeHTMLProducerInterface.php',
'PhutilSafeHTMLTestCase' => 'markup/__tests__/PhutilSafeHTMLTestCase.php',
'PhutilSaturateStdoutDaemon' => 'daemon/torture/PhutilSaturateStdoutDaemon.php',
+ 'PhutilSearchQueryCompiler' => 'search/PhutilSearchQueryCompiler.php',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'search/PhutilSearchQueryCompilerSyntaxException.php',
+ 'PhutilSearchQueryCompilerTestCase' => 'search/__tests__/PhutilSearchQueryCompilerTestCase.php',
'PhutilServiceProfiler' => 'serviceprofiler/PhutilServiceProfiler.php',
'PhutilShellLexer' => 'lexer/PhutilShellLexer.php',
'PhutilShellLexerTestCase' => 'lexer/__tests__/PhutilShellLexerTestCase.php',
@@ -981,6 +984,9 @@
'PhutilSafeHTML' => 'Phobject',
'PhutilSafeHTMLTestCase' => 'PhutilTestCase',
'PhutilSaturateStdoutDaemon' => 'PhutilTortureTestDaemon',
+ 'PhutilSearchQueryCompiler' => 'Phobject',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'Exception',
+ 'PhutilSearchQueryCompilerTestCase' => 'PhutilTestCase',
'PhutilServiceProfiler' => 'Phobject',
'PhutilShellLexer' => 'PhutilLexer',
'PhutilShellLexerTestCase' => 'PhutilTestCase',
diff --git a/src/search/PhutilSearchQueryCompiler.php b/src/search/PhutilSearchQueryCompiler.php
new file mode 100644
--- /dev/null
+++ b/src/search/PhutilSearchQueryCompiler.php
@@ -0,0 +1,313 @@
+<?php
+
+/**
+ * Compile a user-entered search string into a boolean fulltext query.
+ *
+ * The input is split into terms. A term may carry a leading "+" or "-" and
+ * may be wrapped in double quotes. Each term is rendered as the configured
+ * operator prefix followed by the term wrapped in the configured quote
+ * characters; distinct rendered terms are joined with single spaces.
+ *
+ * The operator string is read by byte offset; see the OFFSET_* constants.
+ * NOTE(review): the default value looks like MySQL's default
+ * "ft_boolean_syntax" setting; confirm before relying on that.
+ */
+final class PhutilSearchQueryCompiler
+  extends Phobject {
+
+  private $operators = '+ -><()~*:""&|';
+  private $query;
+
+  const OPERATOR_NOT = 'not';
+  const OPERATOR_AND = 'and';
+
+  // Byte offsets into the operator string that this class reads.
+  const OFFSET_AND = 0;
+  const OFFSET_NOT = 2;
+  const OFFSET_QUOTE_OPEN = 10;
+  const OFFSET_QUOTE_CLOSE = 11;
+
+  /**
+   * Replace the operator characters used when rendering terms.
+   *
+   * @param string Operator string, read at the OFFSET_* positions.
+   * @return this
+   */
+  public function setOperators($operators) {
+    $this->operators = $operators;
+    return $this;
+  }
+
+  /**
+   * @return string Operator string currently in effect.
+   */
+  public function getOperators() {
+    return $this->operators;
+  }
+
+  /**
+   * Set the raw query to compile.
+   *
+   * @param string Raw query text.
+   * @return this
+   */
+  public function setQuery($query) {
+    $this->query = $query;
+    return $this;
+  }
+
+  /**
+   * @return string|null Raw query, or null if none has been set.
+   */
+  public function getQuery() {
+    return $this->query;
+  }
+
+  /**
+   * Compile the current query into its rendered form.
+   *
+   * @return string Distinct rendered terms joined by single spaces; an empty
+   *   string if the query has no terms.
+   * @throws PhutilSearchQueryCompilerSyntaxException If the query is too
+   *   long, has unmatched quotes, or misuses operators.
+   */
+  public function compileQuery() {
+    $query = $this->getQuery();
+    $tokens = $this->tokenizeQuery($query);
+
+    $result = array();
+    foreach ($tokens as $token) {
+      $result[] = $this->renderToken($token);
+    }
+
+    // Repeating a term adds nothing, so emit each rendered term once.
+    $result = array_unique($result);
+    return implode(' ', $result);
+  }
+
+  /**
+   * Split a raw query into normalized tokens.
+   *
+   * @param string Raw query text.
+   * @return list<map<string, wild>> Tokens with keys "operator" (one of the
+   *   OPERATOR_* constants), "quoted" (bool) and "value" (string). Terms
+   *   with an empty value are dropped.
+   * @throws PhutilSearchQueryCompilerSyntaxException
+   */
+  private function tokenizeQuery($query) {
+    $maximum_bytes = 1024;
+
+    $query_bytes = strlen($query);
+    if ($query_bytes > $maximum_bytes) {
+      throw new PhutilSearchQueryCompilerSyntaxException(
+        pht(
+          'Query is too long (%s bytes, maximum is %s bytes).',
+          new PhutilNumber($query_bytes),
+          new PhutilNumber($maximum_bytes)));
+    }
+
+    $query = phutil_utf8v($query);
+    $length = count($query);
+
+    // Each character falls through the modes in order: "scan" skips
+    // whitespace between terms, "operator" collects "+" and "-", "quote"
+    // consumes an opening quote, and "token" collects the term itself.
+    $mode = 'scan';
+    $current_operator = array();
+    $current_token = array();
+    $is_quoted = false;
+    $tokens = array();
+    for ($ii = 0; $ii < $length; $ii++) {
+      $character = $query[$ii];
+
+      if ($mode === 'scan') {
+        if (preg_match('/^\s\z/u', $character)) {
+          continue;
+        }
+
+        $mode = 'operator';
+      }
+
+      if ($mode === 'operator') {
+        // Whitespace after an operator is skipped, so the operator applies
+        // to the next term.
+        if (preg_match('/^\s\z/u', $character)) {
+          continue;
+        }
+
+        if (preg_match('/^[+-]\z/', $character)) {
+          $current_operator[] = $character;
+          continue;
+        }
+
+        $mode = 'quote';
+      }
+
+      if ($mode === 'quote') {
+        if (preg_match('/^"\z/', $character)) {
+          $is_quoted = true;
+          $mode = 'token';
+          continue;
+        }
+
+        $mode = 'token';
+      }
+
+      if ($mode === 'token') {
+        $capture = false;
+        $was_quoted = $is_quoted;
+        if ($is_quoted) {
+          // Inside quotes, only the closing quote ends the term.
+          if (preg_match('/^"\z/', $character)) {
+            $capture = true;
+            $mode = 'scan';
+            $is_quoted = false;
+          }
+        } else {
+          if (preg_match('/^\s\z/u', $character)) {
+            $capture = true;
+            $mode = 'scan';
+          }
+
+          // A quote in the middle of an unquoted term ends that term and
+          // immediately opens a new quoted one.
+          if (preg_match('/^"\z/', $character)) {
+            $capture = true;
+            $mode = 'token';
+            $is_quoted = true;
+          }
+        }
+
+        if ($capture) {
+          $tokens[] = array(
+            'operator' => $current_operator,
+            'quoted' => $was_quoted,
+            'value' => $current_token,
+          );
+          $current_operator = array();
+          $current_token = array();
+          continue;
+        } else {
+          $current_token[] = $character;
+        }
+      }
+    }
+
+    if ($is_quoted) {
+      throw new PhutilSearchQueryCompilerSyntaxException(
+        pht(
+          'Query contains unmatched double quotes.'));
+    }
+
+    if ($mode === 'operator') {
+      throw new PhutilSearchQueryCompilerSyntaxException(
+        pht(
+          'Query contains operator ("%s") with no search term.',
+          implode('', $current_operator)));
+    }
+
+    // Flush the trailing term, which has no delimiter after it. It cannot
+    // be quoted, since an open quote was rejected above.
+    $tokens[] = array(
+      'operator' => $current_operator,
+      'quoted' => false,
+      'value' => $current_token,
+    );
+
+    $results = array();
+    foreach ($tokens as $token) {
+      $value = implode('', $token['value']);
+      $operator_string = implode('', $token['operator']);
+
+      if (!strlen($value)) {
+        continue;
+      }
+
+      switch ($operator_string) {
+        case '-':
+          $operator = self::OPERATOR_NOT;
+          break;
+        case '':
+        case '+':
+          $operator = self::OPERATOR_AND;
+          break;
+        default:
+          throw new PhutilSearchQueryCompilerSyntaxException(
+            pht(
+              'Query has an invalid sequence of operators ("%s").',
+              $operator_string));
+      }
+
+      $results[] = array(
+        'operator' => $operator,
+        'quoted' => $token['quoted'],
+        'value' => $value,
+      );
+    }
+
+    return $results;
+  }
+
+  /**
+   * Render one token as its prefix followed by the quoted value.
+   *
+   * @param map<string, wild> Token produced by tokenizeQuery().
+   * @return string Rendered term.
+   */
+  private function renderToken(array $token) {
+    $value = $this->quoteToken($token['value']);
+    $operator = $token['operator'];
+    $prefix = $this->getOperatorPrefix($operator);
+
+    $value = $prefix.$value;
+
+    return $value;
+  }
+
+  /**
+   * Look up the prefix character for a logical operator.
+   *
+   * @param const One of the OPERATOR_* constants.
+   * @return string|null Prefix character, or null when the operator string
+   *   has a space at that offset.
+   * @throws PhutilSearchQueryCompilerSyntaxException If the operator is not
+   *   recognized.
+   */
+  private function getOperatorPrefix($operator) {
+    $operators = $this->operators;
+
+    switch ($operator) {
+      case self::OPERATOR_AND:
+        $prefix = $operators[self::OFFSET_AND];
+        break;
+      case self::OPERATOR_NOT:
+        $prefix = $operators[self::OFFSET_NOT];
+        break;
+      default:
+        throw new PhutilSearchQueryCompilerSyntaxException(
+          pht(
+            'Unsupported operator prefix "%s".',
+            $operator));
+    }
+
+    if ($prefix === ' ') {
+      $prefix = null;
+    }
+
+    return $prefix;
+  }
+
+  /**
+   * Wrap a term in the configured opening and closing quote characters.
+   *
+   * @param string Term text.
+   * @return string Quoted term.
+   */
+  private function quoteToken($value) {
+    $open_quote = $this->operators[self::OFFSET_QUOTE_OPEN];
+    $close_quote = $this->operators[self::OFFSET_QUOTE_CLOSE];
+
+    return $open_quote.$value.$close_quote;
+  }
+
+}
diff --git a/src/search/PhutilSearchQueryCompilerSyntaxException.php b/src/search/PhutilSearchQueryCompilerSyntaxException.php
new file mode 100644
--- /dev/null
+++ b/src/search/PhutilSearchQueryCompilerSyntaxException.php
@@ -0,0 +1,4 @@
+<?php
+/** Raised by PhutilSearchQueryCompiler when a query cannot be compiled. */
+final class PhutilSearchQueryCompilerSyntaxException
+ extends Exception {}
diff --git a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php
@@ -0,0 +1,91 @@
+<?php
+
+final class PhutilSearchQueryCompilerTestCase
+  extends PhutilTestCase {
+
+  /** Exercise the compiler with default and customized operator strings. */
+  public function testCompileQueries() {
+    $default_cases = array(
+      '' => '',
+      'cat dog' => '+"cat" +"dog"',
+      'cat -dog' => '+"cat" -"dog"',
+      'cat-dog' => '+"cat-dog"',
+
+      // If there are spaces after an operator, the operator applies to the
+      // next search term.
+      'cat - dog' => '+"cat" -"dog"',
+
+      // Double quotes serve as delimiters even if there is no whitespace
+      // between terms.
+      '"cat"dog' => '+"cat" +"dog"',
+
+      // This query is too long.
+      str_repeat('x', 2048) => false,
+
+      // Multiple operators are not permitted.
+      '++cat' => false,
+      '+-cat' => false,
+      '--cat' => false,
+
+      // Stray operators are not permitted.
+      '+' => false,
+      'cat +' => false,
+
+      // Double quotes must be paired.
+      '"' => false,
+      'cat "' => false,
+      '"cat' => false,
+      'A"' => false,
+      'A"B"' => '+"A" +"B"',
+    );
+    $this->assertCompileQueries($default_cases);
+
+    // Test that we compile queries correctly if the operators have been
+    // swapped to use "AND" by default.
+    $and_operators = ' |-><()~*:""&\'';
+    $and_cases = array(
+      'cat dog' => '"cat" "dog"',
+      'cat -dog' => '"cat" -"dog"',
+    );
+    $this->assertCompileQueries($and_cases, $and_operators);
+
+    // Test that we compile queries correctly if the quote operators have
+    // been swapped to differ.
+    $bracket_operators = '+ -><()~*:[]&|';
+    $bracket_cases = array(
+      'cat dog' => '+[cat] +[dog]',
+      'cat -dog' => '+[cat] -[dog]',
+    );
+    $this->assertCompileQueries($bracket_cases, $bracket_operators);
+  }
+
+  /**
+   * Compile each input and compare against its expectation.
+   *
+   * @param map<string, string|false> Input query => expected output, with
+   *   false meaning a syntax exception is expected.
+   * @param string|null Operator string to install, or null for the default.
+   * @return void
+   */
+  private function assertCompileQueries(array $tests, $operators = null) {
+    foreach ($tests as $input => $expect) {
+      $compiler = new PhutilSearchQueryCompiler();
+      $compiler->setQuery($input);
+      if ($operators !== null) {
+        $compiler->setOperators($operators);
+      }
+
+      try {
+        $actual = $compiler->compileQuery();
+      } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
+        // Failed compilations are recorded as false so they can be compared
+        // against the expectation table.
+        $actual = false;
+      }
+
+      $message = pht('Compilation of query: %s', $input);
+      $this->assertEqual($expect, $actual, $message);
+    }
+  }
+
+}

File Metadata

Mime Type
text/plain
Expires
Thu, Jun 6, 6:52 PM (1 w, 4 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6289752
Default Alt Text
D16938.id40771.diff (9 KB)

Event Timeline