Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F13965129
D16938.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Referenced Files
None
Subscribers
None
D16938.id.diff
View Options
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -380,6 +380,9 @@
'PhutilSafeHTMLProducerInterface' => 'markup/PhutilSafeHTMLProducerInterface.php',
'PhutilSafeHTMLTestCase' => 'markup/__tests__/PhutilSafeHTMLTestCase.php',
'PhutilSaturateStdoutDaemon' => 'daemon/torture/PhutilSaturateStdoutDaemon.php',
+ 'PhutilSearchQueryCompiler' => 'search/PhutilSearchQueryCompiler.php',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'search/PhutilSearchQueryCompilerSyntaxException.php',
+ 'PhutilSearchQueryCompilerTestCase' => 'search/__tests__/PhutilSearchQueryCompilerTestCase.php',
'PhutilServiceProfiler' => 'serviceprofiler/PhutilServiceProfiler.php',
'PhutilShellLexer' => 'lexer/PhutilShellLexer.php',
'PhutilShellLexerTestCase' => 'lexer/__tests__/PhutilShellLexerTestCase.php',
@@ -981,6 +984,9 @@
'PhutilSafeHTML' => 'Phobject',
'PhutilSafeHTMLTestCase' => 'PhutilTestCase',
'PhutilSaturateStdoutDaemon' => 'PhutilTortureTestDaemon',
+ 'PhutilSearchQueryCompiler' => 'Phobject',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'Exception',
+ 'PhutilSearchQueryCompilerTestCase' => 'PhutilTestCase',
'PhutilServiceProfiler' => 'Phobject',
'PhutilShellLexer' => 'PhutilLexer',
'PhutilShellLexerTestCase' => 'PhutilTestCase',
diff --git a/src/search/PhutilSearchQueryCompiler.php b/src/search/PhutilSearchQueryCompiler.php
new file mode 100644
--- /dev/null
+++ b/src/search/PhutilSearchQueryCompiler.php
@@ -0,0 +1,231 @@
+<?php
+
+/**
+ * Compiles a user-entered search query into a string of operator-prefixed,
+ * quoted terms.
+ *
+ * Output syntax is driven by an "operators" string. Only four byte offsets of
+ * that string are read: offset 0 prefixes required ("and") terms, offset 2
+ * prefixes excluded ("not") terms, and offsets 10 and 11 open and close a
+ * quoted term. A space at a prefix offset means "emit no prefix".
+ *
+ * NOTE(review): The default operator string looks like the default value of
+ * MySQL's `ft_boolean_syntax` setting; confirm against the caller that
+ * supplies custom operators.
+ */
+final class PhutilSearchQueryCompiler
+  extends Phobject {
+
+  private $operators = '+ -><()~*:""&|';
+  private $query;
+
+  const OPERATOR_NOT = 'not';
+  const OPERATOR_AND = 'and';
+
+  /**
+   * Set the operator string used to render compiled queries.
+   *
+   * @param string $operators Operator string, at least 12 bytes long.
+   * @return $this
+   * @throws InvalidArgumentException If the value is not a string, or is too
+   *   short to contain every offset the compiler reads.
+   */
+  public function setOperators($operators) {
+    // The renderer indexes this string up to offset 11. Reject anything
+    // shorter here, instead of failing with an "uninitialized string offset"
+    // error in the middle of compiling a query.
+    $minimum_bytes = 12;
+    if (!is_string($operators) || strlen($operators) < $minimum_bytes) {
+      throw new InvalidArgumentException(
+        pht(
+          'Search operator string must be at least %s bytes long.',
+          new PhutilNumber($minimum_bytes)));
+    }
+
+    $this->operators = $operators;
+    return $this;
+  }
+
+  /**
+   * Get the operator string used to render compiled queries.
+   *
+   * @return string
+   */
+  public function getOperators() {
+    return $this->operators;
+  }
+
+  /**
+   * Set the raw, user-entered query to compile.
+   *
+   * @param string $query Raw query text.
+   * @return $this
+   */
+  public function setQuery($query) {
+    $this->query = $query;
+    return $this;
+  }
+
+  /**
+   * Get the raw query text, or null if no query has been set.
+   *
+   * @return string|null
+   */
+  public function getQuery() {
+    return $this->query;
+  }
+
+  /**
+   * Compile the current query into its rendered form.
+   *
+   * Each term is rendered as an optional operator prefix followed by the
+   * quoted term. Identical rendered terms are emitted only once, and terms
+   * are joined with single spaces.
+   *
+   * @return string Compiled query; empty if the query contains no terms.
+   * @throws PhutilSearchQueryCompilerSyntaxException If the query is too
+   *   long or is not syntactically valid.
+   */
+  public function compileQuery() {
+    $query = $this->getQuery();
+    $tokens = $this->tokenizeQuery($query);
+
+    $result = array();
+    foreach ($tokens as $token) {
+      $result[] = $this->renderToken($token);
+    }
+
+    $result = array_unique($result);
+    return implode(' ', $result);
+  }
+
+  /**
+   * Split a raw query into a list of terms.
+   *
+   * @param string|null $query Raw query text; null is treated as empty.
+   * @return array List of dictionaries with keys "operator" (one of the
+   *   OPERATOR_* constants), "quoted" (bool) and "value" (string).
+   * @throws PhutilSearchQueryCompilerSyntaxException If the query is longer
+   *   than 1024 bytes, has unmatched double quotes, has an operator with no
+   *   term after it, or has more than one operator before a term.
+   */
+  private function tokenizeQuery($query) {
+    // A compiler with no query set behaves like one with an empty query.
+    // Normalizing here avoids passing null to strlen().
+    if ($query === null) {
+      $query = '';
+    }
+
+    $maximum_bytes = 1024;
+
+    $query_bytes = strlen($query);
+    if ($query_bytes > $maximum_bytes) {
+      throw new PhutilSearchQueryCompilerSyntaxException(
+        pht(
+          'Query is too long (%s bytes, maximum is %s bytes).',
+          new PhutilNumber($query_bytes),
+          new PhutilNumber($maximum_bytes)));
+    }
+
+    $query = phutil_utf8v($query);
+    $length = count($query);
+
+    // The scanner is a small state machine. For a single character the
+    // modes fall through in order: "scan" -> "operator" -> "quote" ->
+    // "token". A mode only consumes the character when it "continue"s.
+    $mode = 'scan';
+    $current_operator = array();
+    $current_token = array();
+    $is_quoted = false;
+    $tokens = array();
+    for ($ii = 0; $ii < $length; $ii++) {
+      $character = $query[$ii];
+
+      if ($mode == 'scan') {
+        // Skip whitespace between terms.
+        if (preg_match('/^\s\z/u', $character)) {
+          continue;
+        }
+
+        $mode = 'operator';
+      }
+
+      if ($mode == 'operator') {
+        // Whitespace after an operator is skipped, so the operator applies
+        // to the next term.
+        if (preg_match('/^\s\z/u', $character)) {
+          continue;
+        }
+
+        if (preg_match('/^[+-]\z/', $character)) {
+          $current_operator[] = $character;
+          continue;
+        }
+
+        $mode = 'quote';
+      }
+
+      if ($mode == 'quote') {
+        // An opening quote is consumed; anything else starts a bare term.
+        if (preg_match('/^"\z/', $character)) {
+          $is_quoted = true;
+          $mode = 'token';
+          continue;
+        }
+
+        $mode = 'token';
+      }
+
+      if ($mode == 'token') {
+        $capture = false;
+        $was_quoted = $is_quoted;
+        if ($is_quoted) {
+          // Inside quotes, only the closing quote ends the term.
+          if (preg_match('/^"\z/', $character)) {
+            $capture = true;
+            $mode = 'scan';
+            $is_quoted = false;
+          }
+        } else {
+          if (preg_match('/^\s\z/u', $character)) {
+            $capture = true;
+            $mode = 'scan';
+          }
+
+          // A quote in the middle of a bare term ends that term and
+          // immediately opens a new quoted term.
+          if (preg_match('/^"\z/', $character)) {
+            $capture = true;
+            $mode = 'token';
+            $is_quoted = true;
+          }
+        }
+
+        if ($capture) {
+          $tokens[] = array(
+            'operator' => $current_operator,
+            'quoted' => $was_quoted,
+            'value' => $current_token,
+          );
+          $current_operator = array();
+          $current_token = array();
+          continue;
+        } else {
+          $current_token[] = $character;
+        }
+      }
+    }
+
+    if ($is_quoted) {
+      throw new PhutilSearchQueryCompilerSyntaxException(
+        pht(
+          'Query contains unmatched double quotes.'));
+    }
+
+    if ($mode == 'operator') {
+      throw new PhutilSearchQueryCompilerSyntaxException(
+        pht(
+          'Query contains operator ("%s") with no search term.',
+          implode('', $current_operator)));
+    }
+
+    // Flush whatever was being read when the input ended. This may be an
+    // empty term, which is discarded below.
+    $tokens[] = array(
+      'operator' => $current_operator,
+      'quoted' => false,
+      'value' => $current_token,
+    );
+
+    $results = array();
+    foreach ($tokens as $token) {
+      $value = implode('', $token['value']);
+
+      // Empty terms are dropped before their operators are examined.
+      if (!strlen($value)) {
+        continue;
+      }
+
+      $operator_string = implode('', $token['operator']);
+
+      $results[] = array(
+        'operator' => $this->getOperatorConstant($operator_string),
+        'quoted' => $token['quoted'],
+        'value' => $value,
+      );
+    }
+
+    return $results;
+  }
+
+  /**
+   * Map the operator characters written before a term to an operator
+   * constant.
+   *
+   * @param string $operator_string Operator characters, possibly empty.
+   * @return string One of the OPERATOR_* constants.
+   * @throws PhutilSearchQueryCompilerSyntaxException If the characters are
+   *   anything other than "", "+" or "-".
+   */
+  private function getOperatorConstant($operator_string) {
+    switch ($operator_string) {
+      case '-':
+        return self::OPERATOR_NOT;
+      case '':
+      case '+':
+        return self::OPERATOR_AND;
+      default:
+        throw new PhutilSearchQueryCompilerSyntaxException(
+          pht(
+            'Query has an invalid sequence of operators ("%s").',
+            $operator_string));
+    }
+  }
+
+  /**
+   * Render one term as an operator prefix followed by the quoted value.
+   *
+   * @param array $token Term dictionary produced by tokenizeQuery().
+   * @return string
+   */
+  private function renderToken(array $token) {
+    $value = $this->quoteToken($token['value']);
+    $prefix = $this->getOperatorPrefix($token['operator']);
+
+    return $prefix.$value;
+  }
+
+  /**
+   * Look up the output prefix for an operator constant.
+   *
+   * @param string $operator One of the OPERATOR_* constants.
+   * @return string|null Prefix character, or null if the configured prefix
+   *   is a space (meaning no prefix is emitted).
+   * @throws PhutilSearchQueryCompilerSyntaxException If the operator is not
+   *   one of the OPERATOR_* constants.
+   */
+  private function getOperatorPrefix($operator) {
+    $operators = $this->operators;
+
+    switch ($operator) {
+      case self::OPERATOR_AND:
+        $prefix = $operators[0];
+        break;
+      case self::OPERATOR_NOT:
+        $prefix = $operators[2];
+        break;
+      default:
+        throw new PhutilSearchQueryCompilerSyntaxException(
+          pht(
+            'Unsupported operator prefix "%s".',
+            $operator));
+    }
+
+    if ($prefix === ' ') {
+      $prefix = null;
+    }
+
+    return $prefix;
+  }
+
+  /**
+   * Wrap a term in the configured open and close quote characters.
+   *
+   * The value is not escaped. With the default operators the tokenizer never
+   * produces a value containing a double quote; with custom quote characters
+   * the value may contain them.
+   *
+   * @param string $value Term text.
+   * @return string
+   */
+  private function quoteToken($value) {
+    $open_quote = $this->operators[10];
+    $close_quote = $this->operators[11];
+
+    return $open_quote.$value.$close_quote;
+  }
+
+}
diff --git a/src/search/PhutilSearchQueryCompilerSyntaxException.php b/src/search/PhutilSearchQueryCompilerSyntaxException.php
new file mode 100644
--- /dev/null
+++ b/src/search/PhutilSearchQueryCompilerSyntaxException.php
@@ -0,0 +1,4 @@
+<?php
+
+/**
+ * Thrown by PhutilSearchQueryCompiler when a query cannot be compiled: for
+ * example, because it is too long, has unmatched double quotes, or has an
+ * invalid or dangling operator.
+ */
+final class PhutilSearchQueryCompilerSyntaxException
+  extends Exception {
+}
diff --git a/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/search/__tests__/PhutilSearchQueryCompilerTestCase.php
@@ -0,0 +1,91 @@
+<?php
+
+/**
+ * Unit tests for PhutilSearchQueryCompiler.
+ */
+final class PhutilSearchQueryCompilerTestCase
+  extends PhutilTestCase {
+
+  /**
+   * Compile a table of queries with the default operators, then with two
+   * alternate operator strings. An expected value of `false` means the query
+   * must be rejected with a syntax exception.
+   */
+  public function testCompileQueries() {
+    $default_cases = array(
+      '' => '',
+      'cat dog' => '+"cat" +"dog"',
+      'cat -dog' => '+"cat" -"dog"',
+      'cat-dog' => '+"cat-dog"',
+
+      // An operator followed by spaces still binds to the term after it.
+      'cat - dog' => '+"cat" -"dog"',
+
+      // A double quote delimits terms even with no whitespace around it.
+      '"cat"dog' => '+"cat" +"dog"',
+
+      // Over the length limit.
+      str_repeat('x', 2048) => false,
+
+      // More than one operator in front of a term is rejected.
+      '++cat' => false,
+      '+-cat' => false,
+      '--cat' => false,
+
+      // An operator with no term after it is rejected.
+      '+' => false,
+      'cat +' => false,
+
+      // Unpaired double quotes are rejected.
+      '"' => false,
+      'cat "' => false,
+      '"cat' => false,
+      'A"' => false,
+      'A"B"' => '+"A" +"B"',
+    );
+    $this->assertCompileQueries($default_cases);
+
+    // With a space in the "and" slot of the operator string, required terms
+    // are emitted with no prefix.
+    $implicit_and_cases = array(
+      'cat dog' => '"cat" "dog"',
+      'cat -dog' => '"cat" -"dog"',
+    );
+    $this->assertCompileQueries($implicit_and_cases, ' |-><()~*:""&\'');
+
+    // The open and close quote characters may differ from one another.
+    $bracket_quote_cases = array(
+      'cat dog' => '+[cat] +[dog]',
+      'cat -dog' => '+[cat] -[dog]',
+    );
+    $this->assertCompileQueries($bracket_quote_cases, '+ -><()~*:[]&|');
+  }
+
+  /**
+   * Compile each input and compare the result with its expectation.
+   *
+   * @param array $tests Map from query text to the expected compiled
+   *   string, or to `false` if compilation must raise a syntax exception.
+   * @param string|null $operators Optional operator string to configure on
+   *   the compiler; null leaves the compiler's default in place.
+   */
+  private function assertCompileQueries(array $tests, $operators = null) {
+    $has_custom_operators = ($operators !== null);
+
+    foreach ($tests as $input => $expect) {
+      try {
+        $compiler = new PhutilSearchQueryCompiler();
+        $compiler->setQuery($input);
+
+        if ($has_custom_operators) {
+          $compiler->setOperators($operators);
+        }
+
+        $actual = $compiler->compileQuery();
+      } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
+        // A syntax failure is recorded as `false` so it can be compared
+        // directly against the expectation table.
+        $actual = false;
+      }
+
+      $this->assertEqual(
+        $expect,
+        $actual,
+        pht('Compilation of query: %s', $input));
+    }
+  }
+
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Oct 17 2024, 12:06 AM (4 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6719761
Default Alt Text
D16938.id.diff (9 KB)
Attached To
Mode
D16938: Add a "query compiler" to convert user search queries into a form MySQL likes
Attached
Detach File
Event Timeline
Log In to Comment