Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15485918
D20939.id49891.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
23 KB
Referenced Files
None
Subscribers
None
D20939.id49891.diff
View Options
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -5658,6 +5658,12 @@
'PhutilRemarkupTableBlockRule' => 'infrastructure/markup/blockrule/PhutilRemarkupTableBlockRule.php',
'PhutilRemarkupTestInterpreterRule' => 'infrastructure/markup/blockrule/PhutilRemarkupTestInterpreterRule.php',
'PhutilRemarkupUnderlineRule' => 'infrastructure/markup/markuprule/PhutilRemarkupUnderlineRule.php',
+ 'PhutilSearchQueryCompiler' => 'applications/search/compiler/PhutilSearchQueryCompiler.php',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php',
+ 'PhutilSearchQueryCompilerTestCase' => 'applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php',
+ 'PhutilSearchQueryToken' => 'applications/search/compiler/PhutilSearchQueryToken.php',
+ 'PhutilSearchStemmer' => 'applications/search/compiler/PhutilSearchStemmer.php',
+ 'PhutilSearchStemmerTestCase' => 'applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php',
'PhutilSlackAuthAdapter' => 'applications/auth/adapter/PhutilSlackAuthAdapter.php',
'PhutilTwitchAuthAdapter' => 'applications/auth/adapter/PhutilTwitchAuthAdapter.php',
'PhutilTwitterAuthAdapter' => 'applications/auth/adapter/PhutilTwitterAuthAdapter.php',
@@ -12483,6 +12489,12 @@
'PhutilRemarkupTableBlockRule' => 'PhutilRemarkupBlockRule',
'PhutilRemarkupTestInterpreterRule' => 'PhutilRemarkupBlockInterpreter',
'PhutilRemarkupUnderlineRule' => 'PhutilRemarkupRule',
+ 'PhutilSearchQueryCompiler' => 'Phobject',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'Exception',
+ 'PhutilSearchQueryCompilerTestCase' => 'PhutilTestCase',
+ 'PhutilSearchQueryToken' => 'Phobject',
+ 'PhutilSearchStemmer' => 'Phobject',
+ 'PhutilSearchStemmerTestCase' => 'PhutilTestCase',
'PhutilSlackAuthAdapter' => 'PhutilOAuthAuthAdapter',
'PhutilTwitchAuthAdapter' => 'PhutilOAuthAuthAdapter',
'PhutilTwitterAuthAdapter' => 'PhutilOAuth1AuthAdapter',
diff --git a/src/applications/search/compiler/PhutilSearchQueryCompiler.php b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
@@ -0,0 +1,374 @@
+<?php
+
+final class PhutilSearchQueryCompiler
+ extends Phobject {
+
+ private $operators = '+ -><()~*:""&|';
+ private $query;
+ private $stemmer;
+ private $enableFunctions = false;
+
+ const OPERATOR_NOT = 'not';
+ const OPERATOR_AND = 'and';
+ const OPERATOR_SUBSTRING = 'sub';
+ const OPERATOR_EXACT = 'exact';
+
+ public function setOperators($operators) {
+ $this->operators = $operators;
+ return $this;
+ }
+
+ public function getOperators() {
+ return $this->operators;
+ }
+
+ public function setStemmer(PhutilSearchStemmer $stemmer) {
+ $this->stemmer = $stemmer;
+ return $this;
+ }
+
+ public function getStemmer() {
+ return $this->stemmer;
+ }
+
+ public function setEnableFunctions($enable_functions) {
+ $this->enableFunctions = $enable_functions;
+ return $this;
+ }
+
+ public function getEnableFunctions() {
+ return $this->enableFunctions;
+ }
+
+ public function compileQuery(array $tokens) {
+ assert_instances_of($tokens, 'PhutilSearchQueryToken');
+
+ $result = array();
+ foreach ($tokens as $token) {
+ $result[] = $this->renderToken($token);
+ }
+
+ return $this->compileRenderedTokens($result);
+ }
+
+ public function compileLiteralQuery(array $tokens) {
+ assert_instances_of($tokens, 'PhutilSearchQueryToken');
+
+ $result = array();
+ foreach ($tokens as $token) {
+ if (!$token->isQuoted()) {
+ continue;
+ }
+ $result[] = $this->renderToken($token);
+ }
+
+ return $this->compileRenderedTokens($result);
+ }
+
+ public function compileStemmedQuery(array $tokens) {
+ assert_instances_of($tokens, 'PhutilSearchQueryToken');
+
+ $result = array();
+ foreach ($tokens as $token) {
+ if ($token->isQuoted()) {
+ continue;
+ }
+ $result[] = $this->renderToken($token, $this->getStemmer());
+ }
+
+ return $this->compileRenderedTokens($result);
+ }
+
+ private function compileRenderedTokens(array $list) {
+ if (!$list) {
+ return null;
+ }
+
+ $list = array_unique($list);
+ return implode(' ', $list);
+ }
+
+ public function newTokens($query) {
+ $results = $this->tokenizeQuery($query);
+
+ $tokens = array();
+ foreach ($results as $result) {
+ $tokens[] = PhutilSearchQueryToken::newFromDictionary($result);
+ }
+
+ return $tokens;
+ }
+
+ private function tokenizeQuery($query) {
+ $maximum_bytes = 1024;
+
+ $query_bytes = strlen($query);
+ if ($query_bytes > $maximum_bytes) {
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query is too long (%s bytes, maximum is %s bytes).',
+ new PhutilNumber($query_bytes),
+ new PhutilNumber($maximum_bytes)));
+ }
+
+ $query = phutil_utf8v($query);
+ $length = count($query);
+
+ $enable_functions = $this->getEnableFunctions();
+
+ $mode = 'scan';
+ $current_operator = array();
+ $current_token = array();
+ $current_function = null;
+ $is_quoted = false;
+ $tokens = array();
+
+ if ($enable_functions) {
+ $operator_characters = '[~=+-]';
+ } else {
+ $operator_characters = '[+-]';
+ }
+
+ for ($ii = 0; $ii < $length; $ii++) {
+ $character = $query[$ii];
+
+ if ($mode == 'scan') {
+ if (preg_match('/^\s\z/u', $character)) {
+ continue;
+ }
+
+ $mode = 'function';
+ }
+
+ if ($mode == 'function') {
+ $mode = 'operator';
+
+ if ($enable_functions) {
+ $found = false;
+ for ($jj = $ii; $jj < $length; $jj++) {
+ if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) {
+ continue;
+ }
+ if ($query[$jj] == ':') {
+ $found = $jj;
+ }
+ break;
+ }
+
+ if ($found !== false) {
+ $function = array_slice($query, $ii, ($jj - $ii));
+ $current_function = implode('', $function);
+
+ if (!strlen($current_function)) {
+ $current_function = null;
+ }
+
+ $ii = $jj;
+ continue;
+ }
+ }
+ }
+
+ if ($mode == 'operator') {
+ if (preg_match('/^\s\z/u', $character)) {
+ continue;
+ }
+
+ if (preg_match('/^'.$operator_characters.'\z/', $character)) {
+ $current_operator[] = $character;
+ continue;
+ }
+
+ $mode = 'quote';
+ }
+
+ if ($mode == 'quote') {
+ if (preg_match('/^"\z/', $character)) {
+ $is_quoted = true;
+ $mode = 'token';
+ continue;
+ }
+
+ $mode = 'token';
+ }
+
+ if ($mode == 'token') {
+ $capture = false;
+ $was_quoted = $is_quoted;
+ if ($is_quoted) {
+ if (preg_match('/^"\z/', $character)) {
+ $capture = true;
+ $mode = 'scan';
+ $is_quoted = false;
+ }
+ } else {
+ if (preg_match('/^\s\z/u', $character)) {
+ $capture = true;
+ $mode = 'scan';
+ }
+
+ if (preg_match('/^"\z/', $character)) {
+ $capture = true;
+ $mode = 'token';
+ $is_quoted = true;
+ }
+ }
+
+ if ($capture) {
+ $token = array(
+ 'operator' => $current_operator,
+ 'quoted' => $was_quoted,
+ 'value' => $current_token,
+ );
+
+ if ($enable_functions) {
+ $token['function'] = $current_function;
+ }
+
+ $tokens[] = $token;
+
+ $current_operator = array();
+ $current_token = array();
+ $current_function = null;
+ continue;
+ } else {
+ $current_token[] = $character;
+ }
+ }
+ }
+
+ if ($is_quoted) {
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query contains unmatched double quotes.'));
+ }
+
+ if ($mode == 'operator') {
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query contains operator ("%s") with no search term.',
+ implode('', $current_operator)));
+ }
+
+ $token = array(
+ 'operator' => $current_operator,
+ 'quoted' => false,
+ 'value' => $current_token,
+ );
+
+ if ($enable_functions) {
+ $token['function'] = $current_function;
+ }
+
+ $tokens[] = $token;
+
+ $results = array();
+ foreach ($tokens as $token) {
+ $value = implode('', $token['value']);
+ $operator_string = implode('', $token['operator']);
+
+ if (!strlen($value)) {
+ continue;
+ }
+
+ $is_quoted = $token['quoted'];
+
+ switch ($operator_string) {
+ case '-':
+ $operator = self::OPERATOR_NOT;
+ break;
+ case '~':
+ $operator = self::OPERATOR_SUBSTRING;
+ break;
+ case '=':
+ $operator = self::OPERATOR_EXACT;
+ break;
+ case '+':
+ $operator = self::OPERATOR_AND;
+ break;
+ case '':
+ // See T12995. If this query term contains Chinese, Japanese or
+ // Korean characters, treat the term as a substring term by default.
+ // These languages do not separate words with spaces, so the term
+ // search mode is normally useless.
+ if ($enable_functions && !$is_quoted && phutil_utf8_is_cjk($value)) {
+ $operator = self::OPERATOR_SUBSTRING;
+ } else {
+ $operator = self::OPERATOR_AND;
+ }
+ break;
+ default:
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query has an invalid sequence of operators ("%s").',
+ $operator_string));
+ }
+
+ $result = array(
+ 'operator' => $operator,
+ 'quoted' => $is_quoted,
+ 'value' => $value,
+ );
+
+ if ($enable_functions) {
+ $result['function'] = $token['function'];
+ }
+
+ $results[] = $result;
+ }
+
+ return $results;
+ }
+
+ private function renderToken(
+ PhutilSearchQueryToken $token,
+ PhutilSearchStemmer $stemmer = null) {
+ $value = $token->getValue();
+
+ if ($stemmer) {
+ $value = $stemmer->stemToken($value);
+ }
+
+ $value = $this->quoteToken($value);
+ $operator = $token->getOperator();
+ $prefix = $this->getOperatorPrefix($operator);
+
+ $value = $prefix.$value;
+
+ return $value;
+ }
+
+ private function getOperatorPrefix($operator) {
+ $operators = $this->operators;
+
+ switch ($operator) {
+ case self::OPERATOR_AND:
+ $prefix = $operators[0];
+ break;
+ case self::OPERATOR_NOT:
+ $prefix = $operators[2];
+ break;
+ default:
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Unsupported operator prefix "%s".',
+ $operator));
+ }
+
+ if ($prefix == ' ') {
+ $prefix = null;
+ }
+
+ return $prefix;
+ }
+
+ private function quoteToken($value) {
+ $operators = $this->operators;
+
+ $open_quote = $this->operators[10];
+ $close_quote = $this->operators[11];
+
+ return $open_quote.$value.$close_quote;
+ }
+
+}
diff --git a/src/applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php b/src/applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php
@@ -0,0 +1,4 @@
+<?php
+
+final class PhutilSearchQueryCompilerSyntaxException
+ extends Exception {}
diff --git a/src/applications/search/compiler/PhutilSearchQueryToken.php b/src/applications/search/compiler/PhutilSearchQueryToken.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchQueryToken.php
@@ -0,0 +1,37 @@
+<?php
+
+final class PhutilSearchQueryToken extends Phobject {
+
+ private $isQuoted;
+ private $value;
+ private $operator;
+ private $function;
+
+ public static function newFromDictionary(array $dictionary) {
+ $token = new self();
+
+ $token->isQuoted = $dictionary['quoted'];
+ $token->operator = $dictionary['operator'];
+ $token->value = $dictionary['value'];
+ $token->function = idx($dictionary, 'function');
+
+ return $token;
+ }
+
+ public function isQuoted() {
+ return $this->isQuoted;
+ }
+
+ public function getValue() {
+ return $this->value;
+ }
+
+ public function getOperator() {
+ return $this->operator;
+ }
+
+ public function getFunction() {
+ return $this->function;
+ }
+
+}
diff --git a/src/applications/search/compiler/PhutilSearchStemmer.php b/src/applications/search/compiler/PhutilSearchStemmer.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchStemmer.php
@@ -0,0 +1,74 @@
+<?php
+
+final class PhutilSearchStemmer
+ extends Phobject {
+
+ public function stemToken($token) {
+ $token = $this->normalizeToken($token);
+ return $this->applyStemmer($token);
+ }
+
+ public function stemCorpus($corpus) {
+ $corpus = $this->normalizeCorpus($corpus);
+ $tokens = preg_split('/[^a-zA-Z0-9\x7F-\xFF._]+/', $corpus);
+
+ $words = array();
+ foreach ($tokens as $key => $token) {
+ $token = trim($token, '._');
+
+ if (strlen($token) < 3) {
+ continue;
+ }
+
+ $words[$token] = $token;
+ }
+
+ $stems = array();
+ foreach ($words as $word) {
+ $stems[] = $this->applyStemmer($word);
+ }
+
+ return implode(' ', $stems);
+ }
+
+ private function normalizeToken($token) {
+ return phutil_utf8_strtolower($token);
+ }
+
+ private function normalizeCorpus($corpus) {
+ return phutil_utf8_strtolower($corpus);
+ }
+
+ /**
+ * @phutil-external-symbol class Porter
+ */
+ private function applyStemmer($normalized_token) {
+ // If the token has internal punctuation, handle it literally. This
+ // deals with things like domain names, Conduit API methods, and other
+ // sorts of informal tokens.
+ if (preg_match('/[._]/', $normalized_token)) {
+ return $normalized_token;
+ }
+
+ static $loaded;
+
+ if ($loaded === null) {
+ $root = dirname(phutil_get_library_root('phutil'));
+ require_once $root.'/externals/porter-stemmer/src/Porter.php';
+ $loaded = true;
+ }
+
+
+ $stem = Porter::stem($normalized_token);
+
+ // If the stem is too short, it won't be a candidate for indexing. These
+ // tokens are also likely to be acronyms (like "DNS") rather than real
+ // English words.
+ if (strlen($stem) < 3) {
+ return $normalized_token;
+ }
+
+ return $stem;
+ }
+
+}
diff --git a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
@@ -0,0 +1,220 @@
+<?php
+
+final class PhutilSearchQueryCompilerTestCase
+ extends PhutilTestCase {
+
+ public function testCompileQueries() {
+ $tests = array(
+ '' => null,
+ 'cat dog' => '+"cat" +"dog"',
+ 'cat -dog' => '+"cat" -"dog"',
+ 'cat-dog' => '+"cat-dog"',
+
+ // If there are spaces after an operator, the operator applies to the
+ // next search term.
+ 'cat - dog' => '+"cat" -"dog"',
+
+ // Double quotes serve as delimiters even if there is no whitespace
+ // between terms.
+ '"cat"dog' => '+"cat" +"dog"',
+
+ // This query is too long.
+ str_repeat('x', 2048) => false,
+
+ // Multiple operators are not permitted.
+ '++cat' => false,
+ '+-cat' => false,
+ '--cat' => false,
+
+ // Stray operators are not permitted.
+ '+' => false,
+ 'cat +' => false,
+
+ // Double quotes must be paired.
+ '"' => false,
+ 'cat "' => false,
+ '"cat' => false,
+ 'A"' => false,
+ 'A"B"' => '+"A" +"B"',
+ );
+
+ $this->assertCompileQueries($tests);
+
+ // Test that we compile queries correctly if the operators have been
+ // swapped to use "AND" by default.
+ $operator_tests = array(
+ 'cat dog' => '"cat" "dog"',
+ 'cat -dog' => '"cat" -"dog"',
+ );
+ $this->assertCompileQueries($operator_tests, ' |-><()~*:""&\'');
+
+
+ // Test that we compile queries correctly if the quote operators have
+ // been swapped to differ.
+ $quote_tests = array(
+ 'cat dog' => '+[cat] +[dog]',
+ 'cat -dog' => '+[cat] -[dog]',
+ );
+ $this->assertCompileQueries($quote_tests, '+ -><()~*:[]&|');
+
+ }
+
+ public function testCompileQueriesWithStemming() {
+ $stemming_tests = array(
+ 'cat dog' => array(
+ null,
+ '+"cat" +"dog"',
+ ),
+ 'cats dogs' => array(
+ null,
+ '+"cat" +"dog"',
+ ),
+ 'cats "dogs"' => array(
+ '+"dogs"',
+ '+"cat"',
+ ),
+ '"blessed blade" of the windseeker' => array(
+ '+"blessed blade"',
+ '+"of" +"the" +"windseek"',
+ ),
+ 'mailing users for mentions on tasks' => array(
+ null,
+ '+"mail" +"user" +"for" +"mention" +"on" +"task"',
+ ),
+ );
+
+ $stemmer = new PhutilSearchStemmer();
+ $this->assertCompileQueries($stemming_tests, null, $stemmer);
+ }
+
+ public function testCompileQueriesWithFunctions() {
+ $op_and = PhutilSearchQueryCompiler::OPERATOR_AND;
+ $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
+ $op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
+
+ $mao = "\xE7\x8C\xAB";
+
+ $function_tests = array(
+ 'cat' => array(
+ array(null, $op_and, 'cat'),
+ ),
+ ':cat' => array(
+ array(null, $op_and, 'cat'),
+ ),
+ 'title:cat' => array(
+ array('title', $op_and, 'cat'),
+ ),
+ 'title:cat:dog' => array(
+ array('title', $op_and, 'cat:dog'),
+ ),
+ 'title:~cat' => array(
+ array('title', $op_sub, 'cat'),
+ ),
+ 'cat title:="Meow Meow"' => array(
+ array(null, $op_and, 'cat'),
+ array('title', $op_exact, 'Meow Meow'),
+ ),
+ 'title:cat title:dog' => array(
+ array('title', $op_and, 'cat'),
+ array('title', $op_and, 'dog'),
+ ),
+ '~"core and seven years ag"' => array(
+ array(null, $op_sub, 'core and seven years ag'),
+ ),
+ $mao => array(
+ array(null, $op_sub, $mao),
+ ),
+ '+'.$mao => array(
+ array(null, $op_and, $mao),
+ ),
+ '~'.$mao => array(
+ array(null, $op_sub, $mao),
+ ),
+ '"'.$mao.'"' => array(
+ array(null, $op_and, $mao),
+ ),
+ );
+
+ $this->assertCompileFunctionQueries($function_tests);
+ }
+
+ private function assertCompileQueries(
+ array $tests,
+ $operators = null,
+ PhutilSearchStemmer $stemmer = null) {
+ foreach ($tests as $input => $expect) {
+ $caught = null;
+
+ $query = null;
+ $literal_query = null;
+ $stemmed_query = null;
+
+ try {
+ $compiler = new PhutilSearchQueryCompiler();
+
+ if ($operators !== null) {
+ $compiler->setOperators($operators);
+ }
+
+ if ($stemmer !== null) {
+ $compiler->setStemmer($stemmer);
+ }
+
+ $tokens = $compiler->newTokens($input);
+
+ if ($stemmer) {
+ $literal_query = $compiler->compileLiteralQuery($tokens);
+ $stemmed_query = $compiler->compileStemmedQuery($tokens);
+ } else {
+ $query = $compiler->compileQuery($tokens);
+ }
+ } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
+ $caught = $ex;
+ }
+
+ if ($caught !== null) {
+ $query = false;
+ $literal_query = false;
+ $stemmed_query = false;
+ }
+
+ if (!$stemmer) {
+ $this->assertEqual(
+ $expect,
+ $query,
+ pht('Compilation of query: %s', $input));
+ } else {
+ $this->assertEqual(
+ $expect,
+ ($literal_query === false)
+ ? false
+ : array($literal_query, $stemmed_query),
+ pht('Stemmed compilation of query: %s', $input));
+ }
+ }
+ }
+
+ private function assertCompileFunctionQueries(array $tests) {
+ foreach ($tests as $input => $expect) {
+ $compiler = id(new PhutilSearchQueryCompiler())
+ ->setEnableFunctions(true);
+
+ $tokens = $compiler->newTokens($input);
+
+ $result = array();
+ foreach ($tokens as $token) {
+ $result[] = array(
+ $token->getFunction(),
+ $token->getOperator(),
+ $token->getValue(),
+ );
+ }
+
+ $this->assertEqual(
+ $expect,
+ $result,
+ pht('Function compilation of query: %s', $input));
+ }
+ }
+
+}
diff --git a/src/applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php b/src/applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php
@@ -0,0 +1,85 @@
+<?php
+
+final class PhutilSearchStemmerTestCase
+ extends PhutilTestCase {
+
+ public function testStemTokens() {
+ $tests = array(
+ // Various real-world cases collected from users before we implemented
+ // stemming.
+ 'tokens' => 'token',
+ 'panels' => 'panel',
+
+ 'renames' => 'renam',
+ 'rename' => 'renam',
+
+ 'components' => 'compon',
+ 'component' => 'compon',
+
+ 'implementation' => 'implement',
+ 'implements' => 'implement',
+ 'implementing' => 'implement',
+ 'implementer' => 'implement',
+
+ 'deleting' => 'delet',
+ 'deletion' => 'delet',
+ 'delete' => 'delet',
+
+ 'erratically' => 'errat',
+ 'erratic' => 'errat',
+
+ // Stems should be normalized.
+ 'DOG' => 'dog',
+
+ // If stemming would bring a token under 3 characters, it should not
+ // be stemmed.
+ 'dns' => 'dns',
+ 'nis' => 'nis',
+
+ // Complex tokens with internal punctuation should be left untouched;
+ // these are usually things like domain names, API calls, informal tags,
+ // etc.
+ 'apples' => 'appl',
+ 'bananas' => 'banana',
+ 'apples_bananas' => 'apples_bananas',
+ 'apples_bananas.apples_bananas' => 'apples_bananas.apples_bananas',
+ );
+
+ $stemmer = new PhutilSearchStemmer();
+ foreach ($tests as $input => $expect) {
+ $stem = $stemmer->stemToken($input);
+ $this->assertEqual(
+ $expect,
+ $stem,
+ pht('Token stem of "%s".', $input));
+ }
+ }
+
+ public function testStemDocuments() {
+ $tests = array(
+ 'The wild boar meandered erratically.' =>
+ 'the wild boar meander errat',
+ 'Fool me onc, shame on you. Fool me twice, shame on me.' =>
+ 'fool onc shame you twice',
+ 'Fireball is a seventh-level spell which deals 2d16 points of damage '.
+ 'in a 1-meter radius around a target.' =>
+ 'firebal seventh level spell which deal 2d16 point damag meter '.
+ 'radiu around target',
+ 'apples-bananas' => 'appl banana',
+ 'apples_bananas' => 'apples_bananas',
+ 'apples.bananas' => 'apples.bananas',
+ 'oddly-proportioned' => 'oddli proport',
+ );
+
+ $stemmer = new PhutilSearchStemmer();
+ foreach ($tests as $input => $expect) {
+ $stem = $stemmer->stemCorpus($input);
+ $this->assertEqual(
+ $expect,
+ $stem,
+ pht('Corpus stem of: %s', $input));
+ }
+ }
+
+
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Apr 11, 3:34 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7739428
Default Alt Text
D20939.id49891.diff (23 KB)
Attached To
Mode
D20939: Move search query parser/compiler classes to Phabricator
Attached
Detach File
Event Timeline
Log In to Comment