Page MenuHomePhabricator

D20939.id49891.diff
No OneTemporary

D20939.id49891.diff

diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -5658,6 +5658,12 @@
'PhutilRemarkupTableBlockRule' => 'infrastructure/markup/blockrule/PhutilRemarkupTableBlockRule.php',
'PhutilRemarkupTestInterpreterRule' => 'infrastructure/markup/blockrule/PhutilRemarkupTestInterpreterRule.php',
'PhutilRemarkupUnderlineRule' => 'infrastructure/markup/markuprule/PhutilRemarkupUnderlineRule.php',
+ 'PhutilSearchQueryCompiler' => 'applications/search/compiler/PhutilSearchQueryCompiler.php',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php',
+ 'PhutilSearchQueryCompilerTestCase' => 'applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php',
+ 'PhutilSearchQueryToken' => 'applications/search/compiler/PhutilSearchQueryToken.php',
+ 'PhutilSearchStemmer' => 'applications/search/compiler/PhutilSearchStemmer.php',
+ 'PhutilSearchStemmerTestCase' => 'applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php',
'PhutilSlackAuthAdapter' => 'applications/auth/adapter/PhutilSlackAuthAdapter.php',
'PhutilTwitchAuthAdapter' => 'applications/auth/adapter/PhutilTwitchAuthAdapter.php',
'PhutilTwitterAuthAdapter' => 'applications/auth/adapter/PhutilTwitterAuthAdapter.php',
@@ -12483,6 +12489,12 @@
'PhutilRemarkupTableBlockRule' => 'PhutilRemarkupBlockRule',
'PhutilRemarkupTestInterpreterRule' => 'PhutilRemarkupBlockInterpreter',
'PhutilRemarkupUnderlineRule' => 'PhutilRemarkupRule',
+ 'PhutilSearchQueryCompiler' => 'Phobject',
+ 'PhutilSearchQueryCompilerSyntaxException' => 'Exception',
+ 'PhutilSearchQueryCompilerTestCase' => 'PhutilTestCase',
+ 'PhutilSearchQueryToken' => 'Phobject',
+ 'PhutilSearchStemmer' => 'Phobject',
+ 'PhutilSearchStemmerTestCase' => 'PhutilTestCase',
'PhutilSlackAuthAdapter' => 'PhutilOAuthAuthAdapter',
'PhutilTwitchAuthAdapter' => 'PhutilOAuthAuthAdapter',
'PhutilTwitterAuthAdapter' => 'PhutilOAuth1AuthAdapter',
diff --git a/src/applications/search/compiler/PhutilSearchQueryCompiler.php b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
@@ -0,0 +1,374 @@
+<?php
+
+final class PhutilSearchQueryCompiler
+ extends Phobject {
+
+ private $operators = '+ -><()~*:""&|';
+ private $query;
+ private $stemmer;
+ private $enableFunctions = false;
+
+ const OPERATOR_NOT = 'not';
+ const OPERATOR_AND = 'and';
+ const OPERATOR_SUBSTRING = 'sub';
+ const OPERATOR_EXACT = 'exact';
+
+ public function setOperators($operators) {
+ $this->operators = $operators;
+ return $this;
+ }
+
+ public function getOperators() {
+ return $this->operators;
+ }
+
+ public function setStemmer(PhutilSearchStemmer $stemmer) {
+ $this->stemmer = $stemmer;
+ return $this;
+ }
+
+ public function getStemmer() {
+ return $this->stemmer;
+ }
+
+ public function setEnableFunctions($enable_functions) {
+ $this->enableFunctions = $enable_functions;
+ return $this;
+ }
+
+ public function getEnableFunctions() {
+ return $this->enableFunctions;
+ }
+
+ public function compileQuery(array $tokens) {
+ assert_instances_of($tokens, 'PhutilSearchQueryToken');
+
+ $result = array();
+ foreach ($tokens as $token) {
+ $result[] = $this->renderToken($token);
+ }
+
+ return $this->compileRenderedTokens($result);
+ }
+
+ public function compileLiteralQuery(array $tokens) {
+ assert_instances_of($tokens, 'PhutilSearchQueryToken');
+
+ $result = array();
+ foreach ($tokens as $token) {
+ if (!$token->isQuoted()) {
+ continue;
+ }
+ $result[] = $this->renderToken($token);
+ }
+
+ return $this->compileRenderedTokens($result);
+ }
+
+ public function compileStemmedQuery(array $tokens) {
+ assert_instances_of($tokens, 'PhutilSearchQueryToken');
+
+ $result = array();
+ foreach ($tokens as $token) {
+ if ($token->isQuoted()) {
+ continue;
+ }
+ $result[] = $this->renderToken($token, $this->getStemmer());
+ }
+
+ return $this->compileRenderedTokens($result);
+ }
+
+ private function compileRenderedTokens(array $list) {
+ if (!$list) {
+ return null;
+ }
+
+ $list = array_unique($list);
+ return implode(' ', $list);
+ }
+
+ public function newTokens($query) {
+ $results = $this->tokenizeQuery($query);
+
+ $tokens = array();
+ foreach ($results as $result) {
+ $tokens[] = PhutilSearchQueryToken::newFromDictionary($result);
+ }
+
+ return $tokens;
+ }
+
+ private function tokenizeQuery($query) {
+ $maximum_bytes = 1024;
+
+ $query_bytes = strlen($query);
+ if ($query_bytes > $maximum_bytes) {
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query is too long (%s bytes, maximum is %s bytes).',
+ new PhutilNumber($query_bytes),
+ new PhutilNumber($maximum_bytes)));
+ }
+
+ $query = phutil_utf8v($query);
+ $length = count($query);
+
+ $enable_functions = $this->getEnableFunctions();
+
+ $mode = 'scan';
+ $current_operator = array();
+ $current_token = array();
+ $current_function = null;
+ $is_quoted = false;
+ $tokens = array();
+
+ if ($enable_functions) {
+ $operator_characters = '[~=+-]';
+ } else {
+ $operator_characters = '[+-]';
+ }
+
+ for ($ii = 0; $ii < $length; $ii++) {
+ $character = $query[$ii];
+
+ if ($mode == 'scan') {
+ if (preg_match('/^\s\z/u', $character)) {
+ continue;
+ }
+
+ $mode = 'function';
+ }
+
+ if ($mode == 'function') {
+ $mode = 'operator';
+
+ if ($enable_functions) {
+ $found = false;
+ for ($jj = $ii; $jj < $length; $jj++) {
+ if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) {
+ continue;
+ }
+ if ($query[$jj] == ':') {
+ $found = $jj;
+ }
+ break;
+ }
+
+ if ($found !== false) {
+ $function = array_slice($query, $ii, ($jj - $ii));
+ $current_function = implode('', $function);
+
+ if (!strlen($current_function)) {
+ $current_function = null;
+ }
+
+ $ii = $jj;
+ continue;
+ }
+ }
+ }
+
+ if ($mode == 'operator') {
+ if (preg_match('/^\s\z/u', $character)) {
+ continue;
+ }
+
+ if (preg_match('/^'.$operator_characters.'\z/', $character)) {
+ $current_operator[] = $character;
+ continue;
+ }
+
+ $mode = 'quote';
+ }
+
+ if ($mode == 'quote') {
+ if (preg_match('/^"\z/', $character)) {
+ $is_quoted = true;
+ $mode = 'token';
+ continue;
+ }
+
+ $mode = 'token';
+ }
+
+ if ($mode == 'token') {
+ $capture = false;
+ $was_quoted = $is_quoted;
+ if ($is_quoted) {
+ if (preg_match('/^"\z/', $character)) {
+ $capture = true;
+ $mode = 'scan';
+ $is_quoted = false;
+ }
+ } else {
+ if (preg_match('/^\s\z/u', $character)) {
+ $capture = true;
+ $mode = 'scan';
+ }
+
+ if (preg_match('/^"\z/', $character)) {
+ $capture = true;
+ $mode = 'token';
+ $is_quoted = true;
+ }
+ }
+
+ if ($capture) {
+ $token = array(
+ 'operator' => $current_operator,
+ 'quoted' => $was_quoted,
+ 'value' => $current_token,
+ );
+
+ if ($enable_functions) {
+ $token['function'] = $current_function;
+ }
+
+ $tokens[] = $token;
+
+ $current_operator = array();
+ $current_token = array();
+ $current_function = null;
+ continue;
+ } else {
+ $current_token[] = $character;
+ }
+ }
+ }
+
+ if ($is_quoted) {
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query contains unmatched double quotes.'));
+ }
+
+ if ($mode == 'operator') {
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query contains operator ("%s") with no search term.',
+ implode('', $current_operator)));
+ }
+
+ $token = array(
+ 'operator' => $current_operator,
+ 'quoted' => false,
+ 'value' => $current_token,
+ );
+
+ if ($enable_functions) {
+ $token['function'] = $current_function;
+ }
+
+ $tokens[] = $token;
+
+ $results = array();
+ foreach ($tokens as $token) {
+ $value = implode('', $token['value']);
+ $operator_string = implode('', $token['operator']);
+
+ if (!strlen($value)) {
+ continue;
+ }
+
+ $is_quoted = $token['quoted'];
+
+ switch ($operator_string) {
+ case '-':
+ $operator = self::OPERATOR_NOT;
+ break;
+ case '~':
+ $operator = self::OPERATOR_SUBSTRING;
+ break;
+ case '=':
+ $operator = self::OPERATOR_EXACT;
+ break;
+ case '+':
+ $operator = self::OPERATOR_AND;
+ break;
+ case '':
+ // See T12995. If this query term contains Chinese, Japanese or
+ // Korean characters, treat the term as a substring term by default.
+ // These languages do not separate words with spaces, so the term
+ // search mode is normally useless.
+ if ($enable_functions && !$is_quoted && phutil_utf8_is_cjk($value)) {
+ $operator = self::OPERATOR_SUBSTRING;
+ } else {
+ $operator = self::OPERATOR_AND;
+ }
+ break;
+ default:
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query has an invalid sequence of operators ("%s").',
+ $operator_string));
+ }
+
+ $result = array(
+ 'operator' => $operator,
+ 'quoted' => $is_quoted,
+ 'value' => $value,
+ );
+
+ if ($enable_functions) {
+ $result['function'] = $token['function'];
+ }
+
+ $results[] = $result;
+ }
+
+ return $results;
+ }
+
+ private function renderToken(
+ PhutilSearchQueryToken $token,
+ PhutilSearchStemmer $stemmer = null) {
+ $value = $token->getValue();
+
+ if ($stemmer) {
+ $value = $stemmer->stemToken($value);
+ }
+
+ $value = $this->quoteToken($value);
+ $operator = $token->getOperator();
+ $prefix = $this->getOperatorPrefix($operator);
+
+ $value = $prefix.$value;
+
+ return $value;
+ }
+
+ private function getOperatorPrefix($operator) {
+ $operators = $this->operators;
+
+ switch ($operator) {
+ case self::OPERATOR_AND:
+ $prefix = $operators[0];
+ break;
+ case self::OPERATOR_NOT:
+ $prefix = $operators[2];
+ break;
+ default:
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Unsupported operator prefix "%s".',
+ $operator));
+ }
+
+ if ($prefix == ' ') {
+ $prefix = null;
+ }
+
+ return $prefix;
+ }
+
+ private function quoteToken($value) {
+ $operators = $this->operators;
+
+ $open_quote = $this->operators[10];
+ $close_quote = $this->operators[11];
+
+ return $open_quote.$value.$close_quote;
+ }
+
+}
diff --git a/src/applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php b/src/applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchQueryCompilerSyntaxException.php
@@ -0,0 +1,4 @@
+<?php
+
+final class PhutilSearchQueryCompilerSyntaxException
+ extends Exception {}
diff --git a/src/applications/search/compiler/PhutilSearchQueryToken.php b/src/applications/search/compiler/PhutilSearchQueryToken.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchQueryToken.php
@@ -0,0 +1,37 @@
+<?php
+
+final class PhutilSearchQueryToken extends Phobject {
+
+ private $isQuoted;
+ private $value;
+ private $operator;
+ private $function;
+
+ public static function newFromDictionary(array $dictionary) {
+ $token = new self();
+
+ $token->isQuoted = $dictionary['quoted'];
+ $token->operator = $dictionary['operator'];
+ $token->value = $dictionary['value'];
+ $token->function = idx($dictionary, 'function');
+
+ return $token;
+ }
+
+ public function isQuoted() {
+ return $this->isQuoted;
+ }
+
+ public function getValue() {
+ return $this->value;
+ }
+
+ public function getOperator() {
+ return $this->operator;
+ }
+
+ public function getFunction() {
+ return $this->function;
+ }
+
+}
diff --git a/src/applications/search/compiler/PhutilSearchStemmer.php b/src/applications/search/compiler/PhutilSearchStemmer.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/PhutilSearchStemmer.php
@@ -0,0 +1,74 @@
+<?php
+
+final class PhutilSearchStemmer
+ extends Phobject {
+
+ public function stemToken($token) {
+ $token = $this->normalizeToken($token);
+ return $this->applyStemmer($token);
+ }
+
+ public function stemCorpus($corpus) {
+ $corpus = $this->normalizeCorpus($corpus);
+ $tokens = preg_split('/[^a-zA-Z0-9\x7F-\xFF._]+/', $corpus);
+
+ $words = array();
+ foreach ($tokens as $key => $token) {
+ $token = trim($token, '._');
+
+ if (strlen($token) < 3) {
+ continue;
+ }
+
+ $words[$token] = $token;
+ }
+
+ $stems = array();
+ foreach ($words as $word) {
+ $stems[] = $this->applyStemmer($word);
+ }
+
+ return implode(' ', $stems);
+ }
+
+ private function normalizeToken($token) {
+ return phutil_utf8_strtolower($token);
+ }
+
+ private function normalizeCorpus($corpus) {
+ return phutil_utf8_strtolower($corpus);
+ }
+
+ /**
+ * @phutil-external-symbol class Porter
+ */
+ private function applyStemmer($normalized_token) {
+ // If the token has internal punctuation, handle it literally. This
+ // deals with things like domain names, Conduit API methods, and other
+ // sorts of informal tokens.
+ if (preg_match('/[._]/', $normalized_token)) {
+ return $normalized_token;
+ }
+
+ static $loaded;
+
+ if ($loaded === null) {
+ $root = dirname(phutil_get_library_root('phutil'));
+ require_once $root.'/externals/porter-stemmer/src/Porter.php';
+ $loaded = true;
+ }
+
+
+ $stem = Porter::stem($normalized_token);
+
+ // If the stem is too short, it won't be a candidate for indexing. These
+ // tokens are also likely to be acronyms (like "DNS") rather than real
+ // English words.
+ if (strlen($stem) < 3) {
+ return $normalized_token;
+ }
+
+ return $stem;
+ }
+
+}
diff --git a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
@@ -0,0 +1,220 @@
+<?php
+
+final class PhutilSearchQueryCompilerTestCase
+ extends PhutilTestCase {
+
+ public function testCompileQueries() {
+ $tests = array(
+ '' => null,
+ 'cat dog' => '+"cat" +"dog"',
+ 'cat -dog' => '+"cat" -"dog"',
+ 'cat-dog' => '+"cat-dog"',
+
+ // If there are spaces after an operator, the operator applies to the
+ // next search term.
+ 'cat - dog' => '+"cat" -"dog"',
+
+ // Double quotes serve as delimiters even if there is no whitespace
+ // between terms.
+ '"cat"dog' => '+"cat" +"dog"',
+
+ // This query is too long.
+ str_repeat('x', 2048) => false,
+
+ // Multiple operators are not permitted.
+ '++cat' => false,
+ '+-cat' => false,
+ '--cat' => false,
+
+ // Stray operators are not permitted.
+ '+' => false,
+ 'cat +' => false,
+
+ // Double quotes must be paired.
+ '"' => false,
+ 'cat "' => false,
+ '"cat' => false,
+ 'A"' => false,
+ 'A"B"' => '+"A" +"B"',
+ );
+
+ $this->assertCompileQueries($tests);
+
+ // Test that we compile queries correctly if the operators have been
+ // swapped to use "AND" by default.
+ $operator_tests = array(
+ 'cat dog' => '"cat" "dog"',
+ 'cat -dog' => '"cat" -"dog"',
+ );
+ $this->assertCompileQueries($operator_tests, ' |-><()~*:""&\'');
+
+
+ // Test that we compile queries correctly if the quote operators have
+ // been swapped to differ.
+ $quote_tests = array(
+ 'cat dog' => '+[cat] +[dog]',
+ 'cat -dog' => '+[cat] -[dog]',
+ );
+ $this->assertCompileQueries($quote_tests, '+ -><()~*:[]&|');
+
+ }
+
+ public function testCompileQueriesWithStemming() {
+ $stemming_tests = array(
+ 'cat dog' => array(
+ null,
+ '+"cat" +"dog"',
+ ),
+ 'cats dogs' => array(
+ null,
+ '+"cat" +"dog"',
+ ),
+ 'cats "dogs"' => array(
+ '+"dogs"',
+ '+"cat"',
+ ),
+ '"blessed blade" of the windseeker' => array(
+ '+"blessed blade"',
+ '+"of" +"the" +"windseek"',
+ ),
+ 'mailing users for mentions on tasks' => array(
+ null,
+ '+"mail" +"user" +"for" +"mention" +"on" +"task"',
+ ),
+ );
+
+ $stemmer = new PhutilSearchStemmer();
+ $this->assertCompileQueries($stemming_tests, null, $stemmer);
+ }
+
+ public function testCompileQueriesWithFunctions() {
+ $op_and = PhutilSearchQueryCompiler::OPERATOR_AND;
+ $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
+ $op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
+
+ $mao = "\xE7\x8C\xAB";
+
+ $function_tests = array(
+ 'cat' => array(
+ array(null, $op_and, 'cat'),
+ ),
+ ':cat' => array(
+ array(null, $op_and, 'cat'),
+ ),
+ 'title:cat' => array(
+ array('title', $op_and, 'cat'),
+ ),
+ 'title:cat:dog' => array(
+ array('title', $op_and, 'cat:dog'),
+ ),
+ 'title:~cat' => array(
+ array('title', $op_sub, 'cat'),
+ ),
+ 'cat title:="Meow Meow"' => array(
+ array(null, $op_and, 'cat'),
+ array('title', $op_exact, 'Meow Meow'),
+ ),
+ 'title:cat title:dog' => array(
+ array('title', $op_and, 'cat'),
+ array('title', $op_and, 'dog'),
+ ),
+ '~"core and seven years ag"' => array(
+ array(null, $op_sub, 'core and seven years ag'),
+ ),
+ $mao => array(
+ array(null, $op_sub, $mao),
+ ),
+ '+'.$mao => array(
+ array(null, $op_and, $mao),
+ ),
+ '~'.$mao => array(
+ array(null, $op_sub, $mao),
+ ),
+ '"'.$mao.'"' => array(
+ array(null, $op_and, $mao),
+ ),
+ );
+
+ $this->assertCompileFunctionQueries($function_tests);
+ }
+
+ private function assertCompileQueries(
+ array $tests,
+ $operators = null,
+ PhutilSearchStemmer $stemmer = null) {
+ foreach ($tests as $input => $expect) {
+ $caught = null;
+
+ $query = null;
+ $literal_query = null;
+ $stemmed_query = null;
+
+ try {
+ $compiler = new PhutilSearchQueryCompiler();
+
+ if ($operators !== null) {
+ $compiler->setOperators($operators);
+ }
+
+ if ($stemmer !== null) {
+ $compiler->setStemmer($stemmer);
+ }
+
+ $tokens = $compiler->newTokens($input);
+
+ if ($stemmer) {
+ $literal_query = $compiler->compileLiteralQuery($tokens);
+ $stemmed_query = $compiler->compileStemmedQuery($tokens);
+ } else {
+ $query = $compiler->compileQuery($tokens);
+ }
+ } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
+ $caught = $ex;
+ }
+
+ if ($caught !== null) {
+ $query = false;
+ $literal_query = false;
+ $stemmed_query = false;
+ }
+
+ if (!$stemmer) {
+ $this->assertEqual(
+ $expect,
+ $query,
+ pht('Compilation of query: %s', $input));
+ } else {
+ $this->assertEqual(
+ $expect,
+ ($literal_query === false)
+ ? false
+ : array($literal_query, $stemmed_query),
+ pht('Stemmed compilation of query: %s', $input));
+ }
+ }
+ }
+
+ private function assertCompileFunctionQueries(array $tests) {
+ foreach ($tests as $input => $expect) {
+ $compiler = id(new PhutilSearchQueryCompiler())
+ ->setEnableFunctions(true);
+
+ $tokens = $compiler->newTokens($input);
+
+ $result = array();
+ foreach ($tokens as $token) {
+ $result[] = array(
+ $token->getFunction(),
+ $token->getOperator(),
+ $token->getValue(),
+ );
+ }
+
+ $this->assertEqual(
+ $expect,
+ $result,
+ pht('Function compilation of query: %s', $input));
+ }
+ }
+
+}
diff --git a/src/applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php b/src/applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/compiler/__tests__/PhutilSearchStemmerTestCase.php
@@ -0,0 +1,85 @@
+<?php
+
+final class PhutilSearchStemmerTestCase
+ extends PhutilTestCase {
+
+ public function testStemTokens() {
+ $tests = array(
+ // Various real-world cases collected from users before we implemented
+ // stemming.
+ 'tokens' => 'token',
+ 'panels' => 'panel',
+
+ 'renames' => 'renam',
+ 'rename' => 'renam',
+
+ 'components' => 'compon',
+ 'component' => 'compon',
+
+ 'implementation' => 'implement',
+ 'implements' => 'implement',
+ 'implementing' => 'implement',
+ 'implementer' => 'implement',
+
+ 'deleting' => 'delet',
+ 'deletion' => 'delet',
+ 'delete' => 'delet',
+
+ 'erratically' => 'errat',
+ 'erratic' => 'errat',
+
+ // Stems should be normalized.
+ 'DOG' => 'dog',
+
+ // If stemming would bring a token under 3 characters, it should not
+ // be stemmed.
+ 'dns' => 'dns',
+ 'nis' => 'nis',
+
+ // Complex tokens with internal punctuation should be left untouched;
+ // these are usually things like domain names, API calls, informal tags,
+ // etc.
+ 'apples' => 'appl',
+ 'bananas' => 'banana',
+ 'apples_bananas' => 'apples_bananas',
+ 'apples_bananas.apples_bananas' => 'apples_bananas.apples_bananas',
+ );
+
+ $stemmer = new PhutilSearchStemmer();
+ foreach ($tests as $input => $expect) {
+ $stem = $stemmer->stemToken($input);
+ $this->assertEqual(
+ $expect,
+ $stem,
+ pht('Token stem of "%s".', $input));
+ }
+ }
+
+ public function testStemDocuments() {
+ $tests = array(
+ 'The wild boar meandered erratically.' =>
+ 'the wild boar meander errat',
+ 'Fool me onc, shame on you. Fool me twice, shame on me.' =>
+ 'fool onc shame you twice',
+ 'Fireball is a seventh-level spell which deals 2d16 points of damage '.
+ 'in a 1-meter radius around a target.' =>
+ 'firebal seventh level spell which deal 2d16 point damag meter '.
+ 'radiu around target',
+ 'apples-bananas' => 'appl banana',
+ 'apples_bananas' => 'apples_bananas',
+ 'apples.bananas' => 'apples.bananas',
+ 'oddly-proportioned' => 'oddli proport',
+ );
+
+ $stemmer = new PhutilSearchStemmer();
+ foreach ($tests as $input => $expect) {
+ $stem = $stemmer->stemCorpus($input);
+ $this->assertEqual(
+ $expect,
+ $stem,
+ pht('Corpus stem of: %s', $input));
+ }
+ }
+
+
+}

File Metadata

Mime Type
text/plain
Expires
Fri, Apr 11, 3:34 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7739428
Default Alt Text
D20939.id49891.diff (23 KB)

Event Timeline