Differential D17672 Diff 42503 src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
Changeset View
Changeset View
Standalone View
Standalone View
src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
| <?php | <?php | ||||
| final class PhabricatorMySQLFulltextStorageEngine | final class PhabricatorMySQLFulltextStorageEngine | ||||
| extends PhabricatorFulltextStorageEngine { | extends PhabricatorFulltextStorageEngine { | ||||
| private $fulltextTokens = array(); | |||||
| private $engineLimits; | |||||
/**
 * Name the storage engine implemented by this class.
 *
 * @return string Always the literal identifier `mysql`.
 */
public function getEngineIdentifier() {
  return 'mysql';
}
/**
 * Build the host object associated with this engine.
 *
 * @return PhabricatorMySQLSearchHost New host constructed with this engine.
 */
public function getHostType() {
  $host = new PhabricatorMySQLSearchHost($this);
  return $host;
}
| ▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines | private function newFulltextSubquery( | ||||
| $select[] = 'document.phid AS documentPHID'; | $select[] = 'document.phid AS documentPHID'; | ||||
| $join = array(); | $join = array(); | ||||
| $where = array(); | $where = array(); | ||||
| $title_field = PhabricatorSearchDocumentFieldType::FIELD_TITLE; | $title_field = PhabricatorSearchDocumentFieldType::FIELD_TITLE; | ||||
| $title_boost = 1024; | $title_boost = 1024; | ||||
| $stemmer = new PhutilSearchStemmer(); | |||||
| $raw_query = $query->getParameter('query'); | $raw_query = $query->getParameter('query'); | ||||
| $compiled_query = $this->compileQuery($raw_query); | $raw_query = trim($raw_query); | ||||
| if (strlen($raw_query)) { | |||||
| $compiler = PhabricatorSearchDocument::newQueryCompiler() | |||||
| ->setStemmer($stemmer); | |||||
| $tokens = $compiler->newTokens($raw_query); | |||||
| list($min_length, $stopword_list) = $this->getEngineLimits($conn); | |||||
| // Process all the parts of the user's query so we can show them which | |||||
| // parts we searched for and which ones we ignored. | |||||
| $fulltext_tokens = array(); | |||||
| foreach ($tokens as $key => $token) { | |||||
| $fulltext_token = id(new PhabricatorFulltextToken()) | |||||
| ->setToken($token); | |||||
| $fulltext_tokens[$key] = $fulltext_token; | |||||
| $value = $token->getValue(); | |||||
| if (phutil_utf8_strlen($value) < $min_length) { | |||||
| $fulltext_token->setIsShort(true); | |||||
| continue; | |||||
| } | |||||
| if (isset($stopword_list[phutil_utf8_strtolower($value)])) { | |||||
| $fulltext_token->setIsStopword(true); | |||||
| continue; | |||||
| } | |||||
| } | |||||
| $this->fulltextTokens = $fulltext_tokens; | |||||
| // Remove tokens which aren't queryable from the query. This is mostly | |||||
| // a workaround for the peculiar behaviors described in T12137. | |||||
| foreach ($this->fulltextTokens as $key => $fulltext_token) { | |||||
| if (!$fulltext_token->isQueryable()) { | |||||
| unset($tokens[$key]); | |||||
| } | |||||
| } | |||||
| if (!$tokens) { | |||||
| throw new PhutilSearchQueryCompilerSyntaxException( | |||||
| pht( | |||||
| 'All of your search terms are too short or too common to '. | |||||
| 'appear in the search index. Search for longer or more '. | |||||
| 'distinctive terms.')); | |||||
| } | |||||
| $queries = array(); | |||||
| $queries[] = $compiler->compileLiteralQuery($tokens); | |||||
| $queries[] = $compiler->compileStemmedQuery($tokens); | |||||
| $compiled_query = implode(' ', array_filter($queries)); | |||||
| } else { | |||||
| $compiled_query = null; | |||||
| } | |||||
| if (strlen($compiled_query)) { | if (strlen($compiled_query)) { | ||||
| $select[] = qsprintf( | $select[] = qsprintf( | ||||
| $conn, | $conn, | ||||
| 'IF(field.field = %s, %d, 0) + | 'IF(field.field = %s, %d, 0) + | ||||
| MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE) | MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE) | ||||
| AS fieldScore', | AS fieldScore', | ||||
| $title_field, | $title_field, | ||||
| $title_boost, | $title_boost, | ||||
| ▲ Show 20 Lines • Show All 173 Lines • ▼ Show 20 Lines | if (!$is_existence) { | ||||
| ' AND %C.relatedPHID in (%Ls)', | ' AND %C.relatedPHID in (%Ls)', | ||||
| $field, | $field, | ||||
| $phids); | $phids); | ||||
| } | } | ||||
| return $sql; | return $sql; | ||||
| } | } | ||||
/**
 * Report whether the backing index exists.
 *
 * @return bool Always true for this engine.
 */
public function indexExists() {
  return true;
}

/**
 * Report statistics about the index.
 *
 * @return bool Always false for this engine.
 */
public function getIndexStats() {
  return false;
}

/**
 * Get the tokens recorded while building the most recent fulltext subquery.
 *
 * Each entry wraps one token from the user's query and is flagged when the
 * token was too short or was a stopword, so callers can show which parts
 * of the query were searched for and which were ignored.
 *
 * @return array Recorded tokens; empty if no query has been compiled yet.
 */
public function getFulltextTokens() {
  return $this->fulltextTokens;
}

/**
 * Get the engine's token limits, reading them from the database at most
 * once per engine object.
 *
 * @param AphrontDatabaseConnection $conn Connection to read limits from.
 * @return array Pair of (minimum token length, stopword map).
 */
private function getEngineLimits(AphrontDatabaseConnection $conn) {
  if ($this->engineLimits === null) {
    $this->engineLimits = $this->newEngineLimits($conn);
  }

  return $this->engineLimits;
}

/**
 * Read the minimum token length and the stopword list from the database.
 *
 * InnoDB limits are preferred when the server reports them; otherwise the
 * MyISAM limits are used together with a stopword file shipped in
 * `resources/sql/`.
 *
 * @param AphrontDatabaseConnection $conn Connection to read limits from.
 * @return array Pair of (int minimum token length, map of stopwords keyed
 *   and valued by the stopword itself).
 */
private function newEngineLimits(AphrontDatabaseConnection $conn) {
  // Ask about InnoDB on its own. Selecting a system variable the server
  // does not know about makes the whole query fail, so reading the InnoDB
  // and MyISAM variables in a single SELECT would throw on servers without
  // InnoDB fulltext support and the MyISAM fallback would never run.
  try {
    $innodb = queryfx_one(
      $conn,
      'SELECT @@innodb_ft_min_token_size innodb_min');
  } catch (AphrontQueryException $ex) {
    $innodb = null;
  }

  if ($innodb && $innodb['innodb_min']) {
    $min_len = (int)$innodb['innodb_min'];

    $stopwords = queryfx_all(
      $conn,
      'SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD');
    $stopwords = ipull($stopwords, 'value');
    $stopwords = array_fuse($stopwords);
  } else {
    $myisam = queryfx_one(
      $conn,
      'SELECT
        @@ft_min_word_len myisam_min,
        @@ft_stopword_file myisam_stopwords');

    $min_len = (int)$myisam['myisam_min'];

    $file = $myisam['myisam_stopwords'];
    if (preg_match('(/resources/sql/stopwords\.txt\z)', $file)) {
      // If this is set to something that looks like the Phabricator
      // stopword file, read that.
      $file = 'stopwords.txt';
    } else {
      // Otherwise, just use the default stopwords. This might be wrong
      // but we can't read the actual value dynamically and reading
      // whatever file the variable is set to could be a big headache
      // to get right from a security perspective.
      $file = 'stopwords_myisam.txt';
    }

    $root = dirname(phutil_get_library_root('phabricator'));
    $data = Filesystem::readFile($root.'/resources/sql/'.$file);

    // Callers look words up by their lowercased form, so normalize each
    // line the same way. Trimming also discards "\r" from CRLF files and
    // stray whitespace, either of which would stop a word from matching.
    $stopwords = array();
    foreach (explode("\n", $data) as $line) {
      $word = phutil_utf8_strtolower(trim($line));
      if (strlen($word)) {
        $stopwords[$word] = $word;
      }
    }
  }

  return array($min_len, $stopwords);
}
| } | } | ||||