diff --git a/resources/sql/autopatches/20161125.search.01.stemmed.sql b/resources/sql/autopatches/20161125.search.01.stemmed.sql new file mode 100644 --- /dev/null +++ b/resources/sql/autopatches/20161125.search.01.stemmed.sql @@ -0,0 +1,2 @@ +ALTER TABLE {$NAMESPACE}_search.search_documentfield + ADD stemmedCorpus LONGTEXT COLLATE {$COLLATE_FULLTEXT}; diff --git a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php --- a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php @@ -33,6 +33,8 @@ $conn_w = $store->establishConnection('w'); + $stemmer = new PhutilSearchStemmer(); + $field_dao = new PhabricatorSearchDocumentField(); queryfx( $conn_w, @@ -41,16 +43,21 @@ $phid); foreach ($doc->getFieldData() as $field) { list($ftype, $corpus, $aux_phid) = $field; + + $stemmed_corpus = $stemmer->stemCorpus($corpus); + queryfx( $conn_w, - 'INSERT INTO %T (phid, phidType, field, auxPHID, corpus) '. - 'VALUES (%s, %s, %s, %ns, %s)', + 'INSERT INTO %T + (phid, phidType, field, auxPHID, corpus, stemmedCorpus) '. + 'VALUES (%s, %s, %s, %ns, %s, %s)', $field_dao->getTableName(), $phid, $doc->getDocumentType(), $ftype, $aux_phid, - $corpus); + $corpus, + $stemmed_corpus); } @@ -205,8 +212,9 @@ if (strlen($compiled_query)) { $select[] = qsprintf( $conn, - 'IF(field.field = %s, %d, 0) + '. - 'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE) AS fieldScore', + 'IF(field.field = %s, %d, 0) + + MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE) + AS fieldScore', $title_field, $title_boost, $compiled_query); @@ -218,7 +226,7 @@ $where[] = qsprintf( $conn, - 'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE)', + 'MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)', $compiled_query); if ($query->getParameter('field')) { @@ -380,11 +388,17 @@ } private function compileQuery($raw_query) { - $compiler = PhabricatorSearchDocument::newQueryCompiler(); + $stemmer = new PhutilSearchStemmer(); - return $compiler + $compiler = PhabricatorSearchDocument::newQueryCompiler() ->setQuery($raw_query) - ->compileQuery(); + ->setStemmer($stemmer); + + $queries = array(); + $queries[] = $compiler->compileLiteralQuery(); + $queries[] = $compiler->compileStemmedQuery(); + + return implode(' ', array_filter($queries)); } public function indexExists() { diff --git a/src/applications/search/storage/document/PhabricatorSearchDocumentField.php b/src/applications/search/storage/document/PhabricatorSearchDocumentField.php --- a/src/applications/search/storage/document/PhabricatorSearchDocumentField.php +++ b/src/applications/search/storage/document/PhabricatorSearchDocumentField.php @@ -6,6 +6,7 @@ protected $field; protected $auxPHID; protected $corpus; + protected $stemmedCorpus; protected function getConfiguration() { return array( @@ -16,14 +17,15 @@ 'field' => 'text4', 'auxPHID' => 'phid?', 'corpus' => 'fulltext?', + 'stemmedCorpus' => 'fulltext?', ), self::CONFIG_KEY_SCHEMA => array( 'key_phid' => null, 'phid' => array( 'columns' => array('phid'), ), - 'corpus' => array( - 'columns' => array('corpus'), + 'key_corpus' => array( + 'columns' => array('corpus', 'stemmedCorpus'), 'type' => 'FULLTEXT', ), ),