diff --git a/resources/celerity/map.php b/resources/celerity/map.php --- a/resources/celerity/map.php +++ b/resources/celerity/map.php @@ -103,7 +103,7 @@ 'rsrc/css/application/releeph/releeph-request-differential-create-dialog.css' => '8d8b92cd', 'rsrc/css/application/releeph/releeph-request-typeahead.css' => '667a48ae', 'rsrc/css/application/search/application-search-view.css' => '66ee5d46', - 'rsrc/css/application/search/search-results.css' => '64ad079a', + 'rsrc/css/application/search/search-results.css' => 'f87d23ad', 'rsrc/css/application/slowvote/slowvote.css' => 'a94b7230', 'rsrc/css/application/tokens/tokens.css' => '3d0f239e', 'rsrc/css/application/uiexample/example.css' => '528b19de', @@ -794,7 +794,7 @@ 'phabricator-phtize' => 'd254d646', 'phabricator-prefab' => '8d40ae75', 'phabricator-remarkup-css' => '17c0fb37', - 'phabricator-search-results-css' => '64ad079a', + 'phabricator-search-results-css' => 'f87d23ad', 'phabricator-shaped-request' => '7cbe244b', 'phabricator-slowvote-css' => 'a94b7230', 'phabricator-source-code-view-css' => '4383192f', diff --git a/resources/sql/stopwords_myisam.txt b/resources/sql/stopwords_myisam.txt new file mode 100644 --- /dev/null +++ b/resources/sql/stopwords_myisam.txt @@ -0,0 +1,543 @@ +a's +able +about +above +according +accordingly +across +actually +after +afterwards +again +against +ain't +all +allow +allows +almost +alone +along +already +also +although +always +am +among +amongst +an +and +another +any +anybody +anyhow +anyone +anything +anyway +anyways +anywhere +apart +appear +appreciate +appropriate +are +aren't +around +as +aside +ask +asking +associated +at +available +away +awfully +be +became +because +become +becomes +becoming +been +before +beforehand +behind +being +believe +below +beside +besides +best +better +between +beyond +both +brief +but +by +c'mon +c's +came +can +can't +cannot +cant +cause +causes +certain +certainly +changes +clearly +co +com +come +comes +concerning +consequently +consider +considering +contain +containing +contains +corresponding +could +couldn't +course +currently +definitely +described +despite +did +didn't +different +do +does +doesn't +doing +don't +done +down +downwards +during +each +edu +eg +eight +either +else +elsewhere +enough +entirely +especially +et +etc +even +ever +every +everybody +everyone +everything +everywhere +ex +exactly +example +except +far +few +fifth +first +five +followed +following +follows +for +former +formerly +forth +four +from +further +furthermore +get +gets +getting +given +gives +go +goes +going +gone +got +gotten +greetings +had +hadn't +happens +hardly +has +hasn't +have +haven't +having +he +he's +hello +help +hence +her +here +here's +hereafter +hereby +herein +hereupon +hers +herself +hi +him +himself +his +hither +hopefully +how +howbeit +however +i'd +i'll +i'm +i've +ie +if +ignored +immediate +in +inasmuch +inc +indeed +indicate +indicated +indicates +inner +insofar +instead +into +inward +is +isn't +it +it'd +it'll +it's +its +itself +just +keep +keeps +kept +know +known +knows +last +lately +later +latter +latterly +least +less +lest +let +let's +like +liked +likely +little +look +looking +looks +ltd +mainly +many +may +maybe +me +mean +meanwhile +merely +might +more +moreover +most +mostly +much +must +my +myself +name +namely +nd +near +nearly +necessary +need +needs +neither +never +nevertheless +new +next +nine +no +nobody +non +none +noone +nor +normally +not +nothing +novel +now +nowhere +obviously +of +off +often +oh +ok +okay +old +on +once +one +ones +only +onto +or +other +others +otherwise +ought +our +ours +ourselves +out +outside +over +overall +own +particular +particularly +per +perhaps +placed +please +plus +possible +presumably +probably +provides +que +quite +qv +rather +rd +re +really +reasonably +regarding +regardless +regards +relatively +respectively +right +said +same +saw +say +saying +says +second +secondly +see +seeing +seem +seemed +seeming +seems +seen +self +selves +sensible +sent +serious +seriously +seven +several +shall +she +should +shouldn't +since +six +so +some +somebody +somehow +someone +something +sometime +sometimes +somewhat +somewhere +soon +sorry +specified +specify +specifying +still +sub +such +sup +sure +t's +take +taken +tell +tends +th +than +thank +thanks +thanx +that +that's +thats +the +their +theirs +them +themselves +then +thence +there +there's +thereafter +thereby +therefore +therein +theres +thereupon +these +they +they'd +they'll +they're +they've +think +third +this +thorough +thoroughly +those +though +three +through +throughout +thru +thus +to +together +too +took +toward +towards +tried +tries +truly +try +trying +twice +two +un +under +unfortunately +unless +unlikely +until +unto +up +upon +us +use +used +useful +uses +using +usually +value +various +very +via +viz +vs +want +wants +was +wasn't +way +we +we'd +we'll +we're +we've +welcome +well +went +were +weren't +what +what's +whatever +when +whence +whenever +where +where's +whereafter +whereas +whereby +wherein +whereupon +wherever +whether +which +while +whither +who +who's +whoever +whole +whom +whose +why +will +willing +wish +with +within +without +won't +wonder +would +wouldn't +yes +yet +you +you'd +you'll +you're +you've +your +yours +yourself +yourselves +zero diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -2844,7 +2844,9 @@ 'PhabricatorFulltextEngineExtensionModule' => 'applications/search/index/PhabricatorFulltextEngineExtensionModule.php', 'PhabricatorFulltextIndexEngineExtension' => 'applications/search/engineextension/PhabricatorFulltextIndexEngineExtension.php', 'PhabricatorFulltextInterface' => 'applications/search/interface/PhabricatorFulltextInterface.php', + 'PhabricatorFulltextResultSet' => 'applications/search/query/PhabricatorFulltextResultSet.php', 'PhabricatorFulltextStorageEngine' => 'applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php', + 'PhabricatorFulltextToken' => 'applications/search/query/PhabricatorFulltextToken.php', 'PhabricatorFundApplication' => 'applications/fund/application/PhabricatorFundApplication.php', 'PhabricatorGDSetupCheck' => 'applications/config/check/PhabricatorGDSetupCheck.php', 'PhabricatorGarbageCollector' => 'infrastructure/daemon/garbagecollector/PhabricatorGarbageCollector.php', @@ -8005,7 +8007,9 @@ 'PhabricatorFulltextEngineExtension' => 'Phobject', 'PhabricatorFulltextEngineExtensionModule' => 'PhabricatorConfigModule', 'PhabricatorFulltextIndexEngineExtension' => 'PhabricatorIndexEngineExtension', + 'PhabricatorFulltextResultSet' => 'Phobject', 'PhabricatorFulltextStorageEngine' => 'Phobject', + 'PhabricatorFulltextToken' => 'Phobject', 'PhabricatorFundApplication' => 'PhabricatorApplication', 'PhabricatorGDSetupCheck' => 'PhabricatorSetupCheck', 'PhabricatorGarbageCollector' => 'Phobject', diff --git a/src/applications/search/engine/PhabricatorApplicationSearchEngine.php b/src/applications/search/engine/PhabricatorApplicationSearchEngine.php --- a/src/applications/search/engine/PhabricatorApplicationSearchEngine.php +++ b/src/applications/search/engine/PhabricatorApplicationSearchEngine.php @@ -978,9 +978,15 @@ $objects = $query->executeWithCursorPager($pager); } + $this->didExecuteQuery($query); + return $objects; } + protected function didExecuteQuery(PhabricatorPolicyAwareQuery $query) { + return; + } + /* -( Rendering )---------------------------------------------------------- */ diff --git a/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php --- a/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php @@ -101,4 +101,8 @@ public function initIndex() {} + public function getFulltextTokens() { + return array(); + } + } diff --git a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php --- a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php @@ -3,6 +3,9 @@ final class PhabricatorMySQLFulltextStorageEngine extends PhabricatorFulltextStorageEngine { + private $fulltextTokens = array(); + private $engineLimits; + public function getEngineIdentifier() { return 'mysql'; } @@ -203,8 +206,64 @@ $title_field = PhabricatorSearchDocumentFieldType::FIELD_TITLE; $title_boost = 1024; + $stemmer = new PhutilSearchStemmer(); + $raw_query = $query->getParameter('query'); - $compiled_query = $this->compileQuery($raw_query); + $raw_query = trim($raw_query); + if (strlen($raw_query)) { + $compiler = PhabricatorSearchDocument::newQueryCompiler() + ->setStemmer($stemmer); + + $tokens = $compiler->newTokens($raw_query); + + list($min_length, $stopword_list) = $this->getEngineLimits($conn); + + // Process all the parts of the user's query so we can show them which + // parts we searched for and which ones we ignored. + $fulltext_tokens = array(); + foreach ($tokens as $key => $token) { + $fulltext_token = id(new PhabricatorFulltextToken()) + ->setToken($token); + + $fulltext_tokens[$key] = $fulltext_token; + + $value = $token->getValue(); + if (phutil_utf8_strlen($value) < $min_length) { + $fulltext_token->setIsShort(true); + continue; + } + + if (isset($stopword_list[phutil_utf8_strtolower($value)])) { + $fulltext_token->setIsStopword(true); + continue; + } + } + $this->fulltextTokens = $fulltext_tokens; + + // Remove tokens which aren't queryable from the query. This is mostly + // a workaround for the peculiar behaviors described in T12137. + foreach ($this->fulltextTokens as $key => $fulltext_token) { + if (!$fulltext_token->isQueryable()) { + unset($tokens[$key]); + } + } + + if (!$tokens) { + throw new PhutilSearchQueryCompilerSyntaxException( + pht( + 'All of your search terms are too short or too common to '. + 'appear in the search index. Search for longer or more '. + 'distinctive terms.')); + } + + $queries = array(); + $queries[] = $compiler->compileLiteralQuery($tokens); + $queries[] = $compiler->compileStemmedQuery($tokens); + $compiled_query = implode(' ', array_filter($queries)); + } else { + $compiled_query = null; + } + if (strlen($compiled_query)) { $select[] = qsprintf( $conn, @@ -394,21 +453,6 @@ return $sql; } - private function compileQuery($raw_query) { - $stemmer = new PhutilSearchStemmer(); - - $compiler = PhabricatorSearchDocument::newQueryCompiler() - ->setStemmer($stemmer); - - $tokens = $compiler->newTokens($raw_query); - - $queries = array(); - $queries[] = $compiler->compileLiteralQuery($tokens); - $queries[] = $compiler->compileStemmedQuery($tokens); - - return implode(' ', array_filter($queries)); - } - public function indexExists() { return true; } @@ -417,4 +461,56 @@ return false; } + public function getFulltextTokens() { + return $this->fulltextTokens; + } + + private function getEngineLimits(AphrontDatabaseConnection $conn) { + if ($this->engineLimits === null) { + $this->engineLimits = $this->newEngineLimits($conn); + } + return $this->engineLimits; + } + + private function newEngineLimits(AphrontDatabaseConnection $conn) { + $result = queryfx_one( + $conn, + 'SELECT + @@innodb_ft_min_token_size innodb_max, + @@ft_min_word_len myisam_max, + @@ft_stopword_file myisam_stopwords'); + + if ($result['innodb_max']) { + $min_len = $result['innodb_max']; + $stopwords = queryfx_all( + $conn, + 'SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD'); + $stopwords = ipull($stopwords, 'value'); + $stopwords = array_fuse($stopwords); + } else { + $min_len = $result['myisam_max']; + + $file = $result['myisam_stopwords']; + if (preg_match('(/resources/sql/stopwords\.txt\z)', $file)) { + // If this is set to something that looks like the Phabricator + // stopword file, read that. + $file = 'stopwords.txt'; + } else { + // Otherwise, just use the default stopwords. This might be wrong + // but we can't read the actual value dynamically and reading + // whatever file the variable is set to could be a big headache + // to get right from a security perspective. + $file = 'stopwords_myisam.txt'; + } + + $root = dirname(phutil_get_library_root('phabricator')); + $data = Filesystem::readFile($root.'/resources/sql/'.$file); + $stopwords = explode("\n", $data); + $stopwords = array_filter($stopwords); + $stopwords = array_fuse($stopwords); + } + + return array($min_len, $stopwords); + } + } diff --git a/src/applications/search/query/PhabricatorFulltextResultSet.php b/src/applications/search/query/PhabricatorFulltextResultSet.php new file mode 100644 --- /dev/null +++ b/src/applications/search/query/PhabricatorFulltextResultSet.php @@ -0,0 +1,26 @@ +phids = $phids; + return $this; + } + + public function getPHIDs() { + return $this->phids; + } + + public function setFulltextTokens($fulltext_tokens) { + $this->fulltextTokens = $fulltext_tokens; + return $this; + } + + public function getFulltextTokens() { + return $this->fulltextTokens; + } + +} diff --git a/src/applications/search/query/PhabricatorFulltextToken.php b/src/applications/search/query/PhabricatorFulltextToken.php new file mode 100644 --- /dev/null +++ b/src/applications/search/query/PhabricatorFulltextToken.php @@ -0,0 +1,88 @@ +token = $token; + return $this; + } + + public function getToken() { + return $this->token; + } + + public function isQueryable() { + return !$this->getIsShort() && !$this->getIsStopword(); + } + + public function setIsShort($is_short) { + $this->isShort = $is_short; + return $this; + } + + public function getIsShort() { + return $this->isShort; + } + + public function setIsStopword($is_stopword) { + $this->isStopword = $is_stopword; + return $this; + } + + public function getIsStopword() { + return $this->isStopword; + } + + public function newTag() { + $token = $this->getToken(); + + $tip = null; + $icon = null; + + if ($this->getIsShort()) { + $shade = PHUITagView::COLOR_GREY; + $tip = pht('Ignored Short Word'); + } else if ($this->getIsStopword()) { + $shade = PHUITagView::COLOR_GREY; + $tip = pht('Ignored Common Word'); + } else { + $operator = $token->getOperator(); + switch ($operator) { + case PhutilSearchQueryCompiler::OPERATOR_NOT: + $shade = PHUITagView::COLOR_RED; + $icon = 'fa-minus'; + break; + default: + $shade = PHUITagView::COLOR_BLUE; + break; + } + } + + $tag = id(new PHUITagView()) + ->setType(PHUITagView::TYPE_SHADE) + ->setShade($shade) + ->setName($token->getValue()); + + if ($tip !== null) { + Javelin::initBehavior('phabricator-tooltips'); + + $tag + ->addSigil('has-tooltip') + ->setMetadata( + array( + 'tip' => $tip, + )); + } + + if ($icon !== null) { + $tag->setIcon($icon); + } + + return $tag; + } + +} diff --git a/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php b/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php --- a/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php +++ b/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php @@ -3,6 +3,8 @@ final class PhabricatorSearchApplicationSearchEngine extends PhabricatorApplicationSearchEngine { + private $resultSet; + public function getResultTypeDescription() { return pht('Fulltext Search Results'); } @@ -243,6 +245,9 @@ PhabricatorSavedQuery $query, array $handles) { + $result_set = $this->resultSet; + $fulltext_tokens = $result_set->getFulltextTokens(); + $viewer = $this->requireViewer(); $list = new PHUIObjectItemListView(); $list->setNoDataString(pht('No results found.')); @@ -263,7 +268,28 @@ } } + $fulltext_view = null; + if ($fulltext_tokens) { + require_celerity_resource('phabricator-search-results-css'); + + $fulltext_view = array(); + foreach ($fulltext_tokens as $token) { + $fulltext_view[] = $token->newTag(); + } + $fulltext_view = phutil_tag( + 'div', + array( + 'class' => 'phui-fulltext-tokens', + ), + array( + pht('Searched For:'), + ' ', + $fulltext_view, + )); + } + $result = new PhabricatorApplicationSearchResultView(); + $result->setContent($fulltext_view); $result->setObjectList($list); return $result; @@ -280,4 +306,8 @@ return $owner_phids; } + protected function didExecuteQuery(PhabricatorPolicyAwareQuery $query) { + $this->resultSet = $query->getFulltextResultSet(); + } + } diff --git a/src/applications/search/query/PhabricatorSearchDocumentQuery.php b/src/applications/search/query/PhabricatorSearchDocumentQuery.php --- a/src/applications/search/query/PhabricatorSearchDocumentQuery.php +++ b/src/applications/search/query/PhabricatorSearchDocumentQuery.php @@ -6,6 +6,7 @@ private $savedQuery; private $objectCapabilities; private $unfilteredOffset; + private $fulltextResultSet; public function withSavedQuery(PhabricatorSavedQuery $query) { $this->savedQuery = $query; @@ -25,8 +26,17 @@ return $this->getRequiredCapabilities(); } + public function getFulltextResultSet() { + if (!$this->fulltextResultSet) { + throw new PhutilInvalidStateException('execute'); + } + + return $this->fulltextResultSet; + } + protected function willExecute() { $this->unfilteredOffset = 0; + $this->fulltextResultSet = null; } protected function loadPage() { @@ -39,8 +49,10 @@ ->setParameter('offset', $this->unfilteredOffset) ->setParameter('limit', $this->getRawResultLimit()); - $phids = PhabricatorSearchService::executeSearch($query); + $result_set = PhabricatorSearchService::newResultSet($query, $this); + $phids = $result_set->getPHIDs(); + $this->fulltextResultSet = $result_set; $this->unfilteredOffset += count($phids); $handles = id(new PhabricatorHandleQuery()) diff --git a/src/infrastructure/cluster/search/PhabricatorSearchService.php b/src/infrastructure/cluster/search/PhabricatorSearchService.php --- a/src/infrastructure/cluster/search/PhabricatorSearchService.php +++ b/src/infrastructure/cluster/search/PhabricatorSearchService.php @@ -245,14 +245,25 @@ * @throws PhutilAggregateException */ public static function executeSearch(PhabricatorSavedQuery $query) { + $result_set = self::newResultSet($query); + return $result_set->getPHIDs(); + } + + public static function newResultSet(PhabricatorSavedQuery $query) { $exceptions = array(); // try all services until one succeeds foreach (self::getAllServices() as $service) { + if (!$service->isReadable()) { + continue; + } + try { $engine = $service->getEngine(); - $res = $engine->executeSearch($query); - // return immediately if we get results - return $res; + $phids = $engine->executeSearch($query); + + return id(new PhabricatorFulltextResultSet()) + ->setPHIDs($phids) + ->setFulltextTokens($engine->getFulltextTokens()); } catch (PhutilSearchQueryCompilerSyntaxException $ex) { // If there's a query compilation error, return it directly to the // user: they issued a query with bad syntax. diff --git a/webroot/rsrc/css/application/search/search-results.css b/webroot/rsrc/css/application/search/search-results.css --- a/webroot/rsrc/css/application/search/search-results.css +++ b/webroot/rsrc/css/application/search/search-results.css @@ -16,3 +16,16 @@ font-weight: normal; color: #000; } + +.phui-fulltext-tokens { + margin: 16px 8px; + font-weight: bold; +} + +.phui-fulltext-tokens .phui-tag-view { + margin: 0 2px; +} + +.phui-fulltext-tokens .phui-tag-view.phui-tag-shade-grey { + opacity: 0.5; +}