Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F14039107
D18635.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Referenced Files
None
Subscribers
None
D18635.diff
View Options
diff --git a/src/applications/search/query/PhabricatorFulltextToken.php b/src/applications/search/query/PhabricatorFulltextToken.php
--- a/src/applications/search/query/PhabricatorFulltextToken.php
+++ b/src/applications/search/query/PhabricatorFulltextToken.php
@@ -56,6 +56,10 @@
$shade = PHUITagView::COLOR_RED;
$icon = 'fa-minus';
break;
+ case PhutilSearchQueryCompiler::OPERATOR_SUBSTRING:
+ $tip = pht('Substring Search');
+ $shade = PHUITagView::COLOR_VIOLET;
+ break;
default:
$shade = PHUITagView::COLOR_BLUE;
break;
diff --git a/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php b/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php
--- a/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php
+++ b/src/applications/search/query/PhabricatorSearchApplicationSearchEngine.php
@@ -261,7 +261,7 @@
foreach ($results as $phid => $handle) {
$view = id(new PhabricatorSearchResultView())
->setHandle($handle)
- ->setQuery($query)
+ ->setTokens($fulltext_tokens)
->setObject(idx($objects, $phid))
->render();
$list->addItem($view);
diff --git a/src/applications/search/view/PhabricatorSearchResultView.php b/src/applications/search/view/PhabricatorSearchResultView.php
--- a/src/applications/search/view/PhabricatorSearchResultView.php
+++ b/src/applications/search/view/PhabricatorSearchResultView.php
@@ -3,16 +3,17 @@
final class PhabricatorSearchResultView extends AphrontView {
private $handle;
- private $query;
private $object;
+ private $tokens;
public function setHandle(PhabricatorObjectHandle $handle) {
$this->handle = $handle;
return $this;
}
- public function setQuery(PhabricatorSavedQuery $query) {
- $this->query = $query;
+ public function setTokens(array $tokens) {
+ assert_instances_of($tokens, 'PhabricatorFulltextToken');
+ $this->tokens = $tokens;
return $this;
}
@@ -56,88 +57,129 @@
* matched their query.
*/
private function emboldenQuery($str) {
- $query = $this->query->getParameter('query');
+ $tokens = $this->tokens;
- if (!strlen($query) || !strlen($str)) {
+ if (!$tokens) {
return $str;
}
- // This algorithm is safe but not especially fast, so don't bother if
- // we're dealing with a lot of data. This mostly prevents silly/malicious
- // queries from doing anything bad.
- if (strlen($query) + strlen($str) > 2048) {
+ if (count($tokens) > 16) {
return $str;
}
- // Keep track of which characters we're going to make bold. This is
- // byte oriented, but we'll make sure we don't put a bold in the middle
- // of a character later.
- $bold = array_fill(0, strlen($str), false);
+ if (!strlen($str)) {
+ return $str;
+ }
- // Split the query into words.
- $parts = preg_split('/ +/', $query);
+ if (strlen($str) > 2048) {
+ return $str;
+ }
- // Find all occurrences of each word, and mark them to be emboldened.
- foreach ($parts as $part) {
- $part = trim($part);
- $part = trim($part, '"+');
- if (!strlen($part)) {
- continue;
+ $patterns = array();
+ foreach ($tokens as $token) {
+ $raw_token = $token->getToken();
+ $operator = $raw_token->getOperator();
+
+ $value = $raw_token->getValue();
+
+ switch ($operator) {
+ case PhutilSearchQueryCompiler::OPERATOR_SUBSTRING:
+ $patterns[] = '(('.preg_quote($value).'))ui';
+ break;
+ case PhutilSearchQueryCompiler::OPERATOR_AND:
+ $patterns[] = '((?<=\W|^)('.preg_quote($value).')(?=\W|\z))ui';
+ break;
+ default:
+ // Don't highlight anything else, particularly "NOT".
+ break;
}
+ }
+ // Find all matches for all query terms in the document title, then reduce
+ // them to a map from offsets to highlighted sequence lengths. If two terms
+ // match at the same position, we choose the longer one.
+ $all_matches = array();
+ foreach ($patterns as $pattern) {
$matches = null;
- $has_matches = preg_match_all(
- '/(?:^|\b)('.preg_quote($part, '/').')/i',
+ $ok = preg_match_all(
+ $pattern,
$str,
$matches,
PREG_OFFSET_CAPTURE);
-
- if (!$has_matches) {
+ if (!$ok) {
continue;
}
- // Flag the matching part of the range for boldening.
foreach ($matches[1] as $match) {
- $offset = $match[1];
- for ($ii = 0; $ii < strlen($match[0]); $ii++) {
- $bold[$offset + $ii] = true;
+ $match_text = $match[0];
+ $match_offset = $match[1];
+
+ if (!isset($all_matches[$match_offset])) {
+ $all_matches[$match_offset] = 0;
}
+
+ $all_matches[$match_offset] = max(
+ $all_matches[$match_offset],
+ strlen($match_text));
}
}
- // Split the string into ranges, applying bold styling as required.
- $out = array();
- $buf = '';
- $pos = 0;
- $is_bold = false;
-
- // Make sure this is UTF8 because phutil_utf8v() will explode if it isn't.
- $str = phutil_utf8ize($str);
- foreach (phutil_utf8v($str) as $chr) {
- if ($bold[$pos] != $is_bold) {
- if (strlen($buf)) {
- if ($is_bold) {
- $out[] = phutil_tag('strong', array(), $buf);
- } else {
- $out[] = $buf;
- }
- $buf = '';
+ // Go through the string one display glyph at a time. If a glyph starts
+ // on a highlighted byte position, turn on highlighting for the number
+ // of matching bytes. If a query searches for "e" and the document contains
+ // an "e" followed by a bunch of combining marks, this will correctly
+ // highlight the entire glyph.
+ $parts = array();
+ $highlight = 0;
+ $offset = 0;
+ foreach (phutil_utf8v_combined($str) as $character) {
+ $length = strlen($character);
+
+ if (isset($all_matches[$offset])) {
+ $highlight = $all_matches[$offset];
+ }
+
+ if ($highlight > 0) {
+ $is_highlighted = true;
+ $highlight -= $length;
+ } else {
+ $is_highlighted = false;
+ }
+
+ $parts[] = array(
+ 'text' => $character,
+ 'highlighted' => $is_highlighted,
+ );
+
+ $offset += $length;
+ }
+
+ // Combine all the sequences together so we aren't emitting a tag around
+ // every individual character.
+ $last = null;
+ foreach ($parts as $key => $part) {
+ if ($last !== null) {
+ if ($part['highlighted'] == $parts[$last]['highlighted']) {
+ $parts[$last]['text'] .= $part['text'];
+ unset($parts[$key]);
+ continue;
}
- $is_bold = !$is_bold;
}
- $buf .= $chr;
- $pos += strlen($chr);
+
+ $last = $key;
}
- if (strlen($buf)) {
- if ($is_bold) {
- $out[] = phutil_tag('strong', array(), $buf);
+ // Finally, add tags.
+ $result = array();
+ foreach ($parts as $part) {
+ if ($part['highlighted']) {
+ $result[] = phutil_tag('strong', array(), $part['text']);
} else {
- $out[] = $buf;
+ $result[] = $part['text'];
}
}
- return $out;
+ return $result;
}
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Nov 12, 3:46 AM (5 d, 10 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6753043
Default Alt Text
D18635.diff (7 KB)
Attached To
Mode
D18635: Improve search highlighting for CJK and substring queries
Attached
Detach File
Event Timeline
Log In to Comment