Changeset View
Changeset View
Standalone View
Standalone View
src/applications/search/view/PhabricatorSearchResultView.php
| <?php | <?php | ||||
| final class PhabricatorSearchResultView extends AphrontView { | final class PhabricatorSearchResultView extends AphrontView { | ||||
| private $handle; | private $handle; | ||||
| private $query; | |||||
| private $object; | private $object; | ||||
| private $tokens; | |||||
| public function setHandle(PhabricatorObjectHandle $handle) { | public function setHandle(PhabricatorObjectHandle $handle) { | ||||
| $this->handle = $handle; | $this->handle = $handle; | ||||
| return $this; | return $this; | ||||
| } | } | ||||
| public function setQuery(PhabricatorSavedQuery $query) { | public function setTokens(array $tokens) { | ||||
| $this->query = $query; | assert_instances_of($tokens, 'PhabricatorFulltextToken'); | ||||
| $this->tokens = $tokens; | |||||
| return $this; | return $this; | ||||
| } | } | ||||
| public function setObject($object) { | public function setObject($object) { | ||||
| $this->object = $object; | $this->object = $object; | ||||
| return $this; | return $this; | ||||
| } | } | ||||
| Show All 27 Lines | final class PhabricatorSearchResultView extends AphrontView { | ||||
| /** | /** | ||||
| * Find the words which are part of the query string, and bold them in a | * Find the words which are part of the query string, and bold them in a | ||||
| * result string. This makes it easier for users to see why a result | * result string. This makes it easier for users to see why a result | ||||
| * matched their query. | * matched their query. | ||||
| */ | */ | ||||
| private function emboldenQuery($str) { | private function emboldenQuery($str) { | ||||
| $query = $this->query->getParameter('query'); | $tokens = $this->tokens; | ||||
| if (!strlen($query) || !strlen($str)) { | if (!$tokens) { | ||||
| return $str; | return $str; | ||||
| } | } | ||||
| // This algorithm is safe but not especially fast, so don't bother if | if (count($tokens) > 16) { | ||||
| // we're dealing with a lot of data. This mostly prevents silly/malicious | |||||
| // queries from doing anything bad. | |||||
| if (strlen($query) + strlen($str) > 2048) { | |||||
| return $str; | return $str; | ||||
| } | } | ||||
| // Keep track of which characters we're going to make bold. This is | if (!strlen($str)) { | ||||
| // byte oriented, but we'll make sure we don't put a bold in the middle | return $str; | ||||
| // of a character later. | } | ||||
| $bold = array_fill(0, strlen($str), false); | |||||
| // Split the query into words. | if (strlen($str) > 2048) { | ||||
| $parts = preg_split('/ +/', $query); | return $str; | ||||
| } | |||||
| // Find all occurrences of each word, and mark them to be emboldened. | $patterns = array(); | ||||
| foreach ($parts as $part) { | foreach ($tokens as $token) { | ||||
| $part = trim($part); | $raw_token = $token->getToken(); | ||||
| $part = trim($part, '"+'); | $operator = $raw_token->getOperator(); | ||||
| if (!strlen($part)) { | |||||
| continue; | $value = $raw_token->getValue(); | ||||
| switch ($operator) { | |||||
| case PhutilSearchQueryCompiler::OPERATOR_SUBSTRING: | |||||
| $patterns[] = '(('.preg_quote($value).'))ui'; | |||||
| break; | |||||
| case PhutilSearchQueryCompiler::OPERATOR_AND: | |||||
| $patterns[] = '((?<=\W|^)('.preg_quote($value).')(?=\W|\z))ui'; | |||||
| break; | |||||
| default: | |||||
| // Don't highlight anything else, particularly "NOT". | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| // Find all matches for all query terms in the document title, then reduce | |||||
| // them to a map from offsets to highlighted sequence lengths. If two terms | |||||
| // match at the same position, we choose the longer one. | |||||
| $all_matches = array(); | |||||
| foreach ($patterns as $pattern) { | |||||
| $matches = null; | $matches = null; | ||||
| $has_matches = preg_match_all( | $ok = preg_match_all( | ||||
| '/(?:^|\b)('.preg_quote($part, '/').')/i', | $pattern, | ||||
| $str, | $str, | ||||
| $matches, | $matches, | ||||
| PREG_OFFSET_CAPTURE); | PREG_OFFSET_CAPTURE); | ||||
| if (!$ok) { | |||||
| if (!$has_matches) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| // Flag the matching part of the range for boldening. | |||||
| foreach ($matches[1] as $match) { | foreach ($matches[1] as $match) { | ||||
| $offset = $match[1]; | $match_text = $match[0]; | ||||
| for ($ii = 0; $ii < strlen($match[0]); $ii++) { | $match_offset = $match[1]; | ||||
| $bold[$offset + $ii] = true; | |||||
| if (!isset($all_matches[$match_offset])) { | |||||
| $all_matches[$match_offset] = 0; | |||||
| } | |||||
| $all_matches[$match_offset] = max( | |||||
| $all_matches[$match_offset], | |||||
| strlen($match_text)); | |||||
| } | } | ||||
| } | } | ||||
| // Go through the string one display glyph at a time. If a glyph starts | |||||
| // on a highlighted byte position, turn on highlighting for the number | |||||
| // of matching bytes. If a query searches for "e" and the document contains | |||||
| // an "e" followed by a bunch of combining marks, this will correctly | |||||
amckinley: "correctly" | |||||
| // highlight the entire glyph. | |||||
| $parts = array(); | |||||
| $highlight = 0; | |||||
| $offset = 0; | |||||
| foreach (phutil_utf8v_combined($str) as $character) { | |||||
| $length = strlen($character); | |||||
| if (isset($all_matches[$offset])) { | |||||
| $highlight = $all_matches[$offset]; | |||||
| } | } | ||||
| // Split the string into ranges, applying bold styling as required. | if ($highlight > 0) { | ||||
| $out = array(); | $is_highlighted = true; | ||||
| $buf = ''; | $highlight -= $length; | ||||
| $pos = 0; | |||||
| $is_bold = false; | |||||
| // Make sure this is UTF8 because phutil_utf8v() will explode if it isn't. | |||||
| $str = phutil_utf8ize($str); | |||||
| foreach (phutil_utf8v($str) as $chr) { | |||||
| if ($bold[$pos] != $is_bold) { | |||||
| if (strlen($buf)) { | |||||
| if ($is_bold) { | |||||
| $out[] = phutil_tag('strong', array(), $buf); | |||||
| } else { | } else { | ||||
| $out[] = $buf; | $is_highlighted = false; | ||||
| } | |||||
| $parts[] = array( | |||||
| 'text' => $character, | |||||
| 'highlighted' => $is_highlighted, | |||||
| ); | |||||
| $offset += $length; | |||||
| } | } | ||||
| $buf = ''; | |||||
| // Combine all the sequences together so we aren't emitting a tag around | |||||
| // every individual character. | |||||
| $last = null; | |||||
| foreach ($parts as $key => $part) { | |||||
| if ($last !== null) { | |||||
| if ($part['highlighted'] == $parts[$last]['highlighted']) { | |||||
| $parts[$last]['text'] .= $part['text']; | |||||
| unset($parts[$key]); | |||||
| continue; | |||||
| } | } | ||||
| $is_bold = !$is_bold; | |||||
| } | } | ||||
| $buf .= $chr; | |||||
| $pos += strlen($chr); | $last = $key; | ||||
| } | } | ||||
| if (strlen($buf)) { | // Finally, add tags. | ||||
| if ($is_bold) { | $result = array(); | ||||
| $out[] = phutil_tag('strong', array(), $buf); | foreach ($parts as $part) { | ||||
| if ($part['highlighted']) { | |||||
| $result[] = phutil_tag('strong', array(), $part['text']); | |||||
| } else { | } else { | ||||
| $out[] = $buf; | $result[] = $part['text']; | ||||
| } | } | ||||
| } | } | ||||
| return $out; | return $result; | ||||
| } | } | ||||
| } | } | ||||
"correctly"