Page MenuHomePhabricator

D17509.id.diff
No OneTemporary

D17509.id.diff

diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -2651,6 +2651,7 @@
'PhabricatorEditorMultipleSetting' => 'applications/settings/setting/PhabricatorEditorMultipleSetting.php',
'PhabricatorEditorSetting' => 'applications/settings/setting/PhabricatorEditorSetting.php',
'PhabricatorElasticFulltextStorageEngine' => 'applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php',
+ 'PhabricatorElasticSearchQueryBuilder' => 'applications/search/fulltextstorage/PhabricatorElasticSearchQueryBuilder.php',
'PhabricatorElasticSearchSetupCheck' => 'applications/config/check/PhabricatorElasticSearchSetupCheck.php',
'PhabricatorEmailAddressesSettingsPanel' => 'applications/settings/panel/PhabricatorEmailAddressesSettingsPanel.php',
'PhabricatorEmailContentSource' => 'applications/metamta/contentsource/PhabricatorEmailContentSource.php',
diff --git a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
--- a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
+++ b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
@@ -6,10 +6,13 @@
private $uri;
private $index;
private $timeout;
+ /** elasticsearch version */
+ private $version;
public function __construct() {
$this->uri = PhabricatorEnv::getEnvConfig('search.elastic.host');
$this->index = PhabricatorEnv::getEnvConfig('search.elastic.namespace');
+ $this->version = 5;
}
public function getEngineIdentifier() {
@@ -51,6 +54,55 @@
return $this->timeout;
}
+ protected function resolveTags($tags) {
+
+ $lookup_phids = array();
+ foreach ($tags as $phid) {
+ if (!isset(self::$tagCache[$phid])) {
+ $lookup_phids[] = $phid;
+ }
+ }
+ if (count($lookup_phids)) {
+ $projects = id(new PhabricatorProjectQuery())
+ ->setViewer(PhabricatorUser::getOmnipotentUser())
+ ->withPHIDs($lookup_phids)
+ ->needSlugs(true)
+ ->execute();
+
+ foreach ($projects as $project) {
+ $phid = $project->getPHID();
+ $slugs = $project->getSlugs();
+ $slugs = mpull($slugs, 'getSlug');
+ $keywords = $project->getDisplayName().' '.implode(' ', $slugs);
+ $keywords = strtolower($keywords);
+ $keywords = str_replace('_', ' ', $keywords);
+ $keywords = explode(' ', $keywords);
+ $keywords = array_unique($keywords);
+ self::$tagCache[$phid] = $keywords;
+ }
+ }
+
+ $keywords = array();
+ foreach ($tags as $phid) {
+ if (isset(self::$tagCache[$phid])) {
+ $keywords += self::$tagCache[$phid];
+ }
+ }
+ $keywords = array_unique($keywords);
+ return implode(' ', $keywords);
+ }
+
+ public function getTypeConstants($class) {
+ static $typeconstants = array();
+ if (!empty($typeconstants[$class])) {
+ return $typeconstants[$class];
+ }
+
+ $relationship_class = new ReflectionClass($class);
+ $typeconstants[$class] = $relationship_class->getConstants();
+ return array_unique(array_values($typeconstants[$class]));
+ }
+
public function reindexAbstractDocument(
PhabricatorSearchAbstractDocument $doc) {
@@ -61,27 +113,42 @@
->withPHIDs(array($phid))
->executeOne();
+ $timestamp_key = $this->timestampFieldKey;
+
// URL is not used internally but it can be useful externally.
$spec = array(
'title' => $doc->getDocumentTitle(),
'url' => PhabricatorEnv::getProductionURI($handle->getURI()),
'dateCreated' => $doc->getDocumentCreated(),
- '_timestamp' => $doc->getDocumentModified(),
- 'field' => array(),
- 'relationship' => array(),
+ $timestamp_key => $doc->getDocumentModified(),
);
foreach ($doc->getFieldData() as $field) {
- $spec['field'][] = array_combine(array('type', 'corpus', 'aux'), $field);
+ list($field_name, $corpus, $aux) = $field;
+ if (!isset($spec[$field_name])) {
+ $spec[$field_name] = $corpus;
+ } else if (!is_array($spec[$field_name])) {
+ $spec[$field_name] = array($spec[$field_name], $corpus);
+ } else {
+ $spec[$field_name][] = $corpus;
+ }
+ if ($aux != null) {
+ $spec[$field_name.'_aux_phid'] = $aux;
+ }
}
+ $tags = array();
+
foreach ($doc->getRelationshipData() as $relationship) {
list($rtype, $to_phid, $to_type, $time) = $relationship;
- $spec['relationship'][$rtype][] = array(
- 'phid' => $to_phid,
- 'phidType' => $to_type,
- 'when' => (int)$time,
- );
+ $spec[$rtype][] = $to_phid;
+ if ($rtype == PhabricatorSearchRelationship::RELATIONSHIP_PROJECT) {
+ $tags[] = $to_phid;
+ }
+ }
+
+ if (!empty($tags)) {
+ $spec['tags'] = $this->resolveTags($tags);
}
$this->executeRequest("/{$type}/{$phid}/", $spec, 'PUT');
@@ -103,10 +170,11 @@
$doc->setDocumentType($response['_type']);
$doc->setDocumentTitle($hit['title']);
$doc->setDocumentCreated($hit['dateCreated']);
- $doc->setDocumentModified($hit['_timestamp']);
+ $doc->setDocumentModified($hit[$this->timestampFieldKey]);
foreach ($hit['field'] as $fdef) {
- $doc->addField($fdef['type'], $fdef['corpus'], $fdef['aux']);
+ $field_type = $fdef['type'];
+ $doc->addField($field_type, $hit[$field_type], $fdef['aux']);
}
foreach ($hit['relationship'] as $rtype => $rships) {
@@ -123,35 +191,45 @@
}
private function buildSpec(PhabricatorSavedQuery $query) {
- $spec = array();
- $filter = array();
- $title_spec = array();
+ $q = new PhabricatorElasticSearchQueryBuilder('bool');
+ $query_string = $query->getParameter('query');
+ if (strlen($query_string)) {
+ $fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
- if (strlen($query->getParameter('query'))) {
- $spec[] = array(
+ $q->addMustClause(array(
'simple_query_string' => array(
- 'query' => $query->getParameter('query'),
- 'fields' => array('field.corpus'),
+ 'query' => $query_string,
+ 'fields' => array(
+ 'title^4',
+ 'body^3',
+ 'cmnt^2',
+ 'tags',
+ '_all',
+ ),
+ 'default_operator' => 'and',
),
- );
+ ));
- $title_spec = array(
+ $q->addShouldClause(array(
'simple_query_string' => array(
- 'query' => $query->getParameter('query'),
- 'fields' => array('title'),
+ 'query' => $query_string,
+ 'fields' => array_values($fields),
+ 'analyzer' => 'english_exact',
+ 'default_operator' => 'and',
),
- );
+ ));
+
}
$exclude = $query->getParameter('exclude');
if ($exclude) {
- $filter[] = array(
+ $q->addFilterClause(array(
'not' => array(
'ids' => array(
'values' => array($exclude),
),
),
- );
+ ));
}
$relationship_map = array(
@@ -176,76 +254,59 @@
$include_closed = !empty($statuses[$rel_closed]);
if ($include_open && !$include_closed) {
- $relationship_map[$rel_open] = true;
+ $q->addExistsClause($rel_open);
} else if (!$include_open && $include_closed) {
- $relationship_map[$rel_closed] = true;
+ $q->addExistsClause($rel_closed);
}
if ($query->getParameter('withUnowned')) {
- $relationship_map[$rel_unowned] = true;
+ $q->addExistsClause($rel_unowned);
}
$rel_owner = PhabricatorSearchRelationship::RELATIONSHIP_OWNER;
if ($query->getParameter('withAnyOwner')) {
- $relationship_map[$rel_owner] = true;
+ $q->addExistsClause($rel_owner);
} else {
$owner_phids = $query->getParameter('ownerPHIDs', array());
- $relationship_map[$rel_owner] = $owner_phids;
- }
-
- foreach ($relationship_map as $field => $param) {
- if (is_array($param) && $param) {
- $should = array();
- foreach ($param as $val) {
- $should[] = array(
- 'match' => array(
- "relationship.{$field}.phid" => array(
- 'query' => $val,
- 'type' => 'phrase',
- ),
- ),
- );
- }
- // We couldn't solve it by minimum_number_should_match because it can
- // match multiple owners without matching author.
- $spec[] = array('bool' => array('should' => $should));
- } else if ($param) {
- $filter[] = array(
- 'exists' => array(
- 'field' => "relationship.{$field}.phid",
- ),
- );
+ if (count($owner_phids)) {
+ $q->addTermsClause($rel_owner, $owner_phids);
}
}
- if ($spec) {
- $spec = array('query' => array('bool' => array('must' => $spec)));
- if ($title_spec) {
- $spec['query']['bool']['should'] = $title_spec;
+ foreach ($relationship_map as $field => $phids) {
+ if (is_array($phids) && !empty($phids)) {
+ $q->addTermsClause($field, $phids);
}
}
- if ($filter) {
- $filter = array('filter' => array('and' => $filter));
- if (!$spec) {
- $spec = array('query' => array('match_all' => new stdClass()));
- }
- $spec = array(
- 'query' => array(
- 'filtered' => $spec + $filter,
- ),
- );
+ if (!$q->getClauseCount('must')) {
+ $q->addMustClause(array('match_all' => array('boost' => 1 )));
}
+ $spec = array(
+ '_source' => false,
+ 'query' => array(
+ 'bool' => $q->toArray(),
+ ),
+ );
+
+
if (!$query->getParameter('query')) {
$spec['sort'] = array(
array('dateCreated' => 'desc'),
);
}
- $spec['from'] = (int)$query->getParameter('offset', 0);
- $spec['size'] = (int)$query->getParameter('limit', 25);
-
+ $offset = (int)$query->getParameter('offset', 0);
+ $limit = (int)$query->getParameter('limit', 101);
+ if ($offset + $limit > 10000) {
+ throw new Exception(pht(
+ 'Query offset is too large. offset+limit=%s (max=%s)',
+ $offset + $limit,
+ 10000));
+ }
+ $spec['from'] = $offset;
+ $spec['size'] = $limit;
return $spec;
}
@@ -261,22 +322,8 @@
// some bigger index). Use '/$types/_search' instead.
$uri = '/'.implode(',', $types).'/_search';
- try {
- $response = $this->executeRequest($uri, $this->buildSpec($query));
- } catch (HTTPFutureHTTPResponseStatus $ex) {
- // elasticsearch probably uses Lucene query syntax:
- // http://lucene.apache.org/core/3_6_1/queryparsersyntax.html
- // Try literal search if operator search fails.
- if (!strlen($query->getParameter('query'))) {
- throw $ex;
- }
- $query = clone $query;
- $query->setParameter(
- 'query',
- addcslashes(
- $query->getParameter('query'), '+-&|!(){}[]^"~*?:\\'));
- $response = $this->executeRequest($uri, $this->buildSpec($query));
- }
+ $spec = $this->buildSpec($query);
+ $response = $this->executeRequest($uri, $spec);
$phids = ipull($response['hits']['hits'], '_id');
return $phids;
@@ -284,7 +331,17 @@
public function indexExists() {
try {
- return (bool)$this->executeRequest('/_status/', array());
+
+ if ($this->version >= 5) {
+ $uri = '/_stats/';
+ $res = $this->executeRequest($uri, array());
+ return isset($res['indices']['phabricator']);
+ } else if ($this->version >= 2) {
+ $uri = '';
+ } else {
+ $uri = '/_status/';
+ }
+ return (bool)$this->executeRequest($uri, array());
} catch (HTTPFutureHTTPResponseStatus $e) {
if ($e->getStatusCode() == 404) {
return false;
@@ -299,43 +356,63 @@
'index' => array(
'auto_expand_replicas' => '0-2',
'analysis' => array(
- 'filter' => array(
- 'trigrams_filter' => array(
- 'min_gram' => 3,
- 'type' => 'ngram',
- 'max_gram' => 3,
- ),
- ),
'analyzer' => array(
- 'custom_trigrams' => array(
- 'type' => 'custom',
- 'filter' => array(
- 'lowercase',
- 'kstem',
- 'trigrams_filter',
- ),
+ 'english_exact' => array(
'tokenizer' => 'standard',
+ 'filter' => array('lowercase'),
),
),
),
),
);
+ $fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
+ $relationships = $this->getTypeConstants('PhabricatorSearchRelationship');
+
$types = array_keys(
PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes());
+
foreach ($types as $type) {
- // Use the custom trigram analyzer for the corpus of text
- $data['mappings'][$type]['properties']['field']['properties']['corpus'] =
- array('type' => 'string', 'analyzer' => 'custom_trigrams');
+ $properties = array();
+ foreach ($fields as $field) {
+ // Use the custom analyzer for the corpus of text
+ $properties[$field] = array(
+ 'type' => $this->textFieldType,
+ 'analyzer' => 'english_exact',
+ 'search_analyzer' => 'english',
+ 'search_quote_analyzer' => 'english_exact',
+ );
+ }
+
+ foreach ($relationships as $rel) {
+ $properties[$rel] = array(
+ 'type' => $this->textFieldType,
+ );
+ if ($this->version < 5) {
+ $properties[$rel]['index'] = 'not_analyzed';
+ }
+ }
// Ensure we have dateCreated since the default query requires it
- $data['mappings'][$type]['properties']['dateCreated']['type'] = 'string';
- }
+ $properties['dateCreated']['type'] = 'date';
+
+ // Replaces deprecated _timestamp for elasticsearch 2
+ if ((int)$this->version >= 2) {
+ $properties['lastModified']['type'] = 'date';
+ }
+ $properties['tags'] = array(
+ 'type' => $this->textFieldType,
+ 'analyzer' => 'english',
+ 'store' => true,
+ );
+ $data['mappings'][$type]['properties'] = $properties;
+ }
return $data;
}
public function indexIsSane() {
+
if (!$this->indexExists()) {
return false;
}
@@ -345,7 +422,8 @@
$actual = array_merge($cur_settings[$this->index],
$cur_mapping[$this->index]);
- return $this->check($actual, $this->getIndexConfiguration());
+ $res = $this->check($actual, $this->getIndexConfiguration());
+ return $res;
}
/**
diff --git a/src/applications/search/fulltextstorage/PhabricatorElasticSearchQueryBuilder.php b/src/applications/search/fulltextstorage/PhabricatorElasticSearchQueryBuilder.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/fulltextstorage/PhabricatorElasticSearchQueryBuilder.php
@@ -0,0 +1,78 @@
+<?php
+
+class PhabricatorElasticSearchQueryBuilder {
+ protected $name;
+ protected $clauses = array();
+
+
+ public function getClauses($termkey = null) {
+ $clauses = $this->clauses;
+ if ($termkey == null) {
+ return $clauses;
+ }
+ if (isset($clauses[$termkey])) {
+ return $clauses[$termkey];
+ }
+ return array();
+ }
+
+ public function getClauseCount($clausekey) {
+ if (isset($this->clauses[$clausekey])) {
+ return count($this->clauses[$clausekey]);
+ } else {
+ return 0;
+ }
+ }
+
+ public function addExistsClause($field) {
+ return $this->addClause('filter', array(
+ 'exists' => array(
+ 'field' => $field,
+ ),
+ ));
+ }
+
+ public function addTermsClause($field, $values) {
+ return $this->addClause('filter', array(
+ 'terms' => array(
+ $field => array_values($values),
+ ),
+ ));
+ }
+
+ public function addMustClause($clause) {
+ return $this->addClause('must', $clause);
+ }
+
+ public function addFilterClause($clause) {
+ return $this->addClause('filter', $clause);
+ }
+
+ public function addShouldClause($clause) {
+ return $this->addClause('should', $clause);
+ }
+
+ public function addMustNotClause($clause) {
+ return $this->addClause('must_not', $clause);
+ }
+
+ public function addClause($clause, $terms) {
+ $this->clauses[$clause][] = $terms;
+ return $this;
+ }
+
+ public function toArray() {
+ $clauses = $this->getClauses();
+ return $clauses;
+ $cleaned = array();
+ foreach ($clauses as $clause => $subclauses) {
+ if (is_array($subclauses) && count($subclauses) == 1) {
+ $cleaned[$clause] = array_shift($subclauses);
+ } else {
+ $cleaned[$clause] = $subclauses;
+ }
+ }
+ return $cleaned;
+ }
+
+}

File Metadata

Mime Type
text/plain
Expires
Wed, Feb 12, 11:02 PM (16 h, 43 m)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7125482
Default Alt Text
D17509.id.diff (16 KB)

Event Timeline