Differential D21495 Diff 51158 src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
Changeset View
Changeset View
Standalone View
Standalone View
src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
Show First 20 Lines • Show All 128 Lines • ▼ Show 20 Lines | foreach ($ferret_corpus_map as $key => $fields) { | ||||
'normalCorpus' => $normal_corpus, | 'normalCorpus' => $normal_corpus, | ||||
); | ); | ||||
} | } | ||||
$ngrams_source = implode("\n", $ngrams_source); | $ngrams_source = implode("\n", $ngrams_source); | ||||
$ngram_engine = new PhabricatorSearchNgramEngine(); | $ngram_engine = new PhabricatorSearchNgramEngine(); | ||||
$ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source); | $ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source); | ||||
$conn = $object->establishConnection('w'); | |||||
if ($ngrams) { | |||||
$common = queryfx_all( | |||||
$conn, | |||||
'SELECT ngram FROM %T WHERE ngram IN (%Ls)', | |||||
$engine->getCommonNgramsTableName(), | |||||
$ngrams); | |||||
$common = ipull($common, 'ngram', 'ngram'); | |||||
foreach ($ngrams as $key => $ngram) { | |||||
if (isset($common[$ngram])) { | |||||
unset($ngrams[$key]); | |||||
continue; | |||||
} | |||||
// NOTE: MySQL discards trailing whitespace in CHAR(X) columns. | |||||
$trimmed_ngram = rtrim($ngram, ' '); | |||||
if (isset($common[$trimmed_ngram])) { | |||||
unset($ngrams[$key]); | |||||
continue; | |||||
} | |||||
} | |||||
} | |||||
$object->openTransaction(); | $object->openTransaction(); | ||||
try { | try { | ||||
$conn = $object->establishConnection('w'); | // See T13587. If this document already exists in the index, we try to | ||||
$this->deleteOldDocument($engine, $object, $document); | // update the existing rows to avoid leaving the ngrams table heavily | ||||
// fragmented. | |||||
$old_document = queryfx_one( | |||||
$conn, | |||||
'SELECT id FROM %T WHERE objectPHID = %s', | |||||
$engine->getDocumentTableName(), | |||||
$object->getPHID()); | |||||
if ($old_document) { | |||||
$old_document_id = (int)$old_document['id']; | |||||
} else { | |||||
$old_document_id = null; | |||||
} | |||||
if ($old_document_id === null) { | |||||
queryfx( | queryfx( | ||||
$conn, | $conn, | ||||
'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified, | 'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified, | ||||
authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)', | authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)', | ||||
$engine->getDocumentTableName(), | $engine->getDocumentTableName(), | ||||
$object->getPHID(), | $object->getPHID(), | ||||
$is_closed, | $is_closed, | ||||
$document->getDocumentCreated(), | $document->getDocumentCreated(), | ||||
$document->getDocumentModified(), | $document->getDocumentModified(), | ||||
$author_phid, | $author_phid, | ||||
$owner_phid); | $owner_phid); | ||||
$document_id = $conn->getInsertID(); | $document_id = $conn->getInsertID(); | ||||
foreach ($ferret_fields as $ferret_field) { | |||||
$is_new = true; | |||||
} else { | |||||
$document_id = $old_document_id; | |||||
queryfx( | queryfx( | ||||
$conn, | $conn, | ||||
'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus, | 'UPDATE %T | ||||
normalCorpus) VALUES (%d, %s, %s, %s, %s)', | SET | ||||
$engine->getFieldTableName(), | isClosed = %d, | ||||
$document_id, | epochCreated = %d, | ||||
$ferret_field['fieldKey'], | epochModified = %d, | ||||
$ferret_field['rawCorpus'], | authorPHID = %ns, | ||||
$ferret_field['termCorpus'], | ownerPHID = %ns | ||||
$ferret_field['normalCorpus']); | WHERE id = %d', | ||||
$engine->getDocumentTableName(), | |||||
$is_closed, | |||||
$document->getDocumentCreated(), | |||||
$document->getDocumentModified(), | |||||
$author_phid, | |||||
$owner_phid, | |||||
$document_id); | |||||
$is_new = false; | |||||
} | } | ||||
if ($ngrams) { | $this->updateStoredFields( | ||||
$common = queryfx_all( | |||||
$conn, | $conn, | ||||
'SELECT ngram FROM %T WHERE ngram IN (%Ls)', | $is_new, | ||||
$engine->getCommonNgramsTableName(), | $document_id, | ||||
$engine, | |||||
$ferret_fields); | |||||
$this->updateStoredNgrams( | |||||
$conn, | |||||
$is_new, | |||||
$document_id, | |||||
$engine, | |||||
$ngrams); | $ngrams); | ||||
$common = ipull($common, 'ngram', 'ngram'); | |||||
foreach ($ngrams as $key => $ngram) { | } catch (Exception $ex) { | ||||
if (isset($common[$ngram])) { | $object->killTransaction(); | ||||
unset($ngrams[$key]); | throw $ex; | ||||
} | |||||
$object->saveTransaction(); | |||||
} | |||||
private function updateStoredFields( | |||||
AphrontDatabaseConnection $conn, | |||||
$is_new, | |||||
$document_id, | |||||
PhabricatorFerretEngine $engine, | |||||
$new_fields) { | |||||
if (!$is_new) { | |||||
$old_fields = queryfx_all( | |||||
$conn, | |||||
'SELECT * FROM %T WHERE documentID = %d', | |||||
$engine->getFieldTableName(), | |||||
$document_id); | |||||
} else { | |||||
$old_fields = array(); | |||||
} | |||||
$old_fields = ipull($old_fields, null, 'fieldKey'); | |||||
$new_fields = ipull($new_fields, null, 'fieldKey'); | |||||
$delete_rows = array(); | |||||
$insert_rows = array(); | |||||
$update_rows = array(); | |||||
foreach ($old_fields as $field_key => $old_field) { | |||||
if (!isset($new_fields[$field_key])) { | |||||
$delete_rows[] = $old_field; | |||||
} | |||||
} | |||||
$compare_keys = array( | |||||
'rawCorpus', | |||||
'termCorpus', | |||||
'normalCorpus', | |||||
); | |||||
foreach ($new_fields as $field_key => $new_field) { | |||||
if (!isset($old_fields[$field_key])) { | |||||
$insert_rows[] = $new_field; | |||||
continue; | continue; | ||||
} | } | ||||
// NOTE: MySQL discards trailing whitespace in CHAR(X) columns. | $old_field = $old_fields[$field_key]; | ||||
$trim_ngram = rtrim($ngram, ' '); | |||||
if (isset($common[$ngram])) { | $same_row = true; | ||||
unset($ngrams[$key]); | foreach ($compare_keys as $compare_key) { | ||||
if ($old_field[$compare_key] !== $new_field[$compare_key]) { | |||||
$same_row = false; | |||||
break; | |||||
} | |||||
} | |||||
if ($same_row) { | |||||
continue; | continue; | ||||
} | } | ||||
$new_field['id'] = $old_field['id']; | |||||
$update_rows[] = $new_field; | |||||
} | } | ||||
if ($delete_rows) { | |||||
queryfx( | |||||
$conn, | |||||
'DELETE FROM %T WHERE id IN (%Ld)', | |||||
$engine->getFieldTableName(), | |||||
ipull($delete_rows, 'id')); | |||||
} | } | ||||
if ($ngrams) { | foreach ($update_rows as $update_row) { | ||||
$sql = array(); | queryfx( | ||||
foreach ($ngrams as $ngram) { | |||||
$sql[] = qsprintf( | |||||
$conn, | $conn, | ||||
'(%d, %s)', | 'UPDATE %T | ||||
$document_id, | SET | ||||
$ngram); | rawCorpus = %s, | ||||
termCorpus = %s, | |||||
normalCorpus = %s | |||||
WHERE id = %d', | |||||
$engine->getFieldTableName(), | |||||
$update_row['rawCorpus'], | |||||
$update_row['termCorpus'], | |||||
$update_row['normalCorpus'], | |||||
$update_row['id']); | |||||
} | } | ||||
foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { | foreach ($insert_rows as $insert_row) { | ||||
queryfx( | queryfx( | ||||
$conn, | $conn, | ||||
'INSERT INTO %T (documentID, ngram) VALUES %LQ', | 'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus, | ||||
normalCorpus) VALUES (%d, %s, %s, %s, %s)', | |||||
$engine->getFieldTableName(), | |||||
$document_id, | |||||
$insert_row['fieldKey'], | |||||
$insert_row['rawCorpus'], | |||||
$insert_row['termCorpus'], | |||||
$insert_row['normalCorpus']); | |||||
} | |||||
} | |||||
private function updateStoredNgrams( | |||||
AphrontDatabaseConnection $conn, | |||||
$is_new, | |||||
$document_id, | |||||
PhabricatorFerretEngine $engine, | |||||
$new_ngrams) { | |||||
if ($is_new) { | |||||
$old_ngrams = array(); | |||||
} else { | |||||
$old_ngrams = queryfx_all( | |||||
$conn, | |||||
'SELECT id, ngram FROM %T WHERE documentID = %d', | |||||
$engine->getNgramsTableName(), | $engine->getNgramsTableName(), | ||||
$chunk); | $document_id); | ||||
} | } | ||||
$old_ngrams = ipull($old_ngrams, 'id', 'ngram'); | |||||
$new_ngrams = array_fuse($new_ngrams); | |||||
$delete_ids = array(); | |||||
$insert_ngrams = array(); | |||||
// NOTE: MySQL discards trailing whitespace in CHAR(X) columns. | |||||
foreach ($old_ngrams as $ngram => $id) { | |||||
if (isset($new_ngrams[$ngram])) { | |||||
continue; | |||||
} | } | ||||
} catch (Exception $ex) { | |||||
$object->killTransaction(); | $untrimmed_ngram = $ngram.' '; | ||||
throw $ex; | if (isset($new_ngrams[$untrimmed_ngram])) { | ||||
continue; | |||||
} | } | ||||
$object->saveTransaction(); | $delete_ids[] = $id; | ||||
} | } | ||||
foreach ($new_ngrams as $ngram) { | |||||
if (isset($old_ngrams[$ngram])) { | |||||
continue; | |||||
} | |||||
private function deleteOldDocument( | $trimmed_ngram = rtrim($ngram, ' '); | ||||
PhabricatorFerretEngine $engine, | if (isset($old_ngrams[$trimmed_ngram])) { | ||||
$object, | continue; | ||||
PhabricatorSearchAbstractDocument $document) { | } | ||||
$conn = $object->establishConnection('w'); | $insert_ngrams[] = $ngram; | ||||
} | |||||
$old_document = queryfx_one( | if ($delete_ids) { | ||||
$sql = array(); | |||||
foreach ($delete_ids as $id) { | |||||
$sql[] = qsprintf( | |||||
$conn, | $conn, | ||||
'SELECT * FROM %T WHERE objectPHID = %s', | '%d', | ||||
$engine->getDocumentTableName(), | $id); | ||||
$object->getPHID()); | |||||
if (!$old_document) { | |||||
return; | |||||
} | } | ||||
$old_id = $old_document['id']; | foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { | ||||
queryfx( | queryfx( | ||||
$conn, | $conn, | ||||
'DELETE FROM %T WHERE id = %d', | 'DELETE FROM %T WHERE id IN (%LQ)', | ||||
$engine->getDocumentTableName(), | $engine->getNgramsTableName(), | ||||
$old_id); | $chunk); | ||||
} | |||||
} | |||||
queryfx( | if ($insert_ngrams) { | ||||
$sql = array(); | |||||
foreach ($insert_ngrams as $ngram) { | |||||
$sql[] = qsprintf( | |||||
$conn, | $conn, | ||||
'DELETE FROM %T WHERE documentID = %d', | '(%d, %s)', | ||||
$engine->getFieldTableName(), | $document_id, | ||||
$old_id); | $ngram); | ||||
} | |||||
foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { | |||||
queryfx( | queryfx( | ||||
$conn, | $conn, | ||||
'DELETE FROM %T WHERE documentID = %d', | 'INSERT INTO %T (documentID, ngram) VALUES %LQ', | ||||
$engine->getNgramsTableName(), | $engine->getNgramsTableName(), | ||||
$old_id); | $chunk); | ||||
} | |||||
} | |||||
} | } | ||||
public function newFerretSearchFunctions() { | public function newFerretSearchFunctions() { | ||||
return array( | return array( | ||||
id(new FerretConfigurableSearchFunction()) | id(new FerretConfigurableSearchFunction()) | ||||
->setFerretFunctionName('all') | ->setFerretFunctionName('all') | ||||
->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL), | ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL), | ||||
id(new FerretConfigurableSearchFunction()) | id(new FerretConfigurableSearchFunction()) | ||||
Show All 15 Lines |