Differential D21495 Diff 51323 src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
Changeset View
Changeset View
Standalone View
Standalone View
src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
| Show First 20 Lines • Show All 128 Lines • ▼ Show 20 Lines | foreach ($ferret_corpus_map as $key => $fields) { | ||||
| 'normalCorpus' => $normal_corpus, | 'normalCorpus' => $normal_corpus, | ||||
| ); | ); | ||||
| } | } | ||||
| $ngrams_source = implode("\n", $ngrams_source); | $ngrams_source = implode("\n", $ngrams_source); | ||||
| $ngram_engine = new PhabricatorSearchNgramEngine(); | $ngram_engine = new PhabricatorSearchNgramEngine(); | ||||
| $ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source); | $ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source); | ||||
| $conn = $object->establishConnection('w'); | |||||
| if ($ngrams) { | |||||
| $common = queryfx_all( | |||||
| $conn, | |||||
| 'SELECT ngram FROM %T WHERE ngram IN (%Ls)', | |||||
| $engine->getCommonNgramsTableName(), | |||||
| $ngrams); | |||||
| $common = ipull($common, 'ngram', 'ngram'); | |||||
| foreach ($ngrams as $key => $ngram) { | |||||
| if (isset($common[$ngram])) { | |||||
| unset($ngrams[$key]); | |||||
| continue; | |||||
| } | |||||
| // NOTE: MySQL discards trailing whitespace in CHAR(X) columns. | |||||
| $trimmed_ngram = rtrim($ngram, ' '); | |||||
| if (isset($common[$trimmed_ngram])) { | |||||
| unset($ngrams[$key]); | |||||
| continue; | |||||
| } | |||||
| } | |||||
| } | |||||
| $object->openTransaction(); | $object->openTransaction(); | ||||
| try { | try { | ||||
| $conn = $object->establishConnection('w'); | // See T13587. If this document already exists in the index, we try to | ||||
| $this->deleteOldDocument($engine, $object, $document); | // update the existing rows to avoid leaving the ngrams table heavily | ||||
| // fragmented. | |||||
| $old_document = queryfx_one( | |||||
| $conn, | |||||
| 'SELECT id FROM %T WHERE objectPHID = %s', | |||||
| $engine->getDocumentTableName(), | |||||
| $object->getPHID()); | |||||
| if ($old_document) { | |||||
| $old_document_id = (int)$old_document['id']; | |||||
| } else { | |||||
| $old_document_id = null; | |||||
| } | |||||
| if ($old_document_id === null) { | |||||
| queryfx( | queryfx( | ||||
| $conn, | $conn, | ||||
| 'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified, | 'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified, | ||||
| authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)', | authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)', | ||||
| $engine->getDocumentTableName(), | $engine->getDocumentTableName(), | ||||
| $object->getPHID(), | $object->getPHID(), | ||||
| $is_closed, | $is_closed, | ||||
| $document->getDocumentCreated(), | $document->getDocumentCreated(), | ||||
| $document->getDocumentModified(), | $document->getDocumentModified(), | ||||
| $author_phid, | $author_phid, | ||||
| $owner_phid); | $owner_phid); | ||||
| $document_id = $conn->getInsertID(); | $document_id = $conn->getInsertID(); | ||||
| foreach ($ferret_fields as $ferret_field) { | |||||
| $is_new = true; | |||||
| } else { | |||||
| $document_id = $old_document_id; | |||||
| queryfx( | queryfx( | ||||
| $conn, | $conn, | ||||
| 'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus, | 'UPDATE %T | ||||
| normalCorpus) VALUES (%d, %s, %s, %s, %s)', | SET | ||||
| $engine->getFieldTableName(), | isClosed = %d, | ||||
| $document_id, | epochCreated = %d, | ||||
| $ferret_field['fieldKey'], | epochModified = %d, | ||||
| $ferret_field['rawCorpus'], | authorPHID = %ns, | ||||
| $ferret_field['termCorpus'], | ownerPHID = %ns | ||||
| $ferret_field['normalCorpus']); | WHERE id = %d', | ||||
| $engine->getDocumentTableName(), | |||||
| $is_closed, | |||||
| $document->getDocumentCreated(), | |||||
| $document->getDocumentModified(), | |||||
| $author_phid, | |||||
| $owner_phid, | |||||
| $document_id); | |||||
| $is_new = false; | |||||
| } | } | ||||
| if ($ngrams) { | $this->updateStoredFields( | ||||
| $common = queryfx_all( | |||||
| $conn, | $conn, | ||||
| 'SELECT ngram FROM %T WHERE ngram IN (%Ls)', | $is_new, | ||||
| $engine->getCommonNgramsTableName(), | $document_id, | ||||
| $engine, | |||||
| $ferret_fields); | |||||
| $this->updateStoredNgrams( | |||||
| $conn, | |||||
| $is_new, | |||||
| $document_id, | |||||
| $engine, | |||||
| $ngrams); | $ngrams); | ||||
| $common = ipull($common, 'ngram', 'ngram'); | |||||
| foreach ($ngrams as $key => $ngram) { | } catch (Exception $ex) { | ||||
| if (isset($common[$ngram])) { | $object->killTransaction(); | ||||
| unset($ngrams[$key]); | throw $ex; | ||||
| } catch (Throwable $ex) { | |||||
| $object->killTransaction(); | |||||
| throw $ex; | |||||
| } | |||||
| $object->saveTransaction(); | |||||
| } | |||||
| private function updateStoredFields( | |||||
| AphrontDatabaseConnection $conn, | |||||
| $is_new, | |||||
| $document_id, | |||||
| PhabricatorFerretEngine $engine, | |||||
| $new_fields) { | |||||
| if (!$is_new) { | |||||
| $old_fields = queryfx_all( | |||||
| $conn, | |||||
| 'SELECT * FROM %T WHERE documentID = %d', | |||||
| $engine->getFieldTableName(), | |||||
| $document_id); | |||||
| } else { | |||||
| $old_fields = array(); | |||||
| } | |||||
| $old_fields = ipull($old_fields, null, 'fieldKey'); | |||||
| $new_fields = ipull($new_fields, null, 'fieldKey'); | |||||
| $delete_rows = array(); | |||||
| $insert_rows = array(); | |||||
| $update_rows = array(); | |||||
| foreach ($old_fields as $field_key => $old_field) { | |||||
| if (!isset($new_fields[$field_key])) { | |||||
| $delete_rows[] = $old_field; | |||||
| } | |||||
| } | |||||
| $compare_keys = array( | |||||
| 'rawCorpus', | |||||
| 'termCorpus', | |||||
| 'normalCorpus', | |||||
| ); | |||||
| foreach ($new_fields as $field_key => $new_field) { | |||||
| if (!isset($old_fields[$field_key])) { | |||||
| $insert_rows[] = $new_field; | |||||
| continue; | continue; | ||||
| } | } | ||||
| // NOTE: MySQL discards trailing whitespace in CHAR(X) columns. | $old_field = $old_fields[$field_key]; | ||||
| $trim_ngram = rtrim($ngram, ' '); | |||||
| if (isset($common[$ngram])) { | $same_row = true; | ||||
| unset($ngrams[$key]); | foreach ($compare_keys as $compare_key) { | ||||
| if ($old_field[$compare_key] !== $new_field[$compare_key]) { | |||||
| $same_row = false; | |||||
| break; | |||||
| } | |||||
| } | |||||
| if ($same_row) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| $new_field['id'] = $old_field['id']; | |||||
| $update_rows[] = $new_field; | |||||
| } | } | ||||
| if ($delete_rows) { | |||||
| queryfx( | |||||
| $conn, | |||||
| 'DELETE FROM %T WHERE id IN (%Ld)', | |||||
| $engine->getFieldTableName(), | |||||
| ipull($delete_rows, 'id')); | |||||
| } | } | ||||
| if ($ngrams) { | foreach ($update_rows as $update_row) { | ||||
| $sql = array(); | queryfx( | ||||
| foreach ($ngrams as $ngram) { | |||||
| $sql[] = qsprintf( | |||||
| $conn, | $conn, | ||||
| '(%d, %s)', | 'UPDATE %T | ||||
| $document_id, | SET | ||||
| $ngram); | rawCorpus = %s, | ||||
| termCorpus = %s, | |||||
| normalCorpus = %s | |||||
| WHERE id = %d', | |||||
| $engine->getFieldTableName(), | |||||
| $update_row['rawCorpus'], | |||||
| $update_row['termCorpus'], | |||||
| $update_row['normalCorpus'], | |||||
| $update_row['id']); | |||||
| } | } | ||||
| foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { | foreach ($insert_rows as $insert_row) { | ||||
| queryfx( | queryfx( | ||||
| $conn, | $conn, | ||||
| 'INSERT INTO %T (documentID, ngram) VALUES %LQ', | 'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus, | ||||
| normalCorpus) VALUES (%d, %s, %s, %s, %s)', | |||||
| $engine->getFieldTableName(), | |||||
| $document_id, | |||||
| $insert_row['fieldKey'], | |||||
| $insert_row['rawCorpus'], | |||||
| $insert_row['termCorpus'], | |||||
| $insert_row['normalCorpus']); | |||||
| } | |||||
| } | |||||
| private function updateStoredNgrams( | |||||
| AphrontDatabaseConnection $conn, | |||||
| $is_new, | |||||
| $document_id, | |||||
| PhabricatorFerretEngine $engine, | |||||
| $new_ngrams) { | |||||
| if ($is_new) { | |||||
| $old_ngrams = array(); | |||||
| } else { | |||||
| $old_ngrams = queryfx_all( | |||||
| $conn, | |||||
| 'SELECT id, ngram FROM %T WHERE documentID = %d', | |||||
| $engine->getNgramsTableName(), | $engine->getNgramsTableName(), | ||||
| $chunk); | $document_id); | ||||
| } | } | ||||
| $old_ngrams = ipull($old_ngrams, 'id', 'ngram'); | |||||
| $new_ngrams = array_fuse($new_ngrams); | |||||
| $delete_ids = array(); | |||||
| $insert_ngrams = array(); | |||||
| // NOTE: MySQL discards trailing whitespace in CHAR(X) columns. | |||||
| foreach ($old_ngrams as $ngram => $id) { | |||||
| if (isset($new_ngrams[$ngram])) { | |||||
| continue; | |||||
| } | } | ||||
| } catch (Exception $ex) { | |||||
| $object->killTransaction(); | $untrimmed_ngram = $ngram.' '; | ||||
| throw $ex; | if (isset($new_ngrams[$untrimmed_ngram])) { | ||||
| continue; | |||||
| } | } | ||||
| $object->saveTransaction(); | $delete_ids[] = $id; | ||||
| } | } | ||||
| foreach ($new_ngrams as $ngram) { | |||||
| if (isset($old_ngrams[$ngram])) { | |||||
| continue; | |||||
| } | |||||
| private function deleteOldDocument( | $trimmed_ngram = rtrim($ngram, ' '); | ||||
| PhabricatorFerretEngine $engine, | if (isset($old_ngrams[$trimmed_ngram])) { | ||||
| $object, | continue; | ||||
| PhabricatorSearchAbstractDocument $document) { | } | ||||
| $conn = $object->establishConnection('w'); | $insert_ngrams[] = $ngram; | ||||
| } | |||||
| $old_document = queryfx_one( | if ($delete_ids) { | ||||
| $sql = array(); | |||||
| foreach ($delete_ids as $id) { | |||||
| $sql[] = qsprintf( | |||||
| $conn, | $conn, | ||||
| 'SELECT * FROM %T WHERE objectPHID = %s', | '%d', | ||||
| $engine->getDocumentTableName(), | $id); | ||||
| $object->getPHID()); | |||||
| if (!$old_document) { | |||||
| return; | |||||
| } | } | ||||
| $old_id = $old_document['id']; | foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { | ||||
| queryfx( | queryfx( | ||||
| $conn, | $conn, | ||||
| 'DELETE FROM %T WHERE id = %d', | 'DELETE FROM %T WHERE id IN (%LQ)', | ||||
| $engine->getDocumentTableName(), | $engine->getNgramsTableName(), | ||||
| $old_id); | $chunk); | ||||
| } | |||||
| } | |||||
| queryfx( | if ($insert_ngrams) { | ||||
| $sql = array(); | |||||
| foreach ($insert_ngrams as $ngram) { | |||||
| $sql[] = qsprintf( | |||||
| $conn, | $conn, | ||||
| 'DELETE FROM %T WHERE documentID = %d', | '(%d, %s)', | ||||
| $engine->getFieldTableName(), | $document_id, | ||||
| $old_id); | $ngram); | ||||
| } | |||||
| foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { | |||||
| queryfx( | queryfx( | ||||
| $conn, | $conn, | ||||
| 'DELETE FROM %T WHERE documentID = %d', | 'INSERT INTO %T (documentID, ngram) VALUES %LQ', | ||||
| $engine->getNgramsTableName(), | $engine->getNgramsTableName(), | ||||
| $old_id); | $chunk); | ||||
| } | |||||
| } | |||||
| } | } | ||||
| public function newFerretSearchFunctions() { | public function newFerretSearchFunctions() { | ||||
| return array( | return array( | ||||
| id(new FerretConfigurableSearchFunction()) | id(new FerretConfigurableSearchFunction()) | ||||
| ->setFerretFunctionName('all') | ->setFerretFunctionName('all') | ||||
| ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL), | ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL), | ||||
| id(new FerretConfigurableSearchFunction()) | id(new FerretConfigurableSearchFunction()) | ||||
| Show All 15 Lines | |||||