Page MenuHomePhabricator

D18673.id44839.diff
No OneTemporary

D18673.id44839.diff

diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -3948,6 +3948,7 @@
'PhabricatorSearchIndexVersionDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchIndexVersionDestructionEngineExtension.php',
'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php',
'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php',
+ 'PhabricatorSearchManagementNgramsWorkflow' => 'applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php',
'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php',
'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php',
'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php',
@@ -9528,6 +9529,7 @@
'PhabricatorSearchIndexVersionDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension',
'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow',
'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow',
+ 'PhabricatorSearchManagementNgramsWorkflow' => 'PhabricatorSearchManagementWorkflow',
'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow',
'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO',
'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension',
diff --git a/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php
new file mode 100644
--- /dev/null
+++ b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php
@@ -0,0 +1,106 @@
+<?php
+
+final class PhabricatorSearchManagementNgramsWorkflow
+ extends PhabricatorSearchManagementWorkflow { // Management workflow "ngrams": fills or resets the common ngrams tables.
+
+ protected function didConstruct() { // Declares the workflow name, synopsis, and its one argument.
+ $this
+ ->setName('ngrams')
+ ->setSynopsis(pht('Recompute common ngrams.'))
+ ->setArguments(
+ array(
+ array(
+ 'name' => 'reset', // No 'param' key is given, so presumably a plain on/off flag; confirm against the parser.
+ 'help' => pht('Reset all common ngram records.'),
+ ),
+ ));
+ }
+
+ public function execute(PhutilArgumentParser $args) { // Visits every object found below; each path prints one status line.
+ $is_reset = $args->getArg('reset');
+
+ $all_objects = id(new PhutilClassMapQuery()) // Class map lookup keyed on the PhabricatorFerretInterface ancestor.
+ ->setAncestorClass('PhabricatorFerretInterface')
+ ->execute();
+
+ $min_documents = 4096; // Types with fewer documents than this are skipped outright.
+ $threshold = 0.15; // Share of the document count that an ngram's row count must reach.
+
+ foreach ($all_objects as $object) { // Each $object supplies both the Ferret engine and the 'w' connection.
+ $engine = $object->newFerretEngine();
+ $conn = $object->establishConnection('w');
+ $display_name = get_class($object); // The class name is what appears in every status message.
+
+ if ($is_reset) { // Reset mode: empty this type's common ngrams table and move on.
+ echo tsprintf(
+ "%s\n",
+ pht(
+ 'Resetting common ngrams for "%s".',
+ $display_name));
+
+ queryfx(
+ $conn,
+ 'DELETE FROM %T',
+ $engine->getCommonNgramsTableName());
+ continue; // Nothing is recomputed in reset mode.
+ }
+
+ $document_count = queryfx_one(
+ $conn,
+ 'SELECT COUNT(*) N FROM %T',
+ $engine->getDocumentTableName());
+ $document_count = $document_count['N']; // Unwrap the COUNT(*) column from the single result row.
+
+ if ($document_count < $min_documents) {
+ echo tsprintf(
+ "%s\n",
+ pht(
+ 'Too few documents of type "%s" for any ngrams to be common.',
+ $display_name));
+ continue;
+ }
+
+ $min_frequency = (int)ceil($document_count * $threshold); // Rounded up so the cutoff is a whole row count.
+ $common_ngrams = queryfx_all( // Selects ngrams whose row count in the ngrams table meets the cutoff.
+ $conn,
+ 'SELECT ngram, COUNT(*) N FROM %T
+ GROUP BY ngram
+ HAVING N >= %d',
+ $engine->getNgramsTableName(),
+ $min_frequency);
+
+ if (!$common_ngrams) { // No ngram reached the cutoff for this type.
+ echo tsprintf(
+ "%s\n",
+ pht(
+ 'No new common ngrams exist for "%s".',
+ $display_name));
+ continue;
+ }
+
+ $sql = array(); // Collects one "(ngram, 1)" value tuple per common ngram.
+ foreach ($common_ngrams as $ngram) {
+ $sql[] = qsprintf(
+ $conn,
+ '(%s, 1)', // The literal 1 lands in the needsCollection column of the INSERT below.
+ $ngram['ngram']);
+ }
+
+ foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { // One INSERT per chunk returned by chunkSQL().
+ queryfx( // NOTE(review): INSERT IGNORE presumably skips ngrams that are already recorded; confirm.
+ $conn,
+ 'INSERT IGNORE INTO %T (ngram, needsCollection)
+ VALUES %Q',
+ $engine->getCommonNgramsTableName(),
+ $chunk);
+ }
+
+ echo tsprintf(
+ "%s\n",
+ pht(
+ 'Updated common ngrams for "%s".',
+ $display_name));
+ }
+ }
+
+}

File Metadata

Mime Type
text/plain
Expires
Fri, Jan 24, 11:24 AM (17 h, 55 m)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7040127
Default Alt Text
D18673.id44839.diff (4 KB)

Event Timeline