diff --git a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php
index e66ba784f7..b838c8a5d9 100644
--- a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php
+++ b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php
@@ -1,365 +1,379 @@
 disableUTF8MB4 = $disable_utf8_mb4;
     return $this;
   }

   public function getDisableUTF8MB4() {
     return $this->disableUTF8MB4;
   }

   public function setNamespace($namespace) {
     $this->namespace = $namespace;
     PhabricatorLiskDAO::pushStorageNamespace($namespace);
     return $this;
   }

   public function getNamespace() {
     return $this->namespace;
   }

   public function setUser($user) {
     $this->user = $user;
     return $this;
   }

   public function getUser() {
     return $this->user;
   }

   public function setPassword($password) {
     $this->password = $password;
     return $this;
   }

   public function getPassword() {
     return $this->password;
   }

   public function setHost($host) {
     $this->host = $host;
     return $this;
   }

   public function getHost() {
     return $this->host;
   }

   public function setPort($port) {
     $this->port = $port;
     return $this;
   }

   public function getPort() {
     return $this->port;
   }

   public function setRef(PhabricatorDatabaseRef $ref) {
     $this->ref = $ref;
     return $this;
   }

   public function getRef() {
     return $this->ref;
   }

   public function getDatabaseName($fragment) {
     return $this->namespace.'_'.$fragment;
   }

   public function getDatabaseList(array $patches, $only_living = false) {
     assert_instances_of($patches, 'PhabricatorStoragePatch');

     $list = array();

     foreach ($patches as $patch) {
       if ($patch->getType() == 'db') {
         if ($only_living && $patch->isDead()) {
           continue;
         }
         $list[] = $this->getDatabaseName($patch->getName());
       }
     }

     return $list;
   }

   public function getConn($fragment) {
     $database = $this->getDatabaseName($fragment);
     $return = &$this->conns[$this->host][$this->user][$database];
     if (!$return) {
       $return = PhabricatorDatabaseRef::newRawConnection(
         array(
           'user' => $this->user,
           'pass' => $this->password,
           'host' => $this->host,
           'port' => $this->port,
           'database' => $fragment ? $database : null,
         ));
     }
     return $return;
   }

   public function getAppliedPatches() {
     try {
       $applied = queryfx_all(
         $this->getConn('meta_data'),
         'SELECT patch FROM %T',
         self::TABLE_STATUS);
       return ipull($applied, 'patch');
     } catch (AphrontAccessDeniedQueryException $ex) {
       throw new PhutilProxyException(
         pht(
           'Failed while trying to read schema status: the database "%s" '.
           'exists, but the current user ("%s") does not have permission to '.
           'access it. GRANT the current user more permissions, or use a '.
           'different user.',
           $this->getDatabaseName('meta_data'),
           $this->getUser()),
         $ex);
     } catch (AphrontQueryException $ex) {
       return null;
     }
   }

   public function getPatchDurations() {
     try {
       $rows = queryfx_all(
         $this->getConn('meta_data'),
         'SELECT patch, duration FROM %T WHERE duration IS NOT NULL',
         self::TABLE_STATUS);
       return ipull($rows, 'duration', 'patch');
     } catch (AphrontQueryException $ex) {
       return array();
     }
   }

   public function createDatabase($fragment) {
     $info = $this->getCharsetInfo();

     queryfx(
       $this->getConn(null),
       'CREATE DATABASE IF NOT EXISTS %T COLLATE %T',
       $this->getDatabaseName($fragment),
       $info[self::COLLATE_TEXT]);
   }

   public function createTable($fragment, $table, array $cols) {
     queryfx(
       $this->getConn($fragment),
       'CREATE TABLE IF NOT EXISTS %T.%T (%Q) '.
       'ENGINE=InnoDB, COLLATE utf8_general_ci',
       $this->getDatabaseName($fragment),
       $table,
       implode(', ', $cols));
   }

   public function getLegacyPatches(array $patches) {
     assert_instances_of($patches, 'PhabricatorStoragePatch');

     try {
       $row = queryfx_one(
         $this->getConn('meta_data'),
         'SELECT version FROM %T',
         'schema_version');
       $version = $row['version'];
     } catch (AphrontQueryException $ex) {
       return array();
     }

     $legacy = array();
     foreach ($patches as $key => $patch) {
       if ($patch->getLegacy() !== false && $patch->getLegacy() <= $version) {
         $legacy[] = $key;
       }
     }

     return $legacy;
   }

   public function markPatchApplied($patch, $duration = null) {
     $conn = $this->getConn('meta_data');

     queryfx(
       $conn,
       'INSERT INTO %T (patch, applied) VALUES (%s, %d)',
       self::TABLE_STATUS,
       $patch,
       time());

     // We didn't add this column for a long time, so it may not exist yet.
     if ($duration !== null) {
       try {
         queryfx(
           $conn,
           'UPDATE %T SET duration = %d WHERE patch = %s',
           self::TABLE_STATUS,
           (int)floor($duration * 1000000),
           $patch);
       } catch (AphrontQueryException $ex) {
         // Just ignore this, as it almost certainly indicates that we just
         // don't have the column yet.
       }
     }
   }

   public function applyPatch(PhabricatorStoragePatch $patch) {
     $type = $patch->getType();
     $name = $patch->getName();
     switch ($type) {
       case 'db':
         $this->createDatabase($name);
         break;
       case 'sql':
         $this->applyPatchSQL($name);
         break;
       case 'php':
         $this->applyPatchPHP($name);
         break;
       default:
         throw new Exception(pht("Unable to apply patch of type '%s'.", $type));
     }
   }

   public function applyPatchSQL($sql) {
     $sql = Filesystem::readFile($sql);
     $queries = preg_split('/;\s+/', $sql);
     $queries = array_filter($queries);

     $conn = $this->getConn(null);

     $charset_info = $this->getCharsetInfo();
     foreach ($charset_info as $key => $value) {
       $charset_info[$key] = qsprintf($conn, '%T', $value);
     }

     foreach ($queries as $query) {
       $query = str_replace('{$NAMESPACE}', $this->namespace, $query);

       foreach ($charset_info as $key => $value) {
         $query = str_replace('{$'.$key.'}', $value, $query);
       }

       try {
         // NOTE: We're using the unsafe "%Z" conversion here. There's no
         // avoiding it since we're executing raw text files full of SQL.
         queryfx($conn, '%Z', $query);
       } catch (AphrontAccessDeniedQueryException $ex) {
         throw new PhutilProxyException(
           pht(
             'Unable to access a required database or table. This almost '.
             'always means that the user you are connecting with ("%s") does '.
             'not have sufficient permissions granted in MySQL. You can '.
             'use `bin/storage databases` to get a list of all databases '.
             'permission is required on.',
             $this->getUser()),
           $ex);
       }
     }
   }

   public function applyPatchPHP($script) {
     $schema_conn = $this->getConn(null);
     require_once $script;
   }

   public function isCharacterSetAvailable($character_set) {
     if ($character_set == 'utf8mb4') {
       if ($this->getDisableUTF8MB4()) {
         return false;
       }
     }

     $conn = $this->getConn(null);
     return self::isCharacterSetAvailableOnConnection($character_set, $conn);
   }

+  /**
+   * Choose the character set that command-line MySQL clients should use.
+   *
+   * @return string "utf8mb4" when @{method:isCharacterSetAvailable} reports
+   *   it as usable (supported and not disabled), otherwise "utf8".
+   */
+  public function getClientCharset() {
+    if ($this->isCharacterSetAvailable('utf8mb4')) {
+      return 'utf8mb4';
+    } else {
+      return 'utf8';
+    }
+  }
+
   public static function isCharacterSetAvailableOnConnection(
     $character_set,
     AphrontDatabaseConnection $conn) {
     $result = queryfx_one(
       $conn,
       'SELECT CHARACTER_SET_NAME FROM INFORMATION_SCHEMA.CHARACTER_SETS WHERE CHARACTER_SET_NAME = %s',
       $character_set);

     return (bool)$result;
   }

   public function getCharsetInfo() {
     if ($this->isCharacterSetAvailable('utf8mb4')) {
       // If utf8mb4 is available, we use it with the utf8mb4_unicode_ci
       // collation. This is most correct, and will sort properly.

       $charset = 'utf8mb4';
       $charset_sort = 'utf8mb4';
       $charset_full = 'utf8mb4';
       $collate_text = 'utf8mb4_bin';
       $collate_sort = 'utf8mb4_unicode_ci';
       $collate_full = 'utf8mb4_unicode_ci';
     } else {
       // If utf8mb4 is not available, we use binary for most data. This allows
       // us to store 4-byte unicode characters.
       //
       // It's possible that strings will be truncated in the middle of a
       // character on insert. We encourage users to set STRICT_ALL_TABLES
       // to prevent this.
       //
       // For "fulltext" and "sort" columns, we don't use binary.
       //
       // With "fulltext", we can not use binary because MySQL won't let us.
       // We use 3-byte utf8 instead and accept being unable to index 4-byte
       // characters.
       //
       // With "sort", if we use binary we lose case insensitivity (for
       // example, "ALincoln@logcabin.com" and "alincoln@logcabin.com" would no
       // longer be identified as the same email address). This can be very
       // confusing and is far worse overall than not supporting 4-byte unicode
       // characters, so we use 3-byte utf8 and accept limited 4-byte support as
       // a tradeoff to get sensible collation behavior. Many columns where
       // collation is important rarely contain 4-byte characters anyway, so we
       // are not giving up too much.

       $charset = 'binary';
       $charset_sort = 'utf8';
       $charset_full = 'utf8';
       $collate_text = 'binary';
       $collate_sort = 'utf8_general_ci';
       $collate_full = 'utf8_general_ci';
     }

     return array(
       self::CHARSET_DEFAULT => $charset,
       self::CHARSET_SORT => $charset_sort,
       self::CHARSET_FULLTEXT => $charset_full,
       self::COLLATE_TEXT => $collate_text,
       self::COLLATE_SORT => $collate_sort,
       self::COLLATE_FULLTEXT => $collate_full,
     );
   }

 }
diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php
index 28b188a873..3a18578a30 100644
--- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php
+++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php
@@ -1,366 +1,368 @@
 setName('dump')
       ->setExamples('**dump** [__options__]')
       ->setSynopsis(pht('Dump all data in storage to stdout.'))
       ->setArguments(
         array(
           array(
             'name' => 'for-replica',
             'help' => pht(
               'Add __--master-data__ to the __mysqldump__ command, '.
               'generating a CHANGE MASTER statement in the output.'),
           ),
           array(
             'name' => 'output',
             'param' => 'file',
             'help' => pht(
               'Write output directly to disk. This handles errors better '.
               'than using pipes. Use with __--compress__ to gzip the '.
               'output.'),
           ),
           array(
             'name' => 'compress',
             'help' => pht(
               'With __--output__, write a compressed file to disk instead '.
               'of a plaintext file.'),
           ),
           array(
             'name' => 'no-indexes',
             'help' => pht(
               'Do not dump data in rebuildable index tables. This means '.
               'backups are smaller and faster, but you will need to manually '.
               'rebuild indexes after performing a restore.'),
           ),
           array(
             'name' => 'overwrite',
             'help' => pht(
               'With __--output__, overwrite the output file if it already '.
               'exists.'),
           ),
         ));
   }

   protected function isReadOnlyWorkflow() {
     return true;
   }

   public function didExecute(PhutilArgumentParser $args) {
     $output_file = $args->getArg('output');
     $is_compress = $args->getArg('compress');
     $is_overwrite = $args->getArg('overwrite');

     if ($is_compress) {
       if ($output_file === null) {
         throw new PhutilArgumentUsageException(
           pht(
             'The "--compress" flag can only be used alongside "--output".'));
       }

       if (!function_exists('gzopen')) {
         throw new PhutilArgumentUsageException(
           pht(
             'The "--compress" flag requires the PHP "zlib" extension, but '.
             'that extension is not available. Install the extension or '.
             'omit the "--compress" option.'));
       }
     }

     if ($is_overwrite) {
       if ($output_file === null) {
         throw new PhutilArgumentUsageException(
           pht(
             'The "--overwrite" flag can only be used alongside "--output".'));
       }
     }

     if ($output_file !== null) {
       if (Filesystem::pathExists($output_file)) {
         if (!$is_overwrite) {
           throw new PhutilArgumentUsageException(
             pht(
               'Output file "%s" already exists. Use "--overwrite" '.
               'to overwrite.',
               $output_file));
         }
       }
     }

     $api = $this->getSingleAPI();
     $patches = $this->getPatches();

     $with_indexes = !$args->getArg('no-indexes');

     $applied = $api->getAppliedPatches();
     if ($applied === null) {
       throw new PhutilArgumentUsageException(
         pht(
           'There is no database storage initialized in the current storage '.
           'namespace ("%s"). Use "bin/storage upgrade" to initialize '.
           'storage or use "--namespace" to choose a different namespace.',
           $api->getNamespace()));
     }

     $ref = $api->getRef();
     $ref_key = $ref->getRefKey();

     $schemata_query = id(new PhabricatorConfigSchemaQuery())
       ->setAPIs(array($api))
       ->setRefs(array($ref));

     $actual_map = $schemata_query->loadActualSchemata();
     $expect_map = $schemata_query->loadExpectedSchemata();

     $schemata = $actual_map[$ref_key];
     $expect = $expect_map[$ref_key];

     $targets = array();
     foreach ($schemata->getDatabases() as $database_name => $database) {
       $expect_database = $expect->getDatabase($database_name);
       foreach ($database->getTables() as $table_name => $table) {

-        // NOTE: It's possible for us to find tables in these database which
+        // NOTE: It's possible for us to find tables in these databases which
         // we don't expect to be there. For example, an older version of
         // Phabricator may have had a table that was later dropped. We assume
         // these are data tables and always dump them, erring on the side of
         // caution.

         $persistence = PhabricatorConfigTableSchema::PERSISTENCE_DATA;
         if ($expect_database) {
           $expect_table = $expect_database->getTable($table_name);
           if ($expect_table) {
             $persistence = $expect_table->getPersistenceType();
           }
         }

         switch ($persistence) {
           case PhabricatorConfigTableSchema::PERSISTENCE_CACHE:
             // When dumping tables, leave the data in cache tables in the
-            // database. This will be automatically rebuild after the data
+            // database. This will be automatically rebuilt after the data
             // is restored and does not need to be persisted in backups.
             $with_data = false;
             break;
           case PhabricatorConfigTableSchema::PERSISTENCE_INDEX:
-            // When dumping tables, leave index data behind of the caller
+            // When dumping tables, leave index data behind if the caller
             // specified "--no-indexes". These tables can be rebuilt manually
             // from other tables, but do not rebuild automatically.
             $with_data = $with_indexes;
             break;
           case PhabricatorConfigTableSchema::PERSISTENCE_DATA:
           default:
             $with_data = true;
             break;
         }

         $targets[] = array(
           'database' => $database_name,
           'table' => $table_name,
           'data' => $with_data,
         );
       }
     }

     list($host, $port) = $this->getBareHostAndPort($api->getHost());

     $has_password = false;

     $password = $api->getPassword();
     if ($password) {
       if (strlen($password->openEnvelope())) {
         $has_password = true;
       }
     }

     $argv = array();
     $argv[] = '--hex-blob';
     $argv[] = '--single-transaction';
-    $argv[] = '--default-character-set=utf8';
+
+    $argv[] = '--default-character-set';
+    $argv[] = $api->getClientCharset();

     if ($args->getArg('for-replica')) {
       $argv[] = '--master-data';
     }

     $argv[] = '-u';
     $argv[] = $api->getUser();
     $argv[] = '-h';
     $argv[] = $host;

     // MySQL's default "max_allowed_packet" setting is fairly conservative
     // (16MB). If we try to dump a row which is larger than this limit, the
     // dump will fail.

     // We encourage users to increase this limit during setup, but modifying
     // the "[mysqld]" section of the configuration file (instead of
     // "[mysqldump]" section) won't apply to "mysqldump" and we can not easily
     // detect what the "mysqldump" setting is.

     // Since no user would ever reasonably want a dump to fail because a row
     // was too large, just manually force this setting to the largest supported
     // value.

     $argv[] = '--max-allowed-packet';
     $argv[] = '1G';

     if ($port) {
       $argv[] = '--port';
       $argv[] = $port;
     }

     $commands = array();
     foreach ($targets as $target) {
       $target_argv = $argv;

       if (!$target['data']) {
         $target_argv[] = '--no-data';
       }

       if ($has_password) {
         $command = csprintf(
           'mysqldump -p%P %Ls -- %R %R',
           $password,
           $target_argv,
           $target['database'],
           $target['table']);
       } else {
         $command = csprintf(
           'mysqldump %Ls -- %R %R',
           $target_argv,
           $target['database'],
           $target['table']);
       }

       $commands[] = array(
         'command' => $command,
         'database' => $target['database'],
       );
     }

     // Decrease the CPU priority of this process so it doesn't contend with
     // other more important things.
     if (function_exists('proc_nice')) {
       proc_nice(19);
     }

     // If we are writing to a file, stream the command output to disk. This
     // mode makes sure the whole command fails if there's an error (commonly,
     // a full disk). See T6996 for discussion.

     if ($output_file === null) {
       $file = null;
     } else if ($is_compress) {
       $file = gzopen($output_file, 'wb1');
     } else {
       $file = fopen($output_file, 'wb');
     }

     if (($output_file !== null) && !$file) {
       throw new Exception(
         pht(
           'Failed to open file "%s" for writing.',
-          $file));
+          $output_file));
     }

     $created = array();

     try {
       foreach ($commands as $spec) {
         // Because we're dumping database-by-database, we need to generate our
         // own CREATE DATABASE and USE statements.

         $database = $spec['database'];
         $preamble = array();
         if (!isset($created[$database])) {
           $preamble[] =
             "CREATE DATABASE /*!32312 IF NOT EXISTS*/ `{$database}` ".
             "/*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_bin */;\n";
           $created[$database] = true;
         }
         $preamble[] = "USE `{$database}`;\n";
         $preamble = implode('', $preamble);
         $this->writeData($preamble, $file, $is_compress, $output_file);

         // See T13328. The "mysql" command may produce output very quickly.
         // Don't buffer more than a fixed amount.
         $future = id(new ExecFuture('%C', $spec['command']))
           ->setReadBufferSize(32 * 1024 * 1024);

         $iterator = id(new FutureIterator(array($future)))
           ->setUpdateInterval(0.010);
         foreach ($iterator as $ready) {
           list($stdout, $stderr) = $future->read();
           $future->discardBuffers();

           if (strlen($stderr)) {
             fwrite(STDERR, $stderr);
           }

           $this->writeData($stdout, $file, $is_compress, $output_file);

           if ($ready !== null) {
             $ready->resolvex();
           }
         }
       }

       if (!$file) {
         $ok = true;
       } else if ($is_compress) {
         $ok = gzclose($file);
       } else {
         $ok = fclose($file);
       }

       if ($ok !== true) {
         throw new Exception(
           pht(
             'Failed to close file "%s".',
             $output_file));
       }
     } catch (Exception $ex) {
       // If we might have written a partial file to disk, try to remove it so
       // we don't leave any confusing artifacts laying around.

       try {
         if ($file !== null) {
           Filesystem::remove($output_file);
         }
-      } catch (Exception $ex) {
+      } catch (Exception $cleanup_ex) {
         // Ignore any errors we hit.
       }

       throw $ex;
     }

     return 0;
   }

   private function writeData($data, $file, $is_compress, $output_file) {
     if (!strlen($data)) {
       return;
     }

     if (!$file) {
       $ok = fwrite(STDOUT, $data);
     } else if ($is_compress) {
       $ok = gzwrite($file, $data);
     } else {
       $ok = fwrite($file, $data);
     }

     if ($ok !== strlen($data)) {
       throw new Exception(
         pht(
           'Failed to write %d byte(s) to file "%s".',
           new PhutilNumber(strlen($data)),
           $output_file));
     }
   }

 }
diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementShellWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementShellWorkflow.php
index 0bf185a086..f376ea3e14 100644
--- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementShellWorkflow.php
+++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementShellWorkflow.php
@@ -1,42 +1,42 @@
 setName('shell')
       ->setExamples('**shell** [__options__]')
       ->setSynopsis(pht('Launch an interactive shell.'));
   }

   protected function isReadOnlyWorkflow() {
     return true;
   }

   public function execute(PhutilArgumentParser $args) {
     $api = $this->getSingleAPI();
     list($host, $port) = $this->getBareHostAndPort($api->getHost());

     $flag_port = $port ? csprintf('--port %d', $port) : '';

     $flag_password = '';
     $password = $api->getPassword();
     if ($password) {
       if (strlen($password->openEnvelope())) {
         $flag_password = csprintf('--password=%P', $password);
       }
     }

     return phutil_passthru(
-      'mysql --protocol=TCP --default-character-set=utf8mb4 '.
-      '-u %s %C -h %s %C',
+      'mysql --protocol=TCP --default-character-set %R -u %s %C -h %s %C',
+      $api->getClientCharset(),
       $api->getUser(),
       $flag_password,
       $host,
       $flag_port);
   }

 }