diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -2465,6 +2465,7 @@ 'PhabricatorDataNotAttachedException' => 'infrastructure/storage/lisk/PhabricatorDataNotAttachedException.php', 'PhabricatorDatabaseHealthRecord' => 'infrastructure/cluster/PhabricatorDatabaseHealthRecord.php', 'PhabricatorDatabaseRef' => 'infrastructure/cluster/PhabricatorDatabaseRef.php', + 'PhabricatorDatabaseRefParser' => 'infrastructure/cluster/PhabricatorDatabaseRefParser.php', 'PhabricatorDatabaseSetupCheck' => 'applications/config/check/PhabricatorDatabaseSetupCheck.php', 'PhabricatorDatasourceEditField' => 'applications/transactions/editfield/PhabricatorDatasourceEditField.php', 'PhabricatorDatasourceEditType' => 'applications/transactions/edittype/PhabricatorDatasourceEditType.php', @@ -7399,6 +7400,7 @@ 'PhabricatorDataNotAttachedException' => 'Exception', 'PhabricatorDatabaseHealthRecord' => 'Phobject', 'PhabricatorDatabaseRef' => 'Phobject', + 'PhabricatorDatabaseRefParser' => 'Phobject', 'PhabricatorDatabaseSetupCheck' => 'PhabricatorSetupCheck', 'PhabricatorDatasourceEditField' => 'PhabricatorTokenizerEditField', 'PhabricatorDatasourceEditType' => 'PhabricatorPHIDListEditType', diff --git a/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php b/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php --- a/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php +++ b/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php @@ -169,8 +169,37 @@ $messages = phutil_implode_html(phutil_tag('br'), $messages); + $partition = null; + if ($database->getIsMaster()) { + if ($database->getIsDefaultPartition()) { + $partition = id(new PHUIIconView()) + ->setIcon('fa-circle sky') + ->addSigil('has-tooltip') + ->setMetadata( + array( + 'tip' => pht('Default Partition'), + )); + } else { 
+ $map = $database->getApplicationMap(); + if ($map) { + $list = implode(', ', $map); + } else { + $list = pht('Empty'); + } + + $partition = id(new PHUIIconView()) + ->setIcon('fa-adjust sky') + ->addSigil('has-tooltip') + ->setMetadata( + array( + 'tip' => pht('Partition: %s', $list), + )); + } + } + $rows[] = array( $role_icon, + $partition, $database->getHost(), $database->getPort(), $database->getUser(), @@ -188,6 +217,7 @@ ->setHeaders( array( null, + null, pht('Host'), pht('Port'), pht('User'), @@ -205,6 +235,7 @@ null, null, null, + null, 'wide', )); diff --git a/src/docs/user/cluster/cluster_databases.diviner b/src/docs/user/cluster/cluster_databases.diviner --- a/src/docs/user/cluster/cluster_databases.diviner +++ b/src/docs/user/cluster/cluster_databases.diviner @@ -29,6 +29,12 @@ There are no current plans to support multi-master mode or autonomous failover, although this may change in the future. +Phabricator applications //can// be partitioned across multiple database +masters. This does not provide redundancy and generally does not increase +resilience or resistance to data loss, but can help you scale and operate +Phabricator. For details, see +@{article:Cluster: Partitioning and Advanced Configuration}. + Setting up MySQL Replication ============================ diff --git a/src/docs/user/cluster/cluster_partitioning.diviner b/src/docs/user/cluster/cluster_partitioning.diviner new file mode 100644 --- /dev/null +++ b/src/docs/user/cluster/cluster_partitioning.diviner @@ -0,0 +1,187 @@ +@title Cluster: Partitioning and Advanced Configuration +@group cluster + +Guide to partitioning Phabricator applications across multiple database hosts. + +Overview +======== + +WARNING: Partitioning is a prototype. + +You can partition Phabricator's applications across multiple databases. For +example, you can move an application like Files or Maniphest to a dedicated +database host. 
+ +The advantages of doing this are: + + - moving heavily used applications to dedicated hardware can help you + scale; and + - you can match application workloads to hardware or configuration to make + operating the cluster easier. + +This configuration is complex, and very few installs need to pursue it. +Phabricator will normally run comfortably with a single database master even +for large organizations. + +Partitioning generally does not do much to increase resilience or make it +easier to recover from disasters, and is primarily a mechanism for scaling. + +If you are considering partitioning, you likely want to configure replication +with a single master first. Even if you choose not to deploy replication, you +should review and understand how replication works before you partition. For +details, see @{article:Cluster: Databases}. + + +What Partitioning Does +====================== + +When you partition Phabricator, you move all of the data for one or more +applications (like Maniphest) to a new master database host. This is possible +because Phabricator stores data for each application in its own logical +database (like `phabricator_maniphest`) and performs no joins between databases. + +If you're running into scale limits on a single master database, you can move +one or more of your most commonly-used applications to a second database host +and continue adding users. You can keep partitioning applications until all +heavily used applications have dedicated database servers. + +Alternatively or additionally, you can partition applications to make operating +the cluster easier. Some applications have unusual workloads or requirements, +and moving them to separate hosts may make things easier to deal with overall. + +For example: if Files accounts for most of the data on your install, you might +move it to a different host to make backing up everything else easier. 
+ + +Configuration Overview +====================== + +To configure partitioning, you will add multiple entries to `cluster.databases` +with the `master` role. Each `master` should specify a new `partition` key, +which contains a list of application databases it should host. + +One master may be specified as the `default` partition. Applications not +explicitly configured to be assigned elsewhere will be assigned here. + +When you define multiple `master` databases, you must also specify which master +each `replica` database follows. Here's a simple example config: + +```lang=json +... +"cluster.databases": [ + { + "host": "db001.corporation.com", + "role": "master", + "user": "phabricator", + "pass": "hunter2!trustno1", + "port": 3306, + "partition": [ + "default" + ] + }, + { + "host": "db002.corporation.com", + "role": "replica", + "user": "phabricator", + "pass": "hunter2!trustno1", + "port": 3306, + "master": "db001.corporation.com:3306" + }, + { + "host": "db003.corporation.com", + "role": "master", + "user": "phabricator", + "pass": "hunter2!trustno1", + "port": 3306, + "partition": [ + "file", + "passphrase", + "slowvote" + ] + }, + { + "host": "db004.corporation.com", + "role": "replica", + "user": "phabricator", + "pass": "hunter2!trustno1", + "port": 3306, + "master": "db003.corporation.com:3306" + } +], +... +``` + +In this configuration, `db001` is a master and `db002` replicates it. +`db003` is a second master, replicated by `db004`. + +Applications have been partitioned like this: + + - `db003`/`db004`: Files, Passphrase, Slowvote + - `db001`/`db002`: Default (all other applications) + +Not all of the database partition names are the same as the application +names. You can get a list of databases with `bin/storage databases` to identify +the correct database names. + + +Launching a new Partition +========================= + +To add a new partition, follow these steps: + + - Set up the new database host or hosts. 
+ - Add the new database to `cluster.databases`, but keep its "partition" + configuration empty (just an empty list). If this is the first time you + are partitioning, you will need to configure your existing master as the + new "default". This will let Phabricator interact with it, but won't send + any traffic to it yet. + - Run `bin/storage upgrade` to initialize the schemata on the new hosts. + - Stop writes to the applications you want to move by putting Phabricator + in read-only mode, or shutting down the webserver and daemons, or telling + everyone not to touch anything. + - Dump the data from the application databases on the old master. + - Load the data into the application databases on the new master. + - Reconfigure the "partition" setup so that Phabricator knows the databases + have moved. + - While still in read-only mode, check that all the data appears to be + intact. + - Resume writes. + +You can do this with a small, rarely-used application first (on most installs, +Slowvote might be a good candidate) if you want to run through the process +end-to-end before performing a larger, higher-stakes migration. + + +How Partitioning Works +====================== + +If you have multiple masters, Phabricator keeps the entire set of schemata up +to date on all of them. When you run `bin/storage upgrade` or other storage +management commands, they generally affect all masters (if they do not, they +will prompt you to be more specific). + +When the application goes to read or write normal data (for example, to query a +list of tasks) it only connects to the master which the application it is +acting on behalf of is assigned to. + +In most cases, a master will not have any data in most of the databases which +are not assigned to it. If it does (for example, because it previously hosted +the application) the data is ignored. 
This approach (of maintaining all schemata on +all hosts) makes it easier to move data and to quickly revert changes if a +configuration mistake occurs. + +There are some exceptions to this rule. For example, all masters keep track +of which patches have been applied to that particular master so that +`bin/storage upgrade` can upgrade hosts correctly. + +Phabricator does not perform joins across logical databases, so there are no +meaningful differences in runtime behavior if two applications are on the same +physical host or different physical hosts. + + +Next Steps +========== + +Continue by: + + - returning to @{article:Clustering Introduction}. diff --git a/src/infrastructure/cluster/PhabricatorClusterDatabasesConfigOptionType.php b/src/infrastructure/cluster/PhabricatorClusterDatabasesConfigOptionType.php --- a/src/infrastructure/cluster/PhabricatorClusterDatabasesConfigOptionType.php +++ b/src/infrastructure/cluster/PhabricatorClusterDatabasesConfigOptionType.php @@ -35,6 +35,8 @@ 'user' => 'optional string', 'pass' => 'optional string', 'disabled' => 'optional bool', + 'master' => 'optional string', + 'partition' => 'optional list', )); } catch (Exception $ex) { throw new Exception( diff --git a/src/infrastructure/cluster/PhabricatorDatabaseRef.php b/src/infrastructure/cluster/PhabricatorDatabaseRef.php --- a/src/infrastructure/cluster/PhabricatorDatabaseRef.php +++ b/src/infrastructure/cluster/PhabricatorDatabaseRef.php @@ -36,6 +36,11 @@ private $healthRecord; private $didFailToConnect; + private $isDefaultPartition; + private $applicationMap = array(); + private $masterRef; + private $replicaRefs = array(); + public function setHost($host) { $this->host = $host; return $this; @@ -157,6 +162,43 @@ return $this->isIndividual; } + public function setIsDefaultPartition($is_default_partition) { + $this->isDefaultPartition = $is_default_partition; + return $this; + } + + public function getIsDefaultPartition() { + return $this->isDefaultPartition; + } + + 
public function setApplicationMap(array $application_map) { + $this->applicationMap = $application_map; + return $this; + } + + public function getApplicationMap() { + return $this->applicationMap; + } + + public function setMasterRef(PhabricatorDatabaseRef $master_ref) { + $this->masterRef = $master_ref; + return $this; + } + + public function getMasterRef() { + return $this->masterRef; + } + + public function addReplicaRef(PhabricatorDatabaseRef $replica_ref) { + $this->replicaRefs[] = $replica_ref; + return $this; + } + + public function getReplicaRefs() { + return $this->replicaRefs; + } + + public function getRefKey() { $host = $this->getHost(); @@ -248,8 +290,6 @@ } public static function newRefs() { - $refs = array(); - $default_port = PhabricatorEnv::getEnvConfig('mysql.port'); $default_port = nonempty($default_port, 3306); @@ -259,33 +299,12 @@ $default_pass = new PhutilOpaqueEnvelope($default_pass); $config = PhabricatorEnv::getEnvConfig('cluster.databases'); - foreach ($config as $server) { - $host = $server['host']; - $port = idx($server, 'port', $default_port); - $user = idx($server, 'user', $default_user); - $disabled = idx($server, 'disabled', false); - - $pass = idx($server, 'pass'); - if ($pass) { - $pass = new PhutilOpaqueEnvelope($pass); - } else { - $pass = clone $default_pass; - } - $role = $server['role']; - - $ref = id(new self()) - ->setHost($host) - ->setPort($port) - ->setUser($user) - ->setPass($pass) - ->setDisabled($disabled) - ->setIsMaster(($role == 'master')); - - $refs[] = $ref; - } - - return $refs; + return id(new PhabricatorDatabaseRefParser()) + ->setDefaultPort($default_port) + ->setDefaultUser($default_user) + ->setDefaultPass($default_pass) + ->newRefs($config); } public static function queryAll() { @@ -471,7 +490,7 @@ return $refs; } - public static function getMasterDatabaseRefs() { + public static function getAllMasterDatabaseRefs() { $refs = self::getClusterRefs(); if (!$refs) { @@ -491,12 +510,42 @@ return $masters; } - 
public static function getMasterDatabaseRefForDatabase($database) { + public static function getMasterDatabaseRefs() { + $refs = self::getAllMasterDatabaseRefs(); + return self::getEnabledRefs($refs); + } + + public function isApplicationHost($database) { + return isset($this->applicationMap[$database]); + } + + public static function getMasterDatabaseRefForApplication($application) { $masters = self::getMasterDatabaseRefs(); - // TODO: Actually implement this. + $application_master = null; + $default_master = null; + foreach ($masters as $master) { + if ($master->isApplicationHost($application)) { + $application_master = $master; + break; + } + if ($master->getIsDefaultPartition()) { + $default_master = $master; + } + } - return head($masters); + if ($application_master) { + $masters = array($application_master); + } else if ($default_master) { + $masters = array($default_master); + } else { + $masters = array(); + } + + $masters = self::getEnabledRefs($masters); + $master = head($masters); + + return $master; } public static function newIndividualRef() { @@ -513,7 +562,7 @@ ->setIsMaster(true); } - public static function getReplicaDatabaseRefs() { + public static function getAllReplicaDatabaseRefs() { $refs = self::getClusterRefs(); if (!$refs) { @@ -522,9 +571,6 @@ $replicas = array(); foreach ($refs as $ref) { - if ($ref->getDisabled()) { - continue; - } if ($ref->getIsMaster()) { continue; } @@ -535,10 +581,44 @@ return $replicas; } - public static function getReplicaDatabaseRefForDatabase($database) { + public static function getReplicaDatabaseRefs() { + $refs = self::getAllReplicaDatabaseRefs(); + return self::getEnabledRefs($refs); + } + + private static function getEnabledRefs(array $refs) { + foreach ($refs as $key => $ref) { + if ($ref->getDisabled()) { + unset($refs[$key]); + } + } + return $refs; + } + + public static function getReplicaDatabaseRefForApplication($application) { $replicas = self::getReplicaDatabaseRefs(); - // TODO: Actually implement 
this. + $application_replicas = array(); + $default_replicas = array(); + foreach ($replicas as $replica) { + $master = $replica->getMasterRef(); + + if ($master->isApplicationHost($application)) { + $application_replicas[] = $replica; + } + + if ($master->getIsDefaultPartition()) { + $default_replicas[] = $replica; + } + } + + if ($application_replicas) { + $replicas = $application_replicas; + } else { + $replicas = $default_replicas; + } + + $replicas = self::getEnabledRefs($replicas); // TODO: We may have multiple replicas to choose from, and could make // more of an effort to pick the "best" one here instead of always diff --git a/src/infrastructure/cluster/PhabricatorDatabaseRefParser.php b/src/infrastructure/cluster/PhabricatorDatabaseRefParser.php new file mode 100644 --- /dev/null +++ b/src/infrastructure/cluster/PhabricatorDatabaseRefParser.php @@ -0,0 +1,227 @@ +<?php + +final class PhabricatorDatabaseRefParser + extends Phobject { + + private $defaultPort = 3306; + private $defaultUser; + private $defaultPass; + + public function setDefaultPort($default_port) { + $this->defaultPort = $default_port; + return $this; + } + + public function getDefaultPort() { + return $this->defaultPort; + } + + public function setDefaultUser($default_user) { + $this->defaultUser = $default_user; + return $this; + } + + public function getDefaultUser() { + return $this->defaultUser; + } + + public function setDefaultPass($default_pass) { + $this->defaultPass = $default_pass; + return $this; + } + + public function getDefaultPass() { + return $this->defaultPass; + } + + /** + * Build database refs from a raw `cluster.databases` configuration list. + * + * Assigns partitions to masters, links each replica to the master it + * follows, and throws an Exception if the configuration is inconsistent. + * + * @param list<map<string, wild>> $config Raw server specifications. + * @return list<PhabricatorDatabaseRef> Refs, in configuration order. + */ + public function newRefs(array $config) { + $default_port = $this->getDefaultPort(); + $default_user = $this->getDefaultUser(); + $default_pass = $this->getDefaultPass(); + + $refs = array(); + + $master_count = 0; + foreach ($config as $key => $server) { + $host = $server['host']; + $port = idx($server, 'port', $default_port); + $user = idx($server, 'user', $default_user); + $disabled = idx($server, 'disabled', false); + + $pass = idx($server, 'pass'); + if ($pass) { + $pass = new PhutilOpaqueEnvelope($pass); + } else { + $pass = clone $default_pass; + } + + $role = $server['role']; + $is_master = ($role == 'master'); + 
+ $ref = id(new PhabricatorDatabaseRef()) + ->setHost($host) + ->setPort($port) + ->setUser($user) + ->setPass($pass) + ->setDisabled($disabled) + ->setIsMaster($is_master); + + if ($is_master) { + $master_count++; + } + + $refs[$key] = $ref; + } + + $is_partitioned = ($master_count > 1); + if ($is_partitioned) { + $default_ref = null; + $partition_map = array(); + foreach ($refs as $key => $ref) { + if (!$ref->getIsMaster()) { + continue; + } + + $server = $config[$key]; + $partition = idx($server, 'partition'); + if (!is_array($partition)) { + throw new Exception( + pht( + 'Phabricator is configured with multiple master databases, '. + 'but master "%s" is missing a "partition" configuration key to '. + 'define application partitioning.', + $ref->getRefKey())); + } + + $application_map = array(); + foreach ($partition as $application) { + if ($application === 'default') { + if ($default_ref) { + throw new Exception( + pht( + 'Multiple masters (databases "%s" and "%s") specify that '. + 'they are the "default" partition. Only one master may be '. + 'the default.', + $ref->getRefKey(), + $default_ref->getRefKey())); + } else { + $default_ref = $ref; + $ref->setIsDefaultPartition(true); + } + } else if (isset($partition_map[$application])) { + throw new Exception( + pht( + 'Multiple masters (databases "%s" and "%s") specify that '. + 'they are the partition for application "%s". Each '. + 'application may be allocated to only one partition.', + $partition_map[$application]->getRefKey(), + $ref->getRefKey(), + $application)); + } else { + // TODO: We should check that the application is valid, to + // prevent typos in application names. However, we do not + // currently have an efficient way to enumerate all of the valid + // application database names. + + $partition_map[$application] = $ref; + $application_map[$application] = $application; + } + } + + $ref->setApplicationMap($application_map); + } + } else { + // If we only have one master, make it the default. 
+ foreach ($refs as $ref) { + if ($ref->getIsMaster()) { + $ref->setIsDefaultPartition(true); + } + } + } + + $ref_map = array(); + $master_keys = array(); + foreach ($refs as $ref) { + $ref_key = $ref->getRefKey(); + if (isset($ref_map[$ref_key])) { + throw new Exception( + pht( + 'Multiple configured databases have the same internal '. + 'key, "%s". You may have listed a database multiple times.', + $ref_key)); + } else { + $ref_map[$ref_key] = $ref; + if ($ref->getIsMaster()) { + $master_keys[] = $ref_key; + } + } + } + + foreach ($refs as $key => $ref) { + if ($ref->getIsMaster()) { + continue; + } + + $server = $config[$key]; + + $partition = idx($server, 'partition'); + if ($partition !== null) { + throw new Exception( + pht( + 'Database "%s" is configured as a replica, but specifies a '. + '"partition". Only master databases may have a partition '. + 'configuration. Replicas use the same configuration as the '. + 'master they follow.', + $ref->getRefKey())); + } + + $master_key = idx($server, 'master'); + if ($master_key === null) { + if ($is_partitioned) { + throw new Exception( + pht( + 'Database "%s" is configured as a replica, but does not '. + 'specify which "master" it follows in configuration. Valid '. + 'masters are: %s.', + $ref->getRefKey(), + implode(', ', $master_keys))); + } else if ($master_keys) { + $master_key = head($master_keys); + } else { + throw new Exception( + pht( + 'Database "%s" is configured as a replica, but there is no '. + 'master configured.', + $ref->getRefKey())); + } + } + + if (!in_array($master_key, $master_keys, true)) { + throw new Exception( + pht( + 'Database "%s" is configured as a replica and specifies a '. + 'master ("%s"), but that master is not a valid master. Valid '. 
+ 'masters are: %s.', + $ref->getRefKey(), + $master_key, + implode(', ', $master_keys))); + } + + $master_ref = $ref_map[$master_key]; + $ref->setMasterRef($master_ref); + $master_ref->addReplicaRef($ref); + } + + return array_values($refs); + } + +} diff --git a/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php b/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php --- a/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php +++ b/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php @@ -62,7 +62,10 @@ $is_cluster = (bool)PhabricatorEnv::getEnvConfig('cluster.databases'); if ($is_cluster) { - $connection = $this->newClusterConnection($database, $mode); + $connection = $this->newClusterConnection( + $this->getApplicationName(), + $database, + $mode); } else { $connection = $this->newBasicConnection($database, $mode, $namespace); } @@ -113,9 +116,9 @@ )); } - private function newClusterConnection($database, $mode) { - $master = PhabricatorDatabaseRef::getMasterDatabaseRefForDatabase( - $database); + private function newClusterConnection($application, $database, $mode) { + $master = PhabricatorDatabaseRef::getMasterDatabaseRefForApplication( + $application); if ($master && !$master->isSevered()) { $connection = $master->newApplicationConnection($database); @@ -131,8 +134,8 @@ } } - $replica = PhabricatorDatabaseRef::getReplicaDatabaseRefForDatabase( - $database); + $replica = PhabricatorDatabaseRef::getReplicaDatabaseRefForApplication( + $application); if ($replica) { $connection = $replica->newApplicationConnection($database); $connection->setReadOnly(true);