diff --git a/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php b/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php --- a/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php +++ b/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php @@ -251,9 +251,8 @@ pht( 'Repository "%s" exists on more than one device, but no device '. 'has any repository version information. Phabricator can not '. - 'guess which copy of the existing data is authoritative. Remove '. - 'all but one device from service to mark the remaining device '. - 'as the authority.', + 'guess which copy of the existing data is authoritative. Promote '. + 'a device or see "Ambiguous Leaders" in the documentation.', $repository->getDisplayName())); } diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementClusterizeWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementClusterizeWorkflow.php --- a/src/applications/repository/management/PhabricatorRepositoryManagementClusterizeWorkflow.php +++ b/src/applications/repository/management/PhabricatorRepositoryManagementClusterizeWorkflow.php @@ -61,6 +61,7 @@ array( AlmanacClusterRepositoryServiceType::SERVICETYPE, )) + ->needBindings(true) ->executeOne(); if (!$service) { throw new PhutilArgumentUsageException( @@ -70,9 +71,41 @@ } } - if ($service) { $service_phid = $service->getPHID(); + + $bindings = $service->getActiveBindings(); + + $unique_devices = array(); + foreach ($bindings as $binding) { + $unique_devices[$binding->getDevicePHID()] = $binding->getDevice(); + } + + if (count($unique_devices) > 1) { + $device_names = mpull($unique_devices, 'getName'); + + echo id(new PhutilConsoleBlock()) + ->addParagraph( + pht( + 'Service "%s" is actively bound to more than one device (%s).', + $service_name, + implode(', ', $device_names))) + ->addParagraph( + pht( + 'If you clusterize a repository onto this service it may be '. 
+ 'unclear which devices have up-to-date copies of the '. + 'repository. If so, leader/follower ambiguity will freeze the '. + 'repository. You may need to manually promote a device to '. + 'unfreeze it. See "Ambiguous Leaders" in the documentation '. + 'for discussion.')) + ->drawConsoleString(); + + $prompt = pht('Continue anyway?'); + if (!phutil_console_confirm($prompt)) { + throw new PhutilArgumentUsageException( + pht('User aborted the workflow.')); + } + } } else { $service_phid = null; } diff --git a/src/docs/user/cluster/cluster_repositories.diviner b/src/docs/user/cluster/cluster_repositories.diviner --- a/src/docs/user/cluster/cluster_repositories.diviner +++ b/src/docs/user/cluster/cluster_repositories.diviner @@ -422,17 +422,22 @@ ================= Repository clusters can also freeze if the leader devices are ambiguous. This -can happen if you replace an entire cluster with new devices suddenly, or -make a mistake with the `--demote` flag. This generally arises from some kind -of operator error, like this: +can happen if you replace an entire cluster with new devices suddenly, or make +a mistake with the `--demote` flag. This may arise from some kind of operator +error, like these: - Someone accidentally uses `bin/repository thaw ... --demote` to demote every device in a cluster. - Someone accidentally deletes all the version information for a repository from the database by making a mistake with a `DELETE` or `UPDATE` query. - - Someone accidentally disable all of the devices in a cluster, then add + - Someone accidentally disables all of the devices in a cluster, then adds entirely new ones before repositories can propagate. +If you are moving repositories into cluster services, you can also reach this +state if you use `clusterize` to associate a repository with a service that is +bound to multiple active devices. In this case, Phabricator will not know which +device or devices have up-to-date information. 
+ When Phabricator can not tell which device in a cluster is a leader, it freezes the cluster because it is possible that some devices have less data and others have more, and if it choses a leader arbitrarily it may destroy some data