diff --git a/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php index 4ae64b39be..9a71474624 100644 --- a/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php @@ -1,289 +1,288 @@ loadServices($blueprint); $bindings = $this->loadAllBindings($services); if (!$bindings) { // If there are no devices bound to the services for this blueprint, // we can not allocate resources. return false; } return true; } public function canAllocateResourceForLease( DrydockBlueprint $blueprint, DrydockLease $lease) { // We will only allocate one resource per unique device bound to the // services for this blueprint. Make sure we have a free device somewhere. $free_bindings = $this->loadFreeBindings($blueprint); if (!$free_bindings) { return false; } return true; } public function allocateResource( DrydockBlueprint $blueprint, DrydockLease $lease) { $free_bindings = $this->loadFreeBindings($blueprint); shuffle($free_bindings); $exceptions = array(); foreach ($free_bindings as $binding) { $device = $binding->getDevice(); $device_name = $device->getName(); $binding_phid = $binding->getPHID(); $resource = $this->newResourceTemplate($blueprint, $device_name) ->setActivateWhenAllocated(true) ->setAttribute('almanacServicePHID', $binding->getServicePHID()) ->setAttribute('almanacBindingPHID', $binding_phid) ->needSlotLock("almanac.host.binding({$binding_phid})"); try { return $resource->allocateResource(); } catch (Exception $ex) { $exceptions[] = $ex; } } throw new PhutilAggregateException( pht('Unable to allocate any binding as a resource.'), $exceptions); } public function destroyResource( DrydockBlueprint $blueprint, DrydockResource $resource) { // We don't create anything when allocating hosts, so we don't need to do // any cleanup here. 
return; } public function canAcquireLeaseOnResource( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { if (!DrydockSlotLock::isLockFree($this->getLeaseSlotLock($resource))) { return false; } return true; } public function acquireLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { $lease ->setActivateWhenAcquired(true) ->needSlotLock($this->getLeaseSlotLock($resource)) ->acquireOnResource($resource); } public function didReleaseLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { // Almanac hosts stick around indefinitely so we don't need to recycle them // if they don't have any leases. return; } public function destroyLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { // We don't create anything when activating a lease, so we don't need to // throw anything away. return; } private function getLeaseSlotLock(DrydockResource $resource) { $resource_phid = $resource->getPHID(); return "almanac.host.lease({$resource_phid})"; } public function getType() { return 'host'; } public function getInterface( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease, $type) { $viewer = PhabricatorUser::getOmnipotentUser(); switch ($type) { case DrydockCommandInterface::INTERFACE_TYPE: $credential_phid = $blueprint->getFieldValue('credentialPHID'); $binding_phid = $resource->getAttribute('almanacBindingPHID'); $binding = id(new AlmanacBindingQuery()) ->setViewer($viewer) ->withPHIDs(array($binding_phid)) ->executeOne(); if (!$binding) { - // TODO: This is probably a permanent failure, destroy this resource? 
throw new Exception( pht( 'Unable to load binding "%s" to create command interface.', $binding_phid)); } $interface = $binding->getInterface(); return id(new DrydockSSHCommandInterface()) ->setConfig('credentialPHID', $credential_phid) ->setConfig('host', $interface->getAddress()) ->setConfig('port', $interface->getPort()); } } public function getFieldSpecifications() { return array( 'almanacServicePHIDs' => array( 'name' => pht('Almanac Services'), 'type' => 'datasource', 'datasource.class' => 'AlmanacServiceDatasource', 'datasource.parameters' => array( 'serviceClasses' => $this->getAlmanacServiceClasses(), ), 'required' => true, ), 'credentialPHID' => array( 'name' => pht('Credentials'), 'type' => 'credential', 'credential.provides' => PassphraseSSHPrivateKeyCredentialType::PROVIDES_TYPE, 'credential.type' => PassphraseSSHPrivateKeyTextCredentialType::CREDENTIAL_TYPE, ), ) + parent::getFieldSpecifications(); } private function loadServices(DrydockBlueprint $blueprint) { if (!$this->services) { $service_phids = $blueprint->getFieldValue('almanacServicePHIDs'); if (!$service_phids) { throw new Exception( pht( 'This blueprint ("%s") does not define any Almanac Service PHIDs.', $blueprint->getBlueprintName())); } $viewer = PhabricatorUser::getOmnipotentUser(); $services = id(new AlmanacServiceQuery()) ->setViewer($viewer) ->withPHIDs($service_phids) ->withServiceClasses($this->getAlmanacServiceClasses()) ->needBindings(true) ->execute(); $services = mpull($services, null, 'getPHID'); if (count($services) != count($service_phids)) { $missing_phids = array_diff($service_phids, array_keys($services)); throw new Exception( pht( 'Some of the Almanac Services defined by this blueprint '. 'could not be loaded. They may be invalid, no longer exist, '. 
'or be of the wrong type: %s.', implode(', ', $missing_phids))); } $this->services = $services; } return $this->services; } private function loadAllBindings(array $services) { assert_instances_of($services, 'AlmanacService'); $bindings = array_mergev(mpull($services, 'getBindings')); return mpull($bindings, null, 'getPHID'); } private function loadFreeBindings(DrydockBlueprint $blueprint) { if ($this->freeBindings === null) { $viewer = PhabricatorUser::getOmnipotentUser(); $pool = id(new DrydockResourceQuery()) ->setViewer($viewer) ->withBlueprintPHIDs(array($blueprint->getPHID())) ->withStatuses( array( DrydockResourceStatus::STATUS_PENDING, DrydockResourceStatus::STATUS_ACTIVE, DrydockResourceStatus::STATUS_RELEASED, )) ->execute(); $allocated_phids = array(); foreach ($pool as $resource) { $allocated_phids[] = $resource->getAttribute('almanacDevicePHID'); } $allocated_phids = array_fuse($allocated_phids); $services = $this->loadServices($blueprint); $bindings = $this->loadAllBindings($services); $free = array(); foreach ($bindings as $binding) { if (empty($allocated_phids[$binding->getPHID()])) { $free[] = $binding; } } $this->freeBindings = $free; } return $this->freeBindings; } private function getAlmanacServiceClasses() { return array( 'AlmanacDrydockPoolServiceType', ); } } diff --git a/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php index f9a8794391..a61af96ebf 100644 --- a/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php @@ -1,339 +1,344 @@ getAttribute('repositories.map'); if (!is_array($need_map)) { return false; } $have_map = $resource->getAttribute('repositories.map'); if (!is_array($have_map)) { return false; } $have_as = ipull($have_map, 'phid'); $need_as = ipull($need_map, 'phid'); foreach ($need_as as $need_directory 
=> $need_phid) { if (empty($have_as[$need_directory])) { // This resource is missing a required working copy. return false; } if ($have_as[$need_directory] != $need_phid) { // This resource has a required working copy, but it contains // the wrong repository. return false; } unset($have_as[$need_directory]); } if ($have_as && $lease->getAttribute('repositories.strict')) { // This resource has extra repositories, but the lease is strict about // which repositories are allowed to exist. return false; } if (!DrydockSlotLock::isLockFree($this->getLeaseSlotLock($resource))) { return false; } return true; } public function acquireLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { $lease ->needSlotLock($this->getLeaseSlotLock($resource)) ->acquireOnResource($resource); } private function getLeaseSlotLock(DrydockResource $resource) { $resource_phid = $resource->getPHID(); return "workingcopy.lease({$resource_phid})"; } public function allocateResource( DrydockBlueprint $blueprint, DrydockLease $lease) { $resource = $this->newResourceTemplate( $blueprint, pht('Working Copy')); $resource_phid = $resource->getPHID(); $host_lease = $this->newLease($blueprint) ->setResourceType('host') ->setOwnerPHID($resource_phid) ->setAttribute('workingcopy.resourcePHID', $resource_phid) ->queueForActivation(); // TODO: Add some limits to the number of working copies we can have at // once? 
$map = $lease->getAttribute('repositories.map'); foreach ($map as $key => $value) { $map[$key] = array_select_keys( $value, array( 'phid', )); } return $resource ->setAttribute('repositories.map', $map) ->setAttribute('host.leasePHID', $host_lease->getPHID()) ->allocateResource(); } public function activateResource( DrydockBlueprint $blueprint, DrydockResource $resource) { $lease = $this->loadHostLease($resource); $this->requireActiveLease($lease); $command_type = DrydockCommandInterface::INTERFACE_TYPE; $interface = $lease->getInterface($command_type); // TODO: Make this configurable. $resource_id = $resource->getID(); $root = "/var/drydock/workingcopy-{$resource_id}"; $map = $resource->getAttribute('repositories.map'); $repositories = $this->loadRepositories(ipull($map, 'phid')); foreach ($map as $directory => $spec) { // TODO: Validate directory isn't goofy like "/etc" or "../../lol" // somewhere? $repository = $repositories[$spec['phid']]; $path = "{$root}/repo/{$directory}/"; // TODO: Run these in parallel? $interface->execx( 'git clone -- %s %s', (string)$repository->getCloneURIObject(), $path); } $resource ->setAttribute('workingcopy.root', $root) ->activateResource(); } public function destroyResource( DrydockBlueprint $blueprint, DrydockResource $resource) { $lease = $this->loadHostLease($resource); // Destroy the lease on the host. $lease->releaseOnDestruction(); if ($lease->isActive()) { // Destroy the working copy on disk. 
$command_type = DrydockCommandInterface::INTERFACE_TYPE; $interface = $lease->getInterface($command_type); $root_key = 'workingcopy.root'; $root = $resource->getAttribute($root_key); if (strlen($root)) { $interface->execx('rm -rf -- %s', $root); } } } public function activateLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { $host_lease = $this->loadHostLease($resource); $command_type = DrydockCommandInterface::INTERFACE_TYPE; $interface = $host_lease->getInterface($command_type); $map = $lease->getAttribute('repositories.map'); $root = $resource->getAttribute('workingcopy.root'); $default = null; foreach ($map as $directory => $spec) { $cmd = array(); $arg = array(); $cmd[] = 'cd %s'; $arg[] = "{$root}/repo/{$directory}/"; $cmd[] = 'git clean -d --force'; $cmd[] = 'git fetch'; $commit = idx($spec, 'commit'); $branch = idx($spec, 'branch'); if ($commit !== null) { $cmd[] = 'git reset --hard %s'; $arg[] = $commit; } else if ($branch !== null) { $cmd[] = 'git checkout %s'; $arg[] = $branch; $cmd[] = 'git reset --hard origin/%s'; $arg[] = $branch; } else { $cmd[] = 'git reset --hard HEAD'; } $cmd = implode(' && ', $cmd); $argv = array_merge(array($cmd), $arg); $result = call_user_func_array( array($interface, 'execx'), $argv); if (idx($spec, 'default')) { $default = $directory; } } if ($default === null) { $default = head_key($map); } // TODO: Use working storage? $lease->setAttribute('workingcopy.default', "{$root}/repo/{$default}/"); $lease->activateOnResource($resource); } public function didReleaseLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { // We leave working copies around even if there are no leases on them, // since the cost to maintain them is nearly zero but rebuilding them is // moderately expensive and it's likely that they'll be reused. 
return; } public function destroyLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { // When we activate a lease we just reset the working copy state and do // not create any new state, so we don't need to do anything special when // destroying a lease. return; } public function getType() { return 'working-copy'; } public function getInterface( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease, $type) { switch ($type) { case DrydockCommandInterface::INTERFACE_TYPE: $host_lease = $this->loadHostLease($resource); $command_interface = $host_lease->getInterface($type); $path = $lease->getAttribute('workingcopy.default'); $command_interface->setWorkingDirectory($path); return $command_interface; } } private function loadRepositories(array $phids) { $repositories = id(new PhabricatorRepositoryQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withPHIDs($phids) ->execute(); $repositories = mpull($repositories, null, 'getPHID'); foreach ($phids as $phid) { if (empty($repositories[$phid])) { - // TODO: Permanent failure. throw new Exception( pht( 'Repository PHID "%s" does not exist.', $phid)); } } foreach ($repositories as $repository) { - switch ($repository->getVersionControlSystem()) { + $repository_vcs = $repository->getVersionControlSystem(); + switch ($repository_vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: break; default: - // TODO: Permanent failure. - throw new Exception(pht('Unsupported VCS!')); + throw new Exception( + pht( + 'Repository ("%s") has unsupported VCS ("%s").', + $repository->getPHID(), + $repository_vcs)); } } return $repositories; } private function loadHostLease(DrydockResource $resource) { $viewer = PhabricatorUser::getOmnipotentUser(); $lease_phid = $resource->getAttribute('host.leasePHID'); $lease = id(new DrydockLeaseQuery()) ->setViewer($viewer) ->withPHIDs(array($lease_phid)) ->executeOne(); if (!$lease) { - // TODO: Permanent failure. 
- throw new Exception(pht('Unable to load lease "%s".', $lease_phid)); + throw new Exception( + pht( + 'Unable to load lease ("%s").', + $lease_phid)); } return $lease; } } diff --git a/src/applications/drydock/worker/DrydockResourceUpdateWorker.php b/src/applications/drydock/worker/DrydockResourceUpdateWorker.php index 9027829b9c..e0deabdb1b 100644 --- a/src/applications/drydock/worker/DrydockResourceUpdateWorker.php +++ b/src/applications/drydock/worker/DrydockResourceUpdateWorker.php @@ -1,190 +1,281 @@ getTaskDataValue('resourcePHID'); $hash = PhabricatorHash::digestForIndex($resource_phid); $lock_key = 'drydock.resource:'.$hash; $lock = PhabricatorGlobalLock::newLock($lock_key) ->lock(1); try { $resource = $this->loadResource($resource_phid); - $this->updateResource($resource); + $this->handleUpdate($resource); } catch (Exception $ex) { $lock->unlock(); throw $ex; } $lock->unlock(); } + +/* -( Updating Resources )------------------------------------------------- */ + + + /** + * Update a resource, handling exceptions thrown during the update. + * + * @param DrydockResource Resource to update. + * @return void + * @task update + */ + private function handleUpdate(DrydockResource $resource) { + try { + $this->updateResource($resource); + } catch (Exception $ex) { + if ($this->isTemporaryException($ex)) { + $this->yieldResource($resource, $ex); + } else { + $this->breakResource($resource, $ex); + } + } + } + + + /** + * Update a resource. + * + * @param DrydockResource Resource to update. + * @return void + * @task update + */ private function updateResource(DrydockResource $resource) { $this->processResourceCommands($resource); $resource_status = $resource->getStatus(); switch ($resource_status) { case DrydockResourceStatus::STATUS_PENDING: $this->activateResource($resource); break; case DrydockResourceStatus::STATUS_ACTIVE: // Nothing to do. 
break; case DrydockResourceStatus::STATUS_RELEASED: case DrydockResourceStatus::STATUS_BROKEN: $this->destroyResource($resource); break; case DrydockResourceStatus::STATUS_DESTROYED: // Nothing to do. break; } $this->yieldIfExpiringResource($resource); } + /** + * Convert a temporary exception into a yield. + * + * @param DrydockResource Resource to yield. + * @param Exception Temporary exception worker encountered. + * @task update + */ + private function yieldResource(DrydockResource $resource, Exception $ex) { + $duration = $this->getYieldDurationFromException($ex); + + $resource->logEvent( + DrydockResourceActivationYieldLogType::LOGCONST, + array( + 'duration' => $duration, + )); + + throw new PhabricatorWorkerYieldException($duration); + } + + /* -( Processing Commands )------------------------------------------------ */ /** * @task command */ private function processResourceCommands(DrydockResource $resource) { if (!$resource->canReceiveCommands()) { return; } $this->checkResourceExpiration($resource); $commands = $this->loadCommands($resource->getPHID()); foreach ($commands as $command) { if (!$resource->canReceiveCommands()) { break; } $this->processResourceCommand($resource, $command); $command ->setIsConsumed(true) ->save(); } } /** * @task command */ private function processResourceCommand( DrydockResource $resource, DrydockCommand $command) { switch ($command->getCommand()) { case DrydockCommand::COMMAND_RELEASE: $this->releaseResource($resource); break; } } /* -( Activating Resources )----------------------------------------------- */ /** * @task activate */ private function activateResource(DrydockResource $resource) { $blueprint = $resource->getBlueprint(); $blueprint->activateResource($resource); $this->validateActivatedResource($blueprint, $resource); } /** * @task activate */ private function validateActivatedResource( DrydockBlueprint $blueprint, DrydockResource $resource) { if (!$resource->isActivatedResource()) { throw new Exception( pht( 
'Blueprint "%s" (of type "%s") is not properly implemented: %s '. 'must actually allocate the resource it returns.', $blueprint->getBlueprintName(), $blueprint->getClassName(), 'allocateResource()')); } } /* -( Releasing Resources )------------------------------------------------ */ /** * @task release */ private function releaseResource(DrydockResource $resource) { $viewer = $this->getViewer(); $drydock_phid = id(new PhabricatorDrydockApplication())->getPHID(); - $resource->openTransaction(); - $resource - ->setStatus(DrydockResourceStatus::STATUS_RELEASED) - ->save(); - - // TODO: Hold slot locks until destruction? - DrydockSlotLock::releaseLocks($resource->getPHID()); - $resource->saveTransaction(); + $resource + ->setStatus(DrydockResourceStatus::STATUS_RELEASED) + ->save(); $statuses = array( DrydockLeaseStatus::STATUS_PENDING, DrydockLeaseStatus::STATUS_ACQUIRED, DrydockLeaseStatus::STATUS_ACTIVE, ); $leases = id(new DrydockLeaseQuery()) ->setViewer($viewer) ->withResourcePHIDs(array($resource->getPHID())) ->withStatuses($statuses) ->execute(); foreach ($leases as $lease) { $command = DrydockCommand::initializeNewCommand($viewer) ->setTargetPHID($lease->getPHID()) ->setAuthorPHID($drydock_phid) ->setCommand(DrydockCommand::COMMAND_RELEASE) ->save(); $lease->scheduleUpdate(); } $this->destroyResource($resource); } +/* -( Breaking Resources )------------------------------------------------- */ + + + /** + * @task break + */ + private function breakResource(DrydockResource $resource, Exception $ex) { + switch ($resource->getStatus()) { + case DrydockResourceStatus::STATUS_BROKEN: + case DrydockResourceStatus::STATUS_RELEASED: + case DrydockResourceStatus::STATUS_DESTROYED: + // If the resource was already broken, just throw a normal exception. + // This will retry the task eventually. 
+ throw new PhutilProxyException( + pht( + 'Unexpected failure while destroying resource ("%s").', + $resource->getPHID()), + $ex); + } + + $resource + ->setStatus(DrydockResourceStatus::STATUS_BROKEN) + ->save(); + + $resource->scheduleUpdate(); + + $resource->logEvent( + DrydockResourceActivationFailureLogType::LOGCONST, + array( + 'class' => get_class($ex), + 'message' => $ex->getMessage(), + )); + + throw new PhabricatorWorkerPermanentFailureException( + pht( + 'Permanent failure while activating resource ("%s"): %s', + $resource->getPHID(), + $ex->getMessage())); + } + + /* -( Destroying Resources )----------------------------------------------- */ /** * @task destroy */ private function destroyResource(DrydockResource $resource) { $blueprint = $resource->getBlueprint(); $blueprint->destroyResource($resource); + DrydockSlotLock::releaseLocks($resource->getPHID()); + $resource ->setStatus(DrydockResourceStatus::STATUS_DESTROYED) ->save(); } } diff --git a/src/applications/drydock/worker/DrydockWorker.php b/src/applications/drydock/worker/DrydockWorker.php index fb2666f735..d2dc1ca399 100644 --- a/src/applications/drydock/worker/DrydockWorker.php +++ b/src/applications/drydock/worker/DrydockWorker.php @@ -1,117 +1,158 @@ getViewer(); $lease = id(new DrydockLeaseQuery()) ->setViewer($viewer) ->withPHIDs(array($lease_phid)) ->executeOne(); if (!$lease) { throw new PhabricatorWorkerPermanentFailureException( pht('No such lease "%s"!', $lease_phid)); } return $lease; } protected function loadResource($resource_phid) { $viewer = $this->getViewer(); $resource = id(new DrydockResourceQuery()) ->setViewer($viewer) ->withPHIDs(array($resource_phid)) ->executeOne(); if (!$resource) { throw new PhabricatorWorkerPermanentFailureException( pht('No such resource "%s"!', $resource_phid)); } return $resource; } protected function loadCommands($target_phid) { $viewer = $this->getViewer(); $commands = id(new DrydockCommandQuery()) ->setViewer($viewer) 
->withTargetPHIDs(array($target_phid)) ->withConsumed(false) ->execute(); $commands = msort($commands, 'getID'); return $commands; } protected function checkLeaseExpiration(DrydockLease $lease) { $this->checkObjectExpiration($lease); } protected function checkResourceExpiration(DrydockResource $resource) { $this->checkObjectExpiration($resource); } private function checkObjectExpiration($object) { // Check if the resource or lease has expired. If it has, we're going to // send it a release command. // This command is sent from within the update worker so it is handled // immediately, but doing this generates a log and improves consistency. $expires = $object->getUntil(); if (!$expires) { return; } $now = PhabricatorTime::getNow(); if ($expires > $now) { return; } $viewer = $this->getViewer(); $drydock_phid = id(new PhabricatorDrydockApplication())->getPHID(); $command = DrydockCommand::initializeNewCommand($viewer) ->setTargetPHID($object->getPHID()) ->setAuthorPHID($drydock_phid) ->setCommand(DrydockCommand::COMMAND_RELEASE) ->save(); } protected function yieldIfExpiringLease(DrydockLease $lease) { if (!$lease->canReceiveCommands()) { return; } $this->yieldIfExpiring($lease->getUntil()); } protected function yieldIfExpiringResource(DrydockResource $resource) { if (!$resource->canReceiveCommands()) { return; } $this->yieldIfExpiring($resource->getUntil()); } private function yieldIfExpiring($expires) { if (!$expires) { return; } if (!$this->getTaskDataValue('isExpireTask')) { return; } $now = PhabricatorTime::getNow(); throw new PhabricatorWorkerYieldException($expires - $now); } + protected function isTemporaryException(Exception $ex) { + if ($ex instanceof PhabricatorWorkerYieldException) { + return true; + } + + if ($ex instanceof DrydockSlotLockException) { + return true; + } + + if ($ex instanceof PhutilAggregateException) { + $any_temporary = false; + foreach ($ex->getExceptions() as $sub) { + if ($this->isTemporaryException($sub)) { + $any_temporary = true; 
+ break; + } + } + if ($any_temporary) { + return true; + } + } + + if ($ex instanceof PhutilProxyException) { + return $this->isTemporaryException($ex->getPreviousException()); + } + + return false; + } + + protected function getYieldDurationFromException(Exception $ex) { + if ($ex instanceof PhabricatorWorkerYieldException) { + return $ex->getDuration(); + } + + if ($ex instanceof DrydockSlotLockException) { + return 5; + } + + return 15; + } + }