diff --git a/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php
--- a/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php
+++ b/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php
@@ -59,6 +59,11 @@
     DrydockResource $resource,
     DrydockLease $lease) {
 
+    // Allow other workers to start leasing against this.
+    $resource
+      ->setStatus(DrydockResourceStatus::STATUS_PENDING)
+      ->save();
+
     // We need to retrieve this as we need to use it for both importing the
     // key and looking up the ID for the resource attributes.
     $credential = id(new PassphraseCredentialQuery())
@@ -266,13 +271,28 @@
       $resource->setAttribute('eip-status', 'Associating Elastic IP');
       $resource->save();
 
-      $result = $this->getAWSEC2Future()
-        ->setRawAWSQuery(
-          'AssociateAddress',
-          array(
-            'InstanceId' => $instance_id,
-            'AllocationId' => $allocation_id,))
-        ->resolve();
+      while (true) {
+        try {
+          $result = $this->getAWSEC2Future()
+            ->setRawAWSQuery(
+              'AssociateAddress',
+              array(
+                'InstanceId' => $instance_id,
+                'AllocationId' => $allocation_id,))
+            ->resolve();
+          break;
+        } catch (PhutilAWSException $exx) {
+          if (substr_count(
+            $exx->getMessage(),
+            'InvalidAllocationID.NotFound') > 0) {
+            // AWS eventual consistency. Wait a little while.
+            sleep(5);
+            continue;
+          } else {
+            throw $exx;
+          }
+        }
+      }
 
       $association_id = (string)$result->associationId;
 
@@ -349,6 +369,7 @@
         'credential' => $resource->getAttribute('credential'),
         'platform' => $resource->getAttribute('platform'),));
     $ssh->setConnectTimeout(60);
+    $ssh->setExecTimeout(60);
 
     $resource->setAttribute(
       'aws-status',
@@ -357,7 +378,12 @@
 
     while (true) {
       try {
-        $ssh->getExecFuture('echo "test"')->resolvex();
+        $ssh_future = $ssh->getExecFuture('echo "test"');
+        $ssh_future->resolvex();
+        if ($ssh_future->getWasKilledByTimeout()) {
+          throw new Exception('SSH execution timed out.');
+        }
+
         break;
       } catch (Exception $ex) {
 
@@ -432,6 +458,7 @@
 
     // Deallocate and release the public IP address if we allocated one.
     if ($resource->getAttribute('eip-allocated')) {
+      try {
       $this->getAWSEC2Future()
         ->setRawAWSQuery(
           'DisassociateAddress',
@@ -445,15 +472,33 @@
           array(
             'AllocationId' => $resource->getAttribute('eip-allocation-id'),))
         ->resolve();
+      } catch (PhutilAWSException $ex) {
+        if (substr_count(
+          $ex->getMessage(),
+          'InvalidAssociationID.NotFound') > 0 ||
+          substr_count($ex->getMessage(), 'InvalidAllocationID.NotFound') > 0) {
+          // TODO: Should we log this somewhere?
+        } else {
+          throw $ex;
+        }
+      }
     }
 
-    // Terminate the EC2 instance.
-    $this->getAWSEC2Future()
-      ->setRawAWSQuery(
-        'TerminateInstances',
-        array(
-          'InstanceId.0' => $resource->getAttribute('instance-id'),))
-      ->resolve();
+    try {
+      // Terminate the EC2 instance.
+      $this->getAWSEC2Future()
+        ->setRawAWSQuery(
+          'TerminateInstances',
+          array(
+            'InstanceId.0' => $resource->getAttribute('instance-id'),))
+        ->resolve();
+    } catch (PhutilAWSException $exx) {
+      if (substr_count($exx->getMessage(), 'InvalidInstanceID.NotFound') > 0) {
+        return;
+      } else {
+        throw $exx;
+      }
+    }
 
   }
 
@@ -488,7 +533,24 @@
 
     $cmd = $lease->getInterface('command');
 
-    $cmd->execx('mkdir %s', $full_path);
+    $attempts = 10;
+    while ($attempts > 0) {
+      $attempts--;
+      try {
+        if ($platform === 'windows') {
+          $cmd->execx('mkdir -Force %s', $full_path);
+        } else {
+          $cmd->execx('mkdir %s', $full_path);
+        }
+        break;
+      } catch (Exception $ex) {
+        if ($attempts == 0) {
+          throw $ex;
+        }
+
+        sleep(5);
+      }
+    }
 
     $lease->setAttribute('path', $full_path);
   }
diff --git a/src/applications/drydock/controller/DrydockResourceCloseController.php b/src/applications/drydock/controller/DrydockResourceCloseController.php
--- a/src/applications/drydock/controller/DrydockResourceCloseController.php
+++ b/src/applications/drydock/controller/DrydockResourceCloseController.php
@@ -23,7 +23,8 @@
     $resource_uri = '/resource/'.$resource->getID().'/';
     $resource_uri = $this->getApplicationURI($resource_uri);
 
-    if ($resource->getStatus() != DrydockResourceStatus::STATUS_OPEN) {
+    if ($resource->getStatus() != DrydockResourceStatus::STATUS_OPEN &&
+        $resource->getStatus() != DrydockResourceStatus::STATUS_PENDING) {
       $dialog = id(new AphrontDialogView())
         ->setUser($viewer)
         ->setTitle(pht('Resource Not Open'))
diff --git a/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php b/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php
--- a/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php
+++ b/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php
@@ -4,6 +4,7 @@
 
   private $passphraseSSHKey;
   private $connectTimeout;
+  private $execTimeout;
 
   private function openCredentialsIfNotOpen() {
     if ($this->passphraseSSHKey !== null) {
@@ -31,6 +32,11 @@
     return $this;
   }
 
+  public function setExecTimeout($timeout) {
+    $this->execTimeout = $timeout;
+    return $this;
+  }
+
   public function getExecFuture($command) {
     $this->openCredentialsIfNotOpen();
 
@@ -137,6 +143,7 @@
       $this->passphraseSSHKey->getUsernameEnvelope(),
       $this->getConfig('host'),
       $full_command);
+    $future->setTimeout($this->execTimeout);
     $future->setPowershellXML($this->getConfig('platform') === 'windows');
     return $future;
   }
diff --git a/src/applications/drydock/management/DrydockManagementSSHWorkflow.php b/src/applications/drydock/management/DrydockManagementSSHWorkflow.php
--- a/src/applications/drydock/management/DrydockManagementSSHWorkflow.php
+++ b/src/applications/drydock/management/DrydockManagementSSHWorkflow.php
@@ -39,8 +39,6 @@
 
     if (!$lease) {
       $console->writeErr("Lease %d does not exist!\n", $id);
-    } else if ($lease->getStatus() != DrydockLeaseStatus::STATUS_ACTIVE) {
-      $console->writeErr("Lease %d is not 'active'!\n", $id);
     } else {
       $interface = $lease->getInterface('command');
       $future = $interface->getExecFuture('%C', $args->getArg('command'));
diff --git a/src/applications/drydock/worker/DrydockAllocatorWorker.php b/src/applications/drydock/worker/DrydockAllocatorWorker.php
--- a/src/applications/drydock/worker/DrydockAllocatorWorker.php
+++ b/src/applications/drydock/worker/DrydockAllocatorWorker.php
@@ -39,6 +39,12 @@
 
   protected function doWork() {
     $lease = $this->loadLease();
+
+    if ($lease->getStatus() != DrydockLeaseStatus::STATUS_PENDING) {
+      // We can't handle non-pending leases.
+      return;
+    }
+
     $this->logToDrydock('Allocating Lease');
 
     try {
@@ -50,7 +56,9 @@
       // and always fail after the first retry right now, so this is
       // functionally equivalent.
       $lease->reload();
-      if ($lease->getStatus() == DrydockLeaseStatus::STATUS_PENDING) {
+      if ($lease->getStatus() == DrydockLeaseStatus::STATUS_PENDING ||
+          $lease->getStatus() == DrydockLeaseStatus::STATUS_ACQUIRING) {
+
         $lease->setStatus(DrydockLeaseStatus::STATUS_BROKEN);
         $lease->save();
       }
diff --git a/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php b/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php
--- a/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php
+++ b/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php
@@ -29,11 +29,6 @@
         ) + $custom_attributes)
       ->queueForActivation();
 
-    // Wait until the lease is fulfilled.
-    // TODO: This will throw an exception if the lease can't be fulfilled;
-    // we should treat that as build failure not build error.
-    $lease->waitUntilActive();
-
     // Create the associated artifact.
     $artifact = $build->createArtifact(
       $build_target,
@@ -43,6 +38,11 @@
       'drydock-lease' => $lease->getID(),
     ));
     $artifact->save();
+
+    // Wait until the lease is fulfilled.
+    // TODO: This will throw an exception if the lease can't be fulfilled;
+    // we should treat that as build failure not build error.
+    $lease->waitUntilActive();
   }
 
   public function getArtifactOutputs() {
diff --git a/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php b/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php
--- a/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php
+++ b/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php
@@ -79,10 +79,14 @@
           ->execute();
-        $lease = $leases[$data['drydock-lease']];
+        $lease = idx($leases, $data['drydock-lease']);
 
-        return id(new PHUIObjectItemView())
-          ->setObjectName(pht('Drydock Lease'))
-          ->setHeader($lease->getID())
-          ->setHref('/drydock/lease/'.$lease->getID());
+        if ($lease !== null) {
+          return id(new PHUIObjectItemView())
+            ->setObjectName(pht('Drydock Lease'))
+            ->setHeader($lease->getID())
+            ->setHref('/drydock/lease/'.$lease->getID());
+        } else {
+          return null;
+        }
       case self::TYPE_URI:
         return id(new PHUIObjectItemView())
           ->setObjectName($data['name'])
@@ -148,12 +152,26 @@
   }
 
   public function releaseDrydockLease() {
-    $lease = $this->loadDrydockLease();
+    try {
+      $lease = $this->loadDrydockLease();
+    } catch (Exception $ex) {
+      // When a resource fails to allocate correctly, the resource
+      // is deleted in the database, which will cause loadDrydockLease
+      // to throw an exception. We ignore the exception since there's
+      // nothing to clean up if we don't have a valid lease / resource.
+      return;
+    }
+
     $resource = $lease->getResource();
     $blueprint = $resource->getBlueprint();
 
     if ($lease->isActive()) {
-      $blueprint->releaseLease($resource, $lease);
+      try {
+        $blueprint->releaseLease($resource, $lease);
+      } catch (Exception $ex) {
+        // Best-effort release: log the failure rather than hiding it.
+        phlog($ex);
+      }
     }
   }
 