diff --git a/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php --- a/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php @@ -266,13 +266,26 @@ $resource->setAttribute('eip-status', 'Associating Elastic IP'); $resource->save(); - $result = $this->getAWSEC2Future() - ->setRawAWSQuery( - 'AssociateAddress', - array( - 'InstanceId' => $instance_id, - 'AllocationId' => $allocation_id)) - ->resolve(); + while (true) { + try { + $result = $this->getAWSEC2Future() + ->setRawAWSQuery( + 'AssociateAddress', + array( + 'InstanceId' => $instance_id, + 'AllocationId' => $allocation_id)) + ->resolve(); + break; + } catch (PhutilAWSException $exx) { + if (substr_count($exx->getMessage(), 'InvalidAllocationID.NotFound') > 0) { + // AWS eventual consistency. Wait a little while. + sleep(5); + continue; + } else { + throw $exx; + } + } + } $association_id = (string)$result->associationId; @@ -313,6 +326,8 @@ 'Terminated'); $resource->save(); + throw $ex; + throw new Exception( 'Unable to allocate an elastic IP for the new EC2 instance. '. 'Check your AWS account limits and ensure your limit on '. @@ -349,6 +364,7 @@ 'credential' => $resource->getAttribute('credential'), 'platform' => $resource->getAttribute('platform'))); $ssh->setConnectTimeout(60); + $ssh->setExecTimeout(60); $resource->setAttribute( 'aws-status', @@ -357,7 +373,12 @@ while (true) { try { - $ssh->getExecFuture('echo "test"')->resolvex(); + $ssh_future = $ssh->getExecFuture('echo "test"'); + $ssh_future->resolvex(); + if ($ssh_future->getWasKilledByTimeout()) { + throw new Exception('SSH execution timed out.'); + } + break; } catch (Exception $ex) { @@ -434,6 +455,7 @@ // Deallocate and release the public IP address if we allocated one. 
if ($resource->getAttribute('eip-allocated')) { + try { $this->getAWSEC2Future() ->setRawAWSQuery( 'DisassociateAddress', @@ -447,6 +469,14 @@ array( 'AllocationId' => $resource->getAttribute('eip-allocation-id'))) ->resolve(); + } catch (PhutilAWSException $ex) { + if (substr_count($ex->getMessage(), 'InvalidAssociationID.NotFound') > 0 || + substr_count($ex->getMessage(), 'InvalidAllocationID.NotFound') > 0) { + // TODO: Should we log this somewhere? + } else { + throw $ex; + } + } } // Terminate the EC2 instance. @@ -463,7 +493,9 @@ DrydockResource $resource, DrydockLease $lease) { - while ($resource->getStatus() == DrydockResourceStatus::STATUS_PENDING) { + while ($resource->getStatus() == DrydockResourceStatus::STATUS_PENDING || + $resource->getStatus() == DrydockResourceStatus::STATUS_ALLOCATING) { + // This resource is still being set up by another allocator, wait until // it is set to open. sleep(5); @@ -490,7 +522,24 @@ $cmd = $lease->getInterface('command'); - $cmd->execx('mkdir %s', $full_path); + $attempts = 10; + while ($attempts > 0) { + $attempts--; + try { + if ($platform === 'windows') { + $cmd->execx('mkdir -Force %s', $full_path); + } else { + $cmd->execx('mkdir %s', $full_path); + } + break; + } catch (Exception $ex) { + if ($attempts == 0) { + throw $ex; + } + + sleep(5); + } + } $lease->setAttribute('path', $full_path); } diff --git a/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php --- a/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php @@ -29,20 +29,19 @@ } } + return true; + // Find the resource that has the least leases. 
$all_lease_counts_grouped = $context->getResourceLeaseCounts(); $minimum_lease_count = $all_lease_counts_grouped[$resource->getID()]; - $minimum_lease_resource_id = $resource->getID(); foreach ($all_lease_counts_grouped as $resource_id => $lease_count) { if ($minimum_lease_count > $lease_count) { $minimum_lease_count = $lease_count; - $minimum_lease_resource_id = $resource_id; } } - // If we are that resource, then allow it, otherwise let the other - // less-leased resource run through this logic and allocate the lease. - return $minimum_lease_resource_id === $resource->getID(); + // As long as we have the minimum lease count, allow the lease. + return $all_lease_counts_grouped[$resource->getID()] <= $minimum_lease_count; } protected function shouldCloseUnleasedResource( diff --git a/src/applications/drydock/controller/DrydockResourceCloseController.php b/src/applications/drydock/controller/DrydockResourceCloseController.php --- a/src/applications/drydock/controller/DrydockResourceCloseController.php +++ b/src/applications/drydock/controller/DrydockResourceCloseController.php @@ -23,7 +23,8 @@ $resource_uri = '/resource/'.$resource->getID().'/'; $resource_uri = $this->getApplicationURI($resource_uri); - if ($resource->getStatus() != DrydockResourceStatus::STATUS_OPEN) { + if ($resource->getStatus() != DrydockResourceStatus::STATUS_OPEN && + $resource->getStatus() != DrydockResourceStatus::STATUS_PENDING) { $dialog = id(new AphrontDialogView()) ->setUser($viewer) ->setTitle(pht('Resource Not Open')) diff --git a/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php b/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php --- a/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php +++ b/src/applications/drydock/interface/command/DrydockSSHCommandInterface.php @@ -4,6 +4,7 @@ private $passphraseSSHKey; private $connectTimeout; + private $execTimeout; private function openCredentialsIfNotOpen() { if 
($this->passphraseSSHKey !== null) { @@ -31,6 +32,11 @@ return $this; } + public function setExecTimeout($timeout) { + $this->execTimeout = $timeout; + return $this; + } + public function getExecFuture($command) { $this->openCredentialsIfNotOpen(); @@ -137,6 +143,7 @@ $this->passphraseSSHKey->getUsernameEnvelope(), $this->getConfig('host'), $full_command); + $future->setTimeout($this->execTimeout); $future->setPowershellXML($this->getConfig('platform') === 'windows'); return $future; } diff --git a/src/applications/drydock/management/DrydockManagementSSHWorkflow.php b/src/applications/drydock/management/DrydockManagementSSHWorkflow.php --- a/src/applications/drydock/management/DrydockManagementSSHWorkflow.php +++ b/src/applications/drydock/management/DrydockManagementSSHWorkflow.php @@ -39,8 +39,6 @@ if (!$lease) { $console->writeErr("Lease %d does not exist!\n", $id); - } else if ($lease->getStatus() != DrydockLeaseStatus::STATUS_ACTIVE) { - $console->writeErr("Lease %d is not 'active'!\n", $id); } else { $interface = $lease->getInterface('command'); $future = $interface->getExecFuture('%C', $args->getArg('command')); diff --git a/src/applications/drydock/view/DrydockLogListView.php b/src/applications/drydock/view/DrydockLogListView.php --- a/src/applications/drydock/view/DrydockLogListView.php +++ b/src/applications/drydock/view/DrydockLogListView.php @@ -35,7 +35,7 @@ ), $log->getLeaseID()), $log->getMessage(), - phabricator_date($log->getEpoch(), $viewer), + phabricator_datetime($log->getEpoch(), $viewer), ); } diff --git a/src/applications/drydock/worker/DrydockAllocatorWorker.php b/src/applications/drydock/worker/DrydockAllocatorWorker.php --- a/src/applications/drydock/worker/DrydockAllocatorWorker.php +++ b/src/applications/drydock/worker/DrydockAllocatorWorker.php @@ -35,6 +35,12 @@ protected function doWork() { $lease = $this->loadLease(); + + if ($lease->getStatus() != DrydockLeaseStatus::STATUS_PENDING) { + // We can't handle non-pending leases. 
+ return; + } + $this->logToDrydock('Allocating Lease'); try { @@ -115,14 +121,17 @@ } if (!$resource) { - // Attempt to use pending resources if we can. + // Attempt to use allocating and pending resources if we can. $pool = id(new DrydockResource())->loadAllWhere( - 'type = %s AND status = %s', + 'type = %s AND status IN (%Ld)', $lease->getResourceType(), - DrydockResourceStatus::STATUS_PENDING); + array( + DrydockResourceStatus::STATUS_ALLOCATING, + DrydockResourceStatus::STATUS_PENDING, + )); $this->logToDrydock( - pht('Found %d Pending Resource(s)', count($pool))); + pht('Found %d Allocating / Pending Resource(s)', count($pool))); $candidates = array(); foreach ($pool as $key => $candidate) { @@ -140,7 +149,7 @@ } $this->logToDrydock( - pht('%d Pending Resource(s) Remain', + pht('%d Allocating / Pending Resource(s) Remain', count($candidates))); $resource = null; @@ -275,6 +284,7 @@ $blueprint->allocateResource($resource, $lease); } catch (Exception $ex) { $resource->delete(); + $lease->delete(); throw $ex; } diff --git a/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php b/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php --- a/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php +++ b/src/applications/harbormaster/step/HarbormasterLeaseHostBuildStepImplementation.php @@ -29,11 +29,6 @@ ) + $custom_attributes) ->queueForActivation(); - // Wait until the lease is fulfilled. - // TODO: This will throw an exception if the lease can't be fulfilled; - // we should treat that as build failure not build error. - $lease->waitUntilActive(); - // Create the associated artifact. $artifact = $build->createArtifact( $build_target, @@ -42,6 +37,11 @@ $artifact->setArtifactData(array( 'drydock-lease' => $lease->getID())); $artifact->save(); + + // Wait until the lease is fulfilled. 
+ // TODO: This will throw an exception if the lease can't be fulfilled; + // we should treat that as build failure not build error. + $lease->waitUntilActive(); } public function getArtifactOutputs() { diff --git a/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php b/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php --- a/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php +++ b/src/applications/harbormaster/storage/build/HarbormasterBuildArtifact.php @@ -134,7 +134,16 @@ } public function releaseDrydockLease() { - $lease = $this->loadDrydockLease(); + try { + $lease = $this->loadDrydockLease(); + } catch (Exception $ex) { + // When a resource fails to allocate correctly, the resource + // is deleted in the database, which will cause loadDrydockLease + // to throw an exception. We ignore the exception since there's + // nothing to clean up if we don't have a valid lease / resource. + return; + } + $resource = $lease->getResource(); $blueprint = $resource->getBlueprint();