diff --git a/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php --- a/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockAmazonEC2HostBlueprintImplementation.php @@ -39,6 +39,14 @@ $lease->getAttributes(), $this->getDetail('attributes')); + if ($platform_match && $custom_match) { + $this->log(pht( + 'This blueprint can allocate a resource for the specified lease.')); + } else { + $this->log(pht( + 'This blueprint can not allocate a resource for the specified lease.')); + } + return $platform_match && $custom_match; } @@ -52,6 +60,14 @@ $lease->getAttributes(), $this->getDetail('attributes')); + if ($platform_match && $custom_match) { + $this->log(pht( + 'This blueprint can allocate the specified lease.')); + } else { + $this->log(pht( + 'This blueprint can not allocate the specified lease.')); + } + return $platform_match && $custom_match; } @@ -71,6 +87,10 @@ ->withPHIDs(array($this->getDetail('keypair'))) ->executeOne(); + $this->log(pht( + 'Using credential %d to allocate.', + $credential->getID())); + try { $existing_keys = $this->getAWSEC2Future() ->setRawAWSQuery( @@ -78,8 +98,11 @@ array( 'KeyName.0' => $this->getAWSKeyPairName(),)) ->resolve(); + + $this->log(pht('Credential\'s public key already exists in Amazon.')); } catch (PhutilAWSException $ex) { // The key pair does not exist, so we need to import it. + $this->log(pht('Credential\'s public key does not exist in Amazon.')); $type = PassphraseCredentialType::getTypeByConstant( $credential->getCredentialType()); @@ -102,6 +125,8 @@ 'KeyName' => $this->getAWSKeyPairName(), 'PublicKeyMaterial' => base64_encode($public_key),)) ->resolve(); + + $this->log(pht('Imported key pair to Amazon.')); } $settings = array( @@ -120,6 +145,7 @@ if (!$this->getDetail('skip-ssh-setup-windows')) { if ($this->getDetail('platform') === 'windows') { + $this->log(pht('Enabled SSH automatic configuration for Windows.')); $settings['UserData'] = id(new WindowsZeroConf()) ->getEncodedUserData($credential); } @@ -128,8 +154,14 @@ if ($this->getDetail('spot-enabled') && $this->getDetail('spot-price') !== null) { + $this->log(pht( + 'Spot price allocation is enabled, at a price of %f.', + $this->getDetail('spot-price'))); + + $spot_price = $this->getDetail('spot-price'); + $spot_settings = array( - 'SpotPrice' => $this->getDetail('spot-price'), + 'SpotPrice' => $spot_price, 'InstanceCount' => 1, 'Type' => 'one-time',); @@ -137,6 +169,9 @@ $spot_settings['LaunchSpecification.'.$key] = $value; } + $this->log(pht( + 'Requesting spot instance from Amazon.')); + $result = $this->getAWSEC2Future() ->setRawAWSQuery( 'RequestSpotInstances', @@ -146,6 +181,9 @@ $spot_request = $result->spotInstanceRequestSet->item[0]; $spot_request_id = (string)$spot_request->spotInstanceRequestId; + $this->log(pht( + 'Spot instance request ID is %s', $spot_request_id)); + // Wait until the spot instance request is fulfilled. while (true) { try { @@ -159,14 +197,66 @@ // AWS does not provide immediate consistency, so this may throw // "spot request does not exist" right after requesting spot // instances. + $this->log(pht( + 'Spot instance request could not be found (due '. + 'to eventual consistency), trying again in 5 seconds.')); + sleep(5); + continue; } $spot_request = $result->spotInstanceRequestSet->item[0]; $spot_state = (string)$spot_request->state; + $spot_status = (string)$spot_request->status->code; + + $this->log(pht( + 'Spot instance request is currently in state \'%s\' '. + 'with status \'%s\'.', + $spot_state, + $spot_status)); if ($spot_state == 'open') { + // Check to see if the price is too low for the request to be + // fulfilled. + if ($spot_status == 'price-too-low') { + // The price is too low. + $message = pht( + 'The spot instance price used to launch this instance of $%f is '. + 'too low in order for the request to be satisified. Change the '. + 'blueprint configuration to a higher price. The Amazon status '. + 'message is \'%s\'', + $spot_price, + (string)$spot_request->status->message); + + $this->log($message); + + $this->log(pht( + 'Cancelling spot instance request \'%s\'.', + $spot_request_id)); + + try { + $result = $this->getAWSEC2Future() + ->setRawAWSQuery( + 'CancelSpotInstanceRequests', + array( + 'SpotInstanceRequestId.0' => $spot_request_id,)) + ->resolve(); + } catch (PhutilAWSException $ex) { + $this->log(pht( + 'Unable to cancel spot request \'%s\'; cancel it from the '. + 'AWS console instead. The error was \'%s\'', + $spot_request_id, + $ex->getMessage())); + } + + $this->log(pht( + 'Spot request \'%s\' cancelled.', + $spot_request_id)); + + throw new Exception($message); + } + // We are waiting for the request to be fulfilled. sleep(5); continue; @@ -176,18 +266,54 @@ break; } else { // The spot request is closed, cancelled or failed. - throw new Exception( + $message = pht( 'Requested a spot instance, but the request is in state '. - '"'.$spot_state.'". This may occur when the current bid '. - 'price exceeds your maximum bid price ('. - $this->getDetail('spot-price'). - ').'); + '\'%s\'. It is likely caused when a spot instance request '. + 'is cancelled from the Amazon Web Console. It may occur '. + 'when the current bid price exceeds your maximum bid price ($%f).', + $spot_state, + $spot_price); + + $this->log($message); + + $this->log(pht( + 'Cancelling spot instance request \'%s\'.', + $spot_request_id)); + + try { + $result = $this->getAWSEC2Future() + ->setRawAWSQuery( + 'CancelSpotInstanceRequests', + array( + 'SpotInstanceRequestId.0' => $spot_request_id,)) + ->resolve(); + } catch (PhutilAWSException $ex) { + $this->log(pht( + 'Unable to cancel spot request \'%s\'; cancel it from the '. + 'AWS console instead. The error was \'%s\'', + $spot_request_id, + $ex->getMessage())); + } + + $this->log(pht( + 'Spot request \'%s\' cancelled.', + $spot_request_id)); + + throw new Exception($message); } } + + $this->log(pht( + 'Spot instance request has been fulfilled, and the instance ID is %s.', + $instance_id)); + } else { $settings['MinCount'] = 1; $settings['MaxCount'] = 1; + $this->log(pht( + 'Requesting on-demand instance from Amazon.')); + $result = $this->getAWSEC2Future() ->setRawAWSQuery( 'RunInstances', @@ -196,6 +322,10 @@ $instance = $result->instancesSet->item[0]; $instance_id = (string)$instance->instanceId; + + $this->log(pht( + 'The instance that was started is %s.', + $instance_id)); } // Allocate the resource and place it into Pending status while @@ -212,6 +342,12 @@ 'aws-status' => 'Instance Requested',)) ->save(); + $this->log(pht( + 'Updated the Drydock resource with the instance information.')); + + $this->log(pht( + 'Waiting for the instance to start according to Amazon')); + // Wait until the instance has started. while (true) { try { @@ -233,9 +369,13 @@ break; } else { // Instance is shutting down or is otherwise terminated. - throw new Exception( - 'Allocated instance, but ended up in unexpected state \''. - $instance_state.'\'!'); + $message = pht( + 'Allocated instance, but ended up in unexpected state \'%s\'! '. + 'Did someone terminate it from the Amazon Web Console?', + $instance_state); + + $this->log($message); + throw new Exception($message); } } catch (PhutilAWSException $ex) { // TODO: This can happen because the instance doesn't exist yet, but @@ -245,12 +385,18 @@ } } + $this->log(pht( + 'Instance has started in Amazon')); + $resource->setAttribute('aws-status', 'Started in Amazon'); $resource->save(); // Calculate the IP address of the instance. $address = ''; if ($this->getDetail('allocate-elastic-ip')) { + $this->log(pht( + 'Allocating an Elastic IP as requested')); + $resource->setAttribute('eip-status', 'Allocating Elastic IP'); $resource->setAttribute('eip-allocated', true); $resource->save(); @@ -333,13 +479,19 @@ 'Terminated'); $resource->save(); - throw new Exception( + $message = 'Unable to allocate an elastic IP for the new EC2 instance. '. 'Check your AWS account limits and ensure your limit on '. 'elastic IP addresses is high enough to complete the '. - 'resource allocation'); + 'resource allocation'; + + $this->log($message); + throw new Exception($message); } } else { + $this->log(pht( + 'Not allocating an elastic IP for this instance')); + $resource->setAttribute('eip-allocated', false); // Use the private IP address. @@ -361,6 +513,9 @@ $resource->setAttribute('port', 22); $resource->save(); + $this->log(pht( + 'Waiting for a successful SSH connection')); + // Wait until we get a successful SSH connection. $ssh = id(new DrydockSSHCommandInterface()) ->setConfiguration(array( @@ -378,6 +533,10 @@ while (true) { try { + $this->log(pht( + 'Attempting to connect to \'%s\' via SSH', + $instance_id)); + $ssh_future = $ssh->getExecFuture('echo "test"'); $ssh_future->resolvex(); if ($ssh_future->getWasKilledByTimeout()) { @@ -400,9 +559,18 @@ $instance = $reservation->instancesSet->item[0]; $instance_state = (string)$instance->instanceState->name; + $this->log(pht( + 'SSH connection not yet ready; instance is in state \'%s\'', + $instance_state)); + if ($instance_state === 'shutting-down' || $instance_state === 'terminated') { + $this->log(pht( + 'Instance has ended up in state \'%s\' while waiting for an '. + 'SSH connection', + $instance_state)); + // Deallocate and release the public IP address if we allocated one. if ($resource->getAttribute('eip-allocated')) { try { @@ -451,27 +619,37 @@ 'aws-status', 'Ready for Use'); $resource->save(); + + $this->log(pht( + 'Resource is now ready for use.')); + return $resource; } protected function executeCloseResource(DrydockResource $resource) { + $this->log(pht( + 'Closing EC2 resource.')); + // Deallocate and release the public IP address if we allocated one. if ($resource->getAttribute('eip-allocated')) { try { - $this->getAWSEC2Future() - ->setRawAWSQuery( - 'DisassociateAddress', - array( - 'AssociationId' => $resource->getAttribute('eip-association-id'),)) - ->resolve(); + $this->log(pht( + 'Elastic IPs are allocated, so releasing them.')); - $this->getAWSEC2Future() - ->setRawAWSQuery( - 'ReleaseAddress', - array( - 'AllocationId' => $resource->getAttribute('eip-allocation-id'),)) - ->resolve(); + $this->getAWSEC2Future() + ->setRawAWSQuery( + 'DisassociateAddress', + array( + 'AssociationId' => $resource->getAttribute('eip-association-id'),)) + ->resolve(); + + $this->getAWSEC2Future() + ->setRawAWSQuery( + 'ReleaseAddress', + array( + 'AllocationId' => $resource->getAttribute('eip-allocation-id'),)) + ->resolve(); } catch (PhutilAWSException $ex) { if (substr_count( $ex->getMessage(), @@ -484,6 +662,10 @@ } } + $this->log(pht( + 'Requesting instance \'%s\' be terminated.', + $resource->getAttribute('instance-id'))); + try { // Terminate the EC2 instance. $this->getAWSEC2Future() @@ -506,13 +688,29 @@ DrydockResource $resource, DrydockLease $lease) { + $this->log(pht( + 'Starting acquisition of lease from resource %d', + $resource->getID())); + while ($resource->getStatus() == DrydockResourceStatus::STATUS_PENDING) { + $this->log(pht( + 'Resource %d is still pending, waiting until it is in an open status', + $resource->getID())); + // This resource is still being set up by another allocator, wait until // it is set to open. sleep(5); $resource->reload(); } + if ($resource->getStatus() != DrydockResourceStatus::STATUS_OPEN) { + $message = pht( + 'Resource %d did not move into an open status', + $resource->getID()); + $this->log($message); + throw new Exception($message); + } + $platform = $resource->getAttribute('platform'); $path = $resource->getAttribute('path'); @@ -533,6 +731,11 @@ $cmd = $lease->getInterface('command'); + $this->log(pht( + 'Attempting to create directory \'%s\' on resource %d', + $full_path, + $resource->getID())); + $attempts = 10; while ($attempts > 0) { $attempts--; @@ -553,16 +756,29 @@ } $lease->setAttribute('path', $full_path); + + $this->log(pht( + 'Lease %d acquired on resource %d', + $lease->getID(), + $resource->getID())); } protected function executeReleaseLease( DrydockResource $resource, DrydockLease $lease) { + $this->log(pht( + 'Releasing lease %d', + $lease->getID())); + $cmd = $lease->getInterface('command'); $path = $lease->getAttribute('path'); try { + $this->log(pht( + 'Removing contents of \'%s\' on host', + $path)); + if ($resource->getAttribute('platform') !== 'windows') { $cmd->execx('rm -rf %s', $path); } else { @@ -572,6 +788,11 @@ // We try to clean up, but sometimes files are locked or still in // use (this is far more common on Windows). There's nothing we can // do about this, so we ignore it. + $this->log(pht( + 'An exception occurred while removing files on the host. This can '. + 'occur when files are locked by the operating system. The exception '. + 'message was \'%s\'.', + $ex->getMessage())); } } diff --git a/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php --- a/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php @@ -344,6 +344,7 @@ if ($context->getCurrentResourceLeaseCount() === 0) { if ($this->shouldCloseUnleasedResource($context, $resource)) { DrydockBlueprintImplementation::writeLog( + $this->instance, $resource, null, pht('Closing resource because it has no more active leases')); @@ -424,7 +425,11 @@ $lease->setStatus(DrydockLeaseStatus::STATUS_RELEASED); break; } - DrydockBlueprintImplementation::writeLog($resource, $lease, $message); + DrydockBlueprintImplementation::writeLog( + $this->instance, + $resource, + $lease, + $message); $lease->save(); } @@ -439,7 +444,7 @@ DrydockResource $resource, DrydockLease $lease) { - $scope = $this->pushActiveScope(null, $lease); + $scope = $this->pushActiveScope($resource, $lease); $this->log( pht( diff --git a/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php --- a/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockMinMaxBlueprintImplementation.php @@ -16,6 +16,13 @@ // If the current resource can allocate a lease, allow it. if ($context->getCurrentResourceLeaseCount() < $this->getDetail('leases-per-resource')) { + + $this->log(pht( + 'Resource %d has %d leases, which is less '. + 'than the maximum of %d leases', + $resource->getID(), + $context->getCurrentResourceLeaseCount(), + $this->getDetail('leases-per-resource'))); return true; } @@ -25,6 +32,15 @@ $open_count = $context->getBlueprintOpenResourceCount(); if ($open_count < $this->getDetail('max-count')) { if ($this->getDetail('max-count') !== null) { + + $this->log(pht( + 'Resource %d has %d leases, which is equal '. + 'to or greater than than %d. This blueprint '. + 'can still allocate more resources, so will not lease '. + 'against this resource.', + $resource->getID(), + $context->getCurrentResourceLeaseCount(), + $this->getDetail('leases-per-resource'))); return false; } } @@ -40,6 +56,11 @@ } } + $this->log(pht( + 'Resource %d has the lowest number of leases, so that is the resource '. + 'that will be leased against.', + $minimum_lease_resource_id)); + // If we are that resource, then allow it, otherwise let the other // less-leased resource run through this logic and allocate the lease. return $minimum_lease_resource_id === $resource->getID(); diff --git a/src/applications/drydock/blueprint/DrydockMinMaxExpiryBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockMinMaxExpiryBlueprintImplementation.php --- a/src/applications/drydock/blueprint/DrydockMinMaxExpiryBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockMinMaxExpiryBlueprintImplementation.php @@ -13,6 +13,11 @@ $expiry = $this->getDetail('expiry'); if ($expiry === null) { + $this->log(pht( + 'Can allocate if %d is less than %d', + count($pool), + $max_count)); + return count($pool) < $max_count; } @@ -28,6 +33,11 @@ } } + $this->log(pht( + 'Can allocate if %d is less than %d', + $count, + $max_count)); + return $count < $max_count; } @@ -44,6 +54,10 @@ // hit `shouldCloseUnleasedResource` in the future and the resource will // be closed. if ($context->getCurrentResourceLeaseCount() === 0) { + $this->log(pht( + 'No leases against resource %d; bypassing expiry checks', + $resource->getID())); + return parent::shouldAllocateLease($context, $resource, $lease); } @@ -55,6 +69,11 @@ if ($lifetime > $expiry) { // Prevent allocation of leases to this resource, since it's over // it's lifetime allowed. + $this->log(pht( + 'Expiry has exceeded on resource %d, '. + 'will not allow this to be leased against', + $resource->getID())); + return false; } } @@ -72,6 +91,10 @@ $lifetime = time() - $resource->getDateCreated(); if ($lifetime > $expiry) { + $this->log(pht( + 'Current lifetime of resource exceeds expiry; triggering close', + $resource->getID())); + // Force closure of resources that have expired. return true; } diff --git a/src/applications/drydock/controller/DrydockResourceCloseController.php b/src/applications/drydock/controller/DrydockResourceCloseController.php --- a/src/applications/drydock/controller/DrydockResourceCloseController.php +++ b/src/applications/drydock/controller/DrydockResourceCloseController.php @@ -24,6 +24,7 @@ $resource_uri = $this->getApplicationURI($resource_uri); if ($resource->getStatus() != DrydockResourceStatus::STATUS_OPEN && + $resource->getStatus() != DrydockResourceStatus::STATUS_ALLOCATING && $resource->getStatus() != DrydockResourceStatus::STATUS_PENDING) { $dialog = id(new AphrontDialogView()) ->setUser($viewer) diff --git a/src/applications/drydock/storage/DrydockLog.php b/src/applications/drydock/storage/DrydockLog.php --- a/src/applications/drydock/storage/DrydockLog.php +++ b/src/applications/drydock/storage/DrydockLog.php @@ -77,14 +77,20 @@ if ($this->getResource()) { return $this->getResource()->getPolicy($capability); } - return $this->getLease()->getPolicy($capability); + if ($this->getLease()) { + return $this->getLease()->getPolicy($capability); + } + return PhabricatorPolicies::POLICY_ADMIN; } public function hasAutomaticCapability($capability, PhabricatorUser $viewer) { if ($this->getResource()) { return $this->getResource()->hasAutomaticCapability($capability, $viewer); } - return $this->getLease()->hasAutomaticCapability($capability, $viewer); + if ($this->getLease()) { + return $this->getLease()->hasAutomaticCapability($capability, $viewer); + } + return PhabricatorPolicies::POLICY_ADMIN; } public function describeAutomaticCapability($capability) { diff --git a/src/applications/drydock/worker/DrydockAllocatorWorker.php b/src/applications/drydock/worker/DrydockAllocatorWorker.php --- a/src/applications/drydock/worker/DrydockAllocatorWorker.php +++ b/src/applications/drydock/worker/DrydockAllocatorWorker.php @@ -277,6 +277,10 @@ ->setStatus(DrydockResourceStatus::STATUS_ALLOCATING) ->save(); + $this->logToDrydock( + pht('Created resources %d in \'allocating\' status.', + $resource->getID())); + // Pre-emptively allocate the lease on the resource inside the lock, // to ensure that other allocators don't cause this worker to lose // an allocation race. If we fail to allocate a lease here, then the @@ -297,6 +301,9 @@ // a resource allocated by a blueprint can have the lease allocated // against it). // + $this->logToDrydock( + pht('Pre-emptively allocating the lease against the new resource.')); + if (!$blueprint->allocateLease($resource, $lease)) { throw new Exception( 'Blueprint allocated a resource, but can\'t lease against it.'); @@ -309,6 +316,10 @@ } try { + $this->logToDrydock(pht( + 'Allocating resource using blueprint \'%s\'.', + $blueprint->getInstance()->getBlueprintName())); + $blueprint->allocateResource($resource, $lease); } catch (Exception $ex) { $resource->delete(); @@ -336,6 +347,12 @@ // } + $this->logToDrydock(pht( + 'Acquiring lease %d on resource %d using blueprint \'%s\'.', + $lease->getID(), + $resource->getID(), + $blueprint->getInstance()->getBlueprintName())); + $blueprint = $resource->getBlueprint(); $blueprint->acquireLease($resource, $lease); }