Page MenuHomePhabricator

D21807.diff
No OneTemporary

D21807.diff

diff --git a/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php b/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php
--- a/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php
+++ b/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php
@@ -188,137 +188,360 @@
// First, try to find a suitable open resource which we can acquire a new
// lease on.
- $resources = $this->loadResourcesForAllocatingLease($blueprints, $lease);
+ $resources = $this->loadAcquirableResourcesForLease($blueprints, $lease);
- // If no resources exist yet, see if we can build one.
- if (!$resources) {
- $usable_blueprints = $this->removeOverallocatedBlueprints(
- $blueprints,
- $lease);
-
- // If we get nothing back here, some blueprint claims it can eventually
- // satisfy the lease, just not right now. This is a temporary failure,
- // and we expect allocation to succeed eventually.
- if (!$usable_blueprints) {
- $blueprints = $this->rankBlueprints($blueprints, $lease);
-
- // Try to actively reclaim unused resources. If we succeed, jump back
- // into the queue in an effort to claim it.
- foreach ($blueprints as $blueprint) {
- $reclaimed = $this->reclaimResources($blueprint, $lease);
- if ($reclaimed) {
- $lease->logEvent(
- DrydockLeaseReclaimLogType::LOGCONST,
- array(
- 'resourcePHIDs' => array($reclaimed->getPHID()),
- ));
-
- throw new PhabricatorWorkerYieldException(15);
- }
- }
+ list($free_resources, $used_resources) = $this->partitionResources(
+ $lease,
+ $resources);
+
+ $resource = $this->leaseAnyResource($lease, $free_resources);
+ if ($resource) {
+ return $resource;
+ }
+
+ // We're about to try creating a resource. If we're already creating
+ // something, just yield until that resolves.
+
+ $this->yieldForPendingResources($lease);
+
+ // We haven't been able to lease an existing resource yet, so now we try to
+ // create one. We may still have some less-desirable "used" resources that
+ // we'll sometimes try to lease later if we fail to allocate a new resource.
+
+ $resource = $this->newLeasedResource($lease, $blueprints);
+ if ($resource) {
+ return $resource;
+ }
+
+ // We haven't been able to lease a desirable "free" resource or create a
+ // new resource. Try to lease a "used" resource.
+
+ $resource = $this->leaseAnyResource($lease, $used_resources);
+ if ($resource) {
+ return $resource;
+ }
+
+ // If this lease has already triggered a reclaim, just yield and wait for
+ // it to resolve.
+ $this->yieldForReclaimingResources($lease);
+
+ // Try to reclaim a resource. This will yield if it reclaims something.
+ $this->reclaimAnyResource($lease, $blueprints);
+
+ // We weren't able to lease, create, or reclaim any resources. We just have
+ // to wait for resources to become available.
+
+ $lease->logEvent(
+ DrydockLeaseWaitingForResourcesLogType::LOGCONST,
+ array(
+ 'blueprintPHIDs' => mpull($blueprints, 'getPHID'),
+ ));
+
+ throw new PhabricatorWorkerYieldException(15);
+ }
+
+ private function reclaimAnyResource(DrydockLease $lease, array $blueprints) {
+ assert_instances_of($blueprints, 'DrydockBlueprint');
+
+ $blueprints = $this->rankBlueprints($blueprints, $lease);
+
+ // Try to actively reclaim unused resources. If we succeed, jump back
+ // into the queue in an effort to claim it.
+
+ foreach ($blueprints as $blueprint) {
+ $reclaimed = $this->reclaimResources($blueprint, $lease);
+ if ($reclaimed) {
$lease->logEvent(
- DrydockLeaseWaitingForResourcesLogType::LOGCONST,
+ DrydockLeaseReclaimLogType::LOGCONST,
array(
- 'blueprintPHIDs' => mpull($blueprints, 'getPHID'),
+ 'resourcePHIDs' => array($reclaimed->getPHID()),
));
+ // Yield explicitly here: we'll be awakened when the resource is
+ // reclaimed.
+
throw new PhabricatorWorkerYieldException(15);
}
+ }
+ }
- $usable_blueprints = $this->rankBlueprints($usable_blueprints, $lease);
-
- $exceptions = array();
- foreach ($usable_blueprints as $blueprint) {
- try {
- $resources[] = $this->allocateResource($blueprint, $lease);
-
- // Bail after allocating one resource, we don't need any more than
- // this.
- break;
- } catch (Exception $ex) {
- // This failure is not normally expected, so log it. It can be
- // caused by something mundane and recoverable, however (see below
- // for discussion).
-
- // We log to the blueprint separately from the log to the lease:
- // the lease is not attached to a blueprint yet so the lease log
- // will not show up on the blueprint; more than one blueprint may
- // fail; and the lease is not really impacted (and won't log) if at
- // least one blueprint actually works.
-
- $blueprint->logEvent(
- DrydockResourceAllocationFailureLogType::LOGCONST,
- array(
- 'class' => get_class($ex),
- 'message' => $ex->getMessage(),
- ));
+ private function yieldForPendingResources(DrydockLease $lease) {
+ // See T13677. If this lease has already triggered the allocation of
+ // one or more resources and they are still pending, just yield and
+ // wait for them.
- $exceptions[] = $ex;
- }
+ $viewer = $this->getViewer();
+
+ $phids = $lease->getAllocatedResourcePHIDs();
+ if (!$phids) {
+ return null;
+ }
+
+ $resources = id(new DrydockResourceQuery())
+ ->setViewer($viewer)
+ ->withPHIDs($phids)
+ ->withStatuses(
+ array(
+ DrydockResourceStatus::STATUS_PENDING,
+ ))
+ ->setLimit(1)
+ ->execute();
+ if (!$resources) {
+ return;
+ }
+
+ $lease->logEvent(
+ DrydockLeaseWaitingForActivationLogType::LOGCONST,
+ array(
+ 'resourcePHIDs' => mpull($resources, 'getPHID'),
+ ));
+
+ throw new PhabricatorWorkerYieldException(15);
+ }
+
+ private function yieldForReclaimingResources(DrydockLease $lease) {
+ $viewer = $this->getViewer();
+
+ $phids = $lease->getReclaimedResourcePHIDs();
+ if (!$phids) {
+ return;
+ }
+
+ $resources = id(new DrydockResourceQuery())
+ ->setViewer($viewer)
+ ->withPHIDs($phids)
+ ->withStatuses(
+ array(
+ DrydockResourceStatus::STATUS_ACTIVE,
+ ))
+ ->setLimit(1)
+ ->execute();
+ if (!$resources) {
+ return;
+ }
+
+ $lease->logEvent(
+ DrydockLeaseWaitingForReclamationLogType::LOGCONST,
+ array(
+ 'resourcePHIDs' => mpull($resources, 'getPHID'),
+ ));
+
+ throw new PhabricatorWorkerYieldException(15);
+ }
+
+ private function newLeasedResource(
+ DrydockLease $lease,
+ array $blueprints) {
+ assert_instances_of($blueprints, 'DrydockBlueprint');
+
+ $usable_blueprints = $this->removeOverallocatedBlueprints(
+ $blueprints,
+ $lease);
+
+ // If we get nothing back here, some blueprint claims it can eventually
+ // satisfy the lease, just not right now. This is a temporary failure,
+ // and we expect allocation to succeed eventually.
+
+ // Return, try to lease a "used" resource, and continue from there.
+
+ if (!$usable_blueprints) {
+ return null;
+ }
+
+ $usable_blueprints = $this->rankBlueprints($usable_blueprints, $lease);
+
+ $new_resources = $this->newResources($lease, $usable_blueprints);
+ if (!$new_resources) {
+ // If we were unable to create any new resources, return and
+ // try to lease a "used" resource.
+ return null;
+ }
+
+ $new_resources = $this->removeUnacquirableResources(
+ $new_resources,
+ $lease);
+ if (!$new_resources) {
+ // If we make it here, we just built a resource but aren't allowed
+ // to acquire it. We expect this to happen if the resource prevents
+ // acquisition until it activates, which is common when a resource
+ // needs to perform setup steps.
+
+ // Explicitly yield and wait for activation, since we don't want to
+ // lease a "used" resource.
+
+ throw new PhabricatorWorkerYieldException(15);
+ }
+
+ $resource = $this->leaseAnyResource($lease, $new_resources);
+ if ($resource) {
+ return $resource;
+ }
+
+ // We may not be able to lease a resource even if we just built it:
+ // another process may snatch it up before we can lease it. This should
+ // be rare, but is not concerning. Just try to build another resource.
+
+ // We likely could try to build the next resource immediately, but err on
+ // the side of caution and yield for now, at least until this code is
+ // better vetted.
+
+ throw new PhabricatorWorkerYieldException(15);
+ }
+
+ private function partitionResources(
+ DrydockLease $lease,
+ array $resources) {
+
+ assert_instances_of($resources, 'DrydockResource');
+ $viewer = $this->getViewer();
+
+ $lease_statuses = array(
+ DrydockLeaseStatus::STATUS_PENDING,
+ DrydockLeaseStatus::STATUS_ACQUIRED,
+ DrydockLeaseStatus::STATUS_ACTIVE,
+ );
+
+ // Partition resources into "free" resources (which we can try to lease
+ // immediately) and "used" resources, which we can only to lease after we
+ // fail to allocate a new resource.
+
+ // "Free" resources are unleased and/or prefer reuse over allocation.
+ // "Used" resources are leased and prefer allocation over reuse.
+
+ $free_resources = array();
+ $used_resources = array();
+
+ foreach ($resources as $resource) {
+ $blueprint = $resource->getBlueprint();
+
+ if (!$blueprint->shouldAllocateSupplementalResource($resource, $lease)) {
+ $free_resources[] = $resource;
+ continue;
}
- if (!$resources) {
- // If one or more blueprints claimed that they would be able to
- // allocate resources but none are actually able to allocate resources,
- // log the failure and yield so we try again soon.
+ $leases = id(new DrydockLeaseQuery())
+ ->setViewer($viewer)
+ ->withResourcePHIDs(array($resource->getPHID()))
+ ->withStatuses($lease_statuses)
+ ->setLimit(1)
+ ->execute();
+ if (!$leases) {
+ $free_resources[] = $resource;
+ continue;
+ }
- // This can happen if some unexpected issue occurs during allocation
- // (for example, a call to build a VM fails for some reason) or if we
- // raced another allocator and the blueprint is now full.
+ $used_resources[] = $resource;
+ }
- $ex = new PhutilAggregateException(
- pht(
- 'All blueprints failed to allocate a suitable new resource when '.
- 'trying to allocate lease ("%s").',
- $lease->getPHID()),
- $exceptions);
+ return array($free_resources, $used_resources);
+ }
- $lease->logEvent(
- DrydockLeaseAllocationFailureLogType::LOGCONST,
+ private function newResources(
+ DrydockLease $lease,
+ array $blueprints) {
+ assert_instances_of($blueprints, 'DrydockBlueprint');
+
+ $resources = array();
+ $exceptions = array();
+ foreach ($blueprints as $blueprint) {
+ $caught = null;
+ try {
+ $resources[] = $this->allocateResource($blueprint, $lease);
+
+ // Bail after allocating one resource, we don't need any more than
+ // this.
+ break;
+ } catch (Exception $ex) {
+ $caught = $ex;
+ } catch (Throwable $ex) {
+ $caught = $ex;
+ }
+
+ if ($caught) {
+ // This failure is not normally expected, so log it. It can be
+ // caused by something mundane and recoverable, however (see below
+ // for discussion).
+
+ // We log to the blueprint separately from the log to the lease:
+ // the lease is not attached to a blueprint yet so the lease log
+ // will not show up on the blueprint; more than one blueprint may
+ // fail; and the lease is not really impacted (and won't log) if at
+ // least one blueprint actually works.
+
+ $blueprint->logEvent(
+ DrydockResourceAllocationFailureLogType::LOGCONST,
array(
- 'class' => get_class($ex),
- 'message' => $ex->getMessage(),
+ 'class' => get_class($caught),
+ 'message' => $caught->getMessage(),
));
- throw new PhabricatorWorkerYieldException(15);
+ $exceptions[] = $caught;
}
+ }
- $resources = $this->removeUnacquirableResources($resources, $lease);
- if (!$resources) {
- // If we make it here, we just built a resource but aren't allowed
- // to acquire it. We expect this during routine operation if the
- // resource prevents acquisition until it activates. Yield and wait
- // for activation.
- throw new PhabricatorWorkerYieldException(15);
- }
+ if (!$resources) {
+ // If one or more blueprints claimed that they would be able to allocate
+ // resources but none are actually able to allocate resources, log the
+ // failure and yield so we try again soon.
+
+ // This can happen if some unexpected issue occurs during allocation
+ // (for example, a call to build a VM fails for some reason) or if we
+ // raced another allocator and the blueprint is now full.
+
+ $ex = new PhutilAggregateException(
+ pht(
+ 'All blueprints failed to allocate a suitable new resource when '.
+ 'trying to allocate lease ("%s").',
+ $lease->getPHID()),
+ $exceptions);
- // NOTE: We have not acquired the lease yet, so it is possible that the
- // resource we just built will be snatched up by some other lease before
- // we can acquire it. This is not problematic: we'll retry a little later
- // and should succeed eventually.
+ $lease->logEvent(
+ DrydockLeaseAllocationFailureLogType::LOGCONST,
+ array(
+ 'class' => get_class($ex),
+ 'message' => $ex->getMessage(),
+ ));
+
+ return null;
+ }
+
+ return $resources;
+ }
+
+
+ private function leaseAnyResource(
+ DrydockLease $lease,
+ array $resources) {
+ assert_instances_of($resources, 'DrydockResource');
+
+ if (!$resources) {
+ return null;
}
$resources = $this->rankResources($resources, $lease);
$exceptions = array();
$yields = array();
- $allocated = false;
+
+ $allocated = null;
foreach ($resources as $resource) {
try {
- $resource = $this->newResourceForAcquisition($resource, $lease);
$this->acquireLease($resource, $lease);
- $allocated = true;
+ $allocated = $resource;
break;
} catch (DrydockResourceLockException $ex) {
// We need to lock the resource to actually acquire it. If we aren't
// able to acquire the lock quickly enough, we can yield and try again
// later.
$yields[] = $ex;
+ } catch (DrydockSlotLockException $ex) {
+ // This also just indicates we ran into some kind of contention,
+ // probably from another lease. Just yield.
+ $yields[] = $ex;
} catch (DrydockAcquiredBrokenResourceException $ex) {
// If a resource was reclaimed or destroyed by the time we actually
- // got around to acquiring it, we just got unlucky. We can yield and
- // try again later.
+ // got around to acquiring it, we just got unlucky.
$yields[] = $ex;
} catch (PhabricatorWorkerYieldException $ex) {
// We can be told to yield, particularly by the supplemental allocator
@@ -329,17 +552,19 @@
}
}
- if (!$allocated) {
- if ($yields) {
- throw new PhabricatorWorkerYieldException(15);
- } else {
- throw new PhutilAggregateException(
- pht(
- 'Unable to acquire lease "%s" on any resource.',
- $lease->getPHID()),
- $exceptions);
- }
+ if ($allocated) {
+ return $allocated;
+ }
+
+ if ($yields) {
+ throw new PhabricatorWorkerYieldException(15);
}
+
+ throw new PhutilAggregateException(
+ pht(
+ 'Unable to acquire lease "%s" on any resource.',
+ $lease->getPHID()),
+ $exceptions);
}
@@ -426,7 +651,7 @@
* the lease.
* @task allocator
*/
- private function loadResourcesForAllocatingLease(
+ private function loadAcquirableResourcesForLease(
array $blueprints,
DrydockLease $lease) {
assert_instances_of($blueprints, 'DrydockBlueprint');
@@ -438,7 +663,6 @@
->withTypes(array($lease->getResourceType()))
->withStatuses(
array(
- DrydockResourceStatus::STATUS_PENDING,
DrydockResourceStatus::STATUS_ACTIVE,
))
->execute();
@@ -558,6 +782,13 @@
// If this resource was allocated as a pending resource, queue a task to
// activate it.
if ($resource->getStatus() == DrydockResourceStatus::STATUS_PENDING) {
+
+ $lease->addAllocatedResourcePHIDs(
+ array(
+ $resource->getPHID(),
+ ));
+ $lease->save();
+
PhabricatorWorker::scheduleTask(
'DrydockResourceUpdateWorker',
array(
@@ -636,26 +867,6 @@
DrydockLease $lease) {
$viewer = $this->getViewer();
- // If this lease is marked as already in the process of reclaiming a
- // resource, don't let it reclaim another one until the first reclaim
- // completes. This stops one lease from reclaiming a large number of
- // resources if the reclaims take a while to complete.
- $reclaiming_phid = $lease->getAttribute('drydock.reclaimingPHID');
- if ($reclaiming_phid) {
- $reclaiming_resource = id(new DrydockResourceQuery())
- ->setViewer($viewer)
- ->withPHIDs(array($reclaiming_phid))
- ->withStatuses(
- array(
- DrydockResourceStatus::STATUS_ACTIVE,
- DrydockResourceStatus::STATUS_RELEASED,
- ))
- ->executeOne();
- if ($reclaiming_resource) {
- return null;
- }
- }
-
$resources = id(new DrydockResourceQuery())
->setViewer($viewer)
->withBlueprintPHIDs(array($blueprint->getPHID()))
@@ -754,73 +965,6 @@
}
}
- private function newResourceForAcquisition(
- DrydockResource $resource,
- DrydockLease $lease) {
-
- // If the resource has no leases against it, never build a new one. This is
- // likely already a new resource that just activated.
- $viewer = $this->getViewer();
-
- $statuses = array(
- DrydockLeaseStatus::STATUS_PENDING,
- DrydockLeaseStatus::STATUS_ACQUIRED,
- DrydockLeaseStatus::STATUS_ACTIVE,
- );
-
- $leases = id(new DrydockLeaseQuery())
- ->setViewer($viewer)
- ->withResourcePHIDs(array($resource->getPHID()))
- ->withStatuses($statuses)
- ->setLimit(1)
- ->execute();
- if (!$leases) {
- return $resource;
- }
-
- // If we're about to get a lease on a resource, check if the blueprint
- // wants to allocate a supplemental resource. If it does, try to perform a
- // new allocation instead.
- $blueprint = $resource->getBlueprint();
- if (!$blueprint->shouldAllocateSupplementalResource($resource, $lease)) {
- return $resource;
- }
-
- // If the blueprint is already overallocated, we can't allocate a new
- // resource. Just return the existing resource.
- $remaining = $this->removeOverallocatedBlueprints(
- array($blueprint),
- $lease);
- if (!$remaining) {
- return $resource;
- }
-
- // Try to build a new resource.
- try {
- $new_resource = $this->allocateResource($blueprint, $lease);
- } catch (Exception $ex) {
- $blueprint->logEvent(
- DrydockResourceAllocationFailureLogType::LOGCONST,
- array(
- 'class' => get_class($ex),
- 'message' => $ex->getMessage(),
- ));
-
- return $resource;
- }
-
- // If we can't actually acquire the new resource yet, just yield.
- // (We could try to move forward with the original resource instead.)
- $acquirable = $this->removeUnacquirableResources(
- array($new_resource),
- $lease);
- if (!$acquirable) {
- throw new PhabricatorWorkerYieldException(15);
- }
-
- return $new_resource;
- }
-
/* -( Activating Leases )-------------------------------------------------- */
diff --git a/src/applications/drydock/worker/DrydockWorker.php b/src/applications/drydock/worker/DrydockWorker.php
--- a/src/applications/drydock/worker/DrydockWorker.php
+++ b/src/applications/drydock/worker/DrydockWorker.php
@@ -260,7 +260,7 @@
// Mark the lease as reclaiming this resource. It won't be allowed to start
// another reclaim as long as this resource is still in the process of
// being reclaimed.
- $lease->setAttribute('drydock.reclaimingPHID', $resource->getPHID());
+ $lease->addReclaimedResourcePHIDs(array($resource->getPHID()));
// When the resource releases, we we want to reawaken this task since it
// should (usually) be able to start building a new resource right away.

File Metadata

Mime Type
text/plain
Expires
Tue, Mar 25, 7:02 AM (5 d, 15 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7389960
Default Alt Text
D21807.diff (21 KB)

Event Timeline