diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -2241,6 +2241,7 @@ 'PhabricatorDashboardViewController' => 'applications/dashboard/controller/PhabricatorDashboardViewController.php', 'PhabricatorDataCacheSpec' => 'applications/cache/spec/PhabricatorDataCacheSpec.php', 'PhabricatorDataNotAttachedException' => 'infrastructure/storage/lisk/PhabricatorDataNotAttachedException.php', + 'PhabricatorDatabaseHealthRecord' => 'infrastructure/cluster/PhabricatorDatabaseHealthRecord.php', 'PhabricatorDatabaseRef' => 'infrastructure/cluster/PhabricatorDatabaseRef.php', 'PhabricatorDatabaseSetupCheck' => 'applications/config/check/PhabricatorDatabaseSetupCheck.php', 'PhabricatorDatasourceEditField' => 'applications/transactions/editfield/PhabricatorDatasourceEditField.php', @@ -6697,6 +6698,7 @@ 'PhabricatorDashboardViewController' => 'PhabricatorDashboardController', 'PhabricatorDataCacheSpec' => 'PhabricatorCacheSpec', 'PhabricatorDataNotAttachedException' => 'Exception', + 'PhabricatorDatabaseHealthRecord' => 'Phobject', 'PhabricatorDatabaseRef' => 'Phobject', 'PhabricatorDatabaseSetupCheck' => 'PhabricatorSetupCheck', 'PhabricatorDatasourceEditField' => 'PhabricatorTokenizerEditField', diff --git a/src/applications/cache/PhabricatorCaches.php b/src/applications/cache/PhabricatorCaches.php --- a/src/applications/cache/PhabricatorCaches.php +++ b/src/applications/cache/PhabricatorCaches.php @@ -174,6 +174,11 @@ * @task setup */ private static function buildSetupCaches() { + // If this is the CLI, skip the setup caches and return an empty list. + if (php_sapi_name() == 'cli') { + return array(); + } + // In most cases, we should have APC. This is an ideal cache for our // purposes -- it's fast and empties on server restart. 
$apc = new PhutilAPCKeyValueCache(); diff --git a/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php b/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php --- a/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php +++ b/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php @@ -115,6 +115,29 @@ $replica_label, ); + $health = $database->getHealthRecord(); + $health_up = $health->getUpEventCount(); + $health_down = $health->getDownEventCount(); + + if ($health->getIsHealthy()) { + $health_icon = id(new PHUIIconView()) + ->setIcon('fa-plus green'); + } else { + $health_icon = id(new PHUIIconView()) + ->setIcon('fa-times red'); + } + + $health_count = pht( + '%s / %s', + new PhutilNumber($health_up), + new PhutilNumber($health_up + $health_down)); + + $health_status = array( + $health_icon, + ' ', + $health_count, + ); + $messages = array(); $conn_message = $database->getConnectionMessage(); @@ -136,10 +159,12 @@ $database->getUser(), $connection, $replication, + $health_status, $messages, ); } + $table = id(new AphrontTableView($rows)) ->setNoDataString( pht('Phabricator is not configured in cluster mode.')) @@ -151,6 +176,7 @@ pht('User'), pht('Connection'), pht('Replication'), + pht('Health'), pht('Messages'), )) ->setColumnClasses( @@ -161,6 +187,7 @@ null, null, null, + null, 'wide', )); diff --git a/src/infrastructure/cluster/PhabricatorDatabaseHealthRecord.php b/src/infrastructure/cluster/PhabricatorDatabaseHealthRecord.php new file mode 100644 --- /dev/null +++ b/src/infrastructure/cluster/PhabricatorDatabaseHealthRecord.php @@ -0,0 +1,185 @@ +ref = $ref; + $this->readState(); + } + + + /** + * Is the database currently healthy? + */ + public function getIsHealthy() { + return $this->isHealthy; + } + + + /** + * Should this request check database health? 
+ */ + public function getShouldCheck() { + return $this->shouldCheck; + } + + + /** + * How many recent health checks were successful? + */ + public function getUpEventCount() { + return $this->upEventCount; + } + + + /** + * How many recent health checks failed? + */ + public function getDownEventCount() { + return $this->downEventCount; + } + + + /** + * Number of failures or successes we need to see in a row before we change + * the state. + */ + public function getRequiredEventCount() { + return 5; + } + + + /** + * Minimum number of seconds to wait between health checks. + */ + public function getHealthCheckFrequency() { + return 3; + } + + + public function didHealthCheck($result) { + $now = microtime(true); + $check_frequency = $this->getHealthCheckFrequency(); + $event_count = $this->getRequiredEventCount(); + + $record = $this->readHealthRecord(); + + $log = $record['log']; + foreach ($log as $key => $event) { + $when = idx($event, 'timestamp'); + + // If the log already has another nearby event, just ignore this one. + // We raced with another process and our result can just be thrown away. + if (($now - $when) <= $check_frequency) { + return $this; + } + } + + $log[] = array( + 'timestamp' => $now, + 'up' => $result, + ); + + // Throw away older events which are now obsolete. + $log = array_slice($log, -$event_count); + + $count_up = 0; + $count_down = 0; + foreach ($log as $event) { + if ($event['up']) { + $count_up++; + } else { + $count_down++; + } + } + + // If the log is full and all of the events agree, change the state. 
+ if ($count_up == $event_count) { + $record['up'] = true; + } else if ($count_down == $event_count) { + $record['up'] = false; + } + + $record['log'] = $log; + + $this->writeHealthRecord($record); + + $this->isHealthy = $record['up']; + $this->shouldCheck = false; + $this->updateStatistics($record); + + return $this; + } + + + private function readState() { + $now = microtime(true); + $check_frequency = $this->getHealthCheckFrequency(); + + $record = $this->readHealthRecord(); + + $last_check = $record['lastCheck']; + + if (($now - $last_check) >= $check_frequency) { + $record['lastCheck'] = $now; + $this->writeHealthRecord($record); + $this->shouldCheck = true; + } else { + $this->shouldCheck = false; + } + + $this->isHealthy = $record['up']; + $this->updateStatistics($record); + } + + private function updateStatistics(array $record) { + $this->upEventCount = 0; + $this->downEventCount = 0; + foreach ($record['log'] as $event) { + if ($event['up']) { + $this->upEventCount++; + } else { + $this->downEventCount++; + } + } + } + + private function getHealthRecordCacheKey() { + $ref = $this->ref; + + $host = $ref->getHost(); + $port = $ref->getPort(); + + return "cluster.db.health({$host}, {$port})"; + } + + private function readHealthRecord() { + $cache = PhabricatorCaches::getSetupCache(); + $cache_key = $this->getHealthRecordCacheKey(); + $health_record = $cache->getKey($cache_key); + + if (!is_array($health_record)) { + $health_record = array( + 'up' => true, + 'lastCheck' => 0, + 'log' => array(), + ); + } + + return $health_record; + } + + private function writeHealthRecord(array $record) { + $cache = PhabricatorCaches::getSetupCache(); + $cache_key = $this->getHealthRecordCacheKey(); + $cache->setKey($cache_key, $record); + } + +} diff --git a/src/infrastructure/cluster/PhabricatorDatabaseRef.php b/src/infrastructure/cluster/PhabricatorDatabaseRef.php --- a/src/infrastructure/cluster/PhabricatorDatabaseRef.php +++ 
b/src/infrastructure/cluster/PhabricatorDatabaseRef.php @@ -30,6 +30,7 @@ private $replicaMessage; private $replicaDelay; + private $healthRecord; private $didFailToConnect; public function setHost($host) { @@ -326,7 +327,7 @@ return $this->newConnection( array( 'retries' => 0, - 'timeout' => 3, + 'timeout' => 2, )); } @@ -338,11 +339,24 @@ } public function isSevered() { - return $this->didFailToConnect; + if ($this->didFailToConnect) { + return true; + } + + $record = $this->getHealthRecord(); + $is_healthy = $record->getIsHealthy(); + if (!$is_healthy) { + return true; + } + + return false; } public function isReachable(AphrontDatabaseConnection $connection) { - if ($this->isSevered()) { + $record = $this->getHealthRecord(); + $should_check = $record->getShouldCheck(); + + if ($this->isSevered() && !$should_check) { return false; } @@ -353,6 +367,10 @@ $reachable = false; } + if ($should_check) { + $record->didHealthCheck($reachable); + } + if (!$reachable) { $this->didFailToConnect = true; } @@ -360,6 +378,26 @@ return $reachable; } + public function checkHealth() { + $health = $this->getHealthRecord(); + + $should_check = $health->getShouldCheck(); + if ($should_check) { + // This does an implicit health update. + $connection = $this->newManagementConnection(); + $this->isReachable($connection); + } + + return $this; + } + + public function getHealthRecord() { + if (!$this->healthRecord) { + $this->healthRecord = new PhabricatorDatabaseHealthRecord($this); + } + return $this->healthRecord; + } + public static function getMasterDatabaseRef() { $refs = self::getLiveRefs(); @@ -415,14 +453,26 @@ } private function newConnection(array $options) { + // If we believe the database is unhealthy, don't spend as much time + // trying to connect to it, since it's likely to continue to fail and + // hammering it can only make the problem worse. 
+ $record = $this->getHealthRecord(); + if ($record->getIsHealthy()) { + $default_retries = 3; + $default_timeout = 10; + } else { + $default_retries = 0; + $default_timeout = 2; + } + $spec = $options + array( 'user' => $this->getUser(), 'pass' => $this->getPass(), 'host' => $this->getHost(), 'port' => $this->getPort(), 'database' => null, - 'retries' => 3, - 'timeout' => 15, + 'retries' => $default_retries, + 'timeout' => $default_timeout, ); return PhabricatorEnv::newObjectFromConfig( diff --git a/src/infrastructure/env/PhabricatorEnv.php b/src/infrastructure/env/PhabricatorEnv.php --- a/src/infrastructure/env/PhabricatorEnv.php +++ b/src/infrastructure/env/PhabricatorEnv.php @@ -220,7 +220,10 @@ if (!$master) { self::setReadOnly(true, self::READONLY_MASTERLESS); } else if ($master->isSevered()) { - self::setReadOnly(true, self::READONLY_SEVERED); + $master->checkHealth(); + if ($master->isSevered()) { + self::setReadOnly(true, self::READONLY_SEVERED); + } } try {