Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F13987076
D15677.id37778.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Referenced Files
None
Subscribers
None
D15677.id37778.diff
View Options
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -2241,6 +2241,7 @@
'PhabricatorDashboardViewController' => 'applications/dashboard/controller/PhabricatorDashboardViewController.php',
'PhabricatorDataCacheSpec' => 'applications/cache/spec/PhabricatorDataCacheSpec.php',
'PhabricatorDataNotAttachedException' => 'infrastructure/storage/lisk/PhabricatorDataNotAttachedException.php',
+ 'PhabricatorDatabaseHealthRecord' => 'infrastructure/cluster/PhabricatorDatabaseHealthRecord.php',
'PhabricatorDatabaseRef' => 'infrastructure/cluster/PhabricatorDatabaseRef.php',
'PhabricatorDatabaseSetupCheck' => 'applications/config/check/PhabricatorDatabaseSetupCheck.php',
'PhabricatorDatasourceEditField' => 'applications/transactions/editfield/PhabricatorDatasourceEditField.php',
@@ -6697,6 +6698,7 @@
'PhabricatorDashboardViewController' => 'PhabricatorDashboardController',
'PhabricatorDataCacheSpec' => 'PhabricatorCacheSpec',
'PhabricatorDataNotAttachedException' => 'Exception',
+ 'PhabricatorDatabaseHealthRecord' => 'Phobject',
'PhabricatorDatabaseRef' => 'Phobject',
'PhabricatorDatabaseSetupCheck' => 'PhabricatorSetupCheck',
'PhabricatorDatasourceEditField' => 'PhabricatorTokenizerEditField',
diff --git a/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php b/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php
--- a/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php
+++ b/src/applications/config/controller/PhabricatorConfigClusterDatabasesController.php
@@ -115,6 +115,29 @@
$replica_label,
);
+ $health = $database->getHealthRecord();
+ $health_up = $health->getUpEventCount();
+ $health_down = $health->getDownEventCount();
+
+ if ($health->getIsHealthy()) {
+ $health_icon = id(new PHUIIconView())
+ ->setIcon('fa-plus green');
+ } else {
+ $health_icon = id(new PHUIIconView())
+ ->setIcon('fa-times red');
+ }
+
+ $health_count = pht(
+ '%s / %s',
+ new PhutilNumber($health_up),
+ new PhutilNumber($health_up + $health_down));
+
+ $health_status = array(
+ $health_icon,
+ ' ',
+ $health_count,
+ );
+
$messages = array();
$conn_message = $database->getConnectionMessage();
@@ -136,10 +159,12 @@
$database->getUser(),
$connection,
$replication,
+ $health_status,
$messages,
);
}
+
$table = id(new AphrontTableView($rows))
->setNoDataString(
pht('Phabricator is not configured in cluster mode.'))
@@ -151,6 +176,7 @@
pht('User'),
pht('Connection'),
pht('Replication'),
+ pht('Health'),
pht('Messages'),
))
->setColumnClasses(
@@ -161,6 +187,7 @@
null,
null,
null,
+ null,
'wide',
));
diff --git a/src/infrastructure/cluster/PhabricatorDatabaseHealthRecord.php b/src/infrastructure/cluster/PhabricatorDatabaseHealthRecord.php
new file mode 100644
--- /dev/null
+++ b/src/infrastructure/cluster/PhabricatorDatabaseHealthRecord.php
@@ -0,0 +1,236 @@
+<?php
+
+final class PhabricatorDatabaseHealthRecord
+  extends Phobject {
+
+  private $ref;
+  private $shouldCheck;
+  private $isHealthy;
+  private $upEventCount;
+  private $downEventCount;
+
+  /**
+   * Load the shared health state for a database.
+   *
+   * Reading the state may also claim the next health check for this request;
+   * see getShouldCheck().
+   *
+   * @param PhabricatorDatabaseRef Database this record describes.
+   */
+  public function __construct(PhabricatorDatabaseRef $ref) {
+    $this->ref = $ref;
+    $this->readState();
+  }
+
+
+  /**
+   * Is the database currently healthy?
+   */
+  public function getIsHealthy() {
+    return $this->isHealthy;
+  }
+
+
+  /**
+   * Should this request check database health?
+   */
+  public function getShouldCheck() {
+    return $this->shouldCheck;
+  }
+
+
+  /**
+   * How many recent health checks were successful?
+   */
+  public function getUpEventCount() {
+    return $this->upEventCount;
+  }
+
+
+  /**
+   * How many recent health checks failed?
+   */
+  public function getDownEventCount() {
+    return $this->downEventCount;
+  }
+
+
+  /**
+   * Number of failures or successes we need to see in a row before we change
+   * the state.
+   */
+  public function getRequiredEventCount() {
+    return 5;
+  }
+
+
+  /**
+   * Seconds to wait between health checks.
+   */
+  public function getHealthCheckFrequency() {
+    return 3;
+  }
+
+
+  /**
+   * Record the outcome of a health check performed by this request.
+   *
+   * The result is appended to the shared event log, which is trimmed to the
+   * number of events given by getRequiredEventCount(). The stored up/down
+   * state only flips when that many events are retained and all agree.
+   *
+   * If the log already holds an event within getHealthCheckFrequency()
+   * seconds of now, another process raced us and this result is discarded.
+   * Either way, this object then mirrors the shared record and stops
+   * requesting a check.
+   *
+   * @param bool True if the database was reachable.
+   * @return this
+   */
+  public function didHealthCheck($result) {
+    $now = microtime(true);
+    $check_frequency = $this->getHealthCheckFrequency();
+    $event_count = $this->getRequiredEventCount();
+
+    $record = $this->readHealthRecord();
+
+    // If the log already has another nearby event, we raced with another
+    // process and our result can just be thrown away.
+    $raced = false;
+    foreach ($record['log'] as $event) {
+      $when = idx($event, 'timestamp');
+      if (($now - $when) <= $check_frequency) {
+        $raced = true;
+        break;
+      }
+    }
+
+    if (!$raced) {
+      $log = $record['log'];
+      $log[] = array(
+        'timestamp' => $now,
+        'up' => $result,
+      );
+
+      // Throw away older events which are now obsolete.
+      $log = array_slice($log, -$event_count);
+
+      $count_up = 0;
+      $count_down = 0;
+      foreach ($log as $event) {
+        if ($event['up']) {
+          $count_up++;
+        } else {
+          $count_down++;
+        }
+      }
+
+      // If all of the events are the same, change the state.
+      if ($count_up == $event_count) {
+        $record['up'] = true;
+      } else if ($count_down == $event_count) {
+        $record['up'] = false;
+      }
+
+      $record['log'] = $log;
+
+      $this->writeHealthRecord($record);
+    }
+
+    // Do this on the raced path too. Otherwise this object would keep asking
+    // for a check and keep reporting the state it loaded at construction.
+    $this->isHealthy = $record['up'];
+    $this->shouldCheck = false;
+    $this->updateStatistics($record);
+
+    return $this;
+  }
+
+
+  /**
+   * Load the shared record and decide whether this request should perform
+   * the next health check.
+   */
+  private function readState() {
+    $now = microtime(true);
+    $check_frequency = $this->getHealthCheckFrequency();
+
+    $record = $this->readHealthRecord();
+
+    $last_check = $record['lastCheck'];
+
+    if (($now - $last_check) >= $check_frequency) {
+      // Store the new check time right away, so a request which reads the
+      // record after this write sees a recent check and does not also check.
+      $record['lastCheck'] = $now;
+      $this->writeHealthRecord($record);
+      $this->shouldCheck = true;
+    } else {
+      $this->shouldCheck = false;
+    }
+
+    $this->isHealthy = $record['up'];
+    $this->updateStatistics($record);
+  }
+
+  /**
+   * Recount the up and down events in a record's log.
+   *
+   * @param map<string, wild> Health record, as from readHealthRecord().
+   */
+  private function updateStatistics(array $record) {
+    $this->upEventCount = 0;
+    $this->downEventCount = 0;
+    foreach ($record['log'] as $event) {
+      if ($event['up']) {
+        $this->upEventCount++;
+      } else {
+        $this->downEventCount++;
+      }
+    }
+  }
+
+  /**
+   * Build the cache key for this database, derived from its host and port.
+   */
+  private function getHealthRecordCacheKey() {
+    $ref = $this->ref;
+
+    $host = $ref->getHost();
+    $port = $ref->getPort();
+
+    return "cluster.db.health({$host}, {$port})";
+  }
+
+  /**
+   * Read this database's health record from the setup cache.
+   *
+   * When nothing usable is cached, returns a default record: marked up,
+   * never checked, with an empty log.
+   */
+  private function readHealthRecord() {
+    $cache = PhabricatorCaches::getSetupCache();
+    $cache_key = $this->getHealthRecordCacheKey();
+    $health_record = $cache->getKey($cache_key);
+
+    if (!is_array($health_record)) {
+      $health_record = array(
+        'up' => true,
+        'lastCheck' => 0,
+        'log' => array(),
+      );
+    }
+
+    return $health_record;
+  }
+
+  /**
+   * Write this database's health record to the setup cache.
+   */
+  private function writeHealthRecord(array $record) {
+    $cache = PhabricatorCaches::getSetupCache();
+    $cache_key = $this->getHealthRecordCacheKey();
+    $cache->setKey($cache_key, $record);
+  }
+
+}
diff --git a/src/infrastructure/cluster/PhabricatorDatabaseRef.php b/src/infrastructure/cluster/PhabricatorDatabaseRef.php
--- a/src/infrastructure/cluster/PhabricatorDatabaseRef.php
+++ b/src/infrastructure/cluster/PhabricatorDatabaseRef.php
@@ -30,6 +30,7 @@
private $replicaMessage;
private $replicaDelay;
+ private $healthRecord;
private $didFailToConnect;
public function setHost($host) {
@@ -326,7 +327,7 @@
return $this->newConnection(
array(
'retries' => 0,
- 'timeout' => 3,
+ 'timeout' => 2,
));
}
@@ -338,11 +339,24 @@
}
public function isSevered() {
- return $this->didFailToConnect;
+ if ($this->didFailToConnect) {
+ return true;
+ }
+
+ $record = $this->getHealthRecord();
+ $is_healthy = $record->getIsHealthy();
+ if (!$is_healthy) {
+ return true;
+ }
+
+ return false;
}
public function isReachable(AphrontDatabaseConnection $connection) {
- if ($this->isSevered()) {
+ $record = $this->getHealthRecord();
+ $should_check = $record->getShouldCheck();
+
+ if ($this->isSevered() && !$should_check) {
return false;
}
@@ -353,6 +367,10 @@
$reachable = false;
}
+ if ($should_check) {
+ $record->didHealthCheck($reachable);
+ }
+
if (!$reachable) {
$this->didFailToConnect = true;
}
@@ -360,6 +378,26 @@
return $reachable;
}
+  public function checkHealth() {
+    // Nothing to do unless the health record wants a check this request.
+    if (!$this->getHealthRecord()->getShouldCheck()) {
+      return $this;
+    }
+
+    // isReachable() reports the outcome to the health record while a check
+    // is due, so the return value is deliberately ignored here.
+    $this->isReachable($this->newManagementConnection());
+
+    return $this;
+  }
+
+  public function getHealthRecord() {
+    if ($this->healthRecord) {
+      return $this->healthRecord;
+    }
+    return $this->healthRecord = new PhabricatorDatabaseHealthRecord($this);
+  }
+
public static function getMasterDatabaseRef() {
$refs = self::getLiveRefs();
@@ -415,14 +453,26 @@
}
private function newConnection(array $options) {
+ // If we believe the database is unhealthy, don't spend as much time
+ // trying to connect to it, since it's likely to continue to fail and
+ // hammering it can only make the problem worse.
+ $record = $this->getHealthRecord();
+ if ($record->getIsHealthy()) {
+ $default_retries = 3;
+ $default_timeout = 10;
+ } else {
+ $default_retries = 0;
+ $default_timeout = 2;
+ }
+
$spec = $options + array(
'user' => $this->getUser(),
'pass' => $this->getPass(),
'host' => $this->getHost(),
'port' => $this->getPort(),
'database' => null,
- 'retries' => 3,
- 'timeout' => 15,
+ 'retries' => $default_retries,
+ 'timeout' => $default_timeout,
);
return PhabricatorEnv::newObjectFromConfig(
diff --git a/src/infrastructure/env/PhabricatorEnv.php b/src/infrastructure/env/PhabricatorEnv.php
--- a/src/infrastructure/env/PhabricatorEnv.php
+++ b/src/infrastructure/env/PhabricatorEnv.php
@@ -220,7 +220,10 @@
if (!$master) {
self::setReadOnly(true, self::READONLY_MASTERLESS);
} else if ($master->isSevered()) {
- self::setReadOnly(true, self::READONLY_SEVERED);
+ $master->checkHealth();
+ if ($master->isSevered()) {
+ self::setReadOnly(true, self::READONLY_SEVERED);
+ }
}
try {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Oct 22, 7:18 AM (4 w, 13 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6740808
Default Alt Text
D15677.id37778.diff (10 KB)
Attached To
Mode
D15677: Automatically sever databases after prolonged unreachability
Attached
Detach File
Event Timeline
Log In to Comment