diff --git a/src/daemon/PhutilDaemon.php b/src/daemon/PhutilDaemon.php index 1cc0df8..082421f 100644 --- a/src/daemon/PhutilDaemon.php +++ b/src/daemon/PhutilDaemon.php @@ -1,353 +1,383 @@ shouldExit()) { * if (work_available()) { * $this->willBeginWork(); * do_work(); * $this->sleep(0); * } else { * $this->willBeginIdle(); * $this->sleep(1); * } * } * * In particular, call @{method:willBeginWork} before becoming busy, and * @{method:willBeginIdle} when no work is available. If the daemon is launched * into an autoscale pool, this will cause the pool to automatically scale up * when busy and down when idle. * * See @{class:PhutilHighIntensityIntervalDaemon} for an example of a simple * autoscaling daemon. * * Launching a daemon which does not make these callbacks into an autoscale * pool will have no effect. * * @task overseer Communicating With the Overseer * @task autoscale Autoscaling Daemon Pools */ abstract class PhutilDaemon extends Phobject { const MESSAGETYPE_STDOUT = 'stdout'; const MESSAGETYPE_HEARTBEAT = 'heartbeat'; const MESSAGETYPE_BUSY = 'busy'; const MESSAGETYPE_IDLE = 'idle'; const MESSAGETYPE_DOWN = 'down'; + const MESSAGETYPE_HIBERNATE = 'hibernate'; const WORKSTATE_BUSY = 'busy'; const WORKSTATE_IDLE = 'idle'; private $argv; private $traceMode; private $traceMemory; private $verbose; private $notifyReceived; private $inGracefulShutdown; private $workState = null; private $idleSince = null; private $scaledownDuration; final public function setVerbose($verbose) { $this->verbose = $verbose; return $this; } final public function getVerbose() { return $this->verbose; } final public function setScaledownDuration($scaledown_duration) { $this->scaledownDuration = $scaledown_duration; return $this; } final public function getScaledownDuration() { return $this->scaledownDuration; } final public function __construct(array $argv) { $this->argv = $argv; $router = PhutilSignalRouter::getRouter(); $handler_key = 'daemon.term'; if 
(!$router->getHandler($handler_key)) { $handler = new PhutilCallbackSignalHandler( SIGTERM, __CLASS__.'::onTermSignal'); $router->installHandler($handler_key, $handler); } pcntl_signal(SIGINT, array($this, 'onGracefulSignal')); pcntl_signal(SIGUSR2, array($this, 'onNotifySignal')); // Without discard mode, this consumes unbounded amounts of memory. Keep // memory bounded. PhutilServiceProfiler::getInstance()->enableDiscardMode(); $this->beginStdoutCapture(); } final public function __destruct() { $this->endStdoutCapture(); } final public function stillWorking() { $this->emitOverseerMessage(self::MESSAGETYPE_HEARTBEAT, null); if ($this->traceMemory) { $daemon = get_class($this); fprintf( STDERR, "%s %s %s\n", '', $daemon, pht( 'Memory Usage: %s KB', new PhutilNumber(memory_get_usage() / 1024, 1))); } } final public function shouldExit() { return $this->inGracefulShutdown; } + final protected function shouldHibernate($duration) { + // Don't hibernate if we don't have very long to sleep. + if ($duration < 5) { + return false; + } + + // Never hibernate if we're part of a pool and could scale down instead. + // We only hibernate the last process to drop the pool size to zero. + if ($this->getScaledownDuration()) { + return false; + } + + // Don't hibernate for too long. 
+ $duration = min($duration, phutil_units('3 minutes in seconds')); + + $this->emitOverseerMessage( + self::MESSAGETYPE_HIBERNATE, + array( + 'duration' => $duration, + )); + + $this->log( + pht( + 'Preparing to hibernate for %s second(s).', + new PhutilNumber($duration))); + + return true; + } + final protected function sleep($duration) { $this->notifyReceived = false; $this->willSleep($duration); $this->stillWorking(); $scale_down = $this->getScaledownDuration(); $max_sleep = 60; if ($scale_down) { $max_sleep = min($max_sleep, $scale_down); } if ($scale_down) { if ($this->workState == self::WORKSTATE_IDLE) { $dur = (time() - $this->idleSince); $this->log(pht('Idle for %s seconds.', $dur)); } } while ($duration > 0 && !$this->notifyReceived && !$this->shouldExit()) { // If this is an autoscaling clone and we've been idle for too long, // we're going to scale the pool down by exiting and not restarting. The // DOWN message tells the overseer that we don't want to be restarted. if ($scale_down) { if ($this->workState == self::WORKSTATE_IDLE) { if ($this->idleSince && ($this->idleSince + $scale_down < time())) { $this->inGracefulShutdown = true; $this->emitOverseerMessage(self::MESSAGETYPE_DOWN, null); $this->log( pht( 'Daemon was idle for more than %s second(s), '. 
'scaling pool down.', new PhutilNumber($scale_down))); break; } } } sleep(min($duration, $max_sleep)); $duration -= $max_sleep; $this->stillWorking(); } } protected function willSleep($duration) { return; } public static function onTermSignal($signo) { self::didCatchSignal($signo); } final protected function getArgv() { return $this->argv; } final public function execute() { $this->willRun(); $this->run(); } abstract protected function run(); final public function setTraceMemory() { $this->traceMemory = true; return $this; } final public function getTraceMemory() { return $this->traceMemory; } final public function setTraceMode() { $this->traceMode = true; PhutilServiceProfiler::installEchoListener(); PhutilConsole::getConsole()->getServer()->setEnableLog(true); $this->didSetTraceMode(); return $this; } final public function getTraceMode() { return $this->traceMode; } final public function onGracefulSignal($signo) { self::didCatchSignal($signo); $this->inGracefulShutdown = true; } final public function onNotifySignal($signo) { self::didCatchSignal($signo); $this->notifyReceived = true; $this->onNotify($signo); } protected function onNotify($signo) { // This is a hook for subclasses. } protected function willRun() { // This is a hook for subclasses. } protected function didSetTraceMode() { // This is a hook for subclasses. 
} final protected function log($message) { if ($this->verbose) { $daemon = get_class($this); fprintf(STDERR, "%s %s %s\n", '', $daemon, $message); } } private static function didCatchSignal($signo) { $signame = phutil_get_signal_name($signo); fprintf( STDERR, "%s Caught signal %s (%s).\n", '', $signo, $signame); } /* -( Communicating With the Overseer )------------------------------------ */ private function beginStdoutCapture() { ob_start(array($this, 'didReceiveStdout'), 2); } private function endStdoutCapture() { ob_end_flush(); } public function didReceiveStdout($data) { if (!strlen($data)) { return ''; } return $this->encodeOverseerMessage(self::MESSAGETYPE_STDOUT, $data); } private function encodeOverseerMessage($type, $data) { $structure = array($type); if ($data !== null) { $structure[] = $data; } return json_encode($structure)."\n"; } private function emitOverseerMessage($type, $data) { $this->endStdoutCapture(); echo $this->encodeOverseerMessage($type, $data); $this->beginStdoutCapture(); } public static function errorListener($event, $value, array $metadata) { // If the caller has redirected the error log to a file, PHP won't output // messages to stderr, so the overseer can't capture them. Install a // listener which just echoes errors to stderr, so the overseer is always // aware of errors. $console = PhutilConsole::getConsole(); $message = idx($metadata, 'default_message'); if ($message) { $console->writeErr("%s\n", $message); } if (idx($metadata, 'trace')) { $trace = PhutilErrorHandler::formatStacktrace($metadata['trace']); $console->writeErr("%s\n", $trace); } } /* -( Autoscaling )-------------------------------------------------------- */ /** * Prepare to become busy. This may autoscale the pool up. * * This notifies the overseer that the daemon has become busy. If daemons * that are part of an autoscale pool are continuously busy for a prolonged * period of time, the overseer may scale up the pool. 
* * @return this * @task autoscale */ protected function willBeginWork() { if ($this->workState != self::WORKSTATE_BUSY) { $this->workState = self::WORKSTATE_BUSY; $this->idleSince = null; $this->emitOverseerMessage(self::MESSAGETYPE_BUSY, null); } return $this; } /** * Prepare to idle. This may autoscale the pool down. * * This notifies the overseer that the daemon is no longer busy. If daemons * that are part of an autoscale pool are idle for a prolonged period of * time, they may exit to scale the pool down. * * @return this * @task autoscale */ protected function willBeginIdle() { if ($this->workState != self::WORKSTATE_IDLE) { $this->workState = self::WORKSTATE_IDLE; $this->idleSince = time(); $this->emitOverseerMessage(self::MESSAGETYPE_IDLE, null); } return $this; } } diff --git a/src/daemon/PhutilDaemonHandle.php b/src/daemon/PhutilDaemonHandle.php index c81256d..6d4e5b1 100644 --- a/src/daemon/PhutilDaemonHandle.php +++ b/src/daemon/PhutilDaemonHandle.php @@ -1,458 +1,478 @@ } public static function newFromConfig(array $config) { PhutilTypeSpec::checkMap( $config, array( 'class' => 'string', 'argv' => 'optional list', 'load' => 'optional list', 'log' => 'optional string|null', 'down' => 'optional int', )); $config = $config + array( 'argv' => array(), 'load' => array(), 'log' => null, 'down' => 15, ); $daemon = new self(); $daemon->properties = $config; $daemon->daemonID = $daemon->generateDaemonID(); return $daemon; } public function setDaemonPool(PhutilDaemonPool $daemon_pool) { $this->pool = $daemon_pool; return $this; } public function getDaemonPool() { return $this->pool; } public function getBusyEpoch() { return $this->busyEpoch; } public function getDaemonClass() { return $this->getProperty('class'); } private function getProperty($key) { return idx($this->properties, $key); } public function setCommandLineArguments(array $arguments) { $this->argv = $arguments; return $this; } public function getCommandLineArguments() { return $this->argv; } public 
function getDaemonArguments() { return $this->getProperty('argv'); } public function didLaunch() { $this->restartAt = time(); $this->dispatchEvent( self::EVENT_DID_LAUNCH, array( 'argv' => $this->getCommandLineArguments(), 'explicitArgv' => $this->getDaemonArguments(), )); return $this; } public function isRunning() { return (bool)$this->future; } public function isDone() { return (!$this->shouldRestart && !$this->isRunning()); } public function getFuture() { return $this->future; } public function update() { if (!$this->isRunning()) { if (!$this->shouldRestart) { return; } if (!$this->restartAt || (time() < $this->restartAt)) { return; } if ($this->shouldShutdown) { return; } $this->startDaemonProcess(); } $future = $this->future; $result = null; if ($future->isReady()) { $result = $future->resolve(); } list($stdout, $stderr) = $future->read(); $future->discardBuffers(); if (strlen($stdout)) { $this->didReadStdout($stdout); } $stderr = trim($stderr); if (strlen($stderr)) { foreach (phutil_split_lines($stderr, false) as $line) { $this->logMessage('STDE', $line); } } if ($result !== null) { list($err) = $result; if ($err) { $this->logMessage('FAIL', pht('Process exited with error %s.', $err)); } else { $this->logMessage('DONE', pht('Process exited normally.')); } $this->future = null; if ($this->shouldShutdown) { $this->restartAt = null; $this->dispatchEvent(self::EVENT_WILL_EXIT); } else { $this->scheduleRestart(); } } $this->updateHeartbeatEvent(); $this->updateHangDetection(); } private function updateHeartbeatEvent() { if ($this->heartbeat > time()) { return; } $this->heartbeat = time() + $this->getHeartbeatEventFrequency(); $this->dispatchEvent(self::EVENT_DID_HEARTBEAT); } private function updateHangDetection() { if (!$this->isRunning()) { return; } if (time() > $this->deadline) { $this->logMessage('HANG', pht('Hang detected. 
Restarting process.')); $this->annihilateProcessGroup(); $this->scheduleRestart(); } } private function scheduleRestart() { - $this->logMessage('WAIT', pht('Waiting to restart process.')); - $this->restartAt = time() + self::getWaitBeforeRestart(); + // Wait a minimum of a few seconds before restarting, but we may wait + // longer if the daemon has initiated hibernation. + $default_restart = time() + self::getWaitBeforeRestart(); + if ($default_restart >= $this->restartAt) { + $this->restartAt = $default_restart; + } + + $this->logMessage( + 'WAIT', + pht( + 'Waiting %s second(s) to restart process.', + new PhutilNumber($this->restartAt - time()))); } /** * Generate a unique ID for this daemon. * * @return string A unique daemon ID. */ private function generateDaemonID() { return substr(getmypid().':'.Filesystem::readRandomCharacters(12), 0, 12); } public function getDaemonID() { return $this->daemonID; } public function getPID() { return $this->pid; } private function getCaptureBufferSize() { return 65535; } private function getRequiredHeartbeatFrequency() { return 86400; } public static function getWaitBeforeRestart() { return 5; } public static function getHeartbeatEventFrequency() { return 120; } private function getKillDelay() { return 3; } private function getDaemonCWD() { $root = dirname(phutil_get_library_root('phutil')); return $root.'/scripts/daemon/exec/'; } private function newExecFuture() { $class = $this->getDaemonClass(); $argv = $this->getCommandLineArguments(); $buffer_size = $this->getCaptureBufferSize(); // NOTE: PHP implements proc_open() by running 'sh -c'. On most systems this // is bash, but on Ubuntu it's dash. When you proc_open() using bash, you // get one new process (the command you ran). When you proc_open() using // dash, you get two new processes: the command you ran and a parent // "dash -c" (or "sh -c") process. This means that the child process's PID // is actually the 'dash' PID, not the command's PID. 
To avoid this, use // 'exec' to replace the shell process with the real process; without this, // the child will call posix_getppid(), be given the pid of the 'sh -c' // process, and send it SIGUSR1 to keepalive which will terminate it // immediately. We also won't be able to do process group management because // the shell process won't properly posix_setsid() so the pgid of the child // won't be meaningful. $config = $this->properties; unset($config['class']); $config = phutil_json_encode($config); return id(new ExecFuture('exec ./exec_daemon.php %s %Ls', $class, $argv)) ->setCWD($this->getDaemonCWD()) ->setStdoutSizeLimit($buffer_size) ->setStderrSizeLimit($buffer_size) ->write($config); } /** * Dispatch an event to event listeners. * * @param string Event type. * @param dict Event parameters. * @return void */ private function dispatchEvent($type, array $params = array()) { $data = array( 'id' => $this->getDaemonID(), 'daemonClass' => $this->getDaemonClass(), 'childPID' => $this->getPID(), ) + $params; $event = new PhutilEvent($type, $data); try { PhutilEventEngine::dispatchEvent($event); } catch (Exception $ex) { phlog($ex); } } private function annihilateProcessGroup() { $pid = $this->getPID(); $pgid = posix_getpgid($pid); if ($pid && $pgid) { posix_kill(-$pgid, SIGTERM); sleep($this->getKillDelay()); posix_kill(-$pgid, SIGKILL); $this->pid = null; } } private function startDaemonProcess() { $this->logMessage('INIT', pht('Starting process.')); $this->deadline = time() + $this->getRequiredHeartbeatFrequency(); $this->heartbeat = time() + self::getHeartbeatEventFrequency(); $this->stdoutBuffer = ''; $this->future = $this->newExecFuture(); $this->future->start(); $this->pid = $this->future->getPID(); } private function didReadStdout($data) { $this->stdoutBuffer .= $data; while (true) { $pos = strpos($this->stdoutBuffer, "\n"); if ($pos === false) { break; } $message = substr($this->stdoutBuffer, 0, $pos); $this->stdoutBuffer = substr($this->stdoutBuffer, $pos + 
1); try { $structure = phutil_json_decode($message); } catch (PhutilJSONParserException $ex) { $structure = array(); } switch (idx($structure, 0)) { case PhutilDaemon::MESSAGETYPE_STDOUT: $this->logMessage('STDO', idx($structure, 1)); break; case PhutilDaemon::MESSAGETYPE_HEARTBEAT: $this->deadline = time() + $this->getRequiredHeartbeatFrequency(); break; case PhutilDaemon::MESSAGETYPE_BUSY: if (!$this->busyEpoch) { $this->busyEpoch = time(); } break; case PhutilDaemon::MESSAGETYPE_IDLE: $this->busyEpoch = null; break; case PhutilDaemon::MESSAGETYPE_DOWN: // The daemon is exiting because it doesn't have enough work and it // is trying to scale the pool down. We should not restart it. $this->shouldRestart = false; $this->shouldShutdown = true; break; + case PhutilDaemon::MESSAGETYPE_HIBERNATE: + $config = idx($structure, 1); + $duration = (int)idx($config, 'duration', 0); + $this->restartAt = time() + $duration; + $this->logMessage( + 'ZZZZ', + pht( + 'Process is preparing to hibernate for %s second(s).', + new PhutilNumber($duration))); + break; default: // If we can't parse this or it isn't a message we understand, just // emit the raw message. $this->logMessage('STDO', pht(' %s', $message)); break; } } } public function didReceiveNotifySignal($signo) { $pid = $this->getPID(); if ($pid) { posix_kill($pid, $signo); } } public function didReceiveReloadSignal($signo) { $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = pht( 'Reloading in response to signal %d (%s).', $signo, $signame); } else { $sigmsg = pht( 'Reloading in response to signal %d.', $signo); } $this->logMessage('RELO', $sigmsg, $signo); // This signal means "stop the current process gracefully, then launch // a new identical process once it exits". This can be used to update // daemons after code changes (the new processes will run the new code) // without aborting any running tasks. 
// We SIGINT the daemon but don't set the shutdown flag, so it will // naturally be restarted after it exits, as though it had exited after an // unhandled exception. posix_kill($this->getPID(), SIGINT); } public function didReceiveGracefulSignal($signo) { $this->shouldShutdown = true; $this->shouldRestart = false; $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = pht( 'Graceful shutdown in response to signal %d (%s).', $signo, $signame); } else { $sigmsg = pht( 'Graceful shutdown in response to signal %d.', $signo); } $this->logMessage('DONE', $sigmsg, $signo); posix_kill($this->getPID(), SIGINT); } public function didReceiveTerminateSignal($signo) { $this->shouldShutdown = true; $this->shouldRestart = false; $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = pht( 'Shutting down in response to signal %s (%s).', $signo, $signame); } else { $sigmsg = pht('Shutting down in response to signal %s.', $signo); } $this->logMessage('EXIT', $sigmsg, $signo); $this->annihilateProcessGroup(); } private function logMessage($type, $message, $context = null) { $this->getDaemonPool()->logMessage($type, $message, $context); $this->dispatchEvent( self::EVENT_DID_LOG, array( 'type' => $type, 'message' => $message, 'context' => $context, )); } public function toDictionary() { return array( 'pid' => $this->getPID(), 'id' => $this->getDaemonID(), 'config' => $this->properties, ); } }