diff --git a/scripts/daemon/launch_daemon.php b/scripts/daemon/launch_daemon.php index 3c3996a..4bee9fb 100755 --- a/scripts/daemon/launch_daemon.php +++ b/scripts/daemon/launch_daemon.php @@ -1,7 +1,9 @@ #!/usr/bin/env php run(); diff --git a/src/daemon/PhutilDaemonHandle.php b/src/daemon/PhutilDaemonHandle.php index 3de873c..e6ff223 100644 --- a/src/daemon/PhutilDaemonHandle.php +++ b/src/daemon/PhutilDaemonHandle.php @@ -1,422 +1,409 @@ overseer = $overseer; $this->daemonClass = $daemon_class; $this->argv = $argv; $this->config = $config; $this->restartAt = time(); $this->daemonID = $this->generateDaemonID(); $this->dispatchEvent( self::EVENT_DID_LAUNCH, array( 'argv' => $this->argv, 'explicitArgv' => idx($this->config, 'argv'), )); } public function isRunning() { return (bool)$this->future; } public function isDone() { return (!$this->shouldRestart && !$this->isRunning()); } public function getFuture() { return $this->future; } - public function setSilent($silent) { - $this->silent = $silent; - return $this; - } - - public function getSilent() { - return $this->silent; - } - public function setTraceMemory($trace_memory) { $this->traceMemory = $trace_memory; return $this; } public function getTraceMemory() { return $this->traceMemory; } public function update() { $this->updateMemory(); if (!$this->isRunning()) { if (!$this->shouldRestart) { return; } if (!$this->restartAt || (time() < $this->restartAt)) { return; } if ($this->shouldShutdown) { return; } $this->startDaemonProcess(); } $future = $this->future; $result = null; if ($future->isReady()) { $result = $future->resolve(); } list($stdout, $stderr) = $future->read(); $future->discardBuffers(); if (strlen($stdout)) { $this->didReadStdout($stdout); } $stderr = trim($stderr); if (strlen($stderr)) { foreach (phutil_split_lines($stderr, false) as $line) { $this->logMessage('STDE', $line); } } if ($result !== null) { list($err) = $result; if ($err) { $this->logMessage('FAIL', pht('Process exited with error %s.', $err)); } else { $this->logMessage('DONE', pht('Process exited normally.')); } $this->future = null; if ($this->shouldShutdown) { $this->restartAt = null; } else { $this->scheduleRestart(); } } $this->updateHeartbeatEvent(); $this->updateHangDetection(); } private function updateHeartbeatEvent() { if ($this->heartbeat > time()) { return; } $this->heartbeat = time() + $this->getHeartbeatEventFrequency(); $this->dispatchEvent(self::EVENT_DID_HEARTBEAT); } private function updateHangDetection() { if (!$this->isRunning()) { return; } if (time() > $this->deadline) { $this->logMessage('HANG', pht('Hang detected. Restarting process.')); $this->annihilateProcessGroup(); $this->scheduleRestart(); } } private function scheduleRestart() { $this->logMessage('WAIT', pht('Waiting to restart process.')); $this->restartAt = time() + self::getWaitBeforeRestart(); } /** * Generate a unique ID for this daemon. * * @return string A unique daemon ID. */ private function generateDaemonID() { return substr(getmypid().':'.Filesystem::readRandomCharacters(12), 0, 12); } public function getDaemonID() { return $this->daemonID; } public function getPID() { return $this->pid; } private function getCaptureBufferSize() { return 65535; } private function getRequiredHeartbeatFrequency() { return 86400; } public static function getWaitBeforeRestart() { return 5; } public static function getHeartbeatEventFrequency() { return 120; } private function getKillDelay() { return 3; } private function getDaemonCWD() { $root = dirname(phutil_get_library_root('phutil')); return $root.'/scripts/daemon/exec/'; } private function newExecFuture() { $class = $this->daemonClass; $argv = $this->argv; $buffer_size = $this->getCaptureBufferSize(); // NOTE: PHP implements proc_open() by running 'sh -c'. On most systems this // is bash, but on Ubuntu it's dash. When you proc_open() using bash, you // get one new process (the command you ran). When you proc_open() using // dash, you get two new processes: the command you ran and a parent // "dash -c" (or "sh -c") process. This means that the child process's PID // is actually the 'dash' PID, not the command's PID. To avoid this, use // 'exec' to replace the shell process with the real process; without this, // the child will call posix_getppid(), be given the pid of the 'sh -c' // process, and send it SIGUSR1 to keepalive which will terminate it // immediately. We also won't be able to do process group management because // the shell process won't properly posix_setsid() so the pgid of the child // won't be meaningful. return id(new ExecFuture('exec ./exec_daemon.php %s %Ls', $class, $argv)) ->setCWD($this->getDaemonCWD()) ->setStdoutSizeLimit($buffer_size) ->setStderrSizeLimit($buffer_size) ->write(json_encode($this->config)); } /** * Dispatch an event to event listeners. * * @param string Event type. * @param dict Event parameters. * @return void */ private function dispatchEvent($type, array $params = array()) { $data = array( 'id' => $this->daemonID, 'daemonClass' => $this->daemonClass, 'childPID' => $this->pid, ) + $params; $event = new PhutilEvent($type, $data); try { PhutilEventEngine::dispatchEvent($event); } catch (Exception $ex) { phlog($ex); } } private function annihilateProcessGroup() { $pid = $this->pid; $pgid = posix_getpgid($pid); if ($pid && $pgid) { posix_kill(-$pgid, SIGTERM); sleep($this->getKillDelay()); posix_kill(-$pgid, SIGKILL); $this->pid = null; } } private function updateMemory() { if ($this->traceMemory) { $this->logMessage( 'RAMS', pht( 'Overseer Memory Usage: %s KB', new PhutilNumber(memory_get_usage() / 1024, 1))); } } private function startDaemonProcess() { $this->logMessage('INIT', pht('Starting process.')); $this->deadline = time() + $this->getRequiredHeartbeatFrequency(); $this->heartbeat = time() + self::getHeartbeatEventFrequency(); $this->stdoutBuffer = ''; $this->future = $this->newExecFuture(); $this->future->start(); $this->pid = $this->future->getPID(); } private function didReadStdout($data) { $this->stdoutBuffer .= $data; while (true) { $pos = strpos($this->stdoutBuffer, "\n"); if ($pos === false) { break; } $message = substr($this->stdoutBuffer, 0, $pos); $this->stdoutBuffer = substr($this->stdoutBuffer, $pos + 1); try { $structure = phutil_json_decode($message); } catch (PhutilJSONParserException $ex) { $structure = array(); } switch (idx($structure, 0)) { case PhutilDaemon::MESSAGETYPE_STDOUT: $this->logMessage('STDO', idx($structure, 1)); break; case PhutilDaemon::MESSAGETYPE_HEARTBEAT: $this->deadline = time() + $this->getRequiredHeartbeatFrequency(); break; case PhutilDaemon::MESSAGETYPE_BUSY: $this->overseer->didBeginWork($this); break; case PhutilDaemon::MESSAGETYPE_IDLE: $this->overseer->didBeginIdle($this); break; case PhutilDaemon::MESSAGETYPE_DOWN: // The daemon is exiting because it doesn't have enough work and it // is trying to scale the pool down. We should not restart it. $this->shouldRestart = false; $this->shouldShutdown = true; break; default: // If we can't parse this or it isn't a message we understand, just // emit the raw message. $this->logMessage('STDO', pht(' %s', $message)); break; } } } public function didReceiveNotifySignal($signo) { $pid = $this->pid; if ($pid) { posix_kill($pid, $signo); } } public function didReceiveReloadSignal($signo) { $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = pht( 'Reloading in response to signal %d (%s).', $signo, $signame); } else { $sigmsg = pht( 'Reloading in response to signal %d.', $signo); } $this->logMessage('RELO', $sigmsg, $signo); // This signal means "stop the current process gracefully, then launch // a new identical process once it exits". This can be used to update // daemons after code changes (the new processes will run the new code) // without aborting any running tasks. // We SIGINT the daemon but don't set the shutdown flag, so it will // naturally be restarted after it exits, as though it had exited after an // unhandled exception. posix_kill($this->pid, SIGINT); } public function didReceiveGracefulSignal($signo) { $this->shouldShutdown = true; $this->shouldRestart = false; $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = pht( 'Graceful shutdown in response to signal %d (%s).', $signo, $signame); } else { $sigmsg = pht( 'Graceful shutdown in response to signal %d.', $signo); } $this->logMessage('DONE', $sigmsg, $signo); posix_kill($this->pid, SIGINT); } public function didReceiveTerminalSignal($signo) { $this->shouldShutdown = true; $this->shouldRestart = false; $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = pht( 'Shutting down in response to signal %s (%s).', $signo, $signame); } else { $sigmsg = pht('Shutting down in response to signal %s.', $signo); } $this->logMessage('EXIT', $sigmsg, $signo); $this->annihilateProcessGroup(); } private function logMessage($type, $message, $context = null) { - if (!$this->getSilent()) { - echo date('Y-m-d g:i:s A').' ['.$type.'] '.$message."\n"; - } - + $this->overseer->logMessage($type, $message, $context); $this->dispatchEvent( self::EVENT_DID_LOG, array( 'type' => $type, 'message' => $message, 'context' => $context, )); } public function didRemoveDaemon() { $this->dispatchEvent(self::EVENT_WILL_EXIT); } } diff --git a/src/daemon/PhutilDaemonOverseer.php b/src/daemon/PhutilDaemonOverseer.php index 9feb626..adf9928 100644 --- a/src/daemon/PhutilDaemonOverseer.php +++ b/src/daemon/PhutilDaemonOverseer.php @@ -1,506 +1,516 @@ enableDiscardMode(); $args = new PhutilArgumentParser($argv); $args->setTagline(pht('daemon overseer')); $args->setSynopsis(<<parseStandardArguments(); $args->parse( array( array( 'name' => 'trace-memory', 'help' => pht('Enable debug memory tracing.'), ), array( 'name' => 'verbose', 'help' => pht('Enable verbose activity logging.'), ), array( 'name' => 'label', 'short' => 'l', 'param' => 'label', 'help' => pht( 'Optional process label. Makes "%s" nicer, no behavioral effects.', 'ps'), ), )); $argv = array(); if ($args->getArg('trace')) { $this->traceMode = true; $argv[] = '--trace'; } if ($args->getArg('trace-memory')) { $this->traceMode = true; $this->traceMemory = true; $argv[] = '--trace-memory'; } $verbose = $args->getArg('verbose'); if ($verbose) { $this->verbose = true; $argv[] = '--verbose'; } $label = $args->getArg('label'); if ($label) { $argv[] = '-l'; $argv[] = $label; } $this->argv = $argv; if (function_exists('posix_isatty') && posix_isatty(STDIN)) { fprintf(STDERR, pht('Reading daemon configuration from stdin...')."\n"); } $config = @file_get_contents('php://stdin'); $config = id(new PhutilJSONParser())->parse($config); $this->libraries = idx($config, 'load'); $this->log = idx($config, 'log'); $this->daemonize = idx($config, 'daemonize'); $this->piddir = idx($config, 'piddir'); $this->config = $config; if (self::$instance) { throw new Exception( pht('You may not instantiate more than one Overseer per process.')); } self::$instance = $this; $this->startEpoch = time(); // Check this before we daemonize, since if it's an issue the child will // exit immediately. if ($this->piddir) { $dir = $this->piddir; try { Filesystem::assertWritable($dir); } catch (Exception $ex) { throw new Exception( pht( "Specified daemon PID directory ('%s') does not exist or is ". "not writable by the daemon user!", $dir)); } } if (!idx($config, 'daemons')) { throw new PhutilArgumentUsageException( pht('You must specify at least one daemon to start!')); } if ($this->log) { // NOTE: Now that we're committed to daemonizing, redirect the error // log if we have a `--log` parameter. Do this at the last moment // so as many setup issues as possible are surfaced. ini_set('error_log', $this->log); } if ($this->daemonize) { // We need to get rid of these or the daemon will hang when we TERM it // waiting for something to read the buffers. TODO: Learn how unix works. fclose(STDOUT); fclose(STDERR); ob_start(); $pid = pcntl_fork(); if ($pid === -1) { throw new Exception(pht('Unable to fork!')); } else if ($pid) { exit(0); } } $this->modules = PhutilDaemonOverseerModule::getAllModules(); - declare(ticks = 1); pcntl_signal(SIGUSR2, array($this, 'didReceiveNotifySignal')); pcntl_signal(SIGHUP, array($this, 'didReceiveReloadSignal')); pcntl_signal(SIGINT, array($this, 'didReceiveGracefulSignal')); pcntl_signal(SIGTERM, array($this, 'didReceiveTerminalSignal')); } public function addLibrary($library) { $this->libraries[] = $library; return $this; } public function run() { $this->daemons = array(); foreach ($this->config['daemons'] as $config) { $config += array( 'argv' => array(), 'autoscale' => array(), ); $daemon = new PhutilDaemonHandle( $this, $config['class'], $this->argv, array( 'log' => $this->log, 'argv' => $config['argv'], 'load' => $this->libraries, 'autoscale' => $config['autoscale'], )); - $daemon->setSilent((!$this->traceMode && !$this->verbose)); $daemon->setTraceMemory($this->traceMemory); $this->addDaemon($daemon, $config); } $should_reload = false; while (true) { foreach ($this->modules as $module) { try { if ($module->shouldReloadDaemons()) { + $this->logMessage( + 'RELO', + pht( + 'Reloading daemons (triggered by overseer module "%s").', + get_class($module))); $should_reload = true; } } catch (Exception $ex) { phlog($ex); } } + if ($should_reload) { $this->didReceiveReloadSignal(SIGHUP); $should_reload = false; } $futures = array(); foreach ($this->getDaemonHandles() as $daemon) { $daemon->update(); if ($daemon->isRunning()) { $futures[] = $daemon->getFuture(); } if ($daemon->isDone()) { $this->removeDaemon($daemon); } } $this->updatePidfile(); $this->updateAutoscale(); if ($futures) { $iter = id(new FutureIterator($futures)) ->setUpdateInterval(1); foreach ($iter as $future) { break; } } else { if ($this->inGracefulShutdown) { break; } sleep(1); } } exit($this->err); } private function addDaemon(PhutilDaemonHandle $daemon, array $config) { $id = $daemon->getDaemonID(); $this->daemons[$id] = array( 'handle' => $daemon, 'config' => $config, ); $autoscale_group = $this->getAutoscaleGroup($daemon); if ($autoscale_group) { $this->autoscale[$autoscale_group][$id] = true; } return $this; } private function removeDaemon(PhutilDaemonHandle $daemon) { $id = $daemon->getDaemonID(); $autoscale_group = $this->getAutoscaleGroup($daemon); if ($autoscale_group) { unset($this->autoscale[$autoscale_group][$id]); } unset($this->daemons[$id]); $daemon->didRemoveDaemon(); return $this; } private function getAutoscaleGroup(PhutilDaemonHandle $daemon) { return $this->getAutoscaleProperty($daemon, 'group'); } private function getAutoscaleProperty( PhutilDaemonHandle $daemon, $key, $default = null) { $id = $daemon->getDaemonID(); $autoscale = $this->daemons[$id]['config']['autoscale']; return idx($autoscale, $key, $default); } public function didBeginWork(PhutilDaemonHandle $daemon) { $id = $daemon->getDaemonID(); $busy = idx($this->daemons[$daemon->getDaemonID()], 'busy'); if (!$busy) { $this->daemons[$id]['busy'] = time(); } } public function didBeginIdle(PhutilDaemonHandle $daemon) { $id = $daemon->getDaemonID(); unset($this->daemons[$id]['busy']); } public function updateAutoscale() { foreach ($this->autoscale as $group => $daemons) { $daemon = $this->daemons[head_key($daemons)]['handle']; $scaleup_duration = $this->getAutoscaleProperty($daemon, 'up', 2); $max_pool_size = $this->getAutoscaleProperty($daemon, 'pool', 8); $reserve = $this->getAutoscaleProperty($daemon, 'reserve', 0); // Don't scale a group if it is already at the maximum pool size. if (count($daemons) >= $max_pool_size) { continue; } $should_scale = true; foreach ($daemons as $daemon_id => $ignored) { $busy = idx($this->daemons[$daemon_id], 'busy'); if (!$busy) { // At least one daemon in the group hasn't reported that it has // started work. $should_scale = false; break; } if ((time() - $busy) < $scaleup_duration) { // At least one daemon in the group was idle recently, so we have // not fullly $should_scale = false; break; } } // If we have a configured memory reserve for this pool, it tells us that // we should not scale up unless there's at least that much memory left // on the system (for example, a reserve of 0.25 means that 25% of system // memory must be free to autoscale). if ($should_scale && $reserve) { // On some systems this may be slightly more expensive than other // checks, so only do it once we're prepared to scale up. $memory = PhutilSystem::getSystemMemoryInformation(); $free_ratio = ($memory['free'] / $memory['total']); // If we don't have enough free memory, don't scale. if ($free_ratio <= $reserve) { continue; } } if ($should_scale) { $config = $this->daemons[$daemon_id]['config']; $config['autoscale']['clone'] = true; $clone = new PhutilDaemonHandle( $this, $config['class'], $this->argv, array( 'log' => $this->log, 'argv' => $config['argv'], 'load' => $this->libraries, 'autoscale' => $config['autoscale'], )); $this->addDaemon($clone, $config); // Don't scale more than one pool up per iteration. Otherwise, we could // break the memory barrier if we have a lot of pools and scale them // all up at once. return; } } } public function didReceiveNotifySignal($signo) { foreach ($this->getDaemonHandles() as $daemon) { $daemon->didReceiveNotifySignal($signo); } } public function didReceiveReloadSignal($signo) { foreach ($this->getDaemonHandles() as $daemon) { $daemon->didReceiveReloadSignal($signo); } } public function didReceiveGracefulSignal($signo) { // If we receive SIGINT more than once, interpret it like SIGTERM. if ($this->inGracefulShutdown) { return $this->didReceiveTerminalSignal($signo); } $this->inGracefulShutdown = true; foreach ($this->getDaemonHandles() as $daemon) { $daemon->didReceiveGracefulSignal($signo); } } public function didReceiveTerminalSignal($signo) { $this->err = 128 + $signo; if ($this->inAbruptShutdown) { exit($this->err); } $this->inAbruptShutdown = true; foreach ($this->getDaemonHandles() as $daemon) { $daemon->didReceiveTerminalSignal($signo); } } private function getDaemonHandles() { return ipull($this->daemons, 'handle'); } /** * Identify running daemons by examining the process table. This isn't * completely reliable, but can be used as a fallback if the pid files fail * or we end up with stray daemons by other means. * * Example output (array keys are process IDs): * * array( * 12345 => array( * 'type' => 'overseer', * 'command' => 'php launch_daemon.php --daemonize ...', * 'pid' => 12345, * ), * 12346 => array( * 'type' => 'daemon', * 'command' => 'php exec_daemon.php ...', * 'pid' => 12346, * ), * ); * * @return dict Map of PIDs to process information, identifying running * daemon processes. */ public static function findRunningDaemons() { $results = array(); list($err, $processes) = exec_manual('ps -o pid,command -a -x -w -w -w'); if ($err) { return $results; } $processes = array_filter(explode("\n", trim($processes))); foreach ($processes as $process) { list($pid, $command) = preg_split('/\s+/', trim($process), 2); $pattern = '/((launch|exec)_daemon.php|phd-daemon)/'; $matches = null; if (!preg_match($pattern, $command, $matches)) { continue; } switch ($matches[1]) { case 'exec_daemon.php': $type = 'daemon'; break; case 'launch_daemon.php': case 'phd-daemon': default: $type = 'overseer'; break; } $results[(int)$pid] = array( 'type' => $type, 'command' => $command, 'pid' => (int)$pid, ); } return $results; } private function updatePidfile() { if (!$this->piddir) { return; } $daemons = array(); foreach ($this->daemons as $daemon) { $handle = $daemon['handle']; $config = $daemon['config']; if (!$handle->isRunning()) { continue; } $daemons[] = array( 'pid' => $handle->getPID(), 'id' => $handle->getDaemonID(), 'config' => $config, ); } $pidfile = array( 'pid' => getmypid(), 'start' => $this->startEpoch, 'config' => $this->config, 'daemons' => $daemons, ); if ($pidfile !== $this->lastPidfile) { $this->lastPidfile = $pidfile; $pidfile_path = $this->piddir.'/daemon.'.getmypid(); Filesystem::writeFile($pidfile_path, json_encode($pidfile)); } } + public function logMessage($type, $message, $context = null) { + if ($this->traceMode || $this->verbose) { + error_log(date('Y-m-d g:i:s A').' ['.$type.'] '.$message); + } + } + }