diff --git a/scripts/test/prompt.php b/scripts/test/prompt.php index b58757e..7c08b9a 100755 --- a/scripts/test/prompt.php +++ b/scripts/test/prompt.php @@ -1,35 +1,35 @@ #!/usr/bin/env php setTagline('test console prompting'); $args->setSynopsis(<<parseStandardArguments(); $args->parse( array( array( 'name' => 'history', 'param' => 'file', 'default' => '', 'help' => 'Use specified history __file__.', ), array( 'name' => 'prompt', 'param' => 'text', 'default' => 'Enter some text:', - 'help' => 'Change the prompt text to __text__.' + 'help' => 'Change the prompt text to __text__.', ), )); $result = phutil_console_prompt( $args->getArg('prompt'), $args->getArg('history')); $console = PhutilConsole::getConsole(); $console->writeOut("Input is: %s\n", $result); diff --git a/src/cache/__tests__/PhutilKeyValueCacheTestCase.php b/src/cache/__tests__/PhutilKeyValueCacheTestCase.php index 0de1ad8..db74c73 100644 --- a/src/cache/__tests__/PhutilKeyValueCacheTestCase.php +++ b/src/cache/__tests__/PhutilKeyValueCacheTestCase.php @@ -1,266 +1,267 @@ doCacheTest($cache); $cache->destroyCache(); } public function testInRequestCacheLimit() { $cache = new PhutilInRequestKeyValueCache(); $cache->setLimit(4); $cache->setKey(1, 1); $cache->setKey(2, 2); $cache->setKey(3, 3); $cache->setKey(4, 4); $this->assertEqual( array( 1 => 1, 2 => 2, 3 => 3, 4 => 4, ), $cache->getAllKeys()); $cache->setKey(5, 5); $this->assertEqual( array( 2 => 2, 3 => 3, 4 => 4, 5 => 5, ), $cache->getAllKeys()); } public function testOnDiskCache() { $cache = new PhutilOnDiskKeyValueCache(); $cache->setCacheFile(new TempFile()); $this->doCacheTest($cache); $cache->destroyCache(); } public function testAPCCache() { $cache = new PhutilAPCKeyValueCache(); if (!$cache->isAvailable()) { $this->assertSkipped('Cache not available.'); } $this->doCacheTest($cache); } public function testDirectoryCache() { $cache = new PhutilDirectoryKeyValueCache(); $dir = Filesystem::createTemporaryDirectory(); 
$cache->setCacheDirectory($dir); $this->doCacheTest($cache); $cache->destroyCache(); } public function testDirectoryCacheSpecialDirectoryRules() { $cache = new PhutilDirectoryKeyValueCache(); $dir = Filesystem::createTemporaryDirectory(); $dir = $dir.'/dircache/'; $cache->setCacheDirectory($dir); $cache->setKey('a', 1); $this->assertEqual(true, Filesystem::pathExists($dir.'/a.cache')); $cache->setKey('a/b', 1); $this->assertEqual(true, Filesystem::pathExists($dir.'/a/')); $this->assertEqual(true, Filesystem::pathExists($dir.'/a/b.cache')); $cache->deleteKey('a/b'); $this->assertEqual(false, Filesystem::pathExists($dir.'/a/')); $this->assertEqual(false, Filesystem::pathExists($dir.'/a/b.cache')); $cache->destroyCache(); $this->assertEqual(false, Filesystem::pathExists($dir)); } public function testNamespaceCache() { $namespace = 'namespace'.mt_rand(); $in_request_cache = new PhutilInRequestKeyValueCache(); $cache = new PhutilKeyValueCacheNamespace($in_request_cache); $cache->setNamespace($namespace); $test_info = get_class($cache); $keys = array( 'key1' => mt_rand(), 'key2' => '', - 'key3' => 'Phabricator'); + 'key3' => 'Phabricator', + ); $cache->setKeys($keys); $cached_keys = $in_request_cache->getAllKeys(); foreach ($keys as $key => $value) { $cached_key = $namespace.':'.$key; $this->assertTrue( isset($cached_keys[$cached_key]), $test_info); $this->assertEqual( $value, $cached_keys[$cached_key], $test_info); } $cache->destroyCache(); $this->doCacheTest($cache); $cache->destroyCache(); } public function testCacheStack() { $req_cache = new PhutilInRequestKeyValueCache(); $disk_cache = new PhutilOnDiskKeyValueCache(); $disk_cache->setCacheFile(new TempFile()); $apc_cache = new PhutilAPCKeyValueCache(); $stack = array( $req_cache, $disk_cache, ); if ($apc_cache->isAvailable()) { $stack[] = $apc_cache; } $cache = new PhutilKeyValueCacheStack(); $cache->setCaches($stack); $this->doCacheTest($cache); $disk_cache->destroyCache(); $req_cache->destroyCache(); } private 
function doCacheTest(PhutilKeyValueCache $cache) { $key1 = 'test:'.mt_rand(); $key2 = 'test:'.mt_rand(); $default = 'cache-miss'; $value1 = 'cache-hit1'; $value2 = 'cache-hit2'; $test_info = get_class($cache); // Test that we miss correctly on missing values. $this->assertEqual( $default, $cache->getKey($key1, $default), $test_info); $this->assertEqual( array( ), $cache->getKeys(array($key1, $key2)), $test_info); // Test that we can set individual keys. $cache->setKey($key1, $value1); $this->assertEqual( $value1, $cache->getKey($key1, $default), $test_info); $this->assertEqual( array( $key1 => $value1, ), $cache->getKeys(array($key1, $key2)), $test_info); // Test that we can delete individual keys. $cache->deleteKey($key1); $this->assertEqual( $default, $cache->getKey($key1, $default), $test_info); $this->assertEqual( array( ), $cache->getKeys(array($key1, $key2)), $test_info); // Test that we can set multiple keys. $cache->setKeys( array( $key1 => $value1, $key2 => $value2, )); $this->assertEqual( $value1, $cache->getKey($key1, $default), $test_info); $this->assertEqual( array( $key1 => $value1, $key2 => $value2, ), $cache->getKeys(array($key1, $key2)), $test_info); // Test that we can delete multiple keys. $cache->deleteKeys(array($key1, $key2)); $this->assertEqual( $default, $cache->getKey($key1, $default), $test_info); $this->assertEqual( array( ), $cache->getKeys(array($key1, $key2)), $test_info); // NOTE: The TTL tests are necessarily slow (we must sleep() through the // TTLs) and do not work with APC (it does not TTL until the next request) // so they're disabled by default. If you're developing the cache stack, // it may be useful to run them. return; // Test that keys expire when they TTL. 
$cache->setKey($key1, $value1, 1); $cache->setKey($key2, $value2, 5); $this->assertEqual($value1, $cache->getKey($key1, $default)); $this->assertEqual($value2, $cache->getKey($key2, $default)); sleep(2); $this->assertEqual($default, $cache->getKey($key1, $default)); $this->assertEqual($value2, $cache->getKey($key2, $default)); // Test that setting a 0 TTL overwrites a nonzero TTL. $cache->setKey($key1, $value1, 1); $this->assertEqual($value1, $cache->getKey($key1, $default)); $cache->setKey($key1, $value1, 0); $this->assertEqual($value1, $cache->getKey($key1, $default)); sleep(2); $this->assertEqual($value1, $cache->getKey($key1, $default)); } } diff --git a/src/daemon/PhutilDaemonOverseer.php b/src/daemon/PhutilDaemonOverseer.php index 07556a0..4d5d746 100644 --- a/src/daemon/PhutilDaemonOverseer.php +++ b/src/daemon/PhutilDaemonOverseer.php @@ -1,507 +1,508 @@ enableDiscardMode(); $original_argv = $argv; $args = new PhutilArgumentParser($argv); $args->setTagline('daemon overseer'); $args->setSynopsis(<<parseStandardArguments(); $args->parsePartial( array( array( 'name' => 'trace-memory', 'help' => 'Enable debug memory tracing.', ), array( 'name' => 'log', 'param' => 'file', 'help' => 'Send output to __file__.', ), array( 'name' => 'daemonize', 'help' => 'Run in the background.', ), array( 'name' => 'phd', 'param' => 'dir', 'help' => 'Write PID information to __dir__.', ), array( 'name' => 'verbose', 'help' => 'Enable verbose activity logging.', ), array( 'name' => 'load-phutil-library', 'param' => 'library', 'repeat' => true, 'help' => 'Load __library__.', ), )); $argv = array(); $more = $args->getUnconsumedArgumentVector(); $this->daemon = array_shift($more); if (!$this->daemon) { $args->printHelpAndExit(); } if ($args->getArg('trace')) { $this->traceMode = true; $argv[] = '--trace'; } if ($args->getArg('trace-memory')) { $this->traceMode = true; $this->traceMemory = true; $argv[] = '--trace-memory'; } if ($args->getArg('load-phutil-library')) { foreach 
($args->getArg('load-phutil-library') as $library) { $argv[] = '--load-phutil-library='.$library; } } $log = $args->getArg('log'); if ($log) { ini_set('error_log', $log); $argv[] = '--log='.$log; } $verbose = $args->getArg('verbose'); if ($verbose) { $this->verbose = true; $argv[] = '--verbose'; } $this->daemonize = $args->getArg('daemonize'); $this->phddir = $args->getArg('phd'); $this->argv = $argv; $this->moreArgs = coalesce($more, array()); error_log("Bringing daemon '{$this->daemon}' online..."); if (self::$instance) { throw new Exception( 'You may not instantiate more than one Overseer per process.'); } self::$instance = $this; if ($this->daemonize) { // We need to get rid of these or the daemon will hang when we TERM it // waiting for something to read the buffers. TODO: Learn how unix works. fclose(STDOUT); fclose(STDERR); ob_start(); $pid = pcntl_fork(); if ($pid === -1) { throw new Exception('Unable to fork!'); } else if ($pid) { exit(0); } } if ($this->phddir) { $desc = array( 'name' => $this->daemon, 'argv' => $this->moreArgs, 'pid' => getmypid(), 'start' => time(), ); Filesystem::writeFile( $this->phddir.'/daemon.'.getmypid(), json_encode($desc)); } $this->daemonID = $this->generateDaemonID(); $this->dispatchEvent( self::EVENT_DID_LAUNCH, array( 'argv' => array_slice($original_argv, 1), - 'explicitArgv' => $this->moreArgs)); + 'explicitArgv' => $this->moreArgs, + )); declare(ticks = 1); pcntl_signal(SIGUSR1, array($this, 'didReceiveKeepaliveSignal')); pcntl_signal(SIGUSR2, array($this, 'didReceiveNotifySignal')); pcntl_signal(SIGINT, array($this, 'didReceiveGracefulSignal')); pcntl_signal(SIGTERM, array($this, 'didReceiveTerminalSignal')); } public function run() { if ($this->shouldRunSilently()) { echo "Running daemon '{$this->daemon}' silently. Use '--trace' or ". 
"'--verbose' to produce debugging output.\n"; } $root = phutil_get_library_root('phutil'); $root = dirname($root); $exec_dir = $root.'/scripts/daemon/exec/'; // NOTE: PHP implements proc_open() by running 'sh -c'. On most systems this // is bash, but on Ubuntu it's dash. When you proc_open() using bash, you // get one new process (the command you ran). When you proc_open() using // dash, you get two new processes: the command you ran and a parent // "dash -c" (or "sh -c") process. This means that the child process's PID // is actually the 'dash' PID, not the command's PID. To avoid this, use // 'exec' to replace the shell process with the real process; without this, // the child will call posix_getppid(), be given the pid of the 'sh -c' // process, and send it SIGUSR1 to keepalive which will terminate it // immediately. We also won't be able to do process group management because // the shell process won't properly posix_setsid() so the pgid of the child // won't be meaningful. // Format the exec command, which looks something like: // // exec ./exec_daemon DaemonName --trace -- --no-discovery $argv = array(); $argv[] = csprintf('exec ./exec_daemon.php %s', $this->daemon); foreach ($this->argv as $k => $arg) { $argv[] = csprintf('%s', $arg); } $argv[] = '--'; foreach ($this->moreArgs as $k => $arg) { $argv[] = csprintf('%s', $arg); } $command = implode(' ', $argv); while (true) { $this->logMessage('INIT', 'Starting process.'); $future = new ExecFuture('%C', $command); $future->setCWD($exec_dir); $future->setStdoutSizeLimit($this->captureBufferSize); $future->setStderrSizeLimit($this->captureBufferSize); $this->deadline = time() + $this->deadlineTimeout; $this->heartbeat = time() + self::HEARTBEAT_WAIT; $future->isReady(); $this->childPID = $future->getPID(); do { do { if ($this->traceMemory) { $memuse = number_format(memory_get_usage() / 1024, 1); $this->logMessage('RAMS', 'Overseer Memory Usage: '.$memuse.' 
KB'); } // We need a shortish timeout here so we can run the tick handler // frequently in order to process signals. $result = $future->resolve(1); list($stdout, $stderr) = $future->read(); $stdout = trim($stdout); $stderr = trim($stderr); if (strlen($stdout)) { $this->logMessage('STDO', $stdout); } if (strlen($stderr)) { $this->logMessage('STDE', $stderr); } $future->discardBuffers(); if ($result !== null) { list($err) = $result; if ($err) { $this->logMessage( 'FAIL', 'Process exited with error '.$err.'.', $err); } else { $this->logMessage('DONE', 'Process exited successfully.'); } break 2; } if ($this->heartbeat < time()) { $this->heartbeat = time() + self::HEARTBEAT_WAIT; $this->dispatchEvent(self::EVENT_DID_HEARTBEAT); } } while (time() < $this->deadline); $this->logMessage('HANG', 'Hang detected. Restarting process.'); $this->annihilateProcessGroup(); } while (false); if ($this->inGracefulShutdown) { // If we just exited because of a graceful shutdown, break now. break; } $this->logMessage('WAIT', 'Waiting to restart process.'); sleep(self::RESTART_WAIT); if ($this->inGracefulShutdown) { // If we were awakend by a graceful shutdown, break now. break; } } // This is a clean exit after a graceful shutdown. $this->dispatchEvent(self::EVENT_WILL_EXIT); exit(0); } public function didReceiveNotifySignal($signo) { $pid = $this->childPID; if ($pid) { posix_kill($pid, $signo); } } public function didReceiveKeepaliveSignal($signo) { $this->deadline = time() + $this->deadlineTimeout; } public function didReceiveGracefulSignal($signo) { // If we receive SIGINT more than once, interpret it like SIGTERM. 
if ($this->inGracefulShutdown) { return $this->didReceiveTerminalSignal($signo); } $this->inGracefulShutdown = true; $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = pht( 'Graceful shutdown in response to signal %d (%s).', $signo, $signame); } else { $sigmsg = pht( 'Graceful shutdown in response to signal %d.', $signo); } $this->logMessage('DONE', $sigmsg, $signo); $this->gracefulProcessGroup(); } public function didReceiveTerminalSignal($signo) { if ($this->inAbruptShutdown) { exit(128 + $signo); } $this->inAbruptShutdown = true; $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = "Shutting down in response to signal {$signo} ({$signame})."; } else { $sigmsg = "Shutting down in response to signal {$signo}."; } $this->logMessage('EXIT', $sigmsg, $signo); @fflush(STDOUT); @fflush(STDERR); @fclose(STDOUT); @fclose(STDERR); $this->annihilateProcessGroup(); $this->dispatchEvent(self::EVENT_WILL_EXIT); exit(128 + $signo); } private function logMessage($type, $message, $context = null) { if (!$this->shouldRunSilently()) { echo date('Y-m-d g:i:s A').' ['.$type.'] '.$message."\n"; } $this->dispatchEvent( self::EVENT_DID_LOG, array( 'type' => $type, 'message' => $message, 'context' => $context, )); } private function shouldRunSilently() { if ($this->traceMode || $this->verbose) { return false; } else { return true; } } private function annihilateProcessGroup() { $pid = $this->childPID; $pgid = posix_getpgid($pid); if ($pid && $pgid) { // NOTE: On Ubuntu, 'kill' does not recognize the use of "--" to // explicitly delineate PID/PGIDs from signals. We don't actually need it, // so use the implicit "kill -TERM -pgid" form instead of the explicit // "kill -TERM -- -pgid" form. 
exec("kill -TERM -{$pgid}"); sleep($this->killDelay); // On OSX, we'll get a permission error on stderr if the SIGTERM was // successful in ending the life of the process group, presumably because // all that's left is the daemon itself as a zombie waiting for us to // reap it. However, we still need to issue this command for process // groups that resist SIGTERM. Rather than trying to figure out if the // process group is still around or not, just SIGKILL unconditionally and // ignore any error which may be raised. exec("kill -KILL -{$pgid} 2>/dev/null"); $this->childPID = null; } } private function gracefulProcessGroup() { $pid = $this->childPID; $pgid = posix_getpgid($pid); if ($pid && $pgid) { exec("kill -INT -{$pgid}"); } } /** * Identify running daemons by examining the process table. This isn't * completely reliable, but can be used as a fallback if the pid files fail * or we end up with stray daemons by other means. * * Example output (array keys are process IDs): * * array( * 12345 => array( * 'type' => 'overseer', * 'command' => 'php launch_daemon.php --daemonize ...', * 'pid' => 12345, * ), * 12346 => array( * 'type' => 'daemon', * 'command' => 'php exec_daemon.php ...', * 'pid' => 12346, * ), * ); * * @return dict Map of PIDs to process information, identifying running * daemon processes. 
*/ public static function findRunningDaemons() { $results = array(); list($err, $processes) = exec_manual('ps -o pid,command -a -x -w -w -w'); if ($err) { return $results; } $processes = array_filter(explode("\n", trim($processes))); foreach ($processes as $process) { list($pid, $command) = explode(' ', $process, 2); $matches = null; if (!preg_match('/((launch|exec)_daemon.php|phd-daemon)/', $command, $matches)) { continue; } switch ($matches[1]) { case 'exec_daemon.php': $type = 'daemon'; break; case 'launch_daemon.php': case 'phd-daemon': default: $type = 'overseer'; break; } $results[(int)$pid] = array( 'type' => $type, 'command' => $command, 'pid' => (int) $pid, ); } return $results; } /** * Generate a unique ID for this daemon. * * @return string A unique daemon ID. */ private function generateDaemonID() { return substr(getmypid().':'.Filesystem::readRandomCharacters(12), 0, 12); } /** * Dispatch an event to event listeners. * * @param string Event type. * @param dict Event parameters. * @return void */ private function dispatchEvent($type, array $params = array()) { $data = array( 'id' => $this->daemonID, 'daemonClass' => $this->daemon, 'childPID' => $this->childPID, ) + $params; $event = new PhutilEvent($type, $data); try { PhutilEventEngine::dispatchEvent($event); } catch (Exception $ex) { phlog($ex); } } } diff --git a/src/future/exec/ExecFuture.php b/src/future/exec/ExecFuture.php index 886fe5c..ddb5e6f 100644 --- a/src/future/exec/ExecFuture.php +++ b/src/future/exec/ExecFuture.php @@ -1,873 +1,874 @@ array('pipe', 'r'), // stdin 1 => array('pipe', 'w'), // stdout 2 => array('pipe', 'w'), // stderr ); /* -( Creating ExecFutures )----------------------------------------------- */ /** * Create a new ExecFuture. * * $future = new ExecFuture('wc -l %s', $file_path); * * @param string `sprintf()`-style command string which will be passed * through @{function:csprintf} with the rest of the arguments. * @param ... 
Zero or more additional arguments for @{function:csprintf}. * @return ExecFuture ExecFuture for running the specified command. * @task create */ public function __construct($command) { $argv = func_get_args(); $this->command = call_user_func_array('csprintf', $argv); $this->stdin = new PhutilRope(); } /* -( Command Information )------------------------------------------------ */ /** * Retrieve the raw command to be executed. * * @return string Raw command. * @task info */ public function getCommand() { return $this->command; } /** * Retrieve the byte limit for the stderr buffer. * * @return int Maximum buffer size, in bytes. * @task info */ public function getStderrSizeLimit() { return $this->stderrSizeLimit; } /** * Retrieve the byte limit for the stdout buffer. * * @return int Maximum buffer size, in bytes. * @task info */ public function getStdoutSizeLimit() { return $this->stdoutSizeLimit; } /** * Get the process's pid. This only works after execution is initiated, e.g. * by a call to start(). * * @return int Process ID of the executing process. * @task info */ public function getPID() { $status = $this->procGetStatus(); return $status['pid']; } /* -( Configuring Execution )---------------------------------------------- */ /** * Set a maximum size for the stdout read buffer. To limit stderr, see * @{method:setStderrSizeLimit}. The major use of these methods is to use less * memory if you are running a command which sometimes produces huge volumes * of output that you don't really care about. * * NOTE: Setting this to 0 means "no buffer", not "unlimited buffer". * * @param int Maximum size of the stdout read buffer. * @return this * @task config */ public function setStdoutSizeLimit($limit) { $this->stdoutSizeLimit = $limit; return $this; } /** * Set a maximum size for the stderr read buffer. * See @{method:setStdoutSizeLimit} for discussion. * * @param int Maximum size of the stderr read buffer. 
* @return this * @task config */ public function setStderrSizeLimit($limit) { $this->stderrSizeLimit = $limit; return $this; } /** * Set the maximum internal read buffer size this future. The future will * block reads once the internal stdout or stderr buffer exceeds this size. * * NOTE: If you @{method:resolve} a future with a read buffer limit, you may * block forever! * * TODO: We should probably release the read buffer limit during * @{method:resolve}, or otherwise detect this. For now, be careful. * * @param int|null Maximum buffer size, or `null` for unlimited. * @return this */ public function setReadBufferSize($read_buffer_size) { $this->readBufferSize = $read_buffer_size; return $this; } /** * Set the current working directory to use when executing the command. * * @param string Directory to set as CWD before executing the command. * @return this * @task config */ public function setCWD($cwd) { $this->cwd = $cwd; return $this; } /** * Set the environment variables to use when executing the command. * * @param array Environment variables to use when executing the command. * @return this * @task config */ public function setEnv($env, $wipe_process_env = false) { if ($wipe_process_env) { $this->env = $env; } else { $this->env = $env + $_ENV; } return $this; } /** * Set the value of a specific environmental variable for this command. * * @param string Environmental variable name. * @param string|null New value, or null to remove this variable. * @return this * @task config */ public function updateEnv($key, $value) { if (!is_array($this->env)) { $this->env = $_ENV; } if ($value === null) { unset($this->env[$key]); } else { $this->env[$key] = $value; } return $this; } /* -( Interacting With Commands )------------------------------------------ */ /** * Read and return output from stdout and stderr, if any is available. This * method keeps a read cursor on each stream, but the entire streams are * still returned when the future resolves. 
You can call read() again after * resolving the future to retrieve only the parts of the streams you did not * previously read: * * $future = new ExecFuture('...'); * // ... * list($stdout) = $future->read(); // Returns output so far * list($stdout) = $future->read(); // Returns new output since first call * // ... * list($stdout) = $future->resolvex(); // Returns ALL output * list($stdout) = $future->read(); // Returns unread output * * NOTE: If you set a limit with @{method:setStdoutSizeLimit} or * @{method:setStderrSizeLimit}, this method will not be able to read data * past the limit. * * NOTE: If you call @{method:discardBuffers}, all the stdout/stderr data * will be thrown away and the cursors will be reset. * * @return pair <$stdout, $stderr> pair with new output since the last call * to this method. * @task interact */ public function read() { $stdout = $this->readStdout(); $result = array( $stdout, (string)substr($this->stderr, $this->stderrPos), ); $this->stderrPos = strlen($this->stderr); return $result; } public function readStdout() { if ($this->start) { $this->isReady(); // Sync } $result = (string)substr($this->stdout, $this->stdoutPos); $this->stdoutPos = strlen($this->stdout); return $result; } /** * Write data to stdin of the command. * * @param string Data to write. * @param bool If true, keep the pipe open for writing. By default, the pipe * will be closed as soon as possible so that commands which * listen for EOF will execute. If you want to keep the pipe open * past the start of command execution, do an empty write with * `$keep_pipe = true` first. * @return this * @task interact */ public function write($data, $keep_pipe = false) { if (strlen($data)) { if (!$this->stdin) { throw new Exception(pht('Writing to a closed pipe!')); } $this->stdin->append($data); } $this->closePipe = !$keep_pipe; return $this; } /** * Permanently discard the stdout and stderr buffers and reset the read * cursors. 
This is basically useful only if you are streaming a large amount * of data from some process: * * $future = new ExecFuture('zcat huge_file.gz'); * do { * $done = $future->resolve(0.1); // Every 100ms, * list($stdout) = $future->read(); // read output... * echo $stdout; // send it somewhere... * $future->discardBuffers(); // and then free the buffers. * } while ($done === null); * * Conceivably you might also need to do this if you're writing a client using * @{class:ExecFuture} and `netcat`, but you probably should not do that. * * NOTE: This completely discards the data. It won't be available when the * future resolves. This is almost certainly only useful if you need the * buffer memory for some reason. * * @return this * @task interact */ public function discardBuffers() { $this->discardStdoutBuffer(); $this->stderr = ''; $this->stderrPos = 0; return $this; } public function discardStdoutBuffer() { $this->stdout = ''; $this->stdoutPos = 0; return $this; } /** * Returns true if this future was killed by a timeout configured with * @{method:setTimeout}. * * @return bool True if the future was killed for exceeding its time limit. */ public function getWasKilledByTimeout() { return $this->killedByTimeout; } /* -( Configuring Execution )---------------------------------------------- */ /** * Set a hard limit on execution time. If the command runs longer, it will * be killed and the future will resolve with an error code. You can test * if a future was killed by a timeout with @{method:getWasKilledByTimeout}. * * @param int Maximum number of seconds this command may execute for. * @return this * @task config */ public function setTimeout($seconds) { $this->timeout = $seconds; return $this; } /* -( Resolving Execution )------------------------------------------------ */ /** * Resolve a command you expect to exit with return code 0. Works like * @{method:resolve}, but throws if $err is nonempty. Returns only * $stdout and $stderr. See also @{function:execx}. 
* * list($stdout, $stderr) = $future->resolvex(); * * @param float Optional timeout after which resolution will pause and * execution will return to the caller. * @return pair <$stdout, $stderr> pair. * @task resolve */ public function resolvex($timeout = null) { list($err, $stdout, $stderr) = $this->resolve($timeout); if ($err) { $cmd = $this->command; throw new CommandException( "Command failed with error #{$err}!", $cmd, $err, $stdout, $stderr); } return array($stdout, $stderr); } /** * Resolve a command you expect to return valid JSON. Works like * @{method:resolvex}, but also throws if stderr is nonempty, or stdout is not * valid JSON. Returns a PHP array, decoded from the JSON command output. * * @param float Optional timeout after which resolution will pause and * execution will return to the caller. * @return array PHP array, decoded from JSON command output. * @task resolve */ public function resolveJSON($timeout = null) { list($stdout, $stderr) = $this->resolvex($timeout); if (strlen($stderr)) { $cmd = $this->command; throw new CommandException( "JSON command '{$cmd}' emitted text to stderr when none was expected: ". $stderr, $cmd, 0, $stdout, $stderr); } $object = json_decode($stdout, true); if (!is_array($object)) { $cmd = $this->command; throw new CommandException( "JSON command '{$cmd}' did not produce a valid JSON object on stdout: ". $stdout, $cmd, 0, $stdout, $stderr); } return $object; } /** * Resolve the process by abruptly terminating it. * * @return list List of results. * @task resolve */ public function resolveKill() { if (!$this->result) { if (defined('SIGKILL')) { $signal = SIGKILL; } else { $signal = 9; } proc_terminate($this->proc, $signal); $this->result = array( 128 + $signal, $this->stdout, - $this->stderr); + $this->stderr, + ); $this->closeProcess(); } return $this->result; } /* -( Internals )---------------------------------------------------------- */ /** * Provides read sockets to the future core. 
* * @return list List of read sockets. * @task internal */ public function getReadSockets() { list($stdin, $stdout, $stderr) = $this->pipes; $sockets = array(); if (isset($stdout) && !feof($stdout)) { $sockets[] = $stdout; } if (isset($stderr) && !feof($stderr)) { $sockets[] = $stderr; } return $sockets; } /** * Provides write sockets to the future core. * * @return list List of write sockets. * @task internal */ public function getWriteSockets() { list($stdin, $stdout, $stderr) = $this->pipes; $sockets = array(); if (isset($stdin) && $this->stdin->getByteLength() && !feof($stdin)) { $sockets[] = $stdin; } return $sockets; } /** * Determine if the read buffer is empty. * * @return bool True if the read buffer is empty. * @task internal */ public function isReadBufferEmpty() { return !strlen($this->stdout); } /** * Determine if the write buffer is empty. * * @return bool True if the write buffer is empty. * @task internal */ public function isWriteBufferEmpty() { return !$this->getWriteBufferSize(); } /** * Determine the number of bytes in the write buffer. * * @return int Number of bytes in the write buffer. * @task internal */ public function getWriteBufferSize() { if (!$this->stdin) { return 0; } return $this->stdin->getByteLength(); } /** * Reads some bytes from a stream, discarding output once a certain amount * has been accumulated. * * @param resource Stream to read from. * @param int Maximum number of bytes to return from $stream. If * additional bytes are available, they will be read and * discarded. * @param string Human-readable description of stream, for exception * message. * @param int Maximum number of bytes to read. * @return string The data read from the stream. 
* @task internal */
  private function readAndDiscard($stream, $limit, $description, $length) {
    $output = '';

    // Nothing may be buffered at all, so do not touch the stream.
    if ($length <= 0) {
      return '';
    }

    do {
      $data = fread($stream, min($length, 64 * 1024));
      if (false === $data) {
        throw new Exception('Failed to read from '.$description);
      }

      $read_bytes = strlen($data);

      // Keep at most $limit more bytes. Anything read beyond that is dropped
      // on the floor, but it is still consumed from the stream.
      if ($read_bytes > 0 && $limit > 0) {
        if ($read_bytes > $limit) {
          $data = substr($data, 0, $limit);
        }
        $output .= $data;
        $limit -= strlen($data);
      }

      // Stop once we have collected $length bytes of output.
      if (strlen($output) >= $length) {
        break;
      }
    } while ($read_bytes > 0);

    return $output;
  }

  /**
   * Begin or continue command execution.
   *
   * The first call starts the subprocess and returns `false`. Later calls
   * flush pending stdin, read stdout/stderr, and report whether the process
   * has exited or been killed by the timeout.
   *
   * @return bool True if future has resolved.
   * @task internal
   */
  public function isReady() {
    // NOTE: We have soft dependencies on PhutilServiceProfiler and
    // PhutilErrorTrap here. These dependencies are soft to avoid the need to
    // build them into the Phage agent. Under normal circumstances, these
    // classes are always available.

    if (!$this->pipes) {

      // NOTE: See note above about Phage.
      if (class_exists('PhutilServiceProfiler')) {
        $profiler = PhutilServiceProfiler::getInstance();
        $this->profilerCallID = $profiler->beginServiceCall(
          array(
            'type' => 'exec',
            'command' => (string)$this->command,
          ));
      }

      if (!$this->start) {
        // We might already have started the timer via initiating resolution.
        $this->start = microtime(true);
      }

      $unmasked_command = $this->command;
      if ($unmasked_command instanceof PhutilCommandString) {
        $unmasked_command = $unmasked_command->getUnmaskedString();
      }

      $pipes = array();

      if (phutil_is_windows()) {
        // See T4395. proc_open under Windows uses "cmd /C [cmd]", which will
        // strip the first and last quote when there aren't exactly two quotes
        // (and some other conditions as well). This results in a command that
        // looks like `command" "path to my file" "something something` which is
        // clearly wrong. By surrounding the command string with quotes we can
        // be sure this process is harmless.
        if (strpos($unmasked_command, '"') !== false) {
          $unmasked_command = '"'.$unmasked_command.'"';
        }
      }

      // NOTE: See note above about Phage.
      if (class_exists('PhutilErrorTrap')) {
        $trap = new PhutilErrorTrap();
      } else {
        $trap = null;
      }

      $proc = @proc_open(
        $unmasked_command,
        self::$descriptorSpec,
        $pipes,
        $this->cwd,
        $this->env);

      if ($trap) {
        $err = $trap->getErrorsAsString();
        $trap->destroy();
      } else {
        // error_get_last() returns an array describing the error (or null),
        // not a string. Pull out the message so the exception below does not
        // read "Array".
        $last_error = error_get_last();
        $err = is_array($last_error) ? $last_error['message'] : '';
      }

      if (!is_resource($proc)) {
        throw new Exception("Failed to proc_open(): {$err}");
      }

      $this->pipes = $pipes;
      $this->proc = $proc;

      list($stdin, $stdout, $stderr) = $pipes;

      if (!phutil_is_windows()) {
        // On Windows, there's no such thing as nonblocking interprocess I/O.
        // Just leave the sockets blocking and hope for the best. Some features
        // will not work.
        if ((!stream_set_blocking($stdout, false)) ||
            (!stream_set_blocking($stderr, false)) ||
            (!stream_set_blocking($stdin, false))) {
          $this->__destruct();
          throw new Exception('Failed to set streams nonblocking.');
        }
      }

      $this->tryToCloseStdin();

      return false;
    }

    // The pipes were set up but the process handle is gone: we have already
    // closed the process, so there is nothing left to wait for.
    if (!$this->proc) {
      return true;
    }

    list($stdin, $stdout, $stderr) = $this->pipes;

    while (isset($this->stdin) && $this->stdin->getByteLength()) {
      $write_segment = $this->stdin->getAnyPrefix();

      $bytes = fwrite($stdin, $write_segment);
      if ($bytes === false) {
        throw new Exception('Unable to write to stdin!');
      } else if ($bytes) {
        $this->stdin->removeBytesFromHead($bytes);
      } else {
        // Writes are blocked for now.
        break;
      }
    }

    $this->tryToCloseStdin();

    // Read status before reading pipes so that we can never miss data that
    // arrives between our last read and the process exiting.
    $status = $this->procGetStatus();

    $read_buffer_size = $this->readBufferSize;

    // With no read buffer size configured, reads are effectively unbounded.
    $max_stdout_read_bytes = PHP_INT_MAX;
    $max_stderr_read_bytes = PHP_INT_MAX;
    if ($read_buffer_size !== null) {
      $max_stdout_read_bytes = $read_buffer_size - strlen($this->stdout);
      $max_stderr_read_bytes = $read_buffer_size - strlen($this->stderr);
    }

    if ($max_stdout_read_bytes > 0) {
      $this->stdout .= $this->readAndDiscard(
        $stdout,
        $this->getStdoutSizeLimit() - strlen($this->stdout),
        'stdout',
        $max_stdout_read_bytes);
    }

    if ($max_stderr_read_bytes > 0) {
      $this->stderr .= $this->readAndDiscard(
        $stderr,
        $this->getStderrSizeLimit() - strlen($this->stderr),
        'stderr',
        $max_stderr_read_bytes);
    }

    if (!$status['running']) {
      $this->result = array(
        $status['exitcode'],
        $this->stdout,
        $this->stderr,
      );
      $this->closeProcess();
      return true;
    }

    $elapsed = (microtime(true) - $this->start);
    if ($this->timeout && ($elapsed >= $this->timeout)) {
      $this->killedByTimeout = true;
      $this->resolveKill();
      return true;
    }

    // Still running and not timed out. Say so explicitly rather than falling
    // off the end of the method and returning null.
    return false;
  }


  /**
   * Kill or close the subprocess when the future is destroyed.
   *
   * @return void
   * @task internal
   */
  public function __destruct() {
    if (!$this->proc) {
      return;
    }

    // NOTE: If we try to proc_close() an open process, we hang indefinitely. To
    // avoid this, kill the process explicitly if it's still running.

    $status = $this->procGetStatus();
    if ($status['running']) {
      $this->resolveKill();
    } else {
      $this->closeProcess();
    }
  }


  /**
   * Close and free resources if necessary.
   *
   * Closes any open pipes, closes the process handle, drops the stdin buffer,
   * and ends the profiler service call if one was started.
   *
   * @return void
   * @task internal
   */
  private function closeProcess() {
    foreach ($this->pipes as $pipe) {
      if (isset($pipe)) {
        @fclose($pipe);
      }
    }
    // Keep a non-empty array here so isReady() does not try to start the
    // process again.
    $this->pipes = array(null, null, null);
    if ($this->proc) {
      @proc_close($this->proc);
      $this->proc = null;
    }
    $this->stdin = null;

    if ($this->profilerCallID !== null) {
      $profiler = PhutilServiceProfiler::getInstance();
      $profiler->endServiceCall(
        $this->profilerCallID,
        array(
          'err' => $this->result ? idx($this->result, 0) : null,
        ));
      $this->profilerCallID = null;
    }
  }


  /**
   * Execute `proc_get_status()`, but avoid pitfalls.
   *
   * @return dict Process status.
* @task internal */
  private function procGetStatus() {
    // After the process exits, we only get one chance to read proc_get_status()
    // before it starts returning garbage. Make sure we don't throw away the
    // last good read.
    if ($this->procStatus) {
      if (!$this->procStatus['running']) {
        return $this->procStatus;
      }
    }

    $this->procStatus = proc_get_status($this->proc);
    return $this->procStatus;
  }

  /**
   * Try to close stdin, if we're done using it. This keeps us from hanging if
   * the process on the other end of the pipe is waiting for EOF.
   *
   * @return void
   * @task internal
   */
  private function tryToCloseStdin() {
    if (!$this->closePipe) {
      // We've been told to keep the pipe open by a call to write(..., true).
      return;
    }

    // The stdin buffer may be absent: closeProcess() sets it to null, and
    // isReady() guards its own access with isset(). Treat a missing buffer as
    // "nothing left to write" instead of calling a method on null.
    if ($this->stdin !== null && $this->stdin->getByteLength()) {
      // We still have bytes to write.
      return;
    }

    list($stdin) = $this->pipes;
    if (!$stdin) {
      // We've already closed stdin.
      return;
    }

    // There's nothing stopping us from closing stdin, so close it.

    @fclose($stdin);
    $this->pipes[0] = null;
  }

  /**
   * Get the default wait, capped by the time remaining before the timeout.
   *
   * Starts the timer if it has not been started yet.
   *
   * @return mixed The parent's default wait, or the remaining timeout if that
   *               is smaller (never negative).
   */
  public function getDefaultWait() {
    $wait = parent::getDefaultWait();

    if ($this->timeout) {
      if (!$this->start) {
        $this->start = microtime(true);
      }

      $elapsed = (microtime(true) - $this->start);
      $wait = max(0, min($this->timeout - $elapsed, $wait));
    }

    return $wait;
  }

}
diff --git a/src/future/http/HTTPFuture.php b/src/future/http/HTTPFuture.php
index 6487595..4a42645 100644
--- a/src/future/http/HTTPFuture.php
+++ b/src/future/http/HTTPFuture.php
@@ -1,294 +1,299 @@ resolvex();
 *
 * Or
 *
 *   $future = new HTTPFuture('http://www.example.com/');
 *   list($http_response_status_object,
 *        $response_body,
 *        $headers) = $future->resolve();
 *
 * Prefer @{method:resolvex} to @{method:resolve} as the former throws
 * @{class:HTTPFutureHTTPResponseStatus} on failures, which includes an
 * informative exception message.
*/ final class HTTPFuture extends BaseHTTPFuture { private $host; private $port = 80; private $fullRequestPath; private $socket; private $writeBuffer; private $response; private $stateConnected = false; private $stateWriteComplete = false; private $stateReady = false; private $stateStartTime; private $profilerCallID; public function setURI($uri) { $parts = parse_url($uri); if (!$parts) { throw new Exception("Could not parse URI '{$uri}'."); } if (empty($parts['scheme']) || $parts['scheme'] !== 'http') { throw new Exception( "URI '{$uri}' must be fully qualified with 'http://' scheme."); } if (!isset($parts['host'])) { throw new Exception( "URI '{$uri}' must be fully qualified and include host name."); } $this->host = $parts['host']; if (!empty($parts['port'])) { $this->port = $parts['port']; } if (isset($parts['user']) || isset($parts['pass'])) { throw new Exception( 'HTTP Basic Auth is not supported by HTTPFuture.'); } if (isset($parts['path'])) { $this->fullRequestPath = $parts['path']; } else { $this->fullRequestPath = '/'; } if (isset($parts['query'])) { $this->fullRequestPath .= '?'.$parts['query']; } return parent::setURI($uri); } public function __destruct() { if ($this->socket) { @fclose($this->socket); $this->socket = null; } } public function getReadSockets() { if ($this->socket) { return array($this->socket); } return array(); } public function getWriteSockets() { if (strlen($this->writeBuffer)) { return array($this->socket); } return array(); } public function isWriteComplete() { return $this->stateWriteComplete; } private function getDefaultUserAgent() { return 'HTTPFuture/1.0'; } public function isReady() { if ($this->stateReady) { return true; } if (!$this->socket) { $this->stateStartTime = microtime(true); $this->socket = $this->buildSocket(); if (!$this->socket) { return $this->stateReady; } $profiler = PhutilServiceProfiler::getInstance(); $this->profilerCallID = $profiler->beginServiceCall( array( 'type' => 'http', 'uri' => $this->getURI(), )); } 
if (!$this->stateConnected) { $read = array(); $write = array($this->socket); $except = array(); $select = stream_select($read, $write, $except, $tv_sec = 0); if ($write) { $this->stateConnected = true; } } if ($this->stateConnected) { if (strlen($this->writeBuffer)) { $bytes = @fwrite($this->socket, $this->writeBuffer); if ($bytes === false) { throw new Exception('Failed to write to buffer.'); } else if ($bytes) { $this->writeBuffer = substr($this->writeBuffer, $bytes); } } if (!strlen($this->writeBuffer)) { $this->stateWriteComplete = true; } while (($data = fread($this->socket, 32768)) || strlen($data)) { $this->response .= $data; } if ($data === false) { throw new Exception('Failed to read socket.'); } } return $this->checkSocket(); } private function buildSocket() { $errno = null; $errstr = null; $socket = @stream_socket_client( 'tcp://'.$this->host.':'.$this->port, $errno, $errstr, $ignored_connection_timeout = 1.0, STREAM_CLIENT_CONNECT | STREAM_CLIENT_ASYNC_CONNECT); if (!$socket) { $this->stateReady = true; $this->result = $this->buildErrorResult( HTTPFutureTransportResponseStatus::ERROR_CONNECTION_FAILED); return null; } $ok = stream_set_blocking($socket, 0); if (!$ok) { throw new Exception('Failed to set stream nonblocking.'); } $this->writeBuffer = $this->buildHTTPRequest(); return $socket; } private function checkSocket() { $timeout = false; $now = microtime(true); if (($now - $this->stateStartTime) > $this->getTimeout()) { $timeout = true; } if (!feof($this->socket) && !$timeout) { return false; } $this->stateReady = true; if ($timeout) { $this->result = $this->buildErrorResult( HTTPFutureTransportResponseStatus::ERROR_TIMEOUT); } else if (!$this->stateConnected) { $this->result = $this->buildErrorResult( HTTPFutureTransportResponseStatus::ERROR_CONNECTION_REFUSED); } else if (!$this->stateWriteComplete) { $this->result = $this->buildErrorResult( HTTPFutureTransportResponseStatus::ERROR_CONNECTION_FAILED); } else { $this->result = 
$this->parseRawHTTPResponse($this->response); } $profiler = PhutilServiceProfiler::getInstance(); $profiler->endServiceCall($this->profilerCallID, array()); return true; } private function buildErrorResult($error) { return array( $status = new HTTPFutureTransportResponseStatus($error, $this->getURI()), $body = null, - $headers = array()); + $headers = array(), + ); } private function buildHTTPRequest() { $data = $this->getData(); $method = $this->getMethod(); $uri = $this->fullRequestPath; $add_headers = array(); if ($this->getMethod() == 'GET') { if (is_array($data)) { $data = http_build_query($data, '', '&'); if (strpos($uri, '?') !== false) { $uri .= '&'.$data; } else { $uri .= '?'.$data; } $data = ''; } } else { if (is_array($data)) { $data = http_build_query($data, '', '&')."\r\n"; $add_headers[] = array( 'Content-Type', - 'application/x-www-form-urlencoded'); + 'application/x-www-form-urlencoded', + ); } } $length = strlen($data); $add_headers[] = array( 'Content-Length', - $length); + $length, + ); if (!$this->getHeaders('User-Agent')) { $add_headers[] = array( 'User-Agent', - $this->getDefaultUserAgent()); + $this->getDefaultUserAgent(), + ); } if (!$this->getHeaders('Host')) { $add_headers[] = array( 'Host', - $this->host); + $this->host, + ); } $headers = array_merge($this->getHeaders(), $add_headers); foreach ($headers as $key => $header) { list($name, $value) = $header; if (strlen($value)) { $value = ': '.$value; } $headers[$key] = $name.$value."\r\n"; } return "{$method} {$uri} HTTP/1.0\r\n". implode('', $headers). "\r\n". 
$data; } } diff --git a/src/grammar/code/PhutilCLikeCodeSnippetContextFreeGrammar.php b/src/grammar/code/PhutilCLikeCodeSnippetContextFreeGrammar.php index 65b8179..baf665e 100644 --- a/src/grammar/code/PhutilCLikeCodeSnippetContextFreeGrammar.php +++ b/src/grammar/code/PhutilCLikeCodeSnippetContextFreeGrammar.php @@ -1,254 +1,254 @@ getStmtTerminationGrammarSet(), $this->getVarNameGrammarSet(), $this->getNullExprGrammarSet(), $this->getNumberGrammarSet(), $this->getExprGrammarSet(), $this->getCondGrammarSet(), $this->getLoopGrammarSet(), $this->getStmtGrammarSet(), $this->getAssignmentGrammarSet(), $this->getArithExprGrammarSet(), $this->getBoolExprGrammarSet(), $this->getBoolValGrammarSet(), $this->getTernaryExprGrammarSet(), $this->getFuncNameGrammarSet(), $this->getFuncCallGrammarSet(), $this->getFuncCallParamGrammarSet(), $this->getFuncDeclGrammarSet(), $this->getFuncParamGrammarSet(), $this->getFuncBodyGrammarSet(), $this->getFuncReturnGrammarSet(), ); } protected function getStartGrammarSet() { $start_grammar = parent::getStartGrammarSet(); $start_grammar['start'][] = '[funcdecl]'; return $start_grammar; } protected function getStmtTerminationGrammarSet() { return $this->buildGrammarSet('term', array(';')); } protected function getFuncCallGrammarSet() { return $this->buildGrammarSet('funccall', array( '[funcname]([funccallparam])', )); } protected function getFuncCallParamGrammarSet() { return $this->buildGrammarSet('funccallparam', array( '', '[expr]', '[expr], [expr]', )); } protected function getFuncDeclGrammarSet() { return $this->buildGrammarSet('funcdecl', array( 'function [funcname]([funcparam]) '. 
- '{[funcbody, indent, block, trim=right]}' + '{[funcbody, indent, block, trim=right]}', )); } protected function getFuncParamGrammarSet() { return $this->buildGrammarSet('funcparam', array( '', '[varname]', '[varname], [varname]', '[varname], [varname], [varname]', )); } protected function getFuncBodyGrammarSet() { return $this->buildGrammarSet('funcbody', array( "[stmt]\n[stmt]\n[funcreturn]", "[stmt]\n[stmt]\n[stmt]\n[funcreturn]", "[stmt]\n[stmt]\n[stmt]\n[stmt]\n[funcreturn]", )); } protected function getFuncReturnGrammarSet() { return $this->buildGrammarSet('funcreturn', array( 'return [expr][term]', - '' + '', )); } // Not really C, but put it here because of the curly braces and mostly shared // among Java and PHP protected function getClassDeclGrammarSet() { return $this->buildGrammarSet('classdecl', array( '[classinheritancemod] class [classname] {[classbody, indent, block]}', 'class [classname] {[classbody, indent, block]}', )); } protected function getClassNameGrammarSet() { return $this->buildGrammarSet('classname', array( 'MuffinHouse', 'MuffinReader', 'MuffinAwesomizer', 'SuperException', 'Librarian', 'Book', 'Ball', 'BallOfCode', 'AliceAndBobsSharedSecret', 'FileInputStream', 'FileOutputStream', 'BufferedReader', 'BufferedWriter', 'Cardigan', 'HouseOfCards', 'UmbrellaClass', 'GenericThing', )); } protected function getClassBodyGrammarSet() { return $this->buildGrammarSet('classbody', array( '[methoddecl]', "[methoddecl]\n\n[methoddecl]", "[propdecl]\n[propdecl]\n\n[methoddecl]\n\n[methoddecl]", "[propdecl]\n[propdecl]\n[propdecl]\n\n[methoddecl]\n\n[methoddecl]". 
"\n\n[methoddecl]", )); } protected function getVisibilityGrammarSet() { return $this->buildGrammarSet('visibility', array( 'private', 'protected', 'public', )); } protected function getClassInheritanceModGrammarSet() { return $this->buildGrammarSet('classinheritancemod', array( 'final', 'abstract', )); } // Keeping this separate so we won't give abstract methods a function body protected function getMethodInheritanceModGrammarSet() { return $this->buildGrammarSet('methodinheritancemod', array( 'final', )); } protected function getMethodDeclGrammarSet() { return $this->buildGrammarSet('methoddecl', array( '[visibility] [methodfuncdecl]', '[visibility] [methodfuncdecl]', '[methodinheritancemod] [visibility] [methodfuncdecl]', '[abstractmethoddecl]', )); } protected function getMethodFuncDeclGrammarSet() { return $this->buildGrammarSet('methodfuncdecl', array( 'function [funcname]([funcparam]) '. '{[methodbody, indent, block, trim=right]}', )); } protected function getMethodBodyGrammarSet() { return $this->buildGrammarSet('methodbody', array( "[methodstmt]\n[methodbody]", "[methodstmt]\n[funcreturn]", )); } protected function getMethodStmtGrammarSet() { $stmts = $this->getStmtGrammarSet(); return $this->buildGrammarSet('methodstmt', array_merge( $stmts['stmt'], array( '[methodcall][term]', ))); } protected function getMethodCallGrammarSet() { // Java/JavaScript return $this->buildGrammarSet('methodcall', array( 'this.[funccall]', '[varname].[funccall]', '[classname].[funccall]', )); } protected function getAbstractMethodDeclGrammarSet() { return $this->buildGrammarSet('abstractmethoddecl', array( 'abstract function [funcname]([funcparam])[term]', )); } protected function getPropDeclGrammarSet() { return $this->buildGrammarSet('propdecl', array( '[visibility] [varname][term]', )); } protected function getClassRuleSets() { return array( $this->getClassInheritanceModGrammarSet(), $this->getMethodInheritanceModGrammarSet(), $this->getClassDeclGrammarSet(), 
$this->getClassNameGrammarSet(), $this->getClassBodyGrammarSet(), $this->getMethodDeclGrammarSet(), $this->getMethodFuncDeclGrammarSet(), $this->getMethodBodyGrammarSet(), $this->getMethodStmtGrammarSet(), $this->getMethodCallGrammarSet(), $this->getAbstractMethodDeclGrammarSet(), $this->getPropDeclGrammarSet(), $this->getVisibilityGrammarSet(), ); } public function generateClass() { $rules = array_merge($this->getRules(), $this->getClassRuleSets()); $rules['start'] = array('[classdecl]'); $count = 0; return $this->applyRules('[start]', $count, $rules); } } diff --git a/src/grammar/code/PhutilJavaCodeSnippetContextFreeGrammar.php b/src/grammar/code/PhutilJavaCodeSnippetContextFreeGrammar.php index 76a6860..28a6990 100644 --- a/src/grammar/code/PhutilJavaCodeSnippetContextFreeGrammar.php +++ b/src/grammar/code/PhutilJavaCodeSnippetContextFreeGrammar.php @@ -1,184 +1,184 @@ getClassRuleSets()); $rulesset[] = $this->getTypeNameGrammarSet(); $rulesset[] = $this->getNamespaceDeclGrammarSet(); $rulesset[] = $this->getNamespaceNameGrammarSet(); $rulesset[] = $this->getImportGrammarSet(); $rulesset[] = $this->getMethodReturnTypeGrammarSet(); $rulesset[] = $this->getMethodNameGrammarSet(); $rulesset[] = $this->getVarDeclGrammarSet(); $rulesset[] = $this->getClassDerivGrammarSet(); return $rulesset; } protected function getStartGrammarSet() { return $this->buildGrammarSet('start', array( - '[import, block][nmspdecl, block][classdecl, block]' + '[import, block][nmspdecl, block][classdecl, block]', )); } protected function getClassDeclGrammarSet() { return $this->buildGrammarSet('classdecl', array( '[classinheritancemod] [visibility] class [classname][classderiv] '. '{[classbody, indent, block]}', '[visibility] class [classname][classderiv] '. 
'{[classbody, indent, block]}', )); } protected function getClassDerivGrammarSet() { return $this->buildGrammarSet('classderiv', array( ' extends [classname]', '', '', )); } protected function getTypeNameGrammarSet() { return $this->buildGrammarSet('type', array( 'int', 'boolean', 'char', 'short', 'long', 'float', 'double', '[classname]', '[type][]', )); } protected function getMethodReturnTypeGrammarSet() { return $this->buildGrammarSet('methodreturn', array( '[type]', 'void', )); } protected function getNamespaceDeclGrammarSet() { return $this->buildGrammarSet('nmspdecl', array( 'package [nmspname][term]', )); } protected function getNamespaceNameGrammarSet() { return $this->buildGrammarSet('nmspname', array( 'java.lang', 'java.io', 'com.example.proj.std', 'derp.example.www', )); } protected function getImportGrammarSet() { return $this->buildGrammarSet('import', array( 'import [nmspname][term]', 'import [nmspname].*[term]', 'import [nmspname].[classname][term]', )); } protected function getExprGrammarSet() { $expr = parent::getExprGrammarSet(); $expr['expr'][] = 'new [classname]([funccallparam])'; $expr['expr'][] = '[methodcall]'; $expr['expr'][] = '[methodcall]'; $expr['expr'][] = '[methodcall]'; $expr['expr'][] = '[methodcall]'; // Add some 'char's for ($ii = 0; $ii < 2; $ii++) { $expr['expr'][] = "'".Filesystem::readRandomCharacters(1)."'"; } return $expr; } protected function getStmtGrammarSet() { $stmt = parent::getStmtGrammarSet(); $stmt['stmt'][] = '[vardecl]'; $stmt['stmt'][] = '[vardecl]'; // `try` to `throw` a `Ball`! 
$stmt['stmt'][] = 'throw [classname][term]'; return $stmt; } protected function getPropDeclGrammarSet() { return $this->buildGrammarSet('propdecl', array( '[visibility] [type] [varname][term]', )); } protected function getVarDeclGrammarSet() { return $this->buildGrammarSet('vardecl', array( '[type] [varname][term]', '[type] [assignment][term]', )); } protected function getFuncNameGrammarSet() { return $this->buildGrammarSet('funcname', array( '[methodname]', '[classname].[methodname]', // This is just silly (too much recursion) // '[classname].[funcname]', // Don't do this for now, it just clutters up output (thanks to rec.) // '[nmspname].[classname].[methodname]', )); } // Renamed from `funcname` protected function getMethodNameGrammarSet() { $funcnames = head(parent::getFuncNameGrammarSet()); return $this->buildGrammarSet('methodname', $funcnames); } protected function getMethodFuncDeclGrammarSet() { return $this->buildGrammarSet('methodfuncdecl', array( '[methodreturn] [methodname]([funcparam]) '. 
'{[methodbody, indent, block, trim=right]}', )); } protected function getFuncParamGrammarSet() { return $this->buildGrammarSet('funcparam', array( '', '[type] [varname]', '[type] [varname], [type] [varname]', '[type] [varname], [type] [varname], [type] [varname]', )); } protected function getAbstractMethodDeclGrammarSet() { return $this->buildGrammarSet('abstractmethoddecl', array( 'abstract [methodreturn] [methodname]([funcparam])[term]', )); } } diff --git a/src/internationalization/__tests__/PhutilTranslatorTestCase.php b/src/internationalization/__tests__/PhutilTranslatorTestCase.php index 15a9c2d..4d09f3e 100644 --- a/src/internationalization/__tests__/PhutilTranslatorTestCase.php +++ b/src/internationalization/__tests__/PhutilTranslatorTestCase.php @@ -1,246 +1,246 @@ addTranslations( array( '%d line(s)' => array('%d line', '%d lines'), '%d char(s) on %d row(s)' => array( array('%d char on %d row', '%d char on %d rows'), array('%d chars on %d row', '%d chars on %d rows'), ), )); $this->assertEqual('line', $translator->translate('line')); $this->assertEqual('param', $translator->translate('%s', 'param')); $this->assertEqual('0 lines', $translator->translate('%d line(s)', 0)); $this->assertEqual('1 line', $translator->translate('%d line(s)', 1)); $this->assertEqual('2 lines', $translator->translate('%d line(s)', 2)); $this->assertEqual( '1 char on 1 row', $translator->translate('%d char(s) on %d row(s)', 1, 1)); $this->assertEqual( '5 chars on 2 rows', $translator->translate('%d char(s) on %d row(s)', 5, 2)); $this->assertEqual('1 beer(s)', $translator->translate('%d beer(s)', 1)); } public function testSingleVariant() { $translator = new PhutilTranslator(); $translator->setLanguage('en'); // In this translation, we have no alternatives for the first conversion. $translator->addTranslations( array( 'Run the command %s %d time(s).' 
=> array( array( 'Run the command %s once.', 'Run the command %s %d times.', ), ), )); $this->assertEqual( 'Run the command ls 123 times.', (string)$translator->translate( 'Run the command %s %d time(s).', hsprintf('%s', 'ls'), 123)); } public function testCzech() { $translator = new PhutilTranslator(); $translator->setLanguage('cs'); $translator->addTranslations( array( '%d beer(s)' => array('%d pivo', '%d piva', '%d piv'), )); $this->assertEqual('0 piv', $translator->translate('%d beer(s)', 0)); $this->assertEqual('1 pivo', $translator->translate('%d beer(s)', 1)); $this->assertEqual('2 piva', $translator->translate('%d beer(s)', 2)); $this->assertEqual('5 piv', $translator->translate('%d beer(s)', 5)); $this->assertEqual('1 line(s)', $translator->translate('%d line(s)', 1)); } public function testPerson() { $translator = new PhutilTranslator(); $translator->setLanguage('cs'); $translator->addTranslations( array( '%s wrote.' => array('%s napsal.', '%s napsala.'), )); $person = new PhutilPersonTest(); $this->assertEqual( 'Test () napsal.', $translator->translate('%s wrote.', $person)); $person->setSex(PhutilPerson::SEX_MALE); $this->assertEqual( 'Test (m) napsal.', $translator->translate('%s wrote.', $person)); $person->setSex(PhutilPerson::SEX_FEMALE); $this->assertEqual( 'Test (f) napsala.', $translator->translate('%s wrote.', $person)); } public function testTranslateDate() { $date = new DateTime('2012-06-21'); $translator = new PhutilTranslator(); $this->assertEqual('June', $translator->translateDate('F', $date)); $this->assertEqual('June 21', $translator->translateDate('F d', $date)); $this->assertEqual('F', $translator->translateDate('\F', $date)); $translator->addTranslations( array( 'June' => 'correct', '21' => 'wrong', - 'F' => 'wrong' + 'F' => 'wrong', )); $this->assertEqual('correct', $translator->translateDate('F', $date)); $this->assertEqual('correct 21', $translator->translateDate('F d', $date)); $this->assertEqual('F', 
$translator->translateDate('\F', $date)); } public function testSetInstance() { PhutilTranslator::setInstance(new PhutilTranslator()); $original = PhutilTranslator::getInstance(); $this->assertEqual('color', pht('color')); $british = new PhutilTranslator(); $british->addTranslations( array( 'color' => 'colour', )); PhutilTranslator::setInstance($british); $this->assertEqual('colour', pht('color')); PhutilTranslator::setInstance($original); $this->assertEqual('color', pht('color')); } public function testFormatNumber() { $translator = new PhutilTranslator(); $this->assertEqual('1,234', $translator->formatNumber(1234)); $this->assertEqual('1,234.5', $translator->formatNumber(1234.5, 1)); $this->assertEqual('1,234.5678', $translator->formatNumber(1234.5678, 4)); $translator->addTranslations( array( ',' => ' ', - '.' => ',' + '.' => ',', )); $this->assertEqual('1 234', $translator->formatNumber(1234)); $this->assertEqual('1 234,5', $translator->formatNumber(1234.5, 1)); $this->assertEqual('1 234,5678', $translator->formatNumber(1234.5678, 4)); } public function testNumberTranslations() { $translator = new PhutilTranslator(); $translator->addTranslations( array( '%s line(s)' => array('%s line', '%s lines'), )); $this->assertEqual( '1 line', $translator->translate('%s line(s)', new PhutilNumber(1))); $this->assertEqual( '1,000 lines', $translator->translate('%s line(s)', new PhutilNumber(1000))); $this->assertEqual( '8.5 lines', $translator->translate( '%s line(s)', id(new PhutilNumber(8.5))->setDecimals(1))); } public function testValidateTranslation() { $tests = array( 'a < 2' => array( 'a < 2' => true, 'b < 3' => true, '2 > a' => false, 'a<2' => false, ), 'We win' => array( 'We win' => true, 'We win' => true, // false positive 'We win' => false, 'We win' => false, ), 'We win & triumph' => array( 'We triumph & win' => true, 'We win and triumph' => false, ), 'beer' => array( 'pivo' => true, 'b<>r' => false, 'b&&r' => false, ), ); $translator = new PhutilTranslator(); 
foreach ($tests as $original => $translations) { foreach ($translations as $translation => $expect) { $valid = ($expect ? 'valid' : 'invalid'); $this->assertEqual( $expect, $translator->validateTranslation($original, $translation), "'{$original}' should be {$valid} with '{$translation}'."); } } } public function testHTMLTranslations() { $string = '%s awoke suddenly at %s.'; $when = '<4 AM>'; $translator = new PhutilTranslator(); // When no components are HTML, everything is treated as a string. $who = 'Abraham'; $translation = $translator->translate( $string, $who, $when); $this->assertEqual( 'string', gettype($translation)); $this->assertEqual( 'Abraham awoke suddenly at <4 AM>.', $translation); // When at least one component is HTML, everything is treated as HTML. $who = phutil_tag('span', array(), 'Abraham'); $translation = $translator->translate( $string, $who, $when); $this->assertTrue($translation instanceof PhutilSafeHTML); $this->assertEqual( 'Abraham awoke suddenly at <4 AM>.', $translation->getHTMLContent()); $translation = $translator->translate( $string, $who, new PhutilNumber(1383930802)); $this->assertEqual( 'Abraham awoke suddenly at 1,383,930,802.', $translation->getHTMLContent()); } } diff --git a/src/lexer/PhutilPHPFragmentLexer.php b/src/lexer/PhutilPHPFragmentLexer.php index cac0ab2..e932f49 100644 --- a/src/lexer/PhutilPHPFragmentLexer.php +++ b/src/lexer/PhutilPHPFragmentLexer.php @@ -1,268 +1,269 @@ array( array('<\\?(?i:php)?', 'cp', 'php'), array('[^<]+', null), array('<', null), ), 'php' => array_merge(array( array('\\?>', 'cp', '!pop'), array( '<<<([\'"]?)('.$identifier_pattern.')\\1\\n.*?\\n\\2\\;?\\n', - 's'), + 's', + ), ), $nonsemantic_rules, array( array('(?i:__halt_compiler)\\b', 'cp', 'halt_compiler'), array('(->|::)', 'o', 'attr'), array('[~!%^&*+=|:.<>/?@-]+', 'o'), array('[\\[\\]{}();,]', 'o'), // After 'new', try to match an unadorned symbol. 
array('(?i:new|instanceof)\\b', 'k', 'possible_classname'), array('(?i:function)\\b', 'k', 'function_definition'), // After 'extends' or 'implements', match a list of classes/interfaces. array('(?i:extends|implements)\\b', 'k', 'class_list'), array('(?i:catch)\\b', 'k', 'catch'), array('(?i:'.implode('|', $keywords).')\\b', 'k'), array('(?i:'.implode('|', $constants).')\\b', 'kc'), array('\\$+'.$identifier_pattern, 'nv'), // Match "f(" as a function and "C::" as a class. These won't work // if you put a comment between the symbol and the operator, but // that's a bizarre usage. array($identifier_ns_pattern.'(?=\s*[\\(])', 'nf'), array($identifier_ns_pattern.'(?=\s*::)', 'nc', 'context_attr', array( 'context' => 'push', ), ), array($identifier_ns_pattern, 'no'), array('(\\d+\\.\\d*|\\d*\\.\\d+)([eE][+-]?[0-9]+)?', 'mf'), array('\\d+[eE][+-]?[0-9]+', 'mf'), array('0[0-7]+', 'mo'), array('0[xX][a-fA-F0-9]+', 'mh'), array('0[bB][0-1]+', 'm'), array('\d+', 'mi'), array("'", 's1', 'string1'), array('`', 'sb', 'stringb'), array('"', 's2', 'string2'), array('.', null), )), // We've just matched a class name, with a "::" lookahead. The name of // the class is on the top of the context stack. We want to try to match // the attribute or method (e.g., "X::C" or "X::f()"). 'context_attr' => array_merge($nonsemantic_rules, array( array('::', 'o'), array($identifier_pattern.'(?=\s*[\\(])', 'nf', '!pop', array( 'context' => 'pop', ), ), array($identifier_pattern, 'na', '!pop', array( 'context' => 'pop', ), ), array('', null, '!pop', array( 'context' => 'discard', ), ), )), // After '->' or '::', a symbol is an attribute name. Note that we end // up in 'context_attr' instead of here in some cases. 'attr' => array_merge($nonsemantic_rules, array( array($identifier_pattern, 'na', '!pop'), array('', null, '!pop'), )), // After 'new', a symbol is a class name. 
'possible_classname' => array_merge($nonsemantic_rules, array( array($identifier_ns_pattern, 'nc', '!pop'), array('', null, '!pop'), )), 'string1' => array( array('[^\'\\\\]+', 's1'), array("'", 's1', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), 'stringb' => array( array('[^`\\\\]+', 'sb'), array('`', 'sb', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), 'string2' => array( array('[^"\\\\]+', 's2'), array('"', 's2', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), // In a function definition (after "function"), we don't link the name // as a "nf" (name.function) since it is its own definition. 'function_definition' => array_merge($nonsemantic_rules, array( array('&', 'o'), array('\\(', 'o', '!pop'), array($identifier_pattern, 'no', '!pop'), array('', null, '!pop'), )), // For "//" and "#" comments, we need to break out if we see "?" followed // by ">". 'line_comment' => array( array('[^?\\n]+', 'c'), array('\\n', null, '!pop'), array('(?=\\?>)', null, '!pop'), array('\\?', 'c'), ), // We've seen __halt_compiler. Grab the '();' afterward and then eat // the rest of the file as raw data. 'halt_compiler' => array_merge($nonsemantic_rules, array( array('[()]', 'o'), array(';', 'o', 'compiler_halted'), array('\\?>', 'o', 'compiler_halted'), // Just halt on anything else. array('', null, 'compiler_halted'), )), // __halt_compiler has taken effect. 
'compiler_halted' => array( array('.+', null), ), 'class_list' => array_merge($nonsemantic_rules, array( array(',', 'o'), array('(?i:implements)', 'k'), array($identifier_ns_pattern, 'nc'), array('', null, '!pop'), )), 'catch' => array_merge($nonsemantic_rules, array( array('\\(', 'o'), array($identifier_ns_pattern, 'nc'), array('', null, '!pop'), )), ); } } diff --git a/src/lexer/PhutilPythonFragmentLexer.php b/src/lexer/PhutilPythonFragmentLexer.php index b5536f8..efcc9ac 100644 --- a/src/lexer/PhutilPythonFragmentLexer.php +++ b/src/lexer/PhutilPythonFragmentLexer.php @@ -1,314 +1,314 @@ array_merge(array( array('\\n', null), // TODO: Docstrings should match only at the start of a line array('""".*?"""', 'sd'), array('\'\'\'.*?\'\'\'', 'sd'), ), $nonsemantic_rules, array( array('[]{}:(),;[]', 'p'), array('\\\\\\n', null), array('\\\\', null), array('(?:in|is|and|or|not)\\b', 'ow'), array('(?:!=|==|<<|>>|[-~+/*%=<>&^|.])', 'o'), array('(?:'.implode('|', $keywords).')\\b', 'k'), array('def(?=\\s)', 'k', 'funcname'), array('class(?=\\s)', 'k', 'classname'), array('from(?=\\s)', 'kn', 'fromimport'), array('import(?=\\s)', 'kn', 'import'), array('(? 
array_merge($nonsemantic_rules, array( array('[a-zA-Z_]\w*', 'nf', '!pop'), array('', null, '!pop'), )), 'classname' => array_merge($nonsemantic_rules, array( array('[a-zA-Z_]\w*', 'nc', '!pop'), array('', null, '!pop'), )), 'fromimport' => array_merge($nonsemantic_rules, array( array('import\b', 'kn', '!pop'), // if None occurs here, it's "raise x from None", since None can // never be a module name array('None\b', 'bp', '!pop'), // sadly, in "raise x from y" y will be highlighted as namespace too array('[a-zA-Z_.][\w.]*', 'nn'), array('', null, '!pop'), )), 'import' => array_merge($nonsemantic_rules, array( array('as\b', 'kn'), array(',', 'o'), array('[a-zA-Z_.][\w.]*', 'nn'), array('', null, '!pop'), )), 'dqs_raw' => $dqs, 'sqs_raw' => $sqs, 'dqs' => array_merge($stringescape, $dqs), 'sqs' => array_merge($stringescape, $sqs), 'tdqs_raw' => $tdqs, 'tsqs_raw' => $tsqs, 'tdqs' => array_merge($stringescape, $tdqs), 'tsqs' => array_merge($stringescape, $tsqs), ); } } diff --git a/src/lexer/PhutilTypeLexer.php b/src/lexer/PhutilTypeLexer.php index fe9a408..c977bf1 100644 --- a/src/lexer/PhutilTypeLexer.php +++ b/src/lexer/PhutilTypeLexer.php @@ -1,32 +1,32 @@ array( array('\s+', ' '), array('\\|', '|'), array('<', '<'), array('>', '>'), array(',', ','), array('\\?', '?'), array('optional', 'opt'), array('map', 'map'), array('list', 'list'), array('int|float|bool|string|null|callable|wild|regex', 'k'), array('[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*', 'k'), - array('\\(', '(', 'comment') + array('\\(', '(', 'comment'), ), 'comment' => array( array('\\)', ')', '!pop'), array('[^\\)]+', 'cm'), ), ); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupNoteBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupNoteBlockRule.php index 64e9fbb..b885932 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupNoteBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupNoteBlockRule.php @@ -1,92 +1,93 @@ getRegEx(), 
$lines[$cursor])) { $num_lines++; $cursor++; while (isset($lines[$cursor])) { if (trim($lines[$cursor])) { $num_lines++; $cursor++; continue; } break; } } return $num_lines; } public function markupText($text, $children) { $matches = array(); preg_match($this->getRegEx(), $text, $matches); if (idx($matches, 'showword')) { $word = $matches['showword']; $show = true; } else { $word = $matches['hideword']; $show = false; } $class_suffix = phutil_utf8_strtolower($word); // This is the "(IMPORTANT)" or "NOTE:" part. $word_part = rtrim(substr($text, 0, strlen($matches[0]))); // This is the actual text. $text_part = substr($text, strlen($matches[0])); $text_part = $this->applyRules(rtrim($text_part)); $text_mode = $this->getEngine()->isTextMode(); if ($text_mode) { return $word_part.' '.$text_part; } if ($show) { $content = array( phutil_tag( 'span', array( 'class' => 'remarkup-note-word', ), $word_part), ' ', - $text_part); + $text_part, + ); } else { $content = $text_part; } return phutil_tag( 'div', array( - 'class' => 'remarkup-'.$class_suffix + 'class' => 'remarkup-'.$class_suffix, ), $content); } private function getRegEx() { $words = array( 'NOTE', 'IMPORTANT', 'WARNING', ); foreach ($words as $k => $word) { $words[$k] = preg_quote($word, '/'); } $words = implode('|', $words); return '/^(?:'. '(?:\((?P'.$words.')\))'. '|'. '(?:(?P'.$words.'):))\s*'. '/'; } } diff --git a/src/parser/PhutilParserGenerator.php b/src/parser/PhutilParserGenerator.php index dd29045..7c04281 100644 --- a/src/parser/PhutilParserGenerator.php +++ b/src/parser/PhutilParserGenerator.php @@ -1,880 +1,883 @@ setTerminals(array('a', 'b')) * ->setStartRule('S') * ->setRules( * array( * 'S' => 'A b', * 'A' => array( * 'A a', * 'a', * ))) * ->processGrammar(); * * To actually parse token streams, use @{method:parseTokens}. 
* * $tokens = get_tokens(); // Usually from PhutilLexer * $callback = 'some_callback'; * $tree = $parser->parseTokens($tokens, $callback); * * The callback is invoked when a grammar rule matches. It should have this * signature: * * function parser_callback($rule, $production, array $tokens) { * // ... * } * * The `$rule` is the matching rule; the `$production` is the matching * production, and `$tokens` is the matching tokens (for terminal rules) or the * return value of previous parse callbacks (for nonterminal rules). * * You should either return a result of evaluation, or some sort of abstract * representation of the parse tree (this is more likely to be useful for more * complex grammars). * * NOTE: This class generates LR(1) parsers, which perform less-than-optimally * on large grammars. Worse, it is written in PHP. It is suitable only for * very simple grammars with few states. * * NOTE: These parsers silently resolve reduce/reduce conflicts by choosing the * first reduction, and silently resolve shift/reduce conflicts by shifting. * These are the same rules used by Yacc, but are implicit. 
* * @task rules Grammar Rules * @task rvalidation Rule Validation * @task first Computing First() * @task tables Computing Action and Goto Tables * @task inspect Inspecting Generator State */ final class PhutilParserGenerator { private $terminals; private $rules; private $startRule = 'start'; private $sets = array(); private $successor = array(); private $setHashes = array(); private $actionTable; private $gotoTable; private $rulesValidated = false; private $eofSymbol; private $initSymbol; private $epsilonSymbol; private $endSymbol; private $firstTable; public function processGrammar() { $this->validateRules(); $this->buildFirstTable(); $init = $this->getInitSymbol(); $eof = $this->getEOFSymbol(); $end = $this->getEndSymbol(); $this->rules[$init] = array( array($this->startRule, $end), ); list($is_new, $state) = $this->addState( array( array($this->getInitSymbol(), 0, 0, $eof), )); $this->buildSuccessors($state); $this->buildTables(); return $this; } /* -( Grammar Rules )------------------------------------------------------ */ public function setTerminals(array $terminals) { $this->terminals = array_fill_keys($terminals, true); return $this; } public function setRules(array $rules) { $this->rules = $rules; return $this; } public function setStartRule($rule_name) { $this->startRule = $rule_name; return $this; } public function getStartRule() { return $this->startRule; } public function getEOFSymbol() { if ($this->eofSymbol === null) { throw new Exception('Call processGrammar() before getEOFSymbol()!'); } return $this->eofSymbol; } public function getInitSymbol() { if ($this->initSymbol === null) { throw new Exception('Call processGrammar() before getInitSymbol()!'); } return $this->initSymbol; } public function getEpsilonSymbol() { if ($this->epsilonSymbol === null) { throw new Exception('Call processGrammar() before getEpsilonSymbol()!'); } return $this->epsilonSymbol; } public function getEndSymbol() { if ($this->endSymbol === null) { throw new Exception('Call 
processGrammar() before getEndSymbol()!'); } return $this->endSymbol; } public function isTerminal($symbol) { return isset($this->terminals[$symbol]); } public function isRule($symbol) { return isset($this->rules[$symbol]); } /* -( Rule Validation )---------------------------------------------------- */ /** * Perform a battery of tests on the provided rules to detect problems which * would prevent us from generating a parser. * * @return void * @task rvalidation */ private function validateRules() { // Rules must be specified in the right format. $this->parseRules(); // Rules must contain only known symbols. $this->validateRuleSymbols(); // The start rule must exist and be valid. $this->validateStartRule(); // Now, we select printable names for special symbols (EOF, epsilon, etc) // that don't conflict with any symbols in the grammar. $this->chooseSpecialSymbols(); // Make sure every terminal can be reached by some rule. $this->validateAllTerminalsReachable(); // Make sure every rule can be reached. $this->validateAllRulesReachable(); // Make sure every rule has some valid reduction. $this->validateAllRulesReducible(); $this->rulesValidated = true; } /** * @task rvalidation */ private function parseRules() { foreach ($this->rules as $rule_name => $rule_variants) { if (!is_array($rule_variants)) { $rule_variants = array($rule_variants); $this->rules[$rule_name] = $rule_variants; } foreach ($rule_variants as $vkey => $variant) { if ($variant === null) { $variant = array(null); } else if (!is_array($variant)) { $variant = preg_split('/\s+/', $variant); } else { foreach ($variant as $symbol) { if (($symbol === null) && count($variant) > 1) { throw new PhutilInvalidRuleParserGeneratorException( "Rule '{$rule_name}' contains a production '{$vkey}' which ". "is nonempty but has a null in it. A rule with other symbols ". 
"may not contain null."); } } } $this->rules[$rule_name][$vkey] = array_values($variant); } } } /** * @task rvalidation */ private function validateRuleSymbols() { foreach ($this->rules as $rule => $productions) { foreach ($productions as $production_name => $production) { foreach ($production as $symbol) { if ($symbol === null) { continue; } if ($this->isTerminal($symbol)) { continue; } if ($this->isRule($symbol)) { continue; } $production_string = implode(' ', $production); throw new PhutilUnknownSymbolParserGeneratorException( "Symbol '{$symbol}' in production '{$production_name}' ". "('{$production_string}') of rule '{$rule}' does not name a rule ". "or terminal. Did you misspell a symbol, fail to specify a ". "terminal, or forget a rule?"); } } } } /** * @task rvalidation */ private function validateStartRule() { $start_rule = $this->getStartRule(); if (!$this->isRule($start_rule)) { throw new PhutilUnknownSymbolParserGeneratorException( "Start rule '{$start_rule}' does not appear in the rules for the ". "grammar. Use setStartRule() to choose a different start rule, or ". 
"add a rule named '{$start_rule}'."); } } /** * @task rvalidation */ private function chooseSpecialSymbols() { $special = array( 'eofSymbol' => '(end-of-file)', 'epsilonSymbol' => '(epsilon)', 'initSymbol' => '(init)', 'endSymbol' => '(end)', ); foreach ($special as $key => $value) { while ($this->isRule($value) || $this->isTerminal($value)) { $value .= "'"; } $special[$key] = $value; } $this->eofSymbol = $special['eofSymbol']; $this->epsilonSymbol = $special['epsilonSymbol']; $this->initSymbol = $special['initSymbol']; $this->endSymbol = $special['endSymbol']; foreach ($this->rules as $rule => $productions) { foreach ($productions as $production_name => $production) { foreach ($production as $key => $symbol) { if ($symbol === null) { $this->rules[$rule][$production_name][$key] = $this->epsilonSymbol; } } $this->rules[$rule][$production_name][] = $this->endSymbol; } } $this->terminals[$this->getEOFSymbol()] = true; } /** * @task rvalidation */ private function validateAllTerminalsReachable() { $seen = array(); foreach ($this->rules as $rule => $productions) { foreach ($productions as $production) { foreach ($production as $symbol) { $seen[$symbol] = true; } } } $missing = array_diff_key($this->terminals, $seen); unset($missing[$this->getEOFSymbol()]); if ($missing) { $missing_terminals = array_keys($missing); $missing_terminals = implode(', ', $missing_terminals); throw new PhutilUnreachableTerminalParserGeneratorException( 'Some terminals do not appear in any rule: '. $missing_terminals); } } /** * @task rvalidation */ private function validateAllRulesReachable() { $stack = array(); $reachable = $this->computeReachableRules($this->getStartRule(), $stack); $missing = array_diff_key($this->rules, $reachable); unset($missing[$this->getStartRule()]); if ($missing) { $missing_rules = array_keys($missing); $missing_rules = implode(', ', $missing_rules); throw new PhutilUnreachableRuleParserGeneratorException( 'Some rules can never be reached from any production: '. 
$missing_rules); } } /** * @task rvalidation */ private function computeReachableRules($rule, array &$stack) { if (isset($stack[$rule])) { return $stack[$rule]; } $stack[$rule] = array(); foreach ($this->rules[$rule] as $production) { foreach ($production as $symbol) { if ($this->isRule($symbol)) { $stack[$rule][$symbol] = true; $stack[$rule] += $this->computeReachableRules($symbol, $stack); } } } return $stack[$rule]; } /** * @task rvalidation */ private function validateAllRulesReducible() { $reducible = array(); foreach ($this->rules as $rule => $productions) { if (!$this->isRuleReducible($rule, $reducible)) { throw new PhutilIrreducibleRuleParserGeneratorException( "Rule '{$rule}' can never be reduced: it recurses indefinitely ". "and reaches no production of terminals."); } } } /** * @task rvalidation */ private function isRuleReducible($rule, array &$reducible) { if (isset($reducible[$rule])) { return $reducible[$rule]; } // Set this ahead of time so we don't end up in an infinite loop if // rules recurse. We'll overwrite it if we find a reduction. $reducible[$rule] = false; $reducible[$rule] = $this->computeRuleReducible($rule, $reducible); return $reducible[$rule]; } /** * @task rvalidation */ private function computeRuleReducible($rule, array &$reducible) { $epsilon = $this->getEpsilonSymbol(); $end = $this->getEndSymbol(); $productions = $this->rules[$rule]; // In the first pass, try to find a trivially reducible production, e.g. one // with epsilon or only terminals. Also, remove recursive productions (those // which directly involve the rule itself) because we know we won't be able // to reduce them. If we're lucky, this will allow us to determine that the // rule is reducible without recursion. 
For example, we can immediately // reduce these productions: // // R -> a // R -> b c d // R -> (epsilon) // // We can never reduce these productions: // // R -> R // R -> a R b // // We might be able to reduce these productions, but they aren't as cheap // or easy to figure out, since we need to first determine if other rules // can be reduced: // // R -> X Y // R -> X a // // If we find a reduction, we return immediately. foreach ($productions as $key => $production) { $has_only_terminals = true; foreach ($production as $symbol) { if ($symbol == $end) { break; } else if ($symbol == $epsilon) { // The rule contains an epsilon production, which can always reduce // it. return true; } else if ($symbol == $rule) { // The rule contains itself; this production is never reducible. We // must find another reducible production. unset($productions[$key]); continue 2; } else if ($this->isTerminal($symbol)) { // This is a terminal; keep looking. We'll be able to reduce the // production if it contains only terminals. continue; } else { // This is a rule, so we can't trivially reduce it. We'll keep it // for the next round if we can't find any trivial reductions. $has_only_terminals = false; break; } } if ($has_only_terminals) { return true; } } // If we have no productions left, this rule can't be reduced. if (empty($productions)) { return false; } // We have remaining productions which include other rules. Look for a // nontrivial reduction. For example: // // R -> X Y // X -> x // Y -> y // // In this case, X and Y are both reducible, so "X Y" is reducible and thus // R is reducible. foreach ($productions as $production) { $can_reduce = true; foreach ($production as $symbol) { // NOTE: We don't need to check for epsilon here, because we would // already have determined the rule was reducible if we had an epsilon // production. 
if ($symbol == $end) { break; } else if ($this->isTerminal($symbol)) { continue; } else if (!$this->isRuleReducible($symbol, $reducible)) { $can_reduce = false; break; } } if ($can_reduce) { // The production contained only terminals and reducible rules, so it // is reducible. We're good and don't need to examine remaining // productions. return true; } } // We didn't find any reducible productions. return false; } /* -( Computing First() )-------------------------------------------------- */ private function buildFirstTable() { $this->firstTable = array(); foreach ($this->rules as $rule => $productions) { $this->buildRuleFirst($rule); } } private function buildRuleFirst($rule) { if (isset($this->firstTable[$rule])) { return $this->firstTable[$rule]; } $this->firstTable[$rule] = array(); $productions = $this->rules[$rule]; foreach ($productions as $key => $production) { $this->firstTable[$rule] += $this->getFirstForProduction($production); } return $this->firstTable[$rule]; } private function getFirstForProduction(array $production) { $set = array(); $end = $this->getEndSymbol(); $epsilon = $this->getEpsilonSymbol(); $eof = $this->getEOFSymbol(); $accept_epsilon = true; foreach ($production as $symbol) { if ($symbol === $end) { break; } else if ($symbol === $epsilon) { break; } else if ($this->isTerminal($symbol)) { $set[$symbol] = true; $accept_epsilon = false; break; } else { $symbol_set = $this->buildRuleFirst($symbol); $has_epsilon = isset($symbol_set[$epsilon]); unset($symbol_set[$epsilon]); $set += $symbol_set; if (!$has_epsilon) { $accept_epsilon = false; break; } } } if ($accept_epsilon) { $set[$epsilon] = true; } return $set; } /* -( Computing States )--------------------------------------------------- */ private function addState(array $set) { $seen = array(); foreach ($set as $item) { $seen[$item[0]][$item[1]][$item[2]][$item[3]] = true; } $end = $this->getEndSymbol(); $epsilon = $this->getEpsilonSymbol(); for ($ii = 0; $ii < count($set); $ii++) { $item 
= $set[$ii]; $production = $this->rules[$item[0]][$item[1]]; $next = $production[$item[2]]; if ($this->isTerminal($next)) { continue; } else if ($next === $epsilon) { continue; } else if ($next === $end) { continue; } $v = array_slice($production, $item[2] + 1, -1); $v[] = $item[3]; $v[] = $end; $firsts = $this->getFirstForProduction($v); foreach ($firsts as $nfirst => $ignored) { if (!$this->isTerminal($nfirst)) { unset($firsts[$nfirst]); } } foreach ($this->rules[$next] as $pkey => $nproduction) { foreach ($firsts as $nfirst => $ignored) { if (isset($seen[$next][$pkey][0][$nfirst])) { continue; } $set[] = array($next, $pkey, 0, $nfirst); $seen[$next][$pkey][0][$nfirst] = true; } } } $hash = $this->hashSet($set); if (isset($this->setHashes[$hash])) { return array(false, $this->setHashes[$hash]); } $this->states[] = $set; $state = last_key($this->states); $this->setHashes[$hash] = $state; return array(true, $state); } private function buildSuccessors($start_state) { $end = $this->getEndSymbol(); $nexts = array(); foreach ($this->states[$start_state] as $item) { $next = $this->rules[$item[0]][$item[1]][$item[2]]; if ($next === $end) { continue; } $nexts[$next][] = array( $item[0], $item[1], $item[2] + 1, - $item[3]); + $item[3], + ); } foreach ($nexts as $next => $items) { list($is_new, $state) = $this->addState($items); $this->successor[$start_state][$next] = $state; if ($is_new) { $this->buildSuccessors($state); } } } private function hashSet(array $set) { foreach ($set as $k => $item) { $set[$k] = implode("\0", $item); } sort($set); $set = implode("\1", $set); return md5($set); } private function buildTables() { $action = array(); $goto = array(); $end = $this->getEndSymbol(); $eof = $this->getEOFSymbol(); $init = $this->getInitSymbol(); foreach ($this->states as $state => $items) { $shift = array(); $reduce = array(); $accept = false; foreach ($items as $item) { $next = $this->rules[$item[0]][$item[1]][$item[2]]; if ($next == $end) { if ($item[0] !== $init) { 
$reduce[$item[3]][] = $item; } else if ($item[0] === $init && $item[3] === $eof) { $accept = $item; } } else if ($this->isTerminal($next)) { $shift[$next] = $item; } else { $goto[$state][$next] = $this->successor[$state][$next]; } } foreach ($reduce as $next => $reductions) { if (count($reductions) > 1) { $ways = array(); foreach ($reductions as $reduction) { $ways[] = "{$reduction[0]}/{$reduction[1]}"; } $ways = implode('; ', $ways); // TODO: As below, we should have more explicit handling of // reduce/reduce conflicts. For now, just pick the first one. if (false) { throw new Exception( "Reduce/reduce conflict: from state '{$state}', when a ". "'{$next}' is encountered, it may be reduced in multiple ". "ways: {$ways}"); } } $reduce[$next] = head($reductions); } $srconflicts = array_intersect_key($shift, $reduce); foreach ($srconflicts as $next => $ignored) { // TODO: We should probably have better or more explicit handling of // shift/reduce conflicts. For now, we just shift. if (false) { $what = $reduce[$next][0]; throw new Exception( "Shift/reduce conflict: from state '{$state}', when a '{$next}' ". "is encountered, shifting conflicts with reducing '{$what}'."); } else { // Resolve the shift/reduce by shifting. 
$reduce = array(); } } if ($accept && isset($shift[$eof])) { throw new Exception('Accept/shift conflict!'); } if ($accept && isset($reduce[$eof])) { throw new Exception('Accept/reduce conflict!'); } foreach ($reduce as $next => $item) { $action[$state][$next] = array( 'R', array( $item[0], $item[1], - count($this->rules[$item[0]][$item[1]]) - 1)); + count($this->rules[$item[0]][$item[1]]) - 1), + ); } foreach ($shift as $next => $item) { $action[$state][$next] = array( 'S', - $this->successor[$state][$next]); + $this->successor[$state][$next], + ); } if ($accept) { $action[$state][$eof] = array('A'); } } $this->actionTable = $action; $this->gotoTable = $goto; } public function generateParserFunction($name) { $out = array(); $out[] = 'function '.$name.'(array $tokens, $callback) {'; $out[] = ' return PhutilParserGenerator::parseTokensWithTables('; $out[] = ' '.$this->formatAndIndent($this->actionTable, 4).','; $out[] = ' '.$this->formatAndIndent($this->gotoTable, 4).','; $out[] = ' '.$this->formatAndIndent($this->getEOFSymbol(), 4).','; $out[] = ' $tokens,'; $out[] = ' $callback);'; $out[] = '}'; return implode("\n", $out); } private function formatAndIndent($var, $depth) { $var = phutil_var_export($var); $var = str_replace("\n", "\n".str_repeat(' ', $depth), $var); return $var; } public function parseTokens(array $tokens, $callback) { return self::parseTokensWithTables( $this->actionTable, $this->gotoTable, $this->getEOFSymbol(), $tokens, $callback); } public static function parseTokensWithTables( $action_table, $goto_table, $eof_symbol, array $tokens, $callback) { $state_stack = array(0); $token_stack = array(); $tokens = array_reverse($tokens); while (true) { $state = end($state_stack); if (empty($tokens)) { $next = $eof_symbol; } else { $next_token = end($tokens); $next = $next_token[0]; } if (!isset($action_table[$state][$next])) { $expected = implode(', ', array_keys($action_table[$state])); throw new Exception( "Unexpected '{$next}' in state {$state}! 
Expected: ". $expected); } $action = $action_table[$state][$next]; switch ($action[0]) { case 'S': $state_stack[] = $action[1]; $token_stack[] = array_pop($tokens); break; case 'R': $r_rule = $action[1][0]; $r_prod = $action[1][1]; $r_size = $action[1][2]; $token_v = array(); while ($r_size--) { $token_v[] = array_pop($token_stack); array_pop($state_stack); } $token_v = array_reverse($token_v); $token_stack[] = call_user_func_array( $callback, array($r_rule, $r_prod, $token_v)); $goto = $goto_table[end($state_stack)][$r_rule]; $state_stack[] = $goto; break; case 'A': break 2; } } return head($token_stack); } /* -( Inspecting Generator State )----------------------------------------- */ /** * @task inspect */ public function inspectRules() { if (!$this->rulesValidated) { throw new Exception('Call processGrammar() before inspectRules()!'); } return $this->rules; } /** * @task inspect */ public function inspectFirstTable() { if ($this->firstTable === null) { throw new Exception('Call processGrammar() before inspectFirstTable()!'); } return $this->firstTable; } } diff --git a/src/parser/__tests__/PhutilTypeSpecTestCase.php b/src/parser/__tests__/PhutilTypeSpecTestCase.php index bf50715..e4ec484 100644 --- a/src/parser/__tests__/PhutilTypeSpecTestCase.php +++ b/src/parser/__tests__/PhutilTypeSpecTestCase.php @@ -1,290 +1,291 @@ ', 'int | null', 'list < string >', 'int (must be even)', 'optional int', 'int?', 'int|null?', 'optional int? 
(minimum 300)', 'list', 'list>>> (easy)', ); $bad = array( '', 'list<>', 'list', 'map|map', 'int optional', '(derp)', 'list', 'int?|string', ); $good = array_fill_keys($good, true); $bad = array_fill_keys($bad, false); foreach ($good + $bad as $input => $expect) { $caught = null; try { PhutilTypeSpec::newFromString($input); } catch (Exception $ex) { $caught = $ex; } $this->assertEqual( $expect, ($caught === null), $input); } } public function testTypeSpecStringify() { $types = array( 'int', 'list', 'map', 'list>', 'map>', 'int|null', 'int|string|null', 'list', 'list', 'optional int', 'int (even)', ); foreach ($types as $type) { $this->assertEqual( $type, PhutilTypeSpec::newFromString($type)->toString()); } } public function testCanonicalize() { $tests = array( 'int?' => 'optional int', 'int | null' => 'int|null', 'list < map < int , string > > ?' => 'optional list>', 'int ( x )' => 'int ( x )', ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, PhutilTypeSpec::newFromString($input)->toString(), $input); } } public function testGetCommonParentClass() { $map = array( 'stdClass' => array( array('stdClass', 'stdClass'), ), false => array( array('Exception', 'stdClass'), ), 'Exception' => array( array('Exception', 'RuntimeException'), array('LogicException', 'RuntimeException'), array('BadMethodCallException', 'OutOfBoundsException'), ), ); foreach ($map as $expect => $tests) { if (is_int($expect)) { $expect = (bool) $expect; } foreach ($tests as $input) { list($class_a, $class_b) = $input; $this->assertEqual( $expect, PhutilTypeSpec::getCommonParentClass($class_a, $class_b), print_r($input, true)); } } } public function testGetTypeOf() { $map = array( 'int' => 1, 'string' => 'asdf', 'float' => 1.5, 'bool' => true, 'null' => null, 'map' => array(), 'list' => array('a', 'b'), 'list' => array(1, 2, 3), 'map' => array('x' => 3), 'map>' => array(1 => array('x', 'y')), 'stdClass' => new stdClass(), 'list' => array( - new Exception(), - new 
LogicException(), - new RuntimeException()), + new Exception(), + new LogicException(), + new RuntimeException(), + ), 'map' => array('x' => new stdClass()), ); foreach ($map as $expect => $input) { $this->assertEqual( $expect, PhutilTypeSpec::getTypeOf($input), print_r($input, true)); PhutilTypeSpec::newFromString($expect)->check($input); } } public function testTypeCheckFailures() { $map = array( 'int' => 'string', 'string' => 32, 'null' => true, 'bool' => null, 'map' => 16, 'list' => array('y' => 'z'), 'int|null' => 'ducks', 'stdClass' => new Exception(), 'list' => array(new Exception()), ); foreach ($map as $type => $value) { $caught = null; try { PhutilTypeSpec::newFromString($type)->check($value); } catch (PhutilTypeCheckException $ex) { $caught = $ex; } $this->assertTrue($ex instanceof PhutilTypeCheckException); } } public function testCheckMap() { $spec = array( 'count' => 'int', 'color' => 'optional string', ); // Valid PhutilTypeSpec::checkMap( array( 'count' => 1, ), $spec); // Valid, with optional parameter. PhutilTypeSpec::checkMap( array( 'count' => 3, 'color' => 'red', ), $spec); // Parameter "count" is required but missing. $caught = null; try { PhutilTypeSpec::checkMap( array(), $spec); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($ex instanceof PhutilTypeMissingParametersException); // Parameter "size" is specified but does not exist. 
$caught = null; try { PhutilTypeSpec::checkMap( array( 'count' => 4, 'size' => 'large', ), $spec); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($ex instanceof PhutilTypeExtraParametersException); } public function testRegexValidation() { PhutilTypeSpec::checkMap( array( 'regex' => '/.*/', ), array( 'regex' => 'regex', )); $caught = null; try { PhutilTypeSpec::checkMap( array( 'regex' => '.*', ), array( 'regex' => 'regex', )); } catch (PhutilTypeCheckException $ex) { $caught = $ex; } $this->assertTrue($ex instanceof PhutilTypeCheckException); } public function testScalarOrListRegexp() { PhutilTypeSpec::checkMap( array( 'regex' => '/.*/', ), array( 'regex' => 'regex | list', )); PhutilTypeSpec::checkMap( array( 'regex' => array('/.*/'), ), array( 'regex' => 'regex | list', )); PhutilTypeSpec::checkMap( array( 'regex' => '/.*/', ), array( 'regex' => 'list | regex', )); PhutilTypeSpec::checkMap( array( 'regex' => array('/.*/'), ), array( 'regex' => 'list | regex', )); $this->assertTrue(true); } } diff --git a/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php b/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php index 78a9af7..0b3dcbd 100644 --- a/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php +++ b/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php @@ -1,406 +1,412 @@ 'flag', - )); + ), + ); $args = new PhutilArgumentParser(array('bin')); $args->parseFull($specs); $this->assertEqual(false, $args->getArg('flag')); $args = new PhutilArgumentParser(array('bin', '--flag')); $args->parseFull($specs); $this->assertEqual(true, $args->getArg('flag')); } public function testWildcards() { $specs = array( array( 'name' => 'flag', ), array( 'name' => 'files', 'wildcard' => true, ), ); $args = new PhutilArgumentParser(array('bin', '--flag', 'a', 'b')); $args->parseFull($specs); $this->assertEqual(true, $args->getArg('flag')); $this->assertEqual( array('a', 'b'), $args->getArg('files')); $caught = null; try { $args 
= new PhutilArgumentParser(array('bin', '--derp', 'a', 'b')); $args->parseFull($specs); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); $args = new PhutilArgumentParser(array('bin', '--', '--derp', 'a', 'b')); $args->parseFull($specs); $this->assertEqual( array('--derp', 'a', 'b'), $args->getArg('files')); } public function testPartialParse() { $specs = array( array( 'name' => 'flag', ), ); $args = new PhutilArgumentParser(array('bin', 'a', '--flag', '--', 'b')); $args->parsePartial($specs); $this->assertEqual( array('a', '--', 'b'), $args->getUnconsumedArgumentVector()); } public function testBadArg() { $args = new PhutilArgumentParser(array('bin')); $args->parseFull(array()); $caught = null; try { $args->getArg('flag'); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateNames() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', ), array( 'name' => 'x', - ))); + ), + )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateNamesWithParsePartial() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parsePartial( array( array( 'name' => 'x', - ))); + ), + )); $args->parsePartial( array( array( 'name' => 'x', - ))); + ), + )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateShortAliases() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'short' => 'x', ), array( 'name' => 'y', 'short' => 'x', - ))); + ), + )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function 
testDuplicateWildcards() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'wildcard' => true, ), array( 'name' => 'y', 'wildcard' => true, - ))); + ), + )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicatePartialWildcards() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parsePartial( array( array( 'name' => 'x', 'wildcard' => true, ), )); $args->parsePartial( array( array( 'name' => 'y', 'wildcard' => true, ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictSpecificationWithUnrecognizedArg() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'conflicts' => array( 'y' => true, ), ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictSpecificationWithSelf() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'conflicts' => array( 'x' => true, ), ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testUnrecognizedFlag() { $args = new PhutilArgumentParser(array('bin', '--flag')); $caught = null; try { $args->parseFull(array()); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateFlag() { $args = new PhutilArgumentParser(array('bin', '--flag', '--flag')); $caught = null; try { $args->parseFull( array( array( 'name' => 'flag', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public 
function testMissingParameterValue() { $args = new PhutilArgumentParser(array('bin', '--with')); $caught = null; try { $args->parseFull( array( array( 'name' => 'with', 'param' => 'stuff', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testExtraParameterValue() { $args = new PhutilArgumentParser(array('bin', '--true=apple')); $caught = null; try { $args->parseFull( array( array( 'name' => 'true', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictParameterValue() { $args = new PhutilArgumentParser(array('bin', '--true', '--false')); $caught = null; try { $args->parseFull( array( array( 'name' => 'true', 'conflicts' => array( 'false' => true, ), ), array( 'name' => 'false', 'conflicts' => array( 'true' => true, ), ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testParameterValues() { $specs = array( array( 'name' => 'a', 'param' => 'value', ), array( 'name' => 'b', 'param' => 'value', ), array( 'name' => 'cee', 'short' => 'c', 'param' => 'value', ), array( 'name' => 'dee', 'short' => 'd', 'param' => 'value', ), ); $args = new PhutilArgumentParser( array( 'bin', '--a', 'a', '--b=b', '-c', 'c', '-d=d', )); $args->parseFull($specs); $this->assertEqual('a', $args->getArg('a')); $this->assertEqual('b', $args->getArg('b')); $this->assertEqual('c', $args->getArg('cee')); $this->assertEqual('d', $args->getArg('dee')); } public function testStdinValidParameter() { $specs = array( array( 'name' => 'file', 'param' => 'file', ), ); $args = new PhutilArgumentParser( array( 'bin', '-', '--file', '-', )); $args->parsePartial($specs); $this->assertEqual('-', $args->getArg('file')); } public function testRepeatableFlag() { $specs = array( array( 'name' => 'verbose', 'short' => 'v', 'repeat' => true, ), ); $args = 
new PhutilArgumentParser(array('bin', '-v', '-v', '-v')); $args->parseFull($specs); $this->assertEqual(3, $args->getArg('verbose')); } public function testRepeatableParam() { $specs = array( array( 'name' => 'eat', 'param' => 'fruit', 'repeat' => true, ), ); $args = new PhutilArgumentParser(array( 'bin', '--eat', 'apple', '--eat', 'pear', '--eat=orange', )); $args->parseFull($specs); $this->assertEqual( array('apple', 'pear', 'orange'), $args->getArg('eat')); } } diff --git a/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php b/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php index 8a445ff..3143ec7 100644 --- a/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php +++ b/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php @@ -1,44 +1,45 @@ setName('help'); $this->setExamples(<<setSynopsis(<<setArguments( array( array( 'name' => 'help-with-what', 'wildcard' => true, - ))); + ), + )); } public function isExecutable() { return true; } public function execute(PhutilArgumentParser $args) { $with = $args->getArg('help-with-what'); if (!$with) { $args->printHelpAndExit(); } else { foreach ($with as $thing) { echo phutil_console_format( "**%s WORKFLOW**\n\n", strtoupper($thing)); echo $args->renderWorkflowHelp($thing, $show_flags = true); echo "\n"; } exit(PhutilArgumentParser::PARSE_ERROR_CODE); } } } diff --git a/src/utils/AbstractDirectedGraph.php b/src/utils/AbstractDirectedGraph.php index 288985f..36ae431 100644 --- a/src/utils/AbstractDirectedGraph.php +++ b/src/utils/AbstractDirectedGraph.php @@ -1,322 +1,324 @@ addNodes( * array( * $object->getPHID() => $object->getChildPHIDs(), * )); * $detector->loadGraph(); * * Now you can query the graph, e.g. by detecting cycles: * * $cycle = $detector->detectCycles($object->getPHID()); * * If ##$cycle## is empty, no graph cycle is reachable from the node. If it * is nonempty, it contains a list of nodes which form a graph cycle. * * NOTE: Nodes must be represented with scalars. 
* * @task build Graph Construction * @task cycle Cycle Detection * @task explore Graph Exploration */ abstract class AbstractDirectedGraph { private $knownNodes = array(); private $graphLoaded = false; /* -( Graph Construction )------------------------------------------------- */ /** * Load the edges for a list of nodes. You must override this method. You * will be passed a list of nodes, and should return a dictionary mapping * each node to the list of nodes that can be reached by following the * edges which originate at it: for example, the child nodes of an object * which has a parent-child relationship to other objects. * * The intent of this method is to allow you to issue a single query per * graph level for graphs which are stored as edge tables in the database. * Generally, you will load all the objects which correspond to the list of * nodes, and then return a map from each of their IDs to all their children. * * NOTE: You must return an entry for every node you are passed, even if it * is invalid or can not be loaded. Either return an empty array (if this is * acceptable for your application) or throw an exception if you can't satisfy * this requirement. * * @param list A list of nodes. * @return dict A map of nodes to the nodes reachable along their edges. * There must be an entry for each node you were provided. * @task build */ abstract protected function loadEdges(array $nodes); /** * Seed the graph with known nodes. Often, you will provide the candidate * edges that a user is trying to create here, or the initial set of edges * you know about. * * @param dict A map of nodes to the nodes reachable along their edges. * @return this * @task build */ final public function addNodes(array $nodes) { if ($this->graphLoaded) { throw new Exception( 'Call addNodes() before calling loadGraph(). You can not add more '.
'nodes once you have loaded the graph.'); } $this->knownNodes += $nodes; return $this; } final public function getNodes() { return $this->knownNodes; } /** * Utility function to get a list of topographically sorted nodes out of a * graph. * * This could be useful for example to figure out what order you can safely * apply dependencies. * * Note this will loop indefinitely if the graph has a cycle. */ final public function getTopographicallySortedNodes() { $sorted = array(); $nodes = $this->getNodes(); $inverse_map = array(); foreach ($nodes as $node => $edges) { if (!isset($inverse_map[$node])) { $inverse_map[$node] = array(); } foreach ($edges as $edge) { if (!isset($inverse_map[$edge])) { $inverse_map[$edge] = array(); } $inverse_map[$edge][$node] = $node; } } $end_nodes = array(); foreach ($inverse_map as $node => $edges) { if (empty($edges)) { $end_nodes[] = $node; } } while (!empty($end_nodes)) { $current_node = array_pop($end_nodes); $sorted[] = $current_node; $current_edges = $nodes[$current_node]; foreach ($current_edges as $index => $current_edge) { // delete the edge from the normal map unset($nodes[$current_node][$index]); // and from the inverse map which is modestly trickier $inverse_nodes = $inverse_map[$current_edge]; unset($inverse_nodes[$current_node]); $inverse_map[$current_edge] = $inverse_nodes; // no more edges means this is an "end node" now if (empty($inverse_map[$current_edge])) { $end_nodes[] = $current_edge; } } } return $sorted; } /** * Utility function to get the best effort topographically sorted * nodes out of a graph. 
*/ final public function getBestEffortTopographicallySortedNodes() { $nodes = $this->getNodes(); $edges = $this->loadEdges($nodes); $results = array(); $completed = array(); $depth = 0; while (true) { $next = array(); foreach ($nodes as $node) { if (isset($completed[$node])) { continue; } $capable = true; foreach ($edges[$node] as $edge) { if (!isset($completed[$edge])) { $capable = false; break; } } if ($capable) { $next[] = $node; } } if (count($next) === 0) { // No more nodes to traverse; we are deadlocked if the number // of completed nodes is less than the total number of nodes. break; } foreach ($next as $node) { $results[] = array( 'node' => $node, 'depth' => $depth, - 'cycle' => false); + 'cycle' => false, + ); $completed[$node] = true; } $depth++; } foreach ($nodes as $node) { if (!isset($completed[$node])) { $results[] = array( 'node' => $node, 'depth' => $depth, - 'cycle' => true); + 'cycle' => true, + ); } } return $results; } /** * Load the graph, building it out so operations can be performed on it. This * constructs the graph level-by-level, calling @{method:loadEdges} to * expand the graph at each stage until it is complete. * * @return this * @task build */ final public function loadGraph() { $new_nodes = $this->knownNodes; while (true) { $load = array(); foreach ($new_nodes as $node => $edges) { foreach ($edges as $edge) { if (!isset($this->knownNodes[$edge])) { $load[$edge] = true; } } } if (empty($load)) { break; } $load = array_keys($load); $new_nodes = $this->loadEdges($load); foreach ($load as $node) { if (!isset($new_nodes[$node]) || !is_array($new_nodes[$node])) { throw new Exception( 'loadEdges() must return an edge list array for each provided '. 'node, or the cycle detection algorithm may not terminate.'); } } $this->addNodes($new_nodes); } $this->graphLoaded = true; return $this; } /* -( Cycle Detection )---------------------------------------------------- */ /** * Detect if there are any cycles reachable from a given node. 
* * If cycles are reachable, it returns a list of nodes which create a cycle. * Note that this list may include nodes which aren't actually part of the * cycle, but lie on the graph between the specified node and the cycle. * For example, it might return something like this (when passed "A"): * * A, B, C, D, E, C * * This means you can walk from A to B to C to D to E and then back to C, * which forms a cycle. A and B are included even though they are not part * of the cycle. When presenting information about graph cycles to users, * including these nodes is generally useful. This also shouldn't ever happen * if you've vetted prior edges before writing them, because it means there * is a preexisting cycle in the graph. * * NOTE: This only detects cycles reachable from a node. It does not detect * cycles in the entire graph. * * @param scalar The node to walk from, looking for graph cycles. * @return list|null Returns null if no cycles are reachable from the node, * or a list of nodes that form a cycle. * @task cycle */ final public function detectCycles($node) { if (!$this->graphLoaded) { throw new Exception( 'Call loadGraph() to build the graph out before calling '. 'detectCycles().'); } if (!isset($this->knownNodes[$node])) { throw new Exception( "The node '{$node}' is not known. Call addNodes() to seed the graph ". "with nodes."); } $visited = array(); return $this->performCycleDetection($node, $visited); } /** * Internal cycle detection implementation. Recursively walks the graph, * keeping track of where it's been, and returns the first cycle it finds. * * @param scalar The node to walk from. * @param list Previously visited nodes. * @return null|list Null if no cycles are found, or a list of nodes * which cycle. 
* @task cycle */ final private function performCycleDetection($node, array $visited) { $visited[$node] = true; foreach ($this->knownNodes[$node] as $edge) { if (isset($visited[$edge])) { $result = array_keys($visited); $result[] = $edge; return $result; } $result = $this->performCycleDetection($edge, $visited); if ($result) { return $result; } } return null; } } diff --git a/src/utils/__tests__/AbstractDirectedGraphTestCase.php b/src/utils/__tests__/AbstractDirectedGraphTestCase.php index 8fd93d5..ab872d7 100644 --- a/src/utils/__tests__/AbstractDirectedGraphTestCase.php +++ b/src/utils/__tests__/AbstractDirectedGraphTestCase.php @@ -1,179 +1,179 @@ array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, 'Trivial Graph'); } public function testNoncyclicGraph() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D'), 'C' => array(), 'D' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, 'Noncyclic Graph'); } public function testTrivialCyclicGraph() { $graph = array( 'A' => array('A'), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(array('A', 'A'), $cycle, 'Trivial Cycle'); } public function testCyclicGraph() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D'), 'C' => array('E', 'F'), 'D' => array(), 'E' => array(), 'F' => array('G', 'C'), 'G' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(array('A', 'C', 'F', 'C'), $cycle, 'Cyclic Graph'); } public function testNonTreeGraph() { // This graph is non-cyclic, but C is both a child and a grandchild of A. // This is permitted. 
$graph = array( 'A' => array('B', 'C'), 'B' => array('C'), 'C' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, 'NonTreeGraph'); } public function testEdgeLoadFailure() { $graph = array( 'A' => array('B'), ); $raised = null; try { $this->findGraphCycle($graph); } catch (Exception $ex) { $raised = $ex; } $this->assertTrue( (bool)$raised, 'Exception raised by unloadable edges.'); } public function testTopographicSortTree() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D', 'E'), 'C' => array(), 'D' => array(), - 'E' => array() + 'E' => array(), ); $sorted = $this->getTopographicSort($graph); $this->assertEqual( array('A', 'C', 'B', 'E', 'D'), $sorted, 'Topographically sorted tree.'); $graph = array( 'A' => array('B', 'C'), 'B' => array('C'), 'C' => array('D', 'E'), 'D' => array('E'), - 'E' => array() + 'E' => array(), ); $sorted = $this->getTopographicSort($graph); $this->assertEqual( array('A', 'B', 'C', 'D', 'E'), $sorted, 'Topographically sorted tree with nesting.'); } public function testBestEffortTopographicSortTree() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D', 'E'), 'C' => array(), 'D' => array(), 'E' => array(), 'F' => array('H'), 'G' => array('F', 'E'), 'H' => array('G'), ); $sorted = $this->getBestEffortTopographicSort($graph); $this->assertEqual(count($graph), count($sorted)); $this->assertEqual('C', $sorted[0]['node']); $this->assertEqual('D', $sorted[1]['node']); $this->assertEqual('E', $sorted[2]['node']); $this->assertEqual('B', $sorted[3]['node']); $this->assertEqual('A', $sorted[4]['node']); $this->assertEqual('F', $sorted[5]['node']); $this->assertEqual('G', $sorted[6]['node']); $this->assertEqual('H', $sorted[7]['node']); $this->assertEqual(0, $sorted[0]['depth']); $this->assertEqual(0, $sorted[1]['depth']); $this->assertEqual(0, $sorted[2]['depth']); $this->assertEqual(1, $sorted[3]['depth']); $this->assertEqual(2, $sorted[4]['depth']); $this->assertEqual(3, $sorted[5]['depth']); 
$this->assertEqual(3, $sorted[6]['depth']); $this->assertEqual(3, $sorted[7]['depth']); $this->assertEqual(false, $sorted[0]['cycle']); $this->assertEqual(false, $sorted[1]['cycle']); $this->assertEqual(false, $sorted[2]['cycle']); $this->assertEqual(false, $sorted[3]['cycle']); $this->assertEqual(false, $sorted[4]['cycle']); $this->assertEqual(true, $sorted[5]['cycle']); $this->assertEqual(true, $sorted[6]['cycle']); $this->assertEqual(true, $sorted[7]['cycle']); } private function findGraphCycle(array $graph, $seed = 'A', $search = 'A') { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_select_keys($graph, array($seed))); $detector->loadGraph(); return $detector->detectCycles($search); } private function getTopographicSort(array $graph, $seed = 'A') { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_select_keys($graph, array($seed))); $detector->loadGraph(); return $detector->getTopographicallySortedNodes(); } private function getBestEffortTopographicSort(array $graph) { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_keys($graph)); return $detector->getBestEffortTopographicallySortedNodes(); } } diff --git a/src/utils/__tests__/PhutilUTF8TestCase.php b/src/utils/__tests__/PhutilUTF8TestCase.php index d6abfce..6eb038f 100644 --- a/src/utils/__tests__/PhutilUTF8TestCase.php +++ b/src/utils/__tests__/PhutilUTF8TestCase.php @@ -1,557 +1,571 @@ assertEqual($input, phutil_utf8ize($input)); } public function testUTF8izeUTF8Ignored() { $input = "\xc3\x9c \xc3\xbc \xe6\x9d\xb1!"; $this->assertEqual($input, phutil_utf8ize($input)); } public function testUTF8izeLongStringNosegfault() { // For some reason my laptop is segfaulting on long inputs inside // preg_match(). Forestall this craziness in the common case, at least. 
phutil_utf8ize(str_repeat('x', 1024 * 1024)); $this->assertTrue(true); } public function testUTF8izeInvalidUTF8Fixed() { $input = "\xc3 this has \xe6\x9d some invalid utf8 \xe6"; $expect = "\xEF\xBF\xBD this has \xEF\xBF\xBD\xEF\xBF\xBD some invalid utf8 ". "\xEF\xBF\xBD"; $result = phutil_utf8ize($input); $this->assertEqual($expect, $result); } public function testUTF8izeOwlIsCuteAndFerocious() { // This was once a ferocious owl when we used to use "?" as the replacement // character instead of U+FFFD, but now he is sort of not as cute or // ferocious. $input = "M(o\xEE\xFF\xFFo)M"; $expect = "M(o\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBDo)M"; $result = phutil_utf8ize($input); $this->assertEqual($expect, $result); } public function testUTF8len() { $strings = array( '' => 0, 'x' => 1, "\xEF\xBF\xBD" => 1, "x\xe6\x9d\xb1y" => 3, 'xyz' => 3, 'quack' => 5, ); foreach ($strings as $str => $expect) { $this->assertEqual($expect, phutil_utf8_strlen($str), 'Length of '.$str); } } public function testUTF8v() { $strings = array( '' => array(), 'x' => array('x'), 'quack' => array('q', 'u', 'a', 'c', 'k'), "x\xe6\x9d\xb1y" => array('x', "\xe6\x9d\xb1", 'y'), // This is a combining character. "x\xCD\xA0y" => array('x', "\xCD\xA0", 'y'), ); foreach ($strings as $str => $expect) { $this->assertEqual($expect, phutil_utf8v($str), 'Vector of '.$str); } } public function testUTF8vCodepoints() { $strings = array( '' => array(), 'x' => array(0x78), 'quack' => array(0x71, 0x75, 0x61, 0x63, 0x6B), "x\xe6\x9d\xb1y" => array(0x78, 0x6771, 0x79), "\xC2\xBB" => array(0x00BB), "\xE2\x98\x83" => array(0x2603), "\xEF\xBF\xBF" => array(0xFFFF), "\xF0\x9F\x92\xA9" => array(0x1F4A9), // This is a combining character. 
"x\xCD\xA0y" => array(0x78, 0x0360, 0x79), ); foreach ($strings as $str => $expect) { $this->assertEqual( $expect, phutil_utf8v_codepoints($str), 'Codepoint Vector of '.$str); } } public function testUTF8ConsoleStrlen() { $strings = array( '' => 0, "\0" => 0, 'x' => 1, // Double-width chinese character. "\xe6\x9d\xb1" => 2, // Combining character. "x\xCD\xA0y" => 2, // Combining plus double-width. "\xe6\x9d\xb1\xCD\xA0y" => 3, // Colors and formatting. "\x1B[1mx\x1B[m" => 1, "\x1B[1m\x1B[31mx\x1B[m" => 1, ); foreach ($strings as $str => $expect) { $this->assertEqual( $expect, phutil_utf8_console_strlen($str), 'Console Length of '.$str); } } public function testUTF8shorten() { $inputs = array( array('1erp derp derp', 9, '', '1erp derp'), array('2erp derp derp', 12, '...', '2erp derp...'), array('derpxderpxderp', 12, '...', 'derpxderp...'), array("derp\xE2\x99\x83derpderp", 12, '...', "derp\xE2\x99\x83derp..."), array('', 12, '...', ''), array('derp', 12, '...', 'derp'), array('11111', 5, '2222', '11111'), array('111111', 5, '2222', '12222'), array('D1rp. Derp derp.', 7, '...', 'D1rp.'), // "D2rp." is a better shortening of this, but it's dramatically more // complicated to implement with the newer byte/glyph/character // shortening code. array('D2rp. Derp derp.', 5, '...', 'D2...'), array('D3rp. Derp derp.', 4, '...', 'D...'), array('D4rp. Derp derp.', 14, '...', 'D4rp. Derp...'), array('D5rpderp, derp derp', 16, '...', 'D5rpderp...'), array('D6rpderp, derp derp', 17, '...', 'D6rpderp, derp...'), // Strings with combining characters. array("Gr\xCD\xA0mpyCatSmiles", 8, '...', "Gr\xCD\xA0mpy..."), array("X\xCD\xA0\xCD\xA0\xCD\xA0Y", 1, '', "X\xCD\xA0\xCD\xA0\xCD\xA0"), // This behavior is maybe a little bad, but it seems mostly reasonable, // at least for latin languages. 
- array('Derp, supercalafragalisticexpialadoshus', 30, '...', - 'Derp...'), + array( + 'Derp, supercalafragalisticexpialadoshus', 30, '...', + 'Derp...', + ), // If a string has only word-break characters in it, we should just cut // it, not produce only the terminal. array('((((((((((', 8, '...', '(((((...'), // Terminal is longer than requested input. array('derp', 3, 'quack', 'quack'), ); foreach ($inputs as $input) { list($string, $length, $terminal, $expect) = $input; $result = id(new PhutilUTF8StringTruncator()) ->setMaximumGlyphs($length) ->setTerminator($terminal) ->truncateString($string); $this->assertEqual($expect, $result, 'Shortening of '.$string); } } public function testUTF8StringTruncator() { $cases = array( array( "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0", 6, "o\xCD\xA0!", 6, "o\xCD\xA0o\xCD\xA0!", 6, "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0", ), array( "X\xCD\xA0\xCD\xA0\xCD\xA0Y", 6, '!', 6, "X\xCD\xA0\xCD\xA0\xCD\xA0Y", 6, "X\xCD\xA0\xCD\xA0\xCD\xA0Y", ), array( "X\xCD\xA0\xCD\xA0\xCD\xA0YZ", 6, '!', 5, "X\xCD\xA0\xCD\xA0\xCD\xA0!", 2, "X\xCD\xA0\xCD\xA0\xCD\xA0!", ), array( "\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83", 4, "\xE2\x98\x83!", 3, "\xE2\x98\x83\xE2\x98\x83!", 3, "\xE2\x98\x83\xE2\x98\x83!", ), ); foreach ($cases as $case) { list($input, $b_len, $b_out, $p_len, $p_out, $g_len, $g_out) = $case; $result = id(new PhutilUTF8StringTruncator()) ->setMaximumBytes($b_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($b_out, $result, 'byte-short of '.$input); $result = id(new PhutilUTF8StringTruncator()) ->setMaximumCodepoints($p_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($p_out, $result, 'codepoint-short of '.$input); $result = id(new PhutilUTF8StringTruncator()) ->setMaximumGlyphs($g_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($g_out, $result, 'glyph-short of '.$input); } } public function testUTF8Wrap() { $inputs = array( array( 'aaaaaaa', 3, 
array( 'aaa', 'aaa', 'a', - )), + ), + ), array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', - )), + ), + ), array( 'aa&aaaa', 3, array( 'aa&', 'aaa', 'a', - )), + ), + ), array( "aa\xe6\x9d\xb1aaaa", 3, array( "aa\xe6\x9d\xb1", 'aaa', 'a', - )), + ), + ), array( '', 80, array( - )), + ), + ), array( 'a', 80, array( 'a', - )), + ), + ), ); foreach ($inputs as $input) { list($string, $width, $expect) = $input; $this->assertEqual( $expect, phutil_utf8_hard_wrap_html($string, $width), "Wrapping of '".$string."'"); } } public function testUTF8NonHTMLWrap() { $inputs = array( array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', - )), + ), + ), array( 'abracadabra!', 4, array( 'abra', 'cada', 'bra!', - )), + ), + ), array( '', 10, array( - )), + ), + ), array( 'a', 20, array( 'a', - )), + ), + ), array( "aa\xe6\x9d\xb1aaaa", 3, array( "aa\xe6\x9d\xb1", 'aaa', 'a', - )), + ), + ), array( "mmm\nmmm\nmmmm", 3, array( 'mmm', 'mmm', 'mmm', 'm', - )), + ), + ), ); foreach ($inputs as $input) { list($string, $width, $expect) = $input; $this->assertEqual( $expect, phutil_utf8_hard_wrap($string, $width), "Wrapping of '".$string."'"); } } public function testUTF8ConvertParams() { $caught = null; try { phutil_utf8_convert('', 'utf8', ''); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, 'Requires source encoding.'); $caught = null; try { phutil_utf8_convert('', '', 'utf8'); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, 'Requires target encoding.'); } public function testUTF8Convert() { if (!function_exists('mb_convert_encoding')) { $this->assertSkipped('Requires mbstring extension.'); } // "[ae]gis se[n]or [(c)] 1970 [+/-] 1 [degree]" $input = "\xE6gis SE\xD1OR \xA9 1970 \xB11\xB0"; $expect = "\xC3\xA6gis SE\xC3\x91OR \xC2\xA9 1970 \xC2\xB11\xC2\xB0"; $output = phutil_utf8_convert($input, 'UTF-8', 'ISO-8859-1'); $this->assertEqual($expect, $output, 'Conversion from ISO-8859-1.'); $caught = null; try { phutil_utf8_convert('xyz', 'moon 
language', 'UTF-8'); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, 'Conversion with bogus encoding.'); } public function testUTF8ucwords() { $tests = array( '' => '', 'x' => 'X', 'X' => 'X', 'five short graybles' => 'Five Short Graybles', 'xXxSNiPeRKiLLeRxXx' => 'XXxSNiPeRKiLLeRxXx', ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_ucwords($input), 'phutil_utf8_ucwords("'.$input.'")'); } } public function testUTF8strtolower() { $tests = array( '' => '', 'a' => 'a', 'A' => 'a', '!' => '!', 'OMG!~ LOLolol ROFLwaffle11~' => 'omg!~ lololol roflwaffle11~', "\xE2\x98\x83" => "\xE2\x98\x83", ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_strtolower($input), 'phutil_utf8_strtolower("'.$input.'")'); } } public function testUTF8strtoupper() { $tests = array( '' => '', 'a' => 'A', 'A' => 'A', '!' => '!', 'Cats have 9 lives.' => 'CATS HAVE 9 LIVES.', "\xE2\x98\x83" => "\xE2\x98\x83", ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_strtoupper($input), 'phutil_utf8_strtoupper("'.$input.'")'); } } public function testUTF8IsCombiningCharacter() { $character = "\xCD\xA0"; $this->assertEqual( true, phutil_utf8_is_combining_character($character)); $character = 'a'; $this->assertEqual( false, phutil_utf8_is_combining_character($character)); } public function testUTF8vCombined() { // Empty string. $string = ''; $this->assertEqual(array(), phutil_utf8v_combined($string)); // Single character. $string = 'x'; $this->assertEqual(array('x'), phutil_utf8v_combined($string)); // No combining characters. $string = 'cat'; $this->assertEqual(array('c', 'a', 't'), phutil_utf8v_combined($string)); // String with a combining character in the middle. $string = "ca\xCD\xA0t"; $this->assertEqual( array('c', "a\xCD\xA0", 't'), phutil_utf8v_combined($string)); // String starting with a combined character. 
$string = "c\xCD\xA0at"; $this->assertEqual( array("c\xCD\xA0", 'a', 't'), phutil_utf8v_combined($string)); // String with trailing combining character. $string = "cat\xCD\xA0"; $this->assertEqual( array('c', 'a', "t\xCD\xA0"), phutil_utf8v_combined($string)); // String with multiple combined characters. $string = "c\xCD\xA0a\xCD\xA0t\xCD\xA0"; $this->assertEqual( array("c\xCD\xA0", "a\xCD\xA0", "t\xCD\xA0"), phutil_utf8v_combined($string)); // String with multiple combining characters. $string = "ca\xCD\xA0\xCD\xA0t"; $this->assertEqual( array('c', "a\xCD\xA0\xCD\xA0", 't'), phutil_utf8v_combined($string)); // String beginning with a combining character. $string = "\xCD\xA0\xCD\xA0c"; $this->assertEqual( array(" \xCD\xA0\xCD\xA0", 'c'), phutil_utf8v_combined($string)); } public function testUTF8BMPSegfaults() { // This test case fails by segfaulting, or passes by not segfaulting. See // the function implementation for details. $input = str_repeat("\xEF\xBF\xBF", 1024 * 32); phutil_is_utf8_with_only_bmp_characters($input); $this->assertTrue(true); } public function testUTF8BMP() { $tests = array( '' => array(true, true, 'empty string'), 'a' => array(true, true, 'a'), "a\xCD\xA0\xCD\xA0" => array(true, true, 'a with combining'), "\xE2\x98\x83" => array(true, true, 'snowman'), // This is the last character in BMP, U+FFFF. "\xEF\xBF\xBF" => array(true, true, 'U+FFFF'), // This isn't valid. "\xEF\xBF\xC0" => array(false, false, 'Invalid, byte range.'), // This is an invalid nonminimal representation. "\xF0\x81\x80\x80" => array(false, false, 'Nonminimal 4-byte characer.'), // This is the first character above BMP, U+10000. "\xF0\x90\x80\x80" => array(true, false, 'U+10000'), "\xF0\x9D\x84\x9E" => array(true, false, 'gclef'), "musical \xF0\x9D\x84\x9E g-clef" => array(true, false, 'gclef text'), "\xF0\x9D\x84" => array(false, false, 'Invalid, truncated.'), "\xE0\x80\x80" => array(false, false, 'Nonminimal 3-byte character.'), // Partial BMP characters.
"\xCD" => array(false, false, 'Partial 2-byte character.'), "\xE0\xA0" => array(false, false, 'Partial BMP 0xE0 character.'), "\xE2\x98" => array(false, false, 'Partial BMP cahracter.'), ); foreach ($tests as $input => $test) { list($expect_utf8, $expect_bmp, $test_name) = $test; // Depending on what's installed on the system, this may use an // extension. $this->assertEqual( $expect_utf8, phutil_is_utf8($input), pht('is_utf(%s)', $test_name)); // Also test this against the pure PHP implementation, explicitly. $this->assertEqual( $expect_utf8, phutil_is_utf8_slowly($input), pht('is_utf_slowly(%s)', $test_name)); $this->assertEqual( $expect_bmp, phutil_is_utf8_with_only_bmp_characters($input), pht('is_utf_bmp(%s)', $test_name)); } } } diff --git a/src/xsprintf/PhutilCommandString.php b/src/xsprintf/PhutilCommandString.php index 5c62b3a..21c51da 100644 --- a/src/xsprintf/PhutilCommandString.php +++ b/src/xsprintf/PhutilCommandString.php @@ -1,85 +1,85 @@ argv = $argv; $this->escapingMode = self::MODE_DEFAULT; // This makes sure we throw immediately if there are errors in the // parameters. 
$this->getMaskedString(); } public function __toString() { return $this->getMaskedString(); } public function getUnmaskedString() { return $this->renderString(true); } public function getMaskedString() { return $this->renderString(false); } public function setEscapingMode($escaping_mode) { $this->escapingMode = $escaping_mode; return $this; } private function renderString($unmasked) { return xsprintf( 'xsprintf_command', array( 'unmasked' => $unmasked, - 'mode' => $this->escapingMode + 'mode' => $this->escapingMode, ), $this->argv); } public static function escapeArgument($value, $mode) { switch ($mode) { case self::MODE_DEFAULT: return escapeshellarg($value); case self::MODE_POWERSHELL: return self::escapePowershell($value); default: throw new Exception('Unknown escaping mode!'); } } private static function escapePowershell($value) { // These escape sequences are from http://ss64.com/ps/syntax-esc.html // Replace backticks first. $value = str_replace('`', '``', $value); // Now replace other required notations. $value = str_replace("\0", '`0', $value); $value = str_replace(chr(7), '`a', $value); $value = str_replace(chr(8), '`b', $value); $value = str_replace("\f", '`f', $value); $value = str_replace("\n", '`n', $value); $value = str_replace("\r", '`r', $value); $value = str_replace("\t", '`t', $value); $value = str_replace("\v", '`v', $value); $value = str_replace('#', '`#', $value); $value = str_replace("'", '`\'', $value); $value = str_replace('"', '`"', $value); // The rule on dollar signs is mentioned further down the page, and // they only need to be escaped when using double quotes (which we are). $value = str_replace('$', '`$', $value); return '"'.$value.'"'; } }