diff --git a/scripts/__init_script__.php b/scripts/__init_script__.php index 53cb235..6f6b36f 100644 --- a/scripts/__init_script__.php +++ b/scripts/__init_script__.php @@ -1,91 +1,95 @@ 0) { ob_end_clean(); } error_reporting(E_ALL | E_STRICT); $config_map = array( // Always display script errors. Without this, they may not appear, which is // unhelpful when users encounter a problem. On the web this is a security // concern because you don't want to expose errors to clients, but in a // script context we always want to show errors. 'display_errors' => true, // Send script error messages to the server's `error_log` setting. 'log_errors' => true, // Set the error log to the default, so errors go to stderr. Without this // errors may end up in some log, and users may not know where the log is // or check it. 'error_log' => null, // XDebug raises a fatal error if the call stack gets too deep, but the // default setting is 100, which we may exceed legitimately with module // includes (and in other cases, like recursive filesystem operations // applied to 100+ levels of directory nesting). Stop it from triggering: // we explicitly limit recursive algorithms which should be limited. // // After Feb 2014, XDebug interprets a value of 0 to mean "do not allow any // function calls". Previously, 0 effectively disabled this check. For // context, see T5027. 'xdebug.max_nesting_level' => PHP_INT_MAX, // Don't limit memory, doing so just generally just prevents us from // processing large inputs without many tangible benefits. 'memory_limit' => -1, ); foreach ($config_map as $config_key => $config_value) { ini_set($config_key, $config_value); } if (!ini_get('date.timezone')) { // If the timezone isn't set, PHP issues a warning whenever you try to parse // a date (like those from Git or Mercurial logs), even if the date contains // timezone information (like "PST" or "-0700") which makes the // environmental timezone setting is completely irrelevant. We never rely on // the system timezone setting in any capacity, so prevent PHP from flipping // out by setting it to a safe default (UTC) if it isn't set to some other // value. date_default_timezone_set('UTC'); } // Adjust `include_path`. ini_set('include_path', implode(PATH_SEPARATOR, array( dirname(dirname(__FILE__)).'/externals/includes', ini_get('include_path'), ))); // Disable the insanely dangerous XML entity loader by default. if (function_exists('libxml_disable_entity_loader')) { libxml_disable_entity_loader(true); } // Now, load libphutil. $root = dirname(dirname(__FILE__)); require_once $root.'/src/__phutil_library_init__.php'; PhutilErrorHandler::initialize(); $router = PhutilSignalRouter::initialize(); $handler = new PhutilBacktraceSignalHandler(); $router->installHandler('phutil.backtrace', $handler); } __phutil_init_script__(); diff --git a/scripts/daemon/exec/exec_daemon.php b/scripts/daemon/exec/exec_daemon.php index 2cb206e..41da2c0 100755 --- a/scripts/daemon/exec/exec_daemon.php +++ b/scripts/daemon/exec/exec_daemon.php @@ -1,127 +1,131 @@ #!/usr/bin/env php setTagline(pht('daemon executor')); $args->setSynopsis(<<parse( array( array( 'name' => 'trace', 'help' => pht('Enable debug tracing.'), ), array( 'name' => 'trace-memory', 'help' => pht('Enable debug memory tracing.'), ), array( 'name' => 'verbose', 'help' => pht('Enable verbose activity logging.'), ), array( 'name' => 'label', 'short' => 'l', 'param' => 'label', 'help' => pht( 'Optional process label. Makes "%s" nicer, no behavioral effects.', 'ps'), ), array( 'name' => 'daemon', 'wildcard' => true, ), )); $trace_memory = $args->getArg('trace-memory'); $trace_mode = $args->getArg('trace') || $trace_memory; $verbose = $args->getArg('verbose'); if (function_exists('posix_isatty') && posix_isatty(STDIN)) { fprintf(STDERR, pht('Reading daemon configuration from stdin...')."\n"); } $config = @file_get_contents('php://stdin'); $config = id(new PhutilJSONParser())->parse($config); PhutilTypeSpec::checkMap( $config, array( 'log' => 'optional string|null', 'argv' => 'optional list', 'load' => 'optional list', 'autoscale' => 'optional wild', )); $log = idx($config, 'log'); if ($log) { ini_set('error_log', $log); PhutilErrorHandler::setErrorListener(array('PhutilDaemon', 'errorListener')); } $load = idx($config, 'load', array()); foreach ($load as $library) { $library = Filesystem::resolvePath($library); phutil_load_library($library); } PhutilErrorHandler::initialize(); $daemon = $args->getArg('daemon'); if (!$daemon) { throw new PhutilArgumentUsageException( pht('Specify which class of daemon to start.')); } else if (count($daemon) > 1) { throw new PhutilArgumentUsageException( pht('Specify exactly one daemon to start.')); } else { $daemon = head($daemon); if (!class_exists($daemon)) { throw new PhutilArgumentUsageException( pht( 'No class "%s" exists in any known library.', $daemon)); } else if (!is_subclass_of($daemon, 'PhutilDaemon')) { throw new PhutilArgumentUsageException( pht( 'Class "%s" is not a subclass of "%s".', $daemon, 'PhutilDaemon')); } } $argv = idx($config, 'argv', array()); $daemon = newv($daemon, array($argv)); if ($trace_mode) { $daemon->setTraceMode(); } if ($trace_memory) { $daemon->setTraceMemory(); } if ($verbose) { $daemon->setVerbose(true); } $autoscale = idx($config, 'autoscale'); if ($autoscale) { $daemon->setAutoscaleProperties($autoscale); } $daemon->execute(); diff --git a/scripts/daemon/launch_daemon.php b/scripts/daemon/launch_daemon.php index 4bee9fb..f1e6107 100755 --- a/scripts/daemon/launch_daemon.php +++ b/scripts/daemon/launch_daemon.php @@ -1,9 +1,13 @@ #!/usr/bin/env php run(); diff --git a/src/cache/PhutilAPCKeyValueCache.php b/src/cache/PhutilAPCKeyValueCache.php index b103d20..bf7f7df 100644 --- a/src/cache/PhutilAPCKeyValueCache.php +++ b/src/cache/PhutilAPCKeyValueCache.php @@ -1,56 +1,97 @@ $value) { - apc_store($key, $value, $ttl); + if ($is_apcu) { + apcu_store($key, $value, $ttl); + } else { + apc_store($key, $value, $ttl); + } } return $this; } public function deleteKeys(array $keys) { + static $is_apcu; + if ($is_apcu === null) { + $is_apcu = self::isAPCu(); + } + foreach ($keys as $key) { - apc_delete($key); + if ($is_apcu) { + apcu_delete($key); + } else { + apc_delete($key); + } } return $this; } public function destroyCache() { - apc_clear_cache('user'); + static $is_apcu; + if ($is_apcu === null) { + $is_apcu = self::isAPCu(); + } + + if ($is_apcu) { + apcu_clear_cache(); + } else { + apc_clear_cache('user'); + } return $this; } + private static function isAPCU() { + return function_exists('apcu_fetch'); + } + } diff --git a/src/utils/__tests__/PhutilUtilsTestCase.php b/src/utils/__tests__/PhutilUtilsTestCase.php index 1547039..5128255 100644 --- a/src/utils/__tests__/PhutilUtilsTestCase.php +++ b/src/utils/__tests__/PhutilUtilsTestCase.php @@ -1,887 +1,885 @@ assertTrue($caught instanceof InvalidArgumentException); } public function testMFilterWithEmptyValueFiltered() { $a = new MFilterTestHelper('o', 'p', 'q'); $b = new MFilterTestHelper('o', '', 'q'); $c = new MFilterTestHelper('o', 'p', 'q'); $list = array( 'a' => $a, 'b' => $b, 'c' => $c, ); $actual = mfilter($list, 'getI'); $expected = array( 'a' => $a, 'c' => $c, ); $this->assertEqual($expected, $actual); } public function testMFilterWithEmptyValueNegateFiltered() { $a = new MFilterTestHelper('o', 'p', 'q'); $b = new MFilterTestHelper('o', '', 'q'); $c = new MFilterTestHelper('o', 'p', 'q'); $list = array( 'a' => $a, 'b' => $b, 'c' => $c, ); $actual = mfilter($list, 'getI', true); $expected = array( 'b' => $b, ); $this->assertEqual($expected, $actual); } public function testIFilterInvalidIndexThrowException() { $caught = null; try { ifilter(array(), null); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof InvalidArgumentException); } public function testIFilterWithEmptyValueFiltered() { $list = array( 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q'), 'e' => array('h' => 'o', 'i' => null, 'j' => 'q'), 'f' => array('h' => 'o', 'i' => false, 'j' => 'q'), ); $actual = ifilter($list, 'i'); $expected = array( 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), ); $this->assertEqual($expected, $actual); } public function testIFilterIndexNotExistsAllFiltered() { $list = array( 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), ); $actual = ifilter($list, 'NoneExisting'); $expected = array(); $this->assertEqual($expected, $actual); } public function testIFilterWithEmptyValueNegateFiltered() { $list = array( 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q'), 'e' => array('h' => 'o', 'i' => null, 'j' => 'q'), 'f' => array('h' => 'o', 'i' => false, 'j' => 'q'), ); $actual = ifilter($list, 'i', true); $expected = array( 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q'), 'e' => array('h' => 'o', 'i' => null, 'j' => 'q'), 'f' => array('h' => 'o', 'i' => false, 'j' => 'q'), ); $this->assertEqual($expected, $actual); } public function testIFilterIndexNotExistsNotFiltered() { $list = array( 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), ); $actual = ifilter($list, 'NoneExisting', true); $expected = array( 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), ); $this->assertEqual($expected, $actual); } public function testmergevMergingBasicallyWorksCorrectly() { $this->assertEqual( array(), array_mergev( array( // ))); $this->assertEqual( array(), array_mergev( array( array(), array(), array(), ))); $this->assertEqual( array(1, 2, 3, 4, 5), array_mergev( array( array(1, 2), array(3), array(), array(4, 5), ))); $not_valid = array( 'scalar' => array(1), 'array plus scalar' => array(array(), 1), 'null' => array(null), ); foreach ($not_valid as $key => $invalid_input) { $caught = null; try { array_mergev($invalid_input); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue( ($caught instanceof InvalidArgumentException), pht('%s invalid on %s', 'array_mergev()', $key)); } } public function testNonempty() { $this->assertEqual( 'zebra', nonempty(false, null, 0, '', array(), 'zebra')); $this->assertEqual( null, nonempty()); $this->assertEqual( false, nonempty(null, false)); $this->assertEqual( null, nonempty(false, null)); } protected function tryAssertInstancesOfArray($input) { assert_instances_of($input, 'array'); } protected function tryAssertInstancesOfStdClass($input) { assert_instances_of($input, 'stdClass'); } public function testAssertInstancesOf() { $object = new stdClass(); $inputs = array( 'empty' => array(), 'stdClass' => array($object, $object), __CLASS__ => array($object, $this), 'array' => array(array(), array()), 'integer' => array($object, 1), ); $this->tryTestCases( $inputs, array(true, true, false, false, false), array($this, 'tryAssertInstancesOfStdClass'), 'InvalidArgumentException'); $this->tryTestCases( $inputs, array(true, false, false, true, false), array($this, 'tryAssertInstancesOfArray'), 'InvalidArgumentException'); } public function testAssertStringLike() { $this->assertEqual( null, assert_stringlike(null)); $this->assertEqual( null, assert_stringlike('')); $this->assertEqual( null, assert_stringlike('Hello World')); $this->assertEqual( null, assert_stringlike(1)); $this->assertEqual( null, assert_stringlike(9.9999)); $this->assertEqual( null, assert_stringlike(true)); $obj = new Exception('.'); $this->assertEqual( null, assert_stringlike($obj)); $obj = (object)array(); try { assert_stringlike($obj); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof InvalidArgumentException); $array = array( 'foo' => 'bar', 'bar' => 'foo', ); try { assert_stringlike($array); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof InvalidArgumentException); $tmp = new TempFile(); $resource = fopen($tmp, 'r'); try { assert_stringlike($resource); } catch (InvalidArgumentException $ex) { $caught = $ex; } fclose($resource); $this->assertTrue($caught instanceof InvalidArgumentException); } public function testCoalesce() { $this->assertEqual( 'zebra', coalesce(null, 'zebra')); $this->assertEqual( null, coalesce()); $this->assertEqual( false, coalesce(false, null)); $this->assertEqual( false, coalesce(null, false)); } public function testHeadLast() { $this->assertEqual( 'a', head(explode('.', 'a.b'))); $this->assertEqual( 'b', last(explode('.', 'a.b'))); } public function testHeadKeyLastKey() { $this->assertEqual( 'a', head_key(array('a' => 0, 'b' => 1))); $this->assertEqual( 'b', last_key(array('a' => 0, 'b' => 1))); $this->assertEqual(null, head_key(array())); $this->assertEqual(null, last_key(array())); } public function testID() { $this->assertEqual(true, id(true)); $this->assertEqual(false, id(false)); } public function testIdx() { $array = array( 'present' => true, 'null' => null, ); $this->assertEqual(true, idx($array, 'present')); $this->assertEqual(true, idx($array, 'present', false)); $this->assertEqual(null, idx($array, 'null')); $this->assertEqual(null, idx($array, 'null', false)); $this->assertEqual(null, idx($array, 'missing')); $this->assertEqual(false, idx($array, 'missing', false)); } public function testSplitLines() { $retain_cases = array( '' => array(''), 'x' => array('x'), "x\n" => array("x\n"), "\n" => array("\n"), "\n\n\n" => array("\n", "\n", "\n"), "\r\n" => array("\r\n"), "x\r\ny\n" => array("x\r\n", "y\n"), "x\ry\nz\r\n" => array("x\ry\n", "z\r\n"), "x\ry\nz\r\n\n" => array("x\ry\n", "z\r\n", "\n"), ); foreach ($retain_cases as $input => $expect) { $this->assertEqual( $expect, phutil_split_lines($input, $retain_endings = true), pht('(Retained) %s', addcslashes($input, "\r\n\\"))); } $discard_cases = array( '' => array(''), 'x' => array('x'), "x\n" => array('x'), "\n" => array(''), "\n\n\n" => array('', '', ''), "\r\n" => array(''), "x\r\ny\n" => array('x', 'y'), "x\ry\nz\r\n" => array("x\ry", 'z'), "x\ry\nz\r\n\n" => array("x\ry", 'z', ''), ); foreach ($discard_cases as $input => $expect) { $this->assertEqual( $expect, phutil_split_lines($input, $retain_endings = false), pht('(Discarded) %s', addcslashes($input, "\r\n\\"))); } } public function testArrayFuse() { $this->assertEqual(array(), array_fuse(array())); $this->assertEqual(array('x' => 'x'), array_fuse(array('x'))); } public function testArrayInterleave() { $this->assertEqual(array(), array_interleave('x', array())); $this->assertEqual(array('y'), array_interleave('x', array('y'))); $this->assertEqual( array('y', 'x', 'z'), array_interleave('x', array('y', 'z'))); $this->assertEqual( array('y', 'x', 'z'), array_interleave( 'x', array( 'kangaroo' => 'y', 'marmoset' => 'z', ))); $obj1 = (object)array(); $obj2 = (object)array(); $this->assertEqual( array($obj1, $obj2, $obj1, $obj2, $obj1), array_interleave( $obj2, array( $obj1, $obj1, $obj1, ))); $implode_tests = array( '' => array(1, 2, 3), 'x' => array(1, 2, 3), 'y' => array(), 'z' => array(1), ); foreach ($implode_tests as $x => $y) { $this->assertEqual( implode('', array_interleave($x, $y)), implode($x, $y)); } } public function testLoggableString() { $this->assertEqual( '', phutil_loggable_string('')); $this->assertEqual( "a\\nb", phutil_loggable_string("a\nb")); $this->assertEqual( "a\\x01b", phutil_loggable_string("a\x01b")); $this->assertEqual( "a\\x1Fb", phutil_loggable_string("a\x1Fb")); } public function testPhutilUnits() { $cases = array( '0 seconds in seconds' => 0, '1 second in seconds' => 1, '2 seconds in seconds' => 2, '100 seconds in seconds' => 100, '2 minutes in seconds' => 120, '1 hour in seconds' => 3600, '1 day in seconds' => 86400, '3 days in seconds' => 259200, '128 bits in bytes' => 16, '1 byte in bytes' => 1, '8 bits in bytes' => 1, ); foreach ($cases as $input => $expect) { $this->assertEqual( $expect, phutil_units($input), 'phutil_units("'.$input.'")'); } $bad_cases = array( 'quack', '3 years in seconds', '1 minute in milliseconds', '1 day in days', '-1 minutes in seconds', '1.5 minutes in seconds', '7 bits in bytes', '2 hours in bytes', '1 dram in bytes', '24 bits in seconds', ); foreach ($bad_cases as $input) { $caught = null; try { phutil_units($input); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue( ($caught instanceof InvalidArgumentException), 'phutil_units("'.$input.'")'); } } public function testPhutilJSONDecode() { $valid_cases = array( '{}' => array(), '[]' => array(), '[1, 2]' => array(1, 2), '{"a":"b"}' => array('a' => 'b'), ); foreach ($valid_cases as $input => $expect) { $result = phutil_json_decode($input); $this->assertEqual($expect, $result, 'phutil_json_decode('.$input.')'); } $invalid_cases = array( '', '"a"', '{,}', 'null', '"null"', ); foreach ($invalid_cases as $input) { $caught = null; try { phutil_json_decode($input); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof PhutilJSONParserException); } } public function testPhutilINIDecode() { // Skip the test if we are using an older version of PHP that doesn't // have the `parse_ini_string` function. try { phutil_ini_decode(''); } catch (PhutilMethodNotImplementedException $ex) { $this->assertSkipped($ex->getMessage()); } $valid_cases = array( '' => array(), 'foo=' => array('foo' => ''), 'foo=bar' => array('foo' => 'bar'), 'foo = bar' => array('foo' => 'bar'), "foo = bar\n" => array('foo' => 'bar'), "foo\nbar = baz" => array('bar' => 'baz'), "[foo]\nbar = baz" => array('foo' => array('bar' => 'baz')), "[foo]\n[bar]\nbaz = foo" => array( 'foo' => array(), 'bar' => array('baz' => 'foo'), ), "[foo]\nbar = baz\n\n[bar]\nbaz = foo" => array( 'foo' => array('bar' => 'baz'), 'bar' => array('baz' => 'foo'), ), "; Comment\n[foo]\nbar = baz" => array('foo' => array('bar' => 'baz')), "# Comment\n[foo]\nbar = baz" => array('foo' => array('bar' => 'baz')), "foo = true\n[bar]\nbaz = false" => array('foo' => true, 'bar' => array('baz' => false)), "foo = 1\nbar = 1.234" => array('foo' => 1, 'bar' => 1.234), 'x = {"foo": "bar"}' => array('x' => '{"foo": "bar"}'), ); foreach ($valid_cases as $input => $expect) { $result = phutil_ini_decode($input); $this->assertEqual($expect, $result, 'phutil_ini_decode('.$input.')'); } $invalid_cases = array( - '[' => - 'syntax error, unexpected $end, expecting \']\' in Unknown on line 1', + '[' => new PhutilINIParserException(), ); foreach ($invalid_cases as $input => $expect) { $caught = null; try { phutil_ini_decode($input); } catch (Exception $ex) { $caught = $ex; } - $this->assertTrue($caught instanceof PhutilINIParserException); - $this->assertEqual($expect, $caught->getMessage()); + $this->assertTrue($caught instanceof $expect); } } public function testCensorCredentials() { $cases = array( '' => '', 'abc' => 'abc', // NOTE: We're liberal about censoring here, since we can't tell // if this is a truncated password at the end of an input string // or a domain name. The version with a "/" isn't censored. 'http://example.com' => 'http://********', 'http://example.com/' => 'http://example.com/', 'http://username@example.com' => 'http://********@example.com', 'http://user:pass@example.com' => 'http://********@example.com', // We censor these because they might be truncated credentials at the end // of the string. 'http://user' => 'http://********', "http://user\n" => "http://********\n", 'svn+ssh://user:pass@example.com' => 'svn+ssh://********@example.com', ); foreach ($cases as $input => $expect) { $this->assertEqual( $expect, phutil_censor_credentials($input), pht('Credential censoring for: %s', $input)); } } public function testVarExport() { // Constants $this->assertEqual('null', phutil_var_export(null)); $this->assertEqual('true', phutil_var_export(true)); $this->assertEqual('false', phutil_var_export(false)); $this->assertEqual("'quack'", phutil_var_export('quack')); $this->assertEqual('1234567', phutil_var_export(1234567)); // Arrays $this->assertEqual( 'array()', phutil_var_export(array())); $this->assertEqual( implode("\n", array( 'array(', ' 1,', ' 2,', ' 3,', ')', )), phutil_var_export(array(1, 2, 3))); $this->assertEqual( implode("\n", array( 'array(', " 'foo' => 'bar',", " 'bar' => 'baz',", ')', )), phutil_var_export(array('foo' => 'bar', 'bar' => 'baz'))); $this->assertEqual( implode("\n", array( 'array(', " 'foo' => array(", " 'bar' => array(", " 'baz' => array(),", ' ),', ' ),', ')', )), phutil_var_export( array('foo' => array('bar' => array('baz' => array()))))); // Objects $this->assertEqual( "stdClass::__set_state(array(\n))", phutil_var_export(new stdClass())); $this->assertEqual( "PhutilTestPhobject::__set_state(array(\n))", phutil_var_export(new PhutilTestPhobject())); } public function testFnmatch() { $cases = array( '' => array( array(''), array('.', '/'), ), '*' => array( array('file'), array('dir/', '/dir'), ), '**' => array( array('file', 'dir/', '/dir', 'dir/subdir/file'), array(), ), '**/file' => array( array('file', 'dir/file', 'dir/subdir/file', 'dir/subdir/subdir/file'), array('file/', 'file/dir'), ), 'file.*' => array( array('file.php', 'file.a', 'file.'), array('files.php', 'file.php/blah'), ), 'fo?' => array( array('foo', 'fot'), array('fooo', 'ffoo', 'fo/', 'foo/'), ), 'fo{o,t}' => array( array('foo', 'fot'), array('fob', 'fo/', 'foo/'), ), 'fo{o,\\,}' => array( array('foo', 'fo,'), array('foo/', 'fo,/'), ), 'fo{o,\\\\}' => array( array('foo', 'fo\\'), array('foo/', 'fo\\/'), ), '/foo' => array( array('/foo'), array('foo', '/foo/'), ), // Tests for various `fnmatch` flags. '*.txt' => array( array( 'file.txt', // FNM_PERIOD '.secret-file.txt', ), array( // FNM_PATHNAME 'dir/file.txt', // FNM_CASEFOLD 'file.TXT', ), '\\*.txt' => array( array( // FNM_NOESCAPE '*.txt', ), array( 'file.txt', ), ), ), ); $invalid = array( '{', 'asdf\\', ); foreach ($cases as $input => $expect) { list($matches, $no_matches) = $expect; foreach ($matches as $match) { $this->assertTrue( phutil_fnmatch($input, $match), pht('Expecting "%s" to match "%s".', $input, $match)); } foreach ($no_matches as $no_match) { $this->assertFalse( phutil_fnmatch($input, $no_match), pht('Expecting "%s" not to match "%s".', $input, $no_match)); } } foreach ($invalid as $input) { $caught = null; try { phutil_fnmatch($input, ''); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof InvalidArgumentException); } } public function testJSONEncode() { $in = array( 'example' => "Not Valid UTF8: \x80", ); $caught = null; try { $value = phutil_json_encode($in); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue(($caught instanceof Exception)); } public function testHashComparisons() { $tests = array( array('1', '12', false), array('0', '0e123', false), array('0e123', '0e124', false), array('', '0', false), array('000', '0e0', false), array('001', '002', false), array('0', '', false), array('987654321', '123456789', false), array('A', 'a', false), array('123456789', '123456789', true), array('hunter42', 'hunter42', true), ); foreach ($tests as $key => $test) { list($u, $v, $expect) = $test; $actual = phutil_hashes_are_identical($u, $v); $this->assertEqual( $expect, $actual, pht('Test Case: "%s" vs "%s"', $u, $v)); } } public function testVectorSortInt() { $original = array( ~PHP_INT_MAX, -2147483648, -5, -3, -1, 0, 1, 2, 3, 100, PHP_INT_MAX, ); $items = $this->shuffleMap($original); foreach ($items as $key => $value) { $items[$key] = (string)id(new PhutilSortVector()) ->addInt($value); } asort($items, SORT_STRING); $this->assertEqual( array_keys($original), array_keys($items)); } public function testVectorSortString() { $original = array( '', "\1", 'A', 'AB', 'Z', "Z\1", 'ZZZ', ); $items = $this->shuffleMap($original); foreach ($items as $key => $value) { $items[$key] = (string)id(new PhutilSortVector()) ->addString($value); } asort($items, SORT_STRING); $this->assertEqual( array_keys($original), array_keys($items)); } private function shuffleMap(array $map) { $keys = array_keys($map); shuffle($keys); return array_select_keys($map, $keys); } } diff --git a/src/utils/utf8.php b/src/utils/utf8.php index fb51c90..d50ed01 100644 --- a/src/utils/utf8.php +++ b/src/utils/utf8.php @@ -1,838 +1,841 @@ = 0xD800 && $codepoint <= 0xDFFF) { $result[] = str_repeat($replacement, strlen($match)); $offset += strlen($matches[0]); continue; } } $result[] = $match; } else { // Unicode replacement character, U+FFFD. $result[] = $replacement; } $offset += strlen($matches[0]); } return implode('', $result); } /** * Determine if a string is valid UTF-8, with only basic multilingual plane * characters. This is particularly important because MySQL's `utf8` column * types silently truncate strings which contain characters outside of this * set. * * @param string String to test for being valid UTF-8 with only characters in * the basic multilingual plane. * @return bool True if the string is valid UTF-8 with only BMP characters. */ function phutil_is_utf8_with_only_bmp_characters($string) { return phutil_is_utf8_slowly($string, $only_bmp = true); } /** * Determine if a string is valid UTF-8. * * @param string Some string which may or may not be valid UTF-8. * @return bool True if the string is valid UTF-8. */ function phutil_is_utf8($string) { if (function_exists('mb_check_encoding')) { // If mbstring is available, this is significantly faster than using PHP. return mb_check_encoding($string, 'UTF-8'); } return phutil_is_utf8_slowly($string); } /** * Determine if a string is valid UTF-8, slowly. * * This works on any system, but has very poor performance. * * You should call @{function:phutil_is_utf8} instead of this function, as * that function can use more performant mechanisms if they are available on * the system. * * @param string Some string which may or may not be valid UTF-8. * @param bool True to require all characters be part of the basic * multilingual plane (no more than 3-bytes long). * @return bool True if the string is valid UTF-8. */ function phutil_is_utf8_slowly($string, $only_bmp = false) { // First, check the common case of normal ASCII strings. We're fine if // the string contains no bytes larger than 127. if (preg_match('/^[\x01-\x7F]+\z/', $string)) { return true; } // NOTE: In the past, we used a large regular expression in the form of // '(x|y|z)+' to match UTF8 strings. However, PCRE can segfaults on patterns // like this at relatively small input sizes, at least on some systems // (observed on OSX and Windows). This is apparently because the internal // implementation is recursive and it blows the stack. // See for some discussion. Since the // input limit is extremely low (less than 50KB on my system), do this check // very very slowly in PHP instead. See also T5316. $len = strlen($string); for ($ii = 0; $ii < $len; $ii++) { $chr = ord($string[$ii]); if ($chr >= 0x01 && $chr <= 0x7F) { continue; } else if ($chr >= 0xC2 && $chr <= 0xDF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } return false; } else if ($chr > 0xE0 && $chr <= 0xEF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } return false; } else if ($chr == 0xE0) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); // NOTE: This range starts at 0xA0, not 0x80. The values 0x80-0xA0 are // "valid", but not minimal representations, and MySQL rejects them. We're // special casing this part of the range. if ($chr >= 0xA0 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } return false; } else if (!$only_bmp) { if ($chr > 0xF0 && $chr <= 0xF4) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } } } else if ($chr == 0xF0) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); // NOTE: As above, this range starts at 0x90, not 0x80. The values // 0x80-0x90 are not minimal representations. if ($chr >= 0x90 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } } } } return false; } return true; } /** * Find the character length of a UTF-8 string. * * @param string A valid utf-8 string. * @return int The character length of the string. */ function phutil_utf8_strlen($string) { - return strlen(utf8_decode($string)); + if (function_exists('utf8_decode')) { + return strlen(utf8_decode($string)); + } + return count(phutil_utf8v($string)); } /** * Find the console display length of a UTF-8 string. This may differ from the * character length of the string if it contains double-width characters, like * many Chinese characters. * * This method is based on a C implementation here, which is based on the IEEE * standards. The source has more discussion and addresses more considerations * than this implementation does. * * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c * * NOTE: We currently assume width 1 for East-Asian ambiguous characters. * * NOTE: This function is VERY slow. * * @param string A valid UTF-8 string. * @return int The console display length of the string. */ function phutil_utf8_console_strlen($string) { // Formatting and colors don't contribute any width in the console. $string = preg_replace("/\x1B\[\d*m/", '', $string); // In the common case of an ASCII string, just return the string length. if (preg_match('/^[\x01-\x7F]*\z/', $string)) { return strlen($string); } $len = 0; // NOTE: To deal with combining characters, we're splitting the string into // glyphs first (characters with combiners) and then counting just the width // of the first character in each glyph. $display_glyphs = phutil_utf8v_combined($string); foreach ($display_glyphs as $display_glyph) { $glyph_codepoints = phutil_utf8v_codepoints($display_glyph); foreach ($glyph_codepoints as $c) { if ($c == 0) { break; } $len += 1 + ($c >= 0x1100 && ($c <= 0x115F || /* Hangul Jamo init. consonants */ $c == 0x2329 || $c == 0x232A || ($c >= 0x2E80 && $c <= 0xA4CF && $c != 0x303F) || /* CJK ... Yi */ ($c >= 0xAC00 && $c <= 0xD7A3) || /* Hangul Syllables */ ($c >= 0xF900 && $c <= 0xFAFF) || /* CJK Compatibility Ideographs */ ($c >= 0xFE10 && $c <= 0xFE19) || /* Vertical forms */ ($c >= 0xFE30 && $c <= 0xFE6F) || /* CJK Compatibility Forms */ ($c >= 0xFF00 && $c <= 0xFF60) || /* Fullwidth Forms */ ($c >= 0xFFE0 && $c <= 0xFFE6) || ($c >= 0x20000 && $c <= 0x2FFFD) || ($c >= 0x30000 && $c <= 0x3FFFD))); break; } } return $len; } /** * Split a UTF-8 string into an array of characters. Combining characters are * also split. * * @param string A valid utf-8 string. * @param int|null Stop processing after examining this many bytes. * @return list A list of characters in the string. */ function phutil_utf8v($string, $byte_limit = null) { $res = array(); $len = strlen($string); $ii = 0; while ($ii < $len) { $byte = $string[$ii]; if ($byte <= "\x7F") { $res[] = $byte; $ii += 1; if ($byte_limit && ($ii >= $byte_limit)) { break; } continue; } else if ($byte < "\xC0") { throw new Exception( pht('Invalid UTF-8 string passed to %s.', __FUNCTION__)); } else if ($byte <= "\xDF") { $seq_len = 2; } else if ($byte <= "\xEF") { $seq_len = 3; } else if ($byte <= "\xF7") { $seq_len = 4; } else if ($byte <= "\xFB") { $seq_len = 5; } else if ($byte <= "\xFD") { $seq_len = 6; } else { throw new Exception( pht('Invalid UTF-8 string passed to %s.', __FUNCTION__)); } if ($ii + $seq_len > $len) { throw new Exception( pht('Invalid UTF-8 string passed to %s.', __FUNCTION__)); } for ($jj = 1; $jj < $seq_len; ++$jj) { if ($string[$ii + $jj] >= "\xC0") { throw new Exception( pht('Invalid UTF-8 string passed to %s.', __FUNCTION__)); } } $res[] = substr($string, $ii, $seq_len); $ii += $seq_len; if ($byte_limit && ($ii >= $byte_limit)) { break; } } return $res; } /** * Split a UTF-8 string into an array of codepoints (as integers). * * @param string A valid UTF-8 string. * @return list A list of codepoints, as integers. */ function phutil_utf8v_codepoints($string) { $str_v = phutil_utf8v($string); foreach ($str_v as $key => $char) { $c = ord($char[0]); $v = 0; if (($c & 0x80) == 0) { $v = $c; } else if (($c & 0xE0) == 0xC0) { $v = (($c & 0x1F) << 6) + ((ord($char[1]) & 0x3F)); } else if (($c & 0xF0) == 0xE0) { $v = (($c & 0x0F) << 12) + ((ord($char[1]) & 0x3F) << 6) + ((ord($char[2]) & 0x3F)); } else if (($c & 0xF8) == 0xF0) { $v = (($c & 0x07) << 18) + ((ord($char[1]) & 0x3F) << 12) + ((ord($char[2]) & 0x3F) << 6) + ((ord($char[3]) & 0x3F)); } else if (($c & 0xFC) == 0xF8) { $v = (($c & 0x03) << 24) + ((ord($char[1]) & 0x3F) << 18) + ((ord($char[2]) & 0x3F) << 12) + ((ord($char[3]) & 0x3F) << 6) + ((ord($char[4]) & 0x3F)); } else if (($c & 0xFE) == 0xFC) { $v = (($c & 0x01) << 30) + ((ord($char[1]) & 0x3F) << 24) + ((ord($char[2]) & 0x3F) << 18) + ((ord($char[3]) & 0x3F) << 12) + ((ord($char[4]) & 0x3F) << 6) + ((ord($char[5]) & 0x3F)); } $str_v[$key] = $v; } return $str_v; } /** * Convert a Unicode codepoint into a UTF8-encoded string. * * @param int Unicode codepoint. * @return string UTF8 encoding. */ function phutil_utf8_encode_codepoint($codepoint) { if ($codepoint < 0x80) { $r = chr($codepoint); } else if ($codepoint < 0x800) { $r = chr(0xC0 | (($codepoint >> 6) & 0x1F)). chr(0x80 | (($codepoint) & 0x3F)); } else if ($codepoint < 0x10000) { $r = chr(0xE0 | (($codepoint >> 12) & 0x0F)). chr(0x80 | (($codepoint >> 6) & 0x3F)). chr(0x80 | (($codepoint) & 0x3F)); } else if ($codepoint < 0x110000) { $r = chr(0xF0 | (($codepoint >> 18) & 0x07)). chr(0x80 | (($codepoint >> 12) & 0x3F)). chr(0x80 | (($codepoint >> 6) & 0x3F)). chr(0x80 | (($codepoint) & 0x3F)); } else { throw new Exception( pht( 'Encoding UTF8 codepoint "%s" is not supported.', $codepoint)); } return $r; } /** * Hard-wrap a block of UTF-8 text with embedded HTML tags and entities. * * @param string An HTML string with tags and entities. * @return list List of hard-wrapped lines. */ function phutil_utf8_hard_wrap_html($string, $width) { $break_here = array(); // Convert the UTF-8 string into a list of UTF-8 characters. $vector = phutil_utf8v($string); $len = count($vector); $char_pos = 0; for ($ii = 0; $ii < $len; ++$ii) { // An ampersand indicates an HTML entity; consume the whole thing (until // ";") but treat it all as one character. if ($vector[$ii] == '&') { do { ++$ii; } while ($vector[$ii] != ';'); ++$char_pos; // An "<" indicates an HTML tag, consume the whole thing but don't treat // it as a character. } else if ($vector[$ii] == '<') { do { ++$ii; } while ($vector[$ii] != '>'); } else { ++$char_pos; } // Keep track of where we need to break the string later. if ($char_pos == $width) { $break_here[$ii] = true; $char_pos = 0; } } $result = array(); $string = ''; foreach ($vector as $ii => $char) { $string .= $char; if (isset($break_here[$ii])) { $result[] = $string; $string = ''; } } if (strlen($string)) { $result[] = $string; } return $result; } /** * Hard-wrap a block of UTF-8 text with no embedded HTML tags and entities. * * @param string A non HTML string * @param int Width of the hard-wrapped lines * @return list List of hard-wrapped lines. */ function phutil_utf8_hard_wrap($string, $width) { $result = array(); $lines = phutil_split_lines($string, $retain_endings = false); foreach ($lines as $line) { // Convert the UTF-8 string into a list of UTF-8 characters. $vector = phutil_utf8v($line); $len = count($vector); $buffer = ''; for ($ii = 1; $ii <= $len; ++$ii) { $buffer .= $vector[$ii - 1]; if (($ii % $width) === 0) { $result[] = $buffer; $buffer = ''; } } if (strlen($buffer)) { $result[] = $buffer; } } return $result; } /** * Convert a string from one encoding (like ISO-8859-1) to another encoding * (like UTF-8). * * This is primarily a thin wrapper around `mb_convert_encoding()` which checks * you have the extension installed, since we try to require the extension * only if you actually need it (i.e., you want to work with encodings other * than UTF-8). * * NOTE: This function assumes that the input is in the given source encoding. * If it is not, it may not output in the specified target encoding. If you * need to perform a hard conversion to UTF-8, use this function in conjunction * with @{function:phutil_utf8ize}. We can detect failures caused by invalid * encoding names, but `mb_convert_encoding()` fails silently if the * encoding name identifies a real encoding but the string is not actually * encoded with that encoding. * * @param string String to re-encode. * @param string Target encoding name, like "UTF-8". * @param string Source encoding name, like "ISO-8859-1". * @return string Input string, with converted character encoding. * * @phutil-external-symbol function mb_convert_encoding */ function phutil_utf8_convert($string, $to_encoding, $from_encoding) { if (!$from_encoding) { throw new InvalidArgumentException( pht( 'Attempting to convert a string encoding, but no source encoding '. 'was provided. Explicitly provide the source encoding.')); } if (!$to_encoding) { throw new InvalidArgumentException( pht( 'Attempting to convert a string encoding, but no target encoding '. 'was provided. Explicitly provide the target encoding.')); } // Normalize encoding names so we can no-op the very common case of UTF8 // to UTF8 (or any other conversion where both encodings are identical). $to_upper = strtoupper(str_replace('-', '', $to_encoding)); $from_upper = strtoupper(str_replace('-', '', $from_encoding)); if ($from_upper == $to_upper) { return $string; } if (!function_exists('mb_convert_encoding')) { throw new Exception( pht( "Attempting to convert a string encoding from '%s' to '%s', ". "but the '%s' PHP extension is not available. Install %s to ". "work with encodings other than UTF-8.", $from_encoding, $to_encoding, 'mbstring', 'mbstring')); } $result = @mb_convert_encoding($string, $to_encoding, $from_encoding); if ($result === false) { $message = error_get_last(); if ($message) { $message = idx($message, 'message', pht('Unknown error.')); } throw new Exception( pht( "String conversion from encoding '%s' to encoding '%s' failed: %s", $from_encoding, $to_encoding, $message)); } return $result; } /** * Convert a string to title case in a UTF8-aware way. This function doesn't * necessarily do a great job, but the builtin implementation of `ucwords()` can * completely destroy inputs, so it just has to be better than that. Similar to * @{function:ucwords}. * * @param string UTF-8 input string. * @return string Input, in some semblance of title case. */ function phutil_utf8_ucwords($str) { // NOTE: mb_convert_case() discards uppercase letters in words when converting // to title case. For example, it will convert "AAA" into "Aaa", which is // undesirable. $v = phutil_utf8v($str); $result = ''; $last = null; $ord_a = ord('a'); $ord_z = ord('z'); foreach ($v as $c) { $convert = false; if ($last === null || $last === ' ') { $o = ord($c[0]); if ($o >= $ord_a && $o <= $ord_z) { $convert = true; } } if ($convert) { $result .= phutil_utf8_strtoupper($c); } else { $result .= $c; } $last = $c; } return $result; } /** * Convert a string to lower case in a UTF8-aware way. Similar to * @{function:strtolower}. * * @param string UTF-8 input string. * @return string Input, in some semblance of lower case. * * @phutil-external-symbol function mb_convert_case */ function phutil_utf8_strtolower($str) { if (function_exists('mb_convert_case')) { return mb_convert_case($str, MB_CASE_LOWER, 'UTF-8'); } static $map; if ($map === null) { $map = array_combine( range('A', 'Z'), range('a', 'z')); } return phutil_utf8_strtr($str, $map); } /** * Convert a string to upper case in a UTF8-aware way. Similar to * @{function:strtoupper}. * * @param string UTF-8 input string. * @return string Input, in some semblance of upper case. * * @phutil-external-symbol function mb_convert_case */ function phutil_utf8_strtoupper($str) { if (function_exists('mb_convert_case')) { return mb_convert_case($str, MB_CASE_UPPER, 'UTF-8'); } static $map; if ($map === null) { $map = array_combine( range('a', 'z'), range('A', 'Z')); } return phutil_utf8_strtr($str, $map); } /** * Replace characters in a string in a UTF-aware way. Similar to * @{function:strtr}. * * @param string UTF-8 input string. * @param map Map of characters to replace. * @return string Input with translated characters. */ function phutil_utf8_strtr($str, array $map) { $v = phutil_utf8v($str); $result = ''; foreach ($v as $c) { if (isset($map[$c])) { $result .= $map[$c]; } else { $result .= $c; } } return $result; } /** * Determine if a given unicode character is a combining character or not. * * @param string A single unicode character. * @return boolean True or false. */ function phutil_utf8_is_combining_character($character) { $components = phutil_utf8v_codepoints($character); // Combining Diacritical Marks (0300 - 036F). // Combining Diacritical Marks Supplement (1DC0 - 1DFF). // Combining Diacritical Marks for Symbols (20D0 - 20FF). // Combining Half Marks (FE20 - FE2F). foreach ($components as $codepoint) { if ($codepoint >= 0x0300 && $codepoint <= 0x036F || $codepoint >= 0x1DC0 && $codepoint <= 0x1DFF || $codepoint >= 0x20D0 && $codepoint <= 0x20FF || $codepoint >= 0xFE20 && $codepoint <= 0xFE2F) { return true; } } return false; } /** * Split a UTF-8 string into an array of characters. Combining characters * are not split. * * @param string A valid utf-8 string. * @return list A list of characters in the string. */ function phutil_utf8v_combined($string) { $components = phutil_utf8v($string); return phutil_utf8v_combine_characters($components); } /** * Merge combining characters in a UTF-8 string. * * This is a low-level method which can allow other operations to do less work. * If you have a string, call @{method:phutil_utf8v_combined} instead. * * @param list List of UTF-8 characters. * @return list List of UTF-8 strings with combining characters merged. */ function phutil_utf8v_combine_characters(array $characters) { if (!$characters) { return array(); } // If the first character in the string is a combining character, // start with a space. if (phutil_utf8_is_combining_character($characters[0])) { $buf = ' '; } else { $buf = null; } $parts = array(); foreach ($characters as $character) { if (!isset($character[1])) { // This an optimization: there are no one-byte combining characters, // so we can just pass these through unmodified. $is_combining = false; } else { $is_combining = phutil_utf8_is_combining_character($character); } if ($is_combining) { $buf .= $character; } else { if ($buf !== null) { $parts[] = $buf; } $buf = $character; } } $parts[] = $buf; return $parts; }