\d+) ?(?P.*?)".
+ "(?:\r?\n(?P.*))?$@s";
+
+ // We need to parse one or more header blocks in case we got any
+ // "HTTP/1.X 100 Continue" nonsense back as part of the response. This
+ // happens with HTTPS requests, at the least.
+ $response = $raw_response;
+ while (true) {
+ $matches = null;
+ if (!preg_match($rex_base, $response, $matches)) {
+ return $this->buildMalformedResult($raw_response);
+ }
+
+ $head = $matches['head'];
+ $body = $matches['body'];
+
+ if (!preg_match($rex_head, $head, $matches)) {
+ return $this->buildMalformedResult($raw_response);
+ }
+
+ $response_code = (int)$matches['code'];
+ $response_status = strtolower($matches['status']);
+ if ($response_code == 100) {
+ // This is HTTP/1.X 100 Continue, so this whole chunk is moot.
+ $response = $body;
+ } else if (($response_code == 200) &&
+ ($response_status == 'connection established')) {
+ // When tunneling through an HTTPS proxy, we get an initial header
+ // block like "HTTP/1.X 200 Connection established", then newlines,
+ // then the normal response. Drop this chunk.
+ $response = $body;
+ } else {
+ $headers = $this->parseHeaders(idx($matches, 'headers'));
+ break;
+ }
+ }
+
+ $status = new HTTPFutureHTTPResponseStatus(
+ $response_code,
+ $body,
+ $headers,
+ $this->expect);
+
+ return array($status, $body, $headers);
+ }
+
+  /**
+   * Parse an HTTP header block.
+   *
+   * The block is split into lines, and each line is split at its first colon
+   * into a name and a value. A line which contains no colon is kept as a
+   * tuple of the raw line and `null`.
+   *
+   * @param string Raw HTTP headers.
+   * @return list List of HTTP header tuples.
+   * @task internal
+   */
+  protected function parseHeaders($head_raw) {
+    // The group names are required: the loop below reads `$m['name']` and
+    // `$m['value']` from the match.
+    $rex_header = '@^(?P<name>.*?):\s*(?P<value>.*)$@';
+
+    $headers = array();
+
+    if (!$head_raw) {
+      return $headers;
+    }
+
+    $headers_raw = preg_split("/\r?\n/", $head_raw);
+    foreach ($headers_raw as $header) {
+      $m = null;
+      if (preg_match($rex_header, $header, $m)) {
+        $headers[] = array($m['name'], $m['value']);
+      } else {
+        $headers[] = array($header, null);
+      }
+    }
+
+    return $headers;
+  }
+
+
+  /**
+   * Look up a header by name and return the value of its first occurrence.
+   *
+   * @param list List of headers from `resolve()`.
+   * @param string Case insensitive header name.
+   * @return string Value of the header or null if not found.
+   * @task resolve
+   */
+  public static function getHeader(array $headers, $search) {
+    assert_instances_of($headers, 'array');
+
+    foreach ($headers as $tuple) {
+      list($header_name, $header_value) = $tuple;
+      if (strcasecmp($header_name, $search) != 0) {
+        continue;
+      }
+      return $header_value;
+    }
+
+    return null;
+  }
+
+
+  /**
+   * Produce the result tuple used when an HTTP response can not be parsed.
+   * The tuple carries a parse-error status, no body and no headers.
+   *
+   * @return tuple Valid resolution tuple.
+   * @task internal
+   */
+  protected function buildMalformedResult($raw_response) {
+    $parse_status = new HTTPFutureParseResponseStatus(
+      HTTPFutureParseResponseStatus::ERROR_MALFORMED_RESPONSE,
+      $raw_response);
+
+    return array($parse_status, null, array());
+  }
+
+}
diff --git a/src/future/http/HTTPFuture.php b/src/future/http/HTTPFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/HTTPFuture.php
@@ -0,0 +1,302 @@
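+<?php
+
+/**
+ * Example usage:
+ *
+ * $future = new HTTPFuture('http://www.example.com/');
+ * list($response_body, $headers) = $future->resolvex();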
+ *
+ * Or
+ *
+ * $future = new HTTPFuture('http://www.example.com/');
+ * list($http_response_status_object,
+ * $response_body,
+ * $headers) = $future->resolve();
+ *
+ * Prefer @{method:resolvex} to @{method:resolve} as the former throws
+ * @{class:HTTPFutureHTTPResponseStatus} on failures, which includes an
+ * informative exception message.
+ */
+final class HTTPFuture extends BaseHTTPFuture {
+
+ private $host;
+ private $port = 80;
+ private $fullRequestPath;
+
+ private $socket;
+ private $writeBuffer;
+ private $response;
+
+ private $stateConnected = false;
+ private $stateWriteComplete = false;
+ private $stateReady = false;
+ private $stateStartTime;
+
+ private $profilerCallID;
+
+  /**
+   * Set the URI to request.
+   *
+   * The URI must be fully qualified, use the "http" scheme, include a host
+   * name, and must not carry Basic Auth credentials. The host, port and
+   * request path (path plus query string) are extracted for later use when
+   * opening the socket and building the request.
+   *
+   * @param string Fully qualified "http://" URI.
+   * @return wild Whatever the parent implementation returns.
+   */
+  public function setURI($uri) {
+    $parts = parse_url($uri);
+    if (!$parts) {
+      throw new Exception(pht("Could not parse URI '%s'.", $uri));
+    }
+
+    // URI schemes are case-insensitive, so "HTTP://..." is acceptable too.
+    if (empty($parts['scheme']) || strtolower($parts['scheme']) !== 'http') {
+      throw new Exception(
+        pht(
+          "URI '%s' must be fully qualified with '%s' scheme.",
+          $uri,
+          'http://'));
+    }
+
+    if (!isset($parts['host'])) {
+      throw new Exception(
+        pht("URI '%s' must be fully qualified and include host name.", $uri));
+    }
+
+    if (isset($parts['user']) || isset($parts['pass'])) {
+      throw new Exception(
+        pht('HTTP Basic Auth is not supported by %s.', __CLASS__));
+    }
+
+    // Validation is complete. Only mutate state from here on, so that a
+    // rejected URI leaves the future exactly as it was.
+    $this->host = $parts['host'];
+
+    // Always assign the port: if this method is called a second time with a
+    // URI that has no explicit port, we must not keep the port of the
+    // previous URI.
+    if (!empty($parts['port'])) {
+      $this->port = $parts['port'];
+    } else {
+      $this->port = 80;
+    }
+
+    $path = isset($parts['path']) ? $parts['path'] : '/';
+    if (isset($parts['query'])) {
+      $path .= '?'.$parts['query'];
+    }
+    $this->fullRequestPath = $path;
+
+    return parent::setURI($uri);
+  }
+
+  /**
+   * Close the socket, if one is still open, when the future is destroyed.
+   */
+  public function __destruct() {
+    if (!$this->socket) {
+      return;
+    }
+
+    @fclose($this->socket);
+    $this->socket = null;
+  }
+
+  /**
+   * Get the sockets this future wants to read from.
+   *
+   * @return list The socket, if one exists; otherwise an empty list.
+   */
+  public function getReadSockets() {
+    return $this->socket ? array($this->socket) : array();
+  }
+
+  /**
+   * Get the sockets this future wants to write to.
+   *
+   * @return list The socket while request bytes remain in the write buffer;
+   *              otherwise an empty list.
+   */
+  public function getWriteSockets() {
+    if (!strlen($this->writeBuffer)) {
+      return array();
+    }
+
+    return array($this->socket);
+  }
+
+  /**
+   * Test whether the write buffer has been fully flushed to the socket.
+   *
+   * @return bool True once the write buffer has been observed empty.
+   */
+  public function isWriteComplete() {
+    $is_complete = $this->stateWriteComplete;
+    return $is_complete;
+  }
+
+  /**
+   * Get the "User-Agent" value sent when the caller did not provide one.
+   *
+   * @return string Class name followed by "/1.0".
+   */
+  private function getDefaultUserAgent() {
+    return sprintf('%s/1.0', __CLASS__);
+  }
+
+  /**
+   * Advance the request and report whether it has resolved.
+   *
+   * On the first call this opens the socket and starts a profiler service
+   * call. Each call then checks whether the connection is writable, flushes
+   * as much of the write buffer as the socket accepts, and drains any
+   * readable response data before handing off to @{method:checkSocket}.
+   *
+   * @return bool True if the future has resolved.
+   */
+  public function isReady() {
+    if ($this->stateReady) {
+      return true;
+    }
+
+    if (!$this->socket) {
+      $this->stateStartTime = microtime(true);
+
+      $socket = $this->buildSocket();
+      $this->socket = $socket;
+      if (!$socket) {
+        // buildSocket() records the failure result itself.
+        return $this->stateReady;
+      }
+
+      $call_id = PhutilServiceProfiler::getInstance()->beginServiceCall(
+        array(
+          'type' => 'http',
+          'uri' => $this->getURI(),
+        ));
+      $this->profilerCallID = $call_id;
+    }
+
+    if (!$this->stateConnected) {
+      // Poll without waiting: the socket is treated as connected once it
+      // selects as writable.
+      $readable = array();
+      $writable = array($this->socket);
+      $exceptional = array();
+      stream_select($readable, $writable, $exceptional, 0);
+      if ($writable) {
+        $this->stateConnected = true;
+      }
+    }
+
+    if (!$this->stateConnected) {
+      return $this->checkSocket();
+    }
+
+    if (strlen($this->writeBuffer)) {
+      $written = @fwrite($this->socket, $this->writeBuffer);
+      if ($written === false) {
+        throw new Exception(pht('Failed to write to buffer.'));
+      }
+      if ($written) {
+        $this->writeBuffer = substr($this->writeBuffer, $written);
+      }
+    }
+
+    if (!strlen($this->writeBuffer)) {
+      $this->stateWriteComplete = true;
+    }
+
+    // Drain everything currently available. A chunk such as "0" is falsey
+    // but not empty, hence the additional length test.
+    while (true) {
+      $chunk = fread($this->socket, 32768);
+      if (!$chunk && !strlen($chunk)) {
+        break;
+      }
+      $this->response .= $chunk;
+    }
+
+    if ($chunk === false) {
+      throw new Exception(pht('Failed to read socket.'));
+    }
+
+    return $this->checkSocket();
+  }
+
+  /**
+   * Open a nonblocking TCP socket to the configured host and port, and
+   * prepare the request bytes to write to it.
+   *
+   * If the connection can not be started, the future is marked ready with a
+   * connection-failed result.
+   *
+   * @return resource|null Socket, or null if the connection failed.
+   */
+  private function buildSocket() {
+    $address = 'tcp://'.$this->host.':'.$this->port;
+    $flags = STREAM_CLIENT_CONNECT | STREAM_CLIENT_ASYNC_CONNECT;
+
+    $errno = null;
+    $errstr = null;
+    $socket = @stream_socket_client($address, $errno, $errstr, 1.0, $flags);
+
+    if (!$socket) {
+      $this->stateReady = true;
+      $this->result = $this->buildErrorResult(
+        HTTPFutureTransportResponseStatus::ERROR_CONNECTION_FAILED);
+      return null;
+    }
+
+    if (!stream_set_blocking($socket, 0)) {
+      throw new Exception(pht('Failed to set stream nonblocking.'));
+    }
+
+    $this->writeBuffer = $this->buildHTTPRequest();
+
+    return $socket;
+  }
+
+  /**
+   * Decide whether the request has finished, and build the result if so.
+   *
+   * The request is finished when the socket reaches EOF or the timeout has
+   * elapsed. The result is a transport error (timeout, refused, failed) or
+   * the parsed HTTP response.
+   *
+   * @return bool True if the future has resolved.
+   */
+  private function checkSocket() {
+    $elapsed = microtime(true) - $this->stateStartTime;
+    $timeout = ($elapsed > $this->getTimeout());
+
+    $at_eof = feof($this->socket);
+    if (!$at_eof && !$timeout) {
+      return false;
+    }
+
+    $this->stateReady = true;
+
+    $error = null;
+    if ($timeout) {
+      $error = HTTPFutureTransportResponseStatus::ERROR_TIMEOUT;
+    } else if (!$this->stateConnected) {
+      $error = HTTPFutureTransportResponseStatus::ERROR_CONNECTION_REFUSED;
+    } else if (!$this->stateWriteComplete) {
+      $error = HTTPFutureTransportResponseStatus::ERROR_CONNECTION_FAILED;
+    }
+
+    if ($error === null) {
+      $this->result = $this->parseRawHTTPResponse($this->response);
+    } else {
+      $this->result = $this->buildErrorResult($error);
+    }
+
+    PhutilServiceProfiler::getInstance()->endServiceCall(
+      $this->profilerCallID,
+      array());
+
+    return true;
+  }
+
+  /**
+   * Build a result tuple for a transport-level failure.
+   *
+   * @param int Transport error code.
+   * @return tuple Status object, null body, and empty header list.
+   */
+  private function buildErrorResult($error) {
+    $status = new HTTPFutureTransportResponseStatus($error, $this->getURI());
+    return array($status, null, array());
+  }
+
+  /**
+   * Serialize the request into the raw bytes written to the socket.
+   *
+   * For GET requests, array data is appended to the request path as a query
+   * string. For other methods, array data is form-encoded into the body.
+   * "Content-Length" is always added; "User-Agent" and "Host" are added when
+   * the caller has not supplied them.
+   *
+   * @return string Raw HTTP/1.0 request.
+   */
+  private function buildHTTPRequest() {
+    $data = $this->getData();
+    $method = $this->getMethod();
+    $uri = $this->fullRequestPath;
+
+    $add_headers = array();
+
+    if ($method == 'GET') {
+      if (is_array($data)) {
+        $data = http_build_query($data, '', '&');
+        if (strpos($uri, '?') !== false) {
+          $uri .= '&'.$data;
+        } else {
+          $uri .= '?'.$data;
+        }
+        $data = '';
+      }
+    } else {
+      if (is_array($data)) {
+        $data = http_build_query($data, '', '&')."\r\n";
+        $add_headers[] = array(
+          'Content-Type',
+          'application/x-www-form-urlencoded',
+        );
+      }
+    }
+
+    $length = strlen($data);
+
+    $add_headers[] = array(
+      'Content-Length',
+      $length,
+    );
+
+    if (!$this->getHeaders('User-Agent')) {
+      $add_headers[] = array(
+        'User-Agent',
+        $this->getDefaultUserAgent(),
+      );
+    }
+
+    if (!$this->getHeaders('Host')) {
+      // The "Host" header must carry the port when it is not the default
+      // for the scheme, or servers may route the request incorrectly.
+      $host = $this->host;
+      if ((int)$this->port !== 80) {
+        $host .= ':'.$this->port;
+      }
+
+      $add_headers[] = array(
+        'Host',
+        $host,
+      );
+    }
+
+    $headers = array_merge($this->getHeaders(), $add_headers);
+    foreach ($headers as $key => $header) {
+      list($name, $value) = $header;
+
+      // A header line always needs the colon separator, even when the value
+      // is empty; a bare name is not a valid header line.
+      $line = $name.':';
+      if (strlen($value)) {
+        $line .= ' '.$value;
+      }
+
+      $headers[$key] = $line."\r\n";
+    }
+
+    return
+      "{$method} {$uri} HTTP/1.0\r\n".
+      implode('', $headers).
+      "\r\n".
+      $data;
+  }
+
+}
diff --git a/src/future/http/HTTPSFuture.php b/src/future/http/HTTPSFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/HTTPSFuture.php
@@ -0,0 +1,689 @@
+cabundle = $temp;
+ return $this;
+ }
+
+  /**
+   * Use the SSL certificate at the given path for this session.
+   *
+   * @param string The path to a valid SSL certificate for this session
+   * @return this
+   */
+  public function setCABundleFromPath($path) {
+    $bundle_path = $path;
+    $this->cabundle = $bundle_path;
+    return $this;
+  }
+
+  /**
+   * Get the SSL certificate bundle configured for this session, if any.
+   *
+   * @return string|null
+   */
+  public function getCABundle() {
+    $bundle = $this->cabundle;
+    return $bundle;
+  }
+
+  /**
+   * Choose whether "Location" headers in the response are followed.
+   * The default is true.
+   *
+   * @param boolean true to follow any Location header present in the response,
+   *                false to return the request directly
+   * @return this
+   */
+  public function setFollowLocation($follow) {
+    $should_follow = $follow;
+    $this->followLocation = $should_follow;
+    return $this;
+  }
+
+  /**
+   * Check whether "Location" headers in the response are followed.
+   *
+   * @return boolean
+   */
+  public function getFollowLocation() {
+    $should_follow = $this->followLocation;
+    return $should_follow;
+  }
+
+  /**
+   * Given a path, set the CA certificate used as a fallback by sessions
+   * which do not specify their own.
+   *
+   * @param string The path to a valid SSL certificate
+   * @return void
+   */
+  public static function setGlobalCABundleFromPath($path) {
+    $bundle_path = $path;
+    self::$globalCABundle = $bundle_path;
+  }
+  /**
+   * Given certificate contents, set the CA certificate used as a fallback by
+   * sessions which do not specify their own. The contents are written to a
+   * temporary file, which becomes the global bundle.
+   *
+   * @param string The certificate
+   * @return void
+   */
+  public static function setGlobalCABundleFromString($certificate) {
+    $bundle = new TempFile();
+    Filesystem::writeFile($bundle, $certificate);
+
+    self::$globalCABundle = $bundle;
+  }
+
+  /**
+   * Get the CA certificate used as a global fallback.
+   *
+   * @return string
+   */
+  public static function getGlobalCABundle() {
+    $bundle = self::$globalCABundle;
+    return $bundle;
+  }
+
+  /**
+   * Fetch the body of a remote URI. This works much like
+   * `@file_get_contents($uri)`, but does not need `allow_url_fopen`.
+   *
+   * @param string
+   * @param float
+   * @return string|false
+   */
+  public static function loadContent($uri, $timeout = null) {
+    $future = new HTTPSFuture($uri);
+
+    if ($timeout !== null) {
+      $future->setTimeout($timeout);
+    }
+
+    try {
+      list($body) = $future->resolvex();
+    } catch (HTTPFutureResponseStatus $ex) {
+      // Mirror file_get_contents(): report failure as false.
+      return false;
+    }
+
+    return $body;
+  }
+
+  /**
+   * Add a file upload to the request. Only one file may be attached under
+   * each parameter name.
+   *
+   * @param string HTTP parameter name.
+   * @param string File content.
+   * @param string File name.
+   * @param string File mime type.
+   * @return this
+   */
+  public function attachFileData($key, $data, $name, $mime_type) {
+    if (!isset($this->files[$key])) {
+      $this->files[$key] = array(
+        'data' => $data,
+        'name' => $name,
+        'mime' => $mime_type,
+      );
+
+      return $this;
+    }
+
+    throw new Exception(
+      pht(
+        '%s currently supports only one file attachment for each '.
+        'parameter name. You are trying to attach two different files with '.
+        'the same parameter, "%s".',
+        __CLASS__,
+        $key));
+  }
+
+ /**
+ * Advance this request and report whether it has resolved.
+ *
+ * On the first call, this takes a cURL handle from the per-domain pool (or
+ * creates one), attaches it to the shared multi handle, and configures it
+ * (URL, allowed protocols, body, headers, method, redirects, timeout, CA
+ * bundle, SSL verification, proxy). On later calls, it waits briefly on the
+ * multi handle if no results are pending.
+ *
+ * Every call then runs the multi handle, records all completed transfers,
+ * and, if this request's transfer has completed, builds `$this->result`,
+ * detaches the handle and optionally returns it to the pool.
+ *
+ * @return bool True if the future has resolved.
+ */
+ public function isReady() {
+ if (isset($this->result)) {
+ return true;
+ }
+
+ $uri = $this->getURI();
+ // The domain is the key into the handle pool (self::$pool).
+ $domain = id(new PhutilURI($uri))->getDomain();
+
+ if (!$this->handle) {
+ // First call: acquire and configure a cURL handle for this request.
+ $uri_object = new PhutilURI($uri);
+ $proxy = PhutilHTTPEngineExtension::buildHTTPProxyURI($uri_object);
+
+ $profiler = PhutilServiceProfiler::getInstance();
+ $this->profilerCallID = $profiler->beginServiceCall(
+ array(
+ 'type' => 'http',
+ 'uri' => $uri,
+ 'proxy' => (string)$proxy,
+ ));
+
+ if (!self::$multi) {
+ self::$multi = curl_multi_init();
+ if (!self::$multi) {
+ throw new Exception(pht('%s failed!', 'curl_multi_init()'));
+ }
+ }
+
+ // NOTE(review): a pooled handle presumably still carries options set by
+ // the request which used it last. Several options below (CURLOPT_PUT,
+ // CURLOPT_FOLLOWLOCATION, CURLOPT_PROXY, CURLOPT_CAINFO) are only set
+ // conditionally -- confirm whether the handle needs to be reset.
+ if (!empty(self::$pool[$domain])) {
+ $curl = array_pop(self::$pool[$domain]);
+ } else {
+ $curl = curl_init();
+ if (!$curl) {
+ throw new Exception(pht('%s failed!', 'curl_init()'));
+ }
+ }
+
+ $this->handle = $curl;
+ curl_multi_add_handle(self::$multi, $curl);
+
+ curl_setopt($curl, CURLOPT_URL, $uri);
+
+ if (defined('CURLOPT_PROTOCOLS')) {
+ // cURL supports a lot of protocols, and by default it will honor
+ // redirects across protocols (for instance, from HTTP to POP3). Beyond
+ // being very silly, this also has security implications:
+ //
+ // http://blog.volema.com/curl-rce.html
+ //
+ // Disable all protocols other than HTTP and HTTPS.
+
+ $allowed_protocols = CURLPROTO_HTTPS | CURLPROTO_HTTP;
+ curl_setopt($curl, CURLOPT_PROTOCOLS, $allowed_protocols);
+ curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, $allowed_protocols);
+ }
+
+ if (strlen($this->rawBody)) {
+ if ($this->getData()) {
+ throw new Exception(
+ pht(
+ 'You can not execute an HTTP future with both a raw request '.
+ 'body and structured request data.'));
+ }
+
+ // We aren't actually going to use this file handle, since we are
+ // just pushing data through the callback, but cURL gets upset if
+ // we don't hand it a real file handle.
+ $tmp = new TempFile();
+ $this->fileHandle = fopen($tmp, 'r');
+
+ // NOTE: We must set CURLOPT_PUT here to make cURL use CURLOPT_INFILE.
+ // We'll possibly overwrite the method later on, unless this is really
+ // a PUT request.
+ curl_setopt($curl, CURLOPT_PUT, true);
+ curl_setopt($curl, CURLOPT_INFILE, $this->fileHandle);
+ curl_setopt($curl, CURLOPT_INFILESIZE, strlen($this->rawBody));
+ curl_setopt($curl, CURLOPT_READFUNCTION,
+ array($this, 'willWriteBody'));
+ } else {
+ $data = $this->formatRequestDataForCURL();
+ curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
+ }
+
+ $headers = $this->getHeaders();
+
+ // Flatten (name, value) tuples into "Name: value" strings for cURL, and
+ // note whether the caller supplied a header whose name starts with
+ // "Expect".
+ $saw_expect = false;
+ for ($ii = 0; $ii < count($headers); $ii++) {
+ list($name, $value) = $headers[$ii];
+ $headers[$ii] = $name.': '.$value;
+ if (!strncasecmp($name, 'Expect', strlen('Expect'))) {
+ $saw_expect = true;
+ }
+ }
+ if (!$saw_expect) {
+ // cURL sends an "Expect" header by default for certain requests. While
+ // there is some reasoning behind this, it causes a practical problem
+ // in that lighttpd servers reject these requests with a 417. Both sides
+ // are locked in an eternal struggle (lighttpd has introduced a
+ // 'server.reject-expect-100-with-417' option to deal with this case).
+ //
+ // The ostensibly correct way to suppress this behavior on the cURL side
+ // is to add an empty "Expect:" header. If we haven't seen some other
+ // explicit "Expect:" header, do so.
+ //
+ // See here, for example, although this issue is fairly widespread:
+ // http://curl.haxx.se/mail/archive-2009-07/0008.html
+ $headers[] = 'Expect:';
+ }
+ curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
+
+ // Set the requested HTTP method, e.g. GET / POST / PUT.
+ curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $this->getMethod());
+
+ // Make sure we get the headers and data back.
+ curl_setopt($curl, CURLOPT_HEADER, true);
+ curl_setopt($curl, CURLOPT_WRITEFUNCTION,
+ array($this, 'didReceiveDataCallback'));
+
+ if ($this->followLocation) {
+ curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+ curl_setopt($curl, CURLOPT_MAXREDIRS, 20);
+ }
+
+ if (defined('CURLOPT_TIMEOUT_MS')) {
+ // If CURLOPT_TIMEOUT_MS is available, use the higher-precision timeout.
+ $timeout = max(1, ceil(1000 * $this->getTimeout()));
+ curl_setopt($curl, CURLOPT_TIMEOUT_MS, $timeout);
+ } else {
+ // Otherwise, fall back to the lower-precision timeout.
+ $timeout = max(1, ceil($this->getTimeout()));
+ curl_setopt($curl, CURLOPT_TIMEOUT, $timeout);
+ }
+
+ // We're going to try to set CAINFO below. This doesn't work at all on
+ // OSX around Yosemite (see T5913). On these systems, we'll use the
+ // system CA and then try to tell the user that their settings were
+ // ignored and how to fix things if we encounter a CA-related error.
+ // Assume we have custom CA settings to start with; we'll clear this
+ // flag if we read the default CA info below.
+ //
+ // NOTE(review): no such flag is set in this method; whether CAINFO is
+ // applied is decided by canSetCAInfo() below.
+
+ // Try some decent fallbacks here:
+ // - First, check if a bundle is set explicitly for this request, via
+ // `setCABundle()` or similar.
+ // - Then, check if a global bundle is set explicitly for all requests,
+ // via `setGlobalCABundle()` or similar.
+ // - Then, if a local custom.pem exists, use that, because it probably
+ // means that the user wants to override everything (also because the
+ // user might not have access to change the box's php.ini to add
+ // curl.cainfo).
+ // - Otherwise, try using curl.cainfo. If it's set explicitly, it's
+ // probably reasonable to try using it before we fall back to what
+ // libphutil ships with.
+ // - Lastly, try the default that libphutil ships with. If it doesn't
+ // work, give up and yell at the user.
+
+ if (!$this->getCABundle()) {
+ $caroot = dirname(phutil_get_library_root('phutil')).'/resources/ssl/';
+ $ini_val = ini_get('curl.cainfo');
+ if (self::getGlobalCABundle()) {
+ $this->setCABundleFromPath(self::getGlobalCABundle());
+ } else if (Filesystem::pathExists($caroot.'custom.pem')) {
+ $this->setCABundleFromPath($caroot.'custom.pem');
+ } else if ($ini_val) {
+ // TODO: We can probably do a pathExists() here, even.
+ $this->setCABundleFromPath($ini_val);
+ } else {
+ $this->setCABundleFromPath($caroot.'default.pem');
+ }
+ }
+
+ if ($this->canSetCAInfo()) {
+ curl_setopt($curl, CURLOPT_CAINFO, $this->getCABundle());
+ }
+
+ // Verify the peer certificate and hostname by default; any extension may
+ // turn either check off for this URI.
+ $verify_peer = 1;
+ $verify_host = 2;
+
+ $extensions = PhutilHTTPEngineExtension::getAllExtensions();
+ foreach ($extensions as $extension) {
+ if ($extension->shouldTrustAnySSLAuthorityForURI($uri_object)) {
+ $verify_peer = 0;
+ }
+ if ($extension->shouldTrustAnySSLHostnameForURI($uri_object)) {
+ $verify_host = 0;
+ }
+ }
+
+ curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, $verify_peer);
+ curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, $verify_host);
+ curl_setopt($curl, CURLOPT_SSLVERSION, 0);
+
+ if ($proxy) {
+ curl_setopt($curl, CURLOPT_PROXY, (string)$proxy);
+ }
+ } else {
+ // Later calls: the handle is already configured and attached.
+ $curl = $this->handle;
+
+ if (!self::$results) {
+ // NOTE: In curl_multi_select(), PHP calls curl_multi_fdset() but does
+ // not check the return value of &maxfd for -1 until recent versions
+ // of PHP (5.4.8 and newer). cURL may return -1 as maxfd in some unusual
+ // situations; if it does, PHP enters select() with nfds=0, which blocks
+ // until the timeout is reached.
+ //
+ // We could try to guess whether this will happen or not by examining
+ // the version identifier, but we can also just sleep for only a short
+ // period of time.
+ curl_multi_select(self::$multi, 0.01);
+ }
+ }
+
+ // Run the transfers attached to the shared multi handle.
+ do {
+ $active = null;
+ $result = curl_multi_exec(self::$multi, $active);
+ } while ($result == CURLM_CALL_MULTI_PERFORM);
+
+ // Record every completed transfer, keyed by its handle, so that the
+ // result can be looked up by handle below.
+ while ($info = curl_multi_info_read(self::$multi)) {
+ if ($info['msg'] == CURLMSG_DONE) {
+ self::$results[(int)$info['handle']] = $info;
+ }
+ }
+
+ // No completion has been recorded for our handle yet.
+ if (!array_key_exists((int)$curl, self::$results)) {
+ return false;
+ }
+
+ // The request is complete, so release any temporary files we wrote
+ // earlier.
+ $this->temporaryFiles = array();
+
+ $info = self::$results[(int)$curl];
+ $result = $this->responseBuffer;
+ $err_code = $info['result'];
+
+ if ($err_code) {
+ if (($err_code == CURLE_SSL_CACERT) && !$this->canSetCAInfo()) {
+ $status = new HTTPFutureCertificateResponseStatus(
+ HTTPFutureCertificateResponseStatus::ERROR_IMMUTABLE_CERTIFICATES,
+ $uri);
+ } else {
+ $status = new HTTPFutureCURLResponseStatus($err_code, $uri);
+ }
+
+ $body = null;
+ $headers = array();
+ $this->result = array($status, $body, $headers);
+ } else {
+ // cURL returns headers of all redirects, we strip all but the final one.
+ $redirects = curl_getinfo($curl, CURLINFO_REDIRECT_COUNT);
+ $result = preg_replace('/^(.*\r\n\r\n){'.$redirects.'}/sU', '', $result);
+ $this->result = $this->parseRawHTTPResponse($result);
+ }
+
+ curl_multi_remove_handle(self::$multi, $curl);
+ unset(self::$results[(int)$curl]);
+
+ // NOTE: We want to use keepalive if possible. Return the handle to a
+ // pool for the domain; don't close it.
+ if ($this->shouldReuseHandles()) {
+ self::$pool[$domain][] = $curl;
+ }
+
+ $profiler = PhutilServiceProfiler::getInstance();
+ $profiler->endServiceCall($this->profilerCallID, array());
+
+ return true;
+ }
+
+
+  /**
+   * Write callback handed to cURL. Each chunk of response data cURL delivers
+   * is appended to the response buffer.
+   *
+   * @return int Number of bytes accepted, which is always the whole chunk.
+   */
+  public function didReceiveDataCallback($handle, $data) {
+    $accepted = strlen($data);
+    $this->responseBuffer = $this->responseBuffer.$data;
+    return $accepted;
+  }
+
+
+  /**
+   * Read any response data which has arrived since the last read.
+   *
+   * NOTE: Like @{class:ExecFuture}, this method moves a read cursor forward
+   * but keeps the data. Everything stays buffered and is still returned when
+   * the future resolves. Call @{method:discardBuffers} after reading if you
+   * want to drop the data.
+   *
+   * @return string Response data, if available.
+   */
+  public function read() {
+    $cursor = $this->responseBufferPos;
+
+    // Move the cursor to the end of the buffer before slicing from the old
+    // position.
+    $this->responseBufferPos = strlen($this->responseBuffer);
+
+    return substr($this->responseBuffer, $cursor);
+  }
+
+
+  /**
+   * Drop all buffered response data and rewind the read cursor. Normally,
+   * this is called after consuming the data with @{method:read}.
+   *
+   * @return this
+   */
+  public function discardBuffers() {
+    $this->responseBufferPos = 0;
+    $this->responseBuffer = '';
+
+    return $this;
+  }
+
+
+  /**
+   * Build the value to pass to cURL as `CURLOPT_POSTFIELDS`.
+   *
+   * @return wild Some value, suitable for use in `CURLOPT_POSTFIELDS`.
+   */
+  private function formatRequestDataForCURL() {
+    // The value handed to cURL as CURLOPT_POSTFIELDS has several caveats:
+    //
+    //   - It may be an array or a query string. An array produces a
+    //     "multipart/form-data" request; a query string produces an
+    //     "application/x-www-form-urlencoded" request.
+    //
+    //   - An array can not repeat a key, but the user might want to send the
+    //     same parameter more than once.
+    //
+    //   - If an array value starts with "@", cURL reads an arbitrary file off
+    //     disk and sends it to an untrusted remote server. For example:
+    //
+    //       array('name' => '@/usr/local/secret')
+    //
+    //     ...tries to read that file and transmit its contents with the
+    //     request. This is surprising, and easily becomes a fairly severe
+    //     vulnerability letting an attacker read any file the HTTP process
+    //     can access. The feature is dangerous and not very useful, so we
+    //     prevent its use, which means rejecting some requests that have an
+    //     "@" in an inconvenient place.
+    //
+    // To sidestep the "@" case, and because most servers expect
+    // "application/x-www-form-urlencoded" data, we return a string unless
+    // files are attached to this request.
+
+    $data = $this->getData();
+    $files = $this->files;
+
+    $has_data = ($data || (is_string($data) && strlen($data)));
+    $has_files = (bool)$files;
+
+    if (!$has_data && !$has_files) {
+      // Nothing to send at all.
+      return null;
+    }
+
+    if (!$has_files) {
+      // Without files, a query string suffices. Encode the data and check
+      // that it can not pull in a file.
+      if (is_array($data)) {
+        $data = http_build_query($data, '', '&');
+      }
+
+      $this->checkForDangerousCURLMagic($data, true);
+
+      return $data;
+    }
+
+    // From here on files are attached, so the result must be an array. Start
+    // by turning the other data into an array if it is a string.
+
+    if (is_string($data)) {
+      // NOTE: Fancy array parsing is explicitly unwanted here. Parse into a
+      // flat pair list and build the dictionary by hand.
+      $parser = new PhutilQueryStringParser();
+
+      $map = array();
+      foreach ($parser->parseQueryStringToPairList($data) as $pair) {
+        list($key, $value) = $pair;
+        if (array_key_exists($key, $map)) {
+          throw new Exception(
+            pht(
+              'Request specifies two values for key "%s", but parameter '.
+              'names must be unique if you are posting file data due to '.
+              'limitations with cURL.',
+              $key));
+        }
+        $map[$key] = $value;
+      }
+
+      $data = $map;
+    }
+
+    foreach ($data as $value) {
+      $this->checkForDangerousCURLMagic($value, false);
+    }
+
+    foreach ($files as $name => $info) {
+      if (array_key_exists($name, $data)) {
+        throw new Exception(
+          pht(
+            'Request specifies a file with key "%s", but that key is also '.
+            'defined by normal request data. Due to limitations with cURL, '.
+            'requests that post file data must use unique keys.',
+            $name));
+      }
+
+      // Keep the temporary file referenced on this object; isReady() clears
+      // the list once the request completes.
+      $tmp = new TempFile($info['name']);
+      Filesystem::writeFile($tmp, $info['data']);
+      $this->temporaryFiles[] = $tmp;
+
+      // CURLFile exists in PHP 5.5.0 and later. Before that, the "@" syntax
+      // is the only way to attach a file.
+      $data[$name] = class_exists('CURLFile', false)
+        ? new CURLFile((string)$tmp, $info['mime'], $info['name'])
+        : '@'.(string)$tmp;
+    }
+
+    return $data;
+  }
+
+
+  /**
+   * Reject strings which would make cURL read files off disk because they
+   * begin with "@".
+   *
+   * @param string Possibly dangerous string.
+   * @param bool True if this string is being used as part of a query string.
+   * @return void
+   */
+  private function checkForDangerousCURLMagic($string, $is_query_string) {
+    $is_magic = (!empty($string[0]) && ($string[0] == '@'));
+    if (!$is_magic) {
+      // Not an "@..." string, so there is nothing to worry about.
+      return;
+    }
+
+    if (!$is_query_string) {
+      throw new Exception(
+        pht(
+          'Attempting to make an HTTP request which includes file data, but the '.
+          'value of a query parameter begins with "%s". PHP interprets these '.
+          'values to mean that it should read arbitrary files off disk and '.
+          'transmit them to remote servers. Declining to make this request.',
+          '@'));
+    }
+
+    // Query strings are only a problem before PHP 5.2.0.
+    if (version_compare(phpversion(), '5.2.0', '<')) {
+      throw new Exception(
+        pht(
+          'Attempting to make an HTTP request, but query string data begins '.
+          'with "%s". Prior to PHP 5.2.0 this reads files off disk, which '.
+          'creates a wide attack window for security vulnerabilities. '.
+          'Upgrade PHP or avoid making cURL requests which begin with "%s".',
+          '@',
+          '@'));
+    }
+  }
+
+
+  /**
+   * Check whether this system lets us use CURLOPT_CAINFO.
+   *
+   * @return bool False on OSX when the reported version is 14 or newer,
+   *              true otherwise.
+   */
+  private function canSetCAInfo() {
+    // CAInfo can not be set on OSX after Yosemite.
+    $osx_version = PhutilExecutionEnvironment::getOSXVersion();
+    if (!$osx_version) {
+      return true;
+    }
+
+    $is_affected = version_compare($osx_version, 14, '>=');
+    return !$is_affected;
+  }
+
+
+  /**
+   * Provide a raw HTTP body to send with the request.
+   *
+   * The whole body must be written before the request starts.
+   *
+   * @param string Raw body.
+   * @return this
+   */
+  public function write($raw_body) {
+    $body = $raw_body;
+    $this->rawBody = $body;
+    return $this;
+  }
+
+
+  /**
+   * Read callback handed to cURL. Returns the next slice of the raw request
+   * body and advances the body cursor past it.
+   *
+   * @param resource cURL handle.
+   * @param resource File handle given to cURL as CURLOPT_INFILE (unused).
+   * @param int Maximum number of bytes cURL will accept.
+   * @return string Up to `$len` bytes of body; empty once the body is
+   *                exhausted.
+   */
+  public function willWriteBody($handle, $infile, $len) {
+    $bytes = substr($this->rawBody, $this->rawBodyPos, $len);
+
+    // Before PHP 8, substr() returns false rather than an empty string once
+    // the cursor has reached the end of the body. Always hand cURL a string.
+    if ($bytes === false) {
+      $bytes = '';
+    }
+
+    // Advance by what was actually returned, not by what was requested, so
+    // the cursor never runs past the end of the body.
+    $this->rawBodyPos += strlen($bytes);
+
+    return $bytes;
+  }
+
+  /**
+   * Decide whether completed cURL handles may go back into the pool.
+   *
+   * @return bool False for cURL 7.43.0, true for every other version.
+   */
+  private function shouldReuseHandles() {
+    $info = curl_version();
+
+    // NOTE: cURL 7.43.0 does not recompute the POST body length properly
+    // when a handle is reused. Turn handle reuse off for that version and
+    // accept a small performance penalty. See T8654.
+    return (idx($info, 'version') != '7.43.0');
+  }
+
+
+}
diff --git a/src/future/http/PhutilHTTPEngineExtension.php b/src/future/http/PhutilHTTPEngineExtension.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/PhutilHTTPEngineExtension.php
@@ -0,0 +1,141 @@
+getPhobjectClassConstant('EXTENSIONKEY');
+ }
+
+  /**
+   * Load all available HTTP engine extensions using a class map query over
+   * subclasses of this class, with `getExtensionKey` as the unique method.
+   *
+   * @return wild Result of executing the class map query.
+   */
+  final public static function getAllExtensions() {
+    $query = id(new PhutilClassMapQuery())
+      ->setAncestorClass(__CLASS__)
+      ->setUniqueMethod('getExtensionKey');
+
+    return $query->execute();
+  }
+
+  /**
+   * Look up one extension by its key.
+   *
+   * @param string Extension key.
+   * @return wild The matching extension, or the `idx()` default if there is
+   *              none.
+   */
+  final public static function getExtension($key) {
+    return idx(self::getAllExtensions(), $key);
+  }
+
+  /**
+   * Look up one extension by its key, throwing if it does not exist.
+   *
+   * @param string Extension key.
+   * @return wild The matching extension.
+   */
+  final public static function requireExtension($key) {
+    $extension = self::getExtension($key);
+
+    if ($extension) {
+      return $extension;
+    }
+
+    throw new Exception(
+      pht(
+        'No HTTP engine extension exists with extension key "%s".',
+        $key));
+  }
+
+  /**
+   * Ask every extension for a proxy to use when requesting a URI.
+   *
+   * At most one extension may answer. An extension which answers with
+   * anything other than null or a @{class:PhutilURI} is an error, as is a
+   * second extension providing a proxy for the same URI.
+   *
+   * @param PhutilURI URI which is about to be requested.
+   * @return PhutilURI|null Proxy URI, or null if no extension provides one.
+   */
+  final public static function buildHTTPProxyURI(PhutilURI $uri) {
+    $chosen_proxy = null;
+    $chosen_by = null;
+
+    foreach (self::getAllExtensions() as $extension) {
+      $candidate = $extension->getHTTPProxyURI($uri);
+
+      if ($candidate === null) {
+        continue;
+      }
+
+      $is_valid = ($candidate instanceof PhutilURI);
+      if (!$is_valid) {
+        throw new Exception(
+          pht(
+            'HTTP extension "%s" (of class "%s") returned an invalid '.
+            'result from "%s": expected null, or an object of class "%s".',
+            $extension->getExtensionName(),
+            get_class($extension),
+            'getHTTPProxyURI()',
+            'PhutilURI'));
+      }
+
+      if ($chosen_proxy) {
+        throw new Exception(
+          pht(
+            'Two different HTTP extensions ("%s" of class "%s" and "%s" of '.
+            'class "%s") both provided a proxy URI for URI "%s". No more '.
+            'than one extension may provide a proxy for any URI.',
+            $extension->getExtensionName(),
+            get_class($extension),
+            $chosen_by->getExtensionName(),
+            get_class($chosen_by),
+            (string)$uri));
+      }
+
+      $chosen_proxy = $candidate;
+      $chosen_by = $extension;
+    }
+
+    return $chosen_proxy;
+  }
+
+}
diff --git a/src/future/http/status/HTTPFutureCURLResponseStatus.php b/src/future/http/status/HTTPFutureCURLResponseStatus.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/status/HTTPFutureCURLResponseStatus.php
@@ -0,0 +1,86 @@
+getStatusCode() == CURLE_OPERATION_TIMEOUTED);
+ }
+
+ /**
+  * Build a human-readable description for a cURL error code.
+  *
+  * The description is prefixed with the name of the first defined `CURLE_*`
+  * constant whose value equals the code (if any), followed by either a
+  * specific explanation or a generic message linking to the libcurl error
+  * reference.
+  *
+  * @param int cURL error code.
+  * @return string Description of the error.
+  */
+ protected function getErrorCodeDescription($code) {
+   $constants = get_defined_constants();
+
+   // Use an empty string (not null) when no constant matches, so the string
+   // functions below never receive null.
+   $constant_name = '';
+   foreach ($constants as $constant => $value) {
+     if ($value == $code && preg_match('/^CURLE_/', $constant)) {
+       $constant_name = '<'.$constant.'> ';
+       break;
+     }
+   }
+
+   $map = array(
+     CURLE_COULDNT_RESOLVE_HOST => pht(
+       'There was an error resolving the server hostname. Check that you are '.
+       'connected to the internet and that DNS is correctly configured. (Did '.
+       'you add the domain to `%s` on some other machine, but not this one?)',
+       '/etc/hosts'),
+
+     CURLE_SSL_CACERT => pht(
+       'There was an error verifying the SSL Certificate Authority while '.
+       'negotiating the SSL connection. This usually indicates that you are '.
+       'using a self-signed certificate but have not added your CA to the '.
+       'CA bundle. See instructions in "%s".',
+       'libphutil/resources/ssl/README'),
+
+     // Apparently there's no error constant for this? In cURL it's
+     // CURLE_SSL_CACERT_BADFILE but there's no corresponding constant in
+     // PHP.
+     77 => pht(
+       'The SSL CA Bundles that we tried to use could not be read or are '.
+       'not formatted correctly.'),
+
+     CURLE_SSL_CONNECT_ERROR => pht(
+       'There was an error negotiating the SSL connection. This usually '.
+       'indicates that the remote host has a bad SSL certificate, or your '.
+       'local host has some sort of SSL misconfiguration which prevents it '.
+       'from accepting the CA. If you are using a self-signed certificate, '.
+       'see instructions in "%s".',
+       'libphutil/resources/ssl/README'),
+
+     CURLE_OPERATION_TIMEOUTED => pht(
+       'The request took too long to complete.'),
+
+     CURLE_SSL_PEER_CERTIFICATE => pht(
+       'There was an error verifying the SSL connection. This usually '.
+       'indicates that the remote host has an SSL certificate for a '.
+       'different domain name than you are connecting with. Make sure the '.
+       'certificate you have installed is signed for the correct domain.'),
+   );
+
+   // The anchor is the constant name reduced to uppercase letters and
+   // digits, e.g. "CURLE_FTP_WEIRD_227_FORMAT" becomes
+   // "CURLEFTPWEIRD227FORMAT". Digits must survive or such anchors break.
+   $anchor = preg_replace('/[^A-Z0-9]/', '', $constant_name);
+
+   $default_message = pht(
+     'The cURL library raised an error while making a request. You may be '.
+     'able to find more information about this error (error code: %d) '.
+     'on the cURL site: %s',
+     $code,
+     'http://curl.haxx.se/libcurl/c/libcurl-errors.html#'.$anchor);
+
+   $detailed_message = idx($map, $code, $default_message);
+
+   return $constant_name.$detailed_message;
+ }
+
+}
diff --git a/src/future/http/status/HTTPFutureCertificateResponseStatus.php b/src/future/http/status/HTTPFutureCertificateResponseStatus.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/status/HTTPFutureCertificateResponseStatus.php
@@ -0,0 +1,33 @@
+ 512) {
+ $excerpt = substr($body, 0, 512).'...';
+ } else {
+ $excerpt = $body;
+ }
+
+ $content_type = BaseHTTPFuture::getHeader($headers, 'Content-Type');
+ $match = null;
+ if (preg_match('/;\s*charset=([^;]+)/', $content_type, $match)) {
+ $encoding = trim($match[1], "\"'");
+ try {
+ $excerpt = phutil_utf8_convert($excerpt, 'UTF-8', $encoding);
+ } catch (Exception $ex) {}
+ }
+
+ $this->excerpt = phutil_utf8ize($excerpt);
+ $this->expect = $expect;
+
+ parent::__construct($status_code);
+ }
+
+ /**
+ * Name the family of error codes this status reports.
+ *
+ * @param int Status code (unused here).
+ * @return string Always the literal 'HTTP'.
+ */
+ protected function getErrorCodeType($code) {
+ return 'HTTP';
+ }
+
+ /**
+  * Decide whether this response counts as an error.
+  *
+  * When a list of expected codes was provided, any code that is not
+  * strictly in that list is an error. Otherwise, anything outside the
+  * 200-299 range is an error.
+  *
+  * @return bool True if the response is an error.
+  */
+ public function isError() {
+   if ($this->expect !== null) {
+     return !in_array($this->getStatusCode(), $this->expect, true);
+   }
+
+   $code = $this->getStatusCode();
+   return ($code < 200) || ($code > 299);
+ }
+
+ /**
+  * Test whether the status code is in the 3xx range.
+  *
+  * @return bool True for codes from 300 (inclusive) to 400 (exclusive).
+  */
+ public function isRedirect() {
+   $code = $this->getStatusCode();
+
+   if (!($code >= 300)) {
+     return false;
+   }
+
+   return ($code < 400);
+ }
+
+ /**
+ * This status type never reports a timeout.
+ *
+ * @return bool Always false.
+ */
+ public function isTimeout() {
+ return false;
+ }
+
+ /**
+  * Describe an HTTP status code.
+  *
+  * The result is the well-known reason phrase for the code (only 404 and
+  * 500 are known here; other codes yield an empty first line), a newline,
+  * the stored body excerpt, and a trailing newline.
+  *
+  * @param int HTTP status code.
+  * @return string Description text.
+  */
+ protected function getErrorCodeDescription($code) {
+   static $reason_phrases = array(
+     404 => 'Not Found',
+     500 => 'Internal Server Error',
+   );
+
+   $phrase = idx($reason_phrases, $code);
+
+   return $phrase."\n".$this->excerpt."\n";
+ }
+
+}
diff --git a/src/future/http/status/HTTPFutureParseResponseStatus.php b/src/future/http/status/HTTPFutureParseResponseStatus.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/status/HTTPFutureParseResponseStatus.php
@@ -0,0 +1,32 @@
+rawResponse = $raw_response;
+ parent::__construct($code);
+ }
+
+ /**
+ * Name the family of error codes this status reports.
+ *
+ * @param int Status code (unused here).
+ * @return string Always the literal 'Parse'.
+ */
+ protected function getErrorCodeType($code) {
+ return 'Parse';
+ }
+
+ /**
+ * A parse status is always an error.
+ *
+ * @return bool Always true.
+ */
+ public function isError() {
+ return true;
+ }
+
+ /**
+ * A parse status is never a timeout.
+ *
+ * @return bool Always false.
+ */
+ public function isTimeout() {
+ return false;
+ }
+
+ /**
+ * Describe the parse failure, embedding the stored raw response.
+ *
+ * @param int Status code (unused here).
+ * @return string Description including the raw response text.
+ */
+ protected function getErrorCodeDescription($code) {
+ return pht(
+ 'The remote host returned something other than an HTTP response: %s',
+ $this->rawResponse);
+ }
+
+}
diff --git a/src/future/http/status/HTTPFutureResponseStatus.php b/src/future/http/status/HTTPFutureResponseStatus.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/status/HTTPFutureResponseStatus.php
@@ -0,0 +1,43 @@
+statusCode = $status_code;
+ $this->uri = (string)$uri;
+
+ $type = $this->getErrorCodeType($status_code);
+ $description = $this->getErrorCodeDescription($status_code);
+
+ $uri_info = '';
+ if ($this->uri) {
+ $uri_info = ' ('.$this->uri.')';
+ }
+
+ $message = rtrim("[{$type}/{$status_code}]{$uri_info} {$description}");
+
+ parent::__construct($message);
+ }
+
+ /**
+ * Get the status code this object was constructed with.
+ *
+ * @return wild The stored status code.
+ */
+ final public function getStatusCode() {
+ return $this->statusCode;
+ }
+
+ /**
+ * Get the stored URI.
+ *
+ * @return string The stored URI string.
+ */
+ final public function getURI() {
+ return $this->uri;
+ }
+
+ // Subclasses decide whether the status represents an error and whether it
+ // represents a timeout.
+ abstract public function isError();
+ abstract public function isTimeout();
+
+ /**
+ * Whether this status is a redirect. The base implementation says no;
+ * subclasses may override.
+ *
+ * @return bool Always false in the base class.
+ */
+ public function isRedirect() {
+ return false;
+ }
+
+ // Subclasses supply the error code family name and the human-readable
+ // description for a given code.
+ abstract protected function getErrorCodeType($code);
+ abstract protected function getErrorCodeDescription($code);
+
+}
diff --git a/src/future/http/status/HTTPFutureTransportResponseStatus.php b/src/future/http/status/HTTPFutureTransportResponseStatus.php
new file mode 100644
--- /dev/null
+++ b/src/future/http/status/HTTPFutureTransportResponseStatus.php
@@ -0,0 +1,44 @@
+getStatusCode() == self::ERROR_TIMEOUT);
+ }
+
+ /**
+  * Describe a transport-level error code.
+  *
+  * @param int One of the `ERROR_*` constants of this class.
+  * @return string|null Description for the code, or the `idx()` default
+  *   when the code is not one of the known constants.
+  */
+ protected function getErrorCodeDescription($code) {
+   $descriptions = array(
+     self::ERROR_TIMEOUT => pht(
+       'The request took too long to complete.'),
+     self::ERROR_CONNECTION_ABORTED => pht(
+       'The remote host closed the connection before the request completed.'),
+     self::ERROR_CONNECTION_REFUSED => pht(
+       'The remote host refused the connection. This usually means the '.
+       'host is not running an HTTP server, or the network is blocking '.
+       'connections from this machine. Verify you can connect to the '.
+       'remote host from this host.'),
+     self::ERROR_CONNECTION_FAILED => pht(
+       'Connection could not be initiated. This usually indicates a DNS '.
+       'problem: verify the domain name is correct, that you can '.
+       'perform a DNS lookup for it from this machine. (Did you add the '.
+       'domain to `%s` on some other machine, but not this one?) '.
+       'This might also indicate that you specified the wrong port.',
+       '/etc/hosts'),
+   );
+
+   $description = idx($descriptions, $code);
+   return $description;
+ }
+
+}
diff --git a/src/future/oauth/PhutilOAuth1Future.php b/src/future/oauth/PhutilOAuth1Future.php
new file mode 100644
--- /dev/null
+++ b/src/future/oauth/PhutilOAuth1Future.php
@@ -0,0 +1,291 @@
+callbackURI = $callback_uri;
+ return $this;
+ }
+
+ /**
+ * Set the value sent as `oauth_timestamp`. If left unset, the current time
+ * is used when the OAuth headers are built.
+ *
+ * @param int Timestamp.
+ * @return this
+ */
+ public function setTimestamp($timestamp) {
+ $this->timestamp = $timestamp;
+ return $this;
+ }
+
+ /**
+ * Set the value sent as `oauth_nonce`. If left unset, a random nonce is
+ * generated when the OAuth headers are built.
+ *
+ * @param string Nonce.
+ * @return this
+ */
+ public function setNonce($nonce) {
+ $this->nonce = $nonce;
+ return $this;
+ }
+
+ /**
+ * Set the token secret, which forms the second half of the signing key.
+ *
+ * @param string Token secret.
+ * @return this
+ */
+ public function setTokenSecret($token_secret) {
+ $this->tokenSecret = $token_secret;
+ return $this;
+ }
+
+ /**
+ * Set the value sent as `oauth_token`.
+ *
+ * @param string Token.
+ * @return this
+ */
+ public function setToken($token) {
+ $this->token = $token;
+ return $this;
+ }
+
+ /**
+ * Set the private key used by the 'RSA-SHA1' signature method.
+ *
+ * @param PhutilOpaqueEnvelope Envelope containing the private key.
+ * @return this
+ */
+ public function setPrivateKey(PhutilOpaqueEnvelope $private_key) {
+ $this->privateKey = $private_key;
+ return $this;
+ }
+
+ /**
+ * Select the signature method. The signing code recognizes 'HMAC-SHA1',
+ * 'RSA-SHA1' and 'PLAINTEXT'.
+ *
+ * @param string Signature method name.
+ * @return this
+ */
+ public function setSignatureMethod($signature_method) {
+ $this->signatureMethod = $signature_method;
+ return $this;
+ }
+
+ /**
+ * Set the value sent as `oauth_consumer_key`.
+ *
+ * @param string Consumer key.
+ * @return this
+ */
+ public function setConsumerKey($consumer_key) {
+ $this->consumerKey = $consumer_key;
+ return $this;
+ }
+
+ /**
+ * Set the consumer secret, which forms the first half of the signing key.
+ * Required by the 'HMAC-SHA1' and 'PLAINTEXT' signature methods.
+ *
+ * @param PhutilOpaqueEnvelope Envelope containing the consumer secret.
+ * @return this
+ */
+ public function setConsumerSecret(PhutilOpaqueEnvelope $consumer_secret) {
+ $this->consumerSecret = $consumer_secret;
+ return $this;
+ }
+
+ /**
+ * Set the HTTP method. It is uppercased into the signature base string and
+ * passed to the proxied future when that future is prepared.
+ *
+ * @param string HTTP method, e.g. 'GET' or 'POST'.
+ * @return this
+ */
+ public function setMethod($method) {
+ $this->method = $method;
+ return $this;
+ }
+
+ /**
+  * Create a future for an OAuth1-signed request.
+  *
+  * @param string|PhutilURI Request URI; a parsed copy is kept for signing.
+  * @param wild Request data, handed unchanged to the proxied HTTPSFuture.
+  */
+ public function __construct($uri, $data = array()) {
+   $this->uri = new PhutilURI((string)$uri);
+   $this->data = $data;
+
+   $proxied = new HTTPSFuture($uri, $data);
+   $this->setProxiedFuture($proxied);
+ }
+
+ /**
+  * Compute the OAuth1 signature for this request.
+  *
+  * The signed parameters are the request data (only when it is an array),
+  * the URI query parameters, and the OAuth header fields. On key
+  * collisions the earlier source wins.
+  *
+  * @return string Signature produced by the configured signature method.
+  */
+ public function getSignature() {
+   // NOTE: The JIRA API uses JSON-encoded request bodies which are not
+   // signed, and OAuth1 provides no real way to sign a nonparameterized
+   // request body. Possibly we should split this apart into flags which
+   // control which data is signed, but for now this rule seems to cover
+   // all the use cases.
+   $signed = is_array($this->data) ? $this->data : array();
+
+   $signed += $this->uri->getQueryParams();
+   $signed += $this->getOAuth1Headers();
+
+   return $this->sign($signed);
+ }
+
+ /**
+  * Add a header to the request.
+  *
+  * Until the proxied future has been prepared, headers are queued locally,
+  * since later changes may still affect the signature. Afterwards they are
+  * forwarded to the proxied future directly.
+  *
+  * @param string Header name.
+  * @param string Header value.
+  * @return this
+  */
+ public function addHeader($name, $value) {
+   if ($this->hasConstructedFuture) {
+     $this->getProxiedFuture()->addHeader($name, $value);
+     return $this;
+   }
+
+   $this->headers[] = array($name, $value);
+   return $this;
+ }
+
+ /**
+  * Get the proxied future, preparing it on first use.
+  *
+  * Preparation sets the HTTP method, attaches the signed `Authorization`
+  * header, and flushes any headers queued by `addHeader()`.
+  *
+  * @return wild The proxied future returned by the parent implementation.
+  */
+ protected function getProxiedFuture() {
+   $future = parent::getProxiedFuture();
+
+   if (!$this->hasConstructedFuture) {
+     $future->setMethod($this->method);
+
+     $oauth_headers = $this->getOAuth1Headers();
+     $oauth_headers['oauth_signature'] = $this->getSignature();
+
+     // Values must use RFC 3986 percent-encoding (space as "%20", "~" left
+     // bare), which is also what sign() uses for the signature base string.
+     // urlencode() would emit "+" for a space, and a receiver decoding per
+     // the OAuth1 rules would see a value that differs from the one signed.
+     $full_oauth_header = array();
+     foreach ($oauth_headers as $header => $value) {
+       $full_oauth_header[] = $header.'="'.rawurlencode((string)$value).'"';
+     }
+     $full_oauth_header = 'OAuth '.implode(', ', $full_oauth_header);
+
+     $future->addHeader('Authorization', $full_oauth_header);
+
+     foreach ($this->headers as $header) {
+       $future->addHeader($header[0], $header[1]);
+     }
+     $this->headers = array();
+
+     $this->hasConstructedFuture = true;
+   }
+
+   return $future;
+ }
+
+ /**
+ * Pass the proxied future's result through unchanged.
+ *
+ * @param wild Result of the proxied future.
+ * @return wild The same result.
+ */
+ protected function didReceiveResult($result) {
+ return $result;
+ }
+
+ /**
+  * Build the `oauth_*` fields for this request.
+  *
+  * A nonce and timestamp are generated and remembered on first use if
+  * none were set, so repeated calls return the same values. The callback
+  * and token fields are only included when set.
+  *
+  * @return map<string, wild> OAuth field names mapped to values.
+  */
+ private function getOAuth1Headers() {
+   if (!$this->nonce) {
+     $this->nonce = Filesystem::readRandomCharacters(32);
+   }
+
+   if (!$this->timestamp) {
+     $this->timestamp = time();
+   }
+
+   $fields = array();
+   $fields['oauth_consumer_key'] = $this->consumerKey;
+   $fields['oauth_signature_method'] = $this->signatureMethod;
+   $fields['oauth_timestamp'] = $this->timestamp;
+   $fields['oauth_nonce'] = $this->nonce;
+   $fields['oauth_version'] = '1.0';
+
+   if ($this->callbackURI) {
+     $fields['oauth_callback'] = (string)$this->callbackURI;
+   }
+
+   if ($this->token) {
+     $fields['oauth_token'] = $this->token;
+   }
+
+   return $fields;
+ }
+
+ /**
+  * Build the OAuth1 signature base string for a set of parameters and
+  * sign it.
+  *
+  * The base string is the uppercased method, the normalized URI (no
+  * query, no fragment, lowercase protocol, default port removed) and the
+  * key-sorted, encoded parameter string, each percent-encoded and joined
+  * with "&".
+  *
+  * @param map<string, string> Parameters to sign.
+  * @return string Signature from `signString()`.
+  */
+ private function sign(array $params) {
+   ksort($params);
+
+   $pairs = array();
+   foreach ($params as $name => $value) {
+     $pairs[] = rawurlencode($name).'='.rawurlencode($value);
+   }
+
+   $base_uri = clone $this->uri;
+   $base_uri->setFragment('');
+   $base_uri->setQueryParams(array());
+   $base_uri->setProtocol(phutil_utf8_strtolower($base_uri->getProtocol()));
+
+   // Drop the port when it is the default one for the protocol.
+   $default_ports = array(
+     'http' => 80,
+     'https' => 443,
+   );
+   $protocol = $base_uri->getProtocol();
+   if (isset($default_ports[$protocol])) {
+     if ($base_uri->getPort() == $default_ports[$protocol]) {
+       $base_uri->setPort(null);
+     }
+   }
+
+   $parts = array(
+     rawurlencode(phutil_utf8_strtoupper($this->method)),
+     rawurlencode((string)$base_uri),
+     rawurlencode(implode('&', $pairs)),
+   );
+
+   return $this->signString(implode('&', $parts));
+ }
+
+ /**
+  * Sign a signature base string with the configured signature method.
+  *
+  * @param string Signature base string.
+  * @return string Base64-encoded signature for 'HMAC-SHA1' and 'RSA-SHA1',
+  *   or the raw signing key for 'PLAINTEXT'.
+  * @throws Exception If the method's required secret or key is missing, an
+  *   OpenSSL call fails, or the signature method is not recognized.
+  */
+ private function signString($string) {
+   $consumer_secret = null;
+   if ($this->consumerSecret) {
+     $consumer_secret = $this->consumerSecret->openEnvelope();
+   }
+
+   // Both halves of the key use RFC 3986 percent-encoding, as OAuth1
+   // requires. urlencode() differs for " " and "~", which would produce a
+   // key (and so a signature) that a conforming server cannot reproduce.
+   $key = rawurlencode((string)$consumer_secret).
+     '&'.
+     rawurlencode((string)$this->tokenSecret);
+
+   switch ($this->signatureMethod) {
+     case 'HMAC-SHA1':
+       if (!$this->consumerSecret) {
+         throw new Exception(
+           pht(
+             "Signature method '%s' requires %s!",
+             'HMAC-SHA1',
+             'setConsumerSecret()'));
+       }
+
+       $hash = hash_hmac('sha1', $string, $key, true);
+       return base64_encode($hash);
+     case 'RSA-SHA1':
+       if (!$this->privateKey) {
+         throw new Exception(
+           pht(
+             "Signature method '%s' requires %s!",
+             'RSA-SHA1',
+             'setPrivateKey()'));
+       }
+
+       $cert = @openssl_pkey_get_private($this->privateKey->openEnvelope());
+       if (!$cert) {
+         throw new Exception(pht('%s failed!', 'openssl_pkey_get_private()'));
+       }
+
+       $pkey = @openssl_get_privatekey($cert);
+       if (!$pkey) {
+         throw new Exception(pht('%s failed!', 'openssl_get_privatekey()'));
+       }
+
+       $signature = null;
+       $ok = openssl_sign($string, $signature, $pkey, OPENSSL_ALGO_SHA1);
+
+       // Release the key before checking the result, so it is freed on the
+       // failure path too.
+       openssl_free_key($pkey);
+
+       if (!$ok) {
+         throw new Exception(pht('%s failed!', 'openssl_sign()'));
+       }
+
+       return base64_encode($signature);
+     case 'PLAINTEXT':
+       if (!$this->consumerSecret) {
+         throw new Exception(
+           pht(
+             "Signature method '%s' requires %s!",
+             'PLAINTEXT',
+             'setConsumerSecret()'));
+       }
+       return $key;
+     default:
+       // Report the unrecognized method name, not the string being signed.
+       throw new Exception(
+         pht(
+           "Unknown signature method '%s'!",
+           $this->signatureMethod));
+   }
+ }
+
+ /**
+  * Resolve the proxied future with `resolvex()` and run the result
+  * through `didReceiveResult()`.
+  *
+  * @return wild Result after `didReceiveResult()`.
+  */
+ public function resolvex() {
+   return $this->didReceiveResult($this->getProxiedFuture()->resolvex());
+ }
+
+ /**
+  * Resolve the request and decode the response body as JSON.
+  *
+  * @return wild Decoded JSON body.
+  * @throws PhutilProxyException If the body is not valid JSON.
+  */
+ public function resolveJSON() {
+   $raw = $this->getProxiedFuture()->resolvex();
+   list($body) = $this->didReceiveResult($raw);
+
+   try {
+     $decoded = phutil_json_decode($body);
+   } catch (PhutilJSONParserException $ex) {
+     throw new PhutilProxyException(pht('Expected JSON.'), $ex);
+   }
+
+   return $decoded;
+ }
+
+
+}
diff --git a/src/future/oauth/__tests__/PhutilOAuth1FutureTestCase.php b/src/future/oauth/__tests__/PhutilOAuth1FutureTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/future/oauth/__tests__/PhutilOAuth1FutureTestCase.php
@@ -0,0 +1,159 @@
+setTimestamp(1191242090)
+ ->setNonce('hsu94j3884jdopsl')
+ ->setConsumerKey('dpf43f3p2l4k3l03')
+ ->setConsumerSecret(new PhutilOpaqueEnvelope('kd94hf93k423kf44'))
+ ->setSignatureMethod('PLAINTEXT');
+
+ $this->assertEqual('kd94hf93k423kf44&', $future->getSignature());
+
+
+ $uri = 'http://photos.example.net/photos';
+ $data = array(
+ 'file' => 'vacation.jpg',
+ 'size' => 'original',
+ );
+
+ $future = id(new PhutilOAuth1Future($uri, $data))
+ ->setMethod('GET')
+ ->setTimestamp(1191242096)
+ ->setNonce('kllo9940pd9333jh')
+ ->setConsumerKey('dpf43f3p2l4k3l03')
+ ->setConsumerSecret(new PhutilOpaqueEnvelope('kd94hf93k423kf44'))
+ ->setSignatureMethod('HMAC-SHA1')
+ ->setToken('nnch734d00sl2jdk')
+ ->setTokenSecret('pfkkdhi9sl3r4s00');
+
+ $this->assertEqual('tR3+Ty81lMeYAr/Fid0kMTYa/WM=', $future->getSignature());
+ }
+
+ /**
+  * Check HMAC-SHA1 signing against the worked example published by
+  * Twitter.
+  */
+ public function testOAuth1SigningWithTwitterExamples() {
+
+   // NOTE: This example is from Twitter.
+   // https://dev.twitter.com/docs/auth/creating-signature
+
+   $uri = 'https://api.twitter.com/1/statuses/update.json?'.
+     'include_entities=true';
+   $data = array(
+     'status' => 'Hello Ladies + Gentlemen, a signed OAuth request!',
+   );
+
+   $consumer_secret = new PhutilOpaqueEnvelope(
+     'kAcSOqF21Fu85e7zjz7ZN2U4ZRhfV3WpwPAoE3Z7kBw');
+
+   $future = new PhutilOAuth1Future($uri, $data);
+   $future->setMethod('POST');
+   $future->setConsumerKey('xvz1evFS4wEEPTGEFPHBog');
+   $future->setConsumerSecret($consumer_secret);
+   $future->setNonce('kYjzVBB8Y0ZFabxSWbWovY3uYSQ2pTgmZeNu2VS4cg');
+   $future->setSignatureMethod('HMAC-SHA1');
+   $future->setTimestamp(1318622958);
+   $future->setToken('370773112-GmHxMAgYyLbNEtIKZeRNFsMKPR9EyMZeS9weJAEb');
+   $future->setTokenSecret('LswwdoUaIvS8ltyTt5jkRh4J50vUPVVHtR2YPi5kE');
+
+   $expect = 'tnnArxj06cWHq44gCs1OSKk/jLY=';
+   $this->assertEqual($expect, $future->getSignature());
+ }
+
+ public function testOAuth1SigningWithJIRAExamples() {
+
+ // NOTE: This is an empirical example against JIRA v6.0.6, in that the
+ // code seems to work when actually authing. It primarily serves as a check
+ // of the RSA-SHA1 signature method.
+
+$public_key = <<setConsumerKey('quackquack')
+ ->setPrivateKey(new PhutilOpaqueEnvelope($private_key))
+ ->setTimestamp('1375984131')
+ ->setNonce('iamaduck')
+ ->setSignatureMethod('RSA-SHA1');
+
+ // The actual signature is 684 bytes and begins "QwigfVxpOm0AKoWJkFRwbyseso
+ // VJobhiXpyY0J79Kzki+vwlT4Xz2Tr4vlwDLsra5gJbfdeme4qJ2rE..."
+ $this->assertEqual(
+ '5e63e65237e2b8078426996d5ef1a706',
+ md5($future->getSignature()));
+ }
+
+}
diff --git a/src/future/paypal/PhutilPayPalAPIFuture.php b/src/future/paypal/PhutilPayPalAPIFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/paypal/PhutilPayPalAPIFuture.php
@@ -0,0 +1,88 @@
+host = $host;
+ return $this;
+ }
+
+ /**
+ * Get the stored host value. It is passed as the URI of the HTTPS request
+ * when the proxied future is built.
+ *
+ * @return wild The stored host.
+ */
+ public function getHost() {
+ return $this->host;
+ }
+
+ /**
+ * Set the API username, sent as the `USER` parameter.
+ *
+ * @param string API username.
+ * @return this
+ */
+ public function setAPIUsername($api_username) {
+ $this->apiUsername = $api_username;
+ return $this;
+ }
+
+ /**
+ * Set the API password, sent as the `PWD` parameter.
+ *
+ * @param string API password.
+ * @return this
+ */
+ public function setAPIPassword($api_password) {
+ $this->apiPassword = $api_password;
+ return $this;
+ }
+
+ /**
+ * Set the API signature, sent as the `SIGNATURE` parameter.
+ *
+ * @param string API signature.
+ * @return this
+ */
+ public function setAPISignature($api_signature) {
+ $this->apiSignature = $api_signature;
+ return $this;
+ }
+
+ /**
+ * Set the API method to call and merge in its parameters.
+ *
+ * The array union keeps the first value seen for each key, so `METHOD` is
+ * always the new action and new parameters take precedence over parameters
+ * from earlier calls.
+ *
+ * @param string Value for the `METHOD` parameter.
+ * @param map<string, wild> Additional request parameters.
+ * @return this
+ */
+ public function setRawPayPalQuery($action, array $params = array()) {
+ $this->params = array('METHOD' => $action) + $params + $this->params;
+ return $this;
+ }
+
+ /**
+  * Build (once) and return the HTTPS future which performs the API call.
+  *
+  * @return HTTPSFuture POST request carrying the query parameters plus the
+  *   API version and credentials.
+  * @throws Exception If no query or no API username has been set.
+  */
+ protected function getProxiedFuture() {
+   if ($this->future) {
+     return $this->future;
+   }
+
+   if (!$this->params) {
+     throw new Exception(pht('You must %s!', 'setRawPayPalQuery()'));
+   }
+
+   if (!$this->apiUsername) {
+     throw new Exception(pht('You must set PayPal API credentials!'));
+   }
+
+   $request = $this->params;
+   $request['VERSION'] = '98.0';
+   $request['USER'] = $this->apiUsername;
+   $request['PWD'] = $this->apiPassword;
+   $request['SIGNATURE'] = $this->apiSignature;
+
+   $https_future = new HTTPSFuture($this->getHost(), $request);
+   $this->future = $https_future->setMethod('POST');
+
+   return $this->future;
+ }
+
+ /**
+  * Turn the raw HTTP result into a dictionary of response fields.
+  *
+  * @param list Tuple of status, body and headers.
+  * @return map<string, wild> Response fields parsed from the body.
+  * @throws wild The status object itself, if it reports an error.
+  * @throws Exception If the `ACK` field is not exactly 'Success'.
+  */
+ protected function didReceiveResult($result) {
+   list($status, $body, $headers) = $result;
+
+   if ($status->isError()) {
+     throw $status;
+   }
+
+   // The body is parsed as a URL-encoded query string.
+   $fields = array();
+   parse_str($body, $fields);
+
+   $ack = idx($fields, 'ACK');
+   if ($ack !== 'Success') {
+     throw new Exception(
+       pht('PayPal API call failed: %s', print_r($fields, true)));
+   }
+
+   return $fields;
+ }
+
+}
diff --git a/src/future/postmark/PhutilPostmarkFuture.php b/src/future/postmark/PhutilPostmarkFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/postmark/PhutilPostmarkFuture.php
@@ -0,0 +1,85 @@
+accessToken = $token;
+ return $this;
+ }
+
+ /**
+ * Store a client ID.
+ *
+ * @param string Client ID.
+ * @return this
+ */
+ public function setClientID($client_id) {
+ $this->clientID = $client_id;
+ return $this;
+ }
+
+ /**
+ * Set the API method to call and its parameters. The method becomes the
+ * request path and the parameters are sent as the JSON request body.
+ *
+ * @param string API method name, used as the URI path.
+ * @param map<string, wild> Parameters to JSON-encode into the body.
+ * @return this
+ */
+ public function setMethod($method, array $parameters) {
+ $this->method = $method;
+ $this->parameters = $parameters;
+ return $this;
+ }
+
+ /**
+  * Build (once) and return the HTTPS future which performs the API call.
+  *
+  * @return HTTPSFuture POST request with a JSON body and the access token
+  *   in the `X-Postmark-Server-Token` header.
+  * @throws PhutilInvalidStateException If the access token, method or
+  *   parameters have not been set.
+  */
+ protected function getProxiedFuture() {
+   if ($this->future) {
+     return $this->future;
+   }
+
+   if ($this->accessToken === null) {
+     throw new PhutilInvalidStateException('setAccessToken');
+   }
+
+   $has_method = ($this->method !== null);
+   $has_parameters = ($this->parameters !== null);
+   if (!$has_method || !$has_parameters) {
+     throw new PhutilInvalidStateException('setMethod');
+   }
+
+   $uri = new PhutilURI('https://api.postmarkapp.com/');
+   $uri = $uri->setPath('/'.$this->method);
+
+   $request_body = phutil_json_encode($this->parameters);
+
+   $this->future = id(new HTTPSFuture($uri))
+     ->setData($request_body)
+     ->setMethod('POST')
+     ->addHeader('X-Postmark-Server-Token', $this->accessToken)
+     ->addHeader('Accept', 'application/json')
+     ->addHeader('Content-Type', 'application/json');
+
+   return $this->future;
+ }
+
+ /**
+  * Decode the JSON response and surface API-level errors.
+  *
+  * @param list Tuple of status, body and headers.
+  * @return wild Decoded JSON response.
+  * @throws wild The status object itself, if it reports an error.
+  * @throws PhutilProxyException If the body is not valid JSON.
+  * @throws Exception If the response carries a truthy `ErrorCode`.
+  */
+ protected function didReceiveResult($result) {
+   list($status, $body, $headers) = $result;
+
+   if ($status->isError()) {
+     throw $status;
+   }
+
+   try {
+     $response = phutil_json_decode($body);
+   } catch (PhutilJSONParserException $ex) {
+     throw new PhutilProxyException(
+       pht('Expected JSON response from Postmark.'),
+       $ex);
+   }
+
+   $error_code = idx($response, 'ErrorCode');
+   if ($error_code) {
+     throw new Exception(
+       pht(
+         'Received error from Postmark: (%s) %s',
+         $error_code,
+         idx($response, 'Message')));
+   }
+
+   return $response;
+ }
+
+}
diff --git a/src/future/query/QueryFuture.php b/src/future/query/QueryFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/query/QueryFuture.php
@@ -0,0 +1,129 @@
+resolve();
+ * } catch (AphrontQueryException $ex) {
+ * }
+ * }
+ *
+ * `$result` contains a list of dicts for select queries or number of modified
+ * rows for modification queries.
+ */
+final class QueryFuture extends Future {
+
+ // Registry shared by all instances: futures are appended on construction
+ // and removed by processResults() once they have a result or exception.
+ private static $futures = array();
+
+ // Connection the query runs on.
+ private $conn;
+ // Fully formatted query string.
+ private $query;
+ // Key of this future in self::$futures.
+ private $id;
+ // Value returned by asyncQuery() once the query has been started.
+ private $async;
+ // Service profiler call ID, set only on the async path.
+ private $profilerCallID;
+
+ /**
+ * Format the query and register this future in the shared registry.
+ *
+ * @param AphrontDatabaseConnection Connection to run the query on.
+ * @param string Query pattern; all further arguments are passed to
+ * vqsprintf() as the pattern's arguments.
+ */
+ public function __construct(
+ AphrontDatabaseConnection $conn,
+ $pattern/* , ... */) {
+
+ $this->conn = $conn;
+
+ $args = func_get_args();
+ // Drop $conn and $pattern, keeping only the pattern arguments.
+ $args = array_slice($args, 2);
+ $this->query = vqsprintf($conn, $pattern, $args);
+
+ self::$futures[] = $this;
+ $this->id = last_key(self::$futures);
+ }
+
+ /**
+ * Check whether this future has a result, driving query execution if not.
+ *
+ * Without async support, every registered future is executed in one go
+ * (in parallel if the connection supports it, otherwise grouped by
+ * connection). With async support, the first call starts the query and
+ * later calls poll all started queries.
+ *
+ * @return bool True once this future has a result or an exception.
+ */
+ public function isReady() {
+ if ($this->result !== null || $this->exception) {
+ return true;
+ }
+
+ if (!$this->conn->supportsAsyncQueries()) {
+ if ($this->conn->supportsParallelQueries()) {
+ // Hand every registered query, with its connection, to one call.
+ $queries = array();
+ $conns = array();
+ foreach (self::$futures as $id => $future) {
+ $queries[$id] = $future->query;
+ $conns[$id] = $future->conn;
+ }
+ $results = $this->conn->executeParallelQueries($queries, $conns);
+ $this->processResults($results);
+ return true;
+ }
+
+ // Group the registered queries by connection object and run each
+ // group on its own connection.
+ $conns = array();
+ $conn_queries = array();
+ foreach (self::$futures as $id => $future) {
+ $hash = spl_object_hash($future->conn);
+ $conns[$hash] = $future->conn;
+ $conn_queries[$hash][$id] = $future->query;
+ }
+ foreach ($conn_queries as $hash => $queries) {
+ $this->processResults($conns[$hash]->executeRawQueries($queries));
+ }
+ return true;
+ }
+
+ if (!$this->async) {
+ // First poll on the async path: start the query and report not ready.
+ $profiler = PhutilServiceProfiler::getInstance();
+ $this->profilerCallID = $profiler->beginServiceCall(
+ array(
+ 'type' => 'query',
+ 'query' => $this->query,
+ 'async' => true,
+ ));
+
+ $this->async = $this->conn->asyncQuery($this->query);
+ return false;
+ }
+
+ // Collect every registered future whose async query has been started.
+ $conns = array();
+ $asyncs = array();
+ foreach (self::$futures as $id => $future) {
+ if ($future->async) {
+ $conns[$id] = $future->conn;
+ $asyncs[$id] = $future->async;
+ }
+ }
+
+ $this->processResults($this->conn->resolveAsyncQueries($conns, $asyncs));
+
+ if ($this->result !== null || $this->exception) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Deliver results to their futures and remove those futures from the
+ * registry.
+ *
+ * @param map Results keyed by future ID; an Exception value is stored as
+ * the future's exception, anything else as its result.
+ * @return void
+ */
+ private function processResults(array $results) {
+ foreach ($results as $id => $result) {
+ $future = self::$futures[$id];
+ if ($result instanceof Exception) {
+ $future->exception = $result;
+ } else {
+ $future->result = $result;
+ }
+ unset(self::$futures[$id]);
+ // Close the profiler call opened when the async query was started.
+ if ($future->profilerCallID) {
+ $profiler = PhutilServiceProfiler::getInstance();
+ $profiler->endServiceCall($future->profilerCallID, array());
+ }
+ }
+ }
+}
diff --git a/src/future/slack/PhutilSlackFuture.php b/src/future/slack/PhutilSlackFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/slack/PhutilSlackFuture.php
@@ -0,0 +1,87 @@
+accessToken = $token;
+ return $this;
+ }
+
+ /**
+ * Store a client ID.
+ *
+ * @param string Client ID.
+ * @return this
+ */
+ public function setClientID($client_id) {
+ $this->clientID = $client_id;
+ return $this;
+ }
+
+ /**
+ * Set the API action to call and its parameters. The action is appended
+ * to the `/api/` path and the parameters are sent as the request data.
+ *
+ * @param string API action name.
+ * @param map<string, wild> Request parameters.
+ * @return this
+ */
+ public function setRawSlackQuery($action, array $params = array()) {
+ $this->action = $action;
+ $this->params = $params;
+ return $this;
+ }
+
+ /**
+ * Set the HTTP method used for the request.
+ *
+ * @param string HTTP method.
+ * @return this
+ */
+ public function setMethod($method) {
+ $this->method = $method;
+ return $this;
+ }
+
+ /**
+  * Build (once) and return the HTTPS future which performs the API call.
+  *
+  * The access token is sent as the `token` query parameter.
+  *
+  * @return HTTPSFuture Request for the configured action.
+  * @throws Exception If no action or no access token has been set.
+  */
+ protected function getProxiedFuture() {
+   if (!$this->future) {
+     if (!$this->action) {
+       throw new Exception(pht('You must %s!', 'setRawSlackQuery()'));
+     }
+
+     if (!$this->accessToken) {
+       throw new Exception(pht('You must %s!', 'setAccessToken()'));
+     }
+
+     $uri = new PhutilURI('https://slack.com/');
+     $uri->setPath('/api/'.$this->action);
+     $uri->setQueryParam('token', $this->accessToken);
+
+     $future = new HTTPSFuture($uri);
+     $future->setData($this->params);
+     $future->setMethod($this->method);
+
+     $this->future = $future;
+   }
+
+   return $this->future;
+ }
+
+ /**
+  * Decode the JSON response and surface API-level errors.
+  *
+  * @param list Tuple of status, body and headers.
+  * @return wild Decoded JSON response.
+  * @throws wild The status object itself, if it reports an error.
+  * @throws PhutilProxyException If the body is not valid JSON.
+  * @throws Exception If the response carries a truthy `error` field.
+  */
+ protected function didReceiveResult($result) {
+   list($status, $body, $headers) = $result;
+
+   if ($status->isError()) {
+     throw $status;
+   }
+
+   try {
+     $response = phutil_json_decode($body);
+   } catch (PhutilJSONParserException $ex) {
+     throw new PhutilProxyException(
+       pht('Expected JSON response from Slack.'),
+       $ex);
+   }
+
+   $error = idx($response, 'error');
+   if ($error) {
+     throw new Exception(pht('Received error from Slack: %s', $error));
+   }
+
+   return $response;
+ }
+
+}
diff --git a/src/future/twitch/PhutilTwitchFuture.php b/src/future/twitch/PhutilTwitchFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/twitch/PhutilTwitchFuture.php
@@ -0,0 +1,93 @@
+accessToken = $token;
+ return $this;
+ }
+
+ /**
+ * Set the client ID, sent with the request as the `Client-ID` header.
+ *
+ * @param string Client ID.
+ * @return this
+ */
+ public function setClientID($client_id) {
+ $this->clientID = $client_id;
+ return $this;
+ }
+
+ /**
+ * Set the API action to call and its parameters. The action is appended
+ * to the `/kraken/` path and the parameters are sent as the request data.
+ *
+ * @param string API action; leading slashes are stripped when the URI is
+ * built.
+ * @param map<string, wild> Request parameters.
+ * @return this
+ */
+ public function setRawTwitchQuery($action, array $params = array()) {
+ $this->action = $action;
+ $this->params = $params;
+ return $this;
+ }
+
+ /**
+ * Set the HTTP method used for the request.
+ *
+ * @param string HTTP method.
+ * @return this
+ */
+ public function setMethod($method) {
+ $this->method = $method;
+ return $this;
+ }
+
+ /**
+  * Build (once) and return the HTTPS future which performs the API call.
+  *
+  * The access token is sent as the `oauth_token` query parameter.
+  *
+  * @return HTTPSFuture Request for the configured action.
+  * @throws Exception If no action or no access token has been set.
+  */
+ protected function getProxiedFuture() {
+   if (!$this->future) {
+     if (!$this->action) {
+       throw new Exception(pht('You must %s!', 'setRawTwitchQuery()'));
+     }
+
+     if (!$this->accessToken) {
+       throw new Exception(pht('You must %s!', 'setAccessToken()'));
+     }
+
+     $uri = new PhutilURI('https://api.twitch.tv/');
+     $uri->setPath('/kraken/'.ltrim($this->action, '/'));
+     $uri->setQueryParam('oauth_token', $this->accessToken);
+
+     $future = new HTTPSFuture($uri);
+     $future->setData($this->params);
+     $future->setMethod($this->method);
+
+     // NOTE: This is how the Twitch API is versioned.
+     $future->addHeader('Accept', 'application/vnd.twitchtv.2+json');
+
+     // NOTE: This is required to avoid rate limiting.
+     $future->addHeader('Client-ID', $this->clientID);
+
+     $this->future = $future;
+   }
+
+   return $this->future;
+ }
+
+ /**
+  * Decode the JSON response and surface API-level errors.
+  *
+  * @param list Tuple of status, body and headers.
+  * @return wild Decoded JSON response.
+  * @throws wild The status object itself, if it reports an error.
+  * @throws PhutilProxyException If the body is not valid JSON.
+  * @throws Exception If the response carries a truthy `error` field.
+  */
+ protected function didReceiveResult($result) {
+   list($status, $body, $headers) = $result;
+
+   if ($status->isError()) {
+     throw $status;
+   }
+
+   try {
+     $response = phutil_json_decode($body);
+   } catch (PhutilJSONParserException $ex) {
+     throw new PhutilProxyException(
+       pht('Expected JSON response from Twitch.'),
+       $ex);
+   }
+
+   $error = idx($response, 'error');
+   if ($error) {
+     throw new Exception(pht('Received error from Twitch: %s', $error));
+   }
+
+   return $response;
+ }
+
+}
diff --git a/src/future/wordpress/PhutilWordPressFuture.php b/src/future/wordpress/PhutilWordPressFuture.php
new file mode 100644
--- /dev/null
+++ b/src/future/wordpress/PhutilWordPressFuture.php
@@ -0,0 +1,89 @@
+accessToken = $token;
+ return $this;
+ }
+
+ /**
+ * Store a client ID.
+ *
+ * @param string Client ID.
+ * @return this
+ */
+ public function setClientID($client_id) {
+ $this->clientID = $client_id;
+ return $this;
+ }
+
+ /**
+ * Set the API action to call and its parameters. The action is appended
+ * to the `/rest/v1/` path and the parameters are sent as the request data.
+ *
+ * @param string API action; leading slashes are stripped when the URI is
+ * built.
+ * @param map<string, wild> Request parameters.
+ * @return this
+ */
+ public function setRawWordPressQuery($action, array $params = array()) {
+ $this->action = $action;
+ $this->params = $params;
+ return $this;
+ }
+
+ /**
+ * Set the HTTP method used for the request.
+ *
+ * @param string HTTP method.
+ * @return this
+ */
+ public function setMethod($method) {
+ $this->method = $method;
+ return $this;
+ }
+
+ /**
+  * Build (once) and return the HTTPS future which performs the API call.
+  *
+  * The access token is sent as a bearer token in the `Authorization`
+  * header.
+  *
+  * @return HTTPSFuture Request for the configured action.
+  * @throws Exception If no action or no access token has been set.
+  */
+ protected function getProxiedFuture() {
+   if (!$this->future) {
+     if (!$this->action) {
+       throw new Exception(pht('You must %s!', 'setRawWordPressQuery()'));
+     }
+
+     if (!$this->accessToken) {
+       throw new Exception(pht('You must %s!', 'setAccessToken()'));
+     }
+
+     $uri = new PhutilURI('https://public-api.wordpress.com/');
+     $uri->setPath('/rest/v1/'.ltrim($this->action, '/'));
+
+     $future = new HTTPSFuture($uri);
+     $future->setData($this->params);
+     $future->setMethod($this->method);
+
+     // NOTE: This is how WordPress.com REST API authenticates
+     $future->addHeader('Authorization', 'Bearer '.$this->accessToken);
+
+     $this->future = $future;
+   }
+
+   return $this->future;
+ }
+
+ /**
+  * Decode the JSON response and surface API-level errors.
+  *
+  * @param list Tuple of status, body and headers.
+  * @return wild Decoded JSON response.
+  * @throws wild The status object itself, if it reports an error.
+  * @throws PhutilProxyException If the body is not valid JSON.
+  * @throws Exception If the response carries a truthy `error` field.
+  */
+ protected function didReceiveResult($result) {
+   list($status, $body, $headers) = $result;
+
+   if ($status->isError()) {
+     throw $status;
+   }
+
+   try {
+     $response = phutil_json_decode($body);
+   } catch (PhutilJSONParserException $ex) {
+     throw new PhutilProxyException(
+       pht('Expected JSON response from WordPress.com.'),
+       $ex);
+   }
+
+   $error = idx($response, 'error');
+   if ($error) {
+     throw new Exception(
+       pht('Received error from WordPress.com: %s', $error));
+   }
+
+   return $response;
+ }
+
+}
diff --git a/src/grammar/PhutilContextFreeGrammar.php b/src/grammar/PhutilContextFreeGrammar.php
new file mode 100644
--- /dev/null
+++ b/src/grammar/PhutilContextFreeGrammar.php
@@ -0,0 +1,93 @@
+generate();
+ }
+ return implode($implode, $paragraph);
+ }
+
+ /**
+  * Generate one string by expanding the `[start]` token with this
+  * grammar's rules.
+  *
+  * @return string Fully expanded text.
+  */
+ public function generate() {
+   // Replacement counter shared (by reference) across the whole expansion.
+   $replacements = 0;
+
+   return $this->applyRules('[start]', $replacements, $this->getRules());
+ }
+
+ /**
+  * Recursively expand every `[token]` in a string.
+  *
+  * Each token is replaced with a randomly chosen production of the rule
+  * named by the token. A token may carry comma-separated options after its
+  * name: `indent` (pad lines), `trim` ('left', 'right', anything else
+  * trims both sides) and `block` (wrap in newlines).
+  *
+  * @param string Text which may contain tokens.
+  * @param int Running count of expansions, shared across the recursion.
+  * @param map<string, list<string>> Productions, keyed by rule name.
+  * @return string Text with all tokens expanded.
+  * @throws Exception If the expansion count exceeds the limit, or a token
+  *   names a rule which is missing or empty.
+  */
+ final protected function applyRules($input, &$count, array $rules) {
+   if (++$count > $this->limit) {
+     throw new Exception(pht('Token replacement count exceeded limit!'));
+   }
+
+   $found = null;
+   preg_match_all('/(\\[[^\\]]+\\])/', $input, $found, PREG_OFFSET_CAPTURE);
+
+   // Work from the last token to the first so that earlier offsets stay
+   // valid while the string is being spliced.
+   $specs = array_reverse($found[1]);
+   foreach ($specs as $spec) {
+     list($token, $offset) = $spec;
+
+     $rule_name = substr($token, 1, -1);
+     $options = array();
+
+     $comma = strpos($rule_name, ',');
+     if ($comma) {
+       $parser = new PhutilSimpleOptions();
+       $options = $parser->parse($rule_name);
+       $rule_name = substr($rule_name, 0, $comma);
+     }
+
+     if (empty($rules[$rule_name])) {
+       throw new Exception(pht("Invalid token '%s' in grammar.", $rule_name));
+     }
+
+     $productions = $rules[$rule_name];
+     $picked = $productions[array_rand($productions)];
+     $expanded = $this->applyRules($picked, $count, $rules);
+
+     if (isset($options['indent'])) {
+       $indent = $options['indent'];
+       if (is_numeric($indent)) {
+         $expanded = self::strPadLines($expanded, $indent);
+       } else {
+         $expanded = self::strPadLines($expanded);
+       }
+     }
+
+     if (isset($options['trim'])) {
+       $trim_mode = $options['trim'];
+       if ($trim_mode == 'left') {
+         $expanded = ltrim($expanded);
+       } else if ($trim_mode == 'right') {
+         $expanded = rtrim($expanded);
+       } else {
+         // 'both', and any unrecognized value, trims both sides.
+         $expanded = trim($expanded);
+       }
+     }
+
+     if (isset($options['block'])) {
+       $expanded = "\n".$expanded."\n";
+     }
+
+     $input = substr_replace($input, $expanded, $offset, strlen($token));
+   }
+
+   return $input;
+ }
+
+ /**
+  * Prefix every line of a text with spaces.
+  *
+  * @param string Text to indent.
+  * @param int Number of spaces to put in front of each line.
+  * @return string The lines from `phutil_split_lines()`, each prefixed
+  *   and concatenated back together.
+  */
+ private static function strPadLines($text, $num_spaces = 2) {
+   $padded = '';
+   foreach (phutil_split_lines($text) as $line) {
+     $padded .= str_repeat(' ', $num_spaces).$line;
+   }
+
+   return $padded;
+ }
+
+}
diff --git a/src/grammar/PhutilLipsumContextFreeGrammar.php b/src/grammar/PhutilLipsumContextFreeGrammar.php
new file mode 100644
--- /dev/null
+++ b/src/grammar/PhutilLipsumContextFreeGrammar.php
@@ -0,0 +1,107 @@
+ array(
+ '[words].',
+ '[words].',
+ '[words].',
+ '[words]: [word], [word], [word] [word].',
+ '[words]; [lowerwords].',
+ '[words]!',
+ '[words], "[words]."',
+ '[words] ("[upperword] [upperword] [upperword]") [lowerwords].',
+ '[words]?',
+ ),
+ 'words' => array(
+ '[upperword] [lowerwords]',
+ ),
+ 'upperword' => array(
+ 'Lorem',
+ 'Ipsum',
+ 'Dolor',
+ 'Sit',
+ 'Amet',
+ ),
+ 'lowerwords' => array(
+ '[word]',
+ '[word] [word]',
+ '[word] [word] [word]',
+ '[word] [word] [word] [word]',
+ '[word] [word] [word] [word] [word]',
+ '[word] [word] [word] [word] [word]',
+ '[word] [word] [word] [word] [word] [word]',
+ '[word] [word] [word] [word] [word] [word]',
+ ),
+ 'word' => array(
+ 'ad',
+ 'adipisicing',
+ 'aliqua',
+ 'aliquip',
+ 'amet',
+ 'anim',
+ 'aute',
+ 'cillum',
+ 'commodo',
+ 'consectetur',
+ 'consequat',
+ 'culpa',
+ 'cupidatat',
+ 'deserunt',
+ 'do',
+ 'dolor',
+ 'dolore',
+ 'duis',
+ 'ea',
+ 'eiusmod',
+ 'elit',
+ 'enim',
+ 'esse',
+ 'est',
+ 'et',
+ 'eu',
+ 'ex',
+ 'excepteur',
+ 'exercitation',
+ 'fugiat',
+ 'id',
+ 'in',
+ 'incididunt',
+ 'ipsum',
+ 'irure',
+ 'labore',
+ 'laboris',
+ 'laborum',
+ 'lorem',
+ 'magna',
+ 'minim',
+ 'mollit',
+ 'nisi',
+ 'non',
+ 'nostrud',
+ 'nulla',
+ 'occaecat',
+ 'officia',
+ 'pariatur',
+ 'proident',
+ 'qui',
+ 'quis',
+ 'reprehenderit',
+ 'sed',
+ 'sint',
+ 'sit',
+ 'sunt',
+ 'tempor',
+ 'ullamco',
+ 'ut',
+ 'velit',
+ 'veniam',
+ 'voluptate',
+ ),
+ );
+ }
+
+}
diff --git a/src/grammar/PhutilRealNameContextFreeGrammar.php b/src/grammar/PhutilRealNameContextFreeGrammar.php
new file mode 100644
--- /dev/null
+++ b/src/grammar/PhutilRealNameContextFreeGrammar.php
@@ -0,0 +1,155 @@
+ array(
+ '[first] [last]',
+ '[first] [last]',
+ '[first] [last]',
+ '[first] [last]',
+ '[first] [last]',
+ '[first] [last]',
+ '[first] [last]',
+ '[first] [last]',
+ '[first] [last]-[last]',
+ '[first] [middle] [last]',
+ '[first] "[nick]" [last]',
+ '[first] [particle] [particle] [particle]',
+ ),
+ 'first' => array(
+ 'Mohamed',
+ 'Youssef',
+ 'Ahmed',
+ 'Mahmoud',
+ 'Mustafa',
+ 'Fatma',
+ 'Aya',
+ 'Noam',
+ 'Adam',
+ 'Lucas',
+ 'Noah',
+ 'Jakub',
+ 'Victor',
+ 'Harry',
+ 'Rasmus',
+ 'Nathan',
+ 'Emil',
+ 'Charlie',
+ 'Leon',
+ 'Dylan',
+ 'Alexander',
+ 'Emma',
+ 'Marie',
+ 'Lea',
+ 'Amelia',
+ 'Hanna',
+ 'Emily',
+ 'Sofia',
+ 'Julia',
+ 'Santiago',
+ 'Sebastian',
+ 'Olivia',
+ 'Madison',
+ 'Isabella',
+ 'Esther',
+ 'Anya',
+ 'Camila',
+ 'Jack',
+ 'Oliver',
+ ),
+ 'nick' => array(
+ 'Buzz',
+ 'Juggernaut',
+ 'Haze',
+ 'Hawk',
+ 'Iceman',
+ 'Killer',
+ 'Apex',
+ 'Ocelot',
+ ),
+ 'middle' => array(
+ 'Rose',
+ 'Grace',
+ 'Jane',
+ 'Louise',
+ 'Jade',
+ 'James',
+ 'John',
+ 'William',
+ 'Thomas',
+ 'Alexander',
+ ),
+ 'last' => array(
+ '[termlast]',
+ '[termlast]',
+ '[termlast]',
+ '[termlast]',
+ '[termlast]',
+ '[termlast]',
+ '[termlast]',
+ '[termlast]',
+ 'O\'[termlast]',
+ 'Mc[termlast]',
+ ),
+ 'termlast' => array(
+ 'Smith',
+ 'Johnson',
+ 'Williams',
+ 'Jones',
+ 'Brown',
+ 'Davis',
+ 'Miller',
+ 'Wilson',
+ 'Moore',
+ 'Taylor',
+ 'Anderson',
+ 'Thomas',
+ 'Jackson',
+ 'White',
+ 'Harris',
+ 'Martin',
+ 'Thompson',
+ 'Garcia',
+ 'Marinez',
+ 'Robinson',
+ 'Clark',
+ 'Rodrigues',
+ 'Lewis',
+ 'Lee',
+ 'Walker',
+ 'Hall',
+ 'Allen',
+ 'Young',
+ 'Hernandex',
+ 'King',
+ 'Wang',
+ 'Li',
+ 'Zhang',
+ 'Liu',
+ 'Chen',
+ 'Yang',
+ 'Huang',
+ 'Zhao',
+ 'Wu',
+ 'Zhou',
+ 'Xu',
+ 'Sun',
+ 'Ma',
+ ),
+ 'particle' => array(
+ 'Wu',
+ 'Xu',
+ 'Ma',
+ 'Li',
+ 'Liu',
+ 'Shao',
+ 'Lin',
+ 'Khan',
+ ),
+ );
+ }
+
+}
diff --git a/src/grammar/code/PhutilCLikeCodeSnippetContextFreeGrammar.php b/src/grammar/code/PhutilCLikeCodeSnippetContextFreeGrammar.php
new file mode 100644
--- /dev/null
+++ b/src/grammar/code/PhutilCLikeCodeSnippetContextFreeGrammar.php
@@ -0,0 +1,254 @@
+getStmtTerminationGrammarSet(),
+ $this->getVarNameGrammarSet(),
+ $this->getNullExprGrammarSet(),
+ $this->getNumberGrammarSet(),
+ $this->getExprGrammarSet(),
+ $this->getCondGrammarSet(),
+ $this->getLoopGrammarSet(),
+ $this->getStmtGrammarSet(),
+ $this->getAssignmentGrammarSet(),
+ $this->getArithExprGrammarSet(),
+ $this->getBoolExprGrammarSet(),
+ $this->getBoolValGrammarSet(),
+ $this->getTernaryExprGrammarSet(),
+
+ $this->getFuncNameGrammarSet(),
+ $this->getFuncCallGrammarSet(),
+ $this->getFuncCallParamGrammarSet(),
+ $this->getFuncDeclGrammarSet(),
+ $this->getFuncParamGrammarSet(),
+ $this->getFuncBodyGrammarSet(),
+ $this->getFuncReturnGrammarSet(),
+ );
+ }
+
+ /**
+  * Extend the parent's "start" productions so a snippet may also consist of
+  * a single function declaration.
+  *
+  * @return map Grammar set containing the "start" rule.
+  */
+ protected function getStartGrammarSet() {
+   $grammar = parent::getStartGrammarSet();
+   $grammar['start'][] = '[funcdecl]';
+   return $grammar;
+ }
+
+ /**
+  * Build the "term" rule: statements end with a semicolon.
+  *
+  * @return map Grammar set for the "term" rule.
+  */
+ protected function getStmtTerminationGrammarSet() {
+   $terminators = array(';');
+   return $this->buildGrammarSet('term', $terminators);
+ }
+
+ /**
+  * Build the "funccall" rule: a function name applied to call parameters.
+  *
+  * @return map Grammar set for the "funccall" rule.
+  */
+ protected function getFuncCallGrammarSet() {
+   $productions = array(
+     '[funcname]([funccallparam])',
+   );
+   return $this->buildGrammarSet('funccall', $productions);
+ }
+
+ /**
+  * Build the "funccallparam" rule: zero, one or two expressions.
+  *
+  * @return map Grammar set for the "funccallparam" rule.
+  */
+ protected function getFuncCallParamGrammarSet() {
+   $productions = array(
+     '',
+     '[expr]',
+     '[expr], [expr]',
+   );
+   return $this->buildGrammarSet('funccallparam', $productions);
+ }
+
+ /**
+  * Build the "funcdecl" rule: a named function with parameters and a
+  * brace-delimited body.
+  *
+  * @return map Grammar set for the "funcdecl" rule.
+  */
+ protected function getFuncDeclGrammarSet() {
+   $signature = 'function [funcname]([funcparam]) ';
+   $body = '{[funcbody, indent, block, trim=right]}';
+
+   return $this->buildGrammarSet('funcdecl', array($signature.$body));
+ }
+
+ /**
+  * Build the "funcparam" rule: between zero and three variable names.
+  *
+  * @return map Grammar set for the "funcparam" rule.
+  */
+ protected function getFuncParamGrammarSet() {
+   $productions = array(
+     '',
+     '[varname]',
+     '[varname], [varname]',
+     '[varname], [varname], [varname]',
+   );
+   return $this->buildGrammarSet('funcparam', $productions);
+ }
+
+ /**
+  * Build the "funcbody" rule: two to four statements followed by a return.
+  *
+  * @return map Grammar set for the "funcbody" rule.
+  */
+ protected function getFuncBodyGrammarSet() {
+   $productions = array(
+     "[stmt]\n[stmt]\n[funcreturn]",
+     "[stmt]\n[stmt]\n[stmt]\n[funcreturn]",
+     "[stmt]\n[stmt]\n[stmt]\n[stmt]\n[funcreturn]",
+   );
+   return $this->buildGrammarSet('funcbody', $productions);
+ }
+
+ /**
+  * Build the "funcreturn" rule: either a return statement or nothing.
+  *
+  * @return map Grammar set for the "funcreturn" rule.
+  */
+ protected function getFuncReturnGrammarSet() {
+   $productions = array(
+     'return [expr][term]',
+     '',
+   );
+   return $this->buildGrammarSet('funcreturn', $productions);
+ }
+
+ // Not really C, but put it here because of the curly braces and mostly shared
+ // among Java and PHP
+ protected function getClassDeclGrammarSet() {
+   // A class declaration, with or without an inheritance modifier.
+   $productions = array(
+     '[classinheritancemod] class [classname] {[classbody, indent, block]}',
+     'class [classname] {[classbody, indent, block]}',
+   );
+   return $this->buildGrammarSet('classdecl', $productions);
+ }
+
+ /**
+  * Build the "classname" rule from a fixed pool of sample class names.
+  *
+  * @return map Grammar set for the "classname" rule.
+  */
+ protected function getClassNameGrammarSet() {
+   $class_names = array(
+     'MuffinHouse',
+     'MuffinReader',
+     'MuffinAwesomizer',
+     'SuperException',
+     'Librarian',
+     'Book',
+     'Ball',
+     'BallOfCode',
+     'AliceAndBobsSharedSecret',
+     'FileInputStream',
+     'FileOutputStream',
+     'BufferedReader',
+     'BufferedWriter',
+     'Cardigan',
+     'HouseOfCards',
+     'UmbrellaClass',
+     'GenericThing',
+   );
+   return $this->buildGrammarSet('classname', $class_names);
+ }
+
+ /**
+  * Build the "classbody" rule: optional property declarations followed by
+  * one to three method declarations.
+  *
+  * @return map Grammar set for the "classbody" rule.
+  */
+ protected function getClassBodyGrammarSet() {
+   $three_props_three_methods =
+     "[propdecl]\n[propdecl]\n[propdecl]\n\n[methoddecl]\n\n[methoddecl]".
+     "\n\n[methoddecl]";
+
+   $productions = array(
+     '[methoddecl]',
+     "[methoddecl]\n\n[methoddecl]",
+     "[propdecl]\n[propdecl]\n\n[methoddecl]\n\n[methoddecl]",
+     $three_props_three_methods,
+   );
+   return $this->buildGrammarSet('classbody', $productions);
+ }
+
+ /**
+  * Build the "visibility" rule from the three visibility keywords.
+  *
+  * @return map Grammar set for the "visibility" rule.
+  */
+ protected function getVisibilityGrammarSet() {
+   $keywords = array(
+     'private',
+     'protected',
+     'public',
+   );
+   return $this->buildGrammarSet('visibility', $keywords);
+ }
+
+ /**
+  * Build the "classinheritancemod" rule: modifiers allowed on a class.
+  *
+  * @return map Grammar set for the "classinheritancemod" rule.
+  */
+ protected function getClassInheritanceModGrammarSet() {
+   $modifiers = array(
+     'final',
+     'abstract',
+   );
+   return $this->buildGrammarSet('classinheritancemod', $modifiers);
+ }
+
+ // Keeping this separate so we won't give abstract methods a function body
+ protected function getMethodInheritanceModGrammarSet() {
+   // Only "final" here; abstract methods have their own rule.
+   $modifiers = array(
+     'final',
+   );
+   return $this->buildGrammarSet('methodinheritancemod', $modifiers);
+ }
+
+ /**
+  * Build the "methoddecl" rule. The plain "[visibility] [methodfuncdecl]"
+  * production is listed twice, so it occupies two of the four slots.
+  *
+  * @return map Grammar set for the "methoddecl" rule.
+  */
+ protected function getMethodDeclGrammarSet() {
+   $plain_method = '[visibility] [methodfuncdecl]';
+
+   $productions = array(
+     $plain_method,
+     $plain_method,
+     '[methodinheritancemod] [visibility] [methodfuncdecl]',
+     '[abstractmethoddecl]',
+   );
+   return $this->buildGrammarSet('methoddecl', $productions);
+ }
+
+ /**
+  * Build the "methodfuncdecl" rule: a function declaration whose body is a
+  * method body.
+  *
+  * @return map Grammar set for the "methodfuncdecl" rule.
+  */
+ protected function getMethodFuncDeclGrammarSet() {
+   $signature = 'function [funcname]([funcparam]) ';
+   $body = '{[methodbody, indent, block, trim=right]}';
+
+   return $this->buildGrammarSet('methodfuncdecl', array($signature.$body));
+ }
+
+ /**
+  * Build the "methodbody" rule: a statement followed either by more method
+  * body (recursive) or by the function return rule.
+  *
+  * @return map Grammar set for the "methodbody" rule.
+  */
+ protected function getMethodBodyGrammarSet() {
+   $productions = array(
+     "[methodstmt]\n[methodbody]",
+     "[methodstmt]\n[funcreturn]",
+   );
+   return $this->buildGrammarSet('methodbody', $productions);
+ }
+
+ /**
+  * Build the "methodstmt" rule: every ordinary statement production plus a
+  * terminated method call.
+  *
+  * @return map Grammar set for the "methodstmt" rule.
+  */
+ protected function getMethodStmtGrammarSet() {
+   $statement_set = $this->getStmtGrammarSet();
+   $method_only = array(
+     '[methodcall][term]',
+   );
+
+   $productions = array_merge($statement_set['stmt'], $method_only);
+   return $this->buildGrammarSet('methodstmt', $productions);
+ }
+
+ /**
+  * Build the "methodcall" rule using dot-style member access
+  * (Java/JavaScript).
+  *
+  * @return map Grammar set for the "methodcall" rule.
+  */
+ protected function getMethodCallGrammarSet() {
+   $productions = array(
+     'this.[funccall]',
+     '[varname].[funccall]',
+     '[classname].[funccall]',
+   );
+   return $this->buildGrammarSet('methodcall', $productions);
+ }
+
+ /**
+  * Build the "abstractmethoddecl" rule: a terminated signature with no body.
+  *
+  * @return map Grammar set for the "abstractmethoddecl" rule.
+  */
+ protected function getAbstractMethodDeclGrammarSet() {
+   $productions = array(
+     'abstract function [funcname]([funcparam])[term]',
+   );
+   return $this->buildGrammarSet('abstractmethoddecl', $productions);
+ }
+
+ /**
+  * Build the "propdecl" rule: a visibility keyword and a variable name.
+  *
+  * @return map Grammar set for the "propdecl" rule.
+  */
+ protected function getPropDeclGrammarSet() {
+   $productions = array(
+     '[visibility] [varname][term]',
+   );
+   return $this->buildGrammarSet('propdecl', $productions);
+ }
+
+ /**
+  * Collect every class-related grammar set.
+  *
+  * @return list List of grammar sets, one per class-related rule.
+  */
+ protected function getClassRuleSets() {
+   $sets = array();
+
+   $sets[] = $this->getClassInheritanceModGrammarSet();
+   $sets[] = $this->getMethodInheritanceModGrammarSet();
+   $sets[] = $this->getClassDeclGrammarSet();
+   $sets[] = $this->getClassNameGrammarSet();
+   $sets[] = $this->getClassBodyGrammarSet();
+   $sets[] = $this->getMethodDeclGrammarSet();
+   $sets[] = $this->getMethodFuncDeclGrammarSet();
+   $sets[] = $this->getMethodBodyGrammarSet();
+   $sets[] = $this->getMethodStmtGrammarSet();
+   $sets[] = $this->getMethodCallGrammarSet();
+   $sets[] = $this->getAbstractMethodDeclGrammarSet();
+   $sets[] = $this->getPropDeclGrammarSet();
+   $sets[] = $this->getVisibilityGrammarSet();
+
+   return $sets;
+ }
+
+ /**
+  * Generate a snippet whose start rule is a class declaration.
+  *
+  * @return wild Result of applyRules() for "[start]" (presumably the
+  *              generated text).
+  */
+ public function generateClass() {
+   // getClassRuleSets() returns a list of single-rule maps. Flatten it into
+   // one name => productions map before merging. Merging the list directly
+   // appends the maps under numeric keys, where rules such as "classdecl"
+   // cannot be found by name.
+   $rules = array_merge(
+     $this->getRules(),
+     array_mergev($this->getClassRuleSets()));
+
+   // Force generation to begin at a class declaration.
+   $rules['start'] = array('[classdecl]');
+
+   $count = 0;
+   return $this->applyRules('[start]', $count, $rules);
+ }
+
+}
diff --git a/src/grammar/code/PhutilCodeSnippetContextFreeGrammar.php b/src/grammar/code/PhutilCodeSnippetContextFreeGrammar.php
new file mode 100644
--- /dev/null
+++ b/src/grammar/code/PhutilCodeSnippetContextFreeGrammar.php
@@ -0,0 +1,205 @@
+getStartGrammarSet(),
+ $this->getStmtGrammarSet(),
+ array_mergev($this->buildRuleSet()));
+ }
+
+ /**
+  * Return the grammar sets that make up this grammar, as a list of maps
+  * that can be flattened with array_mergev().
+  *
+  * @return list List of grammar sets.
+  */
+ abstract protected function buildRuleSet();
+
+ /**
+  * Wrap a list of productions in a single-rule grammar set.
+  *
+  * @param string Rule name.
+  * @param list   Productions for the rule.
+  * @return map   Map with the rule name as its only key.
+  */
+ protected function buildGrammarSet($name, array $set) {
+   $grammar = array();
+   $grammar[$name] = $set;
+   return $grammar;
+ }
+
+ /**
+  * Build the "start" rule: two to four newline-separated statements.
+  *
+  * @return map Grammar set for the "start" rule.
+  */
+ protected function getStartGrammarSet() {
+   $productions = array(
+     "[stmt]\n[stmt]",
+     "[stmt]\n[stmt]\n[stmt]",
+     "[stmt]\n[stmt]\n[stmt]\n[stmt]",
+   );
+   return $this->buildGrammarSet('start', $productions);
+ }
+
+ /**
+  * Build the "stmt" rule. Assignments and function calls each appear four
+  * times; conditionals and loops appear once each.
+  *
+  * @return map Grammar set for the "stmt" rule.
+  */
+ protected function getStmtGrammarSet() {
+   $productions = array_merge(
+     array_fill(0, 4, '[assignment][term]'),
+     array_fill(0, 4, '[funccall][term]'),
+     array(
+       '[cond]',
+       '[loop]',
+     ));
+
+   return $this->buildGrammarSet('stmt', $productions);
+ }
+
+ /**
+  * Build the "funcname" rule from a fixed pool of sample function names.
+  *
+  * @return map Grammar set for the "funcname" rule.
+  */
+ protected function getFuncNameGrammarSet() {
+   $function_names = array(
+     'do_something',
+     'nonempty',
+     'noOp',
+     'call_user_func',
+     'getenv',
+     'render',
+     'super',
+     'derpify',
+     'awesomize',
+     'equals',
+     'run',
+     'flee',
+     'fight',
+     'notify',
+     'listen',
+     'calculate',
+     'aim',
+     'open',
+   );
+   return $this->buildGrammarSet('funcname', $function_names);
+ }
+
+ /**
+  * Build the "varname" rule from a fixed pool of sample variable names.
+  *
+  * @return map Grammar set for the "varname" rule.
+  */
+ protected function getVarNameGrammarSet() {
+   $variable_names = array(
+     'is_something',
+     'object',
+     'name',
+     'token',
+     'label',
+     'piece_of_the_pie',
+     'type',
+     'state',
+     'param',
+     'action',
+     'key',
+     'timeout',
+     'result',
+   );
+   return $this->buildGrammarSet('varname', $variable_names);
+ }
+
+ /**
+  * Build the "null" rule: the literal null.
+  *
+  * @return map Grammar set for the "null" rule.
+  */
+ protected function getNullExprGrammarSet() {
+   $literals = array('null');
+   return $this->buildGrammarSet('null', $literals);
+ }
+
+ /**
+  * Build the "number" rule from three random integers and two random
+  * decimal strings. The values are drawn once, when this method runs.
+  *
+  * @return map Grammar set for the "number" rule.
+  */
+ protected function getNumberGrammarSet() {
+   // Draw in a fixed order so the sequence of mt_rand() calls is stable.
+   $small_int = mt_rand(-1, 100);
+   $medium_int = mt_rand(-100, 1000);
+   $large_int = mt_rand(-1000, 5000);
+   $small_decimal = mt_rand(0, 1).'.'.mt_rand(1, 1000);
+   $large_decimal = mt_rand(0, 50).'.'.mt_rand(1, 1000);
+
+   $numbers = array(
+     $small_int,
+     $medium_int,
+     $large_int,
+     $small_decimal,
+     $large_decimal,
+   );
+   return $this->buildGrammarSet('number', $numbers);
+ }
+
+ /**
+  * Build the "expr" rule. Several productions are listed twice, and two
+  * random quoted strings are appended at the end.
+  *
+  * @return map Grammar set for the "expr" rule.
+  */
+ protected function getExprGrammarSet() {
+   $productions = array(
+     '[null]',
+     '[number]',
+     '[number]',
+     '[varname]',
+     '[varname]',
+     '[boolval]',
+     '[boolval]',
+     '[boolexpr]',
+     '[boolexpr]',
+     '[funccall]',
+     '[arithexpr]',
+     '[arithexpr]',
+   );
+
+   // Some random strings, 4 and then 5 characters long.
+   foreach (array(4, 5) as $length) {
+     $productions[] = '"'.Filesystem::readRandomCharacters($length).'"';
+   }
+
+   return $this->buildGrammarSet('expr', $productions);
+ }
+
+ /**
+  * Build the "boolexpr" rule: a variable, its negation, a comparison with
+  * a boolean value, or a ternary.
+  *
+  * @return map Grammar set for the "boolexpr" rule.
+  */
+ protected function getBoolExprGrammarSet() {
+   $productions = array(
+     '[varname]',
+     '![varname]',
+     '[varname] == [boolval]',
+     '[varname] != [boolval]',
+     '[ternary]',
+   );
+   return $this->buildGrammarSet('boolexpr', $productions);
+ }
+
+ /**
+  * Build the "boolval" rule: the two boolean literals.
+  *
+  * @return map Grammar set for the "boolval" rule.
+  */
+ protected function getBoolValGrammarSet() {
+   $literals = array(
+     'true',
+     'false',
+   );
+   return $this->buildGrammarSet('boolval', $literals);
+ }
+
+ /**
+  * Build the "arithexpr" rule: increments, decrements, and addition or
+  * subtraction of a number.
+  *
+  * @return map Grammar set for the "arithexpr" rule.
+  */
+ protected function getArithExprGrammarSet() {
+   $productions = array(
+     '[varname]++',
+     '++[varname]',
+     '[varname] + [number]',
+     '[varname]--',
+     '--[varname]',
+     '[varname] - [number]',
+   );
+   return $this->buildGrammarSet('arithexpr', $productions);
+ }
+
+ /**
+  * Build the "assignment" rule: plain and compound assignment forms.
+  *
+  * @return map Grammar set for the "assignment" rule.
+  */
+ protected function getAssignmentGrammarSet() {
+   $productions = array(
+     '[varname] = [expr]',
+     '[varname] = [arithexpr]',
+     '[varname] += [expr]',
+   );
+   return $this->buildGrammarSet('assignment', $productions);
+ }
+
+ /**
+  * Build the "cond" rule: an if statement with an optional else branch.
+  *
+  * @return map Grammar set for the "cond" rule.
+  */
+ protected function getCondGrammarSet() {
+   $productions = array(
+     'if ([boolexpr]) {[stmt, indent, block]}',
+     'if ([boolexpr]) {[stmt, indent, block]} else {[stmt, indent, block]}',
+   );
+   return $this->buildGrammarSet('cond', $productions);
+ }
+
+ /**
+  * Build the "loop" rule: while, do-while and for loops.
+  *
+  * @return map Grammar set for the "loop" rule.
+  */
+ protected function getLoopGrammarSet() {
+   $productions = array(
+     'while ([boolexpr]) {[stmt, indent, block]}',
+     'do {[stmt, indent, block]} while ([boolexpr])[term]',
+     'for ([assignment]; [boolexpr]; [expr]) {[stmt, indent, block]}',
+   );
+   return $this->buildGrammarSet('loop', $productions);
+ }
+
+ /**
+  * Build the "ternary" rule: a condition selecting between two expressions.
+  *
+  * @return map Grammar set for the "ternary" rule.
+  */
+ protected function getTernaryExprGrammarSet() {
+   $productions = array(
+     '[boolexpr] ? [expr] : [expr]',
+   );
+   return $this->buildGrammarSet('ternary', $productions);
+ }
+
+ /**
+  * Build the "term" rule. The terminator here is the empty string.
+  *
+  * @return map Grammar set for the "term" rule.
+  */
+ protected function getStmtTerminationGrammarSet() {
+   $terminators = array('');
+   return $this->buildGrammarSet('term', $terminators);
+ }
+
+}
diff --git a/src/grammar/code/PhutilJavaCodeSnippetContextFreeGrammar.php b/src/grammar/code/PhutilJavaCodeSnippetContextFreeGrammar.php
new file mode 100644
--- /dev/null
+++ b/src/grammar/code/PhutilJavaCodeSnippetContextFreeGrammar.php
@@ -0,0 +1,184 @@
+getClassRuleSets());
+
+ $rulesset[] = $this->getTypeNameGrammarSet();
+ $rulesset[] = $this->getNamespaceDeclGrammarSet();
+ $rulesset[] = $this->getNamespaceNameGrammarSet();
+ $rulesset[] = $this->getImportGrammarSet();
+ $rulesset[] = $this->getMethodReturnTypeGrammarSet();
+ $rulesset[] = $this->getMethodNameGrammarSet();
+ $rulesset[] = $this->getVarDeclGrammarSet();
+ $rulesset[] = $this->getClassDerivGrammarSet();
+
+ return $rulesset;
+ }
+
+ /**
+  * Build the "start" rule: an import, a package declaration and a class
+  * declaration, each as its own block.
+  *
+  * NOTE(review): Java requires the package declaration to come before
+  * imports, but this emits the import first. Confirm whether that ordering
+  * is intentional for sample output.
+  *
+  * @return map Grammar set for the "start" rule.
+  */
+ protected function getStartGrammarSet() {
+   $productions = array(
+     '[import, block][nmspdecl, block][classdecl, block]',
+   );
+   return $this->buildGrammarSet('start', $productions);
+ }
+
+ /**
+  * Build the "classdecl" rule: a class with a visibility keyword, an
+  * optional inheritance modifier and an optional "extends" clause.
+  *
+  * @return map Grammar set for the "classdecl" rule.
+  */
+ protected function getClassDeclGrammarSet() {
+   $body = '{[classbody, indent, block]}';
+
+   $with_modifier =
+     '[classinheritancemod] [visibility] class [classname][classderiv] '.
+     $body;
+   $without_modifier =
+     '[visibility] class [classname][classderiv] '.
+     $body;
+
+   return $this->buildGrammarSet(
+     'classdecl',
+     array(
+       $with_modifier,
+       $without_modifier,
+     ));
+ }
+
+ /**
+  * Build the "classderiv" rule. The empty production is listed twice, so
+  * the "extends" clause occupies one of three slots.
+  *
+  * @return map Grammar set for the "classderiv" rule.
+  */
+ protected function getClassDerivGrammarSet() {
+   $productions = array(
+     ' extends [classname]',
+     '',
+     '',
+   );
+   return $this->buildGrammarSet('classderiv', $productions);
+ }
+
+ /**
+  * Build the "type" rule: primitive type names, a class name, or an array
+  * of another type (recursive).
+  *
+  * @return map Grammar set for the "type" rule.
+  */
+ protected function getTypeNameGrammarSet() {
+   $types = array(
+     'int',
+     'boolean',
+     'char',
+     'short',
+     'long',
+     'float',
+     'double',
+     '[classname]',
+     '[type][]',
+   );
+   return $this->buildGrammarSet('type', $types);
+ }
+
+ /**
+  * Build the "methodreturn" rule: any type, or void.
+  *
+  * @return map Grammar set for the "methodreturn" rule.
+  */
+ protected function getMethodReturnTypeGrammarSet() {
+   $productions = array(
+     '[type]',
+     'void',
+   );
+   return $this->buildGrammarSet('methodreturn', $productions);
+ }
+
+ /**
+  * Build the "nmspdecl" rule: a terminated package declaration.
+  *
+  * @return map Grammar set for the "nmspdecl" rule.
+  */
+ protected function getNamespaceDeclGrammarSet() {
+   $productions = array(
+     'package [nmspname][term]',
+   );
+   return $this->buildGrammarSet('nmspdecl', $productions);
+ }
+
+ /**
+  * Build the "nmspname" rule from a fixed pool of sample package names.
+  *
+  * @return map Grammar set for the "nmspname" rule.
+  */
+ protected function getNamespaceNameGrammarSet() {
+   $package_names = array(
+     'java.lang',
+     'java.io',
+     'com.example.proj.std',
+     'derp.example.www',
+   );
+   return $this->buildGrammarSet('nmspname', $package_names);
+ }
+
+ /**
+  * Build the "import" rule: import a package, a wildcard, or one class.
+  *
+  * @return map Grammar set for the "import" rule.
+  */
+ protected function getImportGrammarSet() {
+   $productions = array(
+     'import [nmspname][term]',
+     'import [nmspname].*[term]',
+     'import [nmspname].[classname][term]',
+   );
+   return $this->buildGrammarSet('import', $productions);
+ }
+
+ /**
+  * Extend the parent's "expr" productions with object construction, method
+  * calls (listed four times) and two random character literals.
+  *
+  * @return map Grammar set containing the "expr" rule.
+  */
+ protected function getExprGrammarSet() {
+   $grammar = parent::getExprGrammarSet();
+
+   $grammar['expr'][] = 'new [classname]([funccallparam])';
+
+   // Four identical entries.
+   for ($slot = 0; $slot < 4; $slot++) {
+     $grammar['expr'][] = '[methodcall]';
+   }
+
+   // Add some 'char's
+   $remaining = 2;
+   while ($remaining-- > 0) {
+     $grammar['expr'][] = "'".Filesystem::readRandomCharacters(1)."'";
+   }
+
+   return $grammar;
+ }
+
+ /**
+  * Extend the parent's "stmt" productions with variable declarations
+  * (listed twice) and a throw statement.
+  *
+  * @return map Grammar set containing the "stmt" rule.
+  */
+ protected function getStmtGrammarSet() {
+   $grammar = parent::getStmtGrammarSet();
+
+   $additions = array(
+     '[vardecl]',
+     '[vardecl]',
+     // `try` to `throw` a `Ball`!
+     'throw [classname][term]',
+   );
+   foreach ($additions as $production) {
+     $grammar['stmt'][] = $production;
+   }
+
+   return $grammar;
+ }
+
+ /**
+  * Build the "propdecl" rule: visibility, type and variable name.
+  *
+  * @return map Grammar set for the "propdecl" rule.
+  */
+ protected function getPropDeclGrammarSet() {
+   $productions = array(
+     '[visibility] [type] [varname][term]',
+   );
+   return $this->buildGrammarSet('propdecl', $productions);
+ }
+
+ /**
+  * Build the "vardecl" rule: a typed declaration, with or without an
+  * initial assignment.
+  *
+  * @return map Grammar set for the "vardecl" rule.
+  */
+ protected function getVarDeclGrammarSet() {
+   $productions = array(
+     '[type] [varname][term]',
+     '[type] [assignment][term]',
+   );
+   return $this->buildGrammarSet('vardecl', $productions);
+ }
+
+ /**
+  * Build the "funcname" rule: a method name, optionally qualified by a
+  * class name.
+  *
+  * @return map Grammar set for the "funcname" rule.
+  */
+ protected function getFuncNameGrammarSet() {
+   // Deliberately left out:
+   //   '[classname].[funcname]' is just silly (too much recursion).
+   //   '[nmspname].[classname].[methodname]' clutters up the output
+   //   (thanks to recursion), so don't do it for now.
+   $productions = array(
+     '[methodname]',
+     '[classname].[methodname]',
+   );
+   return $this->buildGrammarSet('funcname', $productions);
+ }
+
+ // Renamed from `funcname`
+ protected function getMethodNameGrammarSet() {
+   // Reuse the parent's function names as the productions for "methodname".
+   $parent_set = parent::getFuncNameGrammarSet();
+   $method_names = head($parent_set);
+
+   return $this->buildGrammarSet('methodname', $method_names);
+ }
+
+ /**
+  * Build the "methodfuncdecl" rule: return type, method name, parameters
+  * and a brace-delimited method body.
+  *
+  * @return map Grammar set for the "methodfuncdecl" rule.
+  */
+ protected function getMethodFuncDeclGrammarSet() {
+   $signature = '[methodreturn] [methodname]([funcparam]) ';
+   $body = '{[methodbody, indent, block, trim=right]}';
+
+   return $this->buildGrammarSet('methodfuncdecl', array($signature.$body));
+ }
+
+ /**
+  * Build the "funcparam" rule: between zero and three typed parameters.
+  *
+  * @return map Grammar set for the "funcparam" rule.
+  */
+ protected function getFuncParamGrammarSet() {
+   $productions = array(
+     '',
+     '[type] [varname]',
+     '[type] [varname], [type] [varname]',
+     '[type] [varname], [type] [varname], [type] [varname]',
+   );
+   return $this->buildGrammarSet('funcparam', $productions);
+ }
+
+ /**
+  * Build the "abstractmethoddecl" rule: a typed, terminated signature with
+  * no body.
+  *
+  * @return map Grammar set for the "abstractmethoddecl" rule.
+  */
+ protected function getAbstractMethodDeclGrammarSet() {
+   $productions = array(
+     'abstract [methodreturn] [methodname]([funcparam])[term]',
+   );
+   return $this->buildGrammarSet('abstractmethoddecl', $productions);
+ }
+
+}
diff --git a/src/grammar/code/PhutilPHPCodeSnippetContextFreeGrammar.php b/src/grammar/code/PhutilPHPCodeSnippetContextFreeGrammar.php
new file mode 100644
--- /dev/null
+++ b/src/grammar/code/PhutilPHPCodeSnippetContextFreeGrammar.php
@@ -0,0 +1,57 @@
+getClassRuleSets());
+ }
+
+ /**
+  * Extend the parent's "start" productions with a class declaration,
+  * listed twice.
+  *
+  * @return map Grammar set containing the "start" rule.
+  */
+ protected function getStartGrammarSet() {
+   $grammar = parent::getStartGrammarSet();
+
+   for ($slot = 0; $slot < 2; $slot++) {
+     $grammar['start'][] = '[classdecl]';
+   }
+
+   return $grammar;
+ }
+
+ /**
+  * Extend the parent's "expr" productions with object construction and a
+  * static call.
+  *
+  * @return map Grammar set containing the "expr" rule.
+  */
+ protected function getExprGrammarSet() {
+   $grammar = parent::getExprGrammarSet();
+
+   $additions = array(
+     'new [classname]([funccallparam])',
+     '[classname]::[funccall]',
+   );
+   foreach ($additions as $production) {
+     $grammar['expr'][] = $production;
+   }
+
+   return $grammar;
+ }
+
+ /**
+  * Take the parent's variable names and prefix each one with "$".
+  *
+  * @return map The parent's grammar set with every production prefixed.
+  */
+ protected function getVarNameGrammarSet() {
+   $grammar = parent::getVarNameGrammarSet();
+
+   foreach ($grammar as $rule => $names) {
+     foreach ($names as $index => $name) {
+       $grammar[$rule][$index] = '$'.$name;
+     }
+   }
+
+   return $grammar;
+ }
+
+ /**
+  * Build the "funcname" rule from the functions currently defined in this
+  * PHP process, as reported by get_defined_functions().
+  *
+  * @return map Grammar set for the "funcname" rule.
+  */
+ protected function getFuncNameGrammarSet() {
+   // get_defined_functions() groups names into sub-lists; flatten them.
+   $grouped = get_defined_functions();
+   $function_names = array_mergev($grouped);
+
+   return $this->buildGrammarSet('funcname', $function_names);
+ }
+
+ /**
+  * Build the "methodcall" rule using PHP's instance ("->") and static
+  * ("::") call syntax.
+  *
+  * @return map Grammar set for the "methodcall" rule.
+  */
+ protected function getMethodCallGrammarSet() {
+   $productions = array(
+     '$this->[funccall]',
+     'self::[funccall]',
+     'static::[funccall]',
+     '[varname]->[funccall]',
+     '[classname]::[funccall]',
+   );
+   return $this->buildGrammarSet('methodcall', $productions);
+ }
+
+}
diff --git a/src/internationalization/PhutilLocale.php b/src/internationalization/PhutilLocale.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/PhutilLocale.php
@@ -0,0 +1,247 @@
+" order.
+ *
+ * @param const `PhutilPerson` gender constant.
+ * @param list List of variants.
+ * @return string Variant for use.
+ */
+ public function selectGenderVariant($variant, array $translations) {
+   // Feminine selects the last variant; anything else selects the first.
+   $is_feminine = ($variant == PhutilPerson::GENDER_FEMININE);
+
+   return $is_feminine
+     ? end($translations)
+     : reset($translations);
+ }
+
+
+ /**
+ * Select a plural variant for this locale. By default, locales use a simple
+ * rule with two plural variants, listed in "" order.
+ *
+ * @param int Plurality of the value.
+ * @param list List of variants.
+ * @return string Variant for use.
+ */
+ public function selectPluralVariant($variant, array $translations) {
+   // Exactly one selects the first variant; every other count, including
+   // zero, selects the last.
+   $is_singular = ($variant == 1);
+
+   return $is_singular
+     ? reset($translations)
+     : end($translations);
+ }
+
+
+ /**
+ * Flags a locale as silly, like "English (Pirate)".
+ *
+ * These locales are fun but disastrously inappropriate for serious
+ * businesses.
+ *
+ * @return bool True if this locale is silly.
+ */
+ public function isSillyLocale() {
+   // Default: locales are not silly.
+   return false;
+ }
+
+
+ /**
+ * Flags a locale as a testing locale, like "English (US, ALL CAPS)". These
+ * locales are useful for translation development, but not for normal users.
+ *
+ * @return bool True if this is a locale for testing or development.
+ */
+ public function isTestLocale() {
+   // Default: locales are not testing locales.
+   return false;
+ }
+
+
+ /**
+ * Indicates that the translator should post-process translations in this
+ * locale by calling @{method:didTranslateString}.
+ *
+ * Doing this incurs a performance penalty, and is not useful for most
+ * languages. However, it can be used to implement test translations like
+ * "English (US, ALL CAPS)".
+ *
+ * @return bool True to postprocess strings.
+ */
+ public function shouldPostProcessTranslations() {
+   // Default: post-processing is off.
+   return false;
+ }
+
+
+ /**
+ * Callback for post-processing translations.
+ *
+ * By default, this callback is not invoked. To activate it, return `true`
+ * from @{method:shouldPostProcessTranslations}. Activating this callback
+ * incurs a performance penalty.
+ *
+ * @param string The raw input pattern.
+ * @param string The selected translation pattern.
+ * @param list The raw input arguments.
+ * @param string The translated string.
+ * @return string Post-processed translation string.
+ */
+ public function didTranslateString(
+ $raw_pattern,
+ $translated_pattern,
+ array $args,
+ $result_text) {
+ // Default implementation: return the translated text unchanged.
+ return $result_text;
+ }
+
+
+ /**
+ * Load all available locales.
+ *
+ * @return map Map from codes to locale objects.
+ */
+ public static function loadAllLocales() {
+   static $locales;
+
+   // The validated map is built once and then reused.
+   if ($locales !== null) {
+     return $locales;
+   }
+
+   $objects = id(new PhutilClassMapQuery())
+     ->setAncestorClass(__CLASS__)
+     ->execute();
+
+   // Pass 1: index locales by code, rejecting duplicate codes.
+   $by_code = array();
+   foreach ($objects as $object) {
+     $code = $object->getLocaleCode();
+
+     if (!empty($by_code[$code])) {
+       throw new Exception(
+         pht(
+           'Two subclasses of "%s" ("%s" and "%s") define '.
+           'locales with the same locale code ("%s"). Each locale must '.
+           'have a unique locale code.',
+           __CLASS__,
+           get_class($object),
+           get_class($by_code[$code]),
+           $code));
+     }
+
+     $by_code[$code] = $object;
+   }
+
+   // Pass 2: every declared fallback must name a known locale. This has to
+   // finish before pass 3, which looks fallbacks up in the map.
+   foreach ($by_code as $locale) {
+     $fallback_code = $locale->getFallbackLocaleCode();
+     if ($fallback_code === null) {
+       continue;
+     }
+
+     if (empty($by_code[$fallback_code])) {
+       throw new Exception(
+         pht(
+           'The locale "%s" has an invalid fallback locale code ("%s"). '.
+           'No locale class exists which defines this locale.',
+           get_class($locale),
+           $fallback_code));
+     }
+   }
+
+   // Pass 3: reject fallback chains that loop back on themselves.
+   foreach ($by_code as $code => $locale) {
+     $seen = array($code => get_class($locale));
+     self::checkLocaleFallback($by_code, $locale, $seen);
+   }
+
+   $locales = $by_code;
+   return $locales;
+ }
+
+
+ /**
+ * Load a specific locale using a locale code.
+ *
+ * @param string Locale code.
+ * @return PhutilLocale Locale object.
+ */
+ public static function loadLocale($locale_code) {
+   $locale = idx(self::loadAllLocales(), $locale_code);
+
+   if ($locale) {
+     return $locale;
+   }
+
+   // No locale is registered under this code.
+   throw new Exception(
+     pht(
+       'There is no locale with the locale code "%s".',
+       $locale_code));
+ }
+
+
+ /**
+ * Recursively check locale fallbacks for cycles.
+ *
+ * @param map Map of locales.
+ * @param PhutilLocale Current locale.
+ * @param map Map of visited locales.
+ * @return void
+ */
+ private static function checkLocaleFallback(
+   array $map,
+   PhutilLocale $locale,
+   array $seen) {
+
+   $fallback_code = $locale->getFallbackLocaleCode();
+   if ($fallback_code === null) {
+     // End of the chain: no cycle on this path.
+     return;
+   }
+
+   $locale_class = get_class($locale);
+
+   if (!isset($seen[$fallback_code])) {
+     // First visit to this fallback: record it and follow the chain.
+     $seen[$fallback_code] = $locale_class;
+     self::checkLocaleFallback($map, $map[$fallback_code], $seen);
+     return;
+   }
+
+   // The fallback was already visited on this path, so the chain loops.
+   $seen[] = $locale_class;
+   $seen[] = pht('...');
+   throw new Exception(
+     pht(
+       'Locale "%s" is part of a cycle of locales which fall back on '.
+       'one another in a loop (%s). Locales which fall back on other '.
+       'locales must not loop.',
+       $locale_class,
+       implode(' -> ', $seen)));
+ }
+
+}
diff --git a/src/internationalization/PhutilNumber.php b/src/internationalization/PhutilNumber.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/PhutilNumber.php
@@ -0,0 +1,26 @@
+value = $value;
+ $this->decimals = $decimals;
+ }
+
+ /**
+  * Get the stored value.
+  *
+  * @return wild The value held in the `value` property.
+  */
+ public function getNumber() {
+ return $this->value;
+ }
+
+ /**
+  * Set the stored decimals value.
+  *
+  * @param wild New decimals value.
+  * @return this
+  */
+ public function setDecimals($decimals) {
+ $this->decimals = $decimals;
+ return $this;
+ }
+
+ /**
+  * Get the stored decimals value.
+  *
+  * @return wild The value held in the `decimals` property.
+  */
+ public function getDecimals() {
+ return $this->decimals;
+ }
+
+}
diff --git a/src/internationalization/PhutilPerson.php b/src/internationalization/PhutilPerson.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/PhutilPerson.php
@@ -0,0 +1,11 @@
+ Map of raw strings to translations.
+ */
+ abstract protected function getTranslations();
+
+
+ /**
+ * Return a filtered map of all strings in this translation.
+ *
+ * Filters out empty/placeholder translations.
+ *
+ * @return map Map of raw strings to translations.
+ */
+ final public function getFilteredTranslations() {
+   $filtered = array();
+
+   // Keep every entry except those whose translation is exactly null.
+   foreach ($this->getTranslations() as $key => $translation) {
+     if ($translation !== null) {
+       $filtered[$key] = $translation;
+     }
+   }
+
+   return $filtered;
+ }
+
+
+ /**
+ * Load all available translation objects.
+ *
+ * @return list List of available translation sources.
+ */
+ public static function loadAllTranslations() {
+   // Query for every class descended from this one.
+   $query = id(new PhutilClassMapQuery())
+     ->setAncestorClass(__CLASS__);
+
+   return $query->execute();
+ }
+
+
+ /**
+ * Load the complete translation map for a locale.
+ *
+ * This will compile primary and fallback translations into a single
+ * translation map.
+ *
+ * @param string Locale code, like "en_US".
+ * @return map Map of all available translations.
+ */
+ public static function getTranslationMapForLocale($locale_code) {
+   $locale = PhutilLocale::loadLocale($locale_code);
+
+   // Merge every source for this exact locale. With "+", strings added
+   // earlier take precedence over later ones.
+   $map = array();
+   foreach (self::loadAllTranslations() as $source) {
+     if ($source->getLocaleCode() != $locale_code) {
+       continue;
+     }
+     $map += $source->getFilteredTranslations();
+   }
+
+   $fallback_code = $locale->getFallbackLocaleCode();
+   if ($fallback_code === null) {
+     return $map;
+   }
+
+   // Fallback strings only fill in keys this locale did not define.
+   return $map + self::getTranslationMapForLocale($fallback_code);
+ }
+
+}
diff --git a/src/internationalization/PhutilTranslator.php b/src/internationalization/PhutilTranslator.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/PhutilTranslator.php
@@ -0,0 +1,269 @@
+locale = $locale;
+ $this->localeCode = $locale->getLocaleCode();
+ $this->shouldPostProcess = $locale->shouldPostProcessTranslations();
+ return $this;
+ }
+
+ /**
+  * Set a callback which translate() invokes with its own arguments before
+  * translating.
+  *
+  * @param callable Callback to invoke.
+  * @return this
+  */
+ public function setWillTranslateCallback($callback) {
+ $this->willTranslateCallback = $callback;
+ return $this;
+ }
+
+ /**
+  * Get the callback set with setWillTranslateCallback().
+  *
+  * @return wild The stored callback.
+  */
+ public function getWillTranslateCallback() {
+ return $this->willTranslateCallback;
+ }
+
+ /**
+ * Add translations which will be later used by @{method:translate}.
+ * The parameter is an array of strings (for simple translations) or arrays
+ * (for translations with variants). The number of items in the array is
+ * language specific. It is `array($singular, $plural)` for English.
+ *
+ * array(
+ * 'color' => 'colour',
+ * '%d beer(s)' => array('%d beer', '%d beers'),
+ * );
+ *
+ * The arrays can be nested for strings with more variant parts:
+ *
+ * array(
+ * '%d char(s) on %d row(s)' => array(
+ * array('%d char on %d row', '%d char on %d rows'),
+ * array('%d chars on %d row', '%d chars on %d rows'),
+ * ),
+ * );
+ *
+ * The translation should have the same placeholders as originals. Swapping
+ * parameter order is possible:
+ *
+ * array(
+ * '%s owns %s.' => '%2$s is owned by %1$s.',
+ * );
+ *
+ * @param array Identifier in key, translation in value.
+ * @return PhutilTranslator Provides fluent interface.
+ */
+ public function setTranslations(array $translations) {
+ // Replaces the whole translation map; nothing is merged.
+ $this->translations = $translations;
+ return $this;
+ }
+
+ /**
+  * Translate a pattern and format it with the remaining arguments.
+  *
+  * Looks the pattern up in the translation map, falling back to the pattern
+  * itself. It then resolves any variant arrays using successive arguments
+  * and formats the result with vsprintf(). If any argument is safe HTML,
+  * all arguments are escaped and the result is returned as safe HTML.
+  *
+  * @param string Pattern to translate, followed by its format arguments.
+  * @return wild Translated text (a string, or safe HTML if any argument
+  *              was HTML).
+  */
+ public function translate($text /* , ... */) {
+   $args = func_get_args();
+
+   if ($this->willTranslateCallback) {
+     call_user_func_array($this->willTranslateCallback, $args);
+   }
+
+   if (isset($this->translations[$text])) {
+     $translation = $this->translations[$text];
+   } else {
+     $translation = $text;
+   }
+
+   // Each level of variant nesting consumes one argument. The array pointer
+   // starts on $text, so the first next() yields the first format argument.
+   while (is_array($translation)) {
+     $arg = next($args);
+     $translation = $this->chooseVariant($translation, $arg);
+     if ($translation === null) {
+       $pos = key($args);
+
+       if (is_object($arg)) {
+         $kind = get_class($arg);
+       } else {
+         $kind = gettype($arg);
+       }
+
+       return sprintf(
+         '[Invalid Translation!] The "%s" language data offers variant '.
+         'translations for the plurality or gender of argument %s, but '.
+         'the value for that argument is not an integer, PhutilNumber, or '.
+         'PhutilPerson (it is a value of type "%s"). Raw input: <%s>.',
+         $this->localeCode,
+         $pos,
+         $kind,
+         $text);
+     }
+   }
+
+   // Drop the pattern itself so only the format arguments remain.
+   array_shift($args);
+
+   foreach ($args as $k => $arg) {
+     if ($arg instanceof PhutilNumber) {
+       $args[$k] = $this->formatNumber($arg->getNumber(), $arg->getDecimals());
+     }
+   }
+
+   // Check if any arguments are PhutilSafeHTML. If they are, we will apply
+   // any escaping necessary and output HTML.
+   $is_html = false;
+   foreach ($args as $arg) {
+     if ($arg instanceof PhutilSafeHTML ||
+         $arg instanceof PhutilSafeHTMLProducerInterface) {
+       $is_html = true;
+       break;
+     }
+   }
+
+   if ($is_html) {
+     foreach ($args as $k => $arg) {
+       $args[$k] = (string)phutil_escape_html($arg);
+     }
+   }
+
+   // vsprintf() reports a bad template differently across PHP versions. It
+   // returns false before PHP 8 and throws ValueError from PHP 8 onward.
+   // Normalize both to false so the fallback below handles either case.
+   try {
+     $result = vsprintf($translation, $args);
+   } catch (ValueError $ex) {
+     $result = false;
+   }
+
+   if ($result === false) {
+     // If vsprintf() fails (often because the translated string references
+     // too many parameters), show the bad template with a note instead of
+     // returning an empty string. This makes it easier to figure out what
+     // went wrong and fix it.
+     $result = pht('[Invalid Translation!] %s', $translation);
+   }
+
+   if ($this->shouldPostProcess) {
+     $result = $this->locale->didTranslateString(
+       $text,
+       $translation,
+       $args,
+       $result);
+   }
+
+   if ($is_html) {
+     $result = phutil_safe_html($result);
+   }
+
+   return $result;
+ }
+
+ /**
+  * Pick one entry from a list of variant translations.
+  *
+  * @param list List of variants (strings or nested variant lists).
+  * @param wild Value driving the choice: an integer, a PhutilNumber or a
+  *             PhutilPerson.
+  * @return wild|null Selected variant, or null if the value cannot be used
+  *                   to choose between several variants.
+  */
+ private function chooseVariant(array $translations, $variant) {
+   // If we only have one variant, we can select it directly.
+   if (count($translations) == 1) {
+     return reset($translations);
+   }
+
+   if ($variant instanceof PhutilNumber) {
+     $plurality = $variant->getNumber();
+   } else if ($variant instanceof PhutilPerson) {
+     // Gender-driven choice is always delegated to the locale.
+     return $this->locale->selectGenderVariant(
+       $variant->getGender(),
+       $translations);
+   } else if (is_int($variant)) {
+     $plurality = $variant;
+   } else {
+     return null;
+   }
+
+   // NOTE: This is a microoptimization which slightly improves performance
+   // for common languages with simple plural rules. Languages do not need
+   // to be added here even if they use the simple rules. The benefit of
+   // inclusion here is small, on the order of 5%.
+   // NOTE(review): "en_ES" looks like it may have been meant as "es_ES";
+   // confirm before changing, since it affects which path the locale takes.
+   static $simple_plural = array(
+     'en_US' => true,
+     'en_GB' => true,
+     'en_ES' => true,
+     'ko_KR' => true,
+   );
+
+   if (!isset($simple_plural[$this->localeCode])) {
+     return $this->locale->selectPluralVariant($plurality, $translations);
+   }
+
+   return ($plurality == 1)
+     ? reset($translations)
+     : end($translations);
+ }
+
+ /**
+ * Translate date formatted by `$date->format()`.
+ *
+ * @param string Format accepted by `DateTime::format()`.
+ * @param DateTime
+ * @return string Formatted and translated date.
+ */
+ public function translateDate($format, DateTime $date) {
+   static $format_cache = array();
+
+   // Tokenize each distinct format string once.
+   if (!isset($format_cache[$format])) {
+     $translatable = 'DlSFMaA';
+     $pattern = '/['.$translatable.']|(\\\\.|[^'.$translatable.'])+/';
+
+     $tokens = null;
+     preg_match_all($pattern, $format, $tokens, PREG_SET_ORDER);
+     $format_cache[$format] = $tokens;
+   }
+
+   $output = '';
+   foreach ($format_cache[$format] as $token) {
+     $piece = $date->format($token[0]);
+
+     // Group 1 is only captured for runs of non-translatable characters. A
+     // token without it is a single translatable format letter.
+     if (!isset($token[1])) {
+       $piece = $this->translate($piece);
+     }
+
+     $output .= $piece;
+   }
+
+   return $output;
+ }
+
+ /**
+ * Format number with grouped thousands and optional decimal part. Requires
+ * translations of '.' (decimal point) and ',' (thousands separator). Both
+ * these translations must be 1 byte long with PHP < 5.4.0.
+ *
+ * @param float
+ * @param int
+ * @return string
+ */
+ public function formatNumber($number, $decimals = 0) {
+   // The separators are themselves looked up as translations.
+   $decimal_point = $this->translate('.');
+   $thousands_separator = $this->translate(',');
+
+   return number_format(
+     $number,
+     $decimals,
+     $decimal_point,
+     $thousands_separator);
+ }
+
+ /**
+  * Check that a translation contains the same tag-like ("<...") and
+  * entity-like ("&...") fragments as the original, ignoring their order.
+  *
+  * @param string Original text.
+  * @param string Translated text.
+  * @return bool True if both texts contain the same set of fragments.
+  */
+ public function validateTranslation($original, $translation) {
+   $pattern = '/<(\S[^>]*>?)?|&(\S[^;]*;?)?/i';
+
+   $fragments = array();
+   foreach (array($original, $translation) as $text) {
+     $matches = null;
+     preg_match_all($pattern, $text, $matches);
+
+     // Sort so the comparison does not depend on fragment order.
+     $found = $matches[0];
+     sort($found);
+     $fragments[] = $found;
+   }
+
+   return ($fragments[0] === $fragments[1]);
+ }
+
+}
diff --git a/src/internationalization/__tests__/PhutilLocaleTestCase.php b/src/internationalization/__tests__/PhutilLocaleTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/__tests__/PhutilLocaleTestCase.php
@@ -0,0 +1,10 @@
+assertTrue(true);
+ }
+
+}
diff --git a/src/internationalization/__tests__/PhutilPersonTest.php b/src/internationalization/__tests__/PhutilPersonTest.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/__tests__/PhutilPersonTest.php
@@ -0,0 +1,20 @@
+gender;
+ }
+
+ public function setGender($value) {
+ $this->gender = $value;
+ return $this;
+ }
+
+ public function __toString() {
+ return pht('Test (%s)', $this->gender);
+ }
+
+}
diff --git a/src/internationalization/__tests__/PhutilPhtTestCase.php b/src/internationalization/__tests__/PhutilPhtTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/__tests__/PhutilPhtTestCase.php
@@ -0,0 +1,101 @@
+assertEqual('beer', pht('beer'));
+ $this->assertEqual('1 beer(s)', pht('%d beer(s)', 1));
+
+ $english_locale = PhutilLocale::loadLocale('en_US');
+ PhutilTranslator::getInstance()->setLocale($english_locale);
+ PhutilTranslator::getInstance()->setTranslations(
+ array(
+ '%d beer(s)' => array('%d beer', '%d beers'),
+ ));
+
+ $this->assertEqual('1 beer', pht('%d beer(s)', 1));
+
+ $czech_locale = PhutilLocale::loadLocale('cs_CZ');
+ PhutilTranslator::getInstance()->setLocale($czech_locale);
+ PhutilTranslator::getInstance()->setTranslations(
+ array(
+ '%d beer(s)' => array('%d pivo', '%d piva', '%d piv'),
+ ));
+
+ $this->assertEqual('5 piv', pht('%d beer(s)', 5));
+ }
+
+ public function getDateTranslations() {
+ // The only purpose of this function is to provide a static list of
+ // translations which can come from PhutilTranslator::translateDate() to
+ // allow translation extractor getting them.
+ return array(
+ 'D' => array(
+ pht('Sun'),
+ pht('Mon'),
+ pht('Tue'),
+ pht('Wed'),
+ pht('Thu'),
+ pht('Fri'),
+ pht('Sat'),
+ ),
+ 'l' => array(
+ pht('Sunday'),
+ pht('Monday'),
+ pht('Tuesday'),
+ pht('Wednesday'),
+ pht('Thursday'),
+ pht('Friday'),
+ pht('Saturday'),
+ ),
+ 'S' => array(
+ pht('st'),
+ pht('nd'),
+ pht('rd'),
+ pht('th'),
+ ),
+ 'F' => array(
+ pht('January'),
+ pht('February'),
+ pht('March'),
+ pht('April'),
+ pht('May'),
+ pht('June'),
+ pht('July'),
+ pht('August'),
+ pht('September'),
+ pht('October'),
+ pht('November'),
+ pht('December'),
+ ),
+ 'M' => array(
+ pht('Jan'),
+ pht('Feb'),
+ pht('Mar'),
+ pht('Apr'),
+ pht('May'),
+ pht('Jun'),
+ pht('Jul'),
+ pht('Aug'),
+ pht('Sep'),
+ pht('Oct'),
+ pht('Nov'),
+ pht('Dec'),
+ ),
+ 'a' => array(
+ pht('am'),
+ pht('pm'),
+ ),
+ 'A' => array(
+ pht('AM'),
+ pht('PM'),
+ ),
+ );
+ }
+
+}
diff --git a/src/internationalization/__tests__/PhutilTranslationTestCase.php b/src/internationalization/__tests__/PhutilTranslationTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/__tests__/PhutilTranslationTestCase.php
@@ -0,0 +1,10 @@
+assertTrue(true);
+ }
+
+}
diff --git a/src/internationalization/__tests__/PhutilTranslatorTestCase.php b/src/internationalization/__tests__/PhutilTranslatorTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/__tests__/PhutilTranslatorTestCase.php
@@ -0,0 +1,261 @@
+newTranslator('en_US');
+ $translator->setTranslations(
+ array(
+ '%d line(s)' => array('%d line', '%d lines'),
+ '%d char(s) on %d row(s)' => array(
+ array('%d char on %d row', '%d char on %d rows'),
+ array('%d chars on %d row', '%d chars on %d rows'),
+ ),
+ ));
+
+ $this->assertEqual('line', $translator->translate('line'));
+ $this->assertEqual('param', $translator->translate('%s', 'param'));
+
+ $this->assertEqual('0 lines', $translator->translate('%d line(s)', 0));
+ $this->assertEqual('1 line', $translator->translate('%d line(s)', 1));
+ $this->assertEqual('2 lines', $translator->translate('%d line(s)', 2));
+
+ $this->assertEqual(
+ '1 char on 1 row',
+ $translator->translate('%d char(s) on %d row(s)', 1, 1));
+ $this->assertEqual(
+ '5 chars on 2 rows',
+ $translator->translate('%d char(s) on %d row(s)', 5, 2));
+
+ $this->assertEqual('1 beer(s)', $translator->translate('%d beer(s)', 1));
+ }
+
+ public function testSingleVariant() {
+ $translator = $this->newTranslator('en_US');
+
+ // In this translation, we have no alternatives for the first conversion.
+ $translator->setTranslations(
+ array(
+ 'Run the command %s %d time(s).' => array(
+ array(
+ 'Run the command %s once.',
+ 'Run the command %s %d times.',
+ ),
+ ),
+ ));
+
+ $this->assertEqual(
+ 'Run the command ls 123 times.',
+ (string)$translator->translate(
+ 'Run the command %s %d time(s).',
+ hsprintf('%s', 'ls'),
+ 123));
+ }
+
+ public function testCzech() {
+ $translator = $this->newTranslator('cs_CZ');
+ $translator->setTranslations(
+ array(
+ '%d beer(s)' => array('%d pivo', '%d piva', '%d piv'),
+ ));
+
+ $this->assertEqual('0 piv', $translator->translate('%d beer(s)', 0));
+ $this->assertEqual('1 pivo', $translator->translate('%d beer(s)', 1));
+ $this->assertEqual('2 piva', $translator->translate('%d beer(s)', 2));
+ $this->assertEqual('5 piv', $translator->translate('%d beer(s)', 5));
+
+ $this->assertEqual('1 line(s)', $translator->translate('%d line(s)', 1));
+ }
+
+ public function testPerson() {
+ $translator = $this->newTranslator('cs_CZ');
+ $translator->setTranslations(
+ array(
+ '%s wrote.' => array('%s napsal.', '%s napsala.'),
+ ));
+
+ $person = new PhutilPersonTest();
+ $this->assertEqual(
+ 'Test () napsal.',
+ $translator->translate('%s wrote.', $person));
+
+ $person->setGender(PhutilPerson::GENDER_MASCULINE);
+ $this->assertEqual(
+ 'Test (m) napsal.',
+ $translator->translate('%s wrote.', $person));
+
+ $person->setGender(PhutilPerson::GENDER_FEMININE);
+ $this->assertEqual(
+ 'Test (f) napsala.',
+ $translator->translate('%s wrote.', $person));
+ }
+
+ public function testTranslateDate() {
+ $date = new DateTime('2012-06-21');
+ $translator = $this->newTranslator('en_US');
+
+ $this->assertEqual('June', $translator->translateDate('F', $date));
+ $this->assertEqual('June 21', $translator->translateDate('F d', $date));
+ $this->assertEqual('F', $translator->translateDate('\F', $date));
+
+ $translator->setTranslations(
+ array(
+ 'June' => 'correct',
+ '21' => 'wrong',
+ 'F' => 'wrong',
+ ));
+ $this->assertEqual('correct', $translator->translateDate('F', $date));
+ $this->assertEqual('correct 21', $translator->translateDate('F d', $date));
+ $this->assertEqual('F', $translator->translateDate('\F', $date));
+ }
+
+ public function testSetInstance() {
+ $english_translator = $this->newTranslator('en_US');
+
+ PhutilTranslator::setInstance($english_translator);
+ $original = PhutilTranslator::getInstance();
+ $this->assertEqual('color', pht('color'));
+
+ $british_locale = PhutilLocale::loadLocale('en_GB');
+
+ $british = new PhutilTranslator();
+ $british->setLocale($british_locale);
+ $british->setTranslations(
+ array(
+ 'color' => 'colour',
+ ));
+ PhutilTranslator::setInstance($british);
+ $this->assertEqual('colour', pht('color'));
+
+ PhutilTranslator::setInstance($original);
+ $this->assertEqual('color', pht('color'));
+ }
+
+ public function testFormatNumber() {
+ $translator = $this->newTranslator('en_US');
+
+ $this->assertEqual('1,234', $translator->formatNumber(1234));
+ $this->assertEqual('1,234.5', $translator->formatNumber(1234.5, 1));
+ $this->assertEqual('1,234.5678', $translator->formatNumber(1234.5678, 4));
+
+ $translator->setTranslations(
+ array(
+ ',' => ' ',
+ '.' => ',',
+ ));
+ $this->assertEqual('1 234', $translator->formatNumber(1234));
+ $this->assertEqual('1 234,5', $translator->formatNumber(1234.5, 1));
+ $this->assertEqual('1 234,5678', $translator->formatNumber(1234.5678, 4));
+ }
+
+ public function testNumberTranslations() {
+ $translator = $this->newTranslator('en_US');
+
+ $translator->setTranslations(
+ array(
+ '%s line(s)' => array('%s line', '%s lines'),
+ ));
+
+ $this->assertEqual(
+ '1 line',
+ $translator->translate('%s line(s)', new PhutilNumber(1)));
+
+ $this->assertEqual(
+ '1,000 lines',
+ $translator->translate('%s line(s)', new PhutilNumber(1000)));
+
+ $this->assertEqual(
+ '8.5 lines',
+ $translator->translate(
+ '%s line(s)',
+ id(new PhutilNumber(8.5))->setDecimals(1)));
+ }
+
+ public function testValidateTranslation() {
+ $tests = array(
+ 'a < 2' => array(
+ 'a < 2' => true,
+ 'b < 3' => true,
+ '2 > a' => false,
+ 'a<2' => false,
+ ),
+ 'We win' => array(
+ 'We win' => true,
+ 'We win' => true, // false positive
+ 'We win' => false,
+ 'We win' => false,
+ ),
+ 'We win & triumph' => array(
+ 'We triumph & win' => true,
+ 'We win and triumph' => false,
+ ),
+ 'beer' => array(
+ 'pivo' => true,
+ 'b<>r' => false,
+ 'b&&r' => false,
+ ),
+ );
+
+ $translator = $this->newTranslator('en_US');
+
+ foreach ($tests as $original => $translations) {
+ foreach ($translations as $translation => $expect) {
+ $valid = ($expect ? 'valid' : 'invalid');
+ $this->assertEqual(
+ $expect,
+ $translator->validateTranslation($original, $translation),
+ pht(
+ "'%s' should be %s with '%s'.",
+ $original,
+ $valid,
+ $translation));
+ }
+ }
+ }
+
+ public function testHTMLTranslations() {
+ $string = '%s awoke suddenly at %s.';
+ $when = '<4 AM>';
+
+ $translator = $this->newTranslator('en_US');
+
+ // When no components are HTML, everything is treated as a string.
+ $who = 'Abraham';
+ $translation = $translator->translate(
+ $string,
+ $who,
+ $when);
+ $this->assertEqual(
+ 'string',
+ gettype($translation));
+ $this->assertEqual(
+ 'Abraham awoke suddenly at <4 AM>.',
+ $translation);
+
+ // When at least one component is HTML, everything is treated as HTML.
+ $who = phutil_tag('span', array(), 'Abraham');
+ $translation = $translator->translate(
+ $string,
+ $who,
+ $when);
+ $this->assertTrue($translation instanceof PhutilSafeHTML);
+ $this->assertEqual(
+ 'Abraham awoke suddenly at <4 AM>.',
+ $translation->getHTMLContent());
+
+ $translation = $translator->translate(
+ $string,
+ $who,
+ new PhutilNumber(1383930802));
+ $this->assertEqual(
+ 'Abraham awoke suddenly at 1,383,930,802.',
+ $translation->getHTMLContent());
+ }
+
+ private function newTranslator($locale_code) {
+ $locale = PhutilLocale::loadLocale($locale_code);
+ return id(new PhutilTranslator())
+ ->setLocale($locale);
+ }
+
+}
diff --git a/src/internationalization/locales/PhutilAllCapsEnglishLocale.php b/src/internationalization/locales/PhutilAllCapsEnglishLocale.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/locales/PhutilAllCapsEnglishLocale.php
@@ -0,0 +1,38 @@
+= 2 && $variant <= 4) {
+ return $paucal;
+ }
+
+ return $plural;
+ }
+
+}
diff --git a/src/internationalization/locales/PhutilEmojiLocale.php b/src/internationalization/locales/PhutilEmojiLocale.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/locales/PhutilEmojiLocale.php
@@ -0,0 +1,33 @@
+setTranslations()` and language rules set
+ * by `PhutilTranslator::getInstance()->setLocale()`.
+ *
+ * @param string Translation identifier with `sprintf()` placeholders.
+ * @param mixed Value to select the variant from (e.g. singular or plural).
+ * @param ... Next values referenced from $text.
+ * @return string Translated string with substituted values.
+ */
+function pht($text, $variant = null /* , ... */) {
+  // Forward every argument, including the variadic tail, to the active
+  // translator's translate() method.
+  $arguments = func_get_args();
+  $callback = array(PhutilTranslator::getInstance(), 'translate');
+  return call_user_func_array($callback, $arguments);
+}
+
+/**
+ * Count the elements of an array or `Countable`, wrapping the result so it
+ * can be passed to the translation engine as a number.
+ *
+ * @param array|Countable Value to count.
+ * @return PhutilNumber Number of elements in the input.
+ * @throws InvalidArgumentException If the input is neither an array nor a
+ *   `Countable`.
+ */
+function phutil_count($countable) {
+  $is_countable = is_array($countable) || ($countable instanceof Countable);
+  if (!$is_countable) {
+    throw new InvalidArgumentException(pht('Argument should be countable.'));
+  }
+
+  $total = count($countable);
+  return new PhutilNumber($total);
+}
+
+/**
+ * Provide a gendered argument to the translation engine.
+ *
+ * This function does nothing and only serves as a marker for the static
+ * extractor so it knows particular arguments may vary on gender.
+ *
+ * @param PhutilPerson Something implementing @{interface:PhutilPerson}.
+ * @return PhutilPerson The argument, unmodified.
+ */
+function phutil_person(PhutilPerson $person) {
+ return $person;
+}
diff --git a/src/internationalization/translation/LibphutilUSEnglishTranslation.php b/src/internationalization/translation/LibphutilUSEnglishTranslation.php
new file mode 100644
--- /dev/null
+++ b/src/internationalization/translation/LibphutilUSEnglishTranslation.php
@@ -0,0 +1,34 @@
+ array(
+ 'Daemon was idle for more than one second, scaling pool down.',
+ 'Daemon was idle for more than %s seconds, scaling pool down.',
+ ),
+
+ 'Analyzing %s file(s) with %s subprocess(es)...' => array(
+ array(
+ 'Analyzing one file with a single subprocess...',
+ 'Analyzing %s file with %s subprocesses...',
+ ),
+ array(
+ 'Analyzing %s files with a single subprocess...',
+ 'Analyzing %s files with %s subprocesses...',
+ ),
+ ),
+
+ '... (%s more byte(s)) ...' => array(
+ '... (%s more byte) ...',
+ '... (%s more bytes) ...',
+ ),
+ );
+ }
+
+}
diff --git a/src/ip/PhutilCIDRBlock.php b/src/ip/PhutilCIDRBlock.php
new file mode 100644
--- /dev/null
+++ b/src/ip/PhutilCIDRBlock.php
@@ -0,0 +1,84 @@
+
+ }
+
+ /**
+  * Build a CIDR block from an existing block or from a string.
+  *
+  * @param PhutilCIDRBlock|string Existing block, or a block in CIDR notation.
+  * @return PhutilCIDRBlock A copy of the given block, or a newly parsed one.
+  */
+ public static function newBlock($in) {
+   $is_block = ($in instanceof PhutilCIDRBlock);
+   if (!$is_block) {
+     return self::newFromString($in);
+   }
+
+   // Hand back a copy so the caller's object is never shared.
+   return clone $in;
+ }
+
+ /**
+ * Parse a block in CIDR notation, like "172.30.0.0/16".
+ *
+ * @param string Block in CIDR notation.
+ * @return PhutilCIDRBlock Newly constructed block.
+ * @throws Exception If the string is malformed, the mask has a leading
+ * zero, or the mask exceeds the bit width of the address. Exceptions
+ * raised while parsing the address portion propagate unchanged.
+ */
+ private static function newFromString($str) {
+ // Shape check only: address characters, one slash, then a decimal mask.
+ // The address itself is validated by PhutilIPAddress below.
+ if (!preg_match('(^[\d.:a-fA-F]+/[\d]+\z)', $str)) {
+ throw new Exception(
+ pht(
+ 'CIDR block "%s" is not formatted correctly. Expected an IP block '.
+ 'in CIDR notation, like "%s" or "%s".',
+ $str,
+ '172.30.0.0/16',
+ '23:45:67:89::/24'));
+ }
+
+ list($ip, $mask) = explode('/', $str);
+
+ $ip = PhutilIPAddress::newAddress($ip);
+
+ // Reject masks like "04"; a lone "0" is still allowed.
+ if (preg_match('/^0\d/', $mask)) {
+ throw new Exception(
+ pht(
+ 'CIDR block "%s" is not formatted correctly. The IP block mask '.
+ '("%s") must not have leading zeroes.',
+ $str,
+ $mask));
+ }
+
+ // The upper bound for the mask depends on the parsed address type.
+ $max_bits = $ip->getBitCount();
+
+ $bits = (int)$mask;
+ if ($bits < 0 || $bits > $max_bits) {
+ throw new Exception(
+ pht(
+ 'CIDR block "%s" is not formatted correctly. The IP block mask '.
+ '("%s") must mask between 0 and %s bits, inclusive.',
+ $str,
+ $mask,
+ new PhutilNumber($max_bits)));
+ }
+
+ $obj = new PhutilCIDRBlock();
+ $obj->ip = $ip;
+ $obj->bits = $bits;
+
+ return $obj;
+ }
+
+ /**
+  * Test whether an address falls inside this CIDR block.
+  *
+  * @param PhutilIPAddress|string Address to test.
+  * @return bool True if the address has the same bit width as the block and
+  *   shares its masked prefix.
+  */
+ public function containsAddress($address) {
+   $address = PhutilIPAddress::newAddress($address);
+
+   // If the two addresses have different bit widths (IPv4 vs IPv6), this
+   // CIDR block does not match the address. Check this first so no bit
+   // strings are built for an address which can never match.
+   if ($this->ip->getBitCount() != $address->getBitCount()) {
+     return false;
+   }
+
+   $block_bits = $this->ip->toBits();
+   $address_bits = $address->toBits();
+
+   // Only the first `bits` characters (the masked prefix) are compared.
+   return (strncmp($block_bits, $address_bits, $this->bits) === 0);
+ }
+
+}
diff --git a/src/ip/PhutilCIDRList.php b/src/ip/PhutilCIDRList.php
new file mode 100644
--- /dev/null
+++ b/src/ip/PhutilCIDRList.php
@@ -0,0 +1,40 @@
+containsAddress('172.30.0.1');
+ */
+final class PhutilCIDRList extends Phobject {
+
+  private $blocks;
+
+  private function __construct() {
+    // Instances are only created through newList().
+  }
+
+  /**
+   * Build a list from CIDR blocks.
+   *
+   * @param array Blocks, each in any form `PhutilCIDRBlock::newBlock()`
+   *   accepts. Array keys are preserved.
+   * @return PhutilCIDRList New list.
+   */
+  public static function newList(array $blocks) {
+    // Normalize every entry before constructing the list, so a malformed
+    // block raises before any object is built.
+    $normalized = array_map(array('PhutilCIDRBlock', 'newBlock'), $blocks);
+
+    $list = new PhutilCIDRList();
+    $list->blocks = $normalized;
+    return $list;
+  }
+
+  /**
+   * Test whether any block in the list contains an address.
+   *
+   * @param PhutilIPAddress|string Address to test.
+   * @return bool True if at least one block contains the address.
+   */
+  public function containsAddress($address) {
+    $found = false;
+
+    foreach ($this->blocks as $candidate) {
+      if ($candidate->containsAddress($address)) {
+        $found = true;
+        break;
+      }
+    }
+
+    return $found;
+  }
+
+}
diff --git a/src/ip/PhutilIPAddress.php b/src/ip/PhutilIPAddress.php
new file mode 100644
--- /dev/null
+++ b/src/ip/PhutilIPAddress.php
@@ -0,0 +1,43 @@
+
+ }
+
+ abstract public function toBits();
+ abstract public function getBitCount();
+ abstract public function getAddress();
+
+ /**
+ * Build an address object from an existing address or from a string.
+ *
+ * Strings are tried as IPv4 first and then as IPv6.
+ *
+ * @param PhutilIPAddress|string Existing address, or an address string.
+ * @return PhutilIPAddress A copy of the given address, or a newly parsed
+ * IPv4 or IPv6 address.
+ * @throws Exception If the string parses as neither IPv4 nor IPv6.
+ */
+ public static function newAddress($in) {
+ // Return a copy rather than the caller's own object.
+ if ($in instanceof PhutilIPAddress) {
+ return clone $in;
+ }
+
+ try {
+ return PhutilIPv4Address::newFromString($in);
+ } catch (Exception $ex) {
+ // Continue, trying the address as IPv6 instead.
+ }
+
+ try {
+ return PhutilIPv6Address::newFromString($in);
+ } catch (Exception $ex) {
+ // Continue, throwing a more tailored exception below.
+ }
+
+ // Both parsers rejected the input; their specific messages are discarded
+ // in favor of one which mentions both formats.
+ throw new Exception(
+ pht(
+ 'IP address "%s" is not properly formatted. Expected an IPv4 address '.
+ 'like "%s", or an IPv6 address like "%s".',
+ $in,
+ '23.45.67.89',
+ '2345:6789:0123:abcd::'));
+ }
+
+}
diff --git a/src/ip/PhutilIPv4Address.php b/src/ip/PhutilIPv4Address.php
new file mode 100644
--- /dev/null
+++ b/src/ip/PhutilIPv4Address.php
@@ -0,0 +1,86 @@
+
+ }
+
+ public function getAddress() {
+ return $this->ip;
+ }
+
+ public function getBitCount() {
+ return 32;
+ }
+
+ /**
+  * Parse a dotted-quad IPv4 address.
+  *
+  * @param string Address to parse, like "23.45.67.89".
+  * @return self New address object for the given string.
+  * @throws Exception If the string is not four dot-separated decimal
+  *   segments, or if a segment has a leading zero or is greater than 255.
+  */
+ protected static function newFromString($str) {
+   $matches = null;
+
+   // Every separator must be a literal dot. An unescaped "." here would let
+   // any character (or a digit of the neighboring segment) act as the last
+   // separator, accepting strings like "1.2.3x4" or "1.2.345".
+   $ok = preg_match('(^(\d+)\.(\d+)\.(\d+)\.(\d+)\z)', $str, $matches);
+   if (!$ok) {
+     throw new Exception(
+       pht(
+         'IP address "%s" is not properly formatted. Expected an IPv4 '.
+         'address like "%s".',
+         $str,
+         '23.45.67.89'));
+   }
+
+   // Drop the full match, keeping only the four captured segments.
+   $parts = array_slice($matches, 1);
+   foreach ($parts as $part) {
+     if (preg_match('/^0\d/', $part)) {
+       throw new Exception(
+         pht(
+           'IP address "%s" is not properly formatted. Address segments '.
+           'should have no leading zeroes, but segment "%s" has a leading '.
+           'zero.',
+           $str,
+           $part));
+     }
+
+     $value = (int)$part;
+     if ($value < 0 || $value > 255) {
+       throw new Exception(
+         pht(
+           'IP address "%s" is not properly formatted. Address segments '.
+           'should be between 0 and 255, inclusive, but segment "%s" has '.
+           'a value outside of this range.',
+           $str,
+           $part));
+     }
+   }
+
+   $obj = new self();
+   $obj->ip = $str;
+
+   return $obj;
+ }
+
+ /**
+  * Render the address as a string of "0" and "1" characters, most
+  * significant bit first, with 8 characters per dot-separated segment.
+  *
+  * The result is computed once and then cached on the object.
+  *
+  * @return string Bit string for this address.
+  */
+ public function toBits() {
+   if ($this->bits !== null) {
+     return $this->bits;
+   }
+
+   $octets = array();
+   foreach (explode('.', $this->ip) as $segment) {
+     // Only the low 8 bits of each segment are rendered.
+     $octets[] = sprintf('%08b', ((int)$segment) & 0xFF);
+   }
+
+   $this->bits = implode('', $octets);
+
+   return $this->bits;
+ }
+
+}
diff --git a/src/ip/PhutilIPv6Address.php b/src/ip/PhutilIPv6Address.php
new file mode 100644
--- /dev/null
+++ b/src/ip/PhutilIPv6Address.php
@@ -0,0 +1,212 @@
+
+ }
+
+ public function getBitCount() {
+ return 128;
+ }
+
+ /**
+  * Parse an IPv6 address from its textual form.
+  *
+  * Accepts fully written addresses like "1:2:3:4:a:b:c:d" and addresses
+  * which omit a single run of zero segments with "::", like "::1", "1::" or
+  * "1::2". A leading or trailing "::" may stand in for a single segment, so
+  * "::1:2:3:4:5:6:7" is accepted.
+  *
+  * @param string Address to parse.
+  * @return self New address object holding the eight segment values.
+  * @throws Exception If the string is not a well-formed IPv6 address.
+  */
+ protected static function newFromString($str) {
+   $parts = explode(':', $str);
+
+   if (count($parts) < 3) {
+     throw new Exception(
+       pht(
+         'IP address "%s" is not properly formatted: it has too few '.
+         'parts. Expected a minimum of 2 colons, like "%s".',
+         $str,
+         '::1'));
+   }
+
+   // Look for leading or trailing empty parts. These are valid if the string
+   // begins or ends like "::", "::1", or "1::", but not valid otherwise.
+   // A valid leading or trailing "::" is folded into one `null` placeholder.
+   $has_omission = false;
+   if ($str === '::') {
+     $parts = array(null);
+     $has_omission = true;
+   } else if ($parts[0] === '') {
+     if ($parts[1] === '') {
+       unset($parts[1]);
+       $parts[0] = null;
+       $parts = array_values($parts);
+       $has_omission = true;
+     } else {
+       throw new Exception(
+         pht(
+           'IP address "%s" is not properly formatted: an address with '.
+           'omitted leading segments must begin with "::".',
+           $str));
+     }
+   } else if (last($parts) === '') {
+     if ($parts[count($parts) - 2] === '') {
+       array_pop($parts);
+       $parts[count($parts) - 1] = null;
+       $parts = array_values($parts);
+       $has_omission = true;
+     } else {
+       throw new Exception(
+         pht(
+           'IP address "%s" is not properly formatted: an address with '.
+           'omitted trailing segments must end with "::".',
+           $str));
+     }
+   }
+
+   // Count segments only after a leading or trailing "::" has been folded
+   // into one placeholder. Counting the raw pieces would reject valid
+   // addresses such as "::1:2:3:4:5:6:7", where "::" replaces one segment.
+   if (count($parts) > 8) {
+     throw new Exception(
+       pht(
+         'IP address "%s" is not properly formatted: it has too many '.
+         'parts. Expected a maximum of 8 segments, like "%s".',
+         $str,
+         '1:2:3:4:a:b:c:d'));
+   }
+
+   // Any remaining empty part is a "::" in the middle of the address. Only
+   // one omission is allowed in total.
+   foreach ($parts as $idx => $part) {
+     if ($part !== '') {
+       continue;
+     }
+
+     if ($has_omission) {
+       throw new Exception(
+         pht(
+           'IP address "%s" is not properly formatted: an address may '.
+           'only contain a maximum of one subsequence omitted with "::".',
+           $str));
+     }
+
+     $has_omission = true;
+     $parts[$idx] = null;
+   }
+
+   if (!$has_omission) {
+     if (count($parts) !== 8) {
+       throw new Exception(
+         pht(
+           'IP address "%s" is not properly formatted: an address must '.
+           'contain exactly 8 segments, or omit a subsequence of segments '.
+           'with "::".',
+           $str));
+     }
+   }
+
+   $values = array();
+   foreach ($parts as $part) {
+     // This is a "::" segment, so fill in any missing values with 0. The
+     // placeholder itself counts as one part, hence the inclusive bound.
+     if ($part === null) {
+       for ($ii = count($parts); $ii <= 8; $ii++) {
+         $values[] = 0;
+       }
+       continue;
+     }
+
+     if (!preg_match('/^[0-9a-fA-F]{1,4}\z/', $part)) {
+       throw new Exception(
+         pht(
+           'IP address "%s" is not properly formatted: the segments of '.
+           'an address must be hexadecimal values between "0000" and "ffff", '.
+           'inclusive. Segment "%s" is not.',
+           $str,
+           $part));
+     }
+
+     $values[] = (int)hexdec($part);
+   }
+
+   $obj = new self();
+   $obj->values = $values;
+
+   return $obj;
+ }
+
+ /**
+ * Render the address as text, using lowercase hexadecimal segments without
+ * leading zeroes and collapsing the longest run of zero segments to "::".
+ *
+ * Only runs of two or more zero segments are collapsed. When several runs
+ * share the longest length, the first one is collapsed. The result is
+ * computed once and then cached on the object.
+ *
+ * @return string Display form of the address.
+ */
+ public function getAddress() {
+ if ($this->displayAddress === null) {
+ // Find the longest consecutive sequence of "0" values. We want to
+ // collapse this into "::".
+ $longest_run = 0;
+ $longest_index = 0;
+ $current_run = null;
+ $current_index = null;
+ foreach ($this->values as $idx => $value) {
+ // A nonzero segment ends whatever run was in progress.
+ if ($value !== 0) {
+ $current_run = null;
+ continue;
+ }
+
+ if ($current_run === null) {
+ $current_run = 1;
+ $current_index = $idx;
+ } else {
+ $current_run++;
+ }
+
+ // Strict comparison: an equally long later run does not replace an
+ // earlier one.
+ if ($current_run > $longest_run) {
+ $longest_run = $current_run;
+ $longest_index = $current_index;
+ }
+ }
+
+ // Render the segments of the IPv6 address, omitting the longest run
+ // of consecutive "0" segments.
+ $pieces = array();
+ for ($idx = 0; $idx < count($this->values); $idx++) {
+ $value = $this->values[$idx];
+
+ if ($idx === $longest_index) {
+ if ($longest_run > 1) {
+ // Emit one placeholder for the whole run and skip past it.
+ $pieces[] = null;
+ $idx += ($longest_run - 1);
+ continue;
+ }
+ }
+
+ $pieces[] = dechex($value);
+ }
+
+ // If the omitted segment is at the beginning or end of the address, add
+ // an extra piece so we get the leading or trailing "::" when we implode
+ // the pieces.
+ if (head($pieces) === null) {
+ array_unshift($pieces, null);
+ }
+
+ if (last($pieces) === null) {
+ $pieces[] = null;
+ }
+
+ $this->displayAddress = implode(':', $pieces);
+ }
+
+ return $this->displayAddress;
+ }
+
+ /**
+  * Render the address as a string of "0" and "1" characters, most
+  * significant bit first, with 16 characters per stored segment value.
+  *
+  * The result is computed once and then cached on the object.
+  *
+  * @return string Bit string for this address.
+  */
+ public function toBits() {
+   if ($this->bits !== null) {
+     return $this->bits;
+   }
+
+   $groups = array();
+   foreach ($this->values as $segment) {
+     // Only the low 16 bits of each segment value are rendered.
+     $groups[] = sprintf('%016b', $segment & 0xFFFF);
+   }
+
+   $this->bits = implode('', $groups);
+
+   return $this->bits;
+ }
+
+}
diff --git a/src/ip/__tests__/PhutilIPAddressTestCase.php b/src/ip/__tests__/PhutilIPAddressTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/ip/__tests__/PhutilIPAddressTestCase.php
@@ -0,0 +1,339 @@
+ true,
+
+ // No nonsense.
+ '1.2.3' => false,
+ 'duck' => false,
+ '' => false,
+ '1 2 3 4' => false,
+ '.' => false,
+ '1.2.3.4.' => false,
+ '1..3.4' => false,
+
+ // No leading zeroes.
+ '0.0.0.0' => true,
+ '0.0.0.01' => false,
+
+ // No segments > 255.
+ '255.255.255.255' => true,
+ '255.255.255.256' => false,
+ );
+
+ foreach ($cases as $input => $expect) {
+ $caught = null;
+ try {
+ PhutilIPAddress::newAddress($input);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertEqual(
+ $expect,
+ !($caught instanceof Exception),
+ 'PhutilIPv4Address['.$input.']');
+ }
+ }
+
+ public function testValidIPv6Addresses() {
+ $cases = array(
+ '::' => true,
+ '::1' => true,
+ '1::' => true,
+ '1::1' => true,
+ '1:2:3:4:5:6:7:8' => true,
+ '1:2:3::5:6:7:8' => true,
+ '1:2:3::6:7:8' => true,
+
+ // No nonsense.
+ 'quack:duck' => false,
+ '11111:22222::' => false,
+
+
+ // Too long.
+ '1:2:3:4:5:6:7:8:9' => false,
+
+ // Too short.
+ '1:2:3' => false,
+
+ // Too many omitted segments.
+ '1:2:3:::7:8:9' => false,
+ '1::3::7:8:9' => false,
+ );
+
+ foreach ($cases as $input => $expect) {
+ $caught = null;
+ try {
+ PhutilIPAddress::newAddress($input);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertEqual(
+ $expect,
+ !($caught instanceof Exception),
+ 'PhutilIPv6Address['.$input.']');
+ }
+ }
+
+ public function testIPv4AddressToBits() {
+ $cases = array(
+ '0.0.0.0' => '00000000000000000000000000000000',
+ '255.255.255.255' => '11111111111111111111111111111111',
+ '255.0.0.0' => '11111111000000000000000000000000',
+ '0.0.0.1' => '00000000000000000000000000000001',
+ '0.0.0.2' => '00000000000000000000000000000010',
+ '0.0.0.3' => '00000000000000000000000000000011',
+ );
+
+ foreach ($cases as $input => $expect) {
+ $actual = PhutilIPAddress::newAddress($input)->toBits();
+ $this->assertEqual(
+ $expect,
+ $actual,
+ 'PhutilIPv4Address['.$input.']->toBits()');
+ }
+ }
+
+ public function testIPv6AddressToBits() {
+ $cases = array(
+ '::' =>
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000',
+ '::1' =>
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000001',
+ '1::' =>
+ '0000000000000001 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000',
+ '::ffff:c000:0280' =>
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 1111111111111111'.
+ PhutilIPAddress::newAddress('192.0.2.128')->toBits(),
+ '21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A' =>
+ '0010000111011010 0000000011010011'.
+ '0000000000000000 0010111100111011'.
+ '0000001010101010 0000000011111111'.
+ '1111111000101000 1001110001011010',
+ '2001:db8::1' =>
+ '0010000000000001 0000110110111000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000000'.
+ '0000000000000000 0000000000000001',
+
+ );
+
+ foreach ($cases as $input => $expect) {
+ // Remove any spaces, these are just to make the tests above easier to
+ // read.
+ $expect = str_replace(' ', '', $expect);
+
+ $actual = PhutilIPAddress::newAddress($input)->toBits();
+ $this->assertEqual(
+ $expect,
+ $actual,
+ 'PhutilIPv6Address['.$input.']->toBits()');
+ }
+ }
+
+ public function testIPv6AddressToAddress() {
+ $cases = array(
+ '::' => '::',
+ '::1' => '::1',
+ '::01' => '::1',
+ '0::0001' => '::1',
+ '0000::0001' => '::1',
+ '0000:0000::001' => '::1',
+
+ '1::' => '1::',
+ '01::' => '1::',
+ '01::0' => '1::',
+ '0001::0000' => '1::',
+
+ '1:0::0:2' => '1::2',
+ '1::0:2' => '1::2',
+ '1:0::2' => '1::2',
+
+ 'CAFE::' => 'cafe::',
+ '0000:aBe:0:0:1::' => '0:abe:0:0:1::',
+
+ '1:0:0:0:2:0:0:0' => '1::2:0:0:0',
+ '1:0:0:2:0:0:0:0' => '1:0:0:2::',
+ );
+
+ foreach ($cases as $input => $expect) {
+ $actual = PhutilIPAddress::newAddress($input)->getAddress();
+ $this->assertEqual(
+ $expect,
+ $actual,
+ 'PhutilIPv6Address['.$input.']->getAddress()');
+ }
+ }
+
+ public function testValidIPv4CIDRBlocks() {
+ $cases = array(
+ // Valid block.
+ '1.0.0.0/16' => true,
+
+ // No nonsense.
+ 'duck' => false,
+ '1/2/3' => false,
+ '23/0.0.0.0' => false,
+ '0.0.0.0/0.0.0.0' => false,
+
+ // No leading zeroes.
+ '1.0.0.0/4' => true,
+ '1.0.0.0/04' => false,
+
+ // No out-of-range masks.
+ '1.0.0.0/32' => true,
+ '1.0.0.0/33' => false,
+ );
+
+ foreach ($cases as $input => $expect) {
+ $caught = null;
+ try {
+ PhutilCIDRBlock::newBlock($input);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertEqual(
+ $expect,
+ !($caught instanceof Exception),
+ 'PhutilCIDRBlock['.$input.']');
+ }
+ }
+
+ public function testValidIPv6CIDRBlocks() {
+ $cases = array(
+ // Valid block.
+ '::/16' => true,
+ '::/128' => true,
+
+ // No nonsense.
+ '::/1/2' => false,
+ '::/::' => false,
+ '::' => false,
+
+ // No leading zeroes.
+ '::/01' => false,
+
+ // No out-of-range masks.
+ '::/129' => false,
+ );
+
+ foreach ($cases as $input => $expect) {
+ $caught = null;
+ try {
+ PhutilCIDRBlock::newBlock($input);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertEqual(
+ $expect,
+ !($caught instanceof Exception),
+ 'PhutilCIDRBlock['.$input.']');
+ }
+ }
+
+ public function testIPv4CIDRBlockContains() {
+ $cases = array(
+ '0.0.0.0/0' => array(
+ '0.0.0.0' => true,
+ '1.1.1.1' => true,
+ '2.3.4.5' => true,
+ '::' => false,
+ '::1' => false,
+ '::ffff:0:0' => false,
+ ),
+ '0.0.0.2/32' => array(
+ '0.0.0.1' => false,
+ '0.0.0.2' => true,
+ '0.0.0.3' => false,
+ '::' => false,
+ ),
+ '172.30.0.0/16' => array(
+ '172.29.255.255' => false,
+ '172.30.0.0' => true,
+ '172.30.255.255' => true,
+ '172.31.0.0' => false,
+ '::' => false,
+ ),
+ );
+
+ foreach ($cases as $input_block => $tests) {
+ $block = PhutilCIDRBlock::newBlock($input_block);
+ foreach ($tests as $input => $expect) {
+ $this->assertEqual(
+ $expect,
+ $block->containsAddress($input),
+ 'PhutilCIDRBlock['.$input_block.']->containsAddress('.$input.')');
+ }
+ }
+ }
+
+ public function testIPv6CIDRBlockContains() {
+ $cases = array(
+ '::/0' => array(
+ '1::' => true,
+ '2::' => true,
+ '127.0.0.1' => false,
+ ),
+ '::ffff:0:0/96' => array(
+ '::ffff:0:0' => true,
+ '::ffff:ffff:ffff' => true,
+ '::fffe:0:0' => false,
+ '127.0.0.1' => false,
+ ),
+ );
+
+ foreach ($cases as $input_block => $tests) {
+ $block = PhutilCIDRBlock::newBlock($input_block);
+ foreach ($tests as $input => $expect) {
+ $this->assertEqual(
+ $expect,
+ $block->containsAddress($input),
+ 'PhutilCIDRBlock['.$input_block.']->containsAddress('.$input.')');
+ }
+ }
+ }
+
+ public function testCIDRList() {
+ $list = array(
+ '172.30.0.0/16',
+ '127.0.0.3/32',
+ );
+
+ $cases = array(
+ '0.0.0.0' => false,
+ '172.30.0.5' => true,
+ '127.0.0.2' => false,
+ '127.0.0.3' => true,
+ );
+
+ $list = PhutilCIDRList::newList($list);
+
+ foreach ($cases as $input => $expect) {
+ $this->assertEqual(
+ $expect,
+ $list->containsAddress($input),
+ 'PhutilCIDRList->containsAddress('.$input.')');
+ }
+ }
+
+
+}
diff --git a/src/lexer/PhutilJSONFragmentLexer.php b/src/lexer/PhutilJSONFragmentLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilJSONFragmentLexer.php
@@ -0,0 +1,27 @@
+ array(
+ array('"', 's', 'double-quoted-string'),
+ array('\s+', null),
+ array('[\\[\\]{},:]', 'o'),
+ array('(?:true|false|null)', 'k'),
+ array('-?\d+(\.\d+([eE][-+]?\d+)?)?', 'mf'),
+ array('.', null),
+ ),
+ 'double-quoted-string' => array(
+ array('[^"\\\\]+', 's'),
+ array('"', 's', '!pop'),
+ array('\\\\u[0-9a-fA-F]{4}', 'k'),
+ array('\\\\.', 'k'),
+ ),
+ );
+ }
+
+}
diff --git a/src/lexer/PhutilJavaFragmentLexer.php b/src/lexer/PhutilJavaFragmentLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilJavaFragmentLexer.php
@@ -0,0 +1,120 @@
+ array_merge(
+ $nonsemantic_rules,
+ array(
+ array('('.implode('|', $keywords).')\\b', 'k'),
+ array('@[^\\W\\d][\\w.]*', 'nd'),
+ array('('.implode('|', $declarations).')\\b', 'k'),
+ array('('.implode('|', $types).')\\b', 't'),
+ array('(package|import\\s+static|import)\\b', 'kn', 'import'),
+ array('('.implode('|', $constants).')\\b', 'kc'),
+ array('(class|interface)\\b', 'kd', 'class'),
+ array('"(\\\\\\\\|\\\\"|[^"\\\\]+)*"', 's'),
+ array("'(\\\\.|[^\\\\]|\\\\u[0-9a-f-A-F]{4})'", 's'),
+ array('([^\\W\\d]|\\$)[\\w$]*:', 'nl'),
+ array('([^\\W\\d]|\\$)[\\w$]*', 'n'),
+ array(
+ '(([0-9][0-9_]*\\.([0-9][0-9_]*)?|'.
+ '\\.[0-9][0-9_]*)([eE][+-]?[0-9][0-9_]*)?[fFdD]?|'.
+ '[0-9][eE][+-]?[0-9][0-9_]*[fFdD]?|'.
+ '[0-9]([eE][+-]?[0-9][0-9_]*)?[fFdD]|'.
+ '0[xX]([0-9a-fA-F][0-9a-fA-F_]*\\.?|'.
+ '([0-9a-fA-F][0-9a-fA-F_]*)?\\.[0-9a-fA-F][0-9a-fA-F_]*)'.
+ '[pP][+-]?[0-9][0-9_]*[fFdD]?)',
+ 'mf',
+ ),
+ array('0[xX][0-9a-fA-F][0-9a-fA-F_]*[lL]?', 'mh'),
+ array('0[bB][01][01_]*[lL]?', 'mb'),
+ array('0[0-7_]+[lL]?', 'mo'),
+ array('(0|[1-9][0-9_]*[lL]?)', 'mi'),
+ array('([~^*!%&\\[\\](){}<>|+=:;,./?-])', 'o'),
+ array('(\S+|\s+)', null),
+ )),
+ 'class' => array_merge(
+ $nonsemantic_rules,
+ array(
+ array('([^\W\d]|\$)[\w$]*', 'nc', '!pop'),
+ array('', null, '!pop'),
+ )),
+ 'import' => array_merge(
+ $nonsemantic_rules,
+ array(
+ array('[\w.]+\*?', 'nn', '!pop'),
+ array('', null, '!pop'),
+ )),
+ );
+ }
+
+}
diff --git a/src/lexer/PhutilLexer.php b/src/lexer/PhutilLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilLexer.php
@@ -0,0 +1,362 @@
+ array(...),
+ * 'state1' => array(...),
+ * 'state2' => array(...),
+ * )
+ *
+ * Lexers start at the state named 'start'. Each state should have a list of
+ * rules which can match in that state. A list of rules looks like this:
+ *
+ * array(
+ * array('\s+', 'space'),
+ * array('\d+', 'digit'),
+ * array('\w+', 'word'),
+ * )
+ *
+ * The lexer operates by processing each rule in the current state in order.
+ * When one matches, it produces a token. For example, the lexer above would
+ * lex this text:
+ *
+ * 3 asdf
+ *
+ * ...to produce these tokens (assuming the rules are for the 'start' state):
+ *
+ * array('digit', '3', null),
+ * array('space', ' ', null),
+ * array('word', 'asdf', null),
+ *
+ * A rule can also cause a state transition:
+ *
+ * array('zebra', 'animal', 'saw_zebra'),
+ *
+ * This would match the text "zebra", emit a token of type "animal", and change
+ * the parser state to "saw_zebra", causing the lexer to start using the rules
+ * from that state.
+ *
+ * To pop the lexer's state, you can use the special state '!pop'.
+ *
+ * Finally, you can provide additional options in the fourth parameter.
+ * Supported options are `case-insensitive` and `context`.
+ *
+ * Possible values for `context` are `push` (push the token value onto the
+ * context stack), `pop` (pop the context stack and use it to provide context
+ * for the token), and `discard` (pop the context stack and throw away the
+ * value).
+ *
+ * For example, to lex text like this:
+ *
+ * Class::CONSTANT
+ *
+ * You can use a rule set like this:
+ *
+ * 'start' => array(
+ * array('\w+(?=::)', 'class', 'saw_class', array('context' => 'push')),
+ * ),
+ * 'saw_class' => array(
+ * array('::', 'operator'),
+ * array('\w+', 'constant', '!pop', array('context' => 'pop')),
+ * ),
+ *
+ * This would parse the above text into this token stream:
+ *
+ * array('class', 'Class', null),
+ * array('operator', '::', null),
+ * array('constant', 'CONSTANT', 'Class'),
+ *
+ * For a concrete implementation, see @{class:PhutilPHPFragmentLexer}.
+ *
+ * @task lexerimpl Lexer Implementation
+ * @task rule Lexer Rules
+ * @task tokens Lexer Tokens
+ */
+abstract class PhutilLexer extends Phobject {
+
+ private $processedRules;
+ private $lastState;
+
+
+/* -( Lexer Implementation )----------------------------------------------- */
+
+
+ /**
+ * Return the raw rule set for this lexer: a map from state name to a list
+ * of rules, where each rule is a list of 2-4 elements (regex, token type,
+ * [next state], [options]). The map must define rules for the 'start'
+ * state. See description in @{class:PhutilLexer}.
+ *
+ * @return dict Lexer rules.
+ * @task lexerimpl
+ */
+ abstract protected function getRawRules();
+
+
+/* -( Lexer Rules )-------------------------------------------------------- */
+
+
+ /**
+  * Process, normalize, and validate the raw lexer rules.
+  *
+  * Each rule is padded to exactly four elements (regex, token type, next
+  * state, options), its options are validated, and its regex is wrapped so
+  * that it is anchored at the current offset and compiled once to verify
+  * that it is well-formed.
+  *
+  * Supported options are `context` (one of 'push', 'pop', 'discard' or null)
+  * and `case-insensitive` (a truthy value makes the rule's regex match
+  * without regard to case).
+  *
+  * @return dict Map from state name to a list of normalized rules.
+  * @throws UnexpectedValueException If the raw rules are malformed.
+  * @task rule
+  */
+ protected function getRules() {
+   $class = get_class($this);
+
+   $raw_rules = $this->getRawRules();
+
+   if (!is_array($raw_rules)) {
+     $type = gettype($raw_rules);
+     throw new UnexpectedValueException(
+       pht(
+         'Expected %s to return array, got %s.',
+         $class.'->getRawRules()',
+         $type));
+   }
+
+   if (empty($raw_rules['start'])) {
+     throw new UnexpectedValueException(
+       pht(
+         "Expected %s rules to define rules for state '%s'.",
+         $class,
+         'start'));
+   }
+
+   $processed_rules = array();
+   foreach ($raw_rules as $state => $rules) {
+
+     if (!is_array($rules)) {
+       $type = gettype($rules);
+       throw new UnexpectedValueException(
+         pht(
+           "Expected list of rules for state '%s' in %s, got %s.",
+           $state,
+           $class,
+           $type));
+     }
+
+     foreach ($rules as $key => $rule) {
+       // Reject non-list rules explicitly rather than handing them to
+       // count(), which does not accept scalars.
+       if (!is_array($rule)) {
+         throw new UnexpectedValueException(
+           pht(
+             "Expected rule '%s' in state '%s' in %s to be a list ".
+             "(regex, token, [next state], [options]), got %s.",
+             $key,
+             $state,
+             $class,
+             gettype($rule)));
+       }
+
+       $n = count($rule);
+       if ($n < 2 || $n > 4) {
+         throw new UnexpectedValueException(
+           pht(
+             "Expected rule '%s' in state '%s' in %s to have 2-4 elements ".
+             "(regex, token, [next state], [options]), got %d.",
+             $key,
+             $state,
+             $class,
+             $n));
+       }
+
+       // Normalize to (regex, token, next state, options).
+       $rule = array_values($rule);
+       if (count($rule) == 2) {
+         $rule[] = null;
+       }
+       if (count($rule) == 3) {
+         $rule[] = array();
+       }
+
+       if (!is_array($rule[3])) {
+         throw new UnexpectedValueException(
+           pht(
+             "Expected options for rule '%s' in state '%s' in %s to be a ".
+             "dictionary, got %s.",
+             $key,
+             $state,
+             $class,
+             gettype($rule[3])));
+       }
+
+       // "s" lets "." match newlines; "S" asks PCRE to study the pattern.
+       $flags = 'sS';
+
+       foreach ($rule[3] as $option => $value) {
+         switch ($option) {
+           case 'case-insensitive':
+             if ($value) {
+               $flags .= 'i';
+             }
+             break;
+           case 'context':
+             if ($value !== 'push' &&
+                 $value !== 'pop' &&
+                 $value !== 'discard' &&
+                 $value !== null) {
+               throw new UnexpectedValueException(
+                 pht(
+                   "Rule '%s' in state '%s' in %s has unknown ".
+                   "context rule '%s', expected '%s', '%s' or '%s'.",
+                   $key,
+                   $state,
+                   $class,
+                   $value,
+                   'push',
+                   'pop',
+                   'discard'));
+             }
+             break;
+           default:
+             throw new UnexpectedValueException(
+               pht(
+                 "Rule '%s' in state '%s' in %s has unknown option '%s'.",
+                 $key,
+                 $state,
+                 $class,
+                 $option));
+         }
+       }
+
+       // NOTE: The "\G" assertion is an offset-aware version of "^". The
+       // parentheses double as the regex delimiters.
+       $rule[0] = '(\\G'.$rule[0].')'.$flags;
+
+       // Compile the pattern once so a broken regex is reported here, with
+       // the rule that defines it, instead of while lexing.
+       if (@preg_match($rule[0], '') === false) {
+         $error = error_get_last();
+         throw new UnexpectedValueException(
+           pht(
+             "Rule '%s' in state '%s' in %s defines an ".
+             "invalid regular expression ('%s'): %s",
+             $key,
+             $state,
+             $class,
+             $rule[0],
+             idx($error, 'message')));
+       }
+
+       $next_state = $rule[2];
+       if ($next_state !== null && $next_state !== '!pop') {
+         if (empty($raw_rules[$next_state])) {
+           throw new UnexpectedValueException(
+             pht(
+               "Rule '%s' in state '%s' in %s transitions to ".
+               "state '%s', but there are no rules for that state.",
+               $key,
+               $state,
+               $class,
+               $next_state));
+         }
+       }
+
+       $processed_rules[$state][] = $rule;
+     }
+   }
+
+   return $processed_rules;
+ }
+
+
+/* -( Lexer Tokens )------------------------------------------------------- */
+
+
+ /**
+  * Lex an input string into tokens.
+  *
+  * Each token is a list of (token type, matched text, context). The state
+  * stack left over when lexing finishes is retained and can be read back
+  * with @{method:getLexerState}.
+  *
+  * @param string Input string.
+  * @param string Initial lexer state.
+  * @return list List of lexer tokens.
+  * @throws UnexpectedValueException If no rule matches, or a rule misbehaves.
+  * @task tokens
+  */
+ public function getTokens($input, $initial_state = 'start') {
+   if (empty($this->processedRules)) {
+     $this->processedRules = $this->getRules();
+   }
+   $rules = $this->processedRules;
+
+   $this->lastState = null;
+
+   // The 'start' state always sits at the bottom of the stack, so popping
+   // out of a custom initial state lands somewhere sensible.
+   $state_stack = array('start');
+   if ($initial_state != 'start') {
+     $state_stack[] = $initial_state;
+   }
+
+   $context_stack = array();
+   $tokens = array();
+   $offset = 0;
+   $end = strlen($input);
+
+   while ($offset < $end) {
+     $matched = false;
+
+     foreach (idx($rules, end($state_stack), array()) as $rule) {
+       $captures = null;
+       if (!preg_match($rule[0], $input, $captures, 0, $offset)) {
+         continue;
+       }
+
+       $matched = true;
+       list($regexp, $token_type, $next_state, $options) = $rule;
+       $text = $captures[0];
+
+       if ($text === '') {
+         // An empty match consumes nothing, so it must at least change
+         // state or the lexer would never make progress.
+         if ($next_state === null) {
+           throw new UnexpectedValueException(
+             pht(
+               "Rule '%s' matched a zero-length token and causes no ".
+               "state transition.",
+               $regexp));
+         }
+       } else {
+         $offset += strlen($text);
+
+         $token_context = null;
+         switch (idx($options, 'context')) {
+           case 'push':
+             $context_stack[] = $text;
+             break;
+           case 'pop':
+             if (empty($context_stack)) {
+               throw new UnexpectedValueException(
+                 pht("Rule '%s' popped empty context!", $regexp));
+             }
+             $token_context = array_pop($context_stack);
+             break;
+           case 'discard':
+             if (empty($context_stack)) {
+               throw new UnexpectedValueException(
+                 pht("Rule '%s' discarded empty context!", $regexp));
+             }
+             array_pop($context_stack);
+             break;
+         }
+
+         $tokens[] = array($token_type, $text, $token_context);
+       }
+
+       if ($next_state !== null) {
+         if ($next_state == '!pop') {
+           array_pop($state_stack);
+           if (empty($state_stack)) {
+             throw new UnexpectedValueException(
+               pht("Rule '%s' popped off the last state.", $regexp));
+           }
+         } else {
+           $state_stack[] = $next_state;
+         }
+       }
+
+       // The first matching rule wins; resume from the new offset/state.
+       break;
+     }
+
+     if (!$matched) {
+       throw new UnexpectedValueException(
+         pht('No lexer rule matched input at char %d.', $offset));
+     }
+   }
+
+   $this->lastState = $state_stack;
+
+   return $tokens;
+ }
+
+
+ /**
+  * Collapse runs of adjacent tokens which share both a type and a context
+  * into single tokens by concatenating their text. For example, if a comment
+  * is tokenized as <"//", "comment">, the two tokens become one combined
+  * token.
+  *
+  * @param list List of tokens, each of the form (type, text, context).
+  * @return list List of merged tokens.
+  */
+ public function mergeTokens(array $tokens) {
+   $merged = array();
+
+   // Index of the most recently emitted token, or -1 before the first one.
+   $tail = -1;
+
+   foreach ($tokens as $token) {
+     $extends_tail =
+       ($tail >= 0) &&
+       ($token[0] == $merged[$tail][0]) &&
+       ($token[2] == $merged[$tail][2]);
+
+     if ($extends_tail) {
+       $merged[$tail][1] .= $token[1];
+     } else {
+       $merged[] = $token;
+       $tail++;
+     }
+   }
+
+   return $merged;
+ }
+
+ /**
+ * Return the state stack left behind by the most recent call to
+ * @{method:getTokens}: a list of state names, the last of which is the
+ * state the lexer finished in. The value is reset to null when lexing
+ * begins and only set once lexing completes, so it is null if the last
+ * call threw or if nothing has been lexed yet.
+ *
+ * @return list|null State stack, or null.
+ */
+ public function getLexerState() {
+ return $this->lastState;
+ }
+
+}
diff --git a/src/lexer/PhutilPHPFragmentLexer.php b/src/lexer/PhutilPHPFragmentLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilPHPFragmentLexer.php
@@ -0,0 +1,281 @@
+ array(
+ array('<\\?(?i:php)?', 'cp', 'php'),
+ array('[^<]+', null),
+ array('<', null),
+ ),
+
+ 'php' => array_merge(array(
+ array('\\?>', 'cp', '!pop'),
+ array(
+ '<<<([\'"]?)('.$identifier_pattern.')\\1\\n.*?\\n\\2\\;?\\n',
+ 's',
+ ),
+ ), $nonsemantic_rules, array(
+ array('(?i:__halt_compiler)\\b', 'cp', 'halt_compiler'),
+ array('(->|::)', 'o', 'attr'),
+ array('[~!%^&*+=|:.<>/?@-]+', 'o'),
+ array('[\\[\\]{}();,]', 'o'),
+
+ // After 'new', try to match an unadorned symbol.
+ array('(?i:new|instanceof)\\b', 'k', 'possible_classname'),
+ array('(?i:function)\\b', 'k', 'function_definition'),
+
+ // After 'extends' or 'implements', match a list of classes/interfaces.
+ array('(?i:extends|implements)\\b', 'k', 'class_list'),
+
+ array('(?i:catch)\\b', 'k', 'catch'),
+
+ array('(?i:'.implode('|', $keywords).')\\b', 'k'),
+ array('(?i:'.implode('|', $constants).')\\b', 'kc'),
+
+ array('\\$+'.$identifier_pattern, 'nv'),
+
+ // Match "f(" as a function and "C::" as a class. These won't work
+ // if you put a comment between the symbol and the operator, but
+ // that's a bizarre usage.
+ array($identifier_ns_pattern.'(?=\s*[\\(])', 'nf'),
+ array(
+ $identifier_ns_pattern.'(?=\s*::)',
+ 'nc',
+ 'context_attr',
+ array(
+ 'context' => 'push',
+ ),
+ ),
+
+ array($identifier_ns_pattern, 'no'),
+ array('(\\d+\\.\\d*|\\d*\\.\\d+)([eE][+-]?[0-9]+)?', 'mf'),
+ array('\\d+[eE][+-]?[0-9]+', 'mf'),
+ array('0[0-7]+', 'mo'),
+ array('0[xX][a-fA-F0-9]+', 'mh'),
+ array('0[bB][0-1]+', 'm'),
+ array('\d+', 'mi'),
+ array("'", 's1', 'string1'),
+ array('`', 'sb', 'stringb'),
+ array('"', 's2', 'string2'),
+ array('.', null),
+ )),
+
+ // We've just matched a class name, with a "::" lookahead. The name of
+ // the class is on the top of the context stack. We want to try to match
+ // the attribute or method (e.g., "X::C" or "X::f()").
+ 'context_attr' => array_merge($nonsemantic_rules, array(
+ array('::', 'o'),
+ array(
+ $identifier_pattern.'(?=\s*[\\(])',
+ 'nf',
+ '!pop',
+ array(
+ 'context' => 'pop',
+ ),
+ ),
+ array(
+ $identifier_pattern,
+ 'na',
+ '!pop',
+ array(
+ 'context' => 'pop',
+ ),
+ ),
+ array(
+ '',
+ null,
+ '!pop',
+ array(
+ 'context' => 'discard',
+ ),
+ ),
+ )),
+
+ // After '->' or '::', a symbol is an attribute name. Note that we end
+ // up in 'context_attr' instead of here in some cases.
+ 'attr' => array_merge($nonsemantic_rules, array(
+ array($identifier_pattern, 'na', '!pop'),
+ array('', null, '!pop'),
+ )),
+
+ // After 'new', a symbol is a class name.
+ 'possible_classname' => array_merge($nonsemantic_rules, array(
+ array($identifier_ns_pattern, 'nc', '!pop'),
+ array('', null, '!pop'),
+ )),
+
+ 'string1' => array(
+ array('[^\'\\\\]+', 's1'),
+ array("'", 's1', '!pop'),
+ array('\\\\.', 'k'),
+ array('\\\\$', 'k'),
+ ),
+
+ 'stringb' => array(
+ array('[^`\\\\]+', 'sb'),
+ array('`', 'sb', '!pop'),
+ array('\\\\.', 'k'),
+ array('\\\\$', 'k'),
+ ),
+
+ 'string2' => array(
+ array('[^"\\\\]+', 's2'),
+ array('"', 's2', '!pop'),
+ array('\\\\.', 'k'),
+ array('\\\\$', 'k'),
+ ),
+
+ // In a function definition (after "function"), we don't link the name
+ // as a "nf" (name.function) since it is its own definition.
+ 'function_definition' => array_merge($nonsemantic_rules, array(
+ array('&', 'o'),
+ array('\\(', 'o', '!pop'),
+ array($identifier_pattern, 'no', '!pop'),
+ array('', null, '!pop'),
+ )),
+
+ // For "//" and "#" comments, we need to break out if we see "?" followed
+ // by ">".
+ 'line_comment' => array(
+ array('[^?\\n]+', 'c'),
+ array('\\n', null, '!pop'),
+ array('(?=\\?>)', null, '!pop'),
+ array('\\?', 'c'),
+ ),
+
+ // We've seen __halt_compiler. Grab the '();' afterward and then eat
+ // the rest of the file as raw data.
+ 'halt_compiler' => array_merge($nonsemantic_rules, array(
+ array('[()]', 'o'),
+
+ array(';', 'o', 'compiler_halted'),
+ array('\\?>', 'o', 'compiler_halted'),
+
+ // Just halt on anything else.
+ array('', null, 'compiler_halted'),
+ )),
+
+ // __halt_compiler has taken effect.
+ 'compiler_halted' => array(
+ array('.+', null),
+ ),
+
+ 'class_list' => array_merge($nonsemantic_rules, array(
+ array(',', 'o'),
+ array('(?i:implements)', 'k'),
+ array($identifier_ns_pattern, 'nc'),
+ array('', null, '!pop'),
+ )),
+
+ 'catch' => array_merge($nonsemantic_rules, array(
+ array('\\(', 'o'),
+ array($identifier_ns_pattern, 'nc'),
+ array('', null, '!pop'),
+ )),
+ );
+ }
+
+}
diff --git a/src/lexer/PhutilPythonFragmentLexer.php b/src/lexer/PhutilPythonFragmentLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilPythonFragmentLexer.php
@@ -0,0 +1,314 @@
+ array_merge(array(
+ array('\\n', null),
+ // TODO: Docstrings should match only at the start of a line
+ array('""".*?"""', 'sd'),
+ array('\'\'\'.*?\'\'\'', 'sd'),
+ ), $nonsemantic_rules, array(
+ array('[]{}:(),;[]', 'p'),
+ array('\\\\\\n', null),
+ array('\\\\', null),
+ array('(?:in|is|and|or|not)\\b', 'ow'),
+ array('(?:!=|==|<<|>>|[-~+/*%=<>&^|.])', 'o'),
+ array('(?:'.implode('|', $keywords).')\\b', 'k'),
+ array('def(?=\\s)', 'k', 'funcname'),
+ array('class(?=\\s)', 'k', 'classname'),
+ array('from(?=\\s)', 'kn', 'fromimport'),
+ array('import(?=\\s)', 'kn', 'import'),
+ array('(? array_merge($nonsemantic_rules, array(
+ array('[a-zA-Z_]\w*', 'nf', '!pop'),
+ array('', null, '!pop'),
+ )),
+
+ 'classname' => array_merge($nonsemantic_rules, array(
+ array('[a-zA-Z_]\w*', 'nc', '!pop'),
+ array('', null, '!pop'),
+ )),
+
+ 'fromimport' => array_merge($nonsemantic_rules, array(
+ array('import\b', 'kn', '!pop'),
+ // if None occurs here, it's "raise x from None", since None can
+ // never be a module name
+ array('None\b', 'bp', '!pop'),
+ // sadly, in "raise x from y" y will be highlighted as namespace too
+ array('[a-zA-Z_.][\w.]*', 'nn'),
+ array('', null, '!pop'),
+ )),
+
+ 'import' => array_merge($nonsemantic_rules, array(
+ array('as\b', 'kn'),
+ array(',', 'o'),
+ array('[a-zA-Z_.][\w.]*', 'nn'),
+ array('', null, '!pop'),
+ )),
+
+ 'dqs_raw' => $dqs,
+ 'sqs_raw' => $sqs,
+ 'dqs' => array_merge($stringescape, $dqs),
+ 'sqs' => array_merge($stringescape, $sqs),
+ 'tdqs_raw' => $tdqs,
+ 'tsqs_raw' => $tsqs,
+ 'tdqs' => array_merge($stringescape, $tdqs),
+ 'tsqs' => array_merge($stringescape, $tsqs),
+ );
+ }
+
+}
diff --git a/src/lexer/PhutilShellLexer.php b/src/lexer/PhutilShellLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilShellLexer.php
@@ -0,0 +1,86 @@
+getTokens($string);
+ if (count($this->getLexerState()) > 1) {
+ throw new UnexpectedValueException(
+ pht('Unterminated string in argument list!'));
+ }
+
+ foreach ($tokens as $key => $token) {
+ switch ($token[0]) {
+ case "'":
+ case '"':
+ unset($tokens[$key]);
+ break;
+ case 'esc':
+ $tokens[$key][0] = 'arg';
+ $tokens[$key][1] = substr($token[1], 1);
+ break;
+ default:
+ break;
+ }
+ }
+
+ $tokens = $this->mergeTokens(array_values($tokens));
+
+ $argv = array();
+ foreach ($tokens as $token) {
+ if ($token[0] == 'arg') {
+ $argv[] = $token[1];
+ }
+ }
+
+ return $argv;
+ }
+
+ /**
+ * Lexer rules for shell-style argument strings. Whitespace is emitted as
+ * ' ' tokens, quote characters as tokens typed by the quote itself,
+ * backslash escapes as 'esc' tokens and everything else as 'arg' tokens.
+ * Opening a quote pushes a string state which the matching closing quote
+ * pops.
+ */
+ protected function getRawRules() {
+ return array(
+ 'start' => array(
+ array('\s+', ' '),
+ array("'", "'", 'string1'),
+ array('"', '"', 'string2'),
+ // Outside of quotes, a backslash escapes any single character.
+ array('\\\\.', 'esc'),
+ array('[^\\s\'"\\\\]+', 'arg'),
+ ),
+ 'string1' => array(
+ // NOTE: In a single-quoted string, backslash is not an escape.
+ array('[^\']+', 'arg'),
+ array("'", "'", '!pop'),
+ ),
+ 'string2' => array(
+ // NOTE: In a double-quoted string, backslash IS an escape, but only
+ // for some characters: ", $, `, \ and newline.
+ array('[^"\\\\]+', 'arg'),
+ array('"', '"', '!pop'),
+ array('\\\\["$`\\\\\\n]', 'esc'),
+ // Any other backslash sequence is kept verbatim, backslash included.
+ array('\\\\.', 'arg'),
+ ),
+ );
+ }
+
+}
diff --git a/src/lexer/PhutilSimpleOptionsLexer.php b/src/lexer/PhutilSimpleOptionsLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilSimpleOptionsLexer.php
@@ -0,0 +1,90 @@
+getTokens($input);
+
+ foreach ($tokens as $key => $token) {
+ list($type, $value) = $token;
+ switch ($type) {
+ case 'esc':
+ $tokens[$key][0] = 'word';
+ $tokens[$key][1] = substr($value, 1);
+ break;
+ }
+ }
+
+ $tokens = $this->mergeTokens($tokens);
+
+ // Find spaces in between two words and turn them into words. This allows
+ // us to parse unescaped spaces in values correctly.
+ for ($ii = 0; $ii < count($tokens); $ii++) {
+ list($type, $value) = $tokens[$ii];
+ if ($type != ' ') {
+ continue;
+ }
+ $last = idx($tokens, $ii - 1);
+ if (!$last) {
+ continue;
+ }
+ $next = idx($tokens, $ii + 1);
+ if (!$next) {
+ continue;
+ }
+ if ($last[0] == 'word' && $next[0] == 'word') {
+ $tokens[$ii][0] = 'word';
+ }
+ }
+
+ // NOTE: Strip these only after merging tokens, so "a b" merges into two
+ // words, "a" and "b", not a single "ab" word.
+ foreach ($tokens as $key => $token) {
+ list($type, $value) = $token;
+ switch ($type) {
+ case "'":
+ case '"':
+ case ' ':
+ unset($tokens[$key]);
+ break;
+ }
+ }
+
+ return array_values($tokens);
+ }
+
+ /**
+ * Lexer rules for simple option strings like "legs=4, eyes=2". Whitespace
+ * is emitted as ' ' tokens; quotes, ',' and '=' as tokens typed by the
+ * character itself; backslash escapes inside quoted strings as 'esc'
+ * tokens; and all other text as 'word' tokens.
+ */
+ protected function getRawRules() {
+ return array(
+ 'start' => array(
+ array('\s+', ' '),
+ array("'", "'", 'string1'),
+ array('"', '"', 'string2'),
+ array(',', ','),
+ array('=', '='),
+ array('[^\\s\'"=,]+', 'word'),
+ ),
+ 'string1' => array(
+ array('[^\'\\\\]+', 'word'),
+ array("'", "'", '!pop'),
+ array('\\\\.', 'esc'),
+ // Matches a lone backslash at the very end of the input.
+ // NOTE(review): '!pop' is in the token-type slot here, so this emits a
+ // token of type '!pop' and does not pop the state. Confirm whether a
+ // state transition was intended.
+ array('\\\\$', '!pop'),
+ ),
+ 'string2' => array(
+ array('[^"\\\\]+', 'word'),
+ array('"', '"', '!pop'),
+ array('\\\\.', 'esc'),
+ // NOTE(review): as in 'string1', '!pop' is used as a token type here,
+ // not as a next state. Confirm this is intended.
+ array('\\\\$', '!pop'),
+ ),
+ );
+ }
+
+}
diff --git a/src/lexer/PhutilTypeLexer.php b/src/lexer/PhutilTypeLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilTypeLexer.php
@@ -0,0 +1,32 @@
+ array(
+ array('\s+', ' '),
+ array('\\|', '|'),
+ array('<', '<'),
+ array('>', '>'),
+ array(',', ','),
+ array('\\?', '?'),
+ array('optional', 'opt'),
+ array('map', 'map'),
+ array('list', 'list'),
+ array('int|float|bool|string|null|callable|wild|regex', 'k'),
+ array('\\\\?[a-zA-Z_\x7f-\xff]+(\\\\[a-zA-Z_\x7f-\xff]+)*', 'k'),
+ array('\\(', '(', 'comment'),
+ ),
+ 'comment' => array(
+ array('\\)', ')', '!pop'),
+ array('[^\\)]+', 'cm'),
+ ),
+ );
+ }
+
+}
diff --git a/src/lexer/__tests__/PhutilPHPFragmentLexerTestCase.php b/src/lexer/__tests__/PhutilPHPFragmentLexerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/__tests__/PhutilPHPFragmentLexerTestCase.php
@@ -0,0 +1,317 @@
+runLexer($file, $data);
+ }
+ }
+
+ private function runLexer($file, $data) {
+ $lexer = new PhutilPHPFragmentLexer();
+
+ $initial_state = 'start';
+ switch ($file) {
+ case 'pop-from-php.txt':
+ $initial_state = 'php';
+ break;
+ case 'trailing-backslash-1.txt':
+ case 'trailing-backslash-2.txt':
+ case 'trailing-backslash-b.txt':
+ // It's important these test cases not have trailing newlines.
+ $data = rtrim($data);
+ break;
+ }
+
+ $caught = null;
+ $tokens = null;
+ try {
+ $tokens = $lexer->getTokens($data, $initial_state);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ switch ($file) {
+ case 'basics.txt':
+ $this->assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', 'assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', 'assertEqual(null, $caught);
+ $tokens = $lexer->mergeTokens($tokens);
+ $this->assertEqual(
+ array(
+ array('cp', '', null),
+ array(null, "\n\nd\n", null),
+ ),
+ $tokens,
+ $file);
+ break;
+ case 'extendsimplements.txt':
+ $this->assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', 'assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', 'assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', 'assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', 'assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', 'assertEqual(null, $caught);
+ $this->assertEqual(
+ array(
+ array('cp', '?>', null),
+ array(null, "\n", null),
+ ),
+ $tokens,
+ $file);
+ break;
+ case 'trailing-backslash-1.txt':
+ case 'trailing-backslash-2.txt':
+ case 'trailing-backslash-b.txt':
+ $this->assertEqual(null, $caught);
+ break;
+ default:
+ throw new Exception(pht("No assertion block for test '%s'!", $file));
+ }
+ }
+
+
+}
diff --git a/src/lexer/__tests__/PhutilShellLexerTestCase.php b/src/lexer/__tests__/PhutilShellLexerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/__tests__/PhutilShellLexerTestCase.php
@@ -0,0 +1,208 @@
+runLexer($file, $data);
+ }
+ }
+
+ /**
+  * Lex one fixture file and check both the raw token stream and the argument
+  * vector produced by splitArguments() against the expectations for that
+  * file.
+  *
+  * @param string Name of the fixture file, used to select the assertions.
+  * @param string Contents of the fixture file.
+  * @return void
+  */
+ private function runLexer($file, $data) {
+   $lexer = new PhutilShellLexer();
+
+   $initial_state = 'start';
+
+   $caught = null;
+   $tokens = null;
+   try {
+     $tokens = $lexer->getTokens($data, $initial_state);
+   } catch (Exception $ex) {
+     $caught = $ex;
+   }
+
+   $argv = null;
+   try {
+     $argv = $lexer->splitArguments($data);
+   } catch (Exception $ex) {
+     // Ignore; not diagnostically useful.
+   }
+
+   switch ($file) {
+     case 'basic.txt':
+       $this->assertEqual(null, $caught);
+       $this->assertEqual(
+         array(
+           array('arg', 'arg1', null),
+           array(' ', ' ', null),
+           array('arg', 'arg2', null),
+           array(' ', ' ', null),
+           array('arg', 'arg3', null),
+         ),
+         $tokens,
+         $file);
+       $this->assertEqual(
+         array(
+           'arg1',
+           'arg2',
+           'arg3',
+         ),
+         $argv,
+         $file);
+       break;
+     case 'escape.txt':
+       $this->assertEqual(null, $caught);
+       $this->assertEqual(
+         array(
+           array("'", "'", null),
+           array('arg', '\\', null),
+           array("'", "'", null),
+           array(' ', ' ', null),
+           array('"', '"', null),
+           array('esc', '\\"', null),
+           array('"', '"', null),
+         ),
+         $tokens,
+         $file);
+       $this->assertEqual(
+         array(
+           '\\',
+           '"',
+         ),
+         $argv,
+         $file);
+       break;
+     case 'slashes.txt':
+       $this->assertEqual(null, $caught);
+       $this->assertEqual(
+         array(
+           array('arg', 'a', null),
+           array('esc', '\\ ', null),
+           array('arg', 'b', null),
+           array(' ', ' ', null),
+           array("'", "'", null),
+           array('arg', 'a\\b', null),
+           array("'", "'", null),
+           array(' ', ' ', null),
+           array('"', '"', null),
+           array('arg', 'a', null),
+           array('arg', '\\b', null),
+           array('"', '"', null),
+           array(' ', ' ', null),
+           array('"', '"', null),
+           array('esc', '\\$', null),
+           array('esc', '\\`', null),
+           array('esc', '\\\\', null),
+           array('esc', '\\"', null),
+           array('esc', '\\'."\n", null),
+           array('arg', 'xyz', null),
+           array('"', '"', null),
+         ),
+         $tokens,
+         $file);
+       $this->assertEqual(
+         array(
+           'a b',
+           'a\\b',
+           'a\\b',
+           '$`\\"'."\n".'xyz',
+         ),
+         $argv,
+         $file);
+       break;
+     case 'spaces.txt':
+       // Like every other fixture that is expected to lex cleanly, make
+       // sure no exception was raised before comparing the tokens.
+       $this->assertEqual(null, $caught);
+       $this->assertEqual(
+         array(
+           array('arg', 'arg1', null),
+           array(' ', ' ', null),
+           array('arg', 'arg2', null),
+           array(' ', ' ', null),
+           array('arg', 'arg3', null),
+         ),
+         $tokens,
+         $file);
+       $this->assertEqual(
+         array(
+           'arg1',
+           'arg2',
+           'arg3',
+         ),
+         $argv,
+         $file);
+       break;
+     case 'strings.txt':
+       $this->assertEqual(null, $caught);
+       $this->assertEqual(
+         array(
+           array('arg', 'a', null),
+           array(' ', ' ', null),
+           array("'", "'", null),
+           array('arg', 'b', null),
+           array("'", "'", null),
+           array(' ', ' ', null),
+           array('"', '"', null),
+           array('arg', 'c', null),
+           array('"', '"', null),
+           array(' ', ' ', null),
+           array("'", "'", null),
+           array('arg', 'd', null),
+           array("'", "'", null),
+           array("'", "'", null),
+           array('arg', 'e', null),
+           array("'", "'", null),
+           array(' ', ' ', null),
+           array('"', '"', null),
+           array('arg', 'f', null),
+           array('"', '"', null),
+           array('"', '"', null),
+           array('arg', 'g', null),
+           array('"', '"', null),
+           array(' ', ' ', null),
+           array('"', '"', null),
+           array('arg', 'h', null),
+           array('"', '"', null),
+           array('"', '"', null),
+           array('arg', "'", null),
+           array('"', '"', null),
+           array('"', '"', null),
+           array('arg', 'i', null),
+           array('"', '"', null),
+         ),
+         $tokens,
+         $file);
+       $this->assertEqual(
+         array(
+           'a',
+           'b',
+           'c',
+           'de',
+           'fg',
+           'h\'i',
+         ),
+         $argv,
+         $file);
+       break;
+     case 'unterminated.txt':
+       // Lexing itself succeeds; the unterminated string shows up as a
+       // leftover string state, which makes splitArguments() throw.
+       $this->assertEqual(null, $caught);
+       $this->assertEqual(
+         array(
+           'start',
+           'string1',
+         ),
+         $lexer->getLexerState(),
+         $file);
+       $this->assertEqual(
+         null,
+         $argv,
+         $file);
+       break;
+     default:
+       throw new Exception(pht("No assertion block for test '%s'!", $file));
+   }
+ }
+
+
+}
diff --git a/src/lexer/__tests__/PhutilSimpleOptionsLexerTestCase.php b/src/lexer/__tests__/PhutilSimpleOptionsLexerTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/__tests__/PhutilSimpleOptionsLexerTestCase.php
@@ -0,0 +1,61 @@
+assertEqual(
+ array(
+ array('word', 'legs', null),
+ array('=', '=', null),
+ array('word', '4', null),
+ ),
+ $this->getTokens('legs=4'));
+
+ $this->assertEqual(
+ array(
+ array('word', 'legs', null),
+ array('=', '=', null),
+ array('word', '4', null),
+ array(',', ',', null),
+ array(' ', ' ', null),
+ array('word', 'LEGS', null),
+ array('=', '=', null),
+ array('word', '4', null),
+ ),
+ $this->getTokens('legs=4, LEGS=4'));
+ }
+
+ /**
+  * Check the token lists returned by getNiceTokens() for a few inputs with
+  * surrounding whitespace, interior whitespace and adjacent quoted strings.
+  */
+ public function testSimpleOptionsLexerNiceTokens() {
+   // Map from input string to the tokens it is expected to produce.
+   $cases = array(
+     ' legs ' => array(
+       array('word', 'legs', null),
+     ),
+     ' a b ' => array(
+       array('word', 'a', null),
+       array('word', ' ', null),
+       array('word', 'b', null),
+     ),
+     '"a""b"' => array(
+       array('word', 'a', null),
+       array('word', 'b', null),
+     ),
+   );
+
+   foreach ($cases as $input => $expect) {
+     $this->assertEqual($expect, $this->getNiceTokens($input));
+   }
+ }
+
+ /**
+ * Lex an input string with a fresh PhutilSimpleOptionsLexer and return the
+ * raw tokens.
+ */
+ private function getTokens($input) {
+ $lexer = new PhutilSimpleOptionsLexer();
+ return $lexer->getTokens($input);
+ }
+
+ /**
+ * Lex an input string with a fresh PhutilSimpleOptionsLexer and return the
+ * result of its getNiceTokens() method.
+ */
+ private function getNiceTokens($input) {
+ $lexer = new PhutilSimpleOptionsLexer();
+ return $lexer->getNiceTokens($input);
+ }
+
+}
diff --git a/src/lexer/__tests__/php/basics.txt b/src/lexer/__tests__/php/basics.txt
new file mode 100644
--- /dev/null
+++ b/src/lexer/__tests__/php/basics.txt
@@ -0,0 +1 @@
+
+
+d
diff --git a/src/lexer/__tests__/php/extendsimplements.txt b/src/lexer/__tests__/php/extendsimplements.txt
new file mode 100644
--- /dev/null
+++ b/src/lexer/__tests__/php/extendsimplements.txt
@@ -0,0 +1,3 @@
+
diff --git a/src/lexer/__tests__/php/symbols.txt b/src/lexer/__tests__/php/symbols.txt
new file mode 100644
--- /dev/null
+++ b/src/lexer/__tests__/php/symbols.txt
@@ -0,0 +1,8 @@
+content = (string)$content;
+ }
+
+ /**
+ * Return the wrapped content when this object is used as a string.
+ */
+ public function __toString() {
+ return $this->content;
+ }
+
+ /**
+ * Return the wrapped content. This returns the same value as
+ * @{method:__toString}.
+ */
+ public function getHTMLContent() {
+ return $this->content;
+ }
+
+ /**
+  * Append one or more values to this object's content. Each argument is
+  * passed through phutil_escape_html() before it is appended.
+  *
+  * @param wild Value to append; any number of further values may follow.
+  * @return this
+  */
+ public function appendHTML($html /* , ... */) {
+   $pieces = func_get_args();
+   foreach ($pieces as $piece) {
+     $this->content .= phutil_escape_html($piece);
+   }
+   return $this;
+ }
+
+ /**
+  * Call a function with escaped arguments and wrap the result. Every
+  * argument after the function itself is passed through
+  * phutil_escape_html(), and whatever the function returns becomes the
+  * content of a new PhutilSafeHTML.
+  *
+  * @param callable Function to call.
+  * @param wild First argument for the function; more may follow.
+  * @return PhutilSafeHTML
+  */
+ public static function applyFunction($function, $string /* , ... */) {
+   // Everything after the callable itself is an argument for it.
+   $raw_args = array_slice(func_get_args(), 1);
+
+   $safe_args = array();
+   foreach ($raw_args as $raw_arg) {
+     $safe_args[] = phutil_escape_html($raw_arg);
+   }
+
+   return new PhutilSafeHTML(call_user_func_array($function, $safe_args));
+ }
+
+// Requires http://pecl.php.net/operator.
+
+ /**
+ * Return a clone of this object with the given value appended via
+ * @{method:appendHTML}; this object itself is left unchanged.
+ * Presumably invoked for the "." operator by the PECL operator extension
+ * mentioned above -- confirm against that extension's documentation.
+ */
+ public function __concat($html) {
+ $clone = clone $this;
+ return $clone->appendHTML($html);
+ }
+
+ /**
+ * Append the given value to this object in place via @{method:appendHTML}
+ * and return this object. Presumably invoked for the ".=" operator by the
+ * PECL operator extension -- confirm against that extension's
+ * documentation.
+ */
+ public function __assign_concat($html) {
+ return $this->appendHTML($html);
+ }
+
+}
diff --git a/src/markup/PhutilSafeHTMLProducerInterface.php b/src/markup/PhutilSafeHTMLProducerInterface.php
new file mode 100644
--- /dev/null
+++ b/src/markup/PhutilSafeHTMLProducerInterface.php
@@ -0,0 +1,12 @@
+assertEqual(
+ (string)phutil_tag('br'),
+ (string)phutil_tag('br', array()));
+
+ $this->assertEqual(
+ (string)phutil_tag('br', array()),
+ (string)phutil_tag('br', array(), null));
+ }
+
+ public function testTagEmpty() {
+ $this->assertEqual(
+ '
',
+ (string)phutil_tag('br', array(), null));
+
+ $this->assertEqual(
+ '',
+ (string)phutil_tag('div', array(), null));
+
+ $this->assertEqual(
+ '',
+ (string)phutil_tag('div', array(), ''));
+ }
+
+ public function testTagBasics() {
+ $this->assertEqual(
+ '
',
+ (string)phutil_tag('br'));
+
+ $this->assertEqual(
+ 'y',
+ (string)phutil_tag('div', array(), 'y'));
+ }
+
+ public function testTagAttributes() {
+ $this->assertEqual(
+ 'y',
+ (string)phutil_tag('div', array('u' => 'v'), 'y'));
+
+ $this->assertEqual(
+ '
',
+ (string)phutil_tag('br', array('u' => 'v')));
+ }
+
+ public function testTagEscapes() {
+ $this->assertEqual(
+ '
',
+ (string)phutil_tag('br', array('u' => '<')));
+
+ $this->assertEqual(
+ '
',
+ (string)phutil_tag('div', array(), phutil_tag('br')));
+ }
+
+ public function testTagNullAttribute() {
+ $this->assertEqual(
+ '
',
+ (string)phutil_tag('br', array('y' => null)));
+ }
+
+ /**
+  * Check which "href" values make phutil_tag() throw. Each href is tried
+  * twice: once as a plain string and once wrapped in a PhutilURI. An
+  * expectation given as a pair is (expectation as string, expectation as
+  * URI object).
+  */
+ public function testTagJavascriptProtocolRejection() {
+   $hrefs = array(
+     'javascript:alert(1)' => true,
+     'JAVASCRIPT:alert(2)' => true,
+
+     // NOTE: When interpreted as a URI, this is dropped because of leading
+     // whitespace.
+     ' javascript:alert(3)' => array(true, false),
+     '/' => false,
+     '/path/to/stuff/' => false,
+     '' => false,
+     'http://example.com/' => false,
+     '#' => false,
+     'javascript://anything' => true,
+
+     // Chrome 33 and IE11, at a minimum, treat this as Javascript.
+     "javascript\n:alert(4)" => true,
+
+     // Opera currently accepts a variety of unicode spaces. This test case
+     // has a smattering of them.
+     "\xE2\x80\x89javascript:" => true,
+     "javascript\xE2\x80\x89:" => true,
+     "\xE2\x80\x84javascript:" => true,
+     "javascript\xE2\x80\x84:" => true,
+
+     // Because we're aggressive, all of unicode should trigger detection
+     // by default.
+     "\xE2\x98\x83javascript:" => true,
+     "javascript\xE2\x98\x83:" => true,
+     "\xE2\x98\x83javascript\xE2\x98\x83:" => true,
+
+     // We're aggressive about this, so we'll intentionally raise false
+     // positives in these cases.
+     'javascript~:alert(5)' => true,
+     '!!!javascript!!!!:alert(6)' => true,
+
+     // However, we should raise true negatives in these slightly more
+     // reasonable cases.
+     'javascript/:docs.html' => false,
+     'javascripts:x.png' => false,
+     'COOLjavascript:page' => false,
+     '/javascript:alert(1)' => false,
+   );
+
+   foreach (array(true, false) as $use_uri) {
+     foreach ($hrefs as $href => $expect) {
+       if (is_array($expect)) {
+         $expect = ($use_uri ? $expect[1] : $expect[0]);
+       }
+
+       if ($use_uri) {
+         $href_value = new PhutilURI($href);
+       } else {
+         $href_value = $href;
+       }
+
+       $caught = null;
+       try {
+         phutil_tag('a', array('href' => $href_value), 'click for candy');
+       } catch (Exception $ex) {
+         $caught = $ex;
+       }
+
+       // The format string needs one conversion per argument, otherwise
+       // the URI-mode and expectation details never reach the message.
+       $desc = pht(
+         'Unexpected result for "%s" '.
+         '(as URI object: %s, expect exception: %s).',
+         $href,
+         $use_uri ? pht('Yes') : pht('No'),
+         $expect ? pht('Yes') : pht('No'));
+
+       $this->assertEqual(
+         $expect,
+         $caught instanceof Exception,
+         $desc);
+     }
+   }
+ }
+
+ /**
+ * Check the escaped form phutil_escape_uri() produces for a string mixing
+ * reserved characters, a space, "/" and "%". Per the expected value, "/"
+ * is the only special character here that is left unescaped.
+ */
+ public function testURIEscape() {
+ $this->assertEqual(
+ '%2B/%20%3F%23%26%3A%21xyz%25',
+ phutil_escape_uri('+/ ?#&:!xyz%'));
+ }
+
+ /**
+  * Check that path component escaping double-encodes "/" and that every
+  * byte value survives an escape, webserver-style decode, unescape round
+  * trip.
+  */
+ public function testURIPathComponentEscape() {
+   $this->assertEqual(
+     'a%252Fb',
+     phutil_escape_uri_path_component('a/b'));
+
+   // A string containing each of the 256 possible byte values once.
+   $all_bytes = implode('', array_map('chr', range(0, 255)));
+
+   $escaped = phutil_escape_uri_path_component($all_bytes);
+
+   // Simulates webserver.
+   $served = rawurldecode($escaped);
+
+   $this->assertEqual(
+     $all_bytes,
+     phutil_unescape_uri_path_component($served));
+ }
+
+ public function testHsprintf() {
+ $this->assertEqual(
+ '<3',
+ (string)hsprintf('%s', '<3'));
+ }
+
+ public function testAppendHTML() {
+ $html = phutil_tag('hr');
+ $html->appendHTML(phutil_tag('br'), '');
+ $this->assertEqual('
<evil>', $html->getHTMLContent());
+ }
+
+ public function testArrayEscaping() {
+ $this->assertEqual(
+ '<div>',
+ phutil_escape_html(
+ array(
+ hsprintf(''),
+ array(
+ array(
+ '<',
+ array(
+ 'd',
+ array(
+ array(
+ hsprintf('i'),
+ ),
+ 'v',
+ ),
+ ),
+ array(
+ array(
+ '>',
+ ),
+ ),
+ ),
+ ),
+ hsprintf(''),
+ )));
+
+ $this->assertEqual(
+ '
',
+ phutil_tag(
+ 'div',
+ array(),
+ array(
+ array(
+ array(
+ phutil_tag('br'),
+ array(
+ phutil_tag('hr'),
+ ),
+ phutil_tag('wbr'),
+ ),
+ ),
+ ))->getHTMLContent());
+ }
+
+}
diff --git a/src/markup/__tests__/PhutilSafeHTMLTestCase.php b/src/markup/__tests__/PhutilSafeHTMLTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/markup/__tests__/PhutilSafeHTMLTestCase.php
@@ -0,0 +1,19 @@
+assertSkipped(pht('Operator extension not available.'));
+ }
+
+ $a = phutil_tag('a');
+ $ab = $a.phutil_tag('b');
+ $this->assertEqual('', $ab->getHTMLContent());
+ $this->assertEqual('', $a->getHTMLContent());
+
+ $a .= phutil_tag('a');
+ $this->assertEqual('', $a->getHTMLContent());
+ }
+
+}
diff --git a/src/markup/engine/PhutilRemarkupEngine.php b/src/markup/engine/PhutilRemarkupEngine.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/PhutilRemarkupEngine.php
@@ -0,0 +1,302 @@
+config[$key] = $value;
+ return $this;
+ }
+
+  /**
+   * Read a configuration value from this engine.
+   *
+   * @param string Configuration key.
+   * @param wild Value to return when the key is not set.
+   * @return wild Configured value, or the default.
+   */
+  public function getConfig($key, $default = null) {
+    $value = idx($this->config, $key, $default);
+    return $value;
+  }
+
+ /**
+ * Set the rendering mode.
+ *
+ * The value is tested as a bitmask against the MODE_* class constants by
+ * isTextMode() and isHTMLMailMode().
+ *
+ * @param int Mode bitmask.
+ * @return this
+ */
+ public function setMode($mode) {
+ $this->mode = $mode;
+ return $this;
+ }
+
+  /**
+   * Test whether the text-mode bit is set in the current mode.
+   *
+   * @return int Nonzero when MODE_TEXT is set, 0 otherwise.
+   */
+  public function isTextMode() {
+    $text_bit = self::MODE_TEXT;
+    return $this->mode & $text_bit;
+  }
+
+  /**
+   * Test whether the HTML-mail bit is set in the current mode.
+   *
+   * @return int Nonzero when MODE_HTML_MAIL is set, 0 otherwise.
+   */
+  public function isHTMLMailMode() {
+    $mail_bit = self::MODE_HTML_MAIL;
+    return $this->mode & $mail_bit;
+  }
+
+  /**
+   * Install the block rules this engine applies.
+   *
+   * Rules are sorted by their priority vector, attached to this engine, and
+   * scanned for markup rules which declare a postprocess key.
+   *
+   * @param list<PhutilRemarkupBlockRule> Block rules to install.
+   * @return this
+   */
+  public function setBlockRules(array $rules) {
+    assert_instances_of($rules, 'PhutilRemarkupBlockRule');
+
+    $this->blockRules = msortv($rules, 'getPriorityVector');
+    foreach ($this->blockRules as $block_rule) {
+      $block_rule->setEngine($this);
+    }
+
+    // Index markup rules by postprocess key. When several rules share a key,
+    // the one visited last is kept.
+    $by_key = array();
+    foreach ($this->blockRules as $block_rule) {
+      foreach ($block_rule->getMarkupRules() as $markup_rule) {
+        $post_key = $markup_rule->getPostprocessKey();
+        if ($post_key === null) {
+          continue;
+        }
+        $by_key[$post_key] = $markup_rule;
+      }
+    }
+    $this->postprocessRules = $by_key;
+
+    return $this;
+  }
+
+  /**
+   * Read a metadata value recorded while marking up the current text.
+   *
+   * @param string Metadata key.
+   * @param wild Value to return when the key is not set.
+   * @return wild Stored value, or the default.
+   */
+  public function getTextMetadata($key, $default = null) {
+    if (!isset($this->metadata[$key])) {
+      return idx($this->metadata, $key, $default);
+    }
+    return $this->metadata[$key];
+  }
+
+ /**
+ * Record a metadata value for the text currently being processed.
+ *
+ * @param string Metadata key.
+ * @param wild Value to store.
+ * @return this
+ */
+ public function setTextMetadata($key, $value) {
+ $this->metadata[$key] = $value;
+ return $this;
+ }
+
+  /**
+   * Put a piece of text into block storage.
+   *
+   * In text mode the value is first wrapped with phutil_safe_html().
+   *
+   * @param wild Text to store.
+   * @return wild Whatever the storage's store() call returns.
+   */
+  public function storeText($text) {
+    $to_store = $this->isTextMode()
+      ? phutil_safe_html($text)
+      : $text;
+
+    return $this->storage->store($to_store);
+  }
+
+  /**
+   * Replace the text previously stored under a token.
+   *
+   * In text mode the replacement is first wrapped with phutil_safe_html().
+   *
+   * @param wild Token identifying the stored text.
+   * @param wild Replacement text.
+   * @return this
+   */
+  public function overwriteStoredText($token, $new_text) {
+    $replacement = $this->isTextMode()
+      ? phutil_safe_html($new_text)
+      : $new_text;
+
+    $this->storage->overwrite($token, $replacement);
+
+    return $this;
+  }
+
+  /**
+   * Mark up text by running the preprocess and postprocess phases in
+   * sequence.
+   *
+   * @param string Remarkup source text.
+   * @return wild Result of postprocessText().
+   */
+  public function markupText($text) {
+    $preprocessed = $this->preprocessText($text);
+    return $this->postprocessText($preprocessed);
+  }
+
+  /**
+   * Increment the nesting counter for a named state.
+   *
+   * @param string Name of the state to push.
+   * @return this
+   */
+  public function pushState($state) {
+    // A missing or zero counter starts from 0.
+    $depth = empty($this->states[$state]) ? 0 : $this->states[$state];
+    $this->states[$state] = $depth + 1;
+
+    return $this;
+  }
+
+  /**
+   * Decrement the nesting counter for a named state.
+   *
+   * @param string Name of the state to pop.
+   * @return this
+   * @throws Exception If the state has no outstanding pushes.
+   */
+  public function popState($state) {
+    // An empty or missing counter means no pushState() call is left to undo:
+    // the state has been popped more often than it was pushed.
+    if (empty($this->states[$state])) {
+      throw new Exception(pht("State '%s' popped more than pushed!", $state));
+    }
+
+    $this->states[$state]--;
+
+    // Remove counters which reach zero.
+    if (!$this->states[$state]) {
+      unset($this->states[$state]);
+    }
+
+    return $this;
+  }
+
+  /**
+   * Test whether a named state currently has outstanding pushes.
+   *
+   * @param string Name of the state.
+   * @return bool True when the state's counter is set and nonzero.
+   */
+  public function getState($state) {
+    $is_active = !empty($this->states[$state]);
+    return $is_active;
+  }
+
+  /**
+   * Run the first markup phase: split text into blocks, mark up each block,
+   * and capture the storage map and metadata produced along the way.
+   *
+   * Metadata and block storage are reset at the start, and the storage
+   * reference is cleared again before returning.
+   *
+   * @param string Remarkup source text.
+   * @return dict Dictionary with "output", "storage" and "metadata" keys.
+   */
+  public function preprocessText($text) {
+    $this->metadata = array();
+    $this->storage = new PhutilRemarkupBlockStorage();
+
+    $rendered = array();
+    foreach ($this->splitTextIntoBlocks($text) as $block) {
+      $rendered[] = $this->markupBlock($block);
+    }
+
+    $result = array(
+      'output' => $this->flattenOutput($rendered),
+      'storage' => $this->storage->getMap(),
+      'metadata' => $this->metadata,
+    );
+
+    $this->storage = null;
+
+    return $result;
+  }
+
+ /**
+ * Split text into a list of block dictionaries.
+ *
+ * Each block records the line where it starts ("start"), how many lines
+ * it spans ("num_lines"), the block rule which matched it ("rule"),
+ * whether all of its lines are blank ("is_empty"), its text ("text") and
+ * any nested child blocks ("children").
+ *
+ * @param string Text to split.
+ * @param int Current recursion depth for child blocks.
+ * @return list<dict> List of block dictionaries.
+ * @throws Exception If no block rule matches at some line.
+ */
+ private function splitTextIntoBlocks($text, $depth = 0) {
+ // Apply basic block and paragraph normalization to the text. NOTE: We don't
+ // strip trailing whitespace because it is semantic in some contexts,
+ // notably inlined diffs that the author intends to show as a code block.
+ // NOTE(review): the second argument presumably keeps line endings, since
+ // lines are re-joined below with an empty separator -- confirm.
+ $text = phutil_split_lines($text, true);
+ $block_rules = $this->blockRules;
+ $blocks = array();
+ $cursor = 0;
+ $prev_block = array();
+
+ while (isset($text[$cursor])) {
+ $starting_cursor = $cursor;
+ // Rules are tried in the order of $this->blockRules; the first rule
+ // which reports a nonzero line count claims the lines.
+ foreach ($block_rules as $block_rule) {
+ $num_lines = $block_rule->getMatchingLineCount($text, $cursor);
+
+ if ($num_lines) {
+ if ($blocks) {
+ $prev_block = last($blocks);
+ }
+
+ $curr_block = array(
+ 'start' => $cursor,
+ 'num_lines' => $num_lines,
+ 'rule' => $block_rule,
+ 'is_empty' => self::isEmptyBlock($text, $cursor, $num_lines),
+ 'children' => array(),
+ );
+
+ // When merging, the previous block absorbs the new lines and stays
+ // "empty" only if both halves are empty.
+ if ($prev_block
+ && self::shouldMergeBlocks($text, $prev_block, $curr_block)) {
+ $blocks[last_key($blocks)]['num_lines'] += $curr_block['num_lines'];
+ $blocks[last_key($blocks)]['is_empty'] =
+ $blocks[last_key($blocks)]['is_empty'] && $curr_block['is_empty'];
+ } else {
+ $blocks[] = $curr_block;
+ }
+
+ $cursor += $num_lines;
+ break;
+ }
+ }
+
+ // If the cursor did not advance, no rule matched this line; bail out
+ // instead of looping forever.
+ if ($starting_cursor === $cursor) {
+ throw new Exception(pht('Block in text did not match any block rule.'));
+ }
+ }
+
+ // Attach each block's text only after merging is complete, so merged
+ // blocks get the text of their full line range.
+ foreach ($blocks as $key => $block) {
+ $lines = array_slice($text, $block['start'], $block['num_lines']);
+ $blocks[$key]['text'] = implode('', $lines);
+ }
+
+ // Stop splitting child blocks apart if we get too deep. This arrests
+ // any blocks which have looping child rules, and stops the stack from
+ // exploding if someone writes a hilarious comment with 5,000 levels of
+ // quoted text.
+
+ if ($depth < self::MAX_CHILD_DEPTH) {
+ foreach ($blocks as $key => $block) {
+ $rule = $block['rule'];
+ if (!$rule->supportsChildBlocks()) {
+ continue;
+ }
+
+ // The rule splits its text into the part it keeps and the part to
+ // be parsed recursively as child blocks.
+ list($parent_text, $child_text) = $rule->extractChildText(
+ $block['text']);
+ $blocks[$key]['text'] = $parent_text;
+ $blocks[$key]['children'] = $this->splitTextIntoBlocks(
+ $child_text,
+ $depth + 1);
+ }
+ }
+
+ return $blocks;
+ }
+
+  /**
+   * Mark up one block, rendering its child blocks first.
+   *
+   * @param dict Block dictionary with "rule", "text" and "children" keys.
+   * @return wild Result of the block rule's markupText() call.
+   */
+  private function markupBlock(array $block) {
+    $rendered_children = array();
+    foreach ($block['children'] as $child_block) {
+      $rendered_children[] = $this->markupBlock($child_block);
+    }
+
+    // Rules receive null, not an empty value, when a block has no children.
+    $children = $rendered_children
+      ? $this->flattenOutput($rendered_children)
+      : null;
+
+    return $block['rule']->markupText($block['text'], $children);
+  }
+
+  /**
+   * Join rendered blocks into one value, separated by blank lines.
+   *
+   * Text mode produces a plain string with a trailing newline; otherwise
+   * the pieces are joined with phutil_implode_html().
+   *
+   * @param list<wild> Rendered blocks.
+   * @return wild Joined output.
+   */
+  private function flattenOutput(array $output) {
+    if (!$this->isTextMode()) {
+      return phutil_implode_html("\n\n", $output);
+    }
+
+    return implode("\n\n", $output)."\n";
+  }
+
+  /**
+   * Decide whether a newly matched block should be folded into the block
+   * before it.
+   *
+   * Merging is only considered when both blocks were matched by the default
+   * block rule. It then happens if either block is empty, or if the line
+   * just before the new block and the new block's first line both have
+   * content.
+   *
+   * @param list<string> All lines of the text being split.
+   * @param dict Previous block.
+   * @param dict Newly matched block.
+   * @return bool True if the blocks should be merged.
+   */
+  private static function shouldMergeBlocks($text, $prev_block, $curr_block) {
+    $rules = ipull(array($prev_block, $curr_block), 'rule');
+
+    try {
+      // Throws unless both rules are default block rules.
+      assert_instances_of($rules, 'PhutilRemarkupDefaultBlockRule');
+
+      // An empty block on either side keeps the merge going.
+      if ($prev_block['is_empty'] || $curr_block['is_empty']) {
+        return true;
+      }
+
+      // Two adjacent lines with content belong to the same block.
+      $first_line = $curr_block['start'];
+      if (strlen(trim($text[$first_line - 1]))
+        && strlen(trim($text[$first_line]))) {
+        return true;
+      }
+    } catch (Exception $ex) {
+      // Fall through and report that the blocks should not be merged.
+    }
+
+    return false;
+  }
+
+  /**
+   * Test whether every line in a range is blank after trimming.
+   *
+   * @param list<string> All lines of the text being split.
+   * @param int Index of the first line in the range.
+   * @param int Number of lines in the range.
+   * @return bool True if no line in the range has content.
+   */
+  private static function isEmptyBlock($text, $start, $num_lines) {
+    $end = $start + $num_lines;
+
+    $cursor = $start;
+    while ($cursor < $end) {
+      if (strlen(trim($text[$cursor]))) {
+        return false;
+      }
+      $cursor++;
+    }
+
+    return true;
+  }
+
+  /**
+   * Run the second markup phase on a dictionary from preprocessText().
+   *
+   * Restores metadata and block storage from the dictionary, lets block
+   * rules and postprocess rules run their hooks, then restores stored text
+   * into the output.
+   *
+   * @param dict Dictionary with "output", "storage" and "metadata" keys.
+   * @return wild Result of restoreText().
+   */
+  public function postprocessText(array $dict) {
+    $this->metadata = idx($dict, 'metadata', array());
+
+    $storage_map = idx($dict, 'storage', array());
+    $this->storage = new PhutilRemarkupBlockStorage();
+    $this->storage->setMap($storage_map);
+
+    foreach ($this->blockRules as $block_rule) {
+      $block_rule->postprocess();
+    }
+
+    foreach ($this->postprocessRules as $markup_rule) {
+      $markup_rule->didMarkupText();
+    }
+
+    $output = idx($dict, 'output');
+    return $this->restoreText($output);
+  }
+
+  /**
+   * Ask block storage to restore stored text into the given output.
+   *
+   * @param wild Output to restore.
+   * @return wild Result of the storage's restore() call.
+   */
+  public function restoreText($text) {
+    $text_mode = $this->isTextMode();
+    return $this->storage->restore($text, $text_mode);
+  }
+}
diff --git a/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php b/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php
@@ -0,0 +1,121 @@
+markupText($root.$file);
+ }
+ }
+
+  /**
+   * Run a single fixture file through the engine and compare the results.
+   *
+   * A fixture has three sections separated by a line of ten tildes: the
+   * remarkup input, the expected HTML output, and the expected text-mode
+   * output.
+   *
+   * @param string Path to the fixture file.
+   * @return void
+   */
+  private function markupText($markup_file) {
+    $file = basename($markup_file);
+    $contents = Filesystem::readFile($markup_file);
+
+    $parts = explode("\n~~~~~~~~~~\n", $contents);
+    $this->assertEqual(3, count($parts), $markup_file);
+
+    list($input_remarkup, $expected_output, $expected_text) = $parts;
+
+    $engine = $this->buildNewTestEngine();
+
+    // Some fixtures need special setup before markup runs.
+    if ($file == 'raw-escape.txt') {
+      // NOTE: This fixture exercises PhutilRemarkupEscapeRemarkupRule and
+      // PhutilRemarkupBlockStorage, which are triggered by "\1". Because
+      // "\1" is hard to type, the fixture uses "~" as a stand-in for it.
+      $input_remarkup = str_replace('~', "\1", $input_remarkup);
+      $expected_output = str_replace('~', "\1", $expected_output);
+      $expected_text = str_replace('~', "\1", $expected_text);
+    } else if ($file == 'toc.txt') {
+      $engine->setConfig('header.generate-toc', true);
+    } else if ($file == 'link-same-window.txt') {
+      $engine->setConfig('uri.same-window', true);
+    } else if ($file == 'link-square.txt') {
+      $engine->setConfig('uri.base', 'http://www.example.com/');
+      $engine->setConfig('uri.here', 'http://www.example.com/page/');
+    }
+
+    $actual_output = (string)$engine->markupText($input_remarkup);
+
+    // The table of contents fixture expects the rendered table before the
+    // document output.
+    if ($file == 'toc.txt') {
+      $table_of_contents =
+        PhutilRemarkupHeaderBlockRule::renderTableOfContents($engine);
+      $actual_output = $table_of_contents."\n\n".$actual_output;
+    }
+
+    $this->assertEqual(
+      $expected_output,
+      $actual_output,
+      pht("Failed to markup HTML in file '%s'.", $file));
+
+    // Repeat the run with the same engine switched to text mode.
+    $engine->setMode(PhutilRemarkupEngine::MODE_TEXT);
+    $actual_output = (string)$engine->markupText($input_remarkup);
+
+    $this->assertEqual(
+      $expected_text,
+      $actual_output,
+      pht("Failed to markup text in file '%s'.", $file));
+  }
+
+  /**
+   * Build an engine configured with the inline and block rules that the
+   * fixture tests run against.
+   *
+   * @return PhutilRemarkupEngine Configured engine.
+   */
+  private function buildNewTestEngine() {
+    $engine = new PhutilRemarkupEngine();
+
+    $allowed_protocols = array(
+      'http' => true,
+      'mailto' => true,
+      'tel' => true,
+    );
+    $engine->setConfig('uri.allowed-protocols', $allowed_protocols);
+
+    $rules = array(
+      new PhutilRemarkupEscapeRemarkupRule(),
+      new PhutilRemarkupMonospaceRule(),
+      new PhutilRemarkupDocumentLinkRule(),
+      new PhutilRemarkupHyperlinkRule(),
+      new PhutilRemarkupBoldRule(),
+      new PhutilRemarkupItalicRule(),
+      new PhutilRemarkupDelRule(),
+      new PhutilRemarkupUnderlineRule(),
+      new PhutilRemarkupHighlightRule(),
+    );
+
+    $blocks = array(
+      new PhutilRemarkupQuotesBlockRule(),
+      new PhutilRemarkupReplyBlockRule(),
+      new PhutilRemarkupHeaderBlockRule(),
+      new PhutilRemarkupHorizontalRuleBlockRule(),
+      new PhutilRemarkupCodeBlockRule(),
+      new PhutilRemarkupLiteralBlockRule(),
+      new PhutilRemarkupNoteBlockRule(),
+      new PhutilRemarkupTableBlockRule(),
+      new PhutilRemarkupSimpleTableBlockRule(),
+      new PhutilRemarkupDefaultBlockRule(),
+      new PhutilRemarkupListBlockRule(),
+      new PhutilRemarkupInterpreterBlockRule(),
+    );
+
+    // Every block rule except the code block rule gets the inline rules.
+    foreach ($blocks as $block) {
+      if ($block instanceof PhutilRemarkupCodeBlockRule) {
+        continue;
+      }
+      $block->setMarkupRules($rules);
+    }
+
+    $engine->setBlockRules($blocks);
+
+    return $engine;
+  }
+
+}
diff --git a/src/markup/engine/__tests__/remarkup/across-newlines.txt b/src/markup/engine/__tests__/remarkup/across-newlines.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/across-newlines.txt
@@ -0,0 +1,7 @@
+**duck
+quack**
+~~~~~~~~~~
+duck
+quack
+~~~~~~~~~~
+**duck quack**
diff --git a/src/markup/engine/__tests__/remarkup/backticks-whitespace.txt b/src/markup/engine/__tests__/remarkup/backticks-whitespace.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/backticks-whitespace.txt
@@ -0,0 +1,17 @@
+```x```
+
+```
+y
+```
+~~~~~~~~~~
+x
+
+
+
+y
+~~~~~~~~~~
+ x
+
+
+
+ y
diff --git a/src/markup/engine/__tests__/remarkup/block-then-list.txt b/src/markup/engine/__tests__/remarkup/block-then-list.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/block-then-list.txt
@@ -0,0 +1,12 @@
+ lang=txt
+ code block
+
+ - still a code block
+~~~~~~~~~~
+code block
+
+- still a code block
+~~~~~~~~~~
+ code block
+
+ - still a code block
diff --git a/src/markup/engine/__tests__/remarkup/code-block-whitespace.txt b/src/markup/engine/__tests__/remarkup/code-block-whitespace.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/code-block-whitespace.txt
@@ -0,0 +1,9 @@
+ lang=txt
+ x
+ y
+~~~~~~~~~~
+ x
+y
+~~~~~~~~~~
+ x
+ y
diff --git a/src/markup/engine/__tests__/remarkup/del.txt b/src/markup/engine/__tests__/remarkup/del.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/del.txt
@@ -0,0 +1,11 @@
+omg~~ wtf~~~~~ bbq~~~ lol~~
+~~deleted text~~
+~~This is a great idea~~~ die forever please
+~~~~~~
+~~~~~~~~~~
+omg~~ wtf~~~~~ bbq~~~ lol~~
+deleted text
+This is a great idea~ die forever please
+~~~~~~
+~~~~~~~~~~
+omg~~ wtf~~~~~ bbq~~~ lol~~ ~~deleted text~~ ~~This is a great idea~~~ die forever please ~~~~~~
diff --git a/src/markup/engine/__tests__/remarkup/diff.txt b/src/markup/engine/__tests__/remarkup/diff.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/diff.txt
@@ -0,0 +1,36 @@
+here is a diff
+
+ lang=diff
+ @@ derp derp @@
+ x
+ y
+
+ - x
+ - y
+ + z
+
+derp derp
+~~~~~~~~~~
+here is a diff
+
+@@ derp derp @@
+x
+y
+
+- x
+- y
++ z
+
+derp derp
+~~~~~~~~~~
+here is a diff
+
+ @@ derp derp @@
+ x
+ y
+
+ - x
+ - y
+ + z
+
+derp derp
diff --git a/src/markup/engine/__tests__/remarkup/disallowed-link.txt b/src/markup/engine/__tests__/remarkup/disallowed-link.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/disallowed-link.txt
@@ -0,0 +1,5 @@
+javascript://www.example.com/
+~~~~~~~~~~
+javascript://www.example.com/
+~~~~~~~~~~
+javascript://www.example.com/
diff --git a/src/markup/engine/__tests__/remarkup/entities.txt b/src/markup/engine/__tests__/remarkup/entities.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/entities.txt
@@ -0,0 +1,5 @@
+< > & "
+~~~~~~~~~~
+< > & "
+~~~~~~~~~~
+< > & "
diff --git a/src/markup/engine/__tests__/remarkup/header-skip.txt b/src/markup/engine/__tests__/remarkup/header-skip.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/header-skip.txt
@@ -0,0 +1,11 @@
+#2 is my favorite.
+
+#project
+~~~~~~~~~~
+#2 is my favorite.
+
+#project
+~~~~~~~~~~
+#2 is my favorite.
+
+#project
diff --git a/src/markup/engine/__tests__/remarkup/headers.txt b/src/markup/engine/__tests__/remarkup/headers.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/headers.txt
@@ -0,0 +1,57 @@
+@nolint (UTF8)
+
+=a=
+
+blah blah blah
+
+
+= b =
+
+Markdown-Style Large Header
+====
+
+Markdown-Style Small Header
+----
+
+=== Remarkup-Style Smaller Header
+
+
+= ☃☃☃ UTF8 Header ☃☃☃ =
+~~~~~~~~~~
+@nolint (UTF8)
+
+a
+
+blah blah blah
+
+b
+
+Markdown-Style Large Header
+
+Markdown-Style Small Header
+
+Remarkup-Style Smaller Header
+
+☃☃☃ UTF8 Header ☃☃☃
+~~~~~~~~~~
+@nolint (UTF8)
+
+a
+=
+
+blah blah blah
+
+b
+=
+
+Markdown-Style Large Header
+===========================
+
+Markdown-Style Small Header
+---------------------------
+
+Remarkup-Style Smaller Header
+-----------------------------
+
+☃☃☃ UTF8 Header ☃☃☃
+===================
diff --git a/src/markup/engine/__tests__/remarkup/highlight.txt b/src/markup/engine/__tests__/remarkup/highlight.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/highlight.txt
@@ -0,0 +1,9 @@
+how about we !!highlight!! some !!TEXT!!!
+wow this must be **!!very important!!**
+omg!!!!!
+~~~~~~~~~~
+how about we highlight some TEXT!
+wow this must be very important
+omg!!!!!
+~~~~~~~~~~
+how about we !!highlight!! some !!TEXT!!! wow this must be **!!very important!!** omg!!!!!
diff --git a/src/markup/engine/__tests__/remarkup/horizonal-rule.txt b/src/markup/engine/__tests__/remarkup/horizonal-rule.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/horizonal-rule.txt
@@ -0,0 +1,41 @@
+___
+
+_____
+
+***
+
+* * * * * * *
+
+---
+
+- - - - - - -
+
+ ---
+~~~~~~~~~~
+
+
+
+
+
+
+
+
+
+
+
+
+
+~~~~~~~~~~
+___
+
+_____
+
+***
+
+* * * * * * *
+
+---
+
+- - - - - - -
+
+ ---
diff --git a/src/markup/engine/__tests__/remarkup/important.txt b/src/markup/engine/__tests__/remarkup/important.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/important.txt
@@ -0,0 +1,15 @@
+IMPORTANT: interesting **stuff**
+
+(IMPORTANT) interesting **stuff**
+~~~~~~~~~~
+IMPORTANT: interesting stuff
+
+
+
+interesting stuff
+~~~~~~~~~~
+IMPORTANT: interesting **stuff**
+
+
+
+(IMPORTANT) interesting **stuff**
diff --git a/src/markup/engine/__tests__/remarkup/interpreter-test.txt b/src/markup/engine/__tests__/remarkup/interpreter-test.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/interpreter-test.txt
@@ -0,0 +1,58 @@
+phutil_test_block_interpreter (foo=bar) {{{
+content
+}}}
+
+phutil_test_block_interpreter {{{ content
+content }}}
+
+phutil_test_block_interpreter {{{ content }}}
+
+phutil_test_block_interpreter(x=y){{{content}}}
+
+phutil_fake_test_block_interpreter {{{ content }}}
+~~~~~~~~~~
+Content: (content)
+Argv: (foo=bar)
+
+
+
+Content: ( content
+content )
+Argv: ()
+
+
+
+Content: ( content )
+Argv: ()
+
+
+
+Content: (content)
+Argv: (x=y)
+
+
+
+No interpreter found: phutil_fake_test_block_interpreter
+~~~~~~~~~~
+Content: (content)
+Argv: (foo=bar)
+
+
+
+Content: ( content
+content )
+Argv: ()
+
+
+
+Content: ( content )
+Argv: ()
+
+
+
+Content: (content)
+Argv: (x=y)
+
+
+
+(No interpreter found: phutil_fake_test_block_interpreter)
diff --git a/src/markup/engine/__tests__/remarkup/just-backticks.txt b/src/markup/engine/__tests__/remarkup/just-backticks.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/just-backticks.txt
@@ -0,0 +1,5 @@
+```
+~~~~~~~~~~
+
+~~~~~~~~~~
+
diff --git a/src/markup/engine/__tests__/remarkup/leading-newline.txt b/src/markup/engine/__tests__/remarkup/leading-newline.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/leading-newline.txt
@@ -0,0 +1,6 @@
+
+a
+~~~~~~~~~~
+a
+~~~~~~~~~~
+a
diff --git a/src/markup/engine/__tests__/remarkup/link-alternate.txt b/src/markup/engine/__tests__/remarkup/link-alternate.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-alternate.txt
@@ -0,0 +1,12 @@
+[Example](http://www.example.com/)
+
+x[0][1](**ptr);
+
+~~~~~~~~~~
+
+
+x[0][1](**ptr);
+~~~~~~~~~~
+Example
+
+x[0][1](**ptr);
diff --git a/src/markup/engine/__tests__/remarkup/link-brackets.txt b/src/markup/engine/__tests__/remarkup/link-brackets.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-brackets.txt
@@ -0,0 +1,5 @@
+
+~~~~~~~~~~
+http://www.zany.com/omg/weird_url,,,
+~~~~~~~~~~
+http://www.zany.com/omg/weird_url,,,
diff --git a/src/markup/engine/__tests__/remarkup/link-edge-cases.txt b/src/markup/engine/__tests__/remarkup/link-edge-cases.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-edge-cases.txt
@@ -0,0 +1,35 @@
+http://www.example.com/
+
+(http://www.example.com/)
+
+
+
+http://www.example.com/wiki/example_(disambiguation)
+
+(example http://www.example.com/)
+
+Quick! http://www.example.com/!
+~~~~~~~~~~
+
+
+
+
+
+
+http://www.example.com/wiki/example_(disambiguation)
+
+(example http://www.example.com/)
+
+Quick! http://www.example.com/!
+~~~~~~~~~~
+http://www.example.com/
+
+(http://www.example.com/)
+
+http://www.example.com/
+
+http://www.example.com/wiki/example_(disambiguation)
+
+(example http://www.example.com/)
+
+Quick! http://www.example.com/!
diff --git a/src/markup/engine/__tests__/remarkup/link-mailto.txt b/src/markup/engine/__tests__/remarkup/link-mailto.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-mailto.txt
@@ -0,0 +1,18 @@
+[[ mailto:alincoln@example.com | mail me ]]
+
+[ mail me ]( mailto:alincoln@example.com )
+
+[[mailto:alincoln@example.com]]
+
+~~~~~~~~~~
+
+
+
+
+
+~~~~~~~~~~
+mail me
+
+mail me
+
+alincoln@example.com
diff --git a/src/markup/engine/__tests__/remarkup/link-mixed.txt b/src/markup/engine/__tests__/remarkup/link-mixed.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-mixed.txt
@@ -0,0 +1,18 @@
+[[http://www.example.com/ | Example]](http://www.alternate.org/)
+
+(http://www.alternate.org/)[[http://www.example.com/ | Example]]
+
+
+
+~~~~~~~~~~
+Example(http://www.alternate.org/)
+
+(http://www.alternate.org/)Example
+
+<http://www.example.com/ Example>
+~~~~~~~~~~
+Example (http://www.alternate.org/)
+
+(http://www.alternate.org/)Example
+
+ >
diff --git a/src/markup/engine/__tests__/remarkup/link-noreferrer.txt b/src/markup/engine/__tests__/remarkup/link-noreferrer.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-noreferrer.txt
@@ -0,0 +1,16 @@
+[[ /\evil.com ]]
+
+[[ /
+/evil.com ]]
+
+~~~~~~~~~~
+
+
+
+~~~~~~~~~~
+/\evil.com
+
+/
+/evil.com
diff --git a/src/markup/engine/__tests__/remarkup/link-same-window.txt b/src/markup/engine/__tests__/remarkup/link-same-window.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-same-window.txt
@@ -0,0 +1,11 @@
+[[http://www.example.com/]]
+
+http://www.example.com/
+~~~~~~~~~~
+
+
+
+~~~~~~~~~~
+http://www.example.com/
+
+http://www.example.com/
diff --git a/src/markup/engine/__tests__/remarkup/link-square.txt b/src/markup/engine/__tests__/remarkup/link-square.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-square.txt
@@ -0,0 +1,29 @@
+[[http://www.example.com/]]
+
+[[http://www.example.com/ | example.com]]
+
+[[/x/]]
+
+[[#anchor]]
+
+[[#anchor | Anchors ]]
+~~~~~~~~~~
+
+
+
+
+
+
+http://www.example.com/page/#anchor
+
+
+~~~~~~~~~~
+http://www.example.com/
+
+example.com
+
+http://www.example.com/x/
+
+http://www.example.com/page/#anchor
+
+Anchors
diff --git a/src/markup/engine/__tests__/remarkup/link-tel.txt b/src/markup/engine/__tests__/remarkup/link-tel.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-tel.txt
@@ -0,0 +1,18 @@
+[[ tel:18005555555 | call me ]]
+
+[ call me ]( tel:18005555555 )
+
+[[tel:18005555555]]
+
+~~~~~~~~~~
+
+
+
+
+
+~~~~~~~~~~
+call me <18005555555>
+
+call me <18005555555>
+
+18005555555
diff --git a/src/markup/engine/__tests__/remarkup/link-with-angle-brackets.txt b/src/markup/engine/__tests__/remarkup/link-with-angle-brackets.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-with-angle-brackets.txt
@@ -0,0 +1,5 @@
+http://.example.com/
+~~~~~~~~~~
+http://<www>.example.com/
+~~~~~~~~~~
+http://.example.com/
diff --git a/src/markup/engine/__tests__/remarkup/link-with-angle-link-anchor.txt b/src/markup/engine/__tests__/remarkup/link-with-angle-link-anchor.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-with-angle-link-anchor.txt
@@ -0,0 +1,5 @@
+
+~~~~~~~~~~
+<http://x.y#http://x.y#>
+~~~~~~~~~~
+
diff --git a/src/markup/engine/__tests__/remarkup/link-with-link-anchor.txt b/src/markup/engine/__tests__/remarkup/link-with-link-anchor.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-with-link-anchor.txt
@@ -0,0 +1,5 @@
+http://x.y#http://x.y#
+~~~~~~~~~~
+http://x.y#http://x.y#
+~~~~~~~~~~
+http://x.y#http://x.y#
diff --git a/src/markup/engine/__tests__/remarkup/link-with-punctuation.txt b/src/markup/engine/__tests__/remarkup/link-with-punctuation.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-with-punctuation.txt
@@ -0,0 +1,9 @@
+http://www.example.com/,
+http://www.example.com/..
+http://www.example.com/!!!
+~~~~~~~~~~
+http://www.example.com/,
+http://www.example.com/..
+http://www.example.com/!!!
+~~~~~~~~~~
+http://www.example.com/, http://www.example.com/.. http://www.example.com/!!!
diff --git a/src/markup/engine/__tests__/remarkup/link-with-tilde.txt b/src/markup/engine/__tests__/remarkup/link-with-tilde.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link-with-tilde.txt
@@ -0,0 +1,5 @@
+http://www.example.com/~
+~~~~~~~~~~
+
+~~~~~~~~~~
+http://www.example.com/~
diff --git a/src/markup/engine/__tests__/remarkup/link.txt b/src/markup/engine/__tests__/remarkup/link.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/link.txt
@@ -0,0 +1,5 @@
+http://www.example.com/
+~~~~~~~~~~
+
+~~~~~~~~~~
+http://www.example.com/
diff --git a/src/markup/engine/__tests__/remarkup/list-alternate-style.txt b/src/markup/engine/__tests__/remarkup/list-alternate-style.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-alternate-style.txt
@@ -0,0 +1,15 @@
+- a
+-- b
+--- c
+~~~~~~~~~~
+
+- a
+- b
+- c
+
+
+
+~~~~~~~~~~
+- a
+ - b
+ - c
diff --git a/src/markup/engine/__tests__/remarkup/list-blow-stack.txt b/src/markup/engine/__tests__/remarkup/list-blow-stack.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-blow-stack.txt
@@ -0,0 +1,138 @@
+- a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+
+
+derp
+~~~~~~~~~~
+
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+- a
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+derp
+~~~~~~~~~~
+- a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+ - a
+
+derp
diff --git a/src/markup/engine/__tests__/remarkup/list-checkboxes.txt b/src/markup/engine/__tests__/remarkup/list-checkboxes.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-checkboxes.txt
@@ -0,0 +1,41 @@
+- [] a
+- [ ] b
+- [X] c
+- d
+
+[ ] A
+[X] B
+ [ ] C
+ [ ] D
+
+[1] footnote
+
+~~~~~~~~~~
+
+- a
+- b
+- c
+- d
+
+
+
+- A
+- B
+- C
+- D
+
+
+
+[1] footnote
+~~~~~~~~~~
+[ ] a
+[ ] b
+[X] c
+- d
+
+[ ] A
+[X] B
+ [ ] C
+ [ ] D
+
+[1] footnote
diff --git a/src/markup/engine/__tests__/remarkup/list-crazystairs.txt b/src/markup/engine/__tests__/remarkup/list-crazystairs.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-crazystairs.txt
@@ -0,0 +1,15 @@
+## Fruit
+- Apple
+- Banana
+~~~~~~~~~~
+
+
+- Fruit
+
+- Apple
+- Banana
+
+~~~~~~~~~~
+ 1. Fruit
+- Apple
+- Banana
diff --git a/src/markup/engine/__tests__/remarkup/list-first-style-wins.txt b/src/markup/engine/__tests__/remarkup/list-first-style-wins.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-first-style-wins.txt
@@ -0,0 +1,19 @@
+# item
+- item
+- item
+
+derp
+~~~~~~~~~~
+
+- item
+- item
+- item
+
+
+derp
+~~~~~~~~~~
+1. item
+2. item
+3. item
+
+derp
diff --git a/src/markup/engine/__tests__/remarkup/list-hash.txt b/src/markup/engine/__tests__/remarkup/list-hash.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-hash.txt
@@ -0,0 +1,19 @@
+# item
+# item
+# item
+
+derp
+~~~~~~~~~~
+
+- item
+- item
+- item
+
+
+derp
+~~~~~~~~~~
+1. item
+2. item
+3. item
+
+derp
diff --git a/src/markup/engine/__tests__/remarkup/list-header-last.txt b/src/markup/engine/__tests__/remarkup/list-header-last.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-header-last.txt
@@ -0,0 +1,7 @@
+# At the end of a block, this should be a list.
+~~~~~~~~~~
+
+- At the end of a block, this should be a list.
+
+~~~~~~~~~~
+1. At the end of a block, this should be a list.
diff --git a/src/markup/engine/__tests__/remarkup/list-header.txt b/src/markup/engine/__tests__/remarkup/list-header.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-header.txt
@@ -0,0 +1,12 @@
+## Small Header
+
+This should be a small header.
+~~~~~~~~~~
+Small Header
+
+This should be a small header.
+~~~~~~~~~~
+Small Header
+------------
+
+This should be a small header.
diff --git a/src/markup/engine/__tests__/remarkup/list-mixed-styles.txt b/src/markup/engine/__tests__/remarkup/list-mixed-styles.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-mixed-styles.txt
@@ -0,0 +1,15 @@
+ - a
+ -- b
+ --- c
+~~~~~~~~~~
+
+- a
+- b
+- c
+
+
+
+~~~~~~~~~~
+- a
+ - b
+ - c
diff --git a/src/markup/engine/__tests__/remarkup/list-multi.txt b/src/markup/engine/__tests__/remarkup/list-multi.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-multi.txt
@@ -0,0 +1,14 @@
+- a
+ -- b
+ -- c
+~~~~~~~~~~
+
+- a
+- b
+- c
+
+
+~~~~~~~~~~
+- a
+ - b
+ - c
diff --git a/src/markup/engine/__tests__/remarkup/list-multiline.txt b/src/markup/engine/__tests__/remarkup/list-multiline.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-multiline.txt
@@ -0,0 +1,16 @@
+- a
+ a
+- b
+b
+~~~~~~~~~~
+
+- a a
+- b
+
+
+b
+~~~~~~~~~~
+- a a
+- b
+
+b
diff --git a/src/markup/engine/__tests__/remarkup/list-nest.txt b/src/markup/engine/__tests__/remarkup/list-nest.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-nest.txt
@@ -0,0 +1,30 @@
+- item
+ - sub
+- item
+ # sub
+ # sub
+- item
+
+derp
+~~~~~~~~~~
+
+- item
+- sub
+
+- item
+- sub
+- sub
+
+- item
+
+
+derp
+~~~~~~~~~~
+- item
+ - sub
+- item
+ 1. sub
+ 2. sub
+- item
+
+derp
diff --git a/src/markup/engine/__tests__/remarkup/list-paragraphs.txt b/src/markup/engine/__tests__/remarkup/list-paragraphs.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-paragraphs.txt
@@ -0,0 +1,27 @@
+- This is a list item
+ with several paragraphs.
+
+ This is the second paragraph
+ of the first list item.
+- This is the second item
+ in the list.
+ - This is a sublist.
+- This is the third item in the list.
+
+~~~~~~~~~~
+
+- This is a list item with several paragraphs.
+
+This is the second paragraph of the first list item.
+- This is the second item in the list.
+- This is a sublist.
+
+- This is the third item in the list.
+
+~~~~~~~~~~
+- This is a list item with several paragraphs.
+
+ This is the second paragraph of the first list item.
+- This is the second item in the list.
+ - This is a sublist.
+- This is the third item in the list.
diff --git a/src/markup/engine/__tests__/remarkup/list-staircase.txt b/src/markup/engine/__tests__/remarkup/list-staircase.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-staircase.txt
@@ -0,0 +1,23 @@
+ - top
+ - mid
+# bot
+
+derp
+~~~~~~~~~~
+
+
+
+- top
+
+- mid
+
+- bot
+
+
+derp
+~~~~~~~~~~
+ - top
+ - mid
+1. bot
+
+derp
diff --git a/src/markup/engine/__tests__/remarkup/list-star.txt b/src/markup/engine/__tests__/remarkup/list-star.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-star.txt
@@ -0,0 +1,19 @@
+* item
+* item
+* item
+
+derp
+~~~~~~~~~~
+
+- item
+- item
+- item
+
+
+derp
+~~~~~~~~~~
+- item
+- item
+- item
+
+derp
diff --git a/src/markup/engine/__tests__/remarkup/list-then-a-list.txt b/src/markup/engine/__tests__/remarkup/list-then-a-list.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-then-a-list.txt
@@ -0,0 +1,15 @@
+1) one
+
+- a
+~~~~~~~~~~
+
+- one
+
+
+
+- a
+
+~~~~~~~~~~
+1. one
+
+- a
diff --git a/src/markup/engine/__tests__/remarkup/list-vs-codeblock.txt b/src/markup/engine/__tests__/remarkup/list-vs-codeblock.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list-vs-codeblock.txt
@@ -0,0 +1,17 @@
+This should be a list:
+
+ - apple
+ - banana
+
+~~~~~~~~~~
+This should be a list:
+
+
+- apple
+- banana
+
+~~~~~~~~~~
+This should be a list:
+
+- apple
+- banana
diff --git a/src/markup/engine/__tests__/remarkup/list.txt b/src/markup/engine/__tests__/remarkup/list.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/list.txt
@@ -0,0 +1,13 @@
+ - < > & "
+
+text block
+~~~~~~~~~~
+
+- < > & "
+
+
+text block
+~~~~~~~~~~
+- < > & "
+
+text block
diff --git a/src/markup/engine/__tests__/remarkup/monospaced-in-monospaced.txt b/src/markup/engine/__tests__/remarkup/monospaced-in-monospaced.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/monospaced-in-monospaced.txt
@@ -0,0 +1,18 @@
+query ##SELECT * FROM `table`##
+
+`SELECT * FROM ##table##`
+
+`**x**`
+
+~~~~~~~~~~
+query SELECT * FROM `table`
+
+SELECT * FROM ##table##
+
+**x**
+~~~~~~~~~~
+query ##SELECT * FROM `table`##
+
+`SELECT * FROM ##table##`
+
+`**x**`
diff --git a/src/markup/engine/__tests__/remarkup/monospaced-plural.txt b/src/markup/engine/__tests__/remarkup/monospaced-plural.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/monospaced-plural.txt
@@ -0,0 +1,11 @@
+`Zebra`s
+
+I can`t and I won`t.
+~~~~~~~~~~
+Zebras
+
+I can`t and I won`t.
+~~~~~~~~~~
+`Zebra`s
+
+I can`t and I won`t.
diff --git a/src/markup/engine/__tests__/remarkup/monospaced.txt b/src/markup/engine/__tests__/remarkup/monospaced.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/monospaced.txt
@@ -0,0 +1,5 @@
+cmd ##ls --color > /dev/null##
+~~~~~~~~~~
+cmd ls --color > /dev/null
+~~~~~~~~~~
+cmd ##ls --color > /dev/null##
diff --git a/src/markup/engine/__tests__/remarkup/newline-then-block.txt b/src/markup/engine/__tests__/remarkup/newline-then-block.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/newline-then-block.txt
@@ -0,0 +1,30 @@
+This is a paragraph.
+
+
+ lang=txt
+ First line of code block.
+ Second line of code block.
+
+
+
+
+ Cell 1
+ Cell 2
+
+
+~~~~~~~~~~
+This is a paragraph.
+
+First line of code block.
+Second line of code block.
+
+
+Cell 1 Cell 2
+
+~~~~~~~~~~
+This is a paragraph.
+
+ First line of code block.
+ Second line of code block.
+
+| Cell 1 | Cell 2 |
diff --git a/src/markup/engine/__tests__/remarkup/note-multiline.txt b/src/markup/engine/__tests__/remarkup/note-multiline.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/note-multiline.txt
@@ -0,0 +1,14 @@
+NOTE: a
+a
+
+b
+~~~~~~~~~~
+NOTE: a
+a
+
+b
+~~~~~~~~~~
+NOTE: a
+a
+
+b
diff --git a/src/markup/engine/__tests__/remarkup/note.txt b/src/markup/engine/__tests__/remarkup/note.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/note.txt
@@ -0,0 +1,15 @@
+NOTE: interesting **stuff**
+
+(NOTE) interesting **stuff**
+~~~~~~~~~~
+NOTE: interesting stuff
+
+
+
+interesting stuff
+~~~~~~~~~~
+NOTE: interesting **stuff**
+
+
+
+(NOTE) interesting **stuff**
diff --git a/src/markup/engine/__tests__/remarkup/ordered-list-with-numbers.txt b/src/markup/engine/__tests__/remarkup/ordered-list-with-numbers.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/ordered-list-with-numbers.txt
@@ -0,0 +1,64 @@
+# aasdx
+# asdf
+
+1. asa
+ # asdf
+234) asdf
+
+234) asd
+
+1. asd
+234) asd
+
+10. ten
+11. eleven
+12. twelve
+
+1/ This explicitly should not be formatted as a list.
+~~~~~~~~~~
+
+- aasdx
+- asdf
+
+
+
+- asa
+- asdf
+
+- asdf
+
+
+
+- asd
+
+
+
+- asd
+- asd
+
+
+
+- ten
+- eleven
+- twelve
+
+
+1/ This explicitly should not be formatted as a list.
+~~~~~~~~~~
+1. aasdx
+2. asdf
+
+1. asa
+ 1. asdf
+2. asdf
+
+234. asd
+
+1. asd
+2. asd
+
+10. ten
+11. eleven
+12. twelve
+
+1/ This explicitly should not be formatted as a list.
diff --git a/src/markup/engine/__tests__/remarkup/percent-block-adjacent.txt b/src/markup/engine/__tests__/remarkup/percent-block-adjacent.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/percent-block-adjacent.txt
@@ -0,0 +1,29 @@
+%%%a%%%
+%%%b%%%
+
+%%%a
+b%%%
+
+%%%a%%%
+
+%%%b%%%
+~~~~~~~~~~
+a
+
b
+
+a
+
b
+
+a
+
+b
+~~~~~~~~~~
+a
+b
+
+a
+b
+
+a
+
+b
diff --git a/src/markup/engine/__tests__/remarkup/percent-block-multiline.txt b/src/markup/engine/__tests__/remarkup/percent-block-multiline.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/percent-block-multiline.txt
@@ -0,0 +1,21 @@
+**foo**
+%%%- first
+- second
+- third%%%
+[[http://hello | world]]
+~~~~~~~~~~
+foo
+
+- first
+
- second
+
- third
+
+
+~~~~~~~~~~
+**foo**
+
+- first
+- second
+- third
+
+world
diff --git a/src/markup/engine/__tests__/remarkup/percent-block-oneline.txt b/src/markup/engine/__tests__/remarkup/percent-block-oneline.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/percent-block-oneline.txt
@@ -0,0 +1,11 @@
+%%%[[http://hello | world]] **bold**%%%
+
+ %%%[[http://hello | world]] **bold**%%%
+~~~~~~~~~~
+[[http://hello | world]] **bold**
+
+[[http://hello | world]] **bold**
+~~~~~~~~~~
+[[http://hello | world]] **bold**
+
+[[http://hello | world]] **bold**
diff --git a/src/markup/engine/__tests__/remarkup/percent-block-solo.txt b/src/markup/engine/__tests__/remarkup/percent-block-solo.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/percent-block-solo.txt
@@ -0,0 +1,8 @@
+%%%
+**x**%%%
+~~~~~~~~~~
+
+
**x**
+~~~~~~~~~~
+
+**x**
diff --git a/src/markup/engine/__tests__/remarkup/quoted-angry.txt b/src/markup/engine/__tests__/remarkup/quoted-angry.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/quoted-angry.txt
@@ -0,0 +1,5 @@
+>>> REQUESTING CHANGES BECAUSE I'M ANGRY!
+~~~~~~~~~~
+REQUESTING CHANGES BECAUSE I'M ANGRY!
+~~~~~~~~~~
+>>> REQUESTING CHANGES BECAUSE I'M ANGRY!
diff --git a/src/markup/engine/__tests__/remarkup/quoted-code-block.txt b/src/markup/engine/__tests__/remarkup/quoted-code-block.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/quoted-code-block.txt
@@ -0,0 +1,16 @@
+> This should be a code block:
+>
+> ```lang=php
+> $foo = 'bar';
+> ```
+~~~~~~~~~~
+This should be a code block:
+
+<?php
+$foo = 'bar';
+~~~~~~~~~~
+> This should be a code block:
+>
+> $foo = 'bar';
diff --git a/src/markup/engine/__tests__/remarkup/quotes.txt b/src/markup/engine/__tests__/remarkup/quotes.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/quotes.txt
@@ -0,0 +1,9 @@
+> Dear Sir,
+> I am utterly disgusted with the quality
+> of your inflight food service.
+~~~~~~~~~~
+Dear Sir,
+ I am utterly disgusted with the quality
+ of your inflight food service.
+~~~~~~~~~~
+> Dear Sir, I am utterly disgusted with the quality of your inflight food service.
diff --git a/src/markup/engine/__tests__/remarkup/raw-escape.txt b/src/markup/engine/__tests__/remarkup/raw-escape.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/raw-escape.txt
@@ -0,0 +1,17 @@
+~1~
+
+~2Z
+
+~a
+~~~~~~~~~~
+~1~
+
+~2Z
+
+~a
+~~~~~~~~~~
+~1~
+
+~2Z
+
+~a
diff --git a/src/markup/engine/__tests__/remarkup/reply-basic.txt b/src/markup/engine/__tests__/remarkup/reply-basic.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/reply-basic.txt
@@ -0,0 +1,12 @@
+>>! In comment #123, alincoln wrote:
+> Four score and twenty years ago...
+~~~~~~~~~~
+
+~~~~~~~~~~
+In comment #123, alincoln wrote:
+
+> Four score and twenty years ago...
+
diff --git a/src/markup/engine/__tests__/remarkup/reply-nested.txt b/src/markup/engine/__tests__/remarkup/reply-nested.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/reply-nested.txt
@@ -0,0 +1,50 @@
+>>! Previously, fruit:
+>
+> - Apple
+> - Banana
+> - Cherry
+>
+>>>! More previously, vegetables:
+>>
+>> - Potato
+>> - Potato
+>> - Potato
+>
+> The end.
+
+~~~~~~~~~~
+
+~~~~~~~~~~
+Previously, fruit:
+
+> - Apple
+> - Banana
+> - Cherry
+>
+> More previously, vegetables:
+>
+> > - Potato
+> > - Potato
+> > - Potato
+>
+>
+> The end.
+
diff --git a/src/markup/engine/__tests__/remarkup/simple-table-with-empty-row.txt b/src/markup/engine/__tests__/remarkup/simple-table-with-empty-row.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/simple-table-with-empty-row.txt
@@ -0,0 +1,13 @@
+| Alpaca |
+| |
+| Zebra |
+~~~~~~~~~~
+
+Alpaca
+
+Zebra
+
+~~~~~~~~~~
+| Alpaca |
+| |
+| Zebra |
diff --git a/src/markup/engine/__tests__/remarkup/simple-table-with-leading-space.txt b/src/markup/engine/__tests__/remarkup/simple-table-with-leading-space.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/simple-table-with-leading-space.txt
@@ -0,0 +1,7 @@
+ |a|b|
+~~~~~~~~~~
+
+a b
+
+~~~~~~~~~~
+| a | b |
diff --git a/src/markup/engine/__tests__/remarkup/simple-table-with-link.txt b/src/markup/engine/__tests__/remarkup/simple-table-with-link.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/simple-table-with-link.txt
@@ -0,0 +1,7 @@
+| [[ http://example.com | name ]] | [x] |
+~~~~~~~~~~
+
+name [x]
+
+~~~~~~~~~~
+| name | [x] |
diff --git a/src/markup/engine/__tests__/remarkup/simple-table.txt b/src/markup/engine/__tests__/remarkup/simple-table.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/simple-table.txt
@@ -0,0 +1,24 @@
+| analyze_resources | original | mobile only | www only | both |
+| | -------- | ----------- | -------- | ---- |
+| //real// | 31 s | 24 s | 31 s | 31 s
+| --------
+| //user// | 49 s | 25 s | 31 s | 49 s
+| --------
+| //sys// | 24 s | 12 s | 13 s | 24 s
+| -------
+~~~~~~~~~~
+
+analyze_resources original mobile only www only both
+real 31 s 24 s 31 s 31 s
+user 49 s 25 s 31 s 49 s
+sys 24 s 12 s 13 s 24 s
+
+~~~~~~~~~~
+| analyze_resources | original | mobile only | www only | both |
+| | -------- | ----------- | -------- | ---- |
+| //real// | 31 s | 24 s | 31 s | 31 s |
+| ----------------- | | | | |
+| //user// | 49 s | 25 s | 31 s | 49 s |
+| ----------------- | | | | |
+| //sys// | 24 s | 12 s | 13 s | 24 s |
+| ----------------- | | | | |
diff --git a/src/markup/engine/__tests__/remarkup/simple.txt b/src/markup/engine/__tests__/remarkup/simple.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/simple.txt
@@ -0,0 +1,5 @@
+hello
+~~~~~~~~~~
+hello
+~~~~~~~~~~
+hello
diff --git a/src/markup/engine/__tests__/remarkup/table-with-leading-space.txt b/src/markup/engine/__tests__/remarkup/table-with-leading-space.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/table-with-leading-space.txt
@@ -0,0 +1,7 @@
+ cell
+~~~~~~~~~~
+
+cell
+
+~~~~~~~~~~
+| cell |
diff --git a/src/markup/engine/__tests__/remarkup/table-with-long-header.txt b/src/markup/engine/__tests__/remarkup/table-with-long-header.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/table-with-long-header.txt
@@ -0,0 +1,8 @@
+|x|
+||--
+~~~~~~~~~~
+
+x
+
+~~~~~~~~~~
+| x |
diff --git a/src/markup/engine/__tests__/remarkup/table.txt b/src/markup/engine/__tests__/remarkup/table.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/table.txt
@@ -0,0 +1,16 @@
+
+Table Storage
+`differential_diff` InnoDB
+`edge` ?
+
+~~~~~~~~~~
+
+Table Storage
+differential_diff InnoDB
+edge ?
+
+~~~~~~~~~~
+| Table | Storage |
+| ------------------- | ------- |
+| `differential_diff` | InnoDB |
+| `edge` | ? |
diff --git a/src/markup/engine/__tests__/remarkup/tick-block-multi.txt b/src/markup/engine/__tests__/remarkup/tick-block-multi.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/tick-block-multi.txt
@@ -0,0 +1,18 @@
+```code
+
+more code
+
+more code```
+
+~~~~~~~~~~
+code
+
+more code
+
+more code
+~~~~~~~~~~
+ code
+
+ more code
+
+ more code
diff --git a/src/markup/engine/__tests__/remarkup/tick-block.txt b/src/markup/engine/__tests__/remarkup/tick-block.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/tick-block.txt
@@ -0,0 +1,5 @@
+```code```
+~~~~~~~~~~
+code
+~~~~~~~~~~
+ code
diff --git a/src/markup/engine/__tests__/remarkup/toc.txt b/src/markup/engine/__tests__/remarkup/toc.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/toc.txt
@@ -0,0 +1,29 @@
+= [[ http://www.example.com/ | link_name ]] =
+
+== **bold** ==
+
+= http://www.example.com =
+
+~~~~~~~~~~
+
+- link_name
+
+- bold
+
+- http://www.example.com
+
+
+link_name
+
+bold
+
+http://www.example.com
+~~~~~~~~~~
+[[ http://www.example.com/ | link_name ]]
+=========================================
+
+**bold**
+--------
+
+http://www.example.com
+======================
diff --git a/src/markup/engine/__tests__/remarkup/trailing-whitespace-codeblock.txt b/src/markup/engine/__tests__/remarkup/trailing-whitespace-codeblock.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/trailing-whitespace-codeblock.txt
@@ -0,0 +1,39 @@
+ lang=txt
+ code block
+ code block
+
+
+
+
+ code block
+
+
+
+
+ code block
+~~~~~~~~~~
+code block
+code block
+
+
+
+
+code block
+
+
+
+
+code block
+~~~~~~~~~~
+ code block
+ code block
+
+
+
+
+ code block
+
+
+
+
+ code block
diff --git a/src/markup/engine/__tests__/remarkup/underline.txt b/src/markup/engine/__tests__/remarkup/underline.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/underline.txt
@@ -0,0 +1,13 @@
+omg__ wtf_____ bbq___ lol__
+__underlined text__
+__This is a great idea___ die forever please
+__
+/__notunderlined__/ and also /__notunderlined__.c
+~~~~~~~~~~
+omg__ wtf_____ bbq___ lol__
+underlined text
+This is a great idea_ die forever please
+__
+/__notunderlined__/ and also /__notunderlined__.c
+~~~~~~~~~~
+omg__ wtf_____ bbq___ lol__ __underlined text__ __This is a great idea___ die forever please __ /__notunderlined__/ and also /__notunderlined__.c
diff --git a/src/markup/engine/__tests__/remarkup/warning.txt b/src/markup/engine/__tests__/remarkup/warning.txt
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/__tests__/remarkup/warning.txt
@@ -0,0 +1,15 @@
+WARNING: interesting **stuff**
+
+(WARNING) interesting **stuff**
+~~~~~~~~~~
+WARNING: interesting stuff
+
+
+
+interesting stuff
+~~~~~~~~~~
+WARNING: interesting **stuff**
+
+
+
+(WARNING) interesting **stuff**
diff --git a/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php
@@ -0,0 +1,77 @@
+\11Z
+ *
+ * Then:
+ *
+ * ...
+ *
+ * If we didn't do this, the italics rule could match the "//" in "http://",
+ * or any other number of processing mistakes could occur, some of which create
+ * security risks.
+ *
+ * This class generates keys, and stores the map of keys to replacement text.
+ */
+final class PhutilRemarkupBlockStorage extends Phobject {
+
+  const MAGIC_BYTE = "\1";
+
+  private $map = array();
+  private $index;
+
+  /**
+   * Stash a piece of text and hand back the token that now stands for it.
+   *
+   * A token is the magic byte, followed by a counter which increases on
+   * every call, followed by a literal "Z".
+   *
+   * @param wild Text to store.
+   * @return string Token which restore() will later swap back.
+   */
+  public function store($text) {
+    ++$this->index;
+    $token = self::MAGIC_BYTE.$this->index.'Z';
+    $this->map[$token] = $text;
+    return $token;
+  }
+
+  /**
+   * Replace every stored token in a corpus with its stored text.
+   *
+   * Tokens are substituted newest-first. In HTML mode (the default) both the
+   * corpus and the stored values are passed through phutil_escape_html() and
+   * the result is wrapped with phutil_safe_html(). In text mode a plain
+   * string replacement is done.
+   *
+   * If nothing has been stored, the corpus is returned exactly as given,
+   * without escaping, in either mode.
+   *
+   * @param wild Corpus containing tokens.
+   * @param bool True to restore as plain text instead of HTML.
+   * @return wild Corpus with tokens replaced.
+   */
+  public function restore($corpus, $text_mode = false) {
+    if (!$this->map) {
+      return $corpus;
+    }
+
+    $tokens = array_reverse(array_keys($this->map));
+    $values = array_reverse($this->map);
+
+    if ($text_mode) {
+      return str_replace($tokens, $values, $corpus);
+    }
+
+    $values = array_map('phutil_escape_html', $values);
+    $escaped_corpus = phutil_escape_html($corpus);
+
+    return phutil_safe_html(str_replace($tokens, $values, $escaped_corpus));
+  }
+
+  /**
+   * Set (or replace) the text stored under a token.
+   *
+   * @param string Token to write to.
+   * @param wild Replacement text.
+   * @return this
+   */
+  public function overwrite($key, $new_text) {
+    $this->map[$key] = $new_text;
+    return $this;
+  }
+
+  /**
+   * Get the full map of tokens to stored text.
+   *
+   * @return map<string, wild>
+   */
+  public function getMap() {
+    return $this->map;
+  }
+
+  /**
+   * Replace the full map of tokens to stored text.
+   *
+   * @param map<string, wild> New map.
+   * @return this
+   */
+  public function setMap(array $map) {
+    $this->map = $map;
+    return $this;
+  }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockInterpreter.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockInterpreter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockInterpreter.php
@@ -0,0 +1,36 @@
+engine = $engine;
+ return $this;
+ }
+
+ /**
+ * Get the engine previously stored on this interpreter.
+ *
+ * @return wild Value of the `engine` property (presumably a
+ * PhutilRemarkupEngine; the setter's signature is not visible
+ * here, so confirm).
+ */
+ final public function getEngine() {
+ return $this->engine;
+ }
+
+ /**
+ * Get the name which identifies this interpreter.
+ *
+ * NOTE(review): PhutilRemarkupInterpreterBlockRule appears to index the
+ * available interpreters by this value when dispatching a block; confirm.
+ *
+ * @return string
+ */
+ abstract public function getInterpreterName();
+
+ /**
+ * Render the content of an interpreted block.
+ *
+ * @param string Block content.
+ * @param array Arguments for the interpreter (presumably the options
+ * parsed from the block's opening line; confirm against caller).
+ * @return wild Rendered output.
+ */
+ abstract public function markupContent($content, array $argv);
+
+ /**
+  * Format an error message for the engine's current output mode.
+  *
+  * In text mode the message is wrapped in parentheses. Otherwise it is
+  * wrapped in a `div` with the class "remarkup-interpreter-error".
+  *
+  * @param string Error message.
+  * @return wild Parenthesized string, or the result of phutil_tag().
+  */
+ protected function markupError($string) {
+   $is_text = $this->getEngine()->isTextMode();
+
+   if (!$is_text) {
+     $attributes = array(
+       'class' => 'remarkup-interpreter-error',
+     );
+     return phutil_tag('div', $attributes, $string);
+   }
+
+   return '('.$string.')';
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockRule.php
@@ -0,0 +1,170 @@
+addInt($this->getPriority())
+ ->addString(get_class($this));
+ }
+
+ /**
+ * Render a block of text which this rule has claimed.
+ *
+ * @param string Raw text of the block.
+ * @param wild Child content for the block (presumably only meaningful
+ * when supportsChildBlocks() returns true; confirm against the engine).
+ * @return wild Rendered block.
+ */
+ abstract public function markupText($text, $children);
+
+ /**
+ * Determine how many lines this rule can parse.
+ *
+ * This will get an array of unparsed lines and return the number of lines
+ * from the first array value that it can parse.
+ *
+ * @param array $lines Unparsed lines.
+ * @param int $cursor Position within `$lines` (presumably the index of
+ * the first line to examine; confirm against the engine).
+ *
+ * @return int Number of lines this rule can parse.
+ */
+ abstract public function getMatchingLineCount(array $lines, $cursor);
+
+ /**
+ * Hook invoked by postprocess(). The default implementation does nothing;
+ * subclasses may override it.
+ *
+ * @return void
+ */
+ protected function didMarkupText() {
+ return;
+ }
+
+ /**
+ * Attach this block rule to an engine.
+ *
+ * @param PhutilRemarkupEngine Engine to attach.
+ * @return this
+ */
+ final public function setEngine(PhutilRemarkupEngine $engine) {
+ $this->engine = $engine;
+ // Re-sort the markup rules and hand the new engine to each of them.
+ $this->updateRules();
+ return $this;
+ }
+
+ /**
+ * Get the engine this block rule is attached to.
+ *
+ * @return PhutilRemarkupEngine|null Engine given to setEngine(); presumably
+ * null before setEngine() has been called (updateRules() guards
+ * against a falsey value).
+ */
+ final protected function getEngine() {
+ return $this->engine;
+ }
+
+ /**
+ * Set the inline markup rules which applyRules() runs over block text.
+ *
+ * @param list<PhutilRemarkupRule> Markup rules.
+ * @return this
+ */
+ public function setMarkupRules(array $rules) {
+ assert_instances_of($rules, 'PhutilRemarkupRule');
+ $this->rules = $rules;
+ // If an engine is already attached, sort the rules and propagate it.
+ $this->updateRules();
+ return $this;
+ }
+
+ /**
+  * Sort the markup rules by priority and attach the current engine to each
+  * of them. Does nothing until an engine has been set.
+  *
+  * @return this
+  */
+ private function updateRules() {
+   $engine = $this->getEngine();
+   if (!$engine) {
+     return $this;
+   }
+
+   $sorted = msort($this->rules, 'getPriority');
+   $this->rules = $sorted;
+
+   foreach ($sorted as $markup_rule) {
+     $markup_rule->setEngine($engine);
+   }
+
+   return $this;
+ }
+
+ /**
+ * Get the inline markup rules attached to this block rule.
+ *
+ * @return list<PhutilRemarkupRule> Rules, as last stored by
+ * setMarkupRules() or re-sorted by updateRules().
+ */
+ final public function getMarkupRules() {
+ return $this->rules;
+ }
+
+ /**
+ * Run this rule's post-markup hook by calling didMarkupText().
+ *
+ * @return void
+ */
+ final public function postprocess() {
+ $this->didMarkupText();
+ }
+
+ /**
+  * Run every attached markup rule over a piece of text, feeding the output
+  * of each rule into the next.
+  *
+  * @param wild Text to mark up.
+  * @return wild Text after all rules have been applied.
+  */
+ final protected function applyRules($text) {
+   $result = $text;
+
+   $markup_rules = $this->getMarkupRules();
+   foreach ($markup_rules as $markup_rule) {
+     $result = $markup_rule->apply($result);
+   }
+
+   return $result;
+ }
+
+ /**
+ * Whether this rule supports child blocks. The default is false.
+ *
+ * @return bool
+ */
+ public function supportsChildBlocks() {
+ return false;
+ }
+
+ /**
+ * Extract child text from a block. The default implementation always
+ * throws.
+ *
+ * NOTE(review): presumably meant to be overridden by rules whose
+ * supportsChildBlocks() returns true; confirm against the engine.
+ *
+ * @param string Block text.
+ * @throws PhutilMethodNotImplementedException Always, unless overridden.
+ */
+ public function extractChildText($text) {
+ throw new PhutilMethodNotImplementedException();
+ }
+
+ /**
+  * Render a table from a list of row specifications.
+  *
+  * Each row is a dictionary with a `type` (the row tag) and a `content` list
+  * of cells; each cell is a dictionary with a `type` (the cell tag, where
+  * "th" marks a heading) and `content`.
+  *
+  * In text mode, the table is drawn with "|" separators, columns are padded
+  * to the widest restored cell text, and any row containing a heading cell
+  * is followed by a "-" underline row. Otherwise an HTML table is built,
+  * using inline styles in HTML mail mode and a CSS class in other modes.
+  *
+  * @param list<dict> Row specifications.
+  * @return wild Plain text in text mode, otherwise the wrapped table tag.
+  */
+ protected function renderRemarkupTable(array $out_rows) {
+   assert_instances_of($out_rows, 'array');
+
+   $engine = $this->getEngine();
+
+   if ($engine->isTextMode()) {
+     // Measure the restored text of each cell, indexed by column, then row.
+     $widths = array();
+     foreach ($out_rows as $row_key => $row) {
+       foreach ($row['content'] as $col_key => $cell) {
+         $plain = $engine->restoreText($cell['content']);
+         $widths[$col_key][$row_key] = phutil_utf8_strlen($plain);
+       }
+     }
+     $column_widths = array_map('max', $widths);
+
+     $pieces = array();
+     foreach ($out_rows as $row_key => $row) {
+       $has_heading = false;
+       foreach ($row['content'] as $col_key => $cell) {
+         $padding = $column_widths[$col_key] - $widths[$col_key][$row_key];
+         $pieces[] = '| '.$cell['content'].str_repeat(' ', $padding).' ';
+         if ($cell['type'] == 'th') {
+           $has_heading = true;
+         }
+       }
+       $pieces[] = "|\n";
+
+       if (!$has_heading) {
+         continue;
+       }
+
+       // Underline heading cells; leave blanks under ordinary cells.
+       foreach ($row['content'] as $col_key => $cell) {
+         $fill = ($cell['type'] == 'th') ? '-' : ' ';
+         $pieces[] = '| '.str_repeat($fill, $column_widths[$col_key]).' ';
+       }
+       $pieces[] = "|\n";
+     }
+
+     return rtrim(implode('', $pieces), "\n");
+   }
+
+   if (!$engine->isHTMLMailMode()) {
+     $table_attributes = array(
+       'class' => 'remarkup-table',
+     );
+     $cell_attributes = array();
+   } else {
+     // Mail clients can not use our stylesheets, so style inline.
+     $table_attributes = array(
+       'style' => 'border-collapse: separate;
+ border-spacing: 1px;
+ background: #d3d3d3;
+ margin: 12px 0;',
+     );
+     $cell_attributes = array(
+       'style' => 'background: #ffffff;
+ padding: 3px 6px;',
+     );
+   }
+
+   $rows = array("\n");
+   foreach ($out_rows as $row) {
+     $cells = array();
+     foreach ($row['content'] as $cell) {
+       $cells[] = phutil_tag(
+         $cell['type'],
+         $cell_attributes,
+         $cell['content']);
+     }
+     $rows[] = phutil_tag($row['type'], array(), $cells);
+     $rows[] = "\n";
+   }
+
+   return phutil_tag_div(
+     'remarkup-table-wrap',
+     phutil_tag('table', $table_attributes, $rows));
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupCodeBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupCodeBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupCodeBlockRule.php
@@ -0,0 +1,252 @@
+ false,
+ 'lang' => null,
+ 'name' => null,
+ 'lines' => null,
+ );
+
+ $parser = new PhutilSimpleOptions();
+ $custom = $parser->parse(head($lines));
+ if ($custom) {
+ $valid = true;
+ foreach ($custom as $key => $value) {
+ if (!array_key_exists($key, $options)) {
+ $valid = false;
+ break;
+ }
+ }
+ if ($valid) {
+ array_shift($lines);
+ $options = $custom + $options;
+ }
+ }
+
+ // Normalize the text back to a 0-level indent.
+ $min_indent = 80;
+ foreach ($lines as $line) {
+ for ($ii = 0; $ii < strlen($line); $ii++) {
+ if ($line[$ii] != ' ') {
+ $min_indent = min($ii, $min_indent);
+ break;
+ }
+ }
+ }
+
+ $text = implode("\n", $lines);
+ if ($min_indent) {
+ $indent_string = str_repeat(' ', $min_indent);
+ $text = preg_replace('/^'.$indent_string.'/m', '', $text);
+ }
+
+ if ($this->getEngine()->isTextMode()) {
+ $out = array();
+
+ $header = array();
+ if ($options['counterexample']) {
+ $header[] = 'counterexample';
+ }
+ if ($options['name'] != '') {
+ $header[] = 'name='.$options['name'];
+ }
+ if ($header) {
+ $out[] = implode(', ', $header);
+ }
+
+ $text = preg_replace('/^/m', ' ', $text);
+ $out[] = $text;
+
+ return implode("\n", $out);
+ }
+
+ if (empty($options['lang'])) {
+ // If the user hasn't specified "lang=..." explicitly, try to guess the
+ // language. If we fail, fall back to configured defaults.
+ $lang = PhutilLanguageGuesser::guessLanguage($text);
+ if (!$lang) {
+ $lang = nonempty(
+ $this->getEngine()->getConfig('phutil.codeblock.language-default'),
+ 'text');
+ }
+ $options['lang'] = $lang;
+ }
+
+ $code_body = $this->highlightSource($text, $options);
+
+ $name_header = null;
+ $block_style = null;
+ if ($this->getEngine()->isHTMLMailMode()) {
+ $map = $this->getEngine()->getConfig('phutil.codeblock.style-map');
+
+ if ($map) {
+ $raw_body = id(new PhutilPygmentizeParser())
+ ->setMap($map)
+ ->parse((string)$code_body);
+ $code_body = phutil_safe_html($raw_body);
+ }
+
+ $style_rules = array(
+ 'padding: 6px 12px;',
+ 'font-size: 13px;',
+ 'font-weight: bold;',
+ 'display: inline-block;',
+ 'border-top-left-radius: 3px;',
+ 'border-top-right-radius: 3px;',
+ 'color: rgba(0,0,0,.75);',
+ );
+
+ if ($options['counterexample']) {
+ $style_rules[] = 'background: #f7e6e6';
+ } else {
+ $style_rules[] = 'background: rgba(71, 87, 120, 0.08);';
+ }
+
+ $header_attributes = array(
+ 'style' => implode(' ', $style_rules),
+ );
+
+ $block_style = 'margin: 12px 0;';
+ } else {
+ $header_attributes = array(
+ 'class' => 'remarkup-code-header',
+ );
+ }
+
+ if ($options['name']) {
+ $name_header = phutil_tag(
+ 'div',
+ $header_attributes,
+ $options['name']);
+ }
+
+ $class = 'remarkup-code-block';
+ if ($options['counterexample']) {
+ $class = 'remarkup-code-block code-block-counterexample';
+ }
+
+ $attributes = array(
+ 'class' => $class,
+ 'style' => $block_style,
+ 'data-code-lang' => $options['lang'],
+ 'data-sigil' => 'remarkup-code-block',
+ );
+
+ return phutil_tag(
+ 'div',
+ $attributes,
+ array($name_header, $code_body));
+ }
+
+ /**
+  * Highlight source text and wrap it in a `pre` tag.
+  *
+  * Reads the `counterexample`, `lines` and `lang` keys of the options. The
+  * highlighter class comes from the "syntax-highlighter.engine" config
+  * value, falling back to PhutilDefaultSyntaxHighlighterEngine.
+  *
+  * @param string Source text to highlight.
+  * @param dict Code block options.
+  * @return wild The `pre` tag produced by phutil_tag().
+  */
+ private function highlightSource($text, array $options) {
+   $remarkup = $this->getEngine();
+
+   $aux_class = $options['counterexample']
+     ? ' remarkup-counterexample'
+     : null;
+
+   $aux_style = null;
+   if ($remarkup->isHTMLMailMode()) {
+     // Mail has no stylesheet, so the block is styled inline.
+     $background = $options['counterexample']
+       ? 'background: #f7e6e6;'
+       : 'background: rgba(71, 87, 120, 0.08);';
+
+     $aux_style = implode(
+       ' ',
+       array(
+         'font: 11px/15px "Menlo", "Consolas", "Monaco", monospace;',
+         'padding: 12px;',
+         'margin: 0;',
+         $background,
+       ));
+   }
+
+   if ($options['lines']) {
+     // Enforce a minimum height; below it the scrollbar is unusable.
+     $height = max(6, (int)$options['lines']);
+     $limit = 'max-height: '.(2 * $height).'em; overflow: auto;';
+     $aux_style = $aux_style.' '.$limit;
+   }
+
+   $highlighter_class = $remarkup->getConfig('syntax-highlighter.engine');
+   if (!$highlighter_class) {
+     $highlighter_class = 'PhutilDefaultSyntaxHighlighterEngine';
+   }
+
+   $highlighter = newv($highlighter_class, array());
+   $highlighter->setConfig(
+     'pygments.enabled',
+     $remarkup->getConfig('pygments.enabled'));
+
+   $attributes = array(
+     'class' => 'remarkup-code'.$aux_class,
+     'style' => $aux_style,
+   );
+
+   $highlighted = $highlighter->highlightSource($options['lang'], $text);
+   $trimmed = PhutilSafeHTML::applyFunction('rtrim', $highlighted);
+
+   return phutil_tag('pre', $attributes, $trimmed);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupDefaultBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupDefaultBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupDefaultBlockRule.php
@@ -0,0 +1,44 @@
+getEngine();
+
+ $text = trim($text);
+ $text = $this->applyRules($text);
+
+ if ($engine->isTextMode()) {
+ if (!$this->getEngine()->getConfig('preserve-linebreaks')) {
+ $text = preg_replace('/ *\n */', ' ', $text);
+ }
+ return $text;
+ }
+
+ if ($engine->getConfig('preserve-linebreaks')) {
+ $text = phutil_escape_html_newlines($text);
+ }
+
+ if (!strlen($text)) {
+ return null;
+ }
+
+ $default_attributes = $engine->getConfig('default.p.attributes');
+ if ($default_attributes) {
+ $attributes = $default_attributes;
+ } else {
+ $attributes = array();
+ }
+
+ return phutil_tag('p', $attributes, $text);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupHeaderBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupHeaderBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupHeaderBlockRule.php
@@ -0,0 +1,162 @@
+ 1) {
+ $level = ($lines[1][0] == '=') ? 1 : 2;
+ $text = trim($lines[0]);
+ } else {
+ $level = 0;
+ for ($ii = 0; $ii < min(5, strlen($text)); $ii++) {
+ if ($text[$ii] == '=' || $text[$ii] == '#') {
+ ++$level;
+ } else {
+ break;
+ }
+ }
+ $text = trim($text, ' =#');
+ }
+
+ $engine = $this->getEngine();
+
+ if ($engine->isTextMode()) {
+ $char = ($level == 1) ? '=' : '-';
+ return $text."\n".str_repeat($char, phutil_utf8_strlen($text));
+ }
+
+ $use_anchors = $engine->getConfig('header.generate-toc');
+
+ $anchor = null;
+ if ($use_anchors) {
+ $anchor = $this->generateAnchor($level, $text);
+ }
+
+ $text = phutil_tag(
+ 'h'.($level + 1),
+ array(
+ 'class' => 'remarkup-header',
+ ),
+ array($anchor, $this->applyRules($text)));
+
+ return $text;
+ }
+
+ /**
+  * Build a unique anchor for a header, record it in the engine's table of
+  * contents metadata, and return the anchor tag.
+  *
+  * The anchor name is the lowercased header text with every character
+  * outside a-z and 0-9 turned into "-", runs of "-" collapsed, cut to 24
+  * bytes, and a numeric suffix added if the name is empty or already taken.
+  *
+  * @param int Header level.
+  * @param string Header text.
+  * @return wild Empty `a` tag carrying the anchor name.
+  */
+ private function generateAnchor($level, $text) {
+   $slug = strtolower($text);
+   $slug = preg_replace(
+     array('/[^a-z0-9]/', '/--+/'),
+     array('-', '-'),
+     $slug);
+   $slug = trim($slug, '-');
+   $slug = trim(substr($slug, 0, 24), '-');
+
+   $engine = $this->getEngine();
+   $key = self::KEY_HEADER_TOC;
+   $anchors = $engine->getTextMetadata($key, array());
+
+   // Append "-1", "-2", ... until the name is nonempty and unused.
+   $anchor = $slug;
+   for ($counter = 1;
+        !strlen($anchor) || isset($anchors[$anchor]);
+        $counter++) {
+     $anchor = trim($slug.'-'.$counter, '-');
+   }
+
+   // A header may contain a link, like this:
+   //
+   //   = [[ http://www.example.com/ | example ]] =
+   //
+   // The TOC entry should read just "example" while the header itself stays
+   // linked, so render the text under the 'toc' state, where link rules
+   // emit only names.
+   $engine->pushState('toc');
+   $toc_text = $engine->restoreText($this->applyRules($text));
+   $engine->popState('toc');
+
+   $anchors[$anchor] = array($level, $toc_text);
+   $engine->setTextMetadata($key, $anchors);
+
+   $attributes = array(
+     'name' => $anchor,
+   );
+
+   return phutil_tag('a', $attributes, '');
+ }
+
+ /**
+  * Render a table of contents from the header anchors recorded on an
+  * engine.
+  *
+  * Anchors are read from the engine's text metadata under
+  * KEY_HEADER_TOC, as a map of anchor name to a (level, name) pair. Each
+  * entry becomes a list item linking to "#<anchor>". Lists are opened and
+  * closed as the header level rises and falls, so deeper headers nest
+  * inside shallower ones.
+  *
+  * @param PhutilRemarkupEngine Engine which has already marked up the text.
+  * @return wild|null Rendered table of contents, or null if fewer than two
+  *                   headers were recorded.
+  */
+ public static function renderTableOfContents(PhutilRemarkupEngine $engine) {
+
+   $key = self::KEY_HEADER_TOC;
+   $anchors = $engine->getTextMetadata($key, array());
+
+   if (count($anchors) < 2) {
+     // Don't generate a TOC if there are no headers, or if there's only
+     // one header (since such a TOC would be silly).
+     return null;
+   }
+
+   $depth = 0;
+   $toc = array();
+   foreach ($anchors as $anchor => $info) {
+     list($level, $name) = $info;
+
+     // Open one list per level we descend. These literals must be real
+     // tags: with an empty format string the depth is tracked but no
+     // nesting is ever emitted.
+     while ($depth < $level) {
+       $toc[] = hsprintf('<ul>');
+       $depth++;
+     }
+     // Close one list per level we climb back up.
+     while ($depth > $level) {
+       $toc[] = hsprintf('</ul>');
+       $depth--;
+     }
+
+     $toc[] = phutil_tag(
+       'li',
+       array(),
+       phutil_tag(
+         'a',
+         array(
+           'href' => '#'.$anchor,
+         ),
+         $name));
+   }
+
+   // Close whatever lists are still open after the last header.
+   while ($depth > 0) {
+     $toc[] = hsprintf('</ul>');
+     $depth--;
+   }
+
+   return phutil_implode_html("\n", $toc);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupHorizontalRuleBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupHorizontalRuleBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupHorizontalRuleBlockRule.php
@@ -0,0 +1,37 @@
+getEngine()->isTextMode()) {
+ return rtrim($text);
+ }
+
+ return phutil_tag('hr', array('class' => 'remarkup-hr'));
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupInlineBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupInlineBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupInlineBlockRule.php
@@ -0,0 +1,13 @@
+applyRules($text);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupInterpreterBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupInterpreterBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupInterpreterBlockRule.php
@@ -0,0 +1,89 @@
+parse($matches[2]);
+ }
+
+ $interpreters = id(new PhutilClassMapQuery())
+ ->setAncestorClass('PhutilRemarkupBlockInterpreter')
+ ->execute();
+
+ foreach ($interpreters as $interpreter) {
+ $interpreter->setEngine($this->getEngine());
+ }
+
+ $lines[$first_key] = preg_replace(
+ self::START_BLOCK_PATTERN,
+ '',
+ $lines[$first_key]);
+ $lines[$last_key] = preg_replace(
+ self::END_BLOCK_PATTERN,
+ '',
+ $lines[$last_key]);
+
+ if (trim($lines[$first_key]) === '') {
+ unset($lines[$first_key]);
+ }
+ if (trim($lines[$last_key]) === '') {
+ unset($lines[$last_key]);
+ }
+
+ $content = implode("\n", $lines);
+
+ $interpreters = mpull($interpreters, null, 'getInterpreterName');
+
+ if (isset($interpreters[$matches[1]])) {
+ return $interpreters[$matches[1]]->markupContent($content, $argv);
+ }
+
+ $message = pht('No interpreter found: %s', $matches[1]);
+
+ if ($this->getEngine()->isTextMode()) {
+ return '('.$message.')';
+ }
+
+ return phutil_tag(
+ 'div',
+ array(
+ 'class' => 'remarkup-interpreter-error',
+ ),
+ $message);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupListBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupListBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupListBlockRule.php
@@ -0,0 +1,567 @@
+ $line) {
+ $matches = null;
+ if (preg_match($regex, $line)) {
+ $regex = self::CONT_BLOCK_PATTERN;
+ if (preg_match('/^(\s+)/', $line, $matches)) {
+ $space = strlen($matches[1]);
+ } else {
+ $space = 0;
+ }
+ $min_space = min($min_space, $space);
+ }
+ }
+
+ $regex = self::START_BLOCK_PATTERN;
+ if ($min_space) {
+ foreach ($lines as $key => $line) {
+ if (preg_match($regex, $line)) {
+ $regex = self::CONT_BLOCK_PATTERN;
+ $lines[$key] = substr($line, $min_space);
+ }
+ }
+ }
+
+
+ // The input text may have linewraps in it, like this:
+ //
+ // - derp derp derp derp
+ // derp derp derp derp
+ // - blarp blarp blarp blarp
+ //
+ // Group text lines together into list items, stored in $items. So the
+ // result in the above case will be:
+ //
+ // array(
+ // array(
+ // "- derp derp derp derp",
+ // " derp derp derp derp",
+ // ),
+ // array(
+ // "- blarp blarp blarp blarp",
+ // ),
+ // );
+
+ $item = array();
+ $starts_at = null;
+ $regex = self::START_BLOCK_PATTERN;
+ foreach ($lines as $line) {
+ $match = null;
+ if (preg_match($regex, $line, $match)) {
+ if (!$starts_at && !empty($match[1])) {
+ $starts_at = $match[1];
+ }
+ $regex = self::CONT_BLOCK_PATTERN;
+ if ($item) {
+ $items[] = $item;
+ $item = array();
+ }
+ }
+ $item[] = $line;
+ }
+ if ($item) {
+ $items[] = $item;
+ }
+ if (!$starts_at) {
+ $starts_at = 1;
+ }
+
+
+ // Process each item to normalize the text, remove line wrapping, and
+ // determine its depth (indentation level) and style (ordered vs unordered).
+ //
+ // We preserve consecutive linebreaks and interpret them as paragraph
+ // breaks.
+ //
+ // Given the above example, the processed array will look like:
+ //
+ // array(
+ // array(
+ // 'text' => 'derp derp derp derp derp derp derp derp',
+ // 'depth' => 0,
+ // 'style' => '-',
+ // ),
+ // array(
+ // 'text' => 'blarp blarp blarp blarp',
+ // 'depth' => 0,
+ // 'style' => '-',
+ // ),
+ // );
+
+ $has_marks = false;
+ foreach ($items as $key => $item) {
+ // Trim space around newlines, to strip trailing whitespace and formatting
+ // indentation.
+ $item = preg_replace('/ *(\n+) */', '\1', implode("\n", $item));
+
+ // Replace single newlines with a space. Preserve multiple newlines as
+ // paragraph breaks.
+ $item = preg_replace('/(? $text,
+ 'depth' => $depth,
+ 'style' => $style,
+ 'mark' => $mark,
+ );
+ }
+ $items = array_values($items);
+
+
+ // Users can create a sub-list by indenting any deeper amount than the
+ // previous list, so these are both valid:
+ //
+ // - a
+ // - b
+ //
+ // - a
+ // - b
+ //
+ // In the former case, we'll have depths (0, 2). In the latter case, depths
+ // (0, 4). We don't actually care about how many spaces there are, only
+ // how many list indentation levels (that is, we want to map both of
+ // those cases to (0, 1), indicating "outermost list" and "first sublist").
+ //
+ // This is made more complicated because lists at two different indentation
+ // levels might be at the same list level:
+ //
+ // - a
+ // - b
+ // - c
+ // - d
+ //
+ // Here, 'b' and 'd' are at the same list level (2) but different indent
+ // levels (2, 4).
+ //
+ // Users can also create "staircases" like this:
+ //
+ // - a
+ // - b
+ // # c
+ //
+ // While this is silly, we'd like to render it as faithfully as possible.
+ //
+ // In order to do this, we convert the list of nodes into a tree,
+ // normalizing indentation levels and inserting dummy nodes as necessary to
+ // make the tree well-formed. See additional notes at buildTree().
+ //
+ // In the case above, the result is a tree like this:
+ //
+ // -
+ // -
+ // - a
+ // - b
+ // # c
+
+ $l = 0;
+ $r = count($items);
+ $tree = $this->buildTree($items, $l, $r, $cur_level = 0);
+
+
+ // We may need to open a list on a node, but they do not have
+ // list style information yet. We need to propagate list style information
+ // backward through the tree. In the above example, the tree now looks
+ // like this:
+ //
+ // -
+ // -
+ // - a
+ // - b
+ // # c
+
+ $this->adjustTreeStyleInformation($tree);
+
+ // Finally, we have enough information to render the tree.
+
+ $out = $this->renderTree($tree, 0, $has_marks, $starts_at);
+
+ if ($this->getEngine()->isTextMode()) {
+ $out = implode('', $out);
+ $out = rtrim($out, "\n");
+ $out = preg_replace('/ +$/m', '', $out);
+ return $out;
+ }
+
+ return phutil_implode_html('', $out);
+ }
+
+ /**
+ * Convert the items in the half-open index range [$l, $r) into a list of
+ * tree nodes at list level `$cur_level`, recursing to build child nodes.
+ *
+ * See additional notes in @{method:markupText}.
+ *
+ * @param list List of item dictionaries. Each is read for its `depth` key
+ * and merged into the node built for it.
+ * @param int Index of the first item in the range (inclusive).
+ * @param int Index of the end of the range (exclusive).
+ * @param int List level recorded on every node produced by this call.
+ * @return list List of nodes. Each node has `level` and `items` keys in
+ * addition to the keys of the item it was built from.
+ */
+ private function buildTree(array $items, $l, $r, $cur_level) {
+ // An empty range produces no nodes.
+ if ($l == $r) {
+ return array();
+ }
+
+ if ($cur_level > self::MAXIMUM_LIST_NESTING_DEPTH) {
+ // This algorithm is recursive and we don't need you blowing the stack
+ // with your oh-so-clever 50,000-item-deep list. Cap indentation levels
+ // at a reasonable number and just shove everything deeper up to this
+ // level.
+ $nodes = array();
+ for ($ii = $l; $ii < $r; $ii++) {
+ // Array union: the keys listed on the left win over same-named keys in
+ // the item.
+ $nodes[] = array(
+ 'level' => $cur_level,
+ 'items' => array(),
+ ) + $items[$ii];
+ }
+ return $nodes;
+ }
+
+ // Find the item with the smallest depth. Scanning backward with "<=" means
+ // ties resolve to the earliest such item in the range.
+ $min = $l;
+ for ($ii = $r - 1; $ii >= $l; $ii--) {
+ if ($items[$ii]['depth'] <= $items[$min]['depth']) {
+ $min = $ii;
+ }
+ }
+
+ $min_depth = $items[$min]['depth'];
+
+ $nodes = array();
+ if ($min != $l) {
+ // The items before the shallowest item are all deeper than it. Hang them
+ // beneath a placeholder node which has no text, style or mark of its own.
+ $nodes[] = array(
+ 'text' => null,
+ 'level' => $cur_level,
+ 'style' => null,
+ 'mark' => null,
+ 'items' => $this->buildTree($items, $l, $min, $cur_level + 1),
+ );
+ }
+
+ // Walk the rest of the range. Each later item at the minimum depth closes
+ // the previous sibling: the items strictly between the two become the
+ // previous sibling's children.
+ $last = $min;
+ for ($ii = $last + 1; $ii < $r; $ii++) {
+ if ($items[$ii]['depth'] == $min_depth) {
+ $nodes[] = array(
+ 'level' => $cur_level,
+ 'items' => $this->buildTree($items, $last + 1, $ii, $cur_level + 1),
+ ) + $items[$last];
+ $last = $ii;
+ }
+ }
+ // Emit the final sibling, which owns everything up to the end of the range.
+ $nodes[] = array(
+ 'level' => $cur_level,
+ 'items' => $this->buildTree($items, $last + 1, $r, $cur_level + 1),
+ ) + $items[$last];
+
+ return $nodes;
+ }
+
+
+ /**
+  * Fill in the `style` of every node in the tree which does not have one.
+  *
+  * Nodes are visited from last to first at each level. A node without a
+  * style takes the style of the nearest styled node after it at the same
+  * level, or "-" if there is none. Child lists are processed recursively.
+  *
+  * See additional notes in @{method:markupText}.
+  *
+  * @param list List of tree nodes, modified in place.
+  * @return void
+  */
+ private function adjustTreeStyleInformation(array &$tree) {
+   // Style to hand to unstyled nodes until a styled node replaces it.
+   $inherited = '-';
+
+   $index = count($tree);
+   while ($index-- > 0) {
+     $own_style = $tree[$index]['style'];
+
+     if ($own_style === null) {
+       // Placeholder node: borrow the style of the styled node after it.
+       $tree[$index]['style'] = $inherited;
+     } else {
+       // Styled node: earlier unstyled nodes will now inherit from it.
+       $inherited = $own_style;
+     }
+
+     if ($tree[$index]['items']) {
+       $this->adjustTreeStyleInformation($tree[$index]['items']);
+     }
+   }
+ }
+
+
+ /**
+ * Render one level of the list tree, recursing into child lists.
+ *
+ * In text mode this produces indented plain-text list lines; otherwise it
+ * produces list markup fragments.
+ *
+ * See additional notes in @{method:markupText}.
+ *
+ * @param list List of tree nodes at this level.
+ * @param int Nesting level, used for text-mode indentation.
+ * @param bool Selects the "remarkup-list-with-checkmarks" class on the
+ * list tag.
+ * @param wild First number for an ordered list. Defaults to 1.
+ * @return list List of output fragments, in order.
+ */
+ private function renderTree(
+ array $tree,
+ $level,
+ $has_marks,
+ $starts_at = 1) {
+
+ // The style of the first node decides the style of the whole level.
+ $style = idx(head($tree), 'style');
+
+ $out = array();
+
+ if (!$this->getEngine()->isTextMode()) {
+ // NOTE(review): $tag is only assigned for the "#" and "-" styles; any
+ // other style would leave it undefined below.
+ switch ($style) {
+ case '#':
+ $tag = 'ol';
+ break;
+ case '-':
+ $tag = 'ul';
+ break;
+ }
+
+ // NOTE(review): ctype_digit() expects a string, but $starts_at defaults to
+ // the integer 1 -- confirm callers pass a digit string when they want a
+ // "start" attribute.
+ $start_attr = null;
+ if (ctype_digit($starts_at) && $starts_at > 1) {
+ $start_attr = hsprintf(' start="%d"', $starts_at);
+ }
+
+ if ($has_marks) {
+ $out[] = hsprintf(
+ '<%s class="remarkup-list remarkup-list-with-checkmarks"%s>',
+ $tag,
+ $start_attr);
+ } else {
+ $out[] = hsprintf(
+ '<%s class="remarkup-list"%s>',
+ $tag,
+ $start_attr);
+ }
+
+ $out[] = "\n";
+ }
+
+ $number = $starts_at;
+ foreach ($tree as $item) {
+ if ($this->getEngine()->isTextMode()) {
+ if ($item['text'] === null) {
+ // Don't render anything.
+ } else {
+ $indent = str_repeat(' ', 2 * $level);
+ $out[] = $indent;
+ // A checkbox mark replaces the usual bullet or number.
+ if ($item['mark'] !== null) {
+ if ($item['mark']) {
+ $out[] = '[X] ';
+ } else {
+ $out[] = '[ ] ';
+ }
+ } else {
+ switch ($style) {
+ case '#':
+ $out[] = $number.'. ';
+ $number++;
+ break;
+ case '-':
+ $out[] = '- ';
+ break;
+ }
+ }
+
+ // Runs of two or more newlines separate paragraphs within one item.
+ $parts = preg_split('/\n{2,}/', $item['text']);
+ foreach ($parts as $key => $part) {
+ if ($key != 0) {
+ $out[] = "\n\n ".$indent;
+ }
+ $out[] = $this->applyRules($part);
+ }
+ $out[] = "\n";
+ }
+ } else {
+ // NOTE(review): several hsprintf() literals in this branch are empty or
+ // whitespace-only as shown here; presumably they carried list item
+ // markup which was lost -- confirm against the upstream file.
+ if ($item['text'] === null) {
+ $out[] = hsprintf('');
+ } else {
+ if ($item['mark'] !== null) {
+ if ($item['mark'] == true) {
+ $out[] = hsprintf(
+ ' ');
+ } else {
+ $out[] = hsprintf(
+ ' ');
+ }
+ $out[] = phutil_tag(
+ 'input',
+ array(
+ 'type' => 'checkbox',
+ 'checked' => ($item['mark'] ? 'checked' : null),
+ 'disabled' => 'disabled',
+ ));
+ $out[] = ' ';
+ } else {
+ $out[] = hsprintf(' ');
+ }
+
+ // Paragraph breaks inside an item are rendered as two line breaks.
+ $parts = preg_split('/\n{2,}/', $item['text']);
+ foreach ($parts as $key => $part) {
+ if ($key != 0) {
+ $out[] = array(
+ "\n",
+ phutil_tag('br'),
+ phutil_tag('br'),
+ "\n",
+ );
+ }
+ $out[] = $this->applyRules($part);
+ }
+ }
+ }
+
+ if ($item['items']) {
+ // Child lists are rendered without $starts_at, so they use the default.
+ $subitems = $this->renderTree($item['items'], $level + 1, $has_marks);
+ foreach ($subitems as $i) {
+ $out[] = $i;
+ }
+ }
+ if (!$this->getEngine()->isTextMode()) {
+ $out[] = hsprintf(" \n");
+ }
+ }
+
+ if (!$this->getEngine()->isTextMode()) {
+ switch ($style) {
+ case '#':
+ $out[] = hsprintf('');
+ break;
+ case '-':
+ $out[] = hsprintf('');
+ break;
+ }
+ }
+
+ return $out;
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupLiteralBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupLiteralBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupLiteralBlockRule.php
@@ -0,0 +1,93 @@
+ $line) {
+ $line = preg_replace('/^\s*%%%/', '', $line);
+ $line = preg_replace('/%%%(\s*)\z/', '\1', $line);
+ $text[$key] = $line;
+ }
+
+ if ($this->getEngine()->isTextMode()) {
+ return implode('', $text);
+ }
+
+ return phutil_tag(
+ 'p',
+ array(
+ 'class' => 'remarkup-literal',
+ ),
+ phutil_implode_html(phutil_tag('br', array()), $text));
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupNoteBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupNoteBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupNoteBlockRule.php
@@ -0,0 +1,121 @@
+getRegEx(), $lines[$cursor])) {
+ $num_lines++;
+ $cursor++;
+
+ while (isset($lines[$cursor])) {
+ if (trim($lines[$cursor])) {
+ $num_lines++;
+ $cursor++;
+ continue;
+ }
+ break;
+ }
+ }
+
+ return $num_lines;
+ }
+
+ /**
+ * Render a note block: a leading marker word followed by body text.
+ *
+ * @param string Raw block text, beginning with the marker.
+ * @param wild Not used by this rule.
+ * @return wild In text mode, the marker and rendered text joined by a
+ * space; otherwise a `div` tag.
+ */
+ public function markupText($text, $children) {
+ $matches = array();
+ preg_match($this->getRegEx(), $text, $matches);
+
+ // The marker word is captured in one of two named groups; which group
+ // matched decides whether the word is shown in the rendered output.
+ if (idx($matches, 'showword')) {
+ $word = $matches['showword'];
+ $show = true;
+ } else {
+ $word = $matches['hideword'];
+ $show = false;
+ }
+
+ $class_suffix = phutil_utf8_strtolower($word);
+
+ // This is the "(IMPORTANT)" or "NOTE:" part.
+ $word_part = rtrim(substr($text, 0, strlen($matches[0])));
+
+ // This is the actual text.
+ $text_part = substr($text, strlen($matches[0]));
+ $text_part = $this->applyRules(rtrim($text_part));
+
+ $text_mode = $this->getEngine()->isTextMode();
+ $html_mail_mode = $this->getEngine()->isHTMLMailMode();
+ if ($text_mode) {
+ return $word_part.' '.$text_part;
+ }
+
+ if ($show) {
+ $content = array(
+ phutil_tag(
+ 'span',
+ array(
+ 'class' => 'remarkup-note-word',
+ ),
+ $word_part),
+ ' ',
+ $text_part,
+ );
+ } else {
+ $content = $text_part;
+ }
+
+ // HTML mail uses inline styles; other HTML output uses a CSS class.
+ // NOTE(review): in mail mode $attributes is only assigned for these three
+ // suffixes. They match the word list in getRegEx(); adding a word there
+ // without a branch here would leave $attributes undefined.
+ if ($html_mail_mode) {
+ if ($class_suffix == 'important') {
+ $attributes = array(
+ 'style' => 'margin: 16px 0;
+ padding: 12px;
+ border-left: 3px solid #c0392b;
+ background: #f4dddb;',
+ );
+ } else if ($class_suffix == 'note') {
+ $attributes = array(
+ 'style' => 'margin: 16px 0;
+ padding: 12px;
+ border-left: 3px solid #2980b9;
+ background: #daeaf3;',
+ );
+ } else if ($class_suffix == 'warning') {
+ $attributes = array(
+ 'style' => 'margin: 16px 0;
+ padding: 12px;
+ border-left: 3px solid #f1c40f;
+ background: #fdf5d4;',
+ );
+ }
+ } else {
+ $attributes = array(
+ 'class' => 'remarkup-'.$class_suffix,
+ );
+ }
+
+ return phutil_tag(
+ 'div',
+ $attributes,
+ $content);
+ }
+
+ /**
+  * Build the pattern which matches the marker at the start of a note block.
+  *
+  * The pattern accepts two spellings of each marker word and captures the
+  * word in a named group, which @{method:markupText} reads:
+  *
+  *   - `(WORD)` captures into `hideword`;
+  *   - `WORD:` captures into `showword`.
+  *
+  * Whitespace after the marker is consumed as part of the match.
+  *
+  * @return string PCRE pattern.
+  */
+ private function getRegEx() {
+   $words = array(
+     'NOTE',
+     'IMPORTANT',
+     'WARNING',
+   );
+
+   foreach ($words as $k => $word) {
+     $words[$k] = preg_quote($word, '/');
+   }
+   $words = implode('|', $words);
+
+   // The named groups must be spelled "(?P<name>...)". Without the group
+   // names the pattern does not compile, and markupText() has nothing to
+   // read.
+   return
+     '/^(?:'.
+     '(?:\((?P<hideword>'.$words.')\))'.
+     '|'.
+     '(?:(?P<showword>'.$words.'):))\s*'.
+     '/';
+ }
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupQuotesBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupQuotesBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupQuotesBlockRule.php
@@ -0,0 +1,65 @@
+/', $lines[$pos])) {
+ do {
+ ++$pos;
+ } while (isset($lines[$pos]) && preg_match('/^>/', $lines[$pos]));
+ }
+
+ return ($pos - $cursor);
+ }
+
+ /**
+ * Declare that this rule supports child blocks.
+ *
+ * @return bool Always true.
+ */
+ public function supportsChildBlocks() {
+ return true;
+ }
+
+ /**
+  * Split quoted text into this block's own text and its child text.
+  *
+  * The first character of every line is dropped and the remainders are
+  * joined back together as the child text. This block has no text of its
+  * own.
+  *
+  * @param string Raw quoted text.
+  * @return pair Empty string, then the child text.
+  */
+ public function extractChildText($text) {
+   $child_text = '';
+   foreach (phutil_split_lines($text, true) as $quoted_line) {
+     $child_text .= substr($quoted_line, 1);
+   }
+
+   return array('', $child_text);
+ }
+
+ /**
+  * Render a quote block around its already-rendered children.
+  *
+  * @param string Block text. Not used by this rule.
+  * @param wild Rendered child content.
+  * @return wild In text mode, the children with each line prefixed by a
+  *   quote marker; otherwise a `blockquote` tag.
+  */
+ public function markupText($text, $children) {
+   $engine = $this->getEngine();
+
+   if ($engine->isTextMode()) {
+     $quoted = '';
+     foreach (phutil_split_lines(rtrim($children, "\n")) as $line) {
+       // Lines which are already quoted nest without an extra space.
+       $is_nested = isset($line[0]) && ($line[0] == '>');
+       $quoted .= ($is_nested ? '>' : '> ').$line;
+     }
+     return $quoted;
+   }
+
+   $attributes = array();
+   if ($engine->isHTMLMailMode()) {
+     // Mail output carries its styling inline.
+     $attributes['style'] = implode(
+       ' ',
+       array(
+         'border-left: 3px solid #a7b5bf;',
+         'color: #464c5c;',
+         'font-style: italic;',
+         'margin: 4px 0 12px 0;',
+         'padding: 4px 12px;',
+         'background-color: #f8f9fc;',
+       ));
+   }
+
+   return phutil_tag('blockquote', $attributes, $children);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupReplyBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupReplyBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupReplyBlockRule.php
@@ -0,0 +1,117 @@
+>!/', $lines[$pos])) {
+ do {
+ ++$pos;
+ } while (isset($lines[$pos]) && preg_match('/^>/', $lines[$pos]));
+ }
+
+ return ($pos - $cursor);
+ }
+
+ /**
+ * Declare that this rule supports child blocks.
+ *
+ * @return bool Always true.
+ */
+ public function supportsChildBlocks() {
+ return true;
+ }
+
+ /**
+  * Split a reply block into its header text and its quoted body.
+  *
+  * The header is the first line with its first three characters removed.
+  * The body is every later line with its first character removed, minus any
+  * leading lines which are empty after trimming.
+  *
+  * @param string Raw reply block text.
+  * @return pair Trimmed header text, then the body text.
+  */
+ public function extractChildText($text) {
+   $lines = phutil_split_lines($text, true);
+
+   $head = substr(reset($lines), 3);
+
+   $body = array();
+   $seen_content = false;
+   foreach (array_slice($lines, 1) as $line) {
+     // Remove the caret.
+     $line = substr($line, 1);
+
+     // Strip leading empty lines.
+     if (!$seen_content) {
+       if (!strlen(trim($line))) {
+         continue;
+       }
+       $seen_content = true;
+     }
+
+     $body[] = $line;
+   }
+
+   return array(trim($head), implode('', $body));
+ }
+
+ /**
+ * Render a reply block: a header followed by the quoted body.
+ *
+ * @param string Header text; inline rules are applied to it here.
+ * @param wild Rendered child content (the quoted body).
+ * @return wild In text mode, the header, a blank line, and the body with
+ * each line quote-prefixed; otherwise a `blockquote` tag.
+ */
+ public function markupText($text, $children) {
+ $text = $this->applyRules($text);
+
+ if ($this->getEngine()->isTextMode()) {
+ $children = phutil_split_lines($children, true);
+ foreach ($children as $key => $child) {
+ // Lines which are empty after trimming get a bare ">" with no space.
+ if (strlen(trim($child))) {
+ $children[$key] = '> '.$child;
+ } else {
+ $children[$key] = '>'.$child;
+ }
+ }
+ $children = implode('', $children);
+
+ return $text."\n\n".$children;
+ }
+
+ // HTML mail uses inline styles; other HTML output uses CSS classes.
+ if ($this->getEngine()->isHTMLMailMode()) {
+ $block_attributes = array(
+ 'style' => 'border-left: 3px solid #8C98B8;
+ color: #6B748C;
+ font-style: italic;
+ margin: 4px 0 12px 0;
+ padding: 8px 12px;
+ background-color: #F8F9FC;',
+ );
+ $head_attributes = array(
+ 'style' => 'font-style: normal;
+ padding-bottom: 4px;',
+ );
+ $reply_attributes = array(
+ 'style' => 'margin: 0;
+ padding: 0;
+ border: 0;
+ color: rgb(107, 116, 140);',
+ );
+ } else {
+ $block_attributes = array(
+ 'class' => 'remarkup-reply-block',
+ );
+ $head_attributes = array(
+ 'class' => 'remarkup-reply-head',
+ );
+ $reply_attributes = array(
+ 'class' => 'remarkup-reply-body',
+ );
+ }
+
+ return phutil_tag(
+ 'blockquote',
+ $block_attributes,
+ array(
+ "\n",
+ phutil_tag(
+ 'div',
+ $head_attributes,
+ $text),
+ "\n",
+ phutil_tag(
+ 'div',
+ $reply_attributes,
+ $children),
+ "\n",
+ ));
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupSimpleTableBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupSimpleTableBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupSimpleTableBlockRule.php
@@ -0,0 +1,96 @@
+ cells
+ // instead of cells.
+
+ // If it has other types of cells, it's always a content row.
+
+ // If it has only empty cells, it's an empty row.
+
+ if (strlen($cell)) {
+ if (preg_match('/^--+\z/', $cell)) {
+ $any_header = true;
+ } else {
+ $any_content = true;
+ }
+ }
+
+ $cells[] = array('type' => 'td', 'content' => $this->applyRules($cell));
+ }
+
+ $is_header = ($any_header && !$any_content);
+
+ if (!$is_header) {
+ $rows[] = array('type' => 'tr', 'content' => $cells);
+ } else if ($rows) {
+ // Mark previous row with headings.
+ foreach ($cells as $i => $cell) {
+ if ($cell['content']) {
+ $last_key = last_key($rows);
+ if (!isset($rows[$last_key]['content'][$i])) {
+ // If this row has more cells than the previous row, there may
+ // not be a cell above this one to turn into a .
+ continue;
+ }
+
+ $rows[$last_key]['content'][$i]['type'] = 'th';
+ }
+ }
+ }
+ }
+
+ if (!$rows) {
+ return $this->applyRules($text);
+ }
+
+ return $this->renderRemarkupTable($rows);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTableBlockRule.php
@@ -0,0 +1,107 @@
+/i', $lines[$cursor])) {
+ $num_lines++;
+ $cursor++;
+
+ while (isset($lines[$cursor])) {
+ $num_lines++;
+ if (preg_match('@$@i', $lines[$cursor])) {
+ break;
+ }
+ $cursor++;
+ }
+ }
+
+ return $num_lines;
+ }
+
+ public function markupText($text, $children) {
+ $matches = array();
+
+ if (!preg_match('@^\s*(.*)
$@si', $text, $matches)) {
+ return $this->fail(
+ $text,
+ pht('Bad table (expected %s)', '...
'));
+ }
+
+ $body = $matches[1];
+
+ $row_fragment = '(?:\s*(.*) \s*)';
+ $cell_fragment = '(?:\s*<(td|th)>(.*)(?:td|th)>\s*)';
+
+ // Test that the body contains only valid rows.
+ if (!preg_match('@^'.$row_fragment.'+$@Usi', $body)) {
+ return $this->fail(
+ $body,
+ pht('Bad table syntax (expected rows %s)', '... '));
+ }
+
+ // Capture the rows.
+ $row_regex = '@'.$row_fragment.'@Usi';
+ if (!preg_match_all($row_regex, $body, $matches, PREG_SET_ORDER)) {
+ throw new Exception(
+ pht('Bug in Remarkup tables, parsing fails for input: %s', $text));
+ }
+
+ $out_rows = array();
+
+ $rows = $matches;
+ foreach ($rows as $row) {
+ $content = $row[1];
+
+ // Test that the row contains only valid cells.
+ if (!preg_match('@^'.$cell_fragment.'+$@Usi', $content)) {
+ return $this->fail(
+ $content,
+ pht('Bad table syntax (expected cells %s)', '... '));
+ }
+
+ // Capture the cells.
+ $cell_regex = '@'.$cell_fragment.'@Usi';
+ if (!preg_match_all($cell_regex, $content, $matches, PREG_SET_ORDER)) {
+ throw new Exception(
+ pht('Bug in Remarkup tables, parsing fails for input: %s', $text));
+ }
+
+ $out_cells = array();
+ foreach ($matches as $cell) {
+ $cell_type = $cell[1];
+ $cell_content = $cell[2];
+
+ $out_cells[] = array(
+ 'type' => $cell_type,
+ 'content' => $this->applyRules($cell_content),
+ );
+ }
+
+ $out_rows[] = array(
+ 'type' => 'tr',
+ 'content' => $out_cells,
+ );
+ }
+
+ return $this->renderRemarkupTable($out_rows);
+ }
+
+ /**
+ * Build the output shown in place of a table which could not be parsed.
+ *
+ * @param string Input near the problem. It is truncated to at most 32000
+ * glyphs before being included in the message.
+ * @param string Description of the problem.
+ * @return wild In text mode, the message in parentheses; otherwise the
+ * result of passing the message through hsprintf().
+ */
+ private function fail($near, $message) {
+ $message = sprintf(
+ '%s near: %s',
+ $message,
+ id(new PhutilUTF8StringTruncator())
+ ->setMaximumGlyphs(32000)
+ ->truncateString($near));
+
+ if ($this->getEngine()->isTextMode()) {
+ return '('.$message.')';
+ }
+
+ // NOTE(review): the format string is a bare "%s" as shown here; presumably
+ // it wrapped the message in markup which was lost -- confirm against the
+ // upstream file.
+ return hsprintf('%s', $message);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupTestInterpreterRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTestInterpreterRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupTestInterpreterRule.php
@@ -0,0 +1,17 @@
+getEngine()->isTextMode()) {
+ return $text;
+ }
+
+ return $this->replaceHTML(
+ '@\\*\\*(.+?)\\*\\*@s',
+ array($this, 'applyCallback'),
+ $text);
+ }
+
+ /**
+ * Render the text captured between a pair of "**" markers.
+ *
+ * @param list Regular expression match; index 1 is the captured text.
+ * @return wild Result of passing the captured text through hsprintf().
+ */
+ protected function applyCallback(array $matches) {
+ // NOTE(review): the format string is a bare "%s" as shown here; presumably
+ // it wrapped the text in markup which was lost -- confirm against the
+ // upstream file.
+ return hsprintf('%s', $matches[1]);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupDelRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupDelRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupDelRule.php
@@ -0,0 +1,24 @@
+getEngine()->isTextMode()) {
+ return $text;
+ }
+
+ return $this->replaceHTML(
+ '@(?%s', $matches[1]);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupDocumentLinkRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupDocumentLinkRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupDocumentLinkRule.php
@@ -0,0 +1,175 @@
+getEngine();
+
+ $is_anchor = false;
+ if (strncmp($link, '/', 1) == 0) {
+ $base = $engine->getConfig('uri.base');
+ $base = rtrim($base, '/');
+ $link = $base.$link;
+ } else if (strncmp($link, '#', 1) == 0) {
+ $here = $engine->getConfig('uri.here');
+ $link = $here.$link;
+
+ $is_anchor = true;
+ }
+
+ if ($engine->isTextMode()) {
+ // If present, strip off "mailto:" or "tel:".
+ $link = preg_replace('/^(?:mailto|tel):/', '', $link);
+
+ if (!strlen($name)) {
+ return $link;
+ }
+
+ return $name.' <'.$link.'>';
+ }
+
+ if (!strlen($name)) {
+ $name = $link;
+ $name = preg_replace('/^(?:mailto|tel):/', '', $name);
+ }
+
+ if ($engine->getState('toc')) {
+ return $name;
+ }
+
+ $same_window = $engine->getConfig('uri.same-window', false);
+ if ($same_window) {
+ $target = null;
+ } else {
+ $target = '_blank';
+ }
+
+ // For anchors on the same page, always stay here.
+ if ($is_anchor) {
+ $target = null;
+ }
+
+ return phutil_tag(
+ 'a',
+ array(
+ 'href' => $link,
+ 'class' => 'remarkup-link',
+ 'target' => $target,
+ 'rel' => 'noreferrer',
+ ),
+ $name);
+ }
+
+ /**
+  * Handle a link written in the alternate syntax, where the name is
+  * captured first and the URI second.
+  *
+  * Matches which do not look like deliberate links are returned unchanged.
+  * Everything else is handed to @{method:markupDocumentLink} with the
+  * captures reordered to the order that method expects.
+  *
+  * @param list Regular expression match: index 0 is the whole match,
+  *   index 1 the name, and index 2 the URI.
+  * @return wild The original matched text, or the result of
+  *   @{method:markupDocumentLink}.
+  */
+ public function markupAlternateLink(array $matches) {
+   $uri = trim($matches[2]);
+
+   // An empty URI is never a link. This must be checked before looking at
+   // `$uri[0]`: indexing an empty string raises an "uninitialized string
+   // offset" warning.
+   if (!strlen($uri)) {
+     return $matches[0];
+   }
+
+   // NOTE: We apply some special rules to avoid false positives here. The
+   // major concern is that we do not want to convert `x[0][1](y)` in a
+   // discussion about C source code into a link. To this end, we:
+   //
+   //   - Don't match at word boundaries;
+   //   - require the URI to contain a "/" character or "@" character; and
+   //   - reject URIs which begin with a quote character.
+
+   if ($uri[0] == '"' || $uri[0] == "'" || $uri[0] == '`') {
+     return $matches[0];
+   }
+
+   // "tel:" URIs are allowed through even though they contain neither "/"
+   // nor "@". strncmp() returns nonzero when the prefix does not match.
+   if (strpos($uri, '/') === false &&
+       strpos($uri, '@') === false &&
+       strncmp($uri, 'tel:', 4)) {
+     return $matches[0];
+   }
+
+   return $this->markupDocumentLink(
+     array(
+       $matches[0],
+       $matches[2],
+       $matches[1],
+     ));
+ }
+
+ /**
+  * Turn a document-style link into a stored hyperlink when its target is a
+  * URI, and leave it untouched otherwise.
+  *
+  * @param list Regular expression match: index 0 is the whole match,
+  *   index 1 the link target, and index 2 (optional) the link name.
+  * @return wild The original matched text if the target is not treated as
+  *   a URI; otherwise the token returned by the engine's storeText().
+  */
+ public function markupDocumentLink(array $matches) {
+   $target = trim($matches[1]);
+   $label = trim(idx($matches, 2));
+
+   // If whatever is being linked to begins with "/" or "#", or has "://",
+   // or is "mailto:" or "tel:", treat it as a URI instead of a wiki page.
+   $looks_like_uri = preg_match(
+     '@(^/)|(://)|(^#)|(^(?:mailto|tel):)@',
+     $target);
+   if (!$looks_like_uri) {
+     return $matches[0];
+   }
+
+   // As a special case, skip "[[ / ]]" so that Phriction picks it up as a
+   // link to the Phriction root. It is more useful to be able to use this
+   // syntax to link to the root document than the home page of the install.
+   if ($target == '/') {
+     return $matches[0];
+   }
+
+   // Targets which start with "/" or "#" carry no protocol to check.
+   $is_local = !strncmp('/', $target, 1) || !strncmp('#', $target, 1);
+   if (!$is_local) {
+     $allowed = $this->getEngine()->getConfig(
+       'uri.allowed-protocols',
+       array());
+
+     try {
+       $protocol = id(new PhutilURI($target))->getProtocol();
+       if (!idx($allowed, $protocol)) {
+         // Don't treat this as a URI if it's not an allowed protocol.
+         return $matches[0];
+       }
+     } catch (Exception $ex) {
+       // We can end up here if we try to parse an ambiguous URI, see
+       // T12796.
+       return $matches[0];
+     }
+   }
+
+   $link = $this->renderHyperlink($target, $label);
+   return $this->getEngine()->storeText($link);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupEscapeRemarkupRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupEscapeRemarkupRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupEscapeRemarkupRule.php
@@ -0,0 +1,19 @@
+getEngine()->storeText("\1");
+
+ return str_replace("\1", $replace, $text);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupHighlightRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupHighlightRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupHighlightRule.php
@@ -0,0 +1,37 @@
+getEngine()->isTextMode()) {
+ return $text;
+ }
+
+ return $this->replaceHTML(
+ '@!!(.+?)(!{2,})@',
+ array($this, 'applyCallback'),
+ $text);
+ }
+
+ /**
+ * Render one "!!text!!" match.
+ *
+ * @param list Regular expression match: index 1 is the text after the
+ * opening "!!", and index 2 is the closing run of two or more "!".
+ * @return wild The original match if the text is only exclamation points;
+ * otherwise the text and the surplus "!" passed through hsprintf().
+ */
+ protected function applyCallback(array $matches) {
+ // Remove the two exclamation points that represent syntax.
+ $excitement = substr($matches[2], 2);
+
+ // If the internal content consists of ONLY exclamation points, leave it
+ // untouched so "!!!!!" is five exclamation points instead of one
+ // highlighted exclamation point.
+ if (preg_match('/^!+\z/', $matches[1])) {
+ return $matches[0];
+ }
+
+ // $excitement now has two fewer !'s than we started with.
+ // NOTE(review): the format string is a bare "%s%s" as shown here;
+ // presumably it wrapped the first value in markup which was lost --
+ // confirm against the upstream file.
+ return hsprintf('%s%s',
+ $matches[1], $excitement);
+
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupHyperlinkRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupHyperlinkRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupHyperlinkRule.php
@@ -0,0 +1,122 @@
+" around them get linked exactly, without
+ // the "<>". Angle brackets are basically special and mean "this is a URL
+ // with weird characters". This is assumed to be reasonable because they
+ // don't appear in normal text or normal URLs.
+ $text = preg_replace_callback(
+ '@<(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+?)>@',
+ array($this, 'markupHyperlink'),
+ $text);
+
+ // Anything else we match "ungreedily", which means we'll look for
+ // stuff that's probably punctuation or otherwise not part of the URL and
+ // not link it. This lets someone write "Quick! Go to
+ // http://www.example.com/!". We also apply some paren balancing rules.
+
+ // NOTE: We're explicitly avoiding capturing stored blocks, so text like
+ // `http://www.example.com/[[x | y]]` doesn't get aggressively captured.
+ $text = preg_replace_callback(
+ '@(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+)@',
+ array($this, 'markupHyperlinkUngreedy'),
+ $text);
+
+ return $text;
+ }
+
+ /**
+  * Store a matched URI, as a link if its protocol is allowed and as plain
+  * text otherwise.
+  *
+  * @param list Regular expression match: index 0 is the whole match and
+  *   index 1 is the URI.
+  * @return wild The whole match if the URI cannot be parsed; otherwise a
+  *   token for the stored text or stored link.
+  */
+ protected function markupHyperlink(array $matches) {
+   $raw_uri = $matches[1];
+
+   try {
+     $parsed = new PhutilURI($raw_uri);
+   } catch (Exception $ex) {
+     return $matches[0];
+   }
+
+   $protocol = $parsed->getProtocol();
+
+   $engine = $this->getEngine();
+   $allowed = $engine->getConfig('uri.allowed-protocols', array());
+
+   if (idx($allowed, $protocol)) {
+     return $this->storeRenderedHyperlink($raw_uri);
+   }
+
+   // If this URI doesn't use a whitelisted protocol, don't link it. This
+   // is primarily intended to prevent javascript:// silliness.
+   return $engine->storeText($raw_uri);
+ }
+
+ /**
+ * Render a link and store the result with the engine.
+ *
+ * @param string URI to link.
+ * @return wild Value returned by the engine's storeText().
+ */
+ protected function storeRenderedHyperlink($link) {
+ return $this->getEngine()->storeText($this->renderHyperlink($link));
+ }
+
+ /**
+  * Render a URI as a link whose text is the URI itself.
+  *
+  * @param string URI to link.
+  * @return wild The bare URI in text mode or when the engine's "toc" state
+  *   is set; otherwise an `a` tag.
+  */
+ protected function renderHyperlink($link) {
+   $engine = $this->getEngine();
+
+   // In these modes the URI is emitted as-is, with no tag around it.
+   if ($engine->isTextMode() || $engine->getState('toc')) {
+     return $link;
+   }
+
+   // Links open in a new window unless configured otherwise.
+   $stay_here = $engine->getConfig('uri.same-window', false);
+   $target = $stay_here ? null : '_blank';
+
+   $attributes = array(
+     'href' => $link,
+     'class' => 'remarkup-link',
+     'target' => $target,
+     'rel' => 'noreferrer',
+   );
+
+   return phutil_tag('a', $attributes, $link);
+ }
+
+ /**
+  * Link a URI found in running text, leaving trailing punctuation and an
+  * unbalanced closing paren outside the link.
+  *
+  * @param list Regular expression match: index 1 is the candidate URI.
+  * @return wild The whole match if the trimmed URI cannot be parsed;
+  *   otherwise the marked-up URI followed by whatever was trimmed.
+  */
+ protected function markupHyperlinkUngreedy($matches) {
+   $candidate = $matches[1];
+   $suffix = null;
+
+   // Peel off a trailing run of punctuation.
+   $punctuation = null;
+   if (preg_match('/[;,.:!?]+$/', $candidate, $punctuation)) {
+     $suffix = $punctuation[0];
+     $candidate = substr($candidate, 0, -strlen($suffix));
+   }
+
+   // If there's a closing paren at the end but no balancing open paren in
+   // the URL, don't link the close paren. This is an attempt to gracefully
+   // handle the two common paren cases, Wikipedia links and English language
+   // parentheticals, e.g.:
+   //
+   //  http://en.wikipedia.org/wiki/Noun_(disambiguation)
+   //  (see also http://www.example.com)
+   //
+   // We could apply a craftier heuristic here which tries to actually balance
+   // the parens, but this is probably sufficient.
+   $ends_with_paren = preg_match('/\\)$/', $candidate);
+   if ($ends_with_paren && !preg_match('/\\(/', $candidate)) {
+     $suffix = ')'.$suffix;
+     $candidate = substr($candidate, 0, -1);
+   }
+
+   // Constructing the URI object is only a validity check; the object
+   // itself is not used afterwards.
+   try {
+     $uri = new PhutilURI($candidate);
+   } catch (Exception $ex) {
+     return $matches[0];
+   }
+
+   $linked = $this->markupHyperlink(array(null, $candidate));
+   return hsprintf('%s%s', $linked, $suffix);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupItalicRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupItalicRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupItalicRule.php
@@ -0,0 +1,24 @@
+getEngine()->isTextMode()) {
+ return $text;
+ }
+
+ return $this->replaceHTML(
+ '@(?%s', $matches[1]);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupLinebreaksRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupLinebreaksRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupLinebreaksRule.php
@@ -0,0 +1,13 @@
+getEngine()->isTextMode()) {
+ return $text;
+ }
+
+ return phutil_escape_html_newlines($text);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupMonospaceRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupMonospaceRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupMonospaceRule.php
@@ -0,0 +1,49 @@
+getEngine()->isTextMode()) {
+ $result = $matches[0];
+
+ } else
+ if ($this->getEngine()->isHTMLMailMode()) {
+ $match = isset($matches[2]) ? $matches[2] : $matches[1];
+ $result = phutil_tag(
+ 'tt',
+ array(
+ 'style' => 'background: #ebebeb; font-size: 13px;',
+ ),
+ $match);
+
+ } else {
+ $match = isset($matches[2]) ? $matches[2] : $matches[1];
+ $result = phutil_tag(
+ 'tt',
+ array(
+ 'class' => 'remarkup-monospaced',
+ ),
+ $match);
+ }
+
+ return $this->getEngine()->storeText($result);
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRule.php
@@ -0,0 +1,109 @@
+engine = $engine;
+ return $this;
+ }
+
+ /**
+ * Get the engine stored on this rule.
+ *
+ * @return wild Value of the `engine` property.
+ */
+ public function getEngine() {
+ return $this->engine;
+ }
+
+ /**
+ * Get the priority of this rule.
+ *
+ * @return float Priority; 500.0 in this base implementation.
+ */
+ public function getPriority() {
+ return 500.0;
+ }
+
+ abstract public function apply($text);
+
+ /**
+ * Get a key identifying this rule object.
+ *
+ * @return string Result of spl_object_hash() for this object.
+ */
+ public function getPostprocessKey() {
+ return spl_object_hash($this);
+ }
+
+ /**
+ * Hook which does nothing in this base implementation.
+ *
+ * @return void
+ */
+ public function didMarkupText() {
+ return;
+ }
+
+ /**
+ * Apply a regular expression callback to escaped text and return the result
+ * as safe HTML.
+ *
+ * The text is passed through phutil_escape_html() before matching. Each
+ * match is routed to `$callback` through replaceHTMLCallback().
+ *
+ * @param string Regular expression pattern.
+ * @param callable Callback invoked with each match.
+ * @param wild Text to process.
+ * @return PhutilSafeHTML Result wrapped by phutil_safe_html().
+ */
+ protected function replaceHTML($pattern, $callback, $text) {
+ // Stash the callback on the object so replaceHTMLCallback() can reach it.
+ $this->replaceCallback = $callback;
+ return phutil_safe_html(preg_replace_callback(
+ $pattern,
+ array($this, 'replaceHTMLCallback'),
+ phutil_escape_html($text)));
+ }
+
+ /**
+ * Adapter between preg_replace_callback() and the callback stored by
+ * replaceHTML().
+ *
+ * Every element of the match is wrapped with phutil_safe_html() before the
+ * stored callback is called, and the callback's result is passed through
+ * phutil_escape_html().
+ *
+ * @param list Regular expression match.
+ * @return wild Result of phutil_escape_html() on the callback's return
+ * value.
+ */
+ private function replaceHTMLCallback(array $match) {
+ return phutil_escape_html(call_user_func(
+ $this->replaceCallback,
+ array_map('phutil_safe_html', $match)));
+ }
+
+
+ /**
+  * Safely generate a tag.
+  *
+  * Arbitrary text is not safe to use in tag attributes in Remarkup
+  * contexts: escaping alone does not help, because the text may contain
+  * replacement tokens which are later replaced with markup.
+  *
+  * This method behaves like @{function:phutil_tag}, except that every
+  * non-null attribute value is first checked with @{method:assertFlatText}.
+  *
+  * @param string Tag name.
+  * @param dict Tag attributes.
+  * @param wild Tag content.
+  * @return PhutilSafeHTML Tag object.
+  */
+ protected function newTag($name, array $attrs, $content = null) {
+   $checked = array();
+   foreach ($attrs as $key => $value) {
+     // Null attributes pass through unchanged; everything else must be flat.
+     $checked[$key] = ($value === null)
+       ? null
+       : $this->assertFlatText($value);
+   }
+
+   return phutil_tag($name, $checked, $content);
+ }
+
+ /**
+  * Assert that a text token is flat (it contains no replacement tokens).
+  *
+  * Tokens can be replaced with markup, so using arbitrary input text in a
+  * tag attribute is dangerous. Rule precedence should normally prevent this
+  * from happening; asserting that text is flat before using it as an
+  * attribute is an extra layer of security.
+  *
+  * You can usually call @{method:newTag} instead of calling this method
+  * directly, because @{method:newTag} checks attributes for you.
+  *
+  * @param wild Ostensibly flat text.
+  * @return string Flat text.
+  */
+ protected function assertFlatText($text) {
+   $flat = (string)hsprintf('%s', phutil_safe_html($text));
+
+   // No magic byte means no replacement tokens, so the text is safe to use.
+   if (strpos($flat, PhutilRemarkupBlockStorage::MAGIC_BYTE) === false) {
+     return $flat;
+   }
+
+   throw new Exception(
+     pht(
+       'Remarkup rule precedence is dangerous: rendering text with tokens '.
+       'as flat text!'));
+ }
+
+ /**
+  * Test whether text is flat, meaning it contains no replacement tokens.
+  *
+  * @param wild Ostensibly flat text.
+  * @return bool True if the text is flat.
+  */
+ protected function isFlatText($text) {
+   $flat = (string)hsprintf('%s', phutil_safe_html($text));
+
+   $token_at = strpos($flat, PhutilRemarkupBlockStorage::MAGIC_BYTE);
+   $has_token = ($token_at !== false);
+
+   return !$has_token;
+ }
+
+}
diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupUnderlineRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupUnderlineRule.php
new file mode 100644
--- /dev/null
+++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupUnderlineRule.php
@@ -0,0 +1,24 @@
+getEngine()->isTextMode()) {
+ return $text;
+ }
+
+ return $this->replaceHTML(
+ '@(?%s', $matches[1]);
+ }
+
+}
diff --git a/src/markup/render.php b/src/markup/render.php
new file mode 100644
--- /dev/null
+++ b/src/markup/render.php
@@ -0,0 +1,251 @@
+` tags, if the `rel` attribute is not specified, it
+ * is interpreted as `rel="noreferrer"`.
+ * - When rendering `<a>` tags, the `href` attribute may not begin with
+ * `javascript:`.
+ *
+ * These special cases can not be disabled.
+ *
+ * IMPORTANT: The `$tag` attribute and the keys of the `$attributes` array are
+ * trusted blindly, and not escaped. You should not pass user data in these
+ * parameters.
+ *
+ * @param string The name of the tag, like `a` or `div`.
+ * @param map A map of tag attributes.
+ * @param wild Content to put in the tag.
+ * @return PhutilSafeHTML Tag object.
+ */
+function phutil_tag($tag, array $attributes = array(), $content = null) {
+  // If the `href` attribute is present, make sure it is not a "javascript:"
+  // URI. We never permit these.
+  if (!empty($attributes['href'])) {
+    // This might be a URI object, so cast it to a string.
+    $href = (string)$attributes['href'];
+
+    if (isset($href[0])) {
+      // Block 'javascript:' hrefs at the tag level: no well-designed
+      // application should ever use them, and they are a potent attack vector.
+
+      // This function is deep in the core and performance sensitive, so we're
+      // doing a cheap version of this test first to avoid calling preg_match()
+      // on URIs which begin with '/' or `#`. These cover essentially all URIs
+      // in Phabricator.
+      if (($href[0] !== '/') && ($href[0] !== '#')) {
+        // Chrome 33 and IE 11 both interpret "javascript\n:" as a Javascript
+        // URI, and all browsers interpret " javascript:" as a Javascript URI,
+        // so be aggressive about looking for "javascript:" in the initial
+        // section of the string.
+
+        $normalized_href = preg_replace('([^a-z0-9/:]+)i', '', $href);
+        if (preg_match('/^javascript:/i', $normalized_href)) {
+          throw new Exception(
+            pht(
+              "Attempting to render a tag with an '%s' attribute that begins ".
+              "with '%s'. This is either a serious security concern or a ".
+              "serious architecture concern. Seek urgent remedy.",
+              'href',
+              'javascript:'));
+        }
+      }
+    }
+  }
+
+  // For tags which can't self-close, treat null as the empty string -- for
+  // example, always render `<div></div>`, never `<div />`.
+  static $self_closing_tags = array(
+    'area' => true,
+    'base' => true,
+    'br' => true,
+    'col' => true,
+    'command' => true,
+    'embed' => true,
+    'frame' => true,
+    'hr' => true,
+    'img' => true,
+    'input' => true,
+    'keygen' => true,
+    'link' => true,
+    'meta' => true,
+    'param' => true,
+    'source' => true,
+    'track' => true,
+    'wbr' => true,
+  );
+
+  // Attribute values are escaped; attribute names are trusted as-is. A null
+  // value omits the attribute entirely.
+  $attr_string = '';
+  foreach ($attributes as $k => $v) {
+    if ($v === null) {
+      continue;
+    }
+    $v = phutil_escape_html($v);
+    $attr_string .= ' '.$k.'="'.$v.'"';
+  }
+
+  if ($content === null) {
+    if (isset($self_closing_tags[$tag])) {
+      return new PhutilSafeHTML('<'.$tag.$attr_string.' />');
+    } else {
+      $content = '';
+    }
+  } else {
+    $content = phutil_escape_html($content);
+  }
+
+  // Emit a proper closing tag ("</tag>"); without the "</" prefix the output
+  // would be malformed markup such as "<div>textdiv>".
+  return new PhutilSafeHTML(
+    '<'.$tag.$attr_string.'>'.$content.'</'.$tag.'>');
+}
+
+/**
+ * Render a `div` tag carrying the given `class` attribute.
+ *
+ * @param wild Value for the `class` attribute.
+ * @param wild Content to put in the tag.
+ * @return PhutilSafeHTML Tag object.
+ */
+function phutil_tag_div($class, $content = null) {
+  $attributes = array(
+    'class' => $class,
+  );
+
+  return phutil_tag('div', $attributes, $content);
+}
+
+/**
+ * Escape a value for inclusion in HTML.
+ *
+ * @{class:PhutilSafeHTML} objects are returned as-is. Objects implementing
+ * @{interface:PhutilSafeHTMLProducerInterface} are asked to produce their
+ * HTML, which is then escaped recursively. Arrays are escaped item by item
+ * and concatenated. Anything else goes through `htmlspecialchars()`.
+ *
+ * @param wild Value to escape.
+ * @return wild Escaped string, or the original safe HTML object.
+ */
+function phutil_escape_html($string) {
+  if ($string instanceof PhutilSafeHTML) {
+    return $string;
+  }
+
+  if ($string instanceof PhutilSafeHTMLProducerInterface) {
+    $produced = $string->producePhutilSafeHTML();
+
+    // Safe HTML, lists and nested producers are all handled by recursing.
+    $can_recurse =
+      ($produced instanceof PhutilSafeHTML) ||
+      is_array($produced) ||
+      ($produced instanceof PhutilSafeHTMLProducerInterface);
+    if ($can_recurse) {
+      return phutil_escape_html($produced);
+    }
+
+    try {
+      assert_stringlike($produced);
+      return phutil_escape_html((string)$produced);
+    } catch (Exception $ex) {
+      throw new Exception(
+        pht(
+          "Object (of class '%s') implements %s but did not return anything ".
+          "renderable from %s.",
+          get_class($string),
+          'PhutilSafeHTMLProducerInterface',
+          'producePhutilSafeHTML()'));
+    }
+  }
+
+  if (is_array($string)) {
+    return implode('', array_map('phutil_escape_html', $string));
+  }
+
+  return htmlspecialchars($string, ENT_QUOTES, 'UTF-8');
+}
+
+/**
+ * Apply `nl2br` to a value through @{method:PhutilSafeHTML::applyFunction}.
+ *
+ * @param wild Value to process.
+ * @return wild Whatever `PhutilSafeHTML::applyFunction()` returns.
+ */
+function phutil_escape_html_newlines($string) {
+  $with_breaks = PhutilSafeHTML::applyFunction('nl2br', $string);
+  return $with_breaks;
+}
+
+/**
+ * Mark a string as safe for use in HTML.
+ *
+ * Values that compare loosely equal to the empty string, and values which are
+ * already @{class:PhutilSafeHTML}, are returned unchanged. Everything else is
+ * wrapped in a new @{class:PhutilSafeHTML}.
+ *
+ * @param wild Value to mark as safe.
+ * @return wild The input itself, or a PhutilSafeHTML wrapping it.
+ */
+function phutil_safe_html($string) {
+  if ($string == '') {
+    return $string;
+  }
+
+  if ($string instanceof PhutilSafeHTML) {
+    return $string;
+  }
+
+  return new PhutilSafeHTML($string);
+}
+
+/**
+ * HTML safe version of `implode()`: the glue and every piece are escaped
+ * with @{function:phutil_escape_html} before being joined.
+ *
+ * @param wild Glue placed between pieces.
+ * @param list Pieces to join.
+ * @return wild Result of @{function:phutil_safe_html} on the joined string.
+ */
+function phutil_implode_html($glue, array $pieces) {
+  $safe_glue = phutil_escape_html($glue);
+  $safe_pieces = array_map('phutil_escape_html', $pieces);
+
+  return phutil_safe_html(implode($safe_glue, $safe_pieces));
+}
+
+/**
+ * Format HTML. This function behaves like `sprintf()`, except that every
+ * argument after the pattern is passed through @{function:phutil_escape_html}
+ * before being substituted, so conversions like %s are properly escaped.
+ *
+ * @param string Format pattern, trusted as HTML.
+ * @param ... Values to escape and substitute into the pattern.
+ * @return PhutilSafeHTML Formatted HTML.
+ */
+function hsprintf($html /* , ... */) {
+  // Everything after the pattern itself is a value to escape.
+  $values = array_slice(func_get_args(), 1);
+  $escaped = array_map('phutil_escape_html', $values);
+
+  return new PhutilSafeHTML(vsprintf($html, $escaped));
+}
+
+
+/**
+ * Escape text for inclusion in a URI or a query parameter. Note that this
+ * method does NOT escape '/', because "%2F" is invalid in paths and Apache
+ * will automatically 404 the page if it's present. This will produce correct
+ * (the URIs will work) and desirable (the URIs will be readable) behavior in
+ * these cases:
+ *
+ * '/path/?param='.phutil_escape_uri($string); # OK: Query Parameter
+ * '/path/to/'.phutil_escape_uri($string); # OK: URI Suffix
+ *
+ * It will potentially produce the WRONG behavior in this special case:
+ *
+ * COUNTEREXAMPLE
+ * '/path/to/'.phutil_escape_uri($string).'/thing/'; # BAD: URI Infix
+ *
+ * In this case, any '/' characters in the string will not be escaped, so you
+ * will not be able to distinguish between the string and the suffix (unless
+ * you have more information, like you know the format of the suffix). For infix
+ * URI components, use @{function:phutil_escape_uri_path_component} instead.
+ *
+ * @param string Some string.
+ * @return string URI encoded string, except for '/'.
+ */
+function phutil_escape_uri($string) {
+  // Percent-encode everything first, then turn encoded slashes back into
+  // literal '/' characters.
+  $encoded = rawurlencode($string);
+  return str_replace('%2F', '/', $encoded);
+}
+
+
+/**
+ * Escape text for inclusion as an infix URI substring. See discussion at
+ * @{function:phutil_escape_uri}. This function covers an unusual special case;
+ * @{function:phutil_escape_uri} is usually the correct function to use.
+ *
+ * This function will escape a string into a format which is safe to put into
+ * a URI path and which does not contain '/' so it can be correctly parsed when
+ * embedded as a URI infix component.
+ *
+ * However, you MUST decode the string with
+ * @{function:phutil_unescape_uri_path_component} before it can be used in the
+ * application.
+ *
+ * @param string Some string.
+ * @return string URI encoded string that is safe for infix composition.
+ */
+function phutil_escape_uri_path_component($string) {
+  // Encode twice, so that one round of decoding still leaves an encoded
+  // string with no literal '/' in it.
+  $encoded_once = rawurlencode($string);
+  return rawurlencode($encoded_once);
+}
+
+
+/**
+ * Unescape text that was escaped by
+ * @{function:phutil_escape_uri_path_component}. See
+ * @{function:phutil_escape_uri} for discussion.
+ *
+ * Note that this function is NOT the inverse of
+ * @{function:phutil_escape_uri_path_component}! It undoes additional escaping
+ * which is added to survive the implied unescaping performed by the webserver
+ * when interpreting the request.
+ *
+ * @param string Some string emitted
+ * from @{function:phutil_escape_uri_path_component} and
+ * then accessed via a web server.
+ * @return string Original string.
+ */
+function phutil_unescape_uri_path_component($string) {
+  // A single round of decoding.
+  $decoded = rawurldecode($string);
+  return $decoded;
+}
diff --git a/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
@@ -0,0 +1,115 @@
+config[$key] = $value;
+ return $this;
+ }
+
+ /**
+ * Pick a language for a filename.
+ *
+ * The configured `filename.map` (if any) is consulted first, then a default
+ * map which uses the file extension. Each map is a dictionary of regular
+ * expressions to either a language name or the numeric index of a capture
+ * group holding the language.
+ *
+ * @param string Filename or path.
+ * @return wild Matched language, or null if no pattern matches.
+ */
+ public function getLanguageFromFilename($filename) {
+   static $default_map = array(
+     // All files which have file extensions that we haven't already matched
+     // map to their extensions.
+     '@\\.([^./]+)$@' => 1,
+   );
+
+   // Configured patterns take precedence over the default map.
+   $maps = array($default_map);
+   if (!empty($this->config['filename.map'])) {
+     array_unshift($maps, $this->config['filename.map']);
+   }
+
+   foreach ($maps as $map) {
+     foreach ($map as $regexp => $lang) {
+       $matches = null;
+       if (!preg_match($regexp, $filename, $matches)) {
+         continue;
+       }
+
+       return is_numeric($lang) ? idx($matches, $lang) : $lang;
+     }
+   }
+
+   return null;
+ }
+
+ /**
+ * Build a future which highlights source in the given language.
+ *
+ * A null language is guessed from the source. Built-in highlighters are
+ * tried first; Pygments is used only when `pygments.enabled` is set and the
+ * language is not plain text; otherwise the default highlighter is used.
+ *
+ * @param string|null Language name, or null to guess.
+ * @param string Source to highlight.
+ * @return Future Future from the selected highlighter.
+ */
+ public function getHighlightFuture($language, $source) {
+   if ($language === null) {
+     $language = PhutilLanguageGuesser::guessLanguage($source);
+   }
+
+   // XHPAST is preferred for PHP whenever the binary is available.
+   if ($language == 'php' && PhutilXHPASTBinary::isAvailable()) {
+     $highlighter = new PhutilXHPASTSyntaxHighlighter();
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'console') {
+     $highlighter = new PhutilConsoleSyntaxHighlighter();
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'diviner' || $language == 'remarkup') {
+     $highlighter = new PhutilDivinerSyntaxHighlighter();
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'rainbow') {
+     $highlighter = new PhutilRainbowSyntaxHighlighter();
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'php') {
+     $highlighter = new PhutilLexerSyntaxHighlighter();
+     $highlighter->setConfig('lexer', new PhutilPHPFragmentLexer());
+     $highlighter->setConfig('language', 'php');
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'py') {
+     $highlighter = new PhutilLexerSyntaxHighlighter();
+     $highlighter->setConfig('lexer', new PhutilPythonFragmentLexer());
+     $highlighter->setConfig('language', 'py');
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'java') {
+     $highlighter = new PhutilLexerSyntaxHighlighter();
+     $highlighter->setConfig('lexer', new PhutilJavaFragmentLexer());
+     $highlighter->setConfig('language', 'java');
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'json') {
+     $highlighter = new PhutilLexerSyntaxHighlighter();
+     $highlighter->setConfig('lexer', new PhutilJSONFragmentLexer());
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   if ($language == 'invisible') {
+     $highlighter = new PhutilInvisibleSyntaxHighlighter();
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   // Don't invoke Pygments for plain text, since it's expensive and has
+   // no effect.
+   $is_plain_text = ($language === 'text' || $language === 'txt');
+   $have_pygments = !empty($this->config['pygments.enabled']);
+   if ($have_pygments && !$is_plain_text) {
+     $highlighter = new PhutilPygmentsSyntaxHighlighter();
+     $highlighter->setConfig('language', $language);
+     return $highlighter->getHighlightFuture($source);
+   }
+
+   $highlighter = new PhutilDefaultSyntaxHighlighter();
+   return $highlighter->getHighlightFuture($source);
+ }
+
+}
diff --git a/src/markup/syntax/engine/PhutilSyntaxHighlighterEngine.php b/src/markup/syntax/engine/PhutilSyntaxHighlighterEngine.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/engine/PhutilSyntaxHighlighterEngine.php
@@ -0,0 +1,19 @@
+getHighlightFuture($language, $source)->resolve();
+ } catch (PhutilSyntaxHighlighterException $ex) {
+ return id(new PhutilDefaultSyntaxHighlighter())
+ ->getHighlightFuture($source)
+ ->resolve();
+ }
+ }
+
+}
diff --git a/src/markup/syntax/engine/__tests__/PhutilDefaultSyntaxHighlighterEngineTestCase.php b/src/markup/syntax/engine/__tests__/PhutilDefaultSyntaxHighlighterEngineTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/engine/__tests__/PhutilDefaultSyntaxHighlighterEngineTestCase.php
@@ -0,0 +1,28 @@
+ 'php',
+ '/x.php' => 'php',
+ 'x.y.php' => 'php',
+ '/x.y/z.php' => 'php',
+ '/x.php/' => null,
+ );
+
+ $engine = new PhutilDefaultSyntaxHighlighterEngine();
+ foreach ($names as $path => $language) {
+ $detect = $engine->getLanguageFromFilename($path);
+ $this->assertEqual(
+ $language,
+ $detect,
+ pht('Language detect for %s', $path));
+ }
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/PhutilConsoleSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilConsoleSyntaxHighlighter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/PhutilConsoleSyntaxHighlighter.php
@@ -0,0 +1,51 @@
+config[$key] = $value;
+ return $this;
+ }
+
+ /**
+ * Highlight console-session text, one line at a time.
+ *
+ * @param string Source text.
+ * @return ImmediateFuture Future wrapping the processed lines, joined with
+ * newlines.
+ */
+ public function getHighlightFuture($source) {
+ // True while the previous line ended in a backslash, meaning the current
+ // line is a continuation of a command.
+ $in_command = false;
+ $lines = explode("\n", $source);
+ foreach ($lines as $key => $line) {
+ $matches = null;
+
+ // Parse commands like this:
+ //
+ // some/path/ $ ./bin/example # Do things
+ //
+ // ...into path, command, and comment components.
+
+ // On a continuation line the path group is empty and any text counts as
+ // the command; otherwise a "$ " prompt is required.
+ $pattern =
+ '@'.
+ ($in_command ? '()(.*?)' : '^(\S+[\\\\/] )?([$] .*?)').
+ '(#.*|\\\\)?$@';
+
+ if (preg_match($pattern, $line, $matches)) {
+ // NOTE(review): these format strings contain no markup, so the path,
+ // command and comment are emitted escaped but unstyled -- confirm that
+ // wrapper tags were not lost from the patterns.
+ $lines[$key] = hsprintf(
+ '%s%s%s',
+ $matches[1],
+ $matches[2],
+ (!empty($matches[3])
+ ? hsprintf('%s', $matches[3])
+ : ''));
+ // A trailing backslash continues the command onto the next line.
+ $in_command = (idx($matches, 3) == '\\');
+ } else {
+ // Not a command line: pass the text through hsprintf() as-is.
+ $lines[$key] = hsprintf('%s', $line);
+ }
+ }
+ $lines = phutil_implode_html("\n", $lines);
+
+ return new ImmediateFuture($lines);
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/PhutilDefaultSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilDefaultSyntaxHighlighter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/PhutilDefaultSyntaxHighlighter.php
@@ -0,0 +1,14 @@
+config[$key] = $value;
+ return $this;
+ }
+
+ /**
+ * Highlight documentation-style source.
+ *
+ * The source is escaped, split into blocks on blank lines, and each block
+ * matching the selection pattern below is passed through highlightBlock().
+ *
+ * @param string Source text.
+ * @return ImmediateFuture Future wrapping the highlighted markup.
+ */
+ public function getHighlightFuture($source) {
+   $escaped = phutil_escape_html($source);
+
+   // This highlighter isn't perfect but tries to do an okay job at getting
+   // some of the basics at least. There's lots of room for improvement.
+
+   $rendered = array();
+   foreach (explode("\n\n", $escaped) as $block) {
+     if (preg_match('/^[^ ](?! )/m', $block)) {
+       $rendered[] = $this->highlightBlock($block);
+     } else {
+       $rendered[] = $block;
+     }
+   }
+
+   $markup = phutil_safe_html(implode("\n\n", $rendered));
+   return new ImmediateFuture($markup);
+ }
+
+ private function highlightBlock($source) {
+ // Highlight "@{class:...}" links to other documentation pages.
+ $source = $this->highlightPattern('/@{([\w@]+?):([^}]+?)}/', $source, 'nc');
+
+ // Highlight "@title", "@group", etc.
+ $source = $this->highlightPattern('/^@(\w+)/m', $source, 'k');
+
+ // Highlight bold, italic and monospace.
+ $source = $this->highlightPattern('@\\*\\*(.+?)\\*\\*@s', $source, 's');
+ $source = $this->highlightPattern('@(?highlightPattern(
+ '@##([\s\S]+?)##|\B`(.+?)`\B@',
+ $source,
+ 's');
+
+ // Highlight stuff that looks like headers.
+ $source = $this->highlightPattern('/^=(.*)$/m', $source, 'nv');
+
+ return $source;
+ }
+
+ /**
+ * Run every match of a pattern through replacePattern().
+ *
+ * @param string Regular expression.
+ * @param string Source text.
+ * @param string Class name, stored on the instance as `replaceClass`.
+ * @return string Result of `preg_replace_callback()`.
+ */
+ private function highlightPattern($regexp, $source, $class) {
+   // The callback only receives the match, so the class is stored on the
+   // instance before it runs.
+   $this->replaceClass = $class;
+
+   $callback = array($this, 'replacePattern');
+   return preg_replace_callback($regexp, $callback, $source);
+ }
+
+ /**
+ * Callback for `preg_replace_callback()`: wrap a match in `<span>` tags
+ * carrying the class stored in `replaceClass` by highlightPattern().
+ *
+ * The match is expected to already be HTML-escaped; it is inserted into the
+ * markup verbatim.
+ *
+ * @param list Match data; only the full match (index 0) is used.
+ * @return string Replacement markup.
+ */
+ public function replacePattern($matches) {
+
+   // NOTE: The goal here is to make sure a <span> never crosses a newline,
+   // so every line of a multi-line match gets its own wrapper.
+
+   $open = '<span class="'.$this->replaceClass.'">';
+   $close = '</span>';
+
+   $content = explode("\n", $matches[0]);
+   foreach ($content as $key => $line) {
+     $content[$key] = $open.$line.$close;
+   }
+   return implode("\n", $content);
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/PhutilInvisibleSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilInvisibleSyntaxHighlighter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/PhutilInvisibleSyntaxHighlighter.php
@@ -0,0 +1,43 @@
+config[$key] = $value;
+ return $this;
+ }
+
+ /**
+ * Make control characters visible.
+ *
+ * Each byte in the range 0x00-0x1F is replaced with a `span` of class
+ * `invisible` containing the symbol at code point 0x2400 plus the byte
+ * value. Newlines are additionally kept after their symbol so the line
+ * structure is preserved. All other bytes pass through.
+ *
+ * @param string Source text.
+ * @return ImmediateFuture Future wrapping the resulting markup.
+ */
+ public function getHighlightFuture($source) {
+   // Map each control byte to its replacement symbol.
+   $symbols = array();
+   for ($byte = 0x0; $byte <= 0x1F; $byte++) {
+     $symbols[chr($byte)] = $this->decimalToHtmlEntityDecoded(0x2400 + $byte);
+   }
+
+   $pieces = array();
+   foreach (str_split($source) as $character) {
+     if (!isset($symbols[$character])) {
+       $pieces[] = $character;
+       continue;
+     }
+
+     $pieces[] = phutil_tag(
+       'span',
+       array('class' => 'invisible'),
+       $symbols[$character]);
+
+     if ($character === "\n") {
+       $pieces[] = $character;
+     }
+   }
+
+   return new ImmediateFuture(phutil_implode_html('', $pieces));
+ }
+
+ /**
+ * Convert a decimal code point into the corresponding UTF-8 character by
+ * decoding its numeric HTML entity (for example, 9216 becomes the decoded
+ * form of "&#9216;").
+ *
+ * @param int Decimal code point.
+ * @return string Decoded character.
+ */
+ private function decimalToHtmlEntityDecoded($dec) {
+   // The "&#" prefix is what makes this a numeric character reference;
+   // without it the input is returned undecoded. The encoding is given
+   // explicitly so the result does not depend on the PHP default charset.
+   return html_entity_decode("&#{$dec};", ENT_QUOTES, 'UTF-8');
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/PhutilLexerSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilLexerSyntaxHighlighter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/PhutilLexerSyntaxHighlighter.php
@@ -0,0 +1,72 @@
+config[$key] = $value;
+ return $this;
+ }
+
+ /**
+ * Highlight source with the configured fragment lexer.
+ *
+ * Reads the `lexer` and `language` configuration values. Every typed token
+ * becomes a `span` whose class is the token type; untyped tokens are emitted
+ * as plain text. Tokens spanning several lines are split so that no tag
+ * crosses a newline.
+ *
+ * @param string Source text.
+ * @return ImmediateFuture Future wrapping the highlighted markup.
+ */
+ public function getHighlightFuture($source) {
+   $state = 'start';
+   $lang = idx($this->config, 'language');
+
+   // A PHP snippet with no open tag anywhere would not be recognized as code
+   // from the 'start' state, so begin lexing in the 'php' state instead.
+   if ($lang == 'php') {
+     if (strpos($source, '<?') === false) {
+       $state = 'php';
+     }
+   }
+
+   $lexer = idx($this->config, 'lexer');
+   $tokens = $lexer->getTokens($source, $state);
+   $tokens = $lexer->mergeTokens($tokens);
+
+   $result = array();
+   foreach ($tokens as $token) {
+     list($type, $value, $context) = $token;
+
+     // Only these token types expose their text as a symbol name.
+     $data_name = null;
+     switch ($type) {
+       case 'nc':
+       case 'nf':
+       case 'na':
+         $data_name = $value;
+         break;
+     }
+
+     if (strpos($value, "\n") !== false) {
+       $value = explode("\n", $value);
+     } else {
+       $value = array($value);
+     }
+     foreach ($value as $part) {
+       if (strlen($part)) {
+         if ($type) {
+           $result[] = phutil_tag(
+             'span',
+             array(
+               'class' => $type,
+               'data-symbol-context' => $context,
+               'data-symbol-name' => $data_name,
+             ),
+             $part);
+         } else {
+           $result[] = $part;
+         }
+       }
+       $result[] = "\n";
+     }
+
+     // Throw away the last "\n".
+     array_pop($result);
+   }
+
+   $result = phutil_implode_html('', $result);
+
+   return new ImmediateFuture($result);
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php
@@ -0,0 +1,216 @@
+config[$key] = $value;
+ return $this;
+ }
+
+ public function getHighlightFuture($source) {
+ $language = idx($this->config, 'language');
+
+ if (preg_match('/\r(?!\n)/', $source)) {
+ // TODO: Pygments converts "\r" newlines into "\n" newlines, so we can't
+ // use it on files with "\r" newlines. If we have "\r" not followed by
+ // "\n" in the file, skip highlighting.
+ $language = null;
+ }
+
+ if ($language) {
+ $language = $this->getPygmentsLexerNameFromLanguageName($language);
+ $future = new ExecFuture(
+ 'pygmentize -O encoding=utf-8 -O stripnl=False -f html -l %s',
+ $language);
+ $scrub = false;
+ if ($language == 'php' && strpos($source, '') === false) {
+ $source = "write($source);
+ return new PhutilDefaultSyntaxHighlighterEnginePygmentsFuture(
+ $future,
+ $source,
+ $scrub);
+ }
+
+ return id(new PhutilDefaultSyntaxHighlighter())
+ ->getHighlightFuture($source);
+ }
+
+ /**
+ * Translate a language name into a Pygments lexer name.
+ *
+ * Keys are matched exactly, so case matters (`R` and `r` map to different
+ * lexers). `idx()` is given the language itself as its default, so names
+ * with no entry in the map presumably pass through unchanged.
+ *
+ * @param string Language name.
+ * @return string Lexer name.
+ */
+ private function getPygmentsLexerNameFromLanguageName($language) {
+ static $map = array(
+ 'adb' => 'ada',
+ 'ads' => 'ada',
+ 'ahkl' => 'ahk',
+ 'as' => 'as3',
+ 'asax' => 'aspx-vb',
+ 'ascx' => 'aspx-vb',
+ 'ashx' => 'aspx-vb',
+ 'ASM' => 'nasm',
+ 'asm' => 'nasm',
+ 'asmx' => 'aspx-vb',
+ 'aspx' => 'aspx-vb',
+ 'autodelegate' => 'myghty',
+ 'autohandler' => 'mason',
+ 'aux' => 'tex',
+ 'axd' => 'aspx-vb',
+ 'b' => 'brainfuck',
+ 'bas' => 'vb.net',
+ 'bf' => 'brainfuck',
+ 'bmx' => 'blitzmax',
+ 'c++' => 'cpp',
+ 'c++-objdump' => 'cpp-objdump',
+ 'cc' => 'cpp',
+ 'cfc' => 'cfm',
+ 'cfg' => 'ini',
+ 'cfml' => 'cfm',
+ 'cl' => 'common-lisp',
+ 'clj' => 'clojure',
+ 'cmd' => 'bat',
+ 'coffee' => 'coffee-script',
+ 'cs' => 'csharp',
+ 'csh' => 'tcsh',
+ 'cw' => 'redcode',
+ 'cxx' => 'cpp',
+ 'cxx-objdump' => 'cpp-objdump',
+ 'darcspatch' => 'dpatch',
+ 'def' => 'modula2',
+ 'dhandler' => 'mason',
+ 'di' => 'd',
+ 'duby' => 'rb',
+ 'dyl' => 'dylan',
+ 'ebuild' => 'bash',
+ 'eclass' => 'bash',
+ 'el' => 'common-lisp',
+ 'eps' => 'postscript',
+ 'erl' => 'erlang',
+ 'erl-sh' => 'erl',
+ 'f' => 'fortran',
+ 'f90' => 'fortran',
+ 'feature' => 'Cucumber',
+ 'fhtml' => 'velocity',
+ 'flx' => 'felix',
+ 'flxh' => 'felix',
+ 'frag' => 'glsl',
+ 'g' => 'antlr-ruby',
+ 'G' => 'antlr-ruby',
+ 'gdc' => 'gooddata-cl',
+ 'gemspec' => 'rb',
+ 'geo' => 'glsl',
+ 'GNUmakefile' => 'make',
+ 'h' => 'c',
+ 'h++' => 'cpp',
+ 'hh' => 'cpp',
+ 'hpp' => 'cpp',
+ 'hql' => 'sql',
+ 'hrl' => 'erlang',
+ 'hs' => 'haskell',
+ 'htaccess' => 'apacheconf',
+ 'htm' => 'html',
+ 'html' => 'html+evoque',
+ 'hxx' => 'cpp',
+ 'hy' => 'hybris',
+ 'hyb' => 'hybris',
+ 'ik' => 'ioke',
+ 'inc' => 'pov',
+ 'j' => 'objective-j',
+ 'jbst' => 'duel',
+ 'kid' => 'genshi',
+ 'ksh' => 'bash',
+ 'less' => 'css',
+ 'lgt' => 'logtalk',
+ 'lisp' => 'common-lisp',
+ 'll' => 'llvm',
+ 'm' => 'objective-c',
+ 'mak' => 'make',
+ 'Makefile' => 'make',
+ 'makefile' => 'make',
+ 'man' => 'groff',
+ 'mao' => 'mako',
+ 'mc' => 'mason',
+ 'md' => 'minid',
+ 'mhtml' => 'mason',
+ 'mi' => 'mason',
+ 'ml' => 'ocaml',
+ 'mli' => 'ocaml',
+ 'mll' => 'ocaml',
+ 'mly' => 'ocaml',
+ 'mm' => 'objective-c',
+ 'mo' => 'modelica',
+ 'mod' => 'modula2',
+ 'moo' => 'moocode',
+ 'mu' => 'mupad',
+ 'myt' => 'myghty',
+ 'ns2' => 'newspeak',
+ 'pas' => 'delphi',
+ 'patch' => 'diff',
+ 'phtml' => 'html+php',
+ 'pl' => 'prolog',
+ 'plot' => 'gnuplot',
+ 'plt' => 'gnuplot',
+ 'pm' => 'perl',
+ 'po' => 'pot',
+ 'pp' => 'puppet',
+ 'pro' => 'prolog',
+ 'proto' => 'protobuf',
+ 'ps' => 'postscript',
+ 'pxd' => 'cython',
+ 'pxi' => 'cython',
+ 'py' => 'python',
+ 'pyw' => 'python',
+ 'pyx' => 'cython',
+ 'R' => 'splus',
+ 'r' => 'rebol',
+ 'r3' => 'rebol',
+ 'rake' => 'rb',
+ 'Rakefile' => 'rb',
+ 'rbw' => 'rb',
+ 'rbx' => 'rb',
+ 'rest' => 'rst',
+ 'rl' => 'ragel-em',
+ 'robot' => 'robotframework',
+ 'Rout' => 'rconsole',
+ 'rss' => 'xml',
+ 's' => 'gas',
+ 'S' => 'splus',
+ 'sc' => 'python',
+ 'scm' => 'scheme',
+ 'SConscript' => 'python',
+ 'SConstruct' => 'python',
+ 'scss' => 'css',
+ 'sh' => 'bash',
+ 'sh-session' => 'console',
+ 'spt' => 'cheetah',
+ 'sqlite3-console' => 'sqlite3',
+ 'st' => 'smalltalk',
+ 'sv' => 'v',
+ 'tac' => 'python',
+ 'tmpl' => 'cheetah',
+ 'toc' => 'tex',
+ 'tpl' => 'smarty',
+ 'txt' => 'text',
+ 'vapi' => 'vala',
+ 'vb' => 'vb.net',
+ 'vert' => 'glsl',
+ 'vhd' => 'vhdl',
+ 'vimrc' => 'vim',
+ 'vm' => 'velocity',
+ 'weechatlog' => 'irc',
+ 'wlua' => 'lua',
+ 'wsdl' => 'xml',
+ 'xhtml' => 'html',
+ 'xml' => 'xml+evoque',
+ 'xqy' => 'xquery',
+ 'xsd' => 'xml',
+ 'xsl' => 'xslt',
+ 'xslt' => 'xml',
+ 'yml' => 'yaml',
+ );
+
+ // Look the name up, using the name itself as the default.
+ return idx($map, $language, $language);
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/PhutilRainbowSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilRainbowSyntaxHighlighter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/PhutilRainbowSyntaxHighlighter.php
@@ -0,0 +1,46 @@
+config[$key] = $value;
+ return $this;
+ }
+
+ /**
+ * Color each character of the source with the next class in a repeating
+ * seven-entry palette. Spaces and newlines are emitted unchanged and do not
+ * advance the palette.
+ *
+ * @param string Source text.
+ * @return ImmediateFuture Future wrapping the colored markup.
+ */
+ public function getHighlightFuture($source) {
+   $palette = array(
+     'rbw_r',
+     'rbw_o',
+     'rbw_y',
+     'rbw_g',
+     'rbw_b',
+     'rbw_i',
+     'rbw_v',
+   );
+   $palette_size = count($palette);
+   $position = 0;
+
+   $pieces = array();
+   foreach (phutil_utf8v($source) as $character) {
+     $is_blank = ($character == ' ' || $character == "\n");
+     if ($is_blank) {
+       $pieces[] = $character;
+       continue;
+     }
+
+     $pieces[] = phutil_tag(
+       'span',
+       array('class' => $palette[$position]),
+       $character);
+     $position = ($position + 1) % $palette_size;
+   }
+
+   return new ImmediateFuture(phutil_implode_html('', $pieces));
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/PhutilSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilSyntaxHighlighter.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/PhutilSyntaxHighlighter.php
@@ -0,0 +1,6 @@
+setConfig('language', 'json')
+ ->setConfig('lexer', new PhutilJSONFragmentLexer());
+
+ $path = dirname(__FILE__).'/data/jsonfragment/';
+ foreach (Filesystem::listDirectory($path, $include_hidden = false) as $f) {
+ if (preg_match('/.test$/', $f)) {
+ $expect = preg_replace('/.test$/', '.expect', $f);
+ $source = Filesystem::readFile($path.'/'.$f);
+
+ $this->assertEqual(
+ Filesystem::readFile($path.'/'.$expect),
+ (string)$highlighter->getHighlightFuture($source)->resolve(),
+ $f);
+ }
+ }
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/__tests__/PhutilPHPFragmentLexerHighlighterTestCase.php b/src/markup/syntax/highlighter/__tests__/PhutilPHPFragmentLexerHighlighterTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/PhutilPHPFragmentLexerHighlighterTestCase.php
@@ -0,0 +1,25 @@
+setConfig('language', 'php');
+ $highlighter->setConfig('lexer', new PhutilPHPFragmentLexer());
+
+
+ $path = dirname(__FILE__).'/phpfragment/';
+ foreach (Filesystem::listDirectory($path, $include_hidden = false) as $f) {
+ if (preg_match('/.test$/', $f)) {
+ $expect = preg_replace('/.test$/', '.expect', $f);
+ $source = Filesystem::readFile($path.'/'.$f);
+
+ $this->assertEqual(
+ Filesystem::readFile($path.'/'.$expect),
+ (string)$highlighter->getHighlightFuture($source)->resolve(),
+ $f);
+ }
+ }
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/__tests__/PhutilXHPASTSyntaxHighlighterTestCase.php b/src/markup/syntax/highlighter/__tests__/PhutilXHPASTSyntaxHighlighterTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/PhutilXHPASTSyntaxHighlighterTestCase.php
@@ -0,0 +1,39 @@
+getHighlightFuture($source);
+ return $future->resolve();
+ }
+
+ /**
+ * Read a fixture from the `xhpast/` directory next to this test file.
+ *
+ * @param string Fixture file name.
+ * @return string File contents.
+ */
+ private function read($file) {
+   $fixtures = dirname(__FILE__).'/xhpast/';
+   return Filesystem::readFile($fixtures.$file);
+ }
+
+ /**
+ * Compare highlighted `.source` fixtures against their `.expect` files.
+ */
+ public function testBuiltinClassnames() {
+   // Each case: fixture base name, whether to rtrim() the expectation, and
+   // the assertion message.
+   $cases = array(
+     array(
+       'builtin-classname',
+       false,
+       pht('Builtin classnames should not be marked as linkable symbols.'),
+     ),
+     array(
+       'trailing-comment',
+       true,
+       pht('Trailing comments should not be dropped.'),
+     ),
+     array(
+       'multiline-token',
+       false,
+       pht('Multi-line tokens should be split across lines.'),
+     ),
+     array(
+       'leading-whitespace',
+       false,
+       pht('Snippets with leading whitespace should be preserved.'),
+     ),
+     array(
+       'no-leading-whitespace',
+       false,
+       pht('Snippets with no leading whitespace should be preserved.'),
+     ),
+   );
+
+   foreach ($cases as $case) {
+     list($fixture, $trim_expect, $message) = $case;
+
+     $expect = $this->read($fixture.'.expect');
+     if ($trim_expect) {
+       $expect = rtrim($expect);
+     }
+     $actual = (string)$this->highlight($this->read($fixture.'.source'));
+
+     $this->assertEqual($expect, $actual, $message);
+   }
+ }
+
+}
diff --git a/src/markup/syntax/highlighter/__tests__/data/jsonfragment/basics.expect b/src/markup/syntax/highlighter/__tests__/data/jsonfragment/basics.expect
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/data/jsonfragment/basics.expect
@@ -0,0 +1,12 @@
+{
+ "key": 3.5,
+ "true": true,
+ "false": false,
+ "null": null,
+ "list": [1, 2, 3],
+ "object": {
+ "k1": "v1"
+ },
+ "numbers": [0e1, 1e-1, -1e-1, -1e+1],
+ "\"\u1234'abc[]{}..."
+}
diff --git a/src/markup/syntax/highlighter/__tests__/data/jsonfragment/basics.test b/src/markup/syntax/highlighter/__tests__/data/jsonfragment/basics.test
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/data/jsonfragment/basics.test
@@ -0,0 +1,12 @@
+{
+ "key": 3.5,
+ "true": true,
+ "false": false,
+ "null": null,
+ "list": [1, 2, 3],
+ "object": {
+ "k1": "v1"
+ },
+ "numbers": [0e1, 1e-1, -1e-1, -1e+1],
+ "\"\u1234'abc[]{}..."
+}
diff --git a/src/markup/syntax/highlighter/__tests__/phpfragment/abuse.expect b/src/markup/syntax/highlighter/__tests__/phpfragment/abuse.expect
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/phpfragment/abuse.expect
@@ -0,0 +1,16 @@
+<?
+
+// comment? comment! ?>
+
+data
+
+<?php
+
+__halt_compiler /* ! */ ( // )
+) /* ;;;; */
+
+;
+
+data data
+<?php
+data
diff --git a/src/markup/syntax/highlighter/__tests__/phpfragment/abuse.test b/src/markup/syntax/highlighter/__tests__/phpfragment/abuse.test
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/phpfragment/abuse.test
@@ -0,0 +1,16 @@
+
+
+// comment? comment! ?>
+
+data
+
+public function f() {
+ ExampleClass::EXAMPLE_CONSTANT;
+ ExampleClass::exampleMethod();
+ example_function();
+}
diff --git a/src/markup/syntax/highlighter/__tests__/phpfragment/basics.test b/src/markup/syntax/highlighter/__tests__/phpfragment/basics.test
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/phpfragment/basics.test
@@ -0,0 +1,5 @@
+public function f() {
+ ExampleClass::EXAMPLE_CONSTANT;
+ ExampleClass::exampleMethod();
+ example_function();
+}
diff --git a/src/markup/syntax/highlighter/__tests__/phpfragment/leading-whitespace.expect b/src/markup/syntax/highlighter/__tests__/phpfragment/leading-whitespace.expect
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/phpfragment/leading-whitespace.expect
@@ -0,0 +1,3 @@
+ foreach ($x as $y) {
+ z();
+ }
diff --git a/src/markup/syntax/highlighter/__tests__/phpfragment/leading-whitespace.test b/src/markup/syntax/highlighter/__tests__/phpfragment/leading-whitespace.test
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/phpfragment/leading-whitespace.test
@@ -0,0 +1,3 @@
+ foreach ($x as $y) {
+ z();
+ }
diff --git a/src/markup/syntax/highlighter/__tests__/phpfragment/no-leading-whitespace.expect b/src/markup/syntax/highlighter/__tests__/phpfragment/no-leading-whitespace.expect
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/phpfragment/no-leading-whitespace.expect
@@ -0,0 +1,3 @@
+foreach ($x as $y) {
+ z();
+}
diff --git a/src/markup/syntax/highlighter/__tests__/phpfragment/no-leading-whitespace.test b/src/markup/syntax/highlighter/__tests__/phpfragment/no-leading-whitespace.test
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/phpfragment/no-leading-whitespace.test
@@ -0,0 +1,3 @@
+foreach ($x as $y) {
+ z();
+}
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/builtin-classname.expect b/src/markup/syntax/highlighter/__tests__/xhpast/builtin-classname.expect
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/builtin-classname.expect
@@ -0,0 +1,10 @@
+<?php
+
+class C {
+ public function f() {
+ D::X;
+ self::X;
+ parent::X;
+ static::X;
+ }
+}
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/builtin-classname.source b/src/markup/syntax/highlighter/__tests__/xhpast/builtin-classname.source
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/builtin-classname.source
@@ -0,0 +1,10 @@
+foreach ($x as $y) {
+ z();
+ }
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/leading-whitespace.source b/src/markup/syntax/highlighter/__tests__/xhpast/leading-whitespace.source
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/leading-whitespace.source
@@ -0,0 +1,3 @@
+ foreach ($x as $y) {
+ z();
+ }
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/multiline-token.expect b/src/markup/syntax/highlighter/__tests__/xhpast/multiline-token.expect
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/multiline-token.expect
@@ -0,0 +1,5 @@
+<?php
+
+/* this comment
+extends across
+multiple lines */
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/multiline-token.source b/src/markup/syntax/highlighter/__tests__/xhpast/multiline-token.source
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/multiline-token.source
@@ -0,0 +1,5 @@
+foreach ($x as $y) {
+ z();
+}
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/no-leading-whitespace.source b/src/markup/syntax/highlighter/__tests__/xhpast/no-leading-whitespace.source
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/no-leading-whitespace.source
@@ -0,0 +1,3 @@
+foreach ($x as $y) {
+ z();
+}
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/trailing-comment.expect b/src/markup/syntax/highlighter/__tests__/xhpast/trailing-comment.expect
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/trailing-comment.expect
@@ -0,0 +1,3 @@
+<?php
+// xyz
+
diff --git a/src/markup/syntax/highlighter/__tests__/xhpast/trailing-comment.source b/src/markup/syntax/highlighter/__tests__/xhpast/trailing-comment.source
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/__tests__/xhpast/trailing-comment.source
@@ -0,0 +1,2 @@
+source = $source;
+ $this->scrub = $scrub;
+ }
+
+  /**
+   * Turn the output of the Pygments subprocess into highlighted markup.
+   *
+   * @param  list  Result tuple: exit code, stdout, stderr.
+   * @return wild  Value produced by phutil_safe_html() for the cleaned output.
+   * @throws PhutilSyntaxHighlighterException If the process failed or wrote
+   *         nothing to stdout.
+   */
+  protected function didReceiveResult($result) {
+    list($err, $stdout, $stderr) = $result;
+
+    if (!$err && strlen($stdout)) {
+      // Strip off fluff Pygments adds: its HTML formatter wraps the output in
+      // a "highlight" <div> and a <pre>; keep only what is inside.
+      $stdout = preg_replace(
+        '@^<div class="highlight"><pre>(.*)</pre></div>\s*$@s',
+        '\1',
+        $stdout);
+      if ($this->scrub) {
+        // Drop the first line of output.
+        // NOTE(review): presumably this is a marker line prepended to the
+        // source before highlighting -- the prepend is not visible here.
+        $stdout = preg_replace('/^.*\n/', '', $stdout);
+      }
+      return phutil_safe_html($stdout);
+    }
+
+    throw new PhutilSyntaxHighlighterException($stderr, $err);
+  }
+
+}
diff --git a/src/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php b/src/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php
new file mode 100644
--- /dev/null
+++ b/src/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php
@@ -0,0 +1,262 @@
+source = $source;
+ $this->scrub = $scrub;
+ }
+
+  /**
+   * Highlight using the XHPAST result, falling back to the PHP fragment lexer
+   * if the XHPAST-based highlight throws.
+   */
+  protected function didReceiveResult($result) {
+    try {
+      return $this->applyXHPHighlight($result);
+    } catch (Exception $ex) {
+      // XHP can't highlight source that isn't syntactically valid. Fall
+      // through to the fragment lexer below.
+    }
+
+    $source = $this->source;
+    if ($this->scrub) {
+      // Drop the first line of the source before handing it to the lexer.
+      $source = preg_replace('/^.*\n/', '', $source);
+    }
+
+    $highlighter = id(new PhutilLexerSyntaxHighlighter())
+      ->setConfig('lexer', new PhutilPHPFragmentLexer())
+      ->setConfig('language', 'php');
+
+    return $highlighter->getHighlightFuture($source)->resolve();
+  }
+
+  /**
+   * Render the source as highlighted markup using an XHPAST parse result.
+   *
+   * @param  wild  Resolved result of the XHPAST exec future.
+   * @return wild  Value produced by phutil_implode_html() over the rendered
+   *               tokens.
+   * @throws Exception If scrubbing is enabled and the token stream does not
+   *         start with an open tag followed by whitespace beginning with a
+   *         newline.
+   */
+  private function applyXHPHighlight($result) {
+
+    // We perform two passes here: one using the AST to find symbols we care
+    // about -- particularly, class names and function names. These are used
+    // in the crossreference stuff to link into Diffusion. After we've done our
+    // AST pass, we do a followup pass on the token stream to catch all the
+    // simple stuff like strings and comments.
+
+    $tree = XHPASTTree::newFromDataAndResolvedExecFuture(
+      $this->source,
+      $result);
+
+    $root = $tree->getRootNode();
+
+    $tokens = $root->getTokens();
+    $interesting_symbols = $this->findInterestingSymbols($root);
+
+
+    if ($this->scrub) {
+      // If we're scrubbing, we prepended "<?php\n" to the text to force the
+      // highlighter to treat it as PHP source. Now, we need to remove that.
+
+      $ok = false;
+      if (count($tokens) >= 2) {
+        if ($tokens[0]->getTypeName() === 'T_OPEN_TAG') {
+          if ($tokens[1]->getTypeName() === 'T_WHITESPACE') {
+            $ok = true;
+          }
+        }
+      }
+
+      if (!$ok) {
+        throw new Exception(
+          pht(
+            'Expected T_OPEN_TAG, T_WHITESPACE tokens at head of results '.
+            'for highlighting parse of PHP snippet.'));
+      }
+
+      // Remove the "<?php" token. Token keys are left untouched so they still
+      // line up with the keys in $interesting_symbols.
+      unset($tokens[0]);
+
+      $value = $tokens[1]->getValue();
+      if ((strlen($value) < 1) || ($value[0] != "\n")) {
+        throw new Exception(
+          pht(
+            'Expected "\\n" at beginning of T_WHITESPACE token at head of '.
+            'tokens for highlighting parse of PHP snippet.'));
+      }
+
+      // Drop only the leading newline; any other whitespace is real content.
+      $value = substr($value, 1);
+      $tokens[1]->overwriteValue($value);
+    }
+
+    $out = array();
+    foreach ($tokens as $key => $token) {
+      $value = $token->getValue();
+      $class = null;
+      $multi = false;
+      $attrs = array();
+      if (isset($interesting_symbols[$key])) {
+        // Symbols found by the AST pass take priority over the token type.
+        $sym = $interesting_symbols[$key];
+        $class = $sym[0];
+        $attrs['data-symbol-context'] = idx($sym, 'context');
+        $attrs['data-symbol-name'] = idx($sym, 'symbol');
+      } else {
+        switch ($token->getTypeName()) {
+          case 'T_WHITESPACE':
+            break;
+          case 'T_DOC_COMMENT':
+            $class = 'dc';
+            $multi = true;
+            break;
+          case 'T_COMMENT':
+            $class = 'c';
+            $multi = true;
+            break;
+          case 'T_CONSTANT_ENCAPSED_STRING':
+          case 'T_ENCAPSED_AND_WHITESPACE':
+          case 'T_INLINE_HTML':
+            $class = 's';
+            $multi = true;
+            break;
+          case 'T_VARIABLE':
+            $class = 'nv';
+            break;
+          case 'T_OPEN_TAG':
+          case 'T_OPEN_TAG_WITH_ECHO':
+          case 'T_CLOSE_TAG':
+            $class = 'o';
+            break;
+          case 'T_LNUMBER':
+          case 'T_DNUMBER':
+            $class = 'm';
+            break;
+          case 'T_STRING':
+            static $magic = array(
+              'true' => true,
+              'false' => true,
+              'null' => true,
+            );
+            if (isset($magic[strtolower($value)])) {
+              $class = 'k';
+              break;
+            }
+            $class = 'nx';
+            break;
+          default:
+            $class = 'k';
+            break;
+        }
+      }
+
+      if ($class) {
+        $attrs['class'] = $class;
+        if ($multi) {
+          // If the token may have multiple lines in it, make sure each
+          // crosses no more than one line so the lines can be put
+          // in a table, etc., later.
+          $value = phutil_split_lines($value, $retain_endings = true);
+        } else {
+          $value = array($value);
+        }
+        foreach ($value as $val) {
+          $out[] = phutil_tag('span', $attrs, $val);
+        }
+      } else {
+        $out[] = $value;
+      }
+    }
+
+    return phutil_implode_html('', $out);
+  }
+
+  /**
+   * Walk the AST and collect the tokens that should be rendered as symbols.
+   *
+   * @param  XHPASTNode  Root node of the parsed source.
+   * @return map  Token key => array with a CSS class at index 0, a 'symbol'
+   *              name, and optionally a 'context' class name.
+   */
+  private function findInterestingSymbols(XHPASTNode $root) {
+    // These are PHP builtin tokens which can appear in a classname context.
+    // Don't link them since they don't go anywhere useful.
+    static $builtin_class_tokens = array(
+      'self' => true,
+      'parent' => true,
+      'static' => true,
+    );
+
+    $symbols = array();
+
+    // Class name symbols appear in:
+    //    class X extends X implements X, X { ... }
+    //    new X();
+    //    $x instanceof X
+    //    catch (X $x)
+    //    function f(X $x)
+    //    X::f();
+    //    X::$m;
+    //    X::CONST;
+    //
+    // XHPAST puts all of these in one node type, so a single query finds them.
+    foreach ($root->selectDescendantsOfType('n_CLASS_NAME') as $name_node) {
+      foreach ($name_node->getTokens() as $key => $token) {
+        if (isset($builtin_class_tokens[$token->getValue()])) {
+          // This is something like "self::method()".
+          continue;
+        }
+        $symbols[$key] = array(
+          'nc', // "Name, Class"
+          'symbol' => $name_node->getConcreteString(),
+        );
+      }
+    }
+
+    // Function name symbols appear in:
+    //    f()
+    foreach ($root->selectDescendantsOfType('n_FUNCTION_CALL') as $call) {
+      $callee = $call->getChildByIndex(0);
+      if ($callee->getTypeName() != 'n_SYMBOL_NAME') {
+        // Not a normal function call; some $f() shenanigans.
+        continue;
+      }
+      foreach ($callee->getTokens() as $key => $token) {
+        $symbols[$key] = array(
+          'nf', // "Name, Function"
+          'symbol' => $callee->getConcreteString(),
+        );
+      }
+    }
+
+    // Upon encountering $x->y, link y without context, since $x is unknown.
+    $accesses = $root->selectDescendantsOfType('n_OBJECT_PROPERTY_ACCESS');
+    foreach ($accesses as $access) {
+      $member = $access->getChildByIndex(1);
+      if ($member->getTypeName() == 'n_INDEX_ACCESS') {
+        // otherwise $x->y[0] doesn't get highlighted
+        $member = $member->getChildByIndex(0);
+      }
+      if ($member->getTypeName() != 'n_STRING') {
+        continue;
+      }
+      foreach ($member->getTokens() as $key => $token) {
+        $symbols[$key] = array(
+          'na', // "Name, Attribute"
+          'symbol' => $member->getConcreteString(),
+        );
+      }
+    }
+
+    // Upon encountering x::y, try to link y with context x.
+    $statics = $root->selectDescendantsOfType('n_CLASS_STATIC_ACCESS');
+    foreach ($statics as $access) {
+      $owner = $access->getChildByIndex(0);
+      $member = $access->getChildByIndex(1);
+
+      if ($owner->getTypeName() != 'n_CLASS_NAME') {
+        continue;
+      }
+
+      $member_type = $member->getTypeName();
+      if ($member_type != 'n_STRING' && $member_type != 'n_VARIABLE') {
+        continue;
+      }
+
+      $owner_name = head($owner->getTokens())->getValue();
+      $entry = array(
+        'na',
+        'symbol' => ltrim($member->getConcreteString(), '$'),
+      );
+      if (!isset($builtin_class_tokens[$owner_name])) {
+        $entry['context'] = $owner_name;
+      }
+
+      foreach ($member->getTokens() as $key => $token) {
+        $symbols[$key] = $entry;
+      }
+    }
+
+    return $symbols;
+  }
+
+}
diff --git a/src/moduleutils/PhutilBootloader.php b/src/moduleutils/PhutilBootloader.php
new file mode 100644
--- /dev/null
+++ b/src/moduleutils/PhutilBootloader.php
@@ -0,0 +1,338 @@
+classTree;
+ }
+
+  /**
+   * Register a library whose map is supplied directly, then build its map
+   * right away via getLibraryMap().
+   *
+   * @param string Library name.
+   * @param map    Library map to use for this library.
+   * @return void
+   */
+  public function registerInMemoryLibrary($name, $map) {
+    $this->inMemoryMaps[$name] = $map;
+    $this->registeredLibraries[$name] = "memory:$name";
+
+    $this->getLibraryMap($name);
+  }
+
+  /**
+   * Register an on-disk library, load its functions, and load any runtime
+   * extensions found in its "extensions" directory.
+   *
+   * @param string Library name.
+   * @param string Path to the library's __phutil_library_init__.php file.
+   * @return this
+   * @throws PhutilBootloaderException If the path is not an init file.
+   * @throws PhutilLibraryConflictException If the library is already
+   *         registered from a different directory.
+   */
+  public function registerLibrary($name, $path) {
+    if (basename($path) != '__phutil_library_init__.php') {
+      throw new PhutilBootloaderException(
+        'Only directories with a __phutil_library_init__.php file may be '.
+        'registered as libphutil libraries.');
+    }
+
+    $library_root = dirname($path);
+
+    // Detect attempts to load the same library multiple times from different
+    // locations. This might mean you're doing something silly like trying to
+    // include two different versions of something, or it might mean you're
+    // doing something subtle like running a different version of 'arc' on a
+    // working copy of Arcanist.
+    if (isset($this->registeredLibraries[$name])) {
+      $known_root = $this->registeredLibraries[$name];
+      if ($known_root != $library_root) {
+        throw new PhutilLibraryConflictException(
+          $name,
+          $known_root,
+          $library_root);
+      }
+    }
+
+    $this->registeredLibraries[$name] = $library_root;
+
+    // For libphutil v2 libraries, load all functions when we load the library.
+
+    if (!class_exists('PhutilSymbolLoader', false)) {
+      $arcanist_root = $this->getLibraryRoot('arcanist');
+      $this->executeInclude(
+        $arcanist_root.'/symbols/PhutilSymbolLoader.php');
+    }
+
+    $symbol_loader = new PhutilSymbolLoader();
+    $symbol_loader
+      ->setLibrary($name)
+      ->setType('function');
+
+    try {
+      $symbol_loader->selectAndLoadSymbols();
+    } catch (PhutilBootloaderException $ex) {
+      // Ignore this, it happens if a global function's file is removed or
+      // similar. Worst case is that we fatal when calling the function, which
+      // is no worse than fataling here.
+    } catch (PhutilMissingSymbolException $ex) {
+      // Ignore this, it happens if a global function is removed. Everything
+      // else loaded so proceed forward: worst case is a fatal when we
+      // hit a function call to a function which no longer exists, which is
+      // no worse than fataling here.
+    }
+
+    if (!empty($_SERVER['PHUTIL_DISABLE_RUNTIME_EXTENSIONS'])) {
+      return $this;
+    }
+
+    $extension_dir = $library_root.DIRECTORY_SEPARATOR.'extensions';
+    if (!Filesystem::pathExists($extension_dir)) {
+      return $this;
+    }
+
+    $extension_files = id(new FileFinder($extension_dir))
+      ->withSuffix('php')
+      ->withType('f')
+      ->withFollowSymlinks(true)
+      ->setForceMode('php')
+      ->find();
+
+    foreach ($extension_files as $extension_file) {
+      $this->loadExtension(
+        $name,
+        $library_root,
+        $extension_dir.DIRECTORY_SEPARATOR.$extension_file);
+    }
+
+    return $this;
+  }
+
+ /**
+ * Store a library map under the library named by `currentLibrary`.
+ *
+ * NOTE(review): getLibraryMap() sets `currentLibrary` just before including
+ * a library's map file and reads `libraryMaps` right after, so this is
+ * presumably called from that included file -- confirm.
+ *
+ * @param map Library map to record.
+ * @return this
+ */
+ public function registerLibraryMap(array $map) {
+ $this->libraryMaps[$this->currentLibrary] = $map;
+ return $this;
+ }
+
+ /**
+ * Get the symbol map for a registered library, merged with any symbols
+ * recorded for it in `extensionMaps`.
+ *
+ * The base map is loaded on first use, either from the in-memory map
+ * registered for the library or by including the library's map file. Its
+ * 'xmap' entries are added to the class tree at that point. The merged
+ * result is cached in `extendedMaps`.
+ *
+ * @param string Library name.
+ * @return map Library map, including extension symbols.
+ */
+ public function getLibraryMap($name) {
+ if (isset($this->extendedMaps[$name])) {
+ return $this->extendedMaps[$name];
+ }
+
+ if (empty($this->libraryMaps[$name])) {
+ $root = $this->getLibraryRoot($name);
+ $this->currentLibrary = $name;
+
+ if (isset($this->inMemoryMaps[$name])) {
+ $this->libraryMaps[$name] = $this->inMemoryMaps[$name];
+ } else {
+ // NOTE(review): the included file is expected to populate
+ // $this->libraryMaps[$name], presumably through registerLibraryMap(),
+ // which writes to the `currentLibrary` slot set above -- confirm.
+ $okay = include $root.'/__phutil_library_map__.php';
+ if (!$okay) {
+ throw new PhutilBootloaderException(
+ "Include of '{$root}/__phutil_library_map__.php' failed!");
+ }
+ }
+
+ $map = $this->libraryMaps[$name];
+
+ // A map with no version marker is treated as version 1.
+ $version = isset($map['__library_version__'])
+ ? $map['__library_version__']
+ : 1;
+
+ switch ($version) {
+ case 1:
+ throw new Exception(
+ 'libphutil v1 libraries are no longer supported.');
+ case 2:
+ // NOTE: In version 2 of the library format, all parents (both
+ // classes and interfaces) are stored in the 'xmap'. The value is
+ // either a string for a single parent (the common case) or an array
+ // for multiple parents.
+ foreach ($map['xmap'] as $child => $parents) {
+ foreach ((array)$parents as $parent) {
+ $this->classTree[$parent][] = $child;
+ }
+ }
+ break;
+ default:
+ throw new Exception("Unsupported library version '{$version}'!");
+ }
+ }
+
+ $map = $this->libraryMaps[$name];
+
+ // If there's an extension map for this library, merge the maps.
+ if (isset($this->extensionMaps[$name])) {
+ $emap = $this->extensionMaps[$name];
+ foreach (array('function', 'class', 'xmap') as $dict_key) {
+ if (!isset($emap[$dict_key])) {
+ continue;
+ }
+ // Array union: keys already in the base map keep their base values.
+ $map[$dict_key] += $emap[$dict_key];
+ }
+ }
+
+ $this->extendedMaps[$name] = $map;
+
+ return $map;
+ }
+
+ /**
+ * Get the base symbol map for a library, without extension symbols merged in.
+ *
+ * @param string Library name.
+ * @return map Entry for the library in `libraryMaps`.
+ */
+ public function getLibraryMapWithoutExtensions($name) {
+ // This just does all the checks to make sure the library is valid, then
+ // we throw away the result.
+ $this->getLibraryMap($name);
+
+ return $this->libraryMaps[$name];
+ }
+
+  /**
+   * Get the registered root of a library.
+   *
+   * @param string Library name.
+   * @return string Value recorded for the library at registration.
+   * @throws PhutilBootloaderException If the library is not registered.
+   */
+  public function getLibraryRoot($name) {
+    if (!empty($this->registeredLibraries[$name])) {
+      return $this->registeredLibraries[$name];
+    }
+
+    throw new PhutilBootloaderException(
+      "The phutil library '{$name}' has not been loaded!");
+  }
+
+ /**
+ * List the names of all registered libraries.
+ *
+ * @return list<string> Keys of the registered library map.
+ */
+ public function getAllLibraries() {
+ return array_keys($this->registeredLibraries);
+ }
+
+  /**
+   * Include the init file of the library at the given path.
+   *
+   * When $_SERVER['PHUTIL_LIBRARY_ROOT'] is set and the path does not start
+   * with "/", the root is prepended to the path.
+   *
+   * @param string Path to the library directory.
+   * @return void
+   */
+  public function loadLibrary($path) {
+    $prefix = null;
+
+    $has_root = !empty($_SERVER['PHUTIL_LIBRARY_ROOT']);
+    if ($has_root && $path[0] != '/') {
+      $prefix = $_SERVER['PHUTIL_LIBRARY_ROOT'];
+    }
+
+    $this->executeInclude($prefix.$path.'/__phutil_library_init__.php');
+  }
+
+  /**
+   * Include a source file given relative to a registered library's root.
+   *
+   * @param string Library name.
+   * @param string Path of the source file, relative to the library root.
+   * @return void
+   */
+  public function loadLibrarySource($library, $source) {
+    $this->executeInclude($this->getLibraryRoot($library).'/'.$source);
+  }
+
+  /**
+   * Include a source file once, converting load failures into exceptions.
+   *
+   * The previous error reporting level is restored on every exit path,
+   * including when the include throws.
+   *
+   * @param string Path of the file to include.
+   * @return void
+   * @throws Exception If the include fails, throws, or leaves a new entry in
+   *         error_get_last().
+   */
+  private function executeInclude($path) {
+    // Include the source using `include_once`, but convert any warnings or
+    // recoverable errors into exceptions.
+
+    // Some messages, including "Declaration of X should be compatible with Y",
+    // do not cause `include_once` to return an error code. Use
+    // error_get_last() to make sure we're catching everything in every PHP
+    // version.
+
+    // (Also, the severity of some messages changed between versions of PHP.)
+
+    // Note that we may enter this method after some earlier, unrelated error.
+    // In this case, error_get_last() will return information for that error.
+    // In PHP7 and later we could use error_clear_last() to clear that error,
+    // but the function does not exist in earlier versions of PHP. Instead,
+    // check if the value has changed.
+
+    // Some parser-like errors, including "class must implement all abstract
+    // methods", cause PHP to fatal immediately with an E_ERROR. In these
+    // cases, include_once() does not throw and never returns. We leave
+    // reporting enabled for these errors since we don't have a way to do
+    // anything more graceful.
+
+    // Likewise, some errors, including "cannot redeclare Class::method()"
+    // cause PHP to fatal immediately with E_COMPILE_ERROR. Treat these like
+    // the similar errors which raise E_ERROR.
+
+    // See also T12190.
+
+    $old_last = error_get_last();
+
+    $old = error_reporting(E_ERROR | E_COMPILE_ERROR);
+    try {
+      $okay = include_once $path;
+    } catch (Exception $ex) {
+      error_reporting($old);
+      throw $ex;
+    } catch (ParseError $throwable) {
+      error_reporting($old);
+
+      // NOTE: As of PHP7, syntax errors may raise a ParseError (which is a
+      // Throwable, not an Exception) with a useless message (like "syntax
+      // error, unexpected ':'") and a trace which ends a level above this.
+
+      // Treating this object normally results in an unusable message which
+      // does not identify where the syntax error occurred. Converting it to
+      // a string and taking the first line gives us something reasonable,
+      // however.
+      $message = (string)$throwable;
+      $message = preg_split("/\n/", $message);
+      $message = reset($message);
+
+      throw new Exception($message);
+    } catch (Throwable $throwable) {
+      // Any other PHP7+ error: restore the reporting level and let it
+      // propagate unchanged. On older PHP this class does not exist, so this
+      // branch never matches.
+      error_reporting($old);
+      throw $throwable;
+    }
+    error_reporting($old);
+
+    if (!$okay) {
+      throw new Exception("Source file \"{$path}\" failed to load.");
+    }
+
+    $new_last = error_get_last();
+    if ($new_last !== null) {
+      if ($new_last !== $old_last) {
+        $message = $new_last['message'];
+        throw new Exception(
+          "Error while loading file \"{$path}\": {$message}");
+      }
+    }
+  }
+
+  /**
+   * Include an extension file and record every function, class and interface
+   * it introduced in the library's extension map.
+   *
+   * @param string Library name the extension belongs to.
+   * @param string Library root, used to resolve symbol file paths.
+   * @param string Path of the extension file to include.
+   * @return void
+   */
+  private function loadExtension($library, $root, $path) {
+    // Snapshot the declared symbols, include the file, then snapshot again.
+    $functions_before = get_defined_functions();
+    $functions_before = array_fill_keys($functions_before['user'], true);
+    $classes_before = array_fill_keys(get_declared_classes(), true);
+    $interfaces_before = array_fill_keys(get_declared_interfaces(), true);
+
+    $this->executeInclude($path);
+
+    $functions_after = get_defined_functions();
+    $functions_after = array_fill_keys($functions_after['user'], true);
+    $classes_after = array_fill_keys(get_declared_classes(), true);
+    $interfaces_after = array_fill_keys(get_declared_interfaces(), true);
+
+    $added_functions = array_diff_key($functions_after, $functions_before);
+    $added_types =
+      array_diff_key($classes_after, $classes_before) +
+      array_diff_key($interfaces_after, $interfaces_before);
+
+    // NOTE: We can't trust the path we loaded to be the location of these
+    // symbols, because it might have loaded other paths.
+
+    foreach ($added_functions as $function_name => $ignored) {
+      $reflection = new ReflectionFunction($function_name);
+      $this->extensionMaps[$library]['function'][$function_name] =
+        Filesystem::resolvePath($reflection->getFileName(), $root);
+    }
+
+    foreach ($added_types as $type_name => $ignored) {
+      $reflection = new ReflectionClass($type_name);
+      $this->extensionMaps[$library]['class'][$type_name] =
+        Filesystem::resolvePath($reflection->getFileName(), $root);
+
+      // Collect interfaces first, then the parent class (if any).
+      $parents = $reflection->getInterfaceNames();
+      $parent_class = $reflection->getParentClass();
+      if ($parent_class) {
+        $parents[] = $parent_class->getName();
+      }
+
+      if (!$parents) {
+        continue;
+      }
+
+      foreach ($parents as $parent_name) {
+        $this->classTree[$parent_name][] = $type_name;
+      }
+
+      // A single parent is stored as a plain string rather than a list.
+      $this->extensionMaps[$library]['xmap'][$type_name] =
+        (count($parents) == 1) ? head($parents) : $parents;
+    }
+
+    // Clear the extended library cache (should one exist) so we know that
+    // we need to rebuild it.
+    unset($this->extendedMaps[$library]);
+  }
+
+}
diff --git a/src/moduleutils/PhutilBootloaderException.php b/src/moduleutils/PhutilBootloaderException.php
new file mode 100644
--- /dev/null
+++ b/src/moduleutils/PhutilBootloaderException.php
@@ -0,0 +1,3 @@
+library = $library;
+ $this->oldPath = $old_path;
+ $this->newPath = $new_path;
+
+ parent::__construct(pht(
+ "Library conflict! The library '%s' has already been loaded (from '%s') ".
+ "but is now being loaded again from a new location ('%s'). You can not ".
+ "load multiple copies of the same library into a program.",
+ $library,
+ $old_path,
+ $new_path));
+ }
+
+ /**
+ * Retrieve the name of the library in conflict.
+ *
+ * @return string The library name passed to the constructor.
+ * @task info
+ */
+ public function getLibrary() {
+ return $this->library;
+ }
+
+ /**
+ * Get the path the library was already loaded from, earlier in the
+ * program's execution.
+ *
+ * @return string The path of the already-loaded library.
+ * @task info
+ */
+ public function getOldPath() {
+ return $this->oldPath;
+ }
+
+ /**
+ * Get the path of the second copy of the library, the one whose load
+ * caused this conflict.
+ *
+ * @return string The path of the library that was being loaded.
+ * @task info
+ */
+ public function getNewPath() {
+ return $this->newPath;
+ }
+
+}
diff --git a/src/moduleutils/PhutilLibraryMapBuilder.php b/src/moduleutils/PhutilLibraryMapBuilder.php
new file mode 100644
--- /dev/null
+++ b/src/moduleutils/PhutilLibraryMapBuilder.php
@@ -0,0 +1,511 @@
+root = $root;
+ }
+
+ /**
+ * Control status output. Use `--quiet` to set this.
+ *
+ * When enabled, log() writes nothing.
+ *
+ * @param bool If true, don't show status output.
+ * @return this
+ *
+ * @task map
+ */
+ public function setQuiet($quiet) {
+ $this->quiet = $quiet;
+ return $this;
+ }
+
+ /**
+ * Control subprocess parallelism limit. Use `--limit` to set this.
+ *
+ * The value is stored as given; no validation is done here.
+ *
+ * @param int Maximum number of subprocesses to run in parallel.
+ * @return this
+ *
+ * @task map
+ */
+ public function setSubprocessLimit($limit) {
+ $this->subprocessLimit = $limit;
+ return $this;
+ }
+
+ /**
+ * Get the map of symbols in this library, analyzing the library to build it
+ * if necessary.
+ *
+ * @return map Information about symbols in this library.
+ *
+ * @task map
+ */
+  public function buildMap() {
+    // Reuse the map if the library has already been analyzed.
+    if ($this->librarySymbolMap !== null) {
+      return $this->librarySymbolMap;
+    }
+
+    $this->analyzeLibrary();
+    return $this->librarySymbolMap;
+  }
+
+
+ /**
+ * Get the map of files in this library, analyzing the library to build it
+ * if necessary.
+ *
+ * Returns a map of file paths to information about symbols used and defined
+ * in the file.
+ *
+ * @return map Information about files in this library.
+ *
+ * @task map
+ */
+  public function buildFileSymbolMap() {
+    // Reuse the map if the library has already been analyzed.
+    if ($this->fileSymbolMap !== null) {
+      return $this->fileSymbolMap;
+    }
+
+    $this->analyzeLibrary();
+    return $this->fileSymbolMap;
+  }
+
+ /**
+ * Build and update the library map.
+ *
+ * @return void
+ *
+ * @task map
+ */
+  public function buildAndWriteMap() {
+    // Build first, so the "Writing map..." message only appears once the
+    // analysis has finished.
+    $map = $this->buildMap();
+
+    $this->log(pht('Writing map...'));
+
+    $this->writeLibraryMap($map);
+  }
+
+ /**
+ * Write a status message to the user, if not running in quiet mode.
+ *
+ * @param string Message to write.
+ * @return this
+ *
+ * @task map
+ */
+  private function log($message) {
+    if (!$this->quiet) {
+      // fwrite() takes (stream, data, length) and does no format expansion,
+      // so build the line by concatenation. Errors are suppressed so a
+      // failed status write never interrupts the build.
+      @fwrite(STDERR, $message."\n");
+    }
+    return $this;
+  }
+
+
+/* -( Path Management )---------------------------------------------------- */
+
+ /**
+ * Get the path to some file in the library.
+ *
+ * @param string A library-relative path. If omitted, returns the library
+ * root path.
+ * @return string An absolute path.
+ *
+ * @task path
+ */
+  private function getPath($path = '') {
+    // The root and the relative path are always joined with a single "/".
+    return "{$this->root}/{$path}";
+  }
+
+ /**
+ * Get the path to the symbol cache file (".phutil_module_cache" in the
+ * library root).
+ *
+ * @return string Absolute path to symbol cache.
+ *
+ * @task path
+ */
+ private function getPathForSymbolCache() {
+ return $this->getPath('.phutil_module_cache');
+ }
+
+ /**
+ * Get the path to the map file ("__phutil_library_map__.php" in the library
+ * root).
+ *
+ * @return string Absolute path to the library map.
+ *
+ * @task path
+ */
+ private function getPathForLibraryMap() {
+ return $this->getPath('__phutil_library_map__.php');
+ }
+
+ /**
+ * Get the path to the library init file ("__phutil_library_init__.php" in
+ * the library root).
+ *
+ * @return string Absolute path to the library init file.
+ *
+ * @task path
+ */
+ private function getPathForLibraryInit() {
+ return $this->getPath('__phutil_library_init__.php');
+ }
+
+
+/* -( Symbol Analysis and Caching )---------------------------------------- */
+
+ /**
+ * Load the library symbol cache, if it exists and is readable and valid.
+ *
+ * @return dict Map of content hashes to cache of output from
+ * `phutil_symbols.php`.
+ *
+ * @task symbol
+ */
+  private function loadSymbolCache() {
+    $symbol_cache = array();
+
+    // An unreadable cache file is treated the same as no cache at all.
+    try {
+      $raw = Filesystem::readFile($this->getPathForSymbolCache());
+    } catch (Exception $ex) {
+      $raw = null;
+    }
+
+    if ($raw) {
+      try {
+        $symbol_cache = phutil_json_decode($raw);
+      } catch (PhutilJSONParserException $ex) {
+        // Corrupt cache: start over from an empty one.
+        $symbol_cache = array();
+      }
+    }
+
+    $version_key = self::SYMBOL_CACHE_VERSION_KEY;
+    if (idx($symbol_cache, $version_key) != self::SYMBOL_CACHE_VERSION) {
+      // Throw away caches from a different version of the library.
+      $symbol_cache = array();
+    }
+
+    // The version marker is bookkeeping, not a cache entry.
+    unset($symbol_cache[$version_key]);
+
+    return $symbol_cache;
+  }
+
+ /**
+ * Write a symbol map to disk cache.
+ *
+ * @param dict Symbol map of relative paths to symbols.
+ * @param dict Source map (like @{method:loadSourceFileMap}).
+ * @return void
+ *
+ * @task symbol
+ */
+  private function writeSymbolCache(array $symbol_map, array $source_map) {
+    // The cache is keyed by content hash rather than by path, with the
+    // version marker stored as the first entry.
+    $payload = array(
+      self::SYMBOL_CACHE_VERSION_KEY => self::SYMBOL_CACHE_VERSION,
+    );
+    foreach ($symbol_map as $file => $symbols) {
+      $hash = $source_map[$file];
+      $payload[$hash] = $symbols;
+    }
+
+    $encoded = json_encode($payload);
+    $destination = $this->getPathForSymbolCache();
+
+    try {
+      Filesystem::writeFile($destination, $encoded);
+    } catch (FilesystemException $ex) {
+      // Failing to save the cache is reported but is not fatal.
+      $this->log(pht('Unable to save the cache!'));
+    }
+  }
+
+ /**
+ * Drop the symbol cache, forcing a clean rebuild.
+ *
+ * @return this
+ *
+ * @task symbol
+ */
+  public function dropSymbolCache() {
+    $this->log(pht('Dropping symbol cache...'));
+    Filesystem::remove($this->getPathForSymbolCache());
+
+    // Return the builder so calls can be chained, as the docblock promises.
+    return $this;
+  }
+
+ /**
+ * Build a future which returns a `phutil_symbols.php` analysis of a source
+ * file.
+ *
+ * @param string Relative path to the source file to analyze.
+ * @return Future Analysis future.
+ *
+ * @task symbol
+ */
+  private function buildSymbolAnalysisFuture($file) {
+    // The analysis script is located relative to this source file.
+    $script = dirname(__FILE__).'/../../scripts/phutil_symbols.php';
+    $target = $this->getPath($file);
+
+    return new ExecFuture('php %s --ugly -- %s', $script, $target);
+  }
+
+
+/* -( Source Management )-------------------------------------------------- */
+
+ /**
+ * Build a map of all source files in a library to hashes of their content.
+ * Returns an array like this:
+ *
+ * array(
+ * 'src/parser/ExampleParser.php' => '60b725f10c9c85c70d97880dfe8191b3',
+ * // ...
+ * );
+ *
+ * @return dict Map of library-relative paths to content hashes.
+ * @task source
+ */
+  private function loadSourceFileMap() {
+    $root = $this->getPath();
+
+    if (!Filesystem::pathExists($this->getPathForLibraryInit())) {
+      throw new Exception(
+        pht(
+          "Provided path '%s' is not a %s library.",
+          $root,
+          'phutil'));
+    }
+
+    $finder = id(new FileFinder($root))
+      ->withType('f')
+      ->withSuffix('php')
+      ->excludePath('*/.*')
+      ->setGenerateChecksums(true);
+
+    $map = array();
+    foreach ($finder->find() as $found => $checksum) {
+      $relative = ltrim(Filesystem::readablePath($found, $root), '/');
+      $directory = dirname($relative);
+
+      // We don't permit normal source files at the root level, so just ignore
+      // them; they're special library files. Files directly inside the
+      // extensions/ directory are ignored as well.
+      if ($directory == '.' || $directory == 'extensions') {
+        continue;
+      }
+
+      // We include also filename in the hash to handle cases when the file is
+      // moved without modifying its content.
+      $map[$relative] = md5($checksum.$relative);
+    }
+
+    return $map;
+  }
+
+ /**
+ * Convert the symbol analysis of all the source files in the library into
+ * a library map.
+ *
+ * @param dict Symbol analysis of all source files.
+ * @return dict Library map.
+ * @task source
+ */
+  private function buildLibraryMap(array $symbol_map) {
+    $library_map = array(
+      'class' => array(),
+      'function' => array(),
+      'xmap' => array(),
+    );
+
+    foreach ($symbol_map as $file => $info) {
+      foreach ($info['have'] as $type => $symbols) {
+        // Interfaces share the 'class' bucket with classes.
+        $bucket = ($type == 'interface') ? 'class' : $type;
+
+        foreach ($symbols as $symbol => $declaration) {
+          // Detect duplicate symbols within the library.
+          if (!empty($library_map[$bucket][$symbol])) {
+            $prior = $library_map[$bucket][$symbol];
+            throw new Exception(
+              pht(
+                "Definition of %s '%s' in file '%s' duplicates prior ".
+                "definition in file '%s'. You can not declare the ".
+                "same symbol twice.",
+                $type,
+                $symbol,
+                $file,
+                $prior));
+          }
+          $library_map[$bucket][$symbol] = $file;
+        }
+      }
+      $library_map['xmap'] += $info['xmap'];
+    }
+
+    // Simplify the common case (one parent) to make the file a little easier
+    // to deal with.
+    foreach ($library_map['xmap'] as $class => $extends) {
+      if (count($extends) == 1) {
+        $library_map['xmap'][$class] = reset($extends);
+      }
+    }
+
+    // Sort the map so it is relatively stable across changes.
+    foreach (array_keys($library_map) as $section) {
+      ksort($library_map[$section]);
+    }
+    ksort($library_map);
+
+    return $library_map;
+  }
+
+ /**
+ * Write a finalized library map.
+ *
+ * @param dict Library map structure to write.
+ * @return void
+ *
+ * @task source
+ */
+ private function writeLibraryMap(array $library_map) {
+ $map_file = $this->getPathForLibraryMap();
+ $version = self::LIBRARY_MAP_VERSION;
+
+ $library_map = array(
+ self::LIBRARY_MAP_VERSION_KEY => $version,
+ ) + $library_map;
+
+ $library_map = phutil_var_export($library_map);
+ $at = '@';
+
+ $source_file = <<log(pht('Finding source files...'));
+ $source_map = $this->loadSourceFileMap();
+ $this->log(
+ pht('Found %s files.', new PhutilNumber(count($source_map))));
+
+ // Load the symbol cache with existing parsed symbols. This allows us
+ // to remap libraries quickly by analyzing only changed files.
+ $this->log(pht('Loading symbol cache...'));
+ $symbol_cache = $this->loadSymbolCache();
+
+ // If the XHPAST binary is not up-to-date, build it now. Otherwise,
+ // `phutil_symbols.php` will attempt to build the binary and will fail
+ // miserably because it will be trying to build the same file multiple
+ // times in parallel.
+ if (!PhutilXHPASTBinary::isAvailable()) {
+ PhutilXHPASTBinary::build();
+ }
+
+ // Build out the symbol analysis for all the files in the library. For
+ // each file, check if it's in cache. If we miss in the cache, do a fresh
+ // analysis.
+ $symbol_map = array();
+ $futures = array();
+ foreach ($source_map as $file => $hash) {
+ if (!empty($symbol_cache[$hash])) {
+ $symbol_map[$file] = $symbol_cache[$hash];
+ continue;
+ }
+ $futures[$file] = $this->buildSymbolAnalysisFuture($file);
+ }
+ $this->log(
+ pht('Found %s files in cache.', new PhutilNumber(count($symbol_map))));
+
+ // Run the analyzer on any files which need analysis.
+ if ($futures) {
+ $limit = $this->subprocessLimit;
+
+ $this->log(
+ pht(
+ 'Analyzing %s file(s) with %s subprocess(es)...',
+ phutil_count($futures),
+ new PhutilNumber($limit)));
+
+ $progress = new PhutilConsoleProgressBar();
+ if ($this->quiet) {
+ $progress->setQuiet(true);
+ }
+ $progress->setTotal(count($futures));
+
+ $futures = id(new FutureIterator($futures))
+ ->limit($limit);
+ foreach ($futures as $file => $future) {
+ $result = $future->resolveJSON();
+ if (empty($result['error'])) {
+ $symbol_map[$file] = $result;
+ } else {
+ $progress->done(false);
+ throw new XHPASTSyntaxErrorException(
+ $result['line'],
+ $file.': '.$result['error']);
+ }
+ $progress->update(1);
+ }
+ $progress->done();
+ }
+
+ $this->fileSymbolMap = $symbol_map;
+
+ // We're done building the cache, so write it out immediately. Note that
+ // we've only retained entries for files we found, so this implicitly cleans
+ // out old cache entries.
+ $this->writeSymbolCache($symbol_map, $source_map);
+
+ // Our map is up to date, so either show it on stdout or write it to disk.
+ $this->log(pht('Building library map...'));
+
+ $this->librarySymbolMap = $this->buildLibraryMap($symbol_map);
+ }
+
+
+}
diff --git a/src/moduleutils/__tests__/PhutilModuleUtilsTestCase.php b/src/moduleutils/__tests__/PhutilModuleUtilsTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/moduleutils/__tests__/PhutilModuleUtilsTestCase.php
@@ -0,0 +1,9 @@
+assertEqual('phutil', phutil_get_current_library_name());
+ }
+
+}
diff --git a/src/moduleutils/core.php b/src/moduleutils/core.php
new file mode 100644
--- /dev/null
+++ b/src/moduleutils/core.php
@@ -0,0 +1,13 @@
+registerLibrary($library, $path);
+}
+
+/**
+ * Hand a library map to the bootloader singleton.
+ *
+ * @param map Library map to register.
+ * @return void
+ */
+function phutil_register_library_map(array $map) {
+ PhutilBootloader::getInstance()->registerLibraryMap($map);
+}
+
+/**
+ * Ask the bootloader singleton to load the library at a path.
+ *
+ * @param string Path to the library directory.
+ * @return void
+ */
+function phutil_load_library($path) {
+ PhutilBootloader::getInstance()->loadLibrary($path);
+}
diff --git a/src/moduleutils/moduleutils.php b/src/moduleutils/moduleutils.php
new file mode 100644
--- /dev/null
+++ b/src/moduleutils/moduleutils.php
@@ -0,0 +1,49 @@
+getLibraryRoot($library);
+}
+
+/**
+ * Find the library root which contains a path.
+ *
+ * Checks each directory yielded by Filesystem::walkToRoot() for a
+ * "__phutil_library_init__.php" file and returns the first one that has it.
+ *
+ * @param string Path to start from.
+ * @return string|null Directory holding the init file, or null if none of
+ * the visited directories has one.
+ */
+function phutil_get_library_root_for_path($path) {
+ foreach (Filesystem::walkToRoot($path) as $dir) {
+ if (Filesystem::pathExists($dir.'/__phutil_library_init__.php')) {
+ return $dir;
+ }
+ }
+ return null;
+}
+
+/**
+ * Find the name of the registered library whose root is the given path.
+ *
+ * Both sides are resolved and stripped of trailing slashes before they are
+ * compared.
+ *
+ * @param string Library root path.
+ * @return string|null Library name, or null if no registered library matches.
+ */
+function phutil_get_library_name_for_root($path) {
+  $target = rtrim(Filesystem::resolvePath($path), '/');
+
+  $bootloader = PhutilBootloader::getInstance();
+  foreach ($bootloader->getAllLibraries() as $library) {
+    $candidate = Filesystem::resolvePath(
+      $bootloader->getLibraryRoot($library));
+
+    if (rtrim($candidate, '/') == $target) {
+      return $library;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Get the name of the library containing the file this function was called
+ * from.
+ *
+ * @return string|null Library name, or null if no registered library matches.
+ */
+function phutil_get_current_library_name() {
+  // The first backtrace frame records the file that made this call.
+  $frames = debug_backtrace(false);
+  $caller = head($frames);
+
+  return phutil_get_library_name_for_root(
+    phutil_get_library_root_for_path($caller['file']));
+}
+
+/**
+ * Warns about use of deprecated behavior.
+ */
+function phutil_deprecated($what, $why) {
+  // The reason travels as extra data alongside the deprecated item.
+  $details = array(
+    'why' => $why,
+  );
+
+  PhutilErrorHandler::dispatchErrorMessage(
+    PhutilErrorHandler::DEPRECATED,
+    $what,
+    $details);
+}
diff --git a/src/object/Phobject.php b/src/object/Phobject.php
new file mode 100644
--- /dev/null
+++ b/src/object/Phobject.php
@@ -0,0 +1,104 @@
+throwOnAttemptedIteration();
+ }
+
+ /**
+ * Always throws: objects of this class refuse to be iterated.
+ *
+ * NOTE(review): presumably the Iterator::key() implementation; the class
+ * declaration is not visible from here, confirm.
+ *
+ * @throws DomainException Always.
+ */
+ public function key() {
+ $this->throwOnAttemptedIteration();
+ }
+
+ /**
+ * Always throws: objects of this class refuse to be iterated.
+ *
+ * NOTE(review): presumably the Iterator::next() implementation; the class
+ * declaration is not visible from here, confirm.
+ *
+ * @throws DomainException Always.
+ */
+ public function next() {
+ $this->throwOnAttemptedIteration();
+ }
+
+ /**
+ * Always throws: objects of this class refuse to be iterated.
+ *
+ * NOTE(review): presumably the Iterator::rewind() implementation; the class
+ * declaration is not visible from here, confirm.
+ *
+ * @throws DomainException Always.
+ */
+ public function rewind() {
+ $this->throwOnAttemptedIteration();
+ }
+
+ /**
+ * Always throws: objects of this class refuse to be iterated.
+ *
+ * NOTE(review): presumably the Iterator::valid() implementation; the class
+ * declaration is not visible from here, confirm.
+ *
+ * @throws DomainException Always.
+ */
+ public function valid() {
+ $this->throwOnAttemptedIteration();
+ }
+
+ /**
+  * Raise the exception used to reject any attempt to iterate this object.
+  *
+  * @return void Never returns normally.
+  * @throws DomainException Always; the message names the concrete class.
+  */
+ private function throwOnAttemptedIteration() {
+   $message = pht(
+     'Attempting to iterate an object (of class %s) which is not iterable.',
+     get_class($this));
+
+   throw new DomainException($message);
+ }
+
+
+ /**
+  * Read the value of a class constant.
+  *
+  * This is the same as just typing `self::CONSTANTNAME`, but throws a more
+  * useful message if the constant is not defined and allows the constant to
+  * be limited to a maximum length.
+  *
+  * @param string Name of the constant.
+  * @param int|null Maximum number of bytes permitted in the value. When
+  *                 given, the value must also be a string.
+  * @return wild Value of the constant (a string whenever a byte limit is
+  *              given).
+  * @throws Exception If the constant is not defined, or if a byte limit is
+  *                   given and the value is not a string or is too long.
+  */
+ public function getPhobjectClassConstant($key, $byte_limit = null) {
+   $class = new ReflectionClass($this);
+
+   // Test for existence explicitly. ReflectionClass::getConstant() reports a
+   // missing constant by returning false, which is indistinguishable from a
+   // constant whose value really is false.
+   if (!$class->hasConstant($key)) {
+     throw new Exception(
+       pht(
+         '"%s" class "%s" must define a "%s" constant.',
+         __CLASS__,
+         get_class($this),
+         $key));
+   }
+
+   $const = $class->getConstant($key);
+
+   if ($byte_limit !== null) {
+     if (!is_string($const) || (strlen($const) > $byte_limit)) {
+       throw new Exception(
+         pht(
+           '"%s" class "%s" has an invalid "%s" property. Field constants '.
+           'must be strings and no more than %s bytes in length.',
+           __CLASS__,
+           get_class($this),
+           $key,
+           new PhutilNumber($byte_limit)));
+     }
+   }
+
+   return $const;
+ }
+
+}
diff --git a/src/object/__tests__/PhobjectTestCase.php b/src/object/__tests__/PhobjectTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/object/__tests__/PhobjectTestCase.php
@@ -0,0 +1,40 @@
+duck;
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+ $this->assertTrue($caught instanceof DomainException);
+
+ $caught = null;
+ try {
+ $object->duck = 'quack';
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+ $this->assertTrue($caught instanceof DomainException);
+ }
+
+ /**
+  * Iterating a PhutilTestPhobject with foreach must raise a DomainException.
+  */
+ public function testThrowOnIteration() {
+   $subject = new PhutilTestPhobject();
+   $thrown = null;
+
+   try {
+     foreach ($subject as $unused) {
+       // Nothing to do here; the test only cares about the exception.
+     }
+   } catch (Exception $ex) {
+     $thrown = $ex;
+   }
+
+   $this->assertTrue($thrown instanceof DomainException);
+ }
+
+}
diff --git a/src/object/__tests__/PhutilTestPhobject.php b/src/object/__tests__/PhutilTestPhobject.php
new file mode 100644
--- /dev/null
+++ b/src/object/__tests__/PhutilTestPhobject.php
@@ -0,0 +1,3 @@
+setBugtraqPattern('http://bugs.com/%BUGID%')
+ * ->setBugtraqCaptureExpression('/[Ii]ssues?:?(\s*,?\s*#\d+)+/')
+ * ->setBugtraqSelectExpression('/(\d+)/')
+ * ->processCorpus($message);
+ *
+ * This will produce:
+ *
+ * Issues: http://bugs.com/123, http://bugs.com/345
+ *
+ */
+final class PhutilBugtraqParser extends Phobject {
+
+  private $bugtraqPattern;
+  private $bugtraqCaptureExpression;
+  private $bugtraqSelectExpression;
+
+  /**
+   * Set the replacement template. Every occurrence of `%BUGID%` in it is
+   * replaced with a bug ID when a corpus is processed.
+   *
+   * @param string Replacement template.
+   * @return this
+   */
+  public function setBugtraqPattern($pattern) {
+    $this->bugtraqPattern = $pattern;
+    return $this;
+  }
+
+  /**
+   * Set the expression which finds the text ranges that mention bugs.
+   *
+   * @param string Regular expression; checked against the 'regex' type spec.
+   * @return this
+   */
+  public function setBugtraqCaptureExpression($regex) {
+    PhutilTypeSpec::newFromString('regex')->check($regex);
+
+    $this->bugtraqCaptureExpression = $regex;
+    return $this;
+  }
+
+  /**
+   * Set the optional expression which picks individual bug IDs out of each
+   * captured range. The IDs are read from its first capturing group.
+   *
+   * @param string Regular expression; checked against the 'regex' type spec.
+   * @return this
+   */
+  public function setBugtraqSelectExpression($regex) {
+    PhutilTypeSpec::newFromString('regex')->check($regex);
+
+    $this->bugtraqSelectExpression = $regex;
+    return $this;
+  }
+
+  /**
+   * Replace every bug ID found in a corpus with the configured pattern.
+   *
+   * @param string Text to process.
+   * @return string Text with each bug ID replaced.
+   */
+  public function processCorpus($corpus) {
+    $regexp = $this->bugtraqCaptureExpression;
+    $matches = null;
+    $flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
+
+    // First, find all the matching text ranges. We do this up front because
+    // when we do the replacement pass at the end, the whole thing needs to go
+    // in reverse order.
+
+    preg_match_all($regexp, $corpus, $matches, $flags);
+    $captures = array();
+    foreach ($matches as $match) {
+      list($captured_text, $captured_offset) = $match[0];
+      $captures[] = array(
+        'text' => $captured_text,
+        'at' => $captured_offset,
+      );
+    }
+
+    // Find the actual bug IDs. If there's a selection expression, we use that
+    // to pick bug IDs out of a larger context. For example, the syntax may be:
+    //
+    //   Issues: 123, 124
+    //
+    // In this case, "123" and "124" are the issue IDs, and could be selected
+    // with an expression like:
+    //
+    //   /(\d+)/
+    //
+    // If there's no selection expression, we use the entire match.
+
+    $bug_ids = array();
+    $select_regexp = $this->bugtraqSelectExpression;
+    $has_select = ($select_regexp !== null) && strlen($select_regexp);
+    foreach ($captures as $capture) {
+      $captured_text = $capture['text'];
+      $captured_offset = $capture['at'];
+
+      if (!$has_select) {
+        $bug_ids[] = array(
+          'text' => $captured_text,
+          'at' => $captured_offset,
+        );
+        continue;
+      }
+
+      $selections = null;
+      preg_match_all(
+        $select_regexp,
+        $captured_text,
+        $selections,
+        PREG_OFFSET_CAPTURE);
+
+      // A selection expression with no capturing group produces no group 1
+      // at all; there is nothing to select in that case.
+      if (empty($selections[1])) {
+        continue;
+      }
+
+      foreach ($selections[1] as $selection) {
+        // If group 1 did not take part in a match (for example, it sits in
+        // an alternation branch which was not used), it is reported with
+        // offset -1 or as a bare empty string. Replacing at that offset
+        // would splice text relative to the end of the corpus, so skip it.
+        if (!is_array($selection) || $selection[1] < 0) {
+          continue;
+        }
+
+        $bug_ids[] = array(
+          'text' => $selection[0],
+          'at' => $captured_offset + $selection[1],
+        );
+      }
+    }
+
+    // Now that we have all the bug IDs to replace, replace them (in reverse
+    // order, so the offsets don't get messed up).
+    $bug_ids = array_reverse($bug_ids);
+    foreach ($bug_ids as $bug) {
+      $new_text = str_replace(
+        '%BUGID%',
+        $bug['text'],
+        $this->bugtraqPattern);
+
+      $corpus = substr_replace(
+        $corpus,
+        $new_text,
+        $bug['at'],
+        strlen($bug['text']));
+    }
+
+    return $corpus;
+  }
+
+}
diff --git a/src/parser/PhutilDocblockParser.php b/src/parser/PhutilDocblockParser.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilDocblockParser.php
@@ -0,0 +1,163 @@
+ line number.
+ $map = array();
+ $lines = explode("\n", $text);
+ $num = 1;
+ foreach ($lines as $line) {
+ $len = strlen($line) + 1;
+ for ($jj = 0; $jj < $len; $jj++) {
+ $map[] = $num;
+ }
+ ++$num;
+ }
+
+ foreach ($matches[0] as $match) {
+ list($data, $offset) = $match;
+ $blocks[] = array($data, $map[$offset]);
+ }
+
+ return $blocks;
+ }
+
+ /**
+  * Split a docblock into its free text and its "@special" fields.
+  *
+  * @param string Raw docblock, including the comment delimiters.
+  * @return pair<string, map> The text with specials removed and shifted to
+  *         the left edge, and a map from special name to its value. A value
+  *         is `true` for a bare flag, a string, or a list when the special
+  *         appears more than once.
+  */
+ public function parse($docblock) {
+   // Strip off comments.
+   $docblock = trim($docblock);
+   $docblock = preg_replace('@^/\*\*@', '', $docblock);
+   $docblock = preg_replace('@\*/$@', '', $docblock);
+   $docblock = preg_replace('@^\s*\*@m', '', $docblock);
+
+   // Normalize multi-line @specials.
+   $lines = explode("\n", $docblock);
+   $last = false;
+   foreach ($lines as $k => $line) {
+
+     // NOTE: We allow "@specials" to be preceded by up to two whitespace
+     // characters; more than that and we assume the block is a code block.
+     // Broadly, there's ambiguity between a special like:
+     //
+     //   <... lots of indentation ...> @author alincoln
+     //
+     // ...and a code block like:
+     //
+     //   <... lots of indentation ...> @def square(x, y):
+     //
+     // Because standard practice is to indent the entire block one level,
+     // we allow that and one additional space before assuming something is
+     // a code block.
+
+     if (preg_match('/^\s{0,2}@\w/i', $line)) {
+       $last = $k;
+       $lines[$last] = trim($line);
+     } else if (preg_match('/^\s*$/', $line)) {
+       $last = false;
+     } else if ($last !== false) {
+       // Continuation of the previous special: fold it onto that line.
+       $lines[$last] = $lines[$last].' '.trim($line);
+       unset($lines[$k]);
+     }
+   }
+
+   $docblock = implode("\n", $lines);
+
+   $special = array();
+
+   // Parse @specials.
+   $matches = null;
+   $have_specials = preg_match_all(
+     '/^@([\w-]+)[ \t]*([^\n]*)/m',
+     $docblock,
+     $matches,
+     PREG_SET_ORDER);
+
+   if ($have_specials) {
+     $docblock = preg_replace(
+       '/^@([\w-]+)[ \t]*([^\n]*)?\n*/m',
+       '',
+       $docblock);
+     foreach ($matches as $match) {
+       list($_, $type, $data) = $match;
+       $data = trim($data);
+
+       // For flags like "@stable" which don't have any string data, set the
+       // value to true.
+       if (!strlen($data)) {
+         $data = true;
+       }
+
+       if (!isset($special[$type])) {
+         $special[$type] = $data;
+       } else {
+         if (!is_array($special[$type])) {
+           $special[$type] = (array)$special[$type];
+         }
+         $special[$type][] = $data;
+       }
+     }
+   }
+
+   // Convert `array(true, true, true)` to `true`.
+   foreach ($special as $type => $data) {
+     if (is_array($data)) {
+       $all_trues = true;
+
+       foreach ($data as $value) {
+         if ($value !== true) {
+           $all_trues = false;
+           break;
+         }
+       }
+
+       if ($all_trues) {
+         $special[$type] = true;
+       }
+     }
+   }
+
+   $docblock = str_replace("\t", ' ', $docblock);
+
+   // Smush the whole docblock to the left edge. The indent is measured
+   // separately for every line so that the smallest one wins; a running
+   // total across lines would only ever reflect the first line.
+   $min_indent = 80;
+   foreach (array_filter(explode("\n", $docblock)) as $line) {
+     $indent = strspn($line, ' ');
+     if ($indent == strlen($line)) {
+       // A line made only of spaces has no content to align, so it must not
+       // drag the common indent down.
+       continue;
+     }
+     $min_indent = min($indent, $min_indent);
+   }
+
+   $docblock = preg_replace(
+     '/^'.str_repeat(' ', $min_indent).'/m',
+     '',
+     $docblock);
+   $docblock = rtrim($docblock);
+   // Trim any empty lines off the front, but leave the indent level if there
+   // is one.
+   $docblock = preg_replace('/^\s*\n/', '', $docblock);
+
+   return array($docblock, $special);
+ }
+
+}
diff --git a/src/parser/PhutilEditorConfig.php b/src/parser/PhutilEditorConfig.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilEditorConfig.php
@@ -0,0 +1,195 @@
+ array(
+ 'latin1',
+ 'utf-8',
+ 'utf-8-bom',
+ 'utf-16be',
+ 'utf-16le',
+ ),
+ self::END_OF_LINE => array('lf', 'cr', 'crlf'),
+ self::INDENT_SIZE => 'int|string',
+ self::INDENT_STYLE => array('space', 'tab'),
+ self::FINAL_NEWLINE => 'bool',
+ self::LINE_LENGTH => 'int',
+ self::TAB_WIDTH => 'int',
+ self::TRAILING_WHITESPACE => 'bool',
+ );
+
+ private $root;
+
+ /**
+ * Constructor.
+ *
+ * The root is stored and later read by getEditorConfigs(), which stops
+ * walking up the directory tree once it reaches this directory.
+ *
+ * @param string The root directory.
+ */
+ public function __construct($root) {
+ $this->root = $root;
+ }
+
+ /**
+  * Get the specified EditorConfig property for the specified path.
+  *
+  * `indent_size` and `tab_width` fall back on each other:
+  *
+  *   - `indent_size` unset with `indent_style = tab` yields `'tab'`.
+  *   - `indent_size = tab` yields `tab_width` when that is set.
+  *   - `tab_width` unset yields `indent_size` when that is set to something
+  *     other than `'tab'`.
+  *
+  * @param string Path to look up properties for.
+  * @param string Property name; must be a known property.
+  * @return wild Property value, or null if it is not set.
+  * @throws InvalidArgumentException If the property is not a known one.
+  */
+ public function getProperty($path, $key) {
+   if (!idx(self::$knownProperties, $key)) {
+     throw new InvalidArgumentException(pht('Invalid EditorConfig property.'));
+   }
+
+   $props = $this->getProperties($path);
+
+   $indent_size = idx($props, self::INDENT_SIZE);
+   $tab_width = idx($props, self::TAB_WIDTH);
+
+   switch ($key) {
+     case self::INDENT_SIZE:
+       if ($indent_size === null &&
+           idx($props, self::INDENT_STYLE) === 'tab') {
+         return 'tab';
+       } else if ($indent_size === 'tab' && $tab_width !== null) {
+         // Only substitute the tab width when there is one to substitute;
+         // otherwise fall through and report the configured 'tab'.
+         return $tab_width;
+       }
+       break;
+
+     case self::TAB_WIDTH:
+       if ($tab_width === null &&
+           $indent_size !== null &&
+           $indent_size !== 'tab') {
+         return $indent_size;
+       }
+       break;
+   }
+
+   return idx($props, $key);
+ }
+
+ /**
+  * Get the EditorConfig properties for the specified path.
+  *
+  * Returns a map containing all of the EditorConfig properties which apply
+  * to the specified path. The following rules are applied when processing
+  * EditorConfig files:
+  *
+  *   - If a glob does not contain `/`, it can match a path in any subdirectory.
+  *   - If the first character of a glob is `/`, it will only match files in the
+  *     same directory as the `.editorconfig` file.
+  *   - Properties and values are case-insensitive.
+  *   - Unknown properties will be silently ignored.
+  *   - Values are not validated against the specification (this may change in
+  *     the future).
+  *   - Invalid glob patterns will be silently ignored.
+  *
+  * When several sections set the same property, the one processed last wins.
+  *
+  * @param string Path to look up properties for.
+  * @return map<string, wild> Map from lowercased property name to value.
+  */
+ public function getProperties($path) {
+   $configs = $this->getEditorConfigs($path);
+   $matches = array();
+
+   foreach ($configs as $config) {
+     list($path_prefix, $editorconfig) = $config;
+
+     foreach ($editorconfig as $glob => $properties) {
+       if (!$glob) {
+         continue;
+       }
+
+       if (strpos($glob, '/') === false) {
+         $glob = '**/'.$glob;
+       } else if (strncmp($glob, '/', 1) === 0) {
+         // Drop the leading slash; one is added back together with the
+         // directory prefix below. (Comparing zero bytes always reports
+         // equality, so the length here must be 1.)
+         $glob = substr($glob, 1);
+       }
+
+       $glob = $path_prefix.'/'.$glob;
+       try {
+         if (!phutil_fnmatch($glob, $path)) {
+           continue;
+         }
+       } catch (Exception $ex) {
+         // Invalid glob pattern... ignore it.
+         continue;
+       }
+
+       foreach ($properties as $property => $value) {
+         $property = strtolower($property);
+
+         if (!idx(self::$knownProperties, $property)) {
+           // Unknown property... ignore it.
+           continue;
+         }
+
+         if (is_string($value)) {
+           $value = strtolower($value);
+         }
+         if ($value === '') {
+           // An empty value unsets the property.
+           $value = null;
+         }
+         $matches[$property] = $value;
+       }
+     }
+   }
+
+   return $matches;
+ }
+
+ /**
+  * Returns the EditorConfig files which affect the specified path.
+  *
+  * Walks upward from the directory containing the path, parsing every
+  * `.editorconfig` file found, until the root directory is reached or a
+  * file declares `root = true`. Files found further up are placed earlier
+  * in the result.
+  *
+  * @param string Path to find configuration files for.
+  * @return list<pair<string, map>> Pairs of directory and parsed
+  *         configuration, with the `root` key removed.
+  */
+ private function getEditorConfigs($path) {
+   $root = $this->root;
+   $found = array();
+
+   do {
+     $path = dirname($path);
+     $candidate = $path.'/.editorconfig';
+
+     if (Filesystem::pathExists($candidate)) {
+       $parsed = phutil_ini_decode(Filesystem::readFile($candidate));
+
+       $is_root = (idx($parsed, 'root') === true);
+       unset($parsed['root']);
+       array_unshift($found, array($path, $parsed));
+
+       if ($is_root) {
+         break;
+       }
+     }
+   } while ($path != $root && Filesystem::isDescendant($path, $root));
+
+   return $found;
+ }
+
+}
diff --git a/src/parser/PhutilEmailAddress.php b/src/parser/PhutilEmailAddress.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilEmailAddress.php
@@ -0,0 +1,114 @@
+$/', $email_address, $matches)) {
+ $display_name = trim($matches[1], '\'" ');
+ if (strpos($matches[2], '@') !== false) {
+ list($local_part, $domain_name) = explode('@', $matches[2], 2);
+ } else {
+ $local_part = $matches[2];
+ $domain_name = null;
+ }
+ } else if (preg_match('/^(.*)@(.*)$/', $email_address, $matches)) {
+ $display_name = null;
+ $local_part = $matches[1];
+ $domain_name = $matches[2];
+ } else {
+ $display_name = null;
+ $local_part = $email_address;
+ $domain_name = null;
+ }
+
+ $this->displayName = $display_name;
+ $this->localPart = $local_part;
+ $this->domainName = $domain_name;
+ }
+
+ /**
+  * Render the address, as `"Display Name" <address>` when there is a
+  * display name and as the bare address otherwise.
+  *
+  * @return string Rendered address.
+  */
+ public function __toString() {
+   $address = $this->getAddress();
+
+   if (!strlen($this->displayName)) {
+     return $address;
+   }
+
+   return $this->encodeDisplayName($this->displayName).' <'.$address.'>';
+ }
+
+ /**
+ * Set the display name, stored as given.
+ *
+ * @param string|null Display name.
+ * @return this
+ */
+ public function setDisplayName($display_name) {
+ $this->displayName = $display_name;
+ return $this;
+ }
+
+ /**
+ * Get the display name.
+ *
+ * @return string|null Display name as stored.
+ */
+ public function getDisplayName() {
+ return $this->displayName;
+ }
+
+ /**
+ * Set the local part, which getAddress() places before the "@".
+ *
+ * @param string Local part.
+ * @return this
+ */
+ public function setLocalPart($local_part) {
+ $this->localPart = $local_part;
+ return $this;
+ }
+
+ /**
+ * Get the local part, which getAddress() places before the "@".
+ *
+ * @return string|null Local part as stored.
+ */
+ public function getLocalPart() {
+ return $this->localPart;
+ }
+
+ /**
+ * Set the domain name, which getAddress() places after the "@".
+ *
+ * @param string|null Domain name.
+ * @return this
+ */
+ public function setDomainName($domain_name) {
+ $this->domainName = $domain_name;
+ return $this;
+ }
+
+ /**
+ * Get the domain name, which getAddress() places after the "@".
+ *
+ * @return string|null Domain name as stored.
+ */
+ public function getDomainName() {
+ return $this->domainName;
+ }
+
+ /**
+  * Replace the address, splitting it at the first "@" into a local part
+  * and a domain name.
+  *
+  * @param string Address, with or without a domain.
+  * @return this
+  */
+ public function setAddress($address) {
+   $parts = explode('@', $address, 2);
+
+   $this->localPart = $parts[0];
+
+   // Always assign the domain. Leaving it untouched when the new address
+   // has no "@" would silently keep the domain of the previous address.
+   $this->domainName = isset($parts[1]) ? $parts[1] : null;
+
+   return $this;
+ }
+
+ /**
+  * Get the address: the local part, followed by "@" and the domain name
+  * when a nonempty domain name is set.
+  *
+  * @return string|null Address.
+  */
+ public function getAddress() {
+   if (!strlen($this->domainName)) {
+     return $this->localPart;
+   }
+
+   return $this->localPart.'@'.$this->domainName;
+ }
+
+ /**
+  * Encode a display name as a quoted string.
+  *
+  * NOTE: This is a reasonable effort based on a cursory reading of
+  * RFC2822, but may be significantly misguided.
+  *
+  * @param string Raw display name.
+  * @return string Name wrapped in double quotes.
+  */
+ private function encodeDisplayName($name) {
+   // Newlines are not permitted, even when escaped, so each run of them
+   // (with any surrounding whitespace) collapses to a single space.
+   $single_line = preg_replace("/\s*[\r\n]+\s*/", ' ', $name);
+
+   // Backslash-escape double quotes and backslashes, then quote the result.
+   return '"'.addcslashes($single_line, '\\"').'"';
+ }
+
+}
diff --git a/src/parser/PhutilGitURI.php b/src/parser/PhutilGitURI.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilGitURI.php
@@ -0,0 +1,92 @@
+parseURI($uri);
+ if ($parts) {
+ $this->user = $parts[1];
+ $this->domain = $parts[2];
+ $this->path = $parts[3];
+ }
+ }
+
+ /**
+  * Split a `user@domain:path` style URI into its components.
+  *
+  * @param string URI to parse.
+  * @return list<string>|null Regular expression match padded to four
+  *         entries (whole match, user, domain, path), or null if the URI
+  *         has leading whitespace or does not match.
+  */
+ private static function parseURI($uri) {
+   // See T4913. Fail the parse if there is leading whitespace; stricter
+   // systems will not accept these URIs.
+   if (ltrim($uri) !== $uri) {
+     return null;
+   }
+
+   $user_part = '(?:([^@]+)@)?';
+   $domain_part = '([^:]+)';
+   $path_part = ':(.*)';
+   $pattern = '/^'.$user_part.$domain_part.$path_part.'$/';
+
+   $groups = null;
+   if (!preg_match($pattern, $uri, $groups)) {
+     return null;
+   }
+
+   return array_pad($groups, 4, '');
+ }
+
+ /**
+  * Render the URI as `user@domain:path`, leaving out the `user@` prefix
+  * when the user is empty.
+  *
+  * @return string Rendered URI.
+  */
+ public function __toString() {
+   $prefix = $this->user ? $this->user.'@' : null;
+
+   return $prefix.$this->domain.':'.$this->path;
+ }
+
+ /**
+ * Set the domain component, rendered before the ":" by __toString().
+ *
+ * @param string Domain.
+ * @return this
+ */
+ public function setDomain($domain) {
+ $this->domain = $domain;
+ return $this;
+ }
+
+ /**
+ * Get the domain component.
+ *
+ * @return string|null Domain as stored.
+ */
+ public function getDomain() {
+ return $this->domain;
+ }
+
+ /**
+ * Set the path component, rendered after the ":" by __toString().
+ *
+ * @param string Path.
+ * @return this
+ */
+ public function setPath($path) {
+ $this->path = $path;
+ return $this;
+ }
+
+ /**
+ * Get the path component.
+ *
+ * @return string|null Path as stored.
+ */
+ public function getPath() {
+ return $this->path;
+ }
+
+ /**
+ * Set the user component, rendered before the "@" by __toString().
+ *
+ * @param string|null User.
+ * @return this
+ */
+ public function setUser($user) {
+ $this->user = $user;
+ return $this;
+ }
+
+ /**
+ * Get the user component.
+ *
+ * @return string|null User as stored.
+ */
+ public function getUser() {
+ return $this->user;
+ }
+
+}
diff --git a/src/parser/PhutilJSON.php b/src/parser/PhutilJSON.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilJSON.php
@@ -0,0 +1,155 @@
+encodeFormattedObject($object, 0)."\n";
+ }
+
+
+ /**
+  * Encode a list in JSON and pretty-print it, discarding keys.
+  *
+  * @param list List to encode in JSON.
+  * @return string Pretty-printed list representation, ending in a newline.
+  */
+ public function encodeAsList(array $list) {
+   $formatted = $this->encodeFormattedArray($list, 0);
+   return $formatted."\n";
+ }
+
+
+/* -( Internals )---------------------------------------------------------- */
+
+
+ /**
+  * Pretty-print a JSON object.
+  *
+  * Each key is emitted on its own line, one indent level deeper than the
+  * enclosing braces. An empty object is emitted as `{}`.
+  *
+  * @param dict Object to format.
+  * @param int Current depth, for indentation.
+  * @return string Pretty-printed value.
+  * @task internal
+  */
+ private function encodeFormattedObject($object, $depth) {
+   if (empty($object)) {
+     return '{}';
+   }
+
+   $pre = $this->getIndent($depth);
+   $key_pre = $this->getIndent($depth + 1);
+
+   $key_lines = array();
+   foreach ($object as $key => $val) {
+     // Keys are always encoded as strings, whatever their PHP type.
+     $ekey = $this->encodeFormattedValue((string)$key, 0);
+     $eval = $this->encodeFormattedValue($val, $depth + 1);
+     $key_lines[] = $key_pre.$ekey.': '.$eval;
+   }
+
+   return "{\n".implode(",\n", $key_lines)."\n".$pre.'}';
+ }
+
+
+ /**
+  * Pretty-print a JSON list.
+  *
+  * Each element is emitted on its own line, one indent level deeper than
+  * the enclosing brackets. An empty list is emitted as `[]`.
+  *
+  * @param list List to format.
+  * @param int Current depth, for indentation.
+  * @return string Pretty-printed value.
+  * @task internal
+  */
+ private function encodeFormattedArray($array, $depth) {
+   if (empty($array)) {
+     return '[]';
+   }
+
+   $close_indent = $this->getIndent($depth);
+   $item_indent = $this->getIndent($depth + 1);
+
+   $items = array();
+   foreach ($array as $item) {
+     $items[] = $item_indent.$this->encodeFormattedValue($item, $depth + 1);
+   }
+
+   return "[\n".implode(",\n", $items)."\n".$close_indent.']';
+ }
+
+
+ /**
+  * Pretty-print a JSON value.
+  *
+  * Arrays which are empty or have keys exactly `0..n-1` are formatted as
+  * lists, other arrays as objects, and everything else with json_encode().
+  *
+  * @param wild Value to format.
+  * @param int Current depth, for indentation.
+  * @return string Pretty-printed value.
+  * @task internal
+  */
+ private function encodeFormattedValue($value, $depth) {
+   if (!is_array($value)) {
+     if (!defined('JSON_UNESCAPED_SLASHES')) {
+       return json_encode($value);
+     }
+
+     // If we have a new enough version of PHP, disable escaping of slashes
+     // when pretty-printing values. Escaping slashes can defuse an attack
+     // where the attacker embeds "</script>" inside a JSON string, but that
+     // isn't relevant when rendering JSON for human viewers.
+     return json_encode($value, JSON_UNESCAPED_SLASHES);
+   }
+
+   $is_list =
+     empty($value) ||
+     array_keys($value) === range(0, count($value) - 1);
+
+   if ($is_list) {
+     return $this->encodeFormattedArray($value, $depth);
+   }
+
+   return $this->encodeFormattedObject($value, $depth);
+ }
+
+
+ /**
+  * Render a string corresponding to the current indent depth.
+  *
+  * @param int Current depth.
+  * @return string Indentation; empty at depth zero.
+  * @task internal
+  */
+ private function getIndent($depth) {
+   return $depth ? str_repeat(' ', $depth) : '';
+ }
+
+}
diff --git a/src/parser/PhutilJSONParser.php b/src/parser/PhutilJSONParser.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilJSONParser.php
@@ -0,0 +1,62 @@
+allowDuplicateKeys = $allow_duplicate_keys;
+ return $this;
+ }
+
+ /**
+  * Parse a JSON document into an array using the bundled JsonLint parser.
+  *
+  * @param string JSON to parse.
+  * @return array Decoded document.
+  * @throws PhutilJSONParserException If the document cannot be parsed, or
+  *         if the parsed value is not an array.
+  */
+ public function parse($json) {
+   $jsonlint_root = phutil_get_library_root('phutil').'/../externals/jsonlint';
+   $sources = array(
+     '/src/Seld/JsonLint/JsonParser.php',
+     '/src/Seld/JsonLint/Lexer.php',
+     '/src/Seld/JsonLint/ParsingException.php',
+     '/src/Seld/JsonLint/Undefined.php',
+   );
+   foreach ($sources as $source) {
+     require_once $jsonlint_root.$source;
+   }
+
+   $parser = new JsonLintJsonParser();
+   try {
+     $output = $parser->parse($json, $this->getFlags());
+   } catch (JsonLintParsingException $ex) {
+     $details = $ex->getDetails();
+     $location = idx($details, 'loc', array());
+
+     // Strip the leading "Parse error ..." excerpt, up to and including
+     // the line which ends in a caret.
+     $message = preg_replace("/^Parse error .*\\^\n/s", '', $ex->getMessage());
+
+     throw new PhutilJSONParserException(
+       $message,
+       idx($location, 'last_line'),
+       idx($location, 'last_column'),
+       idx($details, 'token'),
+       idx($details, 'expected'));
+   }
+
+   if (is_array($output)) {
+     return $output;
+   }
+
+   throw new PhutilJSONParserException(
+     pht(
+       '%s is not a valid JSON object.',
+       PhutilReadableSerializer::printShort($json)));
+ }
+
+ /**
+  * Build the flag set passed to the JsonLint parser.
+  *
+  * Always parses to associative arrays; duplicate keys are either allowed
+  * or detected as conflicts, depending on the `allowDuplicateKeys` setting.
+  *
+  * @return int Bitmask of JsonLintJsonParser flags.
+  */
+ private function getFlags() {
+   $duplicate_mode = $this->allowDuplicateKeys
+     ? JsonLintJsonParser::ALLOW_DUPLICATE_KEYS
+     : JsonLintJsonParser::DETECT_KEY_CONFLICTS;
+
+   return JsonLintJsonParser::PARSE_TO_ASSOC | $duplicate_mode;
+ }
+
+}
diff --git a/src/parser/PhutilLanguageGuesser.php b/src/parser/PhutilLanguageGuesser.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilLanguageGuesser.php
@@ -0,0 +1,47 @@
+ 1,
+ // Capture "#!/usr/bin/php" sorts of things.
+ '@^#!.*bin/(\S+)@' => 1,
+ // Capture initial " 1,
+ // Capture emacs "mode" header.
+ '@^.*-[*]-.*mode\s*:\s*(\S+).*-[*]-.*$@m' => 1,
+ // Look for things that seem to be diffs.
+ '/^---.*$\n^[+]{3}.*$\n^@@/m' => 'diff',
+ '/^diff --git/' => 'diff',
+ // Look for plausible console output.
+ '@^(?:\S+[\\\\/] )?[$] @' => 'console',
+ );
+
+ foreach ($patterns as $pattern => $language) {
+ $matches = null;
+ if (preg_match($pattern, $source, $matches)) {
+ if (is_numeric($language)) {
+ return $matches[$language];
+ } else {
+ return $language;
+ }
+ }
+ }
+
+ return null;
+ }
+
+}
diff --git a/src/parser/PhutilParserGenerator.php b/src/parser/PhutilParserGenerator.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilParserGenerator.php
@@ -0,0 +1,910 @@
+setTerminals(array('a', 'b'))
+ * ->setStartRule('S')
+ * ->setRules(
+ * array(
+ * 'S' => 'A b',
+ * 'A' => array(
+ * 'A a',
+ * 'a',
+ * )))
+ * ->processGrammar();
+ *
+ * To actually parse token streams, use @{method:parseTokens}.
+ *
+ * $tokens = get_tokens(); // Usually from PhutilLexer
+ * $callback = 'some_callback';
+ * $tree = $parser->parseTokens($tokens, $callback);
+ *
+ * The callback is invoked when a grammar rule matches. It should have this
+ * signature:
+ *
+ * function parser_callback($rule, $production, array $tokens) {
+ * // ...
+ * }
+ *
+ * The `$rule` is the matching rule; the `$production` is the matching
+ * production, and `$tokens` is the matching tokens (for terminal rules) or the
+ * return value of previous parse callbacks (for nonterminal rules).
+ *
+ * You should either return a result of evaluation, or some sort of abstract
+ * representation of the parse tree (this is more likely to be useful for more
+ * complex grammars).
+ *
+ * NOTE: This class generates LR(1) parsers, which perform less-than-optimally
+ * on large grammars. Worse, it is written in PHP. It is suitable only for
+ * very simple grammars with few states.
+ *
+ * NOTE: These parsers silently resolve reduce/reduce conflicts by choosing the
+ * first reduction, and silently resolve shift/reduce conflicts by shifting.
+ * These are the same rules used by Yacc, but are implicit.
+ *
+ * @task rules Grammar Rules
+ * @task rvalidation Rule Validation
+ * @task first Computing First()
+ * @task tables Computing Action and Goto Tables
+ * @task inspect Inspecting Generator State
+ */
+final class PhutilParserGenerator extends Phobject {
+
+ private $terminals;
+ private $rules;
+ private $startRule = 'start';
+ private $states = array();
+ private $sets = array();
+ private $successor = array();
+ private $setHashes = array();
+ private $actionTable;
+ private $gotoTable;
+
+ private $rulesValidated = false;
+ private $eofSymbol;
+ private $initSymbol;
+ private $epsilonSymbol;
+ private $endSymbol;
+
+ private $firstTable;
+
+ /**
+  * Validate the configured grammar and build the parser tables from it.
+  *
+  * Adds an initial rule which produces the start rule followed by the end
+  * symbol, seeds the state machine from it, and then builds the successor
+  * states and the tables.
+  *
+  * @return this
+  */
+ public function processGrammar() {
+   $this->validateRules();
+   $this->buildFirstTable();
+
+   $init = $this->getInitSymbol();
+   $eof = $this->getEOFSymbol();
+   $end = $this->getEndSymbol();
+
+   $init_production = array($this->startRule, $end);
+   $this->rules[$init] = array($init_production);
+
+   $seed_item = array($init, 0, 0, $eof);
+   list(, $state) = $this->addState(array($seed_item));
+
+   $this->buildSuccessors($state);
+   $this->buildTables();
+
+   return $this;
+ }
+
+
+/* -( Grammar Rules )------------------------------------------------------ */
+
+
+ /**
+ * Set the terminal symbols of the grammar.
+ *
+ * The symbols are stored as the keys of a map, which is what isTerminal()
+ * looks them up in.
+ *
+ * @param list<string> Terminal symbols.
+ * @return this
+ * @task rules
+ */
+ public function setTerminals(array $terminals) {
+ $this->terminals = array_fill_keys($terminals, true);
+ return $this;
+ }
+
+ /**
+ * Set the grammar rules, as a map from rule name to its productions.
+ *
+ * The rules are stored as given; they are normalized later by parseRules().
+ *
+ * @param map Grammar rules.
+ * @return this
+ * @task rules
+ */
+ public function setRules(array $rules) {
+ $this->rules = $rules;
+ return $this;
+ }
+
+ /**
+ * Set the name of the start rule. The default is 'start'.
+ *
+ * @param string Name of the start rule.
+ * @return this
+ * @task rules
+ */
+ public function setStartRule($rule_name) {
+ $this->startRule = $rule_name;
+ return $this;
+ }
+
+ /**
+ * Get the name of the start rule.
+ *
+ * @return string Name of the start rule.
+ * @task rules
+ */
+ public function getStartRule() {
+ return $this->startRule;
+ }
+
+ /**
+  * Get the symbol chosen to represent end-of-file.
+  *
+  * @return string End-of-file symbol.
+  * @throws PhutilInvalidStateException If no symbol has been chosen yet.
+  * @task rules
+  */
+ public function getEOFSymbol() {
+   if ($this->eofSymbol !== null) {
+     return $this->eofSymbol;
+   }
+
+   throw new PhutilInvalidStateException('processGrammar');
+ }
+
+ /**
+  * Get the symbol chosen to name the initial rule.
+  *
+  * @return string Init symbol.
+  * @throws PhutilInvalidStateException If no symbol has been chosen yet.
+  * @task rules
+  */
+ public function getInitSymbol() {
+   if ($this->initSymbol !== null) {
+     return $this->initSymbol;
+   }
+
+   throw new PhutilInvalidStateException('processGrammar');
+ }
+
+ /**
+  * Get the symbol chosen to stand in for null (empty) production entries.
+  *
+  * @return string Epsilon symbol.
+  * @throws PhutilInvalidStateException If no symbol has been chosen yet.
+  * @task rules
+  */
+ public function getEpsilonSymbol() {
+   if ($this->epsilonSymbol !== null) {
+     return $this->epsilonSymbol;
+   }
+
+   throw new PhutilInvalidStateException('processGrammar');
+ }
+
+ /**
+  * Get the symbol chosen to mark the end of every production.
+  *
+  * @return string End symbol.
+  * @throws PhutilInvalidStateException If no symbol has been chosen yet.
+  * @task rules
+  */
+ public function getEndSymbol() {
+   if ($this->endSymbol !== null) {
+     return $this->endSymbol;
+   }
+
+   throw new PhutilInvalidStateException('processGrammar');
+ }
+
+ /**
+ * Test whether a symbol is one of the grammar's terminals.
+ *
+ * @param string Symbol to test.
+ * @return bool True if the symbol is a key of the terminal map.
+ * @task rules
+ */
+ public function isTerminal($symbol) {
+ return isset($this->terminals[$symbol]);
+ }
+
+ /**
+ * Test whether a symbol names one of the grammar's rules.
+ *
+ * @param string Symbol to test.
+ * @return bool True if the symbol is a key of the rule map.
+ * @task rules
+ */
+ public function isRule($symbol) {
+ return isset($this->rules[$symbol]);
+ }
+
+
+/* -( Rule Validation )---------------------------------------------------- */
+
+
+ /**
+ * Perform a battery of tests on the provided rules to detect problems which
+ * would prevent us from generating a parser.
+ *
+ * Besides checking, this normalizes the rules and chooses the special
+ * symbols, so the order of the steps matters: for example, the terminal
+ * reachability check calls getEOFSymbol(), which throws until
+ * chooseSpecialSymbols() has run.
+ *
+ * @return void
+ * @task rvalidation
+ */
+ private function validateRules() {
+ // Rules must be specified in the right format.
+ $this->parseRules();
+
+ // Rules must contain only known symbols.
+ $this->validateRuleSymbols();
+
+ // The start rule must exist and be valid.
+ $this->validateStartRule();
+
+ // Now, we select printable names for special symbols (EOF, epsilon, etc)
+ // that don't conflict with any symbols in the grammar.
+ $this->chooseSpecialSymbols();
+
+ // Make sure every terminal can be reached by some rule.
+ $this->validateAllTerminalsReachable();
+
+ // Make sure every rule can be reached.
+ $this->validateAllRulesReachable();
+
+ // Make sure every rule has some valid reduction.
+ $this->validateAllRulesReducible();
+
+ // Record that the whole battery passed.
+ $this->rulesValidated = true;
+ }
+
+
+ /**
+  * Normalize the configured rules so that every rule is a list of
+  * productions and every production is a list of symbols.
+  *
+  * A rule given as a single production is wrapped in a list. A production
+  * given as a string is split on whitespace, and a null production becomes
+  * a production holding a single null.
+  *
+  * @return void
+  * @throws PhutilInvalidRuleParserGeneratorException If a production given
+  *         as an array mixes null with other symbols.
+  * @task rvalidation
+  */
+ private function parseRules() {
+   foreach ($this->rules as $rule_name => $rule_variants) {
+     if (!is_array($rule_variants)) {
+       $rule_variants = array($rule_variants);
+       $this->rules[$rule_name] = $rule_variants;
+     }
+     foreach ($rule_variants as $vkey => $variant) {
+       if ($variant === null) {
+         $variant = array(null);
+       } else if (!is_array($variant)) {
+         $variant = preg_split('/\s+/', $variant);
+       } else {
+         foreach ($variant as $symbol) {
+           if (($symbol === null) && count($variant) > 1) {
+             throw new PhutilInvalidRuleParserGeneratorException(
+               pht(
+                 "Rule '%s' contains a production '%s' which is ".
+                 "nonempty but has a null in it. A rule with other ".
+                 "symbols may not contain null.",
+                 $rule_name,
+                 $vkey));
+           }
+         }
+       }
+       // Reindex so that every production is a plain list.
+       $this->rules[$rule_name][$vkey] = array_values($variant);
+     }
+   }
+ }
+
+
+ /**
+  * Verify that every symbol used in a production is null, a terminal, or
+  * the name of a rule.
+  *
+  * @return void
+  * @throws PhutilUnknownSymbolParserGeneratorException On the first symbol
+  *         which is none of these.
+  * @task rvalidation
+  */
+ private function validateRuleSymbols() {
+   foreach ($this->rules as $rule => $productions) {
+     foreach ($productions as $production_name => $production) {
+       foreach ($production as $symbol) {
+         $is_known =
+           ($symbol === null) ||
+           $this->isTerminal($symbol) ||
+           $this->isRule($symbol);
+         if ($is_known) {
+           continue;
+         }
+
+         throw new PhutilUnknownSymbolParserGeneratorException(
+           pht(
+             "Symbol '%s' in production '%s' ('%s') of rule '%s' does not ".
+             "name a rule or terminal. Did you misspell a symbol, fail to ".
+             "specify a terminal, or forget a rule?",
+             $symbol,
+             $production_name,
+             implode(' ', $production),
+             $rule));
+       }
+     }
+   }
+ }
+
+
+ /**
+  * Verify that the start rule names one of the grammar's rules.
+  *
+  * @return void
+  * @throws PhutilUnknownSymbolParserGeneratorException If it does not.
+  * @task rvalidation
+  */
+ private function validateStartRule() {
+   $start_rule = $this->getStartRule();
+   if ($this->isRule($start_rule)) {
+     return;
+   }
+
+   throw new PhutilUnknownSymbolParserGeneratorException(
+     pht(
+       "Start rule '%s' does not appear in the rules for the grammar. Use ".
+       "%s to choose a different start rule, or add a rule named '%s'.",
+       $start_rule,
+       'setStartRule()',
+       $start_rule));
+ }
+
+
+ /**
+  * Choose names for the special symbols and apply them to the rules.
+  *
+  * Each default name gets apostrophes appended until it collides with
+  * neither a rule nor a terminal. Afterwards, every null entry in a
+  * production is replaced with the epsilon symbol, the end symbol is
+  * appended to every production, and the end-of-file symbol is registered
+  * as a terminal.
+  *
+  * @return void
+  * @task rvalidation
+  */
+ private function chooseSpecialSymbols() {
+   $defaults = array(
+     'eofSymbol' => '(end-of-file)',
+     'epsilonSymbol' => '(epsilon)',
+     'initSymbol' => '(init)',
+     'endSymbol' => '(end)',
+   );
+
+   $chosen = array();
+   foreach ($defaults as $property => $name) {
+     while ($this->isRule($name) || $this->isTerminal($name)) {
+       $name .= "'";
+     }
+     $chosen[$property] = $name;
+   }
+
+   $this->eofSymbol = $chosen['eofSymbol'];
+   $this->epsilonSymbol = $chosen['epsilonSymbol'];
+   $this->initSymbol = $chosen['initSymbol'];
+   $this->endSymbol = $chosen['endSymbol'];
+
+   foreach ($this->rules as $rule => $productions) {
+     foreach ($productions as $production_name => $production) {
+       $rewritten = $production;
+       foreach ($production as $position => $symbol) {
+         if ($symbol === null) {
+           $rewritten[$position] = $this->epsilonSymbol;
+         }
+       }
+       $rewritten[] = $this->endSymbol;
+
+       $this->rules[$rule][$production_name] = $rewritten;
+     }
+   }
+
+   $this->terminals[$this->getEOFSymbol()] = true;
+ }
+
+
+ /**
+  * Verify that every terminal, other than the end-of-file symbol, appears
+  * in at least one production.
+  *
+  * @return void
+  * @throws PhutilUnreachableTerminalParserGeneratorException If some
+  *         terminals are never used.
+  * @task rvalidation
+  */
+ private function validateAllTerminalsReachable() {
+   $used = array();
+   foreach ($this->rules as $productions) {
+     foreach ($productions as $production) {
+       $used += array_fill_keys($production, true);
+     }
+   }
+
+   $unused = array_diff_key($this->terminals, $used);
+   unset($unused[$this->getEOFSymbol()]);
+   if (!$unused) {
+     return;
+   }
+
+   throw new PhutilUnreachableTerminalParserGeneratorException(
+     pht(
+       'Some terminals do not appear in any rule: %s',
+       implode(', ', array_keys($unused))));
+ }
+
+
+ /**
+  * Verify that every rule, other than the start rule itself, can be
+  * reached from the start rule.
+  *
+  * @return void
+  * @throws PhutilUnreachableRuleParserGeneratorException If some rules
+  *         cannot be reached.
+  * @task rvalidation
+  */
+ private function validateAllRulesReachable() {
+   $start_rule = $this->getStartRule();
+
+   $memo = array();
+   $reachable = $this->computeReachableRules($start_rule, $memo);
+
+   $unreached = array_diff_key($this->rules, $reachable);
+   unset($unreached[$start_rule]);
+   if (!$unreached) {
+     return;
+   }
+
+   throw new PhutilUnreachableRuleParserGeneratorException(
+     pht(
+       'Some rules can never be reached from any production: %s',
+       implode(', ', array_keys($unreached))));
+ }
+
+
+ /**
+  * Compute the set of rules which can be reached from a rule.
+  *
+  * @param string Rule to start from.
+  * @param map Results computed so far, keyed by rule name. The entry for a
+  *            rule is created before its productions are examined, so a
+  *            recursive reference to a rule which is still being computed
+  *            sees its partial result instead of recursing again.
+  * @return map<string, bool> Map whose keys are the reachable rule names.
+  * @task rvalidation
+  */
+ private function computeReachableRules($rule, array &$stack) {
+   if (isset($stack[$rule])) {
+     return $stack[$rule];
+   }
+
+   $stack[$rule] = array();
+
+   foreach ($this->rules[$rule] as $production) {
+     foreach ($production as $symbol) {
+       if (!$this->isRule($symbol)) {
+         continue;
+       }
+
+       $stack[$rule][$symbol] = true;
+       $stack[$rule] += $this->computeReachableRules($symbol, $stack);
+     }
+   }
+
+   return $stack[$rule];
+ }
+
+
+  /**
+   * Verify that every rule can eventually be reduced.
+   *
+   * Throws @{class:PhutilIrreducibleRuleParserGeneratorException} for the
+   * first rule which @{method:isRuleReducible} reports as irreducible.
+   *
+   * @return void
+   * @task rvalidation
+   */
+  private function validateAllRulesReducible() {
+    // Shared memo so each rule is only analyzed once across the whole pass.
+    $memo = array();
+
+    foreach (array_keys($this->rules) as $rule) {
+      if ($this->isRuleReducible($rule, $memo)) {
+        continue;
+      }
+
+      throw new PhutilIrreducibleRuleParserGeneratorException(
+        pht(
+          "Rule '%s' can never be reduced: it recurses indefinitely ".
+          "and reaches no production of terminals.",
+          $rule));
+    }
+  }
+
+
+  /**
+   * Memoized test for whether a rule is reducible.
+   *
+   * @param string Rule to test.
+   * @param map Memo of rule => bool, passed by reference.
+   * @return bool True if the rule is reducible.
+   * @task rvalidation
+   */
+  private function isRuleReducible($rule, array &$reducible) {
+    if (!isset($reducible[$rule])) {
+      // Seed the memo with "not reducible" before computing, so a rule which
+      // recurses back into itself sees an answer instead of looping forever.
+      // The seed is replaced by the real answer once it is known.
+      $reducible[$rule] = false;
+      $reducible[$rule] = $this->computeRuleReducible($rule, $reducible);
+    }
+
+    return $reducible[$rule];
+  }
+
+
+ /**
+ * Determine whether a rule has at least one production which can be reduced.
+ *
+ * A production counts as reducible when the first pass reaches an epsilon
+ * symbol in it or finds only terminals before the end symbol, or when the
+ * second pass finds that every symbol before the end symbol is a terminal or
+ * a rule which is itself reducible. Productions which mention the rule
+ * directly are discarded during the first pass.
+ *
+ * @param string Rule to examine.
+ * @param map Memo of rule => bool shared with @{method:isRuleReducible},
+ * passed by reference.
+ * @return bool True if some production of the rule is reducible.
+ * @task rvalidation
+ */
+ private function computeRuleReducible($rule, array &$reducible) {
+ $epsilon = $this->getEpsilonSymbol();
+ $end = $this->getEndSymbol();
+
+ $productions = $this->rules[$rule];
+
+ // In the first pass, try to find a trivially reducible production, e.g. one
+ // with epsilon or only terminals. Also, remove recursive productions (those
+ // which directly involve the rule itself) because we know we won't be able
+ // to reduce them. If we're lucky, this will allow us to determine that the
+ // rule is reducible without recursion. For example, we can immediately
+ // reduce these productions:
+ //
+ // R -> a
+ // R -> b c d
+ // R -> (epsilon)
+ //
+ // We can never reduce these productions:
+ //
+ // R -> R
+ // R -> a R b
+ //
+ // We might be able to reduce these productions, but they aren't as cheap
+ // or easy to figure out, since we need to first determine if other rules
+ // can be reduced:
+ //
+ // R -> X Y
+ // R -> X a
+ //
+ // If we find a reduction, we return immediately.
+
+ foreach ($productions as $key => $production) {
+ $has_only_terminals = true;
+ foreach ($production as $symbol) {
+ if ($symbol == $end) {
+ break;
+ } else if ($symbol == $epsilon) {
+ // The rule contains an epsilon production, which can always reduce
+ // it.
+ return true;
+ } else if ($symbol == $rule) {
+ // The rule contains itself; this production is never reducible. We
+ // must find another reducible production.
+ unset($productions[$key]);
+ continue 2;
+ } else if ($this->isTerminal($symbol)) {
+ // This is a terminal; keep looking. We'll be able to reduce the
+ // production if it contains only terminals.
+ continue;
+ } else {
+ // This is a rule, so we can't trivially reduce it. We'll keep it
+ // for the next round if we can't find any trivial reductions.
+ $has_only_terminals = false;
+ break;
+ }
+ }
+
+ if ($has_only_terminals) {
+ return true;
+ }
+ }
+
+ // If we have no productions left, this rule can't be reduced.
+ if (empty($productions)) {
+ return false;
+ }
+
+ // We have remaining productions which include other rules. Look for a
+ // nontrivial reduction. For example:
+ //
+ // R -> X Y
+ // X -> x
+ // Y -> y
+ //
+ // In this case, X and Y are both reducible, so "X Y" is reducible and thus
+ // R is reducible.
+ foreach ($productions as $production) {
+ $can_reduce = true;
+ foreach ($production as $symbol) {
+ // NOTE: We don't need to check for epsilon here, because we would
+ // already have determined the rule was reducible if we had an epsilon
+ // production.
+ if ($symbol == $end) {
+ break;
+ } else if ($this->isTerminal($symbol)) {
+ continue;
+ } else if (!$this->isRuleReducible($symbol, $reducible)) {
+ $can_reduce = false;
+ break;
+ }
+ }
+
+ if ($can_reduce) {
+ // The production contained only terminals and reducible rules, so it
+ // is reducible. We're good and don't need to examine remaining
+ // productions.
+ return true;
+ }
+ }
+
+ // We didn't find any reducible productions.
+ return false;
+ }
+
+
+/* -( Computing First() )-------------------------------------------------- */
+
+
+  /**
+   * Reset the First() table and populate it with an entry for every rule.
+   *
+   * @return void
+   */
+  private function buildFirstTable() {
+    $this->firstTable = array();
+
+    foreach (array_keys($this->rules) as $rule) {
+      $this->buildRuleFirst($rule);
+    }
+  }
+
+  /**
+   * Compute (or fetch from the table) the First() set of a rule.
+   *
+   * The table entry is created empty before the rule's productions are
+   * examined, so a recursive lookup of a rule which is still being computed
+   * returns its partial set rather than recursing again.
+   *
+   * @param string Rule to compute First() for.
+   * @return map Set of symbols, in the form `symbol => true`.
+   */
+  private function buildRuleFirst($rule) {
+    if (!isset($this->firstTable[$rule])) {
+      $this->firstTable[$rule] = array();
+
+      foreach ($this->rules[$rule] as $production) {
+        $this->firstTable[$rule] += $this->getFirstForProduction($production);
+      }
+    }
+
+    return $this->firstTable[$rule];
+  }
+
+ /**
+ * Compute the First() set for a sequence of symbols.
+ *
+ * Walks the symbols left to right. A terminal is added to the set and ends
+ * the walk. For a rule, the rule's own First() set (minus epsilon) is merged
+ * in, and the walk continues only if that set contained epsilon. If the walk
+ * stops at the end symbol, at an epsilon symbol, or by running out of
+ * symbols, epsilon is added to the result.
+ *
+ * @param list Symbols of a production.
+ * @return map Set of symbols, in the form `symbol => true`.
+ */
+ private function getFirstForProduction(array $production) {
+ $set = array();
+
+ $end = $this->getEndSymbol();
+ $epsilon = $this->getEpsilonSymbol();
+ $eof = $this->getEOFSymbol(); // NOTE(review): never read in this method.
+
+ $accept_epsilon = true;
+ foreach ($production as $symbol) {
+ if ($symbol === $end) {
+ break;
+ } else if ($symbol === $epsilon) {
+ break;
+ } else if ($this->isTerminal($symbol)) {
+ $set[$symbol] = true;
+ $accept_epsilon = false;
+ break;
+ } else {
+ $symbol_set = $this->buildRuleFirst($symbol);
+
+ $has_epsilon = isset($symbol_set[$epsilon]);
+ unset($symbol_set[$epsilon]);
+ $set += $symbol_set;
+ if (!$has_epsilon) {
+ $accept_epsilon = false;
+ break;
+ }
+ }
+ }
+
+ if ($accept_epsilon) {
+ $set[$epsilon] = true;
+ }
+
+ return $set;
+ }
+
+
+/* -( Computing States )--------------------------------------------------- */
+
+
+ /**
+ * Expand a set of items to its closure and register it as a state.
+ *
+ * Each item is a 4-tuple of (rule, production key, position in production,
+ * lookahead symbol). For every item whose next symbol is a rule, items for
+ * each production of that rule are appended at position 0, one per terminal
+ * in First() of the remainder of the production followed by the lookahead.
+ * The loop bound is re-evaluated on every pass, so appended items are
+ * expanded too.
+ *
+ * If an identical set (by @{method:hashSet}) is already known, the existing
+ * state is reused instead of adding a new one.
+ *
+ * @param list Initial items.
+ * @return pair `array($is_new, $state)`: whether a state was added, and the
+ * key of the new or existing state.
+ */
+ private function addState(array $set) {
+ $seen = array();
+ foreach ($set as $item) {
+ $seen[$item[0]][$item[1]][$item[2]][$item[3]] = true;
+ }
+
+ $end = $this->getEndSymbol();
+ $epsilon = $this->getEpsilonSymbol();
+
+ for ($ii = 0; $ii < count($set); $ii++) {
+ $item = $set[$ii];
+
+ $production = $this->rules[$item[0]][$item[1]];
+ $next = $production[$item[2]];
+ if ($this->isTerminal($next)) {
+ continue;
+ } else if ($next === $epsilon) {
+ continue;
+ } else if ($next === $end) {
+ continue;
+ }
+
+ // Symbols after $next (dropping the production's final element), then the
+ // item's lookahead, then the end symbol.
+ $v = array_slice($production, $item[2] + 1, -1);
+ $v[] = $item[3];
+ $v[] = $end;
+
+ $firsts = $this->getFirstForProduction($v);
+
+ // Keep only terminals as lookaheads for the new items.
+ foreach ($firsts as $nfirst => $ignored) {
+ if (!$this->isTerminal($nfirst)) {
+ unset($firsts[$nfirst]);
+ }
+ }
+
+ foreach ($this->rules[$next] as $pkey => $nproduction) {
+ foreach ($firsts as $nfirst => $ignored) {
+ if (isset($seen[$next][$pkey][0][$nfirst])) {
+ continue;
+ }
+ $set[] = array($next, $pkey, 0, $nfirst);
+ $seen[$next][$pkey][0][$nfirst] = true;
+ }
+ }
+ }
+
+ $hash = $this->hashSet($set);
+ if (isset($this->setHashes[$hash])) {
+ return array(false, $this->setHashes[$hash]);
+ }
+
+ $this->states[] = $set;
+ $state = last_key($this->states);
+ $this->setHashes[$hash] = $state;
+
+ return array(true, $state);
+ }
+
+  /**
+   * Compute the successor states of a state, recursively.
+   *
+   * Items of the state are grouped by their next symbol and advanced one
+   * position. Each group is registered with @{method:addState} and recorded
+   * in the successor table; newly created states are expanded in turn.
+   *
+   * @param int Key of the state to expand.
+   * @return void
+   */
+  private function buildSuccessors($start_state) {
+    $end = $this->getEndSymbol();
+
+    $advanced = array();
+    foreach ($this->states[$start_state] as $item) {
+      list($rule, $pkey, $position, $lookahead) = $item;
+
+      $symbol = $this->rules[$rule][$pkey][$position];
+      if ($symbol !== $end) {
+        // Items already at the end of their production have no successor.
+        $advanced[$symbol][] = array($rule, $pkey, $position + 1, $lookahead);
+      }
+    }
+
+    foreach ($advanced as $symbol => $kernel) {
+      list($is_new, $state) = $this->addState($kernel);
+      $this->successor[$start_state][$symbol] = $state;
+
+      if ($is_new) {
+        $this->buildSuccessors($state);
+      }
+    }
+  }
+
+  /**
+   * Compute an order-independent hash of a set of items.
+   *
+   * Each item is flattened to a string, the strings are sorted so that item
+   * order does not matter, and the joined result is hashed with MD5.
+   *
+   * @param list Items, each a list of scalars.
+   * @return string MD5 hash identifying the set.
+   */
+  private function hashSet(array $set) {
+    $flattened = array();
+    foreach ($set as $item) {
+      $flattened[] = implode("\0", $item);
+    }
+    sort($flattened);
+
+    return md5(implode("\1", $flattened));
+  }
+
+
+ /**
+ * Build the action and goto tables from the computed states.
+ *
+ * For each state, items are sorted into shifts (next symbol is a terminal),
+ * reductions (item is at the end of a production of a rule other than the
+ * init rule, keyed by the item's lookahead), an accept (init rule at its end
+ * with EOF lookahead), and goto entries (everything else). The results are
+ * stored in `actionTable` and `gotoTable`. Action entries have the form
+ * `array('S', $state)`, `array('R', array($rule, $production, $size))` or
+ * `array('A')`.
+ *
+ * Reduce/reduce and shift/reduce conflicts are not reported: the reporting
+ * code is disabled with `if (false)`. Accept/shift and accept/reduce
+ * conflicts throw an Exception.
+ *
+ * @return void
+ */
+ private function buildTables() {
+ $action = array();
+ $goto = array();
+
+ $end = $this->getEndSymbol();
+ $eof = $this->getEOFSymbol();
+ $init = $this->getInitSymbol();
+
+ foreach ($this->states as $state => $items) {
+ $shift = array();
+ $reduce = array();
+ $accept = false;
+ foreach ($items as $item) {
+ $next = $this->rules[$item[0]][$item[1]][$item[2]];
+ if ($next == $end) {
+ if ($item[0] !== $init) {
+ $reduce[$item[3]][] = $item;
+ } else if ($item[0] === $init && $item[3] === $eof) {
+ $accept = $item;
+ }
+ } else if ($this->isTerminal($next)) {
+ $shift[$next] = $item;
+ } else {
+ $goto[$state][$next] = $this->successor[$state][$next];
+ }
+ }
+
+ foreach ($reduce as $next => $reductions) {
+ if (count($reductions) > 1) {
+ $ways = array();
+ foreach ($reductions as $reduction) {
+ $ways[] = "{$reduction[0]}/{$reduction[1]}";
+ }
+ $ways = implode('; ', $ways);
+
+ // TODO: As below, we should have more explicit handling of
+ // reduce/reduce conflicts. For now, just pick the first one.
+
+ if (false) {
+ throw new Exception(
+ pht(
+ "Reduce/reduce conflict: from state '%s', when a ".
+ "'%s' is encountered, it may be reduced in multiple ".
+ "ways: %s",
+ $state,
+ $next,
+ $ways));
+ }
+ }
+ $reduce[$next] = head($reductions);
+ }
+
+ $srconflicts = array_intersect_key($shift, $reduce);
+ foreach ($srconflicts as $next => $ignored) {
+
+ // TODO: We should probably have better or more explicit handling of
+ // shift/reduce conflicts. For now, we just shift.
+
+ if (false) {
+ $what = $reduce[$next][0];
+ throw new Exception(
+ pht(
+ "Shift/reduce conflict: from state '%s', when a '%s' ".
+ "is encountered, shifting conflicts with reducing '%s'.",
+ $state,
+ $next,
+ $what));
+ } else {
+ // Resolve the shift/reduce by shifting.
+ // NOTE(review): this discards every reduction in the state, including
+ // ones on lookaheads which do not conflict with a shift. Confirm this
+ // is intended before narrowing it to the conflicting symbol; tables
+ // generated from this code depend on the current behavior.
+ $reduce = array();
+ }
+ }
+
+ if ($accept && isset($shift[$eof])) {
+ throw new Exception(pht('Accept/shift conflict!'));
+ }
+
+ if ($accept && isset($reduce[$eof])) {
+ throw new Exception(pht('Accept/reduce conflict!'));
+ }
+
+ foreach ($reduce as $next => $item) {
+ $action[$state][$next] = array(
+ 'R',
+ array(
+ $item[0],
+ $item[1],
+ count($this->rules[$item[0]][$item[1]]) - 1,
+ ),
+ );
+ }
+
+ foreach ($shift as $next => $item) {
+ $action[$state][$next] = array(
+ 'S',
+ $this->successor[$state][$next],
+ );
+ }
+
+ if ($accept) {
+ $action[$state][$eof] = array('A');
+ }
+ }
+
+ $this->actionTable = $action;
+ $this->gotoTable = $goto;
+ }
+
+  /**
+   * Generate PHP source for a standalone parser function.
+   *
+   * The generated function embeds this generator's action table, goto table
+   * and EOF symbol, and forwards to @{method:parseTokensWithTables}.
+   *
+   * @param string Name to give the generated function.
+   * @return string PHP source code of the function.
+   */
+  public function generateParserFunction($name) {
+    $action_source = $this->formatAndIndent($this->actionTable, 4);
+    $goto_source = $this->formatAndIndent($this->gotoTable, 4);
+    $eof_source = $this->formatAndIndent($this->getEOFSymbol(), 4);
+
+    $lines = array(
+      'function '.$name.'(array $tokens, $callback) {',
+      ' return '.__CLASS__.'::parseTokensWithTables(',
+      ' '.$action_source.',',
+      ' '.$goto_source.',',
+      ' '.$eof_source.',',
+      ' $tokens,',
+      ' $callback);',
+      '}',
+    );
+
+    return implode("\n", $lines);
+  }
+
+  /**
+   * Export a value as PHP source and indent its continuation lines.
+   *
+   * Only lines after the first are indented; the caller is expected to
+   * position the first line itself.
+   *
+   * @param wild Value to export.
+   * @param int Number of spaces to insert after each newline.
+   * @return string Exported, indented source.
+   */
+  private function formatAndIndent($var, $depth) {
+    $exported = phutil_var_export($var);
+    $line_break = "\n".str_repeat(' ', $depth);
+
+    return str_replace("\n", $line_break, $exported);
+  }
+
+ /**
+ * Parse a list of tokens using this generator's own action and goto tables.
+ *
+ * Delegates to @{method:parseTokensWithTables}, supplying `actionTable`,
+ * `gotoTable` and the EOF symbol.
+ *
+ * @param list Tokens to parse.
+ * @param callable Callback passed through to the table-driven parser.
+ * @return wild Whatever @{method:parseTokensWithTables} returns.
+ */
+ public function parseTokens(array $tokens, $callback) {
+ return self::parseTokensWithTables(
+ $this->actionTable,
+ $this->gotoTable,
+ $this->getEOFSymbol(),
+ $tokens,
+ $callback);
+ }
+
+  /**
+   * Parse a list of tokens with precomputed action and goto tables.
+   *
+   * Tokens are consumed left to right; the first element of each token is the
+   * symbol used to index the action table. On a shift (`S`) the token is
+   * pushed onto the value stack. On a reduce (`R`) the handled values are
+   * popped and replaced with the result of calling `$callback` with the rule,
+   * the production key and the list of popped values. On accept (`A`) the
+   * first value on the stack is returned.
+   *
+   * Malformed tables are reported with an Exception rather than a hang or a
+   * low-level error: an action code other than `S`, `R` or `A`, a state
+   * which has no row in the action table, and a reduction with no matching
+   * goto entry all throw.
+   *
+   * @param map Action table, in the form state => symbol => action.
+   * @param map Goto table, in the form state => rule => state.
+   * @param string Symbol to use as the lookahead once input is exhausted.
+   * @param list Tokens to parse.
+   * @param callable Reduction callback.
+   * @return wild First value left on the value stack after accepting.
+   */
+  public static function parseTokensWithTables(
+    $action_table,
+    $goto_table,
+    $eof_symbol,
+    array $tokens,
+    $callback) {
+
+    $state_stack = array(0);
+    $token_stack = array();
+
+    // Reverse the input so the next token can be read with end() and consumed
+    // with array_pop().
+    $tokens = array_reverse($tokens);
+    while (true) {
+      $state = end($state_stack);
+
+      if (empty($tokens)) {
+        $next = $eof_symbol;
+      } else {
+        $next_token = end($tokens);
+        $next = $next_token[0];
+      }
+
+      if (!isset($action_table[$state][$next])) {
+        // The state may have no row at all; treat that as "nothing expected"
+        // instead of handing a non-array to array_keys().
+        $row = array();
+        if (isset($action_table[$state]) && is_array($action_table[$state])) {
+          $row = $action_table[$state];
+        }
+        $expected = implode(', ', array_keys($row));
+        throw new Exception(
+          pht(
+            "Unexpected '%s' in state %s! Expected: %s",
+            $next,
+            $state,
+            $expected));
+      }
+
+      $action = $action_table[$state][$next];
+
+      switch ($action[0]) {
+        case 'S':
+          $state_stack[] = $action[1];
+          $token_stack[] = array_pop($tokens);
+          break;
+        case 'R':
+          $r_rule = $action[1][0];
+          $r_prod = $action[1][1];
+          $r_size = $action[1][2];
+
+          // A counted loop terminates even if the table holds a bogus
+          // (negative) size.
+          $token_v = array();
+          for ($ii = 0; $ii < $r_size; $ii++) {
+            $token_v[] = array_pop($token_stack);
+            array_pop($state_stack);
+          }
+          $token_v = array_reverse($token_v);
+          $token_stack[] = call_user_func_array(
+            $callback,
+            array($r_rule, $r_prod, $token_v));
+
+          $from_state = end($state_stack);
+          if (!isset($goto_table[$from_state][$r_rule])) {
+            throw new Exception(
+              pht(
+                "No goto entry for rule '%s' from state %s!",
+                $r_rule,
+                $from_state));
+          }
+          $state_stack[] = $goto_table[$from_state][$r_rule];
+          break;
+        case 'A':
+          break 2;
+        default:
+          // Without this, an unrecognized action would leave both stacks
+          // untouched and the loop would spin forever.
+          throw new Exception(
+            pht(
+              "Unknown parser action '%s' in state %s!",
+              $action[0],
+              $state));
+      }
+    }
+
+    return head($token_stack);
+  }
+
+
+/* -( Inspecting Generator State )----------------------------------------- */
+
+
+  /**
+   * Return the rule table, once the grammar has been validated.
+   *
+   * @return map Rule table.
+   * @task inspect
+   */
+  public function inspectRules() {
+    if ($this->rulesValidated) {
+      return $this->rules;
+    }
+
+    throw new PhutilInvalidStateException('processGrammar');
+  }
+
+
+  /**
+   * Return the First() table, once it has been built.
+   *
+   * @return map First() table.
+   * @task inspect
+   */
+  public function inspectFirstTable() {
+    if ($this->firstTable !== null) {
+      return $this->firstTable;
+    }
+
+    throw new PhutilInvalidStateException('processGrammar');
+  }
+
+
+}
diff --git a/src/parser/PhutilPygmentizeParser.php b/src/parser/PhutilPygmentizeParser.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilPygmentizeParser.php
@@ -0,0 +1,83 @@
+map = $map;
+ return $this;
+ }
+
+ /**
+ * Get the map consulted by parse(): keys are `class` attribute values, and
+ * values are the text to emit in a `style` attribute instead.
+ *
+ * @return map Current map.
+ */
+ public function getMap() {
+ return $this->map;
+ }
+
+  /**
+   * Rewrite `class="..."` attributes in a block of markup into inline
+   * `style="..."` attributes, using the configured map.
+   *
+   * The block is scanned one byte at a time with a small state machine:
+   * `text` (between tags), `tag` (inside `<...>`) and `class` (inside the
+   * value of a `class="` attribute). Attribute values found in the map are
+   * replaced with an HTML-escaped `style` attribute; all other content is
+   * passed through unchanged.
+   *
+   * @param string Markup to rewrite.
+   * @return string Rewritten markup.
+   */
+  public function parse($block) {
+    $class_look = 'class="';
+    $class_len = strlen($class_look);
+
+    $class_start = null;
+
+    $map = $this->map;
+
+    $len = strlen($block);
+    $out = '';
+    $mode = 'text';
+    for ($ii = 0; $ii < $len; $ii++) {
+      $c = $block[$ii];
+      switch ($mode) {
+        case 'text':
+          // We're in general text between tags, and just passing characters
+          // through unmodified.
+          if ($c == '<') {
+            $mode = 'tag';
+          }
+          $out .= $c;
+          break;
+        case 'tag':
+          // We're inside a tag, and looking for `class="` so we can rewrite
+          // it.
+          if ($c == '>') {
+            $mode = 'text';
+          }
+          if ($c == 'c') {
+            if (!substr_compare($block, $class_look, $ii, $class_len)) {
+              $mode = 'class';
+              // Leave the cursor on the opening quote: the loop increment
+              // then lands on the first byte of the value, so that byte is
+              // examined too. Jumping past the quote here would skip it and
+              // miss the closing quote of an empty `class=""`.
+              $ii += $class_len - 1;
+              $class_start = $ii + 1;
+            }
+          }
+
+          if ($mode != 'class') {
+            $out .= $c;
+          }
+          break;
+        case 'class':
+          // We're inside a `class="..."` tag, and looking for the ending quote
+          // so we can replace it.
+          if ($c == '"') {
+            $class = substr($block, $class_start, $ii - $class_start);
+
+            // If this class is present in the map, rewrite it into an inline
+            // style attribute.
+            if (isset($map[$class])) {
+              $out .= 'style="'.phutil_escape_html($map[$class]).'"';
+            } else {
+              $out .= 'class="'.$class.'"';
+            }
+
+            $mode = 'tag';
+          }
+          break;
+      }
+    }
+
+    return $out;
+  }
+
+}
diff --git a/src/parser/PhutilQueryStringParser.php b/src/parser/PhutilQueryStringParser.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilQueryStringParser.php
@@ -0,0 +1,139 @@
+ 'z',
+ * );
+ *
+ * ...with the `.` replaced with an underscore, `_`. Other characters converted
+ * in this way include space and unmatched opening brackets.
+ *
+ * Broadly, this is part of the terrible legacy of `register_globals`. Since
+ * we'd like to be able to parse all valid query strings without destroying any
+ * data, this class implements a less-encumbered parser.
+ */
+final class PhutilQueryStringParser extends Phobject {
+
+
+  /**
+   * Parse a query string into a dictionary, applying PHP-style handling of
+   * array notation (like `a[]=1`) in parameter names.
+   *
+   * Pairs with an empty name are dropped. For a flat parse which keeps names
+   * exactly as written, see @{method:parseQueryStringToPairList}.
+   *
+   * @param string Query string.
+   * @return map Parsed dictionary.
+   */
+  public function parseQueryString($query_string) {
+    $dictionary = array();
+
+    foreach ($this->parseQueryStringToPairList($query_string) as $pair) {
+      list($name, $value) = $pair;
+      if (strlen($name)) {
+        $this->parseQueryKeyToArr($name, $value, $dictionary);
+      }
+    }
+
+    return $dictionary;
+  }
+
+
+  /**
+   * Parse a query string into a flat list of name/value pairs, leaving any
+   * array notation in the names untouched. For example:
+   *
+   *   a[]=1&a[]=2
+   *
+   * ...parses into:
+   *
+   *   array(
+   *     array('a[]', '1'),
+   *     array('a[]', '2'),
+   *   );
+   *
+   * Names and values are URL-decoded. A parameter with no `=` gets an empty
+   * string as its value, and empty segments between `&` are skipped.
+   *
+   * Use @{method:parseQueryString} for a parse which applies array rules and
+   * returns a dictionary.
+   *
+   * @param string Query string.
+   * @return list<pair<string, string>> List of parsed parameters.
+   */
+  public function parseQueryStringToPairList($query_string) {
+    if (!strlen($query_string)) {
+      return array();
+    }
+
+    $result = array();
+    foreach (explode('&', $query_string) as $segment) {
+      if (!strlen($segment)) {
+        continue;
+      }
+
+      $halves = explode('=', $segment, 2);
+      $raw_name = $halves[0];
+      $raw_value = isset($halves[1]) ? $halves[1] : '';
+
+      $result[] = array(
+        urldecode($raw_name),
+        urldecode($raw_value),
+      );
+    }
+
+    return $result;
+  }
+
+
+  /**
+   * Store a value into the result array under a key which may use square
+   * bracket notation. Bracketed keys are expanded into nested arrays; any
+   * other key is stored as-is.
+   *
+   * Example input:
+   *   $key = "email[0]";
+   *   $val = "my@example.com";
+   *
+   * Example output:
+   *   array("email" => array(0 => "my@example.com"));
+   *
+   * @param string $key
+   * @param string $val
+   * @param array $input_arr
+   */
+  private function parseQueryKeyToArr($key, $val, array &$input_arr) {
+    // Only keys shaped like `name[...][...]` get array treatment.
+    if (!preg_match('/^[^\[\]]+(?:\[[^\[\]]*\])+$/', $key)) {
+      $input_arr[$key] = $val;
+      return;
+    }
+
+    $path = preg_split('/\]?\[/', rtrim($key, ']'));
+    if (!$path) {
+      return;
+    }
+
+    // Walk down the path, creating intermediate arrays as needed, then write
+    // the value at the leaf.
+    $node = &$input_arr;
+    foreach ($path as $step) {
+      if (!strlen($step)) {
+        // An empty step (from `[]`) appends a new element.
+        $node[] = array();
+        $step = last_key($node);
+      } else if (empty($node[$step]) || !is_array($node[$step])) {
+        $node[$step] = array();
+      }
+      $node = &$node[$step];
+    }
+
+    $node = $val;
+    unset($node);
+  }
+}
diff --git a/src/parser/PhutilSimpleOptions.php b/src/parser/PhutilSimpleOptions.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilSimpleOptions.php
@@ -0,0 +1,195 @@
+ '4',
+ * 'eyes' => '2',
+ * );
+ *
+ * @param string Input option list.
+ * @return dict Parsed dictionary.
+ * @task parse
+ */
+  public function parse($input) {
+    $lexer = new PhutilSimpleOptionsLexer();
+    $tokens = $lexer->getNiceTokens($input);
+
+    // Small state machine: $expect names the kind of token we are waiting
+    // for. Any token which does not fit makes the whole input invalid, and
+    // an empty dictionary is returned.
+    $expect = 'key';
+    $pairs = array();
+    foreach ($tokens as $token) {
+      list($type, $value) = $token;
+
+      if ($expect == 'key') {
+        if ($type != 'word') {
+          return array();
+        }
+        if (!strlen($value)) {
+          return array();
+        }
+        $key = $this->normalizeKey($value);
+        $expect = '=';
+      } else if ($expect == '=') {
+        if ($type == '=') {
+          $expect = 'value';
+        } else if ($type == ',') {
+          // A bare key, with no "=", is a flag.
+          $pairs[] = array($key, true);
+          $expect = 'key';
+        } else {
+          return array();
+        }
+      } else if ($expect == 'value') {
+        if ($type == ',') {
+          // "key=" with no value removes the key.
+          $pairs[] = array($key, null);
+          $expect = 'key';
+        } else if ($type == 'word') {
+          $pairs[] = array($key, $value);
+          $expect = ',';
+        } else {
+          return array();
+        }
+      } else if ($expect == ',') {
+        if ($type == 'word') {
+          // Additional words extend the value of the most recent pair.
+          $pairs[count($pairs) - 1][1] .= $value;
+        } else if ($type == ',') {
+          $expect = 'key';
+        } else {
+          return array();
+        }
+      }
+    }
+
+    // Flush a key which was still pending when the input ended.
+    if ($expect == '=') {
+      $pairs[] = array($key, true);
+    } else if ($expect == 'value') {
+      $pairs[] = array($key, null);
+    }
+
+    // Later pairs win; a null value deletes any earlier entry for the key.
+    $result = array();
+    foreach ($pairs as $pair) {
+      list($key, $value) = $pair;
+      if ($value === null) {
+        unset($result[$key]);
+      } else {
+        $result[$key] = $value;
+      }
+    }
+
+    return $result;
+  }
+
+
+/* -( Unparsing Simple Options )------------------------------------------- */
+
+
+  /**
+   * Serialize a dictionary as a simple option list. For example:
+   *
+   *   array(
+   *     'legs' => '4',
+   *     'eyes' => '2',
+   *   );
+   *
+   * ...becomes:
+   *
+   *   legs=4, eyes=2
+   *
+   * Entries whose value has zero length are omitted, and a value of `true`
+   * is written as a bare key.
+   *
+   * @param dict Input dictionary.
+   * @param string Additional characters to escape.
+   * @return string Unparsed option list.
+   */
+  public function unparse(array $options, $escape = '') {
+    $parts = array();
+
+    foreach ($options as $name => $value) {
+      $name = $this->normalizeKey($name);
+      if (!strlen($value)) {
+        continue;
+      }
+
+      $quoted_name = $this->quoteString($name, $escape);
+      if ($value === true) {
+        $parts[] = $quoted_name;
+        continue;
+      }
+
+      $parts[] = $quoted_name.'='.$this->quoteString($value, $escape);
+    }
+
+    return implode(', ', $parts);
+  }
+
+
+/* -( Parser Configuration )----------------------------------------------- */
+
+
+ /**
+ * Configure case sensitivity of the parser. By default, the parser is
+ * case insensitive, so "legs=4" has the same meaning as "LEGS=4". If you
+ * set it to be case sensitive, the keys have different meanings.
+ *
+ * The flag is read when keys are normalized: keys are lowercased unless the
+ * parser is case sensitive.
+ *
+ * @param bool True to make the parser case sensitive, false (default) to
+ * make it case-insensitive.
+ * @return this
+ * @task config
+ */
+ public function setCaseSensitive($case_sensitive) {
+ $this->caseSensitive = $case_sensitive;
+ return $this;
+ }
+
+
+/* -( Internals )---------------------------------------------------------- */
+
+
+  /**
+   * Validate a key and apply the configured case rule to it.
+   *
+   * @param string Key to normalize.
+   * @return string The key, lowercased unless the parser is case sensitive.
+   */
+  private function normalizeKey($key) {
+    if (!strlen($key)) {
+      throw new Exception(pht('Empty key is invalid!'));
+    }
+
+    return $this->caseSensitive ? $key : strtolower($key);
+  }
+
+  /**
+   * Quote a string for output if it contains anything other than ASCII
+   * letters and digits.
+   *
+   * @param string String to quote.
+   * @param string Additional characters to backslash-escape.
+   * @return string The original string, or a double-quoted, escaped copy.
+   */
+  private function quoteString($string, $escape) {
+    $is_bare_word = !preg_match('/[^a-zA-Z0-9]/', $string);
+    if ($is_bare_word) {
+      return $string;
+    }
+
+    return '"'.addcslashes($string, '\\\'"'.$escape).'"';
+  }
+
+}
diff --git a/src/parser/PhutilTypeSpec.php b/src/parser/PhutilTypeSpec.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilTypeSpec.php
@@ -0,0 +1,1921 @@
+
+ * map<type, type>
+ * type|type
+ *
+ * A type may be marked as optional by suffixing it with "?" or prefixing it
+ * with the word "optional":
+ *
+ * int?
+ * optional int
+ *
+ * A type may have a human-readable comment in parentheses, at the end:
+ *
+ * int (must be even)
+ *
+ * For example, these are valid type specifications:
+ *
+ * int|string
+ * map<string, int>
+ * list<list<string>>
+ * optional int
+ * string (uppercase)
+ *
+ */
+final class PhutilTypeSpec extends Phobject {
+
+ private $type;
+ private $subtypes = array();
+ private $optional;
+ private $comment;
+
+ /**
+ * Private constructor: instances are created inside this class (see
+ * didReduceTokens(), which builds them while parsing a specification).
+ */
+ private function __construct() {}
+
+ /**
+ * Get the type name of this specification. Compound specifications use the
+ * names `map`, `list` and `or`; their component types are held separately as
+ * subtypes.
+ *
+ * @return string Type name.
+ */
+ public function getType() {
+ return $this->type;
+ }
+
+  /**
+   * Check a value against this type specification.
+   *
+   * Returns normally if the value conforms, and throws
+   * @{class:PhutilTypeCheckException} otherwise. For `list` and `map`, every
+   * element (and, for `map`, every key) is checked against the subtypes. For
+   * `or`, the value must conform to at least one alternative. Any other type
+   * name is treated as a class or interface which is already loaded.
+   *
+   * @param wild Value to check.
+   * @param string|null Optional name of the value, passed to the exception.
+   * @return void
+   */
+  public function check($value, $name = null) {
+    $type = $this->type;
+
+    // Simple types are each validated by a single builtin predicate.
+    $predicates = array(
+      'int' => 'is_int',
+      'float' => 'is_float',
+      'bool' => 'is_bool',
+      'string' => 'is_string',
+      'null' => 'is_null',
+    );
+
+    if (isset($predicates[$type])) {
+      if (!call_user_func($predicates[$type], $value)) {
+        throw new PhutilTypeCheckException($this, $value, $name);
+      }
+      return;
+    }
+
+    switch ($type) {
+      case 'wild':
+        return;
+      case 'regex':
+        // Try the pattern against an empty subject, capturing any error
+        // text so it can be included in the exception.
+        $trap = new PhutilErrorTrap();
+        $ok = @preg_match($value, '');
+        $err = $trap->getErrorsAsString();
+        $trap->destroy();
+
+        if ($ok === false) {
+          throw new PhutilTypeCheckException($this, $value, $name, $err);
+        }
+        return;
+      case 'list':
+        if (!is_array($value)) {
+          throw new PhutilTypeCheckException($this, $value, $name);
+        }
+        // A nonempty list must have exactly the keys 0..n-1, in order.
+        if ($value && (array_keys($value) !== range(0, count($value) - 1))) {
+          throw new PhutilTypeCheckException($this, $value, $name);
+        }
+        try {
+          foreach ($value as $element) {
+            $this->subtypes[0]->check($element);
+          }
+        } catch (PhutilTypeCheckException $ex) {
+          throw new PhutilTypeCheckException($this, $value, $name);
+        }
+        return;
+      case 'map':
+        if (!is_array($value)) {
+          throw new PhutilTypeCheckException($this, $value, $name);
+        }
+        try {
+          foreach ($value as $map_key => $map_value) {
+            $this->subtypes[0]->check($map_key);
+            $this->subtypes[1]->check($map_value);
+          }
+        } catch (PhutilTypeCheckException $ex) {
+          throw new PhutilTypeCheckException($this, $value, $name);
+        }
+        return;
+      case 'or':
+        foreach ($this->subtypes as $alternative) {
+          try {
+            $alternative->check($value);
+            return;
+          } catch (PhutilTypeCheckException $ex) {
+            // Ignore and try the next alternative.
+          }
+        }
+        throw new PhutilTypeCheckException($this, $value, $name);
+      default:
+        // Only consult symbols which are already loaded; never autoload.
+        $is_known =
+          class_exists($type, false) ||
+          interface_exists($type, false);
+        if ($is_known && ($value instanceof $type)) {
+          return;
+        }
+
+        throw new PhutilTypeCheckException($this, $value, $name);
+    }
+  }
+
+  /**
+   * Check a dictionary of values against a dictionary of type specifications.
+   *
+   * Throws @{class:PhutilTypeExtraParametersException} if values contains
+   * keys which have no specification, and
+   * @{class:PhutilTypeMissingParametersException} if a non-optional key is
+   * absent. Each value which is present is then checked against its
+   * specification.
+   *
+   * @param map Values to check, keyed by parameter name.
+   * @param map Type specification strings, keyed by parameter name.
+   * @return void
+   */
+  public static function checkMap(array $values, array $types) {
+    $unexpected = array_diff_key($values, $types);
+    if ($unexpected) {
+      throw new PhutilTypeExtraParametersException($unexpected);
+    }
+
+    $specs = array();
+    $absent = array();
+    foreach ($types as $key => $type) {
+      $spec = self::newFromString($type);
+      $specs[$key] = $spec;
+
+      if (!array_key_exists($key, $values) && !$spec->optional) {
+        $absent[] = $key;
+      }
+    }
+
+    if ($absent) {
+      throw new PhutilTypeMissingParametersException($absent);
+    }
+
+    foreach ($specs as $key => $spec) {
+      if (array_key_exists($key, $values)) {
+        $spec->check($values[$key], $key);
+      }
+    }
+  }
+
+  /**
+   * Find the nearest class which two classes have in common in their
+   * ancestry, where each class counts as its own ancestor.
+   *
+   * @param string First class name.
+   * @param string Second class name.
+   * @return wild Null if either name is not a class; otherwise the result of
+   *              taking the first ancestor of the first class which is also
+   *              an ancestor of the second.
+   */
+  public static function getCommonParentClass($class_a, $class_b) {
+    // Both arguments must name real classes.
+    try {
+      $both_exist = class_exists($class_a) && class_exists($class_b);
+    } catch (PhutilMissingSymbolException $ex) {
+      return null;
+    }
+
+    if (!$both_exist) {
+      return null;
+    }
+
+    // Build each lineage from the class itself up to its root.
+    $lineage_a = array();
+    for ($class = $class_a; $class; $class = get_parent_class($class)) {
+      $lineage_a[] = $class;
+    }
+
+    $lineage_b = array();
+    for ($class = $class_b; $class; $class = get_parent_class($class)) {
+      $lineage_b[] = $class;
+    }
+
+    return head(array_intersect($lineage_a, $lineage_b));
+  }
+
+  /**
+   * Describe the type of a value as a type specification string.
+   *
+   * Objects are described by class name. A nonempty array with keys 0..n-1
+   * is described as a `list<...>`; any other array, including an empty one,
+   * is described as a `map<..., ...>`.
+   *
+   * @param wild Value to describe.
+   * @return string Type specification string.
+   */
+  public static function getTypeOf($value) {
+    if (is_int($value)) {
+      return 'int';
+    }
+    if (is_float($value)) {
+      return 'float';
+    }
+    if (is_bool($value)) {
+      return 'bool';
+    }
+    if (is_string($value)) {
+      return 'string';
+    }
+    if (is_null($value)) {
+      return 'null';
+    }
+    if (is_object($value)) {
+      return get_class($value);
+    }
+    if (!is_array($value)) {
+      return 'wild';
+    }
+
+    $vtype = self::getTypeOfVector($value);
+
+    $is_list = $value && (array_keys($value) === range(0, count($value) - 1));
+    if ($is_list) {
+      return 'list<'.$vtype.'>';
+    }
+
+    $ktype = self::getTypeOfVector(array_keys($value));
+    return "map<{$ktype}, {$vtype}>";
+  }
+
+  /**
+   * Describe the common type of the elements of an array.
+   *
+   * If elements differ in type, the result is their common parent class when
+   * one exists, and `wild` otherwise. An empty array is `wild`.
+   *
+   * @param list Elements to examine.
+   * @return string Type specification string.
+   */
+  private static function getTypeOfVector(array $vector) {
+    if (!$vector) {
+      return 'wild';
+    }
+
+    $common = null;
+    foreach ($vector as $element) {
+      $current = self::getTypeOf($element);
+
+      if ($common === null || $common === $current) {
+        $common = $current;
+        continue;
+      }
+
+      // The types differ: widen to a shared ancestor, or give up.
+      $parent = self::getCommonParentClass($common, $current);
+      if (!$parent) {
+        return 'wild';
+      }
+      $common = $parent;
+    }
+
+    return $common;
+  }
+
+  /**
+   * Render this specification as a type specification string.
+   *
+   * Optional types are rendered with the `optional ` prefix, and any comment
+   * is appended in parentheses.
+   *
+   * @return string Type specification string.
+   */
+  public function toString() {
+    $parts = array();
+    foreach ($this->subtypes as $subtype) {
+      $parts[] = $subtype->toString();
+    }
+
+    if ($this->type == 'map') {
+      $text = 'map<'.$parts[0].', '.$parts[1].'>';
+    } else if ($this->type == 'list') {
+      $text = 'list<'.$parts[0].'>';
+    } else if ($this->type == 'or') {
+      $text = implode('|', $parts);
+    } else {
+      $text = $this->type;
+    }
+
+    if ($this->optional) {
+      $text = 'optional '.$text;
+    }
+
+    if ($this->comment) {
+      $text .= ' ('.$this->comment.')';
+    }
+
+    return $text;
+  }
+
+  /**
+   * Build a type specification object from a specification string.
+   *
+   * The string is lexed, whitespace tokens are discarded, and the remaining
+   * tokens are parsed with @{method:didReduceTokens} as the reduction
+   * callback.
+   *
+   * @param string Type specification string.
+   * @return wild Result of the parse.
+   */
+  public static function newFromString($string) {
+    $significant = array();
+    foreach (self::getLexer()->getTokens($string) as $token) {
+      // Whitespace tokens have the type ' '.
+      if ($token[0] != ' ') {
+        $significant[] = $token;
+      }
+    }
+
+    return self::parseTokens(
+      $significant,
+      array(__CLASS__, 'didReduceTokens'));
+  }
+
+  /**
+   * Reduction callback for the type specification parser.
+   *
+   * Called once for each reduction, with the values of the symbols being
+   * reduced. Values are either raw tokens or the results of earlier
+   * reductions. Returns the value which replaces them.
+   *
+   * @param string Name of the rule being reduced.
+   * @param wild Key of the production being reduced.
+   * @param list Values of the symbols in the production.
+   * @return wild Value of the reduced rule.
+   */
+  public static function didReduceTokens($rule, $production, array $tokens) {
+    switch ($rule) {
+      case 'start':
+      case 'some_type':
+      case 'not_or_type':
+        return $tokens[0];
+      case 'type':
+        $spec = $tokens[0];
+        if ($production == 'yes') {
+          $spec->optional = true;
+        }
+        return $spec;
+      case 'basic_type':
+        $spec = new PhutilTypeSpec();
+        $spec->type = $tokens[0][1];
+        return $spec;
+      case 'or_type':
+        $left = $tokens[0];
+        $right = $tokens[2];
+
+        // Flatten nested alternatives into a single "or" where possible.
+        if ($left->type == 'or') {
+          $extra = ($right->type == 'or')
+            ? $right->subtypes
+            : array($right);
+          foreach ($extra as $subtype) {
+            $left->subtypes[] = $subtype;
+          }
+          return $left;
+        }
+
+        if ($right->type == 'or') {
+          $right->subtypes[] = $left;
+          return $right;
+        }
+
+        $spec = new PhutilTypeSpec();
+        $spec->type = 'or';
+        $spec->subtypes[] = $left;
+        $spec->subtypes[] = $right;
+        return $spec;
+      case 'map_type':
+        $spec = new PhutilTypeSpec();
+        $spec->type = 'map';
+        $spec->subtypes[] = $tokens[2];
+        $spec->subtypes[] = $tokens[4];
+        return $spec;
+      case 'list_type':
+        $spec = new PhutilTypeSpec();
+        $spec->type = 'list';
+        $spec->subtypes[] = $tokens[2];
+        return $spec;
+      case 'maybe_optional':
+        if ($production != 'yes') {
+          return $tokens[0];
+        }
+        $tokens[1]->optional = true;
+        return $tokens[1];
+      case 'maybe_comment':
+        if ($production == 'yes') {
+          $tokens[0]->comment = $tokens[1];
+        }
+        return $tokens[0];
+      case 'comment':
+        return $tokens[1];
+      case 'comment_text':
+        // Pieces are either raw tokens or already-joined text.
+        $text = '';
+        foreach ($tokens as $piece) {
+          $text .= is_array($piece) ? $piece[1] : $piece;
+        }
+        return $text;
+      default:
+        throw new Exception(pht("Unhandled parser rule '%s'!", $rule));
+    }
+  }
+
+  /**
+   * Get the shared lexer, creating it on first use.
+   *
+   * @return PhutilTypeLexer Shared lexer instance.
+   */
+  private static function getLexer() {
+    static $instance = null;
+
+    if ($instance === null) {
+      $instance = new PhutilTypeLexer();
+    }
+
+    return $instance;
+  }
+
+ private static function parseTokens(array $tokens, $callback) {
+ // NOTE: This is automatically generated by the script
+ // `support/parser/generate-type-parser.php`.
+
+ return PhutilParserGenerator::parseTokensWithTables(
+ array(
+ 0 => array(
+ 'opt' => array(
+ 0 => 'S',
+ 1 => 3,
+ ),
+ 'k' => array(
+ 0 => 'S',
+ 1 => 20,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 21,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 71,
+ ),
+ ),
+ 1 => array(
+ '(end-of-file)' => array(
+ 0 => 'A',
+ ),
+ ),
+ 2 => array(
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'start',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 3 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 20,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 21,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 71,
+ ),
+ ),
+ 4 => array(
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'maybe_optional',
+ 1 => 'yes',
+ 2 => 2,
+ ),
+ ),
+ ),
+ 5 => array(
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'maybe_comment',
+ 1 => 'no',
+ 2 => 1,
+ ),
+ ),
+ '(' => array(
+ 0 => 'S',
+ 1 => 7,
+ ),
+ ),
+ 6 => array(
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'maybe_comment',
+ 1 => 'yes',
+ 2 => 2,
+ ),
+ ),
+ ),
+ 7 => array(
+ 'cm' => array(
+ 0 => 'S',
+ 1 => 11,
+ ),
+ ),
+ 8 => array(
+ ')' => array(
+ 0 => 'S',
+ 1 => 9,
+ ),
+ 'cm' => array(
+ 0 => 'S',
+ 1 => 10,
+ ),
+ ),
+ 9 => array(
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'comment',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ ),
+ 10 => array(
+ ')' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'comment_text',
+ 1 => 0,
+ 2 => 2,
+ ),
+ ),
+ 'cm' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'comment_text',
+ 1 => 0,
+ 2 => 2,
+ ),
+ ),
+ ),
+ 11 => array(
+ ')' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'comment_text',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ 'cm' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'comment_text',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 12 => array(
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'no',
+ 2 => 1,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'no',
+ 2 => 1,
+ ),
+ ),
+ '?' => array(
+ 0 => 'S',
+ 1 => 13,
+ ),
+ ),
+ 13 => array(
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'yes',
+ 2 => 2,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'yes',
+ 2 => 2,
+ ),
+ ),
+ ),
+ 14 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'S',
+ 1 => 15,
+ ),
+ ),
+ 15 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 20,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 21,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 71,
+ ),
+ ),
+ 16 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ ),
+ 17 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 18 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 19 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 20 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 21 => array(
+ '<' => array(
+ 0 => 'S',
+ 1 => 22,
+ ),
+ ),
+ 22 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 57,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 58,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 67,
+ ),
+ ),
+ 23 => array(
+ ',' => array(
+ 0 => 'S',
+ 1 => 24,
+ ),
+ ),
+ 24 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 25 => array(
+ '>' => array(
+ 0 => 'S',
+ 1 => 26,
+ ),
+ ),
+ 26 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ ),
+ 27 => array(
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'no',
+ 2 => 1,
+ ),
+ ),
+ '?' => array(
+ 0 => 'S',
+ 1 => 28,
+ ),
+ ),
+ 28 => array(
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'yes',
+ 2 => 2,
+ ),
+ ),
+ ),
+ 29 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'S',
+ 1 => 30,
+ ),
+ ),
+ 30 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 31 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ ),
+ 32 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 33 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 34 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 35 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 36 => array(
+ '<' => array(
+ 0 => 'S',
+ 1 => 37,
+ ),
+ ),
+ 37 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 57,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 58,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 67,
+ ),
+ ),
+ 38 => array(
+ ',' => array(
+ 0 => 'S',
+ 1 => 39,
+ ),
+ ),
+ 39 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 40 => array(
+ '>' => array(
+ 0 => 'S',
+ 1 => 41,
+ ),
+ ),
+ 41 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ ),
+ 42 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'S',
+ 1 => 43,
+ ),
+ ),
+ 43 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 44 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ ),
+ 45 => array(
+ '<' => array(
+ 0 => 'S',
+ 1 => 46,
+ ),
+ ),
+ 46 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 47 => array(
+ '>' => array(
+ 0 => 'S',
+ 1 => 48,
+ ),
+ ),
+ 48 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ '>' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ ),
+ 49 => array(
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'no',
+ 2 => 1,
+ ),
+ ),
+ '?' => array(
+ 0 => 'S',
+ 1 => 50,
+ ),
+ ),
+ 50 => array(
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'type',
+ 1 => 'yes',
+ 2 => 2,
+ ),
+ ),
+ ),
+ 51 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'S',
+ 1 => 52,
+ ),
+ ),
+ 52 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 57,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 58,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 67,
+ ),
+ ),
+ 53 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 0,
+ 2 => 3,
+ ),
+ ),
+ ),
+ 54 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 55 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 56 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'not_or_type',
+ 1 => 2,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 57 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'basic_type',
+ 1 => 0,
+ 2 => 1,
+ ),
+ ),
+ ),
+ 58 => array(
+ '<' => array(
+ 0 => 'S',
+ 1 => 59,
+ ),
+ ),
+ 59 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 57,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 58,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 67,
+ ),
+ ),
+ 60 => array(
+ ',' => array(
+ 0 => 'S',
+ 1 => 61,
+ ),
+ ),
+ 61 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 62 => array(
+ '>' => array(
+ 0 => 'S',
+ 1 => 63,
+ ),
+ ),
+ 63 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'map_type',
+ 1 => 0,
+ 2 => 6,
+ ),
+ ),
+ ),
+ 64 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'S',
+ 1 => 65,
+ ),
+ ),
+ 65 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 57,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 58,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 67,
+ ),
+ ),
+ 66 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ ),
+ 67 => array(
+ '<' => array(
+ 0 => 'S',
+ 1 => 68,
+ ),
+ ),
+ 68 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 69 => array(
+ '>' => array(
+ 0 => 'S',
+ 1 => 70,
+ ),
+ ),
+ 70 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ ',' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ ),
+ 71 => array(
+ '<' => array(
+ 0 => 'S',
+ 1 => 72,
+ ),
+ ),
+ 72 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 35,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 36,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 45,
+ ),
+ ),
+ 73 => array(
+ '>' => array(
+ 0 => 'S',
+ 1 => 74,
+ ),
+ ),
+ 74 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'list_type',
+ 1 => 0,
+ 2 => 4,
+ ),
+ ),
+ ),
+ 75 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'some_type',
+ 1 => 1,
+ 2 => 1,
+ ),
+ ),
+ '|' => array(
+ 0 => 'S',
+ 1 => 76,
+ ),
+ ),
+ 76 => array(
+ 'k' => array(
+ 0 => 'S',
+ 1 => 20,
+ ),
+ 'map' => array(
+ 0 => 'S',
+ 1 => 21,
+ ),
+ 'list' => array(
+ 0 => 'S',
+ 1 => 71,
+ ),
+ ),
+ 77 => array(
+ '?' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ '(' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ '|' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'or_type',
+ 1 => 1,
+ 2 => 3,
+ ),
+ ),
+ ),
+ 78 => array(
+ '(end-of-file)' => array(
+ 0 => 'R',
+ 1 => array(
+ 0 => 'maybe_optional',
+ 1 => 'no',
+ 2 => 1,
+ ),
+ ),
+ ),
+ ),
+ array(
+ 0 => array(
+ 'start' => 1,
+ 'maybe_optional' => 2,
+ 'maybe_comment' => 78,
+ 'type' => 5,
+ 'some_type' => 12,
+ 'or_type' => 14,
+ 'not_or_type' => 75,
+ 'basic_type' => 17,
+ 'map_type' => 18,
+ 'list_type' => 19,
+ ),
+ 3 => array(
+ 'maybe_comment' => 4,
+ 'type' => 5,
+ 'some_type' => 12,
+ 'or_type' => 14,
+ 'not_or_type' => 75,
+ 'basic_type' => 17,
+ 'map_type' => 18,
+ 'list_type' => 19,
+ ),
+ 5 => array(
+ 'comment' => 6,
+ ),
+ 7 => array(
+ 'comment_text' => 8,
+ ),
+ 15 => array(
+ 'not_or_type' => 16,
+ 'basic_type' => 17,
+ 'map_type' => 18,
+ 'list_type' => 19,
+ ),
+ 22 => array(
+ 'type' => 23,
+ 'some_type' => 49,
+ 'or_type' => 51,
+ 'not_or_type' => 64,
+ 'basic_type' => 54,
+ 'map_type' => 55,
+ 'list_type' => 56,
+ ),
+ 24 => array(
+ 'type' => 25,
+ 'some_type' => 27,
+ 'or_type' => 29,
+ 'not_or_type' => 42,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 30 => array(
+ 'not_or_type' => 31,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 37 => array(
+ 'type' => 38,
+ 'some_type' => 49,
+ 'or_type' => 51,
+ 'not_or_type' => 64,
+ 'basic_type' => 54,
+ 'map_type' => 55,
+ 'list_type' => 56,
+ ),
+ 39 => array(
+ 'type' => 40,
+ 'some_type' => 27,
+ 'or_type' => 29,
+ 'not_or_type' => 42,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 43 => array(
+ 'not_or_type' => 44,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 46 => array(
+ 'type' => 47,
+ 'some_type' => 27,
+ 'or_type' => 29,
+ 'not_or_type' => 42,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 52 => array(
+ 'not_or_type' => 53,
+ 'basic_type' => 54,
+ 'map_type' => 55,
+ 'list_type' => 56,
+ ),
+ 59 => array(
+ 'type' => 60,
+ 'some_type' => 49,
+ 'or_type' => 51,
+ 'not_or_type' => 64,
+ 'basic_type' => 54,
+ 'map_type' => 55,
+ 'list_type' => 56,
+ ),
+ 61 => array(
+ 'type' => 62,
+ 'some_type' => 27,
+ 'or_type' => 29,
+ 'not_or_type' => 42,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 65 => array(
+ 'not_or_type' => 66,
+ 'basic_type' => 54,
+ 'map_type' => 55,
+ 'list_type' => 56,
+ ),
+ 68 => array(
+ 'type' => 69,
+ 'some_type' => 27,
+ 'or_type' => 29,
+ 'not_or_type' => 42,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 72 => array(
+ 'type' => 73,
+ 'some_type' => 27,
+ 'or_type' => 29,
+ 'not_or_type' => 42,
+ 'basic_type' => 32,
+ 'map_type' => 33,
+ 'list_type' => 34,
+ ),
+ 76 => array(
+ 'not_or_type' => 77,
+ 'basic_type' => 17,
+ 'map_type' => 18,
+ 'list_type' => 19,
+ ),
+ ),
+ '(end-of-file)',
+ $tokens,
+ $callback);
+ }
+
+}
diff --git a/src/parser/PhutilURI.php b/src/parser/PhutilURI.php
new file mode 100644
--- /dev/null
+++ b/src/parser/PhutilURI.php
@@ -0,0 +1,387 @@
+protocol = $uri->protocol;
+ $this->user = $uri->user;
+ $this->pass = $uri->pass;
+ $this->domain = $uri->domain;
+ $this->port = $uri->port;
+ $this->path = $uri->path;
+ $this->query = $uri->query;
+ $this->fragment = $uri->fragment;
+ $this->type = $uri->type;
+ return;
+ }
+
+ $uri = (string)$uri;
+
+ $type = self::TYPE_URI;
+
+ // Reject ambiguous URIs outright. Different versions of different clients
+ // parse these in different ways. See T12526 for discussion.
+ if (preg_match('(^[^/:]*://[^/]*[#?].*:)', $uri)) {
+ throw new Exception(
+ pht(
+ 'Rejecting ambiguous URI "%s". This URI is not formatted or '.
+ 'encoded properly.',
+ $uri));
+ }
+
+ $matches = null;
+ if (preg_match('(^([^/:]*://[^/]*)(\\?.*)\z)', $uri, $matches)) {
+ // If the URI is something like `idea://open?file=/path/to/file`, the
+ // `parse_url()` function will parse `open?file=` as the host. This is
+ // not the expected result. Break the URI into two pieces, stick a slash
+ // in between them, parse that, then remove the path. See T6106.
+
+ $parts = parse_url($matches[1].'/'.$matches[2]);
+ unset($parts['path']);
+ } else if ($this->isGitURIPattern($uri)) {
+ // Handle Git/SCP URIs in the form "user@domain:relative/path".
+
+ $user = '(?:(?P[^/@]+)@)?';
+ $host = '(?P[^/:]+)';
+ $path = ':(?P.*)';
+
+ $ok = preg_match('(^'.$user.$host.$path.'\z)', $uri, $matches);
+ if (!$ok) {
+ throw new Exception(
+ pht(
+ 'Failed to parse URI "%s" as a Git URI.',
+ $uri));
+ }
+
+ $parts = $matches;
+ $parts['scheme'] = 'ssh';
+
+ $type = self::TYPE_GIT;
+ } else {
+ $parts = parse_url($uri);
+ }
+
+ // The parse_url() call will accept URIs with leading whitespace, but many
+ // other tools (like git) will not. See T4913 for a specific example. If
+ // the input string has leading whitespace, fail the parse.
+ if ($parts) {
+ if (ltrim($uri) != $uri) {
+ $parts = false;
+ }
+ }
+
+ // NOTE: `parse_url()` is very liberal about host names; fail the parse if
+ // the host looks like garbage. In particular, we do not allow hosts which
+ // begin with "." or "-". See T12961 for a specific attack which relied on
+ // hosts beginning with "-".
+ if ($parts) {
+ $host = idx($parts, 'host', '');
+ if (strlen($host)) {
+ if (!preg_match('/^[a-zA-Z0-9]+[a-zA-Z0-9\\.\\-]*\z/', $host)) {
+ $parts = false;
+ }
+ }
+ }
+
+ if (!$parts) {
+ $parts = array();
+ }
+
+ // stringyness is to preserve API compatibility and
+ // allow the tests to continue passing
+ $this->protocol = idx($parts, 'scheme', '');
+ $this->user = rawurldecode(idx($parts, 'user', ''));
+ $this->pass = rawurldecode(idx($parts, 'pass', ''));
+ $this->domain = idx($parts, 'host', '');
+ $this->port = (string)idx($parts, 'port', '');
+ $this->path = idx($parts, 'path', '');
+ $query = idx($parts, 'query');
+ if ($query) {
+ $this->query = id(new PhutilQueryStringParser())->parseQueryString(
+ $query);
+ }
+ $this->fragment = idx($parts, 'fragment', '');
+
+ $this->type = $type;
+ }
+
+ /**
+  * Render this URI as a string.
+  *
+  * Standard URIs are assembled as "protocol://auth@domain:port", followed by
+  * the path, query string and fragment. Git-style URIs omit the protocol and
+  * the port, and separate the domain from the path with ":".
+  *
+  * @return string String representation of this URI.
+  */
+ public function __toString() {
+   $is_git = $this->isGitURI();
+
+   // The Git-style rendering has no place for a port, so never emit one.
+   $port = $is_git ? null : $this->port;
+   $domain = $this->domain;
+
+   // Credentials are stored decoded, so they are encoded again on output.
+   $user = $this->user;
+   $pass = $this->pass;
+   $auth = null;
+   if (strlen($user)) {
+     $auth = rawurlencode($user);
+     if (strlen($pass)) {
+       $auth .= ':'.rawurlencode($pass);
+     }
+     $auth .= '@';
+   }
+
+   $protocol = null;
+   if (!$is_git) {
+     $protocol = $this->protocol;
+     if (strlen($auth)) {
+       // Credentials need a protocol in front of them; fall back to "http"
+       // when none was specified.
+       $protocol = nonempty($this->protocol, 'http');
+     }
+   }
+
+   $prefix = null;
+   if (strlen($protocol) || strlen($auth) || strlen($domain)) {
+     $prefix = $is_git
+       ? "{$auth}{$domain}"
+       : "{$protocol}://{$auth}{$domain}";
+
+     if (strlen($port)) {
+       $prefix .= ':'.$port;
+     }
+   }
+
+   $query = $this->query
+     ? '?'.http_build_query($this->query, '', '&')
+     : null;
+
+   $fragment = strlen($this->getFragment())
+     ? '#'.$this->getFragment()
+     : null;
+
+   $path = $this->getPath();
+   if ($is_git && strlen($path)) {
+     $path = ':'.$path;
+   }
+
+   return $prefix.$path.$query.$fragment;
+ }
+
+ /**
+  * Set or remove a single query parameter.
+  *
+  * @param string Parameter name.
+  * @param wild New value, or `null` to remove the parameter.
+  * @return this
+  */
+ public function setQueryParam($key, $value) {
+   if ($value !== null) {
+     $this->query[$key] = $value;
+     return $this;
+   }
+
+   // A null value means "drop this parameter entirely".
+   unset($this->query[$key]);
+   return $this;
+ }
+
+ /**
+  * Replace the whole query parameter map with a new one.
+  *
+  * @param map Map of parameter names to values.
+  * @return this
+  */
+ public function setQueryParams(array $params) {
+ $this->query = $params;
+ return $this;
+ }
+
+ /**
+  * Get the current query parameter map.
+  *
+  * @return map Query parameters as currently stored on this object.
+  */
+ public function getQueryParams() {
+ return $this->query;
+ }
+
+ /**
+  * Set the protocol (scheme) component, stored as given.
+  *
+  * @param string Protocol, without the "://" separator which is added when
+  *   the URI is rendered.
+  * @return this
+  */
+ public function setProtocol($protocol) {
+ $this->protocol = $protocol;
+ return $this;
+ }
+
+ /**
+  * Get the protocol (scheme) component.
+  *
+  * @return string Stored protocol.
+  */
+ public function getProtocol() {
+ return $this->protocol;
+ }
+
+ /**
+  * Set the domain (host) component, stored as given without validation.
+  *
+  * @param string Domain name.
+  * @return this
+  */
+ public function setDomain($domain) {
+ $this->domain = $domain;
+ return $this;
+ }
+
+ /**
+  * Get the domain (host) component.
+  *
+  * @return string Stored domain.
+  */
+ public function getDomain() {
+ return $this->domain;
+ }
+
+ /**
+  * Set the port component, stored as given.
+  *
+  * @param string|int Port number.
+  * @return this
+  */
+ public function setPort($port) {
+ $this->port = $port;
+ return $this;
+ }
+ /**
+  * Get the explicitly configured port, without applying any protocol
+  * default.
+  *
+  * @return string|int Stored port.
+  */
+ public function getPort() {
+ return $this->port;
+ }
+
+ /**
+  * Get the port, falling back to a default for the current protocol when no
+  * port is set.
+  *
+  * Defaults are known for "http", "https" and "ssh". If no port is set and
+  * the protocol has no known default, the final fallback is the empty
+  * string.
+  *
+  * @return string Explicit port, protocol default port, or empty string.
+  */
+ public function getPortWithProtocolDefault() {
+   $well_known_ports = array(
+     'http' => '80',
+     'https' => '443',
+     'ssh' => '22',
+   );
+
+   $explicit_port = $this->getPort();
+   $default_port = idx($well_known_ports, $this->getProtocol());
+
+   return nonempty($explicit_port, $default_port, '');
+ }
+
+ /**
+  * Set the path component.
+  *
+  * For standard URIs which have a domain, a nonempty path is forced to begin
+  * with "/". Git URIs use relative paths, so their path is stored untouched.
+  *
+  * @param string New path.
+  * @return this
+  */
+ public function setPath($path) {
+   $needs_leading_slash =
+     !$this->isGitURI() &&
+     $this->domain &&
+     strlen($path) &&
+     ($path[0] !== '/');
+
+   $this->path = $needs_leading_slash ? '/'.$path : $path;
+   return $this;
+ }
+
+ /**
+  * Append a path to the end of the existing path, joining the two pieces
+  * with exactly one "/".
+  *
+  * If there is no existing path, this behaves like @{method:setPath}.
+  *
+  * @param string Path to append.
+  * @return this
+  */
+ public function appendPath($path) {
+   $current = (string)$this->path;
+
+   // Test the length rather than truthiness: the path "0" is falsy in PHP
+   // but is a real path, and must be appended to instead of replaced.
+   if (!strlen($current)) {
+     return $this->setPath($path);
+   }
+
+   $first = strlen($path) ? $path[0] : null;
+   $last = $current[strlen($current) - 1];
+
+   if ($first === '/' && $last === '/') {
+     // Both sides supply a separator; drop one so we don't emit "//".
+     $path = substr($path, 1);
+   } else if ($first !== '/' && $last !== '/') {
+     // Neither side supplies a separator; add one.
+     $path = '/'.$path;
+   }
+
+   $this->path = $current.$path;
+   return $this;
+ }
+
+ /**
+  * Get the path component.
+  *
+  * @return string Stored path.
+  */
+ public function getPath() {
+ return $this->path;
+ }
+
+ /**
+  * Set the fragment component, stored as given.
+  *
+  * @param string Fragment, without the leading "#" which is added when the
+  *   URI is rendered.
+  * @return this
+  */
+ public function setFragment($fragment) {
+ $this->fragment = $fragment;
+ return $this;
+ }
+
+ /**
+  * Get the fragment component.
+  *
+  * @return string Stored fragment.
+  */
+ public function getFragment() {
+ return $this->fragment;
+ }
+
+ /**
+  * Set the username component, stored as given.
+  *
+  * @param string Username in raw (not URL-encoded) form; it is encoded with
+  *   rawurlencode() when the URI is rendered.
+  * @return this
+  */
+ public function setUser($user) {
+ $this->user = $user;
+ return $this;
+ }
+
+ /**
+  * Get the username component.
+  *
+  * @return string Stored username.
+  */
+ public function getUser() {
+ return $this->user;
+ }
+
+ /**
+  * Set the password component, stored as given.
+  *
+  * @param string Password in raw (not URL-encoded) form; it is encoded with
+  *   rawurlencode() when the URI is rendered.
+  * @return this
+  */
+ public function setPass($pass) {
+ $this->pass = $pass;
+ return $this;
+ }
+
+ /**
+  * Get the password component.
+  *
+  * @return string Stored password.
+  */
+ public function getPass() {
+ return $this->pass;
+ }
+
+ /**
+  * Build a copy of this URI with one query parameter changed, leaving this
+  * object untouched.
+  *
+  * @param string Parameter name.
+  * @param wild New value, or `null` to remove the parameter from the copy.
+  * @return PhutilURI Modified clone of this URI.
+  */
+ public function alter($key, $value) {
+   $copy = clone $this;
+   return $copy->setQueryParam($key, $value);
+ }
+
+ /**
+  * Test whether this URI is of the Git type.
+  *
+  * @return bool True if the stored type equals `TYPE_GIT`.
+  */
+ public function isGitURI() {
+ return ($this->type == self::TYPE_GIT);
+ }
+
+ /**
+  * Change the type of this URI.
+  *
+  * Switching to `TYPE_URI` is refused when the current path is relative
+  * (nonempty and not starting with "/").
+  *
+  * @param const New URI type.
+  * @return this
+  * @throws Exception If asked to become a standard URI while the path is
+  *   relative.
+  */
+ public function setType($type) {
+   $to_standard = ($type == self::TYPE_URI);
+
+   if ($to_standard) {
+     $path = $this->getPath();
+     $is_relative = strlen($path) && ($path[0] !== '/');
+
+     if ($is_relative) {
+       // Fail now: __toString() is not allowed to throw, so there is no
+       // reasonable opportunity to report this problem later.
+       throw new Exception(
+         pht(
+           'Unable to convert URI "%s" into a standard URI because the '.
+           'path is relative. Standard URIs can not represent relative '.
+           'paths.',
+           $this));
+     }
+   }
+
+   $this->type = $type;
+   return $this;
+ }
+
+ /**
+  * Get the type of this URI.
+  *
+  * @return const Stored URI type.
+  */
+ public function getType() {
+ return $this->type;
+ }
+
+ /**
+  * Test whether a string looks like a Git/SCP-style URI.
+  *
+  * The string is split at the first ":" which follows a run of non-"/"
+  * characters and is not itself followed by "//". The part before the colon
+  * is the "head" and the remainder is the "last" part.
+  *
+  * @param string Raw URI string to examine.
+  * @return bool True if the string should be parsed as a Git URI.
+  */
+ private function isGitURIPattern($uri) {
+   $matches = null;
+
+   // The capture groups must be named: "head" and "last" are read by name
+   // below.
+   $ok = preg_match(
+     '(^(?P<head>[^/]+):(?P<last>(?!//).*)\z)',
+     $uri,
+     $matches);
+   if (!$ok) {
+     return false;
+   }
+
+   $head = $matches['head'];
+   $last = $matches['last'];
+
+   // If any part of this has spaces in it, it's not a Git URI. We fail here
+   // so we fall back and don't fail more abruptly later.
+   if (preg_match('(\s)', $head.$last)) {
+     return false;
+   }
+
+   // If the second part only contains digits, assume we're looking at
+   // casually specified "domain.com:123" URI, not a Git URI pointed at an
+   // entirely numeric relative path.
+   if (preg_match('(^\d+\z)', $last)) {
+     return false;
+   }
+
+   // If the first part has a "." or an "@" in it, interpret it as a domain
+   // or a "user@host" string.
+   if (preg_match('([.@])', $head)) {
+     return true;
+   }
+
+   // Otherwise, interpret the URI conservatively as a "javascript:"-style
+   // URI. This means that "localhost:path" is parsed as a normal URI instead
+   // of a Git URI, but we can't tell which the user intends and it's safer
+   // to treat it as a normal URI.
+   return false;
+ }
+
+}
diff --git a/src/parser/__tests__/PhutilBugtraqParserTestCase.php b/src/parser/__tests__/PhutilBugtraqParserTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilBugtraqParserTestCase.php
@@ -0,0 +1,61 @@
+setBugtraqPattern('http://bugs.com/%BUGID%')
+ ->setBugtraqCaptureExpression('/[Ii]ssues?:?(\s*,?\s*\d+)+/')
+ ->setBugtraqSelectExpression('/(\d+)/')
+ ->processCorpus('Issues: 123, 345');
+ $this->assertEqual(
+ 'Issues: http://bugs.com/123, http://bugs.com/345',
+ $actual);
+
+ $actual = id(new PhutilBugtraqParser())
+ ->setBugtraqPattern('<%BUGID%>')
+ ->setBugtraqCaptureExpression('/([A-Z]{2,}-\d+)/')
+ ->processCorpus('AB-1 BC-2 CD-3');
+ $this->assertEqual(
+ ' ',
+ $actual);
+
+ $actual = id(new PhutilBugtraqParser())
+ ->setBugtraqPattern('<%BUGID%>')
+ ->setBugtraqCaptureExpression('/\d+/')
+ ->processCorpus('This text has no bugs in it.');
+ $this->assertEqual(
+ 'This text has no bugs in it.',
+ $actual);
+
+ $actual = id(new PhutilBugtraqParser())
+ ->setBugtraqPattern('<%BUGID%>')
+ ->setBugtraqCaptureExpression('/.*/')
+ ->setBugtraqSelectExpression('/(\d+)/')
+ ->processCorpus('This text captures but does not select.');
+ $this->assertEqual(
+ 'This text captures but does not select.',
+ $actual);
+
+ $caught = null;
+ try {
+ id(new PhutilBugtraqParser())
+ ->setBugtraqCaptureExpression('!');
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+ $this->assertTrue($caught instanceof PhutilTypeCheckException);
+
+
+ $caught = null;
+ try {
+ id(new PhutilBugtraqParser())
+ ->setBugtraqSelectExpression('!');
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+ $this->assertTrue($caught instanceof PhutilTypeCheckException);
+ }
+
+
+}
diff --git a/src/parser/__tests__/PhutilDocblockParserTestCase.php b/src/parser/__tests__/PhutilDocblockParserTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilDocblockParserTestCase.php
@@ -0,0 +1,127 @@
+parseDocblock($root.$file);
+ }
+ }
+
+ /**
+  * Parse one docblock fixture file and assert on the result.
+  *
+  * The expectations are selected by the fixture's base name. A fixture with
+  * no matching case throws, so a newly added fixture can not silently go
+  * untested.
+  *
+  * @param string Path to the fixture file to read and parse.
+  * @return void
+  */
+ private function parseDocblock($doc_file) {
+ $contents = Filesystem::readFile($doc_file);
+ $file = basename($doc_file);
+
+ $parser = new PhutilDocblockParser();
+ // parse() yields the free text and the map of "@special" values.
+ list($docblock, $specials) = $parser->parse($contents);
+
+ switch ($file) {
+ case 'embedded-specials.docblock':
+ $this->assertEqual(array(), $specials);
+ $this->assertEqual(
+ "So long as a @special does not appear at the beginning of a line,\n".
+ "it is parsed as normal text.",
+ $docblock);
+ break;
+ case 'indented-block.docblock':
+ $this->assertEqual(array(), $specials);
+ $this->assertEqual(
+ 'Cozy lummox gives smart squid who asks for job pen.',
+ $docblock);
+ break;
+ case 'indented-text.docblock':
+ $this->assertEqual(array(), $specials);
+ $this->assertEqual(
+ 'Cozy lummox gives smart squid who asks for job pen.',
+ $docblock);
+ break;
+ case 'multiline-special.docblock':
+ $this->assertEqual(
+ array(
+ 'special' => 'x y z',
+ ),
+ $specials);
+ $this->assertEqual(
+ '',
+ $docblock);
+ break;
+ case 'multi-specials.docblock':
+ // A repeated special is expected as a list; a bare one as `true`.
+ $this->assertEqual(
+ array(
+ 'special' => array('north', 'south'),
+ 'stable' => true,
+ ),
+ $specials);
+ $this->assertEqual(
+ '',
+ $docblock);
+ break;
+ case 'specials.docblock':
+ $this->assertEqual(
+ array(
+ 'type' => 'type',
+ 'task' => 'task',
+ 'special' => array('dot', 'dot', 'dash'),
+ ),
+ $specials);
+ $this->assertEqual(
+ '',
+ $docblock);
+ break;
+ case 'linebreak-breaks-specials.docblock':
+ $this->assertEqual(
+ array(
+ 'title' => 'title',
+ ),
+ $specials);
+ $this->assertEqual(
+ 'This is normal text, not part of the @title.',
+ $docblock);
+ break;
+ case 'specials-with-hyphen.docblock':
+ // Only the specials are checked for this fixture.
+ $this->assertEqual(
+ array(
+ 'repeat-hyphen' => array('a', 'b'),
+ 'multiline-hyphen' => 'mmm nnn',
+ 'normal-hyphen' => 'x',
+ ),
+ $specials);
+ break;
+ case 'indented-specials.docblock':
+ // Only the specials are checked for this fixture.
+ $this->assertEqual(
+ array(
+ 'title' => 'sendmail',
+ 'special' => 'only a little bit indented',
+ ),
+ $specials);
+ break;
+ case 'flag-specials.docblock':
+ $this->assertEqual(
+ "stuff above\n\nstuff in the middle\n\nstuff below",
+ $docblock);
+ $this->assertEqual(
+ array(
+ 'flag' => true,
+ 'stuff' => true,
+ 'zebra' => true,
+ 'apple' => true,
+ ),
+ $specials);
+ break;
+ case 'mixed-types.docblock':
+ // Only the specials are checked for this fixture.
+ $this->assertEqual(
+ array(
+ 'special' => array('squirrels', true),
+ ),
+ $specials);
+ break;
+ default:
+ throw new Exception(pht("No test case to handle file '%s'!", $file));
+ }
+ }
+
+}
diff --git a/src/parser/__tests__/PhutilEditorConfigTestCase.php b/src/parser/__tests__/PhutilEditorConfigTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilEditorConfigTestCase.php
@@ -0,0 +1,106 @@
+getTestFile());
+
+ $tests = array(
+ 'default' => array(
+ array(
+ 'indent_style' => 'space',
+ 'indent_size' => 2,
+ 'charset' => 'utf-8',
+ 'trim_trailing_whitespace' => true,
+ 'insert_final_newline' => true,
+ ),
+ array(),
+ ),
+ 'file' => array(
+ array(
+ 'indent_style' => 'space',
+ 'indent_size' => 3,
+ 'charset' => 'utf-8',
+ 'trim_trailing_whitespace' => true,
+ 'insert_final_newline' => true,
+ ),
+ array(),
+ ),
+ 'file.txt' => array(
+ array(
+ 'indent_style' => 'space',
+ 'indent_size' => 3,
+ 'charset' => 'latin1',
+ 'trim_trailing_whitespace' => true,
+ 'insert_final_newline' => true,
+ ),
+ array(),
+ ),
+ 'externals/README' => array(
+ array(
+ 'indent_style' => null,
+ 'indent_size' => null,
+ 'charset' => 'utf-8',
+ 'trim_trailing_whitespace' => false,
+ 'insert_final_newline' => false,
+ ),
+ array(),
+ ),
+ 'subdir/file' => array(
+ array(
+ 'indent_style' => 'tab',
+ 'indent_size' => 3,
+ 'charset' => 'utf-8-bom',
+ 'trim_trailing_whitespace' => true,
+ 'insert_final_newline' => true,
+ ),
+ array(),
+ ),
+ 'empty/file' => array(
+ array(),
+ array(
+ 'indent_style' => null,
+ 'indent_size' => null,
+ 'charset' => null,
+ 'trim_trailing_whitespace' => null,
+ 'insert_final_newline' => null,
+ ),
+ ),
+ );
+
+ foreach ($tests as $path => $expected) {
+ list($properties, $property) = $expected;
+ $property = array_merge($properties, $property);
+
+ $this->assertEqual(
+ $properties,
+ $parser->getProperties($this->getTestFile($path)));
+
+ foreach ($property as $key => $value) {
+ $this->assertEqual(
+ $value,
+ $parser->getProperty($this->getTestFile($path), $key));
+ }
+ }
+
+ $invalid_properties = array(
+ 'invalid',
+ );
+
+ foreach ($invalid_properties as $invalid_property) {
+ $caught = null;
+ try {
+ $parser->getProperty('', $invalid_property);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertTrue($caught instanceof InvalidArgumentException);
+ }
+ }
+
+ /**
+  * Build the path to a fixture inside the "editorconfig/" directory next to
+  * this test file.
+  *
+  * @param string|null Path relative to the fixture directory. With no
+  *   argument, the fixture directory itself (with a trailing slash) is
+  *   returned.
+  * @return string Path to the fixture.
+  */
+ private function getTestFile($path = null) {
+   $fixture_root = dirname(__FILE__).'/editorconfig/';
+   return $fixture_root.$path;
+ }
+
+}
diff --git a/src/parser/__tests__/PhutilEmailAddressTestCase.php b/src/parser/__tests__/PhutilEmailAddressTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilEmailAddressTestCase.php
@@ -0,0 +1,130 @@
+');
+ $this->assertEqual(
+ 'Abraham Lincoln',
+ $email->getDisplayName());
+ $this->assertEqual(
+ 'alincoln',
+ $email->getLocalPart());
+ $this->assertEqual(
+ 'logcabin.com',
+ $email->getDomainName());
+ $this->assertEqual(
+ 'alincoln@logcabin.com',
+ $email->getAddress());
+
+ $email = new PhutilEmailAddress('alincoln@logcabin.com');
+ $this->assertEqual(
+ null,
+ $email->getDisplayName());
+ $this->assertEqual(
+ 'alincoln',
+ $email->getLocalPart());
+ $this->assertEqual(
+ 'logcabin.com',
+ $email->getDomainName());
+ $this->assertEqual(
+ 'alincoln@logcabin.com',
+ $email->getAddress());
+
+ $email = new PhutilEmailAddress('"Abraham" ');
+ $this->assertEqual(
+ 'Abraham',
+ $email->getDisplayName());
+ $this->assertEqual(
+ 'alincoln',
+ $email->getLocalPart());
+ $this->assertEqual(
+ 'logcabin.com',
+ $email->getDomainName());
+ $this->assertEqual(
+ 'alincoln@logcabin.com',
+ $email->getAddress());
+
+ $email = new PhutilEmailAddress(' alincoln@logcabin.com ');
+ $this->assertEqual(
+ null,
+ $email->getDisplayName());
+ $this->assertEqual(
+ 'alincoln',
+ $email->getLocalPart());
+ $this->assertEqual(
+ 'logcabin.com',
+ $email->getDomainName());
+ $this->assertEqual(
+ 'alincoln@logcabin.com',
+ $email->getAddress());
+
+ $email = new PhutilEmailAddress('alincoln');
+ $this->assertEqual(
+ null,
+ $email->getDisplayName());
+ $this->assertEqual(
+ 'alincoln',
+ $email->getLocalPart());
+ $this->assertEqual(
+ null,
+ $email->getDomainName());
+ $this->assertEqual(
+ 'alincoln',
+ $email->getAddress());
+
+ $email = new PhutilEmailAddress('alincoln ');
+ $this->assertEqual(
+ 'alincoln',
+ $email->getDisplayName());
+ $this->assertEqual(
+ 'alincoln at logcabin dot com',
+ $email->getLocalPart());
+ $this->assertEqual(
+ null,
+ $email->getDomainName());
+ $this->assertEqual(
+ 'alincoln at logcabin dot com',
+ $email->getAddress());
+ }
+
+  /**
+   * Verify that an address assembled from a display name and a bare address
+   * serializes to the quoted `"Display Name" <address>` form.
+   *
+   * Each case is a tuple of: display name, address, expected string.
+   */
+  public function testEmailEncoding() {
+    $cases = array(
+      array(
+        'Tangerine Q. Hawthorne',
+        'thawthorne@blackspire.bunker',
+        '"Tangerine Q. Hawthorne" <thawthorne@blackspire.bunker>',
+      ),
+      array(
+        // Quotes and backslashes inside the display name are expected to
+        // come out backslash-escaped.
+        'Hector "\\" Backslash',
+        'hector@backslash',
+        '"Hector \\"\\\\\\" Backslash" <hector@backslash>',
+      ),
+      array(
+        // A display name which itself embeds something address-shaped.
+        'My Middle Name "<name@domain>" Is My Email',
+        'name@domain',
+        '"My Middle Name \\"<name@domain>\\" Is My Email" <name@domain>',
+      ),
+      array(
+        // The newline in the display name is expected to become a space.
+        "My Legal Name\nContains A Newline",
+        'newline@example',
+        '"My Legal Name Contains A Newline" <newline@example>',
+      ),
+    );
+
+    foreach ($cases as $case) {
+      list($name, $address, $expect) = $case;
+      $actual = (string)id(new PhutilEmailAddress())
+        ->setDisplayName($name)
+        ->setAddress($address);
+      $this->assertEqual(
+        $expect,
+        $actual,
+        pht('Email: %s + %s -> %s', $name, $address, $expect));
+    }
+  }
+
+}
diff --git a/src/parser/__tests__/PhutilGitURITestCase.php b/src/parser/__tests__/PhutilGitURITestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilGitURITestCase.php
@@ -0,0 +1,28 @@
+assertEqual('git', $uri->getUser());
+ $this->assertEqual('host.com', $uri->getDomain());
+ $this->assertEqual('path/to/something', $uri->getPath());
+ $this->assertEqual('git@host.com:path/to/something', (string)$uri);
+
+ $uri = new PhutilGitURI('host.com:path/to/something');
+ $this->assertEqual('', $uri->getUser());
+ $this->assertEqual('host.com', $uri->getDomain());
+ $this->assertEqual('path/to/something', $uri->getPath());
+ $this->assertEqual('host.com:path/to/something', (string)$uri);
+ }
+
+  /**
+   * A Git-style ("user@host:path") remote with leading whitespace is
+   * expected to yield an empty domain.
+   */
+  public function testStrictGitURIParsingOfLeadingWhitespace() {
+    // The ":path" suffix is what makes the input scp-style; without it the
+    // string is not a Git URI at all and the test would not exercise
+    // Git-style detection.
+    $uri = new PhutilURI(' user@example.com:path');
+    $this->assertEqual('', $uri->getDomain());
+  }
+
+
+}
diff --git a/src/parser/__tests__/PhutilJSONParserTestCase.php b/src/parser/__tests__/PhutilJSONParserTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilJSONParserTestCase.php
@@ -0,0 +1,139 @@
+ array(),
+ '[]' => array(),
+ '{"foo": "bar"}' => array('foo' => 'bar'),
+ '[1, "foo", true, null]' => array(1, 'foo', true, null),
+ '{"foo": {"bar": "baz"}}' => array('foo' => array('bar' => 'baz')),
+ '{"foo": "bar", "bar": ["baz"]}'
+ => array('foo' => 'bar', 'bar' => array('baz')),
+ '{"foo": "bar", "bar": {"baz": "foo"}}'
+ => array('foo' => 'bar', 'bar' => array('baz' => 'foo')),
+ '{"": ""}' => array('' => ''),
+ '{"test":"\u00c9v\u00e9nement"}'
+ => array('test' => "\xC3\x89v\xC3\xA9nement"),
+ '["\u00c9v\u00e9nement"]' => array("\xC3\x89v\xC3\xA9nement"),
+ '{"test":"http:\/\/foo\\\\zomg"}'
+ => array('test' => 'http://foo\\zomg'),
+ '["http:\/\/foo\\\\zomg"]' => array('http://foo\\zomg'),
+ Filesystem::readFile(dirname(__FILE__).'/json/base64.json') => array(
+ 'action' => 'candidate.create',
+ 'actionId' => '80653a26cc46357ff79ff83b47e27c3cb7a668bd',
+ 'params' => array(
+ 'attachments' => array(
+ Filesystem::readFile(dirname(__FILE__).'/json/base64.data'),
+ ),
+ ),
+ ),
+ );
+
+ foreach ($tests as $input => $expect) {
+ $this->assertEqual(
+ $expect,
+ $parser->parse($input),
+ pht('Parsing JSON: %s', $input));
+ }
+ }
+
+  /**
+   * Verify that malformed JSON raises PhutilJSONParserException and that the
+   * exception reports the expected source line, character, and token.
+   *
+   * The table maps each invalid input to the location details expected on
+   * the exception.
+   */
+  public function testInvalidJSON() {
+    $parser = new PhutilJSONParser();
+
+    $tests = array(
+      '{' => array(
+        'line' => 1,
+        'char' => 1,
+        'token' => 'EOF',
+      ),
+      '[' => array(
+        'line' => 1,
+        'char' => 1,
+        'token' => 'EOF',
+      ),
+      '{"foo":' => array(
+        'line' => 1,
+        'char' => 7,
+        'token' => 'EOF',
+      ),
+      '{"foo":"bar",}' => array(
+        'line' => 1,
+        'char' => 13,
+        'token' => '}',
+      ),
+      '{{}' => array(
+        'line' => 1,
+        'char' => 1,
+        'token' => '{',
+      ),
+      '{}}' => array(
+        'line' => 1,
+        'char' => 2,
+        'token' => '}',
+      ),
+      "{\"foo\":\"bar\",\n\"bar\":\"baz\",}" => array(
+        'line' => 2,
+        'char' => 12,
+        'token' => '}',
+      ),
+      "{'foo': 'bar'}" => array(
+        'line' => 1,
+        'char' => 1,
+        'token' => 'INVALID',
+      ),
+      "{\"foo\": \"bar\nbaz\"}" => array(
+        'line' => 1,
+        'char' => 7,
+        'token' => 'INVALID',
+      ),
+      '{"foo": "bar\z"}' => array(
+        'line' => 1,
+        'char' => 7,
+        'token' => 'INVALID',
+      ),
+    );
+
+    foreach ($tests as $input => $expected) {
+      $caught = null;
+      try {
+        $parser->parse($input);
+      } catch (Exception $ex) {
+        $caught = $ex;
+      }
+
+      // Label every assertion with the offending input so that a failure
+      // identifies which table entry broke.
+      $this->assertTrue(
+        ($caught instanceof PhutilJSONParserException),
+        pht('Parser exception for invalid JSON: %s', $input));
+      $this->assertEqual(
+        $expected['line'],
+        $caught->getSourceLine(),
+        pht('Source line for invalid JSON: %s', $input));
+      $this->assertEqual(
+        $expected['char'],
+        $caught->getSourceChar(),
+        pht('Source character for invalid JSON: %s', $input));
+      $this->assertEqual(
+        $expected['token'],
+        $caught->getSourceToken(),
+        pht('Source token for invalid JSON: %s', $input));
+    }
+  }
+
+  /**
+   * Parse input containing a repeated key twice: once with duplicate keys
+   * allowed, expecting the value from the table, and once with duplicate
+   * keys disallowed, expecting a PhutilJSONParserException.
+   */
+  public function testDuplicateKeys() {
+    $parser = new PhutilJSONParser();
+
+    $cases = array(
+      '{"foo": "bar", "foo": "baz"}' => array('foo' => 'baz'),
+    );
+
+    foreach ($cases as $json => $lenient_result) {
+      // Lenient pass: duplicates are accepted.
+      $parser->setAllowDuplicateKeys(true);
+      $parsed = $parser->parse($json);
+      $this->assertEqual(
+        $lenient_result,
+        $parsed,
+        pht('Parsing JSON: %s', $json));
+
+      // Strict pass: the same input must be rejected.
+      $parser->setAllowDuplicateKeys(false);
+      $error = null;
+      try {
+        $parser->parse($json);
+      } catch (Exception $ex) {
+        $error = $ex;
+      }
+
+      $is_parser_error = ($error instanceof PhutilJSONParserException);
+      $this->assertTrue($is_parser_error);
+    }
+  }
+
+}
diff --git a/src/parser/__tests__/PhutilJSONTestCase.php b/src/parser/__tests__/PhutilJSONTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilJSONTestCase.php
@@ -0,0 +1,21 @@
+assertEqual(
+ $expect,
+ $serializer->encodeFormatted(array('x' => array())),
+ pht('Empty arrays should serialize as `%s`, not `%s`.', '[]', '{}'));
+ }
+
+}
diff --git a/src/parser/__tests__/PhutilLanguageGuesserTestCase.php b/src/parser/__tests__/PhutilLanguageGuesserTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilLanguageGuesserTestCase.php
@@ -0,0 +1,23 @@
+assertEqual(
+ $expect,
+ PhutilLanguageGuesser::guessLanguage($source),
+ pht("Guessed language for '%s'.", $test));
+ }
+ }
+
+}
diff --git a/src/parser/__tests__/PhutilParserGeneratorTestCase.php b/src/parser/__tests__/PhutilParserGeneratorTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilParserGeneratorTestCase.php
@@ -0,0 +1,330 @@
+setTerminals(array('a'))
+ ->setStartRule('S')
+ ->setRules(
+ array(
+ 'S' => 'a b',
+ ));
+
+ $caught = null;
+ try {
+ // Expect "b is not a rule or terminal".
+ $generator->processGrammar();
+ } catch (PhutilUnknownSymbolParserGeneratorException $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertTrue($caught instanceof Exception);
+ }
+
+  /**
+   * Configure a start rule ('Q') which is not among the grammar's rules and
+   * expect processGrammar() to raise
+   * PhutilUnknownSymbolParserGeneratorException.
+   */
+  public function testBadStartRule() {
+    $terminals = array('a');
+    $rules = array(
+      'S' => 'a',
+    );
+
+    $generator = id(new PhutilParserGenerator())
+      ->setTerminals($terminals)
+      ->setStartRule('Q')
+      ->setRules($rules);
+
+    $error = null;
+    try {
+      // Expect "no start rule Q".
+      $generator->processGrammar();
+    } catch (PhutilUnknownSymbolParserGeneratorException $ex) {
+      $error = $ex;
+    }
+
+    $did_throw = ($error instanceof Exception);
+    $this->assertTrue($did_throw);
+  }
+
+  /**
+   * Feed the generator a grammar whose terminals and rule names look like
+   * '(init)', '(end)', '(epsilon)' and '(end-of-file)', then check that the
+   * generator's own init/EOF/epsilon/end symbols differ from those names.
+   */
+  public function testMessySymbols() {
+    // This checks that internal defaults get changed when they would
+    // conflict with the provided grammar. It is a messy test which leans on
+    // a lot of implementation details.
+
+    $terminals = array('(init)', 'x', 'y', 'start', '(end)');
+    $grammar = array(
+      '(epsilon)' => array(
+        array('s p a c e s'),
+      ),
+      's p a c e s' => '(init) x start (end-of-file) y',
+      '(end-of-file)' => array(
+        '(end)',
+        null,
+      ),
+    );
+
+    $generator = id(new PhutilParserGenerator())
+      ->setTerminals($terminals)
+      ->setStartRule('(epsilon)')
+      ->setRules($grammar)
+      ->processGrammar();
+
+    $rules = $generator->inspectRules();
+
+    $init_symbol = $generator->getInitSymbol();
+    $eof_symbol = $generator->getEOFSymbol();
+    $epsilon_symbol = $generator->getEpsilonSymbol();
+    $end_symbol = $generator->getEndSymbol();
+
+    // None of the internal symbols may collide with a grammar symbol.
+    $this->assertFalse($init_symbol == '(init)');
+    $this->assertFalse($eof_symbol == '(end-of-file)');
+    $this->assertFalse($epsilon_symbol == '(epsilon)');
+    $this->assertFalse($end_symbol == '(end)');
+
+    // The processed rule set is the three grammar rules plus the init rule.
+    $rule_names = array_keys($rules);
+    $expect_names = array(
+      '(end-of-file)',
+      '(epsilon)',
+      's p a c e s',
+      $init_symbol,
+    );
+    sort($rule_names);
+    sort($expect_names);
+    $this->assertEqual($rule_names, $expect_names);
+
+    $expect_start = array(
+      array('s p a c e s', $end_symbol),
+    );
+    $this->assertEqual($expect_start, $rules['(epsilon)']);
+
+    $expect_eof = array(
+      array('(end)', $end_symbol),
+      array($epsilon_symbol, $end_symbol),
+    );
+    $this->assertEqual($expect_eof, $rules['(end-of-file)']);
+  }
+
+  /**
+   * Declare a terminal ('y') that no rule uses and expect processGrammar()
+   * to raise PhutilUnreachableTerminalParserGeneratorException.
+   */
+  public function testUnreachableTerminal() {
+    $terminals = array('x', 'y');
+    $rules = array(
+      'S' => 'x',
+    );
+
+    $generator = id(new PhutilParserGenerator())
+      ->setTerminals($terminals)
+      ->setStartRule('S')
+      ->setRules($rules);
+
+    $error = null;
+    try {
+      $generator->processGrammar();
+    } catch (PhutilUnreachableTerminalParserGeneratorException $ex) {
+      $error = $ex;
+    }
+
+    $did_throw = ($error instanceof Exception);
+    $this->assertTrue($did_throw);
+  }
+
+  /**
+   * Define rules ('A' and 'B') that the start rule 'S' never references and
+   * expect processGrammar() to raise
+   * PhutilUnreachableRuleParserGeneratorException.
+   */
+  public function testUnreachableRule() {
+    $terminals = array('x');
+    $rules = array(
+      'S' => 'x',
+      'A' => 'B',
+      'B' => 'x',
+    );
+
+    $generator = id(new PhutilParserGenerator())
+      ->setTerminals($terminals)
+      ->setStartRule('S')
+      ->setRules($rules);
+
+    $error = null;
+    try {
+      $generator->processGrammar();
+    } catch (PhutilUnreachableRuleParserGeneratorException $ex) {
+      $error = $ex;
+    }
+
+    $did_throw = ($error instanceof Exception);
+    $this->assertTrue($did_throw);
+  }
+
+  /**
+   * Check which grammars processGrammar() rejects with
+   * PhutilIrreducibleRuleParserGeneratorException.
+   *
+   * The table is keyed by a human-readable label; each entry is a tuple of
+   * (whether the exception is expected, grammar rules).
+   */
+  public function testIrreducibleGrammars() {
+    $tests = array(
+      'trivially irreducible' => array(
+        true,
+        array(
+          'S' => array('E', 'x'),
+          'E' => 'E',
+        ),
+      ),
+      'nontrivially irreducible' => array(
+        true,
+        array(
+          'S' => array('X', 'x'),
+          'X' => 'Y',
+          'Y' => 'Z',
+          'Z' => 'X',
+        ),
+      ),
+      'left-recursive reducible' => array(
+        false,
+        array(
+          'S' => 'E',
+          'E' => array('E x', 'x'),
+        ),
+      ),
+      'right-recursive reducible' => array(
+        false,
+        array(
+          'S' => 'E',
+          'E' => array('x', 'x E'),
+        ),
+      ),
+    );
+
+    foreach ($tests as $label => $test) {
+      list($expect, $rules) = $test;
+
+      $generator = id(new PhutilParserGenerator())
+        ->setTerminals(array('x'))
+        ->setStartRule('S')
+        ->setRules($rules);
+
+      $caught = null;
+      try {
+        $generator->processGrammar();
+      } catch (PhutilIrreducibleRuleParserGeneratorException $ex) {
+        $caught = $ex;
+      }
+
+      // Pass the case label along so a failure names the grammar at fault.
+      $this->assertEqual(
+        $expect,
+        ($caught instanceof Exception),
+        pht('Irreducibility of grammar: %s', $label));
+    }
+  }
+
+  /**
+   * Process the ABC grammar from buildABCGenerator() and compare the table
+   * returned by inspectFirstTable() against the expected sets, after
+   * key-sorting both levels so that ordering does not matter.
+   */
+  public function testFirst() {
+    $expect = array(
+      'A' => array(
+        '(epsilon)' => true,
+        'a' => true,
+        'b' => true,
+      ),
+      'B' => array(
+        'c' => true,
+      ),
+      'C' => array(
+        '(epsilon)' => true,
+        'b' => true,
+      ),
+      'S' => array(
+        'a' => true,
+        'b' => true,
+        'c' => true,
+      ),
+    );
+
+    $generator = $this->buildABCGenerator()->processGrammar();
+
+    // Normalize: sort each symbol's set by key, then the table itself.
+    $actual = array();
+    foreach ($generator->inspectFirstTable() as $symbol => $set) {
+      ksort($set);
+      $actual[$symbol] = $set;
+    }
+    ksort($actual);
+
+    $this->assertEqual($expect, $actual);
+  }
+
+  /**
+   * Smoke test: process a small two-terminal grammar. The only check is
+   * that processGrammar() returns without throwing.
+   */
+  public function testStates() {
+    $terminals = array('a', 'b');
+    $rules = array(
+      'S' => 'X X',
+      'X' => array(
+        'a X',
+        'b',
+      ),
+    );
+
+    id(new PhutilParserGenerator())
+      ->setTerminals($terminals)
+      ->setStartRule('S')
+      ->setRules($rules)
+      ->processGrammar();
+
+    // Getting here means nothing was thrown above.
+    $this->assertTrue(true);
+  }
+
+  /**
+   * Parse the token stream for "3 + 5" with the ET grammar, using
+   * didReduceET() as the reduction callback, and expect the result 8.
+   */
+  public function testETParser() {
+    $parser = $this->buildETGenerator()->processGrammar();
+
+    // Each token is a (terminal, value) pair.
+    $tokens = array(
+      array('n', 3),
+      array('+', '+'),
+      array('n', 5),
+    );
+    $reducer = array($this, 'didReduceET');
+
+    $sum = $parser->parseTokens($tokens, $reducer);
+
+    $this->assertEqual(8, $sum);
+  }
+
+  /**
+   * Reduction callback handed to parseTokens() by testETParser().
+   *
+   * Production indexes follow the rule lists in buildETGenerator():
+   * 'E' is ('E + T', 'T') and 'T' is ('( E )', 'n').
+   *
+   * @param string Name of the rule being reduced ('S', 'E' or 'T').
+   * @param int    Index of the production within that rule.
+   * @param list   Values for the symbols of the production.
+   * @return wild  Value computed for the reduction.
+   * @throws Exception When the rule/production pair is not handled.
+   */
+  public function didReduceET($rule, $production, array $tokens) {
+    // Comparisons are deliberately loose (==), matching switch semantics.
+    if ($rule == 'S') {
+      return $tokens[0];
+    }
+
+    if ($rule == 'E') {
+      if ($production == 0) {
+        // E + T: add the two operands, skipping the '+' in the middle.
+        return $tokens[0] + $tokens[2];
+      }
+      if ($production == 1) {
+        return $tokens[0];
+      }
+    } else if ($rule == 'T') {
+      if ($production == 0) {
+        // ( E ): take the inner value.
+        return $tokens[1];
+      }
+      if ($production == 1) {
+        // n: a raw (terminal, value) token; take its value.
+        return $tokens[0][1];
+      }
+    }
+
+    throw new Exception(pht('Unexpected rule in ET grammar.'));
+  }
+
+  /**
+   * Build (without processing) a generator for a grammar over the
+   * terminals 'a', 'b' and 'c' with start rule 'S'. A `null` production
+   * appears in rules 'A' and 'C'.
+   *
+   * @return PhutilParserGenerator Configured, unprocessed generator.
+   */
+  private function buildABCGenerator() {
+    $grammar = array(
+      'S' => 'A B',
+      'A' => array('C a', null),
+      'B' => array('B a A C', 'c'),
+      'C' => array('b', null),
+    );
+
+    $alphabet = array('a', 'b', 'c');
+
+    return id(new PhutilParserGenerator())
+      ->setTerminals($alphabet)
+      ->setStartRule('S')
+      ->setRules($grammar);
+  }
+
+  /**
+   * Build (without processing) a generator for a small expression grammar
+   * over the terminals '(', 'n', ')' and '+' with start rule 'S'.
+   *
+   * @return PhutilParserGenerator Configured, unprocessed generator.
+   */
+  private function buildETGenerator() {
+    // This grammar and the corresponding tests are based on:
+    // http://dragonbook.stanford.edu/lecture-notes/Stanford-CS143/08-Bottom-Up-Parsing.pdf
+    // http://dragonbook.stanford.edu/lecture-notes/Stanford-CS143/09-SLR-Parsing.pdf
+
+    $grammar = array(
+      'S' => 'E',
+      'E' => array('E + T', 'T'),
+      'T' => array('( E )', 'n'),
+    );
+
+    $alphabet = array('(', 'n', ')', '+');
+
+    return id(new PhutilParserGenerator())
+      ->setTerminals($alphabet)
+      ->setStartRule('S')
+      ->setRules($grammar);
+  }
+
+
+}
diff --git a/src/parser/__tests__/PhutilPygmentizeParserTestCase.php b/src/parser/__tests__/PhutilPygmentizeParserTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilPygmentizeParserTestCase.php
@@ -0,0 +1,43 @@
+tryParser(
+ '',
+ '',
+ array(),
+ pht('Empty'));
+
+ $this->tryParser(
+ '1',
+ '1',
+ array(
+ 'mi' => 'color: #ff0000',
+ ),
+ pht('Simple'));
+
+ $this->tryParser(
+ '1',
+ '1',
+ array(),
+ pht('Missing Class'));
+
+ $this->tryParser(
+ 'X',
+ 'X',
+ array(
+ 'nc' => 'color: #ff0000',
+ ),
+ pht('Extra Attribute'));
+ }
+
+  /**
+   * Run the pygmentize parser over some input with a given map and assert
+   * on the output.
+   *
+   * @param string Input handed to the parser.
+   * @param string Output the parser is expected to produce.
+   * @param map    Map passed to setMap().
+   * @param string Human-readable label used in the assertion message.
+   * @return void
+   */
+  private function tryParser($input, $expect, array $map, $label) {
+    $parser = id(new PhutilPygmentizeParser())
+      ->setMap($map);
+    $actual = $parser->parse($input);
+
+    $message = pht('Pygmentize Parser: %s', $label);
+    $this->assertEqual($expect, $actual, $message);
+  }
+
+}
diff --git a/src/parser/__tests__/PhutilQueryStringParserTestCase.php b/src/parser/__tests__/PhutilQueryStringParserTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilQueryStringParserTestCase.php
@@ -0,0 +1,140 @@
+ array(),
+ 'foo=bar&foobar=barfoo' => array(
+ 'foo' => 'bar',
+ 'foobar' => 'barfoo',
+ ),
+ 'a]b[]=1&a]=2&a[[]=3&a[b]b=4&[][a]=5' => array(
+ 'a]b[]' => '1',
+ 'a]' => '2',
+ 'a[[]' => '3',
+ 'a[b]b' => '4',
+ '[][a]' => '5',
+ ),
+ 'foo[][]=bar&bar[1][3]=foo' => array(
+ 'foo' => array(
+ 0 => array(
+ 0 => 'bar',
+ ),
+ ),
+ 'bar' => array(
+ 1 => array(
+ 3 => 'foo',
+ ),
+ ),
+ ),
+ 'foo[][]=bar&a]b[]=1' => array(
+ 'foo' => array(
+ 0 => array(
+ 0 => 'bar',
+ ),
+ ),
+ 'a]b[]' => '1',
+ ),
+ 'a&&b' => array(
+ 'a' => '',
+ 'b' => '',
+ ),
+ 'a[b][]=foo&a[b][]=bar' => array(
+ 'a' => array(
+ 'b' => array(
+ 0 => 'foo',
+ 1 => 'bar',
+ ),
+ ),
+ ),
+ 'a=1&a=2' => array(
+ 'a' => '2',
+ ),
+ 'a=1&a[]=2' => array(
+ 'a' => array(
+ 0 => '2',
+ ),
+ ),
+ 'a=1&a[b]=2&a[]=3' => array(
+ 'a' => array(
+ 'b' => '2',
+ 0 => '3',
+ ),
+ ),
+ 'a%20b=%20' => array(
+ 'a b' => ' ',
+ ),
+ 'a.b=c' => array(
+ 'a.b' => 'c',
+ ),
+ 'a=b=c' => array(
+ 'a' => 'b=c',
+ ),
+ );
+
+ $parser = new PhutilQueryStringParser();
+
+ foreach ($map as $query_string => $expected) {
+ $this->assertEqual(
+ $expected,
+ $parser->parseQueryString($query_string));
+ }
+ }
+
+  /**
+   * Verify parseQueryStringToPairList(): each query string in the table
+   * must produce the listed sequence of (key, value) pairs.
+   */
+  public function testQueryStringListParsing() {
+    $cases = array(
+      '' => array(),
+      '&' => array(),
+      '=' => array(
+        array('', ''),
+      ),
+      '=&' => array(
+        array('', ''),
+      ),
+      'a=b' => array(
+        array('a', 'b'),
+      ),
+      'a[]=b' => array(
+        array('a[]', 'b'),
+      ),
+      'a=' => array(
+        array('a', ''),
+      ),
+      '. [=1' => array(
+        array('. [', '1'),
+      ),
+      'a=b&c=d' => array(
+        array('a', 'b'),
+        array('c', 'd'),
+      ),
+      'a=b&a=c' => array(
+        array('a', 'b'),
+        array('a', 'c'),
+      ),
+      '&a=b&' => array(
+        array('a', 'b'),
+      ),
+      '=a' => array(
+        array('', 'a'),
+      ),
+      '&&&' => array(),
+      'a%20b=c%20d' => array(
+        array('a b', 'c d'),
+      ),
+    );
+
+    $parser = new PhutilQueryStringParser();
+
+    foreach ($cases as $query => $expected_pairs) {
+      $actual_pairs = $parser->parseQueryStringToPairList($query);
+      $this->assertEqual($expected_pairs, $actual_pairs);
+    }
+  }
+
+}
diff --git a/src/parser/__tests__/PhutilSimpleOptionsTestCase.php b/src/parser/__tests__/PhutilSimpleOptionsTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilSimpleOptionsTestCase.php
@@ -0,0 +1,143 @@
+ array(),
+
+ // Basic parsing.
+ 'legs=4' => array('legs' => '4'),
+ 'legs=4,eyes=2' => array('legs' => '4', 'eyes' => '2'),
+
+ // Repeated keys mean last specification wins.
+ 'legs=4,legs=3' => array('legs' => '3'),
+
+ // Keys with no value should map to true.
+ 'flag' => array('flag' => true),
+ 'legs=4,flag' => array('legs' => '4', 'flag' => true),
+
+ // Leading and trailing spaces should be ignored.
+ ' flag ' => array('flag' => true),
+ ' legs = 4 , eyes = 2' => array('legs' => '4', 'eyes' => '2'),
+
+ // Unescaped spaces inside values are OK.
+ 'legs=a b c d' => array('legs' => 'a b c d'),
+
+ // Case should be ignored.
+ 'LEGS=4' => array('legs' => '4'),
+ 'legs=4, LEGS=4' => array('legs' => '4'),
+
+ // Empty values should be absent.
+ 'legs=' => array(),
+ 'legs=4,legs=,eyes=2' => array('eyes' => '2'),
+
+ // Quoted values should allow parsing comma, equals, etc.
+ 'punctuation=",="' => array('punctuation' => ',='),
+
+ // Quoted keys can also have that stuff.
+ '"backslash\\\\quote\\""=1' => array('backslash\\quote"' => '1'),
+ ' "," = "," , "=" = "=" ' => array(',' => ',', '=' => '='),
+
+ // Strings like this should not parse as simpleoptions.
+ 'SELECT id, name, size FROM table' => array(),
+ '"a""b"' => array(),
+ '=a' => array(),
+ ',a' => array(),
+ 'a==' => array(),
+ 'a=b=' => array(),
+ );
+
+ foreach ($map as $string => $expect) {
+ $parser = new PhutilSimpleOptions();
+ $this->assertEqual(
+ $expect,
+ $parser->parse($string),
+ pht("Correct parse of '%s'", $string));
+ }
+ }
+
+  /**
+   * With case sensitivity switched on, keys that differ only by case are
+   * expected to be kept as separate entries.
+   */
+  public function testSimpleOptionsCaseParse() {
+    $cases = array(
+      'legs=4, LEGS=8, LeGs' => array(
+        'legs' => '4',
+        'LEGS' => '8',
+        'LeGs' => true,
+      ),
+    );
+
+    foreach ($cases as $input => $expected) {
+      $parser = new PhutilSimpleOptions();
+      $parser->setCaseSensitive(true);
+
+      $actual = $parser->parse($input);
+
+      $this->assertEqual(
+        $expected,
+        $actual,
+        pht("Correct case-sensitive parse of '%s'", $input));
+    }
+  }
+
+  /**
+   * Inputs with an unterminated quoted string (including ones ending in a
+   * backslash) are expected to parse to an empty array.
+   */
+  public function testSimpleOptionsUnterminatedStrings() {
+    $inputs = array(
+      '"',
+      "'",
+      'a="',
+      "a='",
+      'a="\\',
+      "a='\\",
+    );
+
+    $nothing = array();
+
+    foreach ($inputs as $broken) {
+      $parser = new PhutilSimpleOptions();
+      $actual = $parser->parse($broken);
+
+      $this->assertEqual(
+        $nothing,
+        $actual,
+        pht('Correct failing parse of invalid input: %s', $broken));
+    }
+  }
+
+  /**
+   * Verify unparse() in three parts: dictionaries serialize to the expected
+   * option strings, dictionaries with an empty key throw, and a character
+   * passed as the second argument is escaped in the output.
+   */
+  public function testSimpleOptionsUnparse() {
+    // Expected string => dictionary to serialize.
+    $cases = array(
+      '' => array(),
+      'legs=4' => array('legs' => '4'),
+      'legs=4, eyes=2' => array('legs' => '4', 'eyes' => '2'),
+      'eyes=2, legs=4' => array('eyes' => '2', 'legs' => '4'),
+      'legs=4, head' => array('legs' => '4', 'head' => true),
+      'eyes=2' => array('legs' => '', 'eyes' => '2'),
+      '"thousands separator"=","' => array('thousands separator' => ','),
+    );
+
+    foreach ($cases as $expected_string => $options) {
+      $parser = new PhutilSimpleOptions();
+      $actual_string = $parser->unparse($options);
+
+      $this->assertEqual(
+        $expected_string,
+        $actual_string,
+        pht('Correct unparse of %s', print_r($options, true)));
+    }
+
+    // Dictionaries with an empty key must be rejected.
+    $invalid_dicts = array(
+      array('' => ''),
+      array('' => 'x'),
+    );
+
+    foreach ($invalid_dicts as $invalid) {
+      $error = null;
+      try {
+        $parser = new PhutilSimpleOptions();
+        $parser->unparse($invalid);
+      } catch (Exception $ex) {
+        $error = $ex;
+      }
+
+      $did_throw = ($error instanceof Exception);
+      $this->assertTrue(
+        $did_throw,
+        pht('Correct throw on unparse of bad input.'));
+    }
+
+    // The second argument names an extra character to escape.
+    $parser = new PhutilSimpleOptions();
+    $escaped = $parser->unparse(array('a' => '}'), '}');
+    $this->assertEqual(
+      'a="\\}"',
+      $escaped,
+      pht('Unparse with extra escape.'));
+  }
+
+}
diff --git a/src/parser/__tests__/PhutilTypeSpecTestCase.php b/src/parser/__tests__/PhutilTypeSpecTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/PhutilTypeSpecTestCase.php
@@ -0,0 +1,320 @@
+',
+ 'int | null',
+ 'list < string >',
+ 'int (must be even)',
+ 'optional int',
+ 'int?',
+ 'int|null?',
+ 'optional int? (minimum 300)',
+ 'list',
+ 'list>>> (easy)',
+ '\\SomeClass',
+ '\\Namespace\\SomeClass',
+ '\\NamespaceA\\NamespaceB\\NamespaceC',
+ 'NamespaceA\\NamespaceB\\NamespaceC',
+ );
+
+ $bad = array(
+ '',
+ 'list<>',
+ 'list',
+ 'map|map',
+ 'int optional',
+ '(derp)',
+ 'list',
+ 'int?|string',
+ '\\',
+ '\\\\',
+ '\\SomeClass\\',
+ 'SomeClass\\',
+ );
+
+ $good = array_fill_keys($good, true);
+ $bad = array_fill_keys($bad, false);
+
+ foreach ($good + $bad as $input => $expect) {
+ $caught = null;
+ try {
+ PhutilTypeSpec::newFromString($input);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertEqual(
+ $expect,
+ ($caught === null),
+ $input);
+ }
+ }
+
+ /**
+ * Round-trip check: each type string below is parsed with
+ * PhutilTypeSpec::newFromString() and must stringify back to exactly the
+ * same text via toString().
+ *
+ * NOTE(review): several entries ('list>', 'map>', and the repeated bare
+ * 'list') look as though their angle-bracket type parameters were stripped
+ * somewhere upstream of this patch - confirm against the original test file
+ * before relying on this list.
+ */
+ public function testTypeSpecStringify() {
+ $types = array(
+ 'int',
+ 'list',
+ 'map',
+ 'list>',
+ 'map>',
+ 'int|null',
+ 'int|string|null',
+ 'list',
+ 'list',
+ 'optional int',
+ 'int (even)',
+ );
+
+ foreach ($types as $type) {
+ // The input doubles as the expected output.
+ $this->assertEqual(
+ $type,
+ PhutilTypeSpec::newFromString($type)->toString());
+ }
+ }
+
+ public function testCanonicalize() {
+ $tests = array(
+ 'int?' => 'optional int',
+ 'int | null' => 'int|null',
+ 'list < map < int , string > > ?' => 'optional list