diff --git a/src/parser/PhutilURI.php b/src/parser/PhutilURI.php index 83c1ca7..95d1e22 100644 --- a/src/parser/PhutilURI.php +++ b/src/parser/PhutilURI.php @@ -1,373 +1,383 @@ protocol = $uri->protocol; $this->user = $uri->user; $this->pass = $uri->pass; $this->domain = $uri->domain; $this->port = $uri->port; $this->path = $uri->path; $this->query = $uri->query; $this->fragment = $uri->fragment; $this->type = $uri->type; return; } $uri = (string)$uri; $type = self::TYPE_URI; + // Reject ambiguous URIs outright. Different versions of different clients + // parse these in different ways. See T12526 for discussion. + if (preg_match('(^[^/:]*://[^/]*[#?].*:)', $uri)) { + throw new Exception( + pht( + 'Rejecting ambiguous URI "%s". This URI is not formatted or '. + 'encoded properly.', + $uri)); + } + $matches = null; if (preg_match('(^([^/:]*://[^/]*)(\\?.*)\z)', $uri, $matches)) { // If the URI is something like `idea://open?file=/path/to/file`, the // `parse_url()` function will parse `open?file=` as the host. This is // not the expected result. Break the URI into two pieces, stick a slash // in between them, parse that, then remove the path. See T6106. $parts = parse_url($matches[1].'/'.$matches[2]); unset($parts['path']); } else if ($this->isGitURIPattern($uri)) { // Handle Git/SCP URIs in the form "user@domain:relative/path". $user = '(?:(?P<user>[^/@]+)@)?'; $host = '(?P<host>[^/:]+)'; $path = ':(?P<path>.*)'; $ok = preg_match('(^'.$user.$host.$path.'\z)', $uri, $matches); if (!$ok) { throw new Exception( pht( 'Failed to parse URI "%s" as a Git URI.', $uri)); } $parts = $matches; $parts['scheme'] = 'ssh'; $type = self::TYPE_GIT; } else { $parts = parse_url($uri); } // The parse_url() call will accept URIs with leading whitespace, but many // other tools (like git) will not. See T4913 for a specific example. If // the input string has leading whitespace, fail the parse. 
if ($parts) { if (ltrim($uri) != $uri) { $parts = false; } } // NOTE: `parse_url()` is very liberal about host names; fail the parse if // the host looks like garbage. if ($parts) { $host = idx($parts, 'host', ''); if (!preg_match('/^([a-zA-Z0-9\\.\\-]*)$/', $host)) { $parts = false; } } if (!$parts) { $parts = array(); } // stringyness is to preserve API compatibility and // allow the tests to continue passing $this->protocol = idx($parts, 'scheme', ''); $this->user = rawurldecode(idx($parts, 'user', '')); $this->pass = rawurldecode(idx($parts, 'pass', '')); $this->domain = idx($parts, 'host', ''); $this->port = (string)idx($parts, 'port', ''); $this->path = idx($parts, 'path', ''); $query = idx($parts, 'query'); if ($query) { $this->query = id(new PhutilQueryStringParser())->parseQueryString( $query); } $this->fragment = idx($parts, 'fragment', ''); $this->type = $type; } public function __toString() { $prefix = null; if ($this->isGitURI()) { $port = null; } else { $port = $this->port; } $domain = $this->domain; $user = $this->user; $pass = $this->pass; if (strlen($user) && strlen($pass)) { $auth = rawurlencode($user).':'.rawurlencode($pass).'@'; } else if (strlen($user)) { $auth = rawurlencode($user).'@'; } else { $auth = null; } $protocol = $this->protocol; if ($this->isGitURI()) { $protocol = null; } else { if (strlen($auth)) { $protocol = nonempty($this->protocol, 'http'); } } if (strlen($protocol) || strlen($auth) || strlen($domain)) { if ($this->isGitURI()) { $prefix = "{$auth}{$domain}"; } else { $prefix = "{$protocol}://{$auth}{$domain}"; } if (strlen($port)) { $prefix .= ':'.$port; } } if ($this->query) { $query = '?'.http_build_query($this->query, '', '&'); } else { $query = null; } if (strlen($this->getFragment())) { $fragment = '#'.$this->getFragment(); } else { $fragment = null; } $path = $this->getPath(); if ($this->isGitURI()) { if (strlen($path)) { $path = ':'.$path; } } return $prefix.$path.$query.$fragment; } public function setQueryParam($key, 
$value) { if ($value === null) { unset($this->query[$key]); } else { $this->query[$key] = $value; } return $this; } public function setQueryParams(array $params) { $this->query = $params; return $this; } public function getQueryParams() { return $this->query; } public function setProtocol($protocol) { $this->protocol = $protocol; return $this; } public function getProtocol() { return $this->protocol; } public function setDomain($domain) { $this->domain = $domain; return $this; } public function getDomain() { return $this->domain; } public function setPort($port) { $this->port = $port; return $this; } public function getPort() { return $this->port; } public function getPortWithProtocolDefault() { static $default_ports = array( 'http' => '80', 'https' => '443', 'ssh' => '22', ); return nonempty( $this->getPort(), idx($default_ports, $this->getProtocol()), ''); } public function setPath($path) { if ($this->isGitURI()) { // Git URIs use relative paths which do not need to begin with "/". } else { if ($this->domain && strlen($path) && $path[0] !== '/') { $path = '/'.$path; } } $this->path = $path; return $this; } public function appendPath($path) { $first = strlen($path) ? $path[0] : null; $last = strlen($this->path) ? 
$this->path[strlen($this->path) - 1] : null; if (!$this->path) { return $this->setPath($path); } else if ($first === '/' && $last === '/') { $path = substr($path, 1); } else if ($first !== '/' && $last !== '/') { $path = '/'.$path; } $this->path .= $path; return $this; } public function getPath() { return $this->path; } public function setFragment($fragment) { $this->fragment = $fragment; return $this; } public function getFragment() { return $this->fragment; } public function setUser($user) { $this->user = $user; return $this; } public function getUser() { return $this->user; } public function setPass($pass) { $this->pass = $pass; return $this; } public function getPass() { return $this->pass; } public function alter($key, $value) { $altered = clone $this; $altered->setQueryParam($key, $value); return $altered; } public function isGitURI() { return ($this->type == self::TYPE_GIT); } public function setType($type) { if ($type == self::TYPE_URI) { $path = $this->getPath(); if (strlen($path) && ($path[0] !== '/')) { // Try to catch this here because we are not allowed to throw from // inside __toString() so we don't have a reasonable opportunity to // react properly if we catch it later. throw new Exception( pht( 'Unable to convert URI "%s" into a standard URI because the '. 'path is relative. Standard URIs can not represent relative '. 'paths.', $this)); } } $this->type = $type; return $this; } public function getType() { return $this->type; } private function isGitURIPattern($uri) { $matches = null; $ok = preg_match('(^(?P<head>[^/]+):(?P<last>(?!//).*)\z)', $uri, $matches); if (!$ok) { return false; } $head = $matches['head']; $last = $matches['last']; // If any part of this has spaces in it, it's not a Git URI. We fail here // so we fall back and don't fail more abruptly later. 
if (preg_match('(\s)', $head.$last)) { return false; } // If the second part only contains digits, assume we're looking at // casually specified "domain.com:123" URI, not a Git URI pointed at an // entirely numeric relative path. if (preg_match('(^\d+\z)', $last)) { return false; } // If the first part has a "." or an "@" in it, interpret it as a domain // or a "user@host" string. if (preg_match('([.@])', $head)) { return true; } // Otherwise, interpret the URI conservatively as a "javascript:"-style // URI. This means that "localhost:path" is parsed as a normal URI instead // of a Git URI, but we can't tell which the user intends and it's safer // to treat it as a normal URI. return false; } } diff --git a/src/parser/__tests__/PhutilURITestCase.php b/src/parser/__tests__/PhutilURITestCase.php index c72a07f..3faa5b1 100644 --- a/src/parser/__tests__/PhutilURITestCase.php +++ b/src/parser/__tests__/PhutilURITestCase.php @@ -1,243 +1,264 @@ assertEqual('http', $uri->getProtocol(), pht('protocol')); $this->assertEqual('user', $uri->getUser(), pht('user')); $this->assertEqual('pass', $uri->getPass(), pht('password')); $this->assertEqual('host', $uri->getDomain(), pht('domain')); $this->assertEqual('99', $uri->getPort(), pht('port')); $this->assertEqual('/path/', $uri->getPath(), pht('path')); $this->assertEqual( array( 'query' => 'value', ), $uri->getQueryParams(), 'query params'); $this->assertEqual('fragment', $uri->getFragment(), pht('fragment')); $this->assertEqual( 'http://user:pass@host:99/path/?query=value#fragment', (string)$uri, 'uri'); $uri = new PhutilURI('ssh://git@example.com/example/example.git'); $this->assertEqual('ssh', $uri->getProtocol(), pht('protocol')); $this->assertEqual('git', $uri->getUser(), pht('user')); $this->assertEqual('', $uri->getPass(), pht('password')); $this->assertEqual('example.com', $uri->getDomain(), pht('domain')); $this->assertEqual('', $uri->getPort(), 'port'); $this->assertEqual('/example/example.git', $uri->getPath(), 
pht('path')); $this->assertEqual( array(), $uri->getQueryParams(), pht('query parameters')); $this->assertEqual('', $uri->getFragment(), pht('fragment')); $this->assertEqual( 'ssh://git@example.com/example/example.git', (string)$uri, 'uri'); $uri = new PhutilURI('http://0@domain.com/'); $this->assertEqual('0', $uri->getUser()); $this->assertEqual('http://0@domain.com/', (string)$uri); $uri = new PhutilURI('http://0:0@domain.com/'); $this->assertEqual('0', $uri->getUser()); $this->assertEqual('0', $uri->getPass()); $this->assertEqual('http://0:0@domain.com/', (string)$uri); $uri = new PhutilURI('http://%20:%20@domain.com/'); $this->assertEqual(' ', $uri->getUser()); $this->assertEqual(' ', $uri->getPass()); $this->assertEqual('http://%20:%20@domain.com/', (string)$uri); $uri = new PhutilURI('http://%40:%40@domain.com/'); $this->assertEqual('@', $uri->getUser()); $this->assertEqual('@', $uri->getPass()); $this->assertEqual('http://%40:%40@domain.com/', (string)$uri); $uri = new PhutilURI('http://%2F:%2F@domain.com/'); $this->assertEqual('/', $uri->getUser()); $this->assertEqual('/', $uri->getPass()); $this->assertEqual('http://%2F:%2F@domain.com/', (string)$uri); // These tests are covering cases where cURL and parse_url() behavior // may differ in potentially dangerous ways. See T6755 for discussion. // In general, we defuse these attacks by emitting URIs which escape // special characters so that they are interpreted unambiguously by // cURL in the same way that parse_url() interpreted them. 
$uri = new PhutilURI('http://u:p@evil.com?@good.com'); $this->assertEqual('u', $uri->getUser()); $this->assertEqual('p', $uri->getPass()); $this->assertEqual('evil.com', $uri->getDomain()); $this->assertEqual('http://u:p@evil.com?%40good.com=', (string)$uri); - $uri = new PhutilURI('http://good.com#u:p@evil.com/'); - $this->assertEqual('good.com#u', $uri->getUser()); - $this->assertEqual('p', $uri->getPass()); - $this->assertEqual('evil.com', $uri->getDomain()); - $this->assertEqual('http://good.com%23u:p@evil.com/', (string)$uri); - - $uri = new PhutilURI('http://good.com?u:p@evil.com/'); - $this->assertEqual('', $uri->getUser()); - $this->assertEqual('', $uri->getPass()); - $this->assertEqual('good.com', $uri->getDomain()); - $this->assertEqual('http://good.com?u%3Ap%40evil.com%2F=', (string)$uri); + // The behavior of URLs in these forms differs for different versions + // of cURL, PHP, and other software. Because safe parsing is a tricky + // proposition and these URIs are almost certainly malicious, we just + // reject them. See T12526 for discussion. + + $dangerous = array( + // Ambiguous encoding. + 'http://good.com#u:p@evil.com/' => true, + 'http://good.com?u:p@evil.com/' => true, + + // Unambiguous encoding: with a trailing slash. + 'http://good.com/#u:p@evil.com/' => false, + 'http://good.com/?u:p@evil.com/' => false, + + // Unambiguous encoding: with escaping. 
+ 'http://good.com%23u:p@evil.com/' => false, + 'http://good.com%40u:p@evil.com/' => false, + ); + + foreach ($dangerous as $input => $expect) { + $caught = null; + try { + new PhutilURI($input); + } catch (Exception $ex) { + $caught = $ex; + } + + $this->assertEqual( + $expect, + ($caught instanceof Exception), + pht('Unexpected parse result for dangerous URI "%s".', $input)); + } $uri = new PhutilURI('www.example.com'); $this->assertEqual('', $uri->getProtocol()); $this->assertEqual('www.example.com', (string)$uri); } public function testURIGeneration() { $uri = new PhutilURI('http://example.com'); $uri->setPath('bar'); $this->assertEqual('http://example.com/bar', $uri->__toString()); } public function testStrictURIParsingOfHosts() { $uri = new PhutilURI('http://&/'); $this->assertEqual('', $uri->getDomain()); } public function testStrictURIParsingOfLeadingWhitespace() { $uri = new PhutilURI(' http://example.com/'); $this->assertEqual('', $uri->getDomain()); } public function testAppendPath() { $uri = new PhutilURI('http://example.com'); $uri->appendPath('foo'); $this->assertEqual('http://example.com/foo', $uri->__toString()); $uri->appendPath('bar'); $this->assertEqual('http://example.com/foo/bar', $uri->__toString()); $uri = new PhutilURI('http://example.com'); $uri->appendPath('/foo/'); $this->assertEqual('http://example.com/foo/', $uri->__toString()); $uri->appendPath('/bar/'); $this->assertEqual('http://example.com/foo/bar/', $uri->__toString()); $uri = new PhutilURI('http://example.com'); $uri->appendPath('foo'); $this->assertEqual('http://example.com/foo', $uri->__toString()); $uri->appendPath('/bar/'); $this->assertEqual('http://example.com/foo/bar/', $uri->__toString()); } public function testUnusualURIs() { $uri = new PhutilURI('file:///path/to/file'); $this->assertEqual('file', $uri->getProtocol(), pht('protocol')); $this->assertEqual('', $uri->getDomain(), pht('domain')); $this->assertEqual('/path/to/file', $uri->getPath(), pht('path')); $uri = new 
PhutilURI('idea://open?x=/'); $this->assertEqual('idea', $uri->getProtocol(), pht('protocol')); $this->assertEqual('open', $uri->getDomain(), pht('domain')); $this->assertEqual('', $uri->getPath(), pht('path')); $this->assertEqual( array( 'x' => '/', ), $uri->getQueryParams()); // This is not a legitimate URI and should not parse as one. $uri = new PhutilURI('fruit.list: apple banana cherry'); $this->assertEqual('', $uri->getDomain()); } public function testAmbiguousURIs() { // It's important that this be detected as a Javascript URI, because that // is how browsers will treat it. $uri = new PhutilURI('javascript:evil'); $this->assertEqual('javascript', $uri->getProtocol()); // This is "wrong", in that the user probably intends for this to be a // Git-style URI, but we can not easily parse it as one without making the // "javascript" case above unsafe. $uri = new PhutilURI('localhost:todo.txt'); $this->assertEqual('localhost', $uri->getProtocol()); // These variants are unambiguous and safe. $uri = new PhutilURI('localhost.com:todo.txt'); $this->assertEqual('localhost.com', $uri->getDomain()); $uri = new PhutilURI('user@localhost:todo.txt'); $this->assertEqual('localhost', $uri->getDomain()); // This could either be a Git URI with relative path "22", or a normal URI // with port "22". We should assume it is a port number because this is // relatively common, while relative Git URIs pointing at numeric filenames // are bizarre. 
$uri = new PhutilURI('domain.com:22'); $this->assertEqual('domain.com', $uri->getDomain()); $this->assertEqual('22', $uri->getPort()); } public function testDefaultPorts() { $uri = new PhutilURI('http://www.example.com'); $this->assertEqual('80', $uri->getPortWithProtocolDefault()); $uri = new PhutilURI('https://www.example.com'); $this->assertEqual('443', $uri->getPortWithProtocolDefault()); $uri = new PhutilURI('ssh://git@example.com/example/example.git'); $this->assertEqual('22', $uri->getPortWithProtocolDefault()); $uri = new PhutilURI('unknown://www.example.com'); $this->assertEqual('', $uri->getPortWithProtocolDefault()); } public function testGitURIParsing() { $uri = new PhutilURI('git@host.com:path/to/something'); $this->assertEqual('ssh', $uri->getProtocol()); $this->assertEqual('git', $uri->getUser()); $this->assertEqual('host.com', $uri->getDomain()); $this->assertEqual('path/to/something', $uri->getPath()); $this->assertEqual('git@host.com:path/to/something', (string)$uri); $uri = new PhutilURI('host.com:path/to/something'); $this->assertEqual('ssh', $uri->getProtocol()); $this->assertEqual('', $uri->getUser()); $this->assertEqual('host.com', $uri->getDomain()); $this->assertEqual('path/to/something', $uri->getPath()); $this->assertEqual('host.com:path/to/something', (string)$uri); $uri_1 = new PhutilURI('host.com:path/to/something'); $uri_2 = new PhutilURI($uri_1); $this->assertEqual((string)$uri_1, (string)$uri_2); } public function testStrictGitURIParsingOfLeadingWhitespace() { $uri = new PhutilURI(' user@example.com:path'); $this->assertEqual('', $uri->getDomain()); } public function testNoRelativeURIPaths() { $uri = new PhutilURI('user@example.com:relative_path'); $caught = null; try { $uri->setType(PhutilURI::TYPE_URI); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } }