diff --git a/src/markup/__tests__/PhutilMarkupTestCase.php b/src/markup/__tests__/PhutilMarkupTestCase.php index 073ee7e..df6620b 100644 --- a/src/markup/__tests__/PhutilMarkupTestCase.php +++ b/src/markup/__tests__/PhutilMarkupTestCase.php @@ -1,265 +1,274 @@ assertEqual( (string)phutil_tag('br'), (string)phutil_tag('br', array())); $this->assertEqual( (string)phutil_tag('br', array()), (string)phutil_tag('br', array(), null)); } public function testTagEmpty() { $this->assertEqual( '
', (string)phutil_tag('br', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), '')); } public function testTagBasics() { $this->assertEqual( '
', (string)phutil_tag('br')); $this->assertEqual( '
y
', (string)phutil_tag('div', array(), 'y')); } public function testTagAttributes() { $this->assertEqual( '
y
', (string)phutil_tag('div', array('u' => 'v'), 'y')); $this->assertEqual( '
', (string)phutil_tag('br', array('u' => 'v'))); } public function testTagEscapes() { $this->assertEqual( '
', (string)phutil_tag('br', array('u' => '<'))); $this->assertEqual( '

', (string)phutil_tag('div', array(), phutil_tag('br'))); } public function testTagNullAttribute() { $this->assertEqual( '
', (string)phutil_tag('br', array('y' => null))); } public function testDefaultRelNoreferrer() { $map = array( // These should not have rel="nofollow" inserted implicitly. '/' => false, '/path/to/local.html' => false, '#example' => false, '' => false, // These should get the implicit insertion. 'http://www.example.org/' => true, '///evil.com/' => true, ' http://www.example.org/' => true, 'ftp://filez.com' => true, 'mailto:santa@northpole.com' => true, 'tel:18005555555' => true, ); foreach ($map as $input => $expect) { $tag = phutil_tag( 'a', array( 'href' => $input, ), 'link'); $tag = (string)$tag; $this->assertEqual($expect, (bool)preg_match('/noreferrer/', $tag)); } // With an explicit `rel` present, we should not override it. $tag = phutil_tag( 'a', array( 'href' => 'http://www.example.org/', 'rel' => 'nofollow', ), 'link'); $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); // For tags other than `a`, we should not insert `rel`. $tag = phutil_tag( 'link', array( 'href' => 'http://www.example.org/', ), 'link'); $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); } public function testTagJavascriptProtocolRejection() { $hrefs = array( 'javascript:alert(1)' => true, 'JAVASCRIPT:alert(2)' => true, // NOTE: When interpreted as a URI, this is dropped because of leading // whitespace. ' javascript:alert(3)' => array(true, false), '/' => false, '/path/to/stuff/' => false, '' => false, 'http://example.com/' => false, '#' => false, 'javascript://anything' => true, // Chrome 33 and IE11, at a minimum, treat this as Javascript. "javascript\n:alert(4)" => true, // Opera currently accepts a variety of unicode spaces. This test case // has a smattering of them. "\xE2\x80\x89javascript:" => true, "javascript\xE2\x80\x89:" => true, "\xE2\x80\x84javascript:" => true, "javascript\xE2\x80\x84:" => true, // Because we're aggressive, all of unicode should trigger detection // by default. "\xE2\x98\x83javascript:" => true, "javascript\xE2\x98\x83:" => true, "\xE2\x98\x83javascript\xE2\x98\x83:" => true, // We're aggressive about this, so we'll intentionally raise false // positives in these cases. 'javascript~:alert(5)' => true, '!!!javascript!!!!:alert(6)' => true, // However, we should raise true negatives in these slightly more // reasonable cases. 'javascript/:docs.html' => false, 'javascripts:x.png' => false, 'COOLjavascript:page' => false, '/javascript:alert(1)' => false, ); foreach (array(true, false) as $use_uri) { foreach ($hrefs as $href => $expect) { if (is_array($expect)) { $expect = ($use_uri ? $expect[1] : $expect[0]); } if ($use_uri) { - $href = new PhutilURI($href); + $href_value = new PhutilURI($href); + } else { + $href_value = $href; } $caught = null; try { - phutil_tag('a', array('href' => $href), 'click for candy'); + phutil_tag('a', array('href' => $href_value), 'click for candy'); } catch (Exception $ex) { $caught = $ex; } + + $desc = pht( + 'Unexpected result for "%s". ', + $href, + $use_uri ? pht('Yes') : pht('No'), + $expect ? pht('Yes') : pht('No')); + $this->assertEqual( $expect, $caught instanceof Exception, - pht('Rejected href: %s', $href)); + $desc); } } } public function testURIEscape() { $this->assertEqual( '%2B/%20%3F%23%26%3A%21xyz%25', phutil_escape_uri('+/ ?#&:!xyz%')); } public function testURIPathComponentEscape() { $this->assertEqual( 'a%252Fb', phutil_escape_uri_path_component('a/b')); $str = ''; for ($ii = 0; $ii <= 255; $ii++) { $str .= chr($ii); } $this->assertEqual( $str, phutil_unescape_uri_path_component( rawurldecode( // Simulates webserver. phutil_escape_uri_path_component($str)))); } public function testHsprintf() { $this->assertEqual( '
<3
', (string)hsprintf('
%s
', '<3')); } public function testAppendHTML() { $html = phutil_tag('hr'); $html->appendHTML(phutil_tag('br'), ''); $this->assertEqual('

<evil>', $html->getHTMLContent()); } public function testArrayEscaping() { $this->assertEqual( '
<div>
', phutil_escape_html( array( hsprintf('
'), array( array( '<', array( 'd', array( array( hsprintf('i'), ), 'v', ), ), array( array( '>', ), ), ), ), hsprintf('
'), ))); $this->assertEqual( '


', phutil_tag( 'div', array(), array( array( array( phutil_tag('br'), array( phutil_tag('hr'), ), phutil_tag('wbr'), ), ), ))->getHTMLContent()); } } diff --git a/src/parser/PhutilURI.php b/src/parser/PhutilURI.php index f9612fd..abca039 100644 --- a/src/parser/PhutilURI.php +++ b/src/parser/PhutilURI.php @@ -1,224 +1,360 @@ protocol = $uri->protocol; + $this->user = $uri->user; + $this->pass = $uri->pass; + $this->domain = $uri->domain; + $this->port = $uri->port; + $this->path = $uri->path; + $this->query = $uri->query; + $this->fragment = $uri->fragment; + $this->type = $uri->type; + return; + } + $uri = (string)$uri; + $type = self::TYPE_URI; + $matches = null; if (preg_match('(^([^/:]*://[^/]*)(\\?.*)\z)', $uri, $matches)) { // If the URI is something like `idea://open?file=/path/to/file`, the // `parse_url()` function will parse `open?file=` as the host. This is // not the expected result. Break the URI into two pieces, stick a slash // in between them, parse that, then remove the path. See T6106. $parts = parse_url($matches[1].'/'.$matches[2]); unset($parts['path']); + } else if ($this->isGitURIPattern($uri)) { + // Handle Git/SCP URIs in the form "user@domain:relative/path". + + $user = '(?:(?P[^/@]+)@)?'; + $host = '(?P[^/:]+)'; + $path = ':(?P.*)'; + + $ok = preg_match('(^\s*'.$user.$host.$path.'\z)', $uri, $matches); + if (!$ok) { + throw new Exception( + pht( + 'Failed to parse URI "%s" as a Git URI.', + $uri)); + } + + $parts = $matches; + $parts['scheme'] = 'ssh'; + + $type = self::TYPE_GIT; } else { $parts = parse_url($uri); } // The parse_url() call will accept URIs with leading whitespace, but many // other tools (like git) will not. See T4913 for a specific example. If // the input string has leading whitespace, fail the parse. if ($parts) { if (ltrim($uri) != $uri) { $parts = false; } } - // NOTE: `parse_url()` is very liberal about host names; fail the parse if // the host looks like garbage. if ($parts) { $host = idx($parts, 'host', ''); if (!preg_match('/^([a-zA-Z0-9\\.\\-]*)$/', $host)) { $parts = false; } } if (!$parts) { $parts = array(); } // stringyness is to preserve API compatibility and // allow the tests to continue passing $this->protocol = idx($parts, 'scheme', ''); - $this->user = rawurldecode(idx($parts, 'user', '')); - $this->pass = rawurldecode(idx($parts, 'pass', '')); - $this->domain = idx($parts, 'host', ''); - $this->port = (string)idx($parts, 'port', ''); - $this->path = idx($parts, 'path', ''); + $this->user = rawurldecode(idx($parts, 'user', '')); + $this->pass = rawurldecode(idx($parts, 'pass', '')); + $this->domain = idx($parts, 'host', ''); + $this->port = (string)idx($parts, 'port', ''); + $this->path = idx($parts, 'path', ''); $query = idx($parts, 'query'); if ($query) { $this->query = id(new PhutilQueryStringParser())->parseQueryString( $query); } $this->fragment = idx($parts, 'fragment', ''); + + $this->type = $type; } public function __toString() { $prefix = null; - if ($this->protocol || $this->domain || $this->port) { - $protocol = nonempty($this->protocol, 'http'); - - $auth = ''; - if (strlen($this->user) && strlen($this->pass)) { - $auth = rawurlencode($this->user).':'. - rawurlencode($this->pass).'@'; - } else if (strlen($this->user)) { - $auth = rawurlencode($this->user).'@'; + + if ($this->isGitURI()) { + $port = null; + } else { + $port = $this->port; + } + + $domain = $this->domain; + + $user = $this->user; + $pass = $this->pass; + if (strlen($user) && strlen($pass)) { + $auth = rawurlencode($user).':'.rawurlencode($pass).'@'; + } else if (strlen($user)) { + $auth = rawurlencode($user).'@'; + } else { + $auth = null; + } + + $protocol = $this->protocol; + if ($this->isGitURI()) { + $protocol = null; + } else { + if (strlen($auth)) { + $protocol = nonempty($this->protocol, 'http'); + } + } + + if (strlen($protocol) || strlen($auth) || strlen($domain)) { + if ($this->isGitURI()) { + $prefix = "{$auth}{$domain}"; + } else { + $prefix = "{$protocol}://{$auth}{$domain}"; } - $prefix = $protocol.'://'.$auth.$this->domain; - if ($this->port) { - $prefix .= ':'.$this->port; + if (strlen($port)) { + $prefix .= ':'.$port; } } if ($this->query) { $query = '?'.http_build_query($this->query, '', '&'); } else { $query = null; } if (strlen($this->getFragment())) { $fragment = '#'.$this->getFragment(); } else { $fragment = null; } + $path = $this->getPath(); + if ($this->isGitURI()) { + if (strlen($path)) { + $path = ':'.$path; + } + } - return $prefix.$this->getPath().$query.$fragment; + return $prefix.$path.$query.$fragment; } public function setQueryParam($key, $value) { if ($value === null) { unset($this->query[$key]); } else { $this->query[$key] = $value; } return $this; } public function setQueryParams(array $params) { $this->query = $params; return $this; } public function getQueryParams() { return $this->query; } public function setProtocol($protocol) { $this->protocol = $protocol; return $this; } + public function getProtocol() { return $this->protocol; } public function setDomain($domain) { $this->domain = $domain; return $this; } public function getDomain() { return $this->domain; } public function setPort($port) { $this->port = $port; return $this; } public function getPort() { return $this->port; } public function getPortWithProtocolDefault() { static $default_ports = array( 'http' => '80', 'https' => '443', 'ssh' => '22', ); return nonempty( $this->getPort(), idx($default_ports, $this->getProtocol()), ''); } public function setPath($path) { - if ($this->domain && strlen($path) && $path[0] !== '/') { - $path = '/'.$path; + if ($this->isGitURI()) { + // Git URIs use relative paths which do not need to begin with "/". + } else { + if ($this->domain && strlen($path) && $path[0] !== '/') { + $path = '/'.$path; + } } + $this->path = $path; return $this; } public function appendPath($path) { $first = strlen($path) ? $path[0] : null; $last = strlen($this->path) ? $this->path[strlen($this->path) - 1] : null; if (!$this->path) { return $this->setPath($path); } else if ($first === '/' && $last === '/') { $path = substr($path, 1); } else if ($first !== '/' && $last !== '/') { $path = '/'.$path; } $this->path .= $path; return $this; } public function getPath() { return $this->path; } public function setFragment($fragment) { $this->fragment = $fragment; return $this; } public function getFragment() { return $this->fragment; } public function setUser($user) { $this->user = $user; return $this; } public function getUser() { return $this->user; } public function setPass($pass) { $this->pass = $pass; return $this; } public function getPass() { return $this->pass; } public function alter($key, $value) { $altered = clone $this; $altered->setQueryParam($key, $value); return $altered; } + public function isGitURI() { + return ($this->type == self::TYPE_GIT); + } + + public function setType($type) { + + if ($type == self::TYPE_URI) { + $path = $this->getPath(); + if (strlen($path) && ($path[0] !== '/')) { + // Try to catch this here because we are not allowed to throw from + // inside __toString() so we don't have a reasonable opportunity to + // react properly if we catch it later. + throw new Exception( + pht( + 'Unable to convert URI "%s" into a standard URI because the '. + 'path is relative. Standard URIs can not represent relative '. + 'paths.', + $this)); + } + } + + $this->type = $type; + return $this; + } + + public function getType() { + return $this->type; + } + + private function isGitURIPattern($uri) { + $matches = null; + + $ok = preg_match('(^(?P[^/]+):(?P(?!//).*)\z)', $uri, $matches); + if (!$ok) { + return false; + } + + $head = $matches['head']; + $last = $matches['last']; + + // If the first part has a "." or an "@" in it, interpret it as a domain + // or a "user@host" string. + if (preg_match('([.@])', $head)) { + return true; + } + + // Otherwise, interpret the URI conservatively as a "javascript:"-style + // URI. This means that "localhost:path" is parsed as a normal URI instead + // of a Git URI, but we can't tell which the user intends and it's safer + // to treat it as a normal URI. + return false; + } + } diff --git a/src/parser/__tests__/PhutilURITestCase.php b/src/parser/__tests__/PhutilURITestCase.php index 57b68f8..60d837d 100644 --- a/src/parser/__tests__/PhutilURITestCase.php +++ b/src/parser/__tests__/PhutilURITestCase.php @@ -1,167 +1,231 @@ assertEqual('http', $uri->getProtocol(), pht('protocol')); $this->assertEqual('user', $uri->getUser(), pht('user')); $this->assertEqual('pass', $uri->getPass(), pht('password')); $this->assertEqual('host', $uri->getDomain(), pht('domain')); $this->assertEqual('99', $uri->getPort(), pht('port')); $this->assertEqual('/path/', $uri->getPath(), pht('path')); $this->assertEqual( array( 'query' => 'value', ), $uri->getQueryParams(), 'query params'); $this->assertEqual('fragment', $uri->getFragment(), pht('fragment')); $this->assertEqual( 'http://user:pass@host:99/path/?query=value#fragment', (string)$uri, 'uri'); $uri = new PhutilURI('ssh://git@example.com/example/example.git'); $this->assertEqual('ssh', $uri->getProtocol(), pht('protocol')); $this->assertEqual('git', $uri->getUser(), pht('user')); $this->assertEqual('', $uri->getPass(), pht('password')); $this->assertEqual('example.com', $uri->getDomain(), pht('domain')); $this->assertEqual('', $uri->getPort(), 'port'); $this->assertEqual('/example/example.git', $uri->getPath(), pht('path')); $this->assertEqual( array(), $uri->getQueryParams(), pht('query parameters')); $this->assertEqual('', $uri->getFragment(), pht('fragment')); $this->assertEqual( 'ssh://git@example.com/example/example.git', (string)$uri, 'uri'); $uri = new PhutilURI('http://0@domain.com/'); $this->assertEqual('0', $uri->getUser()); $this->assertEqual('http://0@domain.com/', (string)$uri); $uri = new PhutilURI('http://0:0@domain.com/'); $this->assertEqual('0', $uri->getUser()); $this->assertEqual('0', $uri->getPass()); $this->assertEqual('http://0:0@domain.com/', (string)$uri); $uri = new PhutilURI('http://%20:%20@domain.com/'); $this->assertEqual(' ', $uri->getUser()); $this->assertEqual(' ', $uri->getPass()); $this->assertEqual('http://%20:%20@domain.com/', (string)$uri); $uri = new PhutilURI('http://%40:%40@domain.com/'); $this->assertEqual('@', $uri->getUser()); $this->assertEqual('@', $uri->getPass()); $this->assertEqual('http://%40:%40@domain.com/', (string)$uri); $uri = new PhutilURI('http://%2F:%2F@domain.com/'); $this->assertEqual('/', $uri->getUser()); $this->assertEqual('/', $uri->getPass()); $this->assertEqual('http://%2F:%2F@domain.com/', (string)$uri); // These tests are covering cases where cURL and parse_url() behavior // may differ in potentially dangerous ways. See T6755 for discussion. // In general, we defuse these attacks by emitting URIs which escape // special characters so that they are interpreted unambiguously by // cURL in the same way that parse_url() interpreted them. $uri = new PhutilURI('http://u:p@evil.com?@good.com'); $this->assertEqual('u', $uri->getUser()); $this->assertEqual('p', $uri->getPass()); $this->assertEqual('evil.com', $uri->getDomain()); $this->assertEqual('http://u:p@evil.com?%40good.com=', (string)$uri); $uri = new PhutilURI('http://good.com#u:p@evil.com/'); $this->assertEqual('good.com#u', $uri->getUser()); $this->assertEqual('p', $uri->getPass()); $this->assertEqual('evil.com', $uri->getDomain()); $this->assertEqual('http://good.com%23u:p@evil.com/', (string)$uri); $uri = new PhutilURI('http://good.com?u:p@evil.com/'); $this->assertEqual('', $uri->getUser()); $this->assertEqual('', $uri->getPass()); $this->assertEqual('good.com', $uri->getDomain()); $this->assertEqual('http://good.com?u%3Ap%40evil.com%2F=', (string)$uri); + $uri = new PhutilURI('www.example.com'); + $this->assertEqual('', $uri->getProtocol()); + $this->assertEqual('www.example.com', (string)$uri); } public function testURIGeneration() { $uri = new PhutilURI('http://example.com'); $uri->setPath('bar'); $this->assertEqual('http://example.com/bar', $uri->__toString()); } public function testStrictURIParsingOfHosts() { $uri = new PhutilURI('http://&/'); $this->assertEqual('', $uri->getDomain()); } public function testStrictURIParsingOfLeadingWhitespace() { $uri = new PhutilURI(' http://example.com/'); $this->assertEqual('', $uri->getDomain()); } public function testAppendPath() { $uri = new PhutilURI('http://example.com'); $uri->appendPath('foo'); $this->assertEqual('http://example.com/foo', $uri->__toString()); $uri->appendPath('bar'); $this->assertEqual('http://example.com/foo/bar', $uri->__toString()); $uri = new PhutilURI('http://example.com'); $uri->appendPath('/foo/'); $this->assertEqual('http://example.com/foo/', $uri->__toString()); $uri->appendPath('/bar/'); $this->assertEqual('http://example.com/foo/bar/', $uri->__toString()); $uri = new PhutilURI('http://example.com'); $uri->appendPath('foo'); $this->assertEqual('http://example.com/foo', $uri->__toString()); $uri->appendPath('/bar/'); $this->assertEqual('http://example.com/foo/bar/', $uri->__toString()); } public function testUnusualURIs() { $uri = new PhutilURI('file:///path/to/file'); $this->assertEqual('file', $uri->getProtocol(), pht('protocol')); $this->assertEqual('', $uri->getDomain(), pht('domain')); $this->assertEqual('/path/to/file', $uri->getPath(), pht('path')); $uri = new PhutilURI('idea://open?x=/'); $this->assertEqual('idea', $uri->getProtocol(), pht('protocol')); $this->assertEqual('open', $uri->getDomain(), pht('domain')); $this->assertEqual('', $uri->getPath(), pht('path')); $this->assertEqual( array( 'x' => '/', ), $uri->getQueryParams()); } + public function testAmbiguousURIs() { + // It's important that this be detected as a Javascript URI, because that + // is how browsers will treat it. + $uri = new PhutilURI('javascript:evil'); + $this->assertEqual('javascript', $uri->getProtocol()); + + + // This is "wrong", in that the user probably intends for this to be a + // Git-style URI, but we can not easily parse it as one without making the + // "javascript" case above unsafe. + $uri = new PhutilURI('localhost:todo.txt'); + $this->assertEqual('localhost', $uri->getProtocol()); + + + // These variants are unambiguous and safe. + $uri = new PhutilURI('localhost.com:todo.txt'); + $this->assertEqual('localhost.com', $uri->getDomain()); + + $uri = new PhutilURI('user@localhost:todo.txt'); + $this->assertEqual('localhost', $uri->getDomain()); + } + public function testDefaultPorts() { $uri = new PhutilURI('http://www.example.com'); $this->assertEqual('80', $uri->getPortWithProtocolDefault()); $uri = new PhutilURI('https://www.example.com'); $this->assertEqual('443', $uri->getPortWithProtocolDefault()); $uri = new PhutilURI('ssh://git@example.com/example/example.git'); $this->assertEqual('22', $uri->getPortWithProtocolDefault()); $uri = new PhutilURI('unknown://www.example.com'); $this->assertEqual('', $uri->getPortWithProtocolDefault()); } + public function testGitURIParsing() { + $uri = new PhutilURI('git@host.com:path/to/something'); + $this->assertEqual('ssh', $uri->getProtocol()); + $this->assertEqual('git', $uri->getUser()); + $this->assertEqual('host.com', $uri->getDomain()); + $this->assertEqual('path/to/something', $uri->getPath()); + $this->assertEqual('git@host.com:path/to/something', (string)$uri); + + $uri = new PhutilURI('host.com:path/to/something'); + $this->assertEqual('ssh', $uri->getProtocol()); + $this->assertEqual('', $uri->getUser()); + $this->assertEqual('host.com', $uri->getDomain()); + $this->assertEqual('path/to/something', $uri->getPath()); + $this->assertEqual('host.com:path/to/something', (string)$uri); + + $uri_1 = new PhutilURI('host.com:path/to/something'); + $uri_2 = new PhutilURI($uri_1); + + $this->assertEqual((string)$uri_1, (string)$uri_2); + } + + public function testStrictGitURIParsingOfLeadingWhitespace() { + $uri = new PhutilURI(' user@example.com:path'); + $this->assertEqual('', $uri->getDomain()); + } + + public function testNoRelativeURIPaths() { + $uri = new PhutilURI('user@example.com:relative_path'); + + $caught = null; + try { + $uri->setType(PhutilURI::TYPE_URI); + } catch (Exception $ex) { + $caught = $ex; + } + + $this->assertTrue($caught instanceof Exception); + } + }