diff --git a/src/markup/__tests__/PhutilMarkupTestCase.php b/src/markup/__tests__/PhutilMarkupTestCase.php --- a/src/markup/__tests__/PhutilMarkupTestCase.php +++ b/src/markup/__tests__/PhutilMarkupTestCase.php @@ -164,19 +164,28 @@ } if ($use_uri) { - $href = new PhutilURI($href); + $href_value = new PhutilURI($href); + } else { + $href_value = $href; } $caught = null; try { - phutil_tag('a', array('href' => $href), 'click for candy'); + phutil_tag('a', array('href' => $href_value), 'click for candy'); } catch (Exception $ex) { $caught = $ex; } + + $desc = pht( + 'Unexpected result for "%s". <uri = %s, expect exception = %s>', + $href, + $use_uri ? pht('Yes') : pht('No'), + $expect ? pht('Yes') : pht('No')); + $this->assertEqual( $expect, $caught instanceof Exception, - pht('Rejected href: %s', $href)); + $desc); } } } diff --git a/src/parser/PhutilURI.php b/src/parser/PhutilURI.php --- a/src/parser/PhutilURI.php +++ b/src/parser/PhutilURI.php @@ -1,7 +1,16 @@ + if ($uri instanceof PhutilURI) { + $this->protocol = $uri->protocol; + $this->user = $uri->user; + $this->pass = $uri->pass; + $this->domain = $uri->domain; + $this->port = $uri->port; + $this->path = $uri->path; + $this->query = $uri->query; + $this->fragment = $uri->fragment; + $this->type = $uri->type; + return; + } + $uri = (string)$uri; + $type = self::TYPE_URI; + $matches = null; if (preg_match('(^([^/:]*://[^/]*)(\\?.*)\z)', $uri, $matches)) { // If the URI is something like `idea://open?file=/path/to/file`, the @@ -26,6 +54,25 @@ $parts = parse_url($matches[1].'/'.$matches[2]); unset($parts['path']); + } else if ($this->isGitURIPattern($uri)) { + // Handle Git/SCP URIs in the form "user@domain:relative/path". 
+ + $user = '(?:(?P<user>[^/@]+)@)?'; + $host = '(?P<host>[^/:]+)'; + $path = ':(?P<path>.*)'; + + $ok = preg_match('(^\s*'.$user.$host.$path.'\z)', $uri, $matches); + if (!$ok) { + throw new Exception( + pht( + 'Failed to parse URI "%s" as a Git URI.', + $uri)); + } + + $parts = $matches; + $parts['scheme'] = 'ssh'; + + $type = self::TYPE_GIT; } else { $parts = parse_url($uri); } @@ -39,7 +86,6 @@ } } - // NOTE: `parse_url()` is very liberal about host names; fail the parse if // the host looks like garbage. if ($parts) { @@ -56,35 +102,60 @@ // stringyness is to preserve API compatibility and // allow the tests to continue passing $this->protocol = idx($parts, 'scheme', ''); - $this->user = rawurldecode(idx($parts, 'user', '')); - $this->pass = rawurldecode(idx($parts, 'pass', '')); - $this->domain = idx($parts, 'host', ''); - $this->port = (string)idx($parts, 'port', ''); - $this->path = idx($parts, 'path', ''); + $this->user = rawurldecode(idx($parts, 'user', '')); + $this->pass = rawurldecode(idx($parts, 'pass', '')); + $this->domain = idx($parts, 'host', ''); + $this->port = (string)idx($parts, 'port', ''); + $this->path = idx($parts, 'path', ''); $query = idx($parts, 'query'); if ($query) { $this->query = id(new PhutilQueryStringParser())->parseQueryString( $query); } $this->fragment = idx($parts, 'fragment', ''); + + $this->type = $type; } public function __toString() { $prefix = null; - if ($this->protocol || $this->domain || $this->port) { - $protocol = nonempty($this->protocol, 'http'); - - $auth = ''; - if (strlen($this->user) && strlen($this->pass)) { - $auth = rawurlencode($this->user).':'. 
- rawurlencode($this->pass).'@'; - } else if (strlen($this->user)) { - $auth = rawurlencode($this->user).'@'; + + if ($this->isGitURI()) { + $port = null; + } else { + $port = $this->port; + } + + $domain = $this->domain; + + $user = $this->user; + $pass = $this->pass; + if (strlen($user) && strlen($pass)) { + $auth = rawurlencode($user).':'.rawurlencode($pass).'@'; + } else if (strlen($user)) { + $auth = rawurlencode($user).'@'; + } else { + $auth = null; + } + + $protocol = $this->protocol; + if ($this->isGitURI()) { + $protocol = null; + } else { + if (strlen($auth)) { + $protocol = nonempty($this->protocol, 'http'); + } + } + + if (strlen($protocol) || strlen($auth) || strlen($domain)) { + if ($this->isGitURI()) { + $prefix = "{$auth}{$domain}"; + } else { + $prefix = "{$protocol}://{$auth}{$domain}"; } - $prefix = $protocol.'://'.$auth.$this->domain; - if ($this->port) { - $prefix .= ':'.$this->port; + if (strlen($port)) { + $prefix .= ':'.$port; } } @@ -100,8 +171,14 @@ $fragment = null; } + $path = $this->getPath(); + if ($this->isGitURI()) { + if (strlen($path)) { + $path = ':'.$path; + } + } - return $prefix.$this->getPath().$query.$fragment; + return $prefix.$path.$query.$fragment; } public function setQueryParam($key, $value) { @@ -126,6 +203,7 @@ $this->protocol = $protocol; return $this; } + public function getProtocol() { return $this->protocol; } @@ -161,9 +239,14 @@ } public function setPath($path) { - if ($this->domain && strlen($path) && $path[0] !== '/') { - $path = '/'.$path; + if ($this->isGitURI()) { + // Git URIs use relative paths which do not need to begin with "/". 
+ } else { + if ($this->domain && strlen($path) && $path[0] !== '/') { + $path = '/'.$path; + } } + $this->path = $path; return $this; } @@ -221,4 +304,57 @@ return $altered; } + public function isGitURI() { + return ($this->type == self::TYPE_GIT); + } + + public function setType($type) { + + if ($type == self::TYPE_URI) { + $path = $this->getPath(); + if (strlen($path) && ($path[0] !== '/')) { + // Try to catch this here because we are not allowed to throw from + // inside __toString() so we don't have a reasonable opportunity to + // react properly if we catch it later. + throw new Exception( + pht( + 'Unable to convert URI "%s" into a standard URI because the '. + 'path is relative. Standard URIs can not represent relative '. + 'paths.', + $this)); + } + } + + $this->type = $type; + return $this; + } + + public function getType() { + return $this->type; + } + + private function isGitURIPattern($uri) { + $matches = null; + + $ok = preg_match('(^(?P<head>[^/]+):(?P<last>(?!//).*)\z)', $uri, $matches); + if (!$ok) { + return false; + } + + $head = $matches['head']; + $last = $matches['last']; + + // If the first part has a "." or an "@" in it, interpret it as a domain + // or a "user@host" string. + if (preg_match('([.@])', $head)) { + return true; + } + + // Otherwise, interpret the URI conservatively as a "javascript:"-style + // URI. This means that "localhost:path" is parsed as a normal URI instead + // of a Git URI, but we can't tell which the user intends and it's safer + // to treat it as a normal URI. 
+ return false; + } + } diff --git a/src/parser/__tests__/PhutilURITestCase.php b/src/parser/__tests__/PhutilURITestCase.php --- a/src/parser/__tests__/PhutilURITestCase.php +++ b/src/parser/__tests__/PhutilURITestCase.php @@ -95,6 +95,9 @@ $this->assertEqual('good.com', $uri->getDomain()); $this->assertEqual('http://good.com?u%3Ap%40evil.com%2F=', (string)$uri); + $uri = new PhutilURI('www.example.com'); + $this->assertEqual('', $uri->getProtocol()); + $this->assertEqual('www.example.com', (string)$uri); } public function testURIGeneration() { @@ -150,6 +153,28 @@ $uri->getQueryParams()); } + public function testAmbiguousURIs() { + // It's important that this be detected as a Javascript URI, because that + // is how browsers will treat it. + $uri = new PhutilURI('javascript:evil'); + $this->assertEqual('javascript', $uri->getProtocol()); + + + // This is "wrong", in that the user probably intends for this to be a + // Git-style URI, but we can not easily parse it as one without making the + // "javascript" case above unsafe. + $uri = new PhutilURI('localhost:todo.txt'); + $this->assertEqual('localhost', $uri->getProtocol()); + + + // These variants are unambiguous and safe. 
+ $uri = new PhutilURI('localhost.com:todo.txt'); + $this->assertEqual('localhost.com', $uri->getDomain()); + + $uri = new PhutilURI('user@localhost:todo.txt'); + $this->assertEqual('localhost', $uri->getDomain()); + } + public function testDefaultPorts() { $uri = new PhutilURI('http://www.example.com'); $this->assertEqual('80', $uri->getPortWithProtocolDefault()); @@ -164,4 +189,43 @@ $this->assertEqual('', $uri->getPortWithProtocolDefault()); } + public function testGitURIParsing() { + $uri = new PhutilURI('git@host.com:path/to/something'); + $this->assertEqual('ssh', $uri->getProtocol()); + $this->assertEqual('git', $uri->getUser()); + $this->assertEqual('host.com', $uri->getDomain()); + $this->assertEqual('path/to/something', $uri->getPath()); + $this->assertEqual('git@host.com:path/to/something', (string)$uri); + + $uri = new PhutilURI('host.com:path/to/something'); + $this->assertEqual('ssh', $uri->getProtocol()); + $this->assertEqual('', $uri->getUser()); + $this->assertEqual('host.com', $uri->getDomain()); + $this->assertEqual('path/to/something', $uri->getPath()); + $this->assertEqual('host.com:path/to/something', (string)$uri); + + $uri_1 = new PhutilURI('host.com:path/to/something'); + $uri_2 = new PhutilURI($uri_1); + + $this->assertEqual((string)$uri_1, (string)$uri_2); + } + + public function testStrictGitURIParsingOfLeadingWhitespace() { + $uri = new PhutilURI(' user@example.com:path'); + $this->assertEqual('', $uri->getDomain()); + } + + public function testNoRelativeURIPaths() { + $uri = new PhutilURI('user@example.com:relative_path'); + + $caught = null; + try { + $uri->setType(PhutilURI::TYPE_URI); + } catch (Exception $ex) { + $caught = $ex; + } + + $this->assertTrue($caught instanceof Exception); + } + }