Page MenuHomePhabricator

D16099.id38739.diff
No OneTemporary

D16099.id38739.diff

diff --git a/src/markup/__tests__/PhutilMarkupTestCase.php b/src/markup/__tests__/PhutilMarkupTestCase.php
--- a/src/markup/__tests__/PhutilMarkupTestCase.php
+++ b/src/markup/__tests__/PhutilMarkupTestCase.php
@@ -164,19 +164,28 @@
}
if ($use_uri) {
- $href = new PhutilURI($href);
+ $href_value = new PhutilURI($href);
+ } else {
+ $href_value = $href;
}
$caught = null;
try {
- phutil_tag('a', array('href' => $href), 'click for candy');
+ phutil_tag('a', array('href' => $href_value), 'click for candy');
} catch (Exception $ex) {
$caught = $ex;
}
+
+ $desc = pht(
+ 'Unexpected result for "%s". <uri = %s, expect exception = %s>',
+ $href,
+ $use_uri ? pht('Yes') : pht('No'),
+ $expect ? pht('Yes') : pht('No'));
+
$this->assertEqual(
$expect,
$caught instanceof Exception,
- pht('Rejected href: %s', $href));
+ $desc);
}
}
}
diff --git a/src/parser/PhutilURI.php b/src/parser/PhutilURI.php
--- a/src/parser/PhutilURI.php
+++ b/src/parser/PhutilURI.php
@@ -1,7 +1,16 @@
<?php
/**
- * Basic URI parser object.
+ * Structural representation of a URI.
+ *
+ * This class handles URIs of two types: standard URIs and Git URIs.
+ *
+ * Standard URIs look like `proto://user:pass@domain:port/path?query#fragment`.
+ * Almost all URIs are in this form.
+ *
+ * Git URIs look like `user@host:path`, where `user@` is optional. They are
+ * used by Git and SCP, have an implicit "ssh" protocol and no port, and
+ * interpret paths as relative instead of absolute.
*/
final class PhutilURI extends Phobject {
@@ -13,10 +22,29 @@
private $path;
private $query = array();
private $fragment;
+ private $type;
+
+ const TYPE_URI = 'uri';
+ const TYPE_GIT = 'git';
public function __construct($uri) {
+ if ($uri instanceof PhutilURI) {
+ $this->protocol = $uri->protocol;
+ $this->user = $uri->user;
+ $this->pass = $uri->pass;
+ $this->domain = $uri->domain;
+ $this->port = $uri->port;
+ $this->path = $uri->path;
+ $this->query = $uri->query;
+ $this->fragment = $uri->fragment;
+ $this->type = $uri->type;
+ return;
+ }
+
$uri = (string)$uri;
+ $type = self::TYPE_URI;
+
$matches = null;
if (preg_match('(^([^/:]*://[^/]*)(\\?.*)\z)', $uri, $matches)) {
// If the URI is something like `idea://open?file=/path/to/file`, the
@@ -26,6 +54,25 @@
$parts = parse_url($matches[1].'/'.$matches[2]);
unset($parts['path']);
+ } else if ($this->isGitURIPattern($uri)) {
+ // Handle Git/SCP URIs in the form "user@domain:relative/path".
+
+ $user = '(?:(?P<user>[^/@]+)@)?';
+ $host = '(?P<host>[^/:]+)';
+ $path = ':(?P<path>.*)';
+
+ $ok = preg_match('(^\s*'.$user.$host.$path.'\z)', $uri, $matches);
+ if (!$ok) {
+ throw new Exception(
+ pht(
+ 'Failed to parse URI "%s" as a Git URI.',
+ $uri));
+ }
+
+ $parts = $matches;
+ $parts['scheme'] = 'ssh';
+
+ $type = self::TYPE_GIT;
} else {
$parts = parse_url($uri);
}
@@ -39,7 +86,6 @@
}
}
-
// NOTE: `parse_url()` is very liberal about host names; fail the parse if
// the host looks like garbage.
if ($parts) {
@@ -56,35 +102,60 @@
// stringyness is to preserve API compatibility and
// allow the tests to continue passing
$this->protocol = idx($parts, 'scheme', '');
- $this->user = rawurldecode(idx($parts, 'user', ''));
- $this->pass = rawurldecode(idx($parts, 'pass', ''));
- $this->domain = idx($parts, 'host', '');
- $this->port = (string)idx($parts, 'port', '');
- $this->path = idx($parts, 'path', '');
+ $this->user = rawurldecode(idx($parts, 'user', ''));
+ $this->pass = rawurldecode(idx($parts, 'pass', ''));
+ $this->domain = idx($parts, 'host', '');
+ $this->port = (string)idx($parts, 'port', '');
+ $this->path = idx($parts, 'path', '');
$query = idx($parts, 'query');
if ($query) {
$this->query = id(new PhutilQueryStringParser())->parseQueryString(
$query);
}
$this->fragment = idx($parts, 'fragment', '');
+
+ $this->type = $type;
}
public function __toString() {
$prefix = null;
- if ($this->protocol || $this->domain || $this->port) {
- $protocol = nonempty($this->protocol, 'http');
-
- $auth = '';
- if (strlen($this->user) && strlen($this->pass)) {
- $auth = rawurlencode($this->user).':'.
- rawurlencode($this->pass).'@';
- } else if (strlen($this->user)) {
- $auth = rawurlencode($this->user).'@';
+
+ if ($this->isGitURI()) {
+ $port = null;
+ } else {
+ $port = $this->port;
+ }
+
+ $domain = $this->domain;
+
+ $user = $this->user;
+ $pass = $this->pass;
+ if (strlen($user) && strlen($pass)) {
+ $auth = rawurlencode($user).':'.rawurlencode($pass).'@';
+ } else if (strlen($user)) {
+ $auth = rawurlencode($user).'@';
+ } else {
+ $auth = null;
+ }
+
+ $protocol = $this->protocol;
+ if ($this->isGitURI()) {
+ $protocol = null;
+ } else {
+ if (strlen($auth)) {
+ $protocol = nonempty($this->protocol, 'http');
+ }
+ }
+
+ if (strlen($protocol) || strlen($auth) || strlen($domain)) {
+ if ($this->isGitURI()) {
+ $prefix = "{$auth}{$domain}";
+ } else {
+ $prefix = "{$protocol}://{$auth}{$domain}";
}
- $prefix = $protocol.'://'.$auth.$this->domain;
- if ($this->port) {
- $prefix .= ':'.$this->port;
+ if (strlen($port)) {
+ $prefix .= ':'.$port;
}
}
@@ -100,8 +171,14 @@
$fragment = null;
}
+ $path = $this->getPath();
+ if ($this->isGitURI()) {
+ if (strlen($path)) {
+ $path = ':'.$path;
+ }
+ }
- return $prefix.$this->getPath().$query.$fragment;
+ return $prefix.$path.$query.$fragment;
}
public function setQueryParam($key, $value) {
@@ -126,6 +203,7 @@
$this->protocol = $protocol;
return $this;
}
+
public function getProtocol() {
return $this->protocol;
}
@@ -161,9 +239,14 @@
}
public function setPath($path) {
- if ($this->domain && strlen($path) && $path[0] !== '/') {
- $path = '/'.$path;
+ if ($this->isGitURI()) {
+ // Git URIs use relative paths which do not need to begin with "/".
+ } else {
+ if ($this->domain && strlen($path) && $path[0] !== '/') {
+ $path = '/'.$path;
+ }
}
+
$this->path = $path;
return $this;
}
@@ -221,4 +304,57 @@
return $altered;
}
+ public function isGitURI() {
+ return ($this->type == self::TYPE_GIT);
+ }
+
+ public function setType($type) {
+
+ if ($type == self::TYPE_URI) {
+ $path = $this->getPath();
+ if (strlen($path) && ($path[0] !== '/')) {
+ // Reject this conversion here: __toString() is not allowed to throw, so
+ // if the relative path were only discovered while rendering the URI,
+ // there would be no reasonable way to report the problem.
+ throw new Exception(
+ pht(
+ 'Unable to convert URI "%s" into a standard URI because the '.
+ 'path is relative. Standard URIs can not represent relative '.
+ 'paths.',
+ $this));
+ }
+ }
+
+ $this->type = $type;
+ return $this;
+ }
+
+ public function getType() {
+ return $this->type;
+ }
+
+ private function isGitURIPattern($uri) {
+ $matches = null;
+
+ $ok = preg_match('(^(?P<head>[^/]+):(?P<last>(?!//).*)\z)', $uri, $matches);
+ if (!$ok) {
+ return false;
+ }
+
+ $head = $matches['head'];
+ $last = $matches['last'];
+
+ // If the first part has a "." or an "@" in it, interpret it as a domain
+ // or a "user@host" string.
+ if (preg_match('([.@])', $head)) {
+ return true;
+ }
+
+ // Otherwise, interpret the URI conservatively as a "javascript:"-style
+ // URI. This means that "localhost:path" is parsed as a normal URI instead
+ // of a Git URI, but we can't tell which the user intends and it's safer
+ // to treat it as a normal URI.
+ return false;
+ }
+
}
diff --git a/src/parser/__tests__/PhutilURITestCase.php b/src/parser/__tests__/PhutilURITestCase.php
--- a/src/parser/__tests__/PhutilURITestCase.php
+++ b/src/parser/__tests__/PhutilURITestCase.php
@@ -95,6 +95,9 @@
$this->assertEqual('good.com', $uri->getDomain());
$this->assertEqual('http://good.com?u%3Ap%40evil.com%2F=', (string)$uri);
+ $uri = new PhutilURI('www.example.com');
+ $this->assertEqual('', $uri->getProtocol());
+ $this->assertEqual('www.example.com', (string)$uri);
}
public function testURIGeneration() {
@@ -150,6 +153,28 @@
$uri->getQueryParams());
}
+ public function testAmbiguousURIs() {
+ // It's important that this be detected as a JavaScript URI, because that
+ // is how browsers will treat it.
+ $uri = new PhutilURI('javascript:evil');
+ $this->assertEqual('javascript', $uri->getProtocol());
+
+
+ // This is "wrong", in that the user probably intends for this to be a
+ // Git-style URI, but we cannot easily parse it as one without making the
+ // "javascript" case above unsafe.
+ $uri = new PhutilURI('localhost:todo.txt');
+ $this->assertEqual('localhost', $uri->getProtocol());
+
+
+ // These variants are unambiguous and safe.
+ $uri = new PhutilURI('localhost.com:todo.txt');
+ $this->assertEqual('localhost.com', $uri->getDomain());
+
+ $uri = new PhutilURI('user@localhost:todo.txt');
+ $this->assertEqual('localhost', $uri->getDomain());
+ }
+
public function testDefaultPorts() {
$uri = new PhutilURI('http://www.example.com');
$this->assertEqual('80', $uri->getPortWithProtocolDefault());
@@ -164,4 +189,43 @@
$this->assertEqual('', $uri->getPortWithProtocolDefault());
}
+ public function testGitURIParsing() {
+ $uri = new PhutilURI('git@host.com:path/to/something');
+ $this->assertEqual('ssh', $uri->getProtocol());
+ $this->assertEqual('git', $uri->getUser());
+ $this->assertEqual('host.com', $uri->getDomain());
+ $this->assertEqual('path/to/something', $uri->getPath());
+ $this->assertEqual('git@host.com:path/to/something', (string)$uri);
+
+ $uri = new PhutilURI('host.com:path/to/something');
+ $this->assertEqual('ssh', $uri->getProtocol());
+ $this->assertEqual('', $uri->getUser());
+ $this->assertEqual('host.com', $uri->getDomain());
+ $this->assertEqual('path/to/something', $uri->getPath());
+ $this->assertEqual('host.com:path/to/something', (string)$uri);
+
+ $uri_1 = new PhutilURI('host.com:path/to/something');
+ $uri_2 = new PhutilURI($uri_1);
+
+ $this->assertEqual((string)$uri_1, (string)$uri_2);
+ }
+
+ public function testStrictGitURIParsingOfLeadingWhitespace() {
+ $uri = new PhutilURI(' user@example.com:path');
+ $this->assertEqual('', $uri->getDomain());
+ }
+
+ public function testNoRelativeURIPaths() {
+ $uri = new PhutilURI('user@example.com:relative_path');
+
+ $caught = null;
+ try {
+ $uri->setType(PhutilURI::TYPE_URI);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertTrue($caught instanceof Exception);
+ }
+
}

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 29, 10:33 AM (1 w, 3 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6732614
Default Alt Text
D16099.id38739.diff (11 KB)

Event Timeline