diff --git a/src/markup/__tests__/PhutilMarkupTestCase.php b/src/markup/__tests__/PhutilMarkupTestCase.php
index 073ee7e..df6620b 100644
--- a/src/markup/__tests__/PhutilMarkupTestCase.php
+++ b/src/markup/__tests__/PhutilMarkupTestCase.php
@@ -1,265 +1,274 @@
assertEqual(
(string)phutil_tag('br'),
(string)phutil_tag('br', array()));
$this->assertEqual(
(string)phutil_tag('br', array()),
(string)phutil_tag('br', array(), null));
}
public function testTagEmpty() {
$this->assertEqual(
'
',
(string)phutil_tag('br', array(), null));
$this->assertEqual(
'
',
(string)phutil_tag('div', array(), null));
$this->assertEqual(
'',
(string)phutil_tag('div', array(), ''));
}
public function testTagBasics() {
$this->assertEqual(
'
',
(string)phutil_tag('br'));
$this->assertEqual(
'y
',
(string)phutil_tag('div', array(), 'y'));
}
public function testTagAttributes() {
$this->assertEqual(
'y
',
(string)phutil_tag('div', array('u' => 'v'), 'y'));
$this->assertEqual(
'
',
(string)phutil_tag('br', array('u' => 'v')));
}
public function testTagEscapes() {
$this->assertEqual(
'
',
(string)phutil_tag('br', array('u' => '<')));
$this->assertEqual(
'
',
(string)phutil_tag('div', array(), phutil_tag('br')));
}
public function testTagNullAttribute() {
$this->assertEqual(
'
',
(string)phutil_tag('br', array('y' => null)));
}
public function testDefaultRelNoreferrer() {
$map = array(
// These should not have rel="nofollow" inserted implicitly.
'/' => false,
'/path/to/local.html' => false,
'#example' => false,
'' => false,
// These should get the implicit insertion.
'http://www.example.org/' => true,
'///evil.com/' => true,
' http://www.example.org/' => true,
'ftp://filez.com' => true,
'mailto:santa@northpole.com' => true,
'tel:18005555555' => true,
);
foreach ($map as $input => $expect) {
$tag = phutil_tag(
'a',
array(
'href' => $input,
),
'link');
$tag = (string)$tag;
$this->assertEqual($expect, (bool)preg_match('/noreferrer/', $tag));
}
// With an explicit `rel` present, we should not override it.
$tag = phutil_tag(
'a',
array(
'href' => 'http://www.example.org/',
'rel' => 'nofollow',
),
'link');
$this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag));
// For tags other than `a`, we should not insert `rel`.
$tag = phutil_tag(
'link',
array(
'href' => 'http://www.example.org/',
),
'link');
$this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag));
}
public function testTagJavascriptProtocolRejection() {
$hrefs = array(
'javascript:alert(1)' => true,
'JAVASCRIPT:alert(2)' => true,
// NOTE: When interpreted as a URI, this is dropped because of leading
// whitespace.
' javascript:alert(3)' => array(true, false),
'/' => false,
'/path/to/stuff/' => false,
'' => false,
'http://example.com/' => false,
'#' => false,
'javascript://anything' => true,
// Chrome 33 and IE11, at a minimum, treat this as Javascript.
"javascript\n:alert(4)" => true,
// Opera currently accepts a variety of unicode spaces. This test case
// has a smattering of them.
"\xE2\x80\x89javascript:" => true,
"javascript\xE2\x80\x89:" => true,
"\xE2\x80\x84javascript:" => true,
"javascript\xE2\x80\x84:" => true,
// Because we're aggressive, all of unicode should trigger detection
// by default.
"\xE2\x98\x83javascript:" => true,
"javascript\xE2\x98\x83:" => true,
"\xE2\x98\x83javascript\xE2\x98\x83:" => true,
// We're aggressive about this, so we'll intentionally raise false
// positives in these cases.
'javascript~:alert(5)' => true,
'!!!javascript!!!!:alert(6)' => true,
// However, we should raise true negatives in these slightly more
// reasonable cases.
'javascript/:docs.html' => false,
'javascripts:x.png' => false,
'COOLjavascript:page' => false,
'/javascript:alert(1)' => false,
);
foreach (array(true, false) as $use_uri) {
foreach ($hrefs as $href => $expect) {
if (is_array($expect)) {
$expect = ($use_uri ? $expect[1] : $expect[0]);
}
if ($use_uri) {
- $href = new PhutilURI($href);
+ $href_value = new PhutilURI($href);
+ } else {
+ $href_value = $href;
}
$caught = null;
try {
- phutil_tag('a', array('href' => $href), 'click for candy');
+ phutil_tag('a', array('href' => $href_value), 'click for candy');
} catch (Exception $ex) {
$caught = $ex;
}
+
+ $desc = pht(
+ 'Unexpected result for "%s". <uri = %s, expect exception = %s>',
+ $href,
+ $use_uri ? pht('Yes') : pht('No'),
+ $expect ? pht('Yes') : pht('No'));
+
$this->assertEqual(
$expect,
$caught instanceof Exception,
- pht('Rejected href: %s', $href));
+ $desc);
}
}
}
public function testURIEscape() {
$this->assertEqual(
'%2B/%20%3F%23%26%3A%21xyz%25',
phutil_escape_uri('+/ ?#&:!xyz%'));
}
public function testURIPathComponentEscape() {
$this->assertEqual(
'a%252Fb',
phutil_escape_uri_path_component('a/b'));
$str = '';
for ($ii = 0; $ii <= 255; $ii++) {
$str .= chr($ii);
}
$this->assertEqual(
$str,
phutil_unescape_uri_path_component(
rawurldecode( // Simulates webserver.
phutil_escape_uri_path_component($str))));
}
public function testHsprintf() {
$this->assertEqual(
'<3
',
(string)hsprintf('%s
', '<3'));
}
public function testAppendHTML() {
$html = phutil_tag('hr');
$html->appendHTML(phutil_tag('br'), '');
$this->assertEqual('
<evil>', $html->getHTMLContent());
}
public function testArrayEscaping() {
$this->assertEqual(
'<div>
',
phutil_escape_html(
array(
hsprintf(''),
array(
array(
'<',
array(
'd',
array(
array(
hsprintf('i'),
),
'v',
),
),
array(
array(
'>',
),
),
),
),
hsprintf('
'),
)));
$this->assertEqual(
'
',
phutil_tag(
'div',
array(),
array(
array(
array(
phutil_tag('br'),
array(
phutil_tag('hr'),
),
phutil_tag('wbr'),
),
),
))->getHTMLContent());
}
}
diff --git a/src/parser/PhutilURI.php b/src/parser/PhutilURI.php
index f9612fd..abca039 100644
--- a/src/parser/PhutilURI.php
+++ b/src/parser/PhutilURI.php
@@ -1,224 +1,360 @@
protocol = $uri->protocol;
+ $this->user = $uri->user;
+ $this->pass = $uri->pass;
+ $this->domain = $uri->domain;
+ $this->port = $uri->port;
+ $this->path = $uri->path;
+ $this->query = $uri->query;
+ $this->fragment = $uri->fragment;
+ $this->type = $uri->type;
+ return;
+ }
+
$uri = (string)$uri;
+ $type = self::TYPE_URI;
+
$matches = null;
if (preg_match('(^([^/:]*://[^/]*)(\\?.*)\z)', $uri, $matches)) {
// If the URI is something like `idea://open?file=/path/to/file`, the
// `parse_url()` function will parse `open?file=` as the host. This is
// not the expected result. Break the URI into two pieces, stick a slash
// in between them, parse that, then remove the path. See T6106.
$parts = parse_url($matches[1].'/'.$matches[2]);
unset($parts['path']);
+ } else if ($this->isGitURIPattern($uri)) {
+ // Handle Git/SCP URIs in the form "user@domain:relative/path".
+
+ $user = '(?:(?P<user>[^/@]+)@)?';
+ $host = '(?P<host>[^/:]+)';
+ $path = ':(?P<path>.*)';
+
+ $ok = preg_match('(^\s*'.$user.$host.$path.'\z)', $uri, $matches);
+ if (!$ok) {
+ throw new Exception(
+ pht(
+ 'Failed to parse URI "%s" as a Git URI.',
+ $uri));
+ }
+
+ $parts = $matches;
+ $parts['scheme'] = 'ssh';
+
+ $type = self::TYPE_GIT;
} else {
$parts = parse_url($uri);
}
// The parse_url() call will accept URIs with leading whitespace, but many
// other tools (like git) will not. See T4913 for a specific example. If
// the input string has leading whitespace, fail the parse.
if ($parts) {
if (ltrim($uri) != $uri) {
$parts = false;
}
}
-
// NOTE: `parse_url()` is very liberal about host names; fail the parse if
// the host looks like garbage.
if ($parts) {
$host = idx($parts, 'host', '');
if (!preg_match('/^([a-zA-Z0-9\\.\\-]*)$/', $host)) {
$parts = false;
}
}
if (!$parts) {
$parts = array();
}
// stringyness is to preserve API compatibility and
// allow the tests to continue passing
$this->protocol = idx($parts, 'scheme', '');
- $this->user = rawurldecode(idx($parts, 'user', ''));
- $this->pass = rawurldecode(idx($parts, 'pass', ''));
- $this->domain = idx($parts, 'host', '');
- $this->port = (string)idx($parts, 'port', '');
- $this->path = idx($parts, 'path', '');
+ $this->user = rawurldecode(idx($parts, 'user', ''));
+ $this->pass = rawurldecode(idx($parts, 'pass', ''));
+ $this->domain = idx($parts, 'host', '');
+ $this->port = (string)idx($parts, 'port', '');
+ $this->path = idx($parts, 'path', '');
$query = idx($parts, 'query');
if ($query) {
$this->query = id(new PhutilQueryStringParser())->parseQueryString(
$query);
}
$this->fragment = idx($parts, 'fragment', '');
+
+ $this->type = $type;
}
public function __toString() {
$prefix = null;
- if ($this->protocol || $this->domain || $this->port) {
- $protocol = nonempty($this->protocol, 'http');
-
- $auth = '';
- if (strlen($this->user) && strlen($this->pass)) {
- $auth = rawurlencode($this->user).':'.
- rawurlencode($this->pass).'@';
- } else if (strlen($this->user)) {
- $auth = rawurlencode($this->user).'@';
+
+ if ($this->isGitURI()) {
+ $port = null;
+ } else {
+ $port = $this->port;
+ }
+
+ $domain = $this->domain;
+
+ $user = $this->user;
+ $pass = $this->pass;
+ if (strlen($user) && strlen($pass)) {
+ $auth = rawurlencode($user).':'.rawurlencode($pass).'@';
+ } else if (strlen($user)) {
+ $auth = rawurlencode($user).'@';
+ } else {
+ $auth = null;
+ }
+
+ $protocol = $this->protocol;
+ if ($this->isGitURI()) {
+ $protocol = null;
+ } else {
+ if (strlen($auth)) {
+ $protocol = nonempty($this->protocol, 'http');
+ }
+ }
+
+ if (strlen($protocol) || strlen($auth) || strlen($domain)) {
+ if ($this->isGitURI()) {
+ $prefix = "{$auth}{$domain}";
+ } else {
+ $prefix = "{$protocol}://{$auth}{$domain}";
}
- $prefix = $protocol.'://'.$auth.$this->domain;
- if ($this->port) {
- $prefix .= ':'.$this->port;
+ if (strlen($port)) {
+ $prefix .= ':'.$port;
}
}
if ($this->query) {
$query = '?'.http_build_query($this->query, '', '&');
} else {
$query = null;
}
if (strlen($this->getFragment())) {
$fragment = '#'.$this->getFragment();
} else {
$fragment = null;
}
+ $path = $this->getPath();
+ if ($this->isGitURI()) {
+ if (strlen($path)) {
+ $path = ':'.$path;
+ }
+ }
- return $prefix.$this->getPath().$query.$fragment;
+ return $prefix.$path.$query.$fragment;
}
public function setQueryParam($key, $value) {
if ($value === null) {
unset($this->query[$key]);
} else {
$this->query[$key] = $value;
}
return $this;
}
public function setQueryParams(array $params) {
$this->query = $params;
return $this;
}
public function getQueryParams() {
return $this->query;
}
public function setProtocol($protocol) {
$this->protocol = $protocol;
return $this;
}
+
public function getProtocol() {
return $this->protocol;
}
public function setDomain($domain) {
$this->domain = $domain;
return $this;
}
public function getDomain() {
return $this->domain;
}
public function setPort($port) {
$this->port = $port;
return $this;
}
public function getPort() {
return $this->port;
}
public function getPortWithProtocolDefault() {
static $default_ports = array(
'http' => '80',
'https' => '443',
'ssh' => '22',
);
return nonempty(
$this->getPort(),
idx($default_ports, $this->getProtocol()),
'');
}
public function setPath($path) {
- if ($this->domain && strlen($path) && $path[0] !== '/') {
- $path = '/'.$path;
+ if ($this->isGitURI()) {
+ // Git URIs use relative paths which do not need to begin with "/".
+ } else {
+ if ($this->domain && strlen($path) && $path[0] !== '/') {
+ $path = '/'.$path;
+ }
}
+
$this->path = $path;
return $this;
}
public function appendPath($path) {
$first = strlen($path) ? $path[0] : null;
$last = strlen($this->path) ? $this->path[strlen($this->path) - 1] : null;
if (!$this->path) {
return $this->setPath($path);
} else if ($first === '/' && $last === '/') {
$path = substr($path, 1);
} else if ($first !== '/' && $last !== '/') {
$path = '/'.$path;
}
$this->path .= $path;
return $this;
}
public function getPath() {
return $this->path;
}
public function setFragment($fragment) {
$this->fragment = $fragment;
return $this;
}
public function getFragment() {
return $this->fragment;
}
public function setUser($user) {
$this->user = $user;
return $this;
}
public function getUser() {
return $this->user;
}
public function setPass($pass) {
$this->pass = $pass;
return $this;
}
public function getPass() {
return $this->pass;
}
public function alter($key, $value) {
$altered = clone $this;
$altered->setQueryParam($key, $value);
return $altered;
}
+ /**
+  * Test whether this URI is currently typed as a Git/SCP-style URI.
+  *
+  * @return bool True if the stored type equals `TYPE_GIT`.
+  */
+ public function isGitURI() {
+   $is_git = ($this->type == self::TYPE_GIT);
+   return $is_git;
+ }
+
+ /**
+  * Change the type of this URI.
+  *
+  * Converting to `TYPE_URI` is refused when the current path is nonempty and
+  * does not begin with "/".
+  *
+  * @param  string $type Type constant, such as `TYPE_URI` or `TYPE_GIT`.
+  * @return this
+  * @throws Exception If converting to `TYPE_URI` with a relative path.
+  */
+ public function setType($type) {
+   $wants_standard = ($type == self::TYPE_URI);
+
+   if ($wants_standard) {
+     $current = $this->getPath();
+     $is_relative = (strlen($current) && ($current[0] !== '/'));
+
+     if ($is_relative) {
+       // Check eagerly: __toString() is not allowed to throw, so there is no
+       // reasonable opportunity to react to this problem later.
+       throw new Exception(
+         pht(
+           'Unable to convert URI "%s" into a standard URI because the '.
+           'path is relative. Standard URIs can not represent relative '.
+           'paths.',
+           $this));
+     }
+   }
+
+   $this->type = $type;
+   return $this;
+ }
+
+ public function getType() {
+ return $this->type;
+ }
+
+ /**
+  * Test whether a string looks like a Git/SCP-style URI ("user@host:path").
+  *
+  * A string qualifies when it contains a ":" that is not followed by "//",
+  * has no "/" before that ":", and has a "." or "@" somewhere before it.
+  * Strings like "localhost:path" or "javascript:alert(1)" do not qualify.
+  *
+  * @param  string $uri Raw URI string to examine.
+  * @return bool   True if the string should be parsed as a Git URI.
+  */
+ private function isGitURIPattern($uri) {
+   $matches = null;
+
+   // "head" is everything before the separating ":"; "last" is the remainder,
+   // which must not begin with "//" (that would be a "proto://" URI).
+   $ok = preg_match(
+     '(^(?P<head>[^/]+):(?P<last>(?!//).*)\z)',
+     $uri,
+     $matches);
+   if (!$ok) {
+     return false;
+   }
+
+   $head = $matches['head'];
+
+   // If the first part has a "." or an "@" in it, interpret it as a domain
+   // or a "user@host" string.
+   if (preg_match('([.@])', $head)) {
+     return true;
+   }
+
+   // Otherwise, interpret the URI conservatively as a "javascript:"-style
+   // URI. This means that "localhost:path" is parsed as a normal URI instead
+   // of a Git URI, but we can't tell which the user intends and it's safer
+   // to treat it as a normal URI.
+   return false;
+ }
+
}
diff --git a/src/parser/__tests__/PhutilURITestCase.php b/src/parser/__tests__/PhutilURITestCase.php
index 57b68f8..60d837d 100644
--- a/src/parser/__tests__/PhutilURITestCase.php
+++ b/src/parser/__tests__/PhutilURITestCase.php
@@ -1,167 +1,231 @@
assertEqual('http', $uri->getProtocol(), pht('protocol'));
$this->assertEqual('user', $uri->getUser(), pht('user'));
$this->assertEqual('pass', $uri->getPass(), pht('password'));
$this->assertEqual('host', $uri->getDomain(), pht('domain'));
$this->assertEqual('99', $uri->getPort(), pht('port'));
$this->assertEqual('/path/', $uri->getPath(), pht('path'));
$this->assertEqual(
array(
'query' => 'value',
),
$uri->getQueryParams(),
'query params');
$this->assertEqual('fragment', $uri->getFragment(), pht('fragment'));
$this->assertEqual(
'http://user:pass@host:99/path/?query=value#fragment',
(string)$uri,
'uri');
$uri = new PhutilURI('ssh://git@example.com/example/example.git');
$this->assertEqual('ssh', $uri->getProtocol(), pht('protocol'));
$this->assertEqual('git', $uri->getUser(), pht('user'));
$this->assertEqual('', $uri->getPass(), pht('password'));
$this->assertEqual('example.com', $uri->getDomain(), pht('domain'));
$this->assertEqual('', $uri->getPort(), 'port');
$this->assertEqual('/example/example.git', $uri->getPath(), pht('path'));
$this->assertEqual(
array(),
$uri->getQueryParams(),
pht('query parameters'));
$this->assertEqual('', $uri->getFragment(), pht('fragment'));
$this->assertEqual(
'ssh://git@example.com/example/example.git',
(string)$uri,
'uri');
$uri = new PhutilURI('http://0@domain.com/');
$this->assertEqual('0', $uri->getUser());
$this->assertEqual('http://0@domain.com/', (string)$uri);
$uri = new PhutilURI('http://0:0@domain.com/');
$this->assertEqual('0', $uri->getUser());
$this->assertEqual('0', $uri->getPass());
$this->assertEqual('http://0:0@domain.com/', (string)$uri);
$uri = new PhutilURI('http://%20:%20@domain.com/');
$this->assertEqual(' ', $uri->getUser());
$this->assertEqual(' ', $uri->getPass());
$this->assertEqual('http://%20:%20@domain.com/', (string)$uri);
$uri = new PhutilURI('http://%40:%40@domain.com/');
$this->assertEqual('@', $uri->getUser());
$this->assertEqual('@', $uri->getPass());
$this->assertEqual('http://%40:%40@domain.com/', (string)$uri);
$uri = new PhutilURI('http://%2F:%2F@domain.com/');
$this->assertEqual('/', $uri->getUser());
$this->assertEqual('/', $uri->getPass());
$this->assertEqual('http://%2F:%2F@domain.com/', (string)$uri);
// These tests are covering cases where cURL and parse_url() behavior
// may differ in potentially dangerous ways. See T6755 for discussion.
// In general, we defuse these attacks by emitting URIs which escape
// special characters so that they are interpreted unambiguously by
// cURL in the same way that parse_url() interpreted them.
$uri = new PhutilURI('http://u:p@evil.com?@good.com');
$this->assertEqual('u', $uri->getUser());
$this->assertEqual('p', $uri->getPass());
$this->assertEqual('evil.com', $uri->getDomain());
$this->assertEqual('http://u:p@evil.com?%40good.com=', (string)$uri);
$uri = new PhutilURI('http://good.com#u:p@evil.com/');
$this->assertEqual('good.com#u', $uri->getUser());
$this->assertEqual('p', $uri->getPass());
$this->assertEqual('evil.com', $uri->getDomain());
$this->assertEqual('http://good.com%23u:p@evil.com/', (string)$uri);
$uri = new PhutilURI('http://good.com?u:p@evil.com/');
$this->assertEqual('', $uri->getUser());
$this->assertEqual('', $uri->getPass());
$this->assertEqual('good.com', $uri->getDomain());
$this->assertEqual('http://good.com?u%3Ap%40evil.com%2F=', (string)$uri);
+ $uri = new PhutilURI('www.example.com');
+ $this->assertEqual('', $uri->getProtocol());
+ $this->assertEqual('www.example.com', (string)$uri);
}
public function testURIGeneration() {
$uri = new PhutilURI('http://example.com');
$uri->setPath('bar');
$this->assertEqual('http://example.com/bar', $uri->__toString());
}
public function testStrictURIParsingOfHosts() {
$uri = new PhutilURI('http://&/');
$this->assertEqual('', $uri->getDomain());
}
public function testStrictURIParsingOfLeadingWhitespace() {
$uri = new PhutilURI(' http://example.com/');
$this->assertEqual('', $uri->getDomain());
}
public function testAppendPath() {
$uri = new PhutilURI('http://example.com');
$uri->appendPath('foo');
$this->assertEqual('http://example.com/foo', $uri->__toString());
$uri->appendPath('bar');
$this->assertEqual('http://example.com/foo/bar', $uri->__toString());
$uri = new PhutilURI('http://example.com');
$uri->appendPath('/foo/');
$this->assertEqual('http://example.com/foo/', $uri->__toString());
$uri->appendPath('/bar/');
$this->assertEqual('http://example.com/foo/bar/', $uri->__toString());
$uri = new PhutilURI('http://example.com');
$uri->appendPath('foo');
$this->assertEqual('http://example.com/foo', $uri->__toString());
$uri->appendPath('/bar/');
$this->assertEqual('http://example.com/foo/bar/', $uri->__toString());
}
public function testUnusualURIs() {
$uri = new PhutilURI('file:///path/to/file');
$this->assertEqual('file', $uri->getProtocol(), pht('protocol'));
$this->assertEqual('', $uri->getDomain(), pht('domain'));
$this->assertEqual('/path/to/file', $uri->getPath(), pht('path'));
$uri = new PhutilURI('idea://open?x=/');
$this->assertEqual('idea', $uri->getProtocol(), pht('protocol'));
$this->assertEqual('open', $uri->getDomain(), pht('domain'));
$this->assertEqual('', $uri->getPath(), pht('path'));
$this->assertEqual(
array(
'x' => '/',
),
$uri->getQueryParams());
}
+ public function testAmbiguousURIs() {
+ // It's important that this be detected as a Javascript URI, because that
+ // is how browsers will treat it.
+ $uri = new PhutilURI('javascript:evil');
+ $this->assertEqual('javascript', $uri->getProtocol());
+
+
+ // This is "wrong", in that the user probably intends for this to be a
+ // Git-style URI, but we can not easily parse it as one without making the
+ // "javascript" case above unsafe.
+ $uri = new PhutilURI('localhost:todo.txt');
+ $this->assertEqual('localhost', $uri->getProtocol());
+
+
+ // These variants are unambiguous and safe.
+ $uri = new PhutilURI('localhost.com:todo.txt');
+ $this->assertEqual('localhost.com', $uri->getDomain());
+
+ $uri = new PhutilURI('user@localhost:todo.txt');
+ $this->assertEqual('localhost', $uri->getDomain());
+ }
+
public function testDefaultPorts() {
$uri = new PhutilURI('http://www.example.com');
$this->assertEqual('80', $uri->getPortWithProtocolDefault());
$uri = new PhutilURI('https://www.example.com');
$this->assertEqual('443', $uri->getPortWithProtocolDefault());
$uri = new PhutilURI('ssh://git@example.com/example/example.git');
$this->assertEqual('22', $uri->getPortWithProtocolDefault());
$uri = new PhutilURI('unknown://www.example.com');
$this->assertEqual('', $uri->getPortWithProtocolDefault());
}
+ public function testGitURIParsing() {
+ $uri = new PhutilURI('git@host.com:path/to/something');
+ $this->assertEqual('ssh', $uri->getProtocol());
+ $this->assertEqual('git', $uri->getUser());
+ $this->assertEqual('host.com', $uri->getDomain());
+ $this->assertEqual('path/to/something', $uri->getPath());
+ $this->assertEqual('git@host.com:path/to/something', (string)$uri);
+
+ $uri = new PhutilURI('host.com:path/to/something');
+ $this->assertEqual('ssh', $uri->getProtocol());
+ $this->assertEqual('', $uri->getUser());
+ $this->assertEqual('host.com', $uri->getDomain());
+ $this->assertEqual('path/to/something', $uri->getPath());
+ $this->assertEqual('host.com:path/to/something', (string)$uri);
+
+ $uri_1 = new PhutilURI('host.com:path/to/something');
+ $uri_2 = new PhutilURI($uri_1);
+
+ $this->assertEqual((string)$uri_1, (string)$uri_2);
+ }
+
+ public function testStrictGitURIParsingOfLeadingWhitespace() {
+ $uri = new PhutilURI(' user@example.com:path');
+ $this->assertEqual('', $uri->getDomain());
+ }
+
+ public function testNoRelativeURIPaths() {
+ $uri = new PhutilURI('user@example.com:relative_path');
+
+ $caught = null;
+ try {
+ $uri->setType(PhutilURI::TYPE_URI);
+ } catch (Exception $ex) {
+ $caught = $ex;
+ }
+
+ $this->assertTrue($caught instanceof Exception);
+ }
+
}