diff --git a/scripts/test/http.php b/scripts/test/http.php --- a/scripts/test/http.php +++ b/scripts/test/http.php @@ -9,6 +9,11 @@ $args->parse( array( array( + 'name' => 'attach', + 'param' => 'file', + 'help' => pht('Attach a file to the request.'), + ), + array( 'name' => 'url', 'wildcard' => true, ), @@ -25,14 +30,13 @@ $data = ''; $timeout = 30; -$parsed = new PhutilURI($uri); -if ($parsed->getProtocol() == 'https') { - $future = new HTTPSFuture($uri, $data); -} else { - $future = new HTTPFuture($uri, $data); -} - +$future = new HTTPSFuture($uri, $data); $future->setMethod($method); $future->setTimeout($timeout); +$attach_file = $args->getArg('attach'); +if ($attach_file !== null) { + $future->attachFile('file', Filesystem::readFile($attach_file)); +} + print_r($future->resolve()); diff --git a/src/future/http/HTTPSFuture.php b/src/future/http/HTTPSFuture.php --- a/src/future/http/HTTPSFuture.php +++ b/src/future/http/HTTPSFuture.php @@ -19,6 +19,7 @@ private $followLocation = true; private $responseBuffer = ''; private $responseBufferPos; + private $files = array(); /** * Create a temp file containing an SSL cert, and use it for this session. @@ -146,6 +147,28 @@ } } + /** + * Attach a file to the request. + * + * @param string HTTP parameter name. + * @param string File content. + * @return this + */ + public function attachFile($key, $data) { + if (isset($this->files[$key])) { + throw new Exception( + pht( + 'HTTPSFuture currently supports only one file attachment for each '. + 'parameter name. You are trying to attach two different files with '. + 'the same parameter, "%s".', + $key)); + } + + $this->files[$key] = $data; + + return $this; + } + public function isReady() { if (isset($this->result)) { return true; @@ -197,39 +220,8 @@ curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, $allowed_protocols); } - $data = $this->getData(); - if ($data) { - - // NOTE: PHP's cURL implementation has a piece of magic which treats - // parameters as file paths if they begin with '@'. This means that - // an array like "array('name' => '@/usr/local/secret')" will attempt to - // read that file off disk and send it to the remote server. This - // behavior is pretty surprising, and it can easily become a relatively - // severe security vulnerability which allows an attacker to read any - // file the HTTP process has access to. Since this feature is very - // dangerous and not particularly useful, we prevent its use. - // - // After PHP 5.2.0, it is sufficient to pass a string to avoid this - // "feature" (it is only invoked in the array version). Prior to - // PHP 5.2.0, we block any request which have string data beginning with - // '@' (they would not work anyway). - - if (is_array($data)) { - // Explicitly build a query string to prevent "@" security problems. - $data = http_build_query($data, '', '&'); - } - - if ($data[0] == '@' && version_compare(phpversion(), '5.2.0', '<')) { - throw new Exception( - "Attempting to make an HTTP request including string data that ". - "begins with '@'. Prior to PHP 5.2.0, this reads files off disk, ". - "which creates a wide attack window for security vulnerabilities. ". - "Upgrade PHP or avoid making cURL requests which begin with '@'."); - } - curl_setopt($curl, CURLOPT_POSTFIELDS, $data); - } else { - curl_setopt($curl, CURLOPT_POSTFIELDS, null); - } + $data = $this->formatRequestDataForCURL(); + curl_setopt($curl, CURLOPT_POSTFIELDS, $data); $headers = $this->getHeaders(); @@ -422,4 +414,145 @@ $this->responseBufferPos = 0; return $this; } + + + /** + * Produces a value safe to pass to `CURLOPT_POSTFIELDS`. + * + * @return wild Some value, suitable for use in `CURLOPT_POSTFIELDS`. + */ + private function formatRequestDataForCURL() { + // We're generating a value to hand to cURL as CURLOPT_POSTFIELDS. The way + // cURL handles this value has some tricky caveats. + + // First, we can return either an array or a query string. If we return + // an array, we get a "multipart/form-data" request. If we return a + // query string, we get an "application/x-www-form-urlencoded" request. + + // Second, if we return an array we can't duplicate keys. The user might + // want to send the same parameter multiple times. + + // Third, if we return an array and any of the values start with "@", + // cURL includes arbitrary files off disk and sends them to an untrusted + // remote server. For example, an array like: + // + // array('name' => '@/usr/local/secret') + // + // ...will attempt to read that file off disk and transmit its contents with + // the request. This behavior is pretty surprising, and it can easily + // become a relatively severe security vulnerability which allows an + // attacker to read any file the HTTP process has access to. Since this + // feature is very dangerous and not particularly useful, we prevent its + // use. Broadly, this means we must reject some requests because they + // contain an "@" in an inconvenient place. + + // Generally, to avoid the "@" case and because most servers usually + // expect "application/x-www-form-urlencoded" data, we try to return a + // string unless there are files attached to this request. + + $data = $this->getData(); + $files = $this->files; + + $any_data = ($data || (is_string($data) && strlen($data))); + $any_files = (bool)$this->files; + + if (!$any_data && !$any_files) { + // No files or data, so just bail. + return null; + } + + if (!$any_files) { + // If we don't have any files, just encode the data as a query string, + // make sure it's not including any files, and we're good to go. + if (is_array($data)) { + $data = http_build_query($data, '', '&'); + } + + $this->checkForDangerousCURLMagic($data, $is_query_string = true); + + return $data; + } + + // If we've made it this far, we have some files, so we need to return + // an array. First, convert the other data into an array if it isn't one + // already. + + if (is_string($data)) { + // NOTE: We explicitly don't want fancy array parsing here, so just + // do a basic parse and then convert it into a dictionary ourselves. + $parser = new PhutilQueryStringParser(); + $pairs = $parser->parseQueryStringToPairList($data); + + $map = array(); + foreach ($pairs as $pair) { + list($key, $value) = $pair; + if (array_key_exists($key, $map)) { + throw new Exception( + pht( + 'Request specifies two values for key "%s", but parameter '. + 'names must be unique if you are posting file data due to '. + 'limitations with cURL.')); + } + $map[$key] = $value; + } + + $data = $map; + } + + foreach ($this->files as $name => $file_content) { + if (array_key_exists($name, $data)) { + throw new Exception( + pht( + 'Request specifies a file with key "%s", but that key is '. + 'also defined by normal request data. Due to limitations '. + 'with cURL, requests that post file data must use unique '. + 'keys.')); + } + $data[$name] = $file_content; + } + + foreach ($data as $key => $value) { + $this->checkForDangerousCURLMagic($value, $is_query_string = false); + } + + return $data; + } + + + /** + * Detect strings which will cause cURL to do horrible, insecure things. + * + * @param string Possibly dangerous string. + * @param bool True if this string is being used as part of a query string. + * @return void + */ + private function checkForDangerousCURLMagic($string, $is_query_string) { + if (empty($string[0]) || ($string[0] != '@')) { + // This isn't an "@..." string, so it's fine. + return; + } + + if ($is_query_string) { + if (version_compare(phpversion(), '5.2.0', '<')) { + throw new Exception( + pht( + 'Attempting to make an HTTP request, but query string data begins '. + 'with "@". Prior to PHP 5.2.0 this reads files off disk, which '. + 'creates a wide attack window for security vulnerabilities. '. + 'Upgrade PHP or avoid making cURL requests which begin with "@".')); + } + + // This is safe if we're on PHP 5.2.0 or newer. + return; + } + + throw new Exception( + pht( + 'Attempting to make an HTTP request which includes file data, but '. + 'the value of a query parameter begins with "@". PHP interprets '. + 'these values to mean that it should read arbitrary files off disk '. + 'and transmit them to remote servers. Declining to make this '. + 'request.')); + } + } diff --git a/src/parser/PhutilQueryStringParser.php b/src/parser/PhutilQueryStringParser.php --- a/src/parser/PhutilQueryStringParser.php +++ b/src/parser/PhutilQueryStringParser.php @@ -1,45 +1,102 @@ <?php /** - * Parses a request string leaving all characters intact. + * Utilities for parsing HTTP query strings. * - * http://php.net/manual/en/language.variables.external.php#language.variables.external.dot-in-names - * http://php.net/manual/en/language.variables.external.php#81080 + * The builtin functions in PHP (notably, `parse_str()` and automatic parsing + * prior to request handling) are not suitable in the general case because they + * silently convert some characters in parameter names into underscores. + * + * For example, if you call `parse_str()` with input like this: + * + * x.y=z + * + * ...the output is this: + * + * array( + * 'x_y' => 'z', + * ); + * + * ...with the `.` replaced with an underscore, `_`. Other characters converted + * in this way include space and unmatched opening brackets. + * + * Broadly, this is part of the terrible legacy of `register_globals`. Since + * we'd like to be able to parse all valid query strings without destroying any + * data, this class implements a less-encumbered parser. */ final class PhutilQueryStringParser { - public function parseQueryString($query_string) { - $parsed_arr = array(); - $query_arr = array(); - if (strlen($query_string)) { - $query_pairs = explode("&", $query_string); + /** + * Parses a query string into a dictionary, applying PHP rules for handling + * array nomenclature (like `a[]=1`) in parameter names. + * + * For a more basic parse, see @{method:parseQueryStringToPairList}. + * + * @param string Query string. + * @return map<string, wild> Parsed dictionary. + */ + public function parseQueryString($query_string) { + $result = array(); - foreach ($query_pairs as $query) { - $query_part_arr = explode("=", $query, 2); - if (strlen($query_part_arr[0])) { - $query_arr[] = array( - "key" => $query_part_arr[0], - "val" => isset($query_part_arr[1]) ? $query_part_arr[1] : "", - ); - } + $list = $this->parseQueryStringToPairList($query_string); + foreach ($list as $parts) { + list($key, $value) = $parts; + if (!strlen($key)) { + continue; } + $this->parseQueryKeyToArr($key, $value, $result); + } - foreach ($query_arr as $query_parts) { - $decoded_key = urldecode($query_parts["key"]); - $decoded_val = urldecode($query_parts["val"]); + return $result; + } - $this->parseQueryKeyToArr( - $decoded_key, - $decoded_val, - $parsed_arr); - } + /** + * Parses a query string into a basic list of pairs, without handling any + * array information in the keys. For example: + * + * a[]=1&a[]=2 + * + * ...will parse into: + * + * array( + * array('a[]', '1'), + * array('a[]', '2'), + * ); + * + * Use @{method:parseQueryString} to produce a more sophisticated parse which + * applies array rules and returns a dictionary. + * + * @param string Query string. + * @return list<pair<string, string>> List of parsed parameters. + */ + public function parseQueryStringToPairList($query_string) { + $list = array(); + + if (!strlen($query_string)) { + return $list; } - return $parsed_arr; + $pairs = explode('&', $query_string); + foreach ($pairs as $pair) { + if (!strlen($pair)) { + continue; + } + $parts = explode('=', $pair, 2); + if (count($parts) < 2) { + $parts[] = ''; + } + $list[] = array( + urldecode($parts[0]), + urldecode($parts[1]), + ); + } + + return $list; } + /** * Treats the key as a flat query that potentially has square brackets. If * there are square brackets we parse them into an array. diff --git a/src/parser/__tests__/PhutilQueryStringParserTestCase.php b/src/parser/__tests__/PhutilQueryStringParserTestCase.php --- a/src/parser/__tests__/PhutilQueryStringParserTestCase.php +++ b/src/parser/__tests__/PhutilQueryStringParserTestCase.php @@ -82,8 +82,62 @@ foreach ($map as $query_string => $expected) { $this->assertEqual( - $expected, $parser->parseQueryString($query_string)); + $expected, + $parser->parseQueryString($query_string)); } } + public function testQueryStringListParsing() { + $map = array( + '' => array(), + '&' => array(), + '=' => array( + array('', ''), + ), + '=&' => array( + array('', ''), + ), + 'a=b' => array( + array('a', 'b'), + ), + 'a[]=b' => array( + array('a[]', 'b'), + ), + 'a=' => array( + array('a', ''), + ), + '. [=1' => array( + array('. [', '1'), + ), + 'a=b&c=d' => array( + array('a', 'b'), + array('c', 'd'), + ), + 'a=b&a=c' => array( + array('a', 'b'), + array('a', 'c'), + ), + '&a=b&' => array( + array('a', 'b'), + ), + '=a' => array( + array('', 'a'), + ), + '&&&' => array( + ), + 'a%20b=c%20d' => array( + array('a b', 'c d'), + ), + ); + + $parser = new PhutilQueryStringParser(); + + foreach ($map as $query_string => $expected) { + $this->assertEqual( + $expected, + $parser->parseQueryStringToPairList($query_string)); + } + } + + }