diff --git a/.editorconfig b/.editorconfig index 663b0f40..ea19bf3c 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,53 +1,53 @@ ; http://editorconfig.org/ [*] indent_style = space indent_size = 2 end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true max_line_length = 80 [src/lint/linter/**/__tests__/**.lint-test] -indent_style = -end_of_line = -max_line_length = +indent_style = unset +end_of_line = unset +max_line_length = unset trim_trailing_whitespace = false [src/parser/__tests__/bundle/*] insert_final_newline = false [src/parser/__tests__/diff/*.{git,hg,rcs,svn,u}diff] -indent_style = +indent_style = unset trim_trailing_whitespace = false insert_final_newline = false -max_line_length = +max_line_length = unset [src/parser/__tests__/patches/*.{git,hg,rcs,svn,u}patch] -indent_style = +indent_style = unset trim_trailing_whitespace = false -max_line_length = +max_line_length = unset [src/parser/__tests__/patches/*.gitpatch] -end_of_line = +end_of_line = unset [src/parser/__tests__/**/*.txt] -max_line_length = +max_line_length = unset [src/repository/parser/__tests__/mercurial/*.txt] trim_trailing_whitespace = false insert_final_newline = false -max_line_length = +max_line_length = unset [src/unit/parser/__tests__/testresults/go.*] -indent_style = +indent_style = unset [src/unit/parser/__tests__/testresults/xunit.*] -max_line_length = +max_line_length = unset [externals/**] -indent_style = -indent_size = +indent_style = unset +indent_size = unset trim_trailing_whitespace = false insert_final_newline = false diff --git a/src/error/PhutilOpaqueEnvelope.php b/src/error/PhutilOpaqueEnvelope.php index 1c9024ba..4ba08ae0 100644 --- a/src/error/PhutilOpaqueEnvelope.php +++ b/src/error/PhutilOpaqueEnvelope.php @@ -1,72 +1,75 @@ openEnvelope(); * * Any time you're passing sensitive data into a stack, you should obscure it * with an envelope to prevent it leaking if something goes wrong. 
* * The key for the envelope is stored elsewhere, in * @{class:PhutilOpaqueEnvelopeKey}. This prevents it from appearing in * any sort of logs related to the envelope, even if the logger is very * aggressive. * * @task envelope Using Opaque Envelopes * @task internal Internals */ final class PhutilOpaqueEnvelope extends Phobject { private $value; /* -( Using Opaque Envelopes )--------------------------------------------- */ /** * @task envelope */ public function __construct($string) { $this->value = $this->mask($string, PhutilOpaqueEnvelopeKey::getKey()); } /** * @task envelope */ public function openEnvelope() { return $this->mask($this->value, PhutilOpaqueEnvelopeKey::getKey()); } /** * @task envelope */ public function __toString() { return pht(''); } /* -( Internals )---------------------------------------------------------- */ /** * @task internal */ private function mask($string, $noise) { $result = ''; + if ($string === null) { + return $result; + } for ($ii = 0; $ii < strlen($string); $ii++) { $s = $string[$ii]; $n = $noise[$ii % strlen($noise)]; $result .= chr(ord($s) ^ ord($n)); } return $result; } } diff --git a/src/future/http/status/HTTPFutureHTTPResponseStatus.php b/src/future/http/status/HTTPFutureHTTPResponseStatus.php index 469f9a0c..2e5a9cdb 100644 --- a/src/future/http/status/HTTPFutureHTTPResponseStatus.php +++ b/src/future/http/status/HTTPFutureHTTPResponseStatus.php @@ -1,67 +1,68 @@ 512) { + if ($body !== null && strlen($body) > 512) { $excerpt = substr($body, 0, 512).'...'; } else { $excerpt = $body; } $content_type = BaseHTTPFuture::getHeader($headers, 'Content-Type'); $match = null; - if (preg_match('/;\s*charset=([^;]+)/', $content_type, $match)) { + if ($content_type !== null && strlen($content_type) + && preg_match('/;\s*charset=([^;]+)/', $content_type, $match)) { $encoding = trim($match[1], "\"'"); try { $excerpt = phutil_utf8_convert($excerpt, 'UTF-8', $encoding); } catch (Exception $ex) {} } $this->excerpt = 
phutil_utf8ize($excerpt); $this->expect = $expect; parent::__construct($status_code); } protected function getErrorCodeType($code) { return 'HTTP'; } public function isError() { if ($this->expect === null) { return ($this->getStatusCode() < 200) || ($this->getStatusCode() > 299); } return !in_array($this->getStatusCode(), $this->expect, true); } public function isRedirect() { $code = $this->getStatusCode(); return ($code >= 300 && $code < 400); } public function isTimeout() { return false; } protected function getErrorCodeDescription($code) { static $map = array( 404 => 'Not Found', 500 => 'Internal Server Error', ); return idx($map, $code)."\n".$this->excerpt."\n"; } } diff --git a/src/future/oauth/PhutilOAuth1Future.php b/src/future/oauth/PhutilOAuth1Future.php index 8edd6c26..b734e8dd 100644 --- a/src/future/oauth/PhutilOAuth1Future.php +++ b/src/future/oauth/PhutilOAuth1Future.php @@ -1,307 +1,309 @@ callbackURI = $callback_uri; return $this; } public function setTimestamp($timestamp) { $this->timestamp = $timestamp; return $this; } public function setNonce($nonce) { $this->nonce = $nonce; return $this; } public function setTokenSecret($token_secret) { $this->tokenSecret = $token_secret; return $this; } public function setToken($token) { $this->token = $token; return $this; } public function setPrivateKey(PhutilOpaqueEnvelope $private_key) { $this->privateKey = $private_key; return $this; } public function setSignatureMethod($signature_method) { $this->signatureMethod = $signature_method; return $this; } public function setConsumerKey($consumer_key) { $this->consumerKey = $consumer_key; return $this; } public function setConsumerSecret(PhutilOpaqueEnvelope $consumer_secret) { $this->consumerSecret = $consumer_secret; return $this; } public function setMethod($method) { $this->method = $method; return $this; } public function setTimeout($timeout) { $this->timeout = $timeout; return $this; } public function getTimeout() { return $this->timeout; } public function 
__construct($uri, $data = array()) { $this->uri = new PhutilURI((string)$uri); $this->data = $data; $this->setProxiedFuture(new HTTPSFuture($uri, $data)); } public function getSignature() { $params = array(); // NOTE: The JIRA API uses JSON-encoded request bodies which are not // signed, and OAuth1 provides no real way to sign a nonparameterized // request body. Possibly we should split this apart into flags which // control which data is signed, but for now this rule seems to cover // all the use cases. if (is_array($this->data)) { $params = $this->data; } $params = $params + $this->uri->getQueryParamsAsMap() + $this->getOAuth1Headers(); return $this->sign($params); } public function addHeader($name, $value) { // If we haven't built the future yet, hold on to the header until after // we do, since there might be more changes coming which will affect the // signature process. if (!$this->hasConstructedFuture) { $this->headers[] = array($name, $value); } else { $this->getProxiedFuture()->addHeader($name, $value); } return $this; } protected function getProxiedFuture() { $future = parent::getProxiedFuture(); if (!$this->hasConstructedFuture) { $future->setMethod($this->method); $oauth_headers = $this->getOAuth1Headers(); $oauth_headers['oauth_signature'] = $this->getSignature(); $full_oauth_header = array(); foreach ($oauth_headers as $header => $value) { $full_oauth_header[] = $header.'="'.urlencode($value).'"'; } $full_oauth_header = 'OAuth '.implode(', ', $full_oauth_header); $future->addHeader('Authorization', $full_oauth_header); foreach ($this->headers as $header) { $future->addHeader($header[0], $header[1]); } $this->headers = array(); $timeout = $this->getTimeout(); if ($timeout !== null) { $future->setTimeout($timeout); } $this->hasConstructedFuture = true; } return $future; } protected function didReceiveResult($result) { return $result; } private function getOAuth1Headers() { if (!$this->nonce) { $this->nonce = Filesystem::readRandomCharacters(32); } if 
(!$this->timestamp) { $this->timestamp = time(); } $oauth_headers = array( 'oauth_consumer_key' => $this->consumerKey, 'oauth_signature_method' => $this->signatureMethod, 'oauth_timestamp' => $this->timestamp, 'oauth_nonce' => $this->nonce, 'oauth_version' => '1.0', ); if ($this->callbackURI) { $oauth_headers['oauth_callback'] = (string)$this->callbackURI; } if ($this->token) { $oauth_headers['oauth_token'] = $this->token; } return $oauth_headers; } private function sign(array $params) { ksort($params); $pstr = array(); foreach ($params as $key => $value) { $pstr[] = rawurlencode($key).'='.rawurlencode($value); } $pstr = implode('&', $pstr); $sign_uri = clone $this->uri; $sign_uri->setFragment(''); $sign_uri->removeAllQueryParams(); $sign_uri->setProtocol(phutil_utf8_strtolower($sign_uri->getProtocol())); $protocol = $sign_uri->getProtocol(); switch ($protocol) { case 'http': if ($sign_uri->getPort() == 80) { $sign_uri->setPort(null); } break; case 'https': if ($sign_uri->getPort() == 443) { $sign_uri->setPort(null); } break; } $method = rawurlencode(phutil_utf8_strtoupper($this->method)); $sign_uri = rawurlencode((string)$sign_uri); $pstr = rawurlencode($pstr); $sign_input = "{$method}&{$sign_uri}&{$pstr}"; return $this->signString($sign_input); } private function signString($string) { $consumer_secret = null; if ($this->consumerSecret) { $consumer_secret = $this->consumerSecret->openEnvelope(); } - $key = urlencode($consumer_secret).'&'.urlencode($this->tokenSecret); + $consumer_secret = coalesce($consumer_secret, ''); + $token_secret = coalesce($this->tokenSecret, ''); + $key = urlencode($consumer_secret).'&'.urlencode($token_secret); switch ($this->signatureMethod) { case 'HMAC-SHA1': if (!$this->consumerSecret) { throw new Exception( pht( "Signature method '%s' requires %s!", 'HMAC-SHA1', 'setConsumerSecret()')); } $hash = hash_hmac('sha1', $string, $key, true); return base64_encode($hash); case 'RSA-SHA1': if (!$this->privateKey) { throw new Exception( pht( 
"Signature method '%s' requires %s!", 'RSA-SHA1', 'setPrivateKey()')); } $cert = @openssl_pkey_get_private($this->privateKey->openEnvelope()); if (!$cert) { throw new Exception(pht('%s failed!', 'openssl_pkey_get_private()')); } $pkey = @openssl_get_privatekey($cert); if (!$pkey) { throw new Exception(pht('%s failed!', 'openssl_get_privatekey()')); } $signature = null; $ok = openssl_sign($string, $signature, $pkey, OPENSSL_ALGO_SHA1); if (!$ok) { throw new Exception(pht('%s failed!', 'openssl_sign()')); } // Deprecated in PHP 8; key is automatically freed. @openssl_free_key($pkey); return base64_encode($signature); case 'PLAINTEXT': if (!$this->consumerSecret) { throw new Exception( pht( "Signature method '%s' requires %s!", 'PLAINTEXT', 'setConsumerSecret()')); } return $key; default: throw new Exception(pht("Unknown signature method '%s'!", $string)); } } public function resolvex() { $result = $this->getProxiedFuture()->resolvex(); return $this->didReceiveResult($result); } public function resolveJSON() { $result = $this->getProxiedFuture()->resolvex(); $result = $this->didReceiveResult($result); list($body) = $result; try { return phutil_json_decode($body); } catch (PhutilJSONParserException $ex) { throw new PhutilProxyException(pht('Expected JSON.'), $ex); } } } diff --git a/src/parser/ArcanistBundle.php b/src/parser/ArcanistBundle.php index 4617c9b6..818ee33a 100644 --- a/src/parser/ArcanistBundle.php +++ b/src/parser/ArcanistBundle.php @@ -1,1062 +1,1061 @@ authorEmail = $author_email; return $this; } public function getAuthorEmail() { return $this->authorEmail; } public function setAuthorName($author_name) { $this->authorName = $author_name; return $this; } public function getAuthorName() { return $this->authorName; } public function getFullAuthor() { $author_name = $this->getAuthorName(); if ($author_name === null) { return null; } $author_email = $this->getAuthorEmail(); if ($author_email === null) { return null; } $full_author = sprintf('%s <%s>', 
$author_name, $author_email); // Because git is very picky about the author being in a valid format, // verify that we can parse it. $address = new PhutilEmailAddress($full_author); if (!$address->getDisplayName() || !$address->getAddress()) { return null; } return $full_author; } public function setConduit(ConduitClient $conduit) { $this->conduit = $conduit; return $this; } public function setBaseRevision($base_revision) { $this->baseRevision = $base_revision; return $this; } public function setEncoding($encoding) { $this->encoding = $encoding; return $this; } public function getEncoding() { return $this->encoding; } public function setByteLimit($byte_limit) { $this->byteLimit = $byte_limit; return $this; } public function getByteLimit() { return $this->byteLimit; } public function getBaseRevision() { return $this->baseRevision; } public function setRevisionID($revision_id) { $this->revisionID = $revision_id; return $this; } public function getRevisionID() { return $this->revisionID; } public static function newFromChanges(array $changes) { $obj = new ArcanistBundle(); $obj->changes = $changes; return $obj; } private function getEOL($patch_type) { // NOTE: Git always generates "\n" line endings, even under Windows, and // can not parse certain patches with "\r\n" line endings. SVN generates // patches with "\n" line endings on Mac or Linux and "\r\n" line endings // on Windows. (This EOL style is used only for patch metadata lines, not // for the actual patch content.) // (On Windows, Mercurial generates \n newlines for `--git` diffs, as it // must, but also \n newlines for unified diffs. We never need to deal with // these as we use Git format for Mercurial, so this case is currently // ignored.) switch ($patch_type) { case 'git': return "\n"; case 'unified': return phutil_is_windows() ? 
"\r\n" : "\n"; default: throw new Exception( pht("Unknown patch type '%s'!", $patch_type)); } } public static function newFromArcBundle($path) { $path = Filesystem::resolvePath($path); $future = new ExecFuture( 'tar tfO %s', $path); list($stdout, $file_list) = $future->resolvex(); $file_list = explode("\n", trim($file_list)); if (in_array('meta.json', $file_list)) { $future = new ExecFuture( 'tar xfO %s meta.json', $path); $meta_info = $future->resolveJSON(); $version = idx($meta_info, 'version', 0); $base_revision = idx($meta_info, 'baseRevision'); $revision_id = idx($meta_info, 'revisionID'); $encoding = idx($meta_info, 'encoding'); $author_name = idx($meta_info, 'authorName'); $author_email = idx($meta_info, 'authorEmail'); } else { // this arc bundle was probably made before we started storing meta info $version = 0; $base_revision = null; $revision_id = null; $encoding = null; $author = null; } $future = new ExecFuture( 'tar xfO %s changes.json', $path); $changes = $future->resolveJSON(); foreach ($changes as $change_key => $change) { foreach ($change['hunks'] as $key => $hunk) { list($hunk_data) = execx('tar xfO %s hunks/%s', $path, $hunk['corpus']); $changes[$change_key]['hunks'][$key]['corpus'] = $hunk_data; } } foreach ($changes as $change_key => $change) { $changes[$change_key] = ArcanistDiffChange::newFromDictionary($change); } $obj = new ArcanistBundle(); $obj->changes = $changes; $obj->diskPath = $path; $obj->setBaseRevision($base_revision); $obj->setRevisionID($revision_id); $obj->setEncoding($encoding); return $obj; } public static function newFromDiff($data) { $obj = new ArcanistBundle(); $parser = new ArcanistDiffParser(); $obj->changes = $parser->parseDiff($data); return $obj; } private function __construct() {} public function writeToDisk($path) { $changes = $this->getChanges(); $change_list = array(); foreach ($changes as $change) { $change_list[] = $change->toDictionary(); } $hunks = array(); foreach ($change_list as $change_key => $change) { 
foreach ($change['hunks'] as $key => $hunk) { $hunks[] = $hunk['corpus']; $change_list[$change_key]['hunks'][$key]['corpus'] = count($hunks) - 1; } } $blobs = array(); foreach ($change_list as $change) { if (!empty($change['metadata']['old:binary-phid'])) { $blobs[$change['metadata']['old:binary-phid']] = null; } if (!empty($change['metadata']['new:binary-phid'])) { $blobs[$change['metadata']['new:binary-phid']] = null; } } foreach ($blobs as $phid => $null) { $blobs[$phid] = $this->getBlob($phid); } $meta_info = array( 'version' => 5, 'baseRevision' => $this->getBaseRevision(), 'revisionID' => $this->getRevisionID(), 'encoding' => $this->getEncoding(), 'authorName' => $this->getAuthorName(), 'authorEmail' => $this->getAuthorEmail(), ); $dir = Filesystem::createTemporaryDirectory(); Filesystem::createDirectory($dir.'/hunks'); Filesystem::createDirectory($dir.'/blobs'); Filesystem::writeFile($dir.'/changes.json', json_encode($change_list)); Filesystem::writeFile($dir.'/meta.json', json_encode($meta_info)); foreach ($hunks as $key => $hunk) { Filesystem::writeFile($dir.'/hunks/'.$key, $hunk); } foreach ($blobs as $key => $blob) { Filesystem::writeFile($dir.'/blobs/'.$key, $blob); } execx( '(cd %s; tar -czf %s *)', $dir, Filesystem::resolvePath($path)); Filesystem::remove($dir); } public function toUnifiedDiff() { $this->reservedBytes = 0; $eol = $this->getEOL('unified'); $result = array(); $changes = $this->getChanges(); foreach ($changes as $change) { $hunk_changes = $this->buildHunkChanges($change->getHunks(), $eol); if (!$hunk_changes) { continue; } $old_path = $this->getOldPath($change); $cur_path = $this->getCurrentPath($change); $index_path = $cur_path; if ($index_path === null) { $index_path = $old_path; } $result[] = 'Index: '.$index_path; $result[] = $eol; $result[] = str_repeat('=', 67); $result[] = $eol; if ($old_path === null) { $old_path = '/dev/null'; } if ($cur_path === null) { $cur_path = '/dev/null'; } // When the diff is used by `patch`, `patch` 
ignores what is listed as the // current path and just makes changes to the file at the old path (unless // the current path is '/dev/null'. // If the old path and the current path aren't the same (and neither is // /dev/null), this indicates the file was moved or copied. By listing // both paths as the new file, `patch` will apply the diff to the new // file. if ($cur_path !== '/dev/null' && $old_path !== '/dev/null') { $old_path = $cur_path; } $result[] = '--- '.$old_path.$eol; $result[] = '+++ '.$cur_path.$eol; $result[] = $hunk_changes; } if (!$result) { return ''; } $diff = implode('', $result); return $this->convertNonUTF8Diff($diff); } public function toGitPatch() { $this->reservedBytes = 0; $eol = $this->getEOL('git'); $result = array(); $changes = $this->getChanges(); $binary_sources = array(); foreach ($changes as $change) { if (!$this->isGitBinaryChange($change)) { continue; } $type = $change->getType(); if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY || $type == ArcanistDiffChangeType::TYPE_COPY_AWAY || $type == ArcanistDiffChangeType::TYPE_MULTICOPY) { foreach ($change->getAwayPaths() as $path) { $binary_sources[$path] = $change; } } } foreach (array_keys($changes) as $multicopy_key) { $multicopy_change = $changes[$multicopy_key]; $type = $multicopy_change->getType(); if ($type != ArcanistDiffChangeType::TYPE_MULTICOPY) { continue; } // Decompose MULTICOPY into one MOVE_HERE and several COPY_HERE because // we need more information than we have in order to build a delete patch // and represent it as a bunch of COPY_HERE plus a delete. For details, // see T419. // Basically, MULTICOPY means there are 2 or more corresponding COPY_HERE // changes, so find one of them arbitrarily and turn it into a MOVE_HERE. // TODO: We might be able to do this more cleanly after T230 is resolved. 
$decompose_okay = false; foreach ($changes as $change_key => $change) { if ($change->getType() != ArcanistDiffChangeType::TYPE_COPY_HERE) { continue; } if ($change->getOldPath() != $multicopy_change->getCurrentPath()) { continue; } $decompose_okay = true; $change = clone $change; $change->setType(ArcanistDiffChangeType::TYPE_MOVE_HERE); $changes[$change_key] = $change; // The multicopy is now fully represented by MOVE_HERE plus one or more // COPY_HERE, so throw it away. unset($changes[$multicopy_key]); break; } if (!$decompose_okay) { throw new Exception( pht( 'Failed to decompose multicopy changeset in '. 'order to generate diff.')); } } foreach ($changes as $change) { $type = $change->getType(); $file_type = $change->getFileType(); if ($file_type == ArcanistDiffChangeType::FILE_DIRECTORY) { // TODO: We should raise a FYI about this, so the user is aware // that we omitted it, if the directory is empty or has permissions // which git can't represent. // Git doesn't support empty directories, so we simply ignore them. If // the directory is nonempty, 'git apply' will create it when processing // the changesets for files inside it. continue; } if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) { // Git will apply this in the corresponding MOVE_HERE. continue; } $old_mode = idx($change->getOldProperties(), 'unix:filemode', '100644'); $new_mode = idx($change->getNewProperties(), 'unix:filemode', '100644'); $is_binary = $this->isGitBinaryChange($change); if ($is_binary) { $old_binary = idx($binary_sources, $this->getCurrentPath($change)); $change_body = $this->buildBinaryChange($change, $old_binary); } else { $change_body = $this->buildHunkChanges($change->getHunks(), $eol); } if ($type == ArcanistDiffChangeType::TYPE_COPY_AWAY) { // TODO: This is only relevant when patching old Differential diffs // which were created prior to arc pruning TYPE_COPY_AWAY for files // with no modifications. 
if (!strlen($change_body) && ($old_mode == $new_mode)) { continue; } } $old_path = $this->getOldPath($change); $cur_path = $this->getCurrentPath($change); if ($old_path === null) { $old_index = 'a/'.$cur_path; $old_target = '/dev/null'; } else { $old_index = 'a/'.$old_path; $old_target = 'a/'.$old_path; } if ($cur_path === null) { $cur_index = 'b/'.$old_path; $cur_target = '/dev/null'; } else { $cur_index = 'b/'.$cur_path; $cur_target = 'b/'.$cur_path; } $old_target = $this->encodeGitTargetPath($old_target); $cur_target = $this->encodeGitTargetPath($cur_target); $result[] = "diff --git {$old_index} {$cur_index}".$eol; if ($type == ArcanistDiffChangeType::TYPE_ADD) { $result[] = "new file mode {$new_mode}".$eol; } if ($type == ArcanistDiffChangeType::TYPE_COPY_HERE || $type == ArcanistDiffChangeType::TYPE_MOVE_HERE || $type == ArcanistDiffChangeType::TYPE_COPY_AWAY || $type == ArcanistDiffChangeType::TYPE_CHANGE) { if ($old_mode !== $new_mode) { $result[] = "old mode {$old_mode}".$eol; $result[] = "new mode {$new_mode}".$eol; } } if ($type == ArcanistDiffChangeType::TYPE_COPY_HERE) { $result[] = "copy from {$old_path}".$eol; $result[] = "copy to {$cur_path}".$eol; } else if ($type == ArcanistDiffChangeType::TYPE_MOVE_HERE) { $result[] = "rename from {$old_path}".$eol; $result[] = "rename to {$cur_path}".$eol; } else if ($type == ArcanistDiffChangeType::TYPE_DELETE || $type == ArcanistDiffChangeType::TYPE_MULTICOPY) { $old_mode = idx($change->getOldProperties(), 'unix:filemode'); if ($old_mode) { $result[] = "deleted file mode {$old_mode}".$eol; } } if ($change_body) { if (!$is_binary) { $result[] = "--- {$old_target}".$eol; $result[] = "+++ {$cur_target}".$eol; } $result[] = $change_body; } } $diff = implode('', $result).$eol; return $this->convertNonUTF8Diff($diff); } private function isGitBinaryChange(ArcanistDiffChange $change) { $file_type = $change->getFileType(); return ($file_type == ArcanistDiffChangeType::FILE_BINARY || $file_type == 
ArcanistDiffChangeType::FILE_IMAGE); } private function convertNonUTF8Diff($diff) { if ($this->encoding) { $diff = phutil_utf8_convert($diff, $this->encoding, 'UTF-8'); } return $diff; } public function getChanges() { return $this->changes; } private function breakHunkIntoSmallHunks(ArcanistDiffHunk $base_hunk) { $context = 3; $results = array(); $lines = phutil_split_lines($base_hunk->getCorpus()); $n = count($lines); $old_offset = $base_hunk->getOldOffset(); $new_offset = $base_hunk->getNewOffset(); $ii = 0; $jj = 0; while ($ii < $n) { // Skip lines until we find the next line with changes. Note: this skips // both ' ' (no changes) and '\' (no newline at end of file) lines. If we // don't skip the latter, we may incorrectly generate a terminal hunk // that has no actual change information when a file doesn't have a // terminal newline and not changed near the end of the file. 'patch' will // fail to apply the diff if we generate a hunk that does not actually // contain changes. for ($jj = $ii; $jj < $n; ++$jj) { $char = $lines[$jj][0]; if ($char == '-' || $char == '+') { break; } } if ($jj >= $n) { break; } $hunk_start = max($jj - $context, 0); // NOTE: There are two tricky considerations here. // We can not generate a patch with overlapping hunks, or 'git apply' // rejects it after 1.7.3.4. // We can not generate a patch with too much trailing context, or // 'patch' rejects it. // So we need to ensure that we generate disjoint hunks, but don't // generate any hunks with too much context. $old_lines = 0; $new_lines = 0; $hunk_adjust = 0; $last_change = $jj; $break_here = null; for (; $jj < $n; ++$jj) { if ($lines[$jj][0] == ' ') { if ($jj - $last_change > $context) { if ($break_here === null) { // We haven't seen a change in $context lines, so this is a // potential place to break the hunk. However, we need to keep // looking in case there is another change fewer than $context // lines away, in which case we have to merge the hunks. 
$break_here = $jj; } } // If the context value is "3" and there are 7 unchanged lines // between the two changes, we could either generate one or two hunks // and end up with the same number of output lines. If we generate // one hunk, the middle line will be a line of source. If we generate // two hunks, the middle line will be an "@@ -1,2 +3,4 @@" header. // We choose to generate two hunks because this is the behavior of // "diff -u". See PHI838. if ($jj - $last_change >= ($context * 2 + 1)) { // We definitely aren't going to merge this with the next hunk, so // break out of the loop. We'll end the hunk at $break_here. break; } } else { $break_here = null; $last_change = $jj; if ($lines[$jj][0] == '\\') { // When we have a "\ No newline at end of file" line, it does not // contribute to either hunk length. ++$hunk_adjust; } else if ($lines[$jj][0] == '-') { ++$old_lines; } else if ($lines[$jj][0] == '+') { ++$new_lines; } } } if ($break_here !== null) { $jj = $break_here; } $hunk_length = min($jj, $n) - $hunk_start; $count_length = ($hunk_length - $hunk_adjust); $hunk = new ArcanistDiffHunk(); $hunk->setOldOffset($old_offset + $hunk_start - $ii); $hunk->setNewOffset($new_offset + $hunk_start - $ii); $hunk->setOldLength($count_length - $new_lines); $hunk->setNewLength($count_length - $old_lines); $corpus = array_slice($lines, $hunk_start, $hunk_length); $corpus = implode('', $corpus); $hunk->setCorpus($corpus); $results[] = $hunk; $old_offset += ($jj - $ii) - $new_lines; $new_offset += ($jj - $ii) - $old_lines; $ii = $jj; } return $results; } private function encodeGitTargetPath($path) { // See T8768. If a target path contains spaces, it must be terminated with // a tab. If we don't do this, Mercurial has the wrong behavior when // applying the patch. This results in a semantic trailing whitespace // character: // // +++ b/X Y.txt\t // // Everyone is at fault here and there are no winners. 
if (strpos($path, ' ') !== false) { $path = $path."\t"; } return $path; } private function getOldPath(ArcanistDiffChange $change) { $old_path = $change->getOldPath(); $type = $change->getType(); if ($old_path === '' || $type == ArcanistDiffChangeType::TYPE_ADD) { $old_path = null; } return $old_path; } private function getCurrentPath(ArcanistDiffChange $change) { $cur_path = $change->getCurrentPath(); $type = $change->getType(); if (!strlen($cur_path) || $type == ArcanistDiffChangeType::TYPE_DELETE || $type == ArcanistDiffChangeType::TYPE_MULTICOPY) { $cur_path = null; } return $cur_path; } private function buildHunkChanges(array $hunks, $eol) { assert_instances_of($hunks, 'ArcanistDiffHunk'); $result = array(); foreach ($hunks as $hunk) { $small_hunks = $this->breakHunkIntoSmallHunks($hunk); foreach ($small_hunks as $small_hunk) { $o_off = $small_hunk->getOldOffset(); $o_len = $small_hunk->getOldLength(); $n_off = $small_hunk->getNewOffset(); $n_len = $small_hunk->getNewLength(); $corpus = $small_hunk->getCorpus(); $this->reserveBytes(strlen($corpus)); // NOTE: If the length is 1 it can be omitted. Since git does this, // we also do it so that "arc export --git" diffs are as similar to // real git diffs as possible, which helps debug issues. 
if ($o_len == 1) { $o_head = "{$o_off}"; } else { $o_head = "{$o_off},{$o_len}"; } if ($n_len == 1) { $n_head = "{$n_off}"; } else { $n_head = "{$n_off},{$n_len}"; } $result[] = "@@ -{$o_head} +{$n_head} @@".$eol; $result[] = $corpus; $last = substr($corpus, -1); if ($last !== false && $last != "\r" && $last != "\n") { $result[] = $eol; } } } return implode('', $result); } public function setLoadFileDataCallback($callback) { $this->loadFileDataCallback = $callback; return $this; } private function getBlob($phid, $name = null) { if ($this->loadFileDataCallback) { return call_user_func($this->loadFileDataCallback, $phid); } if ($this->diskPath) { list($blob_data) = execx('tar xfO %s blobs/%s', $this->diskPath, $phid); return $blob_data; } $console = PhutilConsole::getConsole(); if ($this->conduit) { if ($name) { $console->writeErr( "%s\n", pht("Downloading binary data for '%s'...", $name)); } else { $console->writeErr("%s\n", pht('Downloading binary data...')); } $data_base64 = $this->conduit->callMethodSynchronous( 'file.download', array( 'phid' => $phid, )); return base64_decode($data_base64); } throw new Exception(pht("Nowhere to load blob '%s' from!", $phid)); } private function buildBinaryChange(ArcanistDiffChange $change, $old_binary) { $eol = $this->getEOL('git'); // In Git, when we write out a binary file move or copy, we need the // original binary for the source and the current binary for the // destination. 
if ($old_binary) { if ($old_binary->getOriginalFileData() !== null) { $old_data = $old_binary->getOriginalFileData(); $old_phid = null; } else { $old_data = null; $old_phid = $old_binary->getMetadata('old:binary-phid'); } } else { $old_data = $change->getOriginalFileData(); $old_phid = $change->getMetadata('old:binary-phid'); } if ($old_data === null && $old_phid) { $name = basename($change->getOldPath()); $old_data = $this->getBlob($old_phid, $name); } - $old_length = strlen($old_data); - - // Here, and below, the binary will be emitted with base85 encoding. This - // encoding encodes each 4 bytes of input in 5 bytes of output, so we may - // need up to 5/4ths as many bytes to represent it. - - // We reserve space up front because base85 encoding isn't super cheap. If - // the blob is enormous, we'd rather just bail out now before doing a ton - // of work and then throwing it away anyway. - - // However, the data is compressed before it is emitted so we may actually - // end up using fewer bytes. For now, the allocator just assumes the worst - // case since it isn't important to be precise, but we could do a more - // exact job of this. - $this->reserveBytes($old_length * 5 / 4); - if ($old_data === null) { + $old_length = 0; $old_data = ''; $old_sha1 = str_repeat('0', 40); } else { + // Here, and below, the binary will be emitted with base85 encoding. This + // encoding encodes each 4 bytes of input in 5 bytes of output, so we may + // need up to 5/4ths as many bytes to represent it. + + // We reserve space up front because base85 encoding isn't super cheap. If + // the blob is enormous, we'd rather just bail out now before doing a ton + // of work and then throwing it away anyway. + + // However, the data is compressed before it is emitted so we may actually + // end up using fewer bytes. For now, the allocator just assumes the worst + // case since it isn't important to be precise, but we could do a more + // exact job of this. 
+ $old_length = strlen($old_data); + $this->reserveBytes($old_length * 5 / 4); $old_sha1 = sha1("blob {$old_length}\0{$old_data}"); } $new_phid = $change->getMetadata('new:binary-phid'); $new_data = null; if ($change->getCurrentFileData() !== null) { $new_data = $change->getCurrentFileData(); } else if ($new_phid) { $name = basename($change->getCurrentPath()); $new_data = $this->getBlob($new_phid, $name); } - $new_length = strlen($new_data); - $this->reserveBytes($new_length * 5 / 4); - if ($new_data === null) { + $new_length = 0; $new_data = ''; $new_sha1 = str_repeat('0', 40); } else { + $new_length = strlen($new_data); + $this->reserveBytes($new_length * 5 / 4); $new_sha1 = sha1("blob {$new_length}\0{$new_data}"); } $content = array(); $content[] = "index {$old_sha1}..{$new_sha1}".$eol; $content[] = 'GIT binary patch'.$eol; $content[] = "literal {$new_length}".$eol; $content[] = $this->emitBinaryDiffBody($new_data).$eol; $content[] = "literal {$old_length}".$eol; $content[] = $this->emitBinaryDiffBody($old_data).$eol; return implode('', $content); } private function emitBinaryDiffBody($data) { $eol = $this->getEOL('git'); return self::newBase85Data($data, $eol); } public static function newBase85Data($data, $eol, $mode = null) { // The "32bit" and "64bit" modes are used by unit tests to verify that all // of the encoding pathways here work identically. In these modes, we skip // compression because `gzcompress()` may not be stable and we just want // to test that the output matches some expected result. if ($mode === null) { if (!function_exists('gzcompress')) { throw new Exception( pht( 'This patch has binary data. The PHP zlib extension is required '. 'to apply patches with binary data to git. Install the PHP zlib '. 
'extension to continue.')); } $input = gzcompress($data); $is_64bit = (PHP_INT_SIZE >= 8); } else { switch ($mode) { case '32bit': $input = $data; $is_64bit = false; break; case '64bit': $input = $data; $is_64bit = true; break; default: throw new Exception( pht( 'Unsupported base85 encoding mode "%s".', $mode)); } } // See emit_binary_diff_body() in diff.c for git's implementation. // This is implemented awkwardly in order to closely mirror git's // implementation in base85.c // It is also implemented awkwardly to work correctly on 32-bit machines. // Broadly, this algorithm converts the binary input to printable output // by transforming each 4 binary bytes of input to 5 printable bytes of // output, one piece at a time. // // To do this, we convert the 4 bytes into a 32-bit integer, then use // modulus and division by 85 to pick out printable bytes (85^5 is slightly // larger than 2^32). In C, this algorithm is fairly easy to implement // because the accumulator can be made unsigned. // // In PHP, there are no unsigned integers, so values larger than 2^31 break // on 32-bit systems under modulus: // // $ php -r 'print (1 << 31) % 13;' # On a 32-bit machine. // -11 // // However, PHP's float type is an IEEE 754 64-bit double precision float, // so we can safely store integers up to around 2^53 without loss of // precision. To work around the lack of an unsigned type, we just use a // double and perform the modulus with fmod(). // // (Since PHP overflows integer operations into floats, we don't need much // additional casting.) // On 64 bit systems, we skip all this fanfare and just use integers. This // is significantly faster. 
static $map = array( '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~', ); $len_map = array(); for ($ii = 0; $ii <= 52; $ii++) { if ($ii <= 26) { $len_map[$ii] = chr($ii + ord('A') - 1); } else { $len_map[$ii] = chr($ii - 26 + ord('a') - 1); } } $buf = ''; $lines = str_split($input, 52); $final = (count($lines) - 1); foreach ($lines as $idx => $line) { if ($idx === $final) { $len = strlen($line); } else { $len = 52; } // The first character encodes the line length. $buf .= $len_map[$len]; $pos = 0; while ($len) { $accum = 0; for ($count = 24; $count >= 0; $count -= 8) { $val = ord($line[$pos++]); $val = $val * (1 << $count); $accum = $accum + $val; if (--$len == 0) { break; } } $slice = ''; // If we're in 64bit mode, we can just use integers. Otherwise, we // need to use floating point math to avoid overflows. if ($is_64bit) { for ($count = 4; $count >= 0; $count--) { $val = $accum % 85; $accum = (int)($accum / 85); $slice .= $map[$val]; } } else { for ($count = 4; $count >= 0; $count--) { $val = (int)fmod($accum, 85.0); $accum = floor($accum / 85.0); $slice .= $map[$val]; } } $buf .= strrev($slice); } $buf .= $eol; } return $buf; } private function reserveBytes($bytes) { $this->reservedBytes += $bytes; if ($this->byteLimit) { if ($this->reservedBytes > $this->byteLimit) { throw new ArcanistDiffByteSizeException( pht( 'This large diff requires more space than it is allowed to '. 
'use (limited to %s bytes; needs more than %s bytes).', new PhutilNumber($this->byteLimit), new PhutilNumber($this->reservedBytes))); } } return $this; } } diff --git a/src/parser/ArcanistDiffParser.php b/src/parser/ArcanistDiffParser.php index 9a708fd2..22047dd2 100644 --- a/src/parser/ArcanistDiffParser.php +++ b/src/parser/ArcanistDiffParser.php @@ -1,1401 +1,1403 @@ repositoryAPI = $repository_api; return $this; } public function setDetectBinaryFiles($detect) { $this->detectBinaryFiles = $detect; return $this; } public function setTryEncoding($encoding) { $this->tryEncoding = $encoding; return $this; } public function forcePath($path) { $this->forcePath = $path; return $this; } public function setChanges(array $changes) { assert_instances_of($changes, 'ArcanistDiffChange'); $this->changes = mpull($changes, null, 'getCurrentPath'); return $this; } public function parseSubversionDiff(ArcanistSubversionAPI $api, $paths) { $this->setRepositoryAPI($api); $diffs = array(); foreach ($paths as $path => $status) { if ($status & ArcanistRepositoryAPI::FLAG_UNTRACKED || $status & ArcanistRepositoryAPI::FLAG_CONFLICT || $status & ArcanistRepositoryAPI::FLAG_MISSING) { unset($paths[$path]); } } $root = null; $from = array(); foreach ($paths as $path => $status) { $change = $this->buildChange($path); if ($status & ArcanistRepositoryAPI::FLAG_ADDED) { $change->setType(ArcanistDiffChangeType::TYPE_ADD); } else if ($status & ArcanistRepositoryAPI::FLAG_DELETED) { $change->setType(ArcanistDiffChangeType::TYPE_DELETE); } else { $change->setType(ArcanistDiffChangeType::TYPE_CHANGE); } $is_dir = is_dir($api->getPath($path)); if ($is_dir) { $change->setFileType(ArcanistDiffChangeType::FILE_DIRECTORY); // We have to go hit the diff even for directories because they may // have property changes or moves, etc. 
} $is_link = is_link($api->getPath($path)); if ($is_link) { $change->setFileType(ArcanistDiffChangeType::FILE_SYMLINK); } $diff = $api->getRawDiffText($path); if ($diff) { $this->parseDiff($diff); } $info = $api->getSVNInfo($path); if (idx($info, 'Copied From URL')) { if (!$root) { $rinfo = $api->getSVNInfo('.'); $root = $rinfo['URL'].'/'; } $cpath = $info['Copied From URL']; $root_len = strlen($root); if (!strncmp($cpath, $root, $root_len)) { $cpath = substr($cpath, $root_len); // The user can "svn cp /path/to/file@12345 x", which pulls a file out // of version history at a specific revision. If we just use the path, // we'll collide with possible changes to that path in the working // copy below. In particular, "svn cp"-ing a path which no longer // exists somewhere in the working copy and then adding that path // gets us to the "origin change type" branches below with a // TYPE_ADD state on the path. To avoid this, append the origin // revision to the path so we'll necessarily generate a new change. // TODO: In theory, you could have an '@' in your path and this could // cause a collision, e.g. two files named 'f' and 'f@12345'. This is // at least somewhat the user's fault, though. if ($info['Copied From Rev']) { if ($info['Copied From Rev'] != $info['Revision']) { $cpath .= '@'.$info['Copied From Rev']; } } $change->setOldPath($cpath); $from[$path] = $cpath; } } $type = $change->getType(); if (($type === ArcanistDiffChangeType::TYPE_MOVE_AWAY || $type === ArcanistDiffChangeType::TYPE_DELETE) && idx($info, 'Node Kind') === 'directory') { $change->setFileType(ArcanistDiffChangeType::FILE_DIRECTORY); } } foreach ($paths as $path => $status) { $change = $this->buildChange($path); if (empty($from[$path])) { continue; } if (empty($this->changes[$from[$path]])) { if ($change->getType() == ArcanistDiffChangeType::TYPE_COPY_HERE) { // If the origin path wasn't changed (or isn't included in this diff) // and we only copied it, don't generate a changeset for it. 
This // keeps us out of trouble when we go to 'arc commit' and need to // figure out which files should be included in the commit list. continue; } } $origin = $this->buildChange($from[$path]); $origin->addAwayPath($change->getCurrentPath()); $type = $origin->getType(); switch ($type) { case ArcanistDiffChangeType::TYPE_MULTICOPY: case ArcanistDiffChangeType::TYPE_COPY_AWAY: // "Add" is possible if you do some bizarre tricks with svn:ignore and // "svn copy"'ing URLs straight from the repository; you can end up with // a file that is a copy of itself. See T271. case ArcanistDiffChangeType::TYPE_ADD: break; case ArcanistDiffChangeType::TYPE_DELETE: $origin->setType(ArcanistDiffChangeType::TYPE_MOVE_AWAY); break; case ArcanistDiffChangeType::TYPE_MOVE_AWAY: $origin->setType(ArcanistDiffChangeType::TYPE_MULTICOPY); break; case ArcanistDiffChangeType::TYPE_CHANGE: $origin->setType(ArcanistDiffChangeType::TYPE_COPY_AWAY); break; default: throw new Exception(pht('Bad origin state %s.', $type)); } $type = $origin->getType(); switch ($type) { case ArcanistDiffChangeType::TYPE_MULTICOPY: case ArcanistDiffChangeType::TYPE_MOVE_AWAY: $change->setType(ArcanistDiffChangeType::TYPE_MOVE_HERE); break; case ArcanistDiffChangeType::TYPE_ADD: case ArcanistDiffChangeType::TYPE_COPY_AWAY: $change->setType(ArcanistDiffChangeType::TYPE_COPY_HERE); break; default: throw new Exception(pht('Bad origin state %s.', $type)); } } return $this->changes; } public function parseDiff($diff) { if (!strlen(trim($diff))) { throw new Exception(pht("Can't parse an empty diff!")); } // Detect `git-format-patch`, by looking for a "---" line somewhere in // the file and then a footer with Git version number, which looks like // this: // // -- // 1.8.4.2 // // Note that `git-format-patch` adds a space after the "--", but we don't // require it when detecting patches, as trailing whitespace can easily be // lost in transit. 
$detect_patch = '/^---$.*^-- ?[\s\d.]+\z/ms'; $message = null; if (preg_match($detect_patch, $diff)) { list($message, $diff) = $this->stripGitFormatPatch($diff); } $this->didStartParse($diff); // Strip off header comments. While `patch` allows comments anywhere in the // file, `git apply` is more strict. We get these comments in `hg export` // diffs, and Eclipse can also produce them. $line = $this->getLineTrimmed(); while (preg_match('/^#/', $line)) { $line = $this->nextLine(); } if ($message !== null && strlen($message)) { // If we found a message during pre-parse steps, add it to the resulting // changes here. $change = $this->buildChange(null) ->setType(ArcanistDiffChangeType::TYPE_MESSAGE) ->setMetadata('message', $message); } do { $patterns = array( // This is a normal SVN text change, probably from "svn diff". '(?PIndex): (?P.+)', // This is an SVN text change, probably from "svnlook diff". '(?PModified|Added|Deleted|Copied): (?P.+)', // This is an SVN property change, probably from "svn diff". '(?PProperty changes on): (?P.+)', // This is a git commit message, probably from "git show". '(?Pcommit) (?P[a-f0-9]+)(?: \(.*\))?', // This is a git diff, probably from "git show" or "git diff". // Note that the filenames may appear quoted. '(?Pdiff --git) (?P.*)', // RCS Diff '(?Prcsdiff -u) (?P.*)', // This is a unified diff, probably from "diff -u" or synthetic diffing. '(?P---) (?P.+)\s+\d{4}-\d{2}-\d{2}.*', '(?PBinary files|Files) '. '(?P.+)\s+\d{4}-\d{2}-\d{2} and '. '(?P.+)\s+\d{4}-\d{2}-\d{2} differ.*', // This is a normal Mercurial text change, probably from "hg diff". It // may have two "-r" blocks if it came from "hg diff -r x:y". 
'(?Pdiff -r) (?P[a-f0-9]+) (?:-r [a-f0-9]+ )?(?P.+)', ); $line = $this->getLineTrimmed(); $match = null; $ok = $this->tryMatchHeader($patterns, $line, $match); $failed_parse = false; if (!$ok && $this->isFirstNonEmptyLine()) { // 'hg export' command creates so called "extended diff" that // contains some meta information and comment at the beginning // (isFirstNonEmptyLine() to check for beginning). Actual mercurial // code detects where comment ends and unified diff starts by // searching for "diff -r" or "diff --git" in the text. $this->saveLine(); $line = $this->nextLineThatLooksLikeDiffStart(); - if (!$this->tryMatchHeader($patterns, $line, $match)) { + if ($line === null + || !$this->tryMatchHeader($patterns, $line, $match)) { + // Restore line before guessing to display correct error. $this->restoreLine(); $failed_parse = true; } } else if (!$ok) { $failed_parse = true; } if ($failed_parse) { $this->didFailParse( pht( "Expected a hunk header, like '%s' (svn), '%s' (svn properties), ". "'%s' (git show), '%s' (git diff), '%s' (unified diff), or ". 
"'%s' (hg diff or patch).", 'Index: /path/to/file.ext', 'Property changes on: /path/to/file.ext', 'commit 59bcc3ad6775562f845953cf01624225', 'diff --git', '--- filename', 'diff -r')); } if (isset($match['type'])) { if ($match['type'] == 'diff --git') { $filename = self::extractGitCommonFilename($match['oldnew']); if ($filename !== null) { $match['old'] = $filename; $match['cur'] = $filename; } } } $change = $this->buildChange(idx($match, 'cur')); if (isset($match['old'])) { $change->setOldPath($match['old']); } if (isset($match['hash'])) { $change->setCommitHash($match['hash']); } if (isset($match['binary'])) { $change->setFileType(ArcanistDiffChangeType::FILE_BINARY); $line = $this->nextNonemptyLine(); continue; } $line = $this->nextLine(); switch ($match['type']) { case 'Index': case 'Modified': case 'Added': case 'Deleted': case 'Copied': $this->parseIndexHunk($change); break; case 'Property changes on': $this->parsePropertyHunk($change); break; case 'diff --git': $this->setIsGit(true); $this->parseIndexHunk($change); break; case 'commit': $this->setIsGit(true); $this->parseCommitMessage($change); break; case '---': $ok = preg_match( '@^(?:\+\+\+) (.*)\s+\d{4}-\d{2}-\d{2}.*$@', $line, $match); if (!$ok) { $this->didFailParse(pht( "Expected '%s' in unified diff.", '+++ filename')); } $change->setCurrentPath($match[1]); $line = $this->nextLine(); $this->parseChangeset($change); break; case 'diff -r': $this->setIsMercurial(true); $this->parseIndexHunk($change); break; case 'rcsdiff -u': $this->isRCS = true; $this->parseIndexHunk($change); break; default: $this->didFailParse(pht('Unknown diff type.')); break; } } while ($this->getLine() !== null); $this->didFinishParse(); $this->loadSyntheticData(); return $this->changes; } protected function tryMatchHeader($patterns, $line, &$match) { foreach ($patterns as $pattern) { if (preg_match('@^'.$pattern.'$@', $line, $match)) { return true; } } return false; } protected function parseCommitMessage(ArcanistDiffChange 
$change) { $change->setType(ArcanistDiffChangeType::TYPE_MESSAGE); $message = array(); $line = $this->getLine(); if (preg_match('/^Merge: /', $line)) { $this->nextLine(); } $line = $this->getLine(); if (!preg_match('/^Author: /', $line)) { $this->didFailParse(pht("Expected 'Author:'.")); } $line = $this->nextLine(); if (!preg_match('/^Date: /', $line)) { $this->didFailParse(pht("Expected 'Date:'.")); } while (($line = $this->nextLineTrimmed()) !== null) { if (strlen($line) && $line[0] != ' ') { break; } // Strip leading spaces from Git commit messages. Note that empty lines // are represented as just "\n"; don't touch those. $message[] = preg_replace('/^ /', '', $this->getLine()); } $message = rtrim(implode('', $message), "\r\n"); $change->setMetadata('message', $message); } /** * Parse an SVN property change hunk. These hunks are ambiguous so just sort * of try to get it mostly right. It's entirely possible to foil this parser * (or any other parser) with a carefully constructed property change. */ protected function parsePropertyHunk(ArcanistDiffChange $change) { $line = $this->getLineTrimmed(); if (!preg_match('/^_+$/', $line)) { $this->didFailParse(pht("Expected '%s'.", '______________________')); } $line = $this->nextLine(); while ($line !== null) { $done = preg_match('/^(Index|Property changes on):/', $line); if ($done) { break; } // NOTE: Before 1.5, SVN uses "Name". At 1.5 and later, SVN uses // "Modified", "Added" and "Deleted". 
$matches = null; $ok = preg_match( '/^(Name|Modified|Added|Deleted): (.*)$/', $line, $matches); if (!$ok) { $this->didFailParse( pht("Expected 'Name', 'Added', 'Deleted', or 'Modified'.")); } $op = $matches[1]; $prop = $matches[2]; list($old, $new) = $this->parseSVNPropertyChange($op, $prop); if ($old !== null) { $change->setOldProperty($prop, $old); } if ($new !== null) { $change->setNewProperty($prop, $new); } $line = $this->getLine(); } } private function parseSVNPropertyChange($op, $prop) { $old = array(); $new = array(); $target = null; $line = $this->nextLine(); $prop_index = 2; while ($line !== null) { $done = preg_match( '/^(Modified|Added|Deleted|Index|Property changes on):/', $line); if ($done) { break; } $trimline = ltrim($line); if ($trimline && $trimline[0] == '#') { // in svn1.7, a line like ## -0,0 +1 ## is put between the Added: line // and the line with the property change. If we have such a line, we'll // just ignore it (: $line = $this->nextLine(); $prop_index = 1; $trimline = ltrim($line); } if ($trimline && $trimline[0] == '+') { if ($op == 'Deleted') { $this->didFailParse(pht( 'Unexpected "%s" section in property deletion.', '+')); } $target = 'new'; $line = substr($trimline, $prop_index); } else if ($trimline && $trimline[0] == '-') { if ($op == 'Added') { $this->didFailParse(pht( 'Unexpected "%s" section in property addition.', '-')); } $target = 'old'; $line = substr($trimline, $prop_index); } else if (!strncmp($trimline, 'Merged', 6)) { if ($op == 'Added') { $target = 'new'; } else { // These can appear on merges. No idea how to interpret this (unclear // what the old / new values are) and it's of dubious usefulness so // just throw it away until someone complains. 
$target = null; } $line = $trimline; } if ($target == 'new') { $new[] = $line; } else if ($target == 'old') { $old[] = $line; } $line = $this->nextLine(); } $old = rtrim(implode('', $old)); $new = rtrim(implode('', $new)); if (!strlen($old)) { $old = null; } if (!strlen($new)) { $new = null; } return array($old, $new); } protected function setIsGit($git) { if ($this->isGit !== null && $this->isGit != $git) { throw new Exception(pht('Git status has changed!')); } $this->isGit = $git; return $this; } protected function getIsGit() { return $this->isGit; } public function setIsMercurial($is_mercurial) { $this->isMercurial = $is_mercurial; return $this; } public function getIsMercurial() { return $this->isMercurial; } protected function parseIndexHunk(ArcanistDiffChange $change) { $is_git = $this->getIsGit(); $is_mercurial = $this->getIsMercurial(); $is_svn = (!$is_git && !$is_mercurial); $move_source = null; $line = $this->getLine(); if ($is_git) { do { $patterns = array( '(?Pnew) file mode (?P\d+)', '(?Pdeleted) file mode (?P\d+)', // These occur when someone uses `chmod` on a file. 'old mode (?P\d+)', 'new mode (?P\d+)', // These occur when you `mv` a file and git figures it out. 'similarity index ', 'rename from (?P.*)', '(?Prename) to (?P.*)', 'copy from (?P.*)', '(?Pcopy) to (?P.*)', ); $ok = false; $match = null; if ($line !== null) { foreach ($patterns as $pattern) { $ok = preg_match('@^'.$pattern.'@', $line, $match); if ($ok) { break; } } } if (!$ok) { if ($line === null || preg_match('/^(diff --git|commit) /', $line)) { // In this case, there are ONLY file mode changes, or this is a // pure move. If it's a move, flag these changesets so we can build // synthetic changes later, enabling us to show file contents in // Differential -- git only gives us a block like this: // // diff --git a/README b/READYOU // similarity index 100% // rename from README // rename to READYOU // // ...i.e., there is no associated diff. 
// This allows us to distinguish between property changes only // and actual moves. For property changes only, we can't currently // build a synthetic diff correctly, so just skip it. // TODO: Build synthetic diffs for property changes, too. if ($change->getType() != ArcanistDiffChangeType::TYPE_CHANGE) { $change->setNeedsSyntheticGitHunks(true); if ($move_source) { $move_source->setNeedsSyntheticGitHunks(true); } } return; } break; } if (!empty($match['oldmode'])) { $change->setOldProperty('unix:filemode', $match['oldmode']); } if (!empty($match['newmode'])) { $change->setNewProperty('unix:filemode', $match['newmode']); } if (!empty($match['deleted'])) { $change->setType(ArcanistDiffChangeType::TYPE_DELETE); } if (!empty($match['new'])) { // If you replace a symlink with a normal file, git renders the change // as a "delete" of the symlink plus an "add" of the new file. We // prefer to represent this as a change. if ($change->getType() == ArcanistDiffChangeType::TYPE_DELETE) { $change->setType(ArcanistDiffChangeType::TYPE_CHANGE); } else { $change->setType(ArcanistDiffChangeType::TYPE_ADD); } } if (!empty($match['old'])) { $match['old'] = self::unescapeFilename($match['old']); $change->setOldPath($match['old']); } if (!empty($match['cur'])) { $match['cur'] = self::unescapeFilename($match['cur']); $change->setCurrentPath($match['cur']); } if (!empty($match['copy'])) { $change->setType(ArcanistDiffChangeType::TYPE_COPY_HERE); $old = $this->buildChange($change->getOldPath()); $type = $old->getType(); if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) { $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY); } else { $old->setType(ArcanistDiffChangeType::TYPE_COPY_AWAY); } $old->addAwayPath($change->getCurrentPath()); } if (!empty($match['move'])) { $change->setType(ArcanistDiffChangeType::TYPE_MOVE_HERE); $old = $this->buildChange($change->getOldPath()); $type = $old->getType(); if ($type == ArcanistDiffChangeType::TYPE_MULTICOPY) { // Great, no change. 
} else if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) { $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY); } else if ($type == ArcanistDiffChangeType::TYPE_COPY_AWAY) { $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY); } else { $old->setType(ArcanistDiffChangeType::TYPE_MOVE_AWAY); } // We'll reference this above. $move_source = $old; $old->addAwayPath($change->getCurrentPath()); } $line = $this->nextNonemptyLine(); } while (true); } $line = $this->getLine(); if ($is_svn) { $ok = preg_match('/^=+\s*$/', $line); if (!$ok) { $this->didFailParse(pht( "Expected '%s' divider line.", '=======================')); } else { // Adding an empty file in SVN can produce an empty line here. $line = $this->nextNonemptyLine(); } } else if ($is_git) { $ok = preg_match('/^index .*$/', $line); if (!$ok) { // TODO: "hg diff -g" diffs ("mercurial git-style diffs") do not include // this line, so we can't parse them if we fail on it. Maybe introduce // a flag saying "parse this diff using relaxed git-style diff rules"? // $this->didFailParse("Expected 'index af23f...a98bc' header line."); } else { // NOTE: In the git case, where this patch is the last change in the // file, we may have a final terminal newline. Skip over it so that // we'll hit the '$line === null' block below. This is covered by the // 'git-empty-file.gitdiff' test case. $line = $this->nextNonemptyLine(); } } // If there are files with only whitespace changes and -b or -w are // supplied as command-line flags to `diff', svn and git both produce // changes without any body. if ($line === null || preg_match( '/^(Index:|Property changes on:|diff --git|commit) /', $line)) { return; } $is_binary_add = preg_match( '/^Cannot display: file marked as a binary type\.$/', rtrim($line)); if ($is_binary_add) { $this->nextLine(); // Cannot display: file marked as a binary type. 
$this->nextNonemptyLine(); // svn:mime-type = application/octet-stream $this->markBinary($change); return; } // We can get this in git, or in SVN when a file exists in the repository // WITHOUT a binary mime-type and is changed and given a binary mime-type. $is_binary_diff = preg_match( '/^(Binary files|Files) .* and .* differ$/', rtrim($line)); if ($is_binary_diff) { $this->nextNonemptyLine(); // Binary files x and y differ $this->markBinary($change); return; } // This occurs under "hg diff --git" when a binary file is removed. See // test case "hg-binary-delete.hgdiff". (I believe it never occurs under // git, which reports the "files X and /dev/null differ" string above. Git // can not apply these patches.) $is_hg_binary_delete = preg_match( '/^Binary file .* has changed$/', rtrim($line)); if ($is_hg_binary_delete) { $this->nextNonemptyLine(); $this->markBinary($change); return; } // With "git diff --binary" (not a normal mode, but one users may explicitly // invoke and then, e.g., copy-paste into the web console) or "hg diff // --git" (normal under hg workflows), we may encounter a literal binary // patch. $is_git_binary_patch = preg_match( '/^GIT binary patch$/', rtrim($line)); if ($is_git_binary_patch) { $this->nextLine(); $this->parseGitBinaryPatch(); $line = $this->getLine(); if ($line !== null && preg_match('/^literal/', $line)) { // We may have old/new binaries (change) or just a new binary (hg add). // If there are two blocks, parse both. $this->parseGitBinaryPatch(); } $this->markBinary($change); return; } if ($is_git) { // "git diff -b" ignores whitespace, but has an empty hunk target if (preg_match('@^diff --git .*$@', $line)) { $this->nextLine(); return null; } } if ($this->isRCS) { // Skip the RCS headers. 
$this->nextLine(); $this->nextLine(); $this->nextLine(); } $old_file = $this->parseHunkTarget(); $new_file = $this->parseHunkTarget(); if ($this->isRCS) { $change->setCurrentPath($new_file); } $change->setOldPath($old_file); $this->parseChangeset($change); } private function parseGitBinaryPatch() { // TODO: We could decode the patches, but it's a giant mess so don't bother // for now. We'll pick up the data from the working copy in the common // case ("arc diff"). $line = $this->getLine(); if (!preg_match('/^literal /', $line)) { $this->didFailParse( pht("Expected '%s' to start git binary patch.", 'literal NNNN')); } do { $line = $this->nextLineTrimmed(); if ($line === '' || $line === null) { // Some versions of Mercurial apparently omit the terminal newline, // although it's unclear if Git will ever do this. In either case, // rely on the base85 check for sanity. $this->nextNonemptyLine(); return; } else if (!preg_match('/^[a-zA-Z]/', $line)) { $this->didFailParse( pht('Expected base85 line length character (a-zA-Z).')); } } while (true); } protected function parseHunkTarget() { $line = $this->getLine(); $matches = null; $remainder = '(?:\s*\(.*\))?'; if ($this->getIsMercurial()) { // Something like "Fri Aug 26 01:20:50 2005 -0700", don't bother trying // to parse it. $remainder = '\t.*'; } else if ($this->isRCS) { $remainder = '\s.*'; } else if ($this->getIsGit()) { // When filenames contain spaces, Git terminates this line with a tab. // Normally, the tab is not present. If there's a tab, ignore it. 
$remainder = '(?:\t.*)?'; } $ok = preg_match( '@^[-+]{3} (?:[ab]/)?(?P.*?)'.$remainder.'$@', $line, $matches); if (!$ok) { $this->didFailParse( pht( "Expected hunk target '%s'.", '+++ path/to/file.ext (revision N)')); } $this->nextLine(); return $matches['path']; } protected function markBinary(ArcanistDiffChange $change) { $change->setFileType(ArcanistDiffChangeType::FILE_BINARY); return $this; } protected function parseChangeset(ArcanistDiffChange $change) { // If a diff includes two sets of changes to the same file, let the // second one win. In particular, this occurs when adding subdirectories // in Subversion that contain files: the file text will be present in // both the directory diff and the file diff. See T5555. Dropping the // hunks lets whichever one shows up later win instead of showing changes // twice. $change->dropHunks(); $all_changes = array(); do { $hunk = new ArcanistDiffHunk(); $line = $this->getLineTrimmed(); $real = array(); // In the case where only one line is changed, the length is omitted. // The final group is for git, which appends a guess at the function // context to the diff. $matches = null; $ok = preg_match( '/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?: .*?)?$/U', $line, $matches); if (!$ok) { // It's possible we hit the style of an svn1.7 property change. // This is a 4-line Index block, followed by an empty line, followed // by a "Property changes on:" section similar to svn1.6. if ($line == '') { $line = $this->nextNonemptyLine(); $ok = preg_match('/^Property changes on:/', $line); if (!$ok) { $this->didFailParse(pht('Confused by empty line')); } $line = $this->nextLine(); return $this->parsePropertyHunk($change); } $this->didFailParse(pht( "Expected hunk header '%s'.", '@@ -NN,NN +NN,NN @@')); } $hunk->setOldOffset($matches[1]); $hunk->setNewOffset($matches[3]); // Cover for the cases where length wasn't present (implying one line). 
$old_len = idx($matches, 2, ''); if (!strlen($old_len)) { $old_len = 1; } $new_len = idx($matches, 4, ''); if (!strlen($new_len)) { $new_len = 1; } $hunk->setOldLength($old_len); $hunk->setNewLength($new_len); $add = 0; $del = 0; $hit_next_hunk = false; while ((($line = $this->nextLine()) !== null)) { if (strlen(rtrim($line, "\r\n"))) { $char = $line[0]; } else { // Normally, we do not encouter empty lines in diffs, because // unchanged lines have an initial space. However, in Git, with // the option `diff.suppress-blank-empty` set, unchanged blank lines // emit as completely empty. If we encounter a completely empty line, // treat it as a ' ' (i.e., unchanged empty line) line. $char = ' '; } switch ($char) { case '\\': if (!preg_match('@\\ No newline at end of file@', $line)) { $this->didFailParse( pht("Expected '\ No newline at end of file'.")); } if ($new_len) { $real[] = $line; $hunk->setIsMissingOldNewline(true); } else { $real[] = $line; $hunk->setIsMissingNewNewline(true); } if (!$new_len) { break 2; } break; case '+': ++$add; --$new_len; $real[] = $line; break; case '-': if (!$old_len) { // In this case, we've hit "---" from a new file. So don't // advance the line cursor. $hit_next_hunk = true; break 2; } ++$del; --$old_len; $real[] = $line; break; case ' ': if (!$old_len && !$new_len) { break 2; } --$old_len; --$new_len; $real[] = $line; break; default: // We hit something, likely another hunk. 
$hit_next_hunk = true; break 2; } } if ($old_len || $new_len) { $this->didFailParse(pht('Found the wrong number of hunk lines.')); } $corpus = implode('', $real); $is_binary = false; if ($this->detectBinaryFiles) { $is_binary = !phutil_is_utf8($corpus); $try_encoding = $this->tryEncoding; if ($is_binary && $try_encoding) { $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus); if (!$is_binary) { $corpus = phutil_utf8_convert($corpus, 'UTF-8', $try_encoding); if (!phutil_is_utf8($corpus)) { throw new Exception( pht( "Failed to convert a hunk from '%s' to UTF-8. ". "Check that the specified encoding is correct.", $try_encoding)); } } } } if ($is_binary) { // SVN happily treats binary files which aren't marked with the right // mime type as text files. Detect that junk here and mark the file // binary. We'll catch stuff with unicode too, but that's verboten // anyway. If there are too many false positives with this we might // need to make it threshold-triggered instead of triggering on any // unprintable byte. $change->setFileType(ArcanistDiffChangeType::FILE_BINARY); } else { $hunk->setCorpus($corpus); $hunk->setAddLines($add); $hunk->setDelLines($del); $change->addHunk($hunk); } if (!$hit_next_hunk) { $line = $this->nextNonemptyLine(); } } while (($line !== null) && preg_match('/^@@ /', $line)); } protected function buildChange($path = null) { $change = null; if ($path !== null) { if (!empty($this->changes[$path])) { return $this->changes[$path]; } } if ($this->forcePath) { return $this->changes[$this->forcePath]; } $change = new ArcanistDiffChange(); if ($path !== null) { $change->setCurrentPath($path); $this->changes[$path] = $change; } else { $this->changes[] = $change; } return $change; } protected function didStartParse($text) { $this->rawDiff = $text; // Eat leading whitespace. This may happen if the first change in the diff // is an SVN property change. $text = ltrim($text); // Try to strip ANSI color codes from colorized diffs. 
ANSI color codes // might be present in two cases: // // - You piped a colorized diff into 'arc --raw' or similar (normally // we're able to disable colorization on diffs we control the generation // of). // - You're diffing a file which actually contains ANSI color codes. // // The former is vastly more likely, but we try to distinguish between the // two cases by testing for a color code at the beginning of a line. If // we find one, we know it's a colorized diff (since the beginning of the // line should be "+", "-" or " " if the code is in the diff text). // // While it's possible a diff might be colorized and fail this test, it's // unlikely, and it covers hg's color extension which seems to be the most // stubborn about colorizing text despite stdout not being a TTY. // // We might incorrectly strip color codes from a colorized diff of a text // file with color codes inside it, but this case is stupid and pathological // and you've dug your own grave. $ansi_color_pattern = '\x1B\[[\d;]*m'; if (preg_match('/^'.$ansi_color_pattern.'/m', $text)) { $text = preg_replace('/'.$ansi_color_pattern.'/', '', $text); } $this->text = phutil_split_lines($text); $this->line = 0; } protected function getLine() { if ($this->text === null) { throw new Exception(pht('Not parsing!')); } if (isset($this->text[$this->line])) { return $this->text[$this->line]; } return null; } protected function getLineTrimmed() { $line = $this->getLine(); if ($line !== null) { $line = trim($line, "\r\n"); } return $line; } protected function nextLine() { $this->line++; return $this->getLine(); } protected function nextLineTrimmed() { $line = $this->nextLine(); if ($line !== null) { $line = trim($line, "\r\n"); } return $line; } protected function nextNonemptyLine() { while (($line = $this->nextLine()) !== null) { if (strlen(trim($line)) !== 0) { break; } } return $this->getLine(); } protected function nextLineThatLooksLikeDiffStart() { while (($line = $this->nextLine()) !== null) { if 
(preg_match('/^\s*diff\s+-(?:r|-git)/', $line)) { break; } } return $this->getLine(); } protected function saveLine() { $this->lineSaved = $this->line; } protected function restoreLine() { $this->line = $this->lineSaved; } protected function isFirstNonEmptyLine() { $len = count($this->text); for ($ii = 0; $ii < $len; $ii++) { $line = $this->text[$ii]; if (!strlen(trim($line))) { // This line is empty, skip it. continue; } if (preg_match('/^#/', $line)) { // This line is a comment, skip it. continue; } return ($ii == $this->line); } // Entire file is empty. return false; } protected function didFinishParse() { $this->text = null; } public function setWriteDiffOnFailure($write) { $this->writeDiffOnFailure = $write; return $this; } protected function didFailParse($message) { $context = 5; $min = max(0, $this->line - $context); $max = min($this->line + $context, count($this->text) - 1); $context = ''; for ($ii = $min; $ii <= $max; $ii++) { $context .= sprintf( '%8.8s %6.6s %s', ($ii == $this->line) ? '>>> ' : '', $ii + 1, $this->text[$ii]); } $out = array(); $out[] = pht('Diff Parse Exception: %s', $message); if ($this->writeDiffOnFailure) { $temp = new TempFile(); $temp->setPreserveFile(true); Filesystem::writeFile($temp, $this->rawDiff); $out[] = pht('Raw input file was written to: %s', $temp); } $out[] = $context; $out = implode("\n\n", $out); throw new Exception($out); } /** * Unescape escaped filenames, e.g. from "git diff". */ private static function unescapeFilename($name) { if (preg_match('/^".+"$/', $name)) { return stripcslashes(substr($name, 1, -1)); } else { return $name; } } private function loadSyntheticData() { if (!$this->changes) { return; } $repository_api = $this->repositoryAPI; if (!$repository_api) { return; } $imagechanges = array(); $changes = $this->changes; foreach ($changes as $change) { $path = $change->getCurrentPath(); // Certain types of changes (moves and copies) don't contain change data // when expressed in raw "git diff" form. 
Augment any such diffs with // textual data. if ($change->getNeedsSyntheticGitHunks() && ($repository_api instanceof ArcanistGitAPI)) { $diff = $repository_api->getRawDiffText($path, $moves = false); // NOTE: We're reusing the parser and it doesn't reset change state // between parses because there's an oddball SVN workflow in Phabricator // which relies on being able to inject changes. // TODO: Fix this. $parser = clone $this; $parser->setChanges(array()); $raw_changes = $parser->parseDiff($diff); foreach ($raw_changes as $raw_change) { if ($raw_change->getCurrentPath() == $path) { $change->setFileType($raw_change->getFileType()); foreach ($raw_change->getHunks() as $hunk) { // Git thinks that this file has been added. But we know that it // has been moved or copied without a change. $hunk->setCorpus( preg_replace('/^\+/m', ' ', $hunk->getCorpus())); $change->addHunk($hunk); } break; } } $change->setNeedsSyntheticGitHunks(false); } if ($change->getFileType() != ArcanistDiffChangeType::FILE_BINARY && $change->getFileType() != ArcanistDiffChangeType::FILE_IMAGE) { continue; } $imagechanges[$path] = $change; } // Fetch the actual file contents in batches so repositories // that have slow random file accesses (i.e. mercurial) can // optimize the retrieval. $paths = array_keys($imagechanges); $filedata = $repository_api->getBulkOriginalFileData($paths); foreach ($filedata as $path => $data) { $imagechanges[$path]->setOriginalFileData($data); } $filedata = $repository_api->getBulkCurrentFileData($paths); foreach ($filedata as $path => $data) { $imagechanges[$path]->setCurrentFileData($data); } $this->changes = $changes; } /** * Extracts the common filename from two strings with differing path * prefixes as found after `diff --git`. These strings may be * quoted; if so, the filename is returned unescaped. The prefixes * default to "a/" and "b/", but may be any string -- or may be * entierly absent. 
This function may return "null" if the hunk * represents a file move or copy, and with pathological renames may * return an incorrect value. Such cases are expected to be * recovered by later rename detection codepaths. * * @param string Text from a diff line after "diff --git ". * @return string Filename being altered, or null for a rename. */ public static function extractGitCommonFilename($paths) { $matches = null; $paths = rtrim($paths, "\r\n"); // Try the exact same string twice in a row separated by a // space, with an optional prefix. This can hit a false // positive for moves from files like "old file old" to "file", // but such a cases will be caught by the "rename from" / // "rename to" lines. $prefix = '(?:[^/]+/)?'; $pattern = "@^(?P(?P\"?){$prefix}(?P.+)\\k)" ." " ."(?P(?P\"?){$prefix}\\k\\k)$@"; if (!preg_match($pattern, $paths, $matches)) { // A rename or some form; return null for now, and let the // "rename from" / "rename to" lines fix it up. return null; } // Use the common subpart. There may be ambiguity here: "src/file // dst/file" may _either_ be a prefix-less move, or a change with // two custom prefixes. We assume it is the latter; if it is a // rename, diff parsing will update based on the "rename from" / // "rename to" lines. // This re-assembles with the differing prefixes removed, but the // quoting from the original. Necessary so we know if we should // unescape characters from the common string. $new = $matches['newq'].$matches['common'].$matches['newq']; $new = self::unescapeFilename($new); return $new; } /** * Strip the header and footer off a `git-format-patch` diff. * * Returns a parseable normal diff and a textual commit message. */ private function stripGitFormatPatch($diff) { // We can parse this by splitting it into two pieces over and over again // along different section dividers: // // 1. Mail headers. // 2. ("\n\n") // 3. Mail body. // 4. ("---") // 5. Diff stat section. // 6. ("\n\n") // 7. Actual diff body. // 8. 
("--") // 9. Patch footer. list($head, $tail) = preg_split('/^---$/m', $diff, 2); list($mail_headers, $mail_body) = explode("\n\n", $head, 2); list($body, $foot) = preg_split('/^-- ?$/m', $tail, 2); list($stat, $diff) = explode("\n\n", $body, 2); // Rebuild the commit message by putting the subject line back on top of it, // if we can find one. $matches = null; $pattern = '/^Subject: (?:\[PATCH\] )?(.*)$/mi'; if (preg_match($pattern, $mail_headers, $matches)) { $mail_body = $matches[1]."\n\n".$mail_body; $mail_body = rtrim($mail_body); } return array($mail_body, $diff); } } diff --git a/src/parser/xhpast/__tests__/PHPASTParserTestCase.php b/src/parser/xhpast/__tests__/PHPASTParserTestCase.php index 45f64221..38dda194 100644 --- a/src/parser/xhpast/__tests__/PHPASTParserTestCase.php +++ b/src/parser/xhpast/__tests__/PHPASTParserTestCase.php @@ -1,330 +1,330 @@ assertSkipped( pht('%s is not built or not up to date.', 'xhpast')); } } $dir = dirname(__FILE__).'/data/'; foreach (Filesystem::listDirectory($dir) as $file) { if (preg_match('/\.test$/', $file)) { $this->executeParserTest($file, $dir.$file); } } } private function executeParserTest($name, $file) { $contents = Filesystem::readFile($file); $contents = preg_split('/^~{4,}\n/m', $contents); if (count($contents) < 2) { throw new Exception( pht( "Expected '%s' separating test case and results.", '~~~~~~~~~~')); } list($data, $options, $expect) = array_merge($contents, array(null)); $options = id(new PhutilSimpleOptions())->parse($options); $type = null; foreach ($options as $key => $value) { switch ($key) { case 'pass': case 'fail-syntax': case 'fail-parse': if ($type !== null) { throw new Exception( pht( 'Test file "%s" unexpectedly specifies multiple expected '. 'test outcomes.', $name)); } $type = $key; break; case 'comment': // Human readable comment providing test case information. break; case 'rtrim': // Allows construction of tests which rely on EOF without newlines. 
$data = rtrim($data); break; default: throw new Exception( pht( 'Test file "%s" has unknown option "%s" in its options '. 'string.', $name, $key)); } } if ($type === null) { throw new Exception( pht( 'Test file "%s" does not specify a test result (like "pass") in '. 'its options string.', $name)); } $future = PhutilXHPASTBinary::getParserFuture($data); list($err, $stdout, $stderr) = $future->resolve(); switch ($type) { case 'pass': $this->assertEqual(0, $err, pht('Exit code for "%s".', $name)); - if (!strlen($expect)) { + if ($expect === null || !strlen($expect)) { // If there's no "expect" data in the test case, that's OK. break; } try { $stdout = phutil_json_decode($stdout); } catch (PhutilJSONParserException $ex) { throw new PhutilProxyException( pht( 'Output for test file "%s" is not valid JSON.', $name), $ex); } $stdout_nice = $this->newReadableAST($stdout, $data); $this->assertEqual( $expect, $stdout_nice, pht('Parser output for "%s".', $name)); break; case 'fail-syntax': $this->assertEqual(1, $err, pht('Exit code for "%s".', $name)); $this->assertTrue( (bool)preg_match('/syntax error/', $stderr), pht('Expect "syntax error" in stderr or "%s".', $name)); break; default: throw new Exception( pht( 'Unknown PHPAST parser test type "%s"!', $type)); } } /** * Build a human-readable, stable, relatively diff-able string representing * an AST (both the node tree itself and the accompanying token stream) for * use in unit tests. 
*/ private function newReadableAST(array $data, $source) { $tree = new XHPASTTree($data['tree'], $data['stream'], $source); $root = $tree->getRootNode(); $depth = 0; $list = $this->newReadableTreeLines($root, $depth); return implode('', $list); } private function newReadableTreeLines(AASTNode $node, $depth) { $out = array(); try { $type_name = $node->getTypeName(); } catch (Exception $ex) { $type_name = sprintf('', $node->getTypeID()); } $out[] = $this->newBlock($depth, '*', $type_name); $tokens = $node->getTokens(); if ($tokens) { $l = head_key($tokens); $r = last_key($tokens); } else { $l = null; $r = null; } $items = array(); $child_token_map = array(); $children = $node->getChildren(); foreach ($children as $child) { $child_tokens = $child->getTokens(); if ($child_tokens) { $child_l = head_key($child_tokens); $child_r = last_key($child_tokens); } else { $child_l = null; $child_r = null; } if ($l !== null) { for ($ii = $l; $ii < $child_l; $ii++) { $items[] = $tokens[$ii]; } } $items[] = $child; if ($child_r !== null) { // NOTE: In some cases, child nodes do not appear in token order. // That is, the 4th child of a node may use tokens that appear // between children 2 and 3. Ideally, we wouldn't have cases of // this and wouldn't have a positional AST. // Work around this by: never moving the token cursor backwards; and // explicitly preventing tokens appearing in any child from being // printed at top level. for ($ii = $child_l; $ii <= $child_r; $ii++) { if (!isset($tokens[$ii])) { continue; } $child_token_map[$tokens[$ii]->getTokenID()] = true; } $l = max($l, $child_r + 1); } else { $l = null; } } if ($l !== null) { for ($ii = $l; $ii <= $r; $ii++) { $items[] = $tokens[$ii]; } } // See above. If we have tokens in the list which are part of a // child node that appears later, remove them now. 
foreach ($items as $key => $item) { if ($item instanceof AASTToken) { $token = $item; $token_id = $token->getTokenID(); if (isset($child_token_map[$token_id])) { unset($items[$key]); } } } foreach ($items as $item) { if ($item instanceof AASTNode) { $lines = $this->newReadableTreeLines($item, $depth + 1); foreach ($lines as $line) { $out[] = $line; } } else { $token_value = $item->getValue(); $out[] = $this->newBlock($depth + 1, '>', $token_value); } } return $out; } private function newBlock($depth, $type, $text) { $output_width = 80; $usable_width = ($output_width - $depth - 2); $must_escape = false; // We must escape the text if it isn't just simple printable characters. if (preg_match('/[ \\\\\\r\\n\\t\\"]/', $text)) { $must_escape = true; } // We must escape the text if it has trailing whitespace. if (preg_match('/ \z/', $text)) { $must_escape = true; } // We must escape the text if it won't fit on a single line. if (strlen($text) > $usable_width) { $must_escape = true; } if (!$must_escape) { $lines = array($text); } else { $vector = phutil_utf8v_combined($text); $escape_map = array( "\r" => '\\r', "\n" => '\\n', "\t" => '\\t', '"' => '\\"', '\\' => '\\', ); $escaped = array(); foreach ($vector as $key => $word) { if (isset($escape_map[$word])) { $vector[$key] = $escape_map[$word]; } } $line_l = '"'; $line_r = '"'; $max_width = ($usable_width - strlen($line_l) - strlen($line_r)); $line = ''; $len = 0; $lines = array(); foreach ($vector as $word) { $word_length = phutil_utf8_console_strlen($word); if ($len + $word_length > $max_width) { $lines[] = $line_l.$line.$line_r; $line = ''; $len = 0; } $line .= $word; $len += $word_length; } $lines[] = $line_l.$line.$line_r; } $is_first = true; $indent = str_repeat(' ', $depth); $output = array(); foreach ($lines as $line) { if ($is_first) { $marker = $type; $is_first = false; } else { $marker = '.'; } $output[] = sprintf( "%s%s %s\n", $indent, $marker, $line); } return implode('', $output); } } diff --git 
a/src/parser/xhpast/api/XHPASTNode.php b/src/parser/xhpast/api/XHPASTNode.php index 0f685c5e..82dba48f 100644 --- a/src/parser/xhpast/api/XHPASTNode.php +++ b/src/parser/xhpast/api/XHPASTNode.php @@ -1,342 +1,342 @@ getTypeName(), array( 'n_STRING_SCALAR', 'n_NUMERIC_SCALAR', )); } public function getDocblockToken() { if ($this->l == -1) { return null; } $tokens = $this->tree->getRawTokenStream(); for ($ii = $this->l - 1; $ii >= 0; $ii--) { if ($tokens[$ii]->getTypeName() == 'T_DOC_COMMENT') { return $tokens[$ii]; } if (!$tokens[$ii]->isAnyWhitespace()) { return null; } } return null; } public function evalStatic() { switch ($this->getTypeName()) { case 'n_STATEMENT': return $this->getChildByIndex(0)->evalStatic(); break; case 'n_STRING_SCALAR': return phutil_string_cast($this->getStringLiteralValue()); case 'n_HEREDOC': return phutil_string_cast($this->getStringLiteralValue()); case 'n_NUMERIC_SCALAR': $value = $this->getSemanticString(); if (preg_match('/^0x/i', $value)) { // Hex $value = base_convert(substr($value, 2), 16, 10); } else if (preg_match('/^0\d+$/i', $value)) { // Octal $value = base_convert(substr($value, 1), 8, 10); } return +$value; case 'n_SYMBOL_NAME': $value = $this->getSemanticString(); if ($value == 'INF') { return INF; } switch (strtolower($value)) { case 'true': return true; case 'false': return false; case 'null': return null; default: throw new Exception(pht('Unrecognized symbol name.')); } break; case 'n_UNARY_PREFIX_EXPRESSION': $operator = $this->getChildOfType(0, 'n_OPERATOR'); $operand = $this->getChildByIndex(1); switch ($operator->getSemanticString()) { case '-': return -$operand->evalStatic(); break; case '+': return $operand->evalStatic(); break; default: throw new Exception( pht('Unexpected operator in static expression.')); } break; case 'n_ARRAY_LITERAL': $result = array(); $values = $this->getChildOfType(0, 'n_ARRAY_VALUE_LIST'); foreach ($values->getChildren() as $child) { $key = $child->getChildByIndex(0); $val = 
$child->getChildByIndex(1); if ($key->getTypeName() == 'n_EMPTY') { $result[] = $val->evalStatic(); } else { $result[$key->evalStatic()] = $val->evalStatic(); } } return $result; case 'n_CONCATENATION_LIST': $result = ''; foreach ($this->getChildren() as $child) { if ($child->getTypeName() == 'n_OPERATOR') { continue; } $result .= $child->evalStatic(); } return $result; default: throw new Exception( pht( 'Unexpected node during static evaluation, of type: %s', $this->getTypeName())); } } public function isConstantString() { return $this->checkIsConstantString(); } public function isConstantStringWithMagicConstants() { return $this->checkIsConstantString(array('n_MAGIC_SCALAR')); } private function checkIsConstantString(array $additional_types = array()) { switch ($this->getTypeName()) { case 'n_HEREDOC': case 'n_STRING_SCALAR': return !$this->getStringVariables(); case 'n_CONCATENATION_LIST': foreach ($this->getChildren() as $child) { if ($child->getTypeName() == 'n_OPERATOR') { continue; } if (!$child->checkIsConstantString($additional_types)) { return false; } } return true; default: if (in_array($this->getTypeName(), $additional_types)) { return true; } return false; } } public function getStringVariables() { $value = $this->getConcreteString(); switch ($this->getTypeName()) { case 'n_HEREDOC': if (preg_match("/^<<<\s*'/", $value)) { // Nowdoc: <<<'EOT' return array(); } break; case 'n_STRING_SCALAR': if ($value[0] == "'") { return array(); } break; default: throw new Exception(pht('Unexpected type %s.', $this->getTypeName())); } // We extract just the variable names and ignore properties and array keys. $re = '/\\\\.|(\$|\{\$|\${)([a-z_\x7F-\xFF][a-z0-9_\x7F-\xFF]*)/i'; $matches = null; preg_match_all($re, $value, $matches, PREG_OFFSET_CAPTURE); // NOTE: The result format for this construction changed in PHP 7.4. // See T13518. 
$names = $matches[2]; foreach ($names as $name_idx => $name_match) { if ($name_match === '') { unset($names[$name_idx]); continue; } if ($name_match[1] === -1) { unset($names[$name_idx]); continue; } } $names = ipull($names, 0, 1); return $names; } public function getStringLiteralValue() { $type_name = $this->getTypeName(); if ($type_name === 'n_HEREDOC') { $value = $this->getSemanticString(); $value = phutil_split_lines($value); $value = array_slice($value, 1, -1); $value = implode('', $value); // Strip the final newline from value, this isn't part of the string // literal. $value = preg_replace('/(\r|\n|\r\n)\z/', '', $value); return $this->newStringLiteralFromSemanticString($value); } if ($type_name === 'n_STRING_SCALAR') { $value = $this->getSemanticString(); $type = $value[0]; $value = preg_replace('/^b?[\'"]|[\'"]$/i', '', $value); if ($type == "'") { // Single quoted strings treat everything as a literal except "\\" and // "\'". return str_replace( array('\\\\', '\\\''), array('\\', "'"), $value); } return $this->newStringLiteralFromSemanticString($value); } return null; } private function newStringLiteralFromSemanticString($value) { // Double quoted strings treat "\X" as a literal if X isn't specifically // a character which needs to be escaped -- e.g., "\q" and "\'" are // literally "\q" and "\'". stripcslashes() is too aggressive, so find // all these under-escaped backslashes and escape them. $len = strlen($value); $esc = false; $out = ''; for ($ii = 0; $ii < $len; $ii++) { $c = $value[$ii]; if ($esc) { $esc = false; switch ($c) { case 'x': - $u = isset($value[$ii + 1]) ? $value[$ii + 1] : null; + $u = isset($value[$ii + 1]) ? $value[$ii + 1] : ''; if (!preg_match('/^[a-f0-9]/i', $u)) { // PHP treats \x followed by anything which is not a hex digit // as a literal \x. 
$out .= '\\\\'.$c; break; } /* fallthrough */ case 'n': case 'r': case 'f': case 'v': case '"': case '$': case 't': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': $out .= '\\'.$c; break; case 'e': // Since PHP 5.4.0, this means "esc". However, stripcslashes() does // not perform this conversion. $out .= chr(27); break; default: $out .= '\\\\'.$c; break; } } else if ($c == '\\') { $esc = true; } else { $out .= $c; } } return stripcslashes($out); } /** * Determines the parent namespace for a node. * * Traverses the AST upwards from a given node in order to determine the * namespace in which the node is declared. * * To prevent any possible ambiguity, the returned namespace will always be * prefixed with the namespace separator. * * @param XHPASTNode The input node. * @return string|null The namespace which contains the input node, or * `null` if no such node exists. */ public function getNamespace() { $namespaces = $this ->getTree() ->getRootNode() ->selectDescendantsOfType('n_NAMESPACE') ->getRawNodes(); foreach (array_reverse($namespaces) as $namespace) { if ($namespace->isAfter($this)) { continue; } $body = $namespace->getChildByIndex(1); if ($body->getTypeName() != 'n_EMPTY') { if (!$body->containsDescendant($this)) { continue; } } return $namespace->getNamespaceName(); } return null; } /** * Returns the namespace name from a node of type `n_NAMESPACE`. * * @return string|null */ private function getNamespaceName() { if ($this->getTypeName() != 'n_NAMESPACE') { return null; } $namespace_name = $this->getChildByIndex(0); if ($namespace_name->getTypeName() == 'n_EMPTY') { return null; } return '\\'.$namespace_name->getConcreteString(); } } diff --git a/src/utils/utils.php b/src/utils/utils.php index 3bcaab02..990cf70b 100644 --- a/src/utils/utils.php +++ b/src/utils/utils.php @@ -1,2221 +1,2225 @@ doStuff(); * * ...but this works fine: * * id(new Thing())->doStuff(); * * @param wild Anything. * @return wild Unmodified argument. 
*/ function id($x) { return $x; } /** * Access an array index, retrieving the value stored there if it exists or * a default if it does not. This function allows you to concisely access an * index which may or may not exist without raising a warning. * * @param array Array to access. * @param scalar Index to access in the array. * @param wild Default value to return if the key is not present in the * array. * @return wild If `$array[$key]` exists, that value is returned. If not, * $default is returned without raising a warning. */ function idx(array $array, $key, $default = null) { // isset() is a micro-optimization - it is fast but fails for null values. if (isset($array[$key])) { return $array[$key]; } // Comparing $default is also a micro-optimization. if ($default === null || array_key_exists($key, $array)) { return null; } return $default; } /** * Access a sequence of array indexes, retrieving a deeply nested value if * it exists or a default if it does not. * * For example, `idxv($dict, array('a', 'b', 'c'))` accesses the key at * `$dict['a']['b']['c']`, if it exists. If it does not, or any intermediate * value is not itself an array, it returns the defualt value. * * @param array Array to access. * @param list List of keys to access, in sequence. * @param wild Default value to return. * @return wild Accessed value, or default if the value is not accessible. */ function idxv(array $map, array $path, $default = null) { if (!$path) { return $default; } $last = last($path); $path = array_slice($path, 0, -1); $cursor = $map; foreach ($path as $key) { $cursor = idx($cursor, $key); if (!is_array($cursor)) { return $default; } } return idx($cursor, $last, $default); } /** * Call a method on a list of objects. Short for "method pull", this function * works just like @{function:ipull}, except that it operates on a list of * objects instead of a list of arrays. 
This function simplifies a common type * of mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $key => $object) { * $names[$key] = $object->getName(); * } * * You can express this more concisely with mpull(): * * $names = mpull($objects, 'getName'); * * mpull() takes a third argument, which allows you to do the same but for * the array's keys: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $object) { * $names[$object->getID()] = $object->getName(); * } * * This is the mpull version(): * * $names = mpull($objects, 'getName', 'getID'); * * If you pass ##null## as the second argument, the objects will be preserved: * * COUNTEREXAMPLE * $id_map = array(); * foreach ($objects as $object) { * $id_map[$object->getID()] = $object; * } * * With mpull(): * * $id_map = mpull($objects, null, 'getID'); * * See also @{function:ipull}, which works similarly but accesses array indexes * instead of calling methods. * * @param list Some list of objects. * @param string|null Determines which **values** will appear in the result * array. Use a string like 'getName' to store the * value of calling the named method in each value, or * ##null## to preserve the original objects. * @param string|null Determines how **keys** will be assigned in the result * array. Use a string like 'getID' to use the result * of calling the named method as each object's key, or * `null` to preserve the original keys. * @return dict A dictionary with keys and values derived according * to whatever you passed as `$method` and `$key_method`. */ function mpull(array $list, $method, $key_method = null) { $result = array(); foreach ($list as $key => $object) { if ($key_method !== null) { $key = $object->$key_method(); } if ($method !== null) { $value = $object->$method(); } else { $value = $object; } $result[$key] = $value; } return $result; } /** * Access a property on a list of objects. 
Short for "property pull", this * function works just like @{function:mpull}, except that it accesses object * properties instead of methods. This function simplifies a common type of * mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $key => $object) { * $names[$key] = $object->name; * } * * You can express this more concisely with ppull(): * * $names = ppull($objects, 'name'); * * ppull() takes a third argument, which allows you to do the same but for * the array's keys: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $object) { * $names[$object->id] = $object->name; * } * * This is the ppull version(): * * $names = ppull($objects, 'name', 'id'); * * If you pass ##null## as the second argument, the objects will be preserved: * * COUNTEREXAMPLE * $id_map = array(); * foreach ($objects as $object) { * $id_map[$object->id] = $object; * } * * With ppull(): * * $id_map = ppull($objects, null, 'id'); * * See also @{function:mpull}, which works similarly but calls object methods * instead of accessing object properties. * * @param list Some list of objects. * @param string|null Determines which **values** will appear in the result * array. Use a string like 'name' to store the value of * accessing the named property in each value, or * `null` to preserve the original objects. * @param string|null Determines how **keys** will be assigned in the result * array. Use a string like 'id' to use the result of * accessing the named property as each object's key, or * `null` to preserve the original keys. * @return dict A dictionary with keys and values derived according * to whatever you passed as `$property` and * `$key_property`. 
*/ function ppull(array $list, $property, $key_property = null) { $result = array(); foreach ($list as $key => $object) { if ($key_property !== null) { $key = $object->$key_property; } if ($property !== null) { $value = $object->$property; } else { $value = $object; } $result[$key] = $value; } return $result; } /** * Choose an index from a list of arrays. Short for "index pull", this function * works just like @{function:mpull}, except that it operates on a list of * arrays and selects an index from them instead of operating on a list of * objects and calling a method on them. * * This function simplifies a common type of mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($list as $key => $dict) { * $names[$key] = $dict['name']; * } * * With ipull(): * * $names = ipull($list, 'name'); * * See @{function:mpull} for more usage examples. * * @param list Some list of arrays. * @param scalar|null Determines which **values** will appear in the result * array. Use a scalar to select that index from each * array, or null to preserve the arrays unmodified as * values. * @param scalar|null Determines which **keys** will appear in the result * array. Use a scalar to select that index from each * array, or null to preserve the array keys. * @return dict A dictionary with keys and values derived according * to whatever you passed for `$index` and `$key_index`. */ function ipull(array $list, $index, $key_index = null) { $result = array(); foreach ($list as $key => $array) { if ($key_index !== null) { $key = $array[$key_index]; } if ($index !== null) { $value = $array[$index]; } else { $value = $array; } $result[$key] = $value; } return $result; } /** * Group a list of objects by the result of some method, similar to how * GROUP BY works in an SQL query. 
This function simplifies grouping objects * by some property: * * COUNTEREXAMPLE * $animals_by_species = array(); * foreach ($animals as $animal) { * $animals_by_species[$animal->getSpecies()][] = $animal; * } * * This can be expressed more tersely with mgroup(): * * $animals_by_species = mgroup($animals, 'getSpecies'); * * In either case, the result is a dictionary which maps species (e.g., like * "dog") to lists of animals with that property, so all the dogs are grouped * together and all the cats are grouped together, or whatever super * businessesey thing is actually happening in your problem domain. * * See also @{function:igroup}, which works the same way but operates on * array indexes. * * @param list List of objects to group by some property. * @param string Name of a method, like 'getType', to call on each object * in order to determine which group it should be placed into. * @param ... Zero or more additional method names, to subgroup the * groups. * @return dict Dictionary mapping distinct method returns to lists of * all objects which returned that value. */ function mgroup(array $list, $by /* , ... */) { $map = mpull($list, $by); $groups = array(); foreach ($map as $group) { // Can't array_fill_keys() here because 'false' gets encoded wrong. $groups[$group] = array(); } foreach ($map as $key => $group) { $groups[$group][$key] = $list[$key]; } $args = func_get_args(); $args = array_slice($args, 2); if ($args) { array_unshift($args, null); foreach ($groups as $group_key => $grouped) { $args[0] = $grouped; $groups[$group_key] = call_user_func_array('mgroup', $args); } } return $groups; } /** * Group a list of arrays by the value of some index. This function is the same * as @{function:mgroup}, except it operates on the values of array indexes * rather than the return values of method calls. * * @param list List of arrays to group by some index value. 
* @param string Name of an index to select from each array in order to * determine which group it should be placed into. * @param ... Zero or more additional indexes names, to subgroup the * groups. * @return dict Dictionary mapping distinct index values to lists of * all objects which had that value at the index. */ function igroup(array $list, $by /* , ... */) { $map = ipull($list, $by); $groups = array(); foreach ($map as $group) { $groups[$group] = array(); } foreach ($map as $key => $group) { $groups[$group][$key] = $list[$key]; } $args = func_get_args(); $args = array_slice($args, 2); if ($args) { array_unshift($args, null); foreach ($groups as $group_key => $grouped) { $args[0] = $grouped; $groups[$group_key] = call_user_func_array('igroup', $args); } } return $groups; } /** * Sort a list of objects by the return value of some method. In PHP, this is * often vastly more efficient than `usort()` and similar. * * // Sort a list of Duck objects by name. * $sorted = msort($ducks, 'getName'); * * It is usually significantly more efficient to define an ordering method * on objects and call `msort()` than to write a comparator. It is often more * convenient, as well. * * NOTE: This method does not take the list by reference; it returns a new list. * * @param list List of objects to sort by some property. * @param string Name of a method to call on each object; the return values * will be used to sort the list. * @return list Objects ordered by the return values of the method calls. */ function msort(array $list, $method) { $surrogate = mpull($list, $method); // See T13303. A "PhutilSortVector" is technically a sortable object, so // a method which returns a "PhutilSortVector" is suitable for use with // "msort()". However, it's almost certain that the caller intended to use // "msortv()", not "msort()", and forgot to add a "v". Treat this as an error. 
if ($surrogate) { $item = head($surrogate); if ($item instanceof PhutilSortVector) { throw new Exception( pht( 'msort() was passed a method ("%s") which returns '. '"PhutilSortVector" objects. Use "msortv()", not "msort()", to '. 'sort a list which produces vectors.', $method)); } } asort($surrogate); $result = array(); foreach ($surrogate as $key => $value) { $result[$key] = $list[$key]; } return $result; } /** * Sort a list of objects by a sort vector. * * This sort is stable, well-behaved, and more efficient than `usort()`. * * @param list List of objects to sort. * @param string Name of a method to call on each object. The method must * return a @{class:PhutilSortVector}. * @return list Objects ordered by the vectors. */ function msortv(array $list, $method) { return msortv_internal($list, $method, SORT_STRING); } function msortv_natural(array $list, $method) { return msortv_internal($list, $method, SORT_NATURAL | SORT_FLAG_CASE); } function msortv_internal(array $list, $method, $flags) { $surrogate = mpull($list, $method); $index = 0; foreach ($surrogate as $key => $value) { if (!($value instanceof PhutilSortVector)) { throw new Exception( pht( 'Objects passed to "%s" must return sort vectors (objects of '. 'class "%s") from the specified method ("%s"). One object (with '. 'key "%s") did not.', 'msortv()', 'PhutilSortVector', $method, $key)); } // Add the original index to keep the sort stable. $value->addInt($index++); $surrogate[$key] = (string)$value; } asort($surrogate, $flags); $result = array(); foreach ($surrogate as $key => $value) { $result[$key] = $list[$key]; } return $result; } /** * Sort a list of arrays by the value of some index. This method is identical to * @{function:msort}, but operates on a list of arrays instead of a list of * objects. * * @param list List of arrays to sort by some index value. * @param string Index to access on each object; the return values * will be used to sort the list. 
* @return list    Arrays ordered by the index values.
 */
function isort(array $list, $index) {
  // Sort a projection of the index values; asort() keeps the keys attached,
  // so walking the sorted keys rebuilds the list in sorted order.
  $order = ipull($list, $index);
  asort($order);

  $sorted = array();
  foreach (array_keys($order) as $key) {
    $sorted[$key] = $list[$key];
  }

  return $sorted;
}


/**
 * Filter a list of objects by executing a method across all the objects and
 * filter out the ones with empty() results. This function works just like
 * @{function:ifilter}, except that it operates on a list of objects instead
 * of a list of arrays.
 *
 * For example, to remove all objects with no children from a list, where
 * 'hasChildren' is a method name, do this:
 *
 *   mfilter($list, 'hasChildren');
 *
 * The optional third parameter allows you to negate the operation and filter
 * out nonempty objects. To remove all objects that DO have children, do this:
 *
 *   mfilter($list, 'hasChildren', true);
 *
 * @param  array        List of objects to filter.
 * @param  string       A method name.
 * @param  bool         Optionally, pass true to drop objects which pass the
 *                      filter instead of keeping them.
 * @return array        List of objects which pass the filter.
 */
function mfilter(array $list, $method, $negate = false) {
  if (!is_string($method)) {
    throw new InvalidArgumentException(pht('Argument method is not a string.'));
  }

  // Normally we keep objects with nonempty results; when negated we keep the
  // ones with empty results. Either way, keep when "is empty" equals "negate".
  $keep_empty = (bool)$negate;

  $result = array();
  foreach ($list as $key => $object) {
    $value = $object->$method();
    if (empty($value) === $keep_empty) {
      $result[$key] = $object;
    }
  }

  return $result;
}


/**
 * Filter a list of arrays by removing the ones with an empty() value for some
 * index. This function works just like @{function:mfilter}, except that it
 * operates on a list of arrays instead of a list of objects.
 *
 * For example, to remove all arrays without value for key 'username', do this:
 *
 *   ifilter($list, 'username');
 *
 * The optional third parameter allows you to negate the operation and filter
 * out nonempty arrays. To remove all arrays that DO have value for key
 * 'username', do this:
 *
 *   ifilter($list, 'username', true);
 *
 * @param  array        List of arrays to filter.
 * @param  scalar       The index.
 * @param  bool         Optionally, pass true to drop arrays which pass the
 *                      filter instead of keeping them.
 * @return array        List of arrays which pass the filter.
 */
function ifilter(array $list, $index, $negate = false) {
  if (!is_scalar($index)) {
    throw new InvalidArgumentException(pht('Argument index is not a scalar.'));
  }

  // Keep an array when "value at index is empty" matches the negate flag.
  $keep_empty = (bool)$negate;

  $result = array();
  foreach ($list as $key => $array) {
    if (empty($array[$index]) === $keep_empty) {
      $result[$key] = $array;
    }
  }

  return $result;
}


/**
 * Selects a list of keys from an array, returning a new array with only the
 * key-value pairs identified by the selected keys, in the specified order.
 *
 * Note that since this function orders keys in the result according to the
 * order they appear in the list of keys, there are effectively two common
 * uses: either reducing a large dictionary to a smaller one, or changing the
 * key order on an existing dictionary.
 *
 * @param  dict    Dictionary of key-value pairs to select from.
 * @param  list    List of keys to select.
 * @return dict    Dictionary of only those key-value pairs where the key was
 *                 present in the list of keys to select. Ordering is
 *                 determined by the list order.
 */
function array_select_keys(array $dict, array $keys) {
  $selected = array();

  foreach ($keys as $key) {
    // array_key_exists() (not isset()) so keys mapped to null are retained.
    if (!array_key_exists($key, $dict)) {
      continue;
    }
    $selected[$key] = $dict[$key];
  }

  return $selected;
}


/**
 * Checks if all values of array are instances of the passed class. Throws
 * `InvalidArgumentException` if it isn't true for any value.
 *
 * @param  array
 * @param  string  Name of the class or 'array' to check arrays.
 * @return array   Returns passed array.
*/
function assert_instances_of(array $arr, $class) {
  // The pseudo-class "array" (any letter case) asks for an is_array() check
  // instead of an instanceof check.
  $expect_arrays = (strcasecmp($class, 'array') === 0);

  foreach ($arr as $key => $object) {
    if ($expect_arrays) {
      if (is_array($object)) {
        continue;
      }

      throw new InvalidArgumentException(
        pht(
          "Array item with key '%s' must be of type array, %s given.",
          $key,
          gettype($object)));
    }

    if ($object instanceof $class) {
      continue;
    }

    // Describe objects by class name and everything else by primitive type.
    if (is_object($object)) {
      $given = pht('instance of %s', get_class($object));
    } else {
      $given = gettype($object);
    }

    throw new InvalidArgumentException(
      pht(
        "Array item with key '%s' must be an instance of %s, %s given.",
        $key,
        $class,
        $given));
  }

  return $arr;
}

/**
 * Assert that two arrays have the exact same keys, in any order.
 *
 * @param map Array with expected keys.
 * @param map Array with actual keys.
 * @return void
 */
function assert_same_keys(array $expect, array $actual) {
  // First pass: every expected key must be present in the actual array.
  foreach ($expect as $key => $value) {
    if (!isset($actual[$key]) && !array_key_exists($key, $actual)) {
      throw new InvalidArgumentException(
        pht(
          'Expected to find key "%s", but it is not present.',
          $key));
    }
  }

  // Second pass: the actual array must not carry any keys beyond those.
  foreach ($actual as $key => $value) {
    if (!isset($expect[$key]) && !array_key_exists($key, $expect)) {
      throw new InvalidArgumentException(
        pht(
          'Found unexpected surplus key "%s" where no such key was expected.',
          $key));
    }
  }
}

/**
 * Assert that passed data can be converted to string.
 *
 * @param  string    Assert that this data is valid.
 * @return void
 *
 * @task   assert
 */
function assert_stringlike($parameter) {
  $type = gettype($parameter);

  // Scalars and null always cast cleanly.
  $castable = array('string', 'NULL', 'boolean', 'double', 'integer');
  if (in_array($type, $castable, true)) {
    return;
  }

  // Objects are acceptable only if they know how to stringify themselves.
  if ($type === 'object' && method_exists($parameter, '__toString')) {
    return;
  }

  // Arrays, resources and anything else can not be sensibly cast.
  throw new InvalidArgumentException(
    pht(
      'Argument must be scalar or object which implements %s!',
      '__toString()'));
}

/**
 * Returns the first argument which is not strictly null, or `null` if there
 * are no such arguments.
Identical to the MySQL function of the same name.
 *
 * @param  ...         Zero or more arguments of any type.
 * @return mixed       First non-`null` arg, or null if no such arg exists.
 */
function coalesce(/* ... */) {
  foreach (func_get_args() as $candidate) {
    if ($candidate !== null) {
      return $candidate;
    }
  }

  return null;
}


/**
 * Similar to @{function:coalesce}, but less strict: returns the first
 * non-`empty()` argument, instead of the first argument that is strictly
 * non-`null`. If no argument is nonempty, it returns the last argument. This
 * is useful idiomatically for setting defaults:
 *
 *   $display_name = nonempty($user_name, $full_name, "Anonymous");
 *
 * @param  ...         Zero or more arguments of any type.
 * @return mixed       First non-`empty()` arg, or last arg if no such arg
 *                     exists, or null if you passed in zero args.
 */
function nonempty(/* ... */) {
  // Track the most recently seen argument so the final one can be returned
  // when every argument turns out to be empty.
  $fallback = null;

  foreach (func_get_args() as $candidate) {
    if ($candidate) {
      return $candidate;
    }
    $fallback = $candidate;
  }

  return $fallback;
}


/**
 * Invokes the "new" operator with a vector of arguments. There is no way to
 * `call_user_func_array()` on a class constructor, so you can instead use this
 * function:
 *
 *   $obj = newv($class_name, $argv);
 *
 * That is, these two statements are equivalent:
 *
 *   $pancake = new Pancake('Blueberry', 'Maple Syrup', true);
 *   $pancake = newv('Pancake', array('Blueberry', 'Maple Syrup', true));
 *
 * DO NOT solve this problem in other, more creative ways! Three popular
 * alternatives are:
 *
 *   - Build a fake serialized object and unserialize it.
 *   - Invoke the constructor twice.
 *   - just use `eval()` lol
 *
 * These are really bad solutions to the problem because they can have side
 * effects (e.g., __wakeup()) and give you an object in an otherwise impossible
 * state. Please endeavor to keep your objects in possible states.
 *
 * If you own the classes you're doing this for, you should consider whether
 * or not restructuring your code (for instance, by creating static
 * construction methods) might make it cleaner before using `newv()`. Static
 * constructors can be invoked with `call_user_func_array()`, and may give your
 * class a cleaner and more descriptive API.
 *
 * @param  string  The name of a class.
 * @param  list    Array of arguments to pass to its constructor.
 * @return obj     A new object of the specified class, constructed by passing
 *                 the argument vector to its constructor.
 */
function newv($class_name, array $argv) {
  $reflector = new ReflectionClass($class_name);

  // With no arguments, use the plain instantiation path.
  if (!$argv) {
    return $reflector->newInstance();
  }

  return $reflector->newInstanceArgs($argv);
}


/**
 * Returns the first element of an array. Exactly like reset(), but doesn't
 * choke if you pass it some non-referenceable value like the return value of
 * a function.
 *
 * @param    array Array to retrieve the first element from.
 * @return   wild  The first value of the array.
 */
function head(array $arr) {
  // $arr is a by-value copy, so moving its internal pointer is harmless.
  $first = reset($arr);
  return $first;
}

/**
 * Returns the last element of an array. This is exactly like `end()` except
 * that it won't warn you if you pass some non-referenceable array to
 * it -- e.g., the result of some other array operation.
 *
 * @param    array Array to retrieve the last element from.
 * @return   wild  The last value of the array.
 */
function last(array $arr) {
  // $arr is a by-value copy, so moving its internal pointer is harmless.
  $final = end($arr);
  return $final;
}

/**
 * Returns the first key of an array.
 *
 * @param    array       Array to retrieve the first key from.
 * @return   int|string  The first key of the array.
 */
function head_key(array $arr) {
  // Rewind the copy's pointer, then read the key under it.
  reset($arr);
  $first_key = key($arr);
  return $first_key;
}

/**
 * Returns the last key of an array.
 *
 * @param    array       Array to retrieve the last key from.
 * @return   int|string  The last key of the array.
 */
function last_key(array $arr) {
  // Advance the copy's pointer to the end, then read the key under it.
  end($arr);
  $final_key = key($arr);
  return $final_key;
}


/**
 * Merge a vector of arrays performantly.
This has the same semantics as
 * array_merge(), so these calls are equivalent:
 *
 *   array_merge($a, $b, $c);
 *   array_mergev(array($a, $b, $c));
 *
 * However, when you have a vector of arrays, it is vastly more performant to
 * merge them with this function than by calling array_merge() in a loop,
 * because using a loop generates an intermediary array on each iteration.
 *
 * @param  list  Vector of arrays to merge.
 * @return list  Arrays, merged with array_merge() semantics.
 */
function array_mergev(array $arrayv) {
  if (!$arrayv) {
    return array();
  }

  foreach ($arrayv as $key => $item) {
    if (!is_array($item)) {
      throw new InvalidArgumentException(
        pht(
          'Expected all items passed to "array_mergev()" to be arrays, but '.
          'argument with key "%s" has type "%s".',
          $key,
          gettype($item)));
    }
  }

  // See T13588. In PHP8, "call_user_func_array()" will attempt to use
  // "unnatural" array keys as named parameters, and then fail because
  // "array_merge()" does not accept named parameters . Guarantee the list is
  // a "natural" list to avoid this.
  $arrayv = array_values($arrayv);

  return call_user_func_array('array_merge', $arrayv);
}


/**
 * Split a corpus of text into lines. This function splits on "\n", "\r\n", or
 * a mixture of any of them.
 *
 * NOTE: This function does not treat "\r" on its own as a newline because none
 * of SVN, Git or Mercurial do on any OS.
 *
 * A `null` corpus is treated like the empty string and yields `array('')`.
 *
 * @param string Block of text to be split into lines.
 * @param bool If true, retain line endings in result strings.
 * @return list List of lines.
 *
 * @phutil-external-symbol class PhutilSafeHTML
 * @phutil-external-symbol function phutil_safe_html
 */
function phutil_split_lines($corpus, $retain_endings = true) {
  // Check for null explicitly: passing null to strlen() is deprecated as of
  // PHP 8.1, and a missing corpus should behave like an empty one.
  if ($corpus === null || !strlen($corpus)) {
    return array('');
  }

  // Split on "\r\n" or "\n".
  if ($retain_endings) {
    // The lookbehind splits after each "\n" without consuming it.
    $lines = preg_split('/(?<=\n)/', $corpus);
  } else {
    $lines = preg_split('/\r?\n/', $corpus);
  }

  // If the text ends with "\n" or similar, we'll end up with an empty string
  // at the end; discard it.
  if (end($lines) === '') {
    array_pop($lines);
  }

  // Lines split out of safe HTML are re-marked as safe HTML.
  if ($corpus instanceof PhutilSafeHTML) {
    foreach ($lines as $key => $line) {
      $lines[$key] = phutil_safe_html($line);
    }
  }

  return $lines;
}


/**
 * Simplifies a common use of `array_combine()`. Specifically, this:
 *
 *   COUNTEREXAMPLE:
 *   if ($list) {
 *     $result = array_combine($list, $list);
 *   } else {
 *     // Prior to PHP 5.4, array_combine() failed if given empty arrays.
 *     $result = array();
 *   }
 *
 * ...is equivalent to this:
 *
 *   $result = array_fuse($list);
 *
 * @param   list  List of scalars.
 * @return  dict  Dictionary with inputs mapped to themselves.
 */
function array_fuse(array $list) {
  if ($list) {
    return array_combine($list, $list);
  }
  return array();
}


/**
 * Add an element between every two elements of some array. That is, given a
 * list `A, B, C, D`, and some element to interleave, `x`, this function returns
 * `A, x, B, x, C, x, D`. This works like `implode()`, but does not concatenate
 * the list into a string. In particular:
 *
 *   implode('', array_interleave($x, $list));
 *
 * ...is equivalent to:
 *
 *   implode($x, $list);
 *
 * This function does not preserve keys.
 *
 * @param wild  Element to interleave.
 * @param list  List of elements to be interleaved.
 * @return list Original list with the new element interleaved.
 */
function array_interleave($interleave, array $array) {
  $result = array();
  foreach ($array as $item) {
    $result[] = $item;
    $result[] = $interleave;
  }
  // Drop the separator that trails the final item.
  array_pop($result);
  return $result;
}

function phutil_is_windows() {
  // We can also use PHP_OS, but that's kind of sketchy because it returns
  // "WINNT" for Windows 7 and "Darwin" for Mac OS X. Practically, testing for
  // DIRECTORY_SEPARATOR is more straightforward.
  return (DIRECTORY_SEPARATOR != '/');
}

function phutil_is_hiphop_runtime() {
  return (array_key_exists('HPHP', $_ENV) && $_ENV['HPHP'] === 1);
}

/**
 * Converts a string to a loggable one, with unprintables and newlines escaped.
 *
 * @param string  Any string.
* @return string  String with control and newline characters escaped, suitable
 *                 for printing on a single log line.
 */
function phutil_loggable_string($string) {
  // Fast path: strings made only of printable ASCII are returned untouched.
  // Anchor with "\z" rather than "$": "$" also matches just before a trailing
  // newline, which would let a string like "abc\n" through unescaped.
  if (preg_match('/^[\x20-\x7E]+\z/', $string)) {
    return $string;
  }

  $result = '';

  // Characters with conventional short escapes.
  static $c_map = array(
    '\\' => '\\\\',
    "\n" => '\\n',
    "\r" => '\\r',
    "\t" => '\\t',
  );

  $len = strlen($string);
  for ($ii = 0; $ii < $len; $ii++) {
    $c = $string[$ii];
    if (isset($c_map[$c])) {
      $result .= $c_map[$c];
    } else {
      $o = ord($c);
      // Any other byte outside printable ASCII is written as "\xNN".
      if ($o < 0x20 || $o >= 0x7F) {
        $result .= '\\x'.sprintf('%02X', $o);
      } else {
        $result .= $c;
      }
    }
  }

  return $result;
}


/**
 * Perform an `fwrite()` which distinguishes between EAGAIN and EPIPE.
 *
 * PHP's `fwrite()` is broken, and never returns `false` for writes to broken
 * nonblocking pipes: it always returns 0, and provides no straightforward
 * mechanism for distinguishing between EAGAIN (buffer is full, can't write any
 * more right now) and EPIPE or similar (no write will ever succeed).
 *
 * See: https://bugs.php.net/bug.php?id=39598
 *
 * If you call this method instead of `fwrite()`, it will attempt to detect
 * when a zero-length write is caused by EAGAIN and return `0` only if the
 * write really should be retried.
 *
 * @param resource  Socket or pipe stream.
 * @param string    Bytes to write.
 * @return bool|int Number of bytes written, or `false` on any error (including
 *                  errors which `fwrite()` can not detect, like a broken pipe).
 */
function phutil_fwrite_nonblocking_stream($stream, $bytes) {
  if (!strlen($bytes)) {
    return 0;
  }

  $result = @fwrite($stream, $bytes);
  if ($result !== 0) {
    // In cases where some bytes are written (`$result > 0`) or
    // an error occurs (`$result === false`), the behavior of fwrite() is
    // correct. We can return the value as-is.
    return $result;
  }

  // If we make it here, we performed a 0-length write. Try to distinguish
  // between EAGAIN and EPIPE. To do this, we're going to `stream_select()`
  // the stream, write to it again if PHP claims that it's writable, and
  // consider the pipe broken if the write fails.

  // (Signals received during the "fwrite()" do not appear to affect anything,
  // see D20083.)

  $read = array();
  $write = array($stream);
  $except = array();

  $result = @stream_select($read, $write, $except, 0);
  if ($result === false) {
    // See T13243. If the select is interrupted by a signal, it may return
    // "false" indicating an underlying EINTR condition. In this case, the
    // results (notably, "$write") are not usable because "stream_select()"
    // didn't update them.

    // In this case, treat this stream as blocked and tell the caller to
    // retry, since EINTR is the only condition we're currently aware of that
    // can cause "fwrite()" to return "0" and "stream_select()" to return
    // "false" on the same stream.
    return 0;
  }

  if (!$write) {
    // The stream isn't writable, so we conclude that it probably really is
    // blocked and the underlying error was EAGAIN. Return 0 to indicate that
    // no data could be written yet.
    return 0;
  }

  // If we make it here, PHP **just** claimed that this stream is writable, so
  // perform a write. If the write also fails, conclude that these failures are
  // EPIPE or some other permanent failure.
  $result = @fwrite($stream, $bytes);
  if ($result !== 0) {
    // The write worked or failed explicitly. This value is fine to return.
    return $result;
  }

  // We performed a 0-length write, were told that the stream was writable, and
  // then immediately performed another 0-length write. Conclude that the pipe
  // is broken and return `false`.
  return false;
}


/**
 * Convert a human-readable unit description into a numeric one. This function
 * allows you to replace this:
 *
 *   COUNTEREXAMPLE
 *   $ttl = (60 * 60 * 24 * 30); // 30 days
 *
 * ...with this:
 *
 *   $ttl = phutil_units('30 days in seconds');
 *
 * ...which is self-documenting and difficult to make a mistake with.
* * @param string Human readable description of a unit quantity. * @return int Quantity of specified unit. */ function phutil_units($description) { $matches = null; if (!preg_match('/^(\d+) (\w+) in (\w+)$/', $description, $matches)) { throw new InvalidArgumentException( pht( 'Unable to parse unit specification (expected a specification in the '. 'form "%s"): %s', '5 days in seconds', $description)); } $quantity = (int)$matches[1]; $src_unit = $matches[2]; $dst_unit = $matches[3]; $is_divisor = false; switch ($dst_unit) { case 'seconds': switch ($src_unit) { case 'second': case 'seconds': $factor = 1; break; case 'minute': case 'minutes': $factor = 60; break; case 'hour': case 'hours': $factor = 60 * 60; break; case 'day': case 'days': $factor = 60 * 60 * 24; break; default: throw new InvalidArgumentException( pht( 'This function can not convert from the unit "%s".', $src_unit)); } break; case 'bytes': switch ($src_unit) { case 'byte': case 'bytes': $factor = 1; break; case 'bit': case 'bits': $factor = 8; $is_divisor = true; break; default: throw new InvalidArgumentException( pht( 'This function can not convert from the unit "%s".', $src_unit)); } break; case 'milliseconds': switch ($src_unit) { case 'second': case 'seconds': $factor = 1000; break; case 'minute': case 'minutes': $factor = 1000 * 60; break; case 'hour': case 'hours': $factor = 1000 * 60 * 60; break; case 'day': case 'days': $factor = 1000 * 60 * 60 * 24; break; default: throw new InvalidArgumentException( pht( 'This function can not convert from the unit "%s".', $src_unit)); } break; case 'microseconds': switch ($src_unit) { case 'second': case 'seconds': $factor = 1000000; break; case 'minute': case 'minutes': $factor = 1000000 * 60; break; case 'hour': case 'hours': $factor = 1000000 * 60 * 60; break; case 'day': case 'days': $factor = 1000000 * 60 * 60 * 24; break; default: throw new InvalidArgumentException( pht( 'This function can not convert from the unit "%s".', $src_unit)); } break; 
default: throw new InvalidArgumentException( pht( 'This function can not convert into the unit "%s".', $dst_unit)); } if ($is_divisor) { if ($quantity % $factor) { throw new InvalidArgumentException( pht( '"%s" is not an exact quantity.', $description)); } return (int)($quantity / $factor); } else { return $quantity * $factor; } } /** * Compute the number of microseconds that have elapsed since an earlier * timestamp (from `microtime(true)`). * * @param double Microsecond-precision timestamp, from `microtime(true)`. * @return int Elapsed microseconds. */ function phutil_microseconds_since($timestamp) { if (!is_float($timestamp)) { throw new Exception( pht( 'Argument to "phutil_microseconds_since(...)" should be a value '. 'returned from "microtime(true)".')); } $delta = (microtime(true) - $timestamp); $delta = 1000000 * $delta; $delta = (int)$delta; return $delta; } /** * Decode a JSON dictionary. * * @param string A string which ostensibly contains a JSON-encoded list or * dictionary. * @return mixed Decoded list/dictionary. */ function phutil_json_decode($string) { - $result = @json_decode($string, true); + if ($string === null) { + throw new PhutilJSONParserException(pht('Value "null" is not a valid JSON '. + 'encoded object.')); + } + $result = @json_decode($string, true); if (!is_array($result)) { // Failed to decode the JSON. Try to use @{class:PhutilJSONParser} instead. // This will probably fail, but will throw a useful exception. $parser = new PhutilJSONParser(); $result = $parser->parse($string); } return $result; } /** * Encode a value in JSON, raising an exception if it can not be encoded. * * @param wild A value to encode. * @return string JSON representation of the value. 
*/
function phutil_json_encode($value) {
  $encoded = @json_encode($value);
  if ($encoded !== false) {
    return $encoded;
  }

  // Encoding failed: work out why, so the exception is actionable.
  $reason = phutil_validate_json($value);

  // Older runtimes may lack one or both of the error-reporting functions.
  $extra = null;
  if (function_exists('json_last_error')) {
    $err = json_last_error();
    if (function_exists('json_last_error_msg')) {
      $msg = json_last_error_msg();
      $extra = pht('#%d: %s', $err, $msg);
    } else {
      $extra = pht('#%d', $err);
    }
  }

  if ($extra) {
    $message = pht(
      'Failed to JSON encode value (%s): %s.',
      $extra,
      $reason);
  } else {
    $message = pht(
      'Failed to JSON encode value: %s.',
      $reason);
  }

  throw new Exception($message);
}


/**
 * Produce a human-readable explanation why a value can not be JSON-encoded.
 *
 * @param wild Value to validate.
 * @param string Path within the object to provide context.
 * @return string|null Explanation of why it can't be encoded, or null.
 */
function phutil_validate_json($value, $path = '') {
  // null, booleans and numbers are never reported as a problem.
  if ($value === null || is_bool($value) || is_int($value) ||
    is_float($value)) {
    return null;
  }

  if (is_array($value)) {
    // Every key in this array shares the same "parent > " prefix.
    $prefix = strlen($path) ? $path.' > ' : '';

    foreach ($value as $key => $subvalue) {
      if (!phutil_is_utf8($key)) {
        $full_key = $prefix.phutil_utf8ize($key);
        return pht(
          'Dictionary key "%s" is not valid UTF8, and cannot be JSON encoded.',
          $full_key);
      }

      // Recurse, reporting the first problem found beneath this key.
      $problem = phutil_validate_json($subvalue, $prefix.$key);
      if ($problem !== null) {
        return $problem;
      }
    }

    return null;
  }

  if (is_string($value) && !phutil_is_utf8($value)) {
    // Show only a bounded, displayable excerpt of the offending string.
    $display = substr($value, 0, 256);
    $display = phutil_utf8ize($display);

    if (!strlen($path)) {
      return pht(
        'String value is not valid UTF8, and can not be JSON encoded: %s',
        $display);
    }

    return pht(
      'Dictionary value at key "%s" is not valid UTF8, and cannot be '.
      'JSON encoded: %s',
      $path,
      $display);
  }

  return null;
}


/**
 * Decode an INI string.
 *
 * @param  string
 * @return mixed
 */
function phutil_ini_decode($string) {
  $results = null;
  $trap = new PhutilErrorTrap();

  try {
    $have_call = false;
    if (function_exists('parse_ini_string')) {
      if (defined('INI_SCANNER_RAW')) {
        $results = @parse_ini_string($string, true, INI_SCANNER_RAW);
        $have_call = true;
      }
    }

    if (!$have_call) {
      throw new PhutilMethodNotImplementedException(
        pht(
          '%s is not compatible with your version of PHP (%s). This function '.
          'is only supported on PHP versions newer than 5.3.0.',
          __FUNCTION__,
          phpversion()));
    }

    if ($results === false) {
      throw new PhutilINIParserException(trim($trap->getErrorsAsString()));
    }

    foreach ($results as $section => $result) {
      if (!is_array($result)) {
        // We JSON decode the value in order to perform the following
        // conversions:
        //
        //   - `'true'` => `true`
        //   - `'false'` => `false`
        //   - `'123'` => `123`
        //   - `'1.234'` => `1.234`
        //
        $result = json_decode($result, true);

        if ($result !== null && !is_array($result)) {
          $results[$section] = $result;
        }

        continue;
      }

      foreach ($result as $key => $value) {
        $value = json_decode($value, true);

        if ($value !== null && !is_array($value)) {
          $results[$section][$key] = $value;
        }
      }
    }
  } catch (Exception $ex) {
    $trap->destroy();
    throw $ex;
  }

  $trap->destroy();
  return $results;
}


/**
 * Attempt to censor any plaintext credentials from a string.
 *
 * The major use case here is to censor usernames and passwords from command
 * output. For example, when `git fetch` fails, the output includes credentials
 * for authenticated HTTP remotes.
 *
 * @param   string  Some block of text.
 * @return  string  A similar block of text, but with credentials that could
 *                  be identified censored.
 */
function phutil_censor_credentials($string) {
  // Mask whatever sits between "://" and a following "@" (or end of input).
  $pattern = ',(?<=://)([^/@\s]+)(?=@|$),';
  return preg_replace($pattern, '********', $string);
}


/**
 * Returns a parsable string representation of a variable.
 *
 * This function is intended to behave similarly to PHP's `var_export` function,
 * but the output is intended to follow our style conventions.
* * @param wild The variable you want to export. * @return string */ function phutil_var_export($var) { // `var_export(null, true)` returns `"NULL"` (in uppercase). if ($var === null) { return 'null'; } // PHP's `var_export` doesn't format arrays very nicely. In particular: // // - An empty array is split over two lines (`"array (\n)"`). // - A space separates "array" and the first opening brace. // - Non-associative arrays are returned as associative arrays with an // integer key. // if (is_array($var)) { if (count($var) === 0) { return 'array()'; } // Don't show keys for non-associative arrays. $show_keys = !phutil_is_natural_list($var); $output = array(); $output[] = 'array('; foreach ($var as $key => $value) { // Adjust the indentation of the value. $value = str_replace("\n", "\n ", phutil_var_export($value)); $output[] = ' '. ($show_keys ? var_export($key, true).' => ' : ''). $value.','; } $output[] = ')'; return implode("\n", $output); } // Let PHP handle everything else. return var_export($var, true); } /** * An improved version of `fnmatch`. * * @param string A glob pattern. * @param string A path. * @return bool */ function phutil_fnmatch($glob, $path) { // Modify the glob to allow `**/` to match files in the root directory. $glob = preg_replace('@(?:(? Dictionary of parameters. * @return string HTTP query string. */ function phutil_build_http_querystring(array $parameters) { $pairs = array(); foreach ($parameters as $key => $value) { $pairs[] = array($key, $value); } return phutil_build_http_querystring_from_pairs($pairs); } /** * Build a query string from a list of parameter pairs. * * @param list> List of pairs. * @return string HTTP query string. */ function phutil_build_http_querystring_from_pairs(array $pairs) { // We want to encode in RFC3986 mode, but "http_build_query()" did not get // a flag for that mode until PHP 5.4.0. This is equivalent to calling // "http_build_query()" with the "PHP_QUERY_RFC3986" flag. 
$query = array(); foreach ($pairs as $pair_key => $pair) { if (!is_array($pair) || (count($pair) !== 2)) { throw new Exception( pht( 'HTTP parameter pair (with key "%s") is not valid: each pair must '. 'be an array with exactly two elements.', $pair_key)); } list($key, $value) = $pair; list($key, $value) = phutil_http_parameter_pair($key, $value); $query[] = rawurlencode($key).'='.rawurlencode($value); } $query = implode('&', $query); return $query; } /** * Typecheck and cast an HTTP key-value parameter pair. * * Scalar values are converted to strings. Nonscalar values raise exceptions. * * @param scalar HTTP parameter key. * @param scalar HTTP parameter value. * @return pair Key and value as strings. */ function phutil_http_parameter_pair($key, $value) { try { assert_stringlike($key); } catch (InvalidArgumentException $ex) { throw new PhutilProxyException( pht('HTTP query parameter key must be a scalar.'), $ex); } $key = phutil_string_cast($key); try { assert_stringlike($value); } catch (InvalidArgumentException $ex) { throw new PhutilProxyException( pht( 'HTTP query parameter value (for key "%s") must be a scalar.', $key), $ex); } $value = phutil_string_cast($value); return array($key, $value); } function phutil_decode_mime_header($header) { if (function_exists('iconv_mime_decode')) { return iconv_mime_decode($header, 0, 'UTF-8'); } if (function_exists('mb_decode_mimeheader')) { return mb_decode_mimeheader($header); } throw new Exception( pht( 'Unable to decode MIME header: install "iconv" or "mbstring" '. 'extension.')); } /** * Perform a "(string)" cast without disabling standard exception behavior. * * When PHP invokes "__toString()" automatically, it fatals if the method * raises an exception. In older versions of PHP (until PHP 7.1), this fatal is * fairly opaque and does not give you any information about the exception * itself, although newer versions of PHP at least include the exception * message. 
*
 * This is documented on the "__toString()" manual page:
 *
 *   Warning
 *   You cannot throw an exception from within a __toString() method. Doing
 *   so will result in a fatal error.
 *
 * However, this only applies to implicit invocation by the language runtime.
 * Application code can safely call `__toString()` directly without any effect
 * on exception handling behavior. Very cool.
 *
 * We also reject arrays. PHP casts them to the string "Array". This behavior
 * is, charitably, evil.
 *
 * @param wild Any value which aspires to be represented as a string.
 * @return string String representation of the provided value.
 */
function phutil_string_cast($value) {
  if (is_array($value)) {
    throw new Exception(
      pht(
        'Value passed to "phutil_string_cast()" is an array; arrays can '.
        'not be sensibly cast to strings.'));
  }

  // Everything that is neither an array nor an object takes a plain cast.
  if (!is_object($value)) {
    return (string)$value;
  }

  // Call "__toString()" explicitly so exceptions it raises propagate normally.
  $string = $value->__toString();
  if (is_string($string)) {
    return $string;
  }

  throw new Exception(
    pht(
      'Object (of class "%s") did not return a string from "__toString()".',
      get_class($value)));
}


/**
 * Return a short, human-readable description of an object's type.
 *
 * This is mostly useful for raising errors like "expected x() to return a Y,
 * but it returned a Z".
 *
 * This is similar to "gettype()", but describes objects and arrays in more
 * detail.
 *
 * @param wild Anything.
 * @return string Human-readable description of the value's type.
 */
function phutil_describe_type($value) {
  $description = PhutilTypeSpec::getTypeOf($value);
  return $description;
}


/**
 * Test if a list has consecutive integers starting from zero (0, 1, 2, and so
 * on) as keys, in order.
 *
 * @return bool True if the list is a natural list.
 */
function phutil_is_natural_list(array $list) {
  // array_keys() numbers its entries 0, 1, 2, ...; the list is natural exactly
  // when every key is identical to the position it appears at.
  foreach (array_keys($list) as $position => $key) {
    if ($key !== $position) {
      return false;
    }
  }

  return true;
}


/**
 * Escape text for inclusion in a URI or a query parameter.
Note that this
 * method does NOT escape '/', because "%2F" is invalid in paths and Apache
 * will automatically 404 the page if it's present. This will produce correct
 * (the URIs will work) and desirable (the URIs will be readable) behavior in
 * these cases:
 *
 *    '/path/?param='.phutil_escape_uri($string);         # OK: Query Parameter
 *    '/path/to/'.phutil_escape_uri($string);             # OK: URI Suffix
 *
 * It will potentially produce the WRONG behavior in this special case:
 *
 *    COUNTEREXAMPLE
 *    '/path/to/'.phutil_escape_uri($string).'/thing/';   # BAD: URI Infix
 *
 * In this case, any '/' characters in the string will not be escaped, so you
 * will not be able to distinguish between the string and the suffix (unless
 * you have more information, like you know the format of the suffix). For infix
 * URI components, use @{function:phutil_escape_uri_path_component} instead.
 *
 * @param   string  Some string.
 * @return  string  URI encoded string, except for '/'.
 */
function phutil_escape_uri($string) {
  // Encode everything, then put literal slashes back.
  $encoded = rawurlencode($string);
  return str_replace('%2F', '/', $encoded);
}


/**
 * Escape text for inclusion as an infix URI substring. See discussion at
 * @{function:phutil_escape_uri}. This function covers an unusual special case;
 * @{function:phutil_escape_uri} is usually the correct function to use.
 *
 * This function will escape a string into a format which is safe to put into
 * a URI path and which does not contain '/' so it can be correctly parsed when
 * embedded as a URI infix component.
 *
 * However, you MUST decode the string with
 * @{function:phutil_unescape_uri_path_component} before it can be used in the
 * application.
 *
 * @param   string  Some string.
 * @return  string  URI encoded string that is safe for infix composition.
 */
function phutil_escape_uri_path_component($string) {
  // Two rounds of encoding: the second escapes the "%" signs the first emits.
  $once = rawurlencode($string);
  $twice = rawurlencode($once);
  return $twice;
}


/**
 * Unescape text that was escaped by
 * @{function:phutil_escape_uri_path_component}. See
 * @{function:phutil_escape_uri} for discussion.
*
 * Note that this function is NOT the inverse of
 * @{function:phutil_escape_uri_path_component}! It undoes additional escaping
 * which is added to survive the implied unescaping performed by the webserver
 * when interpreting the request.
 *
 * @param string  Some string emitted
 *                from @{function:phutil_escape_uri_path_component} and
 *                then accessed via a web server.
 * @return string Original string.
 */
function phutil_unescape_uri_path_component($string) {
  $decoded = rawurldecode($string);
  return $decoded;
}

function phutil_is_noninteractive() {
  // Without posix_isatty() we can not tell, and report "false".
  return (function_exists('posix_isatty') && !posix_isatty(STDIN));
}

function phutil_is_interactive() {
  // Without posix_isatty() we can not tell, and report "false".
  return (function_exists('posix_isatty') && posix_isatty(STDIN));
}

function phutil_encode_log($message) {
  // Escape control bytes (0-31), backslash, DEL and all high bytes (127-255).
  $unsafe = "\0..\37\\\177..\377";
  return addcslashes($message, $unsafe);
}

/**
 * Insert a value in between each pair of elements in a list.
 *
 * Keys in the input list are preserved.
 */
function phutil_glue(array $list, $glue) {
  if (!$list) {
    return $list;
  }

  $last_key = last_key($list);

  // Append one copy of the glue to a scratch copy of the list for every gap,
  // recording the desired final key order as we go.
  $keys = array();
  $tmp = $list;

  foreach ($list as $key => $ignored) {
    $keys[] = $key;
    if ($key !== $last_key) {
      $tmp[] = $glue;
      $keys[] = last_key($tmp);
    }
  }

  // Reorder the scratch copy so each glue element follows its original item.
  return array_select_keys($tmp, $keys);
}

function phutil_partition(array $map) {
  $partitions = array();

  // The run of adjacent, identical values currently being collected. It is
  // empty only before the first element has been seen.
  $run = array();
  $run_value = null;

  foreach ($map as $key => $value) {
    if ($run && ($run_value === $value)) {
      // Same value as the current run: extend it.
      $run[$key] = $value;
      continue;
    }

    // The value changed: close the previous run, if any, and start a new one.
    if ($run) {
      $partitions[] = $run;
    }

    $run = array($key => $value);
    $run_value = $value;
  }

  // Flush the final run.
  if ($run) {
    $partitions[] = $run;
  }

  return $partitions;
}

function phutil_preg_match(
  $pattern,
  $subject,
  $flags = 0,
  $offset = 0) {

  $matches = null;
  $result = @preg_match($pattern, $subject, $matches, $flags, $offset);

  // A false or null result signals an engine failure rather than "no match".
  $failed = ($result === false || $result === null);
  if ($failed) {
    phutil_raise_preg_exception(
      'preg_match',
      array(
        $pattern,
        $subject,
        $matches,
        $flags,
        $offset,
      ));
  }

  return $matches;
}

function phutil_preg_match_all(
  $pattern,
  $subject,
  $flags = 0,
  $offset = 0) {

  $matches = null;
  $result = @preg_match_all($pattern, $subject, $matches, $flags, $offset);

  // A false or null result signals an engine failure rather than "no match".
  $failed = ($result === false || $result === null);
  if ($failed) {
    phutil_raise_preg_exception(
      'preg_match_all',
      array(
        $pattern,
        $subject,
        $matches,
        $flags,
        $offset,
      ));
  }

  return $matches;
}

function phutil_raise_preg_exception($function, array $argv) {
  // Repeat the failing call under an error trap to capture the engine message.
  $trap = new PhutilErrorTrap();

  // NOTE: This ugly construction to avoid issues with reference behavior when
  // passing values through "call_user_func_array()".

  switch ($function) {
    case 'preg_match':
      @preg_match($argv[0], $argv[1], $argv[2], $argv[3], $argv[4]);
      break;
    case 'preg_match_all':
      @preg_match_all($argv[0], $argv[1], $argv[2], $argv[3], $argv[4]);
      break;
  }
  $error_message = $trap->getErrorsAsString();

  $trap->destroy();

  $pattern = $argv[0];
  $pattern_display = sprintf(
    '"%s"',
    addcslashes($pattern, '\\\"'));

  $message = array();
  $message[] = pht(
    'Call to %s(%s, ...) failed.',
    $function,
    $pattern_display);

  if (strlen($error_message)) {
    $message[] = pht(
      'Regular expression engine emitted message: %s',
      $error_message);
  }

  $message = implode("\n\n", $message);

  throw new PhutilRegexException($message);
}


/**
 * Test if a value is a nonempty string.
 *
 * The value "null" and the empty string are considered empty; all other
 * strings are considered nonempty.
 *
 * This method raises an exception if passed a value which is neither null
 * nor a string.
 *
 * @param Value to test.
 * @return bool True if the parameter is a nonempty string.
 */
function phutil_nonempty_string($value) {
  // The two "empty" inputs.
  if ($value === null || $value === '') {
    return false;
  }

  if (is_string($value)) {
    return true;
  }

  throw new InvalidArgumentException(
    pht(
      'Call to phutil_nonempty_string() expected null or a string, got: %s.',
      phutil_describe_type($value)));
}


/**
 * Test if a value is a nonempty, stringlike value.
* * The value "null", the empty string, and objects which have a "__toString()" * method which returns the empty string are empty. * * Other strings, and objects with a "__toString()" method that returns a * string other than the empty string are considered nonempty. * * This method raises an exception if passed any other value. * * @param Value to test. * @return bool True if the parameter is a nonempty, stringlike value. */ function phutil_nonempty_stringlike($value) { if ($value === null) { return false; } if ($value === '') { return false; } if (is_string($value)) { return true; } if (is_object($value)) { try { $string = phutil_string_cast($value); return phutil_nonempty_string($string); } catch (Exception $ex) { // Continue below. } catch (Throwable $ex) { // Continue below. } } throw new InvalidArgumentException( pht( 'Call to phutil_nonempty_stringlike() expected a string or stringlike '. 'object, got: %s.', phutil_describe_type($value))); } /** * Test if a value is a nonempty, scalar value. * * The value "null", the empty string, and objects which have a "__toString()" * method which returns the empty string are empty. * * Other strings, objects with a "__toString()" method which returns a * string other than the empty string, integers, and floats are considered * scalar. * * This method raises an exception if passed any other value. * * @param Value to test. * @return bool True if the parameter is a nonempty, scalar value. */ function phutil_nonempty_scalar($value) { if ($value === null) { return false; } if ($value === '') { return false; } if (is_string($value) || is_int($value) || is_float($value)) { return true; } if (is_object($value)) { try { $string = phutil_string_cast($value); return phutil_nonempty_string($string); } catch (Exception $ex) { // Continue below. } catch (Throwable $ex) { // Continue below. } } throw new InvalidArgumentException( pht( 'Call to phutil_nonempty_scalar() expected: a string; or stringlike '. 'object; or int; or float. 
Got: %s.', phutil_describe_type($value))); }