diff --git a/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php b/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php index ae3dff9995..98947f56c1 100644 --- a/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php +++ b/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php @@ -1,63 +1,68 @@ id(new PhabricatorBoolEditField()) ->setOptions( pht('Allow New Repositories'), pht('Prevent New Repositories')) ->setValue(false), ); } public function getBindingFieldSpecifications(AlmanacBinding $binding) { $protocols = array( array( 'value' => 'http', 'port' => 80, ), array( 'value' => 'https', 'port' => 443, ), array( 'value' => 'ssh', 'port' => 22, ), ); $default_value = 'http'; if ($binding->hasInterface()) { $interface = $binding->getInterface(); $port = $interface->getPort(); $default_ports = ipull($protocols, 'value', 'port'); $default_value = idx($default_ports, $port, $default_value); } return array( 'protocol' => id(new PhabricatorSelectEditField()) ->setOptions(ipull($protocols, 'value', 'value')) ->setValue($default_value), + 'writable' => id(new PhabricatorBoolEditField()) + ->setOptions( + pht('Prevent Writes'), + pht('Allow Writes')) + ->setValue(true), ); } } diff --git a/src/applications/diffusion/controller/DiffusionServeController.php b/src/applications/diffusion/controller/DiffusionServeController.php index 6b9c5c6802..5a5c446a29 100644 --- a/src/applications/diffusion/controller/DiffusionServeController.php +++ b/src/applications/diffusion/controller/DiffusionServeController.php @@ -1,1241 +1,1242 @@ getRequest()->setUser($viewer); $this->serviceViewer = $viewer; return $this; } public function getServiceViewer() { return $this->serviceViewer; } public function setServiceRepository(PhabricatorRepository $repository) { $this->serviceRepository = $repository; return $this; } public function getServiceRepository() { return $this->serviceRepository; } public function getIsGitLFSRequest() { return $this->isGitLFSRequest; } public function getGitLFSToken() { return $this->gitLFSToken; } public function isVCSRequest(AphrontRequest $request) { $identifier = $this->getRepositoryIdentifierFromRequest($request); if ($identifier === null) { return null; } $content_type = $request->getHTTPHeader('Content-Type'); $user_agent = idx($_SERVER, 'HTTP_USER_AGENT'); $request_type = $request->getHTTPHeader('X-Phabricator-Request-Type'); // This may have a "charset" suffix, so only match the prefix. $lfs_pattern = '(^application/vnd\\.git-lfs\\+json(;|\z))'; $vcs = null; if ($request->getExists('service')) { $service = $request->getStr('service'); // We get this initially for `info/refs`. // Git also gives us a User-Agent like "git/1.8.2.3". $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; } else if (strncmp($user_agent, 'git/', 4) === 0) { $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; } else if ($content_type == 'application/x-git-upload-pack-request') { // We get this for `git-upload-pack`. $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; } else if ($content_type == 'application/x-git-receive-pack-request') { // We get this for `git-receive-pack`. $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; } else if (preg_match($lfs_pattern, $content_type)) { // This is a Git LFS HTTP API request. $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; $this->isGitLFSRequest = true; } else if ($request_type == 'git-lfs') { // This is a Git LFS object content request. $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; $this->isGitLFSRequest = true; } else if ($request->getExists('cmd')) { // Mercurial also sends an Accept header like // "application/mercurial-0.1", and a User-Agent like // "mercurial/proto-1.0". $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL; } else { // Subversion also sends an initial OPTIONS request (vs GET/POST), and // has a User-Agent like "SVN/1.8.3 (x86_64-apple-darwin11.4.2) // serf/1.3.2". $dav = $request->getHTTPHeader('DAV'); $dav = new PhutilURI($dav); if ($dav->getDomain() === 'subversion.tigris.org') { $vcs = PhabricatorRepositoryType::REPOSITORY_TYPE_SVN; } } return $vcs; } public function handleRequest(AphrontRequest $request) { $service_exception = null; $response = null; try { $response = $this->serveRequest($request); } catch (Exception $ex) { $service_exception = $ex; } try { $remote_addr = $request->getRemoteAddress(); if ($request->isHTTPS()) { $remote_protocol = PhabricatorRepositoryPullEvent::PROTOCOL_HTTPS; } else { $remote_protocol = PhabricatorRepositoryPullEvent::PROTOCOL_HTTP; } $pull_event = id(new PhabricatorRepositoryPullEvent()) ->setEpoch(PhabricatorTime::getNow()) ->setRemoteAddress($remote_addr) ->setRemoteProtocol($remote_protocol); if ($response) { $response_code = $response->getHTTPResponseCode(); if ($response_code == 200) { $pull_event ->setResultType(PhabricatorRepositoryPullEvent::RESULT_PULL) ->setResultCode($response_code); } else { $pull_event ->setResultType(PhabricatorRepositoryPullEvent::RESULT_ERROR) ->setResultCode($response_code); } if ($response instanceof PhabricatorVCSResponse) { $pull_event->setProperties( array( 'response.message' => $response->getMessage(), )); } } else { $pull_event ->setResultType(PhabricatorRepositoryPullEvent::RESULT_EXCEPTION) ->setResultCode(500) ->setProperties( array( 'exception.class' => get_class($ex), 'exception.message' => $ex->getMessage(), )); } $viewer = $this->getServiceViewer(); if ($viewer) { $pull_event->setPullerPHID($viewer->getPHID()); } $repository = $this->getServiceRepository(); if ($repository) { $pull_event->setRepositoryPHID($repository->getPHID()); } $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); $pull_event->save(); unset($unguarded); } catch (Exception $ex) { if ($service_exception) { throw $service_exception; } throw $ex; } if ($service_exception) { throw $service_exception; } return $response; } private function serveRequest(AphrontRequest $request) { $identifier = $this->getRepositoryIdentifierFromRequest($request); // If authentication credentials have been provided, try to find a user // that actually matches those credentials. // We require both the username and password to be nonempty, because Git // won't prompt users who provide a username but no password otherwise. // See T10797 for discussion. $have_user = strlen(idx($_SERVER, 'PHP_AUTH_USER')); $have_pass = strlen(idx($_SERVER, 'PHP_AUTH_PW')); if ($have_user && $have_pass) { $username = $_SERVER['PHP_AUTH_USER']; $password = new PhutilOpaqueEnvelope($_SERVER['PHP_AUTH_PW']); // Try Git LFS auth first since we can usually reject it without doing // any queries, since the username won't match the one we expect or the // request won't be LFS. $viewer = $this->authenticateGitLFSUser($username, $password); // If that failed, try normal auth. Note that we can use normal auth on // LFS requests, so this isn't strictly an alternative to LFS auth. if (!$viewer) { $viewer = $this->authenticateHTTPRepositoryUser($username, $password); } if (!$viewer) { return new PhabricatorVCSResponse( 403, pht('Invalid credentials.')); } } else { // User hasn't provided credentials, which means we count them as // being "not logged in". $viewer = new PhabricatorUser(); } $this->setServiceViewer($viewer); $allow_public = PhabricatorEnv::getEnvConfig('policy.allow-public'); $allow_auth = PhabricatorEnv::getEnvConfig('diffusion.allow-http-auth'); if (!$allow_public) { if (!$viewer->isLoggedIn()) { if ($allow_auth) { return new PhabricatorVCSResponse( 401, pht('You must log in to access repositories.')); } else { return new PhabricatorVCSResponse( 403, pht('Public and authenticated HTTP access are both forbidden.')); } } } try { $repository = id(new PhabricatorRepositoryQuery()) ->setViewer($viewer) ->withIdentifiers(array($identifier)) ->needURIs(true) ->executeOne(); if (!$repository) { return new PhabricatorVCSResponse( 404, pht('No such repository exists.')); } } catch (PhabricatorPolicyException $ex) { if ($viewer->isLoggedIn()) { return new PhabricatorVCSResponse( 403, pht('You do not have permission to access this repository.')); } else { if ($allow_auth) { return new PhabricatorVCSResponse( 401, pht('You must log in to access this repository.')); } else { return new PhabricatorVCSResponse( 403, pht( 'This repository requires authentication, which is forbidden '. 'over HTTP.')); } } } $response = $this->validateGitLFSRequest($repository, $viewer); if ($response) { return $response; } $this->setServiceRepository($repository); if (!$repository->isTracked()) { return new PhabricatorVCSResponse( 403, pht('This repository is inactive.')); } $is_push = !$this->isReadOnlyRequest($repository); if ($this->getIsGitLFSRequest() && $this->getGitLFSToken()) { // We allow git LFS requests over HTTP even if the repository does not // otherwise support HTTP reads or writes, as long as the user is using a // token from SSH. If they're using HTTP username + password auth, they // have to obey the normal HTTP rules. } else { // For now, we don't distinguish between HTTP and HTTPS-originated // requests that are proxied within the cluster, so the user can connect // with HTTPS but we may be on HTTP by the time we reach this part of // the code. Allow things to move forward as long as either protocol // can be served. $proto_https = PhabricatorRepositoryURI::BUILTIN_PROTOCOL_HTTPS; $proto_http = PhabricatorRepositoryURI::BUILTIN_PROTOCOL_HTTP; $can_read = $repository->canServeProtocol($proto_https, false) || $repository->canServeProtocol($proto_http, false); if (!$can_read) { return new PhabricatorVCSResponse( 403, pht('This repository is not available over HTTP.')); } if ($is_push) { $can_write = $repository->canServeProtocol($proto_https, true) || $repository->canServeProtocol($proto_http, true); if (!$can_write) { return new PhabricatorVCSResponse( 403, pht('This repository is read-only over HTTP.')); } } } if ($is_push) { $can_push = PhabricatorPolicyFilter::hasCapability( $viewer, $repository, DiffusionPushCapability::CAPABILITY); if (!$can_push) { if ($viewer->isLoggedIn()) { $error_code = 403; $error_message = pht( 'You do not have permission to push to this repository ("%s").', $repository->getDisplayName()); if ($this->getIsGitLFSRequest()) { return DiffusionGitLFSResponse::newErrorResponse( $error_code, $error_message); } else { return new PhabricatorVCSResponse( $error_code, $error_message); } } else { if ($allow_auth) { return new PhabricatorVCSResponse( 401, pht('You must log in to push to this repository.')); } else { return new PhabricatorVCSResponse( 403, pht( 'Pushing to this repository requires authentication, '. 'which is forbidden over HTTP.')); } } } } $vcs_type = $repository->getVersionControlSystem(); $req_type = $this->isVCSRequest($request); if ($vcs_type != $req_type) { switch ($req_type) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $result = new PhabricatorVCSResponse( 500, pht( 'This repository ("%s") is not a Git repository.', $repository->getDisplayName())); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $result = new PhabricatorVCSResponse( 500, pht( 'This repository ("%s") is not a Mercurial repository.', $repository->getDisplayName())); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $result = new PhabricatorVCSResponse( 500, pht( 'This repository ("%s") is not a Subversion repository.', $repository->getDisplayName())); break; default: $result = new PhabricatorVCSResponse( 500, pht('Unknown request type.')); break; } } else { switch ($vcs_type) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $result = $this->serveVCSRequest($repository, $viewer); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $result = new PhabricatorVCSResponse( 500, pht( 'Phabricator does not support HTTP access to Subversion '. 'repositories.')); break; default: $result = new PhabricatorVCSResponse( 500, pht('Unknown version control system.')); break; } } $code = $result->getHTTPResponseCode(); if ($is_push && ($code == 200)) { $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); $repository->writeStatusMessage( PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE, PhabricatorRepositoryStatusMessage::CODE_OKAY); unset($unguarded); } return $result; } private function serveVCSRequest( PhabricatorRepository $repository, PhabricatorUser $viewer) { // We can serve Git LFS requests first, since we don't need to proxy them. // It's also important that LFS requests never fall through to standard // service pathways, because that would let you use LFS tokens to read // normal repository data. if ($this->getIsGitLFSRequest()) { return $this->serveGitLFSRequest($repository, $viewer); } // If this repository is hosted on a service, we need to proxy the request // to a host which can serve it. $is_cluster_request = $this->getRequest()->isProxiedClusterRequest(); $uri = $repository->getAlmanacServiceURI( $viewer, array( 'neverProxy' => $is_cluster_request, 'protocols' => array( 'http', 'https', ), + 'writable' => !$this->isReadOnlyRequest($repository), )); if ($uri) { $future = $this->getRequest()->newClusterProxyFuture($uri); return id(new AphrontHTTPProxyResponse()) ->setHTTPFuture($future); } // Otherwise, we're going to handle the request locally. $vcs_type = $repository->getVersionControlSystem(); switch ($vcs_type) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $result = $this->serveGitRequest($repository, $viewer); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $result = $this->serveMercurialRequest($repository, $viewer); break; } return $result; } private function isReadOnlyRequest( PhabricatorRepository $repository) { $request = $this->getRequest(); $method = $_SERVER['REQUEST_METHOD']; // TODO: This implementation is safe by default, but very incomplete. if ($this->getIsGitLFSRequest()) { return $this->isGitLFSReadOnlyRequest($repository); } switch ($repository->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $service = $request->getStr('service'); $path = $this->getRequestDirectoryPath($repository); // NOTE: Service names are the reverse of what you might expect, as they // are from the point of view of the server. The main read service is // "git-upload-pack", and the main write service is "git-receive-pack". if ($method == 'GET' && $path == '/info/refs' && $service == 'git-upload-pack') { return true; } if ($path == '/git-upload-pack') { return true; } break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $cmd = $request->getStr('cmd'); if ($cmd == 'batch') { $cmds = idx($this->getMercurialArguments(), 'cmds'); return DiffusionMercurialWireProtocol::isReadOnlyBatchCommand($cmds); } return DiffusionMercurialWireProtocol::isReadOnlyCommand($cmd); case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: break; } return false; } /** * @phutil-external-symbol class PhabricatorStartup */ private function serveGitRequest( PhabricatorRepository $repository, PhabricatorUser $viewer) { $request = $this->getRequest(); $request_path = $this->getRequestDirectoryPath($repository); $repository_root = $repository->getLocalPath(); // Rebuild the query string to strip `__magic__` parameters and prevent // issues where we might interpret inputs like "service=read&service=write" // differently than the server does and pass it an unsafe command. // NOTE: This does not use getPassthroughRequestParameters() because // that code is HTTP-method agnostic and will encode POST data. $query_data = $_GET; foreach ($query_data as $key => $value) { if (!strncmp($key, '__', 2)) { unset($query_data[$key]); } } $query_string = http_build_query($query_data, '', '&'); // We're about to wipe out PATH with the rest of the environment, so // resolve the binary first. $bin = Filesystem::resolveBinary('git-http-backend'); if (!$bin) { throw new Exception( pht( 'Unable to find `%s` in %s!', 'git-http-backend', '$PATH')); } // NOTE: We do not set HTTP_CONTENT_ENCODING here, because we already // decompressed the request when we read the request body, so the body is // just plain data with no encoding. $env = array( 'REQUEST_METHOD' => $_SERVER['REQUEST_METHOD'], 'QUERY_STRING' => $query_string, 'CONTENT_TYPE' => $request->getHTTPHeader('Content-Type'), 'REMOTE_ADDR' => $_SERVER['REMOTE_ADDR'], 'GIT_PROJECT_ROOT' => $repository_root, 'GIT_HTTP_EXPORT_ALL' => '1', 'PATH_INFO' => $request_path, 'REMOTE_USER' => $viewer->getUsername(), // TODO: Set these correctly. // GIT_COMMITTER_NAME // GIT_COMMITTER_EMAIL ) + $this->getCommonEnvironment($viewer); $input = PhabricatorStartup::getRawInput(); $command = csprintf('%s', $bin); $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); $cluster_engine = id(new DiffusionRepositoryClusterEngine()) ->setViewer($viewer) ->setRepository($repository); $did_write_lock = false; if ($this->isReadOnlyRequest($repository)) { $cluster_engine->synchronizeWorkingCopyBeforeRead(); } else { $did_write_lock = true; $cluster_engine->synchronizeWorkingCopyBeforeWrite(); } $caught = null; try { list($err, $stdout, $stderr) = id(new ExecFuture('%C', $command)) ->setEnv($env, true) ->write($input) ->resolve(); } catch (Exception $ex) { $caught = $ex; } if ($did_write_lock) { $cluster_engine->synchronizeWorkingCopyAfterWrite(); } unset($unguarded); if ($caught) { throw $caught; } if ($err) { if ($this->isValidGitShallowCloneResponse($stdout, $stderr)) { // Ignore the error if the response passes this special check for // validity. $err = 0; } } if ($err) { return new PhabricatorVCSResponse( 500, pht( 'Error %d: %s', $err, phutil_utf8ize($stderr))); } return id(new DiffusionGitResponse())->setGitData($stdout); } private function getRequestDirectoryPath(PhabricatorRepository $repository) { $request = $this->getRequest(); $request_path = $request->getRequestURI()->getPath(); $info = PhabricatorRepository::parseRepositoryServicePath( $request_path, $repository->getVersionControlSystem()); $base_path = $info['path']; // For Git repositories, strip an optional directory component if it // isn't the name of a known Git resource. This allows users to clone // repositories as "/diffusion/X/anything.git", for example. if ($repository->isGit()) { $known = array( 'info', 'git-upload-pack', 'git-receive-pack', ); foreach ($known as $key => $path) { $known[$key] = preg_quote($path, '@'); } $known = implode('|', $known); if (preg_match('@^/([^/]+)/('.$known.')(/|$)@', $base_path)) { $base_path = preg_replace('@^/([^/]+)@', '', $base_path); } } return $base_path; } private function authenticateGitLFSUser( $username, PhutilOpaqueEnvelope $password) { // Never accept these credentials for requests which aren't LFS requests. if (!$this->getIsGitLFSRequest()) { return null; } // If we have the wrong username, don't bother checking if the token // is right. if ($username !== DiffusionGitLFSTemporaryTokenType::HTTP_USERNAME) { return null; } $lfs_pass = $password->openEnvelope(); $lfs_hash = PhabricatorHash::weakDigest($lfs_pass); $token = id(new PhabricatorAuthTemporaryTokenQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withTokenTypes(array(DiffusionGitLFSTemporaryTokenType::TOKENTYPE)) ->withTokenCodes(array($lfs_hash)) ->withExpired(false) ->executeOne(); if (!$token) { return null; } $user = id(new PhabricatorPeopleQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withPHIDs(array($token->getUserPHID())) ->executeOne(); if (!$user) { return null; } if (!$user->isUserActivated()) { return null; } $this->gitLFSToken = $token; return $user; } private function authenticateHTTPRepositoryUser( $username, PhutilOpaqueEnvelope $password) { if (!PhabricatorEnv::getEnvConfig('diffusion.allow-http-auth')) { // No HTTP auth permitted. return null; } if (!strlen($username)) { // No username. return null; } if (!strlen($password->openEnvelope())) { // No password. return null; } $user = id(new PhabricatorPeopleQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withUsernames(array($username)) ->executeOne(); if (!$user) { // Username doesn't match anything. return null; } if (!$user->isUserActivated()) { // User is not activated. return null; } $request = $this->getRequest(); $content_source = PhabricatorContentSource::newFromRequest($request); $engine = id(new PhabricatorAuthPasswordEngine()) ->setViewer($user) ->setContentSource($content_source) ->setPasswordType(PhabricatorAuthPassword::PASSWORD_TYPE_VCS) ->setObject($user); if (!$engine->isValidPassword($password)) { return null; } return $user; } private function serveMercurialRequest( PhabricatorRepository $repository, PhabricatorUser $viewer) { $request = $this->getRequest(); $bin = Filesystem::resolveBinary('hg'); if (!$bin) { throw new Exception( pht( 'Unable to find `%s` in %s!', 'hg', '$PATH')); } $env = $this->getCommonEnvironment($viewer); $input = PhabricatorStartup::getRawInput(); $cmd = $request->getStr('cmd'); $args = $this->getMercurialArguments(); $args = $this->formatMercurialArguments($cmd, $args); if (strlen($input)) { $input = strlen($input)."\n".$input."0\n"; } $command = csprintf( '%s -R %s serve --stdio', $bin, $repository->getLocalPath()); $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); list($err, $stdout, $stderr) = id(new ExecFuture('%C', $command)) ->setEnv($env, true) ->setCWD($repository->getLocalPath()) ->write("{$cmd}\n{$args}{$input}") ->resolve(); if ($err) { return new PhabricatorVCSResponse( 500, pht('Error %d: %s', $err, $stderr)); } if ($cmd == 'getbundle' || $cmd == 'changegroup' || $cmd == 'changegroupsubset') { // We're not completely sure that "changegroup" and "changegroupsubset" // actually work, they're for very old Mercurial. $body = gzcompress($stdout); } else if ($cmd == 'unbundle') { // This includes diagnostic information and anything echoed by commit // hooks. We ignore `stdout` since it just has protocol garbage, and // substitute `stderr`. $body = strlen($stderr)."\n".$stderr; } else { list($length, $body) = explode("\n", $stdout, 2); if ($cmd == 'capabilities') { $body = DiffusionMercurialWireProtocol::filterBundle2Capability($body); } } return id(new DiffusionMercurialResponse())->setContent($body); } private function getMercurialArguments() { // Mercurial sends arguments in HTTP headers. "Why?", you might wonder, // "Why would you do this?". $args_raw = array(); for ($ii = 1;; $ii++) { $header = 'HTTP_X_HGARG_'.$ii; if (!array_key_exists($header, $_SERVER)) { break; } $args_raw[] = $_SERVER[$header]; } $args_raw = implode('', $args_raw); return id(new PhutilQueryStringParser()) ->parseQueryString($args_raw); } private function formatMercurialArguments($command, array $arguments) { $spec = DiffusionMercurialWireProtocol::getCommandArgs($command); $out = array(); // Mercurial takes normal arguments like this: // // name // value $has_star = false; foreach ($spec as $arg_key) { if ($arg_key == '*') { $has_star = true; continue; } if (isset($arguments[$arg_key])) { $value = $arguments[$arg_key]; $size = strlen($value); $out[] = "{$arg_key} {$size}\n{$value}"; unset($arguments[$arg_key]); } } if ($has_star) { // Mercurial takes arguments for variable argument lists roughly like // this: // // * // argname1 // argvalue1 // argname2 // argvalue2 $count = count($arguments); $out[] = "* {$count}\n"; foreach ($arguments as $key => $value) { if (in_array($key, $spec)) { // We already added this argument above, so skip it. continue; } $size = strlen($value); $out[] = "{$key} {$size}\n{$value}"; } } return implode('', $out); } private function isValidGitShallowCloneResponse($stdout, $stderr) { // If you execute `git clone --depth N ...`, git sends a request which // `git-http-backend` responds to by emitting valid output and then exiting // with a failure code and an error message. If we ignore this error, // everything works. // This is a pretty funky fix: it would be nice to more precisely detect // that a request is a `--depth N` clone request, but we don't have any code // to decode protocol frames yet. Instead, look for reasonable evidence // in the error and output that we're looking at a `--depth` clone. // For evidence this isn't completely crazy, see: // https://github.com/schacon/grack/pull/7 $stdout_regexp = '(^Content-Type: application/x-git-upload-pack-result)m'; $stderr_regexp = '(The remote end hung up unexpectedly)'; $has_pack = preg_match($stdout_regexp, $stdout); $is_hangup = preg_match($stderr_regexp, $stderr); return $has_pack && $is_hangup; } private function getCommonEnvironment(PhabricatorUser $viewer) { $remote_address = $this->getRequest()->getRemoteAddress(); return array( DiffusionCommitHookEngine::ENV_USER => $viewer->getUsername(), DiffusionCommitHookEngine::ENV_REMOTE_ADDRESS => $remote_address, DiffusionCommitHookEngine::ENV_REMOTE_PROTOCOL => 'http', ); } private function validateGitLFSRequest( PhabricatorRepository $repository, PhabricatorUser $viewer) { if (!$this->getIsGitLFSRequest()) { return null; } if (!$repository->canUseGitLFS()) { return new PhabricatorVCSResponse( 403, pht( 'The requested repository ("%s") does not support Git LFS.', $repository->getDisplayName())); } // If this is using an LFS token, sanity check that we're using it on the // correct repository. This shouldn't really matter since the user could // just request a proper token anyway, but it suspicious and should not // be permitted. $token = $this->getGitLFSToken(); if ($token) { $resource = $token->getTokenResource(); if ($resource !== $repository->getPHID()) { return new PhabricatorVCSResponse( 403, pht( 'The authentication token provided in the request is bound to '. 'a different repository than the requested repository ("%s").', $repository->getDisplayName())); } } return null; } private function serveGitLFSRequest( PhabricatorRepository $repository, PhabricatorUser $viewer) { if (!$this->getIsGitLFSRequest()) { throw new Exception(pht('This is not a Git LFS request!')); } $path = $this->getGitLFSRequestPath($repository); $matches = null; if (preg_match('(^upload/(.*)\z)', $path, $matches)) { $oid = $matches[1]; return $this->serveGitLFSUploadRequest($repository, $viewer, $oid); } else if ($path == 'objects/batch') { return $this->serveGitLFSBatchRequest($repository, $viewer); } else { return DiffusionGitLFSResponse::newErrorResponse( 404, pht( 'Git LFS operation "%s" is not supported by this server.', $path)); } } private function serveGitLFSBatchRequest( PhabricatorRepository $repository, PhabricatorUser $viewer) { $input = $this->getGitLFSInput(); $operation = idx($input, 'operation'); switch ($operation) { case 'upload': $want_upload = true; break; case 'download': $want_upload = false; break; default: return DiffusionGitLFSResponse::newErrorResponse( 404, pht( 'Git LFS batch operation "%s" is not supported by this server.', $operation)); } $objects = idx($input, 'objects', array()); $hashes = array(); foreach ($objects as $object) { $hashes[] = idx($object, 'oid'); } if ($hashes) { $refs = id(new PhabricatorRepositoryGitLFSRefQuery()) ->setViewer($viewer) ->withRepositoryPHIDs(array($repository->getPHID())) ->withObjectHashes($hashes) ->execute(); $refs = mpull($refs, null, 'getObjectHash'); } else { $refs = array(); } $file_phids = mpull($refs, 'getFilePHID'); if ($file_phids) { $files = id(new PhabricatorFileQuery()) ->setViewer($viewer) ->withPHIDs($file_phids) ->execute(); $files = mpull($files, null, 'getPHID'); } else { $files = array(); } $authorization = null; $output = array(); foreach ($objects as $object) { $oid = idx($object, 'oid'); $size = idx($object, 'size'); $ref = idx($refs, $oid); $error = null; // NOTE: If we already have a ref for this object, we only emit a // "download" action. The client should not upload the file again. $actions = array(); if ($ref) { $file = idx($files, $ref->getFilePHID()); if ($file) { // Git LFS may prompt users for authentication if the action does // not provide an "Authorization" header and does not have a query // parameter named "token". See here for discussion: // $no_authorization = 'Basic '.base64_encode('none'); $get_uri = $file->getCDNURI('data'); $actions['download'] = array( 'href' => $get_uri, 'header' => array( 'Authorization' => $no_authorization, 'X-Phabricator-Request-Type' => 'git-lfs', ), ); } else { $error = array( 'code' => 404, 'message' => pht( 'Object "%s" was previously uploaded, but no longer exists '. 'on this server.', $oid), ); } } else if ($want_upload) { if (!$authorization) { // Here, we could reuse the existing authorization if we have one, // but it's a little simpler to just generate a new one // unconditionally. $authorization = $this->newGitLFSHTTPAuthorization( $repository, $viewer, $operation); } $put_uri = $repository->getGitLFSURI("info/lfs/upload/{$oid}"); $actions['upload'] = array( 'href' => $put_uri, 'header' => array( 'Authorization' => $authorization, 'X-Phabricator-Request-Type' => 'git-lfs', ), ); } $object = array( 'oid' => $oid, 'size' => $size, ); if ($actions) { $object['actions'] = $actions; } if ($error) { $object['error'] = $error; } $output[] = $object; } $output = array( 'objects' => $output, ); return id(new DiffusionGitLFSResponse()) ->setContent($output); } private function serveGitLFSUploadRequest( PhabricatorRepository $repository, PhabricatorUser $viewer, $oid) { $ref = id(new PhabricatorRepositoryGitLFSRefQuery()) ->setViewer($viewer) ->withRepositoryPHIDs(array($repository->getPHID())) ->withObjectHashes(array($oid)) ->executeOne(); if ($ref) { return DiffusionGitLFSResponse::newErrorResponse( 405, pht( 'Content for object "%s" is already known to this server. It can '. 'not be uploaded again.', $oid)); } // Remove the execution time limit because uploading large files may take // a while. set_time_limit(0); $request_stream = new AphrontRequestStream(); $request_iterator = $request_stream->getIterator(); $hashing_iterator = id(new PhutilHashingIterator($request_iterator)) ->setAlgorithm('sha256'); $source = id(new PhabricatorIteratorFileUploadSource()) ->setName('lfs-'.$oid) ->setViewPolicy(PhabricatorPolicies::POLICY_NOONE) ->setIterator($hashing_iterator); $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); $file = $source->uploadFile(); unset($unguarded); $hash = $hashing_iterator->getHash(); if ($hash !== $oid) { return DiffusionGitLFSResponse::newErrorResponse( 400, pht( 'Uploaded data is corrupt or invalid. Expected hash "%s", actual '. 'hash "%s".', $oid, $hash)); } $ref = id(new PhabricatorRepositoryGitLFSRef()) ->setRepositoryPHID($repository->getPHID()) ->setObjectHash($hash) ->setByteSize($file->getByteSize()) ->setAuthorPHID($viewer->getPHID()) ->setFilePHID($file->getPHID()); $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); // Attach the file to the repository to give users permission // to access it. $file->attachToObject($repository->getPHID()); $ref->save(); unset($unguarded); // This is just a plain HTTP 200 with no content, which is what `git lfs` // expects. return new DiffusionGitLFSResponse(); } private function newGitLFSHTTPAuthorization( PhabricatorRepository $repository, PhabricatorUser $viewer, $operation) { $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); $authorization = DiffusionGitLFSTemporaryTokenType::newHTTPAuthorization( $repository, $viewer, $operation); unset($unguarded); return $authorization; } private function getGitLFSRequestPath(PhabricatorRepository $repository) { $request_path = $this->getRequestDirectoryPath($repository); $matches = null; if (preg_match('(^/info/lfs(?:\z|/)(.*))', $request_path, $matches)) { return $matches[1]; } return null; } private function getGitLFSInput() { if (!$this->gitLFSInput) { $input = PhabricatorStartup::getRawInput(); $input = phutil_json_decode($input); $this->gitLFSInput = $input; } return $this->gitLFSInput; } private function isGitLFSReadOnlyRequest(PhabricatorRepository $repository) { if (!$this->getIsGitLFSRequest()) { return false; } $path = $this->getGitLFSRequestPath($repository); if ($path === 'objects/batch') { $input = $this->getGitLFSInput(); $operation = idx($input, 'operation'); switch ($operation) { case 'download': return true; default: return false; } } return false; } } diff --git a/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php index 400340abeb..d48abc159e 100644 --- a/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php @@ -1,135 +1,135 @@ setName('git-receive-pack'); $this->setArguments( array( array( 'name' => 'dir', 'wildcard' => true, ), )); } protected function executeRepositoryOperations() { $host_wait_start = microtime(true); $repository = $this->getRepository(); $viewer = $this->getSSHUser(); $device = AlmanacKeys::getLiveDevice(); // This is a write, and must have write access. $this->requireWriteAccess(); $cluster_engine = id(new DiffusionRepositoryClusterEngine()) ->setViewer($viewer) ->setRepository($repository) ->setLog($this); if ($this->shouldProxy()) { - $command = $this->getProxyCommand(); + $command = $this->getProxyCommand(true); $did_synchronize = false; if ($device) { $this->writeClusterEngineLogMessage( pht( "# Push received by \"%s\", forwarding to cluster host.\n", $device->getName())); } } else { $command = csprintf('git-receive-pack %s', $repository->getLocalPath()); $did_synchronize = true; $cluster_engine->synchronizeWorkingCopyBeforeWrite(); if ($device) { $this->writeClusterEngineLogMessage( pht( "# Ready to receive on cluster host \"%s\".\n", $device->getName())); } } $caught = null; try { $err = $this->executeRepositoryCommand($command); } catch (Exception $ex) { $caught = $ex; } // We've committed the write (or rejected it), so we can release the lock // without waiting for the client to receive the acknowledgement. if ($did_synchronize) { $cluster_engine->synchronizeWorkingCopyAfterWrite(); } if ($caught) { throw $caught; } if (!$err) { $repository->writeStatusMessage( PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE, PhabricatorRepositoryStatusMessage::CODE_OKAY); $this->waitForGitClient(); $host_wait_end = microtime(true); $this->updatePushLogWithTimingInformation( $this->getClusterEngineLogProperty('writeWait'), $this->getClusterEngineLogProperty('readWait'), ($host_wait_end - $host_wait_start)); } return $err; } private function executeRepositoryCommand($command) { $repository = $this->getRepository(); $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); $future = id(new ExecFuture('%C', $command)) ->setEnv($this->getEnvironment()); return $this->newPassthruCommand() ->setIOChannel($this->getIOChannel()) ->setCommandChannelFromExecFuture($future) ->execute(); } private function updatePushLogWithTimingInformation( $write_wait, $read_wait, $host_wait) { if ($write_wait !== null) { $write_wait = (int)(1000000 * $write_wait); } if ($read_wait !== null) { $read_wait = (int)(1000000 * $read_wait); } if ($host_wait !== null) { $host_wait = (int)(1000000 * $host_wait); } $identifier = $this->getRequestIdentifier(); $event = new PhabricatorRepositoryPushEvent(); $conn = $event->establishConnection('w'); queryfx( $conn, 'UPDATE %T SET writeWait = %nd, readWait = %nd, hostWait = %nd WHERE requestIdentifier = %s', $event->getTableName(), $write_wait, $read_wait, $host_wait, $identifier); } } diff --git a/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php index 5ed42ba79d..e96030f577 100644 --- a/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php @@ -1,89 +1,89 @@ setName('git-upload-pack'); $this->setArguments( array( array( 'name' => 'dir', 'wildcard' => true, ), )); } protected function executeRepositoryOperations() { $repository = $this->getRepository(); $viewer = $this->getSSHUser(); $device = AlmanacKeys::getLiveDevice(); $skip_sync = $this->shouldSkipReadSynchronization(); $is_proxy = $this->shouldProxy(); if ($is_proxy) { - $command = $this->getProxyCommand(); + $command = $this->getProxyCommand(false); if ($device) { $this->writeClusterEngineLogMessage( pht( "# Fetch received by \"%s\", forwarding to cluster host.\n", $device->getName())); } } else { $command = csprintf('git-upload-pack -- %s', $repository->getLocalPath()); if (!$skip_sync) { $cluster_engine = id(new DiffusionRepositoryClusterEngine()) ->setViewer($viewer) ->setRepository($repository) ->setLog($this) ->synchronizeWorkingCopyBeforeRead(); if ($device) { $this->writeClusterEngineLogMessage( pht( "# Cleared to fetch on cluster host \"%s\".\n", $device->getName())); } } } $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); $pull_event = $this->newPullEvent(); $future = id(new ExecFuture('%C', $command)) ->setEnv($this->getEnvironment()); $err = $this->newPassthruCommand() ->setIOChannel($this->getIOChannel()) ->setCommandChannelFromExecFuture($future) ->execute(); if ($err) { $pull_event ->setResultType(PhabricatorRepositoryPullEvent::RESULT_ERROR) ->setResultCode($err); } else { $pull_event ->setResultType(PhabricatorRepositoryPullEvent::RESULT_PULL) ->setResultCode(0); } // TODO: Currently, when proxying, we do not write a log on the proxy. // Perhaps we should write a "proxy log". This is not very useful for // statistics or auditing, but could be useful for diagnostics. Marking // the proxy logs as proxied (and recording devicePHID on all logs) would // make differentiating between these use cases easier. if (!$is_proxy) { $pull_event->save(); } if (!$err) { $this->waitForGitClient(); } return $err; } } diff --git a/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php index 5701a4bac1..221d1696b2 100644 --- a/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php @@ -1,114 +1,117 @@ setName('hg'); $this->setArguments( array( array( 'name' => 'repository', 'short' => 'R', 'param' => 'repo', ), array( 'name' => 'stdio', ), array( 'name' => 'command', 'wildcard' => true, ), )); } protected function identifyRepository() { $args = $this->getArgs(); $path = $args->getArg('repository'); return $this->loadRepositoryWithPath( $path, PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL); } protected function executeRepositoryOperations() { $repository = $this->getRepository(); $args = $this->getArgs(); if (!$args->getArg('stdio')) { throw new Exception(pht('Expected `%s`!', 'hg ... --stdio')); } if ($args->getArg('command') !== array('serve')) { throw new Exception(pht('Expected `%s`!', 'hg ... serve')); } if ($this->shouldProxy()) { - $command = $this->getProxyCommand(); + // NOTE: For now, we're always requesting a writable node. The request + // may not actually need one, but we can't currently determine whether + // it is read-only or not at this phase of evaluation. + $command = $this->getProxyCommand(true); } else { $command = csprintf( 'hg -R %s serve --stdio', $repository->getLocalPath()); } $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); $future = id(new ExecFuture('%C', $command)) ->setEnv($this->getEnvironment()); $io_channel = $this->getIOChannel(); $protocol_channel = new DiffusionMercurialWireClientSSHProtocolChannel( $io_channel); $err = id($this->newPassthruCommand()) ->setIOChannel($protocol_channel) ->setCommandChannelFromExecFuture($future) ->setWillWriteCallback(array($this, 'willWriteMessageCallback')) ->execute(); if (!$err && $this->didSeeWrite) { $repository->writeStatusMessage( PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE, PhabricatorRepositoryStatusMessage::CODE_OKAY); } return $err; } public function willWriteMessageCallback( PhabricatorSSHPassthruCommand $command, $message) { $command = $message['command']; // Check if this is a readonly command. $is_readonly = false; if ($command == 'batch') { $cmds = idx($message['arguments'], 'cmds'); if (DiffusionMercurialWireProtocol::isReadOnlyBatchCommand($cmds)) { $is_readonly = true; } } else if (DiffusionMercurialWireProtocol::isReadOnlyCommand($command)) { $is_readonly = true; } if (!$is_readonly) { $this->requireWriteAccess(); $this->didSeeWrite = true; } return $message['raw']; } protected function raiseWrongVCSException( PhabricatorRepository $repository) { throw new Exception( pht( 'This repository ("%s") is not a Mercurial repository. Use "%s" to '. 'interact with this repository.', $repository->getDisplayName(), $repository->getVersionControlSystem())); } } diff --git a/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php index 2a3841892f..02cacbfe94 100644 --- a/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php @@ -1,287 +1,309 @@ repository) { throw new Exception(pht('Repository is not available yet!')); } return $this->repository; } private function setRepository(PhabricatorRepository $repository) { $this->repository = $repository; return $this; } public function getArgs() { return $this->args; } public function getEnvironment() { $env = array( DiffusionCommitHookEngine::ENV_USER => $this->getSSHUser()->getUsername(), DiffusionCommitHookEngine::ENV_REMOTE_PROTOCOL => 'ssh', ); $identifier = $this->getRequestIdentifier(); if ($identifier !== null) { $env[DiffusionCommitHookEngine::ENV_REQUEST] = $identifier; } $remote_address = $this->getSSHRemoteAddress(); if ($remote_address !== null) { $env[DiffusionCommitHookEngine::ENV_REMOTE_ADDRESS] = $remote_address; } return $env; } /** * Identify and load the affected repository. */ abstract protected function identifyRepository(); abstract protected function executeRepositoryOperations(); abstract protected function raiseWrongVCSException( PhabricatorRepository $repository); protected function getBaseRequestPath() { return $this->baseRequestPath; } protected function writeError($message) { $this->getErrorChannel()->write($message); return $this; } protected function getCurrentDeviceName() { $device = AlmanacKeys::getLiveDevice(); if ($device) { return $device->getName(); } return php_uname('n'); } - protected function getTargetDeviceName() { - // TODO: This should use the correct device identity. - $uri = new PhutilURI($this->proxyURI); - return $uri->getDomain(); - } - protected function shouldProxy() { - return (bool)$this->proxyURI; + return $this->shouldProxy; } - protected function getProxyCommand() { - $uri = new PhutilURI($this->proxyURI); + protected function getProxyCommand($for_write) { + $viewer = $this->getSSHUser(); + $repository = $this->getRepository(); + + $is_cluster_request = $this->getIsClusterRequest(); + + $uri = $repository->getAlmanacServiceURI( + $viewer, + array( + 'neverProxy' => $is_cluster_request, + 'protocols' => array( + 'ssh', + ), + 'writable' => $for_write, + )); + + if (!$uri) { + throw new Exception( + pht( + 'Failed to generate an intracluster proxy URI even though this '. + 'request was routed as a proxy request.')); + } + + $uri = new PhutilURI($uri); $username = AlmanacKeys::getClusterSSHUser(); if ($username === null) { throw new Exception( pht( 'Unable to determine the username to connect with when trying '. 'to proxy an SSH request within the Phabricator cluster.')); } $port = $uri->getPort(); $host = $uri->getDomain(); $key_path = AlmanacKeys::getKeyPath('device.key'); if (!Filesystem::pathExists($key_path)) { throw new Exception( pht( 'Unable to proxy this SSH request within the cluster: this device '. 'is not registered and has a missing device key (expected to '. 'find key at "%s").', $key_path)); } $options = array(); $options[] = '-o'; $options[] = 'StrictHostKeyChecking=no'; $options[] = '-o'; $options[] = 'UserKnownHostsFile=/dev/null'; // This is suppressing "added
to the list of known hosts" // messages, which are confusing and irrelevant when they arise from // proxied requests. It might also be suppressing lots of useful errors, // of course. Ideally, we would enforce host keys eventually. $options[] = '-o'; $options[] = 'LogLevel=quiet'; // NOTE: We prefix the command with "@username", which the far end of the // connection will parse in order to act as the specified user. This // behavior is only available to cluster requests signed by a trusted // device key. return csprintf( 'ssh %Ls -l %s -i %s -p %s %s -- %s %Ls', $options, $username, $key_path, $port, $host, '@'.$this->getSSHUser()->getUsername(), $this->getOriginalArguments()); } final public function execute(PhutilArgumentParser $args) { $this->args = $args; $viewer = $this->getSSHUser(); $have_diffusion = PhabricatorApplication::isClassInstalledForViewer( 'PhabricatorDiffusionApplication', $viewer); if (!$have_diffusion) { throw new Exception( pht( 'You do not have permission to access the Diffusion application, '. 'so you can not interact with repositories over SSH.')); } $repository = $this->identifyRepository(); $this->setRepository($repository); + // NOTE: Here, we're just figuring out if this is a proxyable request to + // a clusterized repository or not. We don't (and can't) use the URI we get + // back directly. + + // For example, we may get a read-only URI here but be handling a write + // request. We only care if we get back `null` (which means we should + // handle the request locally) or anything else (which means we should + // proxy it to an appropriate device). + $is_cluster_request = $this->getIsClusterRequest(); $uri = $repository->getAlmanacServiceURI( $viewer, array( 'neverProxy' => $is_cluster_request, 'protocols' => array( 'ssh', ), )); - - if ($uri) { - $this->proxyURI = $uri; - } + $this->shouldProxy = (bool)$uri; try { return $this->executeRepositoryOperations(); } catch (Exception $ex) { $this->writeError(get_class($ex).': '.$ex->getMessage()); return 1; } } protected function loadRepositoryWithPath($path, $vcs) { $viewer = $this->getSSHUser(); $info = PhabricatorRepository::parseRepositoryServicePath($path, $vcs); if ($info === null) { throw new Exception( pht( 'Unrecognized repository path "%s". Expected a path like "%s", '. '"%s", or "%s".', $path, '/diffusion/X/', '/diffusion/123/', '/source/thaumaturgy.git')); } $identifier = $info['identifier']; $base = $info['base']; $this->baseRequestPath = $base; $repository = id(new PhabricatorRepositoryQuery()) ->setViewer($viewer) ->withIdentifiers(array($identifier)) ->needURIs(true) ->executeOne(); if (!$repository) { throw new Exception( pht('No repository "%s" exists!', $identifier)); } $protocol = PhabricatorRepositoryURI::BUILTIN_PROTOCOL_SSH; if (!$repository->canServeProtocol($protocol, false)) { throw new Exception( pht( 'This repository ("%s") is not available over SSH.', $repository->getDisplayName())); } if ($repository->getVersionControlSystem() != $vcs) { $this->raiseWrongVCSException($repository); } return $repository; } protected function requireWriteAccess($protocol_command = null) { if ($this->hasWriteAccess === true) { return; } $repository = $this->getRepository(); $viewer = $this->getSSHUser(); if ($viewer->isOmnipotent()) { throw new Exception( pht( 'This request is authenticated as a cluster device, but is '. 'performing a write. Writes must be performed with a real '. 'user account.')); } $protocol = PhabricatorRepositoryURI::BUILTIN_PROTOCOL_SSH; if ($repository->canServeProtocol($protocol, true)) { $can_push = PhabricatorPolicyFilter::hasCapability( $viewer, $repository, DiffusionPushCapability::CAPABILITY); if (!$can_push) { throw new Exception( pht('You do not have permission to push to this repository.')); } } else { if ($protocol_command !== null) { throw new Exception( pht( 'This repository is read-only over SSH (tried to execute '. 'protocol command "%s").', $protocol_command)); } else { throw new Exception( pht('This repository is read-only over SSH.')); } } $this->hasWriteAccess = true; return $this->hasWriteAccess; } protected function shouldSkipReadSynchronization() { $viewer = $this->getSSHUser(); // Currently, the only case where devices interact over SSH without // assuming user credentials is when synchronizing before a read. These // synchronizing reads do not themselves need to be synchronized. if ($viewer->isOmnipotent()) { return true; } return false; } protected function newPullEvent() { $viewer = $this->getSSHUser(); $repository = $this->getRepository(); $remote_address = $this->getSSHRemoteAddress(); return id(new PhabricatorRepositoryPullEvent()) ->setEpoch(PhabricatorTime::getNow()) ->setRemoteAddress($remote_address) ->setRemoteProtocol(PhabricatorRepositoryPullEvent::PROTOCOL_SSH) ->setPullerPHID($viewer->getPHID()) ->setRepositoryPHID($repository->getPHID()); } } diff --git a/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php index 0df4aa17fe..76dae05971 100644 --- a/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php @@ -1,462 +1,465 @@ command; } protected function didConstruct() { $this->setName('svnserve'); $this->setArguments( array( array( 'name' => 'tunnel', 'short' => 't', ), )); } protected function identifyRepository() { // NOTE: In SVN, we need to read the first few protocol frames before we // can determine which repository the user is trying to access. We're // going to peek at the data on the wire to identify the repository. $io_channel = $this->getIOChannel(); // Before the client will send us the first protocol frame, we need to send // it a connection frame with server capabilities. To figure out the // correct frame we're going to start `svnserve`, read the frame from it, // send it to the client, then kill the subprocess. // TODO: This is pretty inelegant and the protocol frame will change very // rarely. We could cache it if we can find a reasonable way to dirty the // cache. $command = csprintf('svnserve -t'); $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); $future = new ExecFuture('%C', $command); $exec_channel = new PhutilExecChannel($future); $exec_protocol = new DiffusionSubversionWireProtocol(); while (true) { PhutilChannel::waitForAny(array($exec_channel)); $exec_channel->update(); $exec_message = $exec_channel->read(); if ($exec_message !== null) { $messages = $exec_protocol->writeData($exec_message); if ($messages) { $message = head($messages); $raw = $message['raw']; // Write the greeting frame to the client. $io_channel->write($raw); // Kill the subprocess. $future->resolveKill(); break; } } if (!$exec_channel->isOpenForReading()) { throw new Exception( pht( '%s subprocess exited before emitting a protocol frame.', 'svnserve')); } } $io_protocol = new DiffusionSubversionWireProtocol(); while (true) { PhutilChannel::waitForAny(array($io_channel)); $io_channel->update(); $in_message = $io_channel->read(); if ($in_message !== null) { $this->peekBuffer .= $in_message; if (strlen($this->peekBuffer) > (1024 * 1024)) { throw new Exception( pht( 'Client transmitted more than 1MB of data without transmitting '. 'a recognizable protocol frame.')); } $messages = $io_protocol->writeData($in_message); if ($messages) { $message = head($messages); $struct = $message['structure']; // This is the: // // ( version ( cap1 ... ) url ... ) // // The `url` allows us to identify the repository. $uri = $struct[2]['value']; $path = $this->getPathFromSubversionURI($uri); return $this->loadRepositoryWithPath( $path, PhabricatorRepositoryType::REPOSITORY_TYPE_SVN); } } if (!$io_channel->isOpenForReading()) { throw new Exception( pht( 'Client closed connection before sending a complete protocol '. 'frame.')); } // If the client has disconnected, kill the subprocess and bail. if (!$io_channel->isOpenForWriting()) { throw new Exception( pht( 'Client closed connection before receiving response.')); } } } protected function executeRepositoryOperations() { $repository = $this->getRepository(); $args = $this->getArgs(); if (!$args->getArg('tunnel')) { throw new Exception(pht('Expected `%s`!', 'svnserve -t')); } if ($this->shouldProxy()) { - $command = $this->getProxyCommand(); + // NOTE: We're always requesting a writable device here. The request + // might be read-only, but we can't currently tell, and SVN requests + // can mix reads and writes. + $command = $this->getProxyCommand(true); $this->isProxying = true; $cwd = null; } else { $command = csprintf( 'svnserve -t --tunnel-user=%s', $this->getSSHUser()->getUsername()); $cwd = PhabricatorEnv::getEmptyCWD(); } $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); $future = new ExecFuture('%C', $command); // If we're receiving a commit, svnserve will fail to execute the commit // hook with an unhelpful error if the CWD isn't readable by the user we // are sudoing to. Switch to a readable, empty CWD before running // svnserve. See T10941. if ($cwd !== null) { $future->setCWD($cwd); } $this->inProtocol = new DiffusionSubversionWireProtocol(); $this->outProtocol = new DiffusionSubversionWireProtocol(); $this->command = id($this->newPassthruCommand()) ->setIOChannel($this->getIOChannel()) ->setCommandChannelFromExecFuture($future) ->setWillWriteCallback(array($this, 'willWriteMessageCallback')) ->setWillReadCallback(array($this, 'willReadMessageCallback')); $this->command->setPauseIOReads(true); $err = $this->command->execute(); if (!$err && $this->didSeeWrite) { $this->getRepository()->writeStatusMessage( PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE, PhabricatorRepositoryStatusMessage::CODE_OKAY); } return $err; } public function willWriteMessageCallback( PhabricatorSSHPassthruCommand $command, $message) { $proto = $this->inProtocol; $messages = $proto->writeData($message); $result = array(); foreach ($messages as $message) { $message_raw = $message['raw']; $struct = $message['structure']; if (!$this->inSeenGreeting) { $this->inSeenGreeting = true; // The first message the client sends looks like: // // ( version ( cap1 ... ) url ... ) // // We want to grab the URL, load the repository, make sure it exists and // is accessible, and then replace it with the location of the // repository on disk. $uri = $struct[2]['value']; $struct[2]['value'] = $this->makeInternalURI($uri); $message_raw = $proto->serializeStruct($struct); } else if (isset($struct[0]) && $struct[0]['type'] == 'word') { if (!$proto->isReadOnlyCommand($struct)) { $this->didSeeWrite = true; $this->requireWriteAccess($struct[0]['value']); } // Several other commands also pass in URLs. We need to translate // all of these into the internal representation; this also makes sure // they're valid and accessible. switch ($struct[0]['value']) { case 'reparent': // ( reparent ( url ) ) $struct[1]['value'][0]['value'] = $this->makeInternalURI( $struct[1]['value'][0]['value']); $message_raw = $proto->serializeStruct($struct); break; case 'switch': // ( switch ( ( rev ) target recurse url ... ) ) $struct[1]['value'][3]['value'] = $this->makeInternalURI( $struct[1]['value'][3]['value']); $message_raw = $proto->serializeStruct($struct); break; case 'diff': // ( diff ( ( rev ) target recurse ignore-ancestry url ... ) ) $struct[1]['value'][4]['value'] = $this->makeInternalURI( $struct[1]['value'][4]['value']); $message_raw = $proto->serializeStruct($struct); break; case 'add-file': case 'add-dir': // ( add-file ( path dir-token file-token [ copy-path copy-rev ] ) ) // ( add-dir ( path parent child [ copy-path copy-rev ] ) ) if (isset($struct[1]['value'][3]['value'][0]['value'])) { $copy_from = $struct[1]['value'][3]['value'][0]['value']; $copy_from = $this->makeInternalURI($copy_from); $struct[1]['value'][3]['value'][0]['value'] = $copy_from; } $message_raw = $proto->serializeStruct($struct); break; } } $result[] = $message_raw; } if (!$result) { return null; } return implode('', $result); } public function willReadMessageCallback( PhabricatorSSHPassthruCommand $command, $message) { $proto = $this->outProtocol; $messages = $proto->writeData($message); $result = array(); foreach ($messages as $message) { $message_raw = $message['raw']; $struct = $message['structure']; if (isset($struct[0]) && ($struct[0]['type'] == 'word')) { if ($struct[0]['value'] == 'success') { switch ($this->outPhaseCount) { case 0: // This is the "greeting", which announces capabilities. // We already sent this when we were figuring out which // repository this request is for, so we aren't going to send // it again. // Instead, we're going to replay the client's response (which // we also already read). $command = $this->getCommand(); $command->writeIORead($this->peekBuffer); $command->setPauseIOReads(false); $message_raw = null; break; case 1: // This responds to the client greeting, and announces auth. break; case 2: // This responds to auth, which should be trivial over SSH. break; case 3: // This contains the URI of the repository. We need to edit it; // if it does not match what the client requested it will reject // the response. $struct[1]['value'][1]['value'] = $this->makeExternalURI( $struct[1]['value'][1]['value']); $message_raw = $proto->serializeStruct($struct); break; default: // We don't care about other protocol frames. break; } $this->outPhaseCount++; } else if ($struct[0]['value'] == 'failure') { // Find any error messages which include the internal URI, and // replace the text with the external URI. foreach ($struct[1]['value'] as $key => $error) { $code = $error['value'][0]['value']; $message = $error['value'][1]['value']; $message = str_replace( $this->internalBaseURI, $this->externalBaseURI, $message); // Derp derp derp derp derp. The structure looks like this: // ( failure ( ( code message ... ) ... ) ) $struct[1]['value'][$key]['value'][1]['value'] = $message; } $message_raw = $proto->serializeStruct($struct); } } if ($message_raw !== null) { $result[] = $message_raw; } } if (!$result) { return null; } return implode('', $result); } private function getPathFromSubversionURI($uri_string) { $uri = new PhutilURI($uri_string); $proto = $uri->getProtocol(); if ($proto !== 'svn+ssh') { throw new Exception( pht( 'Protocol for URI "%s" MUST be "%s".', $uri_string, 'svn+ssh')); } $path = $uri->getPath(); // Subversion presumably deals with this, but make sure there's nothing // sketchy going on with the URI. if (preg_match('(/\\.\\./)', $path)) { throw new Exception( pht( 'String "%s" is invalid in path specification "%s".', '/../', $uri_string)); } $path = $this->normalizeSVNPath($path); return $path; } private function makeInternalURI($uri_string) { if ($this->isProxying) { return $uri_string; } $uri = new PhutilURI($uri_string); $repository = $this->getRepository(); $path = $this->getPathFromSubversionURI($uri_string); $external_base = $this->getBaseRequestPath(); // Replace "/diffusion/X" in the request with the repository local path, // so "/diffusion/X/master/" becomes "/path/to/repository/X/master/". $local_path = rtrim($repository->getLocalPath(), '/'); $path = $local_path.substr($path, strlen($external_base)); // NOTE: We are intentionally NOT removing username information from the // URI. Subversion retains it over the course of the request and considers // two repositories with different username identifiers to be distinct and // incompatible. $uri->setPath($path); // If this is happening during the handshake, these are the base URIs for // the request. if ($this->externalBaseURI === null) { $pre = (string)id(clone $uri)->setPath(''); $external_path = $external_base; $external_path = $this->normalizeSVNPath($external_path); $this->externalBaseURI = $pre.$external_path; $internal_path = rtrim($repository->getLocalPath(), '/'); $internal_path = $this->normalizeSVNPath($internal_path); $this->internalBaseURI = $pre.$internal_path; } return (string)$uri; } private function makeExternalURI($uri) { if ($this->isProxying) { return $uri; } $internal = $this->internalBaseURI; $external = $this->externalBaseURI; if (strncmp($uri, $internal, strlen($internal)) === 0) { $uri = $external.substr($uri, strlen($internal)); } return $uri; } private function normalizeSVNPath($path) { // Subversion normalizes redundant slashes internally, so normalize them // here as well to make sure things match up. $path = preg_replace('(/+)', '/', $path); return $path; } protected function raiseWrongVCSException( PhabricatorRepository $repository) { throw new Exception( pht( 'This repository ("%s") is not a Subversion repository. Use "%s" to '. 'interact with this repository.', $repository->getDisplayName(), $repository->getVersionControlSystem())); } } diff --git a/src/applications/repository/storage/PhabricatorRepository.php b/src/applications/repository/storage/PhabricatorRepository.php index 37dd5f1ec5..0a09503ee9 100644 --- a/src/applications/repository/storage/PhabricatorRepository.php +++ b/src/applications/repository/storage/PhabricatorRepository.php @@ -1,2670 +1,2705 @@ setViewer($actor) ->withClasses(array('PhabricatorDiffusionApplication')) ->executeOne(); $view_policy = $app->getPolicy(DiffusionDefaultViewCapability::CAPABILITY); $edit_policy = $app->getPolicy(DiffusionDefaultEditCapability::CAPABILITY); $push_policy = $app->getPolicy(DiffusionDefaultPushCapability::CAPABILITY); $repository = id(new PhabricatorRepository()) ->setViewPolicy($view_policy) ->setEditPolicy($edit_policy) ->setPushPolicy($push_policy) ->setSpacePHID($actor->getDefaultSpacePHID()); // Put the repository in "Importing" mode until we finish // parsing it. $repository->setDetail('importing', true); return $repository; } protected function getConfiguration() { return array( self::CONFIG_AUX_PHID => true, self::CONFIG_SERIALIZATION => array( 'details' => self::SERIALIZATION_JSON, ), self::CONFIG_COLUMN_SCHEMA => array( 'name' => 'sort255', 'callsign' => 'sort32?', 'repositorySlug' => 'sort64?', 'versionControlSystem' => 'text32', 'uuid' => 'text64?', 'pushPolicy' => 'policy', 'credentialPHID' => 'phid?', 'almanacServicePHID' => 'phid?', 'localPath' => 'text128?', 'profileImagePHID' => 'phid?', ), self::CONFIG_KEY_SCHEMA => array( 'callsign' => array( 'columns' => array('callsign'), 'unique' => true, ), 'key_name' => array( 'columns' => array('name(128)'), ), 'key_vcs' => array( 'columns' => array('versionControlSystem'), ), 'key_slug' => array( 'columns' => array('repositorySlug'), 'unique' => true, ), 'key_local' => array( 'columns' => array('localPath'), 'unique' => true, ), ), ) + parent::getConfiguration(); } public function generatePHID() { return PhabricatorPHID::generateNewPHID( PhabricatorRepositoryRepositoryPHIDType::TYPECONST); } public static function getStatusMap() { return array( self::STATUS_ACTIVE => array( 'name' => pht('Active'), 'isTracked' => 1, ), self::STATUS_INACTIVE => array( 'name' => pht('Inactive'), 'isTracked' => 0, ), ); } public static function getStatusNameMap() { return ipull(self::getStatusMap(), 'name'); } public function getStatus() { if ($this->isTracked()) { return self::STATUS_ACTIVE; } else { return self::STATUS_INACTIVE; } } public function toDictionary() { return array( 'id' => $this->getID(), 'name' => $this->getName(), 'phid' => $this->getPHID(), 'callsign' => $this->getCallsign(), 'monogram' => $this->getMonogram(), 'vcs' => $this->getVersionControlSystem(), 'uri' => PhabricatorEnv::getProductionURI($this->getURI()), 'remoteURI' => (string)$this->getRemoteURI(), 'description' => $this->getDetail('description'), 'isActive' => $this->isTracked(), 'isHosted' => $this->isHosted(), 'isImporting' => $this->isImporting(), 'encoding' => $this->getDefaultTextEncoding(), 'staging' => array( 'supported' => $this->supportsStaging(), 'prefix' => 'phabricator', 'uri' => $this->getStagingURI(), ), ); } public function getDefaultTextEncoding() { return $this->getDetail('encoding', 'UTF-8'); } public function getMonogram() { $callsign = $this->getCallsign(); if (strlen($callsign)) { return "r{$callsign}"; } $id = $this->getID(); return "R{$id}"; } public function getDisplayName() { $slug = $this->getRepositorySlug(); if (strlen($slug)) { return $slug; } return $this->getMonogram(); } public function getAllMonograms() { $monograms = array(); $monograms[] = 'R'.$this->getID(); $callsign = $this->getCallsign(); if (strlen($callsign)) { $monograms[] = 'r'.$callsign; } return $monograms; } public function setLocalPath($path) { // Convert any extra slashes ("//") in the path to a single slash ("/"). $path = preg_replace('(//+)', '/', $path); return parent::setLocalPath($path); } public function getDetail($key, $default = null) { return idx($this->details, $key, $default); } public function getHumanReadableDetail($key, $default = null) { $value = $this->getDetail($key, $default); switch ($key) { case 'branch-filter': case 'close-commits-filter': $value = array_keys($value); $value = implode(', ', $value); break; } return $value; } public function setDetail($key, $value) { $this->details[$key] = $value; return $this; } public function attachCommitCount($count) { $this->commitCount = $count; return $this; } public function getCommitCount() { return $this->assertAttached($this->commitCount); } public function attachMostRecentCommit( PhabricatorRepositoryCommit $commit = null) { $this->mostRecentCommit = $commit; return $this; } public function getMostRecentCommit() { return $this->assertAttached($this->mostRecentCommit); } public function getDiffusionBrowseURIForPath( PhabricatorUser $user, $path, $line = null, $branch = null) { $drequest = DiffusionRequest::newFromDictionary( array( 'user' => $user, 'repository' => $this, 'path' => $path, 'branch' => $branch, )); return $drequest->generateURI( array( 'action' => 'browse', 'line' => $line, )); } public function getSubversionBaseURI($commit = null) { $subpath = $this->getDetail('svn-subpath'); if (!strlen($subpath)) { $subpath = null; } return $this->getSubversionPathURI($subpath, $commit); } public function getSubversionPathURI($path = null, $commit = null) { $vcs = $this->getVersionControlSystem(); if ($vcs != PhabricatorRepositoryType::REPOSITORY_TYPE_SVN) { throw new Exception(pht('Not a subversion repository!')); } if ($this->isHosted()) { $uri = 'file://'.$this->getLocalPath(); } else { $uri = $this->getDetail('remote-uri'); } $uri = rtrim($uri, '/'); if (strlen($path)) { $path = rawurlencode($path); $path = str_replace('%2F', '/', $path); $uri = $uri.'/'.ltrim($path, '/'); } if ($path !== null || $commit !== null) { $uri .= '@'; } if ($commit !== null) { $uri .= $commit; } return $uri; } public function attachProjectPHIDs(array $project_phids) { $this->projectPHIDs = $project_phids; return $this; } public function getProjectPHIDs() { return $this->assertAttached($this->projectPHIDs); } /** * Get the name of the directory this repository should clone or checkout * into. For example, if the repository name is "Example Repository", a * reasonable name might be "example-repository". This is used to help users * get reasonable results when cloning repositories, since they generally do * not want to clone into directories called "X/" or "Example Repository/". * * @return string */ public function getCloneName() { $name = $this->getRepositorySlug(); // Make some reasonable effort to produce reasonable default directory // names from repository names. if (!strlen($name)) { $name = $this->getName(); $name = phutil_utf8_strtolower($name); $name = preg_replace('@[/ -:<>]+@', '-', $name); $name = trim($name, '-'); if (!strlen($name)) { $name = $this->getCallsign(); } } return $name; } public static function isValidRepositorySlug($slug) { try { self::assertValidRepositorySlug($slug); return true; } catch (Exception $ex) { return false; } } public static function assertValidRepositorySlug($slug) { if (!strlen($slug)) { throw new Exception( pht( 'The empty string is not a valid repository short name. '. 'Repository short names must be at least one character long.')); } if (strlen($slug) > 64) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names must not be longer than 64 characters.', $slug)); } if (preg_match('/[^a-zA-Z0-9._-]/', $slug)) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names may only contain letters, numbers, periods, hyphens '. 'and underscores.', $slug)); } if (!preg_match('/^[a-zA-Z0-9]/', $slug)) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names must begin with a letter or number.', $slug)); } if (!preg_match('/[a-zA-Z0-9]\z/', $slug)) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names must end with a letter or number.', $slug)); } if (preg_match('/__|--|\\.\\./', $slug)) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names must not contain multiple consecutive underscores, '. 'hyphens, or periods.', $slug)); } if (preg_match('/^[A-Z]+\z/', $slug)) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names may not contain only uppercase letters.', $slug)); } if (preg_match('/^\d+\z/', $slug)) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names may not contain only numbers.', $slug)); } if (preg_match('/\\.git/', $slug)) { throw new Exception( pht( 'The name "%s" is not a valid repository short name. Repository '. 'short names must not end in ".git". This suffix will be added '. 'automatically in appropriate contexts.', $slug)); } } public static function assertValidCallsign($callsign) { if (!strlen($callsign)) { throw new Exception( pht( 'A repository callsign must be at least one character long.')); } if (strlen($callsign) > 32) { throw new Exception( pht( 'The callsign "%s" is not a valid repository callsign. Callsigns '. 'must be no more than 32 bytes long.', $callsign)); } if (!preg_match('/^[A-Z]+\z/', $callsign)) { throw new Exception( pht( 'The callsign "%s" is not a valid repository callsign. Callsigns '. 'may only contain UPPERCASE letters.', $callsign)); } } public function getProfileImageURI() { return $this->getProfileImageFile()->getBestURI(); } public function attachProfileImageFile(PhabricatorFile $file) { $this->profileImageFile = $file; return $this; } public function getProfileImageFile() { return $this->assertAttached($this->profileImageFile); } /* -( Remote Command Execution )------------------------------------------- */ public function execRemoteCommand($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newRemoteCommandFuture($args)->resolve(); } public function execxRemoteCommand($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newRemoteCommandFuture($args)->resolvex(); } public function getRemoteCommandFuture($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newRemoteCommandFuture($args); } public function passthruRemoteCommand($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newRemoteCommandPassthru($args)->execute(); } private function newRemoteCommandFuture(array $argv) { return $this->newRemoteCommandEngine($argv) ->newFuture(); } private function newRemoteCommandPassthru(array $argv) { return $this->newRemoteCommandEngine($argv) ->setPassthru(true) ->newFuture(); } private function newRemoteCommandEngine(array $argv) { return DiffusionCommandEngine::newCommandEngine($this) ->setArgv($argv) ->setCredentialPHID($this->getCredentialPHID()) ->setURI($this->getRemoteURIObject()); } /* -( Local Command Execution )-------------------------------------------- */ public function execLocalCommand($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newLocalCommandFuture($args)->resolve(); } public function execxLocalCommand($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newLocalCommandFuture($args)->resolvex(); } public function getLocalCommandFuture($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newLocalCommandFuture($args); } public function passthruLocalCommand($pattern /* , $arg, ... */) { $args = func_get_args(); return $this->newLocalCommandPassthru($args)->execute(); } private function newLocalCommandFuture(array $argv) { $this->assertLocalExists(); $future = DiffusionCommandEngine::newCommandEngine($this) ->setArgv($argv) ->newFuture(); if ($this->usesLocalWorkingCopy()) { $future->setCWD($this->getLocalPath()); } return $future; } private function newLocalCommandPassthru(array $argv) { $this->assertLocalExists(); $future = DiffusionCommandEngine::newCommandEngine($this) ->setArgv($argv) ->setPassthru(true) ->newFuture(); if ($this->usesLocalWorkingCopy()) { $future->setCWD($this->getLocalPath()); } return $future; } public function getURI() { $short_name = $this->getRepositorySlug(); if (strlen($short_name)) { return "/source/{$short_name}/"; } $callsign = $this->getCallsign(); if (strlen($callsign)) { return "/diffusion/{$callsign}/"; } $id = $this->getID(); return "/diffusion/{$id}/"; } public function getPathURI($path) { return $this->getURI().ltrim($path, '/'); } public function getCommitURI($identifier) { $callsign = $this->getCallsign(); if (strlen($callsign)) { return "/r{$callsign}{$identifier}"; } $id = $this->getID(); return "/R{$id}:{$identifier}"; } public static function parseRepositoryServicePath($request_path, $vcs) { $is_git = ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_GIT); $patterns = array( '(^'. '(?P/?(?:diffusion|source)/(?P[^/]+))'. '(?P.*)'. '\z)', ); $identifier = null; foreach ($patterns as $pattern) { $matches = null; if (!preg_match($pattern, $request_path, $matches)) { continue; } $identifier = $matches['identifier']; if ($is_git) { $identifier = preg_replace('/\\.git\z/', '', $identifier); } $base = $matches['base']; $path = $matches['path']; break; } if ($identifier === null) { return null; } return array( 'identifier' => $identifier, 'base' => $base, 'path' => $path, ); } public function getCanonicalPath($request_path) { $standard_pattern = '(^'. '(?P/(?:diffusion|source)/)'. '(?P[^/]+)'. '(?P(?:/.*)?)'. '\z)'; $matches = null; if (preg_match($standard_pattern, $request_path, $matches)) { $suffix = $matches['suffix']; return $this->getPathURI($suffix); } $commit_pattern = '(^'. '(?P/)'. '(?P'. '(?:'. 'r(?P[A-Z]+)'. '|'. 'R(?P[1-9]\d*):'. ')'. '(?P[a-f0-9]+)'. ')'. '\z)'; $matches = null; if (preg_match($commit_pattern, $request_path, $matches)) { $commit = $matches['commit']; return $this->getCommitURI($commit); } return null; } public function generateURI(array $params) { $req_branch = false; $req_commit = false; $action = idx($params, 'action'); switch ($action) { case 'history': case 'graph': case 'clone': case 'blame': case 'browse': case 'document': case 'change': case 'lastmodified': case 'tags': case 'branches': case 'lint': case 'pathtree': case 'refs': case 'compare': break; case 'branch': // NOTE: This does not actually require a branch, and won't have one // in Subversion. Possibly this should be more clear. break; case 'commit': case 'rendering-ref': $req_commit = true; break; default: throw new Exception( pht( 'Action "%s" is not a valid repository URI action.', $action)); } $path = idx($params, 'path'); $branch = idx($params, 'branch'); $commit = idx($params, 'commit'); $line = idx($params, 'line'); $head = idx($params, 'head'); $against = idx($params, 'against'); if ($req_commit && !strlen($commit)) { throw new Exception( pht( 'Diffusion URI action "%s" requires commit!', $action)); } if ($req_branch && !strlen($branch)) { throw new Exception( pht( 'Diffusion URI action "%s" requires branch!', $action)); } if ($action === 'commit') { return $this->getCommitURI($commit); } if (strlen($path)) { $path = ltrim($path, '/'); $path = str_replace(array(';', '$'), array(';;', '$$'), $path); $path = phutil_escape_uri($path); } $raw_branch = $branch; if (strlen($branch)) { $branch = phutil_escape_uri_path_component($branch); $path = "{$branch}/{$path}"; } $raw_commit = $commit; if (strlen($commit)) { $commit = str_replace('$', '$$', $commit); $commit = ';'.phutil_escape_uri($commit); } if (strlen($line)) { $line = '$'.phutil_escape_uri($line); } $query = array(); switch ($action) { case 'change': case 'history': case 'graph': case 'blame': case 'browse': case 'document': case 'lastmodified': case 'tags': case 'branches': case 'lint': case 'pathtree': case 'refs': $uri = $this->getPathURI("/{$action}/{$path}{$commit}{$line}"); break; case 'compare': $uri = $this->getPathURI("/{$action}/"); if (strlen($head)) { $query['head'] = $head; } else if (strlen($raw_commit)) { $query['commit'] = $raw_commit; } else if (strlen($raw_branch)) { $query['head'] = $raw_branch; } if (strlen($against)) { $query['against'] = $against; } break; case 'branch': if (strlen($path)) { $uri = $this->getPathURI("/repository/{$path}"); } else { $uri = $this->getPathURI('/'); } break; case 'external': $commit = ltrim($commit, ';'); $uri = "/diffusion/external/{$commit}/"; break; case 'rendering-ref': // This isn't a real URI per se, it's passed as a query parameter to // the ajax changeset stuff but then we parse it back out as though // it came from a URI. $uri = rawurldecode("{$path}{$commit}"); break; case 'clone': $uri = $this->getPathURI("/{$action}/"); break; } if ($action == 'rendering-ref') { return $uri; } $uri = new PhutilURI($uri); if (isset($params['lint'])) { $params['params'] = idx($params, 'params', array()) + array( 'lint' => $params['lint'], ); } $query = idx($params, 'params', array()) + $query; if ($query) { $uri->setQueryParams($query); } return $uri; } public function updateURIIndex() { $indexes = array(); $uris = $this->getURIs(); foreach ($uris as $uri) { if ($uri->getIsDisabled()) { continue; } $indexes[] = $uri->getNormalizedURI(); } PhabricatorRepositoryURIIndex::updateRepositoryURIs( $this->getPHID(), $indexes); return $this; } public function isTracked() { $status = $this->getDetail('tracking-enabled'); $map = self::getStatusMap(); $spec = idx($map, $status); if (!$spec) { if ($status) { $status = self::STATUS_ACTIVE; } else { $status = self::STATUS_INACTIVE; } $spec = idx($map, $status); } return (bool)idx($spec, 'isTracked', false); } public function getDefaultBranch() { $default = $this->getDetail('default-branch'); if (strlen($default)) { return $default; } $default_branches = array( PhabricatorRepositoryType::REPOSITORY_TYPE_GIT => 'master', PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL => 'default', ); return idx($default_branches, $this->getVersionControlSystem()); } public function getDefaultArcanistBranch() { return coalesce($this->getDefaultBranch(), 'svn'); } private function isBranchInFilter($branch, $filter_key) { $vcs = $this->getVersionControlSystem(); $is_git = ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_GIT); $use_filter = ($is_git); if (!$use_filter) { // If this VCS doesn't use filters, pass everything through. return true; } $filter = $this->getDetail($filter_key, array()); // If there's no filter set, let everything through. if (!$filter) { return true; } // If this branch isn't literally named `regexp(...)`, and it's in the // filter list, let it through. if (isset($filter[$branch])) { if (self::extractBranchRegexp($branch) === null) { return true; } } // If the branch matches a regexp, let it through. foreach ($filter as $pattern => $ignored) { $regexp = self::extractBranchRegexp($pattern); if ($regexp !== null) { if (preg_match($regexp, $branch)) { return true; } } } // Nothing matched, so filter this branch out. return false; } public static function extractBranchRegexp($pattern) { $matches = null; if (preg_match('/^regexp\\((.*)\\)\z/', $pattern, $matches)) { return $matches[1]; } return null; } public function shouldTrackRef(DiffusionRepositoryRef $ref) { // At least for now, don't track the staging area tags. if ($ref->isTag()) { if (preg_match('(^phabricator/)', $ref->getShortName())) { return false; } } if (!$ref->isBranch()) { return true; } return $this->shouldTrackBranch($ref->getShortName()); } public function shouldTrackBranch($branch) { return $this->isBranchInFilter($branch, 'branch-filter'); } public function formatCommitName($commit_identifier, $local = false) { $vcs = $this->getVersionControlSystem(); $type_git = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; $type_hg = PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL; $is_git = ($vcs == $type_git); $is_hg = ($vcs == $type_hg); if ($is_git || $is_hg) { $name = substr($commit_identifier, 0, 12); $need_scope = false; } else { $name = $commit_identifier; $need_scope = true; } if (!$local) { $need_scope = true; } if ($need_scope) { $callsign = $this->getCallsign(); if ($callsign) { $scope = "r{$callsign}"; } else { $id = $this->getID(); $scope = "R{$id}:"; } $name = $scope.$name; } return $name; } public function isImporting() { return (bool)$this->getDetail('importing', false); } public function isNewlyInitialized() { return (bool)$this->getDetail('newly-initialized', false); } public function loadImportProgress() { $progress = queryfx_all( $this->establishConnection('r'), 'SELECT importStatus, count(*) N FROM %T WHERE repositoryID = %d GROUP BY importStatus', id(new PhabricatorRepositoryCommit())->getTableName(), $this->getID()); $done = 0; $total = 0; foreach ($progress as $row) { $total += $row['N'] * 4; $status = $row['importStatus']; if ($status & PhabricatorRepositoryCommit::IMPORTED_MESSAGE) { $done += $row['N']; } if ($status & PhabricatorRepositoryCommit::IMPORTED_CHANGE) { $done += $row['N']; } if ($status & PhabricatorRepositoryCommit::IMPORTED_OWNERS) { $done += $row['N']; } if ($status & PhabricatorRepositoryCommit::IMPORTED_HERALD) { $done += $row['N']; } } if ($total) { $ratio = ($done / $total); } else { $ratio = 0; } // Cap this at "99.99%", because it's confusing to users when the actual // fraction is "99.996%" and it rounds up to "100.00%". if ($ratio > 0.9999) { $ratio = 0.9999; } return $ratio; } /** * Should this repository publish feed, notifications, audits, and email? * * We do not publish information about repositories during initial import, * or if the repository has been set not to publish. */ public function shouldPublish() { if ($this->isImporting()) { return false; } if ($this->getDetail('herald-disabled')) { return false; } return true; } /* -( Autoclose )---------------------------------------------------------- */ public function shouldAutocloseRef(DiffusionRepositoryRef $ref) { if (!$ref->isBranch()) { return false; } return $this->shouldAutocloseBranch($ref->getShortName()); } /** * Determine if autoclose is active for a branch. * * For more details about why, use @{method:shouldSkipAutocloseBranch}. * * @param string Branch name to check. * @return bool True if autoclose is active for the branch. * @task autoclose */ public function shouldAutocloseBranch($branch) { return ($this->shouldSkipAutocloseBranch($branch) === null); } /** * Determine if autoclose is active for a commit. * * For more details about why, use @{method:shouldSkipAutocloseCommit}. * * @param PhabricatorRepositoryCommit Commit to check. * @return bool True if autoclose is active for the commit. * @task autoclose */ public function shouldAutocloseCommit(PhabricatorRepositoryCommit $commit) { return ($this->shouldSkipAutocloseCommit($commit) === null); } /** * Determine why autoclose should be skipped for a branch. * * This method gives a detailed reason why autoclose will be skipped. To * perform a simple test, use @{method:shouldAutocloseBranch}. * * @param string Branch name to check. * @return const|null Constant identifying reason to skip this branch, or null * if autoclose is active. * @task autoclose */ public function shouldSkipAutocloseBranch($branch) { $all_reason = $this->shouldSkipAllAutoclose(); if ($all_reason) { return $all_reason; } if (!$this->shouldTrackBranch($branch)) { return self::BECAUSE_BRANCH_UNTRACKED; } if (!$this->isBranchInFilter($branch, 'close-commits-filter')) { return self::BECAUSE_BRANCH_NOT_AUTOCLOSE; } return null; } /** * Determine why autoclose should be skipped for a commit. * * This method gives a detailed reason why autoclose will be skipped. To * perform a simple test, use @{method:shouldAutocloseCommit}. * * @param PhabricatorRepositoryCommit Commit to check. * @return const|null Constant identifying reason to skip this commit, or null * if autoclose is active. * @task autoclose */ public function shouldSkipAutocloseCommit( PhabricatorRepositoryCommit $commit) { $all_reason = $this->shouldSkipAllAutoclose(); if ($all_reason) { return $all_reason; } switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: return null; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: break; default: throw new Exception(pht('Unrecognized version control system.')); } $closeable_flag = PhabricatorRepositoryCommit::IMPORTED_CLOSEABLE; if (!$commit->isPartiallyImported($closeable_flag)) { return self::BECAUSE_NOT_ON_AUTOCLOSE_BRANCH; } return null; } /** * Determine why all autoclose operations should be skipped for this * repository. * * @return const|null Constant identifying reason to skip all autoclose * operations, or null if autoclose operations are not blocked at the * repository level. * @task autoclose */ private function shouldSkipAllAutoclose() { if ($this->isImporting()) { return self::BECAUSE_REPOSITORY_IMPORTING; } if ($this->getDetail('disable-autoclose', false)) { return self::BECAUSE_AUTOCLOSE_DISABLED; } return null; } /* -( Repository URI Management )------------------------------------------ */ /** * Get the remote URI for this repository. * * @return string * @task uri */ public function getRemoteURI() { return (string)$this->getRemoteURIObject(); } /** * Get the remote URI for this repository, including credentials if they're * used by this repository. * * @return PhutilOpaqueEnvelope URI, possibly including credentials. * @task uri */ public function getRemoteURIEnvelope() { $uri = $this->getRemoteURIObject(); $remote_protocol = $this->getRemoteProtocol(); if ($remote_protocol == 'http' || $remote_protocol == 'https') { // For SVN, we use `--username` and `--password` flags separately, so // don't add any credentials here. if (!$this->isSVN()) { $credential_phid = $this->getCredentialPHID(); if ($credential_phid) { $key = PassphrasePasswordKey::loadFromPHID( $credential_phid, PhabricatorUser::getOmnipotentUser()); $uri->setUser($key->getUsernameEnvelope()->openEnvelope()); $uri->setPass($key->getPasswordEnvelope()->openEnvelope()); } } } return new PhutilOpaqueEnvelope((string)$uri); } /** * Get the clone (or checkout) URI for this repository, without authentication * information. * * @return string Repository URI. * @task uri */ public function getPublicCloneURI() { return (string)$this->getCloneURIObject(); } /** * Get the protocol for the repository's remote. * * @return string Protocol, like "ssh" or "git". * @task uri */ public function getRemoteProtocol() { $uri = $this->getRemoteURIObject(); return $uri->getProtocol(); } /** * Get a parsed object representation of the repository's remote URI.. * * @return wild A @{class@libphutil:PhutilURI}. * @task uri */ public function getRemoteURIObject() { $raw_uri = $this->getDetail('remote-uri'); if (!strlen($raw_uri)) { return new PhutilURI(''); } if (!strncmp($raw_uri, '/', 1)) { return new PhutilURI('file://'.$raw_uri); } return new PhutilURI($raw_uri); } /** * Get the "best" clone/checkout URI for this repository, on any protocol. */ public function getCloneURIObject() { if (!$this->isHosted()) { if ($this->isSVN()) { // Make sure we pick up the "Import Only" path for Subversion, so // the user clones the repository starting at the correct path, not // from the root. $base_uri = $this->getSubversionBaseURI(); $base_uri = new PhutilURI($base_uri); $path = $base_uri->getPath(); if (!$path) { $path = '/'; } // If the trailing "@" is not required to escape the URI, strip it for // readability. if (!preg_match('/@.*@/', $path)) { $path = rtrim($path, '@'); } $base_uri->setPath($path); return $base_uri; } else { return $this->getRemoteURIObject(); } } // TODO: This should be cleaned up to deal with all the new URI handling. $another_copy = id(new PhabricatorRepositoryQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withPHIDs(array($this->getPHID())) ->needURIs(true) ->executeOne(); $clone_uris = $another_copy->getCloneURIs(); if (!$clone_uris) { return null; } return head($clone_uris)->getEffectiveURI(); } private function getRawHTTPCloneURIObject() { $uri = PhabricatorEnv::getProductionURI($this->getURI()); $uri = new PhutilURI($uri); if ($this->isGit()) { $uri->setPath($uri->getPath().$this->getCloneName().'.git'); } else if ($this->isHg()) { $uri->setPath($uri->getPath().$this->getCloneName().'/'); } return $uri; } /** * Determine if we should connect to the remote using SSH flags and * credentials. * * @return bool True to use the SSH protocol. * @task uri */ private function shouldUseSSH() { if ($this->isHosted()) { return false; } $protocol = $this->getRemoteProtocol(); if ($this->isSSHProtocol($protocol)) { return true; } return false; } /** * Determine if we should connect to the remote using HTTP flags and * credentials. * * @return bool True to use the HTTP protocol. * @task uri */ private function shouldUseHTTP() { if ($this->isHosted()) { return false; } $protocol = $this->getRemoteProtocol(); return ($protocol == 'http' || $protocol == 'https'); } /** * Determine if we should connect to the remote using SVN flags and * credentials. * * @return bool True to use the SVN protocol. * @task uri */ private function shouldUseSVNProtocol() { if ($this->isHosted()) { return false; } $protocol = $this->getRemoteProtocol(); return ($protocol == 'svn'); } /** * Determine if a protocol is SSH or SSH-like. * * @param string A protocol string, like "http" or "ssh". * @return bool True if the protocol is SSH-like. * @task uri */ private function isSSHProtocol($protocol) { return ($protocol == 'ssh' || $protocol == 'svn+ssh'); } public function delete() { $this->openTransaction(); $paths = id(new PhabricatorOwnersPath()) ->loadAllWhere('repositoryPHID = %s', $this->getPHID()); foreach ($paths as $path) { $path->delete(); } queryfx( $this->establishConnection('w'), 'DELETE FROM %T WHERE repositoryPHID = %s', id(new PhabricatorRepositorySymbol())->getTableName(), $this->getPHID()); $commits = id(new PhabricatorRepositoryCommit()) ->loadAllWhere('repositoryID = %d', $this->getID()); foreach ($commits as $commit) { // note PhabricatorRepositoryAuditRequests and // PhabricatorRepositoryCommitData are deleted here too. $commit->delete(); } $uris = id(new PhabricatorRepositoryURI()) ->loadAllWhere('repositoryPHID = %s', $this->getPHID()); foreach ($uris as $uri) { $uri->delete(); } $ref_cursors = id(new PhabricatorRepositoryRefCursor()) ->loadAllWhere('repositoryPHID = %s', $this->getPHID()); foreach ($ref_cursors as $cursor) { $cursor->delete(); } $conn_w = $this->establishConnection('w'); queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d', self::TABLE_FILESYSTEM, $this->getID()); queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d', self::TABLE_PATHCHANGE, $this->getID()); queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d', self::TABLE_SUMMARY, $this->getID()); $result = parent::delete(); $this->saveTransaction(); return $result; } public function isGit() { $vcs = $this->getVersionControlSystem(); return ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_GIT); } public function isSVN() { $vcs = $this->getVersionControlSystem(); return ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_SVN); } public function isHg() { $vcs = $this->getVersionControlSystem(); return ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL); } public function isHosted() { return (bool)$this->getDetail('hosting-enabled', false); } public function setHosted($enabled) { return $this->setDetail('hosting-enabled', $enabled); } public function canServeProtocol($protocol, $write) { if (!$this->isTracked()) { return false; } $clone_uris = $this->getCloneURIs(); foreach ($clone_uris as $uri) { if ($uri->getBuiltinProtocol() !== $protocol) { continue; } $io_type = $uri->getEffectiveIoType(); if ($io_type == PhabricatorRepositoryURI::IO_READWRITE) { return true; } if (!$write) { if ($io_type == PhabricatorRepositoryURI::IO_READ) { return true; } } } return false; } public function hasLocalWorkingCopy() { try { self::assertLocalExists(); return true; } catch (Exception $ex) { return false; } } /** * Raise more useful errors when there are basic filesystem problems. */ private function assertLocalExists() { if (!$this->usesLocalWorkingCopy()) { return; } $local = $this->getLocalPath(); Filesystem::assertExists($local); Filesystem::assertIsDirectory($local); Filesystem::assertReadable($local); } /** * Determine if the working copy is bare or not. In Git, this corresponds * to `--bare`. In Mercurial, `--noupdate`. */ public function isWorkingCopyBare() { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: return false; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $local = $this->getLocalPath(); if (Filesystem::pathExists($local.'/.git')) { return false; } else { return true; } } } public function usesLocalWorkingCopy() { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: return $this->isHosted(); case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: return true; } } public function getHookDirectories() { $directories = array(); if (!$this->isHosted()) { return $directories; } $root = $this->getLocalPath(); switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: if ($this->isWorkingCopyBare()) { $directories[] = $root.'/hooks/pre-receive-phabricator.d/'; } else { $directories[] = $root.'/.git/hooks/pre-receive-phabricator.d/'; } break; case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $directories[] = $root.'/hooks/pre-commit-phabricator.d/'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: // NOTE: We don't support custom Mercurial hooks for now because they're // messy and we can't easily just drop a `hooks.d/` directory next to // the hooks. break; } return $directories; } public function canDestroyWorkingCopy() { if ($this->isHosted()) { // Never destroy hosted working copies. return false; } $default_path = PhabricatorEnv::getEnvConfig( 'repository.default-local-path'); return Filesystem::isDescendant($this->getLocalPath(), $default_path); } public function canUsePathTree() { return !$this->isSVN(); } public function canUseGitLFS() { if (!$this->isGit()) { return false; } if (!$this->isHosted()) { return false; } if (!PhabricatorEnv::getEnvConfig('diffusion.allow-git-lfs')) { return false; } return true; } public function getGitLFSURI($path = null) { if (!$this->canUseGitLFS()) { throw new Exception( pht( 'This repository does not support Git LFS, so Git LFS URIs can '. 'not be generated for it.')); } $uri = $this->getRawHTTPCloneURIObject(); $uri = (string)$uri; $uri = $uri.'/'.$path; return $uri; } public function canMirror() { if ($this->isGit() || $this->isHg()) { return true; } return false; } public function canAllowDangerousChanges() { if (!$this->isHosted()) { return false; } // In Git and Mercurial, ref deletions and rewrites are dangerous. // In Subversion, editing revprops is dangerous. return true; } public function shouldAllowDangerousChanges() { return (bool)$this->getDetail('allow-dangerous-changes'); } public function canAllowEnormousChanges() { if (!$this->isHosted()) { return false; } return true; } public function shouldAllowEnormousChanges() { return (bool)$this->getDetail('allow-enormous-changes'); } public function writeStatusMessage( $status_type, $status_code, array $parameters = array()) { $table = new PhabricatorRepositoryStatusMessage(); $conn_w = $table->establishConnection('w'); $table_name = $table->getTableName(); if ($status_code === null) { queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d AND statusType = %s', $table_name, $this->getID(), $status_type); } else { // If the existing message has the same code (e.g., we just hit an // error and also previously hit an error) we increment the message // count. This allows us to determine how many times in a row we've // run into an error. // NOTE: The assignments in "ON DUPLICATE KEY UPDATE" are evaluated // in order, so the "messageCount" assignment must occur before the // "statusCode" assignment. See T11705. queryfx( $conn_w, 'INSERT INTO %T (repositoryID, statusType, statusCode, parameters, epoch, messageCount) VALUES (%d, %s, %s, %s, %d, %d) ON DUPLICATE KEY UPDATE messageCount = IF( statusCode = VALUES(statusCode), messageCount + VALUES(messageCount), VALUES(messageCount)), statusCode = VALUES(statusCode), parameters = VALUES(parameters), epoch = VALUES(epoch)', $table_name, $this->getID(), $status_type, $status_code, json_encode($parameters), time(), 1); } return $this; } public static function assertValidRemoteURI($uri) { if (trim($uri) != $uri) { throw new Exception( pht('The remote URI has leading or trailing whitespace.')); } $uri_object = new PhutilURI($uri); $protocol = $uri_object->getProtocol(); // Catch confusion between Git/SCP-style URIs and normal URIs. See T3619 // for discussion. This is usually a user adding "ssh://" to an implicit // SSH Git URI. if ($protocol == 'ssh') { if (preg_match('(^[^:@]+://[^/:]+:[^\d])', $uri)) { throw new Exception( pht( "The remote URI is not formatted correctly. Remote URIs ". "with an explicit protocol should be in the form ". "'%s', not '%s'. The '%s' syntax is only valid in SCP-style URIs.", 'proto://domain/path', 'proto://domain:/path', ':/path')); } } switch ($protocol) { case 'ssh': case 'http': case 'https': case 'git': case 'svn': case 'svn+ssh': break; default: // NOTE: We're explicitly rejecting 'file://' because it can be // used to clone from the working copy of another repository on disk // that you don't normally have permission to access. throw new Exception( pht( 'The URI protocol is unrecognized. It should begin with '. '"%s", "%s", "%s", "%s", "%s", "%s", or be in the form "%s".', 'ssh://', 'http://', 'https://', 'git://', 'svn://', 'svn+ssh://', 'git@domain.com:path')); } return true; } /** * Load the pull frequency for this repository, based on the time since the * last activity. * * We pull rarely used repositories less frequently. This finds the most * recent commit which is older than the current time (which prevents us from * spinning on repositories with a silly commit post-dated to some time in * 2037). We adjust the pull frequency based on when the most recent commit * occurred. * * @param int The minimum update interval to use, in seconds. * @return int Repository update interval, in seconds. */ public function loadUpdateInterval($minimum = 15) { // First, check if we've hit errors recently. If we have, wait one period // for each consecutive error. Normally, this corresponds to a backoff of // 15s, 30s, 45s, etc. $message_table = new PhabricatorRepositoryStatusMessage(); $conn = $message_table->establishConnection('r'); $error_count = queryfx_one( $conn, 'SELECT MAX(messageCount) error_count FROM %T WHERE repositoryID = %d AND statusType IN (%Ls) AND statusCode IN (%Ls)', $message_table->getTableName(), $this->getID(), array( PhabricatorRepositoryStatusMessage::TYPE_INIT, PhabricatorRepositoryStatusMessage::TYPE_FETCH, ), array( PhabricatorRepositoryStatusMessage::CODE_ERROR, )); $error_count = (int)$error_count['error_count']; if ($error_count > 0) { return (int)($minimum * $error_count); } // If a repository is still importing, always pull it as frequently as // possible. This prevents us from hanging for a long time at 99.9% when // importing an inactive repository. if ($this->isImporting()) { return $minimum; } $window_start = (PhabricatorTime::getNow() + $minimum); $table = id(new PhabricatorRepositoryCommit()); $last_commit = queryfx_one( $table->establishConnection('r'), 'SELECT epoch FROM %T WHERE repositoryID = %d AND epoch <= %d ORDER BY epoch DESC LIMIT 1', $table->getTableName(), $this->getID(), $window_start); if ($last_commit) { $time_since_commit = ($window_start - $last_commit['epoch']); } else { // If the repository has no commits, treat the creation date as // though it were the date of the last commit. This makes empty // repositories update quickly at first but slow down over time // if they don't see any activity. $time_since_commit = ($window_start - $this->getDateCreated()); } $last_few_days = phutil_units('3 days in seconds'); if ($time_since_commit <= $last_few_days) { // For repositories with activity in the recent past, we wait one // extra second for every 10 minutes since the last commit. This // shorter backoff is intended to handle weekends and other short // breaks from development. $smart_wait = ($time_since_commit / 600); } else { // For repositories without recent activity, we wait one extra second // for every 4 minutes since the last commit. This longer backoff // handles rarely used repositories, up to the maximum. $smart_wait = ($time_since_commit / 240); } // We'll never wait more than 6 hours to pull a repository. $longest_wait = phutil_units('6 hours in seconds'); $smart_wait = min($smart_wait, $longest_wait); $smart_wait = max($minimum, $smart_wait); return (int)$smart_wait; } /** * Retrieve the service URI for the device hosting this repository. * * See @{method:newConduitClient} for a general discussion of interacting * with repository services. This method provides lower-level resolution of * services, returning raw URIs. * * @param PhabricatorUser Viewing user. * @param map Constraints on selectable services. * @return string|null URI, or `null` for local repositories. */ public function getAlmanacServiceURI( PhabricatorUser $viewer, array $options) { PhutilTypeSpec::checkMap( $options, array( 'neverProxy' => 'bool', 'protocols' => 'list', + 'writable' => 'optional bool', )); $never_proxy = $options['neverProxy']; $protocols = $options['protocols']; + $writable = idx($options, 'writable', false); $cache_key = $this->getAlmanacServiceCacheKey(); if (!$cache_key) { return null; } $cache = PhabricatorCaches::getMutableStructureCache(); $uris = $cache->getKey($cache_key, false); // If we haven't built the cache yet, build it now. if ($uris === false) { $uris = $this->buildAlmanacServiceURIs(); $cache->setKey($cache_key, $uris); } if ($uris === null) { return null; } $local_device = AlmanacKeys::getDeviceID(); if ($never_proxy && !$local_device) { throw new Exception( pht( 'Unable to handle proxied service request. This device is not '. 'registered, so it can not identify local services. Register '. 'this device before sending requests here.')); } $protocol_map = array_fuse($protocols); $results = array(); foreach ($uris as $uri) { // If we're never proxying this and it's locally satisfiable, return // `null` to tell the caller to handle it locally. If we're allowed to // proxy, we skip this check and may proxy the request to ourselves. // (That proxied request will end up here with proxying forbidden, // return `null`, and then the request will actually run.) if ($local_device && $never_proxy) { if ($uri['device'] == $local_device) { return null; } } if (isset($protocol_map[$uri['protocol']])) { - $results[] = new PhutilURI($uri['uri']); + $results[] = $uri; } } if (!$results) { throw new Exception( pht( 'The Almanac service for this repository is not bound to any '. 'interfaces which support the required protocols (%s).', implode(', ', $protocols))); } if ($never_proxy) { throw new Exception( pht( 'Refusing to proxy a repository request from a cluster host. '. 'Cluster hosts must correctly route their intracluster requests.')); } if (count($results) > 1) { if (!$this->supportsSynchronization()) { throw new Exception( pht( 'Repository "%s" is bound to multiple active repository hosts, '. 'but this repository does not support cluster synchronization. '. 'Declusterize this repository or move it to a service with only '. 'one host.', $this->getDisplayName())); } } + // If we require a writable device, remove URIs which aren't writable. + if ($writable) { + foreach ($results as $key => $uri) { + if (!$uri['writable']) { + unset($results[$key]); + } + } + + if (!$results) { + throw new Exception( + pht( + 'This repository ("%s") is not writable with the given '. + 'protocols (%s). The Almanac service for this repository has no '. + 'writable bindings that support these protocols.', + $this->getDisplayName(), + implode(', ', $protocols))); + } + } + shuffle($results); - return head($results); + + $result = head($results); + return $result['uri']; } public function supportsSynchronization() { // TODO: For now, this is only supported for Git. if (!$this->isGit()) { return false; } return true; } public function getAlmanacServiceCacheKey() { $service_phid = $this->getAlmanacServicePHID(); if (!$service_phid) { return null; } $repository_phid = $this->getPHID(); - return "diffusion.repository({$repository_phid}).service({$service_phid})"; + + $parts = array( + "repo({$repository_phid})", + "serv({$service_phid})", + 'v2', + ); + + return implode('.', $parts); } private function buildAlmanacServiceURIs() { $service = $this->loadAlmanacService(); if (!$service) { return null; } $bindings = $service->getActiveBindings(); if (!$bindings) { throw new Exception( pht( 'The Almanac service for this repository is not bound to any '. 'interfaces.')); } $uris = array(); foreach ($bindings as $binding) { $iface = $binding->getInterface(); $uri = $this->getClusterRepositoryURIFromBinding($binding); $protocol = $uri->getProtocol(); $device_name = $iface->getDevice()->getName(); $uris[] = array( 'protocol' => $protocol, 'uri' => (string)$uri, 'device' => $device_name, + 'writable' => (bool)$binding->getAlmanacPropertyValue('writable'), ); } return $uris; } /** * Build a new Conduit client in order to make a service call to this * repository. * * If the repository is hosted locally, this method may return `null`. The * caller should use `ConduitCall` or other local logic to complete the * request. * * By default, we will return a @{class:ConduitClient} for any repository with * a service, even if that service is on the current device. * * We do this because this configuration does not make very much sense in a * production context, but is very common in a test/development context * (where the developer's machine is both the web host and the repository * service). By proxying in development, we get more consistent behavior * between development and production, and don't have a major untested * codepath. * * The `$never_proxy` parameter can be used to prevent this local proxying. * If the flag is passed: * * - The method will return `null` (implying a local service call) * if the repository service is hosted on the current device. * - The method will throw if it would need to return a client. * * This is used to prevent loops in Conduit: the first request will proxy, * even in development, but the second request will be identified as a * cluster request and forced not to proxy. * * For lower-level service resolution, see @{method:getAlmanacServiceURI}. * * @param PhabricatorUser Viewing user. * @param bool `true` to throw if a client would be returned. * @return ConduitClient|null Client, or `null` for local repositories. */ public function newConduitClient( PhabricatorUser $viewer, $never_proxy = false) { $uri = $this->getAlmanacServiceURI( $viewer, array( 'neverProxy' => $never_proxy, 'protocols' => array( 'http', 'https', ), + + // At least today, no Conduit call can ever write to a repository, + // so it's fine to send anything to a read-only node. + 'writable' => false, )); if ($uri === null) { return null; } $domain = id(new PhutilURI(PhabricatorEnv::getURI('/')))->getDomain(); $client = id(new ConduitClient($uri)) ->setHost($domain); if ($viewer->isOmnipotent()) { // If the caller is the omnipotent user (normally, a daemon), we will // sign the request with this host's asymmetric keypair. $public_path = AlmanacKeys::getKeyPath('device.pub'); try { $public_key = Filesystem::readFile($public_path); } catch (Exception $ex) { throw new PhutilAggregateException( pht( 'Unable to read device public key while attempting to make '. 'authenticated method call within the Phabricator cluster. '. 'Use `%s` to register keys for this device. Exception: %s', 'bin/almanac register', $ex->getMessage()), array($ex)); } $private_path = AlmanacKeys::getKeyPath('device.key'); try { $private_key = Filesystem::readFile($private_path); $private_key = new PhutilOpaqueEnvelope($private_key); } catch (Exception $ex) { throw new PhutilAggregateException( pht( 'Unable to read device private key while attempting to make '. 'authenticated method call within the Phabricator cluster. '. 'Use `%s` to register keys for this device. Exception: %s', 'bin/almanac register', $ex->getMessage()), array($ex)); } $client->setSigningKeys($public_key, $private_key); } else { // If the caller is a normal user, we generate or retrieve a cluster // API token. $token = PhabricatorConduitToken::loadClusterTokenForUser($viewer); if ($token) { $client->setConduitToken($token->getToken()); } } return $client; } public function getPassthroughEnvironmentalVariables() { $env = $_ENV; if ($this->isGit()) { // $_ENV does not populate in CLI contexts if "E" is missing from // "variables_order" in PHP config. Currently, we do not require this // to be configured. Since it may not be, explicitly bring expected Git // environmental variables into scope. This list is not exhaustive, but // only lists variables with a known impact on commit hook behavior. // This can be removed if we later require "E" in "variables_order". $git_env = array( 'GIT_OBJECT_DIRECTORY', 'GIT_ALTERNATE_OBJECT_DIRECTORIES', 'GIT_QUARANTINE_PATH', ); foreach ($git_env as $key) { $value = getenv($key); if (strlen($value)) { $env[$key] = $value; } } $key = 'GIT_PUSH_OPTION_COUNT'; $git_count = getenv($key); if (strlen($git_count)) { $git_count = (int)$git_count; $env[$key] = $git_count; for ($ii = 0; $ii < $git_count; $ii++) { $key = 'GIT_PUSH_OPTION_'.$ii; $env[$key] = getenv($key); } } } $result = array(); foreach ($env as $key => $value) { // In Git, pass anything matching "GIT_*" though. Some of these variables // need to be preserved to allow `git` operations to work properly when // running from commit hooks. if ($this->isGit()) { if (preg_match('/^GIT_/', $key)) { $result[$key] = $value; } } } return $result; } public function supportsBranchComparison() { return $this->isGit(); } /* -( Repository URIs )---------------------------------------------------- */ public function attachURIs(array $uris) { $custom_map = array(); foreach ($uris as $key => $uri) { $builtin_key = $uri->getRepositoryURIBuiltinKey(); if ($builtin_key !== null) { $custom_map[$builtin_key] = $key; } } $builtin_uris = $this->newBuiltinURIs(); $seen_builtins = array(); foreach ($builtin_uris as $builtin_uri) { $builtin_key = $builtin_uri->getRepositoryURIBuiltinKey(); $seen_builtins[$builtin_key] = true; // If this builtin URI is disabled, don't attach it and remove the // persisted version if it exists. if ($builtin_uri->getIsDisabled()) { if (isset($custom_map[$builtin_key])) { unset($uris[$custom_map[$builtin_key]]); } continue; } // If the URI exists, make sure it's marked as not being disabled. if (isset($custom_map[$builtin_key])) { $uris[$custom_map[$builtin_key]]->setIsDisabled(false); } } // Remove any builtins which no longer exist. foreach ($custom_map as $builtin_key => $key) { if (empty($seen_builtins[$builtin_key])) { unset($uris[$key]); } } $this->uris = $uris; return $this; } public function getURIs() { return $this->assertAttached($this->uris); } public function getCloneURIs() { $uris = $this->getURIs(); $clone = array(); foreach ($uris as $uri) { if (!$uri->isBuiltin()) { continue; } if ($uri->getIsDisabled()) { continue; } $io_type = $uri->getEffectiveIoType(); $is_clone = ($io_type == PhabricatorRepositoryURI::IO_READ) || ($io_type == PhabricatorRepositoryURI::IO_READWRITE); if (!$is_clone) { continue; } $clone[] = $uri; } $clone = msort($clone, 'getURIScore'); $clone = array_reverse($clone); return $clone; } public function newBuiltinURIs() { $has_callsign = ($this->getCallsign() !== null); $has_shortname = ($this->getRepositorySlug() !== null); $identifier_map = array( PhabricatorRepositoryURI::BUILTIN_IDENTIFIER_CALLSIGN => $has_callsign, PhabricatorRepositoryURI::BUILTIN_IDENTIFIER_SHORTNAME => $has_shortname, PhabricatorRepositoryURI::BUILTIN_IDENTIFIER_ID => true, ); // If the view policy of the repository is public, support anonymous HTTP // even if authenticated HTTP is not supported. if ($this->getViewPolicy() === PhabricatorPolicies::POLICY_PUBLIC) { $allow_http = true; } else { $allow_http = PhabricatorEnv::getEnvConfig('diffusion.allow-http-auth'); } $base_uri = PhabricatorEnv::getURI('/'); $base_uri = new PhutilURI($base_uri); $has_https = ($base_uri->getProtocol() == 'https'); $has_https = ($has_https && $allow_http); $has_http = !PhabricatorEnv::getEnvConfig('security.require-https'); $has_http = ($has_http && $allow_http); // HTTP is not supported for Subversion. if ($this->isSVN()) { $has_http = false; $has_https = false; } $has_ssh = (bool)strlen(PhabricatorEnv::getEnvConfig('phd.user')); $protocol_map = array( PhabricatorRepositoryURI::BUILTIN_PROTOCOL_SSH => $has_ssh, PhabricatorRepositoryURI::BUILTIN_PROTOCOL_HTTPS => $has_https, PhabricatorRepositoryURI::BUILTIN_PROTOCOL_HTTP => $has_http, ); $uris = array(); foreach ($protocol_map as $protocol => $proto_supported) { foreach ($identifier_map as $identifier => $id_supported) { // This is just a dummy value because it can't be empty; we'll force // it to a proper value when using it in the UI. $builtin_uri = "{$protocol}://{$identifier}"; $uris[] = PhabricatorRepositoryURI::initializeNewURI() ->setRepositoryPHID($this->getPHID()) ->attachRepository($this) ->setBuiltinProtocol($protocol) ->setBuiltinIdentifier($identifier) ->setURI($builtin_uri) ->setIsDisabled((int)(!$proto_supported || !$id_supported)); } } return $uris; } public function getClusterRepositoryURIFromBinding( AlmanacBinding $binding) { $protocol = $binding->getAlmanacPropertyValue('protocol'); if ($protocol === null) { $protocol = 'https'; } $iface = $binding->getInterface(); $address = $iface->renderDisplayAddress(); $path = $this->getURI(); return id(new PhutilURI("{$protocol}://{$address}")) ->setPath($path); } public function loadAlmanacService() { $service_phid = $this->getAlmanacServicePHID(); if (!$service_phid) { // No service, so this is a local repository. return null; } $service = id(new AlmanacServiceQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withPHIDs(array($service_phid)) ->needBindings(true) ->needProperties(true) ->executeOne(); if (!$service) { throw new Exception( pht( 'The Almanac service for this repository is invalid or could not '. 'be loaded.')); } $service_type = $service->getServiceImplementation(); if (!($service_type instanceof AlmanacClusterRepositoryServiceType)) { throw new Exception( pht( 'The Almanac service for this repository does not have the correct '. 'service type.')); } return $service; } public function markImporting() { $this->openTransaction(); $this->beginReadLocking(); $repository = $this->reload(); $repository->setDetail('importing', true); $repository->save(); $this->endReadLocking(); $this->saveTransaction(); return $repository; } /* -( Symbols )-------------------------------------------------------------*/ public function getSymbolSources() { return $this->getDetail('symbol-sources', array()); } public function getSymbolLanguages() { return $this->getDetail('symbol-languages', array()); } /* -( Staging )------------------------------------------------------------ */ public function supportsStaging() { return $this->isGit(); } public function getStagingURI() { if (!$this->supportsStaging()) { return null; } return $this->getDetail('staging-uri', null); } /* -( Automation )--------------------------------------------------------- */ public function supportsAutomation() { return $this->isGit(); } public function canPerformAutomation() { if (!$this->supportsAutomation()) { return false; } if (!$this->getAutomationBlueprintPHIDs()) { return false; } return true; } public function getAutomationBlueprintPHIDs() { if (!$this->supportsAutomation()) { return array(); } return $this->getDetail('automation.blueprintPHIDs', array()); } /* -( PhabricatorApplicationTransactionInterface )------------------------- */ public function getApplicationTransactionEditor() { return new PhabricatorRepositoryEditor(); } public function getApplicationTransactionObject() { return $this; } public function getApplicationTransactionTemplate() { return new PhabricatorRepositoryTransaction(); } public function willRenderTimeline( PhabricatorApplicationTransactionView $timeline, AphrontRequest $request) { return $timeline; } /* -( PhabricatorPolicyInterface )----------------------------------------- */ public function getCapabilities() { return array( PhabricatorPolicyCapability::CAN_VIEW, PhabricatorPolicyCapability::CAN_EDIT, DiffusionPushCapability::CAPABILITY, ); } public function getPolicy($capability) { switch ($capability) { case PhabricatorPolicyCapability::CAN_VIEW: return $this->getViewPolicy(); case PhabricatorPolicyCapability::CAN_EDIT: return $this->getEditPolicy(); case DiffusionPushCapability::CAPABILITY: return $this->getPushPolicy(); } } public function hasAutomaticCapability($capability, PhabricatorUser $user) { return false; } /* -( PhabricatorMarkupInterface )----------------------------------------- */ public function getMarkupFieldKey($field) { $hash = PhabricatorHash::digestForIndex($this->getMarkupText($field)); return "repo:{$hash}"; } public function newMarkupEngine($field) { return PhabricatorMarkupEngine::newMarkupEngine(array()); } public function getMarkupText($field) { return $this->getDetail('description'); } public function didMarkupText( $field, $output, PhutilMarkupEngine $engine) { require_celerity_resource('phabricator-remarkup-css'); return phutil_tag( 'div', array( 'class' => 'phabricator-remarkup', ), $output); } public function shouldUseMarkupCache($field) { return true; } /* -( PhabricatorDestructibleInterface )----------------------------------- */ public function destroyObjectPermanently( PhabricatorDestructionEngine $engine) { $phid = $this->getPHID(); $this->openTransaction(); $this->delete(); PhabricatorRepositoryURIIndex::updateRepositoryURIs($phid, array()); $books = id(new DivinerBookQuery()) ->setViewer($engine->getViewer()) ->withRepositoryPHIDs(array($phid)) ->execute(); foreach ($books as $book) { $engine->destroyObject($book); } $atoms = id(new DivinerAtomQuery()) ->setViewer($engine->getViewer()) ->withRepositoryPHIDs(array($phid)) ->execute(); foreach ($atoms as $atom) { $engine->destroyObject($atom); } $lfs_refs = id(new PhabricatorRepositoryGitLFSRefQuery()) ->setViewer($engine->getViewer()) ->withRepositoryPHIDs(array($phid)) ->execute(); foreach ($lfs_refs as $ref) { $engine->destroyObject($ref); } $this->saveTransaction(); } /* -( PhabricatorDestructibleCodexInterface )------------------------------ */ public function newDestructibleCodex() { return new PhabricatorRepositoryDestructibleCodex(); } /* -( PhabricatorSpacesInterface )----------------------------------------- */ public function getSpacePHID() { return $this->spacePHID; } /* -( PhabricatorConduitResultInterface )---------------------------------- */ public function getFieldSpecificationsForConduit() { return array( id(new PhabricatorConduitSearchFieldSpecification()) ->setKey('name') ->setType('string') ->setDescription(pht('The repository name.')), id(new PhabricatorConduitSearchFieldSpecification()) ->setKey('vcs') ->setType('string') ->setDescription( pht('The VCS this repository uses ("git", "hg" or "svn").')), id(new PhabricatorConduitSearchFieldSpecification()) ->setKey('callsign') ->setType('string') ->setDescription(pht('The repository callsign, if it has one.')), id(new PhabricatorConduitSearchFieldSpecification()) ->setKey('shortName') ->setType('string') ->setDescription(pht('Unique short name, if the repository has one.')), id(new PhabricatorConduitSearchFieldSpecification()) ->setKey('status') ->setType('string') ->setDescription(pht('Active or inactive status.')), id(new PhabricatorConduitSearchFieldSpecification()) ->setKey('isImporting') ->setType('bool') ->setDescription( pht( 'True if the repository is importing initial commits.')), ); } public function getFieldValuesForConduit() { return array( 'name' => $this->getName(), 'vcs' => $this->getVersionControlSystem(), 'callsign' => $this->getCallsign(), 'shortName' => $this->getRepositorySlug(), 'status' => $this->getStatus(), 'isImporting' => (bool)$this->isImporting(), ); } public function getConduitSearchAttachments() { return array( id(new DiffusionRepositoryURIsSearchEngineAttachment()) ->setAttachmentKey('uris'), ); } /* -( PhabricatorFulltextInterface )--------------------------------------- */ public function newFulltextEngine() { return new PhabricatorRepositoryFulltextEngine(); } /* -( PhabricatorFerretInterface )----------------------------------------- */ public function newFerretEngine() { return new PhabricatorRepositoryFerretEngine(); } } diff --git a/src/docs/user/cluster/cluster_repositories.diviner b/src/docs/user/cluster/cluster_repositories.diviner index 44003679af..8d764c2e3e 100644 --- a/src/docs/user/cluster/cluster_repositories.diviner +++ b/src/docs/user/cluster/cluster_repositories.diviner @@ -1,492 +1,512 @@ @title Cluster: Repositories @group cluster Configuring Phabricator to use multiple repository hosts. Overview ======== If you use Git, you can deploy Phabricator with multiple repository hosts, configured so that each host is readable and writable. The advantages of doing this are: - you can completely survive the loss of repository hosts; - reads and writes can scale across multiple machines; and - read and write performance across multiple geographic regions may improve. This configuration is complex, and many installs do not need to pursue it. This configuration is not currently supported with Subversion or Mercurial. How Reads and Writes Work ========================= Phabricator repository replicas are multi-master: every node is readable and writable, and a cluster of nodes can (almost always) survive the loss of any arbitrary subset of nodes so long as at least one node is still alive. Phabricator maintains an internal version for each repository, and increments it when the repository is mutated. Before responding to a read, replicas make sure their version of the repository is up to date (no node in the cluster has a newer version of the repository). If it isn't, they block the read until they can complete a fetch. Before responding to a write, replicas obtain a global lock, perform the same version check and fetch if necessary, then allow the write to continue. Additionally, repositories passively check other nodes for updates and replicate changes in the background. After you push a change to a repository, it will usually spread passively to all other repository nodes within a few minutes. Even if passive replication is slow, the active replication makes acknowledged changes sequential to all observers: after a write is acknowledged, all subsequent reads are guaranteed to see it. The system does not permit stale reads, and you do not need to wait for a replication delay to see a consistent view of the repository no matter which node you ask. HTTP vs HTTPS ============= Intracluster requests (from the daemons to repository servers, or from webservers to repository servers) are permitted to use HTTP, even if you have set `security.require-https` in your configuration. It is common to terminate SSL at a load balancer and use plain HTTP beyond that, and the `security.require-https` feature is primarily focused on making client browser behavior more convenient for users, so it does not apply to intracluster traffic. Using HTTP within the cluster leaves you vulnerable to attackers who can observe traffic within a datacenter, or observe traffic between datacenters. This is normally very difficult, but within reach for state-level adversaries like the NSA. If you are concerned about these attackers, you can terminate HTTPS on repository hosts and bind to them with the "https" protocol. Just be aware that the `security.require-https` setting won't prevent you from making configuration mistakes, as it doesn't cover intracluster traffic. Other mitigations are possible, but securing a network against the NSA and similar agents of other rogue nations is beyond the scope of this document. Repository Hosts ================ Repository hosts must run a complete, fully configured copy of Phabricator, including a webserver. They must also run a properly configured `sshd`. If you are converting existing hosts into cluster hosts, you may need to revisit @{article:Diffusion User Guide: Repository Hosting} and make sure the system user accounts have all the necessary `sudo` permissions. In particular, cluster devices need `sudo` access to `ssh` so they can read device keys. Generally, these hosts will run the same set of services and configuration that web hosts run. If you prefer, you can overlay these services and put web and repository services on the same hosts. See @{article:Clustering Introduction} for some guidance on overlaying services. When a user requests information about a repository that can only be satisfied by examining a repository working copy, the webserver receiving the request will make an HTTP service call to a repository server which hosts the repository to retrieve the data it needs. It will use the result of this query to respond to the user. Setting up a Cluster Services ============================= To set up clustering, first register the devices that you want to use as part of the cluster with Almanac. For details, see @{article:Cluster: Devices}. NOTE: Once you create a service, new repositories will immediately allocate on it. You may want to disable repository creation during initial setup. Once the hosts are registered as devices, you can create a new service in Almanac: - First, register at least one device according to the device clustering instructions. - Create a new service of type **Phabricator Cluster: Repository** in Almanac. - Bind this service to all the interfaces on the device or devices. - For each binding, add a `protocol` key with one of these values: `ssh`, `http`, `https`. For example, a service might look like this: - Service: `repos001.mycompany.net` - Binding: `repo001.mycompany.net:80`, `protocol=http` - Binding: `repo001.mycompany.net:2222`, `protocol=ssh` The service itself has a `closed` property. You can set this to `true` to disable new repository allocations on this service (for example, if it is reaching capacity). Migrating to Clustered Services =============================== To convert existing repositories on an install into cluster repositories, you will generally perform these steps: - Register the existing host as a cluster device. - Configure a single host repository service using //only// that host. This puts you in a transitional state where repositories on the host can work as either on-host repositories or cluster repositories. You can move forward from here slowly and make sure services still work, with a quick path back to safety if you run into trouble. To move forward, migrate one repository to the service and make sure things work correctly. If you run into issues, you can back out by migrating the repository off the service. To migrate a repository onto a cluster service, use this command: ``` $ ./bin/repository clusterize --service ``` To migrate a repository back off a service, use this command: ``` $ ./bin/repository clusterize --remove-service ``` This command only changes how Phabricator connects to the repository; it does not move any data or make any complex structural changes. When Phabricator needs information about a non-clustered repository, it just runs a command like `git log` directly on disk. When Phabricator needs information about a clustered repository, it instead makes a service call to another server, asking that server to run `git log` instead. In a single-host cluster the server will make this service call to itself, so nothing will really change. But this //is// an effective test for most possible configuration mistakes. If your canary repository works well, you can migrate the rest of your repositories when ready (you can use `bin/repository list` to quickly get a list of all repository monograms). Once all repositories are migrated, you've reached a stable state and can remain here as long as you want. This state is sufficient to convert daemons, SSH, and web services into clustered versions and spread them across multiple machines if those goals are more interesting. Obviously, your single-device "cluster" will not be able to survive the loss of the single repository host, but you can take as long as you want to expand the cluster and add redundancy. After creating a service, you do not need to `clusterize` new repositories: they will automatically allocate onto an open service. When you're ready to expand the cluster, continue below. Expanding a Cluster =================== To expand an existing cluster, follow these general steps: - Register new devices in Almanac. - Add bindings to the new devices to the repository service, also in Almanac. - Start the daemons on the new devices. For instructions on configuring and registering devices, see @{article:Cluster: Devices}. As soon as you add active bindings to a service, Phabricator will begin synchronizing repositories and sending traffic to the new device. You do not need to copy any repository data to the device: Phabricator will automatically synchronize it. If you have a large amount of repository data, you may want to help this process along by copying the repository directory from an existing cluster device before bringing the new host online. This is optional, but can reduce the amount of time required to fully synchronize the cluster. You do not need to synchronize the most up-to-date data or stop writes during this process. For example, loading the most recent backup snapshot onto the new device will substantially reduce the amount of data that needs to be synchronized. Contracting a Cluster ===================== -To reduce the size of an existing cluster, follow these general steps: +If you want to remove working devices from a cluster (for example, to take +hosts down for maintenance), first do this for each device: - - Disable the bindings from the service to the dead device in Almanac. + - Change the `writable` property on the bindings to "Prevent Writes". + - Wait a few moments until the cluster synchronizes (see + "Monitoring Services" below). + +This will ensure that the device you're about to remove is not the only cluster +leader, even if the cluster is receiving a high write volume. You can skip this +step if the device isn't working property to start with. + +Once you've stopped writes and waited for synchronization (or if the hosts are +not working in the first place) do this for each device: + + - Disable the bindings from the service to the device in Almanac. If you are removing a device because it failed abruptly (or removing several -devices at once) it is possible that some repositories will have lost all their -leaders. See "Loss of Leaders" below to understand and resolve this. +devices at once; or you skip the "Prevent Writes" step), it is possible that +some repositories will have lost all their leaders. See "Loss of Leaders" below +to understand and resolve this. + +If you want to put the hosts back in service later: + + - Enable the bindings again. + - Change `writable` back to "Allow Writes". + +This will restore the cluster to the original state. Monitoring Services =================== You can get an overview of repository cluster status from the {nav Config > Repository Servers} screen. This table shows a high-level overview of all active repository services. **Repos**: The number of repositories hosted on this service. **Sync**: Synchronization status of repositories on this service. This is an at-a-glance view of service health, and can show these values: - **Synchronized**: All nodes are fully synchronized and have the latest version of all repositories. - **Partial**: All repositories either have at least two leaders, or have a very recent write which is not expected to have propagated yet. - **Unsynchronized**: At least one repository has changes which are only available on one node and were not pushed very recently. Data may be at risk. - **No Repositories**: This service has no repositories. - **Ambiguous Leader**: At least one repository has an ambiguous leader. If this screen identifies problems, you can drill down into repository details to get more information about them. See the next section for details. Monitoring Repositories ======================= You can get a more detailed view the current status of a specific repository on cluster devices in {nav Diffusion > (Repository) > Manage Repository > Cluster Configuration}. This screen shows all the configured devices which are hosting the repository and the available version on that device. **Version**: When a repository is mutated by a push, Phabricator increases an internal version number for the repository. This column shows which version is on disk on the corresponding device. After a change is pushed, the device which received the change will have a larger version number than the other devices. The change should be passively replicated to the remaining devices after a brief period of time, although this can take a while if the change was large or the network connection between devices is slow or unreliable. You can click the version number to see the corresponding push logs for that change. The logs contain details about what was changed, and can help you identify if replication is slow because a change is large or for some other reason. **Writing**: This shows that the device is currently holding a write lock. This normally means that it is actively receiving a push, but can also mean that there was a write interruption. See "Write Interruptions" below for details. **Last Writer**: This column identifies the user who most recently pushed a change to this device. If the write lock is currently held, this user is the user whose change is holding the lock. **Last Write At**: When the most recent write started. If the write lock is currently held, this shows when the lock was acquired. Cluster Failure Modes ===================== There are three major cluster failure modes: - **Write Interruptions**: A write started but did not complete, leaving the disk state and cluster state out of sync. - **Loss of Leaders**: None of the devices with the most up-to-date data are reachable. - **Ambiguous Leaders**: The internal state of the repository is unclear. Phabricator can detect these issues, and responds by freezing the repository (usually preventing all reads and writes) until the issue is resolved. These conditions are normally rare and very little data is at risk, but Phabricator errs on the side of caution and requires decisions which may result in data loss to be confirmed by a human. The next sections cover these failure modes and appropriate responses in more detail. In general, you will respond to these issues by assessing the situation and then possibly choosing to discard some data. Write Interruptions =================== A repository cluster can be put into an inconsistent state by an interruption in a brief window during and immediately after a write. This looks like this: - A change is pushed to a server. - The server acquires a write lock and begins writing the change. - During or immediately after the write, lightning strikes the server and destroys it. Phabricator can not commit changes to a working copy (stored on disk) and to the global state (stored in a database) atomically, so there is necessarily a narrow window between committing these two different states when some tragedy can befall a server, leaving the global and local views of the repository state possibly divergent. In these cases, Phabricator fails into a frozen state where further writes are not permitted until the failure is investigated and resolved. When a repository is frozen in this way it remains readable. You can use the monitoring console to review the state of a frozen repository with a held write lock. The **Writing** column will show which device is holding the lock, and whoever is named in the **Last Writer** column may be able to help you figure out what happened by providing more information about what they were doing and what they observed. Because the push was not acknowledged, it is normally safe to resolve this issue by demoting the device. Demoting the device will undo any changes committed by the push, and they will be lost forever. However, the user should have received an error anyway, and should not expect their push to have worked. Still, data is technically at risk and you may want to investigate further and try to understand the issue in more detail before continuing. There is no way to explicitly keep the write, but if it was committed to disk you can recover it manually from the working copy on the device (for example, by using `git format-patch`) and then push it again after recovering. If you demote the device, the in-process write will be thrown away, even if it was complete on disk. To demote the device and release the write lock, run this command: ``` phabricator/ $ ./bin/repository thaw --demote ``` {icon exclamation-triangle, color="yellow"} Any committed but unacknowledged data on the device will be lost. Loss of Leaders =============== A more straightforward failure condition is the loss of all servers in a cluster which have the most up-to-date copy of a repository. This looks like this: - There is a cluster setup with two devices, X and Y. - A new change is pushed to server X. - Before the change can propagate to server Y, lightning strikes server X and destroys it. Here, all of the "leader" devices with the most up-to-date copy of the repository have been lost. Phabricator will freeze the repository refuse to serve requests because it can not serve reads consistently and can not accept new writes without data loss. The most straightforward way to resolve this issue is to restore any leader to service. The change will be able to replicate to other devices once a leader comes back online. If you are unable to restore a leader or unsure that you can restore one quickly, you can use the monitoring console to review which changes are present on the leaders but not present on the followers by examining the push logs. If you are comfortable discarding these changes, you can instruct Phabricator that it can forget about the leaders in two ways: disable the service bindings to all of the leader devices so they are no longer part of the cluster, or use `bin/repository thaw` to `--demote` the leaders explicitly. If you do this, **you will lose data**. Either action will discard any changes on the affected leaders which have not replicated to other devices in the cluster. To remove a device from the cluster, disable all of the bindings to it in Almanac, using the web UI. {icon exclamation-triangle, color="red"} Any data which is only present on the disabled device will be lost. To demote a device without removing it from the cluster, run this command: ``` phabricator/ $ ./bin/repository thaw rXYZ --demote repo002.corp.net ``` {icon exclamation-triangle, color="red"} Any data which is only present on **this** device will be lost. Ambiguous Leaders ================= Repository clusters can also freeze if the leader devices are ambiguous. This can happen if you replace an entire cluster with new devices suddenly, or make a mistake with the `--demote` flag. This may arise from some kind of operator error, like these: - Someone accidentally uses `bin/repository thaw ... --demote` to demote every device in a cluster. - Someone accidentally deletes all the version information for a repository from the database by making a mistake with a `DELETE` or `UPDATE` query. - Someone accidentally disables all of the devices in a cluster, then adds entirely new ones before repositories can propagate. If you are moving repositories into cluster services, you can also reach this state if you use `clusterize` to associate a repository with a service that is bound to multiple active devices. In this case, Phabricator will not know which device or devices have up-to-date information. When Phabricator can not tell which device in a cluster is a leader, it freezes the cluster because it is possible that some devices have less data and others have more, and if it chooses a leader arbitrarily it may destroy some data which you would prefer to retain. To resolve this, you need to tell Phabricator which device has the most up-to-date data and promote that device to become a leader. If you know all devices have the same data, you are free to promote any device. If you promote a device, **you may lose data** if you promote the wrong device and some other device really had more up-to-date data. If you want to double check, you can examine the working copies on disk before promoting by connecting to the machines and using commands like `git log` to inspect state. Once you have identified a device which has data you're happy with, use `bin/repository thaw` to `--promote` the device. The data on the chosen device will become authoritative: ``` phabricator/ $ ./bin/repository thaw rXYZ --promote repo002.corp.net ``` {icon exclamation-triangle, color="red"} Any data which is only present on **other** devices will be lost. Backups ====== Even if you configure clustering, you should still consider retaining separate backup snapshots. Replicas protect you from data loss if you lose a host, but they do not let you rewind time to recover from data mutation mistakes. If something issues a `--force` push that destroys branch heads, the mutation will propagate to the replicas. You may be able to manually restore the branches by using tools like the Phabricator push log or the Git reflog so it is less important to retain repository snapshots than database snapshots, but it is still possible for data to be lost permanently, especially if you don't notice the problem for some time. Retaining separate backup snapshots will improve your ability to recover more data more easily in a wider range of disaster situations. Next Steps ========== Continue by: - returning to @{article:Clustering Introduction}.