diff --git a/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php b/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php index bccb58e35a..21b0e13413 100644 --- a/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php +++ b/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php @@ -1,432 +1,445 @@ getArgv(); array_unshift($argv, __CLASS__); $args = new PhutilArgumentParser($argv); $args->parse( array( array( 'name' => 'no-discovery', 'help' => pht('Pull only, without discovering commits.'), ), array( 'name' => 'not', 'param' => 'repository', 'repeat' => true, 'help' => pht('Do not pull __repository__.'), ), array( 'name' => 'repositories', 'wildcard' => true, 'help' => pht('Pull specific __repositories__ instead of all.'), ), )); $no_discovery = $args->getArg('no-discovery'); $include = $args->getArg('repositories'); $exclude = $args->getArg('not'); // Each repository has an individual pull frequency; after we pull it, // wait that long to pull it again. When we start up, try to pull everything // serially. $retry_after = array(); $min_sleep = 15; $max_futures = 4; $futures = array(); $queue = array(); while (!$this->shouldExit()) { PhabricatorCaches::destroyRequestCache(); $pullable = $this->loadPullableRepositories($include, $exclude); // If any repositories have the NEEDS_UPDATE flag set, pull them // as soon as possible. $need_update_messages = $this->loadRepositoryUpdateMessages(true); foreach ($need_update_messages as $message) { $repo = idx($pullable, $message->getRepositoryID()); if (!$repo) { continue; } $this->log( pht( 'Got an update message for repository "%s"!', $repo->getMonogram())); $retry_after[$message->getRepositoryID()] = time(); } // If any repositories were deleted, remove them from the retry timer map // so we don't end up with a retry timer that never gets updated and // causes us to sleep for the minimum amount of time. 
$retry_after = array_select_keys( $retry_after, array_keys($pullable)); // Figure out which repositories we need to queue for an update. foreach ($pullable as $id => $repository) { $monogram = $repository->getMonogram(); if (isset($futures[$id])) { $this->log(pht('Repository "%s" is currently updating.', $monogram)); continue; } if (isset($queue[$id])) { $this->log(pht('Repository "%s" is already queued.', $monogram)); continue; } $after = idx($retry_after, $id, 0); if ($after > time()) { $this->log( pht( 'Repository "%s" is not due for an update for %s second(s).', $monogram, new PhutilNumber($after - time()))); continue; } if (!$after) { $this->log( pht( 'Scheduling repository "%s" for an initial update.', $monogram)); } else { $this->log( pht( 'Scheduling repository "%s" for an update (%s seconds overdue).', $monogram, new PhutilNumber(time() - $after))); } $queue[$id] = $after; } // Process repositories in the order they became candidates for updates. asort($queue); // Dequeue repositories until we hit maximum parallelism. while ($queue && (count($futures) < $max_futures)) { foreach ($queue as $id => $time) { $repository = idx($pullable, $id); if (!$repository) { $this->log( pht('Repository %s is no longer pullable; skipping.', $id)); unset($queue[$id]); continue; } $monogram = $repository->getMonogram(); $this->log(pht('Starting update for repository "%s".', $monogram)); unset($queue[$id]); $futures[$id] = $this->buildUpdateFuture( $repository, $no_discovery); break; } } if ($queue) { $this->log( pht( 'Not enough process slots to schedule the other %s '. 
'repository(s) for updates yet.', phutil_count($queue))); } if ($futures) { $iterator = id(new FutureIterator($futures)) ->setUpdateInterval($min_sleep); foreach ($iterator as $id => $future) { $this->stillWorking(); if ($future === null) { $this->log(pht('Waiting for updates to complete...')); $this->stillWorking(); if ($this->loadRepositoryUpdateMessages()) { $this->log(pht('Interrupted by pending updates!')); break; } continue; } unset($futures[$id]); $retry_after[$id] = $this->resolveUpdateFuture( $pullable[$id], $future, $min_sleep); // We have a free slot now, so go try to fill it. break; } // Jump back into prioritization if we had any futures to deal with. continue; } $this->waitForUpdates($min_sleep, $retry_after); } } /** * @task pull */ private function buildUpdateFuture( PhabricatorRepository $repository, $no_discovery) { $bin = dirname(phutil_get_library_root('phabricator')).'/bin/repository'; $flags = array(); if ($no_discovery) { $flags[] = '--no-discovery'; } - $callsign = $repository->getCallsign(); - - $future = new ExecFuture('%s update %Ls -- %s', $bin, $flags, $callsign); + $monogram = $repository->getMonogram(); + $future = new ExecFuture('%s update %Ls -- %s', $bin, $flags, $monogram); // Sometimes, the underlying VCS commands will hang indefinitely. We've // observed this occasionally with GitHub, and other users have observed // it with other VCS servers. // To limit the damage this can cause, kill the update out after a // reasonable amount of time, under the assumption that it has hung. // Since it's hard to know what a "reasonable" amount of time is given that // users may be downloading a repository full of pirated movies over a // potato, these limits are fairly generous. Repositories exceeding these // limits can be manually pulled with `bin/repository update X`, which can // just run for as long as it wants. 
if ($repository->isImporting()) { $timeout = phutil_units('4 hours in seconds'); } else { $timeout = phutil_units('15 minutes in seconds'); } $future->setTimeout($timeout); return $future; } /** * Check for repositories that should be updated immediately. * * With the `$consume` flag, an internal cursor will also be incremented so * that these messages are not returned by subsequent calls. * * @param bool Pass `true` to consume these messages, so the process will * not see them again. * @return list Pending update messages. * * @task pull */ private function loadRepositoryUpdateMessages($consume = false) { $type_need_update = PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE; $messages = id(new PhabricatorRepositoryStatusMessage())->loadAllWhere( 'statusType = %s AND id > %d', $type_need_update, $this->statusMessageCursor); // Keep track of messages we've seen so that we don't load them again. // If we reload messages, we can get stuck a loop if we have a failing // repository: we update immediately in response to the message, but do // not clear the message because the update does not succeed. We then // immediately retry. Instead, messages are only permitted to trigger // an immediate update once. 
if ($consume) { foreach ($messages as $message) { $this->statusMessageCursor = max( $this->statusMessageCursor, $message->getID()); } } return $messages; } /** * @task pull */ private function loadPullableRepositories(array $include, array $exclude) { $query = id(new PhabricatorRepositoryQuery()) ->setViewer($this->getViewer()); if ($include) { - $query->withCallsigns($include); + $query->withIdentifiers($include); } $repositories = $query->execute(); + $repositories = mpull($repositories, null, 'getPHID'); if ($include) { - $by_callsign = mpull($repositories, null, 'getCallsign'); - foreach ($include as $name) { - if (empty($by_callsign[$name])) { + $map = $query->getIdentifierMap(); + foreach ($include as $identifier) { + if (empty($map[$identifier])) { throw new Exception( pht( - "No repository exists with callsign '%s'!", - $name)); + 'No repository "%s" exists!', + $identifier)); } } } if ($exclude) { - $exclude = array_fuse($exclude); - foreach ($repositories as $key => $repository) { - if (isset($exclude[$repository->getCallsign()])) { - unset($repositories[$key]); + $xquery = id(new PhabricatorRepositoryQuery()) + ->setViewer($this->getViewer()) + ->withIdentifiers($exclude); + + $excluded_repos = $xquery->execute(); + $xmap = $xquery->getIdentifierMap(); + + foreach ($exclude as $identifier) { + if (empty($xmap[$identifier])) { + throw new Exception( + pht( + 'No repository "%s" exists!', + $identifier)); } } + + foreach ($excluded_repos as $excluded_repo) { + unset($repositories[$excluded_repo->getPHID()]); + } } foreach ($repositories as $key => $repository) { if (!$repository->isTracked()) { unset($repositories[$key]); } } // Shuffle the repositories, then re-key the array since shuffle() // discards keys. This is mostly for startup, we'll use soft priorities // later. 
shuffle($repositories); $repositories = mpull($repositories, null, 'getID'); return $repositories; } /** * @task pull */ private function resolveUpdateFuture( PhabricatorRepository $repository, ExecFuture $future, $min_sleep) { $monogram = $repository->getMonogram(); $this->log(pht('Resolving update for "%s".', $monogram)); try { list($stdout, $stderr) = $future->resolvex(); } catch (Exception $ex) { $proxy = new PhutilProxyException( pht( 'Error while updating the "%s" repository.', $repository->getMonogram()), $ex); phlog($proxy); return time() + $min_sleep; } if (strlen($stderr)) { $stderr_msg = pht( 'Unexpected output while updating repository "%s": %s', $monogram, $stderr); phlog($stderr_msg); } $smart_wait = $repository->loadUpdateInterval($min_sleep); $this->log( pht( 'Based on activity in repository "%s", considering a wait of %s '. 'seconds before update.', $repository->getMonogram(), new PhutilNumber($smart_wait))); return time() + $smart_wait; } /** * Sleep for a short period of time, waiting for update messages from the * * * @task pull */ private function waitForUpdates($min_sleep, array $retry_after) { $this->log( pht('No repositories need updates right now, sleeping...')); $sleep_until = time() + $min_sleep; if ($retry_after) { $sleep_until = min($sleep_until, min($retry_after)); } while (($sleep_until - time()) > 0) { $sleep_duration = ($sleep_until - time()); $this->log( pht( 'Sleeping for %s more second(s)...', new PhutilNumber($sleep_duration))); $this->sleep(1); if ($this->shouldExit()) { $this->log(pht('Awakened from sleep by graceful shutdown!')); return; } if ($this->loadRepositoryUpdateMessages()) { $this->log(pht('Awakened from sleep by pending updates!')); break; } } } } diff --git a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php index 407940e474..3c3a0526be 100644 --- 
a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php @@ -1,582 +1,582 @@ repairMode = $repair_mode; return $this; } public function getRepairMode() { return $this->repairMode; } /** * @task discovery */ public function discoverCommits() { $repository = $this->getRepository(); $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $refs = $this->discoverSubversionCommits(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $refs = $this->discoverMercurialCommits(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $refs = $this->discoverGitCommits(); break; default: throw new Exception(pht("Unknown VCS '%s'!", $vcs)); } // Clear the working set cache. $this->workingSet = array(); // Record discovered commits and mark them in the cache. foreach ($refs as $ref) { $this->recordCommit( $repository, $ref->getIdentifier(), $ref->getEpoch(), $ref->getCanCloseImmediately(), $ref->getParents()); $this->commitCache[$ref->getIdentifier()] = true; } return $refs; } /* -( Discovering Git Repositories )--------------------------------------- */ /** * @task git */ private function discoverGitCommits() { $repository = $this->getRepository(); if (!$repository->isHosted()) { $this->verifyGitOrigin($repository); } $branches = id(new DiffusionLowLevelGitRefQuery()) ->setRepository($repository) ->withIsOriginBranch(true) ->execute(); if (!$branches) { // This repository has no branches at all, so we don't need to do // anything. Generally, this means the repository is empty. 
return array(); } $branches = $this->sortBranches($branches); $branches = mpull($branches, 'getCommitIdentifier', 'getShortName'); $this->log( pht( - 'Discovering commits in repository %s.', - $repository->getCallsign())); + 'Discovering commits in repository "%s".', + $repository->getDisplayName())); $this->fillCommitCache(array_values($branches)); $refs = array(); foreach ($branches as $name => $commit) { $this->log(pht('Examining branch "%s", at "%s".', $name, $commit)); if (!$repository->shouldTrackBranch($name)) { $this->log(pht('Skipping, branch is untracked.')); continue; } if ($this->isKnownCommit($commit)) { $this->log(pht('Skipping, HEAD is known.')); continue; } $this->log(pht('Looking for new commits.')); $branch_refs = $this->discoverStreamAncestry( new PhabricatorGitGraphStream($repository, $commit), $commit, $repository->shouldAutocloseBranch($name)); $this->didDiscoverRefs($branch_refs); $refs[] = $branch_refs; } return array_mergev($refs); } /* -( Discovering Subversion Repositories )-------------------------------- */ /** * @task svn */ private function discoverSubversionCommits() { $repository = $this->getRepository(); if (!$repository->isHosted()) { $this->verifySubversionRoot($repository); } $upper_bound = null; $limit = 1; $refs = array(); do { // Find all the unknown commits on this path. Note that we permit // importing an SVN subdirectory rather than the entire repository, so // commits may be nonsequential. if ($upper_bound === null) { $at_rev = 'HEAD'; } else { $at_rev = ($upper_bound - 1); } try { list($xml, $stderr) = $repository->execxRemoteCommand( 'log --xml --quiet --limit %d %s', $limit, $repository->getSubversionBaseURI($at_rev)); } catch (CommandException $ex) { $stderr = $ex->getStdErr(); if (preg_match('/(path|File) not found/', $stderr)) { // We've gone all the way back through history and this path was not // affected by earlier commits. 
break; } throw $ex; } $xml = phutil_utf8ize($xml); $log = new SimpleXMLElement($xml); foreach ($log->logentry as $entry) { $identifier = (int)$entry['revision']; $epoch = (int)strtotime((string)$entry->date[0]); $refs[$identifier] = id(new PhabricatorRepositoryCommitRef()) ->setIdentifier($identifier) ->setEpoch($epoch) ->setCanCloseImmediately(true); if ($upper_bound === null) { $upper_bound = $identifier; } else { $upper_bound = min($upper_bound, $identifier); } } // Discover 2, 4, 8, ... 256 logs at a time. This allows us to initially // import large repositories fairly quickly, while pulling only as much // data as we need in the common case (when we've already imported the // repository and are just grabbing one commit at a time). $limit = min($limit * 2, 256); } while ($upper_bound > 1 && !$this->isKnownCommit($upper_bound)); krsort($refs); while ($refs && $this->isKnownCommit(last($refs)->getIdentifier())) { array_pop($refs); } $refs = array_reverse($refs); $this->didDiscoverRefs($refs); return $refs; } private function verifySubversionRoot(PhabricatorRepository $repository) { list($xml) = $repository->execxRemoteCommand( 'info --xml %s', $repository->getSubversionPathURI()); $xml = phutil_utf8ize($xml); $xml = new SimpleXMLElement($xml); $remote_root = (string)($xml->entry[0]->repository[0]->root[0]); $expect_root = $repository->getSubversionPathURI(); $normal_type_svn = PhabricatorRepositoryURINormalizer::TYPE_SVN; $remote_normal = id(new PhabricatorRepositoryURINormalizer( $normal_type_svn, $remote_root))->getNormalizedPath(); $expect_normal = id(new PhabricatorRepositoryURINormalizer( $normal_type_svn, $expect_root))->getNormalizedPath(); if ($remote_normal != $expect_normal) { throw new Exception( pht( 'Repository "%s" does not have a correctly configured remote URI. '. 'The remote URI for a Subversion repository MUST point at the '. 'repository root. The root for this repository is "%s", but the '. 'configured URI is "%s". 
To resolve this error, set the remote URI '. 'to point at the repository root. If you want to import only part '. 'of a Subversion repository, use the "Import Only" option.', - $repository->getCallsign(), + $repository->getDisplayName(), $remote_root, $expect_root)); } } /* -( Discovering Mercurial Repositories )--------------------------------- */ /** * @task hg */ private function discoverMercurialCommits() { $repository = $this->getRepository(); $branches = id(new DiffusionLowLevelMercurialBranchesQuery()) ->setRepository($repository) ->execute(); $this->fillCommitCache(mpull($branches, 'getCommitIdentifier')); $refs = array(); foreach ($branches as $branch) { // NOTE: Mercurial branches may have multiple heads, so the names may // not be unique. $name = $branch->getShortName(); $commit = $branch->getCommitIdentifier(); $this->log(pht('Examining branch "%s" head "%s".', $name, $commit)); if (!$repository->shouldTrackBranch($name)) { $this->log(pht('Skipping, branch is untracked.')); continue; } if ($this->isKnownCommit($commit)) { $this->log(pht('Skipping, this head is a known commit.')); continue; } $this->log(pht('Looking for new commits.')); $branch_refs = $this->discoverStreamAncestry( new PhabricatorMercurialGraphStream($repository, $commit), $commit, $close_immediately = true); $this->didDiscoverRefs($branch_refs); $refs[] = $branch_refs; } return array_mergev($refs); } /* -( Internals )---------------------------------------------------------- */ private function discoverStreamAncestry( PhabricatorRepositoryGraphStream $stream, $commit, $close_immediately) { $discover = array($commit); $graph = array(); $seen = array(); // Find all the reachable, undiscovered commits. Build a graph of the // edges. 
while ($discover) { $target = array_pop($discover); if (empty($graph[$target])) { $graph[$target] = array(); } $parents = $stream->getParents($target); foreach ($parents as $parent) { if ($this->isKnownCommit($parent)) { continue; } $graph[$target][$parent] = true; if (empty($seen[$parent])) { $seen[$parent] = true; $discover[] = $parent; } } } // Now, sort them topographically. $commits = $this->reduceGraph($graph); $refs = array(); foreach ($commits as $commit) { $refs[] = id(new PhabricatorRepositoryCommitRef()) ->setIdentifier($commit) ->setEpoch($stream->getCommitDate($commit)) ->setCanCloseImmediately($close_immediately) ->setParents($stream->getParents($commit)); } return $refs; } private function reduceGraph(array $edges) { foreach ($edges as $commit => $parents) { $edges[$commit] = array_keys($parents); } $graph = new PhutilDirectedScalarGraph(); $graph->addNodes($edges); $commits = $graph->getTopographicallySortedNodes(); // NOTE: We want the most ancestral nodes first, so we need to reverse the // list we get out of AbstractDirectedGraph. $commits = array_reverse($commits); return $commits; } private function isKnownCommit($identifier) { if (isset($this->commitCache[$identifier])) { return true; } if (isset($this->workingSet[$identifier])) { return true; } if ($this->repairMode) { // In repair mode, rediscover the entire repository, ignoring the // database state. We can hit the local cache above, but if we miss it // stop the script from going to the database cache. 
return false; } $this->fillCommitCache(array($identifier)); return isset($this->commitCache[$identifier]); } private function fillCommitCache(array $identifiers) { if (!$identifiers) { return; } $commits = id(new PhabricatorRepositoryCommit())->loadAllWhere( 'repositoryID = %d AND commitIdentifier IN (%Ls)', $this->getRepository()->getID(), $identifiers); foreach ($commits as $commit) { $this->commitCache[$commit->getCommitIdentifier()] = true; } while (count($this->commitCache) > self::MAX_COMMIT_CACHE_SIZE) { array_shift($this->commitCache); } } /** * Sort branches so we process closeable branches first. This makes the * whole import process a little cheaper, since we can close these commits * the first time through rather than catching them in the refs step. * * @task internal * * @param list List of branch heads. * @return list Sorted list of branch heads. */ private function sortBranches(array $branches) { $repository = $this->getRepository(); $head_branches = array(); $tail_branches = array(); foreach ($branches as $branch) { $name = $branch->getShortName(); if ($repository->shouldAutocloseBranch($name)) { $head_branches[] = $branch; } else { $tail_branches[] = $branch; } } return array_merge($head_branches, $tail_branches); } private function recordCommit( PhabricatorRepository $repository, $commit_identifier, $epoch, $close_immediately, array $parents) { $commit = new PhabricatorRepositoryCommit(); $commit->setRepositoryID($repository->getID()); $commit->setCommitIdentifier($commit_identifier); $commit->setEpoch($epoch); if ($close_immediately) { $commit->setImportStatus(PhabricatorRepositoryCommit::IMPORTED_CLOSEABLE); } $data = new PhabricatorRepositoryCommitData(); $conn_w = $repository->establishConnection('w'); try { // If this commit has parents, look up their IDs. The parent commits // should always exist already. 
$parent_ids = array(); if ($parents) { $parent_rows = queryfx_all( $conn_w, 'SELECT id, commitIdentifier FROM %T WHERE commitIdentifier IN (%Ls) AND repositoryID = %d', $commit->getTableName(), $parents, $repository->getID()); $parent_map = ipull($parent_rows, 'id', 'commitIdentifier'); foreach ($parents as $parent) { if (empty($parent_map[$parent])) { throw new Exception( pht('Unable to identify parent "%s"!', $parent)); } $parent_ids[] = $parent_map[$parent]; } } else { // Write an explicit 0 so we can distinguish between "really no // parents" and "data not available". if (!$repository->isSVN()) { $parent_ids = array(0); } } $commit->openTransaction(); $commit->save(); $data->setCommitID($commit->getID()); $data->save(); foreach ($parent_ids as $parent_id) { queryfx( $conn_w, 'INSERT IGNORE INTO %T (childCommitID, parentCommitID) VALUES (%d, %d)', PhabricatorRepository::TABLE_PARENTS, $commit->getID(), $parent_id); } $commit->saveTransaction(); $this->insertTask($repository, $commit); queryfx( $conn_w, 'INSERT INTO %T (repositoryID, size, lastCommitID, epoch) VALUES (%d, 1, %d, %d) ON DUPLICATE KEY UPDATE size = size + 1, lastCommitID = IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID), epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)', PhabricatorRepository::TABLE_SUMMARY, $repository->getID(), $commit->getID(), $epoch); if ($this->repairMode) { // Normally, the query should throw a duplicate key exception. If we // reach this in repair mode, we've actually performed a repair. $this->log(pht('Repaired commit "%s".', $commit_identifier)); } PhutilEventEngine::dispatchEvent( new PhabricatorEvent( PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT, array( 'repository' => $repository, 'commit' => $commit, ))); } catch (AphrontDuplicateKeyQueryException $ex) { $commit->killTransaction(); // Ignore. 
This can happen because we discover the same new commit // more than once when looking at history, or because of races or // data inconsistency or cosmic radiation; in any case, we're still // in a good state if we ignore the failure. } } private function didDiscoverRefs(array $refs) { foreach ($refs as $ref) { $this->workingSet[$ref->getIdentifier()] = true; } } private function insertTask( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, $data = array()) { $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $class = 'PhabricatorRepositoryGitCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $class = 'PhabricatorRepositorySvnCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker'; break; default: throw new Exception(pht("Unknown repository type '%s'!", $vcs)); } $data['commitID'] = $commit->getID(); PhabricatorWorker::scheduleTask($class, $data); } } diff --git a/src/applications/repository/engine/PhabricatorRepositoryMirrorEngine.php b/src/applications/repository/engine/PhabricatorRepositoryMirrorEngine.php index 8d3dd9f8f9..e4e65aab6f 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryMirrorEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryMirrorEngine.php @@ -1,105 +1,105 @@ getRepository(); if (!$repository->canMirror()) { return; } if (PhabricatorEnv::getEnvConfig('phabricator.silent')) { $this->log( pht('Phabricator is running in silent mode; declining to mirror.')); return; } $mirrors = id(new PhabricatorRepositoryMirrorQuery()) ->setViewer($this->getViewer()) ->withRepositoryPHIDs(array($repository->getPHID())) ->execute(); $exceptions = array(); foreach ($mirrors as $mirror) { try { $this->pushRepositoryToMirror($repository, $mirror); } catch (Exception $ex) { $exceptions[] = $ex; } } if 
($exceptions) { throw new PhutilAggregateException( pht( 'Exceptions occurred while mirroring the "%s" repository.', - $repository->getCallsign()), + $repository->getDisplayName()), $exceptions); } } private function pushRepositoryToMirror( PhabricatorRepository $repository, PhabricatorRepositoryMirror $mirror) { // TODO: This is a little bit janky, but we don't have first-class // infrastructure for running remote commands against an arbitrary remote // right now. Just make an emphemeral copy of the repository and muck with // it a little bit. In the medium term, we should pull this command stuff // out and use it here and for "Land to ...". $proxy = clone $repository; $proxy->makeEphemeral(); $proxy->setDetail('hosting-enabled', false); $proxy->setDetail('remote-uri', $mirror->getRemoteURI()); $proxy->setCredentialPHID($mirror->getCredentialPHID()); $this->log(pht('Pushing to remote "%s"...', $mirror->getRemoteURI())); if ($proxy->isGit()) { $this->pushToGitRepository($proxy); } else if ($proxy->isHg()) { $this->pushToHgRepository($proxy); } else { throw new Exception(pht('Unsupported VCS!')); } } private function pushToGitRepository( PhabricatorRepository $proxy) { $future = $proxy->getRemoteCommandFuture( 'push --verbose --mirror -- %P', $proxy->getRemoteURIEnvelope()); $future ->setCWD($proxy->getLocalPath()) ->resolvex(); } private function pushToHgRepository( PhabricatorRepository $proxy) { $future = $proxy->getRemoteCommandFuture( 'push --verbose --rev tip -- %P', $proxy->getRemoteURIEnvelope()); try { $future ->setCWD($proxy->getLocalPath()) ->resolvex(); } catch (CommandException $ex) { if (preg_match('/no changes found/', $ex->getStdOut())) { // mercurial says nothing changed, but that's good } else { throw $ex; } } } } diff --git a/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php b/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php index 299310be27..30ab234027 100644 --- 
a/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php @@ -1,559 +1,560 @@ getRepository(); $is_hg = false; $is_git = false; $is_svn = false; $vcs = $repository->getVersionControlSystem(); - $callsign = $repository->getCallsign(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: // We never pull a local copy of non-hosted Subversion repositories. if (!$repository->isHosted()) { $this->skipPull( pht( - "Repository '%s' is a non-hosted Subversion repository, which ". - "does not require a local working copy to be pulled.", - $callsign)); + 'Repository "%s" is a non-hosted Subversion repository, which '. + 'does not require a local working copy to be pulled.', + $repository->getDisplayName())); return; } $is_svn = true; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $is_git = true; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $is_hg = true; break; default: $this->abortPull(pht('Unknown VCS "%s"!', $vcs)); break; } - $callsign = $repository->getCallsign(); $local_path = $repository->getLocalPath(); if ($local_path === null) { $this->abortPull( pht( - "No local path is configured for repository '%s'.", - $callsign)); + 'No local path is configured for repository "%s".', + $repository->getDisplayName())); } try { $dirname = dirname($local_path); if (!Filesystem::pathExists($dirname)) { Filesystem::createDirectory($dirname, 0755, $recursive = true); } if (!Filesystem::pathExists($local_path)) { $this->logPull( pht( - "Creating a new working copy for repository '%s'.", - $callsign)); + 'Creating a new working copy for repository "%s".', + $repository->getDisplayName())); if ($is_git) { $this->executeGitCreate(); } else if ($is_hg) { $this->executeMercurialCreate(); } else { $this->executeSubversionCreate(); } } else { if (!$repository->isHosted()) { $this->logPull( pht( - "Updating the working copy for repository '%s'.", - 
$callsign)); + 'Updating the working copy for repository "%s".', + $repository->getDisplayName())); if ($is_git) { $this->verifyGitOrigin($repository); $this->executeGitUpdate(); } else if ($is_hg) { $this->executeMercurialUpdate(); } } } if ($repository->isHosted()) { if ($is_git) { $this->installGitHook(); } else if ($is_svn) { $this->installSubversionHook(); } else if ($is_hg) { $this->installMercurialHook(); } foreach ($repository->getHookDirectories() as $directory) { $this->installHookDirectory($directory); } } } catch (Exception $ex) { $this->abortPull( - pht('Pull of "%s" failed: %s', $callsign, $ex->getMessage()), + pht( + "Pull of '%s' failed: %s", + $repository->getDisplayName(), + $ex->getMessage()), $ex); } $this->donePull(); return $this; }

  /**
   * Log a message and mark the pull as finished without doing any work.
   *
   * @param string Message to log.
   * @return void
   */
  private function skipPull($message) {
    $this->log('%s', $message);
    $this->donePull();
  }

  /**
   * Record an error status for the repository, then abort by throwing.
   *
   * This method never returns normally: it rethrows the given exception when
   * one is provided, and otherwise throws a new exception built from the
   * message.
   *
   * @param string Message to store with the error status.
   * @param Exception|null Optional exception to rethrow.
   * @return void
   */
  private function abortPull($message, Exception $ex = null) {
    $code_error = PhabricatorRepositoryStatusMessage::CODE_ERROR;
    $this->updateRepositoryInitStatus($code_error, $message);
    if ($ex) {
      throw $ex;
    } else {
      throw new Exception($message);
    }
  }

  /**
   * Record a "working" status with the given message and log it.
   *
   * @param string Progress message.
   * @return void
   */
  private function logPull($message) {
    $code_working = PhabricatorRepositoryStatusMessage::CODE_WORKING;
    $this->updateRepositoryInitStatus($code_working, $message);
    $this->log('%s', $message);
  }

  /**
   * Record an "okay" status (with no message) for the repository.
   *
   * @return void
   */
  private function donePull() {
    $code_okay = PhabricatorRepositoryStatusMessage::CODE_OKAY;
    $this->updateRepositoryInitStatus($code_okay);
  }

  /**
   * Write an init-type status message for the current repository.
   *
   * @param wild Status code (one of the CODE_* constants used in this class).
   * @param string|null Optional human-readable message.
   * @return void
   */
  private function updateRepositoryInitStatus($code, $message = null) {
    $this->getRepository()->writeStatusMessage(
      PhabricatorRepositoryStatusMessage::TYPE_INIT,
      $code,
      array(
        'message' => $message,
      ));
  }

  /**
   * Write an executable shell hook script to the given path.
   *
   * The script sets TERM=dumb and then execs the resolved "php" binary on
   * "bin/commit-hook", passing the hook context identifier followed by
   * whatever arguments the hook itself received.
   *
   * @param string Path to write the hook script to.
   * @return void
   */
  private function installHook($path) {
    $this->log('%s', pht('Installing commit hook to "%s"...', $path));

    $repository = $this->getRepository();
    $identifier = $this->getHookContextIdentifier($repository);

    $root = dirname(phutil_get_library_root('phabricator'));
    $bin = $root.'/bin/commit-hook';

    $full_php_path = Filesystem::resolveBinary('php');
    $cmd = csprintf(
      'exec %s -f %s -- %s "$@"',
      $full_php_path,
      $bin,
      $identifier);

    $hook = "#!/bin/sh\nexport TERM=dumb\n{$cmd}\n";

    Filesystem::writeFile($path, $hook);
    Filesystem::changePermissions($path, 0755);
  }

  /**
   * Create a custom hook directory and drop an explanatory README into it.
   *
   * @param string Directory path to create.
   * @return void
   */
  private function installHookDirectory($path) {
    $readme = pht(
      "To add custom hook scripts to this repository, add them to this ".
      "directory.\n\nPhabricator will run any executables in this directory ".
      "after running its own checks, as though they were normal hook ".
      "scripts.");

    Filesystem::createDirectory($path, 0755);
    Filesystem::writeFile($path.'/README', $readme);
  }

  /**
   * Build the identifier which installed hooks pass to "bin/commit-hook".
   *
   * This is the repository callsign, suffixed with ":<instance>" when the
   * "cluster.instance" configuration value is a nonempty string.
   *
   * @param PhabricatorRepository Repository to build an identifier for.
   * @return string Hook context identifier.
   */
  private function getHookContextIdentifier(PhabricatorRepository $repository) {
    $identifier = $repository->getCallsign();

    $instance = PhabricatorEnv::getEnvConfig('cluster.instance');
    // Guard against null explicitly: passing null to strlen() is deprecated
    // in newer PHP versions. The result is the same for all other values.
    if ($instance !== null && strlen($instance)) {
      $identifier = "{$identifier}:{$instance}";
    }

    return $identifier;
  }


/* -( Pulling Git Working Copies )----------------------------------------- */


  /**
   * Create the local git repository: a fresh bare repository for hosted
   * repositories, or a bare clone of the remote otherwise.
   *
   * @task git
   */
  private function executeGitCreate() {
    $repository = $this->getRepository();

    $path = rtrim($repository->getLocalPath(), '/');

    if ($repository->isHosted()) {
      $repository->execxRemoteCommand(
        'init --bare -- %s',
        $path);
    } else {
      $repository->execxRemoteCommand(
        'clone --bare -- %P %s',
        $repository->getRemoteURIEnvelope(),
        $path);
    }
  }


  /**
   * Sanity check the local git working copy, then fetch from the remote.
   *
   * If the sanity check fails and the working copy may be destroyed, it is
   * removed and recreated; otherwise an exception describing the problem is
   * thrown. If the first fetch fails and the working copy may be destroyed,
   * the "origin" URL is repaired (when it differs from the configured remote
   * URI) and the fetch is retried exactly once.
   *
   * @task git
   */
  private function executeGitUpdate() {
    $repository = $this->getRepository();

    list($err, $stdout) = $repository->execLocalCommand(
      'rev-parse --show-toplevel');
    $message = null;
    $path = $repository->getLocalPath();
    if ($err) {
      // Try to raise a more tailored error message in the more common case
      // of the user creating an empty directory. (We could try to remove it,
      // but might not be able to, and it's much simpler to raise a good
      // message than try to navigate those waters.)
      if (is_dir($path)) {
        $files = Filesystem::listDirectory($path, $include_hidden = true);
        if (!$files) {
          $message = pht(
            "Expected to find a git repository at '%s', but there ".
            "is an empty directory there. Remove the directory: the daemon ".
            "will run '%s' for you.",
            $path,
            'git clone');
        } else {
          $message = pht(
            "Expected to find a git repository at '%s', but there is ".
            "a non-repository directory (with other stuff in it) there. Move ".
            "or remove this directory (or reconfigure the repository to use a ".
            "different directory), and then either clone a repository ".
            "yourself or let the daemon do it.",
            $path);
        }
      } else if (is_file($path)) {
        $message = pht(
          "Expected to find a git repository at '%s', but there is a ".
          "file there instead. Remove it and let the daemon clone a ".
          "repository for you.",
          $path);
      } else {
        $message = pht(
          "Expected to find a git repository at '%s', but did not.",
          $path);
      }
    } else {
      $repo_path = rtrim($stdout, "\n");

      if (empty($repo_path)) {
        // This can mean one of two things: we're in a bare repository, or
        // we're inside a git repository inside another git repository. Since
        // the first is dramatically more likely now that we perform bare
        // clones and I don't have a great way to test for the latter, assume
        // we're OK.
      } else if (!Filesystem::pathsAreEquivalent($repo_path, $path)) {
        $err = true;
        $message = pht(
          "Expected to find repo at '%s', but the actual git repository root ".
          "for this directory is '%s'. Something is misconfigured. ".
          "The repository's 'Local Path' should be set to some place where ".
          "the daemon can check out a working copy, ".
          "and should not be inside another git repository.",
          $path,
          $repo_path);
      }
    }

    if ($err && $repository->canDestroyWorkingCopy()) {
      phlog(
        pht(
          "Repository working copy at '%s' failed sanity check; ".
          "destroying and re-cloning. %s",
          $path,
          $message));
      Filesystem::remove($path);
      $this->executeGitCreate();
    } else if ($err) {
      throw new Exception($message);
    }

    $retry = false;
    do {
      // This is a local command, but needs credentials.
      if ($repository->isWorkingCopyBare()) {
        // For bare working copies, we need this magic incantation.
        $future = $repository->getRemoteCommandFuture(
          'fetch origin %s --prune',
          '+refs/heads/*:refs/heads/*');
      } else {
        $future = $repository->getRemoteCommandFuture(
          'fetch --all --prune');
      }

      $future->setCWD($path);
      list($err, $stdout, $stderr) = $future->resolve();

      if ($err && !$retry && $repository->canDestroyWorkingCopy()) {
        $retry = true;

        // Fix remote origin url if it doesn't match our configuration.
        //
        // The local command returns a list of (error code, stdout, ...), so
        // destructure it and compare only the trimmed output. Comparing the
        // whole result against the URI string never matches, which would
        // rewrite the origin URL on every failed fetch.
        list($config_err, $config_out) = $repository->execLocalCommand(
          'config --get remote.origin.url');
        $origin_url = trim($config_out);

        $remote_uri = $repository->getRemoteURIEnvelope();
        $expect_url = $remote_uri->openEnvelope();

        // If the lookup itself failed we can't trust the output, so fall
        // back to resetting the URL.
        if ($config_err || $origin_url !== (string)$expect_url) {
          $repository->execLocalCommand(
            'remote set-url origin %P',
            $remote_uri);
        }
      } else if ($err) {
        throw new CommandException(
          pht('Failed to fetch changes!'),
          $future->getCommand(),
          $err,
          $stdout,
          $stderr);
      } else {
        $retry = false;
      }
    } while ($retry);
  }


  /**
   * Install the "pre-receive" hook into the git working copy, using the
   * "hooks/" directory for bare working copies and ".git/hooks/" otherwise.
   *
   * @task git
   */
  private function installGitHook() {
    $repository = $this->getRepository();
    $root = $repository->getLocalPath();

    if ($repository->isWorkingCopyBare()) {
      $path = '/hooks/pre-receive';
    } else {
      $path = '/.git/hooks/pre-receive';
    }

    $this->installHook($root.$path);
  }


/* -( Pulling Mercurial Working Copies )----------------------------------- */


  /**
   * Create the local Mercurial repository: "hg init" for hosted
   * repositories, or a clone (without updating the working directory) of the
   * remote otherwise.
   *
   * Error output from a failed clone is censored before being rethrown.
   *
   * @task hg
   */
  private function executeMercurialCreate() {
    $repository = $this->getRepository();

    $path = rtrim($repository->getLocalPath(), '/');

    if ($repository->isHosted()) {
      $repository->execxRemoteCommand(
        'init -- %s',
        $path);
    } else {
      $remote = $repository->getRemoteURIEnvelope();

      // NOTE: Mercurial prior to 3.2.4 has a severe command injection
      // vulnerability.
      // NOTE(review): a reference link originally followed "See:" here but
      // is missing from this copy of the file.

      // On vulnerable versions of Mercurial, we refuse to clone remotes which
      // contain characters which may be interpreted by the shell.
      $hg_version = PhabricatorRepositoryVersion::getMercurialVersion();
      $is_vulnerable = version_compare($hg_version, '3.2.4', '<');
      if ($is_vulnerable) {
        $cleartext = $remote->openEnvelope();
        // The use of "%R" here is an attempt to limit collateral damage
        // for normal URIs because it isn't clear how long this vulnerability
        // has been around for.

        $escaped = csprintf('%R', $cleartext);
        if ((string)$escaped !== (string)$cleartext) {
          throw new Exception(
            pht(
              'You have an old version of Mercurial (%s) which has a severe '.
              'command injection security vulnerability. The remote URI for '.
              'this repository (%s) is potentially unsafe. Upgrade Mercurial '.
              'to at least 3.2.4 to clone it.',
              $hg_version,
              $repository->getMonogram()));
        }
      }

      try {
        $repository->execxRemoteCommand(
          'clone --noupdate -- %P %s',
          $remote,
          $path);
      } catch (Exception $ex) {
        // Deliberately do not chain the original exception: it carries the
        // uncensored command output.
        $message = $ex->getMessage();
        $message = $this->censorMercurialErrorMessage($message);
        throw new Exception($message);
      }
    }
  }


  /**
   * Pull and update the local Mercurial working copy from the remote.
   *
   * A failure with exit code 1 whose output contains "no changes found" is
   * treated as success. Any other failure is rethrown with a censored
   * message.
   *
   * @task hg
   */
  private function executeMercurialUpdate() {
    $repository = $this->getRepository();

    $path = $repository->getLocalPath();

    // This is a local command, but needs credentials.
    $remote = $repository->getRemoteURIEnvelope();
    $future = $repository->getRemoteCommandFuture('pull -u -- %P', $remote);
    $future->setCWD($path);

    try {
      $future->resolvex();
    } catch (CommandException $ex) {
      $err = $ex->getError();
      $stdout = $ex->getStdOut();

      // NOTE: Between versions 2.1 and 2.1.1, Mercurial changed the behavior
      // of "hg pull" to return 1 in case of a successful pull with no changes.
      // This behavior has been reverted, but users who updated between Feb 1,
      // 2012 and Mar 1, 2012 will have the erroring version. Do a dumb test
      // against stdout to check for this possibility.
      // See: https://github.com/phacility/phabricator/issues/101/

      // NOTE: Mercurial has translated versions, which translate this error
      // string. In a translated version, the string will be something else,
      // like "aucun changement trouve". There didn't seem to be an easy way
      // to handle this (there are hard ways but this is not a common problem
      // and only creates log spam, not application failures). Assume English.

      // TODO: Remove this once we're far enough in the future that deployment
      // of 2.1 is exceedingly rare?
      if ($err == 1 && preg_match('/no changes found/', $stdout)) {
        return;
      } else {
        // Deliberately do not chain the original exception: it carries the
        // uncensored command output.
        $message = $ex->getMessage();
        $message = $this->censorMercurialErrorMessage($message);
        throw new Exception($message);
      }
    }
  }


  /**
   * Censor response bodies from Mercurial error messages.
   *
   * When Mercurial attempts to clone an HTTP repository but does not
   * receive a response it expects, it emits the response body in the
   * command output.
   *
   * This represents a potential SSRF issue, because an attacker with
   * permission to create repositories can create one which points at the
   * remote URI for some local service, then read the response from the
   * error message. To prevent this, censor response bodies out of error
   * messages.
   *
   * @param string Uncensored Mercurial command output.
   * @return string Censored Mercurial command output.
   */
  private function censorMercurialErrorMessage($message) {
    // Everything from the first line beginning with "---%<---" through the
    // end of the message is replaced.
    // NOTE(review): the replacement text passed to pht() is empty here, so
    // the body is replaced with a bare newline. Confirm whether a placeholder
    // string was intended.
    return preg_replace(
      '/^---%<---.*/sm',
      pht('')."\n",
      $message);
  }


  /**
   * Write the ".hg/hgrc" file for the local repository so that the
   * "pretxnchangegroup" and "prepushkey" hooks run "bin/commit-hook".
   *
   * Note that this overwrites the whole file at that path.
   *
   * @task hg
   */
  private function installMercurialHook() {
    $repository = $this->getRepository();
    $path = $repository->getLocalPath().'/.hg/hgrc';

    $identifier = $this->getHookContextIdentifier($repository);

    $root = dirname(phutil_get_library_root('phabricator'));
    $bin = $root.'/bin/commit-hook';

    $data = array();
    $data[] = '[hooks]';

    // This hook handles normal pushes.
    $data[] = csprintf(
      'pretxnchangegroup.phabricator = TERM=dumb %s %s %s',
      $bin,
      $identifier,
      'pretxnchangegroup');

    // This one handles creating bookmarks.
    $data[] = csprintf(
      'prepushkey.phabricator = TERM=dumb %s %s %s',
      $bin,
      $identifier,
      'prepushkey');

    // The trailing null element makes the joined output end with a newline.
    $data[] = null;

    $data = implode("\n", $data);

    $this->log('%s', pht('Installing commit hook config to "%s"...', $path));

    Filesystem::writeFile($path, $data);
  }


/* -( Pulling Subversion Working Copies )---------------------------------- */


  /**
   * Create the local Subversion repository with "svnadmin create".
   *
   * @task svn
   */
  private function executeSubversionCreate() {
    $repository = $this->getRepository();

    $path = rtrim($repository->getLocalPath(), '/');
    execx('svnadmin create -- %s', $path);
  }


  /**
   * Install the "pre-commit" hook into the Subversion repository's "hooks/"
   * directory.
   *
   * @task svn
   */
  private function installSubversionHook() {
    $repository = $this->getRepository();
    $root = $repository->getLocalPath();

    $path = '/hooks/pre-commit';

    $this->installHook($root.$path);
  }

}