diff --git a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php index 8e99aabafa..82f2b26aff 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php @@ -1,939 +1,935 @@ repairMode = $repair_mode; return $this; } public function getRepairMode() { return $this->repairMode; } /** * @task discovery */ public function discoverCommits() { $repository = $this->getRepository(); $lock = $this->newRepositoryLock($repository, 'repo.look', false); try { $lock->lock(); } catch (PhutilLockException $ex) { throw new DiffusionDaemonLockException( pht( 'Another process is currently discovering repository "%s", '. 'skipping discovery.', $repository->getDisplayName())); } try { $result = $this->discoverCommitsWithLock(); } catch (Exception $ex) { $lock->unlock(); throw $ex; } $lock->unlock(); return $result; } private function discoverCommitsWithLock() { $repository = $this->getRepository(); $viewer = $this->getViewer(); $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $refs = $this->discoverSubversionCommits(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $refs = $this->discoverMercurialCommits(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $refs = $this->discoverGitCommits(); break; default: throw new Exception(pht("Unknown VCS '%s'!", $vcs)); } if ($this->isInitialImport($refs)) { $this->log( pht( 'Discovered more than %s commit(s) in an empty repository, '. 'marking repository as importing.', new PhutilNumber(PhabricatorRepository::IMPORT_THRESHOLD))); $repository->markImporting(); } // Clear the working set cache. $this->workingSet = array(); // Record discovered commits and mark them in the cache. foreach ($refs as $ref) { $this->recordCommit( $repository, $ref->getIdentifier(), $ref->getEpoch(), $ref->getCanCloseImmediately(), $ref->getParents()); $this->commitCache[$ref->getIdentifier()] = true; } $this->markUnreachableCommits($repository); $version = $this->getObservedVersion($repository); if ($version !== null) { id(new DiffusionRepositoryClusterEngine()) ->setViewer($viewer) ->setRepository($repository) ->synchronizeWorkingCopyAfterDiscovery($version); } return $refs; } /* -( Discovering Git Repositories )--------------------------------------- */ /** * @task git */ private function discoverGitCommits() { $repository = $this->getRepository(); - if (!$repository->isHosted()) { - $this->verifyGitOrigin($repository); - } - $heads = id(new DiffusionLowLevelGitRefQuery()) ->setRepository($repository) ->execute(); if (!$heads) { // This repository has no heads at all, so we don't need to do // anything. Generally, this means the repository is empty. return array(); } $heads = $this->sortRefs($heads); $head_commits = mpull($heads, 'getCommitIdentifier'); $this->log( pht( 'Discovering commits in repository "%s".', $repository->getDisplayName())); $this->fillCommitCache($head_commits); $refs = array(); foreach ($heads as $ref) { $name = $ref->getShortName(); $commit = $ref->getCommitIdentifier(); $this->log( pht( 'Examining "%s" (%s) at "%s".', $name, $ref->getRefType(), $commit)); if (!$repository->shouldTrackRef($ref)) { $this->log(pht('Skipping, ref is untracked.')); continue; } if ($this->isKnownCommit($commit)) { $this->log(pht('Skipping, HEAD is known.')); continue; } // In Git, it's possible to tag anything. We just skip tags that don't // point to a commit. See T11301. $fields = $ref->getRawFields(); $ref_type = idx($fields, 'objecttype'); $tag_type = idx($fields, '*objecttype'); if ($ref_type != 'commit' && $tag_type != 'commit') { $this->log(pht('Skipping, this is not a commit.')); continue; } $this->log(pht('Looking for new commits.')); $head_refs = $this->discoverStreamAncestry( new PhabricatorGitGraphStream($repository, $commit), $commit, $repository->shouldAutocloseRef($ref)); $this->didDiscoverRefs($head_refs); $refs[] = $head_refs; } return array_mergev($refs); } /* -( Discovering Subversion Repositories )-------------------------------- */ /** * @task svn */ private function discoverSubversionCommits() { $repository = $this->getRepository(); if (!$repository->isHosted()) { $this->verifySubversionRoot($repository); } $upper_bound = null; $limit = 1; $refs = array(); do { // Find all the unknown commits on this path. Note that we permit // importing an SVN subdirectory rather than the entire repository, so // commits may be nonsequential. if ($upper_bound === null) { $at_rev = 'HEAD'; } else { $at_rev = ($upper_bound - 1); } try { list($xml, $stderr) = $repository->execxRemoteCommand( 'log --xml --quiet --limit %d %s', $limit, $repository->getSubversionBaseURI($at_rev)); } catch (CommandException $ex) { $stderr = $ex->getStderr(); if (preg_match('/(path|File) not found/', $stderr)) { // We've gone all the way back through history and this path was not // affected by earlier commits. break; } throw $ex; } $xml = phutil_utf8ize($xml); $log = new SimpleXMLElement($xml); foreach ($log->logentry as $entry) { $identifier = (int)$entry['revision']; $epoch = (int)strtotime((string)$entry->date[0]); $refs[$identifier] = id(new PhabricatorRepositoryCommitRef()) ->setIdentifier($identifier) ->setEpoch($epoch) ->setCanCloseImmediately(true); if ($upper_bound === null) { $upper_bound = $identifier; } else { $upper_bound = min($upper_bound, $identifier); } } // Discover 2, 4, 8, ... 256 logs at a time. This allows us to initially // import large repositories fairly quickly, while pulling only as much // data as we need in the common case (when we've already imported the // repository and are just grabbing one commit at a time). $limit = min($limit * 2, 256); } while ($upper_bound > 1 && !$this->isKnownCommit($upper_bound)); krsort($refs); while ($refs && $this->isKnownCommit(last($refs)->getIdentifier())) { array_pop($refs); } $refs = array_reverse($refs); $this->didDiscoverRefs($refs); return $refs; } private function verifySubversionRoot(PhabricatorRepository $repository) { list($xml) = $repository->execxRemoteCommand( 'info --xml %s', $repository->getSubversionPathURI()); $xml = phutil_utf8ize($xml); $xml = new SimpleXMLElement($xml); $remote_root = (string)($xml->entry[0]->repository[0]->root[0]); $expect_root = $repository->getSubversionPathURI(); $normal_type_svn = PhabricatorRepositoryURINormalizer::TYPE_SVN; $remote_normal = id(new PhabricatorRepositoryURINormalizer( $normal_type_svn, $remote_root))->getNormalizedPath(); $expect_normal = id(new PhabricatorRepositoryURINormalizer( $normal_type_svn, $expect_root))->getNormalizedPath(); if ($remote_normal != $expect_normal) { throw new Exception( pht( 'Repository "%s" does not have a correctly configured remote URI. '. 'The remote URI for a Subversion repository MUST point at the '. 'repository root. The root for this repository is "%s", but the '. 'configured URI is "%s". To resolve this error, set the remote URI '. 'to point at the repository root. If you want to import only part '. 'of a Subversion repository, use the "Import Only" option.', $repository->getDisplayName(), $remote_root, $expect_root)); } } /* -( Discovering Mercurial Repositories )--------------------------------- */ /** * @task hg */ private function discoverMercurialCommits() { $repository = $this->getRepository(); $branches = id(new DiffusionLowLevelMercurialBranchesQuery()) ->setRepository($repository) ->execute(); $this->fillCommitCache(mpull($branches, 'getCommitIdentifier')); $refs = array(); foreach ($branches as $branch) { // NOTE: Mercurial branches may have multiple heads, so the names may // not be unique. $name = $branch->getShortName(); $commit = $branch->getCommitIdentifier(); $this->log(pht('Examining branch "%s" head "%s".', $name, $commit)); if (!$repository->shouldTrackBranch($name)) { $this->log(pht('Skipping, branch is untracked.')); continue; } if ($this->isKnownCommit($commit)) { $this->log(pht('Skipping, this head is a known commit.')); continue; } $this->log(pht('Looking for new commits.')); $branch_refs = $this->discoverStreamAncestry( new PhabricatorMercurialGraphStream($repository, $commit), $commit, $close_immediately = true); $this->didDiscoverRefs($branch_refs); $refs[] = $branch_refs; } return array_mergev($refs); } /* -( Internals )---------------------------------------------------------- */ private function discoverStreamAncestry( PhabricatorRepositoryGraphStream $stream, $commit, $close_immediately) { $discover = array($commit); $graph = array(); $seen = array(); // Find all the reachable, undiscovered commits. Build a graph of the // edges. while ($discover) { $target = array_pop($discover); if (empty($graph[$target])) { $graph[$target] = array(); } $parents = $stream->getParents($target); foreach ($parents as $parent) { if ($this->isKnownCommit($parent)) { continue; } $graph[$target][$parent] = true; if (empty($seen[$parent])) { $seen[$parent] = true; $discover[] = $parent; } } } // Now, sort them topographically. $commits = $this->reduceGraph($graph); $refs = array(); foreach ($commits as $commit) { $epoch = $stream->getCommitDate($commit); // If the epoch doesn't fit into a uint32, treat it as though it stores // the current time. For discussion, see T11537. if ($epoch > 0xFFFFFFFF) { $epoch = PhabricatorTime::getNow(); } // If the epoch is not present at all, treat it as though it stores the // value "0". For discussion, see T12062. This behavior is consistent // with the behavior of "git show". if (!strlen($epoch)) { $epoch = 0; } $refs[] = id(new PhabricatorRepositoryCommitRef()) ->setIdentifier($commit) ->setEpoch($epoch) ->setCanCloseImmediately($close_immediately) ->setParents($stream->getParents($commit)); } return $refs; } private function reduceGraph(array $edges) { foreach ($edges as $commit => $parents) { $edges[$commit] = array_keys($parents); } $graph = new PhutilDirectedScalarGraph(); $graph->addNodes($edges); $commits = $graph->getTopographicallySortedNodes(); // NOTE: We want the most ancestral nodes first, so we need to reverse the // list we get out of AbstractDirectedGraph. $commits = array_reverse($commits); return $commits; } private function isKnownCommit($identifier) { if (isset($this->commitCache[$identifier])) { return true; } if (isset($this->workingSet[$identifier])) { return true; } if ($this->repairMode) { // In repair mode, rediscover the entire repository, ignoring the // database state. We can hit the local cache above, but if we miss it // stop the script from going to the database cache. return false; } $this->fillCommitCache(array($identifier)); return isset($this->commitCache[$identifier]); } private function fillCommitCache(array $identifiers) { if (!$identifiers) { return; } $max_size = self::MAX_COMMIT_CACHE_SIZE; // If we're filling more identifiers than would fit in the cache, ignore // the ones that don't fit. Because the cache is FIFO, overfilling it can // cause the entire cache to miss. See T12296. if (count($identifiers) > $max_size) { $identifiers = array_slice($identifiers, 0, $max_size); } // When filling the cache we ignore commits which have been marked as // unreachable, treating them as though they do not exist. When recording // commits later we'll revive commits that exist but are unreachable. $commits = id(new PhabricatorRepositoryCommit())->loadAllWhere( 'repositoryID = %d AND commitIdentifier IN (%Ls) AND (importStatus & %d) != %d', $this->getRepository()->getID(), $identifiers, PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE, PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE); foreach ($commits as $commit) { $this->commitCache[$commit->getCommitIdentifier()] = true; } while (count($this->commitCache) > $max_size) { array_shift($this->commitCache); } } /** * Sort branches so we process closeable branches first. This makes the * whole import process a little cheaper, since we can close these commits * the first time through rather than catching them in the refs step. * * @task internal * * @param list List of refs. * @return list Sorted list of refs. */ private function sortRefs(array $refs) { $repository = $this->getRepository(); $head_refs = array(); $tail_refs = array(); foreach ($refs as $ref) { if ($repository->shouldAutocloseRef($ref)) { $head_refs[] = $ref; } else { $tail_refs[] = $ref; } } return array_merge($head_refs, $tail_refs); } private function recordCommit( PhabricatorRepository $repository, $commit_identifier, $epoch, $close_immediately, array $parents) { $commit = new PhabricatorRepositoryCommit(); $conn_w = $repository->establishConnection('w'); // First, try to revive an existing unreachable commit (if one exists) by // removing the "unreachable" flag. If we succeed, we don't need to do // anything else: we already discovered this commit some time ago. queryfx( $conn_w, 'UPDATE %T SET importStatus = (importStatus & ~%d) WHERE repositoryID = %d AND commitIdentifier = %s', $commit->getTableName(), PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE, $repository->getID(), $commit_identifier); if ($conn_w->getAffectedRows()) { $commit = $commit->loadOneWhere( 'repositoryID = %d AND commitIdentifier = %s', $repository->getID(), $commit_identifier); // After reviving a commit, schedule new daemons for it. $this->didDiscoverCommit($repository, $commit, $epoch); return; } $commit->setRepositoryID($repository->getID()); $commit->setCommitIdentifier($commit_identifier); $commit->setEpoch($epoch); if ($close_immediately) { $commit->setImportStatus(PhabricatorRepositoryCommit::IMPORTED_CLOSEABLE); } $data = new PhabricatorRepositoryCommitData(); try { // If this commit has parents, look up their IDs. The parent commits // should always exist already. $parent_ids = array(); if ($parents) { $parent_rows = queryfx_all( $conn_w, 'SELECT id, commitIdentifier FROM %T WHERE commitIdentifier IN (%Ls) AND repositoryID = %d', $commit->getTableName(), $parents, $repository->getID()); $parent_map = ipull($parent_rows, 'id', 'commitIdentifier'); foreach ($parents as $parent) { if (empty($parent_map[$parent])) { throw new Exception( pht('Unable to identify parent "%s"!', $parent)); } $parent_ids[] = $parent_map[$parent]; } } else { // Write an explicit 0 so we can distinguish between "really no // parents" and "data not available". if (!$repository->isSVN()) { $parent_ids = array(0); } } $commit->openTransaction(); $commit->save(); $data->setCommitID($commit->getID()); $data->save(); foreach ($parent_ids as $parent_id) { queryfx( $conn_w, 'INSERT IGNORE INTO %T (childCommitID, parentCommitID) VALUES (%d, %d)', PhabricatorRepository::TABLE_PARENTS, $commit->getID(), $parent_id); } $commit->saveTransaction(); $this->didDiscoverCommit($repository, $commit, $epoch); if ($this->repairMode) { // Normally, the query should throw a duplicate key exception. If we // reach this in repair mode, we've actually performed a repair. $this->log(pht('Repaired commit "%s".', $commit_identifier)); } PhutilEventEngine::dispatchEvent( new PhabricatorEvent( PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT, array( 'repository' => $repository, 'commit' => $commit, ))); } catch (AphrontDuplicateKeyQueryException $ex) { $commit->killTransaction(); // Ignore. This can happen because we discover the same new commit // more than once when looking at history, or because of races or // data inconsistency or cosmic radiation; in any case, we're still // in a good state if we ignore the failure. } } private function didDiscoverCommit( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, $epoch) { $this->insertTask($repository, $commit); // Update the repository summary table. queryfx( $commit->establishConnection('w'), 'INSERT INTO %T (repositoryID, size, lastCommitID, epoch) VALUES (%d, 1, %d, %d) ON DUPLICATE KEY UPDATE size = size + 1, lastCommitID = IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID), epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)', PhabricatorRepository::TABLE_SUMMARY, $repository->getID(), $commit->getID(), $epoch); } private function didDiscoverRefs(array $refs) { foreach ($refs as $ref) { $this->workingSet[$ref->getIdentifier()] = true; } } private function insertTask( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, $data = array()) { $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $class = 'PhabricatorRepositoryGitCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $class = 'PhabricatorRepositorySvnCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker'; break; default: throw new Exception(pht("Unknown repository type '%s'!", $vcs)); } $data['commitID'] = $commit->getID(); // If the repository is importing for the first time, we schedule tasks // at IMPORT priority, which is very low. Making progress on importing a // new repository for the first time is less important than any other // daemon task. // If the repository has finished importing and we're just catching up // on recent commits, we schedule discovery at COMMIT priority, which is // slightly below the default priority. // Note that followup tasks and triggered tasks (like those generated by // Herald or Harbormaster) will queue at DEFAULT priority, so that each // commit tends to fully import before we start the next one. This tends // to give imports fairly predictable progress. See T11677 for some // discussion. if ($repository->isImporting()) { $task_priority = PhabricatorWorker::PRIORITY_IMPORT; } else { $task_priority = PhabricatorWorker::PRIORITY_COMMIT; } $options = array( 'priority' => $task_priority, ); PhabricatorWorker::scheduleTask($class, $data, $options); } private function isInitialImport(array $refs) { $commit_count = count($refs); if ($commit_count <= PhabricatorRepository::IMPORT_THRESHOLD) { // If we fetched a small number of commits, assume it's an initial // commit or a stack of a few initial commits. return false; } $viewer = $this->getViewer(); $repository = $this->getRepository(); $any_commits = id(new DiffusionCommitQuery()) ->setViewer($viewer) ->withRepository($repository) ->setLimit(1) ->execute(); if ($any_commits) { // If the repository already has commits, this isn't an import. return false; } return true; } private function getObservedVersion(PhabricatorRepository $repository) { if ($repository->isHosted()) { return null; } if ($repository->isGit()) { return $this->getGitObservedVersion($repository); } return null; } private function getGitObservedVersion(PhabricatorRepository $repository) { $refs = id(new DiffusionLowLevelGitRefQuery()) ->setRepository($repository) ->execute(); if (!$refs) { return null; } // In Git, the observed version is the most recently discovered commit // at any repository HEAD. It's possible for this to regress temporarily // if a branch is pushed and then deleted. This is acceptable because it // doesn't do anything meaningfully bad and will fix itself on the next // push. $ref_identifiers = mpull($refs, 'getCommitIdentifier'); $ref_identifiers = array_fuse($ref_identifiers); $version = queryfx_one( $repository->establishConnection('w'), 'SELECT MAX(id) version FROM %T WHERE repositoryID = %d AND commitIdentifier IN (%Ls)', id(new PhabricatorRepositoryCommit())->getTableName(), $repository->getID(), $ref_identifiers); if (!$version) { return null; } return (int)$version['version']; } private function markUnreachableCommits(PhabricatorRepository $repository) { // For now, this is only supported for Git. if (!$repository->isGit()) { return; } // Find older versions of refs which we haven't processed yet. We're going // to make sure their commits are still reachable. $old_refs = id(new PhabricatorRepositoryOldRef())->loadAllWhere( 'repositoryPHID = %s', $repository->getPHID()); // If we don't have any refs to update, bail out before building a graph // stream. In particular, this improves behavior in empty repositories, // where `git log` exits with an error. if (!$old_refs) { return; } // We can share a single graph stream across all the checks we need to do. $stream = new PhabricatorGitGraphStream($repository); foreach ($old_refs as $old_ref) { $identifier = $old_ref->getCommitIdentifier(); $this->markUnreachableFrom($repository, $stream, $identifier); // If nothing threw an exception, we're all done with this ref. $old_ref->delete(); } } private function markUnreachableFrom( PhabricatorRepository $repository, PhabricatorGitGraphStream $stream, $identifier) { $unreachable = array(); $commit = id(new PhabricatorRepositoryCommit())->loadOneWhere( 'repositoryID = %s AND commitIdentifier = %s', $repository->getID(), $identifier); if (!$commit) { return; } $look = array($commit); $seen = array(); while ($look) { $target = array_pop($look); // If we've already checked this commit (for example, because history // branches and then merges) we don't need to check it again. $target_identifier = $target->getCommitIdentifier(); if (isset($seen[$target_identifier])) { continue; } $seen[$target_identifier] = true; try { $stream->getCommitDate($target_identifier); $reachable = true; } catch (Exception $ex) { $reachable = false; } if ($reachable) { // This commit is reachable, so we don't need to go any further // down this road. continue; } $unreachable[] = $target; // Find the commit's parents and check them for reachability, too. We // have to look in the database since we no may longer have the commit // in the repository. $rows = queryfx_all( $commit->establishConnection('w'), 'SELECT commit.* FROM %T commit JOIN %T parents ON commit.id = parents.parentCommitID WHERE parents.childCommitID = %d', $commit->getTableName(), PhabricatorRepository::TABLE_PARENTS, $target->getID()); if (!$rows) { continue; } $parents = id(new PhabricatorRepositoryCommit()) ->loadAllFromArray($rows); foreach ($parents as $parent) { $look[] = $parent; } } $unreachable = array_reverse($unreachable); $flag = PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE; foreach ($unreachable as $unreachable_commit) { $unreachable_commit->writeImportStatusFlag($flag); } // If anything was unreachable, just rebuild the whole summary table. // We can't really update it incrementally when a commit becomes // unreachable. if ($unreachable) { $this->rebuildSummaryTable($repository); } } private function rebuildSummaryTable(PhabricatorRepository $repository) { $conn_w = $repository->establishConnection('w'); $data = queryfx_one( $conn_w, 'SELECT COUNT(*) N, MAX(id) id, MAX(epoch) epoch FROM %T WHERE repositoryID = %d AND (importStatus & %d) != %d', id(new PhabricatorRepositoryCommit())->getTableName(), $repository->getID(), PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE, PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE); queryfx( $conn_w, 'INSERT INTO %T (repositoryID, size, lastCommitID, epoch) VALUES (%d, %d, %d, %d) ON DUPLICATE KEY UPDATE size = VALUES(size), lastCommitID = VALUES(lastCommitID), epoch = VALUES(epoch)', PhabricatorRepository::TABLE_SUMMARY, $repository->getID(), $data['N'], $data['id'], $data['epoch']); } } diff --git a/src/applications/repository/engine/PhabricatorRepositoryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryEngine.php index fed5b09521..12cd8f1ed6 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryEngine.php @@ -1,203 +1,86 @@ repository = $repository; return $this; } /** * @task config */ protected function getRepository() { if ($this->repository === null) { throw new PhutilInvalidStateException('setRepository'); } return $this->repository; } /** * @task config */ public function setVerbose($verbose) { $this->verbose = $verbose; return $this; } /** * @task config */ public function getVerbose() { return $this->verbose; } public function getViewer() { return PhabricatorUser::getOmnipotentUser(); } protected function newRepositoryLock( PhabricatorRepository $repository, $lock_key, $lock_device_only) { $lock_parts = array( 'repositoryPHID' => $repository->getPHID(), ); if ($lock_device_only) { $device = AlmanacKeys::getLiveDevice(); if ($device) { $lock_parts['devicePHID'] = $device->getPHID(); } } return PhabricatorGlobalLock::newLock($lock_key, $lock_parts); } - - /** - * Verify that the "origin" remote exists, and points at the correct URI. - * - * This catches or corrects some types of misconfiguration, and also repairs - * an issue where Git 1.7.1 does not create an "origin" for `--bare` clones. - * See T4041. - * - * @param PhabricatorRepository Repository to verify. - * @return void - */ - protected function verifyGitOrigin(PhabricatorRepository $repository) { - try { - list($remotes) = $repository->execxLocalCommand( - 'remote show -n origin'); - } catch (CommandException $ex) { - throw new PhutilProxyException( - pht( - 'Expected to find a Git working copy at path "%s", but the '. - 'path exists and is not a valid working copy. If you remove '. - 'this directory, the daemons will automatically recreate it '. - 'correctly. Phabricator will not destroy the directory for you '. - 'because it can not be sure that it does not contain important '. - 'data.', - $repository->getLocalPath()), - $ex); - } - - $matches = null; - if (!preg_match('/^\s*Fetch URL:\s*(.*?)\s*$/m', $remotes, $matches)) { - throw new Exception( - pht( - "Expected '%s' in '%s'.", - 'Fetch URL', - 'git remote show -n origin')); - } - - $remote_uri = $matches[1]; - $expect_remote = $repository->getRemoteURI(); - - if ($remote_uri == 'origin') { - // If a remote does not exist, git pretends it does and prints out a - // made up remote where the URI is the same as the remote name. This is - // definitely not correct. - - // Possibly, we should use `git remote --verbose` instead, which does not - // suffer from this problem (but is a little more complicated to parse). - $valid = false; - $exists = false; - } else { - $normal_type_git = PhabricatorRepositoryURINormalizer::TYPE_GIT; - - $remote_normal = id(new PhabricatorRepositoryURINormalizer( - $normal_type_git, - $remote_uri))->getNormalizedPath(); - - $expect_normal = id(new PhabricatorRepositoryURINormalizer( - $normal_type_git, - $expect_remote))->getNormalizedPath(); - - $valid = ($remote_normal == $expect_normal); - $exists = true; - } - - // These URIs may have plaintext HTTP credentials. If they do, censor - // them for display. See T12945. - $display_remote = phutil_censor_credentials($remote_uri); - $display_expect = phutil_censor_credentials($expect_remote); - - if (!$valid) { - if (!$exists) { - // If there's no "origin" remote, just create it regardless of how - // strongly we own the working copy. There is almost no conceivable - // scenario in which this could do damage. - $this->log( - pht( - 'Remote "origin" does not exist. Creating "origin", with '. - 'URI "%s".', - $expect_remote)); - $repository->execxLocalCommand( - 'remote add origin %P', - $repository->getRemoteURIEnvelope()); - - // NOTE: This doesn't fetch the origin (it just creates it), so we won't - // know about origin branches until the next "pull" happens. That's fine - // for our purposes, but might impact things in the future. - } else { - if ($repository->canDestroyWorkingCopy()) { - // Bad remote, but we can try to repair it. - $this->log( - pht( - 'Remote "origin" exists, but is pointed at the wrong URI, "%s". '. - 'Resetting origin URI to "%s.', - $remote_uri, - $expect_remote)); - $repository->execxLocalCommand( - 'remote set-url origin %P', - $repository->getRemoteURIEnvelope()); - } else { - // Bad remote and we aren't comfortable repairing it. - $message = pht( - 'Working copy at "%s" has a mismatched origin URI, "%s". '. - 'The expected origin URI is "%s". Fix your configuration, or '. - 'set the remote URI correctly. To avoid breaking anything, '. - 'Phabricator will not automatically fix this.', - $repository->getLocalPath(), - $display_remote, - $display_expect); - throw new Exception($message); - } - } - } - } - - - - /** * @task internal */ protected function log($pattern /* ... */) { if ($this->getVerbose()) { $console = PhutilConsole::getConsole(); $argv = func_get_args(); array_unshift($argv, "%s\n"); call_user_func_array(array($console, 'writeOut'), $argv); } return $this; } } diff --git a/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php b/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php index 208a3792b3..5d0edb1f10 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php @@ -1,670 +1,659 @@ getRepository(); $lock = $this->newRepositoryLock($repository, 'repo.pull', true); try { $lock->lock(); } catch (PhutilLockException $ex) { throw new DiffusionDaemonLockException( pht( 'Another process is currently updating repository "%s", '. 'skipping pull.', $repository->getDisplayName())); } try { $result = $this->pullRepositoryWithLock(); } catch (Exception $ex) { $lock->unlock(); throw $ex; } $lock->unlock(); return $result; } private function pullRepositoryWithLock() { $repository = $this->getRepository(); $viewer = PhabricatorUser::getOmnipotentUser(); $is_hg = false; $is_git = false; $is_svn = false; $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: // We never pull a local copy of non-hosted Subversion repositories. if (!$repository->isHosted()) { $this->skipPull( pht( 'Repository "%s" is a non-hosted Subversion repository, which '. 'does not require a local working copy to be pulled.', $repository->getDisplayName())); return; } $is_svn = true; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $is_git = true; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $is_hg = true; break; default: $this->abortPull(pht('Unknown VCS "%s"!', $vcs)); break; } $local_path = $repository->getLocalPath(); if ($local_path === null) { $this->abortPull( pht( 'No local path is configured for repository "%s".', $repository->getDisplayName())); } try { $dirname = dirname($local_path); if (!Filesystem::pathExists($dirname)) { Filesystem::createDirectory($dirname, 0755, $recursive = true); } if (!Filesystem::pathExists($local_path)) { $this->logPull( pht( 'Creating a new working copy for repository "%s".', $repository->getDisplayName())); if ($is_git) { $this->executeGitCreate(); } else if ($is_hg) { $this->executeMercurialCreate(); } else { $this->executeSubversionCreate(); } } id(new DiffusionRepositoryClusterEngine()) ->setViewer($viewer) ->setRepository($repository) ->synchronizeWorkingCopyBeforeRead(); if (!$repository->isHosted()) { $this->logPull( pht( 'Updating the working copy for repository "%s".', $repository->getDisplayName())); if ($is_git) { - $this->verifyGitOrigin($repository); $this->executeGitUpdate(); } else if ($is_hg) { $this->executeMercurialUpdate(); } } if ($repository->isHosted()) { if ($is_git) { $this->installGitHook(); } else if ($is_svn) { $this->installSubversionHook(); } else if ($is_hg) { $this->installMercurialHook(); } foreach ($repository->getHookDirectories() as $directory) { $this->installHookDirectory($directory); } } } catch (Exception $ex) { $this->abortPull( pht( "Pull of '%s' failed: %s", $repository->getDisplayName(), $ex->getMessage()), $ex); } $this->donePull(); return $this; } private function skipPull($message) { $this->log($message); $this->donePull(); } private function abortPull($message, Exception $ex = null) { $code_error = PhabricatorRepositoryStatusMessage::CODE_ERROR; $this->updateRepositoryInitStatus($code_error, $message); if ($ex) { throw $ex; } else { throw new Exception($message); } } private function logPull($message) { $this->log($message); } private function donePull() { $code_okay = PhabricatorRepositoryStatusMessage::CODE_OKAY; $this->updateRepositoryInitStatus($code_okay); } private function updateRepositoryInitStatus($code, $message = null) { $this->getRepository()->writeStatusMessage( PhabricatorRepositoryStatusMessage::TYPE_INIT, $code, array( 'message' => $message, )); } private function installHook($path, array $hook_argv = array()) { $this->log(pht('Installing commit hook to "%s"...', $path)); $repository = $this->getRepository(); $identifier = $this->getHookContextIdentifier($repository); $root = dirname(phutil_get_library_root('phabricator')); $bin = $root.'/bin/commit-hook'; $full_php_path = Filesystem::resolveBinary('php'); $cmd = csprintf( 'exec %s -f %s -- %s %Ls "$@"', $full_php_path, $bin, $identifier, $hook_argv); $hook = "#!/bin/sh\nexport TERM=dumb\n{$cmd}\n"; Filesystem::writeFile($path, $hook); Filesystem::changePermissions($path, 0755); } private function installHookDirectory($path) { $readme = pht( "To add custom hook scripts to this repository, add them to this ". "directory.\n\nPhabricator will run any executables in this directory ". "after running its own checks, as though they were normal hook ". "scripts."); Filesystem::createDirectory($path, 0755); Filesystem::writeFile($path.'/README', $readme); } private function getHookContextIdentifier(PhabricatorRepository $repository) { $identifier = $repository->getPHID(); $instance = PhabricatorEnv::getEnvConfig('cluster.instance'); if (strlen($instance)) { $identifier = "{$identifier}:{$instance}"; } return $identifier; } /* -( Pulling Git Working Copies )----------------------------------------- */ /** * @task git */ private function executeGitCreate() { $repository = $this->getRepository(); $path = rtrim($repository->getLocalPath(), '/'); if ($repository->isHosted()) { $repository->execxRemoteCommand( 'init --bare -- %s', $path); } else { $repository->execxRemoteCommand( 'clone --bare -- %P %s', $repository->getRemoteURIEnvelope(), $path); } } /** * @task git */ private function executeGitUpdate() { $repository = $this->getRepository(); list($err, $stdout) = $repository->execLocalCommand( 'rev-parse --show-toplevel'); $message = null; $path = $repository->getLocalPath(); if ($err) { // Try to raise a more tailored error message in the more common case // of the user creating an empty directory. (We could try to remove it, // but might not be able to, and it's much simpler to raise a good // message than try to navigate those waters.) if (is_dir($path)) { $files = Filesystem::listDirectory($path, $include_hidden = true); if (!$files) { $message = pht( "Expected to find a git repository at '%s', but there ". "is an empty directory there. Remove the directory: the daemon ". "will run '%s' for you.", $path, 'git clone'); } else { $message = pht( "Expected to find a git repository at '%s', but there is ". "a non-repository directory (with other stuff in it) there. Move ". "or remove this directory (or reconfigure the repository to use a ". "different directory), and then either clone a repository ". "yourself or let the daemon do it.", $path); } } else if (is_file($path)) { $message = pht( "Expected to find a git repository at '%s', but there is a ". "file there instead. Remove it and let the daemon clone a ". "repository for you.", $path); } else { $message = pht( "Expected to find a git repository at '%s', but did not.", $path); } } else { $repo_path = rtrim($stdout, "\n"); if (empty($repo_path)) { // This can mean one of two things: we're in a bare repository, or // we're inside a git repository inside another git repository. Since // the first is dramatically more likely now that we perform bare // clones and I don't have a great way to test for the latter, assume // we're OK. } else if (!Filesystem::pathsAreEquivalent($repo_path, $path)) { $err = true; $message = pht( "Expected to find repo at '%s', but the actual git repository root ". "for this directory is '%s'. Something is misconfigured. ". "The repository's 'Local Path' should be set to some place where ". "the daemon can check out a working copy, ". "and should not be inside another git repository.", $path, $repo_path); } } if ($err && $repository->canDestroyWorkingCopy()) { phlog( pht( "Repository working copy at '%s' failed sanity check; ". "destroying and re-cloning. %s", $path, $message)); Filesystem::remove($path); $this->executeGitCreate(); } else if ($err) { throw new Exception($message); } $remote_refs = $this->loadGitRemoteRefs($repository); $local_refs = $this->loadGitLocalRefs($repository); if ($remote_refs === $local_refs) { $this->log( pht( 'Skipping fetch because local and remote refs are already '. 'identical.')); return false; } $this->logRefDifferences($remote_refs, $local_refs); - // Force the "origin" URI to the configured value. - $repository->execxLocalCommand( - 'remote set-url origin -- %P', - $repository->getRemoteURIEnvelope()); - - if ($repository->isWorkingCopyBare()) { - // For bare working copies, we need this magic incantation. - $future = $repository->getRemoteCommandFuture( - 'fetch origin %s --prune', - '+refs/*:refs/*'); - } else { - $future = $repository->getRemoteCommandFuture( - 'fetch --all --prune'); - } + $future = $repository->getRemoteCommandFuture( + 'fetch %P %s --prune', + $repository->getRemoteURIEnvelope(), + '+refs/*:refs/*'); $future ->setCWD($path) ->resolvex(); } /** * @task git */ private function installGitHook() { $repository = $this->getRepository(); $root = $repository->getLocalPath(); if ($repository->isWorkingCopyBare()) { $path = '/hooks/pre-receive'; } else { $path = '/.git/hooks/pre-receive'; } $this->installHook($root.$path); } private function loadGitRemoteRefs(PhabricatorRepository $repository) { $remote_envelope = $repository->getRemoteURIEnvelope(); // NOTE: "git ls-remote" does not support "--" until circa January 2016. // See T12416. None of the flags to "ls-remote" appear dangerous, and // other checks make it difficult to configure a suspicious remote URI. list($stdout) = $repository->execxRemoteCommand( 'ls-remote %P', $remote_envelope); // Empty repositories don't have any refs. if (!strlen(rtrim($stdout))) { return array(); } $map = array(); $lines = phutil_split_lines($stdout, false); foreach ($lines as $line) { list($hash, $name) = preg_split('/\s+/', $line, 2); // If the remote has a HEAD, just ignore it. if ($name == 'HEAD') { continue; } // If the remote ref is itself a remote ref, ignore it. if (preg_match('(^refs/remotes/)', $name)) { continue; } $map[$name] = $hash; } ksort($map); return $map; } private function loadGitLocalRefs(PhabricatorRepository $repository) { $refs = id(new DiffusionLowLevelGitRefQuery()) ->setRepository($repository) ->execute(); $map = array(); foreach ($refs as $ref) { $fields = $ref->getRawFields(); $map[idx($fields, 'refname')] = $ref->getCommitIdentifier(); } ksort($map); return $map; } private function logRefDifferences(array $remote, array $local) { $all = $local + $remote; $differences = array(); foreach ($all as $key => $ignored) { $remote_ref = idx($remote, $key, pht('')); $local_ref = idx($local, $key, pht('')); if ($remote_ref !== $local_ref) { $differences[] = pht( '%s (remote: "%s", local: "%s")', $key, $remote_ref, $local_ref); } } $this->log( pht( "Updating repository after detecting ref differences:\n%s", implode("\n", $differences))); } /* -( Pulling Mercurial Working Copies )----------------------------------- */ /** * @task hg */ private function executeMercurialCreate() { $repository = $this->getRepository(); $path = rtrim($repository->getLocalPath(), '/'); if ($repository->isHosted()) { $repository->execxRemoteCommand( 'init -- %s', $path); } else { $remote = $repository->getRemoteURIEnvelope(); // NOTE: Mercurial prior to 3.2.4 has an severe command injection // vulnerability. See: // On vulnerable versions of Mercurial, we refuse to clone remotes which // contain characters which may be interpreted by the shell. $hg_binary = PhutilBinaryAnalyzer::getForBinary('hg'); $is_vulnerable = $hg_binary->isMercurialVulnerableToInjection(); if ($is_vulnerable) { $cleartext = $remote->openEnvelope(); // The use of "%R" here is an attempt to limit collateral damage // for normal URIs because it isn't clear how long this vulnerability // has been around for. $escaped = csprintf('%R', $cleartext); if ((string)$escaped !== (string)$cleartext) { throw new Exception( pht( 'You have an old version of Mercurial (%s) which has a severe '. 'command injection security vulnerability. The remote URI for '. 'this repository (%s) is potentially unsafe. Upgrade Mercurial '. 'to at least 3.2.4 to clone it.', $hg_binary->getBinaryVersion(), $repository->getMonogram())); } } try { $repository->execxRemoteCommand( 'clone --noupdate -- %P %s', $remote, $path); } catch (Exception $ex) { $message = $ex->getMessage(); $message = $this->censorMercurialErrorMessage($message); throw new Exception($message); } } } /** * @task hg */ private function executeMercurialUpdate() { $repository = $this->getRepository(); $path = $repository->getLocalPath(); // This is a local command, but needs credentials. $remote = $repository->getRemoteURIEnvelope(); $future = $repository->getRemoteCommandFuture('pull -- %P', $remote); $future->setCWD($path); try { $future->resolvex(); } catch (CommandException $ex) { $err = $ex->getError(); $stdout = $ex->getStdout(); // NOTE: Between versions 2.1 and 2.1.1, Mercurial changed the behavior // of "hg pull" to return 1 in case of a successful pull with no changes. // This behavior has been reverted, but users who updated between Feb 1, // 2012 and Mar 1, 2012 will have the erroring version. Do a dumb test // against stdout to check for this possibility. // See: https://github.com/phacility/phabricator/issues/101/ // NOTE: Mercurial has translated versions, which translate this error // string. In a translated version, the string will be something else, // like "aucun changement trouve". There didn't seem to be an easy way // to handle this (there are hard ways but this is not a common problem // and only creates log spam, not application failures). Assume English. // TODO: Remove this once we're far enough in the future that deployment // of 2.1 is exceedingly rare? if ($err == 1 && preg_match('/no changes found/', $stdout)) { return; } else { $message = $ex->getMessage(); $message = $this->censorMercurialErrorMessage($message); throw new Exception($message); } } } /** * Censor response bodies from Mercurial error messages. * * When Mercurial attempts to clone an HTTP repository but does not * receive a response it expects, it emits the response body in the * command output. * * This represents a potential SSRF issue, because an attacker with * permission to create repositories can create one which points at the * remote URI for some local service, then read the response from the * error message. To prevent this, censor response bodies out of error * messages. * * @param string Uncensored Mercurial command output. * @return string Censored Mercurial command output. */ private function censorMercurialErrorMessage($message) { return preg_replace( '/^---%<---.*/sm', pht('')."\n", $message); } /** * @task hg */ private function installMercurialHook() { $repository = $this->getRepository(); $path = $repository->getLocalPath().'/.hg/hgrc'; $identifier = $this->getHookContextIdentifier($repository); $root = dirname(phutil_get_library_root('phabricator')); $bin = $root.'/bin/commit-hook'; $data = array(); $data[] = '[hooks]'; // This hook handles normal pushes. $data[] = csprintf( 'pretxnchangegroup.phabricator = TERM=dumb %s %s %s', $bin, $identifier, 'pretxnchangegroup'); // This one handles creating bookmarks. $data[] = csprintf( 'prepushkey.phabricator = TERM=dumb %s %s %s', $bin, $identifier, 'prepushkey'); $data[] = null; $data = implode("\n", $data); $this->log('%s', pht('Installing commit hook config to "%s"...', $path)); Filesystem::writeFile($path, $data); } /* -( Pulling Subversion Working Copies )---------------------------------- */ /** * @task svn */ private function executeSubversionCreate() { $repository = $this->getRepository(); $path = rtrim($repository->getLocalPath(), '/'); execx('svnadmin create -- %s', $path); } /** * @task svn */ private function installSubversionHook() { $repository = $this->getRepository(); $root = $repository->getLocalPath(); $path = '/hooks/pre-commit'; $this->installHook($root.$path); $revprop_path = '/hooks/pre-revprop-change'; $revprop_argv = array( '--hook-mode', 'svn-revprop', ); $this->installHook($root.$revprop_path, $revprop_argv); } }