diff --git a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php index dedb9de93a..21d95b227d 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php @@ -1,996 +1,904 @@ repairMode = $repair_mode; return $this; } public function getRepairMode() { return $this->repairMode; } /** * @task discovery */ public function discoverCommits() { $repository = $this->getRepository(); $lock = $this->newRepositoryLock($repository, 'repo.look', false); try { $lock->lock(); } catch (PhutilLockException $ex) { throw new DiffusionDaemonLockException( pht( 'Another process is currently discovering repository "%s", '. 'skipping discovery.', $repository->getDisplayName())); } try { $result = $this->discoverCommitsWithLock(); } catch (Exception $ex) { $lock->unlock(); throw $ex; } $lock->unlock(); return $result; } private function discoverCommitsWithLock() { $repository = $this->getRepository(); $viewer = $this->getViewer(); $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $refs = $this->discoverSubversionCommits(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $refs = $this->discoverMercurialCommits(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $refs = $this->discoverGitCommits(); break; default: throw new Exception(pht("Unknown VCS '%s'!", $vcs)); } if ($this->isInitialImport($refs)) { $this->log( pht( 'Discovered more than %s commit(s) in an empty repository, '. 'marking repository as importing.', new PhutilNumber(PhabricatorRepository::IMPORT_THRESHOLD))); $repository->markImporting(); } // Clear the working set cache. $this->workingSet = array(); $task_priority = $this->getImportTaskPriority($repository, $refs); // Record discovered commits and mark them in the cache. foreach ($refs as $ref) { $this->recordCommit( $repository, $ref->getIdentifier(), $ref->getEpoch(), $ref->getIsPermanent(), $ref->getParents(), $task_priority); $this->commitCache[$ref->getIdentifier()] = true; } $this->markUnreachableCommits($repository); $version = $this->getObservedVersion($repository); if ($version !== null) { id(new DiffusionRepositoryClusterEngine()) ->setViewer($viewer) ->setRepository($repository) ->synchronizeWorkingCopyAfterDiscovery($version); } return $refs; } /* -( Discovering Git Repositories )--------------------------------------- */ /** * @task git */ private function discoverGitCommits() { $repository = $this->getRepository(); $publisher = $repository->newPublisher(); $heads = id(new DiffusionLowLevelGitRefQuery()) ->setRepository($repository) ->execute(); if (!$heads) { // This repository has no heads at all, so we don't need to do // anything. Generally, this means the repository is empty. return array(); } $heads = $this->sortRefs($heads); $head_commits = mpull($heads, 'getCommitIdentifier'); $this->log( pht( 'Discovering commits in repository "%s".', $repository->getDisplayName())); $this->fillCommitCache($head_commits); $refs = array(); foreach ($heads as $ref) { $name = $ref->getShortName(); $commit = $ref->getCommitIdentifier(); $this->log( pht( 'Examining "%s" (%s) at "%s".', $name, $ref->getRefType(), $commit)); if (!$repository->shouldTrackRef($ref)) { $this->log(pht('Skipping, ref is untracked.')); continue; } if ($this->isKnownCommit($commit)) { $this->log(pht('Skipping, HEAD is known.')); continue; } // In Git, it's possible to tag anything. We just skip tags that don't // point to a commit. See T11301. $fields = $ref->getRawFields(); $ref_type = idx($fields, 'objecttype'); $tag_type = idx($fields, '*objecttype'); if ($ref_type != 'commit' && $tag_type != 'commit') { $this->log(pht('Skipping, this is not a commit.')); continue; } $this->log(pht('Looking for new commits.')); $head_refs = $this->discoverStreamAncestry( new PhabricatorGitGraphStream($repository, $commit), $commit, $publisher->isPermanentRef($ref)); $this->didDiscoverRefs($head_refs); $refs[] = $head_refs; } return array_mergev($refs); } /* -( Discovering Subversion Repositories )-------------------------------- */ /** * @task svn */ private function discoverSubversionCommits() { $repository = $this->getRepository(); if (!$repository->isHosted()) { $this->verifySubversionRoot($repository); } $upper_bound = null; $limit = 1; $refs = array(); do { // Find all the unknown commits on this path. Note that we permit // importing an SVN subdirectory rather than the entire repository, so // commits may be nonsequential. if ($upper_bound === null) { $at_rev = 'HEAD'; } else { $at_rev = ($upper_bound - 1); } try { list($xml, $stderr) = $repository->execxRemoteCommand( 'log --xml --quiet --limit %d %s', $limit, $repository->getSubversionBaseURI($at_rev)); } catch (CommandException $ex) { $stderr = $ex->getStderr(); if (preg_match('/(path|File) not found/', $stderr)) { // We've gone all the way back through history and this path was not // affected by earlier commits. break; } throw $ex; } $xml = phutil_utf8ize($xml); $log = new SimpleXMLElement($xml); foreach ($log->logentry as $entry) { $identifier = (int)$entry['revision']; $epoch = (int)strtotime((string)$entry->date[0]); $refs[$identifier] = id(new PhabricatorRepositoryCommitRef()) ->setIdentifier($identifier) ->setEpoch($epoch) ->setIsPermanent(true); if ($upper_bound === null) { $upper_bound = $identifier; } else { $upper_bound = min($upper_bound, $identifier); } } // Discover 2, 4, 8, ... 256 logs at a time. This allows us to initially // import large repositories fairly quickly, while pulling only as much // data as we need in the common case (when we've already imported the // repository and are just grabbing one commit at a time). $limit = min($limit * 2, 256); } while ($upper_bound > 1 && !$this->isKnownCommit($upper_bound)); krsort($refs); while ($refs && $this->isKnownCommit(last($refs)->getIdentifier())) { array_pop($refs); } $refs = array_reverse($refs); $this->didDiscoverRefs($refs); return $refs; } private function verifySubversionRoot(PhabricatorRepository $repository) { list($xml) = $repository->execxRemoteCommand( 'info --xml %s', $repository->getSubversionPathURI()); $xml = phutil_utf8ize($xml); $xml = new SimpleXMLElement($xml); $remote_root = (string)($xml->entry[0]->repository[0]->root[0]); $expect_root = $repository->getSubversionPathURI(); $normal_type_svn = ArcanistRepositoryURINormalizer::TYPE_SVN; $remote_normal = id(new ArcanistRepositoryURINormalizer( $normal_type_svn, $remote_root))->getNormalizedPath(); $expect_normal = id(new ArcanistRepositoryURINormalizer( $normal_type_svn, $expect_root))->getNormalizedPath(); if ($remote_normal != $expect_normal) { throw new Exception( pht( 'Repository "%s" does not have a correctly configured remote URI. '. 'The remote URI for a Subversion repository MUST point at the '. 'repository root. The root for this repository is "%s", but the '. 'configured URI is "%s". To resolve this error, set the remote URI '. 'to point at the repository root. If you want to import only part '. 'of a Subversion repository, use the "Import Only" option.', $repository->getDisplayName(), $remote_root, $expect_root)); } } /* -( Discovering Mercurial Repositories )--------------------------------- */ /** * @task hg */ private function discoverMercurialCommits() { $repository = $this->getRepository(); $branches = id(new DiffusionLowLevelMercurialBranchesQuery()) ->setRepository($repository) ->execute(); $this->fillCommitCache(mpull($branches, 'getCommitIdentifier')); $refs = array(); foreach ($branches as $branch) { // NOTE: Mercurial branches may have multiple heads, so the names may // not be unique. $name = $branch->getShortName(); $commit = $branch->getCommitIdentifier(); $this->log(pht('Examining branch "%s" head "%s".', $name, $commit)); if (!$repository->shouldTrackBranch($name)) { $this->log(pht('Skipping, branch is untracked.')); continue; } if ($this->isKnownCommit($commit)) { $this->log(pht('Skipping, this head is a known commit.')); continue; } $this->log(pht('Looking for new commits.')); $branch_refs = $this->discoverStreamAncestry( new PhabricatorMercurialGraphStream($repository, $commit), $commit, $is_permanent = true); $this->didDiscoverRefs($branch_refs); $refs[] = $branch_refs; } return array_mergev($refs); } /* -( Internals )---------------------------------------------------------- */ private function discoverStreamAncestry( PhabricatorRepositoryGraphStream $stream, $commit, $is_permanent) { $discover = array($commit); $graph = array(); $seen = array(); // Find all the reachable, undiscovered commits. Build a graph of the // edges. while ($discover) { $target = array_pop($discover); if (empty($graph[$target])) { $graph[$target] = array(); } $parents = $stream->getParents($target); foreach ($parents as $parent) { if ($this->isKnownCommit($parent)) { continue; } $graph[$target][$parent] = true; if (empty($seen[$parent])) { $seen[$parent] = true; $discover[] = $parent; } } } // Now, sort them topologically. $commits = $this->reduceGraph($graph); $refs = array(); foreach ($commits as $commit) { $epoch = $stream->getCommitDate($commit); // If the epoch doesn't fit into a uint32, treat it as though it stores // the current time. For discussion, see T11537. if ($epoch > 0xFFFFFFFF) { $epoch = PhabricatorTime::getNow(); } // If the epoch is not present at all, treat it as though it stores the // value "0". For discussion, see T12062. This behavior is consistent // with the behavior of "git show". if (!strlen($epoch)) { $epoch = 0; } $refs[] = id(new PhabricatorRepositoryCommitRef()) ->setIdentifier($commit) ->setEpoch($epoch) ->setIsPermanent($is_permanent) ->setParents($stream->getParents($commit)); } return $refs; } private function reduceGraph(array $edges) { foreach ($edges as $commit => $parents) { $edges[$commit] = array_keys($parents); } $graph = new PhutilDirectedScalarGraph(); $graph->addNodes($edges); $commits = $graph->getNodesInTopologicalOrder(); // NOTE: We want the most ancestral nodes first, so we need to reverse the // list we get out of AbstractDirectedGraph. $commits = array_reverse($commits); return $commits; } private function isKnownCommit($identifier) { if (isset($this->commitCache[$identifier])) { return true; } if (isset($this->workingSet[$identifier])) { return true; } $this->fillCommitCache(array($identifier)); return isset($this->commitCache[$identifier]); } private function fillCommitCache(array $identifiers) { if (!$identifiers) { return; } if ($this->repairMode) { // In repair mode, rediscover the entire repository, ignoring the // database state. The engine still maintains a local cache (the // "Working Set") but we just give up before looking in the database. return; } $max_size = self::MAX_COMMIT_CACHE_SIZE; // If we're filling more identifiers than would fit in the cache, ignore // the ones that don't fit. Because the cache is FIFO, overfilling it can // cause the entire cache to miss. See T12296. if (count($identifiers) > $max_size) { $identifiers = array_slice($identifiers, 0, $max_size); } // When filling the cache we ignore commits which have been marked as // unreachable, treating them as though they do not exist. When recording // commits later we'll revive commits that exist but are unreachable. $commits = id(new PhabricatorRepositoryCommit())->loadAllWhere( 'repositoryID = %d AND commitIdentifier IN (%Ls) AND (importStatus & %d) != %d', $this->getRepository()->getID(), $identifiers, PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE, PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE); foreach ($commits as $commit) { $this->commitCache[$commit->getCommitIdentifier()] = true; } while (count($this->commitCache) > $max_size) { array_shift($this->commitCache); } } /** * Sort refs so we process permanent refs first. This makes the whole import * process a little cheaper, since we can publish these commits the first * time through rather than catching them in the refs step. * * @task internal * * @param list List of refs. * @return list Sorted list of refs. */ private function sortRefs(array $refs) { $repository = $this->getRepository(); $publisher = $repository->newPublisher(); $head_refs = array(); $tail_refs = array(); foreach ($refs as $ref) { if ($publisher->isPermanentRef($ref)) { $head_refs[] = $ref; } else { $tail_refs[] = $ref; } } return array_merge($head_refs, $tail_refs); } private function recordCommit( PhabricatorRepository $repository, $commit_identifier, $epoch, $is_permanent, array $parents, $task_priority) { $commit = new PhabricatorRepositoryCommit(); $conn_w = $repository->establishConnection('w'); // First, try to revive an existing unreachable commit (if one exists) by // removing the "unreachable" flag. If we succeed, we don't need to do // anything else: we already discovered this commit some time ago. queryfx( $conn_w, 'UPDATE %T SET importStatus = (importStatus & ~%d) WHERE repositoryID = %d AND commitIdentifier = %s', $commit->getTableName(), PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE, $repository->getID(), $commit_identifier); if ($conn_w->getAffectedRows()) { $commit = $commit->loadOneWhere( 'repositoryID = %d AND commitIdentifier = %s', $repository->getID(), $commit_identifier); // After reviving a commit, schedule new daemons for it. $this->didDiscoverCommit($repository, $commit, $epoch, $task_priority); return; } $commit->setRepositoryID($repository->getID()); $commit->setCommitIdentifier($commit_identifier); $commit->setEpoch($epoch); if ($is_permanent) { $commit->setImportStatus(PhabricatorRepositoryCommit::IMPORTED_PERMANENT); } $data = new PhabricatorRepositoryCommitData(); try { // If this commit has parents, look up their IDs. The parent commits // should always exist already. $parent_ids = array(); if ($parents) { $parent_rows = queryfx_all( $conn_w, 'SELECT id, commitIdentifier FROM %T WHERE commitIdentifier IN (%Ls) AND repositoryID = %d', $commit->getTableName(), $parents, $repository->getID()); $parent_map = ipull($parent_rows, 'id', 'commitIdentifier'); foreach ($parents as $parent) { if (empty($parent_map[$parent])) { throw new Exception( pht('Unable to identify parent "%s"!', $parent)); } $parent_ids[] = $parent_map[$parent]; } } else { // Write an explicit 0 so we can distinguish between "really no // parents" and "data not available". if (!$repository->isSVN()) { $parent_ids = array(0); } } $commit->openTransaction(); $commit->save(); $data->setCommitID($commit->getID()); $data->save(); foreach ($parent_ids as $parent_id) { queryfx( $conn_w, 'INSERT IGNORE INTO %T (childCommitID, parentCommitID) VALUES (%d, %d)', PhabricatorRepository::TABLE_PARENTS, $commit->getID(), $parent_id); } $commit->saveTransaction(); $this->didDiscoverCommit($repository, $commit, $epoch, $task_priority); if ($this->repairMode) { // Normally, the query should throw a duplicate key exception. If we // reach this in repair mode, we've actually performed a repair. $this->log(pht('Repaired commit "%s".', $commit_identifier)); } PhutilEventEngine::dispatchEvent( new PhabricatorEvent( PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT, array( 'repository' => $repository, 'commit' => $commit, ))); } catch (AphrontDuplicateKeyQueryException $ex) { $commit->killTransaction(); // Ignore. This can happen because we discover the same new commit // more than once when looking at history, or because of races or // data inconsistency or cosmic radiation; in any case, we're still // in a good state if we ignore the failure. } } private function didDiscoverCommit( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, $epoch, $task_priority) { - $this->insertTask($repository, $commit, $task_priority); + $this->queueCommitImportTask( + $repository, + $commit->getID(), + $commit->getPHID(), + $task_priority, + $via = 'discovery'); // Update the repository summary table. queryfx( $commit->establishConnection('w'), 'INSERT INTO %T (repositoryID, size, lastCommitID, epoch) VALUES (%d, 1, %d, %d) ON DUPLICATE KEY UPDATE size = size + 1, lastCommitID = IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID), epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)', PhabricatorRepository::TABLE_SUMMARY, $repository->getID(), $commit->getID(), $epoch); } private function didDiscoverRefs(array $refs) { foreach ($refs as $ref) { $this->workingSet[$ref->getIdentifier()] = true; } } - private function insertTask( - PhabricatorRepository $repository, - PhabricatorRepositoryCommit $commit, - $task_priority, - $data = array()) { - - $vcs = $repository->getVersionControlSystem(); - switch ($vcs) { - case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: - $class = 'PhabricatorRepositoryGitCommitMessageParserWorker'; - break; - case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: - $class = 'PhabricatorRepositorySvnCommitMessageParserWorker'; - break; - case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: - $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker'; - break; - default: - throw new Exception(pht("Unknown repository type '%s'!", $vcs)); - } - - $data['commitID'] = $commit->getID(); - - $options = array( - 'priority' => $task_priority, - ); - - PhabricatorWorker::scheduleTask($class, $data, $options); - } - private function isInitialImport(array $refs) { $commit_count = count($refs); if ($commit_count <= PhabricatorRepository::IMPORT_THRESHOLD) { // If we fetched a small number of commits, assume it's an initial // commit or a stack of a few initial commits. return false; } $viewer = $this->getViewer(); $repository = $this->getRepository(); $any_commits = id(new DiffusionCommitQuery()) ->setViewer($viewer) ->withRepository($repository) ->setLimit(1) ->execute(); if ($any_commits) { // If the repository already has commits, this isn't an import. return false; } return true; } private function getObservedVersion(PhabricatorRepository $repository) { if ($repository->isHosted()) { return null; } if ($repository->isGit()) { return $this->getGitObservedVersion($repository); } return null; } private function getGitObservedVersion(PhabricatorRepository $repository) { $refs = id(new DiffusionLowLevelGitRefQuery()) ->setRepository($repository) ->execute(); if (!$refs) { return null; } // In Git, the observed version is the most recently discovered commit // at any repository HEAD. It's possible for this to regress temporarily // if a branch is pushed and then deleted. This is acceptable because it // doesn't do anything meaningfully bad and will fix itself on the next // push. $ref_identifiers = mpull($refs, 'getCommitIdentifier'); $ref_identifiers = array_fuse($ref_identifiers); $version = queryfx_one( $repository->establishConnection('w'), 'SELECT MAX(id) version FROM %T WHERE repositoryID = %d AND commitIdentifier IN (%Ls)', id(new PhabricatorRepositoryCommit())->getTableName(), $repository->getID(), $ref_identifiers); if (!$version) { return null; } return (int)$version['version']; } private function markUnreachableCommits(PhabricatorRepository $repository) { // For now, this is only supported for Git. if (!$repository->isGit()) { return; } // Find older versions of refs which we haven't processed yet. We're going // to make sure their commits are still reachable. $old_refs = id(new PhabricatorRepositoryOldRef())->loadAllWhere( 'repositoryPHID = %s', $repository->getPHID()); // If we don't have any refs to update, bail out before building a graph // stream. In particular, this improves behavior in empty repositories, // where `git log` exits with an error. if (!$old_refs) { return; } // We can share a single graph stream across all the checks we need to do. $stream = new PhabricatorGitGraphStream($repository); foreach ($old_refs as $old_ref) { $identifier = $old_ref->getCommitIdentifier(); $this->markUnreachableFrom($repository, $stream, $identifier); // If nothing threw an exception, we're all done with this ref. $old_ref->delete(); } } private function markUnreachableFrom( PhabricatorRepository $repository, PhabricatorGitGraphStream $stream, $identifier) { $unreachable = array(); $commit = id(new PhabricatorRepositoryCommit())->loadOneWhere( 'repositoryID = %s AND commitIdentifier = %s', $repository->getID(), $identifier); if (!$commit) { return; } $look = array($commit); $seen = array(); while ($look) { $target = array_pop($look); // If we've already checked this commit (for example, because history // branches and then merges) we don't need to check it again. $target_identifier = $target->getCommitIdentifier(); if (isset($seen[$target_identifier])) { continue; } $seen[$target_identifier] = true; // See PHI1688. If this commit is already marked as unreachable, we don't // need to consider its ancestors. This may skip a lot of work if many // branches with a lot of shared ancestry are deleted at the same time. if ($target->isUnreachable()) { continue; } try { $stream->getCommitDate($target_identifier); $reachable = true; } catch (Exception $ex) { $reachable = false; } if ($reachable) { // This commit is reachable, so we don't need to go any further // down this road. continue; } $unreachable[] = $target; // Find the commit's parents and check them for reachability, too. We // have to look in the database since we no may longer have the commit // in the repository. $rows = queryfx_all( $commit->establishConnection('w'), 'SELECT commit.* FROM %T commit JOIN %T parents ON commit.id = parents.parentCommitID WHERE parents.childCommitID = %d', $commit->getTableName(), PhabricatorRepository::TABLE_PARENTS, $target->getID()); if (!$rows) { continue; } $parents = id(new PhabricatorRepositoryCommit()) ->loadAllFromArray($rows); foreach ($parents as $parent) { $look[] = $parent; } } $unreachable = array_reverse($unreachable); $flag = PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE; foreach ($unreachable as $unreachable_commit) { $unreachable_commit->writeImportStatusFlag($flag); } // If anything was unreachable, just rebuild the whole summary table. // We can't really update it incrementally when a commit becomes // unreachable. if ($unreachable) { $this->rebuildSummaryTable($repository); } } private function rebuildSummaryTable(PhabricatorRepository $repository) { $conn_w = $repository->establishConnection('w'); $data = queryfx_one( $conn_w, 'SELECT COUNT(*) N, MAX(id) id, MAX(epoch) epoch FROM %T WHERE repositoryID = %d AND (importStatus & %d) != %d', id(new PhabricatorRepositoryCommit())->getTableName(), $repository->getID(), PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE, PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE); queryfx( $conn_w, 'INSERT INTO %T (repositoryID, size, lastCommitID, epoch) VALUES (%d, %d, %d, %d) ON DUPLICATE KEY UPDATE size = VALUES(size), lastCommitID = VALUES(lastCommitID), epoch = VALUES(epoch)', PhabricatorRepository::TABLE_SUMMARY, $repository->getID(), $data['N'], $data['id'], $data['epoch']); } - private function getImportTaskPriority( - PhabricatorRepository $repository, - array $refs) { - - // If the repository is importing for the first time, we schedule tasks - // at IMPORT priority, which is very low. Making progress on importing a - // new repository for the first time is less important than any other - // daemon task. - - // If the repository has finished importing and we're just catching up - // on recent commits, we usually schedule discovery at COMMIT priority, - // which is slightly below the default priority. - - // Note that followup tasks and triggered tasks (like those generated by - // Herald or Harbormaster) will queue at DEFAULT priority, so that each - // commit tends to fully import before we start the next one. This tends - // to give imports fairly predictable progress. See T11677 for some - // discussion. - - if ($repository->isImporting()) { - $this->log( - pht( - 'Importing %s commit(s) at low priority ("PRIORITY_IMPORT") '. - 'because this repository is still importing.', - phutil_count($refs))); - - return PhabricatorWorker::PRIORITY_IMPORT; - } - - // See T13369. If we've discovered a lot of commits at once, import them - // at lower priority. - - // This is mostly aimed at reducing the impact that synchronizing thousands - // of commits from a remote upstream has on other repositories. The queue - // is "mostly FIFO", so queueing a thousand commit imports can stall other - // repositories. - - // In a perfect world we'd probably give repositories round-robin queue - // priority, but we don't currently have the primitives for this and there - // isn't a strong case for building them. - - // Use "a whole lot of commits showed up at once" as a heuristic for - // detecting "someone synchronized an upstream", and import them at a lower - // priority to more closely approximate fair scheduling. - - if (count($refs) >= PhabricatorRepository::LOWPRI_THRESHOLD) { - $this->log( - pht( - 'Importing %s commit(s) at low priority ("PRIORITY_IMPORT") '. - 'because many commits were discovered at once.', - phutil_count($refs))); - - return PhabricatorWorker::PRIORITY_IMPORT; - } - - // Otherwise, import at normal priority. - - if ($refs) { - $this->log( - pht( - 'Importing %s commit(s) at normal priority ("PRIORITY_COMMIT").', - phutil_count($refs))); - } - - return PhabricatorWorker::PRIORITY_COMMIT; - } - } diff --git a/src/applications/repository/engine/PhabricatorRepositoryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryEngine.php index 12cd8f1ed6..c1ad203f0b 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryEngine.php @@ -1,86 +1,196 @@ repository = $repository; return $this; } /** * @task config */ protected function getRepository() { if ($this->repository === null) { throw new PhutilInvalidStateException('setRepository'); } return $this->repository; } /** * @task config */ public function setVerbose($verbose) { $this->verbose = $verbose; return $this; } /** * @task config */ public function getVerbose() { return $this->verbose; } public function getViewer() { return PhabricatorUser::getOmnipotentUser(); } protected function newRepositoryLock( PhabricatorRepository $repository, $lock_key, $lock_device_only) { $lock_parts = array( 'repositoryPHID' => $repository->getPHID(), ); if ($lock_device_only) { $device = AlmanacKeys::getLiveDevice(); if ($device) { $lock_parts['devicePHID'] = $device->getPHID(); } } return PhabricatorGlobalLock::newLock($lock_key, $lock_parts); } /** * @task internal */ protected function log($pattern /* ... */) { if ($this->getVerbose()) { $console = PhutilConsole::getConsole(); $argv = func_get_args(); array_unshift($argv, "%s\n"); call_user_func_array(array($console, 'writeOut'), $argv); } return $this; } + final protected function queueCommitImportTask( + PhabricatorRepository $repository, + $commit_id, + $commit_phid, + $task_priority, + $via = null) { + + $vcs = $repository->getVersionControlSystem(); + switch ($vcs) { + case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: + $class = 'PhabricatorRepositoryGitCommitMessageParserWorker'; + break; + case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: + $class = 'PhabricatorRepositorySvnCommitMessageParserWorker'; + break; + case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: + $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker'; + break; + default: + throw new Exception( + pht( + 'Unknown repository type "%s"!', + $vcs)); + } + + $data = array( + 'commitID' => $commit_id, + ); + + if ($via !== null) { + $data['via'] = $via; + } + + $options = array( + 'priority' => $task_priority, + 'objectPHID' => $commit_phid, + ); + + PhabricatorWorker::scheduleTask($class, $data, $options); + } + + final protected function getImportTaskPriority( + PhabricatorRepository $repository, + array $refs) { + assert_instances_of($refs, 'PhabricatorRepositoryCommitRef'); + + // If the repository is importing for the first time, we schedule tasks + // at IMPORT priority, which is very low. Making progress on importing a + // new repository for the first time is less important than any other + // daemon task. + + // If the repository has finished importing and we're just catching up + // on recent commits, we usually schedule discovery at COMMIT priority, + // which is slightly below the default priority. + + // Note that followup tasks and triggered tasks (like those generated by + // Herald or Harbormaster) will queue at DEFAULT priority, so that each + // commit tends to fully import before we start the next one. This tends + // to give imports fairly predictable progress. See T11677 for some + // discussion. + + if ($repository->isImporting()) { + $this->log( + pht( + 'Importing %s commit(s) at low priority ("PRIORITY_IMPORT") '. + 'because this repository is still importing.', + phutil_count($refs))); + + return PhabricatorWorker::PRIORITY_IMPORT; + } + + // See T13369. If we've discovered a lot of commits at once, import them + // at lower priority. + + // This is mostly aimed at reducing the impact that synchronizing thousands + // of commits from a remote upstream has on other repositories. The queue + // is "mostly FIFO", so queueing a thousand commit imports can stall other + // repositories. + + // In a perfect world we'd probably give repositories round-robin queue + // priority, but we don't currently have the primitives for this and there + // isn't a strong case for building them. + + // Use "a whole lot of commits showed up at once" as a heuristic for + // detecting "someone synchronized an upstream", and import them at a lower + // priority to more closely approximate fair scheduling. + + if (count($refs) >= PhabricatorRepository::LOWPRI_THRESHOLD) { + $this->log( + pht( + 'Importing %s commit(s) at low priority ("PRIORITY_IMPORT") '. + 'because many commits were discovered at once.', + phutil_count($refs))); + + return PhabricatorWorker::PRIORITY_IMPORT; + } + + // Otherwise, import at normal priority. + + if ($refs) { + $this->log( + pht( + 'Importing %s commit(s) at normal priority ("PRIORITY_COMMIT").', + phutil_count($refs))); + } + + return PhabricatorWorker::PRIORITY_COMMIT; + } + + } diff --git a/src/applications/repository/engine/PhabricatorRepositoryRefEngine.php b/src/applications/repository/engine/PhabricatorRepositoryRefEngine.php index 390e6e569d..679cbcdf67 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryRefEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryRefEngine.php @@ -1,717 +1,728 @@ rebuild = $rebuild; return $this; } public function getRebuild() { return $this->rebuild; } public function updateRefs() { $this->newPositions = array(); $this->deadPositions = array(); $this->permanentCommits = array(); $repository = $this->getRepository(); $viewer = $this->getViewer(); $branches_may_close = false; $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: // No meaningful refs of any type in Subversion. $maps = array(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $branches = $this->loadMercurialBranchPositions($repository); $bookmarks = $this->loadMercurialBookmarkPositions($repository); $maps = array( PhabricatorRepositoryRefCursor::TYPE_BRANCH => $branches, PhabricatorRepositoryRefCursor::TYPE_BOOKMARK => $bookmarks, ); $branches_may_close = true; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $maps = $this->loadGitRefPositions($repository); break; default: throw new Exception(pht('Unknown VCS "%s"!', $vcs)); } // Fill in any missing types with empty lists. $maps = $maps + array( PhabricatorRepositoryRefCursor::TYPE_BRANCH => array(), PhabricatorRepositoryRefCursor::TYPE_TAG => array(), PhabricatorRepositoryRefCursor::TYPE_BOOKMARK => array(), PhabricatorRepositoryRefCursor::TYPE_REF => array(), ); $all_cursors = id(new PhabricatorRepositoryRefCursorQuery()) ->setViewer($viewer) ->withRepositoryPHIDs(array($repository->getPHID())) ->needPositions(true) ->execute(); $cursor_groups = mgroup($all_cursors, 'getRefType'); // Find all the heads of permanent refs. $all_closing_heads = array(); foreach ($all_cursors as $cursor) { // See T13284. Note that we're considering whether this ref was a // permanent ref or not the last time we updated refs for this // repository. This allows us to handle things properly when a ref // is reconfigured from non-permanent to permanent. $was_permanent = $cursor->getIsPermanent(); if (!$was_permanent) { continue; } foreach ($cursor->getPositionIdentifiers() as $identifier) { $all_closing_heads[] = $identifier; } } $all_closing_heads = array_unique($all_closing_heads); $all_closing_heads = $this->removeMissingCommits($all_closing_heads); foreach ($maps as $type => $refs) { $cursor_group = idx($cursor_groups, $type, array()); $this->updateCursors($cursor_group, $refs, $type, $all_closing_heads); } if ($this->permanentCommits) { $this->setPermanentFlagOnCommits($this->permanentCommits); } $save_cursors = $this->getCursorsForUpdate($repository, $all_cursors); if ($this->newPositions || $this->deadPositions || $save_cursors) { $repository->openTransaction(); $this->saveNewPositions(); $this->deleteDeadPositions(); foreach ($save_cursors as $cursor) { $cursor->save(); } $repository->saveTransaction(); } $branches = $maps[PhabricatorRepositoryRefCursor::TYPE_BRANCH]; if ($branches && $branches_may_close) { $this->updateBranchStates($repository, $branches); } } private function getCursorsForUpdate( PhabricatorRepository $repository, array $cursors) { assert_instances_of($cursors, 'PhabricatorRepositoryRefCursor'); $publisher = $repository->newPublisher(); $results = array(); foreach ($cursors as $cursor) { $diffusion_ref = $cursor->newDiffusionRepositoryRef(); $is_permanent = $publisher->isPermanentRef($diffusion_ref); if ($is_permanent == $cursor->getIsPermanent()) { continue; } $cursor->setIsPermanent((int)$is_permanent); $results[] = $cursor; } return $results; } private function updateBranchStates( PhabricatorRepository $repository, array $branches) { assert_instances_of($branches, 'DiffusionRepositoryRef'); $viewer = $this->getViewer(); $all_cursors = id(new PhabricatorRepositoryRefCursorQuery()) ->setViewer($viewer) ->withRepositoryPHIDs(array($repository->getPHID())) ->needPositions(true) ->execute(); $state_map = array(); $type_branch = PhabricatorRepositoryRefCursor::TYPE_BRANCH; foreach ($all_cursors as $cursor) { if ($cursor->getRefType() !== $type_branch) { continue; } $raw_name = $cursor->getRefNameRaw(); foreach ($cursor->getPositions() as $position) { $hash = $position->getCommitIdentifier(); $state_map[$raw_name][$hash] = $position; } } $updates = array(); foreach ($branches as $branch) { $position = idx($state_map, $branch->getShortName(), array()); $position = idx($position, $branch->getCommitIdentifier()); if (!$position) { continue; } $fields = $branch->getRawFields(); $position_state = (bool)$position->getIsClosed(); $branch_state = (bool)idx($fields, 'closed'); if ($position_state != $branch_state) { $updates[$position->getID()] = (int)$branch_state; } } if ($updates) { $position_table = id(new PhabricatorRepositoryRefPosition()); $conn = $position_table->establishConnection('w'); $position_table->openTransaction(); foreach ($updates as $position_id => $branch_state) { queryfx( $conn, 'UPDATE %T SET isClosed = %d WHERE id = %d', $position_table->getTableName(), $branch_state, $position_id); } $position_table->saveTransaction(); } } private function markPositionNew( PhabricatorRepositoryRefPosition $position) { $this->newPositions[] = $position; return $this; } private function markPositionDead( PhabricatorRepositoryRefPosition $position) { $this->deadPositions[] = $position; return $this; } private function markPermanentCommits(array $identifiers) { foreach ($identifiers as $identifier) { $this->permanentCommits[$identifier] = $identifier; } return $this; } /** * Remove commits which no longer exist in the repository from a list. * * After a force push and garbage collection, we may have branch cursors which * point at commits which no longer exist. This can make commands issued later * fail. See T5839 for discussion. * * @param list List of commit identifiers. * @return list List with nonexistent identifiers removed. */ private function removeMissingCommits(array $identifiers) { if (!$identifiers) { return array(); } $resolved = id(new DiffusionLowLevelResolveRefsQuery()) ->setRepository($this->getRepository()) ->withRefs($identifiers) ->execute(); foreach ($identifiers as $key => $identifier) { if (empty($resolved[$identifier])) { unset($identifiers[$key]); } } return $identifiers; } private function updateCursors( array $cursors, array $new_refs, $ref_type, array $all_closing_heads) { $repository = $this->getRepository(); $publisher = $repository->newPublisher(); // NOTE: Mercurial branches may have multiple branch heads; this logic // is complex primarily to account for that. $cursors = mpull($cursors, null, 'getRefNameRaw'); // Group all the new ref values by their name. As above, these groups may // have multiple members in Mercurial. $ref_groups = mgroup($new_refs, 'getShortName'); foreach ($ref_groups as $name => $refs) { $new_commits = mpull($refs, 'getCommitIdentifier', 'getCommitIdentifier'); $ref_cursor = idx($cursors, $name); if ($ref_cursor) { $old_positions = $ref_cursor->getPositions(); } else { $old_positions = array(); } // We're going to delete all the cursors pointing at commits which are // no longer associated with the refs. This primarily makes the Mercurial // multiple head case easier, and means that when we update a ref we // delete the old one and write a new one. foreach ($old_positions as $old_position) { $hash = $old_position->getCommitIdentifier(); if (isset($new_commits[$hash])) { // This ref previously pointed at this commit, and still does. $this->log( pht( 'Ref %s "%s" still points at %s.', $ref_type, $name, $hash)); continue; } // This ref previously pointed at this commit, but no longer does. $this->log( pht( 'Ref %s "%s" no longer points at %s.', $ref_type, $name, $hash)); // Nuke the obsolete cursor. $this->markPositionDead($old_position); } // Now, we're going to insert new cursors for all the commits which are // associated with this ref that don't currently have cursors. $old_commits = mpull($old_positions, 'getCommitIdentifier'); $old_commits = array_fuse($old_commits); $added_commits = array_diff_key($new_commits, $old_commits); foreach ($added_commits as $identifier) { $this->log( pht( 'Ref %s "%s" now points at %s.', $ref_type, $name, $identifier)); if (!$ref_cursor) { // If this is the first time we've seen a particular ref (for // example, a new branch) we need to insert a RefCursor record // for it before we can insert a RefPosition. $ref_cursor = $this->newRefCursor( $repository, $ref_type, $name); } $new_position = id(new PhabricatorRepositoryRefPosition()) ->setCursorID($ref_cursor->getID()) ->setCommitIdentifier($identifier) ->setIsClosed(0); $this->markPositionNew($new_position); } $diffusion_ref = head($refs)->newDiffusionRepositoryRef(); if ($publisher->isPermanentRef($diffusion_ref)) { // See T13284. If this cursor was already marked as permanent, we // only need to publish the newly created ref positions. However, if // this cursor was not previously permanent but has become permanent, // we need to publish all the ref positions. // This corresponds to users reconfiguring a branch to make it // permanent without pushing any new commits to it. $is_rebuild = $this->getRebuild(); $was_permanent = $ref_cursor->getIsPermanent(); if ($is_rebuild || !$was_permanent) { $update_all = true; } else { $update_all = false; } if ($update_all) { $update_commits = $new_commits; } else { $update_commits = $added_commits; } if ($is_rebuild) { $exclude = array(); } else { $exclude = $all_closing_heads; } foreach ($update_commits as $identifier) { $new_identifiers = $this->loadNewCommitIdentifiers( $identifier, $exclude); $this->markPermanentCommits($new_identifiers); } } } // Find any cursors for refs which no longer exist. This happens when a // branch, tag or bookmark is deleted. foreach ($cursors as $name => $cursor) { if (!empty($ref_groups[$name])) { // This ref still has some positions, so we don't need to wipe it // out. Try the next one. continue; } foreach ($cursor->getPositions() as $position) { $this->log( pht( 'Ref %s "%s" no longer exists.', $cursor->getRefType(), $cursor->getRefName())); $this->markPositionDead($position); } } } /** * Find all ancestors of a new closing branch head which are not ancestors * of any old closing branch head. */ private function loadNewCommitIdentifiers( $new_head, array $all_closing_heads) { $repository = $this->getRepository(); $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: if ($all_closing_heads) { $parts = array(); foreach ($all_closing_heads as $head) { $parts[] = hgsprintf('%s', $head); } // See T5896. Mercurial can not parse an "X or Y or ..." rev list // with more than about 300 items, because it exceeds the maximum // allowed recursion depth. Split all the heads into chunks of // 256, and build a query like this: // // ((1 or 2 or ... or 255) or (256 or 257 or ... 511)) // // If we have more than 65535 heads, we'll do that again: // // (((1 or ...) or ...) or ((65536 or ...) or ...)) $chunk_size = 256; while (count($parts) > $chunk_size) { $chunks = array_chunk($parts, $chunk_size); foreach ($chunks as $key => $chunk) { $chunks[$key] = '('.implode(' or ', $chunk).')'; } $parts = array_values($chunks); } $parts = '('.implode(' or ', $parts).')'; list($stdout) = $this->getRepository()->execxLocalCommand( 'log --template %s --rev %s', '{node}\n', hgsprintf('%s', $new_head).' - '.$parts); } else { list($stdout) = $this->getRepository()->execxLocalCommand( 'log --template %s --rev %s', '{node}\n', hgsprintf('%s', $new_head)); } $stdout = trim($stdout); if (!strlen($stdout)) { return array(); } return phutil_split_lines($stdout, $retain_newlines = false); case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: if ($all_closing_heads) { // See PHI1474. This length of list may exceed the maximum size of // a command line argument list, so pipe the list in using "--stdin" // instead. $ref_list = array(); $ref_list[] = $new_head; foreach ($all_closing_heads as $old_head) { $ref_list[] = '^'.$old_head; } $ref_list[] = '--'; $ref_list = implode("\n", $ref_list)."\n"; $future = $this->getRepository()->getLocalCommandFuture( 'log %s --stdin --', '--format=%H'); list($stdout) = $future ->write($ref_list) ->resolvex(); } else { list($stdout) = $this->getRepository()->execxLocalCommand( 'log %s %s --', '--format=%H', gitsprintf('%s', $new_head)); } $stdout = trim($stdout); if (!strlen($stdout)) { return array(); } return phutil_split_lines($stdout, $retain_newlines = false); default: throw new Exception(pht('Unsupported VCS "%s"!', $vcs)); } } /** * Mark a list of commits as permanent, and queue workers for those commits * which don't already have the flag. */ private function setPermanentFlagOnCommits(array $identifiers) { $repository = $this->getRepository(); $commit_table = new PhabricatorRepositoryCommit(); $conn = $commit_table->establishConnection('w'); $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $class = 'PhabricatorRepositoryGitCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $class = 'PhabricatorRepositorySvnCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker'; break; default: throw new Exception(pht("Unknown repository type '%s'!", $vcs)); } $identifier_tokens = array(); foreach ($identifiers as $identifier) { $identifier_tokens[] = qsprintf( $conn, '%s', $identifier); } $all_commits = array(); foreach (PhabricatorLiskDAO::chunkSQL($identifier_tokens) as $chunk) { $rows = queryfx_all( $conn, 'SELECT id, phid, commitIdentifier, importStatus FROM %T WHERE repositoryID = %d AND commitIdentifier IN (%LQ)', $commit_table->getTableName(), $repository->getID(), $chunk); foreach ($rows as $row) { $all_commits[] = $row; } } + $commit_refs = array(); + foreach ($identifiers as $identifier) { + + // See T13591. This construction is a bit ad-hoc, but the priority + // function currently only cares about the number of refs we have + // discovered, so we'll get the right result even without filling + // these records out in detail. + + $commit_refs[] = id(new PhabricatorRepositoryCommitRef()) + ->setIdentifier($identifier); + } + + $task_priority = $this->getImportTaskPriority( + $repository, + $commit_refs); + $permanent_flag = PhabricatorRepositoryCommit::IMPORTED_PERMANENT; $published_flag = PhabricatorRepositoryCommit::IMPORTED_PUBLISH; $all_commits = ipull($all_commits, null, 'commitIdentifier'); foreach ($identifiers as $identifier) { $row = idx($all_commits, $identifier); if (!$row) { throw new Exception( pht( 'Commit "%s" has not been discovered yet! Run discovery before '. 'updating refs.', $identifier)); } $import_status = $row['importStatus']; if (!($import_status & $permanent_flag)) { // Set the "permanent" flag. $import_status = ($import_status | $permanent_flag); // See T13580. Clear the "published" flag, so publishing executes // again. We may have previously performed a no-op "publish" on the // commit to make sure it has all bits in the "IMPORTED_ALL" bitmask. $import_status = ($import_status & ~$published_flag); queryfx( $conn, 'UPDATE %T SET importStatus = %d WHERE id = %d', $commit_table->getTableName(), $import_status, $row['id']); - $data = array( - 'commitID' => $row['id'], - ); - - PhabricatorWorker::scheduleTask( - $class, - $data, - array( - 'priority' => PhabricatorWorker::PRIORITY_COMMIT, - 'objectPHID' => $row['phid'], - )); + $this->queueCommitImportTask( + $repository, + $row['id'], + $row['phid'], + $task_priority, + $via = 'ref'); } } return $this; } private function newRefCursor( PhabricatorRepository $repository, $ref_type, $ref_name) { $is_permanent = $this->isPermanentRef($ref_type, $ref_name); $cursor = id(new PhabricatorRepositoryRefCursor()) ->setRepositoryPHID($repository->getPHID()) ->setRefType($ref_type) ->setRefName($ref_name) ->setIsPermanent((int)$is_permanent); try { return $cursor->save(); } catch (AphrontDuplicateKeyQueryException $ex) { // If we raced another daemon to create this position and lost the race, // load the cursor the other daemon created instead. } $viewer = $this->getViewer(); $cursor = id(new PhabricatorRepositoryRefCursorQuery()) ->setViewer($viewer) ->withRepositoryPHIDs(array($repository->getPHID())) ->withRefTypes(array($ref_type)) ->withRefNames(array($ref_name)) ->needPositions(true) ->executeOne(); if (!$cursor) { throw new Exception( pht( 'Failed to create a new ref cursor (for "%s", of type "%s", in '. 'repository "%s") because it collided with an existing cursor, '. 'but then failed to load that cursor.', $ref_name, $ref_type, $repository->getDisplayName())); } return $cursor; } private function saveNewPositions() { $positions = $this->newPositions; foreach ($positions as $position) { try { $position->save(); } catch (AphrontDuplicateKeyQueryException $ex) { // We may race another daemon to create this position. If we do, and // we lose the race, that's fine: the other daemon did our work for // us and we can continue. } } $this->newPositions = array(); } private function deleteDeadPositions() { $positions = $this->deadPositions; $repository = $this->getRepository(); foreach ($positions as $position) { // Shove this ref into the old refs table so the discovery engine // can check if any commits have been rendered unreachable. id(new PhabricatorRepositoryOldRef()) ->setRepositoryPHID($repository->getPHID()) ->setCommitIdentifier($position->getCommitIdentifier()) ->save(); $position->delete(); } $this->deadPositions = array(); } /* -( Updating Git Refs )-------------------------------------------------- */ /** * @task git */ private function loadGitRefPositions(PhabricatorRepository $repository) { $refs = id(new DiffusionLowLevelGitRefQuery()) ->setRepository($repository) ->execute(); return mgroup($refs, 'getRefType'); } /* -( Updating Mercurial Refs )-------------------------------------------- */ /** * @task hg */ private function loadMercurialBranchPositions( PhabricatorRepository $repository) { return id(new DiffusionLowLevelMercurialBranchesQuery()) ->setRepository($repository) ->execute(); } /** * @task hg */ private function loadMercurialBookmarkPositions( PhabricatorRepository $repository) { // TODO: Implement support for Mercurial bookmarks. return array(); } }