diff --git a/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php b/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php index 30b436cfff..1135420a6e 100644 --- a/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php +++ b/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php @@ -1,156 +1,168 @@ '; } protected function defineCustomParamTypes() { return array( 'paths' => 'required map', ); } protected function getGitResult(ConduitAPIRequest $request) { $drequest = $this->getDiffusionRequest(); $repository = $drequest->getRepository(); $paths = $request->getValue('paths'); $results = $this->loadCommitsFromCache($paths); foreach ($paths as $path => $commit) { if (array_key_exists($path, $results)) { continue; } list($hash) = $repository->execxLocalCommand( 'log -n1 --format=%%H %s -- %s', $commit, $path); $results[$path] = trim($hash); } return $results; } protected function getSVNResult(ConduitAPIRequest $request) { $drequest = $this->getDiffusionRequest(); $repository = $drequest->getRepository(); $results = array(); foreach ($request->getValue('paths') as $path => $commit) { $history_result = DiffusionQuery::callConduitWithDiffusionRequest( $request->getUser(), $drequest, 'diffusion.historyquery', array( 'commit' => $commit, 'path' => $path, 'limit' => 1, 'offset' => 0, 'needDirectChanges' => true, 'needChildChanges' => true, )); $history_array = DiffusionPathChange::newFromConduit( $history_result['pathChanges']); if ($history_array) { $results[$path] = head($history_array) ->getCommit() ->getCommitIdentifier(); } } return $results; } protected function getMercurialResult(ConduitAPIRequest $request) { $drequest = $this->getDiffusionRequest(); $repository = $drequest->getRepository(); $paths = $request->getValue('paths'); $results = $this->loadCommitsFromCache($paths); foreach ($paths as $path => $commit) { if (array_key_exists($path, $results)) { continue; } list($hash) = $repository->execxLocalCommand( 'log --template %s --limit 1 --removed --rev %s -- %s', '{node}', hgsprintf('reverse(ancestors(%s))', $commit), nonempty(ltrim($path, '/'), '.')); $results[$path] = trim($hash); } return $results; } private function loadCommitsFromCache(array $map) { $drequest = $this->getDiffusionRequest(); $repository = $drequest->getRepository(); $path_map = id(new DiffusionPathIDQuery(array_keys($map))) ->loadPathIDs(); $commit_query = id(new DiffusionCommitQuery()) ->setViewer($drequest->getUser()) ->withRepository($repository) ->withIdentifiers(array_values($map)); $commit_query->execute(); $commit_map = $commit_query->getIdentifierMap(); $commit_map = mpull($commit_map, 'getID'); $graph_cache = new PhabricatorRepositoryGraphCache(); $results = array(); + + // Spend no more than this many total seconds trying to satisfy queries + // via the graph cache. + $remaining_time = 10.0; foreach ($map as $path => $commit) { $path_id = idx($path_map, $path); if (!$path_id) { continue; } $commit_id = idx($commit_map, $commit); if (!$commit_id) { continue; } + $t_start = microtime(true); $cache_result = $graph_cache->loadLastModifiedCommitID( $commit_id, - $path_id); + $path_id, + $remaining_time); + $t_end = microtime(true); if ($cache_result !== false) { $results[$path] = $cache_result; } + + $remaining_time -= ($t_end - $t_start); + if ($remaining_time <= 0) { + break; + } } if ($results) { $commits = id(new DiffusionCommitQuery()) ->setViewer($drequest->getUser()) ->withRepository($repository) ->withIDs($results) ->execute(); foreach ($results as $path => $id) { $commit = idx($commits, $id); if ($commit) { $results[$path] = $commit->getCommitIdentifier(); } else { unset($results[$path]); } } } return $results; } } diff --git a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php index d52182fcc9..c4adc61ab0 100644 --- a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php +++ b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php @@ -1,409 +1,418 @@ -- * * ...routinely takes several hundred milliseconds, and equivalent requests * often take longer in Mercurial. * * Unfortunately, this operation is fundamental to rendering a repository for * the web, and essentially everything else that's slow can be reduced to this * plus some trivial work afterward. Making this fast is desirable and powerful, * and allows us to make other things fast by expressing them in terms of this * query. * * Because the query is fundamentally a graph query, it isn't easy to express * in a reasonable way in MySQL, and we can't do round trips to the server to * walk the graph without incurring huge performance penalties. * * However, the total amount of data in the graph is relatively small. By * caching it in chunks and keeping it in APC, we can reasonably load and walk * the graph in PHP quickly. * * For more context, see T2683. * * Structure of the Cache * ====================== * * The cache divides commits into buckets (see @{method:getBucketSize}). To * walk the graph, we pull a commit's bucket. The bucket is a map from commit * IDs to a list of parents and changed paths, separated by `null`. For * example, a bucket might look like this: * * array( * 1 => array(0, null, 17, 18), * 2 => array(1, null, 4), * // ... * ) * * This means that commit ID 1 has parent commit 0 (a special value meaning * no parents) and affected path IDs 17 and 18. Commit ID 2 has parent commit 1, * and affected path 4. * * This data structure attempts to balance compactness, ease of construction, * simplicity of cache semantics, and lookup performance. In the average case, * it appears to do a reasonable job at this. * * @task query Querying the Graph Cache * @task cache Cache Internals */ final class PhabricatorRepositoryGraphCache extends Phobject { private $rebuiltKeys = array(); /* -( Querying the Graph Cache )------------------------------------------- */ /** * Search the graph cache for the most modification to a path. * * @param int The commit ID to search ancestors of. * @param int The path ID to search for changes to. * @param float Maximum number of seconds to spend trying to satisfy this * query using the graph cache. By default, `0.5` (500ms). * @return mixed Commit ID, or `null` if no ancestors exist, or `false` if * the graph cache was unable to determine the answer. * @task query */ public function loadLastModifiedCommitID($commit_id, $path_id, $time = 0.5) { $commit_id = (int)$commit_id; $path_id = (int)$path_id; $bucket_data = null; $data_key = null; $seen = array(); $t_start = microtime(true); $iterations = 0; while (true) { $bucket_key = $this->getBucketKey($commit_id); if (($data_key != $bucket_key) || $bucket_data === null) { $bucket_data = $this->getBucketData($bucket_key); $data_key = $bucket_key; } if (empty($bucket_data[$commit_id])) { // Rebuild the cache bucket, since the commit might be a very recent // one that we'll pick up by rebuilding. $bucket_data = $this->getBucketData($bucket_key, $bucket_data); if (empty($bucket_data[$commit_id])) { // A rebuild didn't help. This can occur legitimately if the commit // is new and hasn't parsed yet. return false; } // Otherwise, the rebuild gave us the data, so we can keep going. + + $did_fill = true; + } else { + $did_fill = false; } // Sanity check so we can survive and recover from bad data. if (isset($seen[$commit_id])) { phlog(pht('Unexpected infinite loop in %s!', __CLASS__)); return false; } else { $seen[$commit_id] = true; } // `$data` is a list: the commit's parent IDs, followed by `null`, // followed by the modified paths in ascending order. We figure out the // first parent first, then check if the path was touched. If the path // was touched, this is the commit we're after. If not, walk backward // in the tree. $items = $bucket_data[$commit_id]; $size = count($items); // Walk past the parent information. $parent_id = null; for ($ii = 0;; ++$ii) { if ($items[$ii] === null) { break; } if ($parent_id === null) { $parent_id = $items[$ii]; } } // Look for a modification to the path. for (; $ii < $size; ++$ii) { $item = $items[$ii]; if ($item > $path_id) { break; } if ($item === $path_id) { return $commit_id; } } if ($parent_id) { $commit_id = $parent_id; // Periodically check if we've spent too long looking for a result - // in the cache, and return so we can fall back to a VCS operation. This - // keeps us from having a degenerate worst case if, e.g., the cache - // is cold and we need to inspect a very large number of blocks + // in the cache, and return so we can fall back to a VCS operation. + // This keeps us from having a degenerate worst case if, e.g., the + // cache is cold and we need to inspect a very large number of blocks // to satisfy the query. - if (((++$iterations) % 64) === 0) { + ++$iterations; + + // If we performed a cache fill in this cycle, always check the time + // limit, since cache fills may take a significant amount of time. + + if ($did_fill || ($iterations % 64 === 0)) { $t_end = microtime(true); if (($t_end - $t_start) > $time) { return false; } } continue; } // If we have an explicit 0, that means this commit really has no parents. // Usually, it is the first commit in the repository. if ($parent_id === 0) { return null; } // If we didn't find a parent, the parent data isn't available. We fail // to find an answer in the cache and fall back to querying the VCS. return false; } } /* -( Cache Internals )---------------------------------------------------- */ /** * Get the bucket key for a given commit ID. * * @param int Commit ID. * @return int Bucket key. * @task cache */ private function getBucketKey($commit_id) { return (int)floor($commit_id / $this->getBucketSize()); } /** * Get the cache key for a given bucket key (from @{method:getBucketKey}). * * @param int Bucket key. * @return string Cache key. * @task cache */ private function getBucketCacheKey($bucket_key) { static $prefix; if ($prefix === null) { $self = get_class($this); $size = $this->getBucketSize(); $prefix = "{$self}:{$size}:2:"; } return $prefix.$bucket_key; } /** * Get the number of items per bucket. * * @return int Number of items to store per bucket. * @task cache */ private function getBucketSize() { return 4096; } /** * Retrieve or build a graph cache bucket from the cache. * * Normally, this operates as a readthrough cache call. It can also be used * to force a cache update by passing the existing data to `$rebuild_data`. * * @param int Bucket key, from @{method:getBucketKey}. * @param mixed Current data, to force a cache rebuild of this bucket. * @return array Data from the cache. * @task cache */ private function getBucketData($bucket_key, $rebuild_data = null) { $cache_key = $this->getBucketCacheKey($bucket_key); // TODO: This cache stuff could be handled more gracefully, but the // database cache currently requires values to be strings and needs // some tweaking to support this as part of a stack. Our cache semantics // here are also unusual (not purely readthrough) because this cache is // appendable. $cache_level1 = PhabricatorCaches::getRepositoryGraphL1Cache(); $cache_level2 = PhabricatorCaches::getRepositoryGraphL2Cache(); if ($rebuild_data === null) { $bucket_data = $cache_level1->getKey($cache_key); if ($bucket_data) { return $bucket_data; } $bucket_data = $cache_level2->getKey($cache_key); if ($bucket_data) { $unserialized = @unserialize($bucket_data); if ($unserialized) { // Fill APC if we got a database hit but missed in APC. $cache_level1->setKey($cache_key, $unserialized); return $unserialized; } } } if (!is_array($rebuild_data)) { $rebuild_data = array(); } $bucket_data = $this->rebuildBucket($bucket_key, $rebuild_data); // Don't bother writing the data if we didn't update anything. if ($bucket_data !== $rebuild_data) { $cache_level2->setKey($cache_key, serialize($bucket_data)); $cache_level1->setKey($cache_key, $bucket_data); } return $bucket_data; } /** * Rebuild a cache bucket, amending existing data if available. * * @param int Bucket key, from @{method:getBucketKey}. * @param array Existing bucket data. * @return array Rebuilt bucket data. * @task cache */ private function rebuildBucket($bucket_key, array $current_data) { // First, check if we've already rebuilt this bucket. In some cases (like // browsing a repository at some commit) it's common to issue many lookups // against one commit. If that commit has been discovered but not yet // fully imported, we'll repeatedly attempt to rebuild the bucket. If the // first rebuild did not work, subsequent rebuilds are very unlikely to // have any effect. We can just skip the rebuild in these cases. if (isset($this->rebuiltKeys[$bucket_key])) { return $current_data; } else { $this->rebuiltKeys[$bucket_key] = true; } $bucket_min = ($bucket_key * $this->getBucketSize()); $bucket_max = ($bucket_min + $this->getBucketSize()) - 1; // We need to reload all of the commits in the bucket because there is // no guarantee that they'll get parsed in order, so we can fill large // commit IDs before small ones. Later on, we'll ignore the commits we // already know about. $table_commit = new PhabricatorRepositoryCommit(); $table_repository = new PhabricatorRepository(); $conn_r = $table_commit->establishConnection('r'); // Find all the Git and Mercurial commits in the block which have completed // change import. We can't fill the cache accurately for commits which have // not completed change import, so just pretend we don't know about them. // In these cases, we will ultimately fall back to VCS queries. $commit_rows = queryfx_all( $conn_r, 'SELECT c.id FROM %T c JOIN %T r ON c.repositoryID = r.id AND r.versionControlSystem IN (%Ls) WHERE c.id BETWEEN %d AND %d AND (c.importStatus & %d) = %d', $table_commit->getTableName(), $table_repository->getTableName(), array( PhabricatorRepositoryType::REPOSITORY_TYPE_GIT, PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL, ), $bucket_min, $bucket_max, PhabricatorRepositoryCommit::IMPORTED_CHANGE, PhabricatorRepositoryCommit::IMPORTED_CHANGE); // If we don't have any data, just return the existing data. if (!$commit_rows) { return $current_data; } // Remove the commits we already have data for. We don't need to rebuild // these. If there's nothing left, return the existing data. $commit_ids = ipull($commit_rows, 'id', 'id'); $commit_ids = array_diff_key($commit_ids, $current_data); if (!$commit_ids) { return $current_data; } // Find all the path changes for the new commits. $path_changes = queryfx_all( $conn_r, 'SELECT commitID, pathID FROM %T WHERE commitID IN (%Ld) AND (isDirect = 1 OR changeType = %d)', PhabricatorRepository::TABLE_PATHCHANGE, $commit_ids, DifferentialChangeType::TYPE_CHILD); $path_changes = igroup($path_changes, 'commitID'); // Find all the parents for the new commits. $parents = queryfx_all( $conn_r, 'SELECT childCommitID, parentCommitID FROM %T WHERE childCommitID IN (%Ld) ORDER BY id ASC', PhabricatorRepository::TABLE_PARENTS, $commit_ids); $parents = igroup($parents, 'childCommitID'); // Build the actual data for the cache. foreach ($commit_ids as $commit_id) { $parent_ids = array(); if (!empty($parents[$commit_id])) { foreach ($parents[$commit_id] as $row) { $parent_ids[] = (int)$row['parentCommitID']; } } else { // We expect all rows to have parents (commits with no parents get // an explicit "0" placeholder). If we're in an older repository, the // parent information might not have been populated yet. Decline to fill // the cache if we don't have the parent information, since the fill // will be incorrect. continue; } if (isset($path_changes[$commit_id])) { $path_ids = $path_changes[$commit_id]; foreach ($path_ids as $key => $path_id) { $path_ids[$key] = (int)$path_id['pathID']; } sort($path_ids); } else { $path_ids = array(); } $value = $parent_ids; $value[] = null; foreach ($path_ids as $path_id) { $value[] = $path_id; } $current_data[$commit_id] = $value; } return $current_data; } }