Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F17659955
D9045.id21479.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
22 KB
Referenced Files
None
Subscribers
None
D9045.id21479.diff
View Options
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -1985,8 +1985,10 @@
'PhabricatorRepositoryEngine' => 'applications/repository/engine/PhabricatorRepositoryEngine.php',
'PhabricatorRepositoryGitCommitChangeParserWorker' => 'applications/repository/worker/commitchangeparser/PhabricatorRepositoryGitCommitChangeParserWorker.php',
'PhabricatorRepositoryGitCommitMessageParserWorker' => 'applications/repository/worker/commitmessageparser/PhabricatorRepositoryGitCommitMessageParserWorker.php',
+ 'PhabricatorRepositoryGraphCache' => 'applications/repository/graphcache/PhabricatorRepositoryGraphCache.php',
'PhabricatorRepositoryGraphStream' => 'applications/repository/daemon/PhabricatorRepositoryGraphStream.php',
'PhabricatorRepositoryListController' => 'applications/repository/controller/PhabricatorRepositoryListController.php',
+ 'PhabricatorRepositoryManagementCacheWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php',
'PhabricatorRepositoryManagementDeleteWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementDeleteWorkflow.php',
'PhabricatorRepositoryManagementDiscoverWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementDiscoverWorkflow.php',
'PhabricatorRepositoryManagementEditWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementEditWorkflow.php',
@@ -4832,6 +4834,7 @@
'PhabricatorRepositoryGitCommitMessageParserWorker' => 'PhabricatorRepositoryCommitMessageParserWorker',
'PhabricatorRepositoryGraphStream' => 'Phobject',
'PhabricatorRepositoryListController' => 'PhabricatorRepositoryController',
+ 'PhabricatorRepositoryManagementCacheWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
'PhabricatorRepositoryManagementDeleteWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
'PhabricatorRepositoryManagementDiscoverWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
'PhabricatorRepositoryManagementEditWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
diff --git a/src/applications/cache/PhabricatorCaches.php b/src/applications/cache/PhabricatorCaches.php
--- a/src/applications/cache/PhabricatorCaches.php
+++ b/src/applications/cache/PhabricatorCaches.php
@@ -9,6 +9,53 @@
return PhabricatorEnv::getEnvConfig('phabricator.cache-namespace');
}
+ private static function newStackFromCaches(array $caches) {
+ $caches = self::addNamespaceToCaches($caches);
+ $caches = self::addProfilerToCaches($caches);
+ return id(new PhutilKeyValueCacheStack())
+ ->setCaches($caches);
+ }
+
+
+/* -( Repository Graph Cache )--------------------------------------------- */
+
+
+ public static function getRepositoryGraphL1Cache() {
+ static $cache;
+ if (!$cache) {
+ $caches = self::buildRepositoryGraphL1Caches();
+ $cache = self::newStackFromCaches($caches);
+ }
+ return $cache;
+ }
+
+ private static function buildRepositoryGraphL1Caches() {
+ $caches = array();
+
+ $apc = new PhutilKeyValueCacheAPC();
+ if ($apc->isAvailable()) {
+ $caches[] = $apc;
+ }
+
+ return $caches;
+ }
+
+ public static function getRepositoryGraphL2Cache() {
+ static $cache;
+ if (!$cache) {
+ $caches = self::buildRepositoryGraphL2Caches();
+ $cache = self::newStackFromCaches($caches);
+ }
+ return $cache;
+ }
+
+ private static function buildRepositoryGraphL2Caches() {
+ $caches = array();
+ $caches[] = new PhabricatorKeyValueDatabaseCache();
+ return $caches;
+ }
+
+
/* -( Setup Cache )-------------------------------------------------------- */
@@ -30,10 +77,7 @@
static $cache;
if (!$cache) {
$caches = self::buildSetupCaches();
- $caches = self::addNamespaceToCaches($caches);
- $caches = self::addProfilerToCaches($caches);
- $cache = id(new PhutilKeyValueCacheStack())
- ->setCaches($caches);
+ $cache = self::newStackFromCaches($caches);
}
return $cache;
}
diff --git a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php
--- a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php
+++ b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php
@@ -575,6 +575,12 @@
}
$parent_ids[] = $parent_map[$parent];
}
+ } else {
+ // Write an explicit 0 so we can distinguish between "really no
+ // parents" and "data not available".
+ if (!$repository->isSVN()) {
+ $parent_ids = array(0);
+ }
}
$commit->openTransaction();
diff --git a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php
new file mode 100644
--- /dev/null
+++ b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php
@@ -0,0 +1,386 @@
+<?php
+
+/**
+ * Given a commit and a path, efficiently determine the most recent ancestor
+ * commit where the path was touched.
+ *
+ * In Git and Mercurial, log operations with a path are relatively slow. For
+ * example:
+ *
+ * git log -n1 <commit> -- <path>
+ *
+ * ...routinely takes several hundred milliseconds, and equivalent requests
+ * often take longer in Mercurial.
+ *
+ * Unfortunately, this operation is fundamental to rendering a repository for
+ * the web, and essentially everything else that's slow can be reduced to this
+ * plus some trivial work afterward. Making this fast is desirable and powerful,
+ * and allows us to make other things fast by expressing them in terms of this
+ * query.
+ *
+ * Because the query is fundamentally a graph query, it isn't easy to express
+ * in a reasonable way in MySQL, and we can't do round trips to the server to
+ * walk the graph without incurring huge performance penalties.
+ *
+ * However, the total amount of data in the graph is relatively small. By
+ * caching it in chunks and keeping it in APC, we can reasonably load and walk
+ * the graph in PHP quickly.
+ *
+ * For more context, see T2683.
+ *
+ * Structure of the Cache
+ * ======================
+ *
+ * The cache divides commits into buckets (see @{method:getBucketSize}). To
+ * walk the graph, we pull a commit's bucket. The bucket is a map from commit
+ * IDs to a list of parents and changed paths, separated by `null`. For
+ * example, a bucket might look like this:
+ *
+ * array(
+ * 1 => array(0, null, 17, 18),
+ * 2 => array(1, null, 4),
+ * // ...
+ * )
+ *
+ * This means that commit ID 1 has parent commit 0 (a special value meaning
+ * no parents) and affected path IDs 17 and 18. Commit ID 2 has parent commit 1,
+ * and affected path 4.
+ *
+ * This data structure attempts to balance compactness, ease of construction,
+ * simplicity of cache semantics, and lookup performance. In the average case,
+ * it appears to do a reasonable job at this.
+ *
+ * @task query Querying the Graph Cache
+ * @task cache Cache Internals
+ */
+final class PhabricatorRepositoryGraphCache {
+
+
+/* -( Querying the Graph Cache )------------------------------------------- */
+
+
+ /**
+ * Search the graph cache for the most modification to a path.
+ *
+ * @param int The commit ID to search ancestors of.
+ * @param int The path ID to search for changes to.
+ * @param float Maximum number of seconds to spend trying to satisfy this
+ * query using the graph cache. By default, `0.5` (500ms).
+ * @return mixed Commit ID, or `null` if no ancestors exist, or `false` if
+ * the graph cache was unable to determine the answer.
+ * @task query
+ */
+ public function loadLastModifiedCommitID($commit_id, $path_id, $time = 0.5) {
+ $commit_id = (int)$commit_id;
+ $path_id = (int)$path_id;
+
+ $bucket_data = null;
+ $data_key = null;
+ $seen = array();
+
+ $t_start = microtime(true);
+ $iterations = 0;
+ while (true) {
+ $bucket_key = $this->getBucketKey($commit_id);
+
+ if (($data_key != $bucket_key) || $bucket_data === null) {
+ $bucket_data = $this->getBucketData($bucket_key);
+ $data_key = $bucket_key;
+ }
+
+ if (empty($bucket_data[$commit_id])) {
+ // Rebuild the cache bucket, since the commit might be a very recent
+ // one that we'll pick up by rebuilding.
+
+ $bucket_data = $this->getBucketData($bucket_key, $bucket_data);
+ if (empty($bucket_data[$commit_id])) {
+ // A rebuild didn't help. This can occur legitimately if the commit
+ // is new and hasn't parsed yet.
+ return false;
+ }
+
+ // Otherwise, the rebuild gave us the data, so we can keep going.
+ }
+
+ // Sanity check so we can survive and recover from bad data.
+ if (isset($seen[$commit_id])) {
+ phlog(pht('Unexpected infinite loop in RepositoryGraphCache!'));
+ return false;
+ } else {
+ $seen[$commit_id] = true;
+ }
+
+ // `$data` is a list: the commit's parent IDs, followed by `null`,
+ // followed by the modified paths in ascending order. We figure out the
+ // first parent first, then check if the path was touched. If the path
+ // was touched, this is the commit we're after. If not, walk backward
+ // in the tree.
+
+ $items = $bucket_data[$commit_id];
+ $size = count($items);
+
+ // Walk past the parent information.
+ $parent_id = null;
+ for ($ii = 0; ; ++$ii) {
+ if ($items[$ii] === null) {
+ break;
+ }
+ if ($parent_id === null) {
+ $parent_id = $items[$ii];
+ }
+ }
+
+ // Look for a modification to the path.
+ for (; $ii < $size; ++$ii) {
+ $item = $items[$ii];
+ if ($item > $path_id) {
+ break;
+ }
+ if ($item === $path_id) {
+ return $commit_id;
+ }
+ }
+
+ if ($parent_id) {
+ $commit_id = $parent_id;
+
+ // Periodically check if we've spent too long looking for a result
+ // in the cache, and return so we can fall back to a VCS operation. This
+ // keeps us from having a degenerate worst case if, e.g., the cache
+ // is cold and we need to inspect a very large number of blocks
+ // to satisfy the query.
+
+ if (((++$iterations) % 64) === 0) {
+ $t_end = microtime(true);
+ if (($t_end - $t_start) > $time) {
+ return false;
+ }
+ }
+ continue;
+ }
+
+ // If we have an explicit 0, that means this commit really has no parents.
+ // Usually, it is the first commit in the repository.
+ if ($parent_id === 0) {
+ return null;
+ }
+
+ // If we didn't find a parent, the parent data isn't available. We fail
+ // to find an answer in the cache and fall back to querying the VCS.
+ return false;
+ }
+ }
+
+
+/* -( Cache Internals )---------------------------------------------------- */
+
+
+ /**
+ * Get the bucket key for a given commit ID.
+ *
+ * @param int Commit ID.
+ * @return int Bucket key.
+ * @task cache
+ */
+ private function getBucketKey($commit_id) {
+ return (int)floor($commit_id / $this->getBucketSize());
+ }
+
+
+ /**
+ * Get the cache key for a given bucket key (from @{method:getBucketKey}).
+ *
+ * @param int Bucket key.
+ * @return string Cache key.
+ * @task cache
+ */
+ private function getBucketCacheKey($bucket_key) {
+ static $prefix;
+
+ if ($prefix === null) {
+ $self = get_class($this);
+ $size = $this->getBucketSize();
+ $prefix = "{$self}:{$size}:1:";
+ }
+
+ return $prefix.$bucket_key;
+ }
+
+
+ /**
+ * Get the number of items per bucket.
+ *
+ * @return int Number of items to store per bucket.
+ * @task cache
+ */
+ private function getBucketSize() {
+ return 4096;
+ }
+
+
+ /**
+ * Retrieve or build a graph cache bucket from the cache.
+ *
+ * Normally, this operates as a readthrough cache call. It can also be used
+ * to force a cache update by passing the existing data to `$rebuild_data`.
+ *
+ * @param int Bucket key, from @{method:getBucketKey}.
+ * @param mixed Current data, to force a cache rebuild of this bucket.
+ * @return array Data from the cache.
+ * @task cache
+ */
+ private function getBucketData($bucket_key, $rebuild_data = null) {
+ $cache_key = $this->getBucketCacheKey($bucket_key);
+
+ // TODO: This cache stuff could be handled more gracefully, but the
+ // database cache currently requires values to be strings and needs
+ // some tweaking to support this as part of a stack. Our cache semantics
+ // here are also unusual (not purely readthrough) because this cache is
+ // appendable.
+
+ $cache_level1 = PhabricatorCaches::getRepositoryGraphL1Cache();
+ $cache_level2 = PhabricatorCaches::getRepositoryGraphL2Cache();
+ if ($rebuild_data === null) {
+ $bucket_data = $cache_level1->getKey($cache_key);
+ if ($bucket_data) {
+ return $bucket_data;
+ }
+
+ $bucket_data = $cache_level2->getKey($cache_key);
+ if ($bucket_data) {
+ $unserialized = @unserialize($bucket_data);
+ if ($unserialized) {
+ // Fill APC if we got a database hit but missed in APC.
+ $cache_level1->setKey($cache_key, $unserialized);
+ return $unserialized;
+ }
+ }
+ }
+
+ if (!is_array($rebuild_data)) {
+ $rebuild_data = array();
+ }
+
+ $bucket_data = $this->rebuildBucket($bucket_key, $rebuild_data);
+
+ // Don't bother writing the data if we didn't update anything.
+ if ($bucket_data !== $rebuild_data) {
+ $cache_level2->setKey($cache_key, serialize($bucket_data));
+ $cache_level1->setKey($cache_key, $bucket_data);
+ }
+
+ return $bucket_data;
+ }
+
+
+ /**
+ * Rebuild a cache bucket, amending existing data if avialable.
+ *
+ * @param int Bucket key, from @{method:getBucketKey}.
+ * @param array Existing bucket data.
+ * @return array Rebuilt bucket data.
+ * @task cache
+ */
+ private function rebuildBucket($bucket_key, array $current_data) {
+ $bucket_min = ($bucket_key * $this->getBucketSize());
+ $bucket_max = ($bucket_min + $this->getBucketSize()) - 1;
+
+ // We need to reload all of the commits in the bucket because there is
+ // no guarantee that they'll get parsed in order, so we can fill large
+ // commit IDs before small ones. Later on, we'll ignore the commits we
+ // already know about.
+
+ $table_commit = new PhabricatorRepositoryCommit();
+ $table_repository = new PhabricatorRepository();
+ $conn_r = $table_commit->establishConnection('r');
+
+ // Find all the Git and Mercurial commits in the block which have completed
+ // change import. We can't fill the cache accurately for commits which have
+ // not completed change import, so just pretend we don't know about them.
+ // In these cases, we will will ultimately fall back to VCS queries.
+
+ $commit_rows = queryfx_all(
+ $conn_r,
+ 'SELECT c.id FROM %T c
+ JOIN %T r ON c.repositoryID = r.id AND r.versionControlSystem IN (%Ls)
+ WHERE c.id BETWEEN %d AND %d
+ AND (c.importStatus & %d) = %d',
+ $table_commit->getTableName(),
+ $table_repository->getTableName(),
+ array(
+ PhabricatorRepositoryType::REPOSITORY_TYPE_GIT,
+ PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL,
+ ),
+ $bucket_min,
+ $bucket_max,
+ PhabricatorRepositoryCommit::IMPORTED_CHANGE,
+ PhabricatorRepositoryCommit::IMPORTED_CHANGE);
+
+ // If we don't have any data, just return the existing data.
+ if (!$commit_rows) {
+ return $current_data;
+ }
+
+ // Remove the commits we already have data for. We don't need to rebuild
+ // these. If there's nothing left, return the existing data.
+
+ $commit_ids = ipull($commit_rows, 'id', 'id');
+ $commit_ids = array_diff_key($commit_ids, $current_data);
+
+ if (!$commit_ids) {
+ return $current_data;
+ }
+
+ // Find all the path changes for the new commits.
+ $path_changes = queryfx_all(
+ $conn_r,
+ 'SELECT commitID, pathID FROM %T
+ WHERE commitID IN (%Ld)
+ AND (isDirect = 1 OR changeType = %d)',
+ PhabricatorRepository::TABLE_PATHCHANGE,
+ $commit_ids,
+ DifferentialChangeType::TYPE_CHILD);
+ $path_changes = igroup($path_changes, 'commitID');
+
+ // Find all the parents for the new commits.
+ $parents = queryfx_all(
+ $conn_r,
+ 'SELECT childCommitID, parentCommitID FROM %T
+ WHERE childCommitID IN (%Ld)
+ ORDER BY id ASC',
+ PhabricatorRepository::TABLE_PARENTS,
+ $commit_ids);
+ $parents = igroup($parents, 'childCommitID');
+
+ // Build the actual data for the cache.
+ foreach ($commit_ids as $commit_id) {
+ $parent_ids = array();
+ if (isset($parents[$commit_id])) {
+ foreach ($parents[$commit_id] as $row) {
+ $parent_ids[] = (int)$row['parentCommitID'];
+ }
+ }
+
+ if (isset($path_changes[$commit_id])) {
+ $path_ids = $path_changes[$commit_id];
+ foreach ($path_ids as $key => $path_id) {
+ $path_ids[$key] = (int)$path_id['pathID'];
+ }
+ sort($path_ids);
+ } else {
+ $path_ids = array();
+ }
+
+ $value = $parent_ids;
+ $value[] = null;
+ foreach ($path_ids as $path_id) {
+ $value[] = $path_id;
+ }
+
+ $current_data[$commit_id] = $value;
+ }
+
+ return $current_data;
+ }
+
+}
diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php
new file mode 100644
--- /dev/null
+++ b/src/applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php
@@ -0,0 +1,92 @@
+<?php
+
+final class PhabricatorRepositoryManagementCacheWorkflow
+ extends PhabricatorRepositoryManagementWorkflow {
+
+ public function didConstruct() {
+ $this
+ ->setName('cache')
+ ->setExamples(
+ "**cache** [__options__] --commit __commit__ --path __path__")
+ ->setSynopsis(pht('Manage the repository graph cache.'))
+ ->setArguments(
+ array(
+ array(
+ 'name' => 'commit',
+ 'param' => 'commit',
+ 'help' => pht('Specify a commit to look up.'),
+ ),
+ array(
+ 'name' => 'path',
+ 'param' => 'path',
+ 'help' => pht('Specify a path to look up.'),
+ ),
+ ));
+ }
+
+ public function execute(PhutilArgumentParser $args) {
+
+ $commit_name = $args->getArg('commit');
+ if ($commit_name === null) {
+ throw new PhutilArgumentUsageException(
+ pht('Specify a commit to look up with `--commit`.'));
+ }
+ $commit = $this->loadNamedCommit($commit_name);
+
+ $path_name = $args->getArg('path');
+ if ($path_name === null) {
+ throw new PhutilArgumentUsageException(
+ pht('Specify a path to look up with `--path`.'));
+ }
+
+ $path_map = id(new DiffusionPathIDQuery(array($path_name)))
+ ->loadPathIDs();
+ if (empty($path_map[$path_name])) {
+ throw new PhutilArgumentUsageException(
+ pht('Path "%s" is not known to Phabricator.', $path_name));
+ }
+ $path_id = $path_map[$path_name];
+
+ $graph_cache = new PhabricatorRepositoryGraphCache();
+
+ $t_start = microtime(true);
+ $cache_result = $graph_cache->loadLastModifiedCommitID(
+ $commit->getID(),
+ $path_id);
+ $t_end = microtime(true);
+
+ $console = PhutilConsole::getConsole();
+
+ $console->writeOut(
+ "%s\n",
+ pht('Query took %s ms.', new PhutilNumber(1000 * ($t_end - $t_start))));
+
+ if ($cache_result === false) {
+ $console->writeOut(
+ "%s\n",
+ pht('Not found in graph cache.'));
+ } else if ($cache_result === null) {
+ $console->writeOut(
+ "%s\n",
+ pht('Path not modified in any ancestor commit.'));
+ } else {
+ $last = id(new DiffusionCommitQuery())
+ ->setViewer($this->getViewer())
+ ->withIDs(array($cache_result))
+ ->executeOne();
+ if (!$last) {
+ throw new Exception(pht('Cache returned bogus result!'));
+ }
+
+ $console->writeOut(
+ "%s\n",
+ pht(
+ 'Path was last changed at %s.',
+ $commit->getRepository()->formatCommitName(
+ $last->getcommitIdentifier())));
+ }
+
+ return 0;
+ }
+
+}
diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php
--- a/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php
+++ b/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php
@@ -116,12 +116,20 @@
}
$sql = array();
- foreach ($parents as $parent) {
+ if (!$parents) {
+ // Write an explicit 0 to indicate "no parents" instead of "no data".
$sql[] = qsprintf(
$conn_w,
- '(%d, %d)',
- $map[$child],
- $map[$parent]);
+ '(%d, 0)',
+ $map[$child]);
+ } else {
+ foreach ($parents as $parent) {
+ $sql[] = qsprintf(
+ $conn_w,
+ '(%d, %d)',
+ $map[$child],
+ $map[$parent]);
+ }
}
$commit_table->openTransaction();
diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php
--- a/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php
+++ b/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php
@@ -32,6 +32,15 @@
return null;
}
+ return $this->loadNamedCommits($names);
+ }
+
+ protected function loadNamedCommit($name) {
+ $map = $this->loadNamedCommits(array($name));
+ return $map[$name];
+ }
+
+ protected function loadNamedCommits(array $names) {
$query = id(new DiffusionCommitQuery())
->setViewer($this->getViewer())
->withIdentifiers($names);
@@ -49,4 +58,5 @@
return $map;
}
+
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Jul 13, 2:56 PM (2 d, 21 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
8398062
Default Alt Text
D9045.id21479.diff (22 KB)
Attached To
Mode
D9045: Implement a chunked, APC-backed graph cache
Attached
Detach File
Event Timeline
Log In to Comment