Page MenuHomePhabricator

D9045.id21479.diff
No OneTemporary

D9045.id21479.diff

diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -1985,8 +1985,10 @@
'PhabricatorRepositoryEngine' => 'applications/repository/engine/PhabricatorRepositoryEngine.php',
'PhabricatorRepositoryGitCommitChangeParserWorker' => 'applications/repository/worker/commitchangeparser/PhabricatorRepositoryGitCommitChangeParserWorker.php',
'PhabricatorRepositoryGitCommitMessageParserWorker' => 'applications/repository/worker/commitmessageparser/PhabricatorRepositoryGitCommitMessageParserWorker.php',
+ 'PhabricatorRepositoryGraphCache' => 'applications/repository/graphcache/PhabricatorRepositoryGraphCache.php',
'PhabricatorRepositoryGraphStream' => 'applications/repository/daemon/PhabricatorRepositoryGraphStream.php',
'PhabricatorRepositoryListController' => 'applications/repository/controller/PhabricatorRepositoryListController.php',
+ 'PhabricatorRepositoryManagementCacheWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php',
'PhabricatorRepositoryManagementDeleteWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementDeleteWorkflow.php',
'PhabricatorRepositoryManagementDiscoverWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementDiscoverWorkflow.php',
'PhabricatorRepositoryManagementEditWorkflow' => 'applications/repository/management/PhabricatorRepositoryManagementEditWorkflow.php',
@@ -4832,6 +4834,7 @@
'PhabricatorRepositoryGitCommitMessageParserWorker' => 'PhabricatorRepositoryCommitMessageParserWorker',
'PhabricatorRepositoryGraphStream' => 'Phobject',
'PhabricatorRepositoryListController' => 'PhabricatorRepositoryController',
+ 'PhabricatorRepositoryManagementCacheWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
'PhabricatorRepositoryManagementDeleteWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
'PhabricatorRepositoryManagementDiscoverWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
'PhabricatorRepositoryManagementEditWorkflow' => 'PhabricatorRepositoryManagementWorkflow',
diff --git a/src/applications/cache/PhabricatorCaches.php b/src/applications/cache/PhabricatorCaches.php
--- a/src/applications/cache/PhabricatorCaches.php
+++ b/src/applications/cache/PhabricatorCaches.php
@@ -9,6 +9,53 @@
return PhabricatorEnv::getEnvConfig('phabricator.cache-namespace');
}
+ private static function newStackFromCaches(array $caches) {
+ $caches = self::addNamespaceToCaches($caches);
+ $caches = self::addProfilerToCaches($caches);
+ return id(new PhutilKeyValueCacheStack())
+ ->setCaches($caches);
+ }
+
+
+/* -( Repository Graph Cache )--------------------------------------------- */
+
+
+ public static function getRepositoryGraphL1Cache() {
+ static $cache;
+ if (!$cache) {
+ $caches = self::buildRepositoryGraphL1Caches();
+ $cache = self::newStackFromCaches($caches);
+ }
+ return $cache;
+ }
+
+ private static function buildRepositoryGraphL1Caches() {
+ $caches = array();
+
+ $apc = new PhutilKeyValueCacheAPC();
+ if ($apc->isAvailable()) {
+ $caches[] = $apc;
+ }
+
+ return $caches;
+ }
+
+ public static function getRepositoryGraphL2Cache() {
+ static $cache;
+ if (!$cache) {
+ $caches = self::buildRepositoryGraphL2Caches();
+ $cache = self::newStackFromCaches($caches);
+ }
+ return $cache;
+ }
+
+ private static function buildRepositoryGraphL2Caches() {
+ $caches = array();
+ $caches[] = new PhabricatorKeyValueDatabaseCache();
+ return $caches;
+ }
+
+
/* -( Setup Cache )-------------------------------------------------------- */
@@ -30,10 +77,7 @@
static $cache;
if (!$cache) {
$caches = self::buildSetupCaches();
- $caches = self::addNamespaceToCaches($caches);
- $caches = self::addProfilerToCaches($caches);
- $cache = id(new PhutilKeyValueCacheStack())
- ->setCaches($caches);
+ $cache = self::newStackFromCaches($caches);
}
return $cache;
}
diff --git a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php
--- a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php
+++ b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php
@@ -575,6 +575,12 @@
}
$parent_ids[] = $parent_map[$parent];
}
+ } else {
+ // Write an explicit 0 so we can distinguish between "really no
+ // parents" and "data not available".
+ if (!$repository->isSVN()) {
+ $parent_ids = array(0);
+ }
}
$commit->openTransaction();
diff --git a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php
new file mode 100644
--- /dev/null
+++ b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php
@@ -0,0 +1,386 @@
+<?php
+
+/**
+ * Given a commit and a path, efficiently determine the most recent ancestor
+ * commit where the path was touched.
+ *
+ * In Git and Mercurial, log operations with a path are relatively slow. For
+ * example:
+ *
+ * git log -n1 <commit> -- <path>
+ *
+ * ...routinely takes several hundred milliseconds, and equivalent requests
+ * often take longer in Mercurial.
+ *
+ * Unfortunately, this operation is fundamental to rendering a repository for
+ * the web, and essentially everything else that's slow can be reduced to this
+ * plus some trivial work afterward. Making this fast is desirable and powerful,
+ * and allows us to make other things fast by expressing them in terms of this
+ * query.
+ *
+ * Because the query is fundamentally a graph query, it isn't easy to express
+ * in a reasonable way in MySQL, and we can't do round trips to the server to
+ * walk the graph without incurring huge performance penalties.
+ *
+ * However, the total amount of data in the graph is relatively small. By
+ * caching it in chunks and keeping it in APC, we can reasonably load and walk
+ * the graph in PHP quickly.
+ *
+ * For more context, see T2683.
+ *
+ * Structure of the Cache
+ * ======================
+ *
+ * The cache divides commits into buckets (see @{method:getBucketSize}). To
+ * walk the graph, we pull a commit's bucket. The bucket is a map from commit
+ * IDs to a list of parents and changed paths, separated by `null`. For
+ * example, a bucket might look like this:
+ *
+ * array(
+ * 1 => array(0, null, 17, 18),
+ * 2 => array(1, null, 4),
+ * // ...
+ * )
+ *
+ * This means that commit ID 1 has parent commit 0 (a special value meaning
+ * no parents) and affected path IDs 17 and 18. Commit ID 2 has parent commit 1,
+ * and affected path 4.
+ *
+ * This data structure attempts to balance compactness, ease of construction,
+ * simplicity of cache semantics, and lookup performance. In the average case,
+ * it appears to do a reasonable job at this.
+ *
+ * @task query Querying the Graph Cache
+ * @task cache Cache Internals
+ */
+final class PhabricatorRepositoryGraphCache {
+
+
+/* -( Querying the Graph Cache )------------------------------------------- */
+
+
+ /**
+ * Search the graph cache for the most modification to a path.
+ *
+ * @param int The commit ID to search ancestors of.
+ * @param int The path ID to search for changes to.
+ * @param float Maximum number of seconds to spend trying to satisfy this
+ * query using the graph cache. By default, `0.5` (500ms).
+ * @return mixed Commit ID, or `null` if no ancestors exist, or `false` if
+ * the graph cache was unable to determine the answer.
+ * @task query
+ */
+ public function loadLastModifiedCommitID($commit_id, $path_id, $time = 0.5) {
+ $commit_id = (int)$commit_id;
+ $path_id = (int)$path_id;
+
+ $bucket_data = null;
+ $data_key = null;
+ $seen = array();
+
+ $t_start = microtime(true);
+ $iterations = 0;
+ while (true) {
+ $bucket_key = $this->getBucketKey($commit_id);
+
+ if (($data_key != $bucket_key) || $bucket_data === null) {
+ $bucket_data = $this->getBucketData($bucket_key);
+ $data_key = $bucket_key;
+ }
+
+ if (empty($bucket_data[$commit_id])) {
+ // Rebuild the cache bucket, since the commit might be a very recent
+ // one that we'll pick up by rebuilding.
+
+ $bucket_data = $this->getBucketData($bucket_key, $bucket_data);
+ if (empty($bucket_data[$commit_id])) {
+ // A rebuild didn't help. This can occur legitimately if the commit
+ // is new and hasn't parsed yet.
+ return false;
+ }
+
+ // Otherwise, the rebuild gave us the data, so we can keep going.
+ }
+
+ // Sanity check so we can survive and recover from bad data.
+ if (isset($seen[$commit_id])) {
+ phlog(pht('Unexpected infinite loop in RepositoryGraphCache!'));
+ return false;
+ } else {
+ $seen[$commit_id] = true;
+ }
+
+ // `$data` is a list: the commit's parent IDs, followed by `null`,
+ // followed by the modified paths in ascending order. We figure out the
+ // first parent first, then check if the path was touched. If the path
+ // was touched, this is the commit we're after. If not, walk backward
+ // in the tree.
+
+ $items = $bucket_data[$commit_id];
+ $size = count($items);
+
+ // Walk past the parent information.
+ $parent_id = null;
+ for ($ii = 0; ; ++$ii) {
+ if ($items[$ii] === null) {
+ break;
+ }
+ if ($parent_id === null) {
+ $parent_id = $items[$ii];
+ }
+ }
+
+ // Look for a modification to the path.
+ for (; $ii < $size; ++$ii) {
+ $item = $items[$ii];
+ if ($item > $path_id) {
+ break;
+ }
+ if ($item === $path_id) {
+ return $commit_id;
+ }
+ }
+
+ if ($parent_id) {
+ $commit_id = $parent_id;
+
+ // Periodically check if we've spent too long looking for a result
+ // in the cache, and return so we can fall back to a VCS operation. This
+ // keeps us from having a degenerate worst case if, e.g., the cache
+ // is cold and we need to inspect a very large number of blocks
+ // to satisfy the query.
+
+ if (((++$iterations) % 64) === 0) {
+ $t_end = microtime(true);
+ if (($t_end - $t_start) > $time) {
+ return false;
+ }
+ }
+ continue;
+ }
+
+ // If we have an explicit 0, that means this commit really has no parents.
+ // Usually, it is the first commit in the repository.
+ if ($parent_id === 0) {
+ return null;
+ }
+
+ // If we didn't find a parent, the parent data isn't available. We fail
+ // to find an answer in the cache and fall back to querying the VCS.
+ return false;
+ }
+ }
+
+
+/* -( Cache Internals )---------------------------------------------------- */
+
+
+ /**
+ * Get the bucket key for a given commit ID.
+ *
+ * @param int Commit ID.
+ * @return int Bucket key.
+ * @task cache
+ */
+ private function getBucketKey($commit_id) {
+ return (int)floor($commit_id / $this->getBucketSize());
+ }
+
+
+ /**
+ * Get the cache key for a given bucket key (from @{method:getBucketKey}).
+ *
+ * @param int Bucket key.
+ * @return string Cache key.
+ * @task cache
+ */
+ private function getBucketCacheKey($bucket_key) {
+ static $prefix;
+
+ if ($prefix === null) {
+ $self = get_class($this);
+ $size = $this->getBucketSize();
+ $prefix = "{$self}:{$size}:1:";
+ }
+
+ return $prefix.$bucket_key;
+ }
+
+
+ /**
+ * Get the number of items per bucket.
+ *
+ * @return int Number of items to store per bucket.
+ * @task cache
+ */
+ private function getBucketSize() {
+ return 4096;
+ }
+
+
+ /**
+ * Retrieve or build a graph cache bucket from the cache.
+ *
+ * Normally, this operates as a readthrough cache call. It can also be used
+ * to force a cache update by passing the existing data to `$rebuild_data`.
+ *
+ * @param int Bucket key, from @{method:getBucketKey}.
+ * @param mixed Current data, to force a cache rebuild of this bucket.
+ * @return array Data from the cache.
+ * @task cache
+ */
+ private function getBucketData($bucket_key, $rebuild_data = null) {
+ $cache_key = $this->getBucketCacheKey($bucket_key);
+
+ // TODO: This cache stuff could be handled more gracefully, but the
+ // database cache currently requires values to be strings and needs
+ // some tweaking to support this as part of a stack. Our cache semantics
+ // here are also unusual (not purely readthrough) because this cache is
+ // appendable.
+
+ $cache_level1 = PhabricatorCaches::getRepositoryGraphL1Cache();
+ $cache_level2 = PhabricatorCaches::getRepositoryGraphL2Cache();
+ if ($rebuild_data === null) {
+ $bucket_data = $cache_level1->getKey($cache_key);
+ if ($bucket_data) {
+ return $bucket_data;
+ }
+
+ $bucket_data = $cache_level2->getKey($cache_key);
+ if ($bucket_data) {
+ $unserialized = @unserialize($bucket_data);
+ if ($unserialized) {
+ // Fill APC if we got a database hit but missed in APC.
+ $cache_level1->setKey($cache_key, $unserialized);
+ return $unserialized;
+ }
+ }
+ }
+
+ if (!is_array($rebuild_data)) {
+ $rebuild_data = array();
+ }
+
+ $bucket_data = $this->rebuildBucket($bucket_key, $rebuild_data);
+
+ // Don't bother writing the data if we didn't update anything.
+ if ($bucket_data !== $rebuild_data) {
+ $cache_level2->setKey($cache_key, serialize($bucket_data));
+ $cache_level1->setKey($cache_key, $bucket_data);
+ }
+
+ return $bucket_data;
+ }
+
+
+ /**
+ * Rebuild a cache bucket, amending existing data if avialable.
+ *
+ * @param int Bucket key, from @{method:getBucketKey}.
+ * @param array Existing bucket data.
+ * @return array Rebuilt bucket data.
+ * @task cache
+ */
+ private function rebuildBucket($bucket_key, array $current_data) {
+ $bucket_min = ($bucket_key * $this->getBucketSize());
+ $bucket_max = ($bucket_min + $this->getBucketSize()) - 1;
+
+ // We need to reload all of the commits in the bucket because there is
+ // no guarantee that they'll get parsed in order, so we can fill large
+ // commit IDs before small ones. Later on, we'll ignore the commits we
+ // already know about.
+
+ $table_commit = new PhabricatorRepositoryCommit();
+ $table_repository = new PhabricatorRepository();
+ $conn_r = $table_commit->establishConnection('r');
+
+ // Find all the Git and Mercurial commits in the block which have completed
+ // change import. We can't fill the cache accurately for commits which have
+ // not completed change import, so just pretend we don't know about them.
+ // In these cases, we will will ultimately fall back to VCS queries.
+
+ $commit_rows = queryfx_all(
+ $conn_r,
+ 'SELECT c.id FROM %T c
+ JOIN %T r ON c.repositoryID = r.id AND r.versionControlSystem IN (%Ls)
+ WHERE c.id BETWEEN %d AND %d
+ AND (c.importStatus & %d) = %d',
+ $table_commit->getTableName(),
+ $table_repository->getTableName(),
+ array(
+ PhabricatorRepositoryType::REPOSITORY_TYPE_GIT,
+ PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL,
+ ),
+ $bucket_min,
+ $bucket_max,
+ PhabricatorRepositoryCommit::IMPORTED_CHANGE,
+ PhabricatorRepositoryCommit::IMPORTED_CHANGE);
+
+ // If we don't have any data, just return the existing data.
+ if (!$commit_rows) {
+ return $current_data;
+ }
+
+ // Remove the commits we already have data for. We don't need to rebuild
+ // these. If there's nothing left, return the existing data.
+
+ $commit_ids = ipull($commit_rows, 'id', 'id');
+ $commit_ids = array_diff_key($commit_ids, $current_data);
+
+ if (!$commit_ids) {
+ return $current_data;
+ }
+
+ // Find all the path changes for the new commits.
+ $path_changes = queryfx_all(
+ $conn_r,
+ 'SELECT commitID, pathID FROM %T
+ WHERE commitID IN (%Ld)
+ AND (isDirect = 1 OR changeType = %d)',
+ PhabricatorRepository::TABLE_PATHCHANGE,
+ $commit_ids,
+ DifferentialChangeType::TYPE_CHILD);
+ $path_changes = igroup($path_changes, 'commitID');
+
+ // Find all the parents for the new commits.
+ $parents = queryfx_all(
+ $conn_r,
+ 'SELECT childCommitID, parentCommitID FROM %T
+ WHERE childCommitID IN (%Ld)
+ ORDER BY id ASC',
+ PhabricatorRepository::TABLE_PARENTS,
+ $commit_ids);
+ $parents = igroup($parents, 'childCommitID');
+
+ // Build the actual data for the cache.
+ foreach ($commit_ids as $commit_id) {
+ $parent_ids = array();
+ if (isset($parents[$commit_id])) {
+ foreach ($parents[$commit_id] as $row) {
+ $parent_ids[] = (int)$row['parentCommitID'];
+ }
+ }
+
+ if (isset($path_changes[$commit_id])) {
+ $path_ids = $path_changes[$commit_id];
+ foreach ($path_ids as $key => $path_id) {
+ $path_ids[$key] = (int)$path_id['pathID'];
+ }
+ sort($path_ids);
+ } else {
+ $path_ids = array();
+ }
+
+ $value = $parent_ids;
+ $value[] = null;
+ foreach ($path_ids as $path_id) {
+ $value[] = $path_id;
+ }
+
+ $current_data[$commit_id] = $value;
+ }
+
+ return $current_data;
+ }
+
+}
diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php
new file mode 100644
--- /dev/null
+++ b/src/applications/repository/management/PhabricatorRepositoryManagementCacheWorkflow.php
@@ -0,0 +1,92 @@
+<?php
+
+final class PhabricatorRepositoryManagementCacheWorkflow
+ extends PhabricatorRepositoryManagementWorkflow {
+
+ public function didConstruct() {
+ $this
+ ->setName('cache')
+ ->setExamples(
+ "**cache** [__options__] --commit __commit__ --path __path__")
+ ->setSynopsis(pht('Manage the repository graph cache.'))
+ ->setArguments(
+ array(
+ array(
+ 'name' => 'commit',
+ 'param' => 'commit',
+ 'help' => pht('Specify a commit to look up.'),
+ ),
+ array(
+ 'name' => 'path',
+ 'param' => 'path',
+ 'help' => pht('Specify a path to look up.'),
+ ),
+ ));
+ }
+
+ public function execute(PhutilArgumentParser $args) {
+
+ $commit_name = $args->getArg('commit');
+ if ($commit_name === null) {
+ throw new PhutilArgumentUsageException(
+ pht('Specify a commit to look up with `--commit`.'));
+ }
+ $commit = $this->loadNamedCommit($commit_name);
+
+ $path_name = $args->getArg('path');
+ if ($path_name === null) {
+ throw new PhutilArgumentUsageException(
+ pht('Specify a path to look up with `--path`.'));
+ }
+
+ $path_map = id(new DiffusionPathIDQuery(array($path_name)))
+ ->loadPathIDs();
+ if (empty($path_map[$path_name])) {
+ throw new PhutilArgumentUsageException(
+ pht('Path "%s" is not known to Phabricator.', $path_name));
+ }
+ $path_id = $path_map[$path_name];
+
+ $graph_cache = new PhabricatorRepositoryGraphCache();
+
+ $t_start = microtime(true);
+ $cache_result = $graph_cache->loadLastModifiedCommitID(
+ $commit->getID(),
+ $path_id);
+ $t_end = microtime(true);
+
+ $console = PhutilConsole::getConsole();
+
+ $console->writeOut(
+ "%s\n",
+ pht('Query took %s ms.', new PhutilNumber(1000 * ($t_end - $t_start))));
+
+ if ($cache_result === false) {
+ $console->writeOut(
+ "%s\n",
+ pht('Not found in graph cache.'));
+ } else if ($cache_result === null) {
+ $console->writeOut(
+ "%s\n",
+ pht('Path not modified in any ancestor commit.'));
+ } else {
+ $last = id(new DiffusionCommitQuery())
+ ->setViewer($this->getViewer())
+ ->withIDs(array($cache_result))
+ ->executeOne();
+ if (!$last) {
+ throw new Exception(pht('Cache returned bogus result!'));
+ }
+
+ $console->writeOut(
+ "%s\n",
+ pht(
+ 'Path was last changed at %s.',
+ $commit->getRepository()->formatCommitName(
+ $last->getcommitIdentifier())));
+ }
+
+ return 0;
+ }
+
+}
diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php
--- a/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php
+++ b/src/applications/repository/management/PhabricatorRepositoryManagementParentsWorkflow.php
@@ -116,12 +116,20 @@
}
$sql = array();
- foreach ($parents as $parent) {
+ if (!$parents) {
+ // Write an explicit 0 to indicate "no parents" instead of "no data".
$sql[] = qsprintf(
$conn_w,
- '(%d, %d)',
- $map[$child],
- $map[$parent]);
+ '(%d, 0)',
+ $map[$child]);
+ } else {
+ foreach ($parents as $parent) {
+ $sql[] = qsprintf(
+ $conn_w,
+ '(%d, %d)',
+ $map[$child],
+ $map[$parent]);
+ }
}
$commit_table->openTransaction();
diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php
--- a/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php
+++ b/src/applications/repository/management/PhabricatorRepositoryManagementWorkflow.php
@@ -32,6 +32,15 @@
return null;
}
+ return $this->loadNamedCommits($names);
+ }
+
+ protected function loadNamedCommit($name) {
+ $map = $this->loadNamedCommits(array($name));
+ return $map[$name];
+ }
+
+ protected function loadNamedCommits(array $names) {
$query = id(new DiffusionCommitQuery())
->setViewer($this->getViewer())
->withIdentifiers($names);
@@ -49,4 +58,5 @@
return $map;
}
+
}

File Metadata

Mime Type
text/plain
Expires
Sun, Jul 13, 2:56 PM (2 d, 21 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
8398062
Default Alt Text
D9045.id21479.diff (22 KB)

Event Timeline