diff --git a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryCommitChangeParserWorker.php
index 9ff7f89234..35d88d2eca 100644
--- a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryCommitChangeParserWorker.php
+++ b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryCommitChangeParserWorker.php
@@ -1,77 +1,102 @@
getCommitIdentifier();
+ $callsign = $repository->getCallsign();
+ $full_name = 'r'.$callsign.$identifier;
+
+ $this->log("Parsing %s...\n", $full_name);
+ if ($this->isBadCommit($full_name)) {
+ $this->log("This commit is marked bad!");
+ $result = null;
+ } else {
+ $result = $this->parseCommitChanges($repository, $commit);
+ }
+
+ $this->finishParse();
+
+ return $result;
+ }
+
public static function lookupOrCreatePaths(array $paths) {
$repository = new PhabricatorRepository();
$conn_w = $repository->establishConnection('w');
$result_map = self::lookupPaths($paths);
$missing_paths = array_fill_keys($paths, true);
$missing_paths = array_diff_key($missing_paths, $result_map);
$missing_paths = array_keys($missing_paths);
if ($missing_paths) {
foreach (array_chunk($missing_paths, 128) as $path_chunk) {
$sql = array();
foreach ($path_chunk as $path) {
$sql[] = qsprintf($conn_w, '(%s, %s)', $path, md5($path));
}
queryfx(
$conn_w,
'INSERT IGNORE INTO %T (path, pathHash) VALUES %Q',
PhabricatorRepository::TABLE_PATH,
implode(', ', $sql));
}
$result_map += self::lookupPaths($missing_paths);
}
return $result_map;
}
private static function lookupPaths(array $paths) {
$repository = new PhabricatorRepository();
$conn_w = $repository->establishConnection('w');
$result_map = array();
foreach (array_chunk($paths, 128) as $path_chunk) {
$chunk_map = queryfx_all(
$conn_w,
'SELECT path, id FROM %T WHERE pathHash IN (%Ls)',
PhabricatorRepository::TABLE_PATH,
array_map('md5', $path_chunk));
foreach ($chunk_map as $row) {
$result_map[$row['path']] = $row['id'];
}
}
return $result_map;
}
protected function finishParse() {
$commit = $this->commit;
$commit->writeImportStatusFlag(
PhabricatorRepositoryCommit::IMPORTED_CHANGE);
id(new PhabricatorSearchIndexer())
->indexDocumentByPHID($commit->getPHID());
PhabricatorOwnersPackagePathValidator::updateOwnersPackagePaths($commit);
if ($this->shouldQueueFollowupTasks()) {
PhabricatorWorker::scheduleTask(
'PhabricatorRepositoryCommitOwnersWorker',
array(
'commitID' => $commit->getID(),
));
}
}
}
diff --git a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryGitCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryGitCommitChangeParserWorker.php
index acc047464a..da2cdbf18f 100644
--- a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryGitCommitChangeParserWorker.php
+++ b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryGitCommitChangeParserWorker.php
@@ -1,279 +1,270 @@
getCallsign().$commit->getCommitIdentifier();
- echo "Parsing {$full_name}...\n";
- if ($this->isBadCommit($full_name)) {
- echo "This commit is marked bad!\n";
- return;
- }
-
// Check if the commit has parents. We're testing to see whether it is the
// first commit in history (in which case we must use "git log") or some
// other commit (in which case we can use "git diff"). We'd rather use
// "git diff" because it has the right behavior for merge commits, but
// it requires the commit to have a parent that we can diff against. The
// first commit doesn't, so "commit^" is not a valid ref.
list($parents) = $repository->execxLocalCommand(
'log -n1 --format=%s %s',
'%P',
$commit->getCommitIdentifier());
$use_log = !strlen(trim($parents));
if ($use_log) {
// This is the first commit so we need to use "log". We know it's not a
// merge commit because it couldn't be merging anything, so this is safe.
// NOTE: "--pretty=format: " is to disable diff output, we only want the
// part we get from "--raw".
list($raw) = $repository->execxLocalCommand(
'log -n1 -M -C -B --find-copies-harder --raw -t '.
'--pretty=format: --abbrev=40 %s',
$commit->getCommitIdentifier());
} else {
// Otherwise, we can use "diff", which will give us output for merges.
// We diff against the first parent, as this is generally the expectation
// and results in sensible behavior.
list($raw) = $repository->execxLocalCommand(
'diff -n1 -M -C -B --find-copies-harder --raw -t '.
'--abbrev=40 %s^1 %s',
$commit->getCommitIdentifier(),
$commit->getCommitIdentifier());
}
$changes = array();
$move_away = array();
$copy_away = array();
$lines = explode("\n", $raw);
foreach ($lines as $line) {
if (!strlen(trim($line))) {
continue;
}
list($old_mode, $new_mode,
$old_hash, $new_hash,
$more_stuff) = preg_split('/ +/', $line, 5);
// We may only have two pieces here.
list($action, $src_path, $dst_path) = array_merge(
explode("\t", $more_stuff),
array(null));
// Normalize the paths for consistency with the SVN workflow.
$src_path = '/'.$src_path;
if ($dst_path) {
$dst_path = '/'.$dst_path;
}
$old_mode = intval($old_mode, 8);
$new_mode = intval($new_mode, 8);
switch ($new_mode & 0160000) {
case 0160000:
$file_type = DifferentialChangeType::FILE_SUBMODULE;
break;
case 0120000:
$file_type = DifferentialChangeType::FILE_SYMLINK;
break;
case 0040000:
$file_type = DifferentialChangeType::FILE_DIRECTORY;
break;
default:
$file_type = DifferentialChangeType::FILE_NORMAL;
break;
}
// TODO: We can detect binary changes as git does, through a combination
// of running 'git check-attr' for stuff like 'binary', 'merge' or 'diff',
// and by falling back to inspecting the first 8,000 characters of the
// buffer for null bytes (this is seriously git's algorithm, see
// buffer_is_binary() in xdiff-interface.c).
$change_type = null;
$change_path = $src_path;
$change_target = null;
$is_direct = true;
switch ($action[0]) {
case 'A':
$change_type = DifferentialChangeType::TYPE_ADD;
break;
case 'D':
$change_type = DifferentialChangeType::TYPE_DELETE;
break;
case 'C':
$change_type = DifferentialChangeType::TYPE_COPY_HERE;
$change_path = $dst_path;
$change_target = $src_path;
$copy_away[$change_target][] = $change_path;
break;
case 'R':
$change_type = DifferentialChangeType::TYPE_MOVE_HERE;
$change_path = $dst_path;
$change_target = $src_path;
$move_away[$change_target][] = $change_path;
break;
case 'T':
// Type of the file changed, fall through and treat it as a
// modification. Not 100% sure this is the right thing to do but it
// seems reasonable.
case 'M':
if ($file_type == DifferentialChangeType::FILE_DIRECTORY) {
$change_type = DifferentialChangeType::TYPE_CHILD;
$is_direct = false;
} else {
$change_type = DifferentialChangeType::TYPE_CHANGE;
}
break;
// NOTE: "U" (unmerged) and "X" (unknown) statuses are also possible
// in theory but shouldn't appear here.
default:
throw new Exception("Failed to parse line '{$line}'.");
}
$changes[$change_path] = array(
'repositoryID' => $repository->getID(),
'commitID' => $commit->getID(),
'path' => $change_path,
'changeType' => $change_type,
'fileType' => $file_type,
'isDirect' => $is_direct,
'commitSequence' => $commit->getEpoch(),
'targetPath' => $change_target,
'targetCommitID' => $change_target ? $commit->getID() : null,
);
}
// Add a change to '/' since git doesn't mention it.
$changes['/'] = array(
'repositoryID' => $repository->getID(),
'commitID' => $commit->getID(),
'path' => '/',
'changeType' => DifferentialChangeType::TYPE_CHILD,
'fileType' => DifferentialChangeType::FILE_DIRECTORY,
'isDirect' => false,
'commitSequence' => $commit->getEpoch(),
'targetPath' => null,
'targetCommitID' => null,
);
foreach ($copy_away as $change_path => $destinations) {
if (isset($move_away[$change_path])) {
$change_type = DifferentialChangeType::TYPE_MULTICOPY;
$is_direct = true;
unset($move_away[$change_path]);
} else {
$change_type = DifferentialChangeType::TYPE_COPY_AWAY;
// This change is direct if we picked up a modification above (i.e.,
// the original copy source was also edited). Otherwise the original
// wasn't touched, so leave it as an indirect change.
$is_direct = isset($changes[$change_path]);
}
$reference = $changes[reset($destinations)];
$changes[$change_path] = array(
'repositoryID' => $repository->getID(),
'commitID' => $commit->getID(),
'path' => $change_path,
'changeType' => $change_type,
'fileType' => $reference['fileType'],
'isDirect' => $is_direct,
'commitSequence' => $commit->getEpoch(),
'targetPath' => null,
'targetCommitID' => null,
);
}
foreach ($move_away as $change_path => $destinations) {
$reference = $changes[reset($destinations)];
$changes[$change_path] = array(
'repositoryID' => $repository->getID(),
'commitID' => $commit->getID(),
'path' => $change_path,
'changeType' => DifferentialChangeType::TYPE_MOVE_AWAY,
'fileType' => $reference['fileType'],
'isDirect' => true,
'commitSequence' => $commit->getEpoch(),
'targetPath' => null,
'targetCommitID' => null,
);
}
$paths = array();
foreach ($changes as $change) {
$paths[$change['path']] = true;
if ($change['targetPath']) {
$paths[$change['targetPath']] = true;
}
}
$path_map = $this->lookupOrCreatePaths(array_keys($paths));
foreach ($changes as $key => $change) {
$changes[$key]['pathID'] = $path_map[$change['path']];
if ($change['targetPath']) {
$changes[$key]['targetPathID'] = $path_map[$change['targetPath']];
} else {
$changes[$key]['targetPathID'] = null;
}
}
$conn_w = $repository->establishConnection('w');
$changes_sql = array();
foreach ($changes as $change) {
$values = array(
(int)$change['repositoryID'],
(int)$change['pathID'],
(int)$change['commitID'],
$change['targetPathID']
? (int)$change['targetPathID']
: 'null',
$change['targetCommitID']
? (int)$change['targetCommitID']
: 'null',
(int)$change['changeType'],
(int)$change['fileType'],
(int)$change['isDirect'],
(int)$change['commitSequence'],
);
$changes_sql[] = '('.implode(', ', $values).')';
}
queryfx(
$conn_w,
'DELETE FROM %T WHERE commitID = %d',
PhabricatorRepository::TABLE_PATHCHANGE,
$commit->getID());
foreach (array_chunk($changes_sql, 256) as $sql_chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(repositoryID, pathID, commitID, targetPathID, targetCommitID,
changeType, fileType, isDirect, commitSequence)
VALUES %Q',
PhabricatorRepository::TABLE_PATHCHANGE,
implode(', ', $sql_chunk));
}
-
- $this->finishParse();
}
}
diff --git a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryMercurialCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryMercurialCommitChangeParserWorker.php
index d0444c3547..14360b6f7f 100644
--- a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryMercurialCommitChangeParserWorker.php
+++ b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositoryMercurialCommitChangeParserWorker.php
@@ -1,339 +1,330 @@
getCallsign().$commit->getCommitIdentifier();
- echo "Parsing {$full_name}...\n";
- if ($this->isBadCommit($full_name)) {
- echo "This commit is marked bad!\n";
- return;
- }
-
list($stdout) = $repository->execxLocalCommand(
'status -C --change %s',
$commit->getCommitIdentifier());
$status = ArcanistMercurialParser::parseMercurialStatusDetails($stdout);
$common_attributes = array(
'repositoryID' => $repository->getID(),
'commitID' => $commit->getID(),
'commitSequence' => $commit->getEpoch(),
);
$changes = array();
// Like Git, Mercurial doesn't track directories directly. We need to infer
// directory creation and removal by observing file creation and removal
// and testing if the directories in question are previously empty (thus,
// created) or subsequently empty (thus, removed).
$maybe_new_directories = array();
$maybe_del_directories = array();
$all_directories = array();
// Parse the basic information from "hg status", which shows files that
// were directly affected by the change.
foreach ($status as $path => $path_info) {
$path = '/'.$path;
$flags = $path_info['flags'];
$change_target = $path_info['from'] ? '/'.$path_info['from'] : null;
$changes[$path] = array(
'path' => $path,
'isDirect' => true,
'targetPath' => $change_target,
'targetCommitID' => $change_target ? $commit->getID() : null,
// We're going to fill these in shortly.
'changeType' => null,
'fileType' => null,
'flags' => $flags,
) + $common_attributes;
if ($flags & ArcanistRepositoryAPI::FLAG_ADDED) {
$maybe_new_directories[] = dirname($path);
} else if ($flags & ArcanistRepositoryAPI::FLAG_DELETED) {
$maybe_del_directories[] = dirname($path);
}
$all_directories[] = dirname($path);
}
// Add change information for each source path which doesn't appear in the
// status. These files were copied, but were not modified. We also know they
// must exist.
foreach ($changes as $path => $change) {
$from = $change['targetPath'];
if ($from && empty($changes[$from])) {
$changes[$from] = array(
'path' => $from,
'isDirect' => false,
'targetPath' => null,
'targetCommitID' => null,
'changeType' => DifferentialChangeType::TYPE_COPY_AWAY,
'fileType' => null,
'flags' => 0,
) + $common_attributes;
}
}
$away = array();
foreach ($changes as $path => $change) {
$target_path = $change['targetPath'];
if ($target_path) {
$away[$target_path][] = $path;
}
}
// Now that we have all the direct changes, figure out change types.
foreach ($changes as $path => $change) {
$flags = $change['flags'];
$from = $change['targetPath'];
if ($from) {
$target = $changes[$from];
} else {
$target = null;
}
if ($flags & ArcanistRepositoryAPI::FLAG_ADDED) {
if ($target) {
if ($target['flags'] & ArcanistRepositoryAPI::FLAG_DELETED) {
$change_type = DifferentialChangeType::TYPE_MOVE_HERE;
} else {
$change_type = DifferentialChangeType::TYPE_COPY_HERE;
}
} else {
$change_type = DifferentialChangeType::TYPE_ADD;
}
} else if ($flags & ArcanistRepositoryAPI::FLAG_DELETED) {
if (isset($away[$path])) {
if (count($away[$path]) > 1) {
$change_type = DifferentialChangeType::TYPE_MULTICOPY;
} else {
$change_type = DifferentialChangeType::TYPE_MOVE_AWAY;
}
} else {
$change_type = DifferentialChangeType::TYPE_DELETE;
}
} else {
if (isset($away[$path])) {
$change_type = DifferentialChangeType::TYPE_COPY_AWAY;
} else {
$change_type = DifferentialChangeType::TYPE_CHANGE;
}
}
$changes[$path]['changeType'] = $change_type;
}
// Go through all the affected directories and identify any which were
// actually added or deleted.
$dir_status = array();
foreach ($maybe_del_directories as $dir) {
$exists = false;
foreach (DiffusionPathIDQuery::expandPathToRoot($dir) as $path) {
if (isset($dir_status[$path])) {
break;
}
// If we know some child exists, we know this path exists. If we don't
// know that a child exists, test if this directory still exists.
if (!$exists) {
$exists = $this->mercurialPathExists(
$repository,
$path,
$commit->getCommitIdentifier());
}
if ($exists) {
$dir_status[$path] = DifferentialChangeType::TYPE_CHILD;
} else {
$dir_status[$path] = DifferentialChangeType::TYPE_DELETE;
}
}
}
list($stdout) = $repository->execxLocalCommand(
'parents --rev %s --style default',
$commit->getCommitIdentifier());
$parents = ArcanistMercurialParser::parseMercurialLog($stdout);
$parent = reset($parents);
if ($parent) {
// TODO: We should expand this to a full 40-character hash using "hg id".
$parent = $parent['rev'];
}
foreach ($maybe_new_directories as $dir) {
$exists = false;
foreach (DiffusionPathIDQuery::expandPathToRoot($dir) as $path) {
if (isset($dir_status[$path])) {
break;
}
if (!$exists) {
if ($parent) {
$exists = $this->mercurialPathExists($repository, $path, $parent);
} else {
$exists = false;
}
}
if ($exists) {
$dir_status[$path] = DifferentialChangeType::TYPE_CHILD;
} else {
$dir_status[$path] = DifferentialChangeType::TYPE_ADD;
}
}
}
foreach ($all_directories as $dir) {
foreach (DiffusionPathIDQuery::expandPathToRoot($dir) as $path) {
if (isset($dir_status[$path])) {
break;
}
$dir_status[$path] = DifferentialChangeType::TYPE_CHILD;
}
}
// Merge all the directory statuses into the path statuses.
foreach ($dir_status as $path => $status) {
if (isset($changes[$path])) {
// TODO: The UI probably doesn't handle any of these cases with
// terrible elegance, but they are exceedingly rare.
$existing_type = $changes[$path]['changeType'];
if ($existing_type == DifferentialChangeType::TYPE_DELETE) {
// This change removes a file, replaces it with a directory, and then
// adds children of that directory. Mark it as a "change" instead,
// and make the type a directory.
$changes[$path]['fileType'] = DifferentialChangeType::FILE_DIRECTORY;
$changes[$path]['changeType'] = DifferentialChangeType::TYPE_CHANGE;
} else if ($existing_type == DifferentialChangeType::TYPE_MOVE_AWAY ||
$existing_type == DifferentialChangeType::TYPE_MULTICOPY) {
// This change moves or copies a file, replaces it with a directory,
// and then adds children to that directory. Mark it as "copy away"
// instead of whatever it was, and make the type a directory.
$changes[$path]['fileType'] = DifferentialChangeType::FILE_DIRECTORY;
$changes[$path]['changeType']
= DifferentialChangeType::TYPE_COPY_AWAY;
} else if ($existing_type == DifferentialChangeType::TYPE_ADD) {
// This change removes a diretory and replaces it with a file. Mark
// it as "change" instead of "add".
$changes[$path]['changeType'] = DifferentialChangeType::TYPE_CHANGE;
}
continue;
}
$changes[$path] = array(
'path' => $path,
'isDirect' => ($status == DifferentialChangeType::TYPE_CHILD)
? false
: true,
'fileType' => DifferentialChangeType::FILE_DIRECTORY,
'changeType' => $status,
'targetPath' => null,
'targetCommitID' => null,
) + $common_attributes;
}
// TODO: use "hg diff --git" to figure out which files are symlinks.
foreach ($changes as $path => $change) {
if (empty($change['fileType'])) {
$changes[$path]['fileType'] = DifferentialChangeType::FILE_NORMAL;
}
}
$all_paths = array();
foreach ($changes as $path => $change) {
$all_paths[$path] = true;
if ($change['targetPath']) {
$all_paths[$change['targetPath']] = true;
}
}
$path_map = $this->lookupOrCreatePaths(array_keys($all_paths));
foreach ($changes as $key => $change) {
$changes[$key]['pathID'] = $path_map[$change['path']];
if ($change['targetPath']) {
$changes[$key]['targetPathID'] = $path_map[$change['targetPath']];
} else {
$changes[$key]['targetPathID'] = null;
}
}
$conn_w = $repository->establishConnection('w');
$changes_sql = array();
foreach ($changes as $change) {
$values = array(
(int)$change['repositoryID'],
(int)$change['pathID'],
(int)$change['commitID'],
$change['targetPathID']
? (int)$change['targetPathID']
: 'null',
$change['targetCommitID']
? (int)$change['targetCommitID']
: 'null',
(int)$change['changeType'],
(int)$change['fileType'],
(int)$change['isDirect'],
(int)$change['commitSequence'],
);
$changes_sql[] = '('.implode(', ', $values).')';
}
queryfx(
$conn_w,
'DELETE FROM %T WHERE commitID = %d',
PhabricatorRepository::TABLE_PATHCHANGE,
$commit->getID());
foreach (array_chunk($changes_sql, 256) as $sql_chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(repositoryID, pathID, commitID, targetPathID, targetCommitID,
changeType, fileType, isDirect, commitSequence)
VALUES %Q',
PhabricatorRepository::TABLE_PATHCHANGE,
implode(', ', $sql_chunk));
}
-
- $this->finishParse();
}
private function mercurialPathExists(
PhabricatorRepository $repository,
$path,
$rev) {
if ($path == '/') {
return true;
}
// NOTE: For directories, this grabs the entire directory contents, but
// we don't have any more surgical approach available to us in Mercurial.
// We can't use "log" because it doesn't have enough information for us
// to figure out when a directory is deleted by a change.
list($err) = $repository->execLocalCommand(
'cat --rev %s -- %s > /dev/null',
$rev,
$path);
if ($err) {
return false;
} else {
return true;
}
}
}
diff --git a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php
index 32bd2c7ea6..386458c7b0 100644
--- a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php
+++ b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php
@@ -1,790 +1,779 @@
getDetail('remote-uri');
$svn_commit = $commit->getCommitIdentifier();
- $callsign = $repository->getCallsign();
- $full_name = 'r'.$callsign.$svn_commit;
- echo "Parsing {$full_name}...\n";
-
- if ($this->isBadCommit($full_name)) {
- echo "This commit is marked bad!\n";
- return;
- }
-
// Pull the top-level path changes out of "svn log". This is pretty
// straightforward; just parse the XML log.
$log = $this->getSVNLogXMLObject($uri, $svn_commit, $verbose = true);
$entry = $log->logentry[0];
if (!$entry->paths) {
// TODO: Explicitly mark this commit as broken elsewhere? This isn't
// supposed to happen but we have some cases like rE27 and rG935 in the
// Facebook repositories where things got all clowned up.
return;
}
$raw_paths = array();
foreach ($entry->paths->path as $path) {
$name = trim((string)$path);
$raw_paths[$name] = array(
'rawPath' => $name,
'rawTargetPath' => (string)$path['copyfrom-path'],
'rawChangeType' => (string)$path['action'],
'rawTargetCommit' => (string)$path['copyfrom-rev'],
);
}
$copied_or_moved_map = array();
$deleted_paths = array();
$add_paths = array();
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawTargetPath']) {
$copied_or_moved_map[$raw_info['rawTargetPath']][] = $raw_info;
}
switch ($raw_info['rawChangeType']) {
case 'D':
$deleted_paths[$path] = $raw_info;
break;
case 'A':
case 'R':
$add_paths[$path] = $raw_info;
break;
}
}
// If a path was deleted, we need to look in the repository history to
// figure out where the former valid location for it is so we can figure out
// if it was a directory or not, among other things.
$lookup_here = array();
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawChangeType'] != 'D') {
continue;
}
// If a change copies a directory and then deletes something from it,
// we need to look at the old location for information about the path, not
// the new location. This workflow is pretty ridiculous -- so much so that
// Trac gets it wrong. See Facebook rO6 for an example, if you happen to
// work at Facebook.
$parents = $this->expandAllParentPaths($path, $include_self = true);
foreach ($parents as $parent) {
if (isset($add_paths[$parent])) {
$relative_path = substr($path, strlen($parent));
$lookup_here[$path] = array(
'rawPath' => $add_paths[$parent]['rawTargetPath'].$relative_path,
'rawCommit' => $add_paths[$parent]['rawTargetCommit'],
);
continue 2;
}
}
// Otherwise we can just look at the previous revision.
$lookup_here[$path] = array(
'rawPath' => $path,
'rawCommit' => $svn_commit - 1,
);
}
$lookup = array();
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawChangeType'] == 'D') {
$lookup[$path] = $lookup_here[$path];
} else {
// For everything that wasn't deleted, we can just look it up directly.
$lookup[$path] = array(
'rawPath' => $path,
'rawCommit' => $svn_commit,
);
}
}
$effects = array();
$path_file_types = $this->lookupPathFileTypes($repository, $lookup);
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawChangeType'] == 'D' &&
$path_file_types[$path] == DifferentialChangeType::FILE_DIRECTORY) {
// Bad. Child paths aren't enumerated in "svn log" so we need
// to go fishing.
$list = $this->lookupRecursiveFileList(
$repository,
$lookup[$path]);
foreach ($list as $deleted_path => $path_file_type) {
$deleted_path = rtrim($path.'/'.$deleted_path, '/');
if (!empty($raw_paths[$deleted_path])) {
// We somehow learned about this deletion explicitly?
// TODO: Unclear how this is possible.
continue;
}
$effect_type = DifferentialChangeType::TYPE_DELETE;
$effect_target_path = null;
if (isset($copied_or_moved_map[$deleted_path])) {
$effect_target_path = $path;
if (count($copied_or_moved_map[$deleted_path]) > 1) {
$effect_type = DifferentialChangeType::TYPE_MULTICOPY;
} else {
$effect_type = DifferentialChangeType::TYPE_MOVE_AWAY;
}
}
$effects[$deleted_path] = array(
'rawPath' => $deleted_path,
'rawTargetPath' => $effect_target_path,
'rawTargetCommit' => null,
'rawDirect' => true,
'changeType' => $effect_type,
'fileType' => $path_file_type,
);
$deleted_paths[$deleted_path] = $effects[$deleted_path];
}
}
}
$resolved_types = array();
$supplemental = array();
foreach ($raw_paths as $path => $raw_info) {
if (isset($resolved_types[$path])) {
$type = $resolved_types[$path];
} else {
switch ($raw_info['rawChangeType']) {
case 'D':
if (isset($copied_or_moved_map[$path])) {
if (count($copied_or_moved_map[$path]) > 1) {
$type = DifferentialChangeType::TYPE_MULTICOPY;
} else {
$type = DifferentialChangeType::TYPE_MOVE_AWAY;
}
} else {
$type = DifferentialChangeType::TYPE_DELETE;
}
break;
case 'A':
$copy_from = $raw_info['rawTargetPath'];
$copy_rev = $raw_info['rawTargetCommit'];
if (!strlen($copy_from)) {
$type = DifferentialChangeType::TYPE_ADD;
} else {
if (isset($deleted_paths[$copy_from])) {
$type = DifferentialChangeType::TYPE_MOVE_HERE;
$other_type = DifferentialChangeType::TYPE_MOVE_AWAY;
} else {
$type = DifferentialChangeType::TYPE_COPY_HERE;
$other_type = DifferentialChangeType::TYPE_COPY_AWAY;
}
$source_file_type = $this->lookupPathFileType(
$repository,
$copy_from,
array(
'rawPath' => $copy_from,
'rawCommit' => $copy_rev,
));
if ($source_file_type == DifferentialChangeType::FILE_DELETED) {
throw new Exception(
"Something is wrong; source of a copy must exist.");
}
if ($source_file_type != DifferentialChangeType::FILE_DIRECTORY) {
if (isset($raw_paths[$copy_from]) ||
isset($effects[$copy_from])) {
break;
}
$effects[$copy_from] = array(
'rawPath' => $copy_from,
'rawTargetPath' => null,
'rawTargetCommit' => null,
'rawDirect' => false,
'changeType' => $other_type,
'fileType' => $source_file_type,
);
} else {
// ULTRADISASTER. We've added a directory which was copied
// or moved from somewhere else. This is the most complex and
// ridiculous case.
$list = $this->lookupRecursiveFileList(
$repository,
array(
'rawPath' => $copy_from,
'rawCommit' => $copy_rev,
));
foreach ($list as $from_path => $from_file_type) {
$full_from = rtrim($copy_from.'/'.$from_path, '/');
$full_to = rtrim($path.'/'.$from_path, '/');
if (empty($raw_paths[$full_to])) {
$effects[$full_to] = array(
'rawPath' => $full_to,
'rawTargetPath' => $full_from,
'rawTargetCommit' => $copy_rev,
'rawDirect' => true,
'changeType' => $type,
'fileType' => $from_file_type,
);
} else {
// This means we picked the file up explicitly elsewhere.
// If the file as modified, SVN will drop the copy
// information. We need to restore it.
$supplemental[$full_to]['rawTargetPath'] = $full_from;
$supplemental[$full_to]['rawTargetCommit'] = $copy_rev;
if ($raw_paths[$full_to]['rawChangeType'] == 'M') {
$resolved_types[$full_to] = $type;
}
}
if (empty($raw_paths[$full_from]) &&
empty($effects[$full_from])) {
if ($other_type == DifferentialChangeType::TYPE_COPY_AWAY) {
// Add an indirect effect for the copied file, if we
// don't already have an entry for it (e.g., a separate
// change).
$effects[$full_from] = array(
'rawPath' => $full_from,
'rawTargetPath' => null,
'rawTargetCommit' => null,
'rawDirect' => false,
'changeType' => $other_type,
'fileType' => $from_file_type,
);
}
}
}
}
}
break;
// This is "replaced", caused by "svn rm"-ing a file, putting another
// in its place, and then "svn add"-ing it. We do not distinguish
// between this and "M".
case 'R':
case 'M':
if (isset($copied_or_moved_map[$path])) {
$type = DifferentialChangeType::TYPE_COPY_AWAY;
} else {
$type = DifferentialChangeType::TYPE_CHANGE;
}
break;
}
}
$resolved_types[$path] = $type;
}
foreach ($raw_paths as $path => $raw_info) {
$raw_paths[$path]['changeType'] = $resolved_types[$path];
if (isset($supplemental[$path])) {
foreach ($supplemental[$path] as $key => $value) {
$raw_paths[$path][$key] = $value;
}
}
}
foreach ($raw_paths as $path => $raw_info) {
$effects[$path] = array(
'rawPath' => $path,
'rawTargetPath' => $raw_info['rawTargetPath'],
'rawTargetCommit' => $raw_info['rawTargetCommit'],
'rawDirect' => true,
'changeType' => $raw_info['changeType'],
'fileType' => $path_file_types[$path],
);
}
$parents = array();
foreach ($effects as $path => $effect) {
foreach ($this->expandAllParentPaths($path) as $parent_path) {
$parents[$parent_path] = true;
}
}
$parents = array_keys($parents);
foreach ($parents as $parent) {
if (isset($effects[$parent])) {
continue;
}
$effects[$parent] = array(
'rawPath' => $parent,
'rawTargetPath' => null,
'rawTargetCommit' => null,
'rawDirect' => false,
'changeType' => DifferentialChangeType::TYPE_CHILD,
'fileType' => DifferentialChangeType::FILE_DIRECTORY,
);
}
$lookup_paths = array();
foreach ($effects as $effect) {
$lookup_paths[$effect['rawPath']] = true;
if ($effect['rawTargetPath']) {
$lookup_paths[$effect['rawTargetPath']] = true;
}
}
$lookup_paths = array_keys($lookup_paths);
$lookup_commits = array();
foreach ($effects as $effect) {
if ($effect['rawTargetCommit']) {
$lookup_commits[$effect['rawTargetCommit']] = true;
}
}
$lookup_commits = array_keys($lookup_commits);
$path_map = $this->lookupOrCreatePaths($lookup_paths);
$commit_map = $this->lookupSvnCommits($repository, $lookup_commits);
$this->writeChanges($repository, $commit, $effects, $path_map, $commit_map);
$this->writeBrowse($repository, $commit, $effects, $path_map);
-
- $this->finishParse();
}
private function writeChanges(
PhabricatorRepository $repository,
PhabricatorRepositoryCommit $commit,
array $effects,
array $path_map,
array $commit_map) {
$conn_w = $repository->establishConnection('w');
$sql = array();
foreach ($effects as $effect) {
$sql[] = qsprintf(
$conn_w,
'(%d, %d, %d, %nd, %nd, %d, %d, %d, %d)',
$repository->getID(),
$path_map[$effect['rawPath']],
$commit->getID(),
$effect['rawTargetPath']
? $path_map[$effect['rawTargetPath']]
: null,
$effect['rawTargetCommit']
? $commit_map[$effect['rawTargetCommit']]
: null,
$effect['changeType'],
$effect['fileType'],
$effect['rawDirect']
? 1
: 0,
$commit->getCommitIdentifier());
}
queryfx(
$conn_w,
'DELETE FROM %T WHERE commitID = %d',
PhabricatorRepository::TABLE_PATHCHANGE,
$commit->getID());
foreach (array_chunk($sql, 512) as $sql_chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(repositoryID, pathID, commitID, targetPathID, targetCommitID,
changeType, fileType, isDirect, commitSequence)
VALUES %Q',
PhabricatorRepository::TABLE_PATHCHANGE,
implode(', ', $sql_chunk));
}
}
private function writeBrowse(
PhabricatorRepository $repository,
PhabricatorRepositoryCommit $commit,
array $effects,
array $path_map) {
$conn_w = $repository->establishConnection('w');
$sql = array();
foreach ($effects as $effect) {
$type = $effect['changeType'];
if (!$effect['rawDirect']) {
if ($type == DifferentialChangeType::TYPE_COPY_AWAY) {
// Don't write COPY_AWAY to the filesystem table if it isn't a direct
// event.
continue;
}
if ($type == DifferentialChangeType::TYPE_CHILD) {
// Don't write CHILD to the filesystem table. Although doing these
// writes has the nice property of letting you see when a directory's
// contents were last changed, it explodes the table tremendously
// and makes Diffusion far slower.
continue;
}
}
if ($effect['rawPath'] == '/') {
// Don't write any events on '/' to the filesystem table; in
// particular, it doesn't have a meaningful parentID.
continue;
}
$existed = !DifferentialChangeType::isDeleteChangeType($type);
$sql[] = qsprintf(
$conn_w,
'(%d, %d, %d, %d, %d, %d)',
$repository->getID(),
$path_map[$this->getParentPath($effect['rawPath'])],
$commit->getCommitIdentifier(),
$path_map[$effect['rawPath']],
$existed
? 1
: 0,
$effect['fileType']);
}
queryfx(
$conn_w,
'DELETE FROM %T WHERE repositoryID = %d AND svnCommit = %d',
PhabricatorRepository::TABLE_FILESYSTEM,
$repository->getID(),
$commit->getCommitIdentifier());
foreach (array_chunk($sql, 512) as $sql_chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(repositoryID, parentID, svnCommit, pathID, existed, fileType)
VALUES %Q',
PhabricatorRepository::TABLE_FILESYSTEM,
implode(', ', $sql_chunk));
}
}
private function lookupSvnCommits(
PhabricatorRepository $repository,
array $commits) {
if (!$commits) {
return array();
}
$commit_table = new PhabricatorRepositoryCommit();
$commit_data = queryfx_all(
$commit_table->establishConnection('w'),
'SELECT id, commitIdentifier FROM %T
WHERE repositoryID = %d AND commitIdentifier in (%Ls)',
$commit_table->getTableName(),
$repository->getID(),
$commits);
$commit_map = ipull($commit_data, 'id', 'commitIdentifier');
$need = array();
foreach ($commits as $commit) {
if (empty($commit_map[$commit])) {
$need[] = $commit;
}
}
// If we are parsing a Subversion repository and have been configured to
// import only some subdirectory of it, we may find commits which reference
// other foreign commits outside of the directory (for instance, because of
// a move or copy). Rather than trying to execute full parses on them, just
// create stub commits and identify the stubs as foreign commits.
if ($need) {
$subpath = $repository->getDetail('svn-subpath');
if (!$subpath) {
$commits = implode(', ', $need);
throw new Exception(
"Missing commits ({$need}) in a SVN repository which is not ".
"configured for subdirectory-only parsing!");
}
foreach ($need as $foreign_commit) {
$commit = new PhabricatorRepositoryCommit();
$commit->setRepositoryID($repository->getID());
$commit->setCommitIdentifier($foreign_commit);
$commit->setEpoch(0);
$commit->save();
$data = new PhabricatorRepositoryCommitData();
$data->setCommitID($commit->getID());
$data->setAuthorName('');
$data->setCommitMessage('');
$data->setCommitDetails(
array(
'foreign-svn-stub' => true,
// Denormalize this to make it easier to debug cases where someone
// did half a parse and then changed the subdirectory or something
// like that.
'svn-subpath' => $subpath,
));
$data->save();
$commit_map[$foreign_commit] = $commit->getID();
}
}
return $commit_map;
}
private function lookupPathFileType(
PhabricatorRepository $repository,
$path,
array $path_info) {
$result = $this->lookupPathFileTypes(
$repository,
array(
$path => $path_info,
));
return $result[$path];
}
private function lookupPathFileTypes(
PhabricatorRepository $repository,
array $paths) {
$result_map = array();
$repository_uri = $repository->getDetail('remote-uri');
if (isset($paths['/'])) {
$result_map['/'] = DifferentialChangeType::FILE_DIRECTORY;
unset($paths['/']);
}
$parents = array();
$path_mapping = array();
foreach ($paths as $path => $lookup) {
$parent = dirname($lookup['rawPath']);
$parent = ltrim($parent, '/');
$parent = $this->encodeSVNPath($parent);
$parent = $repository_uri.$parent.'@'.$lookup['rawCommit'];
$parent = escapeshellarg($parent);
$parents[$parent] = true;
$path_mapping[$parent][] = dirname($path);
}
// Reverse this list so we can pop $path_mapping, as that's more efficient
// than shifting it. We need to associate these maps positionally because
// a change can copy the same source path from multiple revisions via
// "svn cp path@1 a; svn cp path@2 b;" and the XML output gives us no way
// to distinguish which revision we're looking at except based on its
// position in the document.
$all_paths = array_reverse(array_keys($parents));
foreach (array_chunk($all_paths, 64) as $path_chunk) {
list($raw_xml) = $repository->execxRemoteCommand(
'--xml ls %C',
implode(' ', $path_chunk));
$xml = new SimpleXMLElement($raw_xml);
foreach ($xml->list as $list) {
$list_path = (string)$list['path'];
// SVN is a big mess. See Facebook rG8 (a revision which adds files
// with spaces in their names) for an example.
$list_path = rawurldecode($list_path);
if ($list_path == $repository_uri) {
$base = '/';
} else {
$base = substr($list_path, strlen($repository_uri));
}
$mapping = array_pop($path_mapping);
foreach ($list->entry as $entry) {
$val = $this->getFileTypeFromSVNKind($entry['kind']);
foreach ($mapping as $base_path) {
// rtrim() causes us to handle top-level directories correctly.
$key = rtrim($base_path, '/').'/'.$entry->name;
$result_map[$key] = $val;
}
}
}
}
foreach ($paths as $path => $lookup) {
if (empty($result_map[$path])) {
$result_map[$path] = DifferentialChangeType::FILE_DELETED;
}
}
return $result_map;
}
private function encodeSVNPath($path) {
$path = rawurlencode($path);
$path = str_replace('%2F', '/', $path);
return $path;
}
private function getFileTypeFromSVNKind($kind) {
$kind = (string)$kind;
switch ($kind) {
case 'dir': return DifferentialChangeType::FILE_DIRECTORY;
case 'file': return DifferentialChangeType::FILE_NORMAL;
default:
throw new Exception("Unknown SVN file kind '{$kind}'.");
}
}
private function lookupRecursiveFileList(
PhabricatorRepository $repository,
array $info) {
$path = $info['rawPath'];
$rev = $info['rawCommit'];
$path = $this->encodeSVNPath($path);
$hashkey = md5($repository->getDetail('remote-uri').$path.'@'.$rev);
// This method is quite horrible. The underlying challenge is that some
// commits in the Facebook repository are enormous, taking multiple hours
// to 'ls -R' out of the repository and producing XML files >1GB in size.
// If we try to SimpleXML them, the object exhausts available memory on a
// 64G machine. Instead, cache the XML output and then parse it line by line
// to limit space requirements.
$cache_loc = sys_get_temp_dir().'/diffusion.'.$hashkey.'.svnls';
if (!Filesystem::pathExists($cache_loc)) {
$tmp = new TempFile();
$repository->execxRemoteCommand(
'--xml ls -R %s%s@%d > %s',
$repository->getDetail('remote-uri'),
$path,
$rev,
$tmp);
execx(
'mv %s %s',
$tmp,
$cache_loc);
}
$map = $this->parseRecursiveListFileData($cache_loc);
Filesystem::remove($cache_loc);
return $map;
}
private function parseRecursiveListFileData($file_path) {
$map = array();
$mode = 'xml';
$done = false;
$entry = null;
foreach (new LinesOfALargeFile($file_path) as $lno => $line) {
switch ($mode) {
case 'entry':
if ($line == '') {
$entry = implode('', $entry);
$pattern = '@^\s+kind="(file|dir)">'.
'(.*?)'.
'((.*?))?@';
$matches = null;
if (!preg_match($pattern, $entry, $matches)) {
throw new Exception("Unable to parse entry!");
}
$map[html_entity_decode($matches[2])] =
$this->getFileTypeFromSVNKind($matches[1]);
$mode = 'entry-or-end';
} else {
$entry[] = $line;
}
break;
case 'entry-or-end':
if ($line == '') {
$done = true;
break 2;
} else if ($line == ' or /';
if (!preg_match($expect, $line)) {
throw new Exception("Expected '{$expect}', got {$line}.");
}
$mode = 'list';
break;
case 'list':
$expect = '';
if ($line !== $expect) {
throw new Exception("Expected '{$expect}', got {$line}.");
}
$mode = 'list1';
break;
case 'list1':
$expect = '= 1) {
array_pop($parts);
$parents[] = '/'.implode('/', $parts);
}
return $parents;
}
}