Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15284982
D16405.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Referenced Files
None
Subscribers
None
D16405.diff
View Options
diff --git a/src/parser/ArcanistDiffParser.php b/src/parser/ArcanistDiffParser.php
--- a/src/parser/ArcanistDiffParser.php
+++ b/src/parser/ArcanistDiffParser.php
@@ -288,9 +288,11 @@
if (isset($match['type'])) {
if ($match['type'] == 'diff --git') {
- list($old, $new) = self::splitGitDiffPaths($match['oldnew']);
- $match['old'] = $old;
- $match['cur'] = $new;
+ $filename = self::extractGitCommonFilename($match['oldnew']);
+ if ($filename !== null) {
+ $match['old'] = $filename;
+ $match['cur'] = $filename;
+ }
}
}
@@ -1308,99 +1310,46 @@
/**
- * Strip prefixes off paths from `git diff`. By default git uses a/ and b/,
- * but you can set `diff.mnemonicprefix` to get a different set of prefixes,
- * or use `--no-prefix`, `--src-prefix` or `--dst-prefix` to set these to
- * other arbitrary values.
- *
- * We strip the default and mnemonic prefixes, and trust the user knows what
- * they're doing in the other cases.
- *
- * @param string Path to strip.
- * @return string Stripped path.
- */
- public static function stripGitPathPrefix($path) {
-
- static $regex;
- if ($regex === null) {
- $prefixes = array(
- // These are the defaults.
- 'a/',
- 'b/',
-
- // These show up when you set "diff.mnemonicprefix".
- 'i/',
- 'c/',
- 'w/',
- 'o/',
- '1/',
- '2/',
- );
-
- foreach ($prefixes as $key => $prefix) {
- $prefixes[$key] = preg_quote($prefix, '@');
- }
- $regex = '@^('.implode('|', $prefixes).')@S';
- }
-
- return preg_replace($regex, '', $path);
- }
-
-
- /**
* Split the paths on a "diff --git" line into old and new paths. This
* is difficult because they may be ambiguous if the files contain spaces.
*
* @param string Text from a diff line after "diff --git ".
* @return pair<string, string> Old and new paths.
*/
- public static function splitGitDiffPaths($paths) {
+ public static function extractGitCommonFilename($paths) {
$matches = null;
$paths = rtrim($paths, "\r\n");
- $patterns = array(
- // Try quoted paths, used for unicode filenames or filenames with quotes.
- '@^(?P<old>"(?:\\\\.|[^"\\\\]+)+") (?P<new>"(?:\\\\.|[^"\\\\]+)+")$@',
-
- // Try paths without spaces.
- '@^(?P<old>[^ ]+) (?P<new>[^ ]+)$@',
-
- // Try paths with well-known prefixes.
- '@^(?P<old>[abicwo12]/.*) (?P<new>[abicwo12]/.*)$@',
-
- // Try the exact same string twice in a row separated by a space.
- // This can hit a false positive for moves from files like "old file old"
- // to "file", but such a case combined with custom diff prefixes is
- // incredibly obscure.
- '@^(?P<old>.*) (?P<new>\\1)$@',
- );
-
- foreach ($patterns as $pattern) {
- if (preg_match($pattern, $paths, $matches)) {
- break;
- }
- }
-
- if (!$matches) {
- throw new Exception(
- pht(
- "Input diff contains ambiguous line '%s'. This line is ambiguous ".
- "because there are spaces in the file names, so the parser can not ".
- "determine where the file names begin and end. To resolve this ".
- "ambiguity, use standard prefixes ('a/' and 'b/') when ".
- "generating diffs.",
- "diff --git {$paths}"));
+ // Try the exact same string twice in a row separated by a
+ // space, with an optional prefix. This can hit a false
+ // positive for moves from files like "old file old" to "file",
+ // but such a cases will be caught by the "rename from" /
+ // "rename to" lines.
+ $prefix = '(?:[^/]+/)?';
+ $pattern =
+ "@^(?P<old>(?P<oldq>\"?){$prefix}(?P<common>.+)\\k<oldq>)"
+ ." "
+ ."(?P<new>(?P<newq>\"?){$prefix}\\k<common>\\k<newq>)$@";
+
+ if (!preg_match($pattern, $paths, $matches)) {
+ // A rename or some form; return null for now, and let the
+ // "rename from" / "rename to" lines fix it up.
+ return null;
}
- $old = $matches['old'];
- $old = self::unescapeFilename($old);
- $old = self::stripGitPathPrefix($old);
+ // Use the common subpart. There may be ambiguity here: "src/file
+ // dst/file" may _either_ be a prefix-less move, or a change with
+ // two custom prefixes. We assume it is the latter; if it is a
+ // rename, diff parsing will update based on the "rename from" /
+ // "rename to" lines.
- $new = $matches['new'];
+ // This re-assembles with the differing prefixes removed, but the
+ // quoting from the original. Necessary so we know if we should
+ // unescape characters from the common string.
+ $new = $matches['newq'].$matches['common'].$matches['newq'];
$new = self::unescapeFilename($new);
- $new = self::stripGitPathPrefix($new);
- return array($old, $new);
+ return $new;
}
diff --git a/src/parser/__tests__/ArcanistDiffParserTestCase.php b/src/parser/__tests__/ArcanistDiffParserTestCase.php
--- a/src/parser/__tests__/ArcanistDiffParserTestCase.php
+++ b/src/parser/__tests__/ArcanistDiffParserTestCase.php
@@ -535,7 +535,14 @@
$this->assertEqual(1, count($changes));
$change = head($changes);
$this->assertEqual(
- 'dst/file',
+ 'file',
+ $change->getCurrentPath());
+ break;
+ case 'custom-prefixes-edit.gitdiff':
+ $this->assertEqual(1, count($changes));
+ $change = head($changes);
+ $this->assertEqual(
+ 'file',
$change->getCurrentPath());
break;
case 'more-newlines.svndiff':
@@ -607,81 +614,111 @@
}
}
- public function testGitPrefixStripping() {
- static $tests = array(
- 'a/file.c' => 'file.c',
- 'b/file.c' => 'file.c',
- 'i/file.c' => 'file.c',
- 'c/file.c' => 'file.c',
- 'w/file.c' => 'file.c',
- 'o/file.c' => 'file.c',
- '1/file.c' => 'file.c',
- '2/file.c' => 'file.c',
- 'src/file.c' => 'src/file.c',
- 'file.c' => 'file.c',
- );
-
- foreach ($tests as $input => $expect) {
- $this->assertEqual(
- $expect,
- ArcanistDiffParser::stripGitPathPrefix($input),
- pht("Strip git prefix from '%s'.", $input));
- }
- }
-
- public function testGitPathSplitting() {
+ public function testGitCommonFilenameExtraction() {
static $tests = array(
- 'a/old.c b/new.c' => array('old.c', 'new.c'),
- "a/old.c b/new.c\n" => array('old.c', 'new.c'),
- "a/old.c b/new.c\r\n" => array('old.c', 'new.c'),
- 'old.c new.c' => array('old.c', 'new.c'),
- '1/old.c 2/new.c' => array('old.c', 'new.c'),
- '"a/\\"quotes1\\"" "b/\\"quotes2\\""' => array(
- '"quotes1"',
- '"quotes2"',
- ),
- '"a/\\"quotes and spaces1\\"" "b/\\"quotes and spaces2\\""' => array(
- '"quotes and spaces1"',
- '"quotes and spaces2"',
- ),
- '"a/\\342\\230\\2031" "b/\\342\\230\\2032"' => array(
- "\xE2\x98\x831",
- "\xE2\x98\x832",
- ),
- 'a/Core Data/old.c b/Core Data/new.c' => array(
- 'Core Data/old.c',
- 'Core Data/new.c',
- ),
- 'some file with spaces.c some file with spaces.c' => array(
- 'some file with spaces.c',
- 'some file with spaces.c',
- ),
+ 'a/filename.c b/filename.c' => 'filename.c',
+ "a/filename.c b/filename.c\n" => 'filename.c',
+ "a/filename.c b/filename.c\r\n" => 'filename.c',
+ 'filename.c filename.c' => 'filename.c',
+ '1/filename.c 2/filename.c' => 'filename.c',
+ '"a/\\"quotes\\"" "b/\\"quotes\\""' => '"quotes"',
+ '"a/\\"quotes and spaces\\"" "b/\\"quotes and spaces\\""' =>
+ '"quotes and spaces"',
+ '"a/\\342\\230\\203" "b/\\342\\230\\203"' =>
+ "\xE2\x98\x83",
+ 'a/Core Data/filename.c b/Core Data/filename.c' =>
+ 'Core Data/filename.c',
+ 'some file with spaces.c some file with spaces.c' =>
+ 'some file with spaces.c',
+ '"foo bar.c" foo bar.c' => 'foo bar.c',
+ '"a/foo bar.c" b/foo bar.c' => 'foo bar.c',
+ 'src/file dst/file' => 'file',
+
+ // Renames are handled by the "rename from ..." lines later in
+ // the diff, for simplicity of parsing; this is also how git
+ // itself handles it.
+ 'a/foo.c b/bar.c' => null,
+ 'a/foo bar.c b/baz troz.c' => null,
+ '"a/foo bar.c" b/baz troz.c' => null,
+ 'a/foo bar.c "b/baz troz.c"' => null,
+ '"a/foo bar.c" "b/baz troz.c"' => null,
+ 'filename file with spaces.c filename file with spaces.c' =>
+ 'filename file with spaces.c',
);
foreach ($tests as $input => $expect) {
- $result = ArcanistDiffParser::splitGitDiffPaths($input);
+ $result = ArcanistDiffParser::extractGitCommonFilename($input);
$this->assertEqual(
$expect,
$result,
pht('Split: %s', $input));
}
+ }
- static $ambiguous = array(
- 'old file with spaces.c new file with spaces.c',
- );
-
- foreach ($ambiguous as $input) {
- $caught = null;
- try {
- ArcanistDiffParser::splitGitDiffPaths($input);
- } catch (Exception $ex) {
- $caught = $ex;
- }
- $this->assertTrue(
- ($caught instanceof Exception),
- pht('Ambiguous: %s', $input));
- }
+ public function runSingleRename($diffline, $from, $to, $old, $new) {
+ $str = "diff --git $diffline\nsimilarity index 95%\n"
+ ."rename from $from\nrename to $to\n";
+ $parser = new ArcanistDiffParser();
+ $changes = $parser->parseDiff($str);
+ $this->assertTrue(
+ $changes !== null,
+ pht("Parsed:\n%s", $str));
+ $this->assertEqual(
+ $old == $new ? 1 : 2, count($changes),
+ pht("Parsed one change:\n%s", $str));
+ $change = reset($changes);
+ $this->assertEqual(
+ array($old, $new),
+ array($change->getOldPath(), $change->getCurrentPath()),
+ pht('Split: %s', $diffline));
}
+ public function testGitRenames() {
+ $this->runSingleRename('a/old.c b/new.c',
+ 'old.c', 'new.c',
+ 'old.c', 'new.c');
+ $this->runSingleRename('old.c new.c',
+ 'old.c', 'new.c',
+ 'old.c', 'new.c');
+ $this->runSingleRename('1/old.c 2/new.c',
+ 'old.c', 'new.c',
+ 'old.c', 'new.c');
+ $this->runSingleRename('from/file.c to/file.c',
+ 'from/file.c', 'to/file.c',
+ 'from/file.c', 'to/file.c');
+ $this->runSingleRename('"a/\\"quotes1\\"" "b/\\"quotes2\\""',
+ '"\\"quotes1\\""', '"\\"quotes2\\""',
+ '"quotes1"', '"quotes2"');
+ $this->runSingleRename('"a/\\"quotes spaces1\\"" "b/\\"quotes spaces2\\""',
+ '"\\"quotes spaces1\\""', '"\\"quotes spaces2\\""',
+ '"quotes spaces1"', '"quotes spaces2"');
+ $this->runSingleRename('"a/\\342\\230\\2031" "b/\\342\\230\\2032"',
+ '"\\342\\230\\2031"', '"\\342\\230\\2032"',
+ "\xE2\x98\x831", "\xE2\x98\x832");
+ $this->runSingleRename('a/Core Data/old.c b/Core Data/new.c',
+ 'Core Data/old.c', 'Core Data/new.c',
+ 'Core Data/old.c', 'Core Data/new.c');
+ $this->runSingleRename('file with spaces.c file with spaces.c',
+ 'file with spaces.c', 'file with spaces.c',
+ 'file with spaces.c', 'file with spaces.c');
+ $this->runSingleRename('a/non-quoted filename.c "b/quoted filename.c"',
+ 'non-quoted filename.c', '"quoted filename.c"',
+ 'non-quoted filename.c', 'quoted filename.c');
+ $this->runSingleRename('non-quoted filename.c "quoted filename.c"',
+ 'non-quoted filename.c', '"quoted filename.c"',
+ 'non-quoted filename.c', 'quoted filename.c');
+ $this->runSingleRename('"a/quoted filename.c" b/non quoted filename.c',
+ '"quoted filename.c"', 'non quoted filename.c',
+ 'quoted filename.c', 'non quoted filename.c');
+ $this->runSingleRename('"quoted filename.c" non-quoted filename.c',
+ '"quoted filename.c"', 'non-quoted filename.c',
+ 'quoted filename.c', 'non-quoted filename.c');
+ $this->runSingleRename('old file with spaces.c new file with spaces.c',
+ 'old file with spaces.c', 'new file with spaces.c',
+ 'old file with spaces.c', 'new file with spaces.c');
+ $this->runSingleRename('old file old file',
+ 'old file old', 'file',
+ 'old file old', 'file');
+ }
}
diff --git a/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff b/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff
new file mode 100644
--- /dev/null
+++ b/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff
@@ -0,0 +1,11 @@
+diff --git src/file dst/file
+index 711edb6..a8cd7be 100644
+--- src/file
++++ dst/file
+@@ -5,5 +5,5 @@
+ some context
+ other context
+-old value
++new value
+ some context
+ other context
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Mar 5, 11:10 AM (13 h, 43 m)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7225284
Default Alt Text
D16405.diff (13 KB)
Attached To
Mode
D16405: Parse git renames with inconsistent quoting or custom prefixes
Attached
Detach File
Event Timeline
Log In to Comment