Changeset View
Changeset View
Standalone View
Standalone View
src/parser/ArcanistDiffParser.php
Show First 20 Lines • Show All 282 Lines • ▼ Show 20 Lines | do { | ||||
'commit 59bcc3ad6775562f845953cf01624225', | 'commit 59bcc3ad6775562f845953cf01624225', | ||||
'diff --git', | 'diff --git', | ||||
'--- filename', | '--- filename', | ||||
'diff -r')); | 'diff -r')); | ||||
} | } | ||||
if (isset($match['type'])) { | if (isset($match['type'])) { | ||||
if ($match['type'] == 'diff --git') { | if ($match['type'] == 'diff --git') { | ||||
list($old, $new) = self::splitGitDiffPaths($match['oldnew']); | $filename = self::extractGitCommonFilename($match['oldnew']); | ||||
$match['old'] = $old; | if ($filename !== null) { | ||||
epriestley: By convention, prefer `$filename !== null` over `isset($filename)` to test for null. | |||||
Not Done Inline Actions: :nod: I was patterning off of the other issets in the area. alexmv: :nod: I was patterning off of the other `isset`s in the area. | |||||
$match['cur'] = $new; | $match['old'] = $filename; | ||||
$match['cur'] = $filename; | |||||
} | |||||
} | } | ||||
} | } | ||||
$change = $this->buildChange(idx($match, 'cur')); | $change = $this->buildChange(idx($match, 'cur')); | ||||
if (isset($match['old'])) { | if (isset($match['old'])) { | ||||
$change->setOldPath($match['old']); | $change->setOldPath($match['old']); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,001 Lines • ▼ Show 20 Lines | foreach ($filedata as $path => $data) { | ||||
$imagechanges[$path]->setCurrentFileData($data); | $imagechanges[$path]->setCurrentFileData($data); | ||||
} | } | ||||
$this->changes = $changes; | $this->changes = $changes; | ||||
} | } | ||||
/** | /** | ||||
* Strip prefixes off paths from `git diff`. By default git uses a/ and b/, | |||||
* but you can set `diff.mnemonicprefix` to get a different set of prefixes, | |||||
* or use `--no-prefix`, `--src-prefix` or `--dst-prefix` to set these to | |||||
* other arbitrary values. | |||||
* | |||||
* We strip the default and mnemonic prefixes, and trust the user knows what | |||||
* they're doing in the other cases. | |||||
* | |||||
* @param string Path to strip. | |||||
* @return string Stripped path. | |||||
*/ | |||||
public static function stripGitPathPrefix($path) { | |||||
static $regex; | |||||
if ($regex === null) { | |||||
$prefixes = array( | |||||
// These are the defaults. | |||||
'a/', | |||||
'b/', | |||||
// These show up when you set "diff.mnemonicprefix". | |||||
'i/', | |||||
'c/', | |||||
'w/', | |||||
'o/', | |||||
'1/', | |||||
'2/', | |||||
); | |||||
foreach ($prefixes as $key => $prefix) { | |||||
$prefixes[$key] = preg_quote($prefix, '@'); | |||||
} | |||||
$regex = '@^('.implode('|', $prefixes).')@S'; | |||||
} | |||||
return preg_replace($regex, '', $path); | |||||
} | |||||
/** | |||||
* Split the paths on a "diff --git" line into old and new paths. This | * Extract the filename shared by both paths on a "diff --git" line. This | ||||
* is difficult because they may be ambiguous if the files contain spaces. | * is difficult because they may be ambiguous if the files contain spaces. | ||||
* | * | ||||
* @param string Text from a diff line after "diff --git ". | * @param string Text from a diff line after "diff --git ". | ||||
* @return pair<string, string> Old and new paths. | * @return string|null Common filename, or null if the two paths do not match. | ||||
*/ | */ | ||||
public static function splitGitDiffPaths($paths) { | public static function extractGitCommonFilename($paths) { | ||||
Not Done Inline Actions: This is kind of a junk method name now, but ehh. epriestley: This is kind of a junk method name now, but ehh. | |||||
Not Done Inline Actions: Renamed. alexmv: Renamed. | |||||
$matches = null; | $matches = null; | ||||
$paths = rtrim($paths, "\r\n"); | $paths = rtrim($paths, "\r\n"); | ||||
$patterns = array( | // Try the exact same string twice in a row separated by a | ||||
// Try quoted paths, used for unicode filenames or filenames with quotes. | // space, with an optional prefix. This can hit a false | ||||
'@^(?P<old>"(?:\\\\.|[^"\\\\]+)+") (?P<new>"(?:\\\\.|[^"\\\\]+)+")$@', | // positive for moves from files like "old file old" to "file", | ||||
// but such cases will be caught by the "rename from" / | |||||
// Try paths without spaces. | // "rename to" lines. | ||||
'@^(?P<old>[^ ]+) (?P<new>[^ ]+)$@', | $prefix = '(?:[^/]+/)?'; | ||||
$pattern = | |||||
// Try paths with well-known prefixes. | "@^(?P<old>(?P<oldq>\"?){$prefix}(?P<common>.+)\\k<oldq>)" | ||||
'@^(?P<old>[abicwo12]/.*) (?P<new>[abicwo12]/.*)$@', | ." " | ||||
."(?P<new>(?P<newq>\"?){$prefix}\\k<common>\\k<newq>)$@"; | |||||
Not Done Inline Actions: By convention, prefer the more explicit "{$var}" over "$var" when embedding variables in strings. This should be lint-enforced but T11081 is trickier to resolve than most lint stuff is. epriestley: By convention, prefer the more explicit `"{$var}"` over `"$var"` when embedding variables in… | |||||
Not Done Inline Actions: Sure; fixed, and noted for future changes. alexmv: Sure; fixed, and noted for future changes. | |||||
// Try the exact same string twice in a row separated by a space. | |||||
// This can hit a false positive for moves from files like "old file old" | if (!preg_match($pattern, $paths, $matches)) { | ||||
// to "file", but such a case combined with custom diff prefixes is | // A rename of some form; return null for now, and let the | ||||
// incredibly obscure. | // "rename from" / "rename to" lines fix it up. | ||||
'@^(?P<old>.*) (?P<new>\\1)$@', | return null; | ||||
); | |||||
foreach ($patterns as $pattern) { | |||||
if (preg_match($pattern, $paths, $matches)) { | |||||
break; | |||||
} | |||||
} | |||||
if (!$matches) { | |||||
throw new Exception( | |||||
pht( | |||||
"Input diff contains ambiguous line '%s'. This line is ambiguous ". | |||||
"because there are spaces in the file names, so the parser can not ". | |||||
"determine where the file names begin and end. To resolve this ". | |||||
"ambiguity, use standard prefixes ('a/' and 'b/') when ". | |||||
"generating diffs.", | |||||
"diff --git {$paths}")); | |||||
} | } | ||||
$old = $matches['old']; | // Use the common subpart. There may be ambiguity here: "src/file | ||||
$old = self::unescapeFilename($old); | // dst/file" may _either_ be a prefix-less move, or a change with | ||||
$old = self::stripGitPathPrefix($old); | // two custom prefixes. We assume it is the latter; if it is a | ||||
// rename, diff parsing will update based on the "rename from" / | |||||
$new = $matches['new']; | // "rename to" lines. | ||||
// This re-assembles with the differing prefixes removed, but the | |||||
// quoting from the original. Necessary so we know if we should | |||||
// unescape characters from the common string. | |||||
$new = $matches['newq'].$matches['common'].$matches['newq']; | |||||
$new = self::unescapeFilename($new); | $new = self::unescapeFilename($new); | ||||
$new = self::stripGitPathPrefix($new); | |||||
return array($old, $new); | return $new; | ||||
} | } | ||||
/** | /** | ||||
* Strip the header and footer off a `git-format-patch` diff. | * Strip the header and footer off a `git-format-patch` diff. | ||||
* | * | ||||
* Returns a parseable normal diff and a textual commit message. | * Returns a parseable normal diff and a textual commit message. | ||||
*/ | */ | ||||
Show All 32 Lines |
By convention, prefer $filename !== null over isset($filename) to test for null.