diff --git a/scripts/__init_script__.php b/scripts/__init_script__.php index 742f5dab..9f7486b6 100644 --- a/scripts/__init_script__.php +++ b/scripts/__init_script__.php @@ -1,108 +1,107 @@ addTranslations(array( 'Locally modified path(s) are not included in this revision:' => array( 'A locally modified path is not included in this revision:', 'Locally modified paths are not included in this revision:', ), 'They will NOT be committed. Commit this revision anyway?' => array( 'It will NOT be committed. Commit this revision anyway?', 'They will NOT be committed. Commit this revision anyway?', ), 'Revision includes changes to path(s) that do not exist:' => array( 'Revision includes changes to a path that does not exist:', 'Revision includes changes to paths that do not exist:', ), 'This diff includes file(s) which are not valid UTF-8 (they contain '. 'invalid byte sequences). You can either stop this workflow and fix '. 'these files, or continue. If you continue, these files will be '. 'marked as binary.' => array( 'This diff includes a file which is not valid UTF-8 (it has invalid '. 'byte sequences). You can either stop this workflow and fix it, or '. 'continue. If you continue, this file will be marked as binary.', 'This diff includes files which are not valid UTF-8 (they contain '. 'invalid byte sequences). You can either stop this workflow and fix '. 'these files, or continue. If you continue, these files will be '. 'marked as binary.', ), 'AFFECTED FILE(S)' => array('AFFECTED FILE', 'AFFECTED FILES'), 'Do you want to mark these files as binary and continue?' => array( 'Do you want to mark this file as binary and continue?', 'Do you want to mark these files as binary and continue?', ), 'Do you want to amend these files to the commit?' => array( 'Do you want to amend this file to the commit?', 'Do you want to amend these files to the commit?', ), 'Do you want to add these files to the commit?' 
=> array( 'Do you want to add this file to the commit?', 'Do you want to add these files to the commit?', ), 'line(s)' => array('line', 'lines'), '%d test(s)' => array('%d test', '%d tests'), '%d assertion(s) passed.' => array( '%d assertion passed.', '%d assertions passed.', ), )); phutil_load_library(dirname(dirname(__FILE__)).'/src/'); diff --git a/src/configuration/ArcanistConfiguration.php b/src/configuration/ArcanistConfiguration.php index b5a89f1a..f8dbebe4 100644 --- a/src/configuration/ArcanistConfiguration.php +++ b/src/configuration/ArcanistConfiguration.php @@ -1,268 +1,270 @@ buildAllWorkflows(), $command); } public function buildAllWorkflows() { $workflows_by_name = array(); $workflows_by_class_name = id(new PhutilSymbolLoader()) ->setAncestorClass('ArcanistBaseWorkflow') ->loadObjects(); foreach ($workflows_by_class_name as $class => $workflow) { $name = $workflow->getWorkflowName(); if (isset($workflows_by_name[$name])) { $other = get_class($workflows_by_name[$name]); throw new Exception( "Workflows {$class} and {$other} both implement workflows named ". "{$name}."); } $workflows_by_name[$name] = $workflow; } return $workflows_by_name; } final public function isValidWorkflow($workflow) { return (bool)$this->buildWorkflow($workflow); } public function willRunWorkflow($command, ArcanistBaseWorkflow $workflow) { // This is a hook. } - public function didRunWorkflow($command, ArcanistBaseWorkflow $workflow, - $err) { + public function didRunWorkflow( + $command, + ArcanistBaseWorkflow $workflow, + $err) { + // This is a hook. } public function didAbortWorkflow($command, $workflow, Exception $ex) { // This is a hook. } public function getCustomArgumentsForCommand($command) { return array(); } final public function selectWorkflow( &$command, array &$args, ArcanistConfigurationManager $configuration_manager, PhutilConsole $console) { // First, try to build a workflow with the exact name provided. 
We always // pick an exact match, and do not allow aliases to override it. $workflow = $this->buildWorkflow($command); if ($workflow) { return $workflow; } // If the user has an alias, like 'arc alias dhelp diff help', look it up // and substitute it. We do this only after trying to resolve the workflow // normally to prevent you from doing silly things like aliasing 'alias' // to something else. $aliases = ArcanistAliasWorkflow::getAliases($configuration_manager); list($new_command, $args) = ArcanistAliasWorkflow::resolveAliases( $command, $this, $args, $configuration_manager); $full_alias = idx($aliases, $command, array()); $full_alias = implode(' ', $full_alias); // Run shell command aliases. if (ArcanistAliasWorkflow::isShellCommandAlias($new_command)) { $shell_cmd = substr($full_alias, 1); $console->writeLog( "[alias: 'arc %s' -> $ %s]", $command, $shell_cmd); if ($args) { $err = phutil_passthru('%C %Ls', $shell_cmd, $args); } else { $err = phutil_passthru('%C', $shell_cmd); } exit($err); } // Run arc command aliases. if ($new_command) { $workflow = $this->buildWorkflow($new_command); if ($workflow) { $console->writeLog( "[alias: 'arc %s' -> 'arc %s']\n", $command, $full_alias); $command = $new_command; return $workflow; } } $all = array_keys($this->buildAllWorkflows()); // We haven't found a real command or an alias, so try to locate a command // by unique prefix. $prefixes = $this->expandCommandPrefix($command, $all); if (count($prefixes) == 1) { $command = head($prefixes); return $this->buildWorkflow($command); } else if (count($prefixes) > 1) { $this->raiseUnknownCommand($command, $prefixes); } // We haven't found a real command, alias, or unique prefix. Try similar // spellings. 
$corrected = self::correctCommandSpelling($command, $all, 2); if (count($corrected) == 1) { $console->writeErr( pht( "(Assuming '%s' is the British spelling of '%s'.)", $command, head($corrected))."\n"); $command = head($corrected); return $this->buildWorkflow($command); } else if (count($corrected) > 1) { $this->raiseUnknownCommand($command, $corrected); } $this->raiseUnknownCommand($command); } private function raiseUnknownCommand($command, array $maybe = array()) { $message = pht("Unknown command '%s'. Try 'arc help'.", $command); if ($maybe) { $message .= "\n\n".pht('Did you mean:')."\n"; sort($maybe); foreach ($maybe as $other) { $message .= " ".$other."\n"; } } throw new ArcanistUsageException($message); } private function expandCommandPrefix($command, array $options) { $is_prefix = array(); foreach ($options as $option) { if (strncmp($option, $command, strlen($command)) == 0) { $is_prefix[$option] = true; } } return array_keys($is_prefix); } public static function correctCommandSpelling( $command, array $options, $max_distance) { // Adjust to the scaled edit costs we use below, so "2" roughly means // "2 edits". $max_distance = $max_distance * 3; // These costs are somewhat made up, but the theory is that it is far more // likely you will mis-strike a key ("lans" for "land") or press two keys // out of order ("alnd" for "land") than omit keys or press extra keys. $matrix = id(new PhutilEditDistanceMatrix()) ->setInsertCost(4) ->setDeleteCost(4) ->setReplaceCost(3) ->setTransposeCost(2); return self::correctSpelling($command, $options, $matrix, $max_distance); } public static function correctArgumentSpelling($command, array $options) { $max_distance = 1; // We are stricter with arguments - we allow only one inserted or deleted // character. It is mainly to handle cases like --no-lint versus --nolint // or --reviewer versus --reviewers. 
$matrix = id(new PhutilEditDistanceMatrix()) ->setInsertCost(1) ->setDeleteCost(1) ->setReplaceCost(10); return self::correctSpelling($command, $options, $matrix, $max_distance); } public static function correctSpelling( $input, array $options, PhutilEditDistanceMatrix $matrix, $max_distance) { $distances = array(); $inputv = str_split($input); foreach ($options as $option) { $optionv = str_split($option); $matrix->setSequences($optionv, $inputv); $distances[$option] = $matrix->getEditDistance(); } asort($distances); $best = min($max_distance, reset($distances)); foreach ($distances as $option => $distance) { if ($distance > $best) { unset($distances[$option]); } } // Before filtering, check if we have multiple equidistant matches and // return them if we do. This prevents us from, e.g., matching "alnd" with // both "land" and "amend", then dropping "land" for being too short, and // incorrectly completing to "amend". if (count($distances) > 1) { return array_keys($distances); } foreach ($distances as $option => $distance) { if (strlen($option) < $distance) { unset($distances[$option]); } } return array_keys($distances); } } diff --git a/src/configuration/ArcanistConfigurationManager.php b/src/configuration/ArcanistConfigurationManager.php index 38caf2e6..9d5ab534 100644 --- a/src/configuration/ArcanistConfigurationManager.php +++ b/src/configuration/ArcanistConfigurationManager.php @@ -1,340 +1,338 @@ workingCopy = $working_copy; } /* -( Get config )--------------------------------------------------------- */ const CONFIG_SOURCE_RUNTIME = 'runtime'; const CONFIG_SOURCE_LOCAL = 'local'; const CONFIG_SOURCE_PROJECT = 'project'; const CONFIG_SOURCE_USER = 'user'; const CONFIG_SOURCE_SYSTEM = 'system'; const CONFIG_SOURCE_DEFAULT = 'default'; public function getProjectConfig($key) { if ($this->workingCopy) { return $this->workingCopy->getProjectConfig($key); } return null; } public function getLocalConfig($key) { if ($this->workingCopy) { return 
$this->workingCopy->getLocalConfig($key); } return null; } public function getWorkingCopyIdentity() { return $this->workingCopy; } /** * Read a configuration directive from any available configuration source. * This includes the directive in local, user and system configuration in * addition to project configuration, and configuration provided as command * arguments ("runtime"). * The precedence is runtime > local > project > user > system * * @param key Key to read. * @param wild Default value if key is not found. * @return wild Value, or default value if not found. * * @task config */ public function getConfigFromAnySource($key, $default = null) { $all = $this->getConfigFromAllSources($key); return empty($all) ? $default : head($all); } /** * For the advanced case where you want customized configuration handling. * * Reads the configuration from all available sources, returning a map (array) * of results, with the source as key. Missing values will not be in the map, * so an empty array will be returned if no results are found. * * The map is ordered by the canonical sources precedence, which is: * runtime > local > project > user > system * * @param key Key to read * @return array Mapping of source => value read. Sources with no value are * not in the array. 
*
   * @task config
   */
  public function getConfigFromAllSources($key) {
    $found = array();
    $settings = new ArcanistSettings();

    // Sources are consulted from highest to lowest precedence, so the
    // insertion order of the map reflects that precedence. Each non-null raw
    // value is passed through ArcanistSettings::willReadValue() as soon as it
    // is read, before the next source is consulted.

    $runtime_value = idx($this->runtimeConfig, $key);
    if ($runtime_value !== null) {
      $found[self::CONFIG_SOURCE_RUNTIME] =
        $settings->willReadValue($key, $runtime_value);
    }

    $local_value = $this->getLocalConfig($key);
    if ($local_value !== null) {
      $found[self::CONFIG_SOURCE_LOCAL] =
        $settings->willReadValue($key, $local_value);
    }

    $project_value = $this->getProjectConfig($key);
    if ($project_value !== null) {
      $found[self::CONFIG_SOURCE_PROJECT] =
        $settings->willReadValue($key, $project_value);
    }

    $user_value = idx($this->readUserArcConfig(), $key);
    if ($user_value !== null) {
      $found[self::CONFIG_SOURCE_USER] =
        $settings->willReadValue($key, $user_value);
    }

    $system_value = idx($this->readSystemArcConfig(), $key);
    if ($system_value !== null) {
      $found[self::CONFIG_SOURCE_SYSTEM] =
        $settings->willReadValue($key, $system_value);
    }

    // Defaults are copied as-is; unlike the other sources they are not
    // passed through willReadValue(). A default is reported whenever the key
    // exists in the default map, even if its value is null.
    $defaults = $this->readDefaultConfig();
    if (array_key_exists($key, $defaults)) {
      $found[self::CONFIG_SOURCE_DEFAULT] = $defaults[$key];
    }

    return $found;
  }

  /**
   * Store a value in the runtime configuration map. Runtime values are the
   * first source consulted by @{method:getConfigFromAllSources}.
   *
   * @param key Key to set.
   * @param value Value to associate with the key.
   * @return this
   *
   * @task config
   */
  public function setRuntimeConfig($key, $value) {
    $this->runtimeConfig[$key] = $value;
    return $this;
  }


/* -( Read/write config )--------------------------------------------------- */


  /**
   * Read the local configuration via the working copy.
   *
   * @return wild Whatever the working copy's readLocalArcConfig() returns,
   *              or an empty array when there is no working copy.
   */
  public function readLocalArcConfig() {
    if (!$this->workingCopy) {
      return array();
    }

    return $this->workingCopy->readLocalArcConfig();
  }

  /**
   * Write the local configuration via the working copy.
   *
   * Throws an Exception when there is no working copy to write to.
   *
   * @param array Configuration to hand to the working copy.
   * @return wild Whatever the working copy's writeLocalArcConfig() returns.
   */
  public function writeLocalArcConfig(array $config) {
    if (!$this->workingCopy) {
      throw new Exception(pht('No working copy to write config to!'));
    }

    return $this->workingCopy->writeLocalArcConfig($config);
  }

  /**
   * This is probably not the method you're looking for; try
   * @{method:readUserArcConfig}.
*
   * Reads the file at @{method:getUserConfigurationFileLocation}, decodes it
   * as JSON and stores the decoded result in the user config cache. While a
   * cached value is present the file is not read again.
   *
   * On non-Windows systems the file mode is checked first. If any bit in
   * 0177 is set, the user is prompted to chmod the file to 600; declining
   * raises an ArcanistUsageException. If the file permissions cannot be
   * read, an Exception is raised. If the file is not valid JSON, a
   * PhutilProxyException wrapping the parser exception is raised.
   *
   * @return wild Decoded contents of the user configuration file, or an
   *              empty array if the file does not exist.
   */
  public function readUserConfigurationFile() {
    if ($this->userConfigCache === null) {
      $user_config = array();
      $user_config_path = $this->getUserConfigurationFileLocation();

      $console = PhutilConsole::getConsole();

      if (Filesystem::pathExists($user_config_path)) {
        $console->writeLog(
          "%s\n",
          pht(
            'Config: Reading user configuration file "%s"...',
            $user_config_path));

        if (!phutil_is_windows()) {
          $mode = fileperms($user_config_path);
          if (!$mode) {
            throw new Exception(
              pht(
                'Unable to read file permissions for "%s"!',
                $user_config_path));
          }
          if ($mode & 0177) {
            // Mode should allow only owner access.
            $prompt = "File permissions on your ~/.arcrc are too open. ".
                      "Fix them by chmod'ing to 600?";
            if (!phutil_console_confirm($prompt, $default_no = false)) {
              throw new ArcanistUsageException(
                'Set ~/.arcrc to file mode 600.');
            }
            execx('chmod 600 %s', $user_config_path);

            // Drop the stat cache so we don't read the old permissions if
            // we end up here again. If we don't do this, we may prompt the user
            // to fix permissions multiple times.
            clearstatcache();
          }
        }

        $user_config_data = Filesystem::readFile($user_config_path);
        try {
          $user_config = phutil_json_decode($user_config_data);
        } catch (PhutilJSONParserException $ex) {
          // Hand the parser exception to PhutilProxyException as the proxied
          // exception (second argument), matching readSystemArcConfig().
          // Concatenating it onto the message would stringify the exception
          // and leave the required second constructor argument missing.
          throw new PhutilProxyException(
            "Your '~/.arcrc' file is not a valid JSON file.",
            $ex);
        }
      } else {
        $console->writeLog(
          "%s\n",
          pht(
            'Config: Did not find user configuration at "%s".',
            $user_config_path));
      }

      $this->userConfigCache = $user_config;
    }

    return $this->userConfigCache;
  }

  /**
   * This is probably not the method you're looking for; try
   * @{method:writeUserArcConfig}.
*/ public function writeUserConfigurationFile($config) { $json_encoder = new PhutilJSON(); $json = $json_encoder->encodeFormatted($config); $path = $this->getUserConfigurationFileLocation(); Filesystem::writeFile($path, $json); if (!phutil_is_windows()) { execx('chmod 600 %s', $path); } } public function setUserConfigurationFileLocation($custom_arcrc) { if (!Filesystem::pathExists($custom_arcrc)) { throw new Exception( 'Custom arcrc file was specified, but it was not found!'); } $this->customArcrcFilename = $custom_arcrc; $this->userConfigCache = null; } public function getUserConfigurationFileLocation() { if (strlen($this->customArcrcFilename)) { return $this->customArcrcFilename; } if (phutil_is_windows()) { return getenv('APPDATA').'/.arcrc'; } else { return getenv('HOME').'/.arcrc'; } } public function readUserArcConfig() { return idx($this->readUserConfigurationFile(), 'config', array()); } public function writeUserArcConfig(array $options) { $config = $this->readUserConfigurationFile(); $config['config'] = $options; $this->writeUserConfigurationFile($config); } public function getSystemArcConfigLocation() { if (phutil_is_windows()) { return Filesystem::resolvePath( 'Phabricator/Arcanist/config', getenv('ProgramData')); } else { return '/etc/arcconfig'; } } public function readSystemArcConfig() { static $system_config; if ($system_config === null) { $system_config = array(); $system_config_path = $this->getSystemArcConfigLocation(); $console = PhutilConsole::getConsole(); if (Filesystem::pathExists($system_config_path)) { $console->writeLog( "%s\n", pht( 'Config: Reading system configuration file "%s"...', $system_config_path)); $file = Filesystem::readFile($system_config_path); try { $system_config = phutil_json_decode($file); } catch (PhutilJSONParserException $ex) { throw new PhutilProxyException( pht( "Your '%s' file is not a valid JSON file.", $system_config_path), $ex); } } else { $console->writeLog( "%s\n", pht( 'Config: Did not find system 
configuration at "%s".', $system_config_path)); } } return $system_config; } public function applyRuntimeArcConfig($args) { $arcanist_settings = new ArcanistSettings(); $options = $args->getArg('config'); foreach ($options as $opt) { $opt_config = preg_split('/=/', $opt, 2); if (count($opt_config) !== 2) { throw new ArcanistUsageException("Argument was '{$opt}', but must be ". "'name=value'. For example, history.immutable=true"); } list($key, $value) = $opt_config; $value = $arcanist_settings->willWriteValue($key, $value); $this->setRuntimeConfig($key, $value); } return $this->runtimeConfig; } public function readDefaultConfig() { $settings = new ArcanistSettings(); return $settings->getDefaultSettings(); } } diff --git a/src/configuration/ArcanistSettings.php b/src/configuration/ArcanistSettings.php index a89ef574..180b3673 100644 --- a/src/configuration/ArcanistSettings.php +++ b/src/configuration/ArcanistSettings.php @@ -1,333 +1,329 @@ array( 'type' => 'string', 'help' => 'The URI of a Phabricator install to connect to by default, if '. 'arc is run in a project without a Phabricator URI or run outside '. 'of a project.', 'example' => '"http://phabricator.example.com/"', ), 'base' => array( 'type' => 'string', 'help' => 'Base commit ruleset to invoke when determining the start of a '. 'commit range. See "Arcanist User Guide: Commit Ranges" for '. 'details.', 'example' => '"arc:amended, arc:prompt"', ), 'load' => array( 'type' => 'list', 'legacy' => 'phutil_libraries', 'help' => 'A list of paths to phutil libraries that should be loaded at '. 'startup. This can be used to make classes available, like lint or '. 'unit test engines.', 'example' => '["/var/arc/customlib/src"]', 'default' => array(), ), 'repository.callsign' => array( 'type' => 'string', 'example' => '"X"', 'help' => pht( 'Associate the working copy with a specific Phabricator repository. '. 'Normally, arc can figure this association out on its own, but if '. 
'your setup is unusual you can use this option to tell it what the '. 'desired value is.'), ), 'phabricator.uri' => array( 'type' => 'string', 'legacy' => 'conduit_uri', 'example' => '"https://phabricator.mycompany.com/"', 'help' => pht( 'Associates this working copy with a specific installation of '. 'Phabricator.'), ), 'project.name' => array( 'type' => 'string', 'legacy' => 'project_id', 'example' => '"arcanist"', 'help' => pht( 'Associates this working copy with a named Arcanist Project. '. 'This is primarily useful if you use SVN and have several different '. 'projects in the same repository.'), ), 'lint.engine' => array( 'type' => 'string', 'legacy' => 'lint_engine', 'help' => 'The name of a default lint engine to use, if no lint engine is '. 'specified by the current project.', 'example' => '"ExampleLintEngine"', ), 'unit.engine' => array( 'type' => 'string', 'legacy' => 'unit_engine', 'help' => 'The name of a default unit test engine to use, if no unit test '. 'engine is specified by the current project.', 'example' => '"ExampleUnitTestEngine"', ), 'arc.feature.start.default' => array( 'type' => 'string', 'help' => 'The name of the default branch to create the new feature branch '. 'off of.', 'example' => '"develop"', ), 'arc.land.onto.default' => array( 'type' => 'string', 'help' => 'The name of the default branch to land changes onto when '. '`arc land` is run.', 'example' => '"develop"', ), 'arc.land.update.default' => array( 'type' => 'string', 'help' => 'The default strategy to use when arc land updates the feature '. 'branch. Supports \'rebase\' and \'merge\' strategies.', 'example' => '"rebase"', ), 'arc.lint.cache' => array( 'type' => 'bool', 'help' => "Enable the lint cache by default. When enabled, 'arc lint' ". "attempts to use cached results if possible. Currently, the cache ". "is not always invalidated correctly and may cause 'arc lint' to ". "report incorrect results, particularly while developing linters. ". 
"This is probably worth enabling only if your linters are very slow.", 'example' => 'false', 'default' => false, ), 'history.immutable' => array( 'type' => 'bool', 'legacy' => 'immutable_history', 'help' => 'If true, arc will never change repository history (e.g., through '. 'amending or rebasing). Defaults to true in Mercurial and false in '. 'Git. This setting has no effect in Subversion.', 'example' => 'false', 'default' => false, ), 'editor' => array( 'type' => 'string', 'help' => "Command to use to invoke an interactive editor, like 'nano' or ". "'vim'. This setting overrides the EDITOR environmental variable.", 'example' => '"nano"', ), 'https.cabundle' => array( 'type' => 'string', 'help' => "Path to a custom CA bundle file to be used for arcanist's cURL ". "calls. This is used primarily when your conduit endpoint is ". "behind https signed by your organization's internal CA.", 'example' => 'support/yourca.pem' ), 'https.blindly-trust-domains' => array( 'type' => 'list', 'help' => 'List of domains to blindly trust SSL certificates for. '. 'Disables peer verification.', 'example' => '["secure.mycompany.com"]', 'default' => array(), ), 'browser' => array( 'type' => 'string', 'help' => 'Command to use to invoke a web browser.', 'example' => '"gnome-www-browser"', ), 'events.listeners' => array( 'type' => 'list', 'help' => 'List of event listener classes to install at startup.', 'example' => '["ExampleEventListener"]', 'default' => array(), ), 'http.basicauth.user' => array( 'type' => 'string', 'help' => 'Username to use for basic auth over http transports', 'example' => '"bob"', ), 'http.basicauth.pass' => array( 'type' => 'string', 'help' => 'Password to use for basic auth over http transports', 'example' => '"bobhasasecret"', ), 'arc.autostash' => array( 'type' => 'bool', 'help' => 'Whether arc should permit the automatic stashing of changes in '. - 'the working directory when requiring a clean working copy. '. 
+ 'the working directory when requiring a clean working copy. '. 'This option should only be used when users understand how '. 'to restore their working directory from the local stash if '. 'an Arcanist operation causes an unrecoverable error.', 'example' => 'false', 'default' => false, ), ); } private function getOption($key) { return idx($this->getOptions(), $key, array()); } public function getAllKeys() { return array_keys($this->getOptions()); } public function getHelp($key) { return idx($this->getOption($key), 'help'); } public function getExample($key) { return idx($this->getOption($key), 'example'); } public function getType($key) { return idx($this->getOption($key), 'type', 'wild'); } public function getLegacyName($key) { return idx($this->getOption($key), 'legacy'); } public function getDefaultSettings() { $defaults = array(); foreach ($this->getOptions() as $key => $option) { if (array_key_exists('default', $option)) { $defaults[$key] = $option['default']; } } return $defaults; } public function willWriteValue($key, $value) { $type = $this->getType($key); switch ($type) { case 'bool': if (strtolower($value) === 'false' || strtolower($value) === 'no' || strtolower($value) === 'off' || $value === '' || $value === '0' || $value === 0 || $value === false) { $value = false; } else if (strtolower($value) === 'true' || strtolower($value) === 'yes' || strtolower($value) === 'on' || $value === '1' || $value === 1 || $value === true) { $value = true; } else { throw new ArcanistUsageException( "Type of setting '{$key}' must be boolean, like 'true' or ". "'false'."); } break; case 'list': if (is_array($value)) { break; } if (is_string($value)) { $list = json_decode($value, true); if (is_array($list)) { $value = $list; break; } } $list_example = '["apple", "banana", "cherry"]'; throw new ArcanistUsageException( "Type of setting '{$key}' must be list. You can specify a list ". 
"in JSON, like: {$list_example}"); case 'string': if (!is_scalar($value)) { throw new ArcanistUsageException( "Type of setting '{$key}' must be string."); } $value = (string)$value; break; case 'wild': break; } return $value; } public function willReadValue($key, $value) { $type = $this->getType($key); switch ($type) { case 'string': if (!is_string($value)) { throw new ArcanistUsageException( "Type of setting '{$key}' must be string."); } break; case 'bool': if ($value !== true && $value !== false) { throw new ArcanistUsageException( "Type of setting '{$key}' must be boolean."); } break; case 'list': if (!is_array($value)) { throw new ArcanistUsageException( "Type of setting '{$key}' must be list."); } break; case 'wild': break; } return $value; } public function formatConfigValueForDisplay($key, $value) { if ($value === false) { return 'false'; } if ($value === true) { return 'true'; } if ($value === null) { return 'null'; } if (is_string($value)) { return '"'.$value.'"'; } if (is_array($value)) { // TODO: Both json_encode() and PhutilJSON do a bad job with one-liners. // PhutilJSON splits them across a bunch of lines, while json_encode() // escapes all kinds of stuff like "/". It would be nice if PhutilJSON // had a mode for pretty one-liners. $value = json_encode($value); // json_encode() unnecessarily escapes "/" to prevent "" stuff, // optimistically unescape it for display to improve readability. 
$value = preg_replace('@(?assertCommandCompletion( array('land'), 'alnd', array('land', 'amend')); $this->assertCommandCompletion( array('branch'), 'brnach', array('branch', 'browse')); $this->assertCommandCompletion( array(), 'test', array('list', 'unit')); $this->assertCommandCompletion( array('list'), 'lists', array('list')); $this->assertCommandCompletion( array('diff'), 'dfif', array('diff')); $this->assertCommandCompletion( array('unit'), 'uint', array('unit', 'lint', 'list')); $this->assertCommandCompletion( array('list', 'lint'), 'nilt', array('unit', 'lint', 'list')); } private function assertCommandCompletion($expect, $input, $commands) { $result = ArcanistConfiguration::correctCommandSpelling( $input, $commands, 2); sort($result); sort($expect); $commands = implode(', ', $commands); $this->assertEqual( $expect, $result, "Correction of {$input} against: {$commands}"); } - public function testArgumentCompletion() { $this->assertArgumentCompletion( array('nolint'), 'no-lint', array('nolint', 'nounit')); $this->assertArgumentCompletion( array('reviewers'), 'reviewer', array('reviewers', 'cc')); $this->assertArgumentCompletion( array(), 'onlint', array('nolint')); $this->assertArgumentCompletion( array(), 'nolind', array('nolint')); } private function assertArgumentCompletion($expect, $input, $arguments) { $result = ArcanistConfiguration::correctArgumentSpelling( $input, $arguments); sort($result); sort($expect); $arguments = implode(', ', $arguments); $this->assertEqual( $expect, $result, "Correction of {$input} against: {$arguments}"); } } diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php index 4fdff267..2c89599e 100644 --- a/src/difference/ArcanistDiffUtils.php +++ b/src/difference/ArcanistDiffUtils.php @@ -1,256 +1,254 @@ '; $highlight_c = ''; $is_html = false; if ($str instanceof PhutilSafeHTML) { $is_html = true; $str = $str->getHTMLContent(); } $n = strlen($str); for ($i = 0; $i < $n; $i++) { if ($p == $e) { do { 
if (empty($intra_stack)) { $buf .= substr($str, $i); break 2; } $stack = array_shift($intra_stack); $s = $e; $e += $stack[1]; } while ($stack[0] == 0); } if (!$highlight && !$tag && !$ent && $p == $s) { $buf .= $highlight_o; $highlight = true; } if ($str[$i] == '<') { $tag = true; if ($highlight) { $buf .= $highlight_c; } } if (!$tag) { if ($str[$i] == '&') { $ent = true; } if ($ent && $str[$i] == ';') { $ent = false; } if (!$ent) { $p++; } } $buf .= $str[$i]; if ($tag && $str[$i] == '>') { $tag = false; if ($highlight) { $buf .= $highlight_o; } } if ($highlight && ($p == $e || $i == $n - 1)) { $buf .= $highlight_c; $highlight = false; } } if ($is_html) { return phutil_safe_html($buf); } return $buf; } private static function collapseIntralineRuns($runs) { $count = count($runs); for ($ii = 0; $ii < $count - 1; $ii++) { if ($runs[$ii][0] == $runs[$ii + 1][0]) { $runs[$ii + 1][1] += $runs[$ii][1]; unset($runs[$ii]); } } return array_values($runs); } public static function generateEditString(array $ov, array $nv, $max = 80) { return id(new PhutilEditDistanceMatrix()) ->setComputeString(true) ->setAlterCost(1 / ($max * 2)) ->setReplaceCost(2) ->setMaximumLength($max) ->setSequences($ov, $nv) ->getEditString(); } public static function computeIntralineEdits($o, $n) { if (preg_match('/[\x80-\xFF]/', $o.$n)) { $ov = phutil_utf8v_combined($o); $nv = phutil_utf8v_combined($n); $multibyte = true; } else { $ov = str_split($o); $nv = str_split($n); $multibyte = false; } $result = self::generateEditString($ov, $nv); // Smooth the string out, by replacing short runs of similar characters // with 'x' operations. This makes the result more readable to humans, since // there are fewer choppy runs of short added and removed substrings. 
do { $original = $result; $result = preg_replace( '/([xdi])(s{3})([xdi])/', '$1xxx$3', $result); $result = preg_replace( '/([xdi])(s{2})([xdi])/', '$1xx$3', $result); $result = preg_replace( '/([xdi])(s{1})([xdi])/', '$1x$3', $result); } while ($result != $original); // Now we have a character-based description of the edit. We need to // convert into a byte-based description. Walk through the edit string and // adjust each operation to reflect the number of bytes in the underlying // character. $o_pos = 0; $n_pos = 0; $result_len = strlen($result); $o_run = array(); $n_run = array(); $old_char_len = 1; $new_char_len = 1; for ($ii = 0; $ii < $result_len; $ii++) { $c = $result[$ii]; if ($multibyte) { $old_char_len = strlen($ov[$o_pos]); $new_char_len = strlen($nv[$n_pos]); } switch ($c) { case 's': case 'x': $byte_o = $old_char_len; $byte_n = $new_char_len; $o_pos++; $n_pos++; break; case 'i': $byte_o = 0; $byte_n = $new_char_len; $n_pos++; break; case 'd': $byte_o = $old_char_len; $byte_n = 0; $o_pos++; break; } if ($byte_o) { if ($c == 's') { $o_run[] = array(0, $byte_o); } else { $o_run[] = array(1, $byte_o); } } if ($byte_n) { if ($c == 's') { $n_run[] = array(0, $byte_n); } else { $n_run[] = array(1, $byte_n); } } } $o_run = self::collapseIntralineRuns($o_run); $n_run = self::collapseIntralineRuns($n_run); return array($o_run, $n_run); } } diff --git a/src/difference/__tests__/ArcanistDiffUtilsTestCase.php b/src/difference/__tests__/ArcanistDiffUtilsTestCase.php index e07da6a4..d928e539 100644 --- a/src/difference/__tests__/ArcanistDiffUtilsTestCase.php +++ b/src/difference/__tests__/ArcanistDiffUtilsTestCase.php @@ -1,242 +1,242 @@ assertEqual( $test[2], ArcanistDiffUtils::generateEditString( str_split($test[0]), str_split($test[1])), "'{$test[0]}' vs '{$test[1]}'"); } $utf8_tests = array( array( 'GrumpyCat', "Grumpy\xE2\x98\x83at", 'ssssssxss', ), ); foreach ($tests as $test) { $this->assertEqual( $test[2], ArcanistDiffUtils::generateEditString( 
phutil_utf8v_combined($test[0]), phutil_utf8v_combined($test[1])), "'{$test[0]}' vs '{$test[1]}' (utf8)"); } } public function testGenerateUTF8IntralineDiff() { // Both Strings Empty. $left = ''; $right = ''; $result = array( - array(array(0, 0)), - array(array(0, 0)) - ); + array(array(0, 0)), + array(array(0, 0)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // Left String Empty. $left = ''; $right = "Grumpy\xE2\x98\x83at"; $result = array( - array(array(0, 0)), - array(array(0, 11)) - ); + array(array(0, 0)), + array(array(0, 11)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // Right String Empty. $left = "Grumpy\xE2\x98\x83at"; $right = ''; $result = array( - array(array(0, 11)), - array(array(0, 0)) - ); + array(array(0, 11)), + array(array(0, 0)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // Both Strings Same $left = "Grumpy\xE2\x98\x83at"; $right = "Grumpy\xE2\x98\x83at"; $result = array( - array(array(0, 11)), - array(array(0, 11)) - ); + array(array(0, 11)), + array(array(0, 11)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // Both Strings are different. $left = "Grumpy\xE2\x98\x83at"; $right = 'Smiling Dog'; $result = array( - array(array(1, 11)), - array(array(1, 11)) - ); + array(array(1, 11)), + array(array(1, 11)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // String with one difference in the middle. $left = 'GrumpyCat'; $right = "Grumpy\xE2\x98\x83at"; $result = array( - array(array(0, 6), array(1, 1), array(0, 2)), - array(array(0, 6), array(1, 3), array(0, 2)) - ); + array(array(0, 6), array(1, 1), array(0, 2)), + array(array(0, 6), array(1, 3), array(0, 2)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // Differences in middle, not connected to each other. 
$left = 'GrumpyCat'; $right = "Grumpy\xE2\x98\x83a\xE2\x98\x83t"; $result = array( - array(array(0, 6), array(1, 2), array(0, 1)), - array(array(0, 6), array(1, 7), array(0, 1)) - ); + array(array(0, 6), array(1, 2), array(0, 1)), + array(array(0, 6), array(1, 7), array(0, 1)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // String with difference at the beginning. $left = "GrumpyC\xE2\x98\x83t"; $right = "DrumpyC\xE2\x98\x83t"; $result = array( - array(array(1, 1), array(0, 10)), - array(array(1, 1), array(0, 10)) - ); + array(array(1, 1), array(0, 10)), + array(array(1, 1), array(0, 10)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // String with difference at the end. $left = "GrumpyC\xE2\x98\x83t"; $right = "GrumpyC\xE2\x98\x83P"; $result = array( - array(array(0, 10), array(1, 1)), - array(array(0, 10), array(1, 1)) - ); + array(array(0, 10), array(1, 1)), + array(array(0, 10), array(1, 1)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // String with differences at the beginning and end. $left = "GrumpyC\xE2\x98\x83t"; $right = "DrumpyC\xE2\x98\x83P"; $result = array( - array(array(1, 1), array(0, 9), array(1, 1)), - array(array(1, 1), array(0, 9), array(1, 1)) - ); + array(array(1, 1), array(0, 9), array(1, 1)), + array(array(1, 1), array(0, 9), array(1, 1)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); // This is a unicode combining character, "COMBINING DOUBLE TILDE". 
$cc = "\xCD\xA0"; $left = 'Senor'; $right = "Sen{$cc}or"; $result = array( - array(array(0, 2), array(1, 1), array(0, 2)), - array(array(0, 2), array(1, 3), array(0, 2)) - ); + array(array(0, 2), array(1, 1), array(0, 2)), + array(array(0, 2), array(1, 3), array(0, 2)), + ); $this->assertEqual( $result, ArcanistDiffUtils::generateIntralineDiff($left, $right)); } + } diff --git a/src/differential/ArcanistDifferentialCommitMessage.php b/src/differential/ArcanistDifferentialCommitMessage.php index 4381556b..ab0d49c1 100644 --- a/src/differential/ArcanistDifferentialCommitMessage.php +++ b/src/differential/ArcanistDifferentialCommitMessage.php @@ -1,114 +1,112 @@ rawCorpus = $corpus; $match = null; if (preg_match('/^Differential Revision:\s*(.*)/im', $corpus, $match)) { $revision_id = trim($match[1]); if (strlen($revision_id)) { $uri = new PhutilURI($revision_id); $path = $uri->getPath(); $path = trim($path, '/'); if (preg_match('/^D\d+$/', $path)) { $obj->revisionID = (int)trim($path, 'D'); } else { throw new ArcanistUsageException( "Invalid 'Differential Revision' field. The field should have a ". "Phabricator URI like 'http://phabricator.example.com/D123', ". 
"but has '{$match[1]}'."); } } } $pattern = '/^git-svn-id:\s*([^@]+)@(\d+)\s+(.*)$/m'; if (preg_match($pattern, $corpus, $match)) { $obj->gitSVNBaseRevision = $match[1].'@'.$match[2]; $obj->gitSVNBasePath = $match[1]; $obj->gitSVNUUID = $match[3]; } return $obj; } public function getRawCorpus() { return $this->rawCorpus; } public function getRevisionID() { return $this->revisionID; } public function pullDataFromConduit( ConduitClient $conduit, $partial = false) { $result = $conduit->callMethodSynchronous( 'differential.parsecommitmessage', array( 'corpus' => $this->rawCorpus, 'partial' => $partial, )); $this->fields = $result['fields']; if (!empty($result['errors'])) { throw new ArcanistDifferentialCommitMessageParserException( $result['errors']); } return $this; } public function getFieldValue($key) { if (array_key_exists($key, $this->fields)) { return $this->fields[$key]; } return null; } public function setFieldValue($key, $value) { $this->fields[$key] = $value; return $this; } public function getFields() { return $this->fields; } public function getGitSVNBaseRevision() { return $this->gitSVNBaseRevision; } public function getGitSVNBasePath() { return $this->gitSVNBasePath; } public function getGitSVNUUID() { return $this->gitSVNUUID; } public function getChecksum() { $fields = array_filter($this->fields); ksort($fields); $fields = json_encode($fields); return md5($fields); } } diff --git a/src/differential/ArcanistDifferentialCommitMessageParserException.php b/src/differential/ArcanistDifferentialCommitMessageParserException.php index 680811dc..5227b021 100644 --- a/src/differential/ArcanistDifferentialCommitMessageParserException.php +++ b/src/differential/ArcanistDifferentialCommitMessageParserException.php @@ -1,21 +1,19 @@ parserErrors = $errors; parent::__construct(head($errors)); } public function getParserErrors() { return $this->parserErrors; } } diff --git a/src/events/constant/ArcanistEventType.php b/src/events/constant/ArcanistEventType.php index 
8c988dbd..d42dd4a4 100644 --- a/src/events/constant/ArcanistEventType.php +++ b/src/events/constant/ArcanistEventType.php @@ -1,15 +1,16 @@ executeCommand($command); * * The advantage of using this complex mechanism is that commands run in this * way do not need to pay the startup overhead for hg and the Python runtime, * which is often on the order of 100ms or more per command. * * @task construct Construction * @task config Configuration * @task exec Executing Mercurial Commands * @task internal Internals */ final class ArcanistHgProxyClient { private $workingCopy; private $server; private $skipHello; /* -( Construction )------------------------------------------------------- */ /** * Build a new client. This client is bound to a working copy. A server * must already be running on this working copy for the client to work. * * @param string Path to a Mercurial working copy. * * @task construct */ public function __construct($working_copy) { $this->workingCopy = Filesystem::resolvePath($working_copy); } /* -( Configuration )------------------------------------------------------ */ /** * When connecting, do not expect the "capabilities" message. * * @param bool True to skip the "capabilities" message. * @return this * * @task config */ public function setSkipHello($skip) { $this->skipHello = $skip; return $this; } /* -( Executing Mercurial Commands )--------------------------------------- */ /** * Execute a command (given as a list of arguments) via the command server. * * @param list A list of command arguments, like "log", "-l", "5". * @return tuple Return code, stdout and stderr. * * @task exec */ public function executeCommand(array $argv) { if (!$this->server) { try { $server = $this->connectToDaemon(); } catch (Exception $ex) { $this->launchDaemon(); $server = $this->connectToDaemon(); } $this->server = $server; } $server = $this->server; // Note that we're adding "runcommand" to make the server run the command.
// Theoretically the server supports other capabilities, but in practice // we are only concerned with "runcommand". $server->write(array_merge(array('runcommand'), $argv)); // We'll get back one or more blocks of response data, ending with an 'r' // block which indicates the return code. Reconstitute these into stdout, // stderr and a return code. $stdout = ''; $stderr = ''; $err = 0; $done = false; while ($message = $server->waitForMessage()) { // The $server channel handles decoding of the wire format and gives us // messages which look like this: // // array('o', ''); list($channel, $data) = $message; switch ($channel) { case 'o': $stdout .= $data; break; case 'e': $stderr .= $data; break; case 'd': // TODO: Do something with this? This is the 'debug' channel. break; case 'r': // NOTE: This little dance is because the value is emitted as a // big-endian signed 32-bit long. PHP has no flag to unpack() that // can unpack these, so we unpack a big-endian unsigned long, then // repack it as a machine-order unsigned long, then unpack it as // a machine-order signed long. This appears to produce the desired // result. $err = head(unpack('N', $data)); $err = pack('L', $err); $err = head(unpack('l', $err)); $done = true; break; } if ($done) { break; } } return array($err, $stdout, $stderr); } /* -( Internals )---------------------------------------------------------- */ /** * @task internal */ private function connectToDaemon() { $errno = null; $errstr = null; $socket_path = ArcanistHgProxyServer::getPathToSocket($this->workingCopy); $socket = @stream_socket_client('unix://'.$socket_path, $errno, $errstr); if ($errno || !$socket) { throw new Exception( "Unable to connect socket! Error #{$errno}: {$errstr}"); } $channel = new PhutilSocketChannel($socket); $server = new ArcanistHgServerChannel($channel); if (!$this->skipHello) { // The protocol includes a "hello" message with capability and encoding // information.
Read and discard it, we use only the "runcommand" // capability which is guaranteed to be available. $hello = $server->waitForMessage(); } return $server; } /** * @task internal */ private function launchDaemon() { $root = dirname(phutil_get_library_root('arcanist')); $bin = $root.'/scripts/hgdaemon/hgdaemon_server.php'; $proxy = new ExecFuture( '%s %s --idle-limit 15 --quiet %C', $bin, $this->workingCopy, $this->skipHello ? '--skip-hello' : null); $proxy->resolvex(); } } diff --git a/src/hgdaemon/ArcanistHgProxyServer.php b/src/hgdaemon/ArcanistHgProxyServer.php index 2cef30eb..aa7a7402 100644 --- a/src/hgdaemon/ArcanistHgProxyServer.php +++ b/src/hgdaemon/ArcanistHgProxyServer.php @@ -1,487 +1,486 @@ workingCopy = Filesystem::resolvePath($working_copy); } /* -( Configuration )------------------------------------------------------ */ /** * Disable status messages to stdout. Controlled with `--quiet`. * * @param bool True to disable status messages. * @return this * * @task config */ public function setQuiet($quiet) { $this->quiet = $quiet; return $this; } /** * Configure a client limit. After serving this many clients, the server * will exit. Controlled with `--client-limit`. * * You can use `--client-limit 1` with `--xprofile` and `--do-not-daemonize` * to profile the server. * * @param int Client limit, or 0 to disable limit. * @return this * * @task config */ public function setClientLimit($limit) { $this->clientLimit = $limit; return $this; } /** * Configure an idle time limit. After this many seconds idle, the server * will exit. Controlled with `--idle-limit`. * * @param int Idle limit, or 0 to disable limit. * @return this * * @task config */ public function setIdleLimit($limit) { $this->idleLimit = $limit; return $this; } /** * When clients connect, do not send the "capabilities" message expected by * the Mercurial protocol. 
This deviates from the protocol and will only work * if the clients are also configured not to expect the message, but slightly * improves performance. Controlled with --skip-hello. * * @param bool True to skip the "capabilities" message. * @return this * * @task config */ public function setSkipHello($skip) { $this->skipHello = $skip; return $this; } /** * Configure whether the server runs in the foreground or daemonizes. * Controlled by --do-not-daemonize. Primarily useful for debugging. * * @param bool True to run in the foreground. * @return this * * @task config */ public function setDoNotDaemonize($do_not_daemonize) { $this->doNotDaemonize = $do_not_daemonize; return $this; } /* -( Serving Requests )--------------------------------------------------- */ /** * Start the server. This method returns after the client limit or idle * limit are exceeded. If neither limit is configured, this method does not * exit. * * @return null * * @task server */ public function start() { // Create the unix domain socket in the working copy to listen for clients. $socket = $this->startWorkingCopySocket(); $this->socket = $socket; if (!$this->doNotDaemonize) { $this->daemonize(); } // Start the Mercurial process which we'll forward client requests to. $hg = $this->startMercurialProcess(); $clients = array(); $this->log(null, 'Listening'); $this->idleSince = time(); while (true) { // Wait for activity on any active clients, the Mercurial process, or // the listening socket where new clients connect. PhutilChannel::waitForAny( array_merge($clients, array($hg)), array( 'read' => $socket ? array($socket) : array(), 'except' => $socket ? array($socket) : array() )); if (!$hg->update()) { throw new Exception('Server exited unexpectedly!'); } // Accept any new clients. 
while ($socket && ($client = $this->acceptNewClient($socket))) { $clients[] = $client; $key = last_key($clients); $client->setName($key); $this->log($client, 'Connected'); $this->idleSince = time(); // Check if we've hit the client limit. If there's a configured // client limit and we've hit it, stop accepting new connections // and close the socket. $this->lifetimeClientCount++; if ($this->clientLimit) { if ($this->lifetimeClientCount >= $this->clientLimit) { $this->closeSocket(); $socket = null; } } } // Update all the active clients. foreach ($clients as $key => $client) { if ($this->updateClient($client, $hg)) { // In this case, the client is still connected so just move on to // the next one. Otherwise we continue below and handle the disconnect. continue; } $this->log($client, 'Disconnected'); unset($clients[$key]); // If we have a client limit and we've served that many clients, exit. if ($this->clientLimit) { if ($this->lifetimeClientCount >= $this->clientLimit) { if (!$clients) { $this->log(null, 'Exiting (Client Limit)'); return; } } } } // If we have an idle limit and haven't had any activity in at least // that long, exit. if ($this->idleLimit) { $remaining = $this->idleLimit - (time() - $this->idleSince); if ($remaining <= 0) { $this->log(null, 'Exiting (Idle Limit)'); return; } if ($remaining <= 5) { $this->log(null, 'Exiting in '.$remaining.' seconds'); } } } } /** * Update one client, processing any commands it has sent us. We fully * process all commands we've received here before returning to the main * server loop. * * @param ArcanistHgClientChannel The client to update. * @param ArcanistHgServerChannel The Mercurial server. * * @task server */ private function updateClient( ArcanistHgClientChannel $client, ArcanistHgServerChannel $hg) { if (!$client->update()) { // Client has disconnected, don't bother proceeding. return false; } // Read a command from the client if one is available.
Note that we stop // updating other clients or accepting new connections while processing a // command, since there isn't much we can do with them until the server // finishes executing this command. $message = $client->read(); if (!$message) { return true; } $this->log($client, '$ '.$message[0].' '.$message[1]); $t_start = microtime(true); // Forward the command to the server. $hg->write($message); while (true) { PhutilChannel::waitForAny(array($client, $hg)); if (!$client->update() || !$hg->update()) { // If either the client or server has exited, bail. return false; } $response = $hg->read(); if (!$response) { continue; } // Forward the response back to the client. $client->write($response); // If the response was on the 'r'esult channel, it indicates the end // of the command output. We can process the next command (if any // remain) or go back to accepting new connections and servicing // other clients. if ($response[0] == 'r') { // Update the client immediately to try to get the bytes on the wire // as quickly as possible. This gives us slightly more throughput. $client->update(); break; } } // Log the elapsed time. $t_end = microtime(true); $t = 1000000 * ($t_end - $t_start); $this->log($client, '< '.number_format($t, 0).'us'); $this->idleSince = time(); return true; } /* -( Managing Clients )--------------------------------------------------- */ /** * @task client */ public static function getPathToSocket($working_copy) { return $working_copy.'/.hg/hgdaemon-socket'; } /** * @task client */ private function startWorkingCopySocket() { $errno = null; $errstr = null; $socket_path = self::getPathToSocket($this->workingCopy); $socket_uri = 'unix://'.$socket_path; $socket = @stream_socket_server($socket_uri, $errno, $errstr); if ($errno || !$socket) { Filesystem::remove($socket_path); $socket = @stream_socket_server($socket_uri, $errno, $errstr); } if ($errno || !$socket) { throw new Exception( "Unable to start socket! 
Error #{$errno}: {$errstr}"); } $ok = stream_set_blocking($socket, 0); if ($ok === false) { throw new Exception('Unable to set socket nonblocking!'); } return $socket; } /** * @task client */ private function acceptNewClient($socket) { // NOTE: stream_socket_accept() always blocks, even when the socket has // been set nonblocking. $new_client = @stream_socket_accept($socket, $timeout = 0); if (!$new_client) { return null; } $channel = new PhutilSocketChannel($new_client); $client = new ArcanistHgClientChannel($channel); if (!$this->skipHello) { $client->write($this->hello); } return $client; } /* -( Managing Mercurial )------------------------------------------------- */ /** * Starts a Mercurial process which can actually handle requests. * * @return ArcanistHgServerChannel Channel to the Mercurial server. * @task hg */ private function startMercurialProcess() { // NOTE: "cmdserver.log=-" makes Mercurial use the 'd'ebug channel for // log messages. $future = new ExecFuture( 'HGPLAIN=1 hg --config cmdserver.log=- serve --cmdserver pipe'); $future->setCWD($this->workingCopy); $channel = new PhutilExecChannel($future); $hg = new ArcanistHgServerChannel($channel); // The server sends a "hello" message with capability and encoding // information. Save it and forward it to clients when they connect. $this->hello = $hg->waitForMessage(); return $hg; } /* -( Internals )---------------------------------------------------------- */ /** * Close and remove the unix domain socket in the working copy. 
* * @task internal */ public function __destruct() { $this->closeSocket(); } private function closeSocket() { if ($this->socket) { @stream_socket_shutdown($this->socket, STREAM_SHUT_RDWR); @fclose($this->socket); Filesystem::remove(self::getPathToSocket($this->workingCopy)); $this->socket = null; } } private function log($client, $message) { if ($this->quiet) { return; } if ($client) { $message = '[Client '.$client->getName().'] '.$message; } else { $message = '[Server] '.$message; } echo $message."\n"; } private function daemonize() { - // Keep stdout if it's been redirected somewhere, otherwise shut it down. $keep_stdout = false; $keep_stderr = false; if (function_exists('posix_isatty')) { if (!posix_isatty(STDOUT)) { $keep_stdout = true; } if (!posix_isatty(STDERR)) { $keep_stderr = true; } } $pid = pcntl_fork(); if ($pid === -1) { throw new Exception('Unable to fork!'); } else if ($pid) { // We're the parent; exit. First, drop our reference to the socket so // our __destruct() doesn't tear it down; the child will tear it down // later. $this->socket = null; exit(0); } // We're the child; continue. 
fclose(STDIN); if (!$keep_stdout) { fclose(STDOUT); $this->quiet = true; } if (!$keep_stderr) { fclose(STDERR); } } } diff --git a/src/lint/ArcanistLintMessage.php b/src/lint/ArcanistLintMessage.php index 80816fbb..8996c992 100644 --- a/src/lint/ArcanistLintMessage.php +++ b/src/lint/ArcanistLintMessage.php @@ -1,230 +1,228 @@ setPath($dict['path']); $message->setLine($dict['line']); $message->setChar($dict['char']); $message->setCode($dict['code']); $message->setSeverity($dict['severity']); $message->setName($dict['name']); $message->setDescription($dict['description']); if (isset($dict['original'])) { $message->setOriginalText($dict['original']); } if (isset($dict['replacement'])) { $message->setReplacementText($dict['replacement']); } $message->setGranularity(idx($dict, 'granularity')); $message->setOtherLocations(idx($dict, 'locations', array())); if (isset($dict['bypassChangedLineFiltering'])) { /* Restore the flag via its setter so it round-trips with toDictionary(). */ $message->setBypassChangedLineFiltering( $dict['bypassChangedLineFiltering']); } return $message; } public function toDictionary() { return array( 'path' => $this->getPath(), 'line' => $this->getLine(), 'char' => $this->getChar(), 'code' => $this->getCode(), 'severity' => $this->getSeverity(), 'name' => $this->getName(), 'description' => $this->getDescription(), 'original' => $this->getOriginalText(), 'replacement' => $this->getReplacementText(), 'granularity' => $this->getGranularity(), 'locations' => $this->getOtherLocations(), 'bypassChangedLineFiltering' => $this->shouldBypassChangedLineFiltering(), ); } public function setPath($path) { $this->path = $path; return $this; } public function getPath() { return $this->path; } public function setLine($line) { $this->line = $line; return $this; } public function getLine() { return $this->line; } public function setChar($char) { $this->char = $char; return $this; } public function getChar() { return $this->char; } public function setCode($code) { $this->code = $code; return $this; } public function getCode() { return
$this->code; } public function setSeverity($severity) { $this->severity = $severity; return $this; } public function getSeverity() { return $this->severity; } public function setName($name) { $this->name = $name; return $this; } public function getName() { return $this->name; } public function setDescription($description) { $this->description = $description; return $this; } public function getDescription() { return $this->description; } public function setOriginalText($original) { $this->originalText = $original; return $this; } public function getOriginalText() { return $this->originalText; } public function setReplacementText($replacement) { $this->replacementText = $replacement; return $this; } public function getReplacementText() { return $this->replacementText; } /** * @param dict Keys 'path', 'line', 'char', 'original'. */ public function setOtherLocations(array $locations) { assert_instances_of($locations, 'array'); $this->otherLocations = $locations; return $this; } public function getOtherLocations() { return $this->otherLocations; } public function isError() { return $this->getSeverity() == ArcanistLintSeverity::SEVERITY_ERROR; } public function isWarning() { return $this->getSeverity() == ArcanistLintSeverity::SEVERITY_WARNING; } public function isAutofix() { return $this->getSeverity() == ArcanistLintSeverity::SEVERITY_AUTOFIX; } public function hasFileContext() { return ($this->getLine() !== null); } public function setObsolete($obsolete) { $this->obsolete = $obsolete; return $this; } public function getObsolete() { return $this->obsolete; } public function isPatchable() { return ($this->getReplacementText() !== null) && ($this->getReplacementText() !== $this->getOriginalText()); } public function didApplyPatch() { if ($this->appliedToDisk) { return $this; } $this->appliedToDisk = true; foreach ($this->dependentMessages as $message) { $message->didApplyPatch(); } return $this; } public function isPatchApplied() { return $this->appliedToDisk; } public 
function setGranularity($granularity) { $this->granularity = $granularity; return $this; } public function getGranularity() { return $this->granularity; } public function setDependentMessages(array $messages) { assert_instances_of($messages, 'ArcanistLintMessage'); $this->dependentMessages = $messages; return $this; } public function setBypassChangedLineFiltering($bypass_changed_lines) { $this->bypassChangedLineFiltering = $bypass_changed_lines; return $this; } public function shouldBypassChangedLineFiltering() { return $this->bypassChangedLineFiltering; } } diff --git a/src/lint/ArcanistLintPatcher.php b/src/lint/ArcanistLintPatcher.php index 51639d3e..7d22b240 100644 --- a/src/lint/ArcanistLintPatcher.php +++ b/src/lint/ArcanistLintPatcher.php @@ -1,145 +1,143 @@ lintResult = $result; return $obj; } public function getUnmodifiedFileContent() { return $this->lintResult->getData(); } public function getModifiedFileContent() { if ($this->modifiedData === null) { $this->buildModifiedFile(); } return $this->modifiedData; } public function writePatchToDisk() { $path = $this->lintResult->getFilePathOnDisk(); $data = $this->getModifiedFileContent(); $ii = null; do { $lint = $path.'.linted'.($ii++); } while (file_exists($lint)); // Copy existing file to preserve permissions. 'chmod --reference' is not // supported under OSX. if (Filesystem::pathExists($path)) { // This path may not exist if we're generating a new file. execx('cp -p %s %s', $path, $lint); } Filesystem::writeFile($lint, $data); list($err) = exec_manual('mv -f %s %s', $lint, $path); if ($err) { throw new Exception( "Unable to overwrite path `{$path}', patched version was left ". 
"at `{$lint}'."); } foreach ($this->applyMessages as $message) { $message->didApplyPatch(); } } private function __construct() { } private function buildModifiedFile() { $data = $this->getUnmodifiedFileContent(); foreach ($this->lintResult->getMessages() as $lint) { if (!$lint->isPatchable()) { continue; } $orig_offset = $this->getCharacterOffset($lint->getLine() - 1); $orig_offset += $lint->getChar() - 1; $dirty = $this->getDirtyCharacterOffset(); if ($dirty > $orig_offset) { continue; } // Adjust the character offset by the delta *after* checking for // dirtiness. The dirty character cursor is a cursor on the original file, // and should be compared with the patch position in the original file. $working_offset = $orig_offset + $this->getCharacterDelta(); $old_str = $lint->getOriginalText(); $old_len = strlen($old_str); $new_str = $lint->getReplacementText(); $new_len = strlen($new_str); if ($working_offset == strlen($data)) { // Temporary hack to work around a destructive hphpi issue, see #451031. 
$data .= $new_str; } else { $data = substr_replace($data, $new_str, $working_offset, $old_len); } $this->changeCharacterDelta($new_len - $old_len); $this->setDirtyCharacterOffset($orig_offset + $old_len); $this->applyMessages[] = $lint; } $this->modifiedData = $data; } /** * Get the byte offset at which a line of the unmodified file content begins. * Offsets are computed once and cached in $this->lineOffsets. * * @param int Zero-based line index. * @return int Offset of the first byte of that line. * @throws Exception If the index is beyond the computed line offsets. */ private function getCharacterOffset($line_num) { if ($this->lineOffsets === null) { $lines = explode("\n", $this->getUnmodifiedFileContent()); $this->lineOffsets = array(0); $last = 0; foreach ($lines as $line) { $this->lineOffsets[] = $last + strlen($line) + 1; $last += strlen($line) + 1; } } if ($line_num >= count($this->lineOffsets)) { /* Report the requested index, not the leftover foreach variable. */ throw new Exception("Data has fewer than `{$line_num}' lines."); } return idx($this->lineOffsets, $line_num); } private function setDirtyCharacterOffset($offset) { $this->dirtyUntil = $offset; return $this; } private function getDirtyCharacterOffset() { return $this->dirtyUntil; } private function changeCharacterDelta($change) { $this->characterDelta += $change; return $this; } private function getCharacterDelta() { return $this->characterDelta; } } diff --git a/src/lint/ArcanistLintResult.php b/src/lint/ArcanistLintResult.php index 0e2dc18e..69d819ad 100644 --- a/src/lint/ArcanistLintResult.php +++ b/src/lint/ArcanistLintResult.php @@ -1,107 +1,104 @@ path = $path; return $this; } public function getPath() { return $this->path; } public function addMessage(ArcanistLintMessage $message) { $this->messages[] = $message; $this->needsSort = true; return $this; } public function getMessages() { if ($this->needsSort) { $this->sortAndFilterMessages(); } return $this->effectiveMessages; } public function setData($data) { $this->data = $data; return $this; } public function getData() { return $this->data; } public function setFilePathOnDisk($file_path_on_disk) { $this->filePathOnDisk = $file_path_on_disk; return $this; } public function getFilePathOnDisk() { return $this->filePathOnDisk; } public function setCacheVersion($version) { $this->cacheVersion = $version; return $this;
} public function getCacheVersion() { return $this->cacheVersion; } public function isPatchable() { foreach ($this->messages as $message) { if ($message->isPatchable()) { return true; } } return false; } public function isAllAutofix() { foreach ($this->messages as $message) { if (!$message->isAutofix()) { return false; } } return true; } public function sortAndFilterMessages() { $messages = $this->messages; foreach ($messages as $key => $message) { if ($message->getObsolete()) { unset($messages[$key]); continue; } } $map = array(); foreach ($messages as $key => $message) { $map[$key] = ($message->getLine() * (2 << 12)) + $message->getChar(); } asort($map); $messages = array_select_keys($messages, array_keys($map)); $this->effectiveMessages = $messages; $this->needsSort = false; - } } diff --git a/src/lint/ArcanistLintSeverity.php b/src/lint/ArcanistLintSeverity.php index 05ec3723..c34756d9 100644 --- a/src/lint/ArcanistLintSeverity.php +++ b/src/lint/ArcanistLintSeverity.php @@ -1,54 +1,50 @@ 'Advice', self::SEVERITY_AUTOFIX => 'Auto-Fix', self::SEVERITY_WARNING => 'Warning', self::SEVERITY_ERROR => 'Error', self::SEVERITY_DISABLED => 'Disabled', ); } public static function getStringForSeverity($severity_code) { $map = self::getLintSeverities(); if (!array_key_exists($severity_code, $map)) { throw new Exception("Unknown lint severity '{$severity_code}'!"); } return $map[$severity_code]; } public static function isAtLeastAsSevere($message_sev, $level) { - static $map = array( self::SEVERITY_DISABLED => 10, self::SEVERITY_ADVICE => 20, self::SEVERITY_AUTOFIX => 25, self::SEVERITY_WARNING => 30, self::SEVERITY_ERROR => 40, ); if (empty($map[$message_sev])) { return true; } return $map[$message_sev] >= idx($map, $level, 0); } - } diff --git a/src/lint/engine/ArcanistLintEngine.php b/src/lint/engine/ArcanistLintEngine.php index 2ba79fc7..6a000566 100644 --- a/src/lint/engine/ArcanistLintEngine.php +++ b/src/lint/engine/ArcanistLintEngine.php @@ -1,584 +1,584 @@ 
configurationManager = $configuration_manager; return $this; } final public function getConfigurationManager() { return $this->configurationManager; } final public function setWorkingCopy( ArcanistWorkingCopyIdentity $working_copy) { $this->workingCopy = $working_copy; return $this; } final public function getWorkingCopy() { return $this->workingCopy; } final public function setPaths($paths) { $this->paths = $paths; return $this; } public function getPaths() { return $this->paths; } final public function setPathChangedLines($path, $changed) { if ($changed === null) { $this->changedLines[$path] = null; } else { $this->changedLines[$path] = array_fill_keys($changed, true); } return $this; } final public function getPathChangedLines($path) { return idx($this->changedLines, $path); } final public function setFileData($data) { $this->fileData = $data + $this->fileData; return $this; } final public function setCommitHookMode($mode) { $this->commitHookMode = $mode; return $this; } final public function setHookAPI(ArcanistHookAPI $hook_api) { $this->hookAPI = $hook_api; return $this; } final public function getHookAPI() { return $this->hookAPI; } final public function setEnableAsyncLint($enable_async_lint) { $this->enableAsyncLint = $enable_async_lint; return $this; } final public function getEnableAsyncLint() { return $this->enableAsyncLint; } final public function loadData($path) { if (!isset($this->fileData[$path])) { if ($this->getCommitHookMode()) { $this->fileData[$path] = $this->getHookAPI() ->getCurrentFileData($path); } else { $disk_path = $this->getFilePathOnDisk($path); $this->fileData[$path] = Filesystem::readFile($disk_path); } } return $this->fileData[$path]; } public function pathExists($path) { if ($this->getCommitHookMode()) { $file_data = $this->loadData($path); return ($file_data !== null); } else { $disk_path = $this->getFilePathOnDisk($path); return Filesystem::pathExists($disk_path); } } final public function isDirectory($path) { if 
($this->getCommitHookMode()) { // TODO: This won't get the right result in every case (we need more // metadata) but should almost always be correct. try { $this->loadData($path); return false; } catch (Exception $ex) { return true; } } else { $disk_path = $this->getFilePathOnDisk($path); return is_dir($disk_path); } } final public function isBinaryFile($path) { try { $data = $this->loadData($path); } catch (Exception $ex) { return false; } return ArcanistDiffUtils::isHeuristicBinaryFile($data); } final public function isSymbolicLink($path) { return is_link($this->getFilePathOnDisk($path)); } final public function getFilePathOnDisk($path) { return Filesystem::resolvePath( $path, $this->getWorkingCopy()->getProjectRoot()); } final public function setMinimumSeverity($severity) { $this->minimumSeverity = $severity; return $this; } final public function getCommitHookMode() { return $this->commitHookMode; } final public function run() { $linters = $this->buildLinters(); if (!$linters) { throw new ArcanistNoEffectException('No linters to run.'); } $linters = msort($linters, 'getLinterPriority'); foreach ($linters as $linter) { $linter->setEngine($this); } $have_paths = false; foreach ($linters as $linter) { if ($linter->getPaths()) { $have_paths = true; break; } } if (!$have_paths) { throw new ArcanistNoEffectException('No paths are lintable.'); } $versions = array($this->getCacheVersion()); foreach ($linters as $linter) { $version = get_class($linter).':'.$linter->getCacheVersion(); $symbols = id(new PhutilSymbolLoader()) ->setType('class') ->setName(get_class($linter)) ->selectSymbolsWithoutLoading(); $symbol = idx($symbols, 'class$'.get_class($linter)); if ($symbol) { $version .= ':'.md5_file( phutil_get_library_root($symbol['library']).'/'.$symbol['where']); } $versions[] = $version; } $this->cacheVersion = crc32(implode("\n", $versions)); $this->stopped = array(); $exceptions = array(); foreach ($linters as $linter_name => $linter) { if (!is_string($linter_name)) { 
$linter_name = get_class($linter); } try { if (!$linter->canRun()) { continue; } $paths = $linter->getPaths(); foreach ($paths as $key => $path) { // Make sure each path has a result generated, even if it is empty // (i.e., the file has no lint messages). $result = $this->getResultForPath($path); if (isset($this->stopped[$path])) { unset($paths[$key]); } if (isset($this->cachedResults[$path][$this->cacheVersion])) { $cached_result = $this->cachedResults[$path][$this->cacheVersion]; $use_cache = $this->shouldUseCache( $linter->getCacheGranularity(), idx($cached_result, 'repository_version')); if ($use_cache) { unset($paths[$key]); if (idx($cached_result, 'stopped') == $linter_name) { $this->stopped[$path] = $linter_name; } } } } $paths = array_values($paths); if ($paths) { $profiler = PhutilServiceProfiler::getInstance(); $call_id = $profiler->beginServiceCall(array( 'type' => 'lint', 'linter' => $linter_name, 'paths' => $paths, )); try { $linter->willLintPaths($paths); foreach ($paths as $path) { $linter->willLintPath($path); $linter->lintPath($path); if ($linter->didStopAllLinters()) { $this->stopped[$path] = $linter_name; } } } catch (Exception $ex) { $profiler->endServiceCall($call_id, array()); throw $ex; } $profiler->endServiceCall($call_id, array()); } } catch (Exception $ex) { $exceptions[$linter_name] = $ex; } } $exceptions += $this->didRunLinters($linters); foreach ($linters as $linter) { foreach ($linter->getLintMessages() as $message) { if (!$this->isSeverityEnabled($message->getSeverity())) { continue; } if (!$this->isRelevantMessage($message)) { continue; } $message->setGranularity($linter->getCacheGranularity()); $result = $this->getResultForPath($message->getPath()); $result->addMessage($message); } } if ($this->cachedResults) { foreach ($this->cachedResults as $path => $messages) { $messages = idx($messages, $this->cacheVersion, array()); $repository_version = idx($messages, 'repository_version'); unset($messages['stopped']); 
unset($messages['repository_version']); foreach ($messages as $message) { $use_cache = $this->shouldUseCache( idx($message, 'granularity'), $repository_version); if ($use_cache) { $this->getResultForPath($path)->addMessage( ArcanistLintMessage::newFromDictionary($message)); } } } } foreach ($this->results as $path => $result) { $disk_path = $this->getFilePathOnDisk($path); $result->setFilePathOnDisk($disk_path); if (isset($this->fileData[$path])) { $result->setData($this->fileData[$path]); } else if ($disk_path && Filesystem::pathExists($disk_path)) { // TODO: this may cause us to, e.g., load a large binary when we only // raised an error about its filename. We could refine this by looking // through the lint messages and doing this load only if any of them // have original/replacement text or something like that. try { $this->fileData[$path] = Filesystem::readFile($disk_path); $result->setData($this->fileData[$path]); } catch (FilesystemException $ex) { // Ignore this, it's noncritical that we access this data and it // might be unreadable or a directory or whatever else for plenty // of legitimate reasons. 
} } } if ($exceptions) { throw new PhutilAggregateException('Some linters failed:', $exceptions); } return $this->results; } final public function isSeverityEnabled($severity) { $minimum = $this->minimumSeverity; return ArcanistLintSeverity::isAtLeastAsSevere($severity, $minimum); } - final private function shouldUseCache($cache_granularity, - $repository_version) { + final private function shouldUseCache( + $cache_granularity, + $repository_version) { + if ($this->commitHookMode) { return false; } switch ($cache_granularity) { case ArcanistLinter::GRANULARITY_FILE: return true; case ArcanistLinter::GRANULARITY_DIRECTORY: case ArcanistLinter::GRANULARITY_REPOSITORY: return ($this->repositoryVersion == $repository_version); default: return false; } } /** * @param dict>> * @return this */ final public function setCachedResults(array $results) { $this->cachedResults = $results; return $this; } final public function getResults() { return $this->results; } final public function getStoppedPaths() { return $this->stopped; } abstract protected function buildLinters(); final protected function didRunLinters(array $linters) { assert_instances_of($linters, 'ArcanistLinter'); $exceptions = array(); $profiler = PhutilServiceProfiler::getInstance(); foreach ($linters as $linter_name => $linter) { if (!is_string($linter_name)) { $linter_name = get_class($linter); } $call_id = $profiler->beginServiceCall(array( 'type' => 'lint', 'linter' => $linter_name, )); try { $linter->didRunLinters(); } catch (Exception $ex) { $exceptions[$linter_name] = $ex; } $profiler->endServiceCall($call_id, array()); } return $exceptions; } final public function setRepositoryVersion($version) { $this->repositoryVersion = $version; return $this; } final private function isRelevantMessage(ArcanistLintMessage $message) { // When a user runs "arc lint", we default to raising only warnings on // lines they have changed (errors are still raised anywhere in the // file). 
The list of $changed lines may be null, to indicate that the // path is a directory or a binary file so we should not exclude // warnings. if (!$this->changedLines || $message->isError() || $message->shouldBypassChangedLineFiltering()) { return true; } $locations = $message->getOtherLocations(); $locations[] = $message->toDictionary(); foreach ($locations as $location) { $path = idx($location, 'path', $message->getPath()); if (!array_key_exists($path, $this->changedLines)) { continue; } $changed = $this->getPathChangedLines($path); if ($changed === null || !$location['line']) { return true; } $last_line = $location['line']; if (isset($location['original'])) { $last_line += substr_count($location['original'], "\n"); } for ($l = $location['line']; $l <= $last_line; $l++) { if (!empty($changed[$l])) { return true; } } } return false; } final protected function getResultForPath($path) { if (empty($this->results[$path])) { $result = new ArcanistLintResult(); $result->setPath($path); $result->setCacheVersion($this->cacheVersion); $this->results[$path] = $result; } return $this->results[$path]; } final public function getLineAndCharFromOffset($path, $offset) { if (!isset($this->charToLine[$path])) { $char_to_line = array(); $line_to_first_char = array(); $lines = explode("\n", $this->loadData($path)); $line_number = 0; $line_start = 0; foreach ($lines as $line) { $len = strlen($line) + 1; // Account for "\n". 
$line_to_first_char[] = $line_start; $line_start += $len; for ($ii = 0; $ii < $len; $ii++) { $char_to_line[] = $line_number; } $line_number++; } $this->charToLine[$path] = $char_to_line; $this->lineToFirstChar[$path] = $line_to_first_char; } $line = $this->charToLine[$path][$offset]; $char = $offset - $this->lineToFirstChar[$path][$line]; return array($line, $char); } final public function getPostponedLinters() { return $this->postponedLinters; } final public function setPostponedLinters(array $linters) { $this->postponedLinters = $linters; return $this; } protected function getCacheVersion() { return 1; } /** * Get a named linter resource shared by another linter. * * This mechanism allows linters to share arbitrary resources, like the * results of computation. If several linters need to perform the same * expensive computation step, they can use a named resource to synchronize * construction of the result so it doesn't need to be built multiple * times. * * @param string Resource identifier. * @param wild Optionally, default value to return if resource does not * exist. * @return wild Resource, or default value if not present. */ public function getLinterResource($key, $default = null) { return idx($this->linterResources, $key, $default); } /** * Set a linter resource that other linters can access. * * See @{method:getLinterResource} for a description of this mechanism. * * @param string Resource identifier. * @param wild Resource. 
* @return this */ public function setLinterResource($key, $value) { $this->linterResources[$key] = $value; return $this; } - } diff --git a/src/lint/engine/ArcanistSingleLintEngine.php b/src/lint/engine/ArcanistSingleLintEngine.php index a001582f..34249b8c 100644 --- a/src/lint/engine/ArcanistSingleLintEngine.php +++ b/src/lint/engine/ArcanistSingleLintEngine.php @@ -1,63 +1,62 @@ getConfigurationManager() ->getConfigFromAnySource($key); if (!$linter_name) { throw new ArcanistUsageException( "You must configure '{$key}' with the name of a linter in order to ". "use ArcanistSingleLintEngine."); } if (!class_exists($linter_name)) { throw new ArcanistUsageException( "Linter '{$linter_name}' configured in '{$key}' does not exist!"); } if (!is_subclass_of($linter_name, 'ArcanistLinter')) { throw new ArcanistUsageException( "Linter '{$linter_name}' configured in '{$key}' MUST be a subclass of ". "ArcanistLinter."); } // Filter the affected paths. $paths = $this->getPaths(); foreach ($paths as $key => $path) { if (!$this->pathExists($path)) { // Don't lint removed files. In more complex linters it is sometimes // appropriate to lint removed files so you can raise a warning like // "you deleted X, but forgot to delete Y!", but most linters do not // operate correctly on removed files. unset($paths[$key]); continue; } $disk = $this->getFilePathOnDisk($path); if (is_dir($disk)) { // Don't lint directories. (In SVN, they can be directly modified by // changing properties on them, and may appear as modified paths.) 
unset($paths[$key]); continue; } } $linter = newv($linter_name, array()); $linter->setPaths($paths); return array($linter); } + } diff --git a/src/lint/engine/ComprehensiveLintEngine.php b/src/lint/engine/ComprehensiveLintEngine.php index ed8f6aec..a7f12e09 100644 --- a/src/lint/engine/ComprehensiveLintEngine.php +++ b/src/lint/engine/ComprehensiveLintEngine.php @@ -1,57 +1,55 @@ getPaths(); foreach ($paths as $key => $path) { if (preg_match('@^externals/@', $path)) { // Third-party stuff lives in /externals/; don't run lint engines // against it. unset($paths[$key]); } } $text_paths = preg_grep('/\.(php|css|hpp|cpp|l|y|py|pl)$/', $paths); $linters[] = id(new ArcanistGeneratedLinter())->setPaths($text_paths); $linters[] = id(new ArcanistNoLintLinter())->setPaths($text_paths); $linters[] = id(new ArcanistTextLinter())->setPaths($text_paths); $linters[] = id(new ArcanistFilenameLinter())->setPaths($paths); $linters[] = id(new ArcanistXHPASTLinter()) ->setPaths(preg_grep('/\.php$/', $paths)); $py_paths = preg_grep('/\.py$/', $paths); $linters[] = id(new ArcanistPyFlakesLinter())->setPaths($py_paths); $linters[] = id(new ArcanistPEP8Linter()) ->setFlags($this->getPEP8WithTextOptions()) ->setPaths($py_paths); $linters[] = id(new ArcanistRubyLinter()) ->setPaths(preg_grep('/\.rb$/', $paths)); $linters[] = id(new ArcanistJSHintLinter()) ->setPaths(preg_grep('/\.js$/', $paths)); return $linters; } protected function getPEP8WithTextOptions() { // E101 is subset of TXT2 (Tab Literal). // E501 is same as TXT3 (Line Too Long). // W291 is same as TXT6 (Trailing Whitespace). // W292 is same as TXT4 (File Does Not End in Newline). // W293 is same as TXT6 (Trailing Whitespace). 
return array('--ignore=E101,E501,W291,W292,W293'); } } diff --git a/src/lint/engine/UnitTestableArcanistLintEngine.php b/src/lint/engine/UnitTestableArcanistLintEngine.php index 5fd90cb0..4c1f771a 100644 --- a/src/lint/engine/UnitTestableArcanistLintEngine.php +++ b/src/lint/engine/UnitTestableArcanistLintEngine.php @@ -1,34 +1,32 @@ linters[] = $linter; return $this; } public function addFileData($path, $data) { $this->fileData[$path] = $data; return $this; } public function pathExists($path) { if (idx($this->fileData, $path)) { return true; } return parent::pathExists($path); } protected function buildLinters() { return $this->linters; } } diff --git a/src/lint/linter/ArcanistCSharpLinter.php b/src/lint/linter/ArcanistCSharpLinter.php index a9b9a257..6f0206cd 100644 --- a/src/lint/linter/ArcanistCSharpLinter.php +++ b/src/lint/linter/ArcanistCSharpLinter.php @@ -1,246 +1,246 @@ 'map>', 'help' => pht('Provide a discovery map.'), ); // TODO: This should probably be replaced with "bin" when this moves // to extend ExternalLinter. $options['binary'] = array( 'type' => 'string', 'help' => pht('Override default binary.'), ); return $options; } public function setLinterConfigurationValue($key, $value) { switch ($key) { case 'discovery': $this->discoveryMap = $value; return; case 'binary': $this->cslintHintPath = $value; return; } parent::setLinterConfigurationValue($key, $value); } public function getLintCodeFromLinterConfigurationKey($code) { return $code; } public function setCustomSeverityMap(array $map) { foreach ($map as $code => $severity) { if (substr($code, 0, 2) === 'SA' && $severity == 'disabled') { throw new Exception( "In order to keep StyleCop integration with IDEs and other tools ". "consistent with Arcanist results, you aren't permitted to ". - "disable StyleCop rules within '.arclint'. ". + "disable StyleCop rules within '.arclint'. ". "Instead configure the severity using the StyleCop settings dialog ". - "(usually accessible from within your IDE). 
StyleCop settings ". + "(usually accessible from within your IDE). StyleCop settings ". "for your project will be used when linting for Arcanist."); } } return parent::setCustomSeverityMap($map); } /** - * Determines what executables and lint paths to use. Between platforms - * this also changes whether the lint engine is run under .NET or Mono. It + * Determines what executables and lint paths to use. Between platforms + * this also changes whether the lint engine is run under .NET or Mono. It * also ensures that all of the required binaries are available for the lint * to run successfully. * * @return void */ private function loadEnvironment() { if ($this->loaded) { return; } // Determine runtime engine (.NET or Mono). if (phutil_is_windows()) { $this->runtimeEngine = ''; } else if (Filesystem::binaryExists('mono')) { $this->runtimeEngine = 'mono '; } else { throw new Exception('Unable to find Mono and you are not on Windows!'); } // Determine cslint path. $cslint = $this->cslintHintPath; if ($cslint !== null && file_exists($cslint)) { $this->cslintEngine = Filesystem::resolvePath($cslint); } else if (Filesystem::binaryExists('cslint.exe')) { $this->cslintEngine = 'cslint.exe'; } else { throw new Exception('Unable to locate cslint.'); } // Determine cslint version. $ver_future = new ExecFuture( '%C -v', $this->runtimeEngine.$this->cslintEngine); list($err, $stdout, $stderr) = $ver_future->resolve(); if ($err !== 0) { throw new Exception( - 'You are running an old version of cslint. Please '. + 'You are running an old version of cslint. Please '. 'upgrade to version '.self::SUPPORTED_VERSION.'.'); } $ver = (int)$stdout; if ($ver < self::SUPPORTED_VERSION) { throw new Exception( - 'You are running an old version of cslint. Please '. + 'You are running an old version of cslint. Please '. 'upgrade to version '.self::SUPPORTED_VERSION.'.'); } else if ($ver > self::SUPPORTED_VERSION) { throw new Exception( 'Arcanist does not support this version of cslint (it is '. 
- 'newer). You can try upgrading Arcanist with `arc upgrade`.'); + 'newer). You can try upgrading Arcanist with `arc upgrade`.'); } $this->loaded = true; } public function lintPath($path) {} public function willLintPaths(array $paths) { $this->loadEnvironment(); $futures = array(); // Bulk linting up into futures, where the number of files // is based on how long the command is. $current_paths = array(); foreach ($paths as $path) { // If the current paths for the command, plus the next path // is greater than 6000 characters (less than the Windows // command line limit), then finalize this future and add it. $total = 0; foreach ($current_paths as $current_path) { $total += strlen($current_path) + 3; // Quotes and space. } if ($total + strlen($path) > 6000) { // %s won't pass through the JSON correctly - // under Windows. This is probably because not only + // under Windows. This is probably because not only // does the JSON have quotation marks in the content, // but because there'll be a lot of escaping and // double escaping because the JSON also contains - // regular expressions. cslint supports passing the + // regular expressions. cslint supports passing the // settings JSON through base64-encoded to mitigate // this issue. $futures[] = new ExecFuture( '%C --settings-base64=%s -r=. %Ls', $this->runtimeEngine.$this->cslintEngine, base64_encode(json_encode($this->discoveryMap)), $current_paths); $current_paths = array(); } // Append the path to the current paths array. $current_paths[] = $this->getEngine()->getFilePathOnDisk($path); } // If we still have paths left in current paths, then we need to create // a future for those too. if (count($current_paths) > 0) { $futures[] = new ExecFuture( '%C --settings-base64=%s -r=. 
%Ls', $this->runtimeEngine.$this->cslintEngine, base64_encode(json_encode($this->discoveryMap)), $current_paths); $current_paths = array(); } $this->futures = $futures; } public function didRunLinters() { if ($this->futures) { foreach (Futures($this->futures)->limit(8) as $future) { $this->resolveFuture($future); } } } protected function resolveFuture(Future $future) { list($stdout) = $future->resolvex(); $all_results = json_decode($stdout); foreach ($all_results as $results) { if ($results === null || $results->Issues === null) { return; } foreach ($results->Issues as $issue) { $message = new ArcanistLintMessage(); $message->setPath($results->FileName); $message->setLine($issue->LineNumber); $message->setCode($issue->Index->Code); $message->setName($issue->Index->Name); $message->setChar($issue->Column); $message->setOriginalText($issue->OriginalText); $message->setReplacementText($issue->ReplacementText); $desc = @vsprintf($issue->Index->Message, $issue->Parameters); if ($desc === false) { $desc = $issue->Index->Message; } $message->setDescription($desc); $severity = ArcanistLintSeverity::SEVERITY_ADVICE; switch ($issue->Index->Severity) { case 0: $severity = ArcanistLintSeverity::SEVERITY_ADVICE; break; case 1: $severity = ArcanistLintSeverity::SEVERITY_AUTOFIX; break; case 2: $severity = ArcanistLintSeverity::SEVERITY_WARNING; break; case 3: $severity = ArcanistLintSeverity::SEVERITY_ERROR; break; case 4: $severity = ArcanistLintSeverity::SEVERITY_DISABLED; break; } $severity_override = $this->getLintMessageSeverity($issue->Index->Code); if ($severity_override !== null) { $severity = $severity_override; } $message->setSeverity($severity); $this->addLintMessage($message); } } } protected function getDefaultMessageSeverity($code) { return null; } } diff --git a/src/lint/linter/ArcanistConduitLinter.php b/src/lint/linter/ArcanistConduitLinter.php index 5a7a67a2..44edc2d7 100644 --- a/src/lint/linter/ArcanistConduitLinter.php +++ 
b/src/lint/linter/ArcanistConduitLinter.php @@ -1,102 +1,101 @@ must match passed in path. * 'line' * 'char' * 'code' * 'severity' => Must match a constant in ArcanistLintSeverity. * 'name' * 'description' * 'original' & 'replacement' => optional patch information * 'locations' => other locations of the same error (in the same format) * * This class is intended for customization via instantiation, not via * subclassing. */ final class ArcanistConduitLinter extends ArcanistLinter { const CONDUIT_METHOD = 'lint.getalllint'; private $conduitURI; private $linterName; private $lintByPath; // array(/pa/th/ => ), valid after willLintPaths(). public function __construct($conduit_uri = null, $linter_name = null) { // TODO: Facebook uses this (probably?) but we need to be able to // construct it without arguments for ".arclint". $this->conduitURI = $conduit_uri; $this->linterName = $linter_name; } public function willLintPaths(array $paths) { // Load all file path data into $this->data. array_map(array($this, 'getData'), $paths); $conduit = new ConduitClient($this->conduitURI); $this->lintByPath = $conduit->callMethodSynchronous( self::CONDUIT_METHOD, array( 'file_contents' => $this->data, )); } public function lintPath($path) { $lint_for_path = idx($this->lintByPath, $path); if (!$lint_for_path) { return; } foreach ($lint_for_path as $lint) { $this->addLintMessage(ArcanistLintMessage::newFromDictionary($lint)); } } public function getLinterName() { return $this->linterName; } public function getLintSeverityMap() { // The rationale here is that this class will only be used for custom // linting in installations. No two server endpoints will be the same across // different instantiations. Therefore, the server can handle all severity // customization directly. throw new ArcanistUsageException( 'ArcanistConduitLinter does not support client-side severity '. - 'customization.' 
- ); + 'customization.'); } public function getLintNameMap() { // See getLintSeverityMap for rationale. throw new ArcanistUsageException( - 'ArcanistConduitLinter does not support a name map.' - ); + 'ArcanistConduitLinter does not support a name map.'); } protected function canCustomizeLintSeverities() { return false; } + } diff --git a/src/lint/linter/ArcanistJSONLintLinter.php b/src/lint/linter/ArcanistJSONLintLinter.php index 1cd4e9bd..254f8b31 100644 --- a/src/lint/linter/ArcanistJSONLintLinter.php +++ b/src/lint/linter/ArcanistJSONLintLinter.php @@ -1,96 +1,97 @@ getExecutableCommand()); $matches = array(); if (preg_match('/^(?P\d+\.\d+\.\d+)$/', $stdout, $matches)) { return $matches['version']; } else { return false; } } public function getInstallInstructions() { return pht('Install jsonlint using `npm install -g jsonlint`.'); } public function shouldExpectCommandErrors() { return true; } public function supportsReadDataFromStdin() { return true; } protected function getMandatoryFlags() { return array( '--compact', ); } protected function parseLinterOutput($path, $err, $stdout, $stderr) { $lines = phutil_split_lines($stderr, false); $messages = array(); foreach ($lines as $line) { $matches = null; $match = preg_match( '/^(?:(?.+): )?'. 'line (?\d+), col (?\d+), '. 
'(?.*)$/', $line, $matches); if ($match) { $message = new ArcanistLintMessage(); $message->setPath($path); $message->setLine($matches['line']); $message->setChar($matches['column']); $message->setCode($this->getLinterName()); $message->setDescription(ucfirst($matches['description'])); $message->setSeverity(ArcanistLintSeverity::SEVERITY_ERROR); $messages[] = $message; } } if ($err && !$messages) { return false; } return $messages; } + } diff --git a/src/lint/linter/ArcanistLinter.php b/src/lint/linter/ArcanistLinter.php index 9001362d..2987b7c2 100644 --- a/src/lint/linter/ArcanistLinter.php +++ b/src/lint/linter/ArcanistLinter.php @@ -1,503 +1,502 @@ getLinterName(), $this->getLinterConfigurationName(), get_class($this)); } public function getLinterPriority() { return 1.0; } public function setCustomSeverityMap(array $map) { $this->customSeverityMap = $map; return $this; } public function setCustomSeverityRules(array $rules) { $this->customSeverityRules = $rules; return $this; } final public function getActivePath() { return $this->activePath; } final public function getOtherLocation($offset, $path = null) { if ($path === null) { $path = $this->getActivePath(); } list($line, $char) = $this->getEngine()->getLineAndCharFromOffset( $path, $offset); return array( 'path' => $path, 'line' => $line + 1, 'char' => $char, ); } final public function stopAllLinters() { $this->stopAllLinters = true; return $this; } final public function didStopAllLinters() { return $this->stopAllLinters; } final public function addPath($path) { $this->paths[$path] = $path; return $this; } final public function setPaths(array $paths) { $this->paths = $paths; return $this; } /** * Filter out paths which this linter doesn't act on (for example, because * they are binaries and the linter doesn't apply to binaries). 
*/ final private function filterPaths($paths) { $engine = $this->getEngine(); $keep = array(); foreach ($paths as $path) { if (!$this->shouldLintDeletedFiles() && !$engine->pathExists($path)) { continue; } if (!$this->shouldLintDirectories() && $engine->isDirectory($path)) { continue; } if (!$this->shouldLintBinaryFiles() && $engine->isBinaryFile($path)) { continue; } if (!$this->shouldLintSymbolicLinks() && $engine->isSymbolicLink($path)) { continue; } $keep[] = $path; } return $keep; } final public function getPaths() { return $this->filterPaths(array_values($this->paths)); } final public function addData($path, $data) { $this->data[$path] = $data; return $this; } final protected function getData($path) { if (!array_key_exists($path, $this->data)) { $this->data[$path] = $this->getEngine()->loadData($path); } return $this->data[$path]; } final public function setEngine(ArcanistLintEngine $engine) { $this->engine = $engine; return $this; } final protected function getEngine() { return $this->engine; } public function getCacheVersion() { return 0; } final public function getLintMessageFullCode($short_code) { return $this->getLinterName().$short_code; } final public function getLintMessageSeverity($code) { $map = $this->customSeverityMap; if (isset($map[$code])) { return $map[$code]; } $map = $this->getLintSeverityMap(); if (isset($map[$code])) { return $map[$code]; } foreach ($this->customSeverityRules as $rule => $severity) { if (preg_match($rule, $code)) { return $severity; } } return $this->getDefaultMessageSeverity($code); } protected function getDefaultMessageSeverity($code) { return ArcanistLintSeverity::SEVERITY_ERROR; } final public function isMessageEnabled($code) { return ($this->getLintMessageSeverity($code) !== ArcanistLintSeverity::SEVERITY_DISABLED); } final public function getLintMessageName($code) { $map = $this->getLintNameMap(); if (isset($map[$code])) { return $map[$code]; } return 'Unknown lint message!'; } final protected function 
addLintMessage(ArcanistLintMessage $message) { if (!$this->getEngine()->getCommitHookMode()) { $root = $this->getEngine()->getWorkingCopy()->getProjectRoot(); $path = Filesystem::resolvePath($message->getPath(), $root); $message->setPath(Filesystem::readablePath($path, $root)); } $this->messages[] = $message; return $message; } final public function getLintMessages() { return $this->messages; } final protected function raiseLintAtLine( $line, $char, $code, $desc, $original = null, $replacement = null) { $message = id(new ArcanistLintMessage()) ->setPath($this->getActivePath()) ->setLine($line) ->setChar($char) ->setCode($this->getLintMessageFullCode($code)) ->setSeverity($this->getLintMessageSeverity($code)) ->setName($this->getLintMessageName($code)) ->setDescription($desc) ->setOriginalText($original) ->setReplacementText($replacement); return $this->addLintMessage($message); } final protected function raiseLintAtPath($code, $desc) { return $this->raiseLintAtLine(null, null, $code, $desc, null, null); } final protected function raiseLintAtOffset( $offset, $code, $desc, $original = null, $replacement = null) { $path = $this->getActivePath(); $engine = $this->getEngine(); if ($offset === null) { $line = null; $char = null; } else { list($line, $char) = $engine->getLineAndCharFromOffset($path, $offset); } return $this->raiseLintAtLine( $line + 1, $char + 1, $code, $desc, $original, $replacement); } public function willLintPath($path) { $this->stopAllLinters = false; $this->activePath = $path; } public function canRun() { return true; } public function willLintPaths(array $paths) { return; } abstract public function lintPath($path); abstract public function getLinterName(); public function getVersion() { return null; } public function didRunLinters() { // This is a hook. 
} final protected function isCodeEnabled($code) { $severity = $this->getLintMessageSeverity($code); return $this->getEngine()->isSeverityEnabled($severity); } public function getLintSeverityMap() { return array(); } public function getLintNameMap() { return array(); } public function getCacheGranularity() { return self::GRANULARITY_FILE; } /** * If this linter is selectable via `.arclint` configuration files, return * a short, human-readable name to identify it. For example, `"jshint"` or * `"pep8"`. * * If you do not implement this method, the linter will not be selectable * through `.arclint` files. */ public function getLinterConfigurationName() { return null; } public function getLinterConfigurationOptions() { if (!$this->canCustomizeLintSeverities()) { return array(); } return array( 'severity' => array( 'type' => 'optional map', 'help' => pht( 'Provide a map from lint codes to adjusted severity levels: error, '. 'warning, advice, autofix or disabled.') ), 'severity.rules' => array( 'type' => 'optional map', 'help' => pht( 'Provide a map of regular expressions to severity levels. All '. 'matching codes have their severity adjusted.'), ), ); } public function setLinterConfigurationValue($key, $value) { $sev_map = array( 'error' => ArcanistLintSeverity::SEVERITY_ERROR, 'warning' => ArcanistLintSeverity::SEVERITY_WARNING, 'autofix' => ArcanistLintSeverity::SEVERITY_AUTOFIX, 'advice' => ArcanistLintSeverity::SEVERITY_ADVICE, 'disabled' => ArcanistLintSeverity::SEVERITY_DISABLED, ); switch ($key) { case 'severity': if (!$this->canCustomizeLintSeverities()) { break; } $custom = array(); foreach ($value as $code => $severity) { if (empty($sev_map[$severity])) { $valid = implode(', ', array_keys($sev_map)); throw new Exception( pht( 'Unknown lint severity "%s". 
Valid severities are: %s.', $severity, $valid)); } $code = $this->getLintCodeFromLinterConfigurationKey($code); $custom[$code] = $severity; } $this->setCustomSeverityMap($custom); return; case 'severity.rules': if (!$this->canCustomizeLintSeverities()) { break; } foreach ($value as $rule => $severity) { if (@preg_match($rule, '') === false) { throw new Exception( pht( 'Severity rule "%s" is not a valid regular expression.', $rule)); } if (empty($sev_map[$severity])) { $valid = implode(', ', array_keys($sev_map)); throw new Exception( pht( 'Unknown lint severity "%s". Valid severities are: %s.', $severity, $valid)); } } $this->setCustomSeverityRules($value); return; } throw new Exception("Incomplete implementation: {$key}!"); } protected function canCustomizeLintSeverities() { return true; } protected function shouldLintBinaryFiles() { return false; } protected function shouldLintDeletedFiles() { return false; } protected function shouldLintDirectories() { return false; } protected function shouldLintSymbolicLinks() { return false; } /** * Map a configuration lint code to an `arc` lint code. Primarily, this is * intended for validation, but can also be used to normalize case or * otherwise be more permissive in accepted inputs. * * If the code is not recognized, you should throw an exception. * * @param string Code specified in configuration. * @return string Normalized code to use in severity map. */ protected function getLintCodeFromLinterConfigurationKey($code) { return $code; } /** * Retrieve an old lint configuration value from `.arcconfig` or a similar * source. * * Modern linters should use @{method:getConfig} to read configuration from * `.arclint`. * * @param string Configuration key to retrieve. * @param wild Default value to return if key is not present in config. * @return wild Configured value, or default if no configuration exists. 
*/ final protected function getDeprecatedConfiguration($key, $default = null) { // If we're being called in a context without an engine (probably from // `arc linters`), just return the default value. if (!$this->engine) { return $default; } $config = $this->getEngine()->getConfigurationManager(); // Construct a sentinel object so we can tell if we're reading config // or not. $sentinel = (object)array(); $result = $config->getConfigFromAnySource($key, $sentinel); // If we read config, warn the user that this mechanism is deprecated and // discouraged. if ($result !== $sentinel) { $console = PhutilConsole::getConsole(); $console->writeErr( "**%s**: %s\n", pht('Deprecation Warning'), pht( 'Configuration option "%s" is deprecated. Generally, linters should '. 'now be configured using an `.arclint` file. See "Arcanist User '. 'Guide: Lint" in the documentation for more information.', $key)); return $result; } return $default; } } diff --git a/src/lint/linter/ArcanistMergeConflictLinter.php b/src/lint/linter/ArcanistMergeConflictLinter.php index 021b456f..b4db9f9a 100644 --- a/src/lint/linter/ArcanistMergeConflictLinter.php +++ b/src/lint/linter/ArcanistMergeConflictLinter.php @@ -1,53 +1,54 @@ getData($path), false); foreach ($lines as $lineno => $line) { // An unresolved merge conflict will contain a series of seven // '<', '=', or '>'. 
if (preg_match('/^(>{7}|<{7}|={7})$/', $line)) {
        // $lineno is a zero-based array index; lint lines are one-based.
        $this->raiseLintAtLine(
          $lineno + 1,
          1,
          self::LINT_MERGECONFLICT,
          pht('This syntax indicates there is an unresolved merge conflict.'));
      }
    }
  }

  public function getLintNameMap() {
    return array(
      self::LINT_MERGECONFLICT => pht('Unresolved merge conflict'),
    );
  }
+
 }
diff --git a/src/lint/linter/ArcanistPhpcsLinter.php b/src/lint/linter/ArcanistPhpcsLinter.php
index dccbc01b..870d1860 100644
--- a/src/lint/linter/ArcanistPhpcsLinter.php
+++ b/src/lint/linter/ArcanistPhpcsLinter.php
@@ -1,142 +1,140 @@ getDeprecatedConfiguration('lint.phpcs.options', array());

    // The deprecated options value may be either a list of flags or a single
    // string, so append the standard in whichever form matches.
    $standard = $this->getDeprecatedConfiguration('lint.phpcs.standard');
    if (!empty($standard)) {
      if (is_array($options)) {
        $options[] = '--standard='.$standard;
      } else {
        $options .= ' --standard='.$standard;
      }
    }

    return $options;
  }

  public function getDefaultBinary() {
    return $this->getDeprecatedConfiguration('lint.phpcs.bin', 'phpcs');
  }

  /**
   * Run the binary with `--version` and extract the version number.
   *
   * @return string|false Version like "1.2.3", or false if the output does
   *                      not match the expected banner.
   */
  public function getVersion() {
    list($stdout) = execx('%C --version', $this->getExecutableCommand());

    $matches = array();
    // The named group must be spelled `(?P<version>...)`; a bare `(?P` is not
    // valid PCRE and would leave $matches['version'] undefined.
    $regex = '/^PHP_CodeSniffer version (?P<version>\d+\.\d+\.\d+)\b/';
    if (preg_match($regex, $stdout, $matches)) {
      return $matches['version'];
    } else {
      return false;
    }
  }

  public function shouldExpectCommandErrors() {
    return true;
  }

  public function supportsReadDataFromStdin() {
    return true;
  }

  /**
   * Convert the PHPCS XML report on stdout into lint messages.
   *
   * @param string Path being linted.
   * @param int    Exit code of the linter.
   * @param string Linter stdout (expected to be an XML report).
   * @param string Linter stderr (unused here).
   * @return list<ArcanistLintMessage>|false Messages, or false if stdout is
   *         not parseable XML.
   */
  protected function parseLinterOutput($path, $err, $stdout, $stderr) {
    // NOTE: Some version of PHPCS after 1.4.6 stopped printing a valid, empty
    // XML document to stdout in the case of no errors. If PHPCS exits with
    // error 0, just ignore output.
    if (!$err) {
      return array();
    }

    $report_dom = new DOMDocument();
    $ok = @$report_dom->loadXML($stdout);
    if (!$ok) {
      return false;
    }

    $files = $report_dom->getElementsByTagName('file');
    $messages = array();
    foreach ($files as $file) {
      foreach ($file->childNodes as $child) {
        // Skip text/whitespace nodes between the report elements.
        if (!($child instanceof DOMElement)) {
          continue;
        }

        // Any element that is not <error> is reported with the "W" prefix.
        if ($child->tagName == 'error') {
          $prefix = 'E';
        } else {
          $prefix = 'W';
        }

        $code = 'PHPCS.'.$prefix.'.'.$child->getAttribute('source');

        $message = new ArcanistLintMessage();
        $message->setPath($path);
        $message->setLine($child->getAttribute('line'));
        $message->setChar($child->getAttribute('column'));
        $message->setCode($code);
        $message->setDescription($child->nodeValue);
        $message->setSeverity($this->getLintMessageSeverity($code));

        $messages[] = $message;
      }
    }

    return $messages;
  }

  protected function getDefaultMessageSeverity($code) {
    if (preg_match('/^PHPCS\\.W\\./', $code)) {
      return ArcanistLintSeverity::SEVERITY_WARNING;
    } else {
      return ArcanistLintSeverity::SEVERITY_ERROR;
    }
  }

  protected function getLintCodeFromLinterConfigurationKey($code) {
    if (!preg_match('/^PHPCS\\.(E|W)\\./', $code)) {
      throw new Exception(
        "Invalid severity code '{$code}', should begin with 'PHPCS.'.");
    }
    return $code;
  }

}
diff --git a/src/lint/linter/ArcanistPuppetLintLinter.php b/src/lint/linter/ArcanistPuppetLintLinter.php
index 74f6458b..5fc20705 100644
--- a/src/lint/linter/ArcanistPuppetLintLinter.php
+++ b/src/lint/linter/ArcanistPuppetLintLinter.php
@@ -1,106 +1,107 @@ getExecutableCommand());

    $matches = array();
    // Same named-group repair as the PHPCS version check above.
    $regex = '/^Puppet-lint (?P<version>\d+\.\d+\.\d+)$/';
    if (preg_match($regex, $stdout, $matches)) {
      return $matches['version'];
    } else {
      return false;
    }
  }

  public function getInstallInstructions() {
    return pht('Install puppet-lint using `gem install puppet-lint`.');
  }

  public function shouldExpectCommandErrors() {
    return true;
  }

  public function supportsReadDataFromStdin() {
    return false;
  }

  protected function getMandatoryFlags() {
    return array(sprintf('--log-format=%s',
implode('|', array(
        '%{linenumber}',
        '%{column}',
        '%{kind}',
        '%{check}',
        '%{message}'))));
  }

  /**
   * Parse puppet-lint output, one message per line, in the pipe-delimited
   * format requested by getMandatoryFlags():
   * linenumber|column|kind|check|message.
   *
   * @return list<ArcanistLintMessage>|false Messages, or false when the
   *         linter exited nonzero without producing any parseable message.
   */
  protected function parseLinterOutput($path, $err, $stdout, $stderr) {
    $lines = phutil_split_lines($stdout, false);

    $messages = array();
    foreach ($lines as $line) {
      // Limit to 5 pieces so a "|" inside the message text is preserved.
      $matches = explode('|', $line, 5);

      // Lines that do not have all five fields are ignored.
      if (count($matches) === 5) {
        $message = new ArcanistLintMessage();
        $message->setPath($path);
        $message->setLine($matches[0]);
        $message->setChar($matches[1]);
        $message->setName(ucwords(str_replace('_', ' ', $matches[3])));
        $message->setDescription(ucfirst($matches[4]));

        // Any kind other than "warning" or "error" is reported as advice.
        switch ($matches[2]) {
          case 'warning':
            $message->setSeverity(ArcanistLintSeverity::SEVERITY_WARNING);
            break;
          case 'error':
            $message->setSeverity(ArcanistLintSeverity::SEVERITY_ERROR);
            break;
          default:
            $message->setSeverity(ArcanistLintSeverity::SEVERITY_ADVICE);
            break;
        }

        $messages[] = $message;
      }
    }

    if ($err && !$messages) {
      return false;
    }

    return $messages;
  }
+
 }
diff --git a/src/lint/linter/ArcanistPyLintLinter.php b/src/lint/linter/ArcanistPyLintLinter.php
index 7c5538b1..b758e8e4 100644
--- a/src/lint/linter/ArcanistPyLintLinter.php
+++ b/src/lint/linter/ArcanistPyLintLinter.php
@@ -1,268 +1,266 @@ getEngine()->getConfigurationManager();
-    $error_regexp =
-      $config->getConfigFromAnySource('lint.pylint.codes.error');
-    $warning_regexp =
-      $config->getConfigFromAnySource('lint.pylint.codes.warning');
-    $advice_regexp =
-      $config->getConfigFromAnySource('lint.pylint.codes.advice');
+    $error_regexp = $config->getConfigFromAnySource(
+      'lint.pylint.codes.error');
+    $warning_regexp = $config->getConfigFromAnySource(
+      'lint.pylint.codes.warning');
+    $advice_regexp = $config->getConfigFromAnySource(
+      'lint.pylint.codes.advice');

    if (!$error_regexp && !$warning_regexp && !$advice_regexp) {
      throw new ArcanistUsageException(
        "You are invoking the PyLint linter but have not configured any of ".
        "'lint.pylint.codes.error', 'lint.pylint.codes.warning', or ".
        "'lint.pylint.codes.advice'. Consult the documentation for ".
        "ArcanistPyLintLinter.");
    }

    $code_map = array(
      ArcanistLintSeverity::SEVERITY_ERROR => $error_regexp,
      ArcanistLintSeverity::SEVERITY_WARNING => $warning_regexp,
      ArcanistLintSeverity::SEVERITY_ADVICE => $advice_regexp,
    );

    // Severities are tried in the order listed above; the first configured
    // pattern that matches the message code decides the severity.
    foreach ($code_map as $sev => $codes) {
      if ($codes === null) {
        continue;
      }
      // Each setting may be a single pattern or a list of patterns.
      if (!is_array($codes)) {
        $codes = array($codes);
      }
      foreach ($codes as $code_re) {
        // The configured value is a bare pattern; delimiters are added here.
        if (preg_match("/{$code_re}/", $code)) {
          return $sev;
        }
      }
    }

    // If the message code doesn't match any of the provided regex's,
    // then just disable it.
    return ArcanistLintSeverity::SEVERITY_DISABLED;
  }

  /**
   * Locate the pylint executable: "<prefix>/bin/pylint" when
   * 'lint.pylint.prefix' is configured, otherwise "pylint".
   *
   * @return string Path or command name of the pylint binary.
   * @throws ArcanistUsageException If the binary is neither an existing path
   *         nor found by `which`.
   */
  private function getPyLintPath() {
    $pylint_bin = 'pylint';

    // Use the PyLint prefix specified in the config file
    $config = $this->getEngine()->getConfigurationManager();
    $prefix = $config->getConfigFromAnySource('lint.pylint.prefix');
    if ($prefix !== null) {
      $pylint_bin = $prefix.'/bin/'.$pylint_bin;
    }

    if (!Filesystem::pathExists($pylint_bin)) {
      list($err) = exec_manual('which %s', $pylint_bin);
      if ($err) {
        throw new ArcanistUsageException(
          "PyLint does not appear to be installed on this system. Install it ".
          "(e.g., with 'sudo easy_install pylint') or configure ".
          "'lint.pylint.prefix' in your .arcconfig to point to the directory ".
          "where it resides.");
      }
    }

    return $pylint_bin;
  }

  /**
   * Build the value that lintPath() places directly in front of the existing
   * $PYTHONPATH.
   *
   * @return string Colon-joined list of directories, ending with ":".
   */
  private function getPyLintPythonPath() {
    // Get non-default install locations for pylint and its dependencies
    // libraries.
    $config = $this->getEngine()->getConfigurationManager();
    $prefixes = array(
      $config->getConfigFromAnySource('lint.pylint.prefix'),
      $config->getConfigFromAnySource('lint.pylint.logilab_astng.prefix'),
      $config->getConfigFromAnySource('lint.pylint.logilab_common.prefix'),
    );

    // Add the libraries to the python search path
    $python_path = array();
    foreach ($prefixes as $prefix) {
      if ($prefix !== null) {
        $python_path[] = $prefix.'/lib/python2.7/site-packages';
        $python_path[] = $prefix.'/lib/python2.7/dist-packages';
        $python_path[] = $prefix.'/lib/python2.6/site-packages';
        $python_path[] = $prefix.'/lib/python2.6/dist-packages';
      }
    }

    $working_copy = $this->getEngine()->getWorkingCopy();
    $config_paths = $config->getConfigFromAnySource('lint.pylint.pythonpath');
    if ($config_paths !== null) {
      foreach ($config_paths as $config_path) {
        if ($config_path !== null) {
          $python_path[] = Filesystem::resolvePath(
            $config_path,
            $working_copy->getProjectRoot());
        }
      }
    }

    // The empty last element makes implode() emit a trailing ":", so the
    // result can be concatenated with $PYTHONPATH as-is.
    $python_path[] = '';
    return implode(':', $python_path);
  }

  /**
   * Assemble the pylint command-line options as a single string.
   *
   * @return string Space-joined options.
   */
  private function getPyLintOptions() {
    // '-rn': don't print lint report/summary at end
    $options = array('-rn');

    // Version 0.x.x include the pylint message ids in the output
    if (version_compare($this->getLinterVersion(), '1', 'lt')) {
      array_push($options, '-iy', '--output-format=text');
    }
    // Version 1.x.x set the output specifically to the 0.x.x format
    else {
      array_push($options, "--msg-template='{msg_id}:{line:3d}: {obj}: {msg}'");
    }

    $working_copy = $this->getEngine()->getWorkingCopy();
    $config = $this->getEngine()->getConfigurationManager();

    // Specify an --rcfile, either absolute or relative to the project root.
    // Stupidly, the command line args above are overridden by rcfile, so be
    // careful.
    $rcfile = $config->getConfigFromAnySource('lint.pylint.rcfile');
    if ($rcfile !== null) {
      $rcfile = Filesystem::resolvePath(
        $rcfile,
        $working_copy->getProjectRoot());
      $options[] = csprintf('--rcfile=%s', $rcfile);
    }

    // Add any options defined in the config file for PyLint
    // NOTE(review): array_merge() requires this setting to be a list —
    // confirm against the configuration schema.
    $config_options = $config->getConfigFromAnySource('lint.pylint.options');
    if ($config_options !== null) {
      $options = array_merge($options, $config_options);
    }

    return implode(' ', $options);
  }

  public function getLinterName() {
    return 'PyLint';
  }

  /**
   * Ask the pylint binary for its version.
   *
   * @return string Version captured from the first output line, or '999'
   *                when it cannot be determined.
   */
  private function getLinterVersion() {
    $pylint_bin = $this->getPyLintPath();
    $options = '--version';

    list($stdout) = execx('%s %s', $pylint_bin, $options);

    $lines = phutil_split_lines($stdout, false);
    $matches = null;

    // If the version command didn't return anything or the regex didn't match
    // Assume a future version that at least is compatible with 1.x.x
    if (count($lines) == 0 ||
        !preg_match('/pylint\s((?:\d+\.?)+)/', $lines[0], $matches)) {
      return '999';
    }

    return $matches[1];
  }

  /**
   * Run pylint on one path and raise a lint message for every output line of
   * the form "<code>:<line>[,<col>]: <text>".
   */
  public function lintPath($path) {
    $pylint_bin = $this->getPyLintPath();
    $python_path = $this->getPyLintPythonPath();
    $options = $this->getPyLintOptions();
    $path_on_disk = $this->getEngine()->getFilePathOnDisk($path);

    try {
      list($stdout, $_) = execx(
        '/usr/bin/env PYTHONPATH=%s$PYTHONPATH %s %C %s',
        $python_path,
        $pylint_bin,
        $options,
        $path_on_disk);
    } catch (CommandException $e) {
      if ($e->getError() == 32) {
        // According to ##man pylint## the exit status of 32 means there was a
        // usage error. That's bad, so actually exit abnormally.
        throw $e;
      } else {
        // The other non-zero exit codes mean there were messages issued,
        // which is expected, so don't exit.
        $stdout = $e->getStdout();
      }
    }

    $lines = phutil_split_lines($stdout, false);
    $messages = array();
    foreach ($lines as $line) {
      $matches = null;
      // Groups: 1 = message code, 2 = line number, 3 = message text.
      $regex = '/([A-Z]\d+): *(\d+)(?:|,\d*): *(.*)$/';
      if (!preg_match($regex, $line, $matches)) {
        continue;
      }
      foreach ($matches as $key => $match) {
        $matches[$key] = trim($match);
      }

      $message = new ArcanistLintMessage();
      $message->setPath($path);
      $message->setLine($matches[2]);
      $message->setCode($matches[1]);
      $message->setName($this->getLinterName().' '.$matches[1]);
      $message->setDescription($matches[3]);
      $message->setSeverity($this->getMessageCodeSeverity($matches[1]));
      $this->addLintMessage($message);
    }
  }

}
diff --git a/src/lint/linter/ArcanistScriptAndRegexLinter.php b/src/lint/linter/ArcanistScriptAndRegexLinter.php
index 1c12ff0d..22e88b89 100644
--- a/src/lint/linter/ArcanistScriptAndRegexLinter.php
+++ b/src/lint/linter/ArcanistScriptAndRegexLinter.php
@@ -1,409 +1,407 @@ &1'
 *
 * The return code of the script must be 0, or an exception will be raised
 * reporting that the linter failed. If you have a script which exits nonzero
 * under normal circumstances, you can force it to always exit 0 by using a
 * configuration like this:
 *
 *   sh -c '/opt/lint/lint.sh "$0" || true'
 *
 * Multiple instances of the script will be run in parallel if there are
 * multiple files to be linted, so they should not use any unique resources.
 * For instance, this configuration would not work properly, because several
 * processes may attempt to write to the file at the same time:
 *
 *   COUNTEREXAMPLE
 *   sh -c '/opt/lint/lint.sh --output /tmp/lint.out "$0" && cat /tmp/lint.out'
 *
 * There are necessary limits to how gracefully this linter can deal with
 * edge cases, because it is just a script and a regex. If you need to do
 * things that this linter can't handle, you can write a phutil linter and move
 * the logic to handle those cases into PHP. PHP is a better general-purpose
 * programming language than regular expressions are, if only by a small margin.
* * == ...and Regex == * * The regex must be a valid PHP PCRE regex, including delimiters and flags. * * The regex will be matched against the entire output of the script, so it * should generally be in this form if messages are one-per-line: * * /^...$/m * * The regex should capture these named patterns with `(?P<name>...)`: * * - `message` (required) Text describing the lint message. For example, * "This is a syntax error.". * - `name` (optional) Text summarizing the lint message. For example, * "Syntax Error". * - `severity` (optional) The word "error", "warning", "autofix", "advice", * or "disabled", in any combination of upper and lower case. Instead, you * may match groups called `error`, `warning`, `advice`, `autofix`, or * `disabled`. These allow you to match output formats like "E123" and * "W123" to indicate errors and warnings, even though the word "error" is * not present in the output. If no severity capturing group is present, * messages are raised with "error" severity. If multiple severity capturing * groups are present, messages are raised with the highest captured * severity. Capturing groups like `error` supersede the `severity` * capturing group. * - `error` (optional) Match some nonempty substring to indicate that this * message has "error" severity. * - `warning` (optional) Match some nonempty substring to indicate that this * message has "warning" severity. * - `advice` (optional) Match some nonempty substring to indicate that this * message has "advice" severity. * - `autofix` (optional) Match some nonempty substring to indicate that this * message has "autofix" severity. * - `disabled` (optional) Match some nonempty substring to indicate that this * message has "disabled" severity. * - `file` (optional) The name of the file to raise the lint message in. If * not specified, defaults to the linted file. It is generally not necessary * to capture this unless the linter can raise messages in files other than * the one it is linting.
* - `line` (optional) The line number of the message. * - `char` (optional) The character offset of the message. * - `offset` (optional) The byte offset of the message. If captured, this * supersedes `line` and `char`. * - `original` (optional) The text the message affects. * - `replacement` (optional) The text that the range captured by `original` * should be automatically replaced by to resolve the message. * - `code` (optional) A short error type identifier which can be used * elsewhere to configure handling of specific types of messages. For * example, "EXAMPLE1", "EXAMPLE2", etc., where each code identifies a * class of message like "syntax error", "missing whitespace", etc. This * allows configuration to later change the severity of all whitespace * messages, for example. * - `ignore` (optional) Match some nonempty substring to ignore the match. * You can use this if your linter sometimes emits text like "No lint * errors". * - `stop` (optional) Match some nonempty substring to stop processing input. * Remaining matches for this file will be discarded, but linting will * continue with other linters and other files. * - `halt` (optional) Match some nonempty substring to halt all linting of * this file by any linter. Linting will continue with other files. * - `throw` (optional) Match some nonempty substring to throw an error, which * will stop `arc` completely. You can use this to fail abruptly if you * encounter unexpected output. All processing will abort. * * Numbered capturing groups are ignored. * * For example, if your lint script's output looks like this: * * error:13 Too many goats! * warning:22 Not enough boats. 
*
 * ...you could use this regex to parse it:
 *
 *   /^(?P<severity>warning|error):(?P<line>\d+) (?P<message>.*)$/m
 *
 * The simplest valid regex for line-oriented output is something like this:
 *
 *   /^(?P<message>.*)$/m
 *
 * @task  lint        Linting
 * @task  linterinfo  Linter Information
 * @task  parse       Parsing Output
 * @task  config      Validating Configuration
- *
- * @group linter
 */
final class ArcanistScriptAndRegexLinter extends ArcanistLinter {

  // Raw stdout of the configured script, keyed by linted path. Filled by
  // willLintPaths() and consumed by lintPath().
  private $output = array();

  public function getInfoName() {
    return pht('Script and Regex');
  }

  public function getInfoDescription() {
    return pht(
      'Run an external script, then parse its output with a regular '.
      'expression. This is a generic binding that can be used to '.
      'run custom lint scripts.');
  }

/* -(  Linting  )------------------------------------------------------------ */


  /**
   * Run the script on each file to be linted.
   *
   * One future is created per path, with the project root as its working
   * directory; the stdout of each is stored for lintPath().
   *
   * @task lint
   */
  public function willLintPaths(array $paths) {
    $script = $this->getConfiguredScript();
    $root = $this->getEngine()->getWorkingCopy()->getProjectRoot();

    $futures = array();
    foreach ($paths as $path) {
      // %C inserts the configured command unescaped; %s escapes the path.
      $future = new ExecFuture('%C %s', $script, $path);
      $future->setCWD($root);
      $futures[$path] = $future;
    }

    // NOTE(review): limit(4) presumably caps how many scripts run at once —
    // confirm against the futures iterator.
    foreach (Futures($futures)->limit(4) as $path => $future) {
      // resolvex() is the throwing variant, which is why a nonzero exit from
      // the script is reported as a linter failure.
      list($stdout) = $future->resolvex();
      $this->output[$path] = $stdout;
    }
  }

  /**
   * Run the regex on the output of the script.
   *
   * Matches are processed in order. For each match the special groups are
   * checked in a fixed sequence: `throw`, then `halt`, then `stop`, then
   * `ignore`; only a match that triggers none of them becomes a lint message.
   *
   * @task lint
   */
  public function lintPath($path) {
    $regex = $this->getConfiguredRegex();

    $output = idx($this->output, $path);
    if (!strlen($output)) {
      // No output, but it exited 0, so just move on.
      return;
    }

    $matches = null;
    if (!preg_match_all($regex, $output, $matches, PREG_SET_ORDER)) {
      // Output with no matches. This might be a configuration error, but more
      // likely it's something like "No lint errors." and the user just hasn't
      // written a sufficiently powerful/ridiculous regexp to capture it into an
      // 'ignore' group. Don't make them figure this out; advanced users can
      // capture 'throw' to handle this case.
      return;
    }

    foreach ($matches as $match) {
      if (!empty($match['throw'])) {
        $throw = $match['throw'];
        throw new ArcanistUsageException(
          "ArcanistScriptAndRegexLinter: ".
          "configuration captured a 'throw' named capturing group, ".
          "'{$throw}'. Script output:\n".
          $output);
      }

      if (!empty($match['halt'])) {
        $this->stopAllLinters();
        break;
      }

      if (!empty($match['stop'])) {
        break;
      }

      if (!empty($match['ignore'])) {
        continue;
      }

      list($line, $char) = $this->getMatchLineAndChar($match, $path);

      // Groups absent from the match fall back to the defaults given here.
      $dict = array(
        'path'        => idx($match, 'file', $path),
        'line'        => $line,
        'char'        => $char,
        'code'        => idx($match, 'code', $this->getLinterName()),
        'severity'    => $this->getMatchSeverity($match),
        'name'        => idx($match, 'name', 'Lint'),
        'description' => idx($match, 'message', 'Undefined Lint Message'),
      );

      // `original`/`replacement` are only passed on when captured.
      $original = idx($match, 'original');
      if ($original !== null) {
        $dict['original'] = $original;
      }

      $replacement = idx($match, 'replacement');
      if ($replacement !== null) {
        $dict['replacement'] = $replacement;
      }

      $lint = ArcanistLintMessage::newFromDictionary($dict);
      $this->addLintMessage($lint);
    }
  }


/* -(  Linter Information  )------------------------------------------------- */

  /**
   * Return the short name of the linter.
   *
   * @return string Short linter identifier.
   *
   * @task linterinfo
   */
  public function getLinterName() {
    return 'S&RX';
  }

  public function getLinterConfigurationName() {
    return 'script-and-regex';
  }


/* -(  Parsing Output  )----------------------------------------------------- */

  /**
   * Get the line and character of the message from the regex match.
   *
   * @param dict Captured groups from regex.
   * @return pair Line and character of the message.
*
   * @task parse
   */
  private function getMatchLineAndChar(array $match, $path) {
    // A captured byte offset supersedes `line` and `char`. Test for a
    // nonempty capture explicitly: empty('0') is true, so using empty() here
    // would silently discard a match at offset 0.
    $offset = idx($match, 'offset');
    if ($offset !== null && strlen($offset)) {
      list($line, $char) = $this->getEngine()->getLineAndCharFromOffset(
        idx($match, 'file', $path),
        $offset);
      // NOTE(review): the +1 adjustments suggest the engine reports
      // zero-based positions — confirm against getLineAndCharFromOffset().
      return array($line + 1, $char + 1);
    }

    // Without an offset, default to line 1 and no character position.
    $line = idx($match, 'line', 1);
    $char = idx($match, 'char');

    return array($line, $char);
  }

  /**
   * Map the regex matching groups to a message severity. We look for either
   * a nonempty severity name group like 'error', or a group called 'severity'
   * with a valid name.
   *
   * Severities are tried in the order of the map below; the first one that is
   * either named by a nonempty group or equal to the lowercased `severity`
   * capture is returned. If nothing matches, the message is an error.
   *
   * @param dict Captured groups from regex.
   * @return const  @{class:ArcanistLintSeverity} constant.
   *
   * @task parse
   */
  private function getMatchSeverity(array $match) {
    $map = array(
      'error'    => ArcanistLintSeverity::SEVERITY_ERROR,
      'warning'  => ArcanistLintSeverity::SEVERITY_WARNING,
      'autofix'  => ArcanistLintSeverity::SEVERITY_AUTOFIX,
      'advice'   => ArcanistLintSeverity::SEVERITY_ADVICE,
      'disabled' => ArcanistLintSeverity::SEVERITY_DISABLED,
    );

    // Default to '' so strtolower() never receives null when the regex has
    // no `severity` group.
    $severity_name = strtolower(idx($match, 'severity', ''));

    foreach ($map as $name => $severity) {
      if (!empty($match[$name])) {
        return $severity;
      } else if ($severity_name == $name) {
        return $severity;
      }
    }

    return ArcanistLintSeverity::SEVERITY_ERROR;
  }


/* -(  Validating Configuration  )------------------------------------------- */

  /**
   * Load, validate, and return the "script" configuration.
   *
   * @return string The shell command fragment to use to run the linter.
   *
   * @task config
   */
  private function getConfiguredScript() {
    $key = 'linter.scriptandregex.script';
    $config = $this->getEngine()
      ->getConfigurationManager()
      ->getConfigFromAnySource($key);

    if (!$config) {
      throw new ArcanistUsageException(
        "ArcanistScriptAndRegexLinter: ".
        "You must configure '{$key}' to point to a script to execute.");
    }

    // NOTE: No additional validation since the "script" can be some random
    // shell command and/or include flags, so it does not need to point to some
    // file on disk.
return $config;
  }

  /**
   * Load, validate, and return the "regex" configuration.
   *
   * @return string A valid PHP PCRE regular expression.
   * @throws ArcanistUsageException If the regex is not configured or does not
   *         compile.
   *
   * @task config
   */
  private function getConfiguredRegex() {
    $key = 'linter.scriptandregex.regex';
    $config = $this->getEngine()
      ->getConfigurationManager()
      ->getConfigFromAnySource($key);

    if (!$config) {
      throw new ArcanistUsageException(
        "ArcanistScriptAndRegexLinter: ".
        "You must configure '{$key}' with a valid PHP PCRE regex.");
    }

    // NOTE: preg_match() returns 0 for no matches and false for compile error;
    // this won't match, but will validate the syntax of the regex. The "@"
    // hides the PHP warning a bad pattern would emit, so the user sees only
    // the usage exception below.
    $ok = @preg_match($config, 'syntax-check');
    if ($ok === false) {
      throw new ArcanistUsageException(
        "ArcanistScriptAndRegexLinter: ".
        "Regex '{$config}' does not compile. You must configure '{$key}' with ".
        "a valid PHP PCRE regex, including delimiters.");
    }

    return $config;
  }

}
diff --git a/src/lint/linter/ArcanistXHPASTLinter.php b/src/lint/linter/ArcanistXHPASTLinter.php
index 6b3309d0..552500d0 100644
--- a/src/lint/linter/ArcanistXHPASTLinter.php
+++ b/src/lint/linter/ArcanistXHPASTLinter.php
@@ -1,2542 +1,2542 @@ 'PHP Syntax Error!',
      self::LINT_UNABLE_TO_PARSE => 'Unable to Parse',
      self::LINT_VARIABLE_VARIABLE => 'Use of Variable Variable',
      self::LINT_EXTRACT_USE => 'Use of extract()',
      self::LINT_UNDECLARED_VARIABLE => 'Use of Undeclared Variable',
      self::LINT_PHP_SHORT_TAG => 'Use of Short Tag " 'Use of Echo Tag " 'Use of Close Tag "?>"',
      self::LINT_NAMING_CONVENTIONS => 'Naming Conventions',
      self::LINT_IMPLICIT_CONSTRUCTOR => 'Implicit Constructor',
      self::LINT_DYNAMIC_DEFINE => 'Dynamic define()',
      self::LINT_STATIC_THIS => 'Use of $this in Static Context',
      self::LINT_PREG_QUOTE_MISUSE => 'Misuse of preg_quote()',
      self::LINT_PHP_OPEN_TAG => 'Expected Open Tag',
      self::LINT_TODO_COMMENT => 'TODO Comment',
      self::LINT_EXIT_EXPRESSION => 'Exit Used as Expression',
      self::LINT_COMMENT_STYLE => 'Comment Style',
      self::LINT_CLASS_FILENAME_MISMATCH => 'Class-Filename
Mismatch',
      self::LINT_TAUTOLOGICAL_EXPRESSION => 'Tautological Expression',
      self::LINT_PLUS_OPERATOR_ON_STRINGS => 'Not String Concatenation',
      self::LINT_DUPLICATE_KEYS_IN_ARRAY => 'Duplicate Keys in Array',
      self::LINT_REUSED_ITERATORS => 'Reuse of Iterator Variable',
      self::LINT_BRACE_FORMATTING => 'Brace placement',
      self::LINT_PARENTHESES_SPACING => 'Spaces Inside Parentheses',
      self::LINT_CONTROL_STATEMENT_SPACING => 'Space After Control Statement',
      self::LINT_BINARY_EXPRESSION_SPACING => 'Space Around Binary Operator',
      self::LINT_ARRAY_INDEX_SPACING => 'Spacing Before Array Index',
      self::LINT_IMPLICIT_FALLTHROUGH => 'Implicit Fallthrough',
      self::LINT_REUSED_AS_ITERATOR => 'Variable Reused As Iterator',
      self::LINT_COMMENT_SPACING => 'Comment Spaces',
      self::LINT_SLOWNESS => 'Slow Construct',
      self::LINT_CLOSING_CALL_PAREN => 'Call Formatting',
      self::LINT_CLOSING_DECL_PAREN => 'Declaration Formatting',
      self::LINT_REUSED_ITERATOR_REFERENCE => 'Reuse of Iterator References',
      self::LINT_KEYWORD_CASING => 'Keyword Conventions',
      self::LINT_DOUBLE_QUOTE => 'Unnecessary Double Quotes',
      self::LINT_ELSEIF_USAGE => 'ElseIf Usage',
      self::LINT_SEMICOLON_SPACING => 'Semicolon Spacing',
      self::LINT_CONCATENATION_OPERATOR => 'Concatenation Spacing',
      self::LINT_PHP_COMPATIBILITY => 'PHP Compatibility',
    );
  }

  public function getLinterName() {
    return 'XHP';
  }

  public function getLinterConfigurationName() {
    return 'xhpast';
  }

  /**
   * Severities assigned to specific lint codes by this linter. Codes not
   * listed here are not given a severity by this method.
   */
  public function getLintSeverityMap() {
    $disabled = ArcanistLintSeverity::SEVERITY_DISABLED;
    $advice   = ArcanistLintSeverity::SEVERITY_ADVICE;
    $warning  = ArcanistLintSeverity::SEVERITY_WARNING;

    return array(
      self::LINT_TODO_COMMENT => $disabled,
      self::LINT_UNABLE_TO_PARSE => $warning,
      self::LINT_NAMING_CONVENTIONS => $warning,
      self::LINT_PREG_QUOTE_MISUSE => $advice,
      self::LINT_BRACE_FORMATTING => $warning,
      self::LINT_PARENTHESES_SPACING => $warning,
      self::LINT_CONTROL_STATEMENT_SPACING => $warning,
      self::LINT_BINARY_EXPRESSION_SPACING => $warning,
      self::LINT_ARRAY_INDEX_SPACING => $warning,
      self::LINT_IMPLICIT_FALLTHROUGH => $warning,
      self::LINT_SLOWNESS => $warning,
      self::LINT_COMMENT_SPACING => $advice,
      self::LINT_CLOSING_CALL_PAREN => $warning,
      self::LINT_CLOSING_DECL_PAREN => $warning,
      self::LINT_REUSED_ITERATOR_REFERENCE => $warning,
      self::LINT_KEYWORD_CASING => $warning,
      self::LINT_DOUBLE_QUOTE => $advice,
      self::LINT_ELSEIF_USAGE => $advice,
      self::LINT_SEMICOLON_SPACING => $advice,
      self::LINT_CONCATENATION_OPERATOR => $warning,
    );
  }

  /**
   * Configuration keys accepted by this linter, in addition to those of the
   * parent class. With the array "+" operator, a key already defined by the
   * parent keeps the parent's definition.
   */
  public function getLinterConfigurationOptions() {
    return parent::getLinterConfigurationOptions() + array(
      'xhpast.naminghook' => array(
        'type' => 'optional string',
        'help' => pht(
          'Name of a concrete subclass of ArcanistXHPASTLintNamingHook which '.
          'enforces more granular naming convention rules for symbols.'),
      ),
      'xhpast.switchhook' => array(
        'type' => 'optional string',
        'help' => pht(
          'Name of a concrete subclass of ArcanistXHPASTLintSwitchHook which '.
          'tunes the analysis of switch() statements for this linter.'),
      ),
      'xhpast.php-version' => array(
        'type' => 'optional string',
        'help' => pht('PHP version to target.'),
      ),
      'xhpast.php-version.windows' => array(
        'type' => 'optional string',
        'help' => pht('PHP version to target on Windows.'),
      ),
    );
  }

  /**
   * Store the xhpast-specific settings on this object; every other key is
   * delegated to the parent class.
   */
  public function setLinterConfigurationValue($key, $value) {
    switch ($key) {
      case 'xhpast.naminghook':
        $this->naminghook = $value;
        return;
      case 'xhpast.switchhook':
        $this->switchhook = $value;
        return;
      case 'xhpast.php-version':
        $this->version = $value;
        return;
      case 'xhpast.php-version.windows':
        $this->windowsVersion = $value;
        return;
    }

    return parent::setLinterConfigurationValue($key, $value);
  }

  public function getVersion() {
    // The version number should be incremented whenever a new rule is added.
    return '7';
  }

  /**
   * Lint one path: report a parse failure if there is no syntax tree,
   * otherwise run every lint method that has at least one enabled code.
   */
  protected function resolveFuture($path, Future $future) {
    $tree = $this->getXHPASTTreeForPath($path);
    if (!$tree) {
      // No tree: raise a syntax error at the reported line, or a path-level
      // "unable to parse" message for any other exception.
      $ex = $this->getXHPASTExceptionForPath($path);
      if ($ex instanceof XHPASTSyntaxErrorException) {
        $this->raiseLintAtLine(
          $ex->getErrorLine(),
          1,
          self::LINT_PHP_SYNTAX_ERROR,
          'This file contains a syntax error: '.$ex->getMessage());
      } else if ($ex instanceof Exception) {
        $this->raiseLintAtPath(self::LINT_UNABLE_TO_PARSE, $ex->getMessage());
      }
      return;
    }

    $root = $tree->getRootNode();

    // Lint method name => the code, or list of codes, that method can raise.
    $method_codes = array(
      'lintStrstrUsedForCheck' => self::LINT_SLOWNESS,
      'lintStrposUsedForStart' => self::LINT_SLOWNESS,
      'lintImplicitFallthrough' => self::LINT_IMPLICIT_FALLTHROUGH,
      'lintBraceFormatting' => self::LINT_BRACE_FORMATTING,
      'lintTautologicalExpressions' => self::LINT_TAUTOLOGICAL_EXPRESSION,
      'lintCommentSpaces' => self::LINT_COMMENT_SPACING,
      'lintHashComments' => self::LINT_COMMENT_STYLE,
      'lintReusedIterators' => self::LINT_REUSED_ITERATORS,
      'lintReusedIteratorReferences' => self::LINT_REUSED_ITERATOR_REFERENCE,
      'lintVariableVariables' => self::LINT_VARIABLE_VARIABLE,
      'lintUndeclaredVariables' => array(
        self::LINT_EXTRACT_USE,
        self::LINT_REUSED_AS_ITERATOR,
        self::LINT_UNDECLARED_VARIABLE,
      ),
      'lintPHPTagUse' => array(
        self::LINT_PHP_SHORT_TAG,
        self::LINT_PHP_ECHO_TAG,
        self::LINT_PHP_OPEN_TAG,
        self::LINT_PHP_CLOSE_TAG,
      ),
      'lintNamingConventions' => self::LINT_NAMING_CONVENTIONS,
      'lintSurpriseConstructors' => self::LINT_IMPLICIT_CONSTRUCTOR,
      'lintParenthesesShouldHugExpressions' => self::LINT_PARENTHESES_SPACING,
      'lintSpaceAfterControlStatementKeywords' =>
        self::LINT_CONTROL_STATEMENT_SPACING,
      'lintSpaceAroundBinaryOperators' => self::LINT_BINARY_EXPRESSION_SPACING,
      'lintDynamicDefines' => self::LINT_DYNAMIC_DEFINE,
      'lintUseOfThisInStaticMethods' => self::LINT_STATIC_THIS,
      'lintPregQuote' => self::LINT_PREG_QUOTE_MISUSE,
      'lintExitExpressions' => self::LINT_EXIT_EXPRESSION,
      'lintArrayIndexWhitespace' => self::LINT_ARRAY_INDEX_SPACING,
      'lintTODOComments' => self::LINT_TODO_COMMENT,
      'lintPrimaryDeclarationFilenameMatch' =>
        self::LINT_CLASS_FILENAME_MISMATCH,
      'lintPlusOperatorOnStrings' => self::LINT_PLUS_OPERATOR_ON_STRINGS,
      'lintDuplicateKeysInArray' => self::LINT_DUPLICATE_KEYS_IN_ARRAY,
      'lintClosingCallParen' => self::LINT_CLOSING_CALL_PAREN,
      'lintClosingDeclarationParen' => self::LINT_CLOSING_DECL_PAREN,
      'lintKeywordCasing' => self::LINT_KEYWORD_CASING,
      'lintStrings' => self::LINT_DOUBLE_QUOTE,
      'lintElseIfStatements' => self::LINT_ELSEIF_USAGE,
      'lintSemicolons' => self::LINT_SEMICOLON_SPACING,
      'lintSpaceAroundConcatenationOperators' =>
        self::LINT_CONCATENATION_OPERATOR,
      'lintPHPCompatibility' => self::LINT_PHP_COMPATIBILITY,
    );

    foreach ($method_codes as $method => $codes) {
      // The (array) cast lets single codes and lists share one loop. The
      // method is called at most once, as soon as one of its codes is enabled.
      foreach ((array)$codes as $code) {
        if ($this->isCodeEnabled($code)) {
          call_user_func(array($this, $method), $root);
          break;
        }
      }
    }
  }

  /**
   * Flag strstr()/strchr()/stristr() calls that are compared with `===` or
   * `!==` against `false`, with the operands in either order.
   */
  private function lintStrstrUsedForCheck(XHPASTNode $root) {
    $expressions = $root->selectDescendantsOfType('n_BINARY_EXPRESSION');
    foreach ($expressions as $expression) {
      $operator = $expression->getChildOfType(1, 'n_OPERATOR');
      $operator = $operator->getConcreteString();

      if ($operator != '===' && $operator != '!==') {
        continue;
      }

      // Work out which operand is `false`; the other is the candidate call.
      $false = $expression->getChildByIndex(0);
      if ($false->getTypeName() == 'n_SYMBOL_NAME' &&
          $false->getConcreteString() == 'false') {
        $strstr = $expression->getChildByIndex(2);
      } else {
        $strstr = $false;
        $false = $expression->getChildByIndex(2);
        if ($false->getTypeName() != 'n_SYMBOL_NAME' ||
            $false->getConcreteString() != 'false') {
          continue;
        }
      }

      if ($strstr->getTypeName() != 'n_FUNCTION_CALL') {
        continue;
      }

      $name = strtolower($strstr->getChildByIndex(0)->getConcreteString());
      if ($name == 'strstr' || $name == 'strchr') {
        $this->raiseLintAtNode(
          $strstr,
          self::LINT_SLOWNESS,
          'Use strpos() for checking if the string contains something.');
      } else if ($name == 'stristr') {
        $this->raiseLintAtNode(
          $strstr,
          self::LINT_SLOWNESS,
          'Use stripos() for checking if the string contains something.');
      }
}
  }

  /**
   * Flag strpos()/stripos() calls that are compared with `===` or `!==`
   * against the literal `0`, with the operands in either order.
   */
  private function lintStrposUsedForStart(XHPASTNode $root) {
    $expressions = $root->selectDescendantsOfType('n_BINARY_EXPRESSION');
    foreach ($expressions as $expression) {
      $operator = $expression->getChildOfType(1, 'n_OPERATOR');
      $operator = $operator->getConcreteString();

      if ($operator != '===' && $operator != '!==') {
        continue;
      }

      // Work out which operand is `0`; the other is the candidate call.
      $zero = $expression->getChildByIndex(0);
      if ($zero->getTypeName() == 'n_NUMERIC_SCALAR' &&
          $zero->getConcreteString() == '0') {
        $strpos = $expression->getChildByIndex(2);
      } else {
        $strpos = $zero;
        $zero = $expression->getChildByIndex(2);
        if ($zero->getTypeName() != 'n_NUMERIC_SCALAR' ||
            $zero->getConcreteString() != '0') {
          continue;
        }
      }

      if ($strpos->getTypeName() != 'n_FUNCTION_CALL') {
        continue;
      }

      $name = strtolower($strpos->getChildByIndex(0)->getConcreteString());
      if ($name == 'strpos') {
        $this->raiseLintAtNode(
          $strpos,
          self::LINT_SLOWNESS,
          'Use strncmp() for checking if the string starts with something.');
      } else if ($name == 'stripos') {
        $this->raiseLintAtNode(
          $strpos,
          self::LINT_SLOWNESS,
          'Use strncasecmp() for checking if the string starts with '.
          'something.');
      }
    }
  }

  /**
   * Raise compatibility lint for functions, call parameters, classes,
   * interfaces and constants that the compatibility table lists as introduced
   * after the configured target PHP version. Does nothing when no target
   * version is configured. When a Windows target version is configured,
   * function calls are also checked against the table's Windows entries.
   */
  private function lintPHPCompatibility(XHPASTNode $root) {
    if (!$this->version) {
      return;
    }

    $target = phutil_get_library_root('phutil').
      '/../resources/php_compat_info.json';
    $compat_info = phutil_json_decode(Filesystem::readFile($target));

    $calls = $root->selectDescendantsOfType('n_FUNCTION_CALL');
    foreach ($calls as $call) {
      $node = $call->getChildByIndex(0);
      $name = $node->getConcreteString();
      $version = idx($compat_info['functions'], $name);

      if ($version && version_compare($version['min'], $this->version, '>')) {
        $this->raiseLintAtNode(
          $node,
          self::LINT_PHP_COMPATIBILITY,
          "This codebase targets PHP {$this->version}, but `{$name}()` was ".
          "not introduced until PHP {$version['min']}.");
      } else if (array_key_exists($name, $compat_info['params'])) {
        // The function itself is fine; check each argument position against
        // the version that introduced that parameter.
        $params = $call->getChildOfType(1, 'n_CALL_PARAMETER_LIST');
        foreach (array_values($params->getChildren()) as $i => $param) {
          $version = idx($compat_info['params'][$name], $i);
          if ($version && version_compare($version, $this->version, '>')) {
            $this->raiseLintAtNode(
              $param,
              self::LINT_PHP_COMPATIBILITY,
              "This codebase targets PHP {$this->version}, but parameter ".
              ($i + 1)." of `{$name}()` was not introduced until PHP ".
              "{$version}.");
          }
        }
      }

      if ($this->windowsVersion) {
        // An entry of `false` means the function is never available on
        // Windows; otherwise the entry is the first version that has it.
        // Functions with no entry (null) have no Windows restriction.
        $windows = idx($compat_info['functions_windows'], $name);

        if ($windows === false) {
          $this->raiseLintAtNode(
            $node,
            self::LINT_PHP_COMPATIBILITY,
            "This codebase targets PHP {$this->windowsVersion} on Windows, ".
            "but `{$name}()` is not available there.");
        } else if ($windows !== null &&
                   version_compare($windows, $this->windowsVersion, '>')) {
          // Report the version that introduced the function ($windows), not
          // the target version, which the message already names.
          $this->raiseLintAtNode(
            $node,
            self::LINT_PHP_COMPATIBILITY,
            "This codebase targets PHP {$this->windowsVersion} on Windows, ".
            "but `{$name}()` is not available there until PHP ".
            "{$windows}.");
        }
      }
    }

    $classes = $root->selectDescendantsOfType('n_CLASS_NAME');
    foreach ($classes as $node) {
      $name = $node->getConcreteString();
      // A class entry takes precedence; the interface entry is the fallback.
      $version = idx($compat_info['interfaces'], $name);
      $version = idx($compat_info['classes'], $name, $version);
      if ($version && version_compare($version['min'], $this->version, '>')) {
        $this->raiseLintAtNode(
          $node,
          self::LINT_PHP_COMPATIBILITY,
          "This codebase targets PHP {$this->version}, but `{$name}` was not ".
          "introduced until PHP {$version['min']}.");
      }
    }

    // TODO: Technically, this will include function names. This is unlikely to
    // cause any issues (unless, of course, there existed a function that had
    // the same name as some constant).
    $constants = $root->selectDescendantsOfType('n_SYMBOL_NAME');
    foreach ($constants as $node) {
      $name = $node->getConcreteString();
      $version = idx($compat_info['constants'], $name);
      if ($version && version_compare($version['min'], $this->version, '>')) {
        $this->raiseLintAtNode(
          $node,
          self::LINT_PHP_COMPATIBILITY,
          "This codebase targets PHP {$this->version}, but `{$name}` was not ".
          "introduced until PHP {$version['min']}.");
      }
    }

    // Syntax-level features are checked separately from the symbol table.
    if (version_compare($this->version, '5.3.0') < 0) {
      $this->lintPHP53Features($root);
    }

    if (version_compare($this->version, '5.4.0') < 0) {
      $this->lintPHP54Features($root);
    }
  }

  private function lintPHP53Features(XHPASTNode $root) {

    $functions = $root->selectTokensOfType('T_FUNCTION');
    foreach ($functions as $function) {

      // Find the next semantic token after `function`; if it is "(", the
      // function has no name and is therefore anonymous.
      $next = $function->getNextToken();
      while ($next) {
        if ($next->isSemantic()) {
          break;
        }
        $next = $next->getNextToken();
      }

      if ($next) {
        if ($next->getTypeName() == '(') {
          $this->raiseLintAtToken(
            $function,
            self::LINT_PHP_COMPATIBILITY,
            "This codebase targets PHP {$this->version}, but anonymous ".
            "functions were not introduced until PHP 5.3.");
        }
      }
    }

    $namespaces = $root->selectTokensOfType('T_NAMESPACE');
    foreach ($namespaces as $namespace) {
      $this->raiseLintAtToken(
        $namespace,
        self::LINT_PHP_COMPATIBILITY,
        "This codebase targets PHP {$this->version}, but namespaces were not ".
        "introduced until PHP 5.3.");
    }

    // NOTE: This is only "use x;", in anonymous functions the node type is
    // n_LEXICAL_VARIABLE_LIST even though both tokens are T_USE.

    // TODO: We parse n_USE in a slightly crazy way right now; that would be
    // a better selector once it's fixed.

    $uses = $root->selectDescendantsOfType('n_USE_LIST');
    foreach ($uses as $use) {
      $this->raiseLintAtNode(
        $use,
        self::LINT_PHP_COMPATIBILITY,
        "This codebase targets PHP {$this->version}, but namespaces were not ".
"introduced until PHP 5.3."); } $statics = $root->selectDescendantsOfType('n_CLASS_STATIC_ACCESS'); foreach ($statics as $static) { $name = $static->getChildByIndex(0); if ($name->getTypeName() != 'n_CLASS_NAME') { continue; } if ($name->getConcreteString() == 'static') { $this->raiseLintAtNode( $name, self::LINT_PHP_COMPATIBILITY, "This codebase targets PHP {$this->version}, but `static::` was not ". "introduced until PHP 5.3."); } } $ternaries = $root->selectDescendantsOfType('n_TERNARY_EXPRESSION'); foreach ($ternaries as $ternary) { $yes = $ternary->getChildByIndex(1); if ($yes->getTypeName() == 'n_EMPTY') { $this->raiseLintAtNode( $ternary, self::LINT_PHP_COMPATIBILITY, "This codebase targets PHP {$this->version}, but short ternary was ". "not introduced until PHP 5.3."); } } $heredocs = $root->selectDescendantsOfType('n_HEREDOC'); foreach ($heredocs as $heredoc) { if (preg_match('/^<<<[\'"]/', $heredoc->getConcreteString())) { $this->raiseLintAtNode( $heredoc, self::LINT_PHP_COMPATIBILITY, "This codebase targets PHP {$this->version}, but nowdoc was not ". "introduced until PHP 5.3."); } } } private function lintPHP54Features(XHPASTNode $root) { $indexes = $root->selectDescendantsOfType('n_INDEX_ACCESS'); foreach ($indexes as $index) { $left = $index->getChildByIndex(0); switch ($left->getTypeName()) { case 'n_FUNCTION_CALL': case 'n_METHOD_CALL': $this->raiseLintAtNode( $index->getChildByIndex(1), self::LINT_PHP_COMPATIBILITY, 'The f()[...] syntax was not introduced until PHP 5.4, but this '. 'codebase targets an earlier version of PHP. You can rewrite '. 'this expression using idx().'); break; } } } private function lintImplicitFallthrough(XHPASTNode $root) { $hook_obj = null; $working_copy = $this->getEngine()->getWorkingCopy(); if ($working_copy) { $hook_class = $this->switchhook ? 
$this->switchhook : $this->getDeprecatedConfiguration('lint.xhpast.switchhook'); if ($hook_class) { $hook_obj = newv($hook_class, array()); assert_instances_of(array($hook_obj), 'ArcanistXHPASTLintSwitchHook'); } } $switches = $root->selectDescendantsOfType('n_SWITCH'); foreach ($switches as $switch) { $blocks = array(); $cases = $switch->selectDescendantsOfType('n_CASE'); foreach ($cases as $case) { $blocks[] = $case; } $defaults = $switch->selectDescendantsOfType('n_DEFAULT'); foreach ($defaults as $default) { $blocks[] = $default; } foreach ($blocks as $key => $block) { // Collect all the tokens in this block which aren't at top level. // We want to ignore "break", and "continue" in these blocks. $lower_level = $block->selectDescendantsOfType('n_WHILE'); $lower_level->add($block->selectDescendantsOfType('n_DO_WHILE')); $lower_level->add($block->selectDescendantsOfType('n_FOR')); $lower_level->add($block->selectDescendantsOfType('n_FOREACH')); $lower_level->add($block->selectDescendantsOfType('n_SWITCH')); $lower_level_tokens = array(); foreach ($lower_level as $lower_level_block) { $lower_level_tokens += $lower_level_block->getTokens(); } // Collect all the tokens in this block which aren't in this scope // (because they're inside class, function or interface declarations). // We want to ignore all of these tokens. $decls = $block->selectDescendantsOfType('n_FUNCTION_DECLARATION'); $decls->add($block->selectDescendantsOfType('n_CLASS_DECLARATION')); // For completeness; these can't actually have anything. $decls->add($block->selectDescendantsOfType('n_INTERFACE_DECLARATION')); $different_scope_tokens = array(); foreach ($decls as $decl) { $different_scope_tokens += $decl->getTokens(); } $lower_level_tokens += $different_scope_tokens; // Get all the trailing nonsemantic tokens, since we need to look for // "fallthrough" comments past the end of the semantic block. 
$tokens = $block->getTokens(); $last = end($tokens); while ($last && $last = $last->getNextToken()) { if ($last->isSemantic()) { break; } $tokens[$last->getTokenID()] = $last; } $blocks[$key] = array( $tokens, $lower_level_tokens, $different_scope_tokens, ); } foreach ($blocks as $token_lists) { list( $tokens, $lower_level_tokens, $different_scope_tokens) = $token_lists; // Test each block (case or default statement) to see if it's OK. It's // OK if: // // - it is empty; or // - it ends in break, return, throw, continue or exit at top level; or // - it has a comment with "fallthrough" in its text. // Empty blocks are OK, so we start this at `true` and only set it to // false if we find a statement. $block_ok = true; // Keeps track of whether the current statement is one that validates // the block (break, return, throw, continue) or something else. $statement_ok = false; foreach ($tokens as $token_id => $token) { if (!$token->isSemantic()) { // Liberally match "fall" in the comment text so that comments like // "fallthru", "fall through", "fallthrough", etc., are accepted. if (preg_match('/fall/i', $token->getValue())) { $block_ok = true; break; } continue; } $tok_type = $token->getTypeName(); if ($tok_type == 'T_FUNCTION' || $tok_type == 'T_CLASS' || $tok_type == 'T_INTERFACE') { // These aren't statements, but mark the block as nonempty anyway. 
$block_ok = false; continue; } if ($tok_type == ';') { if ($statement_ok) { $statment_ok = false; } else { $block_ok = false; } continue; } if ($tok_type == 'T_BREAK' || $tok_type == 'T_CONTINUE') { if (empty($lower_level_tokens[$token_id])) { $statement_ok = true; $block_ok = true; } continue; } if ($tok_type == 'T_RETURN' || $tok_type == 'T_THROW' || $tok_type == 'T_EXIT' || ($hook_obj && $hook_obj->checkSwitchToken($token))) { if (empty($different_scope_tokens[$token_id])) { $statement_ok = true; $block_ok = true; } continue; } } if (!$block_ok) { $this->raiseLintAtToken( head($tokens), self::LINT_IMPLICIT_FALLTHROUGH, "This 'case' or 'default' has a nonempty block which does not ". "end with 'break', 'continue', 'return', 'throw' or 'exit'. Did ". "you forget to add one of those? If you intend to fall through, ". "add a '// fallthrough' comment to silence this warning."); } } } } private function lintBraceFormatting(XHPASTNode $root) { foreach ($root->selectDescendantsOfType('n_STATEMENT_LIST') as $list) { $tokens = $list->getTokens(); if (!$tokens || head($tokens)->getValue() != '{') { continue; } list($before, $after) = $list->getSurroundingNonsemanticTokens(); if (!$before) { $first = head($tokens); // Only insert the space if we're after a closing parenthesis. If // we're in a construct like "else{}", other rules will insert space // after the 'else' correctly. $prev = $first->getPrevToken(); if (!$prev || $prev->getValue() != ')') { continue; } $this->raiseLintAtToken( $first, self::LINT_BRACE_FORMATTING, 'Put opening braces on the same line as control statements and '. 'declarations, with a single space before them.', ' '.$first->getValue()); } else if (count($before) == 1) { $before = reset($before); if ($before->getValue() != ' ') { $this->raiseLintAtToken( $before, self::LINT_BRACE_FORMATTING, 'Put opening braces on the same line as control statements and '. 
'declarations, with a single space before them.', ' '); } } } } private function lintTautologicalExpressions(XHPASTNode $root) { $expressions = $root->selectDescendantsOfType('n_BINARY_EXPRESSION'); static $operators = array( '-' => true, '/' => true, '-=' => true, '/=' => true, '<=' => true, '<' => true, '==' => true, '===' => true, '!=' => true, '!==' => true, '>=' => true, '>' => true, ); static $logical = array( '||' => true, '&&' => true, ); foreach ($expressions as $expr) { $operator = $expr->getChildByIndex(1)->getConcreteString(); if (!empty($operators[$operator])) { $left = $expr->getChildByIndex(0)->getSemanticString(); $right = $expr->getChildByIndex(2)->getSemanticString(); if ($left == $right) { $this->raiseLintAtNode( $expr, self::LINT_TAUTOLOGICAL_EXPRESSION, 'Both sides of this expression are identical, so it always '. 'evaluates to a constant.'); } } if (!empty($logical[$operator])) { $left = $expr->getChildByIndex(0)->getSemanticString(); $right = $expr->getChildByIndex(2)->getSemanticString(); // NOTE: These will be null to indicate "could not evaluate". $left = $this->evaluateStaticBoolean($left); $right = $this->evaluateStaticBoolean($right); if (($operator == '||' && ($left === true || $right === true)) || ($operator == '&&' && ($left === false || $right === false))) { $this->raiseLintAtNode( $expr, self::LINT_TAUTOLOGICAL_EXPRESSION, 'The logical value of this expression is static. Did you forget '. 'to remove some debugging code?'); } } } } /** * Statically evaluate a boolean value from an XHP tree. * * TODO: Improve this and move it to XHPAST proper? * * @param string The "semantic string" of a single value. * @return mixed ##true## or ##false## if the value could be evaluated * statically; ##null## if static evaluation was not possible. 
*/
  private function evaluateStaticBoolean($string) {
    switch (strtolower($string)) {
      case '0':
      case 'null':
      case 'false':
        return false;
      case '1':
      case 'true':
        return true;
    }
    return null;
  }

  /**
   * Raise a lint (with replacement text) for non-"#" comments whose opening
   * marker is immediately followed by a character other than "/", "*" or
   * whitespace.
   *
   * @param XHPASTNode Root node of the tree to inspect.
   * @return void
   */
  protected function lintCommentSpaces(XHPASTNode $root) {
    foreach ($root->selectTokensOfType('T_COMMENT') as $comment) {
      $value = $comment->getValue();
      if ($value[0] != '#') {
        $match = null;
        if (preg_match('@^(/[/*]+)[^/*\s]@', $value, $match)) {
          $this->raiseLintAtOffset(
            $comment->getOffset(),
            self::LINT_COMMENT_SPACING,
            'Put space after comment start.',
            $match[1],
            $match[1].' ');
        }
      }
    }
  }

  /**
   * Raise a lint for comments starting with "#", offering "//" as the
   * replacement (with a trailing space when the "#" is directly followed by
   * a non-whitespace character).
   *
   * @param XHPASTNode Root node of the tree to inspect.
   * @return void
   */
  protected function lintHashComments(XHPASTNode $root) {
    foreach ($root->selectTokensOfType('T_COMMENT') as $comment) {
      $value = $comment->getValue();
      if ($value[0] != '#') {
        continue;
      }

      $this->raiseLintAtOffset(
        $comment->getOffset(),
        self::LINT_COMMENT_STYLE,
        'Use "//" single-line comments, not "#".',
        '#',
        (preg_match('/^#\S/', $value) ? '// ' : '//'));
    }
  }

  /**
   * Find cases where loops get nested inside each other but use the same
   * iterator variable. For example:
   *
   *  COUNTEREXAMPLE
   *  foreach ($list as $thing) {
   *    foreach ($stuff as $thing) { // <-- Raises an error for reuse of $thing
   *      // ...
   *    }
   *  }
   *
   * @param XHPASTNode Root node of the tree to inspect.
   * @return void
   */
  private function lintReusedIterators(XHPASTNode $root) {
    // Map of loop node ID to a map of <variable name, variable node>.
    $used_vars = array();

    $for_loops = $root->selectDescendantsOfType('n_FOR');
    foreach ($for_loops as $for_loop) {
      $var_map = array();

      // Find all the variables that are assigned to in the for() expression.
      $for_expr = $for_loop->getChildOfType(0, 'n_FOR_EXPRESSION');
      $bin_exprs = $for_expr->selectDescendantsOfType('n_BINARY_EXPRESSION');
      foreach ($bin_exprs as $bin_expr) {
        if ($bin_expr->getChildByIndex(1)->getConcreteString() == '=') {
          $var = $bin_expr->getChildByIndex(0);
          $var_map[$var->getConcreteString()] = $var;
        }
      }

      $used_vars[$for_loop->getID()] = $var_map;
    }

    $foreach_loops = $root->selectDescendantsOfType('n_FOREACH');
    foreach ($foreach_loops as $foreach_loop) {
      $var_map = array();

      $foreach_expr = $foreach_loop->getChildOfType(0, 'n_FOREACH_EXPRESSION');

      // We might use one or two vars, i.e. "foreach ($x as $y => $z)" or
      // "foreach ($x as $y)".
      $possible_used_vars = array(
        $foreach_expr->getChildByIndex(1),
        $foreach_expr->getChildByIndex(2),
      );
      foreach ($possible_used_vars as $var) {
        if ($var->getTypeName() == 'n_EMPTY') {
          continue;
        }
        $name = $var->getConcreteString();
        $name = trim($name, '&'); // Get rid of ref silliness.
        $var_map[$name] = $var;
      }

      $used_vars[$foreach_loop->getID()] = $var_map;
    }

    $all_loops = $for_loops->add($foreach_loops);
    foreach ($all_loops as $loop) {
      $child_for_loops = $loop->selectDescendantsOfType('n_FOR');
      $child_foreach_loops = $loop->selectDescendantsOfType('n_FOREACH');
      $child_loops = $child_for_loops->add($child_foreach_loops);

      $outer_vars = $used_vars[$loop->getID()];
      foreach ($child_loops as $inner_loop) {
        $inner_vars = $used_vars[$inner_loop->getID()];
        $shared = array_intersect_key($outer_vars, $inner_vars);
        if ($shared) {
          $shared_desc = implode(', ', array_keys($shared));
          $message = $this->raiseLintAtNode(
            $inner_loop->getChildByIndex(0),
            self::LINT_REUSED_ITERATORS,
            "This loop reuses iterator variables ({$shared_desc}) from an ".
            "outer loop. You might be clobbering the outer iterator. Change ".
            "the inner loop to use a different iterator name.");

          // Point at the outer loop's variables as related locations.
          $locations = array();
          foreach ($shared as $var) {
            $locations[] = $this->getOtherLocation($var->getOffset());
          }
          $message->setOtherLocations($locations);
        }
      }
    }
  }

  /**
   * Find cases where a foreach loop is being iterated using a variable
   * reference and the same variable is used outside of the loop without
   * calling unset() or reassigning the variable to another variable
   * reference.
   *
   *  COUNTEREXAMPLE
   *  foreach ($ar as &$a) {
   *    // ...
   *  }
   *  $a = 1; // <-- Raises an error for using $a
   *
   * @param XHPASTNode Root node of the tree to inspect.
   * @return void
   */
  protected function lintReusedIteratorReferences(XHPASTNode $root) {
    $fdefs = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
    $mdefs = $root->selectDescendantsOfType('n_METHOD_DECLARATION');
    $defs = $fdefs->add($mdefs);

    foreach ($defs as $def) {
      $body = $def->getChildByIndex(5);
      if ($body->getTypeName() == 'n_EMPTY') {
        // Abstract method declaration.
        continue;
      }

      // Node IDs which must not be treated as uses in this scope.
      $exclude = array();

      // Exclude uses of variables, unsets, and foreach loops
      // within closures - they are checked on their own
      $func_defs = $body->selectDescendantsOfType('n_FUNCTION_DECLARATION');
      foreach ($func_defs as $func_def) {
        $vars = $func_def->selectDescendantsOfType('n_VARIABLE');
        foreach ($vars as $var) {
          $exclude[$var->getID()] = true;
        }

        $unset_lists = $func_def->selectDescendantsOfType('n_UNSET_LIST');
        foreach ($unset_lists as $unset_list) {
          $exclude[$unset_list->getID()] = true;
        }

        $foreaches = $func_def->selectDescendantsOfType('n_FOREACH');
        foreach ($foreaches as $foreach) {
          $exclude[$foreach->getID()] = true;
        }
      }

      // Find all variables that are unset within the scope
      $unset_vars = array();
      $unset_lists = $body->selectDescendantsOfType('n_UNSET_LIST');
      foreach ($unset_lists as $unset_list) {
        if (isset($exclude[$unset_list->getID()])) {
          continue;
        }

        $unset_list_vars = $unset_list->selectDescendantsOfType('n_VARIABLE');
        foreach ($unset_list_vars as $var) {
          $concrete = $this->getConcreteVariableString($var);
          $unset_vars[$concrete][] = $var->getOffset();
          $exclude[$var->getID()] = true;
        }
      }

      // Find all reference variables in foreach expressions
      $reference_vars = array();
      $foreaches = $body->selectDescendantsOfType('n_FOREACH');
      foreach ($foreaches as $foreach) {
        if (isset($exclude[$foreach->getID()])) {
          continue;
        }

        $foreach_expr = $foreach->getChildOfType(0, 'n_FOREACH_EXPRESSION');
        $var = $foreach_expr->getChildByIndex(2);
        if ($var->getTypeName() != 'n_VARIABLE_REFERENCE') {
          continue;
        }

        $reference = $var->getChildByIndex(0);
        if ($reference->getTypeName() != 'n_VARIABLE') {
          continue;
        }

        $reference_name = $this->getConcreteVariableString($reference);
        $reference_vars[$reference_name][] = $reference->getOffset();
        $exclude[$reference->getID()] = true;

        // Exclude uses of the reference variable within the foreach loop
        $foreach_vars = $foreach->selectDescendantsOfType('n_VARIABLE');
        foreach ($foreach_vars as $var) {
          $name = $this->getConcreteVariableString($var);
          if ($name == $reference_name) {
            $exclude[$var->getID()] = true;
          }
        }
      }

      // Allow usage if the reference variable is assigned to another
      // reference variable
      $binary = $body->selectDescendantsOfType('n_BINARY_EXPRESSION');
      foreach ($binary as $expr) {
        if ($expr->getChildByIndex(1)->getConcreteString() != '=') {
          continue;
        }
        $lval = $expr->getChildByIndex(0);
        if ($lval->getTypeName() != 'n_VARIABLE') {
          continue;
        }
        $rval = $expr->getChildByIndex(2);
        if ($rval->getTypeName() != 'n_VARIABLE_REFERENCE') {
          continue;
        }

        // Counts as unsetting a variable
        $concrete = $this->getConcreteVariableString($lval);
        $unset_vars[$concrete][] = $lval->getOffset();
        $exclude[$lval->getID()] = true;
      }

      $all = $body->selectDescendantsOfType('n_VARIABLE');
      foreach ($all as $var) {
        if (isset($exclude[$var->getID()])) {
          continue;
        }

        $name = $this->getConcreteVariableString($var);
        if (!isset($reference_vars[$name])) {
          continue;
        }

        // Find the closest reference offset to this variable
        $reference_offset = null;
        foreach ($reference_vars[$name] as $offset) {
          if ($offset < $var->getOffset()) {
            $reference_offset = $offset;
          } else {
            break;
          }
        }
        if (!$reference_offset) {
          continue;
        }

        // Check if an unset exists between reference and usage of this
        // variable
        $warn = true;
        if (isset($unset_vars[$name])) {
          foreach ($unset_vars[$name] as $unset_offset) {
            if ($unset_offset > $reference_offset &&
                $unset_offset < $var->getOffset()) {
              $warn = false;
              break;
            }
          }
        }
        if ($warn) {
          $this->raiseLintAtNode(
            $var,
            self::LINT_REUSED_ITERATOR_REFERENCE,
            'This variable was used already as a by-reference iterator '.
            'variable. Such variables survive outside the foreach loop, '.
            'do not reuse.');
        }
      }
    }
  }

  /**
   * Raise a lint for every variable-variable node in the tree.
   *
   * @param XHPASTNode Root node of the tree to inspect.
   * @return void
   */
  protected function lintVariableVariables(XHPASTNode $root) {
    $vvars = $root->selectDescendantsOfType('n_VARIABLE_VARIABLE');
    foreach ($vvars as $vvar) {
      $this->raiseLintAtNode(
        $vvar,
        self::LINT_VARIABLE_VARIABLE,
        'Rewrite this code to use an array. Variable variables are unclear '.
        'and hinder static analysis.');
    }
  }

  /**
   * Raise lints for variables used before any declaration within a function
   * or method, for extract() calls, and for foreach() iterators which reuse
   * a previously declared local that is used again after the loop.
   *
   * @param XHPASTNode Root node of the tree to inspect.
   * @return void
   */
  private function lintUndeclaredVariables(XHPASTNode $root) {
    // These things declare variables in a function:
    //    Explicit parameters
    //    Assignment
    //    Assignment via list()
    //    Static
    //    Global
    //    Lexical vars
    //    Builtins ($this)
    //    foreach()
    //    catch
    //
    // These things make lexical scope unknowable:
    //    Use of extract()
    //    Assignment to variable variables ($$x)
    //    Global with variable variables
    //
    // These things don't count as "using" a variable:
    //    isset()
    //    empty()
    //    Static class variables
    //
    // The general approach here is to find each function/method declaration,
    // then:
    //
    //  1. Identify all the variable declarations, and where they first occur
    //     in the function/method declaration.
    //  2. Identify all the uses that don't really count (as above).
    //  3. Everything else must be a use of a variable.
    //  4. For each variable, check if any uses occur before the declaration
    //     and warn about them.
    //
    // We also keep track of where lexical scope becomes unknowable (e.g.,
    // because the function calls extract() or uses dynamic variables,
    // preventing us from keeping track of which variables are defined) so we
    // can stop issuing warnings after that.
    //
    // TODO: Support functions defined inside other functions which is commonly
    // used with anonymous functions.

    $fdefs = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
    $mdefs = $root->selectDescendantsOfType('n_METHOD_DECLARATION');
    $defs = $fdefs->add($mdefs);

    foreach ($defs as $def) {

      // We keep track of the first offset where scope becomes unknowable, and
      // silence any warnings after that. Default it to INT_MAX so we can min()
      // it later to keep track of the first problem we encounter.
      $scope_destroyed_at = PHP_INT_MAX;

      $declarations = array(
        '$this' => 0,
      ) + array_fill_keys($this->getSuperGlobalNames(), 0);
      $declaration_tokens = array();
      $exclude_tokens = array();
      $vars = array();

      // First up, find all the different kinds of declarations, as explained
      // above. Put the tokens into the $vars array.

      $param_list = $def->getChildOfType(3, 'n_DECLARATION_PARAMETER_LIST');
      $param_vars = $param_list->selectDescendantsOfType('n_VARIABLE');
      foreach ($param_vars as $var) {
        $vars[] = $var;
      }

      // This is PHP5.3 closure syntax: function () use ($x) {};
      $lexical_vars = $def
        ->getChildByIndex(4)
        ->selectDescendantsOfType('n_VARIABLE');
      foreach ($lexical_vars as $var) {
        $vars[] = $var;
      }

      $body = $def->getChildByIndex(5);
      if ($body->getTypeName() == 'n_EMPTY') {
        // Abstract method declaration.
        continue;
      }

      $static_vars = $body
        ->selectDescendantsOfType('n_STATIC_DECLARATION')
        ->selectDescendantsOfType('n_VARIABLE');
      foreach ($static_vars as $var) {
        $vars[] = $var;
      }

      $global_vars = $body
        ->selectDescendantsOfType('n_GLOBAL_DECLARATION_LIST');
      foreach ($global_vars as $var_list) {
        foreach ($var_list->getChildren() as $var) {
          if ($var->getTypeName() == 'n_VARIABLE') {
            $vars[] = $var;
          } else {
            // Dynamic global variable, i.e. "global $$x;".
            $scope_destroyed_at = min($scope_destroyed_at, $var->getOffset());
            // An error is raised elsewhere, no need to raise here.
          }
        }
      }

      // Include "catch (Exception $ex)", but not variables in the body of the
      // catch block.
      $catches = $body->selectDescendantsOfType('n_CATCH');
      foreach ($catches as $catch) {
        $vars[] = $catch->getChildOfType(1, 'n_VARIABLE');
      }

      $binary = $body->selectDescendantsOfType('n_BINARY_EXPRESSION');
      foreach ($binary as $expr) {
        if ($expr->getChildByIndex(1)->getConcreteString() != '=') {
          continue;
        }
        $lval = $expr->getChildByIndex(0);
        if ($lval->getTypeName() == 'n_VARIABLE') {
          $vars[] = $lval;
        } else if ($lval->getTypeName() == 'n_LIST') {
          // Recursively grab everything out of list(), since the grammar
          // permits list() to be nested. Also note that list() is ONLY valid
          // as an lval assignments, so we could safely lift this out of the
          // n_BINARY_EXPRESSION branch.
          $assign_vars = $lval->selectDescendantsOfType('n_VARIABLE');
          foreach ($assign_vars as $var) {
            $vars[] = $var;
          }
        }

        if ($lval->getTypeName() == 'n_VARIABLE_VARIABLE') {
          $scope_destroyed_at = min($scope_destroyed_at, $lval->getOffset());
          // No need to raise here since we raise an error elsewhere.
        }
      }

      $calls = $body->selectDescendantsOfType('n_FUNCTION_CALL');
      foreach ($calls as $call) {
        $name = strtolower($call->getChildByIndex(0)->getConcreteString());

        if ($name == 'empty' || $name == 'isset') {
          $params = $call
            ->getChildOfType(1, 'n_CALL_PARAMETER_LIST')
            ->selectDescendantsOfType('n_VARIABLE');
          foreach ($params as $var) {
            $exclude_tokens[$var->getID()] = true;
          }
          continue;
        }
        if ($name != 'extract') {
          continue;
        }
        $scope_destroyed_at = min($scope_destroyed_at, $call->getOffset());
        $this->raiseLintAtNode(
          $call,
          self::LINT_EXTRACT_USE,
          'Avoid extract(). It is confusing and hinders static analysis.');
      }

      // Now we have every declaration except foreach(), handled below. Build
      // two maps, one which just keeps track of which tokens are part of
      // declarations ($declaration_tokens) and one which has the first offset
      // where a variable is declared ($declarations).

      foreach ($vars as $var) {
        $concrete = $this->getConcreteVariableString($var);
        $declarations[$concrete] = min(
          idx($declarations, $concrete, PHP_INT_MAX),
          $var->getOffset());
        $declaration_tokens[$var->getID()] = true;
      }

      // Excluded tokens are ones we don't "count" as being used, described
      // above. Put them into $exclude_tokens.

      $class_statics = $body
        ->selectDescendantsOfType('n_CLASS_STATIC_ACCESS');
      $class_static_vars = $class_statics
        ->selectDescendantsOfType('n_VARIABLE');
      foreach ($class_static_vars as $var) {
        $exclude_tokens[$var->getID()] = true;
      }

      // Find all the variables in scope, and figure out where they are used.
      // We want to find foreach() iterators which are both declared before and
      // used after the foreach() loop.

      $uses = array();

      $all_vars = $body->selectDescendantsOfType('n_VARIABLE');
      $all = array();

      // NOTE: $all_vars is not a real array so we can't unset() it.
      foreach ($all_vars as $var) {

        // Be strict since it's easier; we don't let you reuse an iterator you
        // declared before a loop after the loop, even if you're just assigning
        // to it.

        $concrete = $this->getConcreteVariableString($var);
        $uses[$concrete][$var->getID()] = $var->getOffset();

        if (isset($declaration_tokens[$var->getID()])) {
          // We know this is part of a declaration, so it's fine.
          continue;
        }
        if (isset($exclude_tokens[$var->getID()])) {
          // We know this is part of isset() or similar, so it's fine.
          continue;
        }

        $all[$var->getOffset()] = $concrete;
      }

      // Do foreach() last, we want to handle implicit redeclaration of a
      // variable already in scope since this probably means we're overwriting
      // a local.

      // NOTE: Processing foreach expressions in order allows programs which
      // reuse iterator variables in other foreach() loops -- this is fine. We
      // have a separate warning to prevent nested loops from reusing the same
      // iterators.

      $foreaches = $body->selectDescendantsOfType('n_FOREACH');
      $all_foreach_vars = array();
      foreach ($foreaches as $foreach) {
        $foreach_expr = $foreach->getChildOfType(0, 'n_FOREACH_EXPRESSION');

        $foreach_vars = array();

        // Determine the end of the foreach() loop.
        $foreach_tokens = $foreach->getTokens();
        $last_token = end($foreach_tokens);
        $foreach_end = $last_token->getOffset();

        $key_var = $foreach_expr->getChildByIndex(1);
        if ($key_var->getTypeName() == 'n_VARIABLE') {
          $foreach_vars[] = $key_var;
        }

        $value_var = $foreach_expr->getChildByIndex(2);
        if ($value_var->getTypeName() == 'n_VARIABLE') {
          $foreach_vars[] = $value_var;
        } else {
          // The root-level token may be a reference, as in:
          //    foreach ($a as $b => &$c) { ... }
          // Reach into the n_VARIABLE_REFERENCE node to grab the n_VARIABLE
          // node.
          $var = $value_var->getChildByIndex(0);
          if ($var->getTypeName() == 'n_VARIABLE_VARIABLE') {
            $var = $var->getChildByIndex(0);
          }
          $foreach_vars[] = $var;
        }

        // Remove all uses of the iterators inside of the foreach() loop from
        // the $uses map.

        foreach ($foreach_vars as $var) {
          $concrete = $this->getConcreteVariableString($var);
          $offset = $var->getOffset();

          foreach ($uses[$concrete] as $id => $use_offset) {
            if (($use_offset >= $offset) && ($use_offset < $foreach_end)) {
              unset($uses[$concrete][$id]);
            }
          }

          $all_foreach_vars[] = $var;
        }
      }

      foreach ($all_foreach_vars as $var) {
        $concrete = $this->getConcreteVariableString($var);
        $offset = $var->getOffset();

        // If a variable was declared before a foreach() and is used after
        // it, raise a message.

        if (isset($declarations[$concrete])) {
          if ($declarations[$concrete] < $offset) {
            if (!empty($uses[$concrete]) &&
                max($uses[$concrete]) > $offset) {
              $message = $this->raiseLintAtNode(
                $var,
                self::LINT_REUSED_AS_ITERATOR,
                'This iterator variable is a previously declared local '.
                'variable. To avoid overwriting locals, do not reuse them '.
                'as iterator variables.');
              $message->setOtherLocations(array(
                $this->getOtherLocation($declarations[$concrete]),
                $this->getOtherLocation(max($uses[$concrete])),
              ));
            }
          }
        }

        // This is a declaration, exclude it from the "declare variables prior
        // to use" check below.
        unset($all[$var->getOffset()]);

        $vars[] = $var;
      }

      // Now rebuild declarations to include foreach().

      foreach ($vars as $var) {
        $concrete = $this->getConcreteVariableString($var);
        $declarations[$concrete] = min(
          idx($declarations, $concrete, PHP_INT_MAX),
          $var->getOffset());
        $declaration_tokens[$var->getID()] = true;
      }

      // Variables interpolated into strings and heredocs count as uses too.
      foreach (array('n_STRING_SCALAR', 'n_HEREDOC') as $type) {
        foreach ($body->selectDescendantsOfType($type) as $string) {
          foreach ($string->getStringVariables() as $offset => $var) {
            $all[$string->getOffset() + $offset - 1] = '$'.$var;
          }
        }
      }

      // Issue a warning for every variable token, unless it appears in a
      // declaration, we know about a prior declaration, we have explicitly
      // excluded it, or scope has been made unknowable before it appears.

      $issued_warnings = array();
      foreach ($all as $offset => $concrete) {
        if ($offset >= $scope_destroyed_at) {
          // This appears after an extract() or $$var so we have no idea
          // whether it's legitimate or not. We raised a harshly-worded warning
          // when scope was made unknowable, so just ignore anything we can't
          // figure out.
          continue;
        }
        if ($offset >= idx($declarations, $concrete, PHP_INT_MAX)) {
          // The use appears after the variable is declared, so it's fine.
          continue;
        }
        if (!empty($issued_warnings[$concrete])) {
          // We've already issued a warning for this variable so we don't need
          // to issue another one.
          continue;
        }
        $this->raiseLintAtOffset(
          $offset,
          self::LINT_UNDECLARED_VARIABLE,
          'Declare variables prior to use (even if you are passing them '.
          'as reference parameters). You may have misspelled this '.
          'variable name.',
          $concrete);
        $issued_warnings[$concrete] = true;
      }
    }
  }

  /**
   * Get the concrete string of a variable node with any surrounding curly
   * braces removed.
   *
   * @param XHPASTNode Variable node.
   * @return string Concrete string with leading/trailing "{" and "}" trimmed.
   */
  private function getConcreteVariableString(XHPASTNode $var) {
    $concrete = $var->getConcreteString();
    // Strip off curly braces as in $obj->{$property}.
    $concrete = trim($concrete, '{}');
    return $concrete;
  }

  /**
   * Raise lints about the PHP tags used in a file: a short open tag, the
   * echo short form, a file which does not begin with an open tag, and any
   * closing tag.
   *
   * NOTE(review): In the copy of this file the rewrite was made from, markup
   * stripping had removed every span running from a PHP open tag to the next
   * ">" character, leaving this method's string literals unterminated. The
   * open-tag literals, the replacement text for the short tag, the "#!"
   * exemption and the `break` statements were reconstructed around the
   * surviving fragments. Confirm them against version control.
   *
   * @param XHPASTNode Root node of the tree to inspect.
   * @return void
   */
  private function lintPHPTagUse(XHPASTNode $root) {
    $tokens = $root->getTokens();
    // Every branch ends in `break`, so only the first token is examined.
    foreach ($tokens as $token) {
      if ($token->getTypeName() == 'T_OPEN_TAG') {
        if (trim($token->getValue()) == '<?') {
          $this->raiseLintAtToken(
            $token,
            self::LINT_PHP_SHORT_TAG,
            'Use the full form of the PHP open tag, "<?php".',
            "<?php\n");
        }
        break;
      } else if ($token->getTypeName() == 'T_OPEN_TAG_WITH_ECHO') {
        $this->raiseLintAtToken(
          $token,
          self::LINT_PHP_ECHO_TAG,
          'Avoid the PHP echo short form, "<?=".');
        break;
      } else {
        if (!preg_match('/^#!/', $token->getValue())) {
          $this->raiseLintAtToken(
            $token,
            self::LINT_PHP_OPEN_TAG,
            'PHP files should start with "<?php", which may be preceded by '.
            'a "#!" line for scripts.');
        }
        break;
      }
    }

    foreach ($root->selectTokensOfType('T_CLOSE_TAG') as $token) {
      $this->raiseLintAtToken(
        $token,
        self::LINT_PHP_CLOSE_TAG,
        'Do not use the PHP closing tag, "?>".');
    }
  }

  private function lintNamingConventions(XHPASTNode $root) {

    // We're going to build up a list of tuples
    // and then try to instantiate a hook class which has the opportunity to
    // override us.
    $names = array();

    $classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION');
    foreach ($classes as $class) {
      $name_token = $class->getChildByIndex(1);
      $name_string = $name_token->getConcreteString();

      $names[] = array(
        'class',
        $name_string,
        $name_token,
        ArcanistXHPASTLintNamingHook::isUpperCamelCase($name_string)
          ? null
          : 'Follow naming conventions: classes should be named using '.
            'UpperCamelCase.',
      );
    }

    $ifaces = $root->selectDescendantsOfType('n_INTERFACE_DECLARATION');
    foreach ($ifaces as $iface) {
      $name_token = $iface->getChildByIndex(1);
      $name_string = $name_token->getConcreteString();
      $names[] = array(
        'interface',
        $name_string,
        $name_token,
        ArcanistXHPASTLintNamingHook::isUpperCamelCase($name_string)
          ? null
          : 'Follow naming conventions: interfaces should be named using '.
            'UpperCamelCase.',
      );
    }

    $functions = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
    foreach ($functions as $function) {
      $name_token = $function->getChildByIndex(2);
      if ($name_token->getTypeName() == 'n_EMPTY') {
        // Unnamed closure.
        continue;
      }
      $name_string = $name_token->getConcreteString();
      $names[] = array(
        'function',
        $name_string,
        $name_token,
        ArcanistXHPASTLintNamingHook::isLowercaseWithUnderscores(
          ArcanistXHPASTLintNamingHook::stripPHPFunction($name_string))
          ? null
          : 'Follow naming conventions: functions should be named using '.
            'lowercase_with_underscores.',
      );
    }

    $methods = $root->selectDescendantsOfType('n_METHOD_DECLARATION');
    foreach ($methods as $method) {
      $name_token = $method->getChildByIndex(2);
      $name_string = $name_token->getConcreteString();
      $names[] = array(
        'method',
        $name_string,
        $name_token,
        ArcanistXHPASTLintNamingHook::isLowerCamelCase(
          ArcanistXHPASTLintNamingHook::stripPHPFunction($name_string))
          ? null
          : 'Follow naming conventions: methods should be named using '.
            'lowerCamelCase.',
      );
    }

    $param_tokens = array();

    $params = $root->selectDescendantsOfType('n_DECLARATION_PARAMETER_LIST');
    foreach ($params as $param_list) {
      foreach ($param_list->getChildren() as $param) {
        $name_token = $param->getChildByIndex(1);
        if ($name_token->getTypeName() == 'n_VARIABLE_REFERENCE') {
          $name_token = $name_token->getChildOfType(0, 'n_VARIABLE');
        }
        $param_tokens[$name_token->getID()] = true;
        $name_string = $name_token->getConcreteString();

        $names[] = array(
          'parameter',
          $name_string,
          $name_token,
          ArcanistXHPASTLintNamingHook::isLowercaseWithUnderscores(
            ArcanistXHPASTLintNamingHook::stripPHPVariable($name_string))
            ? null
            : 'Follow naming conventions: parameters should be named using '.
              'lowercase_with_underscores.',
        );
      }
    }

    $constants = $root->selectDescendantsOfType(
      'n_CLASS_CONSTANT_DECLARATION_LIST');
    foreach ($constants as $constant_list) {
      foreach ($constant_list->getChildren() as $constant) {
        $name_token = $constant->getChildByIndex(0);
        $name_string = $name_token->getConcreteString();
        $names[] = array(
          'constant',
          $name_string,
          $name_token,
          ArcanistXHPASTLintNamingHook::isUppercaseWithUnderscores($name_string)
            ? null
            : 'Follow naming conventions: class constants should be named '.
              'using UPPERCASE_WITH_UNDERSCORES.',
        );
      }
    }

    $member_tokens = array();

    $props = $root->selectDescendantsOfType('n_CLASS_MEMBER_DECLARATION_LIST');
    foreach ($props as $prop_list) {
      foreach ($prop_list->getChildren() as $token_id => $prop) {
        if ($prop->getTypeName() == 'n_CLASS_MEMBER_MODIFIER_LIST') {
          continue;
        }

        $name_token = $prop->getChildByIndex(0);
        $member_tokens[$name_token->getID()] = true;

        $name_string = $name_token->getConcreteString();
        $names[] = array(
          'member',
          $name_string,
          $name_token,
          ArcanistXHPASTLintNamingHook::isLowerCamelCase(
            ArcanistXHPASTLintNamingHook::stripPHPVariable($name_string))
            ? null
            : 'Follow naming conventions: class properties should be named '.
'using lowerCamelCase.', ); } } $superglobal_map = array_fill_keys( $this->getSuperGlobalNames(), true); $fdefs = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION'); $mdefs = $root->selectDescendantsOfType('n_METHOD_DECLARATION'); $defs = $fdefs->add($mdefs); foreach ($defs as $def) { $globals = $def->selectDescendantsOfType('n_GLOBAL_DECLARATION_LIST'); $globals = $globals->selectDescendantsOfType('n_VARIABLE'); $globals_map = array(); foreach ($globals as $global) { $global_string = $global->getConcreteString(); $globals_map[$global_string] = true; $names[] = array( 'user', $global_string, $global, // No advice for globals, but hooks have an option to provide some. null); } // Exclude access of static properties, since lint will be raised at // their declaration if they're invalid and they may not conform to // variable rules. This is slightly overbroad (includes the entire // rhs of a "Class::..." token) to cover cases like "Class:$x[0]". These // variables are simply made exempt from naming conventions. $exclude_tokens = array(); $statics = $def->selectDescendantsOfType('n_CLASS_STATIC_ACCESS'); foreach ($statics as $static) { $rhs = $static->getChildByIndex(1); $rhs_vars = $def->selectDescendantsOfType('n_VARIABLE'); foreach ($rhs_vars as $var) { $exclude_tokens[$var->getID()] = true; } } $vars = $def->selectDescendantsOfType('n_VARIABLE'); foreach ($vars as $token_id => $var) { if (isset($member_tokens[$token_id])) { continue; } if (isset($param_tokens[$token_id])) { continue; } if (isset($exclude_tokens[$token_id])) { continue; } $var_string = $var->getConcreteString(); // Awkward artifact of "$o->{$x}". $var_string = trim($var_string, '{}'); if (isset($superglobal_map[$var_string])) { continue; } if (isset($globals_map[$var_string])) { continue; } $names[] = array( 'variable', $var_string, $var, ArcanistXHPASTLintNamingHook::isLowercaseWithUnderscores( ArcanistXHPASTLintNamingHook::stripPHPVariable($var_string)) ? 
null : 'Follow naming conventions: variables should be named using '. 'lowercase_with_underscores.', ); } } $engine = $this->getEngine(); $working_copy = $engine->getWorkingCopy(); if ($working_copy) { // If a naming hook is configured, give it a chance to override the // default results for all the symbol names. $hook_class = $this->naminghook ? $this->naminghook : $working_copy->getProjectConfig('lint.xhpast.naminghook'); if ($hook_class) { $hook_obj = newv($hook_class, array()); foreach ($names as $k => $name_attrs) { list($type, $name, $token, $default) = $name_attrs; $result = $hook_obj->lintSymbolName($type, $name, $default); $names[$k][3] = $result; } } } // Raise anything we're left with. foreach ($names as $k => $name_attrs) { list($type, $name, $token, $result) = $name_attrs; if ($result) { $this->raiseLintAtNode( $token, self::LINT_NAMING_CONVENTIONS, $result); } } } private function lintSurpriseConstructors(XHPASTNode $root) { $classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION'); foreach ($classes as $class) { $class_name = $class->getChildByIndex(1)->getConcreteString(); $methods = $class->selectDescendantsOfType('n_METHOD_DECLARATION'); foreach ($methods as $method) { $method_name_token = $method->getChildByIndex(2); $method_name = $method_name_token->getConcreteString(); if (strtolower($class_name) == strtolower($method_name)) { $this->raiseLintAtNode( $method_name_token, self::LINT_IMPLICIT_CONSTRUCTOR, 'Name constructors __construct() explicitly. This method is a '. 'constructor because it has the same name as the class it is '. 
'defined in.'); } } } } private function lintParenthesesShouldHugExpressions(XHPASTNode $root) { $calls = $root->selectDescendantsOfType('n_CALL_PARAMETER_LIST'); $controls = $root->selectDescendantsOfType('n_CONTROL_CONDITION'); $fors = $root->selectDescendantsOfType('n_FOR_EXPRESSION'); $foreach = $root->selectDescendantsOfType('n_FOREACH_EXPRESSION'); $decl = $root->selectDescendantsOfType('n_DECLARATION_PARAMETER_LIST'); $all_paren_groups = $calls ->add($controls) ->add($fors) ->add($foreach) ->add($decl); foreach ($all_paren_groups as $group) { $tokens = $group->getTokens(); $token_o = array_shift($tokens); $token_c = array_pop($tokens); if ($token_o->getTypeName() != '(') { throw new Exception('Expected open paren!'); } if ($token_c->getTypeName() != ')') { throw new Exception('Expected close paren!'); } $nonsem_o = $token_o->getNonsemanticTokensAfter(); $nonsem_c = $token_c->getNonsemanticTokensBefore(); if (!$nonsem_o) { continue; } $raise = array(); $string_o = implode('', mpull($nonsem_o, 'getValue')); if (preg_match('/^[ ]+$/', $string_o)) { $raise[] = array($nonsem_o, $string_o); } if ($nonsem_o !== $nonsem_c) { $string_c = implode('', mpull($nonsem_c, 'getValue')); if (preg_match('/^[ ]+$/', $string_c)) { $raise[] = array($nonsem_c, $string_c); } } foreach ($raise as $warning) { list($tokens, $string) = $warning; $this->raiseLintAtOffset( reset($tokens)->getOffset(), self::LINT_PARENTHESES_SPACING, 'Parentheses should hug their contents.', $string, ''); } } } private function lintSpaceAfterControlStatementKeywords(XHPASTNode $root) { foreach ($root->getTokens() as $id => $token) { switch ($token->getTypeName()) { case 'T_IF': case 'T_ELSE': case 'T_FOR': case 'T_FOREACH': case 'T_WHILE': case 'T_DO': case 'T_SWITCH': $after = $token->getNonsemanticTokensAfter(); if (empty($after)) { $this->raiseLintAtToken( $token, self::LINT_CONTROL_STATEMENT_SPACING, 'Convention: put a space after control statements.', $token->getValue().' 
'); } else if (count($after) == 1) { $space = head($after); // If we have an else clause with braces, $space may not be // a single white space. e.g., // // if ($x) // echo 'foo' // else // <- $space is not " " but "\n ". // echo 'bar' // // We just require it starts with either a whitespace or a newline. if ($token->getTypeName() == 'T_ELSE' || $token->getTypeName() == 'T_DO') { break; } if ($space->isAnyWhitespace() && $space->getValue() != ' ') { $this->raiseLintAtToken( $space, self::LINT_CONTROL_STATEMENT_SPACING, 'Convention: put a single space after control statements.', ' '); } } break; } } } private function lintSpaceAroundBinaryOperators(XHPASTNode $root) { $expressions = $root->selectDescendantsOfType('n_BINARY_EXPRESSION'); foreach ($expressions as $expression) { $operator = $expression->getChildByIndex(1); $operator_value = $operator->getConcreteString(); list($before, $after) = $operator->getSurroundingNonsemanticTokens(); $replace = null; if (empty($before) && empty($after)) { $replace = " {$operator_value} "; } else if (empty($before)) { $replace = " {$operator_value}"; } else if (empty($after)) { $replace = "{$operator_value} "; } if ($replace !== null) { $this->raiseLintAtNode( $operator, self::LINT_BINARY_EXPRESSION_SPACING, 'Convention: logical and arithmetic operators should be '. 
'surrounded by whitespace.', $replace); } } $tokens = $root->selectTokensOfType(','); foreach ($tokens as $token) { $next = $token->getNextToken(); switch ($next->getTypeName()) { case ')': case 'T_WHITESPACE': break; default: $this->raiseLintAtToken( $token, self::LINT_BINARY_EXPRESSION_SPACING, 'Convention: comma should be followed by space.', ', '); break; } } $tokens = $root->selectTokensOfType('T_DOUBLE_ARROW'); foreach ($tokens as $token) { $prev = $token->getPrevToken(); $next = $token->getNextToken(); $prev_type = $prev->getTypeName(); $next_type = $next->getTypeName(); $prev_space = ($prev_type == 'T_WHITESPACE'); $next_space = ($next_type == 'T_WHITESPACE'); $replace = null; if (!$prev_space && !$next_space) { $replace = ' => '; } else if ($prev_space && !$next_space) { $replace = '=> '; } else if (!$prev_space && $next_space) { $replace = ' =>'; } if ($replace !== null) { $this->raiseLintAtToken( $token, self::LINT_BINARY_EXPRESSION_SPACING, 'Convention: double arrow should be surrounded by whitespace.', $replace); } } // TODO: Spacing around default parameter assignment in function/method // declarations (which is not n_BINARY_EXPRESSION). } private function lintSpaceAroundConcatenationOperators(XHPASTNode $root) { $tokens = $root->selectTokensOfType('.'); foreach ($tokens as $token) { $prev = $token->getPrevToken(); $next = $token->getNextToken(); foreach (array('prev' => $prev, 'next' => $next) as $wtoken) { if ($wtoken->getTypeName() != 'T_WHITESPACE') { continue; } $value = $wtoken->getValue(); if (strpos($value, "\n") !== false) { // If the whitespace has a newline, it's conventional. continue; } $next = $wtoken->getNextToken(); if ($next && $next->getTypeName() == 'T_COMMENT') { continue; } $this->raiseLintAtToken( $wtoken, self::LINT_BINARY_EXPRESSION_SPACING, 'Convention: no spaces around "." 
(string concatenation) operator.', ''); } } } private function lintDynamicDefines(XHPASTNode $root) { $calls = $root->selectDescendantsOfType('n_FUNCTION_CALL'); foreach ($calls as $call) { $name = $call->getChildByIndex(0)->getConcreteString(); if (strtolower($name) == 'define') { $parameter_list = $call->getChildOfType(1, 'n_CALL_PARAMETER_LIST'); $defined = $parameter_list->getChildByIndex(0); if (!$defined->isStaticScalar()) { $this->raiseLintAtNode( $defined, self::LINT_DYNAMIC_DEFINE, 'First argument to define() must be a string literal.'); } } } } private function lintUseOfThisInStaticMethods(XHPASTNode $root) { $classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION'); foreach ($classes as $class) { $methods = $class->selectDescendantsOfType('n_METHOD_DECLARATION'); foreach ($methods as $method) { $attributes = $method ->getChildByIndex(0, 'n_METHOD_MODIFIER_LIST') ->selectDescendantsOfType('n_STRING'); $method_is_static = false; $method_is_abstract = false; foreach ($attributes as $attribute) { if (strtolower($attribute->getConcreteString()) == 'static') { $method_is_static = true; } if (strtolower($attribute->getConcreteString()) == 'abstract') { $method_is_abstract = true; } } if ($method_is_abstract) { continue; } if (!$method_is_static) { continue; } $body = $method->getChildOfType(5, 'n_STATEMENT_LIST'); $variables = $body->selectDescendantsOfType('n_VARIABLE'); foreach ($variables as $variable) { if ($method_is_static && strtolower($variable->getConcreteString()) == '$this') { $this->raiseLintAtNode( $variable, self::LINT_STATIC_THIS, 'You can not reference "$this" inside a static method.'); } } } } } /** * preg_quote() takes two arguments, but the second one is optional because - * it is possible to use (), [] or {} as regular expression delimiters. If + * it is possible to use (), [] or {} as regular expression delimiters. If * you don't pass a second argument, you're probably going to get something * wrong. 
*/
  private function lintPregQuote(XHPASTNode $root) {
    foreach ($root->selectDescendantsOfType('n_FUNCTION_CALL') as $call) {
      $callee = $call->getChildByIndex(0)->getConcreteString();

      // PHP function names are case-insensitive, so compare the lowercased
      // name.
      if (strtolower($callee) !== 'preg_quote') {
        continue;
      }

      $arguments = $call
        ->getChildOfType(1, 'n_CALL_PARAMETER_LIST')
        ->getChildren();

      // Exactly two arguments (the string and the delimiter) is the only
      // accepted form; any other argument count is reported.
      if (count($arguments) === 2) {
        continue;
      }

      $this->raiseLintAtNode(
        $call,
        self::LINT_PREG_QUOTE_MISUSE,
        'If you use pattern delimiters that require escaping (such as //, '.
        'but not ()) then you should pass two arguments to preg_quote(), '.
        'so that preg_quote() knows which delimiter to escape.');
    }
  }

  /**
   * Exit is parsed as an expression, but using it as such is almost always
   * wrong. That is, this is valid:
   *
   *   strtoupper(33 * exit - 6);
   *
   * When exit is used as an expression, it causes the program to terminate with
   * exit code 0. This is likely not what is intended; these statements have
   * different effects:
   *
   *   exit(-1);
   *   exit -1;
   *
   * The former exits with a failure code, the latter with a success code!
*/
  private function lintExitExpressions(XHPASTNode $root) {
    $unaries = $root->selectDescendantsOfType('n_UNARY_PREFIX_EXPRESSION');
    foreach ($unaries as $unary) {
      $operator = $unary->getChildByIndex(0)->getConcreteString();
      if (strtolower($operator) !== 'exit') {
        continue;
      }

      // An exit whose parent node is a statement is left alone; any other
      // parent means the exit is embedded in a larger expression.
      if ($unary->getParentNode()->getTypeName() === 'n_STATEMENT') {
        continue;
      }

      $this->raiseLintAtNode(
        $unary,
        self::LINT_EXIT_EXPRESSION,
        'Use exit as a statement, not an expression.');
    }
  }

  /**
   * Raise lint when the expression being indexed is separated from the index
   * access by a run of spaces. The run of spaces is passed along as the
   * original text with an empty replacement.
   */
  private function lintArrayIndexWhitespace(XHPASTNode $root) {
    $indexes = $root->selectDescendantsOfType('n_INDEX_ACCESS');
    foreach ($indexes as $index) {
      // Child 0 is the expression being indexed; look at what trails its
      // final token.
      $tokens = $index->getChildByIndex(0)->getTokens();
      $last = array_pop($tokens);
      $trailing = $last->getNonsemanticTokensAfter();
      $trailing_text = implode('', mpull($trailing, 'getValue'));

      // Only a run consisting solely of spaces is reported; anything else
      // (newlines, comments, no whitespace at all) is ignored.
      if (!preg_match('/^ +$/', $trailing_text)) {
        continue;
      }

      $this->raiseLintAtOffset(
        $last->getOffset() + strlen($last->getValue()),
        self::LINT_ARRAY_INDEX_SPACING,
        'Convention: no spaces before index access.',
        $trailing_text,
        '');
    }
  }

  /**
   * Raise lint at every "TODO" marker found in comment tokens. Doc comments
   * are additionally searched for "@todo".
   */
  private function lintTODOComments(XHPASTNode $root) {
    // Array union: on a key collision the left operand's entry wins.
    // NOTE(review): presumably both lists are keyed by token ID so nothing is
    // dropped here -- confirm against selectTokensOfType().
    $comments = $root->selectTokensOfType('T_COMMENT') +
      $root->selectTokensOfType('T_DOC_COMMENT');

    foreach ($comments as $token) {
      $value = $token->getValue();
      if ($token->getTypeName() === 'T_DOC_COMMENT') {
        $regex = '/(TODO|@todo)/';
      } else {
        $regex = '/TODO/';
      }

      $matches = null;
      $found = preg_match_all($regex, $value, $matches, PREG_OFFSET_CAPTURE);
      if (!$found) {
        // Either nothing matched (0) or the PCRE call failed (false). In both
        // cases there is nothing to report, and $matches must not be indexed.
        continue;
      }

      // With PREG_OFFSET_CAPTURE each full match is a (text, byte offset)
      // pair, with the offset relative to the start of the comment token.
      foreach ($matches[0] as $match) {
        list($string, $offset) = $match;
        $this->raiseLintAtOffset(
          $token->getOffset() + $offset,
          self::LINT_TODO_COMMENT,
          'This comment has a TODO.',
          $string);
      }
    }
  }

  /**
   * Lint that if the file declares exactly one interface or class,
   * the name of the file matches the name of the class,
   * unless the classname is funky like an XHP element.
*/
  private function lintPrimaryDeclarationFilenameMatch(XHPASTNode $root) {
    $classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION');
    $interfaces = $root->selectDescendantsOfType('n_INTERFACE_DECLARATION');

    // Only files declaring exactly one class or interface are checked.
    $declaration_count = count($classes) + count($interfaces);
    if ($declaration_count != 1) {
      return;
    }

    // Exactly one of the two lists is non-empty; take its sole element.
    if (count($classes)) {
      $declarations = $classes;
    } else {
      $declarations = $interfaces;
    }
    $declarations->rewind();
    $decl_name = $declarations->current()->getChildByIndex(1);
    $decl_string = $decl_name->getConcreteString();

    // Exclude strangely named classes, e.g. XHP tags.
    if (!preg_match('/^\w+$/', $decl_string)) {
      return;
    }

    $rename = $decl_string.'.php';
    $filename = basename($this->getActivePath());
    if ($rename != $filename) {
      $this->raiseLintAtNode(
        $decl_name,
        self::LINT_CLASS_FILENAME_MISMATCH,
        "The name of this file differs from the name of the class or interface ".
        "it declares. Rename the file to '{$rename}'.");
    }
  }

  /**
   * Raise lint on "+" binary expressions where either operand is a string
   * scalar node.
   */
  private function lintPlusOperatorOnStrings(XHPASTNode $root) {
    foreach ($root->selectDescendantsOfType('n_BINARY_EXPRESSION') as $binop) {
      // Child 1 is the operator; children 0 and 2 are its operands.
      if ($binop->getChildByIndex(1)->getConcreteString() != '+') {
        continue;
      }

      $operands = array(
        $binop->getChildByIndex(0),
        $binop->getChildByIndex(2),
      );

      $has_string_operand = false;
      foreach ($operands as $operand) {
        if ($operand->getTypeName() == 'n_STRING_SCALAR') {
          $has_string_operand = true;
          break;
        }
      }

      if (!$has_string_operand) {
        continue;
      }

      $this->raiseLintAtNode(
        $binop,
        self::LINT_PLUS_OPERATOR_ON_STRINGS,
        "In PHP, '.' is the string concatenation operator, not '+'. This ".
        "expression uses '+' with a string literal as an operand.");
    }
  }

  /**
   * Finds duplicate keys in array initializers, as in
-   * array(1 => 'anything', 1 => 'foo'). Since the first entry is ignored,
+   * array(1 => 'anything', 1 => 'foo'). Since the first entry is ignored,
   * this is almost certainly an error.
*/ private function lintDuplicateKeysInArray(XHPASTNode $root) { $array_literals = $root->selectDescendantsOfType('n_ARRAY_LITERAL'); foreach ($array_literals as $array_literal) { $nodes_by_key = array(); $keys_warn = array(); $list_node = $array_literal->getChildByIndex(0); foreach ($list_node->getChildren() as $array_entry) { $key_node = $array_entry->getChildByIndex(0); switch ($key_node->getTypeName()) { case 'n_STRING_SCALAR': case 'n_NUMERIC_SCALAR': // Scalars: array(1 => 'v1', '1' => 'v2'); $key = 'scalar:'.(string)$key_node->evalStatic(); break; case 'n_SYMBOL_NAME': case 'n_VARIABLE': case 'n_CLASS_STATIC_ACCESS': // Constants: array(CONST => 'v1', CONST => 'v2'); // Variables: array($a => 'v1', $a => 'v2'); // Class constants and vars: array(C::A => 'v1', C::A => 'v2'); $key = $key_node->getTypeName().':'.$key_node->getConcreteString(); break; default: $key = null; break; } if ($key !== null) { if (isset($nodes_by_key[$key])) { $keys_warn[$key] = true; } $nodes_by_key[$key][] = $key_node; } } foreach ($keys_warn as $key => $_) { $node = array_pop($nodes_by_key[$key]); $message = $this->raiseLintAtNode( $node, self::LINT_DUPLICATE_KEYS_IN_ARRAY, 'Duplicate key in array initializer. PHP will ignore all '. 'but the last entry.'); $locations = array(); foreach ($nodes_by_key[$key] as $node) { $locations[] = $this->getOtherLocation($node->getOffset()); } $message->setOtherLocations($locations); } } } private function lintClosingCallParen(XHPASTNode $root) { $calls = $root->selectDescendantsOfType('n_FUNCTION_CALL'); $calls = $calls->add($root->selectDescendantsOfType('n_METHOD_CALL')); foreach ($calls as $call) { // If the last parameter of a call is a HEREDOC, don't apply this rule. 
$params = $call ->getChildOfType(1, 'n_CALL_PARAMETER_LIST') ->getChildren(); if ($params) { $last_param = last($params); if ($last_param->getTypeName() == 'n_HEREDOC') { continue; } } $tokens = $call->getTokens(); $last = array_pop($tokens); $trailing = $last->getNonsemanticTokensBefore(); $trailing_text = implode('', mpull($trailing, 'getValue')); if (preg_match('/^\s+$/', $trailing_text)) { $this->raiseLintAtOffset( $last->getOffset() - strlen($trailing_text), self::LINT_CLOSING_CALL_PAREN, 'Convention: no spaces before closing parenthesis in calls.', $trailing_text, ''); } } } private function lintClosingDeclarationParen(XHPASTNode $root) { $decs = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION'); $decs = $decs->add($root->selectDescendantsOfType('n_METHOD_DECLARATION')); foreach ($decs as $dec) { $params = $dec->getChildOfType(3, 'n_DECLARATION_PARAMETER_LIST'); $tokens = $params->getTokens(); $last = array_pop($tokens); $trailing = $last->getNonsemanticTokensBefore(); $trailing_text = implode('', mpull($trailing, 'getValue')); if (preg_match('/^\s+$/', $trailing_text)) { $this->raiseLintAtOffset( $last->getOffset() - strlen($trailing_text), self::LINT_CLOSING_DECL_PAREN, 'Convention: no spaces before closing parenthesis in function and '. 
'method declarations.', $trailing_text, ''); } } } private function lintKeywordCasing(XHPASTNode $root) { $keywords = array(); $symbols = $root->selectDescendantsOfType('n_SYMBOL_NAME'); foreach ($symbols as $symbol) { $keywords[] = head($symbol->getTokens()); } $arrays = $root->selectDescendantsOfType('n_ARRAY_LITERAL'); foreach ($arrays as $array) { $keywords[] = head($array->getTokens()); } $typehints = $root->selectDescendantsOfType('n_TYPE_NAME'); foreach ($typehints as $typehint) { $keywords[] = head($typehint->getTokens()); } $new_invocations = $root->selectDescendantsOfType('n_NEW'); foreach ($new_invocations as $invocation) { $keywords[] = head($invocation->getTokens()); } // NOTE: Although PHP generally allows arbitrary casing for all language // keywords, it's exceedingly rare for anyone to type, e.g., "CLASS" or // "cLaSs" in the wild. This list just attempts to cover unconventional // spellings which see some level of use, not all keywords exhaustively. // There is no token or node type which spans all keywords, so this is // significantly simpler. 
static $keyword_map = array( 'true' => 'true', 'false' => 'false', 'null' => 'null', 'array' => 'array', 'new' => 'new', ); foreach ($keywords as $keyword) { $value = $keyword->getValue(); $value_key = strtolower($value); if (!isset($keyword_map[$value_key])) { continue; } $expected_spelling = $keyword_map[$value_key]; if ($value !== $expected_spelling) { $this->raiseLintAtToken( $keyword, self::LINT_KEYWORD_CASING, "Convention: spell keyword '{$value}' as '{$expected_spelling}'.", $expected_spelling); } } } private function lintStrings(XHPASTNode $root) { $nodes = $root->selectDescendantsOfTypes(array( 'n_CONCATENATION_LIST', 'n_STRING_SCALAR', )); foreach ($nodes as $node) { $strings = array(); if ($node->getTypeName() === 'n_CONCATENATION_LIST') { $strings = $node->selectDescendantsOfType('n_STRING_SCALAR'); } else if ($node->getTypeName() === 'n_STRING_SCALAR') { $strings = array($node); if ($node->getParentNode()->getTypeName() === 'n_CONCATENATION_LIST') { continue; } } $valid = false; $invalid_nodes = array(); $fixes = array(); foreach ($strings as $string) { $concrete_string = $string->getConcreteString(); $single_quoted = ($concrete_string[0] === "'"); $contents = substr($concrete_string, 1, -1); // Double quoted strings are allowed when the string contains the // following characters. 
static $allowed_chars = array( '\n', '\r', '\t', '\v', '\e', '\f', '\'', '\0', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\x', ); $contains_special_chars = false; foreach ($allowed_chars as $allowed_char) { if (strpos($contents, $allowed_char) !== false) { $contains_special_chars = true; } } if (!$string->isConstantString()) { $valid = true; } else if ($contains_special_chars && !$single_quoted) { $valid = true; } else if (!$contains_special_chars && !$single_quoted) { $invalid_nodes[] = $string; $fixes[$string->getID()] = "'".str_replace('\"', '"', $contents)."'"; } } if (!$valid) { foreach ($invalid_nodes as $invalid_node) { $this->raiseLintAtNode( $invalid_node, self::LINT_DOUBLE_QUOTE, pht( 'String does not require double quotes. For consistency, '. 'prefer single quotes.'), $fixes[$invalid_node->getID()]); } } } } protected function lintElseIfStatements(XHPASTNode $root) { $tokens = $root->selectTokensOfType('T_ELSEIF'); foreach ($tokens as $token) { $this->raiseLintAtToken( $token, self::LINT_ELSEIF_USAGE, pht('Usage of `else if` is preferred over `elseif`.'), 'else if'); } } protected function lintSemicolons(XHPASTNode $root) { $tokens = $root->selectTokensOfType(';'); foreach ($tokens as $token) { $prev = $token->getPrevToken(); if ($prev->isAnyWhitespace()) { $this->raiseLintAtToken( $prev, self::LINT_SEMICOLON_SPACING, pht('Space found before semicolon.'), ''); } } } public function getSuperGlobalNames() { return array( '$GLOBALS', '$_SERVER', '$_GET', '$_POST', '$_FILES', '$_COOKIE', '$_SESSION', '$_REQUEST', '$_ENV', ); } } diff --git a/src/lint/linter/__tests__/ArcanistLinterTestCase.php b/src/lint/linter/__tests__/ArcanistLinterTestCase.php index 48eb173a..ae2e3e9f 100644 --- a/src/lint/linter/__tests__/ArcanistLinterTestCase.php +++ b/src/lint/linter/__tests__/ArcanistLinterTestCase.php @@ -1,198 +1,199 @@ withType('f') ->withSuffix('lint-test') ->find(); $test_count = 0; foreach ($files as $file) { $this->lintFile($root.$file, $linter); 
$test_count++; } $this->assertTrue( ($test_count > 0), pht('Expected to find some .lint-test tests in directory %s!', $root)); } private function lintFile($file, ArcanistLinter $linter) { $linter = clone $linter; $contents = Filesystem::readFile($file); $contents = explode("~~~~~~~~~~\n", $contents); if (count($contents) < 2) { throw new Exception( "Expected '~~~~~~~~~~' separating test case and results."); } list ($data, $expect, $xform, $config) = array_merge( $contents, array(null, null)); $basename = basename($file); if ($config) { $config = phutil_json_decode($config); } else { $config = array(); } PhutilTypeSpec::checkMap( $config, array( 'hook' => 'optional bool', 'config' => 'optional wild', 'path' => 'optional string', 'arcconfig' => 'optional map', )); $exception = null; $after_lint = null; $messages = null; $exception_message = false; $caught_exception = false; try { $tmp = new TempFile($basename); Filesystem::writeFile($tmp, $data); $full_path = (string)$tmp; $dir = dirname($full_path); $path = basename($full_path); $config_file = null; $arcconfig = idx($config, 'arcconfig'); if ($arcconfig) { $config_file = json_encode($arcconfig); } $working_copy = ArcanistWorkingCopyIdentity::newFromRootAndConfigFile( $dir, $config_file, 'Unit Test'); $configuration_manager = new ArcanistConfigurationManager(); $configuration_manager->setWorkingCopyIdentity($working_copy); $engine = new UnitTestableArcanistLintEngine(); $engine->setWorkingCopy($working_copy); $engine->setConfigurationManager($configuration_manager); $engine->setPaths(array($path)); $engine->setCommitHookMode(idx($config, 'hook', false)); $path_name = idx($config, 'path', $path); $linter->addPath($path_name); $linter->addData($path_name, $data); $config = idx($config, 'config', array()); foreach ($config as $key => $value) { $linter->setLinterConfigurationValue($key, $value); } $engine->addLinter($linter); $engine->addFileData($path_name, $data); $results = $engine->run(); $this->assertEqual( 1, 
count($results), 'Expect one result returned by linter.'); $result = reset($results); $patcher = ArcanistLintPatcher::newFromArcanistLintResult($result); $after_lint = $patcher->getModifiedFileContent(); } catch (ArcanistPhutilTestTerminatedException $ex) { throw $ex; } catch (Exception $exception) { $caught_exception = true; if ($exception instanceof PhutilAggregateException) { $caught_exception = false; foreach ($exception->getExceptions() as $ex) { if ($ex instanceof ArcanistUsageException) { $this->assertSkipped($ex->getMessage()); } else { $caught_exception = true; } } } else if ($exception instanceof ArcanistUsageException) { $this->assertSkipped($exception->getMessage()); } $exception_message = $exception->getMessage()."\n\n". $exception->getTraceAsString(); } $this->assertEqual(false, $caught_exception, $exception_message); $this->compareLint($basename, $expect, $result); $this->compareTransform($xform, $after_lint); } private function compareLint($file, $expect, ArcanistLintResult $result) { $seen = array(); $raised = array(); $message_map = array(); foreach ($result->getMessages() as $message) { $sev = $message->getSeverity(); $line = $message->getLine(); $char = $message->getChar(); $code = $message->getCode(); $name = $message->getName(); $message_key = $sev.':'.$line.':'.$char; $message_map[$message_key] = $message; $seen[] = $message_key; $raised[] = " {$sev} at line {$line}, char {$char}: {$code} {$name}"; } $expect = trim($expect); if ($expect) { $expect = explode("\n", $expect); } else { $expect = array(); } foreach ($expect as $key => $expected) { $expect[$key] = head(explode(' ', $expected)); } $expect = array_fill_keys($expect, true); $seen = array_fill_keys($seen, true); if (!$raised) { $raised = array('No messages.'); } $raised = "Actually raised:\n".implode("\n", $raised); foreach (array_diff_key($expect, $seen) as $missing => $ignored) { list($sev, $line, $char) = explode(':', $missing); $this->assertFailure( "In '{$file}', ". 
"expected lint to raise {$sev} on line {$line} at char {$char}, ". "but no {$sev} was raised. {$raised}"); } foreach (array_diff_key($seen, $expect) as $surprising => $ignored) { $message = $message_map[$surprising]; $message_info = $message->getDescription(); list($sev, $line, $char) = explode(':', $surprising); $this->assertFailure( "In '{$file}', ". "lint raised {$sev} on line {$line} at char {$char}, ". "but nothing was expected:\n\n{$message_info}\n\n{$raised}"); } } private function compareTransform($expected, $actual) { if (!strlen($expected)) { return; } $this->assertEqual( $expected, $actual, 'File as patched by lint did not match the expected patched file.'); } + } diff --git a/src/lint/linter/xhpast/ArcanistXHPASTLintNamingHook.php b/src/lint/linter/xhpast/ArcanistXHPASTLintNamingHook.php index 9e70e6b4..237996ad 100644 --- a/src/lint/linter/xhpast/ArcanistXHPASTLintNamingHook.php +++ b/src/lint/linter/xhpast/ArcanistXHPASTLintNamingHook.php @@ -1,135 +1,134 @@ } /* -( Overriding Symbol Name Lint Messages )------------------------------- */ /** * Callback invoked for each symbol, which can override the default * determination of name validity or accept it by returning $default. The * symbol types are: xhp-class, class, interface, function, method, parameter, * constant, and member. * * For example, if you want to ban all symbols with "quack" in them and * otherwise accept all the defaults, except allow any naming convention for * methods with "duck" in them, you might implement the method like this: * * if (preg_match('/quack/i', $name)) { * return 'Symbol names containing "quack" are forbidden.'; * } * if ($type == 'method' && preg_match('/duck/i', $name)) { * return null; // Always accept. * } * return $default; * * @param string The symbol type. * @param string The symbol name. * @param string|null The default result from the main rule engine. * @return string|null Null to accept the name, or a message to reject it * with. 
You should return the default value if you don't * want to specifically provide an override. * @task override */ abstract public function lintSymbolName($type, $name, $default); /* -( Name Utilities )----------------------------------------------------- */ /** * Returns true if a symbol name is UpperCamelCase. * * @param string Symbol name. * @return bool True if the symbol is UpperCamelCase. * @task util */ public static function isUpperCamelCase($symbol) { return preg_match('/^[A-Z][A-Za-z0-9]*$/', $symbol); } /** * Returns true if a symbol name is lowerCamelCase. * * @param string Symbol name. * @return bool True if the symbol is lowerCamelCase. * @task util */ public static function isLowerCamelCase($symbol) { return preg_match('/^[a-z][A-Za-z0-9]*$/', $symbol); } /** * Returns true if a symbol name is UPPERCASE_WITH_UNDERSCORES. * * @param string Symbol name. * @return bool True if the symbol is UPPERCASE_WITH_UNDERSCORES. * @task util */ public static function isUppercaseWithUnderscores($symbol) { return preg_match('/^[A-Z0-9_]+$/', $symbol); } /** * Returns true if a symbol name is lowercase_with_underscores. * * @param string Symbol name. * @return bool True if the symbol is lowercase_with_underscores. * @task util */ public static function isLowercaseWithUnderscores($symbol) { return preg_match('/^[a-z0-9_]+$/', $symbol); } /** * Strip non-name components from PHP function symbols. Notably, this discards * the "__" magic-method signifier, to make a symbol appropriate for testing * with methods like @{method:isLowerCamelCase}. * * @param string Symbol name. * @return string Stripped symbol. * @task util */ public static function stripPHPFunction($symbol) { - // Allow initial "__" for magic methods like __construct; we could also - // enumerate these explicitly. + // Allow initial "__" for magic methods like __construct; we could also + // enumerate these explicitly. 
return preg_replace('/^__/', '', $symbol); } /** * Strip non-name components from PHP variable symbols. Notably, this discards * the "$", to make a symbol appropriate for testing with methods like * @{method:isLowercaseWithUnderscores}. * * @param string Symbol name. * @return string Stripped symbol. * @task util */ public static function stripPHPVariable($symbol) { return preg_replace('/^\$/', '', $symbol); } } diff --git a/src/lint/linter/xhpast/ArcanistXHPASTLintSwitchHook.php b/src/lint/linter/xhpast/ArcanistXHPASTLintSwitchHook.php index e5a82ea1..7f909bb7 100644 --- a/src/lint/linter/xhpast/ArcanistXHPASTLintSwitchHook.php +++ b/src/lint/linter/xhpast/ArcanistXHPASTLintSwitchHook.php @@ -1,16 +1,14 @@ array(1, 0, 0, 0), 'UpperCamelCaseROFL' => array(1, 0, 0, 0), 'lowerCamelCase' => array(0, 1, 0, 0), 'lowerCamelCaseROFL' => array(0, 1, 0, 0), 'UPPERCASE_WITH_UNDERSCORES' => array(0, 0, 1, 0), '_UPPERCASE_WITH_UNDERSCORES_' => array(0, 0, 1, 0), '__UPPERCASE__WITH__UNDERSCORES__' => array(0, 0, 1, 0), 'lowercase_with_underscores' => array(0, 0, 0, 1), '_lowercase_with_underscores_' => array(0, 0, 0, 1), '__lowercase__with__underscores__' => array(0, 0, 0, 1), 'mixedCASE_NoNsEnSe' => array(0, 0, 0, 0), ); foreach ($tests as $test => $expect) { $this->assertEqual( $expect[0], ArcanistXHPASTLintNamingHook::isUpperCamelCase($test), "UpperCamelCase: '{$test}'"); $this->assertEqual( $expect[1], ArcanistXHPASTLintNamingHook::isLowerCamelCase($test), "lowerCamelCase: '{$test}'"); $this->assertEqual( $expect[2], ArcanistXHPASTLintNamingHook::isUppercaseWithUnderscores($test), "UPPERCASE_WITH_UNDERSCORES: '{$test}'"); $this->assertEqual( $expect[3], ArcanistXHPASTLintNamingHook::isLowercaseWithUnderscores($test), "lowercase_with_underscores: '{$test}'"); } } public function testStripUtilities() { // Variable stripping. 
$this->assertEqual( 'stuff', ArcanistXHPASTLintNamingHook::stripPHPVariable('stuff')); $this->assertEqual( 'stuff', ArcanistXHPASTLintNamingHook::stripPHPVariable('$stuff')); // Function/method stripping. $this->assertEqual( 'construct', ArcanistXHPASTLintNamingHook::stripPHPFunction('construct')); $this->assertEqual( 'construct', ArcanistXHPASTLintNamingHook::stripPHPFunction('__construct')); } } diff --git a/src/lint/renderer/ArcanistLintCheckstyleXMLRenderer.php b/src/lint/renderer/ArcanistLintCheckstyleXMLRenderer.php index 29afda46..30a91485 100644 --- a/src/lint/renderer/ArcanistLintCheckstyleXMLRenderer.php +++ b/src/lint/renderer/ArcanistLintCheckstyleXMLRenderer.php @@ -1,56 +1,55 @@ writer = new XMLWriter(); $this->writer->openMemory(); $this->writer->setIndent(true); $this->writer->setIndentString(' '); } public function renderPreamble() { $this->writer->startDocument('1.0', 'UTF-8'); $this->writer->startElement('checkstyle'); $this->writer->writeAttribute('version', '4.3'); return $this->writer->flush(); } public function renderLintResult(ArcanistLintResult $result) { $this->writer->startElement('file'); $this->writer->writeAttribute('name', $result->getPath()); foreach ($result->getMessages() as $message) { $this->writer->startElement('error'); $this->writer->writeAttribute('line', $message->getLine()); $this->writer->writeAttribute('column', $message->getChar()); $this->writer->writeAttribute('severity', ArcanistLintSeverity::getStringForSeverity($message->getSeverity())); $this->writer->writeAttribute('message', $message->getDescription()); $this->writer->writeAttribute('source', $message->getCode()); $this->writer->endElement(); } $this->writer->endElement(); return $this->writer->flush(); } public function renderOkayResult() { return ''; } public function renderPostamble() { $this->writer->endElement(); $this->writer->endDocument(); return $this->writer->flush(); } + } diff --git a/src/lint/renderer/ArcanistLintConsoleRenderer.php 
b/src/lint/renderer/ArcanistLintConsoleRenderer.php index 53db10dc..c083abd0 100644 --- a/src/lint/renderer/ArcanistLintConsoleRenderer.php +++ b/src/lint/renderer/ArcanistLintConsoleRenderer.php @@ -1,240 +1,240 @@ showAutofixPatches = $show_autofix_patches; return $this; } public function renderLintResult(ArcanistLintResult $result) { $messages = $result->getMessages(); $path = $result->getPath(); $lines = explode("\n", $result->getData()); $text = array(); foreach ($messages as $message) { if (!$this->showAutofixPatches && $message->isAutofix()) { continue; } if ($message->isError()) { $color = 'red'; } else { $color = 'yellow'; } $severity = ArcanistLintSeverity::getStringForSeverity( $message->getSeverity()); $code = $message->getCode(); $name = $message->getName(); $description = $message->getDescription(); if ($message->getOtherLocations()) { $locations = array(); foreach ($message->getOtherLocations() as $location) { $locations[] = idx($location, 'path', $path). (!empty($location['line']) ? ":{$location['line']}" : ''); } $description .= "\nOther locations: ".implode(', ', $locations); } $text[] = phutil_console_format( " ** %s ** (%s) __%s__\n%s\n", $severity, $code, $name, phutil_console_wrap($description, 4)); if ($message->hasFileContext()) { $text[] = $this->renderContext($message, $lines); } } if ($text) { $prefix = phutil_console_format("**>>>** Lint for __%s__:\n\n\n", $path); return $prefix.implode("\n", $text); } else { return null; } } protected function renderContext( ArcanistLintMessage $message, array $line_data) { $lines_of_context = 3; $out = array(); $num_lines = count($line_data); // make line numbers line up with array indexes array_unshift($line_data, ''); $line_num = min($message->getLine(), $num_lines); $line_num = max(1, $line_num); // Print out preceding context before the impacted region. 
$cursor = max(1, $line_num - $lines_of_context); for (; $cursor < $line_num; $cursor++) { $out[] = $this->renderLine($cursor, $line_data[$cursor]); } $text = $message->getOriginalText(); $start = $message->getChar() - 1; $patch = ''; // Refine original and replacement text to eliminate start and end in common if ($message->isPatchable()) { $patch = $message->getReplacementText(); $text_strlen = strlen($text); $patch_strlen = strlen($patch); $min_length = min($text_strlen, $patch_strlen); $same_at_front = 0; for ($ii = 0; $ii < $min_length; $ii++) { if ($text[$ii] !== $patch[$ii]) { break; } $same_at_front++; $start++; if ($text[$ii] == "\n") { $out[] = $this->renderLine($cursor, $line_data[$cursor]); $cursor++; $start = 0; $line_num++; } } // deal with shorter string ' ' longer string ' a ' $min_length -= $same_at_front; // And check the end of the string $same_at_end = 0; for ($ii = 1; $ii <= $min_length; $ii++) { if ($text[$text_strlen - $ii] !== $patch[$patch_strlen - $ii]) { break; } $same_at_end++; } $text = substr( $text, $same_at_front, $text_strlen - $same_at_end - $same_at_front); $patch = substr( $patch, $same_at_front, $patch_strlen - $same_at_end - $same_at_front); } // Print out the impacted region itself. $diff = $message->isPatchable() ? '-' : null; $text_lines = explode("\n", $text); $text_length = count($text_lines); $intraline = ($text != '' || $start || !preg_match('/\n$/', $patch)); if ($intraline) { for (; $cursor < $line_num + $text_length; $cursor++) { $chevron = ($cursor == $line_num); // We may not have any data if, e.g., the old file does not exist. $data = idx($line_data, $cursor, null); // Highlight the problem substring. $text_line = $text_lines[$cursor - $line_num]; if (strlen($text_line)) { $data = substr_replace( $data, phutil_console_format('##%s##', $text_line), ($cursor == $line_num ? $start : 0), strlen($text_line)); } $out[] = $this->renderLine($cursor, $data, $chevron, $diff); } } // Print out replacement text. 
if ($message->isPatchable()) { // Strip trailing newlines, since "explode" will create an extra patch // line for these. if (strlen($patch) && ($patch[strlen($patch) - 1] === "\n")) { $patch = substr($patch, 0, -1); } $patch_lines = explode("\n", $patch); $patch_length = count($patch_lines); $patch_line = $patch_lines[0]; $len = isset($text_lines[0]) ? strlen($text_lines[0]) : 0; $patched = phutil_console_format('##%s##', $patch_line); if ($intraline) { $patched = substr_replace( $line_data[$line_num], $patched, $start, $len); } $out[] = $this->renderLine(null, $patched, false, '+'); foreach (array_slice($patch_lines, 1) as $patch_line) { $out[] = $this->renderLine( null, phutil_console_format('##%s##', $patch_line), false, '+'); } } $end = min($num_lines, $cursor + $lines_of_context); for (; $cursor < $end; $cursor++) { // If there is no original text, we didn't print out a chevron or any // highlighted text above, so print it out here. This allows messages // which don't have any original/replacement information to still // render with indicator chevrons. if ($text || $message->isPatchable()) { $chevron = false; } else { $chevron = ($cursor == $line_num); } $out[] = $this->renderLine($cursor, $line_data[$cursor], $chevron); // With original text, we'll render the text highlighted above. If the // lint message only has a line/char offset there's nothing to // highlight, so print out a caret on the next line instead. if ($chevron && $message->getChar()) { $out[] = $this->renderCaret($message->getChar()); } } $out[] = null; return implode("\n", $out); } private function renderCaret($pos) { return str_repeat(' ', 16 + $pos).'^'; } protected function renderLine($line, $data, $chevron = false, $diff = null) { $chevron = $chevron ? 
'>>>' : ''; return sprintf( ' %3s %1s %6s %s', $chevron, $diff, $line, $data); } public function renderOkayResult() { - return - phutil_console_format("** OKAY ** No lint warnings.\n"); + return phutil_console_format( + "** OKAY ** No lint warnings.\n"); } + } diff --git a/src/lint/renderer/ArcanistLintJSONRenderer.php b/src/lint/renderer/ArcanistLintJSONRenderer.php index bc0b68f7..7170bdc2 100644 --- a/src/lint/renderer/ArcanistLintJSONRenderer.php +++ b/src/lint/renderer/ArcanistLintJSONRenderer.php @@ -1,35 +1,35 @@ getMessages(); $path = $result->getPath(); $data = explode("\n", $result->getData()); array_unshift($data, ''); // make the line numbers work as array indices $output = array($path => array()); foreach ($messages as $message) { $dictionary = $message->toDictionary(); $dictionary['context'] = implode("\n", array_slice( $data, max(1, $message->getLine() - self::LINES_OF_CONTEXT), self::LINES_OF_CONTEXT * 2 + 1)); unset($dictionary['path']); $output[$path][] = $dictionary; } return json_encode($output)."\n"; } public function renderOkayResult() { return ''; } + } diff --git a/src/lint/renderer/ArcanistLintLikeCompilerRenderer.php b/src/lint/renderer/ArcanistLintLikeCompilerRenderer.php index 20b328ef..5ffce5ff 100644 --- a/src/lint/renderer/ArcanistLintLikeCompilerRenderer.php +++ b/src/lint/renderer/ArcanistLintLikeCompilerRenderer.php @@ -1,35 +1,35 @@ getMessages(); $path = $result->getPath(); foreach ($messages as $message) { $severity = ArcanistLintSeverity::getStringForSeverity( $message->getSeverity()); $line = $message->getLine(); $code = $message->getCode(); $description = $message->getDescription(); $lines[] = sprintf( "%s:%d:%s (%s) %s\n", $path, $line, $severity, $code, $description); } return implode('', $lines); } public function renderOkayResult() { return ''; } + } diff --git a/src/lint/renderer/ArcanistLintNoneRenderer.php b/src/lint/renderer/ArcanistLintNoneRenderer.php index da44ac86..3e047bed 100644 --- 
a/src/lint/renderer/ArcanistLintNoneRenderer.php +++ b/src/lint/renderer/ArcanistLintNoneRenderer.php @@ -1,16 +1,13 @@ getMessages(); $path = $result->getPath(); $text = array(); foreach ($messages as $message) { $name = $message->getName(); $severity = ArcanistLintSeverity::getStringForSeverity( $message->getSeverity()); $line = $message->getLine(); $text[] = "{$path}:{$line}:{$severity}: {$name}\n"; } return implode('', $text); } public function renderOkayResult() { - return - phutil_console_format("** OKAY ** No lint warnings.\n"); + return phutil_console_format( + "** OKAY ** No lint warnings.\n"); } + } diff --git a/src/parser/ArcanistBaseCommitParser.php b/src/parser/ArcanistBaseCommitParser.php index df3331cb..7927f6b6 100644 --- a/src/parser/ArcanistBaseCommitParser.php +++ b/src/parser/ArcanistBaseCommitParser.php @@ -1,193 +1,192 @@ api = $api; return $this; } private function tokenizeBaseCommitSpecification($raw_spec) { if (!$raw_spec) { return array(); } $spec = preg_split('/\s*,\s*/', $raw_spec); $spec = array_filter($spec); foreach ($spec as $rule) { if (strpos($rule, ':') === false) { throw new ArcanistUsageException( "Rule '{$rule}' is invalid, it must have a type and name like ". 
"'arc:upstream'."); } } return $spec; } private function log($message) { if ($this->verbose) { fwrite(STDERR, $message."\n"); } } public function resolveBaseCommit(array $specs) { $specs += array( 'runtime' => '', 'local' => '', 'project' => '', 'user' => '', 'system' => '', ); foreach ($specs as $source => $spec) { $specs[$source] = self::tokenizeBaseCommitSpecification($spec); } $this->try = array( 'runtime', 'local', 'project', 'user', 'system', ); while ($this->try) { $source = head($this->try); if (!idx($specs, $source)) { $this->log("No rules left from source '{$source}'."); array_shift($this->try); continue; } $this->log("Trying rules from source '{$source}'."); $rules = &$specs[$source]; while ($rule = array_shift($rules)) { $this->log("Trying rule '{$rule}'."); $commit = $this->resolveRule($rule, $source); if ($commit === false) { // If a rule returns false, it means to go to the next ruleset. break; } else if ($commit !== null) { $this->log("Resolved commit '{$commit}' from rule '{$rule}'."); return $commit; } } } return null; } /** * Handle resolving individual rules. */ private function resolveRule($rule, $source) { - // NOTE: Returning `null` from this method means "no match". // Returning `false` from this method means "stop current ruleset". list($type, $name) = explode(':', $rule, 2); switch ($type) { case 'literal': return $name; case 'git': case 'hg': return $this->api->resolveBaseCommitRule($rule, $source); case 'arc': return $this->resolveArcRule($rule, $name, $source); default: throw new ArcanistUsageException( "Base commit rule '{$rule}' (from source '{$source}') ". "is not a recognized rule."); } } /** * Handle resolving "arc:*" rules. 
*/ private function resolveArcRule($rule, $name, $source) { $name = $this->updateLegacyRuleName($name); switch ($name) { case 'verbose': $this->verbose = true; $this->log('Enabled verbose mode.'); break; case 'prompt': $reason = 'it is what you typed when prompted.'; $this->api->setBaseCommitExplanation($reason); return phutil_console_prompt('Against which commit?'); case 'local': case 'user': case 'project': case 'runtime': case 'system': // Push the other source on top of the list. array_unshift($this->try, $name); $this->log("Switching to source '{$name}'."); return false; case 'yield': // Cycle this source to the end of the list. $this->try[] = array_shift($this->try); $this->log("Yielding processing of rules from '{$source}'."); return false; case 'halt': // Dump the whole stack. $this->try = array(); $this->log('Halting all rule processing.'); return false; case 'skip': return null; case 'empty': case 'upstream': case 'outgoing': case 'bookmark': case 'amended': case 'this': return $this->api->resolveBaseCommitRule($rule, $source); default: $matches = null; if (preg_match('/^exec\((.*)\)$/', $name, $matches)) { $root = $this->api->getWorkingCopyIdentity()->getProjectRoot(); $future = new ExecFuture('%C', $matches[1]); $future->setCWD($root); list($err, $stdout) = $future->resolve(); if (!$err) { return trim($stdout); } else { return null; } } else if (preg_match('/^nodiff\((.*)\)$/', $name, $matches)) { return $this->api->resolveBaseCommitRule($rule, $source); } throw new ArcanistUsageException( "Base commit rule '{$rule}' (from source '{$source}') ". 
"is not a recognized rule."); } } private function updateLegacyRuleName($name) { $updated = array( 'global' => 'user', 'args' => 'runtime', ); $new_name = idx($updated, $name); if ($new_name) { $this->log("translating legacy name '$name' to '$new_name'"); return $new_name; } return $name; } } diff --git a/src/parser/ArcanistBundle.php b/src/parser/ArcanistBundle.php index 0ea663bf..4bfd1adf 100644 --- a/src/parser/ArcanistBundle.php +++ b/src/parser/ArcanistBundle.php @@ -1,875 +1,869 @@ authorEmail = $author_email; return $this; } public function getAuthorEmail() { return $this->authorEmail; } public function setAuthorName($author_name) { $this->authorName = $author_name; return $this; } public function getAuthorName() { return $this->authorName; } public function getFullAuthor() { $author_name = $this->getAuthorName(); if ($author_name === null) { return null; } $author_email = $this->getAuthorEmail(); if ($author_email === null) { return null; } $full_author = sprintf('%s <%s>', $author_name, $author_email); // Because git is very picky about the author being in a valid format, // verify that we can parse it. 
$address = new PhutilEmailAddress($full_author); if (!$address->getDisplayName() || !$address->getAddress()) { return null; } return $full_author; } public function setConduit(ConduitClient $conduit) { $this->conduit = $conduit; return $this; } public function setProjectID($project_id) { $this->projectID = $project_id; return $this; } public function getProjectID() { return $this->projectID; } public function setBaseRevision($base_revision) { $this->baseRevision = $base_revision; return $this; } public function setEncoding($encoding) { $this->encoding = $encoding; return $this; } public function getEncoding() { return $this->encoding; } public function getBaseRevision() { return $this->baseRevision; } public function setRevisionID($revision_id) { $this->revisionID = $revision_id; return $this; } public function getRevisionID() { return $this->revisionID; } public static function newFromChanges(array $changes) { $obj = new ArcanistBundle(); $obj->changes = $changes; return $obj; } private function getEOL($patch_type) { - // NOTE: Git always generates "\n" line endings, even under Windows, and // can not parse certain patches with "\r\n" line endings. SVN generates // patches with "\n" line endings on Mac or Linux and "\r\n" line endings // on Windows. (This EOL style is used only for patch metadata lines, not // for the actual patch content.) // (On Windows, Mercurial generates \n newlines for `--git` diffs, as it // must, but also \n newlines for unified diffs. We never need to deal with // these as we use Git format for Mercurial, so this case is currently // ignored.) switch ($patch_type) { case 'git': return "\n"; case 'unified': return phutil_is_windows() ? 
"\r\n" : "\n"; default: throw new Exception( "Unknown patch type '{$patch_type}'!"); } } public static function newFromArcBundle($path) { $path = Filesystem::resolvePath($path); $future = new ExecFuture( 'tar tfO %s', $path); list($stdout, $file_list) = $future->resolvex(); $file_list = explode("\n", trim($file_list)); if (in_array('meta.json', $file_list)) { $future = new ExecFuture( 'tar xfO %s meta.json', $path); $meta_info = $future->resolveJSON(); $version = idx($meta_info, 'version', 0); $project_name = idx($meta_info, 'projectName'); $base_revision = idx($meta_info, 'baseRevision'); $revision_id = idx($meta_info, 'revisionID'); $encoding = idx($meta_info, 'encoding'); $author_name = idx($meta_info, 'authorName'); $author_email = idx($meta_info, 'authorEmail'); } else { // this arc bundle was probably made before we started storing meta info $version = 0; $project_name = null; $base_revision = null; $revision_id = null; $encoding = null; $author = null; } $future = new ExecFuture( 'tar xfO %s changes.json', $path); $changes = $future->resolveJSON(); foreach ($changes as $change_key => $change) { foreach ($change['hunks'] as $key => $hunk) { list($hunk_data) = execx('tar xfO %s hunks/%s', $path, $hunk['corpus']); $changes[$change_key]['hunks'][$key]['corpus'] = $hunk_data; } } foreach ($changes as $change_key => $change) { $changes[$change_key] = ArcanistDiffChange::newFromDictionary($change); } $obj = new ArcanistBundle(); $obj->changes = $changes; $obj->diskPath = $path; $obj->setProjectID($project_name); $obj->setBaseRevision($base_revision); $obj->setRevisionID($revision_id); $obj->setEncoding($encoding); return $obj; } public static function newFromDiff($data) { $obj = new ArcanistBundle(); $parser = new ArcanistDiffParser(); $obj->changes = $parser->parseDiff($data); return $obj; } - private function __construct() { - - } + private function __construct() {} public function writeToDisk($path) { $changes = $this->getChanges(); $change_list = array(); 
foreach ($changes as $change) { $change_list[] = $change->toDictionary(); } $hunks = array(); foreach ($change_list as $change_key => $change) { foreach ($change['hunks'] as $key => $hunk) { $hunks[] = $hunk['corpus']; $change_list[$change_key]['hunks'][$key]['corpus'] = count($hunks) - 1; } } $blobs = array(); foreach ($change_list as $change) { if (!empty($change['metadata']['old:binary-phid'])) { $blobs[$change['metadata']['old:binary-phid']] = null; } if (!empty($change['metadata']['new:binary-phid'])) { $blobs[$change['metadata']['new:binary-phid']] = null; } } foreach ($blobs as $phid => $null) { $blobs[$phid] = $this->getBlob($phid); } $meta_info = array( 'version' => 5, 'projectName' => $this->getProjectID(), 'baseRevision' => $this->getBaseRevision(), 'revisionID' => $this->getRevisionID(), 'encoding' => $this->getEncoding(), 'authorName' => $this->getAuthorName(), 'authorEmail' => $this->getAuthorEmail(), ); $dir = Filesystem::createTemporaryDirectory(); Filesystem::createDirectory($dir.'/hunks'); Filesystem::createDirectory($dir.'/blobs'); Filesystem::writeFile($dir.'/changes.json', json_encode($change_list)); Filesystem::writeFile($dir.'/meta.json', json_encode($meta_info)); foreach ($hunks as $key => $hunk) { Filesystem::writeFile($dir.'/hunks/'.$key, $hunk); } foreach ($blobs as $key => $blob) { Filesystem::writeFile($dir.'/blobs/'.$key, $blob); } execx( '(cd %s; tar -czf %s *)', $dir, Filesystem::resolvePath($path)); Filesystem::remove($dir); } public function toUnifiedDiff() { - $eol = $this->getEOL('unified'); $result = array(); $changes = $this->getChanges(); foreach ($changes as $change) { $hunk_changes = $this->buildHunkChanges($change->getHunks(), $eol); if (!$hunk_changes) { continue; } $old_path = $this->getOldPath($change); $cur_path = $this->getCurrentPath($change); $index_path = $cur_path; if ($index_path === null) { $index_path = $old_path; } $result[] = 'Index: '.$index_path; $result[] = $eol; $result[] = str_repeat('=', 67); $result[] = 
$eol; if ($old_path === null) { $old_path = '/dev/null'; } if ($cur_path === null) { $cur_path = '/dev/null'; } // When the diff is used by `patch`, `patch` ignores what is listed as the // current path and just makes changes to the file at the old path (unless // the current path is '/dev/null'. // If the old path and the current path aren't the same (and neither is // /dev/null), this indicates the file was moved or copied. By listing // both paths as the new file, `patch` will apply the diff to the new // file. if ($cur_path !== '/dev/null' && $old_path !== '/dev/null') { $old_path = $cur_path; } $result[] = '--- '.$old_path.$eol; $result[] = '+++ '.$cur_path.$eol; $result[] = $hunk_changes; } if (!$result) { return ''; } $diff = implode('', $result); return $this->convertNonUTF8Diff($diff); } public function toGitPatch() { $eol = $this->getEOL('git'); $result = array(); $changes = $this->getChanges(); $binary_sources = array(); foreach ($changes as $change) { if (!$this->isGitBinaryChange($change)) { continue; } $type = $change->getType(); if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY || $type == ArcanistDiffChangeType::TYPE_COPY_AWAY || $type == ArcanistDiffChangeType::TYPE_MULTICOPY) { foreach ($change->getAwayPaths() as $path) { $binary_sources[$path] = $change; } } } foreach (array_keys($changes) as $multicopy_key) { $multicopy_change = $changes[$multicopy_key]; $type = $multicopy_change->getType(); if ($type != ArcanistDiffChangeType::TYPE_MULTICOPY) { continue; } // Decompose MULTICOPY into one MOVE_HERE and several COPY_HERE because // we need more information than we have in order to build a delete patch // and represent it as a bunch of COPY_HERE plus a delete. For details, // see T419. // Basically, MULTICOPY means there are 2 or more corresponding COPY_HERE // changes, so find one of them arbitrarily and turn it into a MOVE_HERE. // TODO: We might be able to do this more cleanly after T230 is resolved. 
$decompose_okay = false; foreach ($changes as $change_key => $change) { if ($change->getType() != ArcanistDiffChangeType::TYPE_COPY_HERE) { continue; } if ($change->getOldPath() != $multicopy_change->getCurrentPath()) { continue; } $decompose_okay = true; $change = clone $change; $change->setType(ArcanistDiffChangeType::TYPE_MOVE_HERE); $changes[$change_key] = $change; // The multicopy is now fully represented by MOVE_HERE plus one or more // COPY_HERE, so throw it away. unset($changes[$multicopy_key]); break; } if (!$decompose_okay) { throw new Exception( 'Failed to decompose multicopy changeset in order to generate diff.'); } } foreach ($changes as $change) { $type = $change->getType(); $file_type = $change->getFileType(); if ($file_type == ArcanistDiffChangeType::FILE_DIRECTORY) { // TODO: We should raise a FYI about this, so the user is aware // that we omitted it, if the directory is empty or has permissions // which git can't represent. // Git doesn't support empty directories, so we simply ignore them. If // the directory is nonempty, 'git apply' will create it when processing // the changesets for files inside it. continue; } if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) { // Git will apply this in the corresponding MOVE_HERE. continue; } $old_mode = idx($change->getOldProperties(), 'unix:filemode', '100644'); $new_mode = idx($change->getNewProperties(), 'unix:filemode', '100644'); $is_binary = $this->isGitBinaryChange($change); if ($is_binary) { $old_binary = idx($binary_sources, $this->getCurrentPath($change)); $change_body = $this->buildBinaryChange($change, $old_binary); } else { $change_body = $this->buildHunkChanges($change->getHunks(), $eol); } if ($type == ArcanistDiffChangeType::TYPE_COPY_AWAY) { // TODO: This is only relevant when patching old Differential diffs // which were created prior to arc pruning TYPE_COPY_AWAY for files // with no modifications. 
if (!strlen($change_body) && ($old_mode == $new_mode)) { continue; } } $old_path = $this->getOldPath($change); $cur_path = $this->getCurrentPath($change); if ($old_path === null) { $old_index = 'a/'.$cur_path; $old_target = '/dev/null'; } else { $old_index = 'a/'.$old_path; $old_target = 'a/'.$old_path; } if ($cur_path === null) { $cur_index = 'b/'.$old_path; $cur_target = '/dev/null'; } else { $cur_index = 'b/'.$cur_path; $cur_target = 'b/'.$cur_path; } $result[] = "diff --git {$old_index} {$cur_index}".$eol; if ($type == ArcanistDiffChangeType::TYPE_ADD) { $result[] = "new file mode {$new_mode}".$eol; } if ($type == ArcanistDiffChangeType::TYPE_COPY_HERE || $type == ArcanistDiffChangeType::TYPE_MOVE_HERE || $type == ArcanistDiffChangeType::TYPE_COPY_AWAY || $type == ArcanistDiffChangeType::TYPE_CHANGE) { if ($old_mode !== $new_mode) { $result[] = "old mode {$old_mode}".$eol; $result[] = "new mode {$new_mode}".$eol; } } if ($type == ArcanistDiffChangeType::TYPE_COPY_HERE) { $result[] = "copy from {$old_path}".$eol; $result[] = "copy to {$cur_path}".$eol; } else if ($type == ArcanistDiffChangeType::TYPE_MOVE_HERE) { $result[] = "rename from {$old_path}".$eol; $result[] = "rename to {$cur_path}".$eol; } else if ($type == ArcanistDiffChangeType::TYPE_DELETE || $type == ArcanistDiffChangeType::TYPE_MULTICOPY) { $old_mode = idx($change->getOldProperties(), 'unix:filemode'); if ($old_mode) { $result[] = "deleted file mode {$old_mode}".$eol; } } if ($change_body) { if (!$is_binary) { $result[] = "--- {$old_target}".$eol; $result[] = "+++ {$cur_target}".$eol; } $result[] = $change_body; } } $diff = implode('', $result).$eol; return $this->convertNonUTF8Diff($diff); } private function isGitBinaryChange(ArcanistDiffChange $change) { $file_type = $change->getFileType(); return ($file_type == ArcanistDiffChangeType::FILE_BINARY || $file_type == ArcanistDiffChangeType::FILE_IMAGE); } private function convertNonUTF8Diff($diff) { if ($this->encoding) { $diff = 
/**
 * Split one hunk into a list of smaller, non-overlapping hunks.
 *
 * The corpus of the base hunk is split into lines and scanned for runs of
 * changed lines ('-' / '+'). Each emitted hunk starts at most $context (3)
 * lines before its first changed line and ends at the first unchanged line
 * that is more than $context lines past the last change, unless another
 * change is found before the scan gives up, in which case both changes stay
 * in the same hunk.
 *
 * @param ArcanistDiffHunk $base_hunk Hunk to split; its corpus and old/new
 *                                    offsets are read.
 * @return array List of ArcanistDiffHunk objects, in corpus order. Empty
 *               when the corpus contains no '-' or '+' lines.
 */
private function breakHunkIntoSmallHunks(ArcanistDiffHunk $base_hunk) {
  // Number of unchanged lines kept around each change.
  $context = 3;

  $results = array();
  $lines = phutil_split_lines($base_hunk->getCorpus());
  $n = count($lines);

  // Running old/new line numbers corresponding to $lines[$ii].
  $old_offset = $base_hunk->getOldOffset();
  $new_offset = $base_hunk->getNewOffset();

  // $ii marks where the previous small hunk ended; $jj is the scan cursor.
  $ii = 0;
  $jj = 0;
  while ($ii < $n) {
    // Skip lines until we find the next line with changes. Note: this skips
    // both ' ' (no changes) and '\' (no newline at end of file) lines. If we
    // don't skip the latter, we may incorrectly generate a terminal hunk
    // that has no actual change information when a file doesn't have a
    // terminal newline and is not changed near the end of the file. 'patch'
    // will fail to apply the diff if we generate a hunk that does not
    // actually contain changes.
    for ($jj = $ii; $jj < $n; ++$jj) {
      $char = $lines[$jj][0];
      if ($char == '-' || $char == '+') {
        break;
      }
    }
    if ($jj >= $n) {
      // No further changes: nothing left to emit.
      break;
    }

    // Back up to include leading context, without running off the front of
    // the corpus.
    $hunk_start = max($jj - $context, 0);


    // NOTE: There are two tricky considerations here.
    // We can not generate a patch with overlapping hunks, or 'git apply'
    // rejects it after 1.7.3.4.
    // We can not generate a patch with too much trailing context, or
    // 'patch' rejects it.
    // So we need to ensure that we generate disjoint hunks, but don't
    // generate any hunks with too much context.

    $old_lines = 0;    // '-' lines seen in this small hunk
    $new_lines = 0;    // '+' lines seen in this small hunk
    $hunk_adjust = 0;  // '\' marker lines, which count toward neither side
    $last_change = $jj;
    $break_here = null;
    for (; $jj < $n; ++$jj) {
      if ($lines[$jj][0] == ' ') {

        if ($jj - $last_change > $context) {
          if ($break_here === null) {
            // We haven't seen a change in $context lines, so this is a
            // potential place to break the hunk. However, we need to keep
            // looking in case there is another change fewer than $context
            // lines away, in which case we have to merge the hunks.
            $break_here = $jj;
          }
        }

        if ($jj - $last_change > (($context + 1) * 2)) {
          // We definitely aren't going to merge this with the next hunk, so
          // break out of the loop. We'll end the hunk at $break_here.
          break;
        }
      } else {
        // Any non-context line cancels a pending break point and extends
        // the current small hunk.
        $break_here = null;
        $last_change = $jj;

        if ($lines[$jj][0] == '\\') {
          // When we have a "\ No newline at end of file" line, it does not
          // contribute to either hunk length.
          ++$hunk_adjust;
        } else if ($lines[$jj][0] == '-') {
          ++$old_lines;
        } else if ($lines[$jj][0] == '+') {
          ++$new_lines;
        }
      }
    }

    if ($break_here !== null) {
      // Rewind to the recorded break point so trailing context is capped.
      $jj = $break_here;
    }

    $hunk_length = min($jj, $n) - $hunk_start;
    // Lines that count toward the "@@" lengths: everything but '\' markers.
    $count_length = ($hunk_length - $hunk_adjust);

    $hunk = new ArcanistDiffHunk();
    // Only unchanged lines lie between $ii and $hunk_start, so both offsets
    // advance by the same amount.
    $hunk->setOldOffset($old_offset + $hunk_start - $ii);
    $hunk->setNewOffset($new_offset + $hunk_start - $ii);
    // The old side excludes '+' lines; the new side excludes '-' lines.
    $hunk->setOldLength($count_length - $new_lines);
    $hunk->setNewLength($count_length - $old_lines);

    $corpus = array_slice($lines, $hunk_start, $hunk_length);
    $corpus = implode('', $corpus);
    $hunk->setCorpus($corpus);

    $results[] = $hunk;

    // Advance the running offsets past everything consumed up to $jj.
    $old_offset += ($jj - $ii) - $new_lines;
    $new_offset += ($jj - $ii) - $old_lines;
    $ii = $jj;
  }

  return $results;
}
/**
 * Build the "index ... / GIT binary patch" section for one binary change.
 *
 * The section contains two "literal" blocks: the new file data first, then
 * the old file data, each encoded by emitBinaryDiffBody().
 *
 * @param ArcanistDiffChange $change Change being written out.
 * @param ArcanistDiffChange|null $old_binary Optional change to take the
 *        original data from instead of $change (used when the data for the
 *        old side lives on a different change).
 * @return string Patch text, with lines terminated by the 'git' EOL.
 */
private function buildBinaryChange(ArcanistDiffChange $change, $old_binary) {
  $eol = $this->getEOL('git');

  // In Git, when we write out a binary file move or copy, we need the
  // original binary for the source and the current binary for the
  // destination.

  if ($old_binary) {
    if ($old_binary->getOriginalFileData() !== null) {
      $old_data = $old_binary->getOriginalFileData();
      $old_phid = null;
    } else {
      $old_data = null;
      $old_phid = $old_binary->getMetadata('old:binary-phid');
    }
  } else {
    $old_data = $change->getOriginalFileData();
    $old_phid = $change->getMetadata('old:binary-phid');
  }

  if ($old_data === null && $old_phid) {
    // The cast keeps basename() away from a null path.
    $name = basename((string)$change->getOldPath());
    $old_data = $this->getBlob($old_phid, $name);
  }

  // Resolve the "no data" case BEFORE measuring the data: strlen(null) is
  // deprecated as of PHP 8.1. A missing side is written as an empty blob
  // with an all-zero object name.
  if ($old_data === null) {
    $old_data = '';
    $old_sha1 = str_repeat('0', 40);
  } else {
    $old_sha1 = sha1('blob '.strlen($old_data)."\0".$old_data);
  }
  $old_length = strlen($old_data);

  $new_phid = $change->getMetadata('new:binary-phid');

  $new_data = null;
  if ($change->getCurrentFileData() !== null) {
    $new_data = $change->getCurrentFileData();
  } else if ($new_phid) {
    $name = basename((string)$change->getCurrentPath());
    $new_data = $this->getBlob($new_phid, $name);
  }

  if ($new_data === null) {
    $new_data = '';
    $new_sha1 = str_repeat('0', 40);
  } else {
    $new_sha1 = sha1('blob '.strlen($new_data)."\0".$new_data);
  }
  $new_length = strlen($new_data);

  $content = array();
  $content[] = "index {$old_sha1}..{$new_sha1}".$eol;
  $content[] = 'GIT binary patch'.$eol;

  // New data first, then old data.
  $content[] = "literal {$new_length}".$eol;
  $content[] = $this->emitBinaryDiffBody($new_data).$eol;

  $content[] = "literal {$old_length}".$eol;
  $content[] = $this->emitBinaryDiffBody($old_data).$eol;

  return implode('', $content);
}
/**
 * Encode binary data with the base85 alphabet used by git binary patches.
 *
 * Input is consumed four bytes at a time. Each group is packed big-endian
 * into one number and written as five base-85 digits, most significant
 * first. A short final group is treated as if padded with zero bytes and
 * still produces five characters.
 *
 * @param string $data Raw bytes to encode.
 * @return string Encoded text; empty when $data is empty.
 */
public static function encodeBase85($data) {
  // Digit values 0..84 map to the characters of this string by position.
  static $alphabet =
    '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'.
    '!#$%&()*+-;<=>?@^_`{|}~';

  $length = strlen($data);
  $encoded = '';

  for ($offset = 0; $offset < $length; $offset += 4) {
    $limit = min($offset + 4, $length);

    // Pack up to four bytes into one number. PHP has no unsigned integers,
    // so on 32-bit builds a large value overflows into a float here. That
    // is fine: a double holds every integer below 2^53 exactly, and the
    // digit extraction below uses fmod()/floor() instead of "%" and "/".
    $word = 0;
    $shift = 24;
    for ($index = $offset; $index < $limit; $index++) {
      $word = $word + (ord($data[$index]) * (1 << $shift));
      $shift -= 8;
    }

    // Peel off five base-85 digits, least significant first.
    $digits = array();
    for ($place = 0; $place < 5; $place++) {
      $digits[] = $alphabet[(int)fmod($word, 85.0)];
      $word = floor($word / 85.0);
    }

    $encoded .= implode('', array_reverse($digits));
  }

  return $encoded;
}
$this->buildChange($path); if ($status & ArcanistRepositoryAPI::FLAG_ADDED) { $change->setType(ArcanistDiffChangeType::TYPE_ADD); } else if ($status & ArcanistRepositoryAPI::FLAG_DELETED) { $change->setType(ArcanistDiffChangeType::TYPE_DELETE); } else { $change->setType(ArcanistDiffChangeType::TYPE_CHANGE); } $is_dir = is_dir($api->getPath($path)); if ($is_dir) { $change->setFileType(ArcanistDiffChangeType::FILE_DIRECTORY); // We have to go hit the diff even for directories because they may // have property changes or moves, etc. } $is_link = is_link($api->getPath($path)); if ($is_link) { $change->setFileType(ArcanistDiffChangeType::FILE_SYMLINK); } $diff = $api->getRawDiffText($path); if ($diff) { $this->parseDiff($diff); } $info = $api->getSVNInfo($path); if (idx($info, 'Copied From URL')) { if (!$root) { $rinfo = $api->getSVNInfo('.'); $root = $rinfo['URL'].'/'; } $cpath = $info['Copied From URL']; $root_len = strlen($root); if (!strncmp($cpath, $root, $root_len)) { $cpath = substr($cpath, $root_len); // The user can "svn cp /path/to/file@12345 x", which pulls a file out // of version history at a specific revision. If we just use the path, // we'll collide with possible changes to that path in the working // copy below. In particular, "svn cp"-ing a path which no longer // exists somewhere in the working copy and then adding that path // gets us to the "origin change type" branches below with a // TYPE_ADD state on the path. To avoid this, append the origin // revision to the path so we'll necessarily generate a new change. // TODO: In theory, you could have an '@' in your path and this could // cause a collision, e.g. two files named 'f' and 'f@12345'. This is // at least somewhat the user's fault, though. 
if ($info['Copied From Rev']) { if ($info['Copied From Rev'] != $info['Revision']) { $cpath .= '@'.$info['Copied From Rev']; } } $change->setOldPath($cpath); $from[$path] = $cpath; } } $type = $change->getType(); if (($type === ArcanistDiffChangeType::TYPE_MOVE_AWAY || $type === ArcanistDiffChangeType::TYPE_DELETE) && idx($info, 'Node Kind') === 'directory') { $change->setFileType(ArcanistDiffChangeType::FILE_DIRECTORY); } } foreach ($paths as $path => $status) { $change = $this->buildChange($path); if (empty($from[$path])) { continue; } if (empty($this->changes[$from[$path]])) { if ($change->getType() == ArcanistDiffChangeType::TYPE_COPY_HERE) { // If the origin path wasn't changed (or isn't included in this diff) // and we only copied it, don't generate a changeset for it. This // keeps us out of trouble when we go to 'arc commit' and need to // figure out which files should be included in the commit list. continue; } } $origin = $this->buildChange($from[$path]); $origin->addAwayPath($change->getCurrentPath()); $type = $origin->getType(); switch ($type) { case ArcanistDiffChangeType::TYPE_MULTICOPY: case ArcanistDiffChangeType::TYPE_COPY_AWAY: // "Add" is possible if you do some bizarre tricks with svn:ignore and // "svn copy"'ing URLs straight from the repository; you can end up with // a file that is a copy of itself. See T271. 
/**
 * Parse raw diff text and return the accumulated change list.
 *
 * The text is consumed one header at a time. Each header line is matched
 * against a list of known formats and the rest of that section is handed to
 * the matching parse*() method, until the line cursor reports no more
 * input.
 *
 * @param string $diff Raw diff text.
 * @return array The parser's $this->changes after parsing.
 * @throws Exception When $diff is empty or contains only whitespace.
 */
public function parseDiff($diff) {
  if (!strlen(trim($diff))) {
    throw new Exception("Can't parse an empty diff!");
  }

  // Detect `git-format-patch`, by looking for a "---" line somewhere in
  // the file and then a footer with Git version number, which looks like
  // this:
  //
  //   --
  //   1.8.4.2
  //
  // Note that `git-format-patch` adds a space after the "--", but we don't
  // require it when detecting patches, as trailing whitespace can easily be
  // lost in transit.
  $detect_patch = '/^---$.*^-- ?[\s\d.]+\z/ms';
  $message = null;
  if (preg_match($detect_patch, $diff)) {
    list($message, $diff) = $this->stripGitFormatPatch($diff);
  }

  $this->didStartParse($diff);

  // Strip off header comments. While `patch` allows comments anywhere in the
  // file, `git apply` is more strict. We get these comments in `hg export`
  // diffs, and Eclipse can also produce them.
  $line = $this->getLineTrimmed();
  while (preg_match('/^#/', $line)) {
    $line = $this->nextLine();
  }

  if (strlen($message)) {
    // If we found a message during pre-parse steps, add it to the resulting
    // changes here.
    $change = $this->buildChange(null)
      ->setType(ArcanistDiffChangeType::TYPE_MESSAGE)
      ->setMetadata('message', $message);
  }

  do {
    // NOTE(review): the code below reads $match['type'], ['cur'], ['old'],
    // ['oldnew'], ['hash'] and ['binary'], but the "(?P...)" groups in
    // these patterns carry no "<name>" part. The group names look like they
    // were lost from this copy of the file -- confirm against the
    // repository version before relying on these literals.
    $patterns = array(
      // This is a normal SVN text change, probably from "svn diff".
      '(?PIndex): (?P.+)',
      // This is an SVN text change, probably from "svnlook diff".
      '(?PModified|Added|Deleted|Copied): (?P.+)',
      // This is an SVN property change, probably from "svn diff".
      '(?PProperty changes on): (?P.+)',
      // This is a git commit message, probably from "git show".
      '(?Pcommit) (?P[a-f0-9]+)(?: \(.*\))?',
      // This is a git diff, probably from "git show" or "git diff".
      // Note that the filenames may appear quoted.
      '(?Pdiff --git) (?P.*)',
      // RCS Diff
      '(?Prcsdiff -u) (?P.*)',
      // This is a unified diff, probably from "diff -u" or synthetic diffing.
      '(?P---) (?P.+)\s+\d{4}-\d{2}-\d{2}.*',
      '(?PBinary files|Files) '.
        '(?P.+)\s+\d{4}-\d{2}-\d{2} and '.
        '(?P.+)\s+\d{4}-\d{2}-\d{2} differ.*',
      // This is a normal Mercurial text change, probably from "hg diff". It
      // may have two "-r" blocks if it came from "hg diff -r x:y".
      '(?Pdiff -r) (?P[a-f0-9]+) (?:-r [a-f0-9]+ )?(?P.+)',
    );

    $line = $this->getLineTrimmed();
    $match = null;
    $ok = $this->tryMatchHeader($patterns, $line, $match);

    $failed_parse = false;
    if (!$ok && $this->isFirstNonEmptyLine()) {
      // 'hg export' command creates so called "extended diff" that
      // contains some meta information and comment at the beginning
      // (isFirstNonEmptyLine() to check for beginning). Actual mercurial
      // code detects where comment ends and unified diff starts by
      // searching for "diff -r" or "diff --git" in the text.
      $this->saveLine();
      $line = $this->nextLineThatLooksLikeDiffStart();
      if (!$this->tryMatchHeader($patterns, $line, $match)) {
        // Restore line before guessing to display correct error.
        $this->restoreLine();
        $failed_parse = true;
      }
    } else if (!$ok) {
      $failed_parse = true;
    }

    if ($failed_parse) {
      // Presumably didFailParse() throws and does not return -- the code
      // below relies on $match being populated. Confirm in its definition.
      $this->didFailParse(
        "Expected a hunk header, like 'Index: /path/to/file.ext' (svn), ".
        "'Property changes on: /path/to/file.ext' (svn properties), ".
        "'commit 59bcc3ad6775562f845953cf01624225' (git show), ".
        "'diff --git' (git diff), '--- filename' (unified diff), or ".
        "'diff -r' (hg diff or patch).");
    }

    if (isset($match['type'])) {
      if ($match['type'] == 'diff --git') {
        // A git header carries both paths in one capture; split it into
        // the old and current path.
        list($old, $new) = self::splitGitDiffPaths($match['oldnew']);
        $match['old'] = $old;
        $match['cur'] = $new;
      }
    }

    $change = $this->buildChange(idx($match, 'cur'));

    if (isset($match['old'])) {
      $change->setOldPath($match['old']);
    }

    if (isset($match['hash'])) {
      $change->setCommitHash($match['hash']);
    }

    if (isset($match['binary'])) {
      // A "Binary files ... differ" header has no body: mark the change
      // and go straight to the loop condition.
      $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
      $line = $this->nextNonemptyLine();
      continue;
    }

    $line = $this->nextLine();

    // Hand the body of this section to the parser for its header type.
    switch ($match['type']) {
      case 'Index':
      case 'Modified':
      case 'Added':
      case 'Deleted':
      case 'Copied':
        $this->parseIndexHunk($change);
        break;
      case 'Property changes on':
        $this->parsePropertyHunk($change);
        break;
      case 'diff --git':
        $this->setIsGit(true);
        $this->parseIndexHunk($change);
        break;
      case 'commit':
        $this->setIsGit(true);
        $this->parseCommitMessage($change);
        break;
      case '---':
        // Plain unified diff: the "+++" line must follow immediately and
        // supplies the current path.
        $ok = preg_match(
          '@^(?:\+\+\+) (.*)\s+\d{4}-\d{2}-\d{2}.*$@',
          $line,
          $match);
        if (!$ok) {
          $this->didFailParse("Expected '+++ filename' in unified diff.");
        }
        $change->setCurrentPath($match[1]);
        $line = $this->nextLine();
        $this->parseChangeset($change);
        break;
      case 'diff -r':
        $this->setIsMercurial(true);
        $this->parseIndexHunk($change);
        break;
      case 'rcsdiff -u':
        $this->isRCS = true;
        $this->parseIndexHunk($change);
        break;
      default:
        $this->didFailParse('Unknown diff type.');
        break;
    }
  } while ($this->getLine() !== null);

  $this->didFinishParse();

  $this->loadSyntheticData();

  return $this->changes;
}
/**
 * Read an SVN "Property changes on:" section and record every property's
 * old and new value on the given change.
 *
 * The format is ambiguous, so this is a best-effort parser: a carefully
 * constructed property value can confuse it (or any other parser).
 *
 * @param ArcanistDiffChange $change Change that receives the properties.
 * @return void
 */
protected function parsePropertyHunk(ArcanistDiffChange $change) {
  $divider = $this->getLineTrimmed();
  if (!preg_match('/^_+$/', $divider)) {
    $this->didFailParse("Expected '______________________'.");
  }

  // parseSVNPropertyChange() moves the line cursor itself, so each pass
  // re-reads the current line rather than advancing.
  for ($cursor = $this->nextLine();
       $cursor !== null;
       $cursor = $this->getLine()) {

    // The next file header or property section ends this one.
    if (preg_match('/^(Index|Property changes on):/', $cursor)) {
      break;
    }

    // NOTE: Before 1.5, SVN uses "Name". At 1.5 and later, SVN uses
    // "Modified", "Added" and "Deleted".
    $header = null;
    $is_header = preg_match(
      '/^(Name|Modified|Added|Deleted): (.*)$/',
      $cursor,
      $header);
    if (!$is_header) {
      $this->didFailParse(
        "Expected 'Name', 'Added', 'Deleted', or 'Modified'.");
    }

    $operation = $header[1];
    $property = $header[2];

    list($before, $after) = $this->parseSVNPropertyChange(
      $operation,
      $property);

    if ($before !== null) {
      $change->setOldProperty($property, $before);
    }
    if ($after !== null) {
      $change->setNewProperty($property, $after);
    }
  }
}
/**
 * Record whether the diff being parsed is a Git diff.
 *
 * Once a value has been recorded, it may only be set again to an equal
 * (loosely compared) value.
 *
 * @param bool $git Whether the diff is a Git diff.
 * @return $this
 * @throws Exception When a different value was recorded earlier.
 */
protected function setIsGit($git) {
  $recorded = $this->isGit;

  if ($recorded !== null) {
    if ($recorded != $git) {
      throw new Exception('Git status has changed!');
    }
  }

  $this->isGit = $git;
  return $this;
}
// This allows us to distinguish between property changes only // and actual moves. For property changes only, we can't currently // build a synthetic diff correctly, so just skip it. // TODO: Build synthetic diffs for property changes, too. if ($change->getType() != ArcanistDiffChangeType::TYPE_CHANGE) { $change->setNeedsSyntheticGitHunks(true); if ($move_source) { $move_source->setNeedsSyntheticGitHunks(true); } } return; } break; } if (!empty($match['oldmode'])) { $change->setOldProperty('unix:filemode', $match['oldmode']); } if (!empty($match['newmode'])) { $change->setNewProperty('unix:filemode', $match['newmode']); } if (!empty($match['deleted'])) { $change->setType(ArcanistDiffChangeType::TYPE_DELETE); } if (!empty($match['new'])) { // If you replace a symlink with a normal file, git renders the change // as a "delete" of the symlink plus an "add" of the new file. We // prefer to represent this as a change. if ($change->getType() == ArcanistDiffChangeType::TYPE_DELETE) { $change->setType(ArcanistDiffChangeType::TYPE_CHANGE); } else { $change->setType(ArcanistDiffChangeType::TYPE_ADD); } } if (!empty($match['old'])) { $match['old'] = self::unescapeFilename($match['old']); $change->setOldPath($match['old']); } if (!empty($match['cur'])) { $match['cur'] = self::unescapeFilename($match['cur']); $change->setCurrentPath($match['cur']); } if (!empty($match['copy'])) { $change->setType(ArcanistDiffChangeType::TYPE_COPY_HERE); $old = $this->buildChange($change->getOldPath()); $type = $old->getType(); if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) { $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY); } else { $old->setType(ArcanistDiffChangeType::TYPE_COPY_AWAY); } $old->addAwayPath($change->getCurrentPath()); } if (!empty($match['move'])) { $change->setType(ArcanistDiffChangeType::TYPE_MOVE_HERE); $old = $this->buildChange($change->getOldPath()); $type = $old->getType(); if ($type == ArcanistDiffChangeType::TYPE_MULTICOPY) { // Great, no change. 
} else if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) { $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY); } else if ($type == ArcanistDiffChangeType::TYPE_COPY_AWAY) { $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY); } else { $old->setType(ArcanistDiffChangeType::TYPE_MOVE_AWAY); } // We'll reference this above. $move_source = $old; $old->addAwayPath($change->getCurrentPath()); } $line = $this->nextNonemptyLine(); } while (true); } $line = $this->getLine(); if ($is_svn) { $ok = preg_match('/^=+\s*$/', $line); if (!$ok) { $this->didFailParse("Expected '=======================' divider line."); } else { // Adding an empty file in SVN can produce an empty line here. $line = $this->nextNonemptyLine(); } } else if ($is_git) { $ok = preg_match('/^index .*$/', $line); if (!$ok) { // TODO: "hg diff -g" diffs ("mercurial git-style diffs") do not include // this line, so we can't parse them if we fail on it. Maybe introduce // a flag saying "parse this diff using relaxed git-style diff rules"? // $this->didFailParse("Expected 'index af23f...a98bc' header line."); } else { // NOTE: In the git case, where this patch is the last change in the // file, we may have a final terminal newline. Skip over it so that // we'll hit the '$line === null' block below. This is covered by the // 'git-empty-file.gitdiff' test case. $line = $this->nextNonemptyLine(); } } // If there are files with only whitespace changes and -b or -w are // supplied as command-line flags to `diff', svn and git both produce // changes without any body. if ($line === null || preg_match( '/^(Index:|Property changes on:|diff --git|commit) /', $line)) { return; } $is_binary_add = preg_match( '/^Cannot display: file marked as a binary type\.$/', rtrim($line)); if ($is_binary_add) { $this->nextLine(); // Cannot display: file marked as a binary type. 
$this->nextNonemptyLine(); // svn:mime-type = application/octet-stream $this->markBinary($change); return; } // We can get this in git, or in SVN when a file exists in the repository // WITHOUT a binary mime-type and is changed and given a binary mime-type. $is_binary_diff = preg_match( '/^(Binary files|Files) .* and .* differ$/', rtrim($line)); if ($is_binary_diff) { $this->nextNonemptyLine(); // Binary files x and y differ $this->markBinary($change); return; } // This occurs under "hg diff --git" when a binary file is removed. See // test case "hg-binary-delete.hgdiff". (I believe it never occurs under // git, which reports the "files X and /dev/null differ" string above. Git // can not apply these patches.) $is_hg_binary_delete = preg_match( '/^Binary file .* has changed$/', rtrim($line)); if ($is_hg_binary_delete) { $this->nextNonemptyLine(); $this->markBinary($change); return; } // With "git diff --binary" (not a normal mode, but one users may explicitly // invoke and then, e.g., copy-paste into the web console) or "hg diff // --git" (normal under hg workflows), we may encounter a literal binary // patch. $is_git_binary_patch = preg_match( '/^GIT binary patch$/', rtrim($line)); if ($is_git_binary_patch) { $this->nextLine(); $this->parseGitBinaryPatch(); $line = $this->getLine(); if (preg_match('/^literal/', $line)) { // We may have old/new binaries (change) or just a new binary (hg add). // If there are two blocks, parse both. $this->parseGitBinaryPatch(); } $this->markBinary($change); return; } if ($is_git) { // "git diff -b" ignores whitespace, but has an empty hunk target if (preg_match('@^diff --git .*$@', $line)) { $this->nextLine(); return null; } } if ($this->isRCS) { // Skip the RCS headers. 
/**
 * Parse a "--- path" or "+++ path" hunk target line and advance past it.
 *
 * An optional leading "a/" or "b/" is dropped from the path. What may follow
 * the path depends on the diff flavour: a tab plus arbitrary text for
 * Mercurial, whitespace plus arbitrary text for RCS, and otherwise an
 * optional parenthesised suffix such as "(revision N)".
 *
 * @return string The path captured from the target line.
 */
protected function parseHunkTarget() {
  $line = $this->getLine();
  $matches = null;

  $remainder = '(?:\s*\(.*\))?';
  if ($this->getIsMercurial()) {
    // Something like "Fri Aug 26 01:20:50 2005 -0700", don't bother trying
    // to parse it.
    $remainder = '\t.*';
  } else if ($this->isRCS) {
    $remainder = '\s.*';
  }

  // The capture must be NAMED "path": the return statement reads
  // $matches['path'], and "(?P" without a "<name>" is not a valid group.
  // A null line (end of input) is rejected up front so preg_match() is never
  // handed a non-string subject.
  $ok = ($line !== null) && preg_match(
    '@^[-+]{3} (?:[ab]/)?(?P<path>.*?)'.$remainder.'$@',
    $line,
    $matches);

  if (!$ok) {
    $this->didFailParse(
      "Expected hunk target '+++ path/to/file.ext (revision N)'.");
  }

  $this->nextLine();
  return $matches['path'];
}
// The final group is for git, which appends a guess at the function // context to the diff. $matches = null; $ok = preg_match( '/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?: .*?)?$/U', $line, $matches); if (!$ok) { // It's possible we hit the style of an svn1.7 property change. // This is a 4-line Index block, followed by an empty line, followed // by a "Property changes on:" section similar to svn1.6. if ($line == '') { $line = $this->nextNonemptyLine(); $ok = preg_match('/^Property changes on:/', $line); if (!$ok) { $this->didFailParse('Confused by empty line'); } $line = $this->nextLine(); return $this->parsePropertyHunk($change); } $this->didFailParse("Expected hunk header '@@ -NN,NN +NN,NN @@'."); } $hunk->setOldOffset($matches[1]); $hunk->setNewOffset($matches[3]); // Cover for the cases where length wasn't present (implying one line). $old_len = idx($matches, 2); if (!strlen($old_len)) { $old_len = 1; } $new_len = idx($matches, 4); if (!strlen($new_len)) { $new_len = 1; } $hunk->setOldLength($old_len); $hunk->setNewLength($new_len); $add = 0; $del = 0; $hit_next_hunk = false; while ((($line = $this->nextLine()) !== null)) { if (strlen(rtrim($line, "\r\n"))) { $char = $line[0]; } else { // Normally, we do not encouter empty lines in diffs, because // unchanged lines have an initial space. However, in Git, with // the option `diff.suppress-blank-empty` set, unchanged blank lines // emit as completely empty. If we encounter a completely empty line, // treat it as a ' ' (i.e., unchanged empty line) line. 
$char = ' '; } switch ($char) { case '\\': if (!preg_match('@\\ No newline at end of file@', $line)) { $this->didFailParse( "Expected '\ No newline at end of file'."); } if ($new_len) { $real[] = $line; $hunk->setIsMissingOldNewline(true); } else { $real[] = $line; $hunk->setIsMissingNewNewline(true); } if (!$new_len) { break 2; } break; case '+': ++$add; --$new_len; $real[] = $line; break; case '-': if (!$old_len) { // In this case, we've hit "---" from a new file. So don't // advance the line cursor. $hit_next_hunk = true; break 2; } ++$del; --$old_len; $real[] = $line; break; case ' ': if (!$old_len && !$new_len) { break 2; } --$old_len; --$new_len; $real[] = $line; break; default: // We hit something, likely another hunk. $hit_next_hunk = true; break 2; } } if ($old_len || $new_len) { $this->didFailParse('Found the wrong number of hunk lines.'); } $corpus = implode('', $real); $is_binary = false; if ($this->detectBinaryFiles) { $is_binary = !phutil_is_utf8($corpus); $try_encoding = $this->tryEncoding; if ($is_binary && $try_encoding) { $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus); if (!$is_binary) { $corpus = phutil_utf8_convert($corpus, 'UTF-8', $try_encoding); if (!phutil_is_utf8($corpus)) { throw new Exception( "Failed to convert a hunk from '{$try_encoding}' to UTF-8. ". "Check that the specified encoding is correct."); } } } } if ($is_binary) { // SVN happily treats binary files which aren't marked with the right // mime type as text files. Detect that junk here and mark the file // binary. We'll catch stuff with unicode too, but that's verboten // anyway. If there are too many false positives with this we might // need to make it threshold-triggered instead of triggering on any // unprintable byte. 
$change->setFileType(ArcanistDiffChangeType::FILE_BINARY); } else { $hunk->setCorpus($corpus); $hunk->setAddLines($add); $hunk->setDelLines($del); $change->addHunk($hunk); } if (!$hit_next_hunk) { $line = $this->nextNonemptyLine(); } } while (preg_match('/^@@ /', $line)); } protected function buildChange($path = null) { $change = null; if ($path !== null) { if (!empty($this->changes[$path])) { return $this->changes[$path]; } } if ($this->forcePath) { return $this->changes[$this->forcePath]; } $change = new ArcanistDiffChange(); if ($path !== null) { $change->setCurrentPath($path); $this->changes[$path] = $change; } else { $this->changes[] = $change; } return $change; } protected function didStartParse($text) { $this->rawDiff = $text; // Eat leading whitespace. This may happen if the first change in the diff // is an SVN property change. $text = ltrim($text); // Try to strip ANSI color codes from colorized diffs. ANSI color codes // might be present in two cases: // // - You piped a colorized diff into 'arc --raw' or similar (normally // we're able to disable colorization on diffs we control the generation // of). // - You're diffing a file which actually contains ANSI color codes. // // The former is vastly more likely, but we try to distinguish between the // two cases by testing for a color code at the beginning of a line. If // we find one, we know it's a colorized diff (since the beginning of the // line should be "+", "-" or " " if the code is in the diff text). // // While it's possible a diff might be colorized and fail this test, it's // unlikely, and it covers hg's color extension which seems to be the most // stubborn about colorizing text despite stdout not being a TTY. // // We might incorrectly strip color codes from a colorized diff of a text // file with color codes inside it, but this case is stupid and pathological // and you've dug your own grave. 
$ansi_color_pattern = '\x1B\[[\d;]*m'; if (preg_match('/^'.$ansi_color_pattern.'/m', $text)) { $text = preg_replace('/'.$ansi_color_pattern.'/', '', $text); } $this->text = phutil_split_lines($text); $this->line = 0; } protected function getLine() { if ($this->text === null) { throw new Exception('Not parsing!'); } if (isset($this->text[$this->line])) { return $this->text[$this->line]; } return null; } protected function getLineTrimmed() { $line = $this->getLine(); if ($line !== null) { $line = trim($line, "\r\n"); } return $line; } protected function nextLine() { $this->line++; return $this->getLine(); } protected function nextLineTrimmed() { $line = $this->nextLine(); if ($line !== null) { $line = trim($line, "\r\n"); } return $line; } protected function nextNonemptyLine() { while (($line = $this->nextLine()) !== null) { if (strlen(trim($line)) !== 0) { break; } } return $this->getLine(); } protected function nextLineThatLooksLikeDiffStart() { while (($line = $this->nextLine()) !== null) { if (preg_match('/^\s*diff\s+-(?:r|-git)/', $line)) { break; } } return $this->getLine(); } protected function saveLine() { $this->lineSaved = $this->line; } protected function restoreLine() { $this->line = $this->lineSaved; } protected function isFirstNonEmptyLine() { $len = count($this->text); for ($ii = 0; $ii < $len; $ii++) { $line = $this->text[$ii]; if (!strlen(trim($line))) { // This line is empty, skip it. continue; } if (preg_match('/^#/', $line)) { // This line is a comment, skip it. continue; } return ($ii == $this->line); } // Entire file is empty. 
return false; } protected function didFinishParse() { $this->text = null; } public function setWriteDiffOnFailure($write) { $this->writeDiffOnFailure = $write; return $this; } protected function didFailParse($message) { $context = 5; $min = max(0, $this->line - $context); $max = min($this->line + $context, count($this->text) - 1); $context = ''; for ($ii = $min; $ii <= $max; $ii++) { $context .= sprintf( '%8.8s %6.6s %s', ($ii == $this->line) ? '>>> ' : '', $ii + 1, $this->text[$ii]); } $out = array(); $out[] = "Diff Parse Exception: {$message}"; if ($this->writeDiffOnFailure) { $temp = new TempFile(); $temp->setPreserveFile(true); Filesystem::writeFile($temp, $this->rawDiff); $out[] = 'Raw input file was written to: '.(string)$temp; } $out[] = $context; $out = implode("\n\n", $out); throw new Exception($out); } /** * Unescape escaped filenames, e.g. from "git diff". */ private static function unescapeFilename($name) { if (preg_match('/^".+"$/', $name)) { return stripcslashes(substr($name, 1, -1)); } else { return $name; } } private function loadSyntheticData() { if (!$this->changes) { return; } $repository_api = $this->repositoryAPI; if (!$repository_api) { return; } $imagechanges = array(); $changes = $this->changes; foreach ($changes as $change) { $path = $change->getCurrentPath(); // Certain types of changes (moves and copies) don't contain change data // when expressed in raw "git diff" form. Augment any such diffs with // textual data. if ($change->getNeedsSyntheticGitHunks() && ($repository_api instanceof ArcanistGitAPI)) { $diff = $repository_api->getRawDiffText($path, $moves = false); // NOTE: We're reusing the parser and it doesn't reset change state // between parses because there's an oddball SVN workflow in Phabricator // which relies on being able to inject changes. // TODO: Fix this. 
$parser = clone $this; $parser->setChanges(array()); $raw_changes = $parser->parseDiff($diff); foreach ($raw_changes as $raw_change) { if ($raw_change->getCurrentPath() == $path) { $change->setFileType($raw_change->getFileType()); foreach ($raw_change->getHunks() as $hunk) { // Git thinks that this file has been added. But we know that it // has been moved or copied without a change. $hunk->setCorpus( preg_replace('/^\+/m', ' ', $hunk->getCorpus())); $change->addHunk($hunk); } break; } } $change->setNeedsSyntheticGitHunks(false); } if ($change->getFileType() != ArcanistDiffChangeType::FILE_BINARY && $change->getFileType() != ArcanistDiffChangeType::FILE_IMAGE) { continue; } $imagechanges[$path] = $change; } // Fetch the actual file contents in batches so repositories // that have slow random file accesses (i.e. mercurial) can // optimize the retrieval. $paths = array_keys($imagechanges); $filedata = $repository_api->getBulkOriginalFileData($paths); foreach ($filedata as $path => $data) { $imagechanges[$path]->setOriginalFileData($data); } $filedata = $repository_api->getBulkCurrentFileData($paths); foreach ($filedata as $path => $data) { $imagechanges[$path]->setCurrentFileData($data); } $this->changes = $changes; } /** * Strip prefixes off paths from `git diff`. By default git uses a/ and b/, * but you can set `diff.mnemonicprefix` to get a different set of prefixes, * or use `--no-prefix`, `--src-prefix` or `--dst-prefix` to set these to * other arbitrary values. * * We strip the default and mnemonic prefixes, and trust the user knows what * they're doing in the other cases. * * @param string Path to strip. * @return string Stripped path. */ public static function stripGitPathPrefix($path) { static $regex; if ($regex === null) { $prefixes = array( // These are the defaults. 'a/', 'b/', // These show up when you set "diff.mnemonicprefix". 
'i/', 'c/', 'w/', 'o/', '1/', '2/', ); foreach ($prefixes as $key => $prefix) { $prefixes[$key] = preg_quote($prefix, '@'); } $regex = '@^('.implode('|', $prefixes).')@S'; } return preg_replace($regex, '', $path); } /** * Split the paths on a "diff --git" line into old and new paths. This * is difficult because they may be ambiguous if the files contain spaces. * * @param string Text from a diff line after "diff --git ". * @return pair Old and new paths. */ public static function splitGitDiffPaths($paths) { $matches = null; $paths = rtrim($paths, "\r\n"); $patterns = array( // Try quoted paths, used for unicode filenames or filenames with quotes. '@^(?P"(?:\\\\.|[^"\\\\]+)+") (?P"(?:\\\\.|[^"\\\\]+)+")$@', // Try paths without spaces. '@^(?P[^ ]+) (?P[^ ]+)$@', // Try paths with well-known prefixes. '@^(?P[abicwo12]/.*) (?P[abicwo12]/.*)$@', // Try the exact same string twice in a row separated by a space. // This can hit a false positive for moves from files like "old file old" // to "file", but such a case combined with custom diff prefixes is // incredibly obscure. '@^(?P.*) (?P\\1)$@', ); foreach ($patterns as $pattern) { if (preg_match($pattern, $paths, $matches)) { break; } } if (!$matches) { throw new Exception( "Input diff contains ambiguous line 'diff --git {$paths}'. This line ". "is ambiguous because there are spaces in the file names, so the ". "parser can not determine where the file names begin and end. To ". "resolve this ambiguity, use standard prefixes ('a/' and 'b/') when ". "generating diffs."); } $old = $matches['old']; $old = self::unescapeFilename($old); $old = self::stripGitPathPrefix($old); $new = $matches['new']; $new = self::unescapeFilename($new); $new = self::stripGitPathPrefix($new); return array($old, $new); } /** * Strip the header and footer off a `git-format-patch` diff. * * Returns a parseable normal diff and a textual commit message. 
*/ private function stripGitFormatPatch($diff) { - // We can parse this by splitting it into two pieces over and over again // along different section dividers: // // 1. Mail headers. // 2. ("\n\n") // 3. Mail body. // 4. ("---") // 5. Diff stat section. // 6. ("\n\n") // 7. Actual diff body. // 8. ("--") // 9. Patch footer. list($head, $tail) = preg_split('/^---$/m', $diff, 2); list($mail_headers, $mail_body) = explode("\n\n", $head, 2); list($body, $foot) = preg_split('/^-- ?$/m', $tail, 2); list($stat, $diff) = explode("\n\n", $body, 2); // Rebuild the commit message by putting the subject line back on top of it, // if we can find one. $matches = null; $pattern = '/^Subject: (?:\[PATCH\] )?(.*)$/mi'; if (preg_match($pattern, $mail_headers, $matches)) { $mail_body = $matches[1]."\n\n".$mail_body; $mail_body = rtrim($mail_body); } return array($mail_body, $diff); } } diff --git a/src/parser/__tests__/ArcanistDiffParserTestCase.php b/src/parser/__tests__/ArcanistDiffParserTestCase.php index 214b81bc..965468da 100644 --- a/src/parser/__tests__/ArcanistDiffParserTestCase.php +++ b/src/parser/__tests__/ArcanistDiffParserTestCase.php @@ -1,676 +1,674 @@ parseDiff($root.$file); } } private function parseDiff($diff_file) { $contents = Filesystem::readFile($diff_file); $file = basename($diff_file); $parser = new ArcanistDiffParser(); $changes = $parser->parseDiff($contents); switch ($file) { case 'colorized.hggitdiff': $this->assertEqual(1, count($changes)); break; case 'basic-missing-both-newlines-plus.udiff': case 'basic-missing-both-newlines.udiff': case 'basic-missing-new-newline-plus.udiff': case 'basic-missing-new-newline.udiff': case 'basic-missing-old-newline-plus.udiff': case 'basic-missing-old-newline.udiff': $expect_old = strpos($file, '-old-') || strpos($file, '-both-'); $expect_new = strpos($file, '-new-') || strpos($file, '-both-'); $expect_two = strpos($file, '-plus'); $this->assertEqual(count($changes), $expect_two ? 
2 : 1); $change = reset($changes); $this->assertTrue($change !== null); $hunks = $change->getHunks(); $this->assertEqual(1, count($hunks)); $hunk = reset($hunks); $this->assertEqual((bool)$expect_old, $hunk->getIsMissingOldNewline()); $this->assertEqual((bool)$expect_new, $hunk->getIsMissingNewNewline()); break; case 'basic-binary.udiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::FILE_BINARY, $change->getFileType()); break; case 'basic-multi-hunk.udiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $hunks = $change->getHunks(); $this->assertEqual(4, count($hunks)); $this->assertEqual('right', $change->getCurrentPath()); $this->assertEqual('left', $change->getOldPath()); break; case 'basic-multi-hunk-content.svndiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $hunks = $change->getHunks(); $this->assertEqual(2, count($hunks)); $there_is_a_literal_trailing_space_here = ' '; $corpus_0 = <<assertEqual( $corpus_0, $hunks[0]->getCorpus()); $this->assertEqual( $corpus_1, $hunks[1]->getCorpus()); break; case 'svn-ignore-whitespace-only.svndiff': $this->assertEqual(2, count($changes)); $hunks = reset($changes)->getHunks(); $this->assertEqual(0, count($hunks)); break; case 'svn-property-add.svndiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $hunks = reset($changes)->getHunks(); $this->assertEqual(1, count($hunks)); $this->assertEqual( array( 'duck' => 'quack', ), $change->getNewProperties()); break; case 'svn-property-modify.svndiff': $this->assertEqual(2, count($changes)); $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( array( 'svn:ignore' => '*.phpz', ), $change->getOldProperties()); $this->assertEqual( array( 'svn:ignore' => '*.php', ), $change->getNewProperties()); $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( array( 
'svn:special' => '*', ), $change->getOldProperties()); $this->assertEqual( array( 'svn:special' => 'moo', ), $change->getNewProperties()); break; case 'svn-property-delete.svndiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( $change->getOldProperties(), array( 'svn:special' => '*', )); $this->assertEqual( array( ), $change->getNewProperties()); break; case 'svn-property-merged.svndiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual(count($change->getHunks()), 0); $this->assertEqual( $change->getOldProperties(), array()); $this->assertEqual( $change->getNewProperties(), array()); break; case 'svn-property-merge.svndiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual(count($change->getHunks()), 0); $this->assertEqual( $change->getOldProperties(), array( )); $this->assertEqual( $change->getNewProperties(), array( 'svn:mergeinfo' => <<assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual(count($change->getHunks()), 0); $this->assertEqual( $change->getOldProperties(), array( )); $this->assertEqual( $change->getNewProperties(), array( 'svn:executable' => '*', )); break; case 'svn-binary-add.svndiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::FILE_BINARY, $change->getFileType()); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( array( 'svn:mime-type' => 'application/octet-stream', ), $change->getNewProperties()); break; case 'svn-binary-diff.svndiff': case 'svn-binary-diff-freebsd.svndiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::FILE_BINARY, $change->getFileType()); $this->assertEqual(count($change->getHunks()), 0); break; case 'git-delete-file.gitdiff': $this->assertEqual(1, count($changes)); $change = 
reset($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_DELETE, $change->getType()); $this->assertEqual( 'scripts/intern/test/testfile2', $change->getCurrentPath()); $this->assertEqual(1, count($change->getHunks())); break; case 'git-binary-change.gitdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::FILE_BINARY, $change->getFileType()); $this->assertEqual(0, count($change->getHunks())); break; case 'git-filemode-change.gitdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual(1, count($change->getHunks())); $this->assertEqual( array( 'unix:filemode' => '100644', ), $change->getOldProperties()); $this->assertEqual( array( 'unix:filemode' => '100755', ), $change->getNewProperties()); break; case 'git-filemode-change-only.gitdiff': $this->assertEqual(count($changes), 2); $change = reset($changes); $this->assertEqual(count($change->getHunks()), 0); $this->assertEqual( array( 'unix:filemode' => '100644', ), $change->getOldProperties()); $this->assertEqual( array( 'unix:filemode' => '100755', ), $change->getNewProperties()); break; case 'svn-empty-file.svndiff': $this->assertEqual(2, count($changes)); $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); break; case 'git-ignore-whitespace-only.gitdiff': $this->assertEqual(count($changes), 2); $change = array_shift($changes); $this->assertEqual(count($change->getHunks()), 0); $this->assertEqual( $change->getOldPath(), 'scripts/intern/test/testfile2'); $this->assertEqual( $change->getCurrentPath(), 'scripts/intern/test/testfile2'); $change = array_shift($changes); $this->assertEqual(count($change->getHunks()), 1); $this->assertEqual( $change->getOldPath(), 'scripts/intern/test/testfile3'); $this->assertEqual( $change->getCurrentPath(), 'scripts/intern/test/testfile3'); break; case 'git-move.gitdiff': case 'git-move-edit.gitdiff': case 'git-move-plus.gitdiff': $extra_changeset = 
(bool)strpos($file, '-plus'); $has_hunk = (bool)strpos($file, '-edit'); $this->assertEqual($extra_changeset ? 3 : 2, count($changes)); $change = array_shift($changes); $this->assertEqual($has_hunk ? 1 : 0, count($change->getHunks())); $this->assertEqual( $change->getType(), ArcanistDiffChangeType::TYPE_MOVE_HERE); $target = $change; $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( ArcanistDiffChangeType::TYPE_MOVE_AWAY, $change->getType()); $this->assertEqual( $change->getCurrentPath(), $target->getOldPath()); $this->assertTrue( in_array($target->getCurrentPath(), $change->getAwayPaths())); break; case 'git-merge-header.gitdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_MESSAGE, $change->getType()); $this->assertEqual( '501f6d519703458471dbea6284ec5f49d1408598', $change->getCommitHash()); break; case 'git-new-file.gitdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_ADD, $change->getType()); break; case 'git-copy.gitdiff': $this->assertEqual(2, count($changes)); $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( ArcanistDiffChangeType::TYPE_COPY_HERE, $change->getType()); $this->assertEqual( 'flib/intern/widgets/ui/UIWidgetRSSBox.php', $change->getCurrentPath()); $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( ArcanistDiffChangeType::TYPE_COPY_AWAY, $change->getType()); $this->assertEqual( 'lib/display/intern/ui/widget/UIWidgetRSSBox.php', $change->getCurrentPath()); break; case 'git-copy-plus.gitdiff': $this->assertEqual(2, count($changes)); $change = array_shift($changes); $this->assertEqual(3, count($change->getHunks())); $this->assertEqual( ArcanistDiffChangeType::TYPE_COPY_HERE, $change->getType()); $this->assertEqual( 
'flib/intern/widgets/ui/UIWidgetGraphConnect.php', $change->getCurrentPath()); $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( ArcanistDiffChangeType::TYPE_COPY_AWAY, $change->getType()); $this->assertEqual( 'lib/display/intern/ui/widget/UIWidgetLunchtime.php', $change->getCurrentPath()); break; case 'svn-property-multiline.svndiff': $this->assertEqual(1, count($changes)); $change = array_shift($changes); $this->assertEqual(0, count($change->getHunks())); $this->assertEqual( array( 'svn:ignore' => 'tags', ), $change->getOldProperties()); $this->assertEqual( array( 'svn:ignore' => "tags\nasdf\nlol\nwhat", ), $change->getNewProperties()); break; case 'git-empty-files.gitdiff': $this->assertEqual(2, count($changes)); while ($change = array_shift($changes)) { $this->assertEqual(0, count($change->getHunks())); } break; case 'git-mnemonicprefix.gitdiff': // Check parsing of diffs created with `diff.mnemonicprefix` // configuration option set to `true`. $this->assertEqual(1, count($changes)); $this->assertEqual(1, count(reset($changes)->getHunks())); break; case 'git-commit.gitdiff': case 'git-commit-logdecorate.gitdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_MESSAGE, $change->getType()); $this->assertEqual( '76e2f1339c298c748aa0b52030799ed202a6537b', $change->getCommitHash()); $this->assertEqual( <<. I tested most of these calls, but there were some that I didn't know how to reach, so if you are one of the owners of this code, please test your feature in my sandbox: www.ngao.devrs013.facebook.com @brosenthal, I removed some logic that was setting a disabled state on a UIActionButton, which is actually a no-op. 
Reviewed By: brosenthal Other Commenters: sparker, egiovanola Test Plan: www.ngao.devrs013.facebook.com Explicitly tested: * ads creation flow (add keyword) * ads manager (conversion tracking) * help center (create a discussion) * new user wizard (next step button) Revert: OK DiffCamp Revision: 94064 git-svn-id: svn+ssh://tubbs/svnroot/tfb/trunk/www@223593 2c7ba8d8 EOTEXT , $change->getMetadata('message')); break; case 'git-binary.gitdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_CHANGE, $change->getType()); $this->assertEqual( ArcanistDiffChangeType::FILE_BINARY, $change->getFileType()); break; case 'git-odd-filename.gitdiff': $this->assertEqual(2, count($changes)); $change = reset($changes); $this->assertEqual( 'old/'."\342\210\206".'.jpg', $change->getOldPath()); $this->assertEqual( 'new/'."\342\210\206".'.jpg', $change->getCurrentPath()); break; case 'hg-binary-change.hgdiff': case 'hg-solo-binary-change.hgdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_ADD, $change->getType()); $this->assertEqual( ArcanistDiffChangeType::FILE_BINARY, $change->getFileType()); break; case 'hg-binary-delete.hgdiff': $this->assertEqual(1, count($changes)); $change = reset($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_DELETE, $change->getType()); $this->assertEqual( ArcanistDiffChangeType::FILE_BINARY, $change->getFileType()); break; case 'git-replace-symlink.gitdiff': $this->assertEqual(1, count($changes)); $change = array_shift($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_CHANGE, $change->getType()); break; case 'svn-1.7-property-added.svndiff': $this->assertEqual(1, count($changes)); $change = head($changes); $new_properties = $change->getNewProperties(); $this->assertEqual(2, count($new_properties)); $this->assertEqual('*', idx($new_properties, 'svn:executable')); $this->assertEqual('text/html', 
idx($new_properties, 'svn:mime-type')); break; case 'hg-diff-range.hgdiff': $this->assertEqual(1, count($changes)); $change = array_shift($changes); $this->assertEqual( 'Test.java', $change->getOldPath()); $this->assertEqual( 'Test.java', $change->getCurrentPath()); break; case 'hg-patch.hgdiff': $this->assertEqual(1, count($changes)); break; case 'hg-patch-git.hgdiff': $this->assertEqual(1, count($changes)); break; case 'custom-prefixes.gitdiff': $this->assertEqual(1, count($changes)); $change = head($changes); $this->assertEqual( 'dst/file', $change->getCurrentPath()); break; case 'more-newlines.svndiff': $this->assertEqual(1, count($changes)); break; case 'suppress-blank-empty.gitdiff': $this->assertEqual(1, count($changes)); break; case 'svn-property-windows.svndiff': $this->assertEqual(1, count($changes)); break; case 'rcs-addline.rcsdiff': $this->assertEqual(1, count($changes)); $change = array_shift($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_CHANGE, $change->getType()); break; case 'rcs-deleteline.rcsdiff': $this->assertEqual(1, count($changes)); $change = array_shift($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_CHANGE, $change->getType()); break; case 'comment.svndiff': $this->assertEqual(1, count($changes)); $change = array_shift($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_CHANGE, $change->getType()); break; case 'svnlook-basics.svndiff': case 'svnlook-add.svndiff': case 'svnlook-delete.svndiff': case 'svnlook-copied.svndiff': $this->assertEqual(1, count($changes)); break; case 'git-format-patch.gitdiff': $this->assertEqual(2, count($changes)); $change = array_shift($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_MESSAGE, $change->getType()); $this->assertEqual('WIP', $change->getMetadata('message')); $change = array_shift($changes); $this->assertEqual( ArcanistDiffChangeType::TYPE_CHANGE, $change->getType()); break; default: throw new Exception("No test block for diff file {$diff_file}."); break; 
} } public function testGitPrefixStripping() { static $tests = array( 'a/file.c' => 'file.c', 'b/file.c' => 'file.c', 'i/file.c' => 'file.c', 'c/file.c' => 'file.c', 'w/file.c' => 'file.c', 'o/file.c' => 'file.c', '1/file.c' => 'file.c', '2/file.c' => 'file.c', 'src/file.c' => 'src/file.c', 'file.c' => 'file.c', ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, ArcanistDiffParser::stripGitPathPrefix($input), "Strip git prefix from '{$input}'."); } } public function testGitPathSplitting() { static $tests = array( 'a/old.c b/new.c' => array('old.c', 'new.c'), "a/old.c b/new.c\n" => array('old.c', 'new.c'), "a/old.c b/new.c\r\n" => array('old.c', 'new.c'), 'old.c new.c' => array('old.c', 'new.c'), '1/old.c 2/new.c' => array('old.c', 'new.c'), '"a/\\"quotes1\\"" "b/\\"quotes2\\""' => array( '"quotes1"', '"quotes2"', ), '"a/\\"quotes and spaces1\\"" "b/\\"quotes and spaces2\\""' => array( '"quotes and spaces1"', '"quotes and spaces2"', ), '"a/\\342\\230\\2031" "b/\\342\\230\\2032"' => array( "\xE2\x98\x831", "\xE2\x98\x832", ), 'a/Core Data/old.c b/Core Data/new.c' => array( 'Core Data/old.c', 'Core Data/new.c', ), 'some file with spaces.c some file with spaces.c' => array( 'some file with spaces.c', 'some file with spaces.c', ), ); foreach ($tests as $input => $expect) { $result = ArcanistDiffParser::splitGitDiffPaths($input); $this->assertEqual( $expect, $result, "Split: {$input}"); } static $ambiguous = array( 'old file with spaces.c new file with spaces.c', ); foreach ($ambiguous as $input) { $caught = null; try { ArcanistDiffParser::splitGitDiffPaths($input); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue( ($caught instanceof Exception), "Ambiguous: {$input}"); } } } diff --git a/src/parser/diff/ArcanistDiffChange.php b/src/parser/diff/ArcanistDiffChange.php index ec9762b3..3b8c04fa 100644 --- a/src/parser/diff/ArcanistDiffChange.php +++ b/src/parser/diff/ArcanistDiffChange.php @@ -1,313 +1,311 @@ originalFileData = 
$original_file_data; return $this; } public function getOriginalFileData() { return $this->originalFileData; } public function setCurrentFileData($current_file_data) { $this->currentFileData = $current_file_data; return $this; } public function getCurrentFileData() { return $this->currentFileData; } public function toDictionary() { $hunks = array(); foreach ($this->hunks as $hunk) { $hunks[] = $hunk->toDictionary(); } return array( 'metadata' => $this->metadata, 'oldPath' => $this->oldPath, 'currentPath' => $this->currentPath, 'awayPaths' => $this->awayPaths, 'oldProperties' => $this->oldProperties, 'newProperties' => $this->newProperties, 'type' => $this->type, 'fileType' => $this->fileType, 'commitHash' => $this->commitHash, 'hunks' => $hunks, ); } public static function newFromDictionary(array $dict) { $hunks = array(); foreach ($dict['hunks'] as $hunk) { $hunks[] = ArcanistDiffHunk::newFromDictionary($hunk); } $obj = new ArcanistDiffChange(); $obj->metadata = $dict['metadata']; $obj->oldPath = $dict['oldPath']; $obj->currentPath = $dict['currentPath']; // TODO: The backend is shipping down some bogus data, e.g. diff 199453. // Should probably clean this up. 
$obj->awayPaths = nonempty($dict['awayPaths'], array()); $obj->oldProperties = nonempty($dict['oldProperties'], array()); $obj->newProperties = nonempty($dict['newProperties'], array()); $obj->type = $dict['type']; $obj->fileType = $dict['fileType']; $obj->commitHash = $dict['commitHash']; $obj->hunks = $hunks; return $obj; } public static function newFromConduit(array $dicts) { $changes = array(); foreach ($dicts as $dict) { $changes[] = self::newFromDictionary($dict); } return $changes; } public function getChangedLines($type) { $lines = array(); foreach ($this->hunks as $hunk) { $lines += $hunk->getChangedLines($type); } return $lines; } public function getAllMetadata() { return $this->metadata; } public function setMetadata($key, $value) { $this->metadata[$key] = $value; return $this; } public function getMetadata($key) { return idx($this->metadata, $key); } public function setCommitHash($hash) { $this->commitHash = $hash; return $this; } public function getCommitHash() { return $this->commitHash; } public function addAwayPath($path) { $this->awayPaths[] = $path; return $this; } public function getAwayPaths() { return $this->awayPaths; } public function setFileType($type) { $this->fileType = $type; return $this; } public function getFileType() { return $this->fileType; } public function setType($type) { $this->type = $type; return $this; } public function getType() { return $this->type; } public function setOldProperty($key, $value) { $this->oldProperties[$key] = $value; return $this; } public function setNewProperty($key, $value) { $this->newProperties[$key] = $value; return $this; } public function getOldProperties() { return $this->oldProperties; } public function getNewProperties() { return $this->newProperties; } public function setCurrentPath($path) { $this->currentPath = $this->filterPath($path); return $this; } public function getCurrentPath() { return $this->currentPath; } public function setOldPath($path) { $this->oldPath = $this->filterPath($path); 
return $this; } public function getOldPath() { return $this->oldPath; } public function addHunk(ArcanistDiffHunk $hunk) { $this->hunks[] = $hunk; return $this; } public function getHunks() { return $this->hunks; } /** * @return array $old => array($new, ) */ public function buildLineMap() { $line_map = array(); $old = 1; $new = 1; foreach ($this->getHunks() as $hunk) { for ($n = $old; $n < $hunk->getOldOffset(); $n++) { $line_map[$n] = array($n + $new - $old); } $old = $hunk->getOldOffset(); $new = $hunk->getNewOffset(); $olds = array(); $news = array(); $lines = explode("\n", $hunk->getCorpus()); foreach ($lines as $line) { $type = substr($line, 0, 1); if ($type == '-' || $type == ' ') { $olds[] = $old; $old++; } if ($type == '+' || $type == ' ') { $news[] = $new; $new++; } if ($type == ' ' || $type == '') { $line_map += array_fill_keys($olds, $news); $olds = array(); $news = array(); } } } return $line_map; } public function convertToBinaryChange(ArcanistRepositoryAPI $api) { // Fill in the binary data from the working copy. 
$this->setOriginalFileData( $api->getOriginalFileData( $this->getOldPath())); $this->setCurrentFileData( $api->getCurrentFileData( $this->getCurrentPath())); $this->hunks = array(); $this->setFileType(ArcanistDiffChangeType::FILE_BINARY); return $this; } protected function filterPath($path) { if ($path == '/dev/null') { return null; } return $path; } public function renderTextSummary() { $type = $this->getType(); $file = $this->getFileType(); $char = ArcanistDiffChangeType::getSummaryCharacterForChangeType($type); $attr = ArcanistDiffChangeType::getShortNameForFileType($file); if ($attr) { $attr = '('.$attr.')'; } $summary = array(); $summary[] = sprintf( '%s %5.5s %s', $char, $attr, $this->getCurrentPath()); if (ArcanistDiffChangeType::isOldLocationChangeType($type)) { foreach ($this->getAwayPaths() as $path) { $summary[] = ' to: '.$path; } } if (ArcanistDiffChangeType::isNewLocationChangeType($type)) { $summary[] = ' from: '.$this->getOldPath(); } return implode("\n", $summary); } public function getSymlinkTarget() { if ($this->getFileType() != ArcanistDiffChangeType::FILE_SYMLINK) { throw new Exception('Not a symlink!'); } $hunks = $this->getHunks(); $hunk = reset($hunks); $corpus = $hunk->getCorpus(); $match = null; if (!preg_match('/^\+(?:link )?(.*)$/m', $corpus, $match)) { throw new Exception('Failed to extract link target!'); } return trim($match[1]); } public function setNeedsSyntheticGitHunks($needs_synthetic_git_hunks) { $this->needsSyntheticGitHunks = $needs_synthetic_git_hunks; return $this; } public function getNeedsSyntheticGitHunks() { return $this->needsSyntheticGitHunks; } } diff --git a/src/parser/diff/ArcanistDiffChangeType.php b/src/parser/diff/ArcanistDiffChangeType.php index 60dd3fc4..2e513922 100644 --- a/src/parser/diff/ArcanistDiffChangeType.php +++ b/src/parser/diff/ArcanistDiffChangeType.php @@ -1,113 +1,112 @@ 'A', self::TYPE_CHANGE => 'M', self::TYPE_DELETE => 'D', self::TYPE_MOVE_AWAY => 'V', self::TYPE_COPY_AWAY => 'P', 
self::TYPE_MOVE_HERE => 'V', self::TYPE_COPY_HERE => 'P', self::TYPE_MULTICOPY => 'P', self::TYPE_MESSAGE => 'Q', self::TYPE_CHILD => '@', ); return idx($types, coalesce($type, '?'), '~'); } public static function getShortNameForFileType($type) { static $names = array( self::FILE_TEXT => null, self::FILE_DIRECTORY => 'dir', self::FILE_IMAGE => 'img', self::FILE_BINARY => 'bin', self::FILE_SYMLINK => 'sym', ); return idx($names, coalesce($type, '?'), '???'); } public static function isOldLocationChangeType($type) { static $types = array( ArcanistDiffChangeType::TYPE_MOVE_AWAY => true, ArcanistDiffChangeType::TYPE_COPY_AWAY => true, ArcanistDiffChangeType::TYPE_MULTICOPY => true, ); return isset($types[$type]); } public static function isNewLocationChangeType($type) { static $types = array( ArcanistDiffChangeType::TYPE_MOVE_HERE => true, ArcanistDiffChangeType::TYPE_COPY_HERE => true, ); return isset($types[$type]); } public static function isDeleteChangeType($type) { static $types = array( ArcanistDiffChangeType::TYPE_DELETE => true, ArcanistDiffChangeType::TYPE_MOVE_AWAY => true, ArcanistDiffChangeType::TYPE_MULTICOPY => true, ); return isset($types[$type]); } public static function isCreateChangeType($type) { static $types = array( ArcanistDiffChangeType::TYPE_ADD => true, ArcanistDiffChangeType::TYPE_COPY_HERE => true, ArcanistDiffChangeType::TYPE_MOVE_HERE => true, ); return isset($types[$type]); } public static function isModifyChangeType($type) { static $types = array( ArcanistDiffChangeType::TYPE_CHANGE => true, ); return isset($types[$type]); } public static function getFullNameForChangeType($type) { static $types = array( self::TYPE_ADD => 'Added', self::TYPE_CHANGE => 'Modified', self::TYPE_DELETE => 'Deleted', self::TYPE_MOVE_AWAY => 'Moved Away', self::TYPE_COPY_AWAY => 'Copied Away', self::TYPE_MOVE_HERE => 'Moved Here', self::TYPE_COPY_HERE => 'Copied Here', self::TYPE_MULTICOPY => 'Deleted After Multiple Copy', self::TYPE_MESSAGE => 'Commit Message', 
self::TYPE_CHILD => 'Contents Modified', ); return idx($types, coalesce($type, '?'), 'Unknown'); } } diff --git a/src/parser/diff/ArcanistDiffHunk.php b/src/parser/diff/ArcanistDiffHunk.php index 78269f43..f47dd242 100644 --- a/src/parser/diff/ArcanistDiffHunk.php +++ b/src/parser/diff/ArcanistDiffHunk.php @@ -1,173 +1,171 @@ $this->oldOffset, 'newOffset' => $this->newOffset, 'oldLength' => $this->oldLength, 'newLength' => $this->newLength, 'addLines' => $this->addLines, 'delLines' => $this->delLines, 'isMissingOldNewline' => $this->isMissingOldNewline, 'isMissingNewNewline' => $this->isMissingNewNewline, 'corpus' => (string)$this->corpus, ); } public static function newFromDictionary(array $dict) { $obj = new ArcanistDiffHunk(); $obj->oldOffset = $dict['oldOffset']; $obj->newOffset = $dict['newOffset']; $obj->oldLength = $dict['oldLength']; $obj->newLength = $dict['newLength']; $obj->addLines = $dict['addLines']; $obj->delLines = $dict['delLines']; $obj->isMissingOldNewline = $dict['isMissingOldNewline']; $obj->isMissingNewNewline = $dict['isMissingNewNewline']; $obj->corpus = $dict['corpus']; return $obj; } public function getChangedLines($type) { $old_map = array(); $new_map = array(); $cover_map = array(); $oline = $this->getOldOffset(); $nline = $this->getNewOffset(); foreach (explode("\n", $this->getCorpus()) as $line) { $char = strlen($line) ? 
$line[0] : '~'; switch ($char) { case '-': $old_map[$oline] = true; $cover_map[$oline] = true; ++$oline; break; case '+': $new_map[$nline] = true; if ($oline > 1) { $cover_map[$oline - 1] = true; } $cover_map[$oline] = true; ++$nline; break; default: ++$oline; ++$nline; break; } } switch ($type) { case 'new': return $new_map; case 'old': return $old_map; case 'cover': return $cover_map; default: throw new Exception("Unknown line change type '{$type}'."); } } public function setOldOffset($old_offset) { $this->oldOffset = $old_offset; return $this; } public function getOldOffset() { return $this->oldOffset; } public function setNewOffset($new_offset) { $this->newOffset = $new_offset; return $this; } public function getNewOffset() { return $this->newOffset; } public function setOldLength($old_length) { $this->oldLength = $old_length; return $this; } public function getOldLength() { return $this->oldLength; } public function setNewLength($new_length) { $this->newLength = $new_length; return $this; } public function getNewLength() { return $this->newLength; } public function setAddLines($add_lines) { $this->addLines = $add_lines; return $this; } public function getAddLines() { return $this->addLines; } public function setDelLines($del_lines) { $this->delLines = $del_lines; return $this; } public function getDelLines() { return $this->delLines; } public function setCorpus($corpus) { $this->corpus = $corpus; return $this; } public function getCorpus() { return $this->corpus; } public function setIsMissingOldNewline($missing) { $this->isMissingOldNewline = (bool)$missing; return $this; } public function getIsMissingOldNewline() { return $this->isMissingOldNewline; } public function setIsMissingNewNewline($missing) { $this->isMissingNewNewline = (bool)$missing; return $this; } public function getIsMissingNewNewline() { return $this->isMissingNewNewline; } } diff --git a/src/repository/api/ArcanistGitAPI.php b/src/repository/api/ArcanistGitAPI.php index d637fced..5f4d180b 
100644 --- a/src/repository/api/ArcanistGitAPI.php +++ b/src/repository/api/ArcanistGitAPI.php @@ -1,1216 +1,1212 @@ setCWD($this->getPath()); return $future; } public function execPassthru($pattern /* , ... */) { $args = func_get_args(); static $git = null; if ($git === null) { if (phutil_is_windows()) { // NOTE: On Windows, phutil_passthru() uses 'bypass_shell' because // everything goes to hell if we don't. We must provide an absolute // path to Git for this to work properly. $git = Filesystem::resolveBinary('git'); $git = csprintf('%s', $git); } else { $git = 'git'; } } $args[0] = $git.' '.$args[0]; return call_user_func_array('phutil_passthru', $args); } public function getSourceControlSystemName() { return 'git'; } public function getMetadataPath() { static $path = null; if ($path === null) { list($stdout) = $this->execxLocal('rev-parse --git-dir'); $path = rtrim($stdout, "\n"); // the output of git rev-parse --git-dir is an absolute path, unless // the cwd is the root of the repository, in which case it uses the // relative path of .git. If we get this relative path, turn it into // an absolute path. if ($path === '.git') { $path = $this->getPath('.git'); } } return $path; } public function getHasCommits() { return !$this->repositoryHasNoCommits; } /** * Tests if a child commit is descendant of a parent commit. * If child and parent are the same, it returns false. * @param Child commit SHA. * @param Parent commit SHA. * @return bool True if the child is a descendant of the parent. */ private function isDescendant($child, $parent) { list($common_ancestor) = $this->execxLocal( 'merge-base %s %s', $child, $parent); $common_ancestor = trim($common_ancestor); return ($common_ancestor == $parent) && ($common_ancestor != $child); } public function getLocalCommitInformation() { if ($this->repositoryHasNoCommits) { // Zero commits. throw new Exception( "You can't get local commit information for a repository with no ". 
"commits."); } else if ($this->getBaseCommit() == self::GIT_MAGIC_ROOT_COMMIT) { // One commit. $against = 'HEAD'; } else { // 2..N commits. We include commits reachable from HEAD which are // not reachable from the base commit; this is consistent with user // expectations even though it is not actually the diff range. // Particularly: // // | // D <----- master branch // | // C Y <- feature branch // | /| // B X // | / // A // | // // If "A, B, C, D" are master, and the user is at Y, when they run // "arc diff B" they want (and get) a diff of B vs Y, but they think about // this as being the commits X and Y. If we log "B..Y", we only show // Y. With "Y --not B", we show X and Y. if ($this->symbolicHeadCommit !== null) { $base_commit = $this->getBaseCommit(); $resolved_base = $this->resolveCommit($base_commit); $head_commit = $this->symbolicHeadCommit; $resolved_head = $this->getHeadCommit(); if (!$this->isDescendant($resolved_head, $resolved_base)) { // NOTE: Since the base commit will have been resolved as the // merge-base of the specified base and the specified HEAD, we can't // easily tell exactly what's wrong with the range. // For example, `arc diff HEAD --head HEAD^^^` is invalid because it // is reversed, but resolving the commit "HEAD" will compute its // merge-base with "HEAD^^^", which is "HEAD^^^", so the range will // appear empty. throw new ArcanistUsageException( pht( 'The specified commit range is empty, backward or invalid: the '. 'base (%s) is not an ancestor of the head (%s). You can not '. 'diff an empty or reversed commit range.', $base_commit, $head_commit)); } } $against = csprintf( '%s --not %s', $this->getHeadCommit(), $this->getBaseCommit()); } // NOTE: Windows escaping of "%" symbols apparently is inherently broken; // when passed throuhgh escapeshellarg() they are replaced with spaces. // TODO: Learn how cmd.exe works and find some clever workaround? // NOTE: If we use "%x00", output is truncated in Windows. 
list($info) = $this->execxLocal( phutil_is_windows() ? 'log %C --format=%C --' : 'log %C --format=%s --', $against, // NOTE: "%B" is somewhat new, use "%s%n%n%b" instead. '%H%x01%T%x01%P%x01%at%x01%an%x01%aE%x01%s%x01%s%n%n%b%x02'); $commits = array(); $info = trim($info, " \n\2"); if (!strlen($info)) { return array(); } $info = explode("\2", $info); foreach ($info as $line) { list($commit, $tree, $parents, $time, $author, $author_email, $title, $message) = explode("\1", trim($line), 8); $message = rtrim($message); $commits[$commit] = array( 'commit' => $commit, 'tree' => $tree, 'parents' => array_filter(explode(' ', $parents)), 'time' => $time, 'author' => $author, 'summary' => $title, 'message' => $message, 'authorEmail' => $author_email, ); } return $commits; } protected function buildBaseCommit($symbolic_commit) { if ($symbolic_commit !== null) { if ($symbolic_commit == ArcanistGitAPI::GIT_MAGIC_ROOT_COMMIT) { $this->setBaseCommitExplanation( 'you explicitly specified the empty tree.'); return $symbolic_commit; } list($err, $merge_base) = $this->execManualLocal( 'merge-base %s %s', $symbolic_commit, $this->getHeadCommit()); if ($err) { throw new ArcanistUsageException( "Unable to find any git commit named '{$symbolic_commit}' in ". "this repository."); } if ($this->symbolicHeadCommit === null) { $this->setBaseCommitExplanation( "it is the merge-base of the explicitly specified base commit ". "'{$symbolic_commit}' and HEAD."); } else { $this->setBaseCommitExplanation( "it is the merge-base of the explicitly specified base commit ". "'{$symbolic_commit}' and the explicitly specified head ". "commit '{$this->symbolicHeadCommit}'."); } return trim($merge_base); } // Detect zero-commit or one-commit repositories. There is only one // relative-commit value that makes any sense in these repositories: the // empty tree. 
list($err) = $this->execManualLocal('rev-parse --verify HEAD^'); if ($err) { list($err) = $this->execManualLocal('rev-parse --verify HEAD'); if ($err) { $this->repositoryHasNoCommits = true; } if ($this->repositoryHasNoCommits) { $this->setBaseCommitExplanation( 'the repository has no commits.'); } else { $this->setBaseCommitExplanation( 'the repository has only one commit.'); } return self::GIT_MAGIC_ROOT_COMMIT; } if ($this->getBaseCommitArgumentRules() || $this->getConfigurationManager()->getConfigFromAnySource('base')) { $base = $this->resolveBaseCommit(); if (!$base) { throw new ArcanistUsageException( "None of the rules in your 'base' configuration matched a valid ". "commit. Adjust rules or specify which commit you want to use ". "explicitly."); } return $base; } $do_write = false; $default_relative = null; $working_copy = $this->getWorkingCopyIdentity(); if ($working_copy) { $default_relative = $working_copy->getProjectConfig( 'git.default-relative-commit'); $this->setBaseCommitExplanation( "it is the merge-base of '{$default_relative}' and HEAD, as ". "specified in 'git.default-relative-commit' in '.arcconfig'. This ". "setting overrides other settings."); } if (!$default_relative) { list($err, $upstream) = $this->execManualLocal( "rev-parse --abbrev-ref --symbolic-full-name '@{upstream}'"); if (!$err) { $default_relative = trim($upstream); $this->setBaseCommitExplanation( "it is the merge-base of '{$default_relative}' (the Git upstream ". "of the current branch) HEAD."); } } if (!$default_relative) { $default_relative = $this->readScratchFile('default-relative-commit'); $default_relative = trim($default_relative); if ($default_relative) { $this->setBaseCommitExplanation( "it is the merge-base of '{$default_relative}' and HEAD, as ". "specified in '.git/arc/default-relative-commit'."); } } if (!$default_relative) { // TODO: Remove the history lesson soon. 
echo phutil_console_format( "** Select a Default Commit Range **\n\n"); echo phutil_console_wrap( "You're running a command which operates on a range of revisions ". "(usually, from some revision to HEAD) but have not specified the ". "revision that should determine the start of the range.\n\n". "Previously, arc assumed you meant 'HEAD^' when you did not specify ". "a start revision, but this behavior does not make much sense in ". "most workflows outside of Facebook's historic git-svn workflow.\n\n". "arc no longer assumes 'HEAD^'. You must specify a relative commit ". "explicitly when you invoke a command (e.g., `arc diff HEAD^`, not ". "just `arc diff`) or select a default for this working copy.\n\n". "In most cases, the best default is 'origin/master'. You can also ". "select 'HEAD^' to preserve the old behavior, or some other remote ". "or branch. But you almost certainly want to select ". "'origin/master'.\n\n". "(Technically: the merge-base of the selected revision and HEAD is ". "used to determine the start of the commit range.)"); $prompt = 'What default do you want to use? [origin/master]'; $default = phutil_console_prompt($prompt); if (!strlen(trim($default))) { $default = 'origin/master'; } $default_relative = $default; $do_write = true; } list($object_type) = $this->execxLocal( 'cat-file -t %s', $default_relative); if (trim($object_type) !== 'commit') { throw new Exception( "Relative commit '{$default_relative}' is not the name of a commit!"); } if ($do_write) { // Don't perform this write until we've verified that the object is a // valid commit name. $this->writeScratchFile('default-relative-commit', $default_relative); $this->setBaseCommitExplanation( "it is the merge-base of '{$default_relative}' and HEAD, as you ". 
"just specified."); } list($merge_base) = $this->execxLocal( 'merge-base %s HEAD', $default_relative); return trim($merge_base); } public function getHeadCommit() { if ($this->resolvedHeadCommit === null) { $this->resolvedHeadCommit = $this->resolveCommit( coalesce($this->symbolicHeadCommit, 'HEAD')); } return $this->resolvedHeadCommit; } final public function setHeadCommit($symbolic_commit) { $this->symbolicHeadCommit = $symbolic_commit; $this->reloadCommitRange(); return $this; } /** * Translates a symbolic commit (like "HEAD^") to a commit identifier. * @param string_symbol commit. * @return string the commit SHA. */ private function resolveCommit($symbolic_commit) { list($err, $commit_hash) = $this->execManualLocal( 'rev-parse %s', $symbolic_commit); if ($err) { throw new ArcanistUsageException( "Unable to find any git commit named '{$symbolic_commit}' in ". "this repository."); } return trim($commit_hash); } private function getDiffFullOptions($detect_moves_and_renames = true) { $options = array( self::getDiffBaseOptions(), '--no-color', '--src-prefix=a/', '--dst-prefix=b/', '-U'.$this->getDiffLinesOfContext(), ); if ($detect_moves_and_renames) { $options[] = '-M'; $options[] = '-C'; } return implode(' ', $options); } private function getDiffBaseOptions() { $options = array( // Disable external diff drivers, like graphical differs, since Arcanist // needs to capture the diff text. '--no-ext-diff', // Disable textconv so we treat binary files as binary, even if they have // an alternative textual representation. TODO: Ideally, Differential // would ship up the binaries for 'arc patch' but display the textconv // output in the visual diff. '--no-textconv', ); return implode(' ', $options); } /** * @param the base revision * @param head revision. 
If this is null, the generated diff will include the * working copy */ public function getFullGitDiff($base, $head=null) { $options = $this->getDiffFullOptions(); $diff_revision = $base; if ($head) { $diff_revision .= '..'.$head; } list($stdout) = $this->execxLocal( "diff {$options} %s --", $diff_revision); return $stdout; } /** * @param string Path to generate a diff for. * @param bool If true, detect moves and renames. Otherwise, ignore * moves/renames; this is useful because it prompts git to * generate real diff text. */ public function getRawDiffText($path, $detect_moves_and_renames = true) { $options = $this->getDiffFullOptions($detect_moves_and_renames); list($stdout) = $this->execxLocal( "diff {$options} %s -- %s", $this->getBaseCommit(), $path); return $stdout; } public function getBranchName() { // TODO: consider: // // $ git rev-parse --abbrev-ref `git symbolic-ref HEAD` // // But that may fail if you're not on a branch. list($stdout) = $this->execxLocal('branch --no-color'); // Assume that any branch beginning with '(' means 'no branch', or whatever // 'no branch' is in the current locale. $matches = null; if (preg_match('/^\* ([^\(].*)$/m', $stdout, $matches)) { return $matches[1]; } return null; } public function getRemoteURI() { list($stdout) = $this->execxLocal('remote show -n origin'); $matches = null; if (preg_match('/^\s*Fetch URL: (.*)$/m', $stdout, $matches)) { return trim($matches[1]); } return null; } public function getSourceControlPath() { // TODO: Try to get something useful here. return null; } public function getGitCommitLog() { $relative = $this->getBaseCommit(); if ($this->repositoryHasNoCommits) { // No commits yet. return ''; } else if ($relative == self::GIT_MAGIC_ROOT_COMMIT) { // First commit. list($stdout) = $this->execxLocal( 'log --format=medium HEAD'); } else { // 2..N commits. 
list($stdout) = $this->execxLocal( 'log --first-parent --format=medium %s..%s', $this->getBaseCommit(), $this->getHeadCommit()); } return $stdout; } public function getGitHistoryLog() { list($stdout) = $this->execxLocal( 'log --format=medium -n%d %s', self::SEARCH_LENGTH_FOR_PARENT_REVISIONS, $this->getBaseCommit()); return $stdout; } public function getSourceControlBaseRevision() { list($stdout) = $this->execxLocal( 'rev-parse %s', $this->getBaseCommit()); return rtrim($stdout, "\n"); } public function getCanonicalRevisionName($string) { $match = null; if (preg_match('/@([0-9]+)$/', $string, $match)) { $stdout = $this->getHashFromFromSVNRevisionNumber($match[1]); } else { list($stdout) = $this->execxLocal( 'show -s --format=%C %s', '%H', $string); } return rtrim($stdout); } private function executeSVNFindRev($input, $vcs) { $match = array(); list($stdout) = $this->execxLocal( 'svn find-rev %s', $input); if (!$stdout) { - throw new ArcanistUsageException("Cannot find the {$vcs} equivalent " - ."of {$input}."); + throw new ArcanistUsageException( + "Cannot find the {$vcs} equivalent of {$input}."); } // When git performs a partial-rebuild during svn // look-up, we need to parse the final line $lines = explode("\n", $stdout); $stdout = $lines[count($lines) - 2]; return rtrim($stdout); } // Convert svn revision number to git hash public function getHashFromFromSVNRevisionNumber($revision_id) { return $this->executeSVNFindRev('r'.$revision_id, 'Git'); } // Convert a git hash to svn revision number public function getSVNRevisionNumberFromHash($hash) { return $this->executeSVNFindRev($hash, 'SVN'); } protected function buildUncommittedStatus() { $diff_options = $this->getDiffBaseOptions(); if ($this->repositoryHasNoCommits) { $diff_base = self::GIT_MAGIC_ROOT_COMMIT; } else { $diff_base = 'HEAD'; } // Find uncommitted changes. 
$uncommitted_future = $this->buildLocalFuture( array( 'diff %C --raw %s --', $diff_options, $diff_base, )); $untracked_future = $this->buildLocalFuture( array( 'ls-files --others --exclude-standard', )); // Unstaged changes $unstaged_future = $this->buildLocalFuture( array( 'diff-files --name-only', )); $futures = array( $uncommitted_future, $untracked_future, // NOTE: `git diff-files` races with each of these other commands // internally, and resolves with inconsistent results if executed // in parallel. To work around this, DO NOT run it at the same time. // After the other commands exit, we can start the `diff-files` command. ); Futures($futures)->resolveAll(); // We're clear to start the `git diff-files` now. $unstaged_future->start(); $result = new PhutilArrayWithDefaultValue(); list($stdout) = $uncommitted_future->resolvex(); $uncommitted_files = $this->parseGitStatus($stdout); foreach ($uncommitted_files as $path => $mask) { $result[$path] |= ($mask | self::FLAG_UNCOMMITTED); } list($stdout) = $untracked_future->resolvex(); $stdout = rtrim($stdout, "\n"); if (strlen($stdout)) { $stdout = explode("\n", $stdout); foreach ($stdout as $path) { $result[$path] |= self::FLAG_UNTRACKED; } } list($stdout, $stderr) = $unstaged_future->resolvex(); $stdout = rtrim($stdout, "\n"); if (strlen($stdout)) { $stdout = explode("\n", $stdout); foreach ($stdout as $path) { $result[$path] |= self::FLAG_UNSTAGED; } } return $result->toArray(); } protected function buildCommitRangeStatus() { list($stdout, $stderr) = $this->execxLocal( 'diff %C --raw %s --', $this->getDiffBaseOptions(), $this->getBaseCommit()); return $this->parseGitStatus($stdout); } public function getGitConfig($key, $default = null) { list($err, $stdout) = $this->execManualLocal('config %s', $key); if ($err) { return $default; } return rtrim($stdout); } public function getAuthor() { list($stdout) = $this->execxLocal('var GIT_AUTHOR_IDENT'); return preg_replace('/\s+<.*/', '', rtrim($stdout, "\n")); } public 
function addToCommit(array $paths) { $this->execxLocal( 'add -A -- %Ls', $paths); $this->reloadWorkingCopy(); return $this; } public function doCommit($message) { $tmp_file = new TempFile(); Filesystem::writeFile($tmp_file, $message); // NOTE: "--allow-empty-message" was introduced some time after 1.7.0.4, // so we do not provide it and thus require a message. $this->execxLocal( 'commit -F %s', $tmp_file); $this->reloadWorkingCopy(); return $this; } public function amendCommit($message = null) { if ($message === null) { $this->execxLocal('commit --amend --allow-empty -C HEAD'); } else { $tmp_file = new TempFile(); Filesystem::writeFile($tmp_file, $message); $this->execxLocal( 'commit --amend --allow-empty -F %s', $tmp_file); } $this->reloadWorkingCopy(); return $this; } public function getPreReceiveHookStatus($old_ref, $new_ref) { $options = $this->getDiffBaseOptions(); list($stdout) = $this->execxLocal( "diff {$options} --raw %s %s --", $old_ref, $new_ref); return $this->parseGitStatus($stdout, $full = true); } private function parseGitStatus($status, $full = false) { static $flags = array( 'A' => self::FLAG_ADDED, 'M' => self::FLAG_MODIFIED, 'D' => self::FLAG_DELETED, ); $status = trim($status); $lines = array(); foreach (explode("\n", $status) as $line) { if ($line) { $lines[] = preg_split("/[ \t]/", $line, 6); } } $files = array(); foreach ($lines as $line) { $mask = 0; $flag = $line[4]; $file = $line[5]; foreach ($flags as $key => $bits) { if ($flag == $key) { $mask |= $bits; } } if ($full) { $files[$file] = array( 'mask' => $mask, 'ref' => rtrim($line[3], '.'), ); } else { $files[$file] = $mask; } } return $files; } public function getAllFiles() { $future = $this->buildLocalFuture(array('ls-files -z')); return id(new LinesOfALargeExecFuture($future)) ->setDelimiter("\0"); } public function getChangedFiles($since_commit) { list($stdout) = $this->execxLocal( 'diff --raw %s', $since_commit); return $this->parseGitStatus($stdout); } public function 
getBlame($path) { // TODO: 'git blame' supports --porcelain and we should probably use it. list($stdout) = $this->execxLocal( 'blame --date=iso -w -M %s -- %s', $this->getBaseCommit(), $path); $blame = array(); foreach (explode("\n", trim($stdout)) as $line) { if (!strlen($line)) { continue; } // lines predating a git repo's history are blamed to the oldest revision, // with the commit hash prepended by a ^. we shouldn't count these lines // as blaming to the oldest diff's unfortunate author if ($line[0] == '^') { continue; } $matches = null; $ok = preg_match( '/^([0-9a-f]+)[^(]+?[(](.*?) +\d\d\d\d-\d\d-\d\d/', $line, $matches); if (!$ok) { throw new Exception("Bad blame? `{$line}'"); } $revision = $matches[1]; $author = $matches[2]; $blame[] = array($author, $revision); } return $blame; } public function getOriginalFileData($path) { return $this->getFileDataAtRevision($path, $this->getBaseCommit()); } public function getCurrentFileData($path) { return $this->getFileDataAtRevision($path, 'HEAD'); } private function parseGitTree($stdout) { $result = array(); $stdout = trim($stdout); if (!strlen($stdout)) { return $result; } $lines = explode("\n", $stdout); foreach ($lines as $line) { $matches = array(); $ok = preg_match( '/^(\d{6}) (blob|tree|commit) ([a-z0-9]{40})[\t](.*)$/', $line, $matches); if (!$ok) { throw new Exception('Failed to parse git ls-tree output!'); } $result[$matches[4]] = array( 'mode' => $matches[1], 'type' => $matches[2], 'ref' => $matches[3], ); } return $result; } private function getFileDataAtRevision($path, $revision) { - // NOTE: We don't want to just "git show {$revision}:{$path}" since if the // path was a directory at the given revision we'll get a list of its files // and treat it as though it as a file containing a list of other files, // which is silly. 
list($stdout) = $this->execxLocal( 'ls-tree %s -- %s', $revision, $path); $info = $this->parseGitTree($stdout); if (empty($info[$path])) { // No such path, or the path is a directory and we executed 'ls-tree dir/' // and got a list of its contents back. return null; } if ($info[$path]['type'] != 'blob') { // Path is or was a directory, not a file. return null; } list($stdout) = $this->execxLocal( 'cat-file blob %s', $info[$path]['ref']); return $stdout; } /** * Returns names of all the branches in the current repository. * * @return list> Dictionary of branch information. */ public function getAllBranches() { list($branch_info) = $this->execxLocal( 'branch --no-color'); $lines = explode("\n", rtrim($branch_info)); $result = array(); foreach ($lines as $line) { if (preg_match('@^[* ]+\(no branch|detached from \w+/\w+\)@', $line)) { // This is indicating that the working copy is in a detached state; // just ignore it. continue; } list($current, $name) = preg_split('/\s+/', $line, 2); $result[] = array( 'current' => !empty($current), 'name' => $name, ); } return $result; } public function getWorkingCopyRevision() { list($stdout) = $this->execxLocal('rev-parse HEAD'); return rtrim($stdout, "\n"); } public function getUnderlyingWorkingCopyRevision() { list($err, $stdout) = $this->execManualLocal('svn find-rev HEAD'); if (!$err && $stdout) { return rtrim($stdout, "\n"); } return $this->getWorkingCopyRevision(); } public function isHistoryDefaultImmutable() { return false; } public function supportsAmend() { return true; } public function supportsCommitRanges() { return true; } public function supportsLocalCommits() { return true; } public function hasLocalCommit($commit) { try { if (!$this->getCanonicalRevisionName($commit)) { return false; } } catch (CommandException $exception) { return false; } return true; } public function getAllLocalChanges() { $diff = $this->getFullGitDiff($this->getBaseCommit()); if (!strlen(trim($diff))) { return array(); } $parser = new 
ArcanistDiffParser(); return $parser->parseDiff($diff); } public function supportsLocalBranchMerge() { return true; } public function performLocalBranchMerge($branch, $message) { if (!$branch) { throw new ArcanistUsageException( 'Under git, you must specify the branch you want to merge.'); } $err = phutil_passthru( '(cd %s && git merge --no-ff -m %s %s)', $this->getPath(), $message, $branch); if ($err) { throw new ArcanistUsageException('Merge failed!'); } } public function getFinalizedRevisionMessage() { return "You may now push this commit upstream, as appropriate (e.g. with ". "'git push', or 'git svn dcommit', or by printing and faxing it)."; } public function getCommitMessage($commit) { list($message) = $this->execxLocal( 'log -n1 --format=%C %s --', '%s%n%n%b', $commit); return $message; } public function loadWorkingCopyDifferentialRevisions( ConduitClient $conduit, array $query) { $messages = $this->getGitCommitLog(); if (!strlen($messages)) { return array(); } $parser = new ArcanistDiffParser(); $messages = $parser->parseDiff($messages); // First, try to find revisions by explicit revision IDs in commit messages. $reason_map = array(); $revision_ids = array(); foreach ($messages as $message) { $object = ArcanistDifferentialCommitMessage::newFromRawCorpus( $message->getMetadata('message')); if ($object->getRevisionID()) { $revision_ids[] = $object->getRevisionID(); $reason_map[$object->getRevisionID()] = $message->getCommitHash(); } } if ($revision_ids) { $results = $conduit->callMethodSynchronous( 'differential.query', $query + array( 'ids' => $revision_ids, )); foreach ($results as $key => $result) { $hash = substr($reason_map[$result['id']], 0, 16); $results[$key]['why'] = "Commit message for '{$hash}' has explicit 'Differential Revision'."; } return $results; } // If we didn't succeed, try to find revisions by hash. 
$hashes = array(); foreach ($this->getLocalCommitInformation() as $commit) { $hashes[] = array('gtcm', $commit['commit']); $hashes[] = array('gttr', $commit['tree']); } $results = $conduit->callMethodSynchronous( 'differential.query', $query + array( 'commitHashes' => $hashes, )); foreach ($results as $key => $result) { $results[$key]['why'] = 'A git commit or tree hash in the commit range is already attached '. 'to the Differential revision.'; } return $results; } public function updateWorkingCopy() { $this->execxLocal('pull'); $this->reloadWorkingCopy(); } public function getCommitSummary($commit) { if ($commit == self::GIT_MAGIC_ROOT_COMMIT) { return '(The Empty Tree)'; } list($summary) = $this->execxLocal( 'log -n 1 --format=%C %s', '%s', $commit); return trim($summary); } public function backoutCommit($commit_hash) { $this->execxLocal( 'revert %s -n --no-edit', $commit_hash); $this->reloadWorkingCopy(); if (!$this->getUncommittedStatus()) { throw new ArcanistUsageException( "{$commit_hash} has already been reverted."); } } public function getBackoutMessage($commit_hash) { return 'This reverts commit '.$commit_hash.'.'; } public function isGitSubversionRepo() { return Filesystem::pathExists($this->getPath('.git/svn')); } public function resolveBaseCommitRule($rule, $source) { list($type, $name) = explode(':', $rule, 2); switch ($type) { case 'git': $matches = null; if (preg_match('/^merge-base\((.+)\)$/', $name, $matches)) { list($err, $merge_base) = $this->execManualLocal( 'merge-base %s HEAD', $matches[1]); if (!$err) { $this->setBaseCommitExplanation( "it is the merge-base of '{$matches[1]}' and HEAD, as ". "specified by '{$rule}' in your {$source} 'base' ". 
"configuration."); return trim($merge_base); } } else if (preg_match('/^branch-unique\((.+)\)$/', $name, $matches)) { list($err, $merge_base) = $this->execManualLocal( 'merge-base %s HEAD', $matches[1]); if ($err) { return null; } $merge_base = trim($merge_base); list($commits) = $this->execxLocal( 'log --format=%C %s..HEAD --', '%H', $merge_base); $commits = array_filter(explode("\n", $commits)); if (!$commits) { return null; } $commits[] = $merge_base; $head_branch_count = null; foreach ($commits as $commit) { list($branches) = $this->execxLocal( 'branch --contains %s', $commit); $branches = array_filter(explode("\n", $branches)); if ($head_branch_count === null) { // If this is the first commit, it's HEAD. Count how many // branches it is on; we want to include commits on the same // number of branches. This covers a case where this branch // has sub-branches and we're running "arc diff" here again // for whatever reason. $head_branch_count = count($branches); } else if (count($branches) > $head_branch_count) { foreach ($branches as $key => $branch) { $branches[$key] = trim($branch, ' *'); } $branches = implode(', ', $branches); $this->setBaseCommitExplanation( "it is the first commit between '{$merge_base}' (the ". "merge-base of '{$matches[1]}' and HEAD) which is also ". "contained by another branch ({$branches})."); return $commit; } } } else { list($err) = $this->execManualLocal( 'cat-file -t %s', $name); if (!$err) { $this->setBaseCommitExplanation( "it is specified by '{$rule}' in your {$source} 'base' ". "configuration."); return $name; } } break; case 'arc': switch ($name) { case 'empty': $this->setBaseCommitExplanation( "you specified '{$rule}' in your {$source} 'base' ". 
"configuration."); return self::GIT_MAGIC_ROOT_COMMIT; case 'amended': $text = $this->getCommitMessage('HEAD'); $message = ArcanistDifferentialCommitMessage::newFromRawCorpus( $text); if ($message->getRevisionID()) { $this->setBaseCommitExplanation( "HEAD has been amended with 'Differential Revision:', ". "as specified by '{$rule}' in your {$source} 'base' ". "configuration."); return 'HEAD^'; } break; case 'upstream': list($err, $upstream) = $this->execManualLocal( "rev-parse --abbrev-ref --symbolic-full-name '@{upstream}'"); if (!$err) { $upstream = rtrim($upstream); list($upstream_merge_base) = $this->execxLocal( 'merge-base %s HEAD', $upstream); $upstream_merge_base = rtrim($upstream_merge_base); $this->setBaseCommitExplanation( "it is the merge-base of the upstream of the current branch ". "and HEAD, and matched the rule '{$rule}' in your {$source} ". "'base' configuration."); return $upstream_merge_base; } break; case 'this': $this->setBaseCommitExplanation( "you specified '{$rule}' in your {$source} 'base' ". "configuration."); return 'HEAD^'; } default: return null; } return null; } public function canStashChanges() { return true; } public function stashChanges() { $this->execxLocal('stash'); $this->reloadWorkingCopy(); } public function unstashChanges() { $this->execxLocal('stash pop'); } protected function didReloadCommitRange() { // After an amend, the symbolic head may resolve to a different commit. $this->resolvedHeadCommit = null; } } diff --git a/src/repository/api/ArcanistMercurialAPI.php b/src/repository/api/ArcanistMercurialAPI.php index 6ba31870..27e579e8 100644 --- a/src/repository/api/ArcanistMercurialAPI.php +++ b/src/repository/api/ArcanistMercurialAPI.php @@ -1,1094 +1,1090 @@ setCWD($this->getPath()); return $future; } public function execPassthru($pattern /* , ... 
*/) { $args = func_get_args(); if (phutil_is_windows()) { $args[0] = 'hg '.$args[0]; } else { $args[0] = 'HGPLAIN=1 hg '.$args[0]; } return call_user_func_array('phutil_passthru', $args); } public function getSourceControlSystemName() { return 'hg'; } public function getMetadataPath() { return $this->getPath('.hg'); } public function getSourceControlBaseRevision() { return $this->getCanonicalRevisionName($this->getBaseCommit()); } public function getCanonicalRevisionName($string) { $match = null; if ($this->isHgSubversionRepo() && preg_match('/@([0-9]+)$/', $string, $match)) { $string = hgsprintf('svnrev(%s)', $match[1]); } list($stdout) = $this->execxLocal( 'log -l 1 --template %s -r %s --', '{node}', $string); return $stdout; } public function getHashFromFromSVNRevisionNumber($revision_id) { $matches = array(); $string = hgsprintf('svnrev(%s)', $revision_id); list($stdout) = $this->execxLocal( 'log -l 1 --template %s -r %s --', '{node}', $string); if (!$stdout) { - throw new ArcanistUsageException("Cannot find the HG equivalent " - ."of {$revision_id} given."); + throw new ArcanistUsageException( + "Cannot find the HG equivalent of {$revision_id} given."); } return $stdout; } public function getSVNRevisionNumberFromHash($hash) { $matches = array(); list($stdout) = $this->execxLocal( 'log -r %s --template {svnrev}', $hash); if (!$stdout) { - throw new ArcanistUsageException("Cannot find the SVN equivalent " - ."of {$hash} given."); + throw new ArcanistUsageException( + "Cannot find the SVN equivalent of {$hash} given."); } return $stdout; } public function getSourceControlPath() { return '/'; } public function getBranchName() { if (!$this->branch) { list($stdout) = $this->execxLocal('branch'); $this->branch = trim($stdout); } return $this->branch; } public function didReloadCommitRange() { $this->localCommitInfo = null; } protected function buildBaseCommit($symbolic_commit) { if ($symbolic_commit !== null) { try { $commit = $this->getCanonicalRevisionName( 
hgsprintf('ancestor(%s,.)', $symbolic_commit)); } catch (Exception $ex) { // Try it as a revset instead of a commit id try { $commit = $this->getCanonicalRevisionName( hgsprintf('ancestor(%R,.)', $symbolic_commit)); } catch (Exception $ex) { throw new ArcanistUsageException( - "Commit '{$symbolic_commit}' is not a valid Mercurial commit ". - "identifier."); + "Commit '{$symbolic_commit}' is not a valid Mercurial commit ". + "identifier."); } } - $this->setBaseCommitExplanation('it is the greatest common ancestor of '. - 'the working directory and the commit you specified explicitly.'); + $this->setBaseCommitExplanation( + 'it is the greatest common ancestor of the working directory '. + 'and the commit you specified explicitly.'); return $commit; } if ($this->getBaseCommitArgumentRules() || $this->getConfigurationManager()->getConfigFromAnySource('base')) { $base = $this->resolveBaseCommit(); if (!$base) { throw new ArcanistUsageException( "None of the rules in your 'base' configuration matched a valid ". "commit. Adjust rules or specify which commit you want to use ". "explicitly."); } return $base; } // Mercurial 2.1 and up have phases which indicate if something is // published or not. To find which revs are outgoing, it's much // faster to check the phase instead of actually checking the server. if ($this->supportsPhases()) { list($err, $stdout) = $this->execManualLocal( 'log --branch %s -r %s --style default', $this->getBranchName(), 'draft()'); } else { list($err, $stdout) = $this->execManualLocal( 'outgoing --branch %s --style default', - $this->getBranchName()); + $this->getBranchName()); } if (!$err) { $logs = ArcanistMercurialParser::parseMercurialLog($stdout); } else { // Mercurial (in some versions?) raises an error when there's nothing // outgoing. $logs = array(); } if (!$logs) { $this->setBaseCommitExplanation( 'you have no outgoing commits, so arc assumes you intend to submit '. 
'uncommitted changes in the working copy.'); return $this->getWorkingCopyRevision(); } $outgoing_revs = ipull($logs, 'rev'); // This is essentially an implementation of a theoretical `hg merge-base` // command. $against = $this->getWorkingCopyRevision(); while (true) { // NOTE: The "^" and "~" syntaxes were only added in hg 1.9, which is // new as of July 2011, so do this in a compatible way. Also, "hg log" // and "hg outgoing" don't necessarily show parents (even if given an // explicit template consisting of just the parents token) so we need // to separately execute "hg parents". list($stdout) = $this->execxLocal( 'parents --style default --rev %s', $against); $parents_logs = ArcanistMercurialParser::parseMercurialLog($stdout); list($p1, $p2) = array_merge($parents_logs, array(null, null)); if ($p1 && !in_array($p1['rev'], $outgoing_revs)) { $against = $p1['rev']; break; } else if ($p2 && !in_array($p2['rev'], $outgoing_revs)) { $against = $p2['rev']; break; } else if ($p1) { $against = $p1['rev']; } else { // This is the case where you have a new repository and the entire // thing is outgoing; Mercurial literally accepts "--rev null" as // meaning "diff against the empty state". $against = 'null'; break; } } if ($against == 'null') { $this->setBaseCommitExplanation( 'this is a new repository (all changes are outgoing).'); } else { $this->setBaseCommitExplanation( 'it is the first commit reachable from the working copy state '. 'which is not outgoing.'); } return $against; } public function getLocalCommitInformation() { if ($this->localCommitInfo === null) { $base_commit = $this->getBaseCommit(); list($info) = $this->execxLocal( 'log --template %s --rev %s --branch %s --', "{node}\1{rev}\1{author}\1". "{date|rfc822date}\1{branch}\1{tag}\1{parents}\1{desc}\2", hgsprintf('(%s::. 
- %s)', $base_commit, $base_commit), $this->getBranchName()); $logs = array_filter(explode("\2", $info)); $last_node = null; $futures = array(); $commits = array(); foreach ($logs as $log) { list($node, $rev, $full_author, $date, $branch, $tag, $parents, $desc) = explode("\1", $log, 9); list ($author, $author_email) = $this->parseFullAuthor($full_author); // NOTE: If a commit has only one parent, {parents} returns empty. // If it has two parents, {parents} returns revs and short hashes, not // full hashes. Try to avoid making calls to "hg parents" because it's // relatively expensive. $commit_parents = null; if (!$parents) { if ($last_node) { $commit_parents = array($last_node); } } if (!$commit_parents) { // We didn't get a cheap hit on previous commit, so do the full-cost // "hg parents" call. We can run these in parallel, at least. $futures[$node] = $this->execFutureLocal( 'parents --template %s --rev %s', '{node}\n', $node); } $commits[$node] = array( 'author' => $author, 'time' => strtotime($date), 'branch' => $branch, 'tag' => $tag, 'commit' => $node, 'rev' => $node, // TODO: Remove eventually. 'local' => $rev, 'parents' => $commit_parents, 'summary' => head(explode("\n", $desc)), 'message' => $desc, 'authorEmail' => $author_email, ); $last_node = $node; } foreach (Futures($futures)->limit(4) as $node => $future) { list($parents) = $future->resolvex(); $parents = array_filter(explode("\n", $parents)); $commits[$node]['parents'] = $parents; } // Put commits in newest-first order, to be consistent with Git and the // expected order of "hg log" and "git log" under normal circumstances. // The order of ancestors() is oldest-first. $commits = array_reverse($commits); $this->localCommitInfo = $commits; } return $this->localCommitInfo; } public function getAllFiles() { // TODO: Handle paths with newlines. 
$future = $this->buildLocalFuture(array('manifest')); return new LinesOfALargeExecFuture($future); } public function getChangedFiles($since_commit) { list($stdout) = $this->execxLocal( 'status --rev %s', $since_commit); return ArcanistMercurialParser::parseMercurialStatus($stdout); } public function getBlame($path) { list($stdout) = $this->execxLocal( 'annotate -u -v -c --rev %s -- %s', $this->getBaseCommit(), $path); $lines = phutil_split_lines($stdout, $retain_line_endings = true); $blame = array(); foreach ($lines as $line) { if (!strlen($line)) { continue; } $matches = null; $ok = preg_match('/^\s*([^:]+?) ([a-f0-9]{12}):/', $line, $matches); if (!$ok) { throw new Exception("Unable to parse Mercurial blame line: {$line}"); } $revision = $matches[2]; $author = trim($matches[1]); $blame[] = array($author, $revision); } return $blame; } protected function buildUncommittedStatus() { list($stdout) = $this->execxLocal('status'); $results = new PhutilArrayWithDefaultValue(); $working_status = ArcanistMercurialParser::parseMercurialStatus($stdout); foreach ($working_status as $path => $mask) { if (!($mask & ArcanistRepositoryAPI::FLAG_UNTRACKED)) { // Mark tracked files as uncommitted. $mask |= self::FLAG_UNCOMMITTED; } $results[$path] |= $mask; } return $results->toArray(); } protected function buildCommitRangeStatus() { // TODO: Possibly we should use "hg status --rev X --rev ." for this // instead, but we must run "hg diff" later anyway in most cases, so // building and caching it shouldn't hurt us. 
$diff = $this->getFullMercurialDiff(); if (!$diff) { return array(); } $parser = new ArcanistDiffParser(); $changes = $parser->parseDiff($diff); $status_map = array(); foreach ($changes as $change) { $flags = 0; switch ($change->getType()) { case ArcanistDiffChangeType::TYPE_ADD: case ArcanistDiffChangeType::TYPE_MOVE_HERE: case ArcanistDiffChangeType::TYPE_COPY_HERE: $flags |= self::FLAG_ADDED; break; case ArcanistDiffChangeType::TYPE_CHANGE: case ArcanistDiffChangeType::TYPE_COPY_AWAY: // Check for changes? $flags |= self::FLAG_MODIFIED; break; case ArcanistDiffChangeType::TYPE_DELETE: case ArcanistDiffChangeType::TYPE_MOVE_AWAY: case ArcanistDiffChangeType::TYPE_MULTICOPY: $flags |= self::FLAG_DELETED; break; } $status_map[$change->getCurrentPath()] = $flags; } return $status_map; } protected function didReloadWorkingCopy() { // Diffs are against ".", so we need to drop the cache if we change the // working copy. $this->rawDiffCache = array(); $this->branch = null; } private function getDiffOptions() { $options = array( '--git', '-U'.$this->getDiffLinesOfContext(), ); return implode(' ', $options); } public function getRawDiffText($path) { $options = $this->getDiffOptions(); $range = $this->getBaseCommit(); $raw_diff_cache_key = $options.' '.$range.' 
'.$path; if (idx($this->rawDiffCache, $raw_diff_cache_key)) { return idx($this->rawDiffCache, $raw_diff_cache_key); } list($stdout) = $this->execxLocal( 'diff %C --rev %s -- %s', $options, $range, $path); $this->rawDiffCache[$raw_diff_cache_key] = $stdout; return $stdout; } public function getFullMercurialDiff() { return $this->getRawDiffText(''); } public function getOriginalFileData($path) { return $this->getFileDataAtRevision($path, $this->getBaseCommit()); } public function getCurrentFileData($path) { return $this->getFileDataAtRevision( $path, $this->getWorkingCopyRevision()); } public function getBulkOriginalFileData($paths) { return $this->getBulkFileDataAtRevision($paths, $this->getBaseCommit()); } public function getBulkCurrentFileData($paths) { return $this->getBulkFileDataAtRevision( $paths, $this->getWorkingCopyRevision()); } private function getBulkFileDataAtRevision($paths, $revision) { // Calling 'hg cat' on each file individually is slow (1 second per file // on a large repo) because mercurial has to decompress and parse the - // entire manifest every time. Do it in one large batch instead. + // entire manifest every time. Do it in one large batch instead. 
// Nothing to fetch: skip the temporary directory and the "hg cat" call.
    if (!$paths) {
      return array();
    }

    // hg cat will write the file data to files in a temp directory
    $tmpdir = Filesystem::createTemporaryDirectory();

    // Mercurial doesn't create the directories for us :(
    foreach ($paths as $path) {
      $tmppath = $tmpdir.'/'.$path;
      Filesystem::createDirectory(dirname($tmppath), 0755, true);
    }

    // Pass the paths with %Ls so each one is escaped as its own shell
    // argument. Joining them with spaces and using %C would split paths that
    // contain whitespace and would let shell metacharacters in file names
    // through unescaped.
    list($err, $stdout) = $this->execManualLocal(
      'cat --rev %s --output %s -- %Ls',
      $revision,
      // %p is the formatter for the repo-relative filepath
      $tmpdir.'/%p',
      $paths);

    // Paths that "hg cat" did not write (for example, files which do not
    // exist at this revision) are simply left out of the result.
    $filedata = array();
    foreach ($paths as $path) {
      $tmppath = $tmpdir.'/'.$path;
      if (Filesystem::pathExists($tmppath)) {
        $filedata[$path] = Filesystem::readFile($tmppath);
      }
    }

    Filesystem::remove($tmpdir);

    return $filedata;
  }

  /**
   * Read one file's contents at a revision using "hg cat".
   *
   * @param string $path Path of the file to read.
   * @param string $revision Revision to read the file at.
   * @return string|null File contents, or null if "hg cat" failed.
   */
  private function getFileDataAtRevision($path, $revision) {
    list($err, $stdout) = $this->execManualLocal(
      'cat --rev %s -- %s',
      $revision,
      $path);
    if ($err) {
      // Assume this is "no file at revision", i.e. a deleted or added file.
      return null;
    } else {
      return $stdout;
    }
  }

  public function getWorkingCopyRevision() {
    return '.';
  }

  public function isHistoryDefaultImmutable() {
    return true;
  }

  /**
   * Amend is considered supported when "hg help commit" succeeds and its
   * output mentions "amend".
   */
  public function supportsAmend() {
    list($err, $stdout) = $this->execManualLocal('help commit');
    if ($err) {
      return false;
    } else {
      return (strpos($stdout, 'amend') !== false);
    }
  }

  /**
   * Rebase is considered supported when "hg help rebase" exits with status 0.
   * The answer is cached on the object after the first call.
   */
  public function supportsRebase() {
    if ($this->supportsRebase === null) {
      list ($err) = $this->execManualLocal('help rebase');
      $this->supportsRebase = $err === 0;
    }

    return $this->supportsRebase;
  }

  /**
   * Phases are considered supported when "hg help phase" exits with status 0.
   * The answer is cached on the object after the first call.
   */
  public function supportsPhases() {
    if ($this->supportsPhases === null) {
      list ($err) = $this->execManualLocal('help phase');
      $this->supportsPhases = $err === 0;
    }

    return $this->supportsPhases;
  }

  public function supportsCommitRanges() {
    return true;
  }

  public function supportsLocalCommits() {
    return true;
  }

  public function getAllBranches() {
    list($branch_info) = $this->execxLocal('bookmarks');
    if (trim($branch_info) == 'no bookmarks set') {
      return array();
    }

    $matches = null;
    preg_match_all(
      '/^\s*(\*?)\s*(.+)\s(\S+)$/m',
      $branch_info,
$matches, PREG_SET_ORDER); $return = array(); foreach ($matches as $match) { list(, $current, $name) = $match; $return[] = array( 'current' => (bool)$current, 'name' => rtrim($name), ); } return $return; } public function hasLocalCommit($commit) { try { $this->getCanonicalRevisionName($commit); return true; } catch (Exception $ex) { return false; } } public function getCommitMessage($commit) { list($message) = $this->execxLocal( 'log --template={desc} --rev %s', $commit); return $message; } public function getAllLocalChanges() { $diff = $this->getFullMercurialDiff(); if (!strlen(trim($diff))) { return array(); } $parser = new ArcanistDiffParser(); return $parser->parseDiff($diff); } public function supportsLocalBranchMerge() { return true; } public function performLocalBranchMerge($branch, $message) { if ($branch) { $err = phutil_passthru( '(cd %s && HGPLAIN=1 hg merge --rev %s && hg commit -m %s)', $this->getPath(), $branch, $message); } else { $err = phutil_passthru( '(cd %s && HGPLAIN=1 hg merge && hg commit -m %s)', $this->getPath(), $message); } if ($err) { throw new ArcanistUsageException('Merge failed!'); } } public function getFinalizedRevisionMessage() { return "You may now push this commit upstream, as appropriate (e.g. with ". "'hg push' or by printing and faxing it)."; } public function getCommitMessageLog() { $base_commit = $this->getBaseCommit(); list($stdout) = $this->execxLocal( 'log --template %s --rev %s --branch %s --', "{node}\1{desc}\2", hgsprintf('(%s::. 
- %s)', $base_commit, $base_commit), $this->getBranchName()); $map = array(); $logs = explode("\2", trim($stdout)); foreach (array_filter($logs) as $log) { list($node, $desc) = explode("\1", $log); $map[$node] = $desc; } return array_reverse($map); } public function loadWorkingCopyDifferentialRevisions( ConduitClient $conduit, array $query) { $messages = $this->getCommitMessageLog(); $parser = new ArcanistDiffParser(); // First, try to find revisions by explicit revision IDs in commit messages. $reason_map = array(); $revision_ids = array(); foreach ($messages as $node_id => $message) { $object = ArcanistDifferentialCommitMessage::newFromRawCorpus($message); if ($object->getRevisionID()) { $revision_ids[] = $object->getRevisionID(); $reason_map[$object->getRevisionID()] = $node_id; } } if ($revision_ids) { $results = $conduit->callMethodSynchronous( 'differential.query', $query + array( 'ids' => $revision_ids, )); foreach ($results as $key => $result) { $hash = substr($reason_map[$result['id']], 0, 16); $results[$key]['why'] = "Commit message for '{$hash}' has explicit 'Differential Revision'."; } return $results; } // Try to find revisions by hash. $hashes = array(); foreach ($this->getLocalCommitInformation() as $commit) { $hashes[] = array('hgcm', $commit['commit']); } if ($hashes) { // NOTE: In the case of "arc diff . --uncommitted" in a Mercurial working // copy with dirty changes, there may be no local commits. $results = $conduit->callMethodSynchronous( 'differential.query', $query + array( 'commitHashes' => $hashes, )); foreach ($results as $key => $hash) { $results[$key]['why'] = 'A mercurial commit hash in the commit range is already attached '. 
'to the Differential revision.'; } return $results; } return array(); } public function updateWorkingCopy() { $this->execxLocal('up'); $this->reloadWorkingCopy(); } private function getMercurialConfig($key, $default = null) { list($stdout) = $this->execxLocal('showconfig %s', $key); if ($stdout == '') { return $default; } return rtrim($stdout); } public function getAuthor() { $full_author = $this->getMercurialConfig('ui.username'); list($author, $author_email) = $this->parseFullAuthor($full_author); return $author; } /** - * Parse the Mercurial author field + * Parse the Mercurial author field. * * Not everyone enters their email address as a part of the username - * field. Try to make it work when it's obvious + * field. Try to make it work when it's obvious. * * @param string $full_author * @return array */ protected function parseFullAuthor($full_author) { if (strpos($full_author, '@') === false) { $author = $full_author; $author_email = null; } else { $email = new PhutilEmailAddress($full_author); $author = $email->getDisplayName(); $author_email = $email->getAddress(); } return array($author, $author_email); } public function addToCommit(array $paths) { $this->execxLocal( 'addremove -- %Ls', $paths); $this->reloadWorkingCopy(); } public function doCommit($message) { $tmp_file = new TempFile(); Filesystem::writeFile($tmp_file, $message); - $this->execxLocal( - 'commit -l %s', - $tmp_file); + $this->execxLocal('commit -l %s', $tmp_file); $this->reloadWorkingCopy(); } public function amendCommit($message = null) { if ($message === null) { $message = $this->getCommitMessage('.'); } $tmp_file = new TempFile(); Filesystem::writeFile($tmp_file, $message); try { $this->execxLocal( 'commit --amend -l %s', $tmp_file); } catch (CommandException $ex) { if (preg_match('/nothing changed/', $ex->getStdOut())) { // NOTE: Mercurial considers it an error to make a no-op amend. 
// Although we generally defer to the underlying VCS to dictate behavior,
        // this one seems a little goofy, and we use amend as part of various
        // workflows under the assumption that no-op amends are fine. If this
        // amend failed because it's a no-op, just continue.
      } else {
        throw $ex;
      }
    }

    $this->reloadWorkingCopy();
  }

  public function getCommitSummary($commit) {
    if ($commit == 'null') {
      return '(The Empty Void)';
    }

    list($summary) = $this->execxLocal(
      'log --template {desc} --limit 1 --rev %s',
      $commit);

    $summary = head(explode("\n", $summary));

    return trim($summary);
  }

  public function backoutCommit($commit_hash) {
    $this->execxLocal(
      'backout -r %s',
      $commit_hash);
    $this->reloadWorkingCopy();
    if (!$this->getUncommittedStatus()) {
      throw new ArcanistUsageException(
        "{$commit_hash} has already been reverted.");
    }
  }

  public function getBackoutMessage($commit_hash) {
    return 'Backed out changeset '.$commit_hash.'.';
  }

  /**
   * Resolve a single rule from the 'base' configuration into a commit.
   *
   * Rules have the form "type:name". Rules of type "hg" are either
   * "gca(...)" or a literal commit name; rules of type "arc" are "empty",
   * "outgoing", "amended", "bookmark", "this" or "nodiff(...)". When a rule
   * matches, the base commit explanation is set as a side effect.
   *
   * @param string $rule Rule to resolve, like "arc:outgoing".
   * @param string $source Description of where the rule came from. It is
   *                       only interpolated into the explanation text.
   * @return string|null Node hash or symbolic commit selected by the rule,
   *                     or null if the rule did not match.
   */
  public function resolveBaseCommitRule($rule, $source) {
    list($type, $name) = explode(':', $rule, 2);

    // NOTE: This function MUST return node hashes or symbolic commits (like
    // branch names or the word "tip"), not revsets. This includes ".^" and
    // similar, which are revsets, not symbolic commit identifiers. If you
    // return a revset it will be escaped later and looked up literally.

    switch ($type) {
      case 'hg':
        $matches = null;
        if (preg_match('/^gca\((.+)\)$/', $name, $matches)) {
          list($err, $merge_base) = $this->execManualLocal(
            'log --template={node} --rev %s',
            sprintf('ancestor(., %s)', $matches[1]));
          if (!$err) {
            $this->setBaseCommitExplanation(
              "it is the greatest common ancestor of '{$matches[1]}' and ., as".
              " specified by '{$rule}' in your {$source} 'base' ".
              "configuration.");
            return trim($merge_base);
          }
        } else {
          // Try the name as a literal (escaped) commit identifier first, and
          // fall back to passing it through unmodified if that fails.
          list($err, $commit) = $this->execManualLocal(
            'log --template {node} --rev %s',
            hgsprintf('%s', $name));

          if ($err) {
            list($err, $commit) = $this->execManualLocal(
              'log --template {node} --rev %s',
              $name);
          }
          if (!$err) {
            $this->setBaseCommitExplanation(
              "it is specified by '{$rule}' in your {$source} 'base' ".
              "configuration.");
            return trim($commit);
          }
        }
        break;
      case 'arc':
        switch ($name) {
          case 'empty':
            $this->setBaseCommitExplanation(
              "you specified '{$rule}' in your {$source} 'base' ".
              "configuration.");
            return 'null';
          case 'outgoing':
            list($err, $outgoing_base) = $this->execManualLocal(
              'log --template={node} --rev %s',
              'limit(reverse(ancestors(.) - outgoing()), 1)');
            if (!$err) {
              $this->setBaseCommitExplanation(
                "it is the first ancestor of the working copy that is not ".
                "outgoing, and it matched the rule {$rule} in your {$source} ".
                "'base' configuration.");
              return trim($outgoing_base);
            }
            // The rule did not match. Stop here instead of falling through
            // into the unrelated 'amended' rule below.
            break;
          case 'amended':
            $text = $this->getCommitMessage('.');
            $message = ArcanistDifferentialCommitMessage::newFromRawCorpus(
              $text);
            if ($message->getRevisionID()) {
              $this->setBaseCommitExplanation(
                "'.' has been amended with 'Differential Revision:', ".
                "as specified by '{$rule}' in your {$source} 'base' ".
                "configuration.");
              // NOTE: This should be safe because Mercurial doesn't support
              // amend until 2.2.
              return $this->getCanonicalRevisionName('.^');
            }
            break;
          case 'bookmark':
            $revset =
              'limit('.
              ' sort('.
              ' (ancestors(.) and bookmark() - .) or'.
              ' (ancestors(.) - outgoing()), '.
              ' -rev),'.
              '1)';
            list($err, $bookmark_base) = $this->execManualLocal(
              'log --template={node} --rev %s',
              $revset);
            if (!$err) {
              $this->setBaseCommitExplanation(
                "it is the first ancestor of . that either has a bookmark, or ".
                "is already in the remote and it matched the rule {$rule} in ".
                "your {$source} 'base' configuration");
              return trim($bookmark_base);
            }
            break;
          case 'this':
            $this->setBaseCommitExplanation(
              "you specified '{$rule}' in your {$source} 'base' ".
              "configuration.");
            return $this->getCanonicalRevisionName('.^');
          default:
            if (preg_match('/^nodiff\((.+)\)$/', $name, $matches)) {
              list($results) = $this->execxLocal(
                'log --template %s --rev %s',
                "{node}\1{desc}\2",
                sprintf('ancestor(.,%s)::.^', $matches[1]));
              $results = array_reverse(explode("\2", trim($results)));

              // Walk the log output in reverse and stop at the first commit
              // whose message carries a revision ID.
              foreach ($results as $result) {
                if (empty($result)) {
                  continue;
                }

                list($node, $desc) = explode("\1", $result, 2);

                $message = ArcanistDifferentialCommitMessage::newFromRawCorpus(
                  $desc);
                if ($message->getRevisionID()) {
                  $this->setBaseCommitExplanation(
                    "it is the first ancestor of . that has a diff ".
                    "and is the gca or a descendant of the gca with ".
                    "'{$matches[1]}', specified by '{$rule}' in your ".
                    "{$source} 'base' configuration.");
                  return $node;
                }
              }
            }
            break;
          }
        break;
      default:
        return null;
    }

    return null;

  }

  public function isHgSubversionRepo() {
    return file_exists($this->getPath('.hg/svn/rev_map'));
  }

  public function getSubversionInfo() {
    $info = array();
    $base_path = null;
    $revision = null;
    list($err, $raw_info) = $this->execManualLocal('svn info');
    if (!$err) {
      foreach (explode("\n", trim($raw_info)) as $line) {
        list($key, $value) = explode(': ', $line, 2);
        switch ($key) {
          case 'URL':
            $info['base_path'] = $value;
            $base_path = $value;
            break;
          case 'Repository UUID':
            $info['uuid'] = $value;
            break;
          case 'Revision':
            $revision = $value;
            break;
          default:
            break;
        }
      }
      if ($base_path && $revision) {
        $info['base_revision'] = $base_path.'@'.$revision;
      }
    }
    return $info;
  }

  public function getActiveBookmark() {
    $bookmarks = $this->getBookmarks();
    foreach ($bookmarks as $bookmark) {
      if ($bookmark['is_active']) {
        return $bookmark['name'];
      }
    }

    return null;
  }

  public function isBookmark($name) {
    $bookmarks = $this->getBookmarks();
    foreach ($bookmarks as $bookmark) {
      if ($bookmark['name'] ===
$name) { return true; } } return false; } public function isBranch($name) { $branches = $this->getBranches(); foreach ($branches as $branch) { if ($branch['name'] === $name) { return true; } } return false; } public function getBranches() { list($stdout) = $this->execxLocal('--debug branches'); $lines = ArcanistMercurialParser::parseMercurialBranches($stdout); $branches = array(); foreach ($lines as $name => $spec) { $branches[] = array( 'name' => $name, 'revision' => $spec['rev'], ); } return $branches; } public function getBookmarks() { $bookmarks = array(); list($raw_output) = $this->execxLocal('bookmarks'); $raw_output = trim($raw_output); if ($raw_output !== 'no bookmarks set') { foreach (explode("\n", $raw_output) as $line) { // example line: * mybook 2:6b274d49be97 list($name, $revision) = $this->splitBranchOrBookmarkLine($line); $is_active = false; if ('*' === $name[0]) { $is_active = true; $name = substr($name, 2); } $bookmarks[] = array( 'is_active' => $is_active, 'name' => $name, 'revision' => $revision); } } return $bookmarks; } private function splitBranchOrBookmarkLine($line) { // branches and bookmarks are printed in the format: // default 0:a5ead76cdf85 (inactive) // * mybook 2:6b274d49be97 // this code divides the name half from the revision half // it does not parse the * and (inactive) bits $colon_index = strrpos($line, ':'); $before_colon = substr($line, 0, $colon_index); $start_rev_index = strrpos($before_colon, ' '); $name = substr($line, 0, $start_rev_index); $rev = substr($line, $start_rev_index); return array(trim($name), trim($rev)); } public function getRemoteURI() { list($stdout) = $this->execxLocal('paths default'); $stdout = trim($stdout); if (strlen($stdout)) { return $stdout; } return null; } } diff --git a/src/repository/api/ArcanistRepositoryAPI.php b/src/repository/api/ArcanistRepositoryAPI.php index 10da4697..2c1956f4 100644 --- a/src/repository/api/ArcanistRepositoryAPI.php +++ b/src/repository/api/ArcanistRepositoryAPI.php @@ 
-1,660 +1,659 @@ diffLinesOfContext; } public function setDiffLinesOfContext($lines) { $this->diffLinesOfContext = $lines; return $this; } public function getWorkingCopyIdentity() { return $this->configurationManager->getWorkingCopyIdentity(); } public function getConfigurationManager() { return $this->configurationManager; } public static function newAPIFromConfigurationManager( ArcanistConfigurationManager $configuration_manager) { $working_copy = $configuration_manager->getWorkingCopyIdentity(); if (!$working_copy) { throw new Exception( pht( 'Trying to create a RepositoryAPI without a working copy!')); } $root = $working_copy->getProjectRoot(); switch ($working_copy->getVCSType()) { case 'svn': $api = new ArcanistSubversionAPI($root); break; case 'hg': $api = new ArcanistMercurialAPI($root); break; case 'git': $api = new ArcanistGitAPI($root); break; default: throw new Exception( pht( 'The current working directory is not part of a working copy for '. 'a supported version control system (Git, Subversion or '. 'Mercurial).')); } $api->configurationManager = $configuration_manager; return $api; } public function __construct($path) { $this->path = $path; } public function getPath($to_file = null) { if ($to_file !== null) { return $this->path.DIRECTORY_SEPARATOR. ltrim($to_file, DIRECTORY_SEPARATOR); } else { return $this->path.DIRECTORY_SEPARATOR; } } /* -( Path Status )-------------------------------------------------------- */ abstract protected function buildUncommittedStatus(); abstract protected function buildCommitRangeStatus(); /** * Get a list of uncommitted paths in the working copy that have been changed * or are affected by other status effects, like conflicts or untracked * files. * * Convenience methods @{method:getUntrackedChanges}, * @{method:getUnstagedChanges}, @{method:getUncommittedChanges}, * @{method:getMergeConflicts}, and @{method:getIncompleteChanges} allow * simpler selection of paths in a specific state. 
*
   * This method returns a map of paths to bitmasks with status, using
   * `FLAG_` constants. For example:
   *
   *   array(
   *     'some/uncommitted/file.txt' => ArcanistRepositoryAPI::FLAG_UNSTAGED,
   *   );
   *
   * A file may be in several states. Not all states are possible with all
   * version control systems.
   *
   * @return map Map of paths, see above.
   * @task status
   */
  final public function getUncommittedStatus() {
    // Serve the memoized map when one has already been built.
    if ($this->uncommittedStatusCache !== null) {
      return $this->uncommittedStatusCache;
    }

    $status = $this->buildUncommittedStatus();
    ksort($status);
    $this->uncommittedStatusCache = $status;

    return $this->uncommittedStatusCache;
  }


  /**
   * Paths whose uncommitted status has `FLAG_UNTRACKED` set.
   *
   * @task status
   */
  final public function getUntrackedChanges() {
    return $this->getUncommittedPathsWithMask(self::FLAG_UNTRACKED);
  }


  /**
   * Paths whose uncommitted status has `FLAG_UNSTAGED` set.
   *
   * @task status
   */
  final public function getUnstagedChanges() {
    return $this->getUncommittedPathsWithMask(self::FLAG_UNSTAGED);
  }


  /**
   * Paths whose uncommitted status has `FLAG_UNCOMMITTED` set.
   *
   * @task status
   */
  final public function getUncommittedChanges() {
    return $this->getUncommittedPathsWithMask(self::FLAG_UNCOMMITTED);
  }


  /**
   * Paths whose uncommitted status has `FLAG_CONFLICT` set.
   *
   * @task status
   */
  final public function getMergeConflicts() {
    return $this->getUncommittedPathsWithMask(self::FLAG_CONFLICT);
  }


  /**
   * Paths whose uncommitted status has `FLAG_INCOMPLETE` set.
   *
   * @task status
   */
  final public function getIncompleteChanges() {
    return $this->getUncommittedPathsWithMask(self::FLAG_INCOMPLETE);
  }


  /**
   * Paths whose uncommitted status has `FLAG_MISSING` set.
   *
   * @task status
   */
  final public function getMissingChanges() {
    return $this->getUncommittedPathsWithMask(self::FLAG_MISSING);
  }


  /**
   * Select the uncommitted paths whose status shares at least one bit with
   * the given mask, in the key order of @{method:getUncommittedStatus}.
   *
   * @param int Bitmask of `FLAG_` constants to test for.
   * @return list List of matching paths.
   * @task status
   */
  private function getUncommittedPathsWithMask($mask) {
    $paths = array();
    foreach ($this->getUncommittedStatus() as $path => $flags) {
      if (!($flags & $mask)) {
        continue;
      }
      $paths[] = $path;
    }
    return $paths;
  }


  /**
   * Get a list of paths affected by the commits in the current commit range.
   *
   * See @{method:getUncommittedStatus} for a description of the return value.
   *
   * @return map Map from paths to status.
* @task status
   */
  final public function getCommitRangeStatus() {
    // Serve the memoized map when one has already been built.
    if ($this->commitRangeStatusCache !== null) {
      return $this->commitRangeStatusCache;
    }

    $status = $this->buildCommitRangeStatus();
    ksort($status);
    $this->commitRangeStatusCache = $status;

    return $this->commitRangeStatusCache;
  }


  /**
   * Get a list of paths affected by commits in the current commit range, or
   * uncommitted changes in the working copy. See @{method:getUncommittedStatus}
   * or @{method:getCommitRangeStatus} to retrieve smaller parts of the status.
   *
   * See @{method:getUncommittedStatus} for a description of the return value.
   *
   * @return map Map from paths to status.
   * @task status
   */
  final public function getWorkingCopyStatus() {
    $from_range = $this->getCommitRangeStatus();
    $from_working_copy = $this->getUncommittedStatus();

    // Start from the commit range status and OR the uncommitted flags on top,
    // so a path present in both maps carries the union of its flags.
    $merged = new PhutilArrayWithDefaultValue($from_range);
    foreach ($from_working_copy as $path => $mask) {
      $merged[$path] |= $mask;
    }

    $status = $merged->toArray();
    ksort($status);

    return $status;
  }


  /**
   * Drops caches after changes to the working copy. The uncommitted status
   * and commit range status are cached after they are first built; this
   * method clears both caches, runs the @{method:didReloadWorkingCopy} hook
   * and then reloads the commit range.
   *
   * @return this
   * @task status
   */
  final public function reloadWorkingCopy() {
    $this->uncommittedStatusCache = null;
    $this->commitRangeStatusCache = null;

    $this->didReloadWorkingCopy();
    $this->reloadCommitRange();

    return $this;
  }


  /**
   * Hook for implementations to dirty working copy caches after the working
   * copy has been updated. The default implementation does nothing.
   *
   * @return void
   * @task status
   */
  protected function didReloadWorkingCopy() {
    return;
  }


  /**
   * Fetches the original file data for each path provided, by calling
   * @{method:getOriginalFileData} once per path.
   *
   * @return map Map from path to file data.
   */
  public function getBulkOriginalFileData($paths) {
    $data_by_path = array();
    foreach ($paths as $path) {
      $data_by_path[$path] = $this->getOriginalFileData($path);
    }
    return $data_by_path;
  }

  /**
   * Fetches the current file data for each path provided.
   *
   * @return map Map from path to file data.
*/
  public function getBulkCurrentFileData($paths) {
    // One getCurrentFileData() call per path; results are keyed by path.
    $filedata = array();
    foreach ($paths as $path) {
      $filedata[$path] = $this->getCurrentFileData($path);
    }

    return $filedata;
  }

  // The abstract methods below have no default; every concrete repository
  // API must implement them.

  /**
   * @return Traversable
   */
  abstract public function getAllFiles();

  abstract public function getBlame($path);

  abstract public function getRawDiffText($path);
  abstract public function getOriginalFileData($path);
  abstract public function getCurrentFileData($path);
  abstract public function getLocalCommitInformation();
  abstract public function getSourceControlBaseRevision();
  abstract public function getCanonicalRevisionName($string);
  abstract public function getBranchName();
  abstract public function getSourceControlPath();
  abstract public function isHistoryDefaultImmutable();
  abstract public function supportsAmend();
  abstract public function getWorkingCopyRevision();
  abstract public function updateWorkingCopy();
  abstract public function getMetadataPath();
  abstract public function loadWorkingCopyDifferentialRevisions(
    ConduitClient $conduit,
    array $query);
  abstract public function getRemoteURI();


  // Defaults to the working copy revision; overridable.
  public function getUnderlyingWorkingCopyRevision() {
    return $this->getWorkingCopyRevision();
  }

  // The non-abstract methods below that throw
  // ArcanistCapabilityNotSupportedException are optional capabilities: the
  // base class reports them as unsupported unless a subclass overrides them.

  public function getChangedFiles($since_commit) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function getAuthor() {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function addToCommit(array $paths) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  abstract public function supportsLocalCommits();

  public function doCommit($message) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function amendCommit($message = null) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  // Unlike the other optional capabilities, the default here is an empty
  // list rather than an exception.
  public function getAllBranches() {
    // TODO: Implement for Mercurial/SVN and make abstract.
    return array();
  }

  public function hasLocalCommit($commit) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function getCommitMessage($commit) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function getCommitSummary($commit) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function getAllLocalChanges() {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  abstract public function supportsLocalBranchMerge();

  public function performLocalBranchMerge($branch, $message) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function getFinalizedRevisionMessage() {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  // The three exec*Local() helpers forward their full argument list
  // (pattern plus any extra arguments) to buildLocalFuture(). They differ
  // only in what they do with the future: resolvex(), resolve(), or return
  // it unresolved.

  public function execxLocal($pattern /* , ... */) {
    $args = func_get_args();
    return $this->buildLocalFuture($args)->resolvex();
  }

  public function execManualLocal($pattern /* , ... */) {
    $args = func_get_args();
    return $this->buildLocalFuture($args)->resolve();
  }

  public function execFutureLocal($pattern /* , ... */) {
    $args = func_get_args();
    return $this->buildLocalFuture($args);
  }

  // Receives the argument list collected by the exec*Local() helpers above.
  abstract protected function buildLocalFuture(array $argv);

  // Stashing is reported as unavailable by default.
  public function canStashChanges() {
    return false;
  }

  public function stashChanges() {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  public function unstashChanges() {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

/* -(  Scratch Files  )------------------------------------------------------ */


  /**
   * Try to read a scratch file, if it exists and is readable.
   *
   * @param string Scratch file name.
   * @return mixed String for file contents, or false for failure.
* @task scratch
   */
  public function readScratchFile($path) {
    $target = $this->getScratchFilePath($path);

    // No usable location, or nothing stored there yet.
    if (!$target || !Filesystem::pathExists($target)) {
      return false;
    }

    try {
      return Filesystem::readFile($target);
    } catch (FilesystemException $ex) {
      return false;
    }
  }


  /**
   * Try to write a scratch file, if there's somewhere to put it and we can
   * write there. The scratch directory is created first when it does not
   * exist yet.
   *
   * @param string Scratch file name to write.
   * @param string Data to write.
   * @return bool True on success, false on failure.
   * @task scratch
   */
  public function writeScratchFile($path, $data) {
    // Resolving the empty name yields the scratch directory itself.
    $scratch_dir = $this->getScratchFilePath('');
    if (!$scratch_dir) {
      return false;
    }

    if (!Filesystem::pathExists($scratch_dir)) {
      try {
        Filesystem::createDirectory($scratch_dir);
      } catch (Exception $ex) {
        return false;
      }
    }

    try {
      $target = $this->getScratchFilePath($path);
      Filesystem::writeFile($target, $data);
    } catch (FilesystemException $ex) {
      return false;
    }

    return true;
  }


  /**
   * Try to remove a scratch file.
   *
   * @param string Scratch file name to remove.
   * @return bool True if the file was removed successfully.
   * @task scratch
   */
  public function removeScratchFile($path) {
    $target = $this->getScratchFilePath($path);
    if (!$target) {
      return false;
    }

    try {
      Filesystem::remove($target);
      return true;
    } catch (FilesystemException $ex) {
      return false;
    }
  }


  /**
   * Get a human-readable description of the scratch file location, rendered
   * against the working copy path.
   *
   * @param string Scratch file name.
   * @return mixed String, or false on failure.
   * @task scratch
   */
  public function getReadableScratchFilePath($path) {
    $target = $this->getScratchFilePath($path);
    if (!$target) {
      return false;
    }

    return Filesystem::readablePath(
      $target,
      $this->getPath());
  }


  /**
   * Get the path to a scratch file, if possible.
   *
   * @param string Scratch file name.
   * @return mixed File path, or false on failure.
* @task scratch
   */
  public function getScratchFilePath($path) {
    // Scratch files live in an "arc" directory resolved against the VCS
    // metadata path.
    $new_scratch_path = Filesystem::resolvePath(
      'arc',
      $this->getMetadataPath());

    // Static local: the flag persists across calls, so the migration check
    // below is attempted on the first call only.
    static $checked = false;
    if (!$checked) {
      $checked = true;
      $old_scratch_path = $this->getPath('.arc');
      // we only want to do the migration once
      // unfortunately, people have checked in .arc directories which
      // means that the old one may get recreated after we delete it
      if (Filesystem::pathExists($old_scratch_path) &&
          !Filesystem::pathExists($new_scratch_path)) {
        Filesystem::createDirectory($new_scratch_path);
        $existing_files = Filesystem::listDirectory($old_scratch_path, true);
        // Copy each listed entry across by content, then drop the old
        // directory.
        foreach ($existing_files as $file) {
          $new_path = Filesystem::resolvePath($file, $new_scratch_path);
          $old_path = Filesystem::resolvePath($file, $old_scratch_path);
          Filesystem::writeFile(
            $new_path,
            Filesystem::readFile($old_path));
        }
        Filesystem::remove($old_scratch_path);
      }
    }
    // NOTE(review): the docblock advertises "false on failure", but the only
    // return visible here is the resolved path. Confirm whether
    // resolvePath() can yield a falsy value.
    return Filesystem::resolvePath($path, $new_scratch_path);
  }


/* -(  Base Commits  )------------------------------------------------------- */

  abstract public function supportsCommitRanges();

  /**
   * Set the symbolic base commit and discard the previously resolved one.
   * Throws ArcanistCapabilityNotSupportedException when commit ranges are
   * not supported.
   */
  final public function setBaseCommit($symbolic_commit) {
    if (!$this->supportsCommitRanges()) {
      throw new ArcanistCapabilityNotSupportedException($this);
    }

    $this->symbolicBaseCommit = $symbolic_commit;
    $this->reloadCommitRange();
    return $this;
  }

  // Unsupported by default; overridable.
  public function setHeadCommit($symbolic_commit) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  /**
   * Get the resolved base commit, building it from the symbolic base commit
   * on first use and caching the result. Throws
   * ArcanistCapabilityNotSupportedException when commit ranges are not
   * supported.
   */
  final public function getBaseCommit() {
    if (!$this->supportsCommitRanges()) {
      throw new ArcanistCapabilityNotSupportedException($this);
    }

    if ($this->resolvedBaseCommit === null) {
      $commit = $this->buildBaseCommit($this->symbolicBaseCommit);
      $this->resolvedBaseCommit = $commit;
    }

    return $this->resolvedBaseCommit;
  }

  // Unsupported by default; overridable.
  public function getHeadCommit() {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  final public function reloadCommitRange() {
    // Forget the resolved base commit and its explanation, then run the
    // didReloadCommitRange() hook.
    $this->resolvedBaseCommit = null;
    $this->baseCommitExplanation = null;

    $this->didReloadCommitRange();
return $this; } protected function didReloadCommitRange() { return; } protected function buildBaseCommit($symbolic_commit) { throw new ArcanistCapabilityNotSupportedException($this); } public function getBaseCommitExplanation() { return $this->baseCommitExplanation; } public function setBaseCommitExplanation($explanation) { $this->baseCommitExplanation = $explanation; return $this; } public function resolveBaseCommitRule($rule, $source) { return null; } public function setBaseCommitArgumentRules($base_commit_argument_rules) { $this->baseCommitArgumentRules = $base_commit_argument_rules; return $this; } public function getBaseCommitArgumentRules() { return $this->baseCommitArgumentRules; } public function resolveBaseCommit() { $base_commit_rules = array( 'runtime' => $this->getBaseCommitArgumentRules(), 'local' => '', 'project' => '', 'user' => '', 'system' => '', ); $all_sources = $this->configurationManager->getConfigFromAllSources('base'); $base_commit_rules = $all_sources + $base_commit_rules; $parser = new ArcanistBaseCommitParser($this); $commit = $parser->resolveBaseCommit($base_commit_rules); return $commit; } public function getRepositoryUUID() { return null; } } diff --git a/src/repository/api/ArcanistSubversionAPI.php b/src/repository/api/ArcanistSubversionAPI.php index 1d2157e6..a4114bd0 100644 --- a/src/repository/api/ArcanistSubversionAPI.php +++ b/src/repository/api/ArcanistSubversionAPI.php @@ -1,695 +1,692 @@ getPath()) as $parent) { $possible_svn_dir = Filesystem::resolvePath('.svn', $parent); if (Filesystem::pathExists($possible_svn_dir)) { $svn_dir = $possible_svn_dir; break; } } } return $svn_dir; } protected function buildLocalFuture(array $argv) { $argv[0] = 'svn '.$argv[0]; $future = newv('ExecFuture', $argv); $future->setCWD($this->getPath()); return $future; } protected function buildCommitRangeStatus() { // In SVN, the commit range is always "uncommitted changes", so these // statuses are equivalent. 
return $this->getUncommittedStatus();
  }

  /**
   * Build the uncommitted status map; for SVN this is the status map from
   * @{method:getSVNStatus} with externals filtered out.
   */
  protected function buildUncommittedStatus() {
    return $this->getSVNStatus();
  }

  /**
   * Get the base revision recorded for each path seen by `svn status`.
   *
   * The map is filled as a side effect of @{method:getSVNStatus}, which is
   * invoked here if the map has not been built yet.
   *
   * @return map Map from path to base revision string.
   */
  public function getSVNBaseRevisions() {
    if ($this->svnBaseRevisions === null) {
      $this->getSVNStatus();
    }
    return $this->svnBaseRevisions;
  }

  /**
   * Restrict later `svn status` invocations to the given paths.
   *
   * @param list Paths to pass to `svn status`.
   * @return this
   */
  public function limitStatusToPaths(array $paths) {
    $this->statusPaths = $paths;
    return $this;
  }

  /**
   * Run `svn status --xml` (once; the parsed result is cached) and convert
   * it into a map from path to `FLAG_` bitmask.
   *
   * As a side effect this also records the base revision of every reported
   * path, readable through @{method:getSVNBaseRevisions}.
   *
   * @param bool If false (the default), paths flagged `FLAG_EXTERNALS` are
   *             removed from the returned map.
   * @return map Map from path to status bitmask.
   */
  public function getSVNStatus($with_externals = false) {
    if ($this->svnStatus === null) {
      if ($this->statusPaths) {
        list($status) = $this->execxLocal(
          '--xml status %Ls',
          $this->statusPaths);
      } else {
        list($status) = $this->execxLocal('--xml status');
      }
      $xml = new SimpleXMLElement($status);

      $externals = array();
      $files = array();

      // Accumulate base revisions across ALL targets. The status XML holds
      // one <target> per path argument, so resetting this map inside the
      // target loop would keep only the last target's revisions whenever
      // the status is limited to several paths.
      $base_revisions = array();

      foreach ($xml->target as $target) {
        foreach ($target->entry as $entry) {
          $path = (string)$entry['path'];
          // On Windows, we get paths with backslash directory separators here.
          // Normalize them to the format everything else expects and generates.
          if (phutil_is_windows()) {
            $path = str_replace(DIRECTORY_SEPARATOR, '/', $path);
          }
          $mask = 0;

          $props = (string)($entry->{'wc-status'}[0]['props']);
          $item  = (string)($entry->{'wc-status'}[0]['item']);

          $base = (string)($entry->{'wc-status'}[0]['revision']);
          $base_revisions[$path] = $base;

          // Property status: only "modified" contributes a flag.
          switch ($props) {
            case 'none':
            case 'normal':
              break;
            case 'modified':
              $mask |= self::FLAG_MODIFIED;
              break;
            default:
              throw new Exception("Unrecognized property status '{$props}'.");
          }

          $mask |= $this->parseSVNStatus($item);
          if ($item == 'external') {
            $externals[] = $path;
          }

          // This is new in or around Subversion 1.6.
          $tree_conflicts = ($entry->{'wc-status'}[0]['tree-conflicted']);
          if ((string)$tree_conflicts) {
            $mask |= self::FLAG_CONFLICT;
          }

          $files[$path] = $mask;
        }
      }

      // Flag every path that is an external or lives underneath one. The
      // trailing "/" keeps "ext" from matching a sibling such as "ext2".
      foreach ($files as $path => $mask) {
        foreach ($externals as $external) {
          if (!strncmp($path.'/', $external.'/', strlen($external) + 1)) {
            $files[$path] |= self::FLAG_EXTERNALS;
          }
        }
      }

      $this->svnBaseRevisions = $base_revisions;
      $this->svnStatus = $files;
    }

    $status = $this->svnStatus;
    if (!$with_externals) {
      foreach ($status as $path => $mask) {
        if ($mask & ArcanistRepositoryAPI::FLAG_EXTERNALS) {
          unset($status[$path]);
        }
      }
    }

    return $status;
  }

  private function parseSVNStatus($item) {
    switch ($item) {
      case 'none':
        // We can get 'none' for property changes on a directory.
      case 'normal':
        return 0;
      case 'external':
        return self::FLAG_EXTERNALS;
      case 'unversioned':
        return self::FLAG_UNTRACKED;
      case 'obstructed':
        return self::FLAG_OBSTRUCTED;
      case 'missing':
        return self::FLAG_MISSING;
      case 'added':
        return self::FLAG_ADDED;
      case 'replaced':
        // This is the result of "svn rm"-ing a file, putting another one
        // in place of it, and then "svn add"-ing the new file. Just treat
        // this as equivalent to "modified".
return self::FLAG_MODIFIED;
      case 'modified':
        return self::FLAG_MODIFIED;
      case 'deleted':
        return self::FLAG_DELETED;
      case 'conflicted':
        return self::FLAG_CONFLICT;
      case 'incomplete':
        return self::FLAG_INCOMPLETE;
      default:
        throw new Exception("Unrecognized item status '{$item}'.");
    }
  }

  /**
   * Schedule paths for the next commit: paths that exist are passed to
   * `add`, the rest to `delete`. The cached status is dropped afterwards.
   *
   * @param list Paths to add or delete.
   * @return void
   */
  public function addToCommit(array $paths) {
    $existing = array_filter($paths, 'Filesystem::pathExists');
    if ($existing) {
      $this->execxLocal('add -- %Ls', $existing);
    }

    if ($existing != $paths) {
      $missing = array_diff($paths, $existing);
      $this->execxLocal('delete -- %Ls', $missing);
    }

    // The working copy changed, so the cached status is stale.
    $this->svnStatus = null;
  }

  /**
   * Read one SVN property of a path with `svn propget`.
   *
   * @param string Path, resolved through @{method:getPath}.
   * @param string Property name.
   * @return string Trimmed property value.
   */
  public function getSVNProperty($path, $property) {
    $full_path = $this->getPath($path);
    list($value) = execx('svn propget %s %s@', $property, $full_path);
    return trim($value);
  }

  /**
   * @return mixed The `URL` field of `svn info` for the working copy root.
   */
  public function getSourceControlPath() {
    $root_info = $this->getSVNInfo('/');
    return idx($root_info, 'URL');
  }

  /**
   * @return string The root URL and base revision number, joined with "@".
   */
  public function getSourceControlBaseRevision() {
    $root_info = $this->getSVNInfo('/');
    $revision = $this->getSVNBaseRevisionNumber();
    return $root_info['URL'].'@'.$revision;
  }

  public function getCanonicalRevisionName($string) {
    throw new ArcanistCapabilityNotSupportedException($this);
  }

  /**
   * Get the base revision number: the override set through
   * @{method:overrideSVNBaseRevisionNumber} when there is a truthy one,
   * otherwise the `Revision` field of `svn info` for the root.
   */
  public function getSVNBaseRevisionNumber() {
    if (!$this->svnBaseRevisionNumber) {
      $root_info = $this->getSVNInfo('/');
      return $root_info['Revision'];
    }
    return $this->svnBaseRevisionNumber;
  }

  /**
   * @param mixed Revision number to report instead of the one from `svn info`.
   * @return this
   */
  public function overrideSVNBaseRevisionNumber($effective_base_revision) {
    $this->svnBaseRevisionNumber = $effective_base_revision;
    return $this;
  }

  /**
   * Derive a branch name from `svn info`: the part of the URL that follows
   * the repository root, or the literal 'svn' when the URL does not start
   * with the repository root.
   */
  public function getBranchName() {
    $root_info = $this->getSVNInfo('/');
    $root = idx($root_info, 'Repository Root');
    $url = idx($root_info, 'URL');

    $root_len = strlen($root);
    if (substr($url, 0, $root_len) != $root) {
      return 'svn';
    }
    return substr($url, $root_len);
  }

  /**
   * @return mixed The `Repository Root` field of `svn info` for the root.
   */
  public function getRemoteURI() {
    $root_info = $this->getSVNInfo('/');
    return idx($root_info, 'Repository Root');
  }

  public function buildInfoFuture($path) {
    if ($path == '/') {
      // When the root of a working copy is referenced by a symlink and you
      // execute 'svn info' on that symlink, svn fails.
This is a longstanding // bug in svn: // // See http://subversion.tigris.org/issues/show_bug.cgi?id=2305 // // To reproduce, do: // // $ ln -s working_copy working_link // $ svn info working_copy # ok // $ svn info working_link # fails // // Work around this by cd-ing into the directory before executing // 'svn info'. return $this->buildLocalFuture(array('info .')); } else { // Note: here and elsewhere we need to append "@" to the path because if // a file has a literal "@" in it, everything after that will be // interpreted as a revision. By appending "@" with no argument, SVN // parses it properly. return $this->buildLocalFuture(array('info %s@', $this->getPath($path))); } } public function buildDiffFuture($path) { // The "--depth empty" flag prevents us from picking up changes in // children when we run 'diff' against a directory. Specifically, when a // user has added or modified some directory "example/", we want to return // ONLY changes to that directory when given it as a path. If we run // without "--depth empty", svn will give us changes to the directory // itself (such as property changes) and also give us changes to any // files within the directory (basically, implicit recursion). We don't // want that, so prevent recursive diffing. $root = phutil_get_library_root('arcanist'); if (phutil_is_windows()) { // TODO: Provide a binary_safe_diff script for Windows. // TODO: Provide a diff command which can take lines of context somehow. 
return $this->buildLocalFuture( array( 'diff --depth empty %s', $path, )); } else { $diff_bin = $root.'/../scripts/repository/binary_safe_diff.sh'; $diff_cmd = Filesystem::resolvePath($diff_bin); return $this->buildLocalFuture( array( 'diff --depth empty --diff-cmd %s -x -U%d %s', $diff_cmd, $this->getDiffLinesOfContext(), $path, )); } } public function primeSVNInfoResult($path, $result) { $this->svnInfoRaw[$path] = $result; return $this; } public function primeSVNDiffResult($path, $result) { $this->svnDiffRaw[$path] = $result; return $this; } public function getSVNInfo($path) { - if (empty($this->svnInfo[$path])) { if (empty($this->svnInfoRaw[$path])) { $this->svnInfoRaw[$path] = $this->buildInfoFuture($path)->resolve(); } list($err, $stdout) = $this->svnInfoRaw[$path]; if ($err) { throw new Exception( "Error #{$err} executing svn info against '{$path}'."); } // TODO: Hack for Windows. $stdout = str_replace("\r\n", "\n", $stdout); $patterns = array( '/^(URL): (\S+)$/m', '/^(Revision): (\d+)$/m', '/^(Last Changed Author): (\S+)$/m', '/^(Last Changed Rev): (\d+)$/m', '/^(Last Changed Date): (.+) \(.+\)$/m', '/^(Copied From URL): (\S+)$/m', '/^(Copied From Rev): (\d+)$/m', '/^(Repository Root): (\S+)$/m', '/^(Repository UUID): (\S+)$/m', '/^(Node Kind): (\S+)$/m', ); $result = array(); foreach ($patterns as $pattern) { $matches = null; if (preg_match($pattern, $stdout, $matches)) { $result[$matches[1]] = $matches[2]; } } if (isset($result['Last Changed Date'])) { $result['Last Changed Date'] = strtotime($result['Last Changed Date']); } if (empty($result)) { throw new Exception('Unable to parse SVN info.'); } $this->svnInfo[$path] = $result; } return $this->svnInfo[$path]; } public function getRawDiffText($path) { $status = $this->getSVNStatus(); if (!isset($status[$path])) { return null; } $status = $status[$path]; // Build meaningful diff text for "svn copy" operations. 
if ($status & ArcanistRepositoryAPI::FLAG_ADDED) { $info = $this->getSVNInfo($path); if (!empty($info['Copied From URL'])) { return $this->buildSyntheticAdditionDiff( $path, $info['Copied From URL'], $info['Copied From Rev']); } } // If we run "diff" on a binary file which doesn't have the "svn:mime-type" // of "application/octet-stream", `diff' will explode in a rain of // unhelpful hellfire as it tries to build a textual diff of the two // files. We just fix this inline since it's pretty unambiguous. // TODO: Move this to configuration? $matches = null; if (preg_match('/\.(gif|png|jpe?g|swf|pdf|ico)$/i', $path, $matches)) { // Check if the file is deleted first; SVN will complain if we try to // get properties of a deleted file. if ($status & ArcanistRepositoryAPI::FLAG_DELETED) { return <<getSVNProperty($path, 'svn:mime-type'); if ($mime != 'application/octet-stream') { execx( 'svn propset svn:mime-type application/octet-stream %s', self::escapeFileNameForSVN($this->getPath($path))); } } if (empty($this->svnDiffRaw[$path])) { $this->svnDiffRaw[$path] = $this->buildDiffFuture($path)->resolve(); } list($err, $stdout, $stderr) = $this->svnDiffRaw[$path]; // Note: GNU Diff returns 2 when SVN hands it binary files to diff and they // differ. This is not an error; it is documented behavior. But SVN isn't // happy about it. SVN will exit with code 1 and return the string below. if ($err != 0 && $stderr !== "svn: 'diff' returned 2\n") { throw new Exception( "svn diff returned unexpected error code: $err\n". "stdout: $stdout\n". "stderr: $stderr"); } if ($err == 0 && empty($stdout)) { // If there are no changes, 'diff' exits with no output, but that means // we can not distinguish between empty and unmodified files. Build a // synthetic "diff" without any changes in it. 
return $this->buildSyntheticUnchangedDiff($path); } return $stdout; } protected function buildSyntheticAdditionDiff($path, $source, $rev) { $type = $this->getSVNProperty($path, 'svn:mime-type'); if ($type == 'application/octet-stream') { return <<getPath($path))) { return null; } $data = Filesystem::readFile($this->getPath($path)); list($orig) = execx('svn cat %s@%s', $source, $rev); $src = new TempFile(); $dst = new TempFile(); Filesystem::writeFile($src, $orig); Filesystem::writeFile($dst, $data); list($err, $diff) = exec_manual( 'diff -L a/%s -L b/%s -U%d %s %s', str_replace($this->getSourceControlPath().'/', '', $source), $path, $this->getDiffLinesOfContext(), $src, $dst); if ($err == 1) { // 1 means there are differences. return <<buildSyntheticUnchangedDiff($path); } } protected function buildSyntheticUnchangedDiff($path) { $full_path = $this->getPath($path); if (is_dir($full_path)) { return null; } if (!file_exists($full_path)) { return null; } $data = Filesystem::readFile($full_path); $lines = explode("\n", $data); $len = count($lines); foreach ($lines as $key => $line) { $lines[$key] = ' '.$line; } $lines = implode("\n", $lines); return <<buildLocalFuture(array('list -R')); return new PhutilCallbackFilterIterator( new LinesOfALargeExecFuture($future), array($this, 'filterFiles')); } public function getChangedFiles($since_commit) { $url = ''; $match = null; if (preg_match('/(.*)@(.*)/', $since_commit, $match)) { list(, $url, $since_commit) = $match; } // TODO: Handle paths with newlines. list($stdout) = $this->execxLocal( '--xml diff --revision %s:HEAD --summarize %s', $since_commit, $url); $xml = new SimpleXMLElement($stdout); $return = array(); foreach ($xml->paths[0]->path as $path) { $return[(string)$path] = $this->parseSVNStatus($path['item']); } return $return; } public function filterFiles($path) { // NOTE: SVN uses '/' also on Windows. 
if ($path == '' || substr($path, -1) == '/') {
      return null;
    }
    return $path;
  }

  /**
   * Run `svn blame` on a path and parse the result.
   *
   * @param string Path to blame.
   * @return list One `array($author, $revision)` pair per output line, in
   *              file order. Empty for an empty file.
   * @throws Exception When an output line does not start with a revision
   *                   number followed by an author.
   */
  public function getBlame($path) {
    $blame = array();

    // Escape the path the same way the rest of this class does. Without
    // this, a file such as "sprite@2x.png" has everything after the "@"
    // interpreted by svn as a revision. Paths without "@" are passed
    // through unchanged.
    list($stdout) = $this->execxLocal(
      'blame %s',
      self::escapeFileNameForSVN($path));

    $stdout = trim($stdout);
    if (!strlen($stdout)) {
      // Empty file.
      return $blame;
    }

    foreach (explode("\n", $stdout) as $line) {
      $m = array();
      // Each line starts with "<revision> <author>"; the line content that
      // follows is ignored.
      if (!preg_match('/^\s*(\d+)\s+(\S+)/', $line, $m)) {
        throw new Exception("Bad blame? `{$line}'");
      }
      $revision = $m[1];
      $author = $m[2];
      $blame[] = array($author, $revision);
    }

    return $blame;
  }

  /**
   * Get the repository's version of a file through `svn cat`.
   *
   * @param string Path to read.
   * @return string|null File content, or null when `svn cat` fails.
   */
  public function getOriginalFileData($path) {
    // SVN issues warnings for nonexistent paths, directories, etc., but still
    // returns no error code. However, for new paths in the working copy it
    // fails. Assume that failure means the original file does not exist.
    list($err, $stdout) = $this->execManualLocal('cat %s@', $path);
    if ($err) {
      return null;
    }
    return $stdout;
  }

  /**
   * Get the working copy's version of a file.
   *
   * @param string Path, resolved through @{method:getPath}.
   * @return string|null File content, or null when the path does not exist.
   */
  public function getCurrentFileData($path) {
    $full_path = $this->getPath($path);
    if (Filesystem::pathExists($full_path)) {
      return Filesystem::readFile($full_path);
    }
    return null;
  }

  /**
   * @return string The `Repository UUID` field of `svn info` for the root.
   */
  public function getRepositoryUUID() {
    $info = $this->getSVNInfo('/');
    return $info['Repository UUID'];
  }

  // The capability answers below are fixed for SVN: there are no local
  // commits, amends, commit ranges or local branch merges.

  public function getLocalCommitInformation() {
    return null;
  }

  public function isHistoryDefaultImmutable() {
    return true;
  }

  public function supportsAmend() {
    return false;
  }

  public function supportsCommitRanges() {
    return false;
  }

  public function supportsLocalCommits() {
    return false;
  }

  public function hasLocalCommit($commit) {
    return false;
  }

  public function getWorkingCopyRevision() {
    return $this->getSourceControlBaseRevision();
  }

  public function supportsLocalBranchMerge() {
    return false;
  }

  public function getFinalizedRevisionMessage() {
    // In other VCSes we give push instructions here, but it never makes sense
    // in SVN.
return 'Done.';
  }

  /**
   * Find Differential revisions that plausibly belong to this working copy.
   *
   * @param ConduitClient Client used to call `differential.query`.
   * @param array Extra query constraints; the project constraint is added
   *              only where the caller did not already supply one.
   * @return array Matching results, each annotated with a `why` string.
   *               Empty when the working copy has no project ID.
   */
  public function loadWorkingCopyDifferentialRevisions(
    ConduitClient $conduit,
    array $query) {
    // We don't have much to go on in SVN, look for revisions that came from
    // this directory and belong to the same project.

    $project_id = $this->getWorkingCopyIdentity()->getProjectID();
    if (!$project_id) {
      return array();
    }

    $constraints = $query + array(
      'arcanistProjects' => array($project_id),
    );
    $revisions = $conduit->callMethodSynchronous(
      'differential.query',
      $constraints);

    // Keep only revisions created from this exact directory, and record why
    // each survivor matched.
    $here = $this->getPath();
    foreach ($revisions as $key => $revision) {
      if ($revision['sourcePath'] != $here) {
        unset($revisions[$key]);
        continue;
      }
      $revisions[$key]['why'] =
        'Matching arcanist project name and working copy directory path.';
    }

    return $revisions;
  }

  /**
   * Update the working copy by running the `up` command.
   */
  public function updateWorkingCopy() {
    $this->execxLocal('up');
  }

  /**
   * Apply @{method:escapeFileNameForSVN} to every file name, keeping keys.
   *
   * @param list File names.
   * @return list Escaped file names.
   */
  public static function escapeFileNamesForSVN(array $files) {
    foreach (array_keys($files) as $key) {
      $files[$key] = self::escapeFileNameForSVN($files[$key]);
    }
    return $files;
  }

  public static function escapeFileNameForSVN($file) {
    // SVN interprets "x@1" as meaning "file x at revision 1", which is not
    // intended for files named "sprite@2x.png" or similar. For files with an
    // "@" in their names, escape them by adding "@" at the end, which SVN
    // interprets as "at the working copy revision". There is a special case
    // where ".@" means "fail with an error" instead of ". at the working copy
    // revision", so avoid escaping "." into ".@".
if (strpos($file, '@') !== false) { $file = $file.'@'; } return $file; } } diff --git a/src/repository/api/__tests__/ArcanistRepositoryAPIStateTestCase.php b/src/repository/api/__tests__/ArcanistRepositoryAPIStateTestCase.php index d349d5e5..ed13af09 100644 --- a/src/repository/api/__tests__/ArcanistRepositoryAPIStateTestCase.php +++ b/src/repository/api/__tests__/ArcanistRepositoryAPIStateTestCase.php @@ -1,124 +1,123 @@ parseState('git_basic.git.tgz'); } else { $this->assertSkipped('Git is not installed'); } } public function testHgStateParsing() { if (Filesystem::binaryExists('hg')) { $this->parseState('hg_basic.hg.tgz'); } else { $this->assertSkipped('Mercurial is not installed'); } } public function testSvnStateParsing() { if (Filesystem::binaryExists('svn')) { $this->parseState('svn_basic.svn.tgz'); } else { $this->assertSkipped('Subversion is not installed'); } } private function parseState($test) { $dir = dirname(__FILE__).'/state/'; $fixture = PhutilDirectoryFixture::newFromArchive($dir.'/'.$test); $fixture_path = $fixture->getPath(); $working_copy = ArcanistWorkingCopyIdentity::newFromPath($fixture_path); $configuration_manager = new ArcanistConfigurationManager(); $configuration_manager->setWorkingCopyIdentity($working_copy); $api = ArcanistRepositoryAPI::newAPIFromConfigurationManager( $configuration_manager); $api->setBaseCommitArgumentRules('arc:this'); if ($api instanceof ArcanistSubversionAPI) { // Upgrade the repository so that the test will still pass if the local // `svn` is newer than the `svn` which created the repository. // NOTE: Some versions of Subversion (1.7.x?) exit with an error code on // a no-op upgrade, although newer versions do not. We just ignore the // error here; if it's because of an actual problem we'll hit an error // shortly anyway. 
$api->execManualLocal('upgrade'); } $this->assertCorrectState($test, $api); } private function assertCorrectState($test, ArcanistRepositoryAPI $api) { $f_mod = ArcanistRepositoryAPI::FLAG_MODIFIED; $f_add = ArcanistRepositoryAPI::FLAG_ADDED; $f_del = ArcanistRepositoryAPI::FLAG_DELETED; $f_unt = ArcanistRepositoryAPI::FLAG_UNTRACKED; $f_con = ArcanistRepositoryAPI::FLAG_CONFLICT; $f_mis = ArcanistRepositoryAPI::FLAG_MISSING; $f_uns = ArcanistRepositoryAPI::FLAG_UNSTAGED; $f_unc = ArcanistRepositoryAPI::FLAG_UNCOMMITTED; $f_ext = ArcanistRepositoryAPI::FLAG_EXTERNALS; $f_obs = ArcanistRepositoryAPI::FLAG_OBSTRUCTED; $f_inc = ArcanistRepositoryAPI::FLAG_INCOMPLETE; switch ($test) { case 'svn_basic.svn.tgz': $expect = array( 'ADDED' => $f_add, 'COPIED_TO' => $f_add, 'DELETED' => $f_del, 'MODIFIED' => $f_mod, 'MOVED' => $f_del, 'MOVED_TO' => $f_add, 'PROPCHANGE' => $f_mod, 'UNTRACKED' => $f_unt, ); $this->assertEqual($expect, $api->getUncommittedStatus()); $this->assertEqual($expect, $api->getCommitRangeStatus()); break; case 'git_basic.git.tgz': $expect_uncommitted = array( 'UNCOMMITTED' => $f_add | $f_unc, 'UNSTAGED' => $f_mod | $f_uns | $f_unc, 'UNTRACKED' => $f_unt, ); $this->assertEqual($expect_uncommitted, $api->getUncommittedStatus()); $expect_range = array( 'ADDED' => $f_add, 'DELETED' => $f_del, 'MODIFIED' => $f_mod, 'UNCOMMITTED' => $f_add, 'UNSTAGED' => $f_add, ); $this->assertEqual($expect_range, $api->getCommitRangeStatus()); break; case 'hg_basic.hg.tgz': $expect_uncommitted = array( 'UNCOMMITTED' => $f_mod | $f_unc, 'UNTRACKED' => $f_unt, ); $this->assertEqual($expect_uncommitted, $api->getUncommittedStatus()); $expect_range = array( 'ADDED' => $f_add, 'DELETED' => $f_del, 'MODIFIED' => $f_mod, 'UNCOMMITTED' => $f_add, ); $this->assertEqual($expect_range, $api->getCommitRangeStatus()); break; default: throw new Exception( "No test cases for working copy '{$test}'!"); } } - } diff --git a/src/repository/hookapi/ArcanistHookAPI.php 
b/src/repository/hookapi/ArcanistHookAPI.php index 7c461a31..2c9a217f 100644 --- a/src/repository/hookapi/ArcanistHookAPI.php +++ b/src/repository/hookapi/ArcanistHookAPI.php @@ -1,9 +1,9 @@ root = $root; $this->transaction = $transaction; $this->repository = $repository; } public function getCurrentFileData($path) { list($err, $file) = exec_manual( 'svnlook cat --transaction %s %s %s', $this->transaction, $this->repository, $path); return ($err? null : $file); } public function getUpstreamFileData($path) { list($err, $file) = exec_manual( 'svnlook cat %s %s', $this->repository, $this->root."/$path"); return ($err ? null : $file); } + } diff --git a/src/repository/parser/ArcanistMercurialParser.php b/src/repository/parser/ArcanistMercurialParser.php index 62df7789..d5d2bf13 100644 --- a/src/repository/parser/ArcanistMercurialParser.php +++ b/src/repository/parser/ArcanistMercurialParser.php @@ -1,226 +1,225 @@ $flags, 'from' => null, ); $last_path = $path; } return $result; } /** * Parse the output of "hg status". This provides only basic information, you * can get more detailed information by invoking * @{method:parseMercurialStatusDetails}. * * @param string The stdout from running an "hg status" command. * @return dict Map of paths to ArcanistRepositoryAPI status flags. * @task parse */ public static function parseMercurialStatus($stdout) { $result = self::parseMercurialStatusDetails($stdout); return ipull($result, 'flags'); } /** * Parse the output of "hg log". This also parses "hg outgoing", "hg parents", * and other similar commands. This assumes "--style default". * * @param string The stdout from running an "hg log" command. * @return list List of dictionaries with commit information. 
* @task parse */ public static function parseMercurialLog($stdout) { $result = array(); $stdout = trim($stdout); if (!strlen($stdout)) { return $result; } $chunks = explode("\n\n", $stdout); foreach ($chunks as $chunk) { $commit = array(); $lines = explode("\n", $chunk); foreach ($lines as $line) { if (preg_match('/^(comparing with|searching for changes)/', $line)) { // These are sent to stdout when you run "hg outgoing" although the // format is otherwise identical to "hg log". continue; } if (preg_match('/^remote:/', $line)) { // This indicates remote error in "hg outgoing". continue; } list($name, $value) = explode(':', $line, 2); $value = trim($value); switch ($name) { case 'user': $commit['user'] = $value; break; case 'date': $commit['date'] = strtotime($value); break; case 'summary': $commit['summary'] = $value; break; case 'changeset': list($local, $rev) = explode(':', $value, 2); $commit['local'] = $local; $commit['rev'] = $rev; break; case 'parent': if (empty($commit['parents'])) { $commit['parents'] = array(); } list($local, $rev) = explode(':', $value, 2); $commit['parents'][] = array( 'local' => $local, 'rev' => $rev, ); break; case 'branch': $commit['branch'] = $value; break; case 'tag': $commit['tag'] = $value; break; case 'bookmark': $commit['bookmark'] = $value; break; default: throw new Exception("Unknown Mercurial log field '{$name}'!"); } } $result[] = $commit; } return $result; } /** * Parse the output of "hg branches". * * @param string The stdout from running an "hg branches" command. * @return list A list of dictionaries with branch information. * @task parse */ public static function parseMercurialBranches($stdout) { $stdout = rtrim($stdout, "\n"); if (!strlen($stdout)) { // No branches; commonly, this occurs in a newly initialized repository. 
return array(); } $lines = explode("\n", $stdout); $branches = array(); foreach ($lines as $line) { $matches = null; // Output of "hg branches" normally looks like: // // default 15101:a21ccf4412d5 // // ...but may also have human-readable cues like: // // stable 15095:ec222a29bdf0 (inactive) // // See the unit tests for more examples. $regexp = '/^(\S+(?:\s+\S+)*)\s+(\d+):([a-f0-9]+)(\s+\\(inactive\\))?$/'; if (!preg_match($regexp, $line, $matches)) { throw new Exception("Failed to parse 'hg branches' output: {$line}"); } $branches[$matches[1]] = array( 'local' => $matches[2], 'rev' => $matches[3], ); } return $branches; } } diff --git a/src/repository/parser/__tests__/ArcanistMercurialParserTestCase.php b/src/repository/parser/__tests__/ArcanistMercurialParserTestCase.php index 8e199669..2129448d 100644 --- a/src/repository/parser/__tests__/ArcanistMercurialParserTestCase.php +++ b/src/repository/parser/__tests__/ArcanistMercurialParserTestCase.php @@ -1,88 +1,89 @@ parseData( basename($file), Filesystem::readFile($root.'/'.$file)); } } private function parseData($name, $data) { switch ($name) { case 'branches-basic.txt': $output = ArcanistMercurialParser::parseMercurialBranches($data); $this->assertEqual( array('default', 'stable'), array_keys($output)); $this->assertEqual( array('a21ccf4412d5', 'ec222a29bdf0'), array_values(ipull($output, 'rev'))); break; case 'branches-with-spaces.txt': $output = ArcanistMercurialParser::parseMercurialBranches($data); $this->assertEqual( array( 'm m m m m 2:ffffffffffff (inactive)', 'xxx yyy zzz', 'default', "'", ), array_keys($output)); $this->assertEqual( array('0b9d8290c4e0', '78963faacfc7', '5db03c5500c6', 'ffffffffffff'), array_values(ipull($output, 'rev'))); break; case 'branches-empty.txt': $output = ArcanistMercurialParser::parseMercurialBranches($data); $this->assertEqual(array(), $output); break; case 'log-basic.txt': $output = ArcanistMercurialParser::parseMercurialLog($data); $this->assertEqual( 3, count($output)); 
$this->assertEqual( array('a21ccf4412d5', 'a051f8a6a7cc', 'b1f49efeab65'), array_values(ipull($output, 'rev'))); break; case 'log-empty.txt': // Empty logs (e.g., "hg parents" for a root revision) should parse // correctly. $output = ArcanistMercurialParser::parseMercurialLog($data); $this->assertEqual( array(), $output); break; case 'status-basic.txt': $output = ArcanistMercurialParser::parseMercurialStatus($data); $this->assertEqual( 4, count($output)); $this->assertEqual( array('changed', 'added', 'removed', 'untracked'), array_keys($output)); break; case 'status-moves.txt': $output = ArcanistMercurialParser::parseMercurialStatusDetails($data); $this->assertEqual( 'move_source', $output['moved_file']['from']); $this->assertEqual( null, $output['changed_file']['from']); $this->assertEqual( 'copy_source', $output['copied_file']['from']); $this->assertEqual( null, idx($output, 'copy_source')); break; default: throw new Exception("No test information for test data '{$name}'!"); } } + } diff --git a/src/unit/ArcanistUnitTestResult.php b/src/unit/ArcanistUnitTestResult.php index a1cd34a8..9628b591 100644 --- a/src/unit/ArcanistUnitTestResult.php +++ b/src/unit/ArcanistUnitTestResult.php @@ -1,137 +1,135 @@ namespace = $namespace; return $this; } public function getNamespace() { return $this->namespace; } public function setName($name) { $this->name = $name; return $this; } public function getName() { return $this->name; } public function setLink($link) { $this->link = $link; return $this; } public function getLink() { return $this->link; } public function setResult($result) { $this->result = $result; return $this; } public function getResult() { return $this->result; } public function setDuration($duration) { $this->duration = $duration; return $this; } public function getDuration() { return $this->duration; } public function setUserData($user_data) { $this->userData = $user_data; return $this; } public function getUserData() { return $this->userData; } /** - * "extra 
data" allows an implementation to store additional - * key/value metadata along with the result of the test run. + * "extra data" allows an implementation to store additional key/value + * metadata along with the result of the test run. */ public function setExtraData(array $extra_data = null) { $this->extraData = $extra_data; return $this; } public function getExtraData() { return $this->extraData; } public function setCoverage($coverage) { $this->coverage = $coverage; return $this; } public function getCoverage() { return $this->coverage; } /** * Merge several coverage reports into a comprehensive coverage report. * * @param list List of coverage report strings. * @return string Cumulative coverage report. */ public static function mergeCoverage(array $coverage) { if (empty($coverage)) { return null; } $base = reset($coverage); foreach ($coverage as $more_coverage) { $len = min(strlen($base), strlen($more_coverage)); for ($ii = 0; $ii < $len; $ii++) { if ($more_coverage[$ii] == 'C') { $base[$ii] = 'C'; } } } return $base; } public function toDictionary() { return array( 'name' => $this->getName(), 'link' => $this->getLink(), 'result' => $this->getResult(), 'duration' => $this->getDuration(), 'extra' => $this->getExtraData(), 'userData' => $this->getUserData(), 'coverage' => $this->getCoverage(), ); } } diff --git a/src/unit/engine/ArcanistBaseTestResultParser.php b/src/unit/engine/ArcanistBaseTestResultParser.php index d10fe123..09233f90 100644 --- a/src/unit/engine/ArcanistBaseTestResultParser.php +++ b/src/unit/engine/ArcanistBaseTestResultParser.php @@ -1,52 +1,49 @@ enableCoverage = $enable_coverage; - return $this; } public function setProjectRoot($project_root) { $this->projectRoot = $project_root; - return $this; } public function setCoverageFile($coverage_file) { $this->coverageFile = $coverage_file; - return $this; } public function setAffectedTests($affected_tests) { $this->affectedTests = $affected_tests; - return $this; } public function 
setStderr($stderr) { $this->stderr = $stderr; return $this; } /** * Parse test results from provided input and return an array * of ArcanistUnitTestResult * * @param string $path Path to test * @param string $test_results String containing test results * * @return array ArcanistUnitTestResult */ abstract public function parseTestResults($path, $test_results); + } diff --git a/src/unit/engine/ArcanistBaseUnitTestEngine.php b/src/unit/engine/ArcanistBaseUnitTestEngine.php index 9f3ae5e2..be31b25e 100644 --- a/src/unit/engine/ArcanistBaseUnitTestEngine.php +++ b/src/unit/engine/ArcanistBaseUnitTestEngine.php @@ -1,116 +1,115 @@ supportsRunAllTests() && $run_all_tests) { $class = get_class($this); throw new Exception( "Engine '{$class}' does not support --everything."); } $this->runAllTests = $run_all_tests; return $this; } public function getRunAllTests() { return $this->runAllTests; } protected function supportsRunAllTests() { return false; } final public function __construct() { } public function setConfigurationManager( ArcanistConfigurationManager $configuration_manager) { $this->configurationManager = $configuration_manager; return $this; } public function getConfigurationManager() { return $this->configurationManager; } final public function setWorkingCopy( ArcanistWorkingCopyIdentity $working_copy) { $this->workingCopy = $working_copy; return $this; } final public function getWorkingCopy() { return $this->workingCopy; } final public function setPaths(array $paths) { $this->paths = $paths; return $this; } final public function getPaths() { return $this->paths; } final public function setArguments(array $arguments) { $this->arguments = $arguments; return $this; } final public function getArgument($key, $default = null) { return idx($this->arguments, $key, $default); } final public function setEnableAsyncTests($enable_async_tests) { $this->enableAsyncTests = $enable_async_tests; return $this; } final public function getEnableAsyncTests() { return 
$this->enableAsyncTests; } final public function setEnableCoverage($enable_coverage) { $this->enableCoverage = $enable_coverage; return $this; } final public function getEnableCoverage() { return $this->enableCoverage; } public function setRenderer(ArcanistUnitRenderer $renderer) { $this->renderer = $renderer; return $this; } abstract public function run(); /** - * Modify the return value of this function in the child class, if - * you do not need to echo the test results after all the tests have - * been run. This is the case for example when the child class - * prints the tests results while the tests are running. + * Modify the return value of this function in the child class, if you do + * not need to echo the test results after all the tests have been run. This + * is the case for example when the child class prints the tests results + * while the tests are running. */ public function shouldEchoTestResults() { return true; } + } diff --git a/src/unit/engine/ArcanistXUnitTestResultParser.php b/src/unit/engine/ArcanistXUnitTestResultParser.php index 4c75b7e6..d8649172 100644 --- a/src/unit/engine/ArcanistXUnitTestResultParser.php +++ b/src/unit/engine/ArcanistXUnitTestResultParser.php @@ -1,96 +1,93 @@ loadXML($test_results); if (!$load_success) { $input_start = phutil_utf8_shorten($test_results, 150); throw new Exception( "Failed to load XUnit report; Input starts with:\n\n {$input_start}"); } $results = array(); $testcases = $xunit_dom->getElementsByTagName('testcase'); foreach ($testcases as $testcase) { $classname = $testcase->getAttribute('classname'); $name = $testcase->getAttribute('name'); $time = $testcase->getAttribute('time'); $status = ArcanistUnitTestResult::RESULT_PASS; $user_data = ''; // A skipped test is a test which was ignored using framework - // mechanizms (e.g. @skip decorator) + // mechanisms (e.g. 
@skip decorator) $skipped = $testcase->getElementsByTagName('skipped'); if ($skipped->length > 0) { $status = ArcanistUnitTestResult::RESULT_SKIP; $messages = array(); for ($ii = 0; $ii < $skipped->length; $ii++) { $messages[] = trim($skipped->item($ii)->nodeValue, " \n"); } $user_data .= implode("\n", $messages); } // Failure is a test which the code has explicitly failed by using - // the mechanizms for that purpose. e.g., via an assertEquals + // the mechanisms for that purpose. e.g., via an assertEquals $failures = $testcase->getElementsByTagName('failure'); if ($failures->length > 0) { $status = ArcanistUnitTestResult::RESULT_FAIL; $messages = array(); for ($ii = 0; $ii < $failures->length; $ii++) { $messages[] = trim($failures->item($ii)->nodeValue, " \n"); } $user_data .= implode("\n", $messages)."\n"; } // An errored test is one that had an unanticipated problem. e.g., an - // unchecked throwable, or a problem with an implementation of the - // test. + // unchecked throwable, or a problem with an implementation of the test. 
$errors = $testcase->getElementsByTagName('error'); if ($errors->length > 0) { $status = ArcanistUnitTestResult::RESULT_BROKEN; $messages = array(); for ($ii = 0; $ii < $errors->length; $ii++) { $messages[] = trim($errors->item($ii)->nodeValue, " \n"); } $user_data .= implode("\n", $messages)."\n"; } $result = new ArcanistUnitTestResult(); $result->setName($classname.'.'.$name); $result->setResult($status); $result->setDuration($time); $result->setUserData($user_data); $results[] = $result; } return $results; } } diff --git a/src/unit/engine/CSharpToolsTestEngine.php b/src/unit/engine/CSharpToolsTestEngine.php index f39e9796..aec00c73 100644 --- a/src/unit/engine/CSharpToolsTestEngine.php +++ b/src/unit/engine/CSharpToolsTestEngine.php @@ -1,286 +1,280 @@ getConfigurationManager(); - $this->cscoverHintPath = - $config->getConfigFromAnySource('unit.csharp.cscover.binary'); - $this->matchRegex = - $config->getConfigFromAnySource('unit.csharp.coverage.match'); - $this->excludedFiles = - $config->getConfigFromAnySource('unit.csharp.coverage.excluded'); + $this->cscoverHintPath = $config->getConfigFromAnySource( + 'unit.csharp.cscover.binary'); + $this->matchRegex = $config->getConfigFromAnySource( + 'unit.csharp.coverage.match'); + $this->excludedFiles = $config->getConfigFromAnySource( + 'unit.csharp.coverage.excluded'); parent::loadEnvironment(); if ($this->getEnableCoverage() === false) { return; } // Determine coverage path. if ($this->cscoverHintPath === null) { throw new Exception( - "Unable to locate cscover. Configure it with ". + "Unable to locate cscover. Configure it with ". "the `unit.csharp.coverage.binary' option in .arcconfig"); } $cscover = $this->projectRoot.DIRECTORY_SEPARATOR.$this->cscoverHintPath; if (file_exists($cscover)) { $this->coverEngine = Filesystem::resolvePath($cscover); } else { throw new Exception( - 'Unable to locate cscover coverage runner '. 
- '(have you built yet?)'); + 'Unable to locate cscover coverage runner (have you built yet?)'); } - } /** * Returns whether the specified assembly should be instrumented for - * code coverage reporting. Checks the excluded file list and the + * code coverage reporting. Checks the excluded file list and the * matching regex if they are configured. * * @return boolean Whether the assembly should be instrumented. */ private function assemblyShouldBeInstrumented($file) { if ($this->excludedFiles !== null) { if (array_key_exists((string)$file, $this->excludedFiles)) { return false; } } if ($this->matchRegex !== null) { if (preg_match($this->matchRegex, $file) === 1) { return true; } else { return false; } } return true; } /** * Overridden version of `buildTestFuture` so that the unit test can be run * via `cscover`, which instruments assemblies and reports on code coverage. * * @param string Name of the test assembly. * @return array The future, output filename and coverage filename * stored in an array. */ protected function buildTestFuture($test_assembly) { if ($this->getEnableCoverage() === false) { return parent::buildTestFuture($test_assembly); } // FIXME: Can't use TempFile here as xUnit doesn't like - // UNIX-style full paths. It sees the leading / as the + // UNIX-style full paths. It sees the leading / as the // start of an option flag, even when quoted. 
$xunit_temp = Filesystem::readRandomCharacters(10).'.results.xml'; if (file_exists($xunit_temp)) { unlink($xunit_temp); } $cover_temp = new TempFile(); $cover_temp->setPreserveFile(true); $xunit_cmd = $this->runtimeEngine; $xunit_args = null; if ($xunit_cmd === '') { $xunit_cmd = $this->testEngine; $xunit_args = csprintf( '%s /xml %s', $test_assembly, $xunit_temp); } else { $xunit_args = csprintf( '%s %s /xml %s', $this->testEngine, $test_assembly, $xunit_temp); } $assembly_dir = dirname($test_assembly); $assemblies_to_instrument = array(); foreach (Filesystem::listDirectory($assembly_dir) as $file) { if (substr($file, -4) == '.dll' || substr($file, -4) == '.exe') { if ($this->assemblyShouldBeInstrumented($file)) { $assemblies_to_instrument[] = $assembly_dir.DIRECTORY_SEPARATOR.$file; } } } if (count($assemblies_to_instrument) === 0) { return parent::buildTestFuture($test_assembly); } $future = new ExecFuture( '%C -o %s -c %s -a %s -w %s %Ls', trim($this->runtimeEngine.' '.$this->coverEngine), $cover_temp, $xunit_cmd, $xunit_args, $assembly_dir, $assemblies_to_instrument); $future->setCWD(Filesystem::resolvePath($this->projectRoot)); return array( $future, $assembly_dir.DIRECTORY_SEPARATOR.$xunit_temp, $cover_temp); } /** * Returns coverage results for the unit tests. * * @param string The name of the coverage file if one was provided by * `buildTestFuture`. * @return array Code coverage results, or null. */ protected function parseCoverageResult($cover_file) { if ($this->getEnableCoverage() === false) { return parent::parseCoverageResult($cover_file); } return $this->readCoverage($cover_file); } /** - * Retrieves the cached results for a coverage result file. The coverage + * Retrieves the cached results for a coverage result file. The coverage * result file is XML and can be large depending on what has been instrumented * so we cache it in case it's requested again. * * @param string The name of the coverage file. 
* @return array Code coverage results, or null if not cached. */ private function getCachedResultsIfPossible($cover_file) { if ($this->cachedResults == null) { $this->cachedResults = array(); } if (array_key_exists((string)$cover_file, $this->cachedResults)) { return $this->cachedResults[(string)$cover_file]; } return null; } /** * Stores the code coverage results in the cache. * * @param string The name of the coverage file. * @param array The results to cache. */ private function addCachedResults($cover_file, array $results) { if ($this->cachedResults == null) { $this->cachedResults = array(); } $this->cachedResults[(string)$cover_file] = $results; } /** - * Processes a set of XML tags as code coverage results. We parse + * Processes a set of XML tags as code coverage results. We parse * the `instrumented` and `executed` tags with this method so that * we can access the data multiple times without a performance hit. * * @param array The array of XML tags to parse. * @return array A PHP array containing the data. */ private function processTags($tags) { $results = array(); foreach ($tags as $tag) { $results[] = array( 'file' => $tag->getAttribute('file'), 'start' => $tag->getAttribute('start'), 'end' => $tag->getAttribute('end')); } return $results; } /** * Reads the code coverage results from the cscover results file. * * @param string The path to the code coverage file. * @return array The code coverage results. 
*/ public function readCoverage($cover_file) { $cached = $this->getCachedResultsIfPossible($cover_file); if ($cached !== null) { return $cached; } $coverage_dom = new DOMDocument(); $coverage_dom->loadXML(Filesystem::readFile($cover_file)); $modified = $this->getPaths(); $files = array(); $reports = array(); $instrumented = array(); $executed = array(); $instrumented = $this->processTags( $coverage_dom->getElementsByTagName('instrumented')); $executed = $this->processTags( $coverage_dom->getElementsByTagName('executed')); foreach ($instrumented as $instrument) { $absolute_file = $instrument['file']; $relative_file = substr($absolute_file, strlen($this->projectRoot) + 1); if (!in_array($relative_file, $files)) { $files[] = $relative_file; } } foreach ($files as $file) { $absolute_file = Filesystem::resolvePath( $this->projectRoot.DIRECTORY_SEPARATOR.$file); // get total line count in file $line_count = count(file($absolute_file)); $coverage = array(); for ($i = 0; $i < $line_count; $i++) { $coverage[$i] = 'N'; } foreach ($instrumented as $instrument) { if ($instrument['file'] !== $absolute_file) { continue; } for ( $i = $instrument['start']; $i <= $instrument['end']; $i++) { $coverage[$i - 1] = 'U'; } } foreach ($executed as $execute) { if ($execute['file'] !== $absolute_file) { continue; } for ( $i = $execute['start']; $i <= $execute['end']; $i++) { $coverage[$i - 1] = 'C'; } } $reports[$file] = implode($coverage); } $this->addCachedResults($cover_file, $reports); return $reports; } + } diff --git a/src/unit/engine/GoTestResultParser.php b/src/unit/engine/GoTestResultParser.php index 827f4ab7..547dfc00 100644 --- a/src/unit/engine/GoTestResultParser.php +++ b/src/unit/engine/GoTestResultParser.php @@ -1,140 +1,134 @@ $line) { if (strncmp($line, '--- PASS', 8) === 0) { // We have a passing test $meta = array(); preg_match( '/^--- PASS: (?P.+) \((?P