diff --git a/src/lint/linter/ArcanistScriptAndRegexLinter.php b/src/lint/linter/ArcanistScriptAndRegexLinter.php index f2df82ec..c2422f47 100644 --- a/src/lint/linter/ArcanistScriptAndRegexLinter.php +++ b/src/lint/linter/ArcanistScriptAndRegexLinter.php @@ -1,409 +1,450 @@ &1' * * The return code of the script must be 0, or an exception will be raised * reporting that the linter failed. If you have a script which exits nonzero * under normal circumstances, you can force it to always exit 0 by using a * configuration like this: * * sh -c '/opt/lint/lint.sh "$0" || true' * * Multiple instances of the script will be run in parallel if there are * multiple files to be linted, so they should not use any unique resources. * For instance, this configuration would not work properly, because several * processes may attempt to write to the file at the same time: * * COUNTEREXAMPLE * sh -c '/opt/lint/lint.sh --output /tmp/lint.out "$0" && cat /tmp/lint.out' * * There are necessary limits to how gracefully this linter can deal with * edge cases, because it is just a script and a regex. If you need to do * things that this linter can't handle, you can write a phutil linter and move * the logic to handle those cases into PHP. PHP is a better general-purpose * programming language than regular expressions are, if only by a small margin. * * == ...and Regex == * * The regex must be a valid PHP PCRE regex, including delimiters and flags. * * The regex will be matched against the entire output of the script, so it * should generally be in this form if messages are one-per-line: * * /^...$/m * * The regex should capture these named patterns with `(?P...)`: * * - `message` (required) Text describing the lint message. For example, * "This is a syntax error.". * - `name` (optional) Text summarizing the lint message. For example, * "Syntax Error". * - `severity` (optional) The word "error", "warning", "autofix", "advice", * or "disabled", in any combination of upper and lower case. Instead, you * may match groups called `error`, `warning`, `advice`, `autofix`, or * `disabled`. These allow you to match output formats like "E123" and * "W123" to indicate errors and warnings, even though the word "error" is * not present in the output. If no severity capturing group is present, * messages are raised with "error" severity. If multiple severity capturing * groups are present, messages are raised with the highest captured * severity. Capturing groups like `error` supersede the `severity` * capturing group. * - `error` (optional) Match some nonempty substring to indicate that this * message has "error" severity. * - `warning` (optional) Match some nonempty substring to indicate that this * message has "warning" severity. * - `advice` (optional) Match some nonempty substring to indicate that this * message has "advice" severity. * - `autofix` (optional) Match some nonempty substring to indicate that this * message has "autofix" severity. * - `disabled` (optional) Match some nonempty substring to indicate that this * message has "disabled" severity. * - `file` (optional) The name of the file to raise the lint message in. If * not specified, defaults to the linted file. It is generally not necessary * to capture this unless the linter can raise messages in files other than * the one it is linting. * - `line` (optional) The line number of the message. * - `char` (optional) The character offset of the message. * - `offset` (optional) The byte offset of the message. If captured, this * supersedes `line` and `char`. * - `original` (optional) The text the message affects. * - `replacement` (optional) The text that the range captured by `original` * should be automatically replaced by to resolve the message. * - `code` (optional) A short error type identifier which can be used * elsewhere to configure handling of specific types of messages. For * example, "EXAMPLE1", "EXAMPLE2", etc., where each code identifies a * class of message like "syntax error", "missing whitespace", etc. This * allows configuration to later change the severity of all whitespace * messages, for example. * - `ignore` (optional) Match some nonempty substring to ignore the match. * You can use this if your linter sometimes emits text like "No lint * errors". * - `stop` (optional) Match some nonempty substring to stop processing input. * Remaining matches for this file will be discarded, but linting will * continue with other linters and other files. * - `halt` (optional) Match some nonempty substring to halt all linting of * this file by any linter. Linting will continue with other files. * - `throw` (optional) Match some nonempty substring to throw an error, which * will stop `arc` completely. You can use this to fail abruptly if you * encounter unexpected output. All processing will abort. * * Numbered capturing groups are ignored. * * For example, if your lint script's output looks like this: * * error:13 Too many goats! * warning:22 Not enough boats. * * ...you could use this regex to parse it: * * /^(?Pwarning|error):(?P\d+) (?P.*)$/m * * The simplest valid regex for line-oriented output is something like this: * * /^(?P.*)$/m * * @task lint Linting * @task linterinfo Linter Information * @task parse Parsing Output * @task config Validating Configuration */ final class ArcanistScriptAndRegexLinter extends ArcanistLinter { + private $script = null; + private $regex = null; private $output = array(); public function getInfoName() { return pht('Script and Regex'); } public function getInfoDescription() { return pht( 'Run an external script, then parse its output with a regular '. 'expression. This is a generic binding that can be used to '. 'run custom lint scripts.'); } /* -( Linting )------------------------------------------------------------ */ /** * Run the script on each file to be linted. * * @task lint */ public function willLintPaths(array $paths) { $script = $this->getConfiguredScript(); $root = $this->getEngine()->getWorkingCopy()->getProjectRoot(); $futures = array(); foreach ($paths as $path) { $future = new ExecFuture('%C %s', $script, $path); $future->setCWD($root); $futures[$path] = $future; } $futures = id(new FutureIterator($futures)) ->limit(4); foreach ($futures as $path => $future) { list($stdout) = $future->resolvex(); $this->output[$path] = $stdout; } } /** * Run the regex on the output of the script. * * @task lint */ public function lintPath($path) { $regex = $this->getConfiguredRegex(); $output = idx($this->output, $path); if (!strlen($output)) { // No output, but it exited 0, so just move on. return; } $matches = null; if (!preg_match_all($regex, $output, $matches, PREG_SET_ORDER)) { // Output with no matches. This might be a configuration error, but more // likely it's something like "No lint errors." and the user just hasn't // written a sufficiently powerful/ridiculous regexp to capture it into an // 'ignore' group. Don't make them figure this out; advanced users can // capture 'throw' to handle this case. return; } foreach ($matches as $match) { if (!empty($match['throw'])) { $throw = $match['throw']; throw new ArcanistUsageException( - "ArcanistScriptAndRegexLinter: ". - "configuration captured a 'throw' named capturing group, ". - "'{$throw}'. Script output:\n". - $output); + pht( + "%s: configuration captured a '%s' named capturing group, ". + "'%s'. Script output:\n%s", + __CLASS__, + 'throw', + $throw, + $output)); } if (!empty($match['halt'])) { $this->stopAllLinters(); break; } if (!empty($match['stop'])) { break; } if (!empty($match['ignore'])) { continue; } list($line, $char) = $this->getMatchLineAndChar($match, $path); $dict = array( 'path' => idx($match, 'file', $path), 'line' => $line, 'char' => $char, 'code' => idx($match, 'code', $this->getLinterName()), 'severity' => $this->getMatchSeverity($match), 'name' => idx($match, 'name', 'Lint'), 'description' => idx($match, 'message', 'Undefined Lint Message'), ); $original = idx($match, 'original'); if ($original !== null) { $dict['original'] = $original; } $replacement = idx($match, 'replacement'); if ($replacement !== null) { $dict['replacement'] = $replacement; } $lint = ArcanistLintMessage::newFromDictionary($dict); $this->addLintMessage($lint); } } /* -( Linter Information )------------------------------------------------- */ /** * Return the short name of the linter. * * @return string Short linter identifier. * * @task linterinfo */ public function getLinterName() { return 'S&RX'; } public function getLinterConfigurationName() { return 'script-and-regex'; } + public function getLinterConfigurationOptions() { + // These fields are optional only to avoid breaking things. + $options = array( + 'script-and-regex.script' => array( + 'type' => 'optional string', + 'help' => pht('Script to execute.'), + ), + 'script-and-regex.regex' => array( + 'type' => 'optional regex', + 'help' => pht('The regex to process output with.'), + ), + ); + + return $options + parent::getLinterConfigurationOptions(); + } + + public function setLinterConfigurationValue($key, $value) { + switch ($key) { + case 'script-and-regex.script': + $this->script = $value; + return; + case 'script-and-regex.regex': + $this->regex = $value; + return; + } + + return parent::setLinterConfigurationValue($key, $value); + } /* -( Parsing Output )----------------------------------------------------- */ /** * Get the line and character of the message from the regex match. * * @param dict Captured groups from regex. * @return pair Line and character of the message. * * @task parse */ private function getMatchLineAndChar(array $match, $path) { if (!empty($match['offset'])) { list($line, $char) = $this->getEngine()->getLineAndCharFromOffset( idx($match, 'file', $path), $match['offset']); return array($line + 1, $char + 1); } $line = idx($match, 'line', 1); $char = idx($match, 'char'); return array($line, $char); } /** * Map the regex matching groups to a message severity. We look for either * a nonempty severity name group like 'error', or a group called 'severity' * with a valid name. * * @param dict Captured groups from regex. * @return const @{class:ArcanistLintSeverity} constant. * * @task parse */ private function getMatchSeverity(array $match) { $map = array( 'error' => ArcanistLintSeverity::SEVERITY_ERROR, 'warning' => ArcanistLintSeverity::SEVERITY_WARNING, 'autofix' => ArcanistLintSeverity::SEVERITY_AUTOFIX, 'advice' => ArcanistLintSeverity::SEVERITY_ADVICE, 'disabled' => ArcanistLintSeverity::SEVERITY_DISABLED, ); $severity_name = strtolower(idx($match, 'severity')); foreach ($map as $name => $severity) { if (!empty($match[$name])) { return $severity; } else if ($severity_name == $name) { return $severity; } } return ArcanistLintSeverity::SEVERITY_ERROR; } /* -( Validating Configuration )------------------------------------------- */ /** * Load, validate, and return the "script" configuration. * * @return string The shell command fragment to use to run the linter. * * @task config */ private function getConfiguredScript() { - $key = 'linter.scriptandregex.script'; - $config = $this->getEngine() - ->getConfigurationManager() - ->getConfigFromAnySource($key); + if (strlen($this->script)) { + return $this->script; + } + + $config = $this->getDeprecatedConfiguration('linter.scriptandregex.script'); if (!$config) { throw new ArcanistUsageException( - "ArcanistScriptAndRegexLinter: ". - "You must configure '{$key}' to point to a script to execute."); + pht( + 'No "script" configured for script-and-regex linter, which '. + 'requires a script. Use "%s" to configure one.', + 'script-and-regex.script')); } // NOTE: No additional validation since the "script" can be some random // shell command and/or include flags, so it does not need to point to some // file on disk. return $config; } /** * Load, validate, and return the "regex" configuration. * * @return string A valid PHP PCRE regular expression. * * @task config */ private function getConfiguredRegex() { + if (strlen($this->regex)) { + return $this->regex; + } + $key = 'linter.scriptandregex.regex'; - $config = $this->getEngine() - ->getConfigurationManager() - ->getConfigFromAnySource($key); + $config = $this->getDeprecatedConfiguration($key); if (!$config) { throw new ArcanistUsageException( - "ArcanistScriptAndRegexLinter: ". - "You must configure '{$key}' with a valid PHP PCRE regex."); + pht( + 'No "regex" configured for script-and-regex linter, which '. + 'requires a regex. Use "%s" to configure one.', + 'script-and-regex.regex')); } // NOTE: preg_match() returns 0 for no matches and false for compile error; // this won't match, but will validate the syntax of the regex. - $ok = preg_match($config, 'syntax-check'); + $ok = @preg_match($config, 'syntax-check'); if ($ok === false) { throw new ArcanistUsageException( - "ArcanistScriptAndRegexLinter: ". - "Regex '{$config}' does not compile. You must configure '{$key}' with ". - "a valid PHP PCRE regex, including delimiters."); + pht( + 'Regular expression passed to script-and-regex linter ("%s") is '. + 'not a valid regular expression.', + $config)); } return $config; } }