diff --git a/src/lint/linter/ArcanistCpplintLinter.php b/src/lint/linter/ArcanistCpplintLinter.php index 0b3416d9..f9c6ddf5 100644 --- a/src/lint/linter/ArcanistCpplintLinter.php +++ b/src/lint/linter/ArcanistCpplintLinter.php @@ -1,76 +1,77 @@ $match) { $matches[$key] = trim($match); } $severity = $this->getLintMessageSeverity($matches[3]); $message = new ArcanistLintMessage(); $message->setPath($path); $message->setLine($matches[1]); $message->setCode($matches[3]); $message->setName($matches[3]); $message->setDescription($matches[2]); $message->setSeverity($severity); $messages[] = $message; } return $messages; } protected function getLintCodeFromLinterConfigurationKey($code) { - if (!preg_match('@^[a-z_]+/[a-z_]+$@', $code)) { + if (!preg_match('@^[a-z_]+/[a-z0-9_+]+$@', $code)) { throw new Exception( pht( 'Unrecognized lint message code "%s". Expected a valid cpplint '. 'lint code like "%s" or "%s".', $code, 'build/include_order', 'whitespace/braces')); } return $code; } } diff --git a/src/lint/linter/ArcanistScriptAndRegexLinter.php b/src/lint/linter/ArcanistScriptAndRegexLinter.php index ca8883cb..0c3d9d9a 100644 --- a/src/lint/linter/ArcanistScriptAndRegexLinter.php +++ b/src/lint/linter/ArcanistScriptAndRegexLinter.php @@ -1,388 +1,392 @@ &1' * * The return code of the script must be 0, or an exception will be raised * reporting that the linter failed. If you have a script which exits nonzero * under normal circumstances, you can force it to always exit 0 by using a * configuration like this: * * sh -c '/opt/lint/lint.sh "$0" || true' * * Multiple instances of the script will be run in parallel if there are * multiple files to be linted, so they should not use any unique resources. * For instance, this configuration would not work properly, because several * processes may attempt to write to the file at the same time: * * COUNTEREXAMPLE * sh -c '/opt/lint/lint.sh --output /tmp/lint.out "$0" && cat /tmp/lint.out' * * There are necessary limits to how gracefully this linter can deal with * edge cases, because it is just a script and a regex. If you need to do * things that this linter can't handle, you can write a phutil linter and move * the logic to handle those cases into PHP. PHP is a better general-purpose * programming language than regular expressions are, if only by a small margin. * * == ...and Regex == * * The regex must be a valid PHP PCRE regex, including delimiters and flags. * * The regex will be matched against the entire output of the script, so it * should generally be in this form if messages are one-per-line: * * /^...$/m * * The regex should capture these named patterns with `(?P...)`: * * - `message` (required) Text describing the lint message. For example, * "This is a syntax error.". * - `name` (optional) Text summarizing the lint message. For example, * "Syntax Error". * - `severity` (optional) The word "error", "warning", "autofix", "advice", * or "disabled", in any combination of upper and lower case. Instead, you * may match groups called `error`, `warning`, `advice`, `autofix`, or * `disabled`. These allow you to match output formats like "E123" and * "W123" to indicate errors and warnings, even though the word "error" is * not present in the output. If no severity capturing group is present, * messages are raised with "error" severity. If multiple severity capturing * groups are present, messages are raised with the highest captured * severity. Capturing groups like `error` supersede the `severity` * capturing group. * - `error` (optional) Match some nonempty substring to indicate that this * message has "error" severity. * - `warning` (optional) Match some nonempty substring to indicate that this * message has "warning" severity. * - `advice` (optional) Match some nonempty substring to indicate that this * message has "advice" severity. * - `autofix` (optional) Match some nonempty substring to indicate that this * message has "autofix" severity. * - `disabled` (optional) Match some nonempty substring to indicate that this * message has "disabled" severity. * - `file` (optional) The name of the file to raise the lint message in. If * not specified, defaults to the linted file. It is generally not necessary * to capture this unless the linter can raise messages in files other than * the one it is linting. - * - `line` (optional) The line number of the message. + * - `line` (optional) The line number of the message. If no text is + * captured, the message is assumed to affect the entire file. * - `char` (optional) The character offset of the message. * - `offset` (optional) The byte offset of the message. If captured, this * supersedes `line` and `char`. * - `original` (optional) The text the message affects. * - `replacement` (optional) The text that the range captured by `original` * should be automatically replaced by to resolve the message. * - `code` (optional) A short error type identifier which can be used * elsewhere to configure handling of specific types of messages. For * example, "EXAMPLE1", "EXAMPLE2", etc., where each code identifies a * class of message like "syntax error", "missing whitespace", etc. This * allows configuration to later change the severity of all whitespace * messages, for example. * - `ignore` (optional) Match some nonempty substring to ignore the match. * You can use this if your linter sometimes emits text like "No lint * errors". * - `stop` (optional) Match some nonempty substring to stop processing input. * Remaining matches for this file will be discarded, but linting will * continue with other linters and other files. * - `halt` (optional) Match some nonempty substring to halt all linting of * this file by any linter. Linting will continue with other files. * - `throw` (optional) Match some nonempty substring to throw an error, which * will stop `arc` completely. You can use this to fail abruptly if you * encounter unexpected output. All processing will abort. * * Numbered capturing groups are ignored. * * For example, if your lint script's output looks like this: * * error:13 Too many goats! * warning:22 Not enough boats. * * ...you could use this regex to parse it: * * /^(?Pwarning|error):(?P\d+) (?P.*)$/m * * The simplest valid regex for line-oriented output is something like this: * * /^(?P.*)$/m * * @task lint Linting * @task linterinfo Linter Information * @task parse Parsing Output * @task config Validating Configuration */ final class ArcanistScriptAndRegexLinter extends ArcanistLinter { private $script = null; private $regex = null; private $output = array(); public function getInfoName() { return pht('Script and Regex'); } public function getInfoDescription() { return pht( 'Run an external script, then parse its output with a regular '. 'expression. This is a generic binding that can be used to '. 'run custom lint scripts.'); } /* -( Linting )------------------------------------------------------------ */ /** * Run the script on each file to be linted. * * @task lint */ public function willLintPaths(array $paths) { $root = $this->getProjectRoot(); $futures = array(); foreach ($paths as $path) { $future = new ExecFuture('%C %s', $this->script, $path); $future->setCWD($root); $futures[$path] = $future; } $futures = id(new FutureIterator($futures)) ->limit(4); foreach ($futures as $path => $future) { list($stdout) = $future->resolvex(); $this->output[$path] = $stdout; } } /** * Run the regex on the output of the script. * * @task lint */ public function lintPath($path) { $output = idx($this->output, $path); if (!strlen($output)) { // No output, but it exited 0, so just move on. return; } $matches = null; if (!preg_match_all($this->regex, $output, $matches, PREG_SET_ORDER)) { // Output with no matches. This might be a configuration error, but more // likely it's something like "No lint errors." and the user just hasn't // written a sufficiently powerful/ridiculous regexp to capture it into an // 'ignore' group. Don't make them figure this out; advanced users can // capture 'throw' to handle this case. return; } foreach ($matches as $match) { if (!empty($match['throw'])) { $throw = $match['throw']; throw new ArcanistUsageException( pht( "%s: configuration captured a '%s' named capturing group, ". "'%s'. Script output:\n%s", __CLASS__, 'throw', $throw, $output)); } if (!empty($match['halt'])) { $this->stopAllLinters(); break; } if (!empty($match['stop'])) { break; } if (!empty($match['ignore'])) { continue; } list($line, $char) = $this->getMatchLineAndChar($match, $path); $dict = array( 'path' => idx($match, 'file', $path), 'line' => $line, 'char' => $char, 'code' => idx($match, 'code', $this->getLinterName()), 'severity' => $this->getMatchSeverity($match), 'name' => idx($match, 'name', 'Lint'), 'description' => idx($match, 'message', pht('Undefined Lint Message')), ); $original = idx($match, 'original'); if ($original !== null) { $dict['original'] = $original; } $replacement = idx($match, 'replacement'); if ($replacement !== null) { $dict['replacement'] = $replacement; } $lint = ArcanistLintMessage::newFromDictionary($dict); $this->addLintMessage($lint); } } /* -( Linter Information )------------------------------------------------- */ /** * Return the short name of the linter. * * @return string Short linter identifier. * * @task linterinfo */ public function getLinterName() { return 'S&RX'; } public function getLinterConfigurationName() { return 'script-and-regex'; } public function getLinterConfigurationOptions() { // These fields are optional only to avoid breaking things. $options = array( 'script-and-regex.script' => array( 'type' => 'string', 'help' => pht('Script to execute.'), ), 'script-and-regex.regex' => array( 'type' => 'regex', 'help' => pht('The regex to process output with.'), ), ); return $options + parent::getLinterConfigurationOptions(); } public function setLinterConfigurationValue($key, $value) { switch ($key) { case 'script-and-regex.script': $this->script = $value; return; case 'script-and-regex.regex': $this->regex = $value; return; } return parent::setLinterConfigurationValue($key, $value); } /* -( Parsing Output )----------------------------------------------------- */ /** * Get the line and character of the message from the regex match. * * @param dict Captured groups from regex. - * @return pair Line and character of the message. + * @return pair Line and character of the message. * * @task parse */ private function getMatchLineAndChar(array $match, $path) { if (!empty($match['offset'])) { list($line, $char) = $this->getEngine()->getLineAndCharFromOffset( idx($match, 'file', $path), $match['offset']); return array($line + 1, $char + 1); } $line = idx($match, 'line'); - if ($line) { + if (strlen($line)) { $line = (int)$line; + if (!$line) { + $line = 1; + } } else { - $line = 1; + $line = null; } $char = idx($match, 'char'); if ($char) { $char = (int)$char; } else { $char = null; } return array($line, $char); } /** * Map the regex matching groups to a message severity. We look for either * a nonempty severity name group like 'error', or a group called 'severity' * with a valid name. * * @param dict Captured groups from regex. * @return const @{class:ArcanistLintSeverity} constant. * * @task parse */ private function getMatchSeverity(array $match) { $map = array( 'error' => ArcanistLintSeverity::SEVERITY_ERROR, 'warning' => ArcanistLintSeverity::SEVERITY_WARNING, 'autofix' => ArcanistLintSeverity::SEVERITY_AUTOFIX, 'advice' => ArcanistLintSeverity::SEVERITY_ADVICE, 'disabled' => ArcanistLintSeverity::SEVERITY_DISABLED, ); $severity_name = strtolower(idx($match, 'severity')); foreach ($map as $name => $severity) { if (!empty($match[$name])) { return $severity; } else if ($severity_name == $name) { return $severity; } } return ArcanistLintSeverity::SEVERITY_ERROR; } }