Changeset View
Standalone View
src/lint/linter/ArcanistTextLinter.php
| <?php | <?php | ||||
| /** | /** | ||||
| * Enforces basic text file rules. | * Enforces basic text file rules. | ||||
| */ | */ | ||||
| final class ArcanistTextLinter extends ArcanistLinter { | final class ArcanistTextLinter extends ArcanistLinter { | ||||
| const LINT_DOS_NEWLINE = 1; | const LINT_END_OF_LINE = 1; | ||||
| const LINT_TAB_LITERAL = 2; | const LINT_INDENT_STYLE = 2; | ||||
| const LINT_LINE_WRAP = 3; | const LINT_LINE_WRAP = 3; | ||||
| const LINT_EOF_NEWLINE = 4; | const LINT_EOF_NEWLINE = 4; | ||||
| const LINT_BAD_CHARSET = 5; | const LINT_CHARSET = 5; | ||||
| const LINT_TRAILING_WHITESPACE = 6; | const LINT_TRAILING_WHITESPACE = 6; | ||||
| const LINT_BOF_WHITESPACE = 8; | const LINT_BOF_WHITESPACE = 8; | ||||
| const LINT_EOF_WHITESPACE = 9; | const LINT_EOF_WHITESPACE = 9; | ||||
| private $editorconfig; | |||||
| private $maxLineLength = 80; | private $maxLineLength = 80; | ||||
epriestley: I think we should still default to 80, using a sentinel if that's easiest. | |||||
| public function getInfoName() { | public function getInfoName() { | ||||
| return pht('Basic Text Linter'); | return pht('Basic Text Linter'); | ||||
| } | } | ||||
| public function getInfoDescription() { | public function getInfoDescription() { | ||||
| return pht( | return pht( | ||||
| 'Enforces basic text rules like line length, character encoding, '. | 'Enforces basic text rules like line length, character encoding, '. | ||||
| 'and trailing whitespace.'); | 'and trailing whitespace. Supports EditorConfig configuration.'); | ||||
| } | } | ||||
| public function getLinterPriority() { | public function getLinterPriority() { | ||||
| return 0.5; | return 0.5; | ||||
| } | } | ||||
| public function getLinterConfigurationOptions() { | public function getLinterConfigurationOptions() { | ||||
| $options = array( | $options = array( | ||||
| 'text.max-line-length' => array( | 'text.max-line-length' => array( | ||||
| 'type' => 'optional int', | 'type' => 'optional int', | ||||
| 'help' => pht( | 'help' => pht( | ||||
| 'Adjust the maximum line length before a warning is raised. By '. | 'Adjust the maximum line length before a warning is raised. By '. | ||||
| 'default, a warning is raised on lines exceeding 80 characters.'), | 'default, a warning is raised on lines exceeding 80 characters. '. | ||||
| 'This is deprecated and you should use an EditorConfig file to '. | |||||
| 'configure this value.'), | |||||
Done Inline Actions(Particularly since 80 is still documented as the default behavior.) epriestley: (Particularly since 80 is still documented as the default behavior.) | |||||
| ), | ), | ||||
| ); | ); | ||||
| return $options + parent::getLinterConfigurationOptions(); | return $options + parent::getLinterConfigurationOptions(); | ||||
| } | } | ||||
| public function setMaxLineLength($new_length) { | public function setMaxLineLength($new_length) { | ||||
| $this->maxLineLength = $new_length; | $this->maxLineLength = $new_length; | ||||
| return $this; | return $this; | ||||
| } | } | ||||
| public function setLinterConfigurationValue($key, $value) { | public function setLinterConfigurationValue($key, $value) { | ||||
| switch ($key) { | switch ($key) { | ||||
| case 'text.max-line-length': | case 'text.max-line-length': | ||||
| phutil_deprecated( | |||||
| $key, | |||||
| 'You should specify this configuration in an EditorConfig file.'); | |||||
| $this->setMaxLineLength($value); | $this->setMaxLineLength($value); | ||||
| return; | return; | ||||
| } | } | ||||
| return parent::setLinterConfigurationValue($key, $value); | return parent::setLinterConfigurationValue($key, $value); | ||||
| } | } | ||||
| public function getLinterName() { | public function getLinterName() { | ||||
| return 'TXT'; | return 'TXT'; | ||||
| } | } | ||||
| public function getLinterConfigurationName() { | public function getLinterConfigurationName() { | ||||
| return 'text'; | return 'text'; | ||||
| } | } | ||||
| public function getLintSeverityMap() { | public function getLintSeverityMap() { | ||||
| return array( | return array( | ||||
| self::LINT_LINE_WRAP => ArcanistLintSeverity::SEVERITY_WARNING, | self::LINT_LINE_WRAP => ArcanistLintSeverity::SEVERITY_WARNING, | ||||
| self::LINT_TRAILING_WHITESPACE => ArcanistLintSeverity::SEVERITY_AUTOFIX, | self::LINT_TRAILING_WHITESPACE => ArcanistLintSeverity::SEVERITY_ADVICE, | ||||
| self::LINT_BOF_WHITESPACE => ArcanistLintSeverity::SEVERITY_AUTOFIX, | self::LINT_BOF_WHITESPACE => ArcanistLintSeverity::SEVERITY_ADVICE, | ||||
| self::LINT_EOF_WHITESPACE => ArcanistLintSeverity::SEVERITY_AUTOFIX, | self::LINT_EOF_WHITESPACE => ArcanistLintSeverity::SEVERITY_ADVICE, | ||||
| ); | ); | ||||
| } | } | ||||
| public function getLintNameMap() { | public function getLintNameMap() { | ||||
| return array( | return array( | ||||
| self::LINT_DOS_NEWLINE => pht('DOS Newlines'), | self::LINT_END_OF_LINE => pht('End Of Line'), | ||||
| self::LINT_TAB_LITERAL => pht('Tab Literal'), | self::LINT_INDENT_STYLE => pht('Indent Style'), | ||||
| self::LINT_LINE_WRAP => pht('Line Too Long'), | self::LINT_LINE_WRAP => pht('Line Too Long'), | ||||
| self::LINT_EOF_NEWLINE => pht('File Does Not End in Newline'), | self::LINT_EOF_NEWLINE => pht('File Does Not End in Newline'), | ||||
| self::LINT_BAD_CHARSET => pht('Bad Charset'), | self::LINT_CHARSET => pht('Bad Charset'), | ||||
| self::LINT_TRAILING_WHITESPACE => pht('Trailing Whitespace'), | self::LINT_TRAILING_WHITESPACE => pht('Trailing Whitespace'), | ||||
| self::LINT_BOF_WHITESPACE => pht('Leading Whitespace at BOF'), | self::LINT_BOF_WHITESPACE => pht('Leading Whitespace at BOF'), | ||||
| self::LINT_EOF_WHITESPACE => pht('Trailing Whitespace at EOF'), | self::LINT_EOF_WHITESPACE => pht('Trailing Whitespace at EOF'), | ||||
| ); | ); | ||||
| } | } | ||||
| public function willLintPaths(array $paths) { | |||||
| $root = $this->getEngine()->getWorkingCopy()->getProjectRoot(); | |||||
| $this->editorconfig = new PhutilEditorConfig($root); | |||||
| } | |||||
| public function lintPath($path) { | public function lintPath($path) { | ||||
| if (!strlen($this->getData($path))) { | if (!strlen($this->getData($path))) { | ||||
| // If the file is empty, don't bother; particularly, don't require | // If the file is empty, don't bother; particularly, don't require | ||||
| // the user to add a newline. | // the user to add a newline. | ||||
| return; | return; | ||||
| } | } | ||||
| $this->lintNewlines($path); | $this->lintEndOfLine($path); | ||||
| $this->lintTabs($path); | $this->lintIndentStyle($path); | ||||
| if ($this->didStopAllLinters()) { | if ($this->didStopAllLinters()) { | ||||
| return; | return; | ||||
| } | } | ||||
| $this->lintCharset($path); | $this->lintCharset($path); | ||||
| if ($this->didStopAllLinters()) { | if ($this->didStopAllLinters()) { | ||||
| return; | return; | ||||
| } | } | ||||
| $this->lintLineLength($path); | $this->lintLineLength($path); | ||||
| $this->lintEOFNewline($path); | $this->lintEOFNewline($path); | ||||
| $this->lintTrailingWhitespace($path); | $this->lintTrailingWhitespace($path); | ||||
| $this->lintBOFWhitespace($path); | $this->lintBOFWhitespace($path); | ||||
| $this->lintEOFWhitespace($path); | $this->lintEOFWhitespace($path); | ||||
| } | } | ||||
| protected function lintNewlines($path) { | private function lintEndOfLine($path) { | ||||
| switch ($this->getEditorConfig($path, PhutilEditorConfig::END_OF_LINE)) { | |||||
aviveyUnsubmitted Not Done Inline ActionsWould it be too preachy to default to lf here? avivey: Would it be too preachy to default to `lf` here? | |||||
| case 'lf': | |||||
| $search = "\r"; | |||||
| $replace = array( | |||||
| "\r\n" => "\n", | |||||
| ); | |||||
| break; | |||||
| case 'cr': | |||||
| $search = "\n"; | |||||
| $replace = array( | |||||
| "\r\n" => "\r", | |||||
epriestleyUnsubmitted Not Done Inline ActionsShould probably also do "\n" => "\r"? epriestley: Should probably also do "\n" => "\r"? | |||||
| ); | |||||
| break; | |||||
| case 'lfcr': | |||||
| $search = "\n"; | |||||
| $replace = array( | |||||
| "\r\n" => "\n\r", | |||||
aviveyUnsubmitted Not Done Inline ActionsThis doesn't look right... maybe something like '\r([^\n])' => '\r\n\1', '([^\r])\n' => '\1\r\n', (I'm pretty sure that's the wrong syntax for it) avivey: This doesn't look right...
maybe something like
```lang=php
'\r([^\n])' => '\r\n\1',
'… | |||||
epriestleyUnsubmitted Not Done Inline ActionsThis is sort of right, modulo @avivey's comment, except that it's consistent with "lfcr" (unused anywhere) and should probably be "crlf" (windows). epriestley: This is sort of right, modulo @avivey's comment, except that it's consistent with "lfcr"… | |||||
| ); | |||||
| break; | |||||
| default: | |||||
| return; | |||||
| } | |||||
| $data = $this->getData($path); | $data = $this->getData($path); | ||||
| $pos = strpos($this->getData($path), "\r"); | $pos = strpos($this->getData($path), $search); | ||||
| if ($pos !== false) { | if ($pos !== false) { | ||||
| $this->raiseLintAtOffset( | $this->raiseLintAtOffset( | ||||
| 0, | 0, | ||||
| self::LINT_DOS_NEWLINE, | self::LINT_END_OF_LINE, | ||||
| pht('You must use ONLY Unix linebreaks ("%s") in source code.', '\n'), | pht('Invalid end-of-line character(s).'), | ||||
| $data, | $data, | ||||
| str_replace("\r\n", "\n", $data)); | str_replace(array_keys($replace), array_values($replace), $data)); | ||||
| if ($this->isMessageEnabled(self::LINT_DOS_NEWLINE)) { | if ($this->isMessageEnabled(self::LINT_END_OF_LINE)) { | ||||
| $this->stopAllLinters(); | $this->stopAllLinters(); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| protected function lintTabs($path) { | private function lintIndentStyle($path) { | ||||
| switch ($this->getEditorConfig($path, PhutilEditorConfig::INDENT_STYLE)) { | |||||
| case 'space': | |||||
| break; | |||||
| case 'tab': | |||||
| // TODO: Can we do anything here? | |||||
| return; | |||||
Not Done Inline ActionsShouldn't this be basically inverse of space for indentation? johnny-bit: Shouldn't this be basically inverse of space for indentation? | |||||
| default: | |||||
| return; | |||||
| } | |||||
| $pos = strpos($this->getData($path), "\t"); | $pos = strpos($this->getData($path), "\t"); | ||||
| if ($pos !== false) { | if ($pos !== false) { | ||||
| $this->raiseLintAtOffset( | $this->raiseLintAtOffset( | ||||
| $pos, | $pos, | ||||
| self::LINT_TAB_LITERAL, | self::LINT_INDENT_STYLE, | ||||
| pht('Configure your editor to use spaces for indentation.'), | pht('Configure your editor to use spaces for indentation.'), | ||||
| "\t"); | "\t"); | ||||
Done Inline ActionsThis error should depend on indentation type set by editorconfig, right? So in case of tabs for indentation it should suggest tabs and not spaces. johnny-bit: This error should depend on indentation type set by editorconfig, right? So in case of tabs for… | |||||
Done Inline ActionsThis won't be reached because we return from the 'tab' branch above. epriestley: This won't be reached because we `return` from the 'tab' branch above. | |||||
| } | } | ||||
| } | } | ||||
| protected function lintLineLength($path) { | private function lintCharset($path) { | ||||
| $lines = explode("\n", $this->getData($path)); | switch ($this->getEditorConfig($path, PhutilEditorConfig::CHARSET)) { | ||||
| case 'latin1': | |||||
| $charset = 'ISO-8859-1'; | |||||
| break; | |||||
| case 'utf-8': | |||||
| case 'utf-8-bom': | |||||
| // TODO: Do we really care about the BOM? | |||||
| $charset = 'UTF-8'; | |||||
| break; | |||||
Not Done Inline ActionsSome projects do use BOM and require it in the source. EditorConfig project dis-encourages use of BOM, but nevertheless respects it. I'd recommend to do something like: case 'utf-8-bom': $checkbom = true; case 'utf-8': $charset = 'UTF-8'; break; And add one additional check for BOM after successful mb_check_encoding if UTF-8 check succeeded. johnny-bit: Some projects do use BOM and require it in the source. EditorConfig project dis-encourages use… | |||||
| case 'utf-16be': | |||||
| $charset = 'UTF-16BE'; | |||||
| break; | |||||
| case 'utf-16le': | |||||
| $charset = 'UTF-16LE'; | |||||
| break; | |||||
| default: | |||||
| return; | |||||
| } | |||||
| $data = $this->getData($path); | |||||
| if (mb_check_encoding($data, $charset)) { | |||||
| return; | |||||
| } | |||||
| $this->raiseLintAtPath( | |||||
| self::LINT_CHARSET, | |||||
| pht('Invalid characters in charset.')); | |||||
| if ($this->isMessageEnabled(self::LINT_BAD_CHARSET)) { | |||||
| $this->stopAllLinters(); | |||||
| } | |||||
| } | |||||
| private function lintLineLength($path) { | |||||
| $lines = phutil_split_lines($this->getData($path), false); | |||||
epriestleyUnsubmitted Not Done Inline ActionsNote that phutil_split_lines() only recognizes "\n" (lf, unix) and "\r\n" (crlf, windows) newlines, because no VCS recognizes "\r" newlines. So this probably won't work with "cr" newlines. However, I believe nothing uses them. epriestley: Note that `phutil_split_lines()` only recognizes "\n" (lf, unix) and "\r\n" (crlf, windows)… | |||||
| $width = $this->getEditorConfig( | |||||
| $path, | |||||
| PhutilEditorConfig::LINE_LENGTH, | |||||
| $this->maxLineLength); | |||||
| if (!$width) { | |||||
| return; | |||||
| } | |||||
| $width = $this->maxLineLength; | |||||
| foreach ($lines as $line_idx => $line) { | foreach ($lines as $line_idx => $line) { | ||||
| if (strlen($line) > $width) { | if (strlen($line) > $width) { | ||||
| $this->raiseLintAtLine( | $this->raiseLintAtLine( | ||||
| $line_idx + 1, | $line_idx + 1, | ||||
| 1, | 1, | ||||
| self::LINT_LINE_WRAP, | self::LINT_LINE_WRAP, | ||||
| pht( | pht( | ||||
| 'This line is %s characters long, but the '. | 'This line is %s characters long, but the '. | ||||
| 'convention is %s characters.', | 'convention is %s characters.', | ||||
| new PhutilNumber(strlen($line)), | new PhutilNumber(strlen($line)), | ||||
| $width), | $width), | ||||
| $line); | $line); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| protected function lintEOFNewline($path) { | private function lintEOFNewline($path) { | ||||
| $data = $this->getData($path); | $config = $this->getEditorConfig( | ||||
| if (!strlen($data) || $data[strlen($data) - 1] != "\n") { | $path, | ||||
| $this->raiseLintAtOffset( | PhutilEditorConfig::FINAL_NEWLINE, | ||||
| strlen($data), | false); | ||||
| self::LINT_EOF_NEWLINE, | |||||
| pht('Files must end in a newline.'), | if (!$config) { | ||||
| '', | return; | ||||
| "\n"); | |||||
| } | |||||
| } | } | ||||
| protected function lintCharset($path) { | |||||
| $data = $this->getData($path); | $data = $this->getData($path); | ||||
| $matches = null; | $matches = null; | ||||
| $bad = '[^\x09\x0A\x20-\x7E]'; | $preg = preg_match( | ||||
| $preg = preg_match_all( | '/(\n|\r|\r\n)$/', | ||||
| "/{$bad}(.*{$bad})?/", | |||||
| $data, | $data, | ||||
| $matches, | $matches, | ||||
| PREG_OFFSET_CAPTURE); | PREG_OFFSET_CAPTURE); | ||||
| if (!$preg) { | if ($preg) { | ||||
| return; | return; | ||||
| } | } | ||||
| foreach ($matches[0] as $match) { | |||||
| list($string, $offset) = $match; | |||||
| $this->raiseLintAtOffset( | $this->raiseLintAtOffset( | ||||
| $offset, | strlen($data), | ||||
| self::LINT_BAD_CHARSET, | self::LINT_EOF_NEWLINE, | ||||
| pht( | pht('Files must end in a newline.'), | ||||
| 'Source code should contain only ASCII bytes with ordinal '. | '', | ||||
| 'decimal values between 32 and 126 inclusive, plus linefeed. '. | $this->getEndOfLineCharacters($path)); | ||||
| 'Do not use UTF-8 or other multibyte charsets.'), | |||||
| $string); | |||||
| } | } | ||||
| if ($this->isMessageEnabled(self::LINT_BAD_CHARSET)) { | private function lintTrailingWhitespace($path) { | ||||
| $this->stopAllLinters(); | $config = $this->getEditorConfig( | ||||
| } | $path, | ||||
| PhutilEditorConfig::TRAILING_WHITESPACE, | |||||
| false); | |||||
| if (!$config) { | |||||
| return; | |||||
| } | } | ||||
| protected function lintTrailingWhitespace($path) { | |||||
| $data = $this->getData($path); | $data = $this->getData($path); | ||||
| $matches = null; | $matches = null; | ||||
| $preg = preg_match_all( | $preg = preg_match_all( | ||||
| '/ +$/m', | '/ +$/m', | ||||
| $data, | $data, | ||||
| $matches, | $matches, | ||||
| PREG_OFFSET_CAPTURE); | PREG_OFFSET_CAPTURE); | ||||
| if (!$preg) { | if (!$preg) { | ||||
| Show All 9 Lines | foreach ($matches[0] as $match) { | ||||
| 'This line contains trailing whitespace. Consider setting '. | 'This line contains trailing whitespace. Consider setting '. | ||||
| 'up your editor to automatically remove trailing whitespace, '. | 'up your editor to automatically remove trailing whitespace, '. | ||||
| 'you will save time.'), | 'you will save time.'), | ||||
| $string, | $string, | ||||
| ''); | ''); | ||||
| } | } | ||||
| } | } | ||||
| protected function lintBOFWhitespace($path) { | private function lintBOFWhitespace($path) { | ||||
| $data = $this->getData($path); | $config = $this->getEditorConfig( | ||||
| $path, | |||||
| PhutilEditorConfig::TRAILING_WHITESPACE, | |||||
| false); | |||||
| if (!$config) { | |||||
| return; | |||||
| } | |||||
| $data = $this->getData($path); | |||||
| $matches = null; | $matches = null; | ||||
| $preg = preg_match( | $preg = preg_match( | ||||
| '/^\s*\n/', | '/^\s*\n/', | ||||
| $data, | $data, | ||||
| $matches, | $matches, | ||||
| PREG_OFFSET_CAPTURE); | PREG_OFFSET_CAPTURE); | ||||
| if (!$preg) { | if (!$preg) { | ||||
| return; | return; | ||||
| } | } | ||||
| list($string, $offset) = $matches[0]; | list($string, $offset) = $matches[0]; | ||||
| $this->raiseLintAtOffset( | $this->raiseLintAtOffset( | ||||
| $offset, | $offset, | ||||
| self::LINT_BOF_WHITESPACE, | self::LINT_BOF_WHITESPACE, | ||||
| pht( | pht( | ||||
| 'This file contains leading whitespace at the beginning of the file. '. | 'This file contains leading whitespace at the beginning of the file. '. | ||||
| 'This is unnecessary and should be avoided when possible.'), | 'This is unnecessary and should be avoided when possible.'), | ||||
| $string, | $string, | ||||
| ''); | ''); | ||||
| } | } | ||||
| protected function lintEOFWhitespace($path) { | private function lintEOFWhitespace($path) { | ||||
| $data = $this->getData($path); | $config = $this->getEditorConfig( | ||||
| $path, | |||||
| PhutilEditorConfig::TRAILING_WHITESPACE, | |||||
| false); | |||||
| if (!$config) { | |||||
| return; | |||||
| } | |||||
| $data = $this->getData($path); | |||||
| $matches = null; | $matches = null; | ||||
| $preg = preg_match( | $preg = preg_match( | ||||
| '/(?<=\n)\s+$/', | '/(?<=\n)\s+$/', | ||||
| $data, | $data, | ||||
| $matches, | $matches, | ||||
| PREG_OFFSET_CAPTURE); | PREG_OFFSET_CAPTURE); | ||||
| if (!$preg) { | if (!$preg) { | ||||
| return; | return; | ||||
| } | } | ||||
| list($string, $offset) = $matches[0]; | list($string, $offset) = $matches[0]; | ||||
| $this->raiseLintAtOffset( | $this->raiseLintAtOffset( | ||||
| $offset, | $offset, | ||||
| self::LINT_EOF_WHITESPACE, | self::LINT_EOF_WHITESPACE, | ||||
| pht( | pht( | ||||
| 'This file contains trailing whitespace at the end of the file. '. | 'This file contains trailing whitespace at the end of the file. '. | ||||
| 'This is unnecessary and should be avoided when possible.'), | 'This is unnecessary and should be avoided when possible.'), | ||||
| $string, | $string, | ||||
| ''); | ''); | ||||
| } | } | ||||
| /** | |||||
| * Retrieve a value from the EditorConfig files. | |||||
| * | |||||
| * This function delegates handling of the EditorConfig file(s) to | |||||
| * @{class:PhutilEditorConfigParser}. | |||||
| * | |||||
| * @param string The path of the file. | |||||
| * @param string The name of the EditorConfig property. | |||||
| * @param mixed Default value. | |||||
| * @return mixed | |||||
| */ | |||||
| protected function getEditorConfig($path, $property, $default = null) { | |||||
| if (!$this->editorconfig) { | |||||
| throw new PhutilInvalidStateException('willLintPaths'); | |||||
| } | |||||
| $path = $this->getEngine()->getFilePathOnDisk($path); | |||||
| $value = $this->editorconfig->getProperty($path, $property); | |||||
| return coalesce($value, $default); | |||||
| } | |||||
| /** | |||||
| * Return the end-of-line character(s) for a given path. | |||||
| * | |||||
| * @param string The path to the file. | |||||
| * @return string|null | |||||
| */ | |||||
| protected function getEndOfLineCharacters($path) { | |||||
| switch ($this->getEditorConfig($path, PhutilEditorConfig::END_OF_LINE)) { | |||||
| case 'lf': | |||||
| return "\n"; | |||||
| case 'cr': | |||||
| return "\r"; | |||||
| case 'lfcr': | |||||
| return "\r\n"; | |||||
epriestleyUnsubmitted Not Done Inline Actionse.g., called lfcr but string is crlf epriestley: e.g., called lfcr but string is crlf | |||||
| } | |||||
| } | |||||
| } | } | ||||
I think we should still default to 80, using a sentinel if that's easiest.