diff --git a/externals/pep8/pep8.py b/externals/pep8/pep8.py --- a/externals/pep8/pep8.py +++ b/externals/pep8/pep8.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # pep8.py - Check Python source code formatting, according to PEP 8 # Copyright (C) 2006-2009 Johann C. Rocholl -# Copyright (C) 2009-2012 Florent Xicluna +# Copyright (C) 2009-2014 Florent Xicluna # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -24,8 +24,7 @@ # SOFTWARE. r""" -Check Python source code formatting, according to PEP 8: -http://www.python.org/dev/peps/pep-0008/ +Check Python source code formatting, according to PEP 8. For usage and a list of options, try this: $ python pep8.py -h @@ -44,57 +43,10 @@ 600 deprecation 700 statements 900 syntax error - -You can add checks to this program by writing plugins. Each plugin is -a simple function that is called for each line of source code, either -physical or logical. - -Physical line: -- Raw line of text from the input file. - -Logical line: -- Multi-line statements converted to a single line. -- Stripped left and right. -- Contents of strings replaced with 'xxx' of same length. -- Comments removed. - -The check function requests physical or logical lines by the name of -the first argument: - -def maximum_line_length(physical_line) -def extraneous_whitespace(logical_line) -def blank_lines(logical_line, blank_lines, indent_level, line_number) - -The last example above demonstrates how check plugins can request -additional information with extra arguments. All attributes of the -Checker object are available. Some examples: - -lines: a list of the raw lines from the input file -tokens: the tokens that contribute to this logical line -line_number: line number in the input file -blank_lines: blank lines before this one -indent_char: first indentation character in this file (' ' or '\t') -indent_level: indentation (with tabs expanded to multiples of 8) -previous_indent_level: indentation on previous line -previous_logical: previous logical line - -The docstring of each check function shall be the relevant part of -text from PEP 8. It is printed if the user enables --show-pep8. -Several docstrings contain examples directly from the PEP 8 document. - -Okay: spam(ham[1], {eggs: 2}) -E201: spam( ham[1], {eggs: 2}) - -These examples are verified automatically when pep8.py is run with the ---doctest option. You can add examples for your own check functions. -The format is simple: "Okay" or error/warning code followed by colon -and space, the rest of the line is example source code. If you put 'r' -before the docstring, you can use \n for newline, \t for tab and \s -for space. - """ +from __future__ import with_statement -__version__ = '1.3.4' +__version__ = '1.5.7' import os import sys @@ -111,46 +63,49 @@ except ImportError: from ConfigParser import RawConfigParser -DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git' -DEFAULT_IGNORE = 'E24' +DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__' +DEFAULT_IGNORE = 'E123,E226,E24' if sys.platform == 'win32': DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8') else: DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'), 'pep8') +PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8') +TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite') MAX_LINE_LENGTH = 79 REPORT_FORMAT = { 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s', 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s', } - +PyCF_ONLY_AST = 1024 SINGLETONS = frozenset(['False', 'None', 'True']) KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS -BINARY_OPERATORS = frozenset([ - '**=', '*=', '+=', '-=', '!=', '<>', - '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=', - '%', '^', '&', '|', '=', '/', '//', '<', '>', '<<']) UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-']) -OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS +ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-']) +WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%']) +WS_NEEDED_OPERATORS = frozenset([ + '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>', + '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=']) WHITESPACE = frozenset(' \t') -SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE, - tokenize.INDENT, tokenize.DEDENT]) +NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE]) +SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT]) +# ERRORTOKEN is triggered by backticks in Python 3 +SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN]) BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines'] INDENT_REGEX = re.compile(r'([ \t]*)') -RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)') -RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+') -SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)') -ERRORCODE_REGEX = re.compile(r'[EW]\d{3}') +RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,') +RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$') +ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b') DOCSTRING_REGEX = re.compile(r'u?r?["\']') EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)') COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)') -COMPARE_TYPE_REGEX = re.compile(r'([=!]=|is|is\s+not)\s*type(?:s\.(\w+)Type' - r'|\(\s*(\(\s*\)|[^)]*[^ )])\s*\))') -KEYWORD_REGEX = re.compile(r'(?:[^\s]|\b)(\s*)\b(?:%s)\b(\s*)' % - r'|'.join(KEYWORDS)) +COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s') +COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type' + r'|\s*\(\s*([^)]*[^ )])\s*\))') +KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS)) OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)') LAMBDA_REGEX = re.compile(r'\blambda\b') HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$') @@ -166,8 +121,7 @@ def tabs_or_spaces(physical_line, indent_char): - r""" - Never mix tabs and spaces. + r"""Never mix tabs and spaces. The most popular way of indenting Python is with spaces only. The second-most popular way is with tabs only. Code indented with a mixture @@ -186,9 +140,7 @@ def tabs_obsolete(physical_line): - r""" - For new projects, spaces-only are strongly recommended over tabs. Most - editors have features that make this easy to do. + r"""For new projects, spaces-only are strongly recommended over tabs. Okay: if True:\n return W191: if True:\n\treturn @@ -199,22 +151,13 @@ def trailing_whitespace(physical_line): - r""" - JCR: Trailing whitespace is superfluous. - FBM: Except when it occurs as part of a blank line (i.e. the line is - nothing but whitespace). According to Python docs[1] a line with only - whitespace is considered a blank line, and is to be ignored. However, - matching a blank line to its indentation level avoids mistakenly - terminating a multi-line statement (e.g. class declaration) when - pasting code into the standard Python interpreter. - - [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines + r"""Trailing whitespace is superfluous. The warning returned varies on whether the line itself is blank, for easier filtering for those who want to indent their blank lines. - Okay: spam(1) - W291: spam(1)\s + Okay: spam(1)\n# + W291: spam(1) \n# W293: class Foo(object):\n \n bang = 12 """ physical_line = physical_line.rstrip('\n') # chr(10), newline @@ -228,30 +171,24 @@ return 0, "W293 blank line contains whitespace" -def trailing_blank_lines(physical_line, lines, line_number): - r""" - JCR: Trailing blank lines are superfluous. +def trailing_blank_lines(physical_line, lines, line_number, total_lines): + r"""Trailing blank lines are superfluous. Okay: spam(1) W391: spam(1)\n - """ - if not physical_line.rstrip() and line_number == len(lines): - return 0, "W391 blank line at end of file" - - -def missing_newline(physical_line): - """ - JCR: The last line should have a newline. - Reports warning W292. + However the last line should end with a new line (warning W292). """ - if physical_line.rstrip() == physical_line: - return len(physical_line), "W292 no newline at end of file" + if line_number == total_lines: + stripped_last_line = physical_line.rstrip() + if not stripped_last_line: + return 0, "W391 blank line at end of file" + if stripped_last_line == physical_line: + return len(physical_line), "W292 no newline at end of file" -def maximum_line_length(physical_line, max_line_length): - """ - Limit all lines to a maximum of 79 characters. +def maximum_line_length(physical_line, max_line_length, multiline): + r"""Limit all lines to a maximum of 79 characters. There are still many devices around that are limited to 80 character lines; plus, limiting windows to 80 characters makes it possible to have @@ -264,7 +201,14 @@ """ line = physical_line.rstrip() length = len(line) - if length > max_line_length: + if length > max_line_length and not noqa(line): + # Special case for long URLs in multi-line docstrings or comments, + # but still report the error when the 72 first chars are whitespaces. + chunks = line.split() + if ((len(chunks) == 1 and multiline) or + (len(chunks) == 2 and chunks[0] == '#')) and \ + len(line) - len(chunks[-1]) < max_line_length - 7: + return if hasattr(line, 'decode'): # Python 2 # The line could contain multi-byte characters try: @@ -282,9 +226,8 @@ def blank_lines(logical_line, blank_lines, indent_level, line_number, - previous_logical, previous_indent_level): - r""" - Separate top-level function and class definitions with two blank lines. + blank_before, previous_logical, previous_indent_level): + r"""Separate top-level function and class definitions with two blank lines. Method definitions inside a class are separated by a single blank line. @@ -303,7 +246,7 @@ E303: def a():\n\n\n\n pass E304: @decorator\n\ndef a():\n pass """ - if line_number == 1: + if line_number < 3 and not previous_logical: return # Don't expect blank lines before the first line if previous_logical.startswith('@'): if blank_lines: @@ -312,19 +255,18 @@ yield 0, "E303 too many blank lines (%d)" % blank_lines elif logical_line.startswith(('def ', 'class ', '@')): if indent_level: - if not (blank_lines or previous_indent_level < indent_level or + if not (blank_before or previous_indent_level < indent_level or DOCSTRING_REGEX.match(previous_logical)): yield 0, "E301 expected 1 blank line, found 0" - elif blank_lines != 2: - yield 0, "E302 expected 2 blank lines, found %d" % blank_lines + elif blank_before != 2: + yield 0, "E302 expected 2 blank lines, found %d" % blank_before def extraneous_whitespace(logical_line): - """ - Avoid extraneous whitespace in the following situations: + r"""Avoid extraneous whitespace. + Avoid extraneous whitespace in these situations: - Immediately inside parentheses, brackets or braces. - - Immediately before a comma, semicolon, or colon. Okay: spam(ham[1], {eggs: 2}) @@ -353,8 +295,7 @@ def whitespace_around_keywords(logical_line): - r""" - Avoid extraneous whitespace around keywords. + r"""Avoid extraneous whitespace around keywords. Okay: True and False E271: True and False @@ -377,8 +318,7 @@ def missing_whitespace(logical_line): - """ - JCR: Each comma, semicolon or colon should be followed by whitespace. + r"""Each comma, semicolon or colon should be followed by whitespace. Okay: [a, b] Okay: (3,) @@ -405,8 +345,7 @@ def indentation(logical_line, previous_logical, indent_char, indent_level, previous_indent_level): - r""" - Use 4 spaces per indentation level. + r"""Use 4 spaces per indentation level. For really old code that you don't want to mess up, you can continue to use 8-space tabs. @@ -430,16 +369,16 @@ yield 0, "E113 unexpected indentation" -def continuation_line_indentation(logical_line, tokens, indent_level, verbose): - r""" - Continuation lines should align wrapped elements either vertically using - Python's implicit line joining inside parentheses, brackets and braces, or - using a hanging indent. +def continued_indentation(logical_line, tokens, indent_level, hang_closing, + indent_char, noqa, verbose): + r"""Continuation lines indentation. - When using a hanging indent the following considerations should be applied: + Continuation lines should align wrapped elements either vertically + using Python's implicit line joining inside parentheses, brackets + and braces, or using a hanging indent. + When using a hanging indent these considerations should be applied: - there should be no arguments on the first line, and - - further indentation should be used to clearly distinguish itself as a continuation line. @@ -451,14 +390,16 @@ E122: a = (\n42) E123: a = (\n 42\n ) E124: a = (24,\n 42\n) - E125: if (a or\n b):\n pass + E125: if (\n b):\n pass E126: a = (\n 42) E127: a = (24,\n 42) E128: a = (24,\n 42) + E129: if (a or\n b):\n pass + E131: a = (\n 42\n 24) """ first_row = tokens[0][2][0] nrows = 1 + tokens[-1][2][0] - first_row - if nrows == 1: + if noqa or nrows == 1: return # indent_next tells us whether the next block is indented; assuming @@ -468,23 +409,30 @@ indent_next = logical_line.endswith(':') row = depth = 0 + valid_hangs = (4,) if indent_char != '\t' else (4, 8) # remember how many brackets were opened on each line parens = [0] * nrows # relative indents of physical lines rel_indent = [0] * nrows + # for each depth, collect a list of opening rows + open_rows = [[0]] + # for each depth, memorize the hanging indentation + hangs = [None] # visual indents - indent = [indent_level] indent_chances = {} last_indent = tokens[0][2] + visual_indent = None + # for each depth, memorize the visual indent column + indent = [last_indent[1]] if verbose >= 3: print(">>> " + tokens[0][4].rstrip()) for token_type, text, start, end, line in tokens: + newline = row < start[0] - first_row if newline: row = start[0] - first_row - newline = (not last_token_multiline and - token_type not in (tokenize.NL, tokenize.NEWLINE)) + newline = not last_token_multiline and token_type not in NEWLINE if newline: # this is the beginning of a continuation line. @@ -493,53 +441,63 @@ print("... " + line.rstrip()) # record the initial indent. - rel_indent[row] = start[1] - indent_level - - if depth: - # a bracket expression in a continuation line. - # find the line that it was opened on - for open_row in range(row - 1, -1, -1): - if parens[open_row]: - break - else: - # an unbracketed continuation line (ie, backslash) - open_row = 0 - hang = rel_indent[row] - rel_indent[open_row] - visual_indent = indent_chances.get(start[1]) - - if token_type == tokenize.OP and text in ']})': - # this line starts with a closing bracket - if indent[depth]: - if start[1] != indent[depth]: - yield (start, 'E124 closing bracket does not match ' - 'visual indentation') - elif hang: - yield (start, 'E123 closing bracket does not match ' - 'indentation of opening bracket\'s line') + rel_indent[row] = expand_indent(line) - indent_level + + # identify closing bracket + close_bracket = (token_type == tokenize.OP and text in ']})') + + # is the indent relative to an opening bracket line? + for open_row in reversed(open_rows[depth]): + hang = rel_indent[row] - rel_indent[open_row] + hanging_indent = hang in valid_hangs + if hanging_indent: + break + if hangs[depth]: + hanging_indent = (hang == hangs[depth]) + # is there any chance of visual indent? + visual_indent = (not close_bracket and hang > 0 and + indent_chances.get(start[1])) + + if close_bracket and indent[depth]: + # closing bracket for visual indent + if start[1] != indent[depth]: + yield (start, "E124 closing bracket does not match " + "visual indentation") + elif close_bracket and not hang: + # closing bracket matches indentation of opening bracket's line + if hang_closing: + yield start, "E133 closing bracket is missing indentation" + elif indent[depth] and start[1] < indent[depth]: + if visual_indent is not True: + # visual indent is broken + yield (start, "E128 continuation line " + "under-indented for visual indent") + elif hanging_indent or (indent_next and rel_indent[row] == 8): + # hanging indent is verified + if close_bracket and not hang_closing: + yield (start, "E123 closing bracket does not match " + "indentation of opening bracket's line") + hangs[depth] = hang elif visual_indent is True: # visual indent is verified - if not indent[depth]: - indent[depth] = start[1] + indent[depth] = start[1] elif visual_indent in (text, str): # ignore token lined up with matching one from a previous line pass - elif indent[depth] and start[1] < indent[depth]: - # visual indent is broken - yield (start, 'E128 continuation line ' - 'under-indented for visual indent') - elif hang == 4 or (indent_next and rel_indent[row] == 8): - # hanging indent is verified - pass else: # indent is broken if hang <= 0: - error = 'E122', 'missing indentation or outdented' + error = "E122", "missing indentation or outdented" elif indent[depth]: - error = 'E127', 'over-indented for visual indent' - elif hang % 4: - error = 'E121', 'indentation is not a multiple of four' + error = "E127", "over-indented for visual indent" + elif not close_bracket and hangs[depth]: + error = "E131", "unaligned for hanging indent" else: - error = 'E126', 'over-indented for hanging indent' + hangs[depth] = hang + if hang > 4: + error = "E126", "over-indented for hanging indent" + else: + error = "E121", "under-indented for hanging indent" yield start, "%s continuation line %s" % error # look for visual indenting @@ -553,12 +511,21 @@ elif (token_type in (tokenize.STRING, tokenize.COMMENT) or text in ('u', 'ur', 'b', 'br')): indent_chances[start[1]] = str + # special case for the "if" statement because len("if (") == 4 + elif not indent_chances and not row and not depth and text == 'if': + indent_chances[end[1] + 1] = True + elif text == ':' and line[end[1]:].isspace(): + open_rows[depth].append(row) # keep track of bracket depth if token_type == tokenize.OP: if text in '([{': depth += 1 indent.append(0) + hangs.append(None) + if len(open_rows) == depth: + open_rows.append([]) + open_rows[depth].append(row) parens[row] += 1 if verbose >= 4: print("bracket depth %s seen, col %s, visual min = %s" % @@ -566,12 +533,14 @@ elif text in ')]}' and depth > 0: # parent indents should not be more than this one prev_indent = indent.pop() or last_indent[1] + hangs.pop() for d in range(depth): if indent[d] > prev_indent: indent[d] = 0 for ind in list(indent_chances): if ind >= prev_indent: del indent_chances[ind] + del open_rows[depth + 1:] depth -= 1 if depth: indent_chances[indent[depth]] = True @@ -585,21 +554,25 @@ indent_chances[start[1]] = text last_token_multiline = (start[0] != end[0]) + if last_token_multiline: + rel_indent[end[0] - first_row] = rel_indent[row] - if indent_next and rel_indent[-1] == 4: - yield (last_indent, "E125 continuation line does not distinguish " - "itself from next logical line") + if indent_next and expand_indent(line) == indent_level + 4: + pos = (start[0], indent[0] + 4) + if visual_indent: + code = "E129 visually indented line" + else: + code = "E125 continuation line" + yield pos, "%s with same indent as next logical line" % code def whitespace_before_parameters(logical_line, tokens): - """ - Avoid extraneous whitespace in the following situations: + r"""Avoid extraneous whitespace. - - Immediately before the open parenthesis that starts the argument - list of a function call. - - - Immediately before the open parenthesis that starts an indexing or - slicing. + Avoid extraneous whitespace in the following situations: + - before the open parenthesis that starts the argument list of a + function call. + - before the open parenthesis that starts an indexing or slicing. Okay: spam(1) E211: spam (1) @@ -608,11 +581,9 @@ E211: dict ['key'] = list[index] E211: dict['key'] = list [index] """ - prev_type = tokens[0][0] - prev_text = tokens[0][1] - prev_end = tokens[0][3] + prev_type, prev_text, __, prev_end, __ = tokens[0] for index in range(1, len(tokens)): - token_type, text, start, end, line = tokens[index] + token_type, text, start, end, __ = tokens[index] if (token_type == tokenize.OP and text in '([' and start != prev_end and @@ -628,11 +599,7 @@ def whitespace_around_operator(logical_line): - r""" - Avoid extraneous whitespace in the following situations: - - - More than one space around an assignment (or other) operator to - align it with another. + r"""Avoid extraneous whitespace around an operator. Okay: a = 12 + 3 E221: a = 4 + 5 @@ -655,13 +622,15 @@ def missing_whitespace_around_operator(logical_line, tokens): - r""" + r"""Surround operators with a single space on either side. + - Always surround these binary operators with a single space on either side: assignment (=), augmented assignment (+=, -= etc.), - comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not), + comparisons (==, <, >, !=, <=, >=, in, not in, is, is not), Booleans (and, or, not). - - Use spaces around arithmetic operators. + - If operators with different priorities are used, consider adding + whitespace around the operators with the lowest priorities. Okay: i = i + 1 Okay: submitted += 1 @@ -669,28 +638,23 @@ Okay: hypot2 = x * x + y * y Okay: c = (a + b) * (a - b) Okay: foo(bar, key='word', *args, **kwargs) - Okay: baz(**kwargs) - Okay: negative = -1 - Okay: spam(-1) Okay: alpha[:-i] - Okay: if not -5 < x < +5:\n pass - Okay: lambda *args, **kw: (args, kw) E225: i=i+1 E225: submitted +=1 - E225: x = x*2 - 1 - E225: hypot2 = x*x + y*y - E225: c = (a+b) * (a-b) - E225: c = alpha -4 + E225: x = x /2 - 1 E225: z = x **y + E226: c = (a+b) * (a-b) + E226: hypot2 = x*x + y*y + E227: c = a|b + E228: msg = fmt%(errno, errmsg) """ parens = 0 need_space = False prev_type = tokenize.OP prev_text = prev_end = None for token_type, text, start, end, line in tokens: - if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN): - # ERRORTOKEN is triggered by backticks in Python 3000 + if token_type in SKIP_COMMENTS: continue if text in ('(', 'lambda'): parens += 1 @@ -698,32 +662,50 @@ parens -= 1 if need_space: if start != prev_end: + # Found a (probably) needed space + if need_space is not True and not need_space[1]: + yield (need_space[0], + "E225 missing whitespace around operator") need_space = False elif text == '>' and prev_text in ('<', '-'): # Tolerate the "<>" operator, even if running Python 3 # Deal with Python 3's annotated return value "->" pass else: - yield prev_end, "E225 missing whitespace around operator" + if need_space is True or need_space[1]: + # A needed trailing space was not found + yield prev_end, "E225 missing whitespace around operator" + else: + code, optype = 'E226', 'arithmetic' + if prev_text == '%': + code, optype = 'E228', 'modulo' + elif prev_text not in ARITHMETIC_OP: + code, optype = 'E227', 'bitwise or shift' + yield (need_space[0], "%s missing whitespace " + "around %s operator" % (code, optype)) need_space = False elif token_type == tokenize.OP and prev_end is not None: if text == '=' and parens: # Allow keyword args or defaults: foo(bar=None). pass - elif text in BINARY_OPERATORS: + elif text in WS_NEEDED_OPERATORS: need_space = True elif text in UNARY_OPERATORS: + # Check if the operator is being used as a binary operator # Allow unary operators: -123, -x, +1. # Allow argument unpacking: foo(*args, **kwargs). - if prev_type == tokenize.OP: - if prev_text in '}])': - need_space = True - elif prev_type == tokenize.NAME: - if prev_text not in KEYWORDS: - need_space = True - elif prev_type not in SKIP_TOKENS: - need_space = True - if need_space and start == prev_end: + if (prev_text in '}])' if prev_type == tokenize.OP + else prev_text not in KEYWORDS): + need_space = None + elif text in WS_OPTIONAL_OPERATORS: + need_space = None + + if need_space is None: + # Surrounding space is optional, but ensure that + # trailing space matches opening space + need_space = (prev_end, start != prev_end) + elif need_space and start == prev_end: + # A needed opening space was not found yield prev_end, "E225 missing whitespace around operator" need_space = False prev_type = token_type @@ -732,11 +714,7 @@ def whitespace_around_comma(logical_line): - r""" - Avoid extraneous whitespace in the following situations: - - - More than one space around an assignment (or other) operator to - align it with another. + r"""Avoid extraneous whitespace after a comma or a colon. Note: these checks are disabled by default @@ -754,7 +732,8 @@ def whitespace_around_named_parameter_equals(logical_line, tokens): - """ + r"""Don't use spaces around the '=' sign in function arguments. + Don't use spaces around the '=' sign when used to indicate a keyword argument or a default parameter value. @@ -771,12 +750,14 @@ parens = 0 no_space = False prev_end = None + message = "E251 unexpected spaces around keyword / parameter equals" for token_type, text, start, end, line in tokens: + if token_type == tokenize.NL: + continue if no_space: no_space = False if start != prev_end: - yield (prev_end, - "E251 no spaces around keyword / parameter equals") + yield (prev_end, message) elif token_type == tokenize.OP: if text == '(': parens += 1 @@ -785,42 +766,50 @@ elif parens and text == '=': no_space = True if start != prev_end: - yield (prev_end, - "E251 no spaces around keyword / parameter equals") + yield (prev_end, message) prev_end = end -def whitespace_before_inline_comment(logical_line, tokens): - """ - Separate inline comments by at least two spaces. +def whitespace_before_comment(logical_line, tokens): + r"""Separate inline comments by at least two spaces. An inline comment is a comment on the same line as a statement. Inline comments should be separated by at least two spaces from the statement. They should start with a # and a single space. + Each line of a block comment starts with a # and a single space + (unless it is indented text inside the comment). + Okay: x = x + 1 # Increment x Okay: x = x + 1 # Increment x + Okay: # Block comment E261: x = x + 1 # Increment x E262: x = x + 1 #Increment x E262: x = x + 1 # Increment x + E265: #Block comment """ prev_end = (0, 0) for token_type, text, start, end, line in tokens: if token_type == tokenize.COMMENT: - if not line[:start[1]].strip(): - continue - if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: - yield (prev_end, - "E261 at least two spaces before inline comment") - if text.startswith('# ') or not text.startswith('# '): - yield start, "E262 inline comment should start with '# '" + inline_comment = line[:start[1]].strip() + if inline_comment: + if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: + yield (prev_end, + "E261 at least two spaces before inline comment") + symbol, sp, comment = text.partition(' ') + bad_prefix = symbol not in ('#', '#:') + if inline_comment: + if bad_prefix or comment[:1].isspace(): + yield start, "E262 inline comment should start with '# '" + elif bad_prefix: + if text.rstrip('#') and (start[0] > 1 or symbol[1] != '!'): + yield start, "E265 block comment should start with '# '" elif token_type != tokenize.NL: prev_end = end def imports_on_separate_lines(logical_line): - r""" - Imports should usually be on separate lines. + r"""Imports should usually be on separate lines. Okay: import os\nimport sys E401: import sys, os @@ -839,13 +828,11 @@ def compound_statements(logical_line): - r""" - Compound statements (multiple statements on the same line) are - generally discouraged. + r"""Compound statements (on the same line) are generally discouraged. While sometimes it's okay to put an if/for/while with a small body - on the same line, never do this for multi-clause statements. Also - avoid folding such long lines! + on the same line, never do this for multi-clause statements. + Also avoid folding such long lines! Okay: if foo == 'blah':\n do_blah_thing() Okay: do_one() @@ -862,24 +849,30 @@ E701: if foo == 'blah': one(); two(); three() E702: do_one(); do_two(); do_three() + E703: do_four(); # useless semicolon """ line = logical_line + last_char = len(line) - 1 found = line.find(':') - if -1 < found < len(line) - 1: + while -1 < found < last_char: before = line[:found] if (before.count('{') <= before.count('}') and # {'a': 1} (dict) before.count('[') <= before.count(']') and # [1:2] (slice) before.count('(') <= before.count(')') and # (Python 3 annotation) not LAMBDA_REGEX.search(before)): # lambda x: x yield found, "E701 multiple statements on one line (colon)" + found = line.find(':', found + 1) found = line.find(';') - if -1 < found: - yield found, "E702 multiple statements on one line (semicolon)" + while -1 < found: + if found < last_char: + yield found, "E702 multiple statements on one line (semicolon)" + else: + yield found, "E703 statement ends with a semicolon" + found = line.find(';', found + 1) def explicit_line_join(logical_line, tokens): - r""" - Avoid explicit line join between brackets. + r"""Avoid explicit line join between brackets. The preferred way of wrapping long lines is by using Python's implied line continuation inside parentheses, brackets and braces. Long lines can be @@ -912,8 +905,9 @@ parens -= 1 -def comparison_to_singleton(logical_line): - """ +def comparison_to_singleton(logical_line, noqa): + r"""Comparison to singletons should use "is" or "is not". + Comparisons to singletons like None should always be done with "is" or "is not", never the equality operators. @@ -926,7 +920,7 @@ set to some other value. The other value might have a type (such as a container) that could be false in a boolean context! """ - match = COMPARE_SINGLETON_REGEX.search(logical_line) + match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line) if match: same = (match.group(1) == '==') singleton = match.group(2) @@ -942,10 +936,31 @@ (code, singleton, msg)) -def comparison_type(logical_line): +def comparison_negative(logical_line): + r"""Negative comparison should be done using "not in" and "is not". + + Okay: if x not in y:\n pass + Okay: assert (X in Y or X is Z) + Okay: if not (X in Y):\n pass + Okay: zz = x is not y + E713: Z = not X in Y + E713: if not X.B in Y:\n pass + E714: if not X is Y:\n pass + E714: Z = not X.B is Y """ - Object type comparisons should always use isinstance() instead of - comparing types directly. + match = COMPARE_NEGATIVE_REGEX.search(logical_line) + if match: + pos = match.start(1) + if match.group(2) == 'in': + yield pos, "E713 test for membership should be 'not in'" + else: + yield pos, "E714 test for object identity should be 'is not'" + + +def comparison_type(logical_line): + r"""Object type comparisons should always use isinstance(). + + Do not compare types directly. Okay: if isinstance(obj, int): E721: if type(obj) is type(1): @@ -959,48 +974,40 @@ """ match = COMPARE_TYPE_REGEX.search(logical_line) if match: - inst = match.group(3) + inst = match.group(1) if inst and isidentifier(inst) and inst not in SINGLETONS: return # Allow comparison for types which are not obvious - yield match.start(1), "E721 do not compare types, use 'isinstance()'" + yield match.start(), "E721 do not compare types, use 'isinstance()'" -def python_3000_has_key(logical_line): - r""" - The {}.has_key() method will be removed in the future version of - Python. Use the 'in' operation instead. +def python_3000_has_key(logical_line, noqa): + r"""The {}.has_key() method is removed in Python 3: use the 'in' operator. Okay: if "alph" in d:\n print d["alph"] W601: assert d.has_key('alph') """ pos = logical_line.find('.has_key(') - if pos > -1: + if pos > -1 and not noqa: yield pos, "W601 .has_key() is deprecated, use 'in'" def python_3000_raise_comma(logical_line): - """ - When raising an exception, use "raise ValueError('message')" - instead of the older form "raise ValueError, 'message'". + r"""When raising an exception, use "raise ValueError('message')". - The paren-using form is preferred because when the exception arguments - are long or include string formatting, you don't need to use line - continuation characters thanks to the containing parentheses. The older - form will be removed in Python 3000. + The older form is removed in Python 3. Okay: raise DummyError("Message") W602: raise DummyError, "Message" """ match = RAISE_COMMA_REGEX.match(logical_line) if match and not RERAISE_COMMA_REGEX.match(logical_line): - yield match.start(1), "W602 deprecated form of raising exception" + yield match.end() - 1, "W602 deprecated form of raising exception" def python_3000_not_equal(logical_line): - """ - != can also be written <>, but this is an obsolete usage kept for - backwards compatibility only. New code should always use !=. - The older syntax is removed in Python 3000. + r"""New code should always use != instead of <>. + + The older syntax is removed in Python 3. Okay: if a != 'no': W603: if a <> 'no': @@ -1011,9 +1018,7 @@ def python_3000_backticks(logical_line): - """ - Backticks are removed in Python 3000. - Use repr() instead. + r"""Backticks are removed in Python 3: use repr() instead. Okay: val = repr(1 + 2) W604: val = `1 + 2` @@ -1031,48 +1036,40 @@ if '' == ''.encode(): # Python 2: implicit encoding. def readlines(filename): - f = open(filename) - try: + """Read the source code.""" + with open(filename, 'rU') as f: return f.readlines() - finally: - f.close() - isidentifier = re.compile(r'[a-zA-Z_]\w*').match stdin_get_value = sys.stdin.read else: # Python 3 def readlines(filename): - f = open(filename, 'rb') + """Read the source code.""" try: - coding, lines = tokenize.detect_encoding(f.readline) - f = TextIOWrapper(f, coding, line_buffering=True) - return [l.decode(coding) for l in lines] + f.readlines() + with open(filename, 'rb') as f: + (coding, lines) = tokenize.detect_encoding(f.readline) + f = TextIOWrapper(f, coding, line_buffering=True) + return [l.decode(coding) for l in lines] + f.readlines() except (LookupError, SyntaxError, UnicodeError): - f.close() - # Fall back if files are improperly declared - f = open(filename, encoding='latin-1') - return f.readlines() - finally: - f.close() - + # Fall back if file encoding is improperly declared + with open(filename, encoding='latin-1') as f: + return f.readlines() isidentifier = str.isidentifier def stdin_get_value(): return TextIOWrapper(sys.stdin.buffer, errors='ignore').read() -readlines.__doc__ = " Read the source code." +noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search def expand_indent(line): - r""" - Return the amount of indentation. + r"""Return the amount of indentation. + Tabs are expanded to the next multiple of 8. >>> expand_indent(' ') 4 >>> expand_indent('\t') 8 - >>> expand_indent(' \t') - 8 >>> expand_indent(' \t') 8 >>> expand_indent(' \t') @@ -1092,8 +1089,7 @@ def mute_string(text): - """ - Replace contents with 'xxx' to prevent syntax matching. + """Replace contents with 'xxx' to prevent syntax matching. >>> mute_string('"abc"') '"xxx"' @@ -1113,6 +1109,9 @@ def parse_udiff(diff, patterns=None, parent='.'): + """Return a dictionary of matching lines.""" + # For each file of the diff, the entry key is the filename, + # and the value is a set of row numbers to consider. rv = {} path = nrows = None for line in diff.splitlines(): @@ -1122,7 +1121,7 @@ continue if line[:3] == '@@ ': hunk_match = HUNK_REGEX.match(line) - row, nrows = [int(g or '1') for g in hunk_match.groups()] + (row, nrows) = [int(g or '1') for g in hunk_match.groups()] rv[path].update(range(row, row + nrows)) elif line[:3] == '+++': path = line[4:].split('\t', 1)[0] @@ -1134,9 +1133,24 @@ if rows and filename_match(path, patterns)]) -def filename_match(filename, patterns, default=True): +def normalize_paths(value, parent=os.curdir): + """Parse a comma-separated list of paths. + + Return a list of absolute paths. """ - Check if patterns contains a pattern that matches filename. + if not value or isinstance(value, list): + return value + paths = [] + for path in value.split(','): + if '/' in path: + path = os.path.abspath(os.path.join(parent, path)) + paths.append(path.rstrip('/')) + return paths + + +def filename_match(filename, patterns, default=True): + """Check if patterns contains a pattern that matches filename. + If patterns is unspecified, this always returns True. """ if not patterns: @@ -1144,31 +1158,56 @@ return any(fnmatch(filename, pattern) for pattern in patterns) +if COMMENT_WITH_NL: + def _is_eol_token(token): + return (token[0] in NEWLINE or + (token[0] == tokenize.COMMENT and token[1] == token[4])) +else: + def _is_eol_token(token): + return token[0] in NEWLINE + + ############################################################################## # Framework to run all checks ############################################################################## -def find_checks(argument_name): - """ - Find all globally visible functions where the first argument name - starts with argument_name. +_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}} + + +def register_check(check, codes=None): + """Register a new check object.""" + def _add_check(check, kind, codes, args): + if check in _checks[kind]: + _checks[kind][check][0].extend(codes or []) + else: + _checks[kind][check] = (codes or [''], args) + if inspect.isfunction(check): + args = inspect.getargspec(check)[0] + if args and args[0] in ('physical_line', 'logical_line'): + if codes is None: + codes = ERRORCODE_REGEX.findall(check.__doc__ or '') + _add_check(check, args[0], codes, args) + elif inspect.isclass(check): + if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']: + _add_check(check, 'tree', codes, None) + + +def init_checks_registry(): + """Register all globally visible functions. + + The first argument name is either 'physical_line' or 'logical_line'. """ - for name, function in globals().items(): - if not inspect.isfunction(function): - continue - args = inspect.getargspec(function)[0] - if args and args[0].startswith(argument_name): - codes = ERRORCODE_REGEX.findall(function.__doc__ or '') - yield name, codes, function, args + mod = inspect.getmodule(register_check) + for (name, function) in inspect.getmembers(mod, inspect.isfunction): + register_check(function) +init_checks_registry() class Checker(object): - """ - Load a Python source file, tokenize it, check coding style. - """ + """Load a Python source file, tokenize it, check coding style.""" - def __init__(self, filename, lines=None, + def __init__(self, filename=None, lines=None, options=None, report=None, **kwargs): if options is None: options = StyleGuide(kwargs).options @@ -1177,7 +1216,10 @@ self._io_error = None self._physical_checks = options.physical_checks self._logical_checks = options.logical_checks + self._ast_checks = options.ast_checks self.max_line_length = options.max_line_length + self.multiline = False # in a multiline string? + self.hang_closing = options.hang_closing self.verbose = options.verbose self.filename = filename if filename is None: @@ -1190,144 +1232,190 @@ try: self.lines = readlines(filename) except IOError: - exc_type, exc = sys.exc_info()[:2] + (exc_type, exc) = sys.exc_info()[:2] self._io_error = '%s: %s' % (exc_type.__name__, exc) self.lines = [] else: self.lines = lines + if self.lines: + ord0 = ord(self.lines[0][0]) + if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM + if ord0 == 0xfeff: + self.lines[0] = self.lines[0][1:] + elif self.lines[0][:3] == '\xef\xbb\xbf': + self.lines[0] = self.lines[0][3:] self.report = report or options.report self.report_error = self.report.error + def report_invalid_syntax(self): + """Check if the syntax is valid.""" + (exc_type, exc) = sys.exc_info()[:2] + if len(exc.args) > 1: + offset = exc.args[1] + if len(offset) > 2: + offset = offset[1:3] + else: + offset = (1, 0) + self.report_error(offset[0], offset[1] or 0, + 'E901 %s: %s' % (exc_type.__name__, exc.args[0]), + self.report_invalid_syntax) + def readline(self): - """ - Get the next line from the input buffer. - """ - self.line_number += 1 - if self.line_number > len(self.lines): + """Get the next line from the input buffer.""" + if self.line_number >= self.total_lines: return '' - return self.lines[self.line_number - 1] - - def readline_check_physical(self): - """ - Check and return the next physical line. This method can be - used to feed tokenize.generate_tokens. - """ - line = self.readline() - if line: - self.check_physical(line) + line = self.lines[self.line_number] + self.line_number += 1 + if self.indent_char is None and line[:1] in WHITESPACE: + self.indent_char = line[0] return line def run_check(self, check, argument_names): - """ - Run a check plugin. - """ + """Run a check plugin.""" arguments = [] for name in argument_names: arguments.append(getattr(self, name)) return check(*arguments) def check_physical(self, line): - """ - Run all physical checks on a raw input line. - """ + """Run all physical checks on a raw input line.""" self.physical_line = line - if self.indent_char is None and line[:1] in WHITESPACE: - self.indent_char = line[0] for name, check, argument_names in self._physical_checks: result = self.run_check(check, argument_names) if result is not None: - offset, text = result + (offset, text) = result self.report_error(self.line_number, offset, text, check) + if text[:4] == 'E101': + self.indent_char = line[0] def build_tokens_line(self): - """ - Build a logical line from tokens. - """ - self.mapping = [] + """Build a logical line from tokens.""" logical = [] + comments = [] length = 0 - previous = None - for token in self.tokens: - token_type, text = token[0:2] + prev_row = prev_col = mapping = None + for token_type, text, start, end, line in self.tokens: if token_type in SKIP_TOKENS: continue + if not mapping: + mapping = [(0, start)] + if token_type == tokenize.COMMENT: + comments.append(text) + continue if token_type == tokenize.STRING: text = mute_string(text) - if previous: - end_row, end = previous[3] - start_row, start = token[2] - if end_row != start_row: # different row - prev_text = self.lines[end_row - 1][end - 1] + if prev_row: + (start_row, start_col) = start + if prev_row != start_row: # different row + prev_text = self.lines[prev_row - 1][prev_col - 1] if prev_text == ',' or (prev_text not in '{[(' and text not in '}])'): - logical.append(' ') - length += 1 - elif end != start: # different column - fill = self.lines[end_row - 1][end:start] - logical.append(fill) - length += len(fill) - self.mapping.append((length, token)) + text = ' ' + text + elif prev_col != start_col: # different column + text = line[prev_col:start_col] + text logical.append(text) length += len(text) - previous = token + mapping.append((length, end)) + (prev_row, prev_col) = end self.logical_line = ''.join(logical) - assert self.logical_line.strip() == self.logical_line + self.noqa = comments and noqa(''.join(comments)) + return mapping def check_logical(self): - """ - Build a line from tokens and run all logical checks on it. - """ - self.build_tokens_line() + """Build a line from tokens and run all logical checks on it.""" self.report.increment_logical_line() - first_line = self.lines[self.mapping[0][1][2][0] - 1] - indent = first_line[:self.mapping[0][1][2][1]] - self.previous_indent_level = self.indent_level - self.indent_level = expand_indent(indent) + mapping = self.build_tokens_line() + (start_row, start_col) = mapping[0][1] + start_line = self.lines[start_row - 1] + self.indent_level = expand_indent(start_line[:start_col]) + if self.blank_before < self.blank_lines: + self.blank_before = self.blank_lines if self.verbose >= 2: print(self.logical_line[:80].rstrip()) for name, check, argument_names in self._logical_checks: if self.verbose >= 4: print(' ' + name) - for result in self.run_check(check, argument_names): - offset, text = result - if isinstance(offset, tuple): - orig_number, orig_offset = offset - else: - for token_offset, token in self.mapping: - if offset >= token_offset: - orig_number = token[2][0] - orig_offset = (token[2][1] + offset - token_offset) - self.report_error(orig_number, orig_offset, text, check) - self.previous_logical = self.logical_line + for offset, text in self.run_check(check, argument_names) or (): + if not isinstance(offset, tuple): + for token_offset, pos in mapping: + if offset <= token_offset: + break + offset = (pos[0], pos[1] + offset - token_offset) + self.report_error(offset[0], offset[1], text, check) + if self.logical_line: + self.previous_indent_level = self.indent_level + self.previous_logical = self.logical_line + self.blank_lines = 0 + self.tokens = [] + + def check_ast(self): + """Build the file's AST and run all AST checks.""" + try: + tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST) + except (SyntaxError, TypeError): + return self.report_invalid_syntax() + for name, cls, __ in self._ast_checks: + checker = cls(tree, self.filename) + for lineno, offset, text, check in checker.run(): + if not self.lines or not noqa(self.lines[lineno - 1]): + self.report_error(lineno, offset, text, check) def generate_tokens(self): + """Tokenize the file, run physical line checks and yield tokens.""" if self._io_error: self.report_error(1, 0, 'E902 %s' % self._io_error, readlines) - tokengen = tokenize.generate_tokens(self.readline_check_physical) + tokengen = tokenize.generate_tokens(self.readline) try: for token in tokengen: + if token[2][0] > self.total_lines: + return + self.maybe_check_physical(token) yield token except (SyntaxError, tokenize.TokenError): - exc_type, exc = sys.exc_info()[:2] - offset = exc.args[1] - if len(offset) > 2: - offset = offset[1:3] - self.report_error(offset[0], offset[1], - 'E901 %s: %s' % (exc_type.__name__, exc.args[0]), - self.generate_tokens) - generate_tokens.__doc__ = " Check if the syntax is valid." + self.report_invalid_syntax() + + def maybe_check_physical(self, token): + """If appropriate (based on token), check current physical line(s).""" + # Called after every token, but act only on end of line. + if _is_eol_token(token): + # Obviously, a newline token ends a single physical line. + self.check_physical(token[4]) + elif token[0] == tokenize.STRING and '\n' in token[1]: + # Less obviously, a string that contains newlines is a + # multiline string, either triple-quoted or with internal + # newlines backslash-escaped. Check every physical line in the + # string *except* for the last one: its newline is outside of + # the multiline string, so we consider it a regular physical + # line, and will check it like any other physical line. + # + # Subtleties: + # - we don't *completely* ignore the last line; if it contains + # the magical "# noqa" comment, we disable all physical + # checks for the entire multiline string + # - have to wind self.line_number back because initially it + # points to the last line of the string, and we want + # check_physical() to give accurate feedback + if noqa(token[4]): + return + self.multiline = True + self.line_number = token[2][0] + for line in token[1].split('\n')[:-1]: + self.check_physical(line + '\n') + self.line_number += 1 + self.multiline = False def check_all(self, expected=None, line_offset=0): - """ - Run all checks on the input file. - """ + """Run all checks on the input file.""" self.report.init_file(self.filename, self.lines, expected, line_offset) + self.total_lines = len(self.lines) + if self._ast_checks: + self.check_ast() self.line_number = 0 self.indent_char = None - self.indent_level = 0 + self.indent_level = self.previous_indent_level = 0 self.previous_logical = '' self.tokens = [] - self.blank_lines = blank_lines_before_comment = 0 + self.blank_lines = self.blank_before = 0 parens = 0 for token in self.generate_tokens(): self.tokens.append(token) @@ -1345,29 +1433,33 @@ elif text in '}])': parens -= 1 elif not parens: - if token_type == tokenize.NEWLINE: - if self.blank_lines < blank_lines_before_comment: - self.blank_lines = blank_lines_before_comment - self.check_logical() - self.tokens = [] - self.blank_lines = blank_lines_before_comment = 0 - elif token_type == tokenize.NL: - if len(self.tokens) == 1: + if token_type in NEWLINE: + if token_type == tokenize.NEWLINE: + self.check_logical() + self.blank_before = 0 + elif len(self.tokens) == 1: # The physical line contains only this token. self.blank_lines += 1 - self.tokens = [] - elif token_type == tokenize.COMMENT and len(self.tokens) == 1: - if blank_lines_before_comment < self.blank_lines: - blank_lines_before_comment = self.blank_lines - self.blank_lines = 0 - if COMMENT_WITH_NL: + del self.tokens[0] + else: + self.check_logical() + elif COMMENT_WITH_NL and token_type == tokenize.COMMENT: + if len(self.tokens) == 1: # The comment also ends a physical line - self.tokens = [] + token = list(token) + token[1] = text.rstrip('\r\n') + token[3] = (token[2][0], token[2][1] + len(token[1])) + self.tokens = [tuple(token)] + self.check_logical() + if self.tokens: + self.check_physical(self.lines[-1]) + self.check_logical() return self.report.get_file_results() class BaseReport(object): """Collect the results of the checks.""" + print_filename = False def __init__(self, options): @@ -1430,8 +1522,7 @@ for key in self.messages if key.startswith(prefix)]) def get_statistics(self, prefix=''): - """ - Get statistics for message codes that start with the prefix. + """Get statistics for message codes that start with the prefix. prefix='' matches all errors and warnings prefix='E' matches all errors @@ -1457,6 +1548,7 @@ class FileReport(BaseReport): + """Collect the results of the checks and print only the filenames.""" print_filename = True @@ -1471,17 +1563,29 @@ self._show_source = options.show_source self._show_pep8 = options.show_pep8 + def init_file(self, filename, lines, expected, line_offset): + """Signal a new file.""" + self._deferred_print = [] + return super(StandardReport, self).init_file( + filename, lines, expected, line_offset) + def error(self, line_number, offset, text, check): - """ - Report an error, according to options. - """ + """Report an error, according to options.""" code = super(StandardReport, self).error(line_number, offset, text, check) if code and (self.counters[code] == 1 or self._repeat): + self._deferred_print.append( + (line_number, offset, code, text[5:], check.__doc__)) + return code + + def get_file_results(self): + """Print the result and return the overall count for this file.""" + self._deferred_print.sort() + for line_number, offset, code, text, doc in self._deferred_print: print(self._fmt % { 'path': self.filename, 'row': self.line_offset + line_number, 'col': offset + 1, - 'code': code, 'text': text[5:], + 'code': code, 'text': text, }) if self._show_source: if line_number > len(self.lines): @@ -1489,10 +1593,10 @@ else: line = self.lines[line_number - 1] print(line.rstrip()) - print(' ' * offset + '^') - if self._show_pep8: - print(check.__doc__.lstrip('\n').rstrip()) - return code + print(re.sub(r'\S', ' ', line[:offset]) + '^') + if self._show_pep8 and doc: + print(' ' + doc.strip()) + return self.file_errors class DiffReport(StandardReport): @@ -1508,57 +1612,21 @@ return super(DiffReport, self).error(line_number, offset, text, check) -class TestReport(StandardReport): - """Collect the results for the tests.""" - - def __init__(self, options): - options.benchmark_keys += ['test cases', 'failed tests'] - super(TestReport, self).__init__(options) - self._verbose = options.verbose - - def get_file_results(self): - # Check if the expected errors were found - label = '%s:%s:1' % (self.filename, self.line_offset) - codes = sorted(self.expected) - for code in codes: - if not self.counters.get(code): - self.file_errors += 1 - self.total_errors += 1 - print('%s: error %s not found' % (label, code)) - if self._verbose and not self.file_errors: - print('%s: passed (%s)' % - (label, ' '.join(codes) or 'Okay')) - self.counters['test cases'] += 1 - if self.file_errors: - self.counters['failed tests'] += 1 - # Reset counters - for key in set(self.counters) - set(self._benchmark_keys): - del self.counters[key] - self.messages = {} - return self.file_errors - - def print_results(self): - results = ("%(physical lines)d lines tested: %(files)d files, " - "%(test cases)d test cases%%s." % self.counters) - if self.total_errors: - print(results % ", %s failures" % self.total_errors) - else: - print(results % "") - print("Test failed." if self.total_errors else "Test passed.") - - class StyleGuide(object): """Initialize a PEP-8 instance with few options.""" def __init__(self, *args, **kwargs): # build options from the command line + self.checker_class = kwargs.pop('checker_class', Checker) parse_argv = kwargs.pop('parse_argv', False) config_file = kwargs.pop('config_file', None) - options, self.paths = process_options(parse_argv=parse_argv, - config_file=config_file) - if args or kwargs: - # build options from dict - options_dict = dict(*args, **kwargs) + parser = kwargs.pop('parser', None) + # build options from dict + options_dict = dict(*args, **kwargs) + arglist = None if parse_argv else options_dict.get('paths', None) + options, self.paths = process_options( + arglist, parse_argv, config_file, parser) + if options_dict: options.__dict__.update(options_dict) if 'paths' in options_dict: self.paths = options_dict['paths'] @@ -1569,15 +1637,19 @@ if not options.reporter: options.reporter = BaseReport if options.quiet else StandardReport - for index, value in enumerate(options.exclude): - options.exclude[index] = value.rstrip('/') - # Ignore all checks which are not explicitly selected options.select = tuple(options.select or ()) - options.ignore = tuple(options.ignore or options.select and ('',)) + if not (options.select or options.ignore or + options.testsuite or options.doctest) and DEFAULT_IGNORE: + # The default choice: ignore controversial checks + options.ignore = tuple(DEFAULT_IGNORE.split(',')) + else: + # Ignore all checks which are not explicitly selected + options.ignore = ('',) if options.select else tuple(options.ignore) options.benchmark_keys = BENCHMARK_KEYS[:] options.ignore_code = self.ignore_code options.physical_checks = self.get_checks('physical_line') options.logical_checks = self.get_checks('logical_line') + options.ast_checks = self.get_checks('tree') self.init_report() def init_report(self, reporter=None): @@ -1592,11 +1664,14 @@ report = self.options.report runner = self.runner report.start() - for path in paths: - if os.path.isdir(path): - self.input_dir(path) - elif not self.excluded(path): - runner(path) + try: + for path in paths: + if os.path.isdir(path): + self.input_dir(path) + elif not self.excluded(path): + runner(path) + except KeyboardInterrupt: + print('... stopped') report.stop() return report @@ -1604,7 +1679,8 @@ """Run all checks on a Python source file.""" if self.options.verbose: print('checking %s' % filename) - fchecker = Checker(filename, lines=lines, options=self.options) + fchecker = self.checker_class( + filename, lines=lines, options=self.options) return fchecker.check_all(expected=expected, line_offset=line_offset) def input_dir(self, dirname): @@ -1621,207 +1697,63 @@ print('directory ' + root) counters['directories'] += 1 for subdir in sorted(dirs): - if self.excluded(os.path.join(root, subdir)): + if self.excluded(subdir, root): dirs.remove(subdir) for filename in sorted(files): # contain a pattern that matches? if ((filename_match(filename, filepatterns) and - not self.excluded(filename))): + not self.excluded(filename, root))): runner(os.path.join(root, filename)) - def excluded(self, filename): - """ - Check if options.exclude contains a pattern that matches filename. + def excluded(self, filename, parent=None): + """Check if the file should be excluded. + + Check if 'options.exclude' contains a pattern that matches filename. """ + if not self.options.exclude: + return False basename = os.path.basename(filename) - return any((filename_match(filename, self.options.exclude, - default=False), - filename_match(basename, self.options.exclude, - default=False))) + if filename_match(basename, self.options.exclude): + return True + if parent: + filename = os.path.join(parent, filename) + filename = os.path.abspath(filename) + return filename_match(filename, self.options.exclude) def ignore_code(self, code): - """ - Check if the error code should be ignored. + """Check if the error code should be ignored. If 'options.select' contains a prefix of the error code, return False. Else, if 'options.ignore' contains a prefix of the error code, return True. """ + if len(code) < 4 and any(s.startswith(code) + for s in self.options.select): + return False return (code.startswith(self.options.ignore) and not code.startswith(self.options.select)) def get_checks(self, argument_name): - """ + """Get all the checks for this category. + Find all globally visible functions where the first argument name starts with argument_name and which contain selected tests. """ checks = [] - for name, codes, function, args in find_checks(argument_name): + for check, attrs in _checks[argument_name].items(): + (codes, args) = attrs if any(not (code and self.ignore_code(code)) for code in codes): - checks.append((name, function, args)) + checks.append((check.__name__, check, args)) return sorted(checks) -def init_tests(pep8style): - """ - Initialize testing framework. - - A test file can provide many tests. Each test starts with a - declaration. This declaration is a single line starting with '#:'. - It declares codes of expected failures, separated by spaces or 'Okay' - if no failure is expected. - If the file does not contain such declaration, it should pass all - tests. If the declaration is empty, following lines are not checked, - until next declaration. - - Examples: - - * Only E224 and W701 are expected: #: E224 W701 - * Following example is conform: #: Okay - * Don't check these lines: #: - """ - report = pep8style.init_report(TestReport) - runner = pep8style.input_file - - def run_tests(filename): - """Run all the tests from a file.""" - lines = readlines(filename) + ['#:\n'] - line_offset = 0 - codes = ['Okay'] - testcase = [] - count_files = report.counters['files'] - for index, line in enumerate(lines): - if not line.startswith('#:'): - if codes: - # Collect the lines of the test case - testcase.append(line) - continue - if codes and index: - codes = [c for c in codes if c != 'Okay'] - # Run the checker - runner(filename, testcase, expected=codes, - line_offset=line_offset) - # output the real line numbers - line_offset = index + 1 - # configure the expected errors - codes = line.split()[1:] - # empty the test case buffer - del testcase[:] - report.counters['files'] = count_files + 1 - return report.counters['failed tests'] - - pep8style.runner = run_tests - - -def selftest(options): - """ - Test all check functions with test cases in docstrings. - """ - count_failed = count_all = 0 - report = BaseReport(options) - counters = report.counters - checks = options.physical_checks + options.logical_checks - for name, check, argument_names in checks: - for line in check.__doc__.splitlines(): - line = line.lstrip() - match = SELFTEST_REGEX.match(line) - if match is None: - continue - code, source = match.groups() - checker = Checker(None, options=options, report=report) - for part in source.split(r'\n'): - part = part.replace(r'\t', '\t') - part = part.replace(r'\s', ' ') - checker.lines.append(part + '\n') - checker.check_all() - error = None - if code == 'Okay': - if len(counters) > len(options.benchmark_keys): - codes = [key for key in counters - if key not in options.benchmark_keys] - error = "incorrectly found %s" % ', '.join(codes) - elif not counters.get(code): - error = "failed to find %s" % code - # Keep showing errors for multiple tests - for key in set(counters) - set(options.benchmark_keys): - del counters[key] - report.messages = {} - count_all += 1 - if not error: - if options.verbose: - print("%s: %s" % (code, source)) - else: - count_failed += 1 - print("%s: %s:" % (__file__, error)) - for line in checker.lines: - print(line.rstrip()) - return count_failed, count_all - - -def read_config(options, args, arglist, parser): - """Read both user configuration and local configuration.""" - config = RawConfigParser() - - user_conf = options.config - if user_conf and os.path.isfile(user_conf): - if options.verbose: - print('user configuration: %s' % user_conf) - config.read(user_conf) - - parent = tail = args and os.path.abspath(os.path.commonprefix(args)) - while tail: - local_conf = os.path.join(parent, '.pep8') - if os.path.isfile(local_conf): - if options.verbose: - print('local configuration: %s' % local_conf) - config.read(local_conf) - break - parent, tail = os.path.split(parent) - - if config.has_section('pep8'): - option_list = dict([(o.dest, o.type or o.action) - for o in parser.option_list]) - - # First, read the default values - new_options, _ = parser.parse_args([]) - - # Second, parse the configuration - for opt in config.options('pep8'): - if options.verbose > 1: - print(' %s = %s' % (opt, config.get('pep8', opt))) - if opt.replace('_', '-') not in parser.config_options: - print('Unknown option: \'%s\'\n not in [%s]' % - (opt, ' '.join(parser.config_options))) - sys.exit(1) - normalized_opt = opt.replace('-', '_') - opt_type = option_list[normalized_opt] - if opt_type in ('int', 'count'): - value = config.getint('pep8', opt) - elif opt_type == 'string': - value = config.get('pep8', opt) - else: - assert opt_type in ('store_true', 'store_false') - value = config.getboolean('pep8', opt) - setattr(new_options, normalized_opt, value) - - # Third, overwrite with the command-line options - options, _ = parser.parse_args(arglist, values=new_options) - - return options - - -def process_options(arglist=None, parse_argv=False, config_file=None): - """Process options passed either via arglist or via command line args.""" - if not arglist and not parse_argv: - # Don't read the command line if the module is used as a library. - arglist = [] - if config_file is True: - config_file = DEFAULT_CONFIG - parser = OptionParser(version=__version__, +def get_parser(prog='pep8', version=__version__): + parser = OptionParser(prog=prog, version=version, usage="%prog [options] input ...") parser.config_options = [ - 'exclude', 'filename', 'select', 'ignore', 'max-line-length', 'count', - 'format', 'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose'] + 'exclude', 'filename', 'select', 'ignore', 'max-line-length', + 'hang-closing', 'count', 'format', 'quiet', 'show-pep8', + 'show-source', 'statistics', 'verbose'] parser.add_option('-v', '--verbose', default=0, action='count', help="print status messages, or debug with -vv") parser.add_option('-q', '--quiet', default=0, action='count', @@ -1856,51 +1788,118 @@ default=MAX_LINE_LENGTH, help="set maximum allowed line length " "(default: %default)") + parser.add_option('--hang-closing', action='store_true', + help="hang closing bracket instead of matching " + "indentation of opening bracket's line") parser.add_option('--format', metavar='format', default='default', help="set the error format [default|pylint|]") parser.add_option('--diff', action='store_true', help="report only lines changed according to the " "unified diff received on STDIN") group = parser.add_option_group("Testing Options") - group.add_option('--testsuite', metavar='dir', - help="run regression tests from dir") - group.add_option('--doctest', action='store_true', - help="run doctest on myself") + if os.path.exists(TESTSUITE_PATH): + group.add_option('--testsuite', metavar='dir', + help="run regression tests from dir") + group.add_option('--doctest', action='store_true', + help="run doctest on myself") group.add_option('--benchmark', action='store_true', help="measure processing speed") - group = parser.add_option_group("Configuration", description=( - "The project options are read from the [pep8] section of the .pep8 " - "file located in any parent folder of the path(s) being processed. " - "Allowed options are: %s." % ', '.join(parser.config_options))) - group.add_option('--config', metavar='path', default=config_file, - help="config file location (default: %default)") - - options, args = parser.parse_args(arglist) + return parser + + +def read_config(options, args, arglist, parser): + """Read both user configuration and local configuration.""" + config = RawConfigParser() + + user_conf = options.config + if user_conf and os.path.isfile(user_conf): + if options.verbose: + print('user configuration: %s' % user_conf) + config.read(user_conf) + + local_dir = os.curdir + parent = tail = args and os.path.abspath(os.path.commonprefix(args)) + while tail: + if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]): + local_dir = parent + if options.verbose: + print('local configuration: in %s' % parent) + break + (parent, tail) = os.path.split(parent) + + pep8_section = parser.prog + if config.has_section(pep8_section): + option_list = dict([(o.dest, o.type or o.action) + for o in parser.option_list]) + + # First, read the default values + (new_options, __) = parser.parse_args([]) + + # Second, parse the configuration + for opt in config.options(pep8_section): + if opt.replace('_', '-') not in parser.config_options: + print(" unknown option '%s' ignored" % opt) + continue + if options.verbose > 1: + print(" %s = %s" % (opt, config.get(pep8_section, opt))) + normalized_opt = opt.replace('-', '_') + opt_type = option_list[normalized_opt] + if opt_type in ('int', 'count'): + value = config.getint(pep8_section, opt) + elif opt_type == 'string': + value = config.get(pep8_section, opt) + if normalized_opt == 'exclude': + value = normalize_paths(value, local_dir) + else: + assert opt_type in ('store_true', 'store_false') + value = config.getboolean(pep8_section, opt) + setattr(new_options, normalized_opt, value) + + # Third, overwrite with the command-line options + (options, __) = parser.parse_args(arglist, values=new_options) + options.doctest = options.testsuite = False + return options + + +def process_options(arglist=None, parse_argv=False, config_file=None, + parser=None): + """Process options passed either via arglist or via command line args.""" + if not parser: + parser = get_parser() + if not parser.has_option('--config'): + if config_file is True: + config_file = DEFAULT_CONFIG + group = parser.add_option_group("Configuration", description=( + "The project options are read from the [%s] section of the " + "tox.ini file or the setup.cfg file located in any parent folder " + "of the path(s) being processed. Allowed options are: %s." % + (parser.prog, ', '.join(parser.config_options)))) + group.add_option('--config', metavar='path', default=config_file, + help="user config file location (default: %default)") + # Don't read the command line if the module is used as a library. + if not arglist and not parse_argv: + arglist = [] + # If parse_argv is True and arglist is None, arguments are + # parsed from the command line (sys.argv) + (options, args) = parser.parse_args(arglist) options.reporter = None - if options.testsuite: + if options.ensure_value('testsuite', False): args.append(options.testsuite) - elif not options.doctest: + elif not options.ensure_value('doctest', False): if parse_argv and not args: - if os.path.exists('.pep8') or options.diff: + if options.diff or any(os.path.exists(name) + for name in PROJECT_CONFIG): args = ['.'] else: parser.error('input not specified') options = read_config(options, args, arglist, parser) options.reporter = parse_argv and options.quiet == 1 and FileReport - if options.filename: - options.filename = options.filename.split(',') - options.exclude = options.exclude.split(',') - if options.select: - options.select = options.select.split(',') - if options.ignore: - options.ignore = options.ignore.split(',') - elif not (options.select or - options.testsuite or options.doctest) and DEFAULT_IGNORE: - # The default choice: ignore controversial checks - # (for doctest and testsuite, all checks are required) - options.ignore = DEFAULT_IGNORE.split(',') + options.filename = options.filename and options.filename.split(',') + options.exclude = normalize_paths(options.exclude) + options.select = options.select and options.select.split(',') + options.ignore = options.ignore and options.ignore.split(',') if options.diff: options.reporter = DiffReport @@ -1913,22 +1912,21 @@ def _main(): """Parse options and run checks on Python source.""" + import signal + + # Handle "Broken pipe" gracefully + try: + signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1)) + except AttributeError: + pass # not supported on Windows + pep8style = StyleGuide(parse_argv=True, config_file=True) options = pep8style.options - if options.doctest: - import doctest - fail_d, done_d = doctest.testmod(report=False, verbose=options.verbose) - fail_s, done_s = selftest(options) - count_failed = fail_s + fail_d - if not options.quiet: - count_passed = done_d + done_s - count_failed - print("%d passed and %d failed." % (count_passed, count_failed)) - print("Test failed." if count_failed else "Test passed.") - if count_failed: - sys.exit(1) - if options.testsuite: - init_tests(pep8style) - report = pep8style.check_files() + if options.doctest or options.testsuite: + from testsuite.support import run_tests + report = run_tests(pep8style) + else: + report = pep8style.check_files() if options.statistics: report.print_statistics() if options.benchmark: @@ -1940,6 +1938,5 @@ sys.stderr.write(str(report.total_errors) + '\n') sys.exit(1) - if __name__ == '__main__': - _main() + _main() \ No newline at end of file