Merged in tebeka/flake8 (pull request #18)

This commit is contained in:
Tarek Ziadé 2012-07-09 16:02:56 +02:00
commit abd1155e19
2 changed files with 466 additions and 138 deletions

View file

@ -1,3 +1,3 @@
# #
__version__ = '1.4' __version__ = '1.5'

View file

@ -1,5 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
# flake8: noqa
# pep8.py - Check Python source code formatting, according to PEP 8 # pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net> # Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
# #
@ -23,7 +22,7 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.
""" r"""
Check Python source code formatting, according to PEP 8: Check Python source code formatting, according to PEP 8:
http://www.python.org/dev/peps/pep-0008/ http://www.python.org/dev/peps/pep-0008/
@ -43,6 +42,7 @@ W warnings
500 line length 500 line length
600 deprecation 600 deprecation
700 statements 700 statements
900 syntax error
You can add checks to this program by writing plugins. Each plugin is You can add checks to this program by writing plugins. Each plugin is
a simple function that is called for each line of source code, either a simple function that is called for each line of source code, either
@ -92,10 +92,12 @@ before the docstring, you can use \n for newline, \t for tab and \s
for space. for space.
""" """
from flake8 import __version__ as flake8_version
from flake8.pyflakes import __version__ as pep8_version
__version__ = '0.6.1' from flake8.pyflakes import __version__ as pyflakes_version
from flake8 import __version__ as flake8_version
__version__ = '1.2'
import os import os
import sys import sys
@ -109,38 +111,44 @@ from fnmatch import fnmatch
try: try:
frozenset frozenset
except NameError: except NameError:
from sets import ImmutableSet as frozenset from sets import Set as set, ImmutableSet as frozenset
from flake8.util import skip_line from flake8.util import skip_line
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git' DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
DEFAULT_IGNORE = 'E24' DEFAULT_IGNORE = 'E12,E24'
MAX_LINE_LENGTH = 79 MAX_LINE_LENGTH = 79
INDENT_REGEX = re.compile(r'([ \t]*)') INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)') RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
QUOTED_REGEX = re.compile(r"""([""'])(?:(?=(\\?))\2.)*?\1""") RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+')
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)') SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}') ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
DOCSTRING_REGEX = re.compile(r'u?r?["\']') DOCSTRING_REGEX = re.compile(r'u?r?["\']')
WHITESPACE_AROUND_OPERATOR_REGEX = \ WHITESPACE_AROUND_OPERATOR_REGEX = \
re.compile('([^\w\s]*)\s*(\t| )\s*([^\w\s]*)') re.compile('([^\w\s]*)\s*(\t| )\s*([^\w\s]*)')
WHITESPACE_AROUND_KEYWORD_REGEX = \
re.compile('([^\s]*)\s*(\t| )\s*([^\s]*)')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \ WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \
re.compile(r'[()]|\s=[^=]|[^=!<>]=\s') re.compile(r'[()]|\s=[^=]|[^=!<>]=\s')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_TYPE_REGEX = re.compile(r'([=!]=|is|is\s+not)\s*type(?:s\.(\w+)Type'
r'|\(\s*(\(\s*\)|[^)]*[^ )])\s*\))')
LAMBDA_REGEX = re.compile(r'\blambda\b')
WHITESPACE = ' \t' WHITESPACE = frozenset(' \t')
BINARY_OPERATORS = frozenset([
BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>', '**=', '*=', '+=', '-=', '!=', '<>',
'%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=', '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=',
'%', '^', '&', '|', '=', '/', '//', '<', '>', '<<']) '%', '^', '&', '|', '=', '/', '//', '<', '>', '<<'])
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-']) UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT, SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
tokenize.DEDENT, tokenize.NEWLINE]) tokenize.INDENT, tokenize.DEDENT])
E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) - SINGLETONS = frozenset(['False', 'None', 'True'])
frozenset(['False', 'None', 'True'])) KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines') BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines')
options = None options = None
@ -181,7 +189,7 @@ def tabs_obsolete(physical_line):
W191: if True:\n\treturn W191: if True:\n\treturn
""" """
indent = INDENT_REGEX.match(physical_line).group(1) indent = INDENT_REGEX.match(physical_line).group(1)
if indent.count('\t'): if '\t' in indent:
return indent.index('\t'), "W191 indentation contains tabs" return indent.index('\t'), "W191 indentation contains tabs"
@ -222,7 +230,7 @@ def trailing_blank_lines(physical_line, lines, line_number):
Okay: spam(1) Okay: spam(1)
W391: spam(1)\n W391: spam(1)\n
""" """
if physical_line.strip() == '' and line_number == len(lines): if not physical_line.rstrip() and line_number == len(lines):
return 0, "W391 blank line at end of file" return 0, "W391 blank line at end of file"
@ -247,16 +255,17 @@ def maximum_line_length(physical_line):
""" """
line = physical_line.rstrip() line = physical_line.rstrip()
length = len(line) length = len(line)
if length > MAX_LINE_LENGTH: if length > options.max_line_length:
try: try:
# The line could contain multi-byte characters # The line could contain multi-byte characters
if not hasattr(line, 'decode'): # Python 3 if not hasattr(line, 'decode'): # Python 3
line = line.encode('latin-1') line = line.encode('latin-1')
length = len(line.decode('utf-8')) length = len(line.decode('utf-8'))
except UnicodeDecodeError: except UnicodeError:
pass pass
if length > MAX_LINE_LENGTH: if length > options.max_line_length:
return MAX_LINE_LENGTH, "E501 line too long (%d characters)" % length return options.max_line_length, \
"E501 line too long (%d characters)" % length
############################################################################## ##############################################################################
@ -292,18 +301,18 @@ def blank_lines(logical_line, blank_lines, indent_level, line_number,
max_blank_lines = max(blank_lines, blank_lines_before_comment) max_blank_lines = max(blank_lines, blank_lines_before_comment)
if previous_logical.startswith('@'): if previous_logical.startswith('@'):
if max_blank_lines: if max_blank_lines:
return 0, "E304 blank lines found after function decorator" yield 0, "E304 blank lines found after function decorator"
elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2): elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
return 0, "E303 too many blank lines (%d)" % max_blank_lines yield 0, "E303 too many blank lines (%d)" % max_blank_lines
elif (logical_line.startswith('def ') or elif (logical_line.startswith('def ') or
logical_line.startswith('class ') or logical_line.startswith('class ') or
logical_line.startswith('@')): logical_line.startswith('@')):
if indent_level: if indent_level:
if not (max_blank_lines or previous_indent_level < indent_level or if not (max_blank_lines or previous_indent_level < indent_level or
DOCSTRING_REGEX.match(previous_logical)): DOCSTRING_REGEX.match(previous_logical)):
return 0, "E301 expected 1 blank line, found 0" yield 0, "E301 expected 1 blank line, found 0"
elif max_blank_lines != 2: elif max_blank_lines != 2:
return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines yield 0, "E302 expected 2 blank lines, found %d" % max_blank_lines
def extraneous_whitespace(logical_line): def extraneous_whitespace(logical_line):
@ -332,12 +341,34 @@ def extraneous_whitespace(logical_line):
char = text.strip() char = text.strip()
found = match.start() found = match.start()
if text == char + ' ' and char in '([{': if text == char + ' ' and char in '([{':
return found + 1, "E201 whitespace after '%s'" % char yield found + 1, "E201 whitespace after '%s'" % char
if text == ' ' + char and line[found - 1] != ',': if text == ' ' + char and line[found - 1] != ',':
if char in '}])': if char in '}])':
return found, "E202 whitespace before '%s'" % char yield found, "E202 whitespace before '%s'" % char
if char in ',;:': if char in ',;:':
return found, "E203 whitespace before '%s'" % char yield found, "E203 whitespace before '%s'" % char
def whitespace_around_keywords(logical_line):
r"""
Avoid extraneous whitespace around keywords.
Okay: True and False
E271: True and False
E272: True and False
E273: True and\tFalse
E274: True\tand False
"""
for match in WHITESPACE_AROUND_KEYWORD_REGEX.finditer(logical_line):
before, whitespace, after = match.groups()
tab = whitespace == '\t'
offset = match.start(2)
if before in KEYWORDS:
yield offset, (tab and "E273 tab after keyword" or
"E271 multiple spaces after keyword")
elif after in KEYWORDS:
yield offset, (tab and "E274 tab before keyword" or
"E272 multiple spaces before keyword")
def missing_whitespace(logical_line): def missing_whitespace(logical_line):
@ -362,7 +393,7 @@ def missing_whitespace(logical_line):
continue # Slice syntax, no space required continue # Slice syntax, no space required
if char == ',' and line[index + 1] == ')': if char == ',' and line[index + 1] == ')':
continue # Allow tuple with only one element: (3,) continue # Allow tuple with only one element: (3,)
return index, "E231 missing whitespace after '%s'" % char yield index, "E231 missing whitespace after '%s'" % char
def indentation(logical_line, previous_logical, indent_char, def indentation(logical_line, previous_logical, indent_char,
@ -384,12 +415,191 @@ def indentation(logical_line, previous_logical, indent_char,
E113: a = 1\n b = 2 E113: a = 1\n b = 2
""" """
if indent_char == ' ' and indent_level % 4: if indent_char == ' ' and indent_level % 4:
return 0, "E111 indentation is not a multiple of four" yield 0, "E111 indentation is not a multiple of four"
indent_expect = previous_logical.endswith(':') indent_expect = previous_logical.endswith(':')
if indent_expect and indent_level <= previous_indent_level: if indent_expect and indent_level <= previous_indent_level:
return 0, "E112 expected an indented block" yield 0, "E112 expected an indented block"
if indent_level > previous_indent_level and not indent_expect: if indent_level > previous_indent_level and not indent_expect:
return 0, "E113 unexpected indentation" yield 0, "E113 unexpected indentation"
def continuation_line_indentation(logical_line, tokens, indent_level):
r"""
Continuation lines should align wrapped elements either vertically using
Python's implicit line joining inside parentheses, brackets and braces, or
using a hanging indent.
When using a hanging indent the following considerations should be applied:
- there should be no arguments on the first line, and
- further indentation should be used to clearly distinguish itself as a
continuation line.
Okay: a = (\n)
E123: a = (\n )
Okay: a = (\n 42)
E121: a = (\n 42)
E122: a = (\n42)
E123: a = (42\n )
E124: a = (24,\n 42\n)
E125: if (a or\n b):\n pass
E126: a = (\n 42)
E127: a = (24,\n 42)
E128: a = (24,\n 42)
"""
first_row = tokens[0][2][0]
nrows = 1 + tokens[-1][2][0] - first_row
if nrows == 1:
return
# indent_next tells us whether the next block is indented; assuming
# that it is indented by 4 spaces, then we should not allow 4-space
# indents on the final continuation line; in turn, some other
# indents are allowed to have an extra 4 spaces.
indent_next = logical_line.endswith(':')
indent_string = None
row = depth = 0
# remember how many brackets were opened on each line
parens = [0] * nrows
# relative indents of physical lines
rel_indent = [0] * nrows
# visual indent columns by indent depth
indent = [indent_level]
if options.verbose >= 3:
print(">>> " + tokens[0][4].rstrip())
for token_type, text, start, end, line in tokens:
newline = row < start[0] - first_row
if newline:
row = start[0] - first_row
newline = (not last_token_multiline and
token_type not in (tokenize.NL, tokenize.NEWLINE))
if newline:
# this is the beginning of a continuation line.
last_indent = start
if options.verbose >= 3:
print("... " + line.rstrip())
# record the initial indent.
rel_indent[row] = start[1] - indent_level
if depth:
# a bracket expression in a continuation line.
# find the line that it was opened on
for open_row in range(row - 1, -1, -1):
if parens[open_row]:
break
else:
# an unbracketed continuation line (ie, backslash)
open_row = 0
hang = rel_indent[row] - rel_indent[open_row]
# we have candidates for visual indent
d = depth
while d and hasattr(indent[d], 'add'):
if start[1] in indent[d]:
is_visual = True
break
d -= 1
else:
is_visual = (d and start[1] == indent[d])
is_not_hanging = not (hang == 4 or
(indent_next and rel_indent[row] == 8))
if token_type != tokenize.OP and start[1] == indent_string:
# Indented string with implicit concatenation
pass
elif token_type == tokenize.OP and text in ']})':
# This line starts with a closing bracket
if hang == 0:
if hasattr(indent[depth], 'add'):
indent[depth] = min(indent[depth] or [0])
if start[1] < indent[depth] - 4:
yield (start, 'E124 closing bracket '
'missing visual indentation')
elif hang == 4 or not is_visual:
yield (start, 'E123 closing bracket does not match '
'indentation of opening bracket\'s line')
elif is_visual:
# Visual indent is verified
for d1 in range(d, depth + 1):
indent[d1] = start[1]
indent[depth] = start[1]
elif indent[depth]:
# Visual indent is broken
if hasattr(indent[depth], 'add'):
indent[depth] = min(indent[depth])
if start[1] < indent[depth]:
yield (start, 'E128 continuation line '
'under-indented for visual indent')
elif is_not_hanging:
yield (start, 'E127 continuation line over-'
'indented for visual indent')
else:
# hanging indent.
if hasattr(indent[depth], 'add'):
indent[depth] = None
if hang <= 0:
yield (start, 'E122 continuation line '
'missing indentation or outdented')
elif hang % 4:
yield (start, 'E121 continuation line indentation '
'is not a multiple of four')
elif is_not_hanging:
yield (start, 'E126 continuation line over-indented '
'for hanging indent')
# parent indents should not be more than this one
indent[depth] = start[1]
d = depth - 1
while hasattr(indent[d], 'add'):
indent[d] = set([i for i in indent[d] if i <= start[1]])
d -= 1
# look for visual indenting
if ((parens[row] and token_type != tokenize.NL and
hasattr(indent[depth], 'add'))):
# text after an open parens starts visual indenting
indent[depth].add(start[1])
if options.verbose >= 4:
print("bracket depth %s indent to %s" % (depth, start[1]))
# deal with implicit string concatenation
if indent_string:
if token_type == tokenize.OP and text != '%':
indent_string = None
elif token_type == tokenize.STRING:
indent_string = start[1]
# keep track of bracket depth
if token_type == tokenize.OP:
if text in '([{':
indent.append(set())
depth += 1
parens[row] += 1
if options.verbose >= 4:
print("bracket depth %s seen, col %s, visual min = %s" %
(depth, start[1], indent[depth]))
elif text in ')]}':
indent.pop()
depth -= 1
for idx in range(row, -1, -1):
if parens[idx]:
parens[idx] -= 1
break
assert len(indent) == depth + 1
last_token_multiline = (start[0] != end[0])
if indent_next and rel_indent[-1] == 4:
yield (last_indent, "E125 continuation line does not distinguish "
"itself from next logical line")
def whitespace_before_parameters(logical_line, tokens): def whitespace_before_parameters(logical_line, tokens):
@ -421,15 +631,15 @@ def whitespace_before_parameters(logical_line, tokens):
# Syntax "class A (B):" is allowed, but avoid it # Syntax "class A (B):" is allowed, but avoid it
(index < 2 or tokens[index - 2][1] != 'class') and (index < 2 or tokens[index - 2][1] != 'class') and
# Allow "return (a.foo for a in range(5))" # Allow "return (a.foo for a in range(5))"
(not keyword.iskeyword(prev_text))): not keyword.iskeyword(prev_text)):
return prev_end, "E211 whitespace before '%s'" % text yield prev_end, "E211 whitespace before '%s'" % text
prev_type = token_type prev_type = token_type
prev_text = text prev_text = text
prev_end = end prev_end = end
def whitespace_around_operator(logical_line): def whitespace_around_operator(logical_line):
""" r"""
Avoid extraneous whitespace in the following situations: Avoid extraneous whitespace in the following situations:
- More than one space around an assignment (or other) operator to - More than one space around an assignment (or other) operator to
@ -446,10 +656,10 @@ def whitespace_around_operator(logical_line):
tab = whitespace == '\t' tab = whitespace == '\t'
offset = match.start(2) offset = match.start(2)
if before in OPERATORS: if before in OPERATORS:
return offset, (tab and "E224 tab after operator" or yield offset, (tab and "E224 tab after operator" or
"E222 multiple spaces after operator") "E222 multiple spaces after operator")
elif after in OPERATORS: elif after in OPERATORS:
return offset, (tab and "E223 tab before operator" or yield offset, (tab and "E223 tab before operator" or
"E221 multiple spaces before operator") "E221 multiple spaces before operator")
@ -498,11 +708,13 @@ def missing_whitespace_around_operator(logical_line, tokens):
if need_space: if need_space:
if start != prev_end: if start != prev_end:
need_space = False need_space = False
elif text == '>' and prev_text == '<': elif text == '>' and prev_text in ('<', '-'):
# Tolerate the "<>" operator, even if running Python 3 # Tolerate the "<>" operator, even if running Python 3
# Deal with Python 3's annotated return value "->"
pass pass
else: else:
return prev_end, "E225 missing whitespace around operator" yield prev_end, "E225 missing whitespace around operator"
need_space = False
elif token_type == tokenize.OP and prev_end is not None: elif token_type == tokenize.OP and prev_end is not None:
if text == '=' and parens: if text == '=' and parens:
# Allow keyword args or defaults: foo(bar=None). # Allow keyword args or defaults: foo(bar=None).
@ -516,19 +728,20 @@ def missing_whitespace_around_operator(logical_line, tokens):
if prev_text in '}])': if prev_text in '}])':
need_space = True need_space = True
elif prev_type == tokenize.NAME: elif prev_type == tokenize.NAME:
if prev_text not in E225NOT_KEYWORDS: if prev_text not in KEYWORDS:
need_space = True need_space = True
else: else:
need_space = True need_space = True
if need_space and start == prev_end: if need_space and start == prev_end:
return prev_end, "E225 missing whitespace around operator" yield prev_end, "E225 missing whitespace around operator"
need_space = False
prev_type = token_type prev_type = token_type
prev_text = text prev_text = text
prev_end = end prev_end = end
def whitespace_around_comma(logical_line): def whitespace_around_comma(logical_line):
""" r"""
Avoid extraneous whitespace in the following situations: Avoid extraneous whitespace in the following situations:
- More than one space around an assignment (or other) operator to - More than one space around an assignment (or other) operator to
@ -545,10 +758,10 @@ def whitespace_around_comma(logical_line):
for separator in ',;:': for separator in ',;:':
found = line.find(separator + ' ') found = line.find(separator + ' ')
if found > -1: if found > -1:
return found + 1, "E241 multiple spaces after '%s'" % separator yield found + 1, "E241 multiple spaces after '%s'" % separator
found = line.find(separator + '\t') found = line.find(separator + '\t')
if found > -1: if found > -1:
return found + 1, "E242 tab after '%s'" % separator yield found + 1, "E242 tab after '%s'" % separator
def whitespace_around_named_parameter_equals(logical_line): def whitespace_around_named_parameter_equals(logical_line):
@ -572,7 +785,7 @@ def whitespace_around_named_parameter_equals(logical_line):
text = match.group() text = match.group()
if parens and len(text) == 3: if parens and len(text) == 3:
issue = "E251 no spaces around keyword / parameter equals" issue = "E251 no spaces around keyword / parameter equals"
return match.start(), issue yield match.start(), issue
if text == '(': if text == '(':
parens += 1 parens += 1
elif text == ')': elif text == ')':
@ -601,11 +814,10 @@ def whitespace_before_inline_comment(logical_line, tokens):
if not line[:start[1]].strip(): if not line[:start[1]].strip():
continue continue
if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
return (prev_end, yield (prev_end,
"E261 at least two spaces before inline comment") "E261 at least two spaces before inline comment")
if (len(text) > 1 and text.startswith('# ') if text.startswith('# ') or not text.startswith('# '):
or not text.startswith('# ')): yield start, "E262 inline comment should start with '# '"
return start, "E262 inline comment should start with '# '"
else: else:
prev_end = end prev_end = end
@ -626,8 +838,8 @@ def imports_on_separate_lines(logical_line):
line = logical_line line = logical_line
if line.startswith('import '): if line.startswith('import '):
found = line.find(',') found = line.find(',')
if found > -1: if -1 < found:
return found, "E401 multiple imports on one line" yield found, "E401 multiple imports on one line"
def compound_statements(logical_line): def compound_statements(logical_line):
@ -661,11 +873,98 @@ def compound_statements(logical_line):
before = line[:found] before = line[:found]
if (before.count('{') <= before.count('}') and # {'a': 1} (dict) if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
before.count('[') <= before.count(']') and # [1:2] (slice) before.count('[') <= before.count(']') and # [1:2] (slice)
not re.search(r'\blambda\b', before)): # lambda x: x before.count('(') <= before.count(')') and # (Python 3 annotation)
return found, "E701 multiple statements on one line (colon)" not LAMBDA_REGEX.search(before)): # lambda x: x
yield found, "E701 multiple statements on one line (colon)"
found = line.find(';') found = line.find(';')
if -1 < found: if -1 < found:
return found, "E702 multiple statements on one line (semicolon)" yield found, "E702 multiple statements on one line (semicolon)"
def explicit_line_join(logical_line, tokens):
r"""
Avoid explicit line join between brackets.
The preferred way of wrapping long lines is by using Python's implied line
continuation inside parentheses, brackets and braces. Long lines can be
broken over multiple lines by wrapping expressions in parentheses. These
should be used in preference to using a backslash for line continuation.
E502: aaa = [123, \\n 123]
E502: aaa = ("bbb " \\n "ccc")
Okay: aaa = [123,\n 123]
Okay: aaa = ("bbb "\n "ccc")
Okay: aaa = "bbb " \\n "ccc"
"""
prev_start = prev_end = parens = 0
for token_type, text, start, end, line in tokens:
if start[0] != prev_start and parens and backslash:
yield backslash, "E502 the backslash is redundant between brackets"
if end[0] != prev_end:
if line.rstrip('\r\n').endswith('\\'):
backslash = (end[0], len(line.splitlines()[-1]) - 1)
else:
backslash = None
if token_type == tokenize.OP:
if text in '([{':
parens += 1
elif text in ')]}':
parens -= 1
prev_start, prev_end = start[0], end[0]
def comparison_to_singleton(logical_line):
"""
Comparisons to singletons like None should always be done
with "is" or "is not", never the equality operators.
E711: if arg != None:
Okay: if arg is not None:
Also, beware of writing if x when you really mean if x is not None --
e.g. when testing whether a variable or argument that defaults to None was
set to some other value. The other value might have a type (such as a
container) that could be false in a boolean context!
"""
match = COMPARE_SINGLETON_REGEX.search(logical_line)
if match:
same = (match.group(1) == '==')
singleton = match.group(2)
if singleton in ('None',):
code = 'E711'
msg = "'if cond %s %s:'" % (same and 'is' or 'is not', singleton)
else:
code = 'E712'
nonzero = ((same and singleton == 'True' and same) or
(singleton == 'False' and not same))
msg = ("'if cond is %s:' or 'if%scond:'" %
(nonzero and 'True' or 'False', nonzero and ' ' or ' not '))
yield match.start(1), ("%s comparison to %s should be %s" %
(code, singleton, msg))
def comparison_type(logical_line):
"""
Object type comparisons should always use isinstance() instead of
comparing types directly.
Okay: if isinstance(obj, int):
E721: if type(obj) is type(1):
When checking if an object is a string, keep in mind that it might be a
unicode string too! In Python 2.3, str and unicode have a common base
class, basestring, so you can do:
Okay: if isinstance(obj, basestring):
Okay: if type(a1) is type(b1):
"""
match = COMPARE_TYPE_REGEX.search(logical_line)
if match:
inst = match.group(3)
if inst and isidentifier(inst) and inst not in SINGLETONS:
return # Allow comparison for types which are not obvious
yield match.start(1), "E721 do not compare types, use 'isinstance()'"
def python_3000_has_key(logical_line): def python_3000_has_key(logical_line):
@ -678,7 +977,7 @@ def python_3000_has_key(logical_line):
""" """
pos = logical_line.find('.has_key(') pos = logical_line.find('.has_key(')
if pos > -1: if pos > -1:
return pos, "W601 .has_key() is deprecated, use 'in'" yield pos, "W601 .has_key() is deprecated, use 'in'"
def python_3000_raise_comma(logical_line): def python_3000_raise_comma(logical_line):
@ -692,11 +991,8 @@ def python_3000_raise_comma(logical_line):
form will be removed in Python 3000. form will be removed in Python 3000.
""" """
match = RAISE_COMMA_REGEX.match(logical_line) match = RAISE_COMMA_REGEX.match(logical_line)
if match: if match and not RERAISE_COMMA_REGEX.match(logical_line):
rest = QUOTED_REGEX.sub("", logical_line) yield match.start(1), "W602 deprecated form of raising exception"
# but allow three argument form of raise
if rest.count(",") == 1:
return match.start(1), "W602 deprecated form of raising exception"
def python_3000_not_equal(logical_line): def python_3000_not_equal(logical_line):
@ -707,7 +1003,7 @@ def python_3000_not_equal(logical_line):
""" """
pos = logical_line.find('<>') pos = logical_line.find('<>')
if pos > -1: if pos > -1:
return pos, "W603 '<>' is deprecated, use '!='" yield pos, "W603 '<>' is deprecated, use '!='"
def python_3000_backticks(logical_line): def python_3000_backticks(logical_line):
@ -717,7 +1013,7 @@ def python_3000_backticks(logical_line):
""" """
pos = logical_line.find('`') pos = logical_line.find('`')
if pos > -1: if pos > -1:
return pos, "W604 backticks are deprecated, use 'repr()'" yield pos, "W604 backticks are deprecated, use 'repr()'"
############################################################################## ##############################################################################
@ -727,32 +1023,36 @@ def python_3000_backticks(logical_line):
if '' == ''.encode(): if '' == ''.encode():
# Python 2: implicit encoding. # Python 2: implicit encoding.
def readlines(filename): def readlines(filename):
return open(filename).readlines() return open(filename).readlines()
def isidentifier(s):
return re.match('[a-zA-Z_]\w*', s)
else: else:
# Python 3: decode to latin-1. # Python 3: decode to latin-1.
# This function is lazy, it does not read the encoding declaration. # This function is lazy, it does not read the encoding declaration.
# XXX: use tokenize.detect_encoding() # XXX: use tokenize.detect_encoding()
def readlines(filename):
def readlines(filename): # NOQA
return open(filename, encoding='latin-1').readlines() return open(filename, encoding='latin-1').readlines()
def isidentifier(s):
return s.isidentifier()
def expand_indent(line): def expand_indent(line):
""" r"""
Return the amount of indentation. Return the amount of indentation.
Tabs are expanded to the next multiple of 8. Tabs are expanded to the next multiple of 8.
>>> expand_indent(' ') >>> expand_indent(' ')
4 4
>>> expand_indent('\\t') >>> expand_indent('\t')
8 8
>>> expand_indent(' \\t') >>> expand_indent(' \t')
8 8
>>> expand_indent(' \\t') >>> expand_indent(' \t')
8 8
>>> expand_indent(' \\t') >>> expand_indent(' \t')
16 16
""" """
result = 0 result = 0
@ -791,13 +1091,6 @@ def mute_string(text):
return text[:start] + 'x' * (end - start) + text[end:] return text[:start] + 'x' * (end - start) + text[end:]
def message(text):
"""Print a message."""
# print >> sys.stderr, options.prog + ': ' + text
# print >> sys.stderr, text
print(text)
############################################################################## ##############################################################################
# Framework to run all checks # Framework to run all checks
############################################################################## ##############################################################################
@ -872,7 +1165,7 @@ class Checker(object):
Run all physical checks on a raw input line. Run all physical checks on a raw input line.
""" """
self.physical_line = line self.physical_line = line
if self.indent_char is None and len(line) and line[0] in ' \t': if self.indent_char is None and line[:1] in WHITESPACE:
self.indent_char = line[0] self.indent_char = line[0]
for name, check, argument_names in options.physical_checks: for name, check, argument_names in options.physical_checks:
result = self.run_check(check, argument_names) result = self.run_check(check, argument_names)
@ -895,16 +1188,16 @@ class Checker(object):
if token_type == tokenize.STRING: if token_type == tokenize.STRING:
text = mute_string(text) text = mute_string(text)
if previous: if previous:
end_line, end = previous[3] end_row, end = previous[3]
start_line, start = token[2] start_row, start = token[2]
if end_line != start_line: # different row if end_row != start_row: # different row
prev_text = self.lines[end_line - 1][end - 1] prev_text = self.lines[end_row - 1][end - 1]
if prev_text == ',' or (prev_text not in '{[(' if prev_text == ',' or (prev_text not in '{[('
and text not in '}])'): and text not in '}])'):
logical.append(' ') logical.append(' ')
length += 1 length += 1
elif end != start: # different column elif end != start: # different column
fill = self.lines[end_line - 1][end:start] fill = self.lines[end_row - 1][end:start]
logical.append(fill) logical.append(fill)
length += len(fill) length += len(fill)
self.mapping.append((length, token)) self.mapping.append((length, token))
@ -912,8 +1205,7 @@ class Checker(object):
length += len(text) length += len(text)
previous = token previous = token
self.logical_line = ''.join(logical) self.logical_line = ''.join(logical)
assert self.logical_line.lstrip() == self.logical_line assert self.logical_line.strip() == self.logical_line
assert self.logical_line.rstrip() == self.logical_line
def check_logical(self): def check_logical(self):
""" """
@ -930,8 +1222,7 @@ class Checker(object):
for name, check, argument_names in options.logical_checks: for name, check, argument_names in options.logical_checks:
if options.verbose >= 4: if options.verbose >= 4:
print(' ' + name) print(' ' + name)
result = self.run_check(check, argument_names) for result in self.run_check(check, argument_names):
if result is not None:
offset, text = result offset, text = result
if isinstance(offset, tuple): if isinstance(offset, tuple):
original_number, original_offset = offset original_number, original_offset = offset
@ -945,6 +1236,23 @@ class Checker(object):
text, check) text, check)
self.previous_logical = self.logical_line self.previous_logical = self.logical_line
def generate_tokens(self):
"""
Check if the syntax is valid.
"""
tokengen = tokenize.generate_tokens(self.readline_check_physical)
try:
for token in tokengen:
yield token
except (SyntaxError, tokenize.TokenError):
exc_type, exc = sys.exc_info()[:2]
offset = exc.args[1]
if len(offset) > 2:
offset = offset[1:3]
self.report_error(offset[0], offset[1],
'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
self.generate_tokens)
def check_all(self, expected=None, line_offset=0): def check_all(self, expected=None, line_offset=0):
""" """
Run all checks on the input file. Run all checks on the input file.
@ -960,35 +1268,38 @@ class Checker(object):
self.blank_lines_before_comment = 0 self.blank_lines_before_comment = 0
self.tokens = [] self.tokens = []
parens = 0 parens = 0
for token in tokenize.generate_tokens(self.readline_check_physical): for token in self.generate_tokens():
if options.verbose >= 3: if options.verbose >= 3:
if token[2][0] == token[3][0]: if token[2][0] == token[3][0]:
pos = '[%s:%s]' % (token[2][1] or '', token[3][1]) pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
else: else:
pos = 'l.%s' % token[3][0] pos = 'l.%s' % token[3][0]
print('l.%s\t%s\t%s\t%r' % print(
'l.%s\t%s\t%s\t%r' %
(token[2][0], pos, tokenize.tok_name[token[0]], token[1])) (token[2][0], pos, tokenize.tok_name[token[0]], token[1]))
self.tokens.append(token) self.tokens.append(token)
token_type, text = token[0:2] token_type, text = token[0:2]
if token_type == tokenize.OP and text in '([{': if token_type == tokenize.OP:
if text in '([{':
parens += 1 parens += 1
if token_type == tokenize.OP and text in '}])': elif text in '}])':
parens -= 1 parens -= 1
if token_type == tokenize.NEWLINE and not parens: elif token_type == tokenize.NEWLINE and not parens:
self.check_logical() self.check_logical()
self.blank_lines = 0 self.blank_lines = 0
self.blank_lines_before_comment = 0 self.blank_lines_before_comment = 0
self.tokens = [] self.tokens = []
if token_type == tokenize.NL and not parens: elif token_type == tokenize.NL and not parens:
if len(self.tokens) <= 1: if len(self.tokens) <= 1:
# The physical line contains only this token. # The physical line contains only this token.
self.blank_lines += 1 self.blank_lines += 1
self.tokens = [] self.tokens = []
if token_type == tokenize.COMMENT: elif token_type == tokenize.COMMENT:
source_line = token[4] source_line = token[4]
token_start = token[2][1] token_start = token[2][1]
if source_line[:token_start].strip() == '': if source_line[:token_start].strip() == '':
self.blank_lines_before_comment = max(self.blank_lines, self.blank_lines_before_comment = max(
self.blank_lines,
self.blank_lines_before_comment) self.blank_lines_before_comment)
self.blank_lines = 0 self.blank_lines = 0
if text.endswith('\n') and not parens: if text.endswith('\n') and not parens:
@ -1004,11 +1315,12 @@ class Checker(object):
""" """
if skip_line(self.physical_line): if skip_line(self.physical_line):
return return
code = text[:4] code = text[:4]
if ignore_code(code): if ignore_code(code):
return return
if options.quiet == 1 and not self.file_errors: if options.quiet == 1 and not self.file_errors:
message(self.filename) print(self.filename)
if code in options.counters: if code in options.counters:
options.counters[code] += 1 options.counters[code] += 1
else: else:
@ -1019,15 +1331,18 @@ class Checker(object):
return return
self.file_errors += 1 self.file_errors += 1
if options.counters[code] == 1 or not options.no_repeat: if options.counters[code] == 1 or not options.no_repeat:
message("%s:%s:%d: %s" % print("%s:%s:%d: %s" %
(self.filename, self.line_offset + line_number, (self.filename, self.line_offset + line_number,
offset + 1, text)) offset + 1, text))
if options.show_source: if options.show_source:
if line_number > len(self.lines):
line = ''
else:
line = self.lines[line_number - 1] line = self.lines[line_number - 1]
message(line.rstrip()) print(line.rstrip())
message(' ' * offset + '^') print(' ' * offset + '^')
if options.show_pep8: if options.show_pep8:
message(check.__doc__.lstrip('\n').rstrip()) print(check.__doc__.lstrip('\n').rstrip())
def input_file(filename): def input_file(filename):
@ -1035,7 +1350,7 @@ def input_file(filename):
Run all checks on a Python source file. Run all checks on a Python source file.
""" """
if options.verbose: if options.verbose:
message('checking ' + filename) print('checking ' + filename)
errors = Checker(filename).check_all() errors = Checker(filename).check_all()
return errors return errors
@ -1051,10 +1366,10 @@ def input_dir(dirname, runner=None):
runner = input_file runner = input_file
for root, dirs, files in os.walk(dirname): for root, dirs, files in os.walk(dirname):
if options.verbose: if options.verbose:
message('directory ' + root) print('directory ' + root)
options.counters['directories'] += 1 options.counters['directories'] += 1
dirs.sort() dirs.sort()
for subdir in dirs: for subdir in dirs[:]:
if excluded(subdir): if excluded(subdir):
dirs.remove(subdir) dirs.remove(subdir)
files.sort() files.sort()
@ -1191,8 +1506,8 @@ def run_tests(filename):
# Collect the lines of the test case # Collect the lines of the test case
testcase.append(line) testcase.append(line)
continue continue
if codes and index > 0: if codes and index:
label = '%s:%s:1' % (filename, line_offset + 1) label = '%s:%s:1' % (filename, line_offset)
codes = [c for c in codes if c != 'Okay'] codes = [c for c in codes if c != 'Okay']
# Run the checker # Run the checker
errors = Checker(filename, testcase).check_all(codes, line_offset) errors = Checker(filename, testcase).check_all(codes, line_offset)
@ -1200,13 +1515,13 @@ def run_tests(filename):
for code in codes: for code in codes:
if not options.counters.get(code): if not options.counters.get(code):
errors += 1 errors += 1
message('%s: error %s not found' % (label, code)) print('%s: error %s not found' % (label, code))
if options.verbose and not errors: if options.verbose and not errors:
message('%s: passed (%s)' % (label, ' '.join(codes))) print('%s: passed (%s)' % (label, ' '.join(codes) or 'Okay'))
# Keep showing errors for multiple tests # Keep showing errors for multiple tests
reset_counters() reset_counters()
# output the real line numbers # output the real line numbers
line_offset = index line_offset = index + 1
# configure the expected errors # configure the expected errors
codes = line.split()[1:] codes = line.split()[1:]
# empty the test case buffer # empty the test case buffer
@ -1268,28 +1583,26 @@ def process_options(arglist=None):
Process options passed either via arglist or via command line args. Process options passed either via arglist or via command line args.
""" """
global options, args global options, args
version = '%s (pyflakes: %s, pep8: %s)' % (flake8_version, pep8_version,
__version__) version = '%s (pyflakes: %s, pep8: %s)' % \
(flake8_version, pyflakes_version, __version__)
parser = OptionParser(version=version, parser = OptionParser(version=version,
usage="%prog [options] input ...") usage="%prog [options] input ...")
parser.add_option('--builtins', default=[], action="append", parser.add_option('--builtins', default=[], action="append",
help="append builtin function (pyflakes " help="append builtin function (pyflakes "
"_MAGIC_GLOBALS)") "_MAGIC_GLOBALS)")
parser.add_option('--max-complexity', default=-1, action='store',
type='int', help="McCabe complexity treshold")
parser.add_option('-v', '--verbose', default=0, action='count', parser.add_option('-v', '--verbose', default=0, action='count',
help="print status messages, or debug with -vv") help="print status messages, or debug with -vv")
parser.add_option('-q', '--quiet', default=0, action='count', parser.add_option('-q', '--quiet', default=0, action='count',
help="report only file names, or nothing with -qq") help="report only file names, or nothing with -qq")
parser.add_option('-r', '--no-repeat', action='store_true', parser.add_option('-r', '--no-repeat', action='store_true',
help="don't show all occurrences of the same error") help="don't show all occurrences of the same error")
parser.add_option('--first', action='store_false', dest='repeat',
help="show first occurrence of each error")
parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
help="exclude files or directories which match these " help="exclude files or directories which match these "
"comma separated patterns (default: %s)" % "comma separated patterns (default: %s)" %
DEFAULT_EXCLUDE) DEFAULT_EXCLUDE)
parser.add_option('--exit-zero', action='store_true',
help="use exit code 0 (success), even if there are "
"warnings")
parser.add_option('--filename', metavar='patterns', default='*.py', parser.add_option('--filename', metavar='patterns', default='*.py',
help="when parsing directories, only check filenames " help="when parsing directories, only check filenames "
"matching these comma separated patterns (default: " "matching these comma separated patterns (default: "
@ -1301,7 +1614,8 @@ def process_options(arglist=None):
parser.add_option('--show-source', action='store_true', parser.add_option('--show-source', action='store_true',
help="show source code for each error") help="show source code for each error")
parser.add_option('--show-pep8', action='store_true', parser.add_option('--show-pep8', action='store_true',
help="show text of PEP 8 for each error") help="show text of PEP 8 for each error "
"(implies --first)")
parser.add_option('--statistics', action='store_true', parser.add_option('--statistics', action='store_true',
help="count errors and warnings") help="count errors and warnings")
parser.add_option('--count', action='store_true', parser.add_option('--count', action='store_true',
@ -1312,19 +1626,32 @@ def process_options(arglist=None):
help="measure processing speed") help="measure processing speed")
parser.add_option('--testsuite', metavar='dir', parser.add_option('--testsuite', metavar='dir',
help="run regression tests from dir") help="run regression tests from dir")
parser.add_option('--max-line-length', type='int', metavar='n',
default=MAX_LINE_LENGTH,
help="set maximum allowed line length (default: %d)" %
MAX_LINE_LENGTH)
parser.add_option('--doctest', action='store_true', parser.add_option('--doctest', action='store_true',
help="run doctest on myself") help="run doctest on myself")
parser.add_option('--exit-zero', action='store_true',
help="use exit code 0 (success), even if there are "
"warnings")
parser.add_option('--max-complexity', default=-1, action='store',
type='int', help="McCabe complexity treshold")
options, args = parser.parse_args(arglist) options, args = parser.parse_args(arglist)
if options.show_pep8:
options.repeat = False
if options.testsuite: if options.testsuite:
args.append(options.testsuite) args.append(options.testsuite)
if not args and not options.doctest: if not args and not options.doctest:
parser.error('input not specified') parser.error('input not specified')
options.prog = os.path.basename(sys.argv[0]) options.prog = os.path.basename(sys.argv[0])
options.exclude = options.exclude.split(',') options.exclude = options.exclude.split(',')
for index in range(len(options.exclude)): for index, value in enumerate(options.exclude):
options.exclude[index] = options.exclude[index].rstrip('/') options.exclude[index] = value.rstrip('/')
if options.filename: if options.filename:
options.filename = options.filename.split(',') options.filename = options.filename.split(',')
if options.select: if options.select:
options.select = options.select.split(',') options.select = options.select.split(',')
else: else:
@ -1334,12 +1661,13 @@ def process_options(arglist=None):
elif options.select: elif options.select:
# Ignore all checks which are not explicitly selected # Ignore all checks which are not explicitly selected
options.ignore = [''] options.ignore = ['']
elif options.testsuite or options.doctest: elif options.testsuite or options.doctest or not DEFAULT_IGNORE:
# For doctest and testsuite, all checks are required # For doctest and testsuite, all checks are required
options.ignore = [] options.ignore = []
else: else:
# The default choice: ignore controversial checks # The default choice: ignore controversial checks
options.ignore = DEFAULT_IGNORE.split(',') options.ignore = DEFAULT_IGNORE.split(',')
options.physical_checks = find_checks('physical_line') options.physical_checks = find_checks('physical_line')
options.logical_checks = find_checks('logical_line') options.logical_checks = find_checks('logical_line')
options.counters = dict.fromkeys(BENCHMARK_KEYS, 0) options.counters = dict.fromkeys(BENCHMARK_KEYS, 0)