forked from microsoft/vscode-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnormalizeForInterpreter.py
More file actions
122 lines (95 loc) · 4.39 KB
/
normalizeForInterpreter.py
File metadata and controls
122 lines (95 loc) · 4.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import ast
import io
import operator
import os
import sys
import token
import tokenize
class Visitor(ast.NodeVisitor):
def __init__(self, lines):
self._lines = lines
self.line_numbers_with_nodes = set()
self.line_numbers_with_statements = []
def generic_visit(self, node):
if hasattr(node, 'col_offset') and hasattr(node, 'lineno') and node.col_offset == 0:
self.line_numbers_with_nodes.add(node.lineno)
if isinstance(node, ast.stmt):
self.line_numbers_with_statements.append(node.lineno)
ast.NodeVisitor.generic_visit(self, node)
def _tokenize(source):
"""Tokenize Python source code."""
# Using an undocumented API as the documented one in Python 2.7 does not work as needed
# cross-version.
return tokenize.generate_tokens(io.StringIO(source).readline)
def _indent_size(line):
for index, char in enumerate(line):
if not char.isspace():
return index
def _get_global_statement_blocks(source, lines):
"""Return a list of all global statement blocks.
The list comprises of 3-item tuples that contain the starting line number,
ending line number and whether the statement is a single line.
"""
tree = ast.parse(source)
visitor = Visitor(lines)
visitor.visit(tree)
statement_ranges = []
for index, line_number in enumerate(visitor.line_numbers_with_statements):
remaining_line_numbers = visitor.line_numbers_with_statements[index+1:]
end_line_number = len(lines) if len(remaining_line_numbers) == 0 else min(remaining_line_numbers) - 1
current_statement_is_oneline = line_number == end_line_number
if len(statement_ranges) == 0:
statement_ranges.append((line_number, end_line_number, current_statement_is_oneline))
continue
previous_statement = statement_ranges[-1]
previous_statement_is_oneline = previous_statement[2]
if previous_statement_is_oneline and current_statement_is_oneline:
statement_ranges[-1] = previous_statement[0], end_line_number, True
else:
statement_ranges.append((line_number, end_line_number, current_statement_is_oneline))
return statement_ranges
def normalize_lines(source):
"""Normalize blank lines for sending to the terminal.
Blank lines within a statement block are removed to prevent the REPL
from thinking the block is finished. Newlines are added to separate
top-level statements so that the REPL does not think there is a syntax
error.
"""
lines = source.splitlines(False)
# Find out if we have any trailing blank lines
if len(lines[-1].strip()) == 0 or source.endswith('\n'):
trailing_newline = '\n'
else:
trailing_newline = ''
# Step 1: Remove empty lines.
tokens = _tokenize(source)
newlines_indexes_to_remove = (spos[0] for (toknum, tokval, spos, epos, line) in tokens
if len(line.strip()) == 0
and token.tok_name[toknum] == 'NL'
and spos[0] == epos[0])
for line_number in reversed(list(newlines_indexes_to_remove)):
del lines[line_number-1]
# Step 2: Add blank lines between each global statement block.
# A consequtive single lines blocks of code will be treated as a single statement,
# just to ensure we do not unnecessarily add too many blank lines.
source = '\n'.join(lines)
tokens = _tokenize(source)
dedent_indexes = (spos[0] for (toknum, tokval, spos, epos, line) in tokens
if toknum == token.DEDENT and _indent_size(line) == 0)
global_statement_ranges = _get_global_statement_blocks(source, lines)
start_positions = map(operator.itemgetter(0), reversed(global_statement_ranges))
for line_number in filter(lambda x: x > 1, start_positions):
lines.insert(line_number-1, '')
sys.stdout.write('\n'.join(lines) + trailing_newline)
sys.stdout.flush()
if __name__ == '__main__':
contents = sys.argv[1]
try:
default_encoding = sys.getdefaultencoding()
encoded_contents = contents.encode(default_encoding, 'surrogateescape')
contents = encoded_contents.decode(default_encoding, 'replace')
except (UnicodeError, LookupError):
pass
if isinstance(contents, bytes):
contents = contents.decode('utf8')
normalize_lines(contents)