forked from DonJayamanne/pythonVSCode
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnormalizeSelection.py
More file actions
125 lines (103 loc) · 4.75 KB
/
normalizeSelection.py
File metadata and controls
125 lines (103 loc) · 4.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import ast
import json
import re
import sys
import textwrap
def split_lines(source):
    r"""
    Break a text selection into lines the same way on every Python version.

    The Python grammar only recognizes \r, \n and \r\n as line terminators,
    whereas str.splitlines() in Python 3 also splits on characters such as
    \v and \f. Splitting with an explicit pattern keeps the result aligned
    with the grammar regardless of the interpreter.

    A run of consecutive \r / \n characters counts as a single separator, so
    blank lines inside the text do not appear in the result. Text that starts
    or ends with a newline yields an empty string at that end of the list.
    """
    newline_run = re.compile(r"[\n\r]+")
    return newline_run.split(source)
def _block_start(node):
    """
    Return the 0-indexed line on which a top-level statement starts.

    From Python 3.8 onwards, the `lineno` of a decorated function or class is the line of
    the `def`/`class` keyword rather than the line of its first decorator.
    Taking the smallest line number among the node and its decorators gives the real first line
    of the statement on every Python version, including when a decorator spans several lines.
    Nodes that cannot be decorated have no `decorator_list` attribute and simply use `lineno`.
    """
    linenos = [node.lineno]
    linenos.extend(decorator.lineno for decorator in getattr(node, "decorator_list", ()))
    return min(linenos) - 1


def _get_statements(selection):
    """
    Process a multiline selection into a list of its top-level statements.

    This will remove empty newlines around and within the selection, dedent it,
    and split it using the result of `ast.parse()`.

    This is a generator: each yielded item is the source text of one top-level statement.
    Statements spanning more than one line get a trailing newline appended.
    Because nothing runs until the generator is consumed, any exception raised by `ast.parse()`
    (e.g. SyntaxError on an incomplete selection) surfaces at iteration time, not at call time.
    """
    # Remove blank lines within the selection to prevent the REPL from thinking the block is finished.
    non_blank = [line for line in split_lines(selection) if line.strip() != ""]

    # Dedent the selection and parse it using the ast module.
    # Note that leading comments in the selection will be discarded during parsing.
    source = textwrap.dedent("\n".join(non_blank))
    tree = ast.parse(source)

    # We'll need the dedented lines to rebuild the selection.
    lines = split_lines(source)

    # Get the line ranges for top-level blocks returned from parsing the dedented text
    # and split the selection accordingly.
    # tree.body is a list of AST objects, which we rely on to extract top-level statements.
    # If we supported Python 3.8+ only we could use the end_lineno attribute of each object
    # to get the end of each block.
    # However, earlier Python versions only expose the start position (1-indexed),
    # so to retrieve the end line of each block in a version-agnostic way
    # we use the start of the next block as the (exclusive) end of the current one.
    # The last block will just run until the last line.
    #
    # Given this selection:
    # 1: if (m > 0 and
    # 2:        n < 3):
    # 3:     print('foo')
    # 4: value = 'bar'
    #
    # The first block would start at line 1, and the second block at line 4.
    starts = [_block_start(node) for node in tree.body]
    ends = starts[1:] + [len(lines)]

    for start, end in zip(starts, ends):
        block = "\n".join(lines[start:end])

        # If the block is multiline, add an extra newline character at its end.
        # This way, when joining blocks back together, there will be a blank line between each multiline statement
        # and no blank lines between single-line statements, or it would look like this:
        # >>> x = 22
        # >>>
        # >>> total = x + 30
        # >>>
        # Note that for the multiline parentheses case this newline is redundant,
        # since the closing parenthesis terminates the statement already.
        # This means that for this pattern we'll end up with:
        # >>> x = [
        # ...   1
        # ... ]
        # >>>
        # >>> y = [
        # ...   2
        # ... ]
        if end - start > 1:
            block += "\n"

        yield block
def normalize_lines(selection):
    """
    Normalize the text selection received from the extension.

    The selection is split into its top-level statements, which are then joined back
    with a newline between each of them and a final newline at the end,
    so the REPL knows where each block ends.
    Single-line and multiline selections go through the same code path.

    If the selection cannot be processed (for instance because it does not parse),
    it is returned unchanged with two newlines appended, which ends any open block in the REPL.
    """
    try:
        # Parse the selection into a list of top-level blocks.
        # We don't differentiate between single and multiline statements
        # because it's not a perf bottleneck,
        # and the overhead from splitting and rejoining strings in the multiline case is one-off.
        statements = _get_statements(selection)

        # Insert a newline between each top-level statement, and append a newline to the selection.
        # _get_statements is a generator, so parsing errors are only raised while joining:
        # this line has to stay inside the try block.
        source = "\n".join(statements) + "\n"
    except Exception:
        # If there's a problem when parsing statements,
        # append a blank line to end the block and send it as-is.
        # Only Exception is caught, so KeyboardInterrupt and SystemExit still propagate.
        source = selection + "\n\n"

    return source
if __name__ == "__main__":
    # On Python 3 the raw byte streams live under `.buffer`;
    # on Python 2 the standard streams are used directly.
    is_python2 = sys.version_info < (3,)

    # The extension sends its request as a UTF-8 encoded JSON object on stdin.
    stdin = sys.stdin if is_python2 else sys.stdin.buffer
    request = json.loads(stdin.read().decode("utf-8"))

    # Normalize the code found under the "code" key
    # and wrap the result in a JSON object for the extension.
    response = json.dumps({"normalized": normalize_lines(request["code"])})

    # Reply on stdout as UTF-8 bytes, then close the stream.
    stdout = sys.stdout if is_python2 else sys.stdout.buffer
    stdout.write(response.encode("utf-8"))
    stdout.close()