pythonVSCode/pythonFiles/normalizeSelection.py at main · AgentIvan/pythonVSCode

History

125 lines (103 loc) · 4.75 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

# Licensed under the MIT License.

import ast

import json

import re

import sys

import textwrap

def split_lines(source):
    r"""
    Break *source* into lines the same way on every Python version.

    The Python grammar only recognises \r, \n and \r\n as line terminators,
    whereas str.splitlines() in Python 3 also splits on characters such as
    \v and \f. Splitting with an explicit pattern keeps the result identical
    across interpreters.

    Note that a run of consecutive newline characters is treated as a single
    separator, so blank lines in the middle of *source* do not appear in the result.
    """
    newline_runs = re.compile(r"[\n\r]+")
    return newline_runs.split(source)

def _get_statements(selection):
    """
    Process a multiline selection into a list of its top-level statements.

    Blank lines are removed from the selection, the remaining text is dedented,
    and the result is split into blocks using the statement positions reported
    by `ast.parse()`.

    This is a generator: it yields one string per top-level block. Multiline
    blocks carry a trailing newline so that, once the blocks are joined with
    newlines, a blank line follows each of them.

    Any exception raised by `ast.parse()` (for instance SyntaxError on an
    incomplete selection) propagates to the caller when the generator is
    first advanced.
    """
    # Remove blank lines within the selection to prevent the REPL from thinking the block is finished.
    non_blank = (line for line in split_lines(selection) if line.strip() != "")

    # Dedent the selection and parse it using the ast module.
    # Note that leading comments in the selection will be discarded during parsing.
    source = textwrap.dedent("\n".join(non_blank))
    tree = ast.parse(source)

    # We'll need the dedented lines to rebuild the selection.
    lines = split_lines(source)

    # Compute the 0-indexed first line of every top-level block.
    # tree.body is a list of AST objects; each has a 1-indexed lineno attribute.
    starts = []
    for node in tree.body:
        first_line = node.lineno
        # Since Python 3.8 the lineno of a decorated function or class points at
        # the `def`/`class` keyword rather than at its first decorator, so the
        # decorators have to be folded back into the block explicitly. Taking
        # the minimum is also correct on older versions, where lineno already
        # is the first decorator line. Nodes without decorators are unaffected.
        for decorator in getattr(node, "decorator_list", ()):
            first_line = min(first_line, decorator.lineno)
        start = first_line - 1
        # Several statements may share one physical line (`a = 1; b = 2`).
        # That line must be emitted once, not once per statement.
        if not starts or start > starts[-1]:
            starts.append(start)

    # Only lineno is available on every supported Python version (end_lineno
    # requires 3.8+), so each block ends where the next one starts, and the
    # last block runs until the last line.
    ends = starts[1:] + [len(lines)]

    for start, end in zip(starts, ends):
        # Given this selection:
        # 1: if (m > 0 and
        # 2:        n < 3):
        # 3:     print('foo')
        # 4: value = 'bar'
        #
        # the first block spans lines 1-3 and the second block is line 4.
        block = "\n".join(lines[start:end])

        # If the block is multiline, add an extra newline character at its end.
        # This way, when joining blocks back together, there will be a blank line
        # after each multiline statement and no blank lines between single-line
        # statements, or it would look like this:
        # >>> x = 22
        # >>>
        # >>> total = x + 30
        # >>>
        # For a multiline bracketed expression this newline is redundant, since
        # the closing bracket already terminates the statement; the REPL simply
        # shows an extra empty prompt in that case.
        if end - start > 1:
            block += "\n"

        yield block

def normalize_lines(selection):
    """
    Normalize the text selection received from the extension.

    The selection is split into its top-level statements, which are joined
    back with newlines so the REPL knows where each block ends, and a final
    newline is appended. Single-line and multiline selections go through the
    same path.

    If the selection cannot be processed (for example it is not valid Python
    on its own), it is returned unchanged with two newlines appended so that
    the REPL still sees the end of the block.

    Returns the normalized source as a string.
    """
    try:
        # Parse the selection into a list of top-level blocks.
        # We don't differentiate between single and multiline statements
        # because it's not a perf bottleneck, and the overhead from splitting
        # and rejoining strings in the multiline case is one-off.
        statements = _get_statements(selection)

        # _get_statements is lazy, so parsing errors surface here, while joining.
        # Insert a newline between each top-level statement, and append a newline to the selection.
        source = "\n".join(statements) + "\n"
    except Exception:
        # Deliberately broad: any failure while parsing falls back to sending
        # the selection as-is, followed by a blank line to end the block.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        source = selection + "\n\n"

    return source

if __name__ == "__main__":
    # The extension talks to this script over stdin/stdout using UTF-8 encoded JSON.
    # On Python 3 the byte-oriented streams live under `.buffer`; on Python 2
    # the standard streams already deal in bytes.
    running_py2 = sys.version_info < (3,)
    byte_input = sys.stdin if running_py2 else sys.stdin.buffer

    # Decode the request and pull the selected code out of it.
    request = json.loads(byte_input.read().decode("utf-8"))
    result = normalize_lines(request["code"])

    # Send the normalized code back to the extension in a JSON object.
    response = json.dumps({"normalized": result})
    byte_output = sys.stdout if running_py2 else sys.stdout.buffer
    byte_output.write(response.encode("utf-8"))
    byte_output.close()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

normalizeSelection.py

Latest commit

History

normalizeSelection.py

File metadata and controls