vscode-python/pythonFiles/parso/python/parser.py at master · Goom11/vscode-python

History

212 lines (183 loc) · 8.26 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

from parso.python import tree

from parso.python.token import PythonTokenTypes

from parso.parser import BaseParser

# Module-level shortcuts for the token types the parser compares against
# directly (name leaves and the virtual indentation tokens).
NAME, INDENT, DEDENT = (
    PythonTokenTypes.NAME,
    PythonTokenTypes.INDENT,
    PythonTokenTypes.DEDENT,
)

class Parser(BaseParser):
    """
    This class is used to parse a Python file, it then divides them into a
    class structure of different scopes.

    :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
    :param error_recovery: Forwarded to ``BaseParser``. When the parser runs
        with error recovery, broken code is wrapped in error nodes/leaves
        instead of being handed to the base class' error handling.
    :param start_nonterminal: Name of the grammar rule to start parsing with.
        Error recovery is only implemented for ``'file_input'``.
    """

    # Maps a nonterminal name to the tree class that represents it. Every
    # nonterminal not listed here becomes a ``default_node``.
    node_map = {
        'expr_stmt': tree.ExprStmt,
        'classdef': tree.Class,
        'funcdef': tree.Function,
        'file_input': tree.Module,
        'import_name': tree.ImportName,
        'import_from': tree.ImportFrom,
        'break_stmt': tree.KeywordStatement,
        'continue_stmt': tree.KeywordStatement,
        'return_stmt': tree.ReturnStmt,
        'raise_stmt': tree.KeywordStatement,
        'yield_expr': tree.YieldExpr,
        'del_stmt': tree.KeywordStatement,
        'pass_stmt': tree.KeywordStatement,
        'global_stmt': tree.GlobalStmt,
        'nonlocal_stmt': tree.KeywordStatement,
        'print_stmt': tree.KeywordStatement,
        'assert_stmt': tree.AssertStmt,
        'if_stmt': tree.IfStmt,
        'with_stmt': tree.WithStmt,
        'for_stmt': tree.ForStmt,
        'while_stmt': tree.WhileStmt,
        'try_stmt': tree.TryStmt,
        'comp_for': tree.CompFor,
        # Not sure if this is the best idea, but IMO it's the easiest way to
        # avoid extreme amounts of work around the subtle difference of 2/3
        # grammar in list comprehensions.
        'list_for': tree.CompFor,
        # Same here. This just exists in Python 2.6.
        'gen_for': tree.CompFor,
        'decorator': tree.Decorator,
        'lambdef': tree.Lambda,
        'old_lambdef': tree.Lambda,
        'lambdef_nocond': tree.Lambda,
    }
    default_node = tree.PythonNode

    # Maps a token type to the leaf class that represents it. Token types
    # that are not listed become ``tree.Operator``.
    # Names/Keywords are handled separately
    _leaf_map = {
        PythonTokenTypes.STRING: tree.String,
        PythonTokenTypes.NUMBER: tree.Number,
        PythonTokenTypes.NEWLINE: tree.Newline,
        PythonTokenTypes.ENDMARKER: tree.EndMarker,
        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
        PythonTokenTypes.FSTRING_START: tree.FStringStart,
        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
    }

    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
        super(Parser, self).__init__(pgen_grammar, start_nonterminal,
                                     error_recovery=error_recovery)

        self.syntax_errors = []
        # Indentation levels whose DEDENT must be dropped because the
        # matching INDENT was turned into an error leaf.
        self._omit_dedent_list = []
        # Indentation depth as counted by ``_recovery_tokenize``.
        self._indent_counter = 0

    def parse(self, tokens):
        """
        Parse ``tokens`` and return whatever ``BaseParser.parse`` returns.

        With error recovery enabled the token stream is first filtered by
        ``_recovery_tokenize``.

        :raises NotImplementedError: If error recovery is enabled and the
            start nonterminal is not ``'file_input'``.
        """
        if self._error_recovery:
            if self._start_nonterminal != 'file_input':
                raise NotImplementedError

            tokens = self._recovery_tokenize(tokens)

        return super(Parser, self).parse(tokens)

    def convert_node(self, nonterminal, children):
        """
        Convert raw node information to a PythonBaseNode instance.

        This is passed to the parser driver which calls it whenever a reduction of a
        grammar rule produces a new complete node, so that the tree is built
        strictly bottom-up.

        :param nonterminal: Name of the grammar rule that was reduced.
        :param children: The already converted child nodes/leaves.
        :return: An instance of the class registered in ``node_map`` or, for
            unregistered nonterminals, of ``default_node``.
        """
        # Only the lookup decides whether there is a dedicated class. The
        # constructor call is deliberately not guarded, so that a KeyError
        # raised while building the node is not mistaken for "unknown
        # nonterminal" and silently replaced by a generic node.
        node_class = self.node_map.get(nonterminal)
        if node_class is not None:
            return node_class(children)

        if nonterminal == 'suite':
            # We don't want the INDENT/DEDENT in our parser tree. They are
            # virtual leaves and not real ones and therefore have pseudo
            # start/end positions and no prefixes. Just ignore them.
            children = [children[0]] + children[2:-1]
        elif nonterminal == 'list_if':
            # Make transitioning from 2 to 3 easier.
            nonterminal = 'comp_if'
        elif nonterminal == 'listmaker':
            # Same as list_if above.
            nonterminal = 'testlist_comp'
        return self.default_node(nonterminal, children)

    def convert_leaf(self, type, value, prefix, start_pos):
        """
        Convert a token into a leaf of the tree.

        ``NAME`` tokens become ``tree.Keyword`` if their value is one of the
        grammar's reserved syntax strings and ``tree.Name`` otherwise. Every
        other token type is looked up in ``_leaf_map``; unlisted types become
        ``tree.Operator``.
        """
        if type == NAME:
            if value in self._pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            else:
                return tree.Name(value, start_pos, prefix)

        return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)

    def error_recovery(self, token):
        """
        Handle a ``token`` that does not fit the current parser state.

        First a statement that merely lacks its trailing newline is
        accepted (this happens even if error recovery is disabled). If that
        does not apply and error recovery is disabled, the base class'
        ``error_recovery`` is called. Otherwise the stack is cut back to the
        innermost usable suite (or ``file_input``); the discarded nodes are
        wrapped in an error node, or, if nothing was discarded, the token
        itself is stored as an error leaf.
        """
        tos_nodes = self.stack[-1].nodes
        if tos_nodes:
            last_leaf = tos_nodes[-1].get_last_leaf()
        else:
            last_leaf = None

        # ``last_leaf`` is None if the top of the stack has no children yet.
        # In that case there is no statement that could be missing its
        # newline, so the DEDENT shortcut below does not apply.
        if self._start_nonterminal == 'file_input' and \
                (token.type == PythonTokenTypes.ENDMARKER or
                 token.type == DEDENT and last_leaf is not None
                 and '\n' not in last_leaf.value):
            # In Python statements need to end with a newline. But since it's
            # possible (and valid in Python) that there's no newline at the
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
            if self.stack[-1].dfa.from_rule == 'simple_stmt':
                try:
                    plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
                except KeyError:
                    pass
                else:
                    if plan.next_dfa.is_final and not plan.dfa_pushes:
                        # We are ignoring here that the newline would be
                        # required for a simple_stmt.
                        self.stack[-1].dfa = plan.next_dfa
                        self._add_token(token)
                        return

        if not self._error_recovery:
            return super(Parser, self).error_recovery(token)

        def current_suite(stack):
            # For now just discard everything that is not a suite or
            # file_input, if we detect an error.
            for until_index, stack_node in reversed(list(enumerate(stack))):
                # `suite` can sometimes be only simple_stmt, not stmt.
                if stack_node.nonterminal == 'file_input':
                    break
                elif stack_node.nonterminal == 'suite':
                    # In the case where we just have a newline we don't want to
                    # do error recovery here. In all other cases, we want to do
                    # error recovery.
                    if len(stack_node.nodes) != 1:
                        break
            return until_index

        until_index = current_suite(self.stack)

        if self._stack_removal(until_index + 1):
            self._add_token(token)
        else:
            typ, value, start_pos, prefix = token
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)

            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
            self.stack[-1].nodes.append(error_leaf)

        tos = self.stack[-1]
        if tos.nonterminal == 'suite':
            # Need at least one statement in the suite. This happened with the
            # error recovery above.
            try:
                tos.dfa = tos.dfa.arcs['stmt']
            except KeyError:
                # We're already in a final state.
                pass

    def _stack_removal(self, start_index):
        """
        Drop all stack entries from ``start_index`` on.

        The nodes collected by the dropped entries are wrapped in a single
        ``tree.PythonErrorNode`` that is appended to the entry right below
        ``start_index``.

        :return: ``True`` if any nodes were moved into an error node,
            ``False`` if the dropped entries were empty.
        """
        all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]

        if all_nodes:
            self.stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes))

        self.stack[start_index:] = []
        return bool(all_nodes)

    def _recovery_tokenize(self, tokens):
        """
        Yield ``tokens`` while keeping track of the indentation depth.

        A DEDENT is swallowed when the current depth is the last entry of
        ``_omit_dedent_list``, i.e. when its INDENT was turned into an error
        leaf by ``error_recovery``.
        """
        for token in tokens:
            typ = token[0]
            if typ == DEDENT:
                # We need to count indents, because if we just omit any DEDENT,
                # we might omit them in the wrong place.
                o = self._omit_dedent_list
                if o and o[-1] == self._indent_counter:
                    o.pop()
                    continue

                self._indent_counter -= 1
            elif typ == INDENT:
                self._indent_counter += 1
            yield token

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

parser.py

Latest commit

History

parser.py

File metadata and controls