crepu.dev Git - config.git/blame_incremental - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/yapf/pyparser/pyparser.py

... / ...

Commit	Line	Data
	1	# Copyright 2022 Bill Wendling, All Rights Reserved.
	2	#
	3	# Licensed under the Apache License, Version 2.0 (the "License");
	4	# you may not use this file except in compliance with the License.
	5	# You may obtain a copy of the License at
	6	#
	7	# http://www.apache.org/licenses/LICENSE-2.0
	8	#
	9	# Unless required by applicable law or agreed to in writing, software
	10	# distributed under the License is distributed on an "AS IS" BASIS,
	11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	12	# See the License for the specific language governing permissions and
	13	# limitations under the License.
	14	"""Simple Python Parser
	15
	16	Parse Python code into a list of logical lines, represented by LogicalLine
	17	objects. This uses Python's tokenizer to generate the tokens. As such, YAPF must
	18	be run with the appropriate Python version---Python >=3.7 for Python 3.7 code,
	19	Python >=3.8 for Python 3.8 code, etc.
	20
	21	This parser uses Python's native "tokenize" module to generate a list of tokens
	22	for the source code. It then uses Python's native "ast" module to assign
	23	subtypes, calculate split penalties, etc.
	24
	25	A "logical line" produced by Python's "tokenize" module ends with a
	26	tokenize.NEWLINE, rather than a tokenize.NL, making it easy to separate them
	27	out. Comments all end with a tokenize.NL, so we need to make sure we don't
	28	errantly pick up non-comment tokens when parsing comment blocks.
	29
	30	ParseCode(): parse the code producing a list of logical lines.
	31	"""
	32
	33	# TODO: Call from yapf_api.FormatCode.
	34
	35	import ast
	36	import codecs
	37	import os
	38	import token
	39	import tokenize
	40	from io import StringIO
	41	from tokenize import TokenInfo
	42
	43	from yapf.pyparser import split_penalty_visitor
	44	from yapf.yapflib import format_token
	45	from yapf.yapflib import logical_line
	46
# Token type given to the synthetic line-continuation ("\") tokens that
# _CreateLogicalLines inserts between tokens joined by a trailing backslash.
# NOTE(review): presumably token.N_TOKENS is used because no token produced by
# the tokenizer carries that type value -- confirm against format_token.
CONTINUATION = token.N_TOKENS
	48
	49
	50	def ParseCode(unformatted_source, filename='<unknown>'):
	51	"""Parse a string of Python code into logical lines.
	52
	53	This provides an alternative entry point to YAPF.
	54
	55	Arguments:
	56	unformatted_source: (unicode) The code to format.
	57	filename: (unicode) The name of the file being reformatted.
	58
	59	Returns:
	60	A list of LogicalLines.
	61
	62	Raises:
	63	An exception is raised if there's an error during AST parsing.
	64	"""
	65	if not unformatted_source.endswith(os.linesep):
	66	unformatted_source += os.linesep
	67
	68	try:
	69	ast_tree = ast.parse(unformatted_source, filename)
	70	ast.fix_missing_locations(ast_tree)
	71	readline = StringIO(unformatted_source).readline
	72	tokens = tokenize.generate_tokens(readline)
	73	except Exception:
	74	raise
	75
	76	logical_lines = _CreateLogicalLines(tokens)
	77
	78	# Process the logical lines.
	79	split_penalty_visitor.SplitPenalty(logical_lines).visit(ast_tree)
	80
	81	return logical_lines
	82
	83
	84	def _CreateLogicalLines(tokens):
	85	"""Separate tokens into logical lines.
	86
	87	Arguments:
	88	tokens: (list of tokenizer.TokenInfo) Tokens generated by tokenizer.
	89
	90	Returns:
	91	A list of LogicalLines.
	92	"""
	93	formatted_tokens = []
	94
	95	# Convert tokens into "TokenInfo" and add tokens for continuation markers.
	96	prev_tok = None
	97	for tok in tokens:
	98	tok = TokenInfo(*tok)
	99
	100	if (prev_tok and prev_tok.line.rstrip().endswith('\\') and
	101	prev_tok.start[0] < tok.start[0]):
	102	ctok = TokenInfo(
	103	type=CONTINUATION,
	104	string='\\',
	105	start=(prev_tok.start[0], prev_tok.start[1] + 1),
	106	end=(prev_tok.end[0], prev_tok.end[0] + 2),
	107	line=prev_tok.line)
	108	ctok.lineno = ctok.start[0]
	109	ctok.column = ctok.start[1]
	110	ctok.value = '\\'
	111	formatted_tokens.append(format_token.FormatToken(ctok, 'CONTINUATION'))
	112
	113	tok.lineno = tok.start[0]
	114	tok.column = tok.start[1]
	115	tok.value = tok.string
	116	formatted_tokens.append(
	117	format_token.FormatToken(tok, token.tok_name[tok.type]))
	118	prev_tok = tok
	119
	120	# Generate logical lines.
	121	logical_lines, cur_logical_line = [], []
	122	depth = 0
	123	for tok in formatted_tokens:
	124	if tok.type == tokenize.ENDMARKER:
	125	break
	126
	127	if tok.type == tokenize.NEWLINE:
	128	# End of a logical line.
	129	logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
	130	cur_logical_line = []
	131	elif tok.type == tokenize.INDENT:
	132	depth += 1
	133	elif tok.type == tokenize.DEDENT:
	134	depth -= 1
	135	elif tok.type == tokenize.NL:
	136	pass
	137	else:
	138	if (cur_logical_line and not tok.type == tokenize.COMMENT and
	139	cur_logical_line[0].type == tokenize.COMMENT):
	140	# We were parsing a comment block, but now we have real code to worry
	141	# about. Store the comment and carry on.
	142	logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
	143	cur_logical_line = []
	144
	145	cur_logical_line.append(tok)
	146
	147	# Link the FormatTokens in each line together to form a doubly linked list.
	148	for line in logical_lines:
	149	previous = line.first
	150	bracket_stack = [previous] if previous.OpensScope() else []
	151	for tok in line.tokens[1:]:
	152	tok.previous_token = previous
	153	previous.next_token = tok
	154	previous = tok
	155
	156	# Set up the "matching_bracket" attribute.
	157	if tok.OpensScope():
	158	bracket_stack.append(tok)
	159	elif tok.ClosesScope():
	160	bracket_stack[-1].matching_bracket = tok
	161	tok.matching_bracket = bracket_stack.pop()
	162
	163	return logical_lines