# pre-commit-hooks/.venv/lib/python3.10/site-packages/packaging/_parser.py
"""Handwritten parser of dependency specifiers.
The docstring for each __parse_* function contains ENBF-inspired grammar representing
the implementation.
"""
from __future__ import annotations

import ast
from typing import Any
from typing import List
from typing import NamedTuple
from typing import Tuple
from typing import Union

from ._tokenizer import DEFAULT_RULES
from ._tokenizer import Tokenizer


class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError


class Variable(Node):
    def serialize(self) -> str:
        return str(self)


class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    def serialize(self) -> str:
        return str(self)
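
# Illustrative only (not part of the module's API surface): the three node
# types differ solely in how they serialize back to marker syntax.
#
#   Variable('python_version').serialize()  -> 'python_version'
#   Value('3.8').serialize()                -> '"3.8"'   (re-quoted)
#   Op('not in').serialize()                -> 'not in'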


MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]

# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]


class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: list[str]
    specifier: str
    marker: MarkerList | None


# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------


def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
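
# A sketch of typical usage (illustrative; the exact reprs come from
# Node.__repr__ above):
#
#   parse_requirement('requests[security]>=2.8.1 ; python_version < "2.7"')
#   -> ParsedRequirement(name='requests', url='', extras=['security'],
#                        specifier='>=2.8.1',
#                        marker=[(<Variable('python_version')>, <Op('<')>, <Value('2.7')>)])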


def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume('WS')

    name_token = tokenizer.expect(
        'IDENTIFIER', expected='package name at the start of dependency specifier',
    )
    name = name_token.text
    tokenizer.consume('WS')

    extras = _parse_extras(tokenizer)
    tokenizer.consume('WS')

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect('END', expected='end of dependency specifier')

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """
    specifier = ''
    url = ''
    marker = None

    if tokenizer.check('AT'):
        tokenizer.read()
        tokenizer.consume('WS')

        url_start = tokenizer.position
        url = tokenizer.expect('URL', expected='URL after @').text
        if tokenizer.check('END', peek=True):
            return (url, specifier, marker)

        tokenizer.expect('WS', expected='whitespace after URL')

        # The input might end after whitespace.
        if tokenizer.check('END', peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after='URL and whitespace',
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume('WS')

        if tokenizer.check('END', peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                'version specifier'
                if specifier
                else 'name and no valid version specifier'
            ),
        )

    return (url, specifier, marker)
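
# The two alternatives above correspond to the two forms a requirement can
# take after the name and extras (illustrative sketch, not exhaustive):
#
#   'pip @ https://example.com/pip-24.0.whl'  -> url set, specifier empty
#   'pip>=24.0'                               -> specifier set, url empty
#   'pip'                                     -> both empty (bare name)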


def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str,
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """
    if not tokenizer.check('SEMICOLON'):
        tokenizer.raise_syntax_error(
            f'Expected end or semicolon (after {after})',
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume('WS')

    return marker
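
# This is where a common error message originates (illustrative): anything
# after the name that is neither a valid specifier nor a semicolon, e.g.
# 'pip 24.0', fails with
# "Expected end or semicolon (after name and no valid version specifier)".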


def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET WS? extras_list? WS? RIGHT_BRACKET)?
    """
    if not tokenizer.check('LEFT_BRACKET', peek=True):
        return []

    with tokenizer.enclosing_tokens(
        'LEFT_BRACKET',
        'RIGHT_BRACKET',
        around='extras',
    ):
        tokenizer.consume('WS')
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume('WS')

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = IDENTIFIER (WS? COMMA WS? IDENTIFIER)*
    """
    extras: list[str] = []

    if not tokenizer.check('IDENTIFIER'):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume('WS')
        if tokenizer.check('IDENTIFIER', peek=True):
            tokenizer.raise_syntax_error('Expected comma between extra names')
        elif not tokenizer.check('COMMA'):
            break

        tokenizer.read()
        tokenizer.consume('WS')

        extra_token = tokenizer.expect('IDENTIFIER', expected='extra name after comma')
        extras.append(extra_token.text)

    return extras
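
# Illustrative behaviour of the two extras helpers above:
#
#   'pkg[security, socks]' -> extras == ['security', 'socks']
#   'pkg[]'                -> extras == []
#   'pkg[security socks]'  -> syntax error: 'Expected comma between extra names'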


def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        'LEFT_PARENTHESIS',
        'RIGHT_PARENTHESIS',
        around='version specifier',
    ):
        tokenizer.consume('WS')
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume('WS')

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ''
    while tokenizer.check('SPECIFIER'):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check('VERSION_PREFIX_TRAIL', peek=True):
            tokenizer.raise_syntax_error(
                '.* suffix can only be used with `==` or `!=` operators',
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check('VERSION_LOCAL_LABEL_TRAIL', peek=True):
            tokenizer.raise_syntax_error(
                'Local version label can only be used with `==` or `!=` operators',
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume('WS')
        if not tokenizer.check('COMMA'):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume('WS')

    return parsed_specifiers
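
# The result is the concatenated token text, so whitespace between clauses is
# dropped while commas are kept (illustrative):
#
#   '>=1.0, !=1.5, <2.0'  -> '>=1.0,!=1.5,<2.0'
#   '>=1.0.*'             -> syntax error: '.* suffix can only be used with
#                            `==` or `!=` operators'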


# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------


def parse_marker(source: str) -> MarkerList:
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
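
# Illustrative usage; the reprs come from Node.__repr__ above:
#
#   parse_marker('python_version < "3.8" and os_name == "posix"')
#   -> [(<Variable('python_version')>, <Op('<')>, <Value('3.8')>),
#       'and',
#       (<Variable('os_name')>, <Op('==')>, <Value('posix')>)]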


def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    retval = _parse_marker(tokenizer)
    tokenizer.expect('END', expected='end of marker expression')
    return retval


def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check('BOOLOP'):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression
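
# The result is a flat list at each nesting level; no operator precedence is
# applied at parse time, and grouping is introduced only by explicit
# parentheses (handled in _parse_marker_atom below). Illustrative, with
# `item` standing for a (marker_var, Op, marker_var) tuple:
#
#   'os_name == "nt" and extra == "dev" or extra == "test"'
#       -> [item, 'and', item, 'or', item]
#   '(os_name == "nt" or os_name == "posix") and extra == "dev"'
#       -> [[item, 'or', item], 'and', item]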


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume('WS')
    if tokenizer.check('LEFT_PARENTHESIS', peek=True):
        with tokenizer.enclosing_tokens(
            'LEFT_PARENTHESIS',
            'RIGHT_PARENTHESIS',
            around='marker expression',
        ):
            tokenizer.consume('WS')
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume('WS')
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume('WS')
    return marker


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume('WS')
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume('WS')
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume('WS')
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume('WS')
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check('VARIABLE'):
        return process_env_var(tokenizer.read().text.replace('.', '_'))
    elif tokenizer.check('QUOTED_STRING'):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message='Expected a marker variable or quoted string',
        )


def process_env_var(env_var: str) -> Variable:
    if env_var in ('platform_python_implementation', 'python_implementation'):
        return Variable('platform_python_implementation')
    else:
        return Variable(env_var)
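
# Two normalizations happen on the way to a Variable (illustrative):
# _parse_marker_var rewrites legacy dotted names ('os.name' -> 'os_name'),
# and process_env_var folds the 'python_implementation' alias into
# 'platform_python_implementation'.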


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
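
# ast.literal_eval safely unquotes the token text, so both quote styles work
# (illustrative):
#
#   process_python_str('"3.8"')  -> <Value('3.8')>
#   process_python_str("'nt'")   -> <Value('nt')>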


def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check('IN'):
        tokenizer.read()
        return Op('in')
    elif tokenizer.check('NOT'):
        tokenizer.read()
        tokenizer.expect('WS', expected="whitespace after 'not'")
        tokenizer.expect('IN', expected="'in' after 'not'")
        return Op('not in')
    elif tokenizer.check('OP'):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            'Expected marker operator, one of '
            '<=, <, !=, ==, >=, >, ~=, ===, in, not in',
        )
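

# Illustrative smoke test, not part of the upstream module. Run as
# `python -m packaging._parser` so the relative import of ._tokenizer resolves.
if __name__ == '__main__':
    req = parse_requirement('requests[security]>=2.8.1 ; python_version < "2.7"')
    assert req.name == 'requests'
    assert req.extras == ['security']
    assert req.specifier == '>=2.8.1'

    marker = parse_marker('os_name == "posix" and python_version < "3.8"')
    assert marker[1] == 'and'

    print(req)
    print(marker)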