[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
This commit is contained in:
pre-commit-ci[bot] 2024-04-13 00:00:18 +00:00
parent 72ad6dc953
commit f4cd1ba0d6
813 changed files with 66015 additions and 58839 deletions

View file

@ -1,44 +1,44 @@
from .package_data import __version__
from .core import (
IDNABidiError,
IDNAError,
InvalidCodepoint,
InvalidCodepointContext,
alabel,
check_bidi,
check_hyphen_ok,
check_initial_combiner,
check_label,
check_nfc,
decode,
encode,
ulabel,
uts46_remap,
valid_contextj,
valid_contexto,
valid_label_length,
valid_string_length,
)
from __future__ import annotations
from .core import alabel
from .core import check_bidi
from .core import check_hyphen_ok
from .core import check_initial_combiner
from .core import check_label
from .core import check_nfc
from .core import decode
from .core import encode
from .core import IDNABidiError
from .core import IDNAError
from .core import InvalidCodepoint
from .core import InvalidCodepointContext
from .core import ulabel
from .core import uts46_remap
from .core import valid_contextj
from .core import valid_contexto
from .core import valid_label_length
from .core import valid_string_length
from .intranges import intranges_contain
from .package_data import __version__
# Public names exported by a star-import of this package.
# NOTE(review): every name below is listed twice, first double-quoted and then
# single-quoted. This looks like the before/after sides of a quote-style change
# rendered without +/- markers; confirm against the real file before treating
# the duplicates as genuine list entries.
__all__ = [
"IDNABidiError",
"IDNAError",
"InvalidCodepoint",
"InvalidCodepointContext",
"alabel",
"check_bidi",
"check_hyphen_ok",
"check_initial_combiner",
"check_label",
"check_nfc",
"decode",
"encode",
"intranges_contain",
"ulabel",
"uts46_remap",
"valid_contextj",
"valid_contexto",
"valid_label_length",
"valid_string_length",
# Same 19 names again, single-quoted (see the note above the list).
'IDNABidiError',
'IDNAError',
'InvalidCodepoint',
'InvalidCodepointContext',
'alabel',
'check_bidi',
'check_hyphen_ok',
'check_initial_combiner',
'check_label',
'check_nfc',
'decode',
'encode',
'intranges_contain',
'ulabel',
'uts46_remap',
'valid_contextj',
'valid_contexto',
'valid_label_length',
'valid_string_length',
]

View file

@ -1,37 +1,47 @@
from .core import encode, decode, alabel, ulabel, IDNAError
from __future__ import annotations
import codecs
import re
from typing import Tuple, Optional
from typing import Optional
from typing import Tuple
from .core import alabel
from .core import decode
from .core import encode
from .core import IDNAError
from .core import ulabel
# Matches any one of four code points: U+002E, U+3002, U+FF0E or U+FF61.
# The incremental encoder below uses it to split its input into labels.
_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
# codecs.Codec subclass whose encode/decode delegate to the module-level
# encode() and decode() imported from .core.
# NOTE(review): indentation is stripped in this view and adjacent near-identical
# lines (Tuple[...] vs tuple[...], .format() vs f-string, b"" vs b'') appear to be
# the before/after versions of one changed line; confirm against the real file.
class Codec(codecs.Codec):
# encode: returns the pair (encode(data), len(data)).
def encode(self, data: str, errors: str = 'strict') -> Tuple[bytes, int]:
def encode(self, data: str, errors: str = 'strict') -> tuple[bytes, int]:
# Only the 'strict' error mode is accepted; any other value raises IDNAError.
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
raise IDNAError(f'Unsupported error handling \"{errors}\"')
# Falsy (empty) input short-circuits to empty bytes paired with a length of 0.
if not data:
return b"", 0
return b'', 0
return encode(data), len(data)
# decode: returns the pair (decode(data), len(data)).
def decode(self, data: bytes, errors: str = 'strict') -> Tuple[str, int]:
def decode(self, data: bytes, errors: str = 'strict') -> tuple[str, int]:
# Same 'strict'-only rule as encode.
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
raise IDNAError(f'Unsupported error handling \"{errors}\"')
# Falsy (empty) input short-circuits to an empty string paired with a length of 0.
if not data:
return '', 0
return decode(data), len(data)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore
def _buffer_encode(self, data: str, errors: str, final: bool) -> tuple[str, int]: # type: ignore
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
raise IDNAError(f'Unsupported error handling \"{errors}\"')
if not data:
return "", 0
return '', 0
labels = _unicode_dots_re.split(data)
trailing_dot = ''
@ -58,10 +68,11 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
size += len(trailing_dot)
return result_str, size
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore
def _buffer_decode(self, data: str, errors: str, final: bool) -> tuple[str, int]: # type: ignore
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
raise IDNAError(f'Unsupported error handling \"{errors}\"')
if not data:
return ('', 0)

View file

@ -1,13 +1,19 @@
from .core import *
from __future__ import annotations
from typing import Any
from typing import Union
from .codec import *
from typing import Any, Union
from .core import *
def ToASCII(label: str) -> bytes:
    """Return the result of passing *label* to ``encode``.

    This is a thin compatibility wrapper; all work is done by ``encode``.
    """
    encoded = encode(label)
    return encoded
# Thin wrapper: returns decode(label) unchanged.
# NOTE(review): the two signatures differ only in annotation spelling
# (Union[bytes, bytearray] vs bytes | bytearray) and look like the before/after
# sides of one diff line; confirm against the real file.
def ToUnicode(label: Union[bytes, bytearray]) -> str:
def ToUnicode(label: bytes | bytearray) -> str:
return decode(label)
def nameprep(s: Any) -> None:
    """Always fail: this operation is deliberately unsupported.

    Raises:
        NotImplementedError: unconditionally, whatever *s* is.
    """
    reason = 'IDNA 2008 does not utilise nameprep protocol'
    raise NotImplementedError(reason)

View file

@ -1,14 +1,19 @@
from . import idnadata
from __future__ import annotations
import bisect
import unicodedata
import re
from typing import Union, Optional
import unicodedata
from typing import Optional
from typing import Union
from . import idnadata
from .intranges import intranges_contain
# NOTE(review): presumably the combining-class value associated with virama
# characters; its use is not visible in this view, so confirm against callers.
_virama_combining_class = 9
# Byte-string prefix 'xn--'; presumably marks an A-label (usage not visible here).
_alabel_prefix = b'xn--'
# Matches any one of four code points: U+002E, U+3002, U+FF0E or U+FF61.
_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
class IDNAError(UnicodeError):
    """Common base class for every IDNA-encoding related error."""
@ -36,23 +41,26 @@ def _combining_class(cp: int) -> int:
raise ValueError('Unknown character in unicodedata')
return v
def _is_script(cp: str, script: str) -> bool:
    """Report whether the character *cp* lies in the ranges stored for *script*.

    The ranges are looked up in ``idnadata.scripts`` and membership is decided
    by ``intranges_contain``.
    """
    # ord() is evaluated before the table lookup, as in the one-line form.
    code_point = ord(cp)
    script_ranges = idnadata.scripts[script]
    return intranges_contain(code_point, script_ranges)
def _punycode(s: str) -> bytes:
return s.encode('punycode')
# Formats the integer s as 'U+' followed by uppercase hex, zero-padded to at
# least four digits.
# NOTE(review): the two return lines produce the same string (str.format vs
# f-string) and look like the before/after sides of one diff line; confirm.
def _unot(s: int) -> str:
return 'U+{:04X}'.format(s)
return f'U+{s:04X}'
# Returns False when len(label) exceeds 63, True otherwise.
# NOTE(review): the two signatures differ only in annotation spelling
# (Union[bytes, str] vs bytes | str) and look like the before/after sides of
# one diff line; confirm against the real file.
def valid_label_length(label: Union[bytes, str]) -> bool:
def valid_label_length(label: bytes | str) -> bool:
if len(label) > 63:
return False
return True
# Returns False when len(label) exceeds the limit, True otherwise. The limit is
# 254 when trailing_dot is truthy and 253 when it is not.
# NOTE(review): the two signatures differ only in annotation spelling
# (Union[bytes, str] vs bytes | str) and look like the before/after sides of
# one diff line; confirm against the real file.
def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:
def valid_string_length(label: bytes | str, trailing_dot: bool) -> bool:
if len(label) > (254 if trailing_dot else 253):
return False
return True
@ -65,7 +73,7 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
direction = unicodedata.bidirectional(cp)
if direction == '':
# String likely comes from a newer version of Unicode
raise IDNABidiError('Unknown directionality in label {} at position {}'.format(repr(label), idx))
raise IDNABidiError(f'Unknown directionality in label {repr(label)} at position {idx}')
if direction in ['R', 'AL', 'AN']:
bidi_label = True
if not bidi_label and not check_ltr:
@ -78,7 +86,7 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
elif direction == 'L':
rtl = False
else:
raise IDNABidiError('First codepoint in label {} must be directionality L, R or AL'.format(repr(label)))
raise IDNABidiError(f'First codepoint in label {repr(label)} must be directionality L, R or AL')
valid_ending = False
number_type = None # type: Optional[str]
@ -88,7 +96,7 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
if rtl:
# Bidi rule 2
if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
raise IDNABidiError('Invalid direction for codepoint at position {} in a right-to-left label'.format(idx))
raise IDNABidiError(f'Invalid direction for codepoint at position {idx} in a right-to-left label')
# Bidi rule 3
if direction in ['R', 'AL', 'EN', 'AN']:
valid_ending = True
@ -104,7 +112,7 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
else:
# Bidi rule 5
if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
raise IDNABidiError('Invalid direction for codepoint at position {} in a left-to-right label'.format(idx))
raise IDNABidiError(f'Invalid direction for codepoint at position {idx} in a left-to-right label')
# Bidi rule 6
if direction in ['L', 'EN']:
valid_ending = True
@ -146,7 +154,7 @@ def valid_contextj(label: str, pos: int) -> bool:
return True
ok = False
for i in range(pos-1, -1, -1):
for i in range(pos - 1, -1, -1):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord('T'):
continue
@ -158,7 +166,7 @@ def valid_contextj(label: str, pos: int) -> bool:
return False
ok = False
for i in range(pos+1, len(label)):
for i in range(pos + 1, len(label)):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord('T'):
continue
@ -183,13 +191,13 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
cp_value = ord(label[pos])
if cp_value == 0x00b7:
if 0 < pos < len(label)-1:
if 0 < pos < len(label) - 1:
if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
return True
return False
elif cp_value == 0x0375:
if pos < len(label)-1 and len(label) > 1:
if pos < len(label) - 1 and len(label) > 1:
return _is_script(label[pos + 1], 'Greek')
return False
@ -221,7 +229,7 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
return False
def check_label(label: Union[str, bytes, bytearray]) -> None:
def check_label(label: str | bytes | bytearray) -> None:
if isinstance(label, (bytes, bytearray)):
label = label.decode('utf-8')
if len(label) == 0:
@ -238,16 +246,22 @@ def check_label(label: Union[str, bytes, bytearray]) -> None:
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
try:
if not valid_contextj(label, pos):
raise InvalidCodepointContext('Joiner {} not allowed at position {} in {}'.format(
_unot(cp_value), pos+1, repr(label)))
raise InvalidCodepointContext(
'Joiner {} not allowed at position {} in {}'.format(
_unot(cp_value), pos + 1, repr(label),
),
)
except ValueError:
raise IDNAError('Unknown codepoint adjacent to joiner {} at position {} in {}'.format(
_unot(cp_value), pos+1, repr(label)))
raise IDNAError(
'Unknown codepoint adjacent to joiner {} at position {} in {}'.format(
_unot(cp_value), pos + 1, repr(label),
),
)
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
if not valid_contexto(label, pos):
raise InvalidCodepointContext('Codepoint {} not allowed at position {} in {}'.format(_unot(cp_value), pos+1, repr(label)))
raise InvalidCodepointContext(f'Codepoint {_unot(cp_value)} not allowed at position {pos+1} in {repr(label)}')
else:
raise InvalidCodepoint('Codepoint {} at position {} of {} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
raise InvalidCodepoint(f'Codepoint {_unot(cp_value)} at position {pos+1} of {repr(label)} not allowed')
check_bidi(label)
@ -276,7 +290,7 @@ def alabel(label: str) -> bytes:
return label_bytes
def ulabel(label: Union[str, bytes, bytearray]) -> str:
def ulabel(label: str | bytes | bytearray) -> str:
if not isinstance(label, (bytes, bytearray)):
try:
label_bytes = label.encode('ascii')
@ -313,31 +327,39 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False
for pos, char in enumerate(domain):
code_point = ord(char)
try:
uts46row = uts46data[code_point if code_point < 256 else
bisect.bisect_left(uts46data, (code_point, 'Z')) - 1]
uts46row = uts46data[
code_point if code_point < 256 else
bisect.bisect_left(uts46data, (code_point, 'Z')) - 1
]
status = uts46row[1]
replacement = None # type: Optional[str]
if len(uts46row) == 3:
replacement = uts46row[2] # type: ignore
if (status == 'V' or
(status == 'D' and not transitional) or
(status == '3' and not std3_rules and replacement is None)):
if (
status == 'V' or
(status == 'D' and not transitional) or
(status == '3' and not std3_rules and replacement is None)
):
output += char
elif replacement is not None and (status == 'M' or
(status == '3' and not std3_rules) or
(status == 'D' and transitional)):
elif replacement is not None and (
status == 'M' or
(status == '3' and not std3_rules) or
(status == 'D' and transitional)
):
output += replacement
elif status != 'I':
raise IndexError()
except IndexError:
raise InvalidCodepoint(
'Codepoint {} not allowed at position {} in {}'.format(
_unot(code_point), pos + 1, repr(domain)))
_unot(code_point), pos + 1, repr(domain),
),
)
return unicodedata.normalize('NFC', output)
def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes:
def encode(s: str | bytes | bytearray, strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes:
if isinstance(s, (bytes, bytearray)):
s = s.decode('ascii')
if uts46:
@ -367,7 +389,7 @@ def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool =
return s
def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str:
def decode(s: str | bytes | bytearray, strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str:
try:
if isinstance(s, (bytes, bytearray)):
s = s.decode('ascii')

View file

@ -1,4 +1,5 @@
# This file is automatically generated by tools/idna-data
from __future__ import annotations
__version__ = '14.0.0'
scripts = {

View file

@ -4,11 +4,14 @@ of consecutive integers, compute a representation of the form
((start1, end1), (start2, end2) ...). Then answer the question "was x present
in the original list?" in time O(log(# runs)).
"""
from __future__ import annotations
import bisect
from typing import List, Tuple
from typing import List
from typing import Tuple
def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
def intranges_from_list(list_: list[int]) -> tuple[int, ...]:
"""Represent a list of integers as a sequence of ranges:
((start_0, end_0), (start_1, end_1), ...), such that the original
integers are exactly those x such that start_i <= x < end_i for some i.
@ -20,30 +23,32 @@ def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
ranges = []
last_write = -1
for i in range(len(sorted_list)):
if i+1 < len(sorted_list):
if sorted_list[i] == sorted_list[i+1]-1:
if i + 1 < len(sorted_list):
if sorted_list[i] == sorted_list[i + 1] - 1:
continue
current_range = sorted_list[last_write+1:i+1]
current_range = sorted_list[last_write + 1:i + 1]
ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
last_write = i
return tuple(ranges)
def _encode_range(start: int, end: int) -> int:
return (start << 32) | end
# Splits r into a pair: (r shifted right by 32 bits, the low 32 bits of r).
# This undoes the `(start << 32) | end` packing done by _encode_range.
# NOTE(review): the two signatures differ only in annotation spelling
# (Tuple[int, int] vs tuple[int, int]) and look like the before/after sides of
# one diff line; confirm against the real file.
def _decode_range(r: int) -> Tuple[int, int]:
def _decode_range(r: int) -> tuple[int, int]:
return (r >> 32), (r & ((1 << 32) - 1))
def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool:
def intranges_contain(int_: int, ranges: tuple[int, ...]) -> bool:
"""Determine if `int_` falls into one of the ranges in `ranges`."""
tuple_ = _encode_range(int_, 0)
pos = bisect.bisect_left(ranges, tuple_)
# we could be immediately ahead of a tuple (start, end)
# with start < int_ <= end
if pos > 0:
left, right = _decode_range(ranges[pos-1])
left, right = _decode_range(ranges[pos - 1])
if left <= int_ < right:
return True
# or we could be immediately behind a tuple (int_, end)

View file

@ -1,2 +1,2 @@
from __future__ import annotations
__version__ = '3.3'

File diff suppressed because it is too large Load diff