图片解析应用 (Image parsing application)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

192 lines
5.2 KiB

  1. import contextlib
  2. import re
  3. from dataclasses import dataclass
  4. from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union
  5. from .specifiers import Specifier
  6. @dataclass
  7. class Token:
  8. name: str
  9. text: str
  10. position: int
  11. class ParserSyntaxError(Exception):
  12. """The provided source text could not be parsed correctly."""
  13. def __init__(
  14. self,
  15. message: str,
  16. *,
  17. source: str,
  18. span: Tuple[int, int],
  19. ) -> None:
  20. self.span = span
  21. self.message = message
  22. self.source = source
  23. super().__init__()
  24. def __str__(self) -> str:
  25. marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
  26. return "\n ".join([self.message, self.source, marker])
# Default token table for Tokenizer: maps a token name to its regex, given
# either as a raw pattern string or a pre-compiled re.Pattern.  The tokenizer
# is context-sensitive — rules are looked up by name on demand, so the order
# of entries here does not define matching priority.
DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    # A string in either single or double quotes (no escape handling).
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    # Comparison operators; longest alternatives first so "===" is not
    # consumed as "==" followed by "=".
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    # Environment-marker variable names (PEP 508); [._] accepts both the
    # modern dotted and the legacy underscore spellings.
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        re.VERBOSE,
    ),
    # A full version specifier (operator + version), built from the regex
    # fragments that Specifier itself uses so the two stay in sync.
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    # A URL is any run of non-whitespace characters (validated elsewhere).
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    "WS": r"[ \t]+",
    "END": r"$",
}
  76. class Tokenizer:
  77. """Context-sensitive token parsing.
  78. Provides methods to examine the input stream to check whether the next token
  79. matches.
  80. """
  81. def __init__(
  82. self,
  83. source: str,
  84. *,
  85. rules: "Dict[str, Union[str, re.Pattern[str]]]",
  86. ) -> None:
  87. self.source = source
  88. self.rules: Dict[str, re.Pattern[str]] = {
  89. name: re.compile(pattern) for name, pattern in rules.items()
  90. }
  91. self.next_token: Optional[Token] = None
  92. self.position = 0
  93. def consume(self, name: str) -> None:
  94. """Move beyond provided token name, if at current position."""
  95. if self.check(name):
  96. self.read()
  97. def check(self, name: str, *, peek: bool = False) -> bool:
  98. """Check whether the next token has the provided name.
  99. By default, if the check succeeds, the token *must* be read before
  100. another check. If `peek` is set to `True`, the token is not loaded and
  101. would need to be checked again.
  102. """
  103. assert (
  104. self.next_token is None
  105. ), f"Cannot check for {name!r}, already have {self.next_token!r}"
  106. assert name in self.rules, f"Unknown token name: {name!r}"
  107. expression = self.rules[name]
  108. match = expression.match(self.source, self.position)
  109. if match is None:
  110. return False
  111. if not peek:
  112. self.next_token = Token(name, match[0], self.position)
  113. return True
  114. def expect(self, name: str, *, expected: str) -> Token:
  115. """Expect a certain token name next, failing with a syntax error otherwise.
  116. The token is *not* read.
  117. """
  118. if not self.check(name):
  119. raise self.raise_syntax_error(f"Expected {expected}")
  120. return self.read()
  121. def read(self) -> Token:
  122. """Consume the next token and return it."""
  123. token = self.next_token
  124. assert token is not None
  125. self.position += len(token.text)
  126. self.next_token = None
  127. return token
  128. def raise_syntax_error(
  129. self,
  130. message: str,
  131. *,
  132. span_start: Optional[int] = None,
  133. span_end: Optional[int] = None,
  134. ) -> NoReturn:
  135. """Raise ParserSyntaxError at the given position."""
  136. span = (
  137. self.position if span_start is None else span_start,
  138. self.position if span_end is None else span_end,
  139. )
  140. raise ParserSyntaxError(
  141. message,
  142. source=self.source,
  143. span=span,
  144. )
  145. @contextlib.contextmanager
  146. def enclosing_tokens(
  147. self, open_token: str, close_token: str, *, around: str
  148. ) -> Iterator[None]:
  149. if self.check(open_token):
  150. open_position = self.position
  151. self.read()
  152. else:
  153. open_position = None
  154. yield
  155. if open_position is None:
  156. return
  157. if not self.check(close_token):
  158. self.raise_syntax_error(
  159. f"Expected matching {close_token} for {open_token}, after {around}",
  160. span_start=open_position,
  161. )
  162. self.read()