import sys
import logging

import ply.lex

from jsonpath_ng.exceptions import JsonPathLexerError

logger = logging.getLogger(__name__)


class JsonPathLexer:
    '''
    A Lexical analyzer for JsonPath.
    '''

    def __init__(self, debug=False):
        self.debug = debug
        if self.__doc__ is None:
            raise JsonPathLexerError('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')

    def tokenize(self, string):
        '''
        Maps a string to an iterator over tokens. In other words: [char] -> [token]
        '''

        new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
        new_lexer.latest_newline = 0
        new_lexer.string_value = None
        new_lexer.input(string)

        while True:
            t = new_lexer.token()
            if t is None:
                break
            t.col = t.lexpos - new_lexer.latest_newline
            yield t

        if new_lexer.string_value is not None:
            raise JsonPathLexerError('Unexpected EOF in string literal or identifier')
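
    # Usage sketch (added comment, not part of the original source). Output shown
    # assumes PLY's convention that single-character literals reuse the character
    # itself as the token type:
    #
    #   lexer = JsonPathLexer()
    #   [(t.type, t.value) for t in lexer.tokenize('foo.bar[0]')]
    #   # -> [('ID', 'foo'), ('.', '.'), ('ID', 'bar'),
    #   #     ('[', '['), ('NUMBER', 0), (']', ']')]
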
    # ============== PLY Lexer specification ==================
    #
    # This probably should be private but:
    #   - the parser requires access to `tokens` (perhaps they should be defined in a third, shared dependency)
    #   - things like `literals` might be a legitimate part of the public interface.
    #
    # Anyhow, it is pythonic to give some rope to hang oneself with :-)

    literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&', '~']

    reserved_words = {'where': 'WHERE'}

    tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR'] + list(reserved_words.values())

    states = [('singlequote', 'exclusive'),
              ('doublequote', 'exclusive'),
              ('backquote', 'exclusive')]
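
    # Note (added): `ply.lex.lex(module=self, ...)` in `tokenize` builds the lexer
    # from this specification by reflection: the `tokens`, `literals` and `states`
    # attributes plus every `t_*` rule below. The exclusive states mean that once
    # an opening quote or backquote is consumed, only the corresponding
    # `t_<state>_*` rules apply until the closing delimiter pops the state.
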
    # Normal lexing, rather easy
    t_DOUBLEDOT = r'\.\.'
    t_ignore = ' \t'

    def t_ID(self, t):
        r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'
        t.type = self.reserved_words.get(t.value, 'ID')
        return t

    def t_NUMBER(self, t):
        r'-?\d+'
        t.value = int(t.value)
        return t
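
    # Example (added sketch): `t_ID` remaps reserved words, so `where` comes out
    # as a WHERE token rather than an ID, and `..` is matched by the DOUBLEDOT
    # rule rather than as two `.` literals:
    #
    #   [t.type for t in JsonPathLexer().tokenize('foo..bar where baz')]
    #   # -> ['ID', 'DOUBLEDOT', 'ID', 'WHERE', 'ID']
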
    # Single-quoted strings
    t_singlequote_ignore = ''

    def t_singlequote(self, t):
        r"'"
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('singlequote')

    def t_singlequote_content(self, t):
        r"[^'\\]+"
        t.lexer.string_value += t.value

    def t_singlequote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    def t_singlequote_end(self, t):
        r"'"
        t.value = t.lexer.string_value
        t.type = 'ID'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_singlequote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing singlequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
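
    # Example (added sketch): quoting allows field names that the bare `t_ID`
    # rule would reject, such as names containing spaces or dots; the whole
    # quoted span is emitted as a single ID token:
    #
    #   [(t.type, t.value) for t in JsonPathLexer().tokenize("$.'a b.c'")]
    #   # -> [('$', '$'), ('.', '.'), ('ID', 'a b.c')]
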
    # Double-quoted strings
    t_doublequote_ignore = ''

    def t_doublequote(self, t):
        r'"'
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('doublequote')

    def t_doublequote_content(self, t):
        r'[^"\\]+'
        t.lexer.string_value += t.value

    def t_doublequote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    def t_doublequote_end(self, t):
        r'"'
        t.value = t.lexer.string_value
        t.type = 'ID'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_doublequote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing doublequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
    # Back-quoted "magic" operators
    t_backquote_ignore = ''

    def t_backquote(self, t):
        r'`'
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('backquote')

    def t_backquote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    def t_backquote_content(self, t):
        r"[^`\\]+"
        t.lexer.string_value += t.value

    def t_backquote_end(self, t):
        r'`'
        t.value = t.lexer.string_value
        t.type = 'NAMED_OPERATOR'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_backquote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing backquoted operator: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
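
    # Example (added sketch): backquoted names become NAMED_OPERATOR tokens,
    # which the parser resolves to operators such as `this` or `parent`:
    #
    #   [(t.type, t.value) for t in JsonPathLexer().tokenize('`this`')]
    #   # -> [('NAMED_OPERATOR', 'this')]
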
    # Counting lines, handling errors
    def t_newline(self, t):
        r'\n'
        t.lexer.lineno += 1
        t.lexer.latest_newline = t.lexpos

    def t_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
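
    # Note (added): `latest_newline` tracks the position of the most recent
    # newline, so `lexpos - latest_newline` (used in the error messages above and
    # in `tokenize`) is the token's column offset within its current line.
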

if __name__ == '__main__':
    logging.basicConfig()
    lexer = JsonPathLexer(debug=True)
    for token in lexer.tokenize(sys.stdin.read()):
        print('%-20s%s' % (token.value, token.type))
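
# Running the module directly tokenizes stdin, printing one "value  type" pair
# per line. For example (assuming this file is installed as jsonpath_ng/lexer.py):
#
#   echo '$.foo[0]' | python -m jsonpath_ng.lexer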