from __future__ import absolute_import

import io
import time

from kafka.codec import (has_gzip, has_snappy, has_lz4, has_zstd,
                         gzip_decode, snappy_decode, zstd_decode,
                         lz4_decode, lz4_decode_old_kafka)
from kafka.protocol.frame import KafkaBytes
from kafka.protocol.struct import Struct
from kafka.protocol.types import (
    Int8, Int32, Int64, Bytes, Schema, AbstractType
)
from kafka.util import crc32, WeakMethod
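
# Context (added note; not in the upstream file): this module implements the
# legacy Kafka message wire format. A v0 (magic=0) message is laid out as
# crc(4) + magic(1) + attributes(1) + key(bytes) + value(bytes); v1 (magic=1)
# adds an 8-byte timestamp between attributes and key. Both versions were
# superseded by the v2 record batch format, which this module does not handle.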


class Message(Struct):
    SCHEMAS = [
        Schema(
            ('crc', Int32),
            ('magic', Int8),
            ('attributes', Int8),
            ('key', Bytes),
            ('value', Bytes)),
        Schema(
            ('crc', Int32),
            ('magic', Int8),
            ('attributes', Int8),
            ('timestamp', Int64),
            ('key', Bytes),
            ('value', Bytes)),
    ]
    SCHEMA = SCHEMAS[1]
    CODEC_MASK = 0x07
    CODEC_GZIP = 0x01
    CODEC_SNAPPY = 0x02
    CODEC_LZ4 = 0x03
    CODEC_ZSTD = 0x04
    TIMESTAMP_TYPE_MASK = 0x08
    HEADER_SIZE = 22  # crc(4), magic(1), attributes(1), timestamp(8), key+value size(4*2)
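
    # Note (added for clarity; not in the upstream file): the attributes byte
    # packs two fields: bits 0-2 select the compression codec (CODEC_MASK)
    # and bit 3 flags the timestamp type (CreateTime vs LogAppendTime,
    # v1 messages only).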

    def __init__(self, value, key=None, magic=0, attributes=0, crc=0,
                 timestamp=None):
        assert value is None or isinstance(value, bytes), 'value must be bytes'
        assert key is None or isinstance(key, bytes), 'key must be bytes'
        assert magic > 0 or timestamp is None, 'timestamp not supported in v0'

        # Default timestamp to now for v1 messages
        if magic > 0 and timestamp is None:
            timestamp = int(time.time() * 1000)
        self.timestamp = timestamp
        self.crc = crc
        self._validated_crc = None
        self.magic = magic
        self.attributes = attributes
        self.key = key
        self.value = value
        self.encode = WeakMethod(self._encode_self)
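        # Note (assumption about intent; not in the upstream file): storing
        # the bound method as a WeakMethod avoids a strong reference cycle
        # from the instance back to itself via self.encode.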

    @property
    def timestamp_type(self):
        """0 for CreateTime; 1 for LogAppendTime; None if unsupported.

        The value is determined by the broker; produced messages should
        always set this to 0. Requires Kafka >= 0.10 / message version >= 1.
        """
        if self.magic == 0:
            return None
        elif self.attributes & self.TIMESTAMP_TYPE_MASK:
            return 1
        else:
            return 0

    def _encode_self(self, recalc_crc=True):
        version = self.magic
        if version == 1:
            fields = (self.crc, self.magic, self.attributes, self.timestamp, self.key, self.value)
        elif version == 0:
            fields = (self.crc, self.magic, self.attributes, self.key, self.value)
        else:
            raise ValueError('Unrecognized message version: %s' % (version,))
        message = Message.SCHEMAS[version].encode(fields)
        if not recalc_crc:
            return message

        self.crc = crc32(message[4:])
        crc_field = self.SCHEMAS[version].fields[0]
        return crc_field.encode(self.crc) + message[4:]

    @classmethod
    def decode(cls, data):
        _validated_crc = None
        if isinstance(data, bytes):
            _validated_crc = crc32(data[4:])
            data = io.BytesIO(data)
        # Partial decode required to determine message version
        base_fields = cls.SCHEMAS[0].fields[0:3]
        crc, magic, attributes = [field.decode(data) for field in base_fields]
        remaining = cls.SCHEMAS[magic].fields[3:]
        fields = [field.decode(data) for field in remaining]
        if magic == 1:
            timestamp = fields[0]
        else:
            timestamp = None
        msg = cls(fields[-1], key=fields[-2],
                  magic=magic, attributes=attributes, crc=crc,
                  timestamp=timestamp)
        msg._validated_crc = _validated_crc
        return msg
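
    # Illustrative round trip (added example; not in the upstream file):
    #
    #   msg = Message(b'payload', key=b'k', magic=1)
    #   raw = msg.encode()             # recalculates and prepends the crc
    #   decoded = Message.decode(raw)
    #   assert decoded.validate_crc()
    #   assert decoded.value == b'payload'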

    def validate_crc(self):
        if self._validated_crc is None:
            raw_msg = self._encode_self(recalc_crc=False)
            self._validated_crc = crc32(raw_msg[4:])
        if self.crc == self._validated_crc:
            return True
        return False

    def is_compressed(self):
        return self.attributes & self.CODEC_MASK != 0

    def decompress(self):
        codec = self.attributes & self.CODEC_MASK
        assert codec in (self.CODEC_GZIP, self.CODEC_SNAPPY, self.CODEC_LZ4, self.CODEC_ZSTD)
        if codec == self.CODEC_GZIP:
            assert has_gzip(), 'Gzip decompression unsupported'
            raw_bytes = gzip_decode(self.value)
        elif codec == self.CODEC_SNAPPY:
            assert has_snappy(), 'Snappy decompression unsupported'
            raw_bytes = snappy_decode(self.value)
        elif codec == self.CODEC_LZ4:
            assert has_lz4(), 'LZ4 decompression unsupported'
            if self.magic == 0:
                raw_bytes = lz4_decode_old_kafka(self.value)
            else:
                raw_bytes = lz4_decode(self.value)
        elif codec == self.CODEC_ZSTD:
            assert has_zstd(), 'ZSTD decompression unsupported'
            raw_bytes = zstd_decode(self.value)
        else:
            raise Exception('This should be impossible')

        return MessageSet.decode(raw_bytes, bytes_to_read=len(raw_bytes))
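
    # Note (added for clarity; not in the upstream file): in the legacy
    # format a compressed message's value is itself an encoded MessageSet,
    # which is why decompress() returns decoded (offset, size, message)
    # tuples rather than raw bytes.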

    def __hash__(self):
        return hash(self._encode_self(recalc_crc=False))


class PartialMessage(bytes):
    def __repr__(self):
        return 'PartialMessage(%s)' % (self,)


class MessageSet(AbstractType):
    ITEM = Schema(
        ('offset', Int64),
        ('message', Bytes)
    )
    HEADER_SIZE = 12  # offset + message_size
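
    # Note (added for clarity; not in the upstream file): on the wire a
    # message set is a bare sequence of [offset(8) message_size(4) message]
    # entries with no item count; readers consume until the advertised byte
    # size is exhausted.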

    @classmethod
    def encode(cls, items, prepend_size=True):
        # RecordAccumulator encodes messagesets internally
        if isinstance(items, (io.BytesIO, KafkaBytes)):
            size = Int32.decode(items)
            if prepend_size:
                # rewind and return all the bytes
                items.seek(items.tell() - 4)
                size += 4
            return items.read(size)

        encoded_values = []
        for (offset, message) in items:
            encoded_values.append(Int64.encode(offset))
            encoded_values.append(Bytes.encode(message))
        encoded = b''.join(encoded_values)
        if prepend_size:
            return Bytes.encode(encoded)
        else:
            return encoded
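
    # Illustrative usage (added example; not in the upstream file):
    #
    #   msg = Message(b'payload', magic=1)
    #   raw_set = MessageSet.encode([(0, msg.encode())])
    #
    # With prepend_size=True (the default) the result is an Int32 byte count
    # followed by the offset/size/message entries described above.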

    @classmethod
    def decode(cls, data, bytes_to_read=None):
        """Compressed messages should pass in bytes_to_read (via message size);
        otherwise the size is decoded from data as an Int32.
        """
        if isinstance(data, bytes):
            data = io.BytesIO(data)
        if bytes_to_read is None:
            bytes_to_read = Int32.decode(data)

        # if FetchRequest max_bytes is smaller than the available message set
        # the server returns partial data for the final message,
        # so create an internal buffer to avoid over-reading
        raw = io.BytesIO(data.read(bytes_to_read))

        items = []
        while bytes_to_read:
            try:
                offset = Int64.decode(raw)
                msg_bytes = Bytes.decode(raw)
                bytes_to_read -= 8 + 4 + len(msg_bytes)
                items.append((offset, len(msg_bytes), Message.decode(msg_bytes)))
            except ValueError:
                # PartialMessage to signal that max_bytes may be too small
                items.append((None, None, PartialMessage()))
                break
        return items
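
    # Illustrative decode loop (added example; not in the upstream file):
    #
    #   for offset, size, msg in MessageSet.decode(raw_set):
    #       if isinstance(msg, PartialMessage):
    #           break  # truncated by FetchRequest max_bytes; fetch again with more
    #       payload = msg.decompress() if msg.is_compressed() else msg.value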

    @classmethod
    def repr(cls, messages):
        if isinstance(messages, (KafkaBytes, io.BytesIO)):
            offset = messages.tell()
            decoded = cls.decode(messages)
            messages.seek(offset)
            messages = decoded
        return str([cls.ITEM.repr(m) for m in messages])