m2m模型翻译
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

397 lines
14 KiB

6 months ago
  1. from __future__ import absolute_import
  2. import collections
  3. import copy
  4. import logging
  5. import threading
  6. import time
  7. from kafka.vendor import six
  8. from kafka import errors as Errors
  9. from kafka.conn import collect_hosts
  10. from kafka.future import Future
  11. from kafka.structs import BrokerMetadata, PartitionMetadata, TopicPartition
  12. log = logging.getLogger(__name__)
  13. class ClusterMetadata(object):
  14. """
  15. A class to manage kafka cluster metadata.
  16. This class does not perform any IO. It simply updates internal state
  17. given API responses (MetadataResponse, GroupCoordinatorResponse).
  18. Keyword Arguments:
  19. retry_backoff_ms (int): Milliseconds to backoff when retrying on
  20. errors. Default: 100.
  21. metadata_max_age_ms (int): The period of time in milliseconds after
  22. which we force a refresh of metadata even if we haven't seen any
  23. partition leadership changes to proactively discover any new
  24. brokers or partitions. Default: 300000
  25. bootstrap_servers: 'host[:port]' string (or list of 'host[:port]'
  26. strings) that the client should contact to bootstrap initial
  27. cluster metadata. This does not have to be the full node list.
  28. It just needs to have at least one broker that will respond to a
  29. Metadata API Request. Default port is 9092. If no servers are
  30. specified, will default to localhost:9092.
  31. """
  32. DEFAULT_CONFIG = {
  33. 'retry_backoff_ms': 100,
  34. 'metadata_max_age_ms': 300000,
  35. 'bootstrap_servers': [],
  36. }
  37. def __init__(self, **configs):
  38. self._brokers = {} # node_id -> BrokerMetadata
  39. self._partitions = {} # topic -> partition -> PartitionMetadata
  40. self._broker_partitions = collections.defaultdict(set) # node_id -> {TopicPartition...}
  41. self._groups = {} # group_name -> node_id
  42. self._last_refresh_ms = 0
  43. self._last_successful_refresh_ms = 0
  44. self._need_update = True
  45. self._future = None
  46. self._listeners = set()
  47. self._lock = threading.Lock()
  48. self.need_all_topic_metadata = False
  49. self.unauthorized_topics = set()
  50. self.internal_topics = set()
  51. self.controller = None
  52. self.config = copy.copy(self.DEFAULT_CONFIG)
  53. for key in self.config:
  54. if key in configs:
  55. self.config[key] = configs[key]
  56. self._bootstrap_brokers = self._generate_bootstrap_brokers()
  57. self._coordinator_brokers = {}
  58. def _generate_bootstrap_brokers(self):
  59. # collect_hosts does not perform DNS, so we should be fine to re-use
  60. bootstrap_hosts = collect_hosts(self.config['bootstrap_servers'])
  61. brokers = {}
  62. for i, (host, port, _) in enumerate(bootstrap_hosts):
  63. node_id = 'bootstrap-%s' % i
  64. brokers[node_id] = BrokerMetadata(node_id, host, port, None)
  65. return brokers
  66. def is_bootstrap(self, node_id):
  67. return node_id in self._bootstrap_brokers
  68. def brokers(self):
  69. """Get all BrokerMetadata
  70. Returns:
  71. set: {BrokerMetadata, ...}
  72. """
  73. return set(self._brokers.values()) or set(self._bootstrap_brokers.values())
  74. def broker_metadata(self, broker_id):
  75. """Get BrokerMetadata
  76. Arguments:
  77. broker_id (int): node_id for a broker to check
  78. Returns:
  79. BrokerMetadata or None if not found
  80. """
  81. return (
  82. self._brokers.get(broker_id) or
  83. self._bootstrap_brokers.get(broker_id) or
  84. self._coordinator_brokers.get(broker_id)
  85. )
  86. def partitions_for_topic(self, topic):
  87. """Return set of all partitions for topic (whether available or not)
  88. Arguments:
  89. topic (str): topic to check for partitions
  90. Returns:
  91. set: {partition (int), ...}
  92. """
  93. if topic not in self._partitions:
  94. return None
  95. return set(self._partitions[topic].keys())
  96. def available_partitions_for_topic(self, topic):
  97. """Return set of partitions with known leaders
  98. Arguments:
  99. topic (str): topic to check for partitions
  100. Returns:
  101. set: {partition (int), ...}
  102. None if topic not found.
  103. """
  104. if topic not in self._partitions:
  105. return None
  106. return set([partition for partition, metadata
  107. in six.iteritems(self._partitions[topic])
  108. if metadata.leader != -1])
  109. def leader_for_partition(self, partition):
  110. """Return node_id of leader, -1 unavailable, None if unknown."""
  111. if partition.topic not in self._partitions:
  112. return None
  113. elif partition.partition not in self._partitions[partition.topic]:
  114. return None
  115. return self._partitions[partition.topic][partition.partition].leader
  116. def partitions_for_broker(self, broker_id):
  117. """Return TopicPartitions for which the broker is a leader.
  118. Arguments:
  119. broker_id (int): node id for a broker
  120. Returns:
  121. set: {TopicPartition, ...}
  122. None if the broker either has no partitions or does not exist.
  123. """
  124. return self._broker_partitions.get(broker_id)
  125. def coordinator_for_group(self, group):
  126. """Return node_id of group coordinator.
  127. Arguments:
  128. group (str): name of consumer group
  129. Returns:
  130. int: node_id for group coordinator
  131. None if the group does not exist.
  132. """
  133. return self._groups.get(group)
  134. def ttl(self):
  135. """Milliseconds until metadata should be refreshed"""
  136. now = time.time() * 1000
  137. if self._need_update:
  138. ttl = 0
  139. else:
  140. metadata_age = now - self._last_successful_refresh_ms
  141. ttl = self.config['metadata_max_age_ms'] - metadata_age
  142. retry_age = now - self._last_refresh_ms
  143. next_retry = self.config['retry_backoff_ms'] - retry_age
  144. return max(ttl, next_retry, 0)
  145. def refresh_backoff(self):
  146. """Return milliseconds to wait before attempting to retry after failure"""
  147. return self.config['retry_backoff_ms']
  148. def request_update(self):
  149. """Flags metadata for update, return Future()
  150. Actual update must be handled separately. This method will only
  151. change the reported ttl()
  152. Returns:
  153. kafka.future.Future (value will be the cluster object after update)
  154. """
  155. with self._lock:
  156. self._need_update = True
  157. if not self._future or self._future.is_done:
  158. self._future = Future()
  159. return self._future
  160. def topics(self, exclude_internal_topics=True):
  161. """Get set of known topics.
  162. Arguments:
  163. exclude_internal_topics (bool): Whether records from internal topics
  164. (such as offsets) should be exposed to the consumer. If set to
  165. True the only way to receive records from an internal topic is
  166. subscribing to it. Default True
  167. Returns:
  168. set: {topic (str), ...}
  169. """
  170. topics = set(self._partitions.keys())
  171. if exclude_internal_topics:
  172. return topics - self.internal_topics
  173. else:
  174. return topics
  175. def failed_update(self, exception):
  176. """Update cluster state given a failed MetadataRequest."""
  177. f = None
  178. with self._lock:
  179. if self._future:
  180. f = self._future
  181. self._future = None
  182. if f:
  183. f.failure(exception)
  184. self._last_refresh_ms = time.time() * 1000
  185. def update_metadata(self, metadata):
  186. """Update cluster state given a MetadataResponse.
  187. Arguments:
  188. metadata (MetadataResponse): broker response to a metadata request
  189. Returns: None
  190. """
  191. # In the common case where we ask for a single topic and get back an
  192. # error, we should fail the future
  193. if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
  194. error_code, topic = metadata.topics[0][:2]
  195. error = Errors.for_code(error_code)(topic)
  196. return self.failed_update(error)
  197. if not metadata.brokers:
  198. log.warning("No broker metadata found in MetadataResponse -- ignoring.")
  199. return self.failed_update(Errors.MetadataEmptyBrokerList(metadata))
  200. _new_brokers = {}
  201. for broker in metadata.brokers:
  202. if metadata.API_VERSION == 0:
  203. node_id, host, port = broker
  204. rack = None
  205. else:
  206. node_id, host, port, rack = broker
  207. _new_brokers.update({
  208. node_id: BrokerMetadata(node_id, host, port, rack)
  209. })
  210. if metadata.API_VERSION == 0:
  211. _new_controller = None
  212. else:
  213. _new_controller = _new_brokers.get(metadata.controller_id)
  214. _new_partitions = {}
  215. _new_broker_partitions = collections.defaultdict(set)
  216. _new_unauthorized_topics = set()
  217. _new_internal_topics = set()
  218. for topic_data in metadata.topics:
  219. if metadata.API_VERSION == 0:
  220. error_code, topic, partitions = topic_data
  221. is_internal = False
  222. else:
  223. error_code, topic, is_internal, partitions = topic_data
  224. if is_internal:
  225. _new_internal_topics.add(topic)
  226. error_type = Errors.for_code(error_code)
  227. if error_type is Errors.NoError:
  228. _new_partitions[topic] = {}
  229. for p_error, partition, leader, replicas, isr in partitions:
  230. _new_partitions[topic][partition] = PartitionMetadata(
  231. topic=topic, partition=partition, leader=leader,
  232. replicas=replicas, isr=isr, error=p_error)
  233. if leader != -1:
  234. _new_broker_partitions[leader].add(
  235. TopicPartition(topic, partition))
  236. # Specific topic errors can be ignored if this is a full metadata fetch
  237. elif self.need_all_topic_metadata:
  238. continue
  239. elif error_type is Errors.LeaderNotAvailableError:
  240. log.warning("Topic %s is not available during auto-create"
  241. " initialization", topic)
  242. elif error_type is Errors.UnknownTopicOrPartitionError:
  243. log.error("Topic %s not found in cluster metadata", topic)
  244. elif error_type is Errors.TopicAuthorizationFailedError:
  245. log.error("Topic %s is not authorized for this client", topic)
  246. _new_unauthorized_topics.add(topic)
  247. elif error_type is Errors.InvalidTopicError:
  248. log.error("'%s' is not a valid topic name", topic)
  249. else:
  250. log.error("Error fetching metadata for topic %s: %s",
  251. topic, error_type)
  252. with self._lock:
  253. self._brokers = _new_brokers
  254. self.controller = _new_controller
  255. self._partitions = _new_partitions
  256. self._broker_partitions = _new_broker_partitions
  257. self.unauthorized_topics = _new_unauthorized_topics
  258. self.internal_topics = _new_internal_topics
  259. f = None
  260. if self._future:
  261. f = self._future
  262. self._future = None
  263. self._need_update = False
  264. now = time.time() * 1000
  265. self._last_refresh_ms = now
  266. self._last_successful_refresh_ms = now
  267. if f:
  268. f.success(self)
  269. log.debug("Updated cluster metadata to %s", self)
  270. for listener in self._listeners:
  271. listener(self)
  272. if self.need_all_topic_metadata:
  273. # the listener may change the interested topics,
  274. # which could cause another metadata refresh.
  275. # If we have already fetched all topics, however,
  276. # another fetch should be unnecessary.
  277. self._need_update = False
  278. def add_listener(self, listener):
  279. """Add a callback function to be called on each metadata update"""
  280. self._listeners.add(listener)
  281. def remove_listener(self, listener):
  282. """Remove a previously added listener callback"""
  283. self._listeners.remove(listener)
  284. def add_group_coordinator(self, group, response):
  285. """Update with metadata for a group coordinator
  286. Arguments:
  287. group (str): name of group from GroupCoordinatorRequest
  288. response (GroupCoordinatorResponse): broker response
  289. Returns:
  290. string: coordinator node_id if metadata is updated, None on error
  291. """
  292. log.debug("Updating coordinator for %s: %s", group, response)
  293. error_type = Errors.for_code(response.error_code)
  294. if error_type is not Errors.NoError:
  295. log.error("GroupCoordinatorResponse error: %s", error_type)
  296. self._groups[group] = -1
  297. return
  298. # Use a coordinator-specific node id so that group requests
  299. # get a dedicated connection
  300. node_id = 'coordinator-{}'.format(response.coordinator_id)
  301. coordinator = BrokerMetadata(
  302. node_id,
  303. response.host,
  304. response.port,
  305. None)
  306. log.info("Group coordinator for %s is %s", group, coordinator)
  307. self._coordinator_brokers[node_id] = coordinator
  308. self._groups[group] = node_id
  309. return node_id
  310. def with_partitions(self, partitions_to_add):
  311. """Returns a copy of cluster metadata with partitions added"""
  312. new_metadata = ClusterMetadata(**self.config)
  313. new_metadata._brokers = copy.deepcopy(self._brokers)
  314. new_metadata._partitions = copy.deepcopy(self._partitions)
  315. new_metadata._broker_partitions = copy.deepcopy(self._broker_partitions)
  316. new_metadata._groups = copy.deepcopy(self._groups)
  317. new_metadata.internal_topics = copy.deepcopy(self.internal_topics)
  318. new_metadata.unauthorized_topics = copy.deepcopy(self.unauthorized_topics)
  319. for partition in partitions_to_add:
  320. new_metadata._partitions[partition.topic][partition.partition] = partition
  321. if partition.leader is not None and partition.leader != -1:
  322. new_metadata._broker_partitions[partition.leader].add(
  323. TopicPartition(partition.topic, partition.partition))
  324. return new_metadata
  325. def __str__(self):
  326. return 'ClusterMetadata(brokers: %d, topics: %d, groups: %d)' % \
  327. (len(self._brokers), len(self._partitions), len(self._groups))