m2m model translation

from __future__ import absolute_import, division

import collections
import copy
import functools
import logging
import time

from kafka.vendor import six

from kafka.coordinator.base import BaseCoordinator, Generation
from kafka.coordinator.assignors.range import RangePartitionAssignor
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor
from kafka.coordinator.assignors.sticky.sticky_assignor import StickyPartitionAssignor
from kafka.coordinator.protocol import ConsumerProtocol
import kafka.errors as Errors
from kafka.future import Future
from kafka.metrics import AnonMeasurable
from kafka.metrics.stats import Avg, Count, Max, Rate
from kafka.protocol.commit import OffsetCommitRequest, OffsetFetchRequest
from kafka.structs import OffsetAndMetadata, TopicPartition
from kafka.util import WeakMethod

log = logging.getLogger(__name__)


class ConsumerCoordinator(BaseCoordinator):
    """This class manages the coordination process with the consumer coordinator."""
    DEFAULT_CONFIG = {
        'group_id': 'kafka-python-default-group',
        'enable_auto_commit': True,
        'auto_commit_interval_ms': 5000,
        'default_offset_commit_callback': None,
        'assignors': (RangePartitionAssignor, RoundRobinPartitionAssignor, StickyPartitionAssignor),
        'session_timeout_ms': 10000,
        'heartbeat_interval_ms': 3000,
        'max_poll_interval_ms': 300000,
        'retry_backoff_ms': 100,
        'api_version': (0, 10, 1),
        'exclude_internal_topics': True,
        'metric_group_prefix': 'consumer'
    }
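
    # Usage sketch (illustrative): any keyword argument whose name matches a
    # DEFAULT_CONFIG key overrides the default below; unrecognized keys are
    # passed through to BaseCoordinator.
    #
    #   coordinator = ConsumerCoordinator(client, subscription, metrics,
    #                                     group_id='my-group',
    #                                     enable_auto_commit=False)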

    def __init__(self, client, subscription, metrics, **configs):
        """Initialize the coordination manager.

        Keyword Arguments:
            group_id (str): name of the consumer group to join for dynamic
                partition assignment (if enabled), and to use for fetching and
                committing offsets. Default: 'kafka-python-default-group'
            enable_auto_commit (bool): If true the consumer's offset will be
                periodically committed in the background. Default: True.
            auto_commit_interval_ms (int): milliseconds between automatic
                offset commits, if enable_auto_commit is True. Default: 5000.
            default_offset_commit_callback (callable): called as
                callback(offsets, exception); exception will be either an
                Exception instance or None. This callback can be used to
                trigger custom actions when a commit request completes.
            assignors (list): List of objects to use to distribute partition
                ownership amongst consumer instances when group management is
                used. Default: [RangePartitionAssignor,
                RoundRobinPartitionAssignor, StickyPartitionAssignor]
            heartbeat_interval_ms (int): The expected time in milliseconds
                between heartbeats to the consumer coordinator when using
                Kafka's group management feature. Heartbeats are used to ensure
                that the consumer's session stays active and to facilitate
                rebalancing when new consumers join or leave the group. The
                value must be set lower than session_timeout_ms, but typically
                should be set no higher than 1/3 of that value. It can be
                adjusted even lower to control the expected time for normal
                rebalances. Default: 3000
            session_timeout_ms (int): The timeout used to detect failures when
                using Kafka's group management facilities. Default: 10000
            retry_backoff_ms (int): Milliseconds to backoff when retrying on
                errors. Default: 100.
            exclude_internal_topics (bool): Whether records from internal topics
                (such as offsets) should be exposed to the consumer. If set to
                True the only way to receive records from an internal topic is
                subscribing to it. Requires 0.10+. Default: True
        """
        super(ConsumerCoordinator, self).__init__(client, metrics, **configs)

        self.config = copy.copy(self.DEFAULT_CONFIG)
        for key in self.config:
            if key in configs:
                self.config[key] = configs[key]

        self._subscription = subscription
        self._is_leader = False
        self._joined_subscription = set()
        self._metadata_snapshot = self._build_metadata_snapshot(subscription, client.cluster)
        self._assignment_snapshot = None
        self._cluster = client.cluster
        self.auto_commit_interval = self.config['auto_commit_interval_ms'] / 1000
        self.next_auto_commit_deadline = None
        self.completed_offset_commits = collections.deque()

        if self.config['default_offset_commit_callback'] is None:
            self.config['default_offset_commit_callback'] = self._default_offset_commit_callback

        if self.config['group_id'] is not None:
            if self.config['api_version'] >= (0, 9):
                if not self.config['assignors']:
                    raise Errors.KafkaConfigurationError('Coordinator requires assignors')
            if self.config['api_version'] < (0, 10, 1):
                if self.config['max_poll_interval_ms'] != self.config['session_timeout_ms']:
                    raise Errors.KafkaConfigurationError("Broker version %s does not support "
                                                         "different values for max_poll_interval_ms "
                                                         "and session_timeout_ms"
                                                         % (self.config['api_version'],))

        if self.config['enable_auto_commit']:
            if self.config['api_version'] < (0, 8, 1):
                log.warning('Broker version (%s) does not support offset'
                            ' commits; disabling auto-commit.',
                            self.config['api_version'])
                self.config['enable_auto_commit'] = False
            elif self.config['group_id'] is None:
                log.warning('group_id is None: disabling auto-commit.')
                self.config['enable_auto_commit'] = False
            else:
                self.next_auto_commit_deadline = time.time() + self.auto_commit_interval

        self.consumer_sensors = ConsumerCoordinatorMetrics(
            metrics, self.config['metric_group_prefix'], self._subscription)

        self._cluster.request_update()
        self._cluster.add_listener(WeakMethod(self._handle_metadata_update))

    def __del__(self):
        if hasattr(self, '_cluster') and self._cluster:
            self._cluster.remove_listener(WeakMethod(self._handle_metadata_update))
        super(ConsumerCoordinator, self).__del__()

    def protocol_type(self):
        return ConsumerProtocol.PROTOCOL_TYPE

    def group_protocols(self):
        """Returns list of preferred (protocols, metadata)"""
        if self._subscription.subscription is None:
            raise Errors.IllegalStateError('Consumer has not subscribed to topics')
        # dpkp note: I really dislike this.
        # why? because we are using this strange method group_protocols,
        # which is seemingly innocuous, to set internal state (_joined_subscription)
        # that is later used to check whether metadata has changed since we joined a group
        # but there is no guarantee that this method, group_protocols, will get called
        # in the correct sequence or that it will only be called when we want it to be.
        # So this really should be moved elsewhere, but I don't have the energy to
        # work that out right now. If you read this at some later date after the mutable
        # state has bitten you... I'm sorry! It mimics the java client, and that's the
        # best I've got for now.
        self._joined_subscription = set(self._subscription.subscription)
        metadata_list = []
        for assignor in self.config['assignors']:
            metadata = assignor.metadata(self._joined_subscription)
            group_protocol = (assignor.name, metadata)
            metadata_list.append(group_protocol)
        return metadata_list
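
    # Illustrative return value with the default assignors (each <metadata> is
    # the member metadata struct produced by the corresponding assignor):
    #   [('range', <metadata>), ('roundrobin', <metadata>), ('sticky', <metadata>)]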

    def _handle_metadata_update(self, cluster):
        # if we encounter any unauthorized topics, raise an exception
        if cluster.unauthorized_topics:
            raise Errors.TopicAuthorizationFailedError(cluster.unauthorized_topics)

        if self._subscription.subscribed_pattern:
            topics = []
            for topic in cluster.topics(self.config['exclude_internal_topics']):
                if self._subscription.subscribed_pattern.match(topic):
                    topics.append(topic)

            if set(topics) != self._subscription.subscription:
                self._subscription.change_subscription(topics)
                self._client.set_topics(self._subscription.group_subscription())

        # check if there are any changes to the metadata which should trigger
        # a rebalance
        if self._subscription.partitions_auto_assigned():
            metadata_snapshot = self._build_metadata_snapshot(self._subscription, cluster)
            if self._metadata_snapshot != metadata_snapshot:
                self._metadata_snapshot = metadata_snapshot

                # If we haven't got group coordinator support,
                # just assign all partitions locally
                if self._auto_assign_all_partitions():
                    self._subscription.assign_from_subscribed([
                        TopicPartition(topic, partition)
                        for topic in self._subscription.subscription
                        for partition in self._metadata_snapshot[topic]
                    ])

    def _auto_assign_all_partitions(self):
        # For users that use "subscribe" without group support,
        # we will simply assign all partitions to this consumer
        if self.config['api_version'] < (0, 9):
            return True
        elif self.config['group_id'] is None:
            return True
        else:
            return False

    def _build_metadata_snapshot(self, subscription, cluster):
        metadata_snapshot = {}
        for topic in subscription.group_subscription():
            partitions = cluster.partitions_for_topic(topic) or []
            metadata_snapshot[topic] = set(partitions)
        return metadata_snapshot
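
    # Example snapshot shape (illustrative topic names):
    #   {'orders': {0, 1, 2}, 'clicks': {0, 1}}
    # i.e. topic -> set of known partition ids; a change in this mapping is what
    # the rebalance check in _handle_metadata_update() looks for.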

    def _lookup_assignor(self, name):
        for assignor in self.config['assignors']:
            if assignor.name == name:
                return assignor
        return None

    def _on_join_complete(self, generation, member_id, protocol,
                          member_assignment_bytes):
        # only the leader is responsible for monitoring for metadata changes
        # (i.e. partition changes)
        if not self._is_leader:
            self._assignment_snapshot = None

        assignor = self._lookup_assignor(protocol)
        assert assignor, 'Coordinator selected invalid assignment protocol: %s' % (protocol,)

        assignment = ConsumerProtocol.ASSIGNMENT.decode(member_assignment_bytes)

        # set the flag to refresh last committed offsets
        self._subscription.needs_fetch_committed_offsets = True

        # update partition assignment
        try:
            self._subscription.assign_from_subscribed(assignment.partitions())
        except ValueError as e:
            log.warning("%s. Probably due to a deleted topic. Requesting Re-join" % e)
            self.request_rejoin()

        # give the assignor a chance to update internal state
        # based on the received assignment
        assignor.on_assignment(assignment)
        if assignor.name == 'sticky':
            assignor.on_generation_assignment(generation)

        # reschedule the auto commit starting from now
        self.next_auto_commit_deadline = time.time() + self.auto_commit_interval

        assigned = set(self._subscription.assigned_partitions())
        log.info("Setting newly assigned partitions %s for group %s",
                 assigned, self.group_id)

        # execute the user's callback after rebalance
        if self._subscription.listener:
            try:
                self._subscription.listener.on_partitions_assigned(assigned)
            except Exception:
                log.exception("User provided listener %s for group %s"
                              " failed on partition assignment: %s",
                              self._subscription.listener, self.group_id,
                              assigned)

    def poll(self):
        """
        Poll for coordinator events. Only applicable if group_id is set, and
        broker version supports GroupCoordinators. This ensures that the
        coordinator is known, and if using automatic partition assignment,
        ensures that the consumer has joined the group. This also handles
        periodic offset commits if they are enabled.
        """
        if self.group_id is None:
            return

        self._invoke_completed_offset_commit_callbacks()
        self.ensure_coordinator_ready()

        if self.config['api_version'] >= (0, 9) and self._subscription.partitions_auto_assigned():
            if self.need_rejoin():
                # due to a race condition between the initial metadata fetch and the
                # initial rebalance, we need to ensure that the metadata is fresh
                # before joining initially, and then request the metadata update. If
                # metadata update arrives while the rebalance is still pending (for
                # example, when the join group is still inflight), then we will lose
                # track of the fact that we need to rebalance again to reflect the
                # change to the topic subscription. Without ensuring that the
                # metadata is fresh, any metadata update that changes the topic
                # subscriptions and arrives while a rebalance is in progress will
                # essentially be ignored. See KAFKA-3949 for the complete
                # description of the problem.
                if self._subscription.subscribed_pattern:
                    metadata_update = self._client.cluster.request_update()
                    self._client.poll(future=metadata_update)

                self.ensure_active_group()

            self.poll_heartbeat()

        self._maybe_auto_commit_offsets_async()

    def time_to_next_poll(self):
        """Return seconds (float) remaining until :meth:`.poll` should be called again"""
        if not self.config['enable_auto_commit']:
            return self.time_to_next_heartbeat()

        if time.time() > self.next_auto_commit_deadline:
            return 0

        return min(self.next_auto_commit_deadline - time.time(),
                   self.time_to_next_heartbeat())
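
    # Worked example (illustrative): with auto-commit enabled, the next auto-commit
    # due in 2.5s and the next heartbeat due in 1.0s, time_to_next_poll() returns
    # 1.0 -- the smaller of the two deadlines.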

    def _perform_assignment(self, leader_id, assignment_strategy, members):
        assignor = self._lookup_assignor(assignment_strategy)
        assert assignor, 'Invalid assignment protocol: %s' % (assignment_strategy,)
        member_metadata = {}
        all_subscribed_topics = set()
        for member_id, metadata_bytes in members:
            metadata = ConsumerProtocol.METADATA.decode(metadata_bytes)
            member_metadata[member_id] = metadata
            all_subscribed_topics.update(metadata.subscription)  # pylint: disable-msg=no-member

        # the leader will begin watching for changes to any of the topics
        # the group is interested in, which ensures that all metadata changes
        # will eventually be seen
        # Because assignment typically happens within response callbacks,
        # we cannot block on metadata updates here (no recursion into poll())
        self._subscription.group_subscribe(all_subscribed_topics)
        self._client.set_topics(self._subscription.group_subscription())

        # keep track of the metadata used for assignment so that we can check
        # after rebalance completion whether anything has changed
        self._cluster.request_update()
        self._is_leader = True
        self._assignment_snapshot = self._metadata_snapshot

        log.debug("Performing assignment for group %s using strategy %s"
                  " with subscriptions %s", self.group_id, assignor.name,
                  member_metadata)

        assignments = assignor.assign(self._cluster, member_metadata)

        log.debug("Finished assignment for group %s: %s", self.group_id, assignments)

        group_assignment = {}
        for member_id, assignment in six.iteritems(assignments):
            group_assignment[member_id] = assignment
        return group_assignment
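
    # The mapping returned to the broker is illustratively of the form
    #   {'member-1': <assignment>, 'member-2': <assignment>}
    # where each <assignment> is the per-member assignment struct produced by
    # the chosen assignor.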

    def _on_join_prepare(self, generation, member_id):
        # commit offsets prior to rebalance if auto-commit enabled
        self._maybe_auto_commit_offsets_sync()

        # execute the user's callback before rebalance
        log.info("Revoking previously assigned partitions %s for group %s",
                 self._subscription.assigned_partitions(), self.group_id)
        if self._subscription.listener:
            try:
                revoked = set(self._subscription.assigned_partitions())
                self._subscription.listener.on_partitions_revoked(revoked)
            except Exception:
                log.exception("User provided subscription listener %s"
                              " for group %s failed on_partitions_revoked",
                              self._subscription.listener, self.group_id)

        self._is_leader = False
        self._subscription.reset_group_subscription()

    def need_rejoin(self):
        """Check whether the group should be rejoined

        Returns:
            bool: True if consumer should rejoin group, False otherwise
        """
        if not self._subscription.partitions_auto_assigned():
            return False

        if self._auto_assign_all_partitions():
            return False

        # we need to rejoin if we performed the assignment and metadata has changed
        if (self._assignment_snapshot is not None
            and self._assignment_snapshot != self._metadata_snapshot):
            return True

        # we need to join if our subscription has changed since the last join
        if (self._joined_subscription is not None
            and self._joined_subscription != self._subscription.subscription):
            return True

        return super(ConsumerCoordinator, self).need_rejoin()

    def refresh_committed_offsets_if_needed(self):
        """Fetch committed offsets for assigned partitions."""
        if self._subscription.needs_fetch_committed_offsets:
            offsets = self.fetch_committed_offsets(self._subscription.assigned_partitions())
            for partition, offset in six.iteritems(offsets):
                # verify assignment is still active
                if self._subscription.is_assigned(partition):
                    self._subscription.assignment[partition].committed = offset
            self._subscription.needs_fetch_committed_offsets = False

    def fetch_committed_offsets(self, partitions):
        """Fetch the current committed offsets for specified partitions

        Arguments:
            partitions (list of TopicPartition): partitions to fetch

        Returns:
            dict: {TopicPartition: OffsetAndMetadata}
        """
        if not partitions:
            return {}

        while True:
            self.ensure_coordinator_ready()

            # contact coordinator to fetch committed offsets
            future = self._send_offset_fetch_request(partitions)
            self._client.poll(future=future)

            if future.succeeded():
                return future.value

            if not future.retriable():
                raise future.exception  # pylint: disable-msg=raising-bad-type

            time.sleep(self.config['retry_backoff_ms'] / 1000)
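
    # Illustrative call (topic/partition names are assumptions):
    #   committed = coordinator.fetch_committed_offsets([TopicPartition('orders', 0)])
    #   last = committed.get(TopicPartition('orders', 0))  # OffsetAndMetadata or None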

    def close(self, autocommit=True):
        """Close the coordinator, leave the current group,
        and reset local generation / member_id.

        Keyword Arguments:
            autocommit (bool): If auto-commit is configured for this consumer,
                this optional flag causes the consumer to attempt to commit any
                pending consumed offsets prior to close. Default: True
        """
        try:
            if autocommit:
                self._maybe_auto_commit_offsets_sync()
        finally:
            super(ConsumerCoordinator, self).close()

    def _invoke_completed_offset_commit_callbacks(self):
        while self.completed_offset_commits:
            callback, offsets, exception = self.completed_offset_commits.popleft()
            callback(offsets, exception)

    def commit_offsets_async(self, offsets, callback=None):
        """Commit specific offsets asynchronously.

        Arguments:
            offsets (dict {TopicPartition: OffsetAndMetadata}): what to commit
            callback (callable, optional): called as callback(offsets, response);
                response will be either an Exception or an OffsetCommitResponse
                struct. This callback can be used to trigger custom actions when
                a commit request completes.

        Returns:
            kafka.future.Future
        """
        self._invoke_completed_offset_commit_callbacks()
        if not self.coordinator_unknown():
            future = self._do_commit_offsets_async(offsets, callback)
        else:
            # we don't know the current coordinator, so try to find it and then
            # send the commit or fail (we don't want recursive retries which can
            # cause offset commits to arrive out of order). Note that there may
            # be multiple offset commits chained to the same coordinator lookup
            # request. This is fine because the listeners will be invoked in the
            # same order that they were added. Note also that BaseCoordinator
            # prevents multiple concurrent coordinator lookup requests.
            future = self.lookup_coordinator()
            future.add_callback(lambda r: functools.partial(self._do_commit_offsets_async, offsets, callback)())
            if callback:
                future.add_errback(lambda e: self.completed_offset_commits.appendleft((callback, offsets, e)))

        # ensure the commit has a chance to be transmitted (without blocking on
        # its completion). Note that commits are treated as heartbeats by the
        # coordinator, so there is no need to explicitly allow heartbeats
        # through delayed task execution.
        self._client.poll(timeout_ms=0)  # no wakeup if we add that feature
        return future
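
    # Illustrative async commit (topic name and offset are assumptions):
    #
    #   def on_commit(offsets, result):
    #       if isinstance(result, Exception):
    #           log.error("commit failed: %s", result)
    #
    #   coordinator.commit_offsets_async(
    #       {TopicPartition('orders', 0): OffsetAndMetadata(42, '')},
    #       callback=on_commit)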

    def _do_commit_offsets_async(self, offsets, callback=None):
        assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
        assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
        assert all(map(lambda v: isinstance(v, OffsetAndMetadata),
                       offsets.values()))
        if callback is None:
            callback = self.config['default_offset_commit_callback']
        self._subscription.needs_fetch_committed_offsets = True
        future = self._send_offset_commit_request(offsets)
        future.add_both(lambda res: self.completed_offset_commits.appendleft((callback, offsets, res)))
        return future

    def commit_offsets_sync(self, offsets):
        """Commit specific offsets synchronously.

        This method will retry until the commit completes successfully or an
        unrecoverable error is encountered.

        Arguments:
            offsets (dict {TopicPartition: OffsetAndMetadata}): what to commit

        Raises error on failure
        """
        assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
        assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
        assert all(map(lambda v: isinstance(v, OffsetAndMetadata),
                       offsets.values()))
        self._invoke_completed_offset_commit_callbacks()
        if not offsets:
            return

        while True:
            self.ensure_coordinator_ready()

            future = self._send_offset_commit_request(offsets)
            self._client.poll(future=future)

            if future.succeeded():
                return future.value

            if not future.retriable():
                raise future.exception  # pylint: disable-msg=raising-bad-type

            time.sleep(self.config['retry_backoff_ms'] / 1000)

    def _maybe_auto_commit_offsets_sync(self):
        if self.config['enable_auto_commit']:
            try:
                self.commit_offsets_sync(self._subscription.all_consumed_offsets())

            # The three main group membership errors are known and should not
            # require a stacktrace -- just a warning
            except (Errors.UnknownMemberIdError,
                    Errors.IllegalGenerationError,
                    Errors.RebalanceInProgressError):
                log.warning("Offset commit failed: group membership out of date."
                            " This is likely to cause duplicate message"
                            " delivery.")
            except Exception:
                log.exception("Offset commit failed: This is likely to cause"
                              " duplicate message delivery")

    def _send_offset_commit_request(self, offsets):
        """Commit offsets for the specified list of topics and partitions.

        This is a non-blocking call which returns a request future that can be
        polled in the case of a synchronous commit or ignored in the
        asynchronous case.

        Arguments:
            offsets (dict of {TopicPartition: OffsetAndMetadata}): what should
                be committed

        Returns:
            Future: indicating whether the commit was successful or not
        """
        assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
        assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
        assert all(map(lambda v: isinstance(v, OffsetAndMetadata),
                       offsets.values()))
        if not offsets:
            log.debug('No offsets to commit')
            return Future().success(None)

        node_id = self.coordinator()
        if node_id is None:
            return Future().failure(Errors.GroupCoordinatorNotAvailableError)

        # create the offset commit request
        offset_data = collections.defaultdict(dict)
        for tp, offset in six.iteritems(offsets):
            offset_data[tp.topic][tp.partition] = offset

        if self._subscription.partitions_auto_assigned():
            generation = self.generation()
        else:
            generation = Generation.NO_GENERATION

        # if the generation is None, we are not part of an active group
        # (and we expect to be). The only thing we can do is fail the commit
        # and let the user rejoin the group in poll()
        if self.config['api_version'] >= (0, 9) and generation is None:
            return Future().failure(Errors.CommitFailedError())

        if self.config['api_version'] >= (0, 9):
            request = OffsetCommitRequest[2](
                self.group_id,
                generation.generation_id,
                generation.member_id,
                OffsetCommitRequest[2].DEFAULT_RETENTION_TIME,
                [(
                    topic, [(
                        partition,
                        offset.offset,
                        offset.metadata
                    ) for partition, offset in six.iteritems(partitions)]
                ) for topic, partitions in six.iteritems(offset_data)]
            )
        elif self.config['api_version'] >= (0, 8, 2):
            request = OffsetCommitRequest[1](
                self.group_id, -1, '',
                [(
                    topic, [(
                        partition,
                        offset.offset,
                        -1,
                        offset.metadata
                    ) for partition, offset in six.iteritems(partitions)]
                ) for topic, partitions in six.iteritems(offset_data)]
            )
        elif self.config['api_version'] >= (0, 8, 1):
            request = OffsetCommitRequest[0](
                self.group_id,
                [(
                    topic, [(
                        partition,
                        offset.offset,
                        offset.metadata
                    ) for partition, offset in six.iteritems(partitions)]
                ) for topic, partitions in six.iteritems(offset_data)]
            )

        log.debug("Sending offset-commit request with %s for group %s to %s",
                  offsets, self.group_id, node_id)

        future = Future()
        _f = self._client.send(node_id, request)
        _f.add_callback(self._handle_offset_commit_response, offsets, future, time.time())
        _f.add_errback(self._failed_request, node_id, request, future)
        return future

    def _handle_offset_commit_response(self, offsets, future, send_time, response):
        # TODO look at adding request_latency_ms to response (like java kafka)
        self.consumer_sensors.commit_latency.record((time.time() - send_time) * 1000)
        unauthorized_topics = set()

        for topic, partitions in response.topics:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                offset = offsets[tp]

                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    log.debug("Group %s committed offset %s for partition %s",
                              self.group_id, offset, tp)
                    if self._subscription.is_assigned(tp):
                        self._subscription.assignment[tp].committed = offset
                elif error_type is Errors.GroupAuthorizationFailedError:
                    log.error("Not authorized to commit offsets for group %s",
                              self.group_id)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type is Errors.TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type in (Errors.OffsetMetadataTooLargeError,
                                    Errors.InvalidCommitOffsetSizeError):
                    # raise the error to the user
                    log.debug("OffsetCommit for group %s failed on partition %s"
                              " %s", self.group_id, tp, error_type.__name__)
                    future.failure(error_type())
                    return
                elif error_type is Errors.GroupLoadInProgressError:
                    # just retry
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error_type.__name__)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                                    Errors.NotCoordinatorForGroupError,
                                    Errors.RequestTimedOutError):
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error_type.__name__)
                    self.coordinator_dead(error_type())
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (Errors.UnknownMemberIdError,
                                    Errors.IllegalGenerationError,
                                    Errors.RebalanceInProgressError):
                    # need to re-join group
                    error = error_type(self.group_id)
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error)
                    self.reset_generation()
                    future.failure(Errors.CommitFailedError())
                    return
                else:
                    log.error("Group %s failed to commit partition %s at offset"
                              " %s: %s", self.group_id, tp, offset,
                              error_type.__name__)
                    future.failure(error_type())
                    return

        if unauthorized_topics:
            log.error("Not authorized to commit to topics %s for group %s",
                      unauthorized_topics, self.group_id)
            future.failure(Errors.TopicAuthorizationFailedError(unauthorized_topics))
        else:
            future.success(None)

    def _send_offset_fetch_request(self, partitions):
        """Fetch the committed offsets for a set of partitions.

        This is a non-blocking call. The returned future can be polled to get
        the actual offsets returned from the broker.

        Arguments:
            partitions (list of TopicPartition): the partitions to fetch

        Returns:
            Future: resolves to dict of offsets: {TopicPartition: OffsetAndMetadata}
        """
        assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
        assert all(map(lambda k: isinstance(k, TopicPartition), partitions))
        if not partitions:
            return Future().success({})

        node_id = self.coordinator()
        if node_id is None:
            return Future().failure(Errors.GroupCoordinatorNotAvailableError)

        # Verify node is ready
        if not self._client.ready(node_id):
            log.debug("Node %s not ready -- failing offset fetch request",
                      node_id)
            return Future().failure(Errors.NodeNotReadyError)

        log.debug("Group %s fetching committed offsets for partitions: %s",
                  self.group_id, partitions)

        # construct the request
        topic_partitions = collections.defaultdict(set)
        for tp in partitions:
            topic_partitions[tp.topic].add(tp.partition)

        if self.config['api_version'] >= (0, 8, 2):
            request = OffsetFetchRequest[1](
                self.group_id,
                list(topic_partitions.items())
            )
        else:
            request = OffsetFetchRequest[0](
                self.group_id,
                list(topic_partitions.items())
            )

        # send the request with a callback
        future = Future()
        _f = self._client.send(node_id, request)
        _f.add_callback(self._handle_offset_fetch_response, future)
        _f.add_errback(self._failed_request, node_id, request, future)
        return future

    def _handle_offset_fetch_response(self, future, response):
        offsets = {}
        for topic, partitions in response.topics:
            for partition, offset, metadata, error_code in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if error_type is not Errors.NoError:
                    error = error_type()
                    log.debug("Group %s failed to fetch offset for partition"
                              " %s: %s", self.group_id, tp, error)
                    if error_type is Errors.GroupLoadInProgressError:
                        # just retry
                        future.failure(error)
                    elif error_type is Errors.NotCoordinatorForGroupError:
                        # re-discover the coordinator and retry
                        self.coordinator_dead(error_type())
                        future.failure(error)
                    elif error_type is Errors.UnknownTopicOrPartitionError:
                        log.warning("OffsetFetchRequest -- unknown topic %s"
                                    " (have you committed any offsets yet?)",
                                    topic)
                        continue
                    else:
                        log.error("Unknown error fetching offsets for %s: %s",
                                  tp, error)
                        future.failure(error)
                    return
                elif offset >= 0:
                    # record the position with the offset
                    # (-1 indicates no committed offset to fetch)
                    offsets[tp] = OffsetAndMetadata(offset, metadata)
                else:
                    log.debug("Group %s has no committed offset for partition"
                              " %s", self.group_id, tp)
        future.success(offsets)

    def _default_offset_commit_callback(self, offsets, exception):
        if exception is not None:
            log.error("Offset commit failed: %s", exception)

    def _commit_offsets_async_on_complete(self, offsets, exception):
        if exception is not None:
            log.warning("Auto offset commit failed for group %s: %s",
                        self.group_id, exception)
            if getattr(exception, 'retriable', False):
                self.next_auto_commit_deadline = min(time.time() + self.config['retry_backoff_ms'] / 1000, self.next_auto_commit_deadline)
        else:
            log.debug("Completed autocommit of offsets %s for group %s",
                      offsets, self.group_id)

    def _maybe_auto_commit_offsets_async(self):
        if self.config['enable_auto_commit']:
            if self.coordinator_unknown():
                self.next_auto_commit_deadline = time.time() + self.config['retry_backoff_ms'] / 1000
            elif time.time() > self.next_auto_commit_deadline:
                self.next_auto_commit_deadline = time.time() + self.auto_commit_interval
                self.commit_offsets_async(self._subscription.all_consumed_offsets(),
                                          self._commit_offsets_async_on_complete)


class ConsumerCoordinatorMetrics(object):
    def __init__(self, metrics, metric_group_prefix, subscription):
        self.metrics = metrics
        self.metric_group_name = '%s-coordinator-metrics' % (metric_group_prefix,)

        self.commit_latency = metrics.sensor('commit-latency')
        self.commit_latency.add(metrics.metric_name(
            'commit-latency-avg', self.metric_group_name,
            'The average time taken for a commit request'), Avg())
        self.commit_latency.add(metrics.metric_name(
            'commit-latency-max', self.metric_group_name,
            'The max time taken for a commit request'), Max())
        self.commit_latency.add(metrics.metric_name(
            'commit-rate', self.metric_group_name,
            'The number of commit calls per second'), Rate(sampled_stat=Count()))

        num_parts = AnonMeasurable(lambda config, now:
                                   len(subscription.assigned_partitions()))
        metrics.add_metric(metrics.metric_name(
            'assigned-partitions', self.metric_group_name,
            'The number of partitions currently assigned to this consumer'),
            num_parts)
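

# Illustrative lifecycle sketch (assumes an already-configured KafkaClient,
# SubscriptionState, and Metrics instance bound to client/subscription/metrics;
# `running` is a hypothetical loop flag):
#
#   coordinator = ConsumerCoordinator(client, subscription, metrics,
#                                     group_id='my-group')
#   try:
#       while running:
#           coordinator.poll()                      # join group, heartbeat, auto-commit
#           coordinator.refresh_committed_offsets_if_needed()
#           time.sleep(coordinator.time_to_next_poll())
#   finally:
#       coordinator.close()                         # commit pending offsets, leave group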