m2m模型翻译
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

749 lines
37 KiB

6 months ago
  1. from __future__ import absolute_import
  2. import atexit
  3. import copy
  4. import logging
  5. import socket
  6. import threading
  7. import time
  8. import weakref
  9. from kafka.vendor import six
  10. import kafka.errors as Errors
  11. from kafka.client_async import KafkaClient, selectors
  12. from kafka.codec import has_gzip, has_snappy, has_lz4, has_zstd
  13. from kafka.metrics import MetricConfig, Metrics
  14. from kafka.partitioner.default import DefaultPartitioner
  15. from kafka.producer.future import FutureRecordMetadata, FutureProduceResult
  16. from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator
  17. from kafka.producer.sender import Sender
  18. from kafka.record.default_records import DefaultRecordBatchBuilder
  19. from kafka.record.legacy_records import LegacyRecordBatchBuilder
  20. from kafka.serializer import Serializer
  21. from kafka.structs import TopicPartition
  # Module-level logger used by the producer code in this file.
  log = logging.getLogger(__name__)
  # Module-level counter; __init__ calls increment() on it to build the default
  # 'kafka-python-producer-N' client id.
  PRODUCER_CLIENT_ID_SEQUENCE = AtomicInteger()
  24. class KafkaProducer(object):
  25. """A Kafka client that publishes records to the Kafka cluster.
  26. The producer is thread safe and sharing a single producer instance across
  27. threads will generally be faster than having multiple instances.
  28. The producer consists of a pool of buffer space that holds records that
  29. haven't yet been transmitted to the server as well as a background I/O
  30. thread that is responsible for turning these records into requests and
  31. transmitting them to the cluster.
  32. :meth:`~kafka.KafkaProducer.send` is asynchronous. When called it adds the
  33. record to a buffer of pending record sends and immediately returns. This
  34. allows the producer to batch together individual records for efficiency.
  35. The 'acks' config controls the criteria under which requests are considered
  36. complete. The "all" setting will result in blocking on the full commit of
  37. the record, the slowest but most durable setting.
  38. If the request fails, the producer can automatically retry, unless
  39. 'retries' is configured to 0. Enabling retries also opens up the
  40. possibility of duplicates (see the documentation on message
  41. delivery semantics for details:
  42. https://kafka.apache.org/documentation.html#semantics
  43. ).
  44. The producer maintains buffers of unsent records for each partition. These
  45. buffers are of a size specified by the 'batch_size' config. Making this
  46. larger can result in more batching, but requires more memory (since we will
  47. generally have one of these buffers for each active partition).
  48. By default a buffer is available to send immediately even if there is
  49. additional unused space in the buffer. However if you want to reduce the
  50. number of requests you can set 'linger_ms' to something greater than 0.
  51. This will instruct the producer to wait up to that number of milliseconds
  52. before sending a request in hope that more records will arrive to fill up
  53. the same batch. This is analogous to Nagle's algorithm in TCP. Note that
  54. records that arrive close together in time will generally batch together
  55. even with linger_ms=0 so under heavy load batching will occur regardless of
  56. the linger configuration; however setting this to something larger than 0
  57. can lead to fewer, more efficient requests when not under maximal load at
  58. the cost of a small amount of latency.
  59. The buffer_memory controls the total amount of memory available to the
  60. producer for buffering. If records are sent faster than they can be
  61. transmitted to the server then this buffer space will be exhausted. When
  62. the buffer space is exhausted additional send calls will block.
  63. The key_serializer and value_serializer instruct how to turn the key and
  64. value objects the user provides into bytes.
  65. Keyword Arguments:
  66. bootstrap_servers: 'host[:port]' string (or list of 'host[:port]'
  67. strings) that the producer should contact to bootstrap initial
  68. cluster metadata. This does not have to be the full node list.
  69. It just needs to have at least one broker that will respond to a
  70. Metadata API Request. Default port is 9092. If no servers are
  71. specified, will default to localhost:9092.
  72. client_id (str): a name for this client. This string is passed in
  73. each request to servers and can be used to identify specific
  74. server-side log entries that correspond to this client.
  75. Default: 'kafka-python-producer-#' (appended with a unique number
  76. per instance)
  77. key_serializer (callable): used to convert user-supplied keys to bytes
  78. If not None, called as f(key), should return bytes. Default: None.
  79. value_serializer (callable): used to convert user-supplied message
  80. values to bytes. If not None, called as f(value), should return
  81. bytes. Default: None.
  82. acks (0, 1, 'all'): The number of acknowledgments the producer requires
  83. the leader to have received before considering a request complete.
  84. This controls the durability of records that are sent. The
  85. following settings are common:
  86. 0: Producer will not wait for any acknowledgment from the server.
  87. The message will immediately be added to the socket
  88. buffer and considered sent. No guarantee can be made that the
  89. server has received the record in this case, and the retries
  90. configuration will not take effect (as the client won't
  91. generally know of any failures). The offset given back for each
  92. record will always be set to -1.
  93. 1: Wait for leader to write the record to its local log only.
  94. Broker will respond without awaiting full acknowledgement from
  95. all followers. In this case should the leader fail immediately
  96. after acknowledging the record but before the followers have
  97. replicated it then the record will be lost.
  98. all: Wait for the full set of in-sync replicas to write the record.
  99. This guarantees that the record will not be lost as long as at
  100. least one in-sync replica remains alive. This is the strongest
  101. available guarantee.
  102. If unset, defaults to acks=1.
  103. compression_type (str): The compression type for all data generated by
  104. the producer. Valid values are 'gzip', 'snappy', 'lz4', 'zstd' or None.
  105. Compression is of full batches of data, so the efficacy of batching
  106. will also impact the compression ratio (more batching means better
  107. compression). Default: None.
  108. retries (int): Setting a value greater than zero will cause the client
  109. to resend any record whose send fails with a potentially transient
  110. error. Note that this retry is no different than if the client
  111. resent the record upon receiving the error. Allowing retries
  112. without setting max_in_flight_requests_per_connection to 1 will
  113. potentially change the ordering of records because if two batches
  114. are sent to a single partition, and the first fails and is retried
  115. but the second succeeds, then the records in the second batch may
  116. appear first.
  117. Default: 0.
  118. batch_size (int): Requests sent to brokers will contain multiple
  119. batches, one for each partition with data available to be sent.
  120. A small batch size will make batching less common and may reduce
  121. throughput (a batch size of zero will disable batching entirely).
  122. Default: 16384
  123. linger_ms (int): The producer groups together any records that arrive
  124. in between request transmissions into a single batched request.
  125. Normally this occurs only under load when records arrive faster
  126. than they can be sent out. However in some circumstances the client
  127. may want to reduce the number of requests even under moderate load.
  128. This setting accomplishes this by adding a small amount of
  129. artificial delay; that is, rather than immediately sending out a
  130. record the producer will wait for up to the given delay to allow
  131. other records to be sent so that the sends can be batched together.
  132. This can be thought of as analogous to Nagle's algorithm in TCP.
  133. This setting gives the upper bound on the delay for batching: once
  134. we get batch_size worth of records for a partition it will be sent
  135. immediately regardless of this setting, however if we have fewer
  136. than this many bytes accumulated for this partition we will
  137. 'linger' for the specified time waiting for more records to show
  138. up. This setting defaults to 0 (i.e. no delay). Setting linger_ms=5
  139. would have the effect of reducing the number of requests sent but
  140. would add up to 5ms of latency to records sent in the absence of
  141. load. Default: 0.
  142. partitioner (callable): Callable used to determine which partition
  143. each message is assigned to. Called (after key serialization):
  144. partitioner(key_bytes, all_partitions, available_partitions).
  145. The default partitioner implementation hashes each non-None key
  146. using the same murmur2 algorithm as the java client so that
  147. messages with the same key are assigned to the same partition.
  148. When a key is None, the message is delivered to a random partition
  149. (filtered to partitions with available leaders only, if possible).
  150. buffer_memory (int): The total bytes of memory the producer should use
  151. to buffer records waiting to be sent to the server. If records are
  152. sent faster than they can be delivered to the server the producer
  153. will block up to max_block_ms, raising an exception on timeout.
  154. In the current implementation, this setting is an approximation.
  155. Default: 33554432 (32MB)
  156. connections_max_idle_ms: Close idle connections after the number of
  157. milliseconds specified by this config. The broker closes idle
  158. connections after connections.max.idle.ms, so this avoids hitting
  159. unexpected socket disconnected errors on the client.
  160. Default: 540000
  161. max_block_ms (int): Number of milliseconds to block during
  162. :meth:`~kafka.KafkaProducer.send` and
  163. :meth:`~kafka.KafkaProducer.partitions_for`. These methods can be
  164. blocked either because the buffer is full or metadata unavailable.
  165. Blocking in the user-supplied serializers or partitioner will not be
  166. counted against this timeout. Default: 60000.
  167. max_request_size (int): The maximum size of a request. This is also
  168. effectively a cap on the maximum record size. Note that the server
  169. has its own cap on record size which may be different from this.
  170. This setting will limit the number of record batches the producer
  171. will send in a single request to avoid sending huge requests.
  172. Default: 1048576.
  173. metadata_max_age_ms (int): The period of time in milliseconds after
  174. which we force a refresh of metadata even if we haven't seen any
  175. partition leadership changes to proactively discover any new
  176. brokers or partitions. Default: 300000
  177. retry_backoff_ms (int): Milliseconds to backoff when retrying on
  178. errors. Default: 100.
  179. request_timeout_ms (int): Client request timeout in milliseconds.
  180. Default: 30000.
  181. receive_buffer_bytes (int): The size of the TCP receive buffer
  182. (SO_RCVBUF) to use when reading data. Default: None (relies on
  183. system defaults). Java client defaults to 32768.
  184. send_buffer_bytes (int): The size of the TCP send buffer
  185. (SO_SNDBUF) to use when sending data. Default: None (relies on
  186. system defaults). Java client defaults to 131072.
  187. socket_options (list): List of tuple-arguments to socket.setsockopt
  188. to apply to broker connection sockets. Default:
  189. [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
  190. reconnect_backoff_ms (int): The amount of time in milliseconds to
  191. wait before attempting to reconnect to a given host.
  192. Default: 50.
  193. reconnect_backoff_max_ms (int): The maximum amount of time in
  194. milliseconds to backoff/wait when reconnecting to a broker that has
  195. repeatedly failed to connect. If provided, the backoff per host
  196. will increase exponentially for each consecutive connection
  197. failure, up to this maximum. Once the maximum is reached,
  198. reconnection attempts will continue periodically with this fixed
  199. rate. To avoid connection storms, a randomization factor of 0.2
  200. will be applied to the backoff resulting in a random range between
  201. 20% below and 20% above the computed value. Default: 1000.
  202. max_in_flight_requests_per_connection (int): Requests are pipelined
  203. to kafka brokers up to this number of maximum requests per
  204. broker connection. Note that if this setting is set to be greater
  205. than 1 and there are failed sends, there is a risk of message
  206. re-ordering due to retries (i.e., if retries are enabled).
  207. Default: 5.
  208. security_protocol (str): Protocol used to communicate with brokers.
  209. Valid values are: PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL.
  210. Default: PLAINTEXT.
  211. ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping
  212. socket connections. If provided, all other ssl_* configurations
  213. will be ignored. Default: None.
  214. ssl_check_hostname (bool): flag to configure whether ssl handshake
  215. should verify that the certificate matches the brokers hostname.
  216. default: true.
  217. ssl_cafile (str): optional filename of ca file to use in certificate
  218. verification. default: none.
  219. ssl_certfile (str): optional filename of file in pem format containing
  220. the client certificate, as well as any ca certificates needed to
  221. establish the certificate's authenticity. default: none.
  222. ssl_keyfile (str): optional filename containing the client private key.
  223. default: none.
  224. ssl_password (str): optional password to be used when loading the
  225. certificate chain. default: none.
  226. ssl_crlfile (str): optional filename containing the CRL to check for
  227. certificate expiration. By default, no CRL check is done. When
  228. providing a file, only the leaf certificate will be checked against
  229. this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+.
  230. default: none.
  231. ssl_ciphers (str): optionally set the available ciphers for ssl
  232. connections. It should be a string in the OpenSSL cipher list
  233. format. If no cipher can be selected (because compile-time options
  234. or other configuration forbids use of all the specified ciphers),
  235. an ssl.SSLError will be raised. See ssl.SSLContext.set_ciphers
  236. api_version (tuple): Specify which Kafka API version to use. If set to
  237. None, the client will attempt to infer the broker version by probing
  238. various APIs. Example: (0, 10, 2). Default: None
  239. api_version_auto_timeout_ms (int): number of milliseconds to throw a
  240. timeout exception from the constructor when checking the broker
  241. api version. Only applies if api_version set to None.
  242. metric_reporters (list): A list of classes to use as metrics reporters.
  243. Implementing the AbstractMetricsReporter interface allows plugging
  244. in classes that will be notified of new metric creation. Default: []
  245. metrics_num_samples (int): The number of samples maintained to compute
  246. metrics. Default: 2
  247. metrics_sample_window_ms (int): The maximum age in milliseconds of
  248. samples used to compute metrics. Default: 30000
  249. selector (selectors.BaseSelector): Provide a specific selector
  250. implementation to use for I/O multiplexing.
  251. Default: selectors.DefaultSelector
  252. sasl_mechanism (str): Authentication mechanism when security_protocol
  253. is configured for SASL_PLAINTEXT or SASL_SSL. Valid values are:
  254. PLAIN, GSSAPI, OAUTHBEARER, SCRAM-SHA-256, SCRAM-SHA-512.
  255. sasl_plain_username (str): username for sasl PLAIN and SCRAM authentication.
  256. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
  257. sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication.
  258. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
  259. sasl_kerberos_service_name (str): Service name to include in GSSAPI
  260. sasl mechanism handshake. Default: 'kafka'
  261. sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI
  262. sasl mechanism handshake. Default: one of bootstrap servers
  263. sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider
  264. instance. (See kafka.oauth.abstract). Default: None
  265. Note:
  266. Configuration parameters are described in more detail at
  267. https://kafka.apache.org/0100/configuration.html#producerconfigs
  268. """
  # Default value for every configuration key the constructor accepts.
  # __init__ copies this dict and overrides entries with the caller's keyword
  # arguments; any key not listed here is rejected as unrecognized.
  DEFAULT_CONFIG = {
      'bootstrap_servers': 'localhost',
      'client_id': None,
      'key_serializer': None,
      'value_serializer': None,
      'acks': 1,
      'bootstrap_topics_filter': set(),
      'compression_type': None,
      'retries': 0,
      'batch_size': 16384,
      'linger_ms': 0,
      'partitioner': DefaultPartitioner(),
      'buffer_memory': 33554432,
      'connections_max_idle_ms': 9 * 60 * 1000,
      'max_block_ms': 60000,
      'max_request_size': 1048576,
      'metadata_max_age_ms': 300000,
      'retry_backoff_ms': 100,
      'request_timeout_ms': 30000,
      'receive_buffer_bytes': None,
      'send_buffer_bytes': None,
      'socket_options': [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)],
      'sock_chunk_bytes': 4096,  # undocumented experimental option
      'sock_chunk_buffer_count': 1000,  # undocumented experimental option
      'reconnect_backoff_ms': 50,
      'reconnect_backoff_max_ms': 1000,
      'max_in_flight_requests_per_connection': 5,
      'security_protocol': 'PLAINTEXT',
      'ssl_context': None,
      'ssl_check_hostname': True,
      'ssl_cafile': None,
      'ssl_certfile': None,
      'ssl_keyfile': None,
      'ssl_crlfile': None,
      'ssl_password': None,
      'ssl_ciphers': None,
      'api_version': None,
      'api_version_auto_timeout_ms': 2000,
      'metric_reporters': [],
      'metrics_num_samples': 2,
      'metrics_sample_window_ms': 30000,
      'selector': selectors.DefaultSelector,
      'sasl_mechanism': None,
      'sasl_plain_username': None,
      'sasl_plain_password': None,
      'sasl_kerberos_service_name': 'kafka',
      'sasl_kerberos_domain_name': None,
      'sasl_oauth_token_provider': None
  }
  # Maps each accepted compression_type to a pair of
  # (callable reporting whether the codec library is usable, codec attribute
  # constant stored into config['compression_attrs'] by __init__).
  _COMPRESSORS = {
      'gzip': (has_gzip, LegacyRecordBatchBuilder.CODEC_GZIP),
      'snappy': (has_snappy, LegacyRecordBatchBuilder.CODEC_SNAPPY),
      'lz4': (has_lz4, LegacyRecordBatchBuilder.CODEC_LZ4),
      'zstd': (has_zstd, DefaultRecordBatchBuilder.CODEC_ZSTD),
      None: (lambda: True, LegacyRecordBatchBuilder.CODEC_NONE),
  }
  def __init__(self, **configs):
      """Build the client, record accumulator and sender thread.

      Arguments:
          **configs: overrides for the keys of DEFAULT_CONFIG.

      Raises:
          AssertionError: on unrecognized config keys, when the codec
              library check fails, or when the broker api_version is too
              old for the requested lz4/zstd compression.
          ValueError: if compression_type is not a key of _COMPRESSORS.
      """
      log.debug("Starting the Kafka producer")  # trace
      self.config = copy.copy(self.DEFAULT_CONFIG)
      for key in self.config:
          if key in configs:
              self.config[key] = configs.pop(key)

      # Only check for extra config keys in top-level class
      assert not configs, 'Unrecognized configs: %s' % (configs,)

      if self.config['client_id'] is None:
          self.config['client_id'] = 'kafka-python-producer-%s' % \
                                     (PRODUCER_CLIENT_ID_SEQUENCE.increment(),)

      if self.config['acks'] == 'all':
          self.config['acks'] = -1

      # api_version was previously a str. accept old format for now
      if isinstance(self.config['api_version'], str):
          deprecated = self.config['api_version']
          if deprecated == 'auto':
              self.config['api_version'] = None
          else:
              self.config['api_version'] = tuple(map(int, deprecated.split('.')))
          log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
                      str(self.config['api_version']), deprecated)

      # Check compression_type for library support. Done before the client
      # is built so that a bad codec fails before any client exists.
      ct = self.config['compression_type']
      if ct not in self._COMPRESSORS:
          raise ValueError("Not supported codec: {}".format(ct))
      checker, compression_attrs = self._COMPRESSORS[ct]
      assert checker(), "Libraries for {} compression codec not found".format(ct)

      # Configure metrics
      metrics_tags = {'client-id': self.config['client_id']}
      metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
                                   time_window_ms=self.config['metrics_sample_window_ms'],
                                   tags=metrics_tags)
      reporters = [reporter() for reporter in self.config['metric_reporters']]
      self._metrics = Metrics(metric_config, reporters)

      client = KafkaClient(metrics=self._metrics, metric_group_prefix='producer',
                           wakeup_timeout_ms=self.config['max_block_ms'],
                           **self.config)

      # Get auto-discovered version from client if necessary
      if self.config['api_version'] is None:
          self.config['api_version'] = client.config['api_version']

      if self.config['compression_type'] == 'lz4':
          assert self.config['api_version'] >= (0, 8, 2), 'LZ4 Requires >= Kafka 0.8.2 Brokers'

      if self.config['compression_type'] == 'zstd':
          assert self.config['api_version'] >= (2, 1, 0), 'Zstd Requires >= Kafka 2.1.0 Brokers'

      # Stored only after the client is built, so the client receives the
      # same config keys as before.
      self.config['compression_attrs'] = compression_attrs

      message_version = self._max_usable_produce_magic()
      self._accumulator = RecordAccumulator(message_version=message_version, metrics=self._metrics, **self.config)
      self._metadata = client.cluster
      guarantee_message_order = bool(self.config['max_in_flight_requests_per_connection'] == 1)
      self._sender = Sender(client, self._metadata,
                            self._accumulator, self._metrics,
                            guarantee_message_order=guarantee_message_order,
                            **self.config)
      self._sender.daemon = True
      self._sender.start()
      self._closed = False

      # Register a weakly-referencing close() hook for interpreter exit.
      self._cleanup = self._cleanup_factory()
      atexit.register(self._cleanup)
      log.debug("Kafka producer started")
  def bootstrap_connected(self):
      """Return True if the bootstrap is connected.

      Delegates to the sender's bootstrap_connected().
      """
      return self._sender.bootstrap_connected()
  389. def _cleanup_factory(self):
  390. """Build a cleanup clojure that doesn't increase our ref count"""
  391. _self = weakref.proxy(self)
  392. def wrapper():
  393. try:
  394. _self.close(timeout=0)
  395. except (ReferenceError, AttributeError):
  396. pass
  397. return wrapper
  398. def _unregister_cleanup(self):
  399. if getattr(self, '_cleanup', None):
  400. if hasattr(atexit, 'unregister'):
  401. atexit.unregister(self._cleanup) # pylint: disable=no-member
  402. # py2 requires removing from private attribute...
  403. else:
  404. # ValueError on list.remove() if the exithandler no longer exists
  405. # but that is fine here
  406. try:
  407. atexit._exithandlers.remove( # pylint: disable=no-member
  408. (self._cleanup, (), {}))
  409. except ValueError:
  410. pass
  411. self._cleanup = None
  412. def __del__(self):
  413. # Disable logger during destruction to avoid touching dangling references
  414. class NullLogger(object):
  415. def __getattr__(self, name):
  416. return lambda *args: None
  417. global log
  418. log = NullLogger()
  419. self.close()
  def close(self, timeout=None):
      """Close this producer.

      Arguments:
          timeout (float, optional): timeout in seconds to wait for completion.
      """
      # drop our atexit handler now to avoid leaks
      self._unregister_cleanup()

      # `_closed` is only assigned near the end of __init__, so it can be
      # missing on a partially constructed instance.
      if not hasattr(self, '_closed') or self._closed:
          log.info('Kafka producer closed')
          return
      if timeout is None:
          # threading.TIMEOUT_MAX is available in Python3.3+
          timeout = getattr(threading, 'TIMEOUT_MAX', float('inf'))
      if getattr(threading, 'TIMEOUT_MAX', False):
          assert 0 <= timeout <= getattr(threading, 'TIMEOUT_MAX')
      else:
          assert timeout >= 0

      log.info("Closing the Kafka producer with %s secs timeout.", timeout)
      invoked_from_callback = bool(threading.current_thread() is self._sender)
      if timeout > 0:
          if invoked_from_callback:
              log.warning("Overriding close timeout %s secs to 0 in order to"
                          " prevent useless blocking due to self-join. This"
                          " means you have incorrectly invoked close with a"
                          " non-zero timeout from the producer call-back.",
                          timeout)
          else:
              # Try to close gracefully.
              if self._sender is not None:
                  self._sender.initiate_close()
                  self._sender.join(timeout)

      if self._sender is not None and self._sender.is_alive():
          log.info("Proceeding to force close the producer since pending"
                   " requests could not be completed within timeout %s.",
                   timeout)
          self._sender.force_close()

      self._metrics.close()
      # Serializers without a close() attribute are skipped.
      try:
          self.config['key_serializer'].close()
      except AttributeError:
          pass
      try:
          self.config['value_serializer'].close()
      except AttributeError:
          pass
      self._closed = True
      log.debug("The Kafka producer has closed.")
  def partitions_for(self, topic):
      """Returns set of all known partitions for the topic.

      Waits on metadata for up to max_block_ms (converted to seconds).
      """
      max_wait = self.config['max_block_ms'] / 1000.0
      return self._wait_on_metadata(topic, max_wait)
  471. def _max_usable_produce_magic(self):
  472. if self.config['api_version'] >= (0, 11):
  473. return 2
  474. elif self.config['api_version'] >= (0, 10):
  475. return 1
  476. else:
  477. return 0
  478. def _estimate_size_in_bytes(self, key, value, headers=[]):
  479. magic = self._max_usable_produce_magic()
  480. if magic == 2:
  481. return DefaultRecordBatchBuilder.estimate_size_in_bytes(
  482. key, value, headers)
  483. else:
  484. return LegacyRecordBatchBuilder.estimate_size_in_bytes(
  485. magic, self.config['compression_type'], key, value)
  def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None):
      """Publish a message to a topic.

      Arguments:
          topic (str): topic where the message will be published
          value (optional): message value. Must be type bytes, or be
              serializable to bytes via configured value_serializer. If value
              is None, key is required and message acts as a 'delete'.
              See kafka compaction documentation for more details:
              https://kafka.apache.org/documentation.html#compaction
              (compaction requires kafka >= 0.8.1)
          partition (int, optional): optionally specify a partition. If not
              set, the partition will be selected using the configured
              'partitioner'.
          key (optional): a key to associate with the message. Can be used to
              determine which partition to send the message to. If partition
              is None (and producer's partitioner config is left as default),
              then messages with the same key will be delivered to the same
              partition (but if key is None, partition is chosen randomly).
              Must be type bytes, or be serializable to bytes via configured
              key_serializer.
          headers (optional): a list of header key value pairs. List items
              are tuples of str key and bytes value.
          timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC)
              to use as the message timestamp. Defaults to current time.

      Returns:
          FutureRecordMetadata: resolves to RecordMetadata

      Raises:
          KafkaTimeoutError: if unable to fetch topic metadata, or unable
              to obtain memory buffer prior to configured max_block_ms
      """
      assert value is not None or self.config['api_version'] >= (0, 8, 1), (
          'Null messages require kafka >= 0.8.1')
      assert not (value is None and key is None), 'Need at least one: key or value'
      # Pre-bound so the except branch below can report sizes even when the
      # failure happens before serialization.
      key_bytes = value_bytes = None
      try:
          self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)

          key_bytes = self._serialize(
              self.config['key_serializer'],
              topic, key)
          value_bytes = self._serialize(
              self.config['value_serializer'],
              topic, value)
          assert type(key_bytes) in (bytes, bytearray, memoryview, type(None))
          assert type(value_bytes) in (bytes, bytearray, memoryview, type(None))

          partition = self._partition(topic, partition, key, value,
                                      key_bytes, value_bytes)

          if headers is None:
              headers = []
          assert type(headers) == list
          assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers)

          message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers)
          self._ensure_valid_record_size(message_size)

          tp = TopicPartition(topic, partition)
          log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp)
          result = self._accumulator.append(tp, timestamp_ms,
                                            key_bytes, value_bytes, headers,
                                            self.config['max_block_ms'],
                                            estimated_size=message_size)
          future, batch_is_full, new_batch_created = result
          if batch_is_full or new_batch_created:
              log.debug("Waking up the sender since %s is either full or"
                        " getting a new batch", tp)
              self._sender.wakeup()

          return future
          # handling exceptions and record the errors;
          # for API exceptions return them in the future,
          # for other exceptions raise directly
      except Errors.BrokerResponseError as e:
          log.debug("Exception occurred during message send: %s", e)
          return FutureRecordMetadata(
              FutureProduceResult(TopicPartition(topic, partition)),
              -1, None, None,
              len(key_bytes) if key_bytes is not None else -1,
              len(value_bytes) if value_bytes is not None else -1,
              sum(len(h_key.encode("utf-8")) + len(h_value) for h_key, h_value in headers) if headers else -1,
          ).failure(e)
  def flush(self, timeout=None):
      """
      Invoking this method makes all buffered records immediately available
      to send (even if linger_ms is greater than 0) and blocks on the
      completion of the requests associated with these records. The
      post-condition of :meth:`~kafka.KafkaProducer.flush` is that any
      previously sent record will have completed
      (e.g. Future.is_done() == True). A request is considered completed when
      either it is successfully acknowledged according to the 'acks'
      configuration for the producer, or it results in an error.

      Other threads can continue sending messages while one thread is blocked
      waiting for a flush call to complete; however, no guarantee is made
      about the completion of messages sent after the flush call begins.

      Arguments:
          timeout (float, optional): timeout in seconds to wait for completion.

      Raises:
          KafkaTimeoutError: failure to flush buffered records within the
              provided timeout
      """
      log.debug("Flushing accumulated records in producer.")  # trace
      self._accumulator.begin_flush()
      # Wake the sender so it picks up the flush without waiting.
      self._sender.wakeup()
      self._accumulator.await_flush_completion(timeout=timeout)
  585. def _ensure_valid_record_size(self, size):
  586. """Validate that the record size isn't too large."""
  587. if size > self.config['max_request_size']:
  588. raise Errors.MessageSizeTooLargeError(
  589. "The message is %d bytes when serialized which is larger than"
  590. " the maximum request size you have configured with the"
  591. " max_request_size configuration" % (size,))
  592. if size > self.config['buffer_memory']:
  593. raise Errors.MessageSizeTooLargeError(
  594. "The message is %d bytes when serialized which is larger than"
  595. " the total memory buffer you have configured with the"
  596. " buffer_memory configuration." % (size,))
  597. def _wait_on_metadata(self, topic, max_wait):
  598. """
  599. Wait for cluster metadata including partitions for the given topic to
  600. be available.
  601. Arguments:
  602. topic (str): topic we want metadata for
  603. max_wait (float): maximum time in secs for waiting on the metadata
  604. Returns:
  605. set: partition ids for the topic
  606. Raises:
  607. KafkaTimeoutError: if partitions for topic were not obtained before
  608. specified max_wait timeout
  609. """
  610. # add topic to metadata topic list if it is not there already.
  611. self._sender.add_topic(topic)
  612. begin = time.time()
  613. elapsed = 0.0
  614. metadata_event = None
  615. while True:
  616. partitions = self._metadata.partitions_for_topic(topic)
  617. if partitions is not None:
  618. return partitions
  619. if not metadata_event:
  620. metadata_event = threading.Event()
  621. log.debug("Requesting metadata update for topic %s", topic)
  622. metadata_event.clear()
  623. future = self._metadata.request_update()
  624. future.add_both(lambda e, *args: e.set(), metadata_event)
  625. self._sender.wakeup()
  626. metadata_event.wait(max_wait - elapsed)
  627. elapsed = time.time() - begin
  628. if not metadata_event.is_set():
  629. raise Errors.KafkaTimeoutError(
  630. "Failed to update metadata after %.1f secs." % (max_wait,))
  631. elif topic in self._metadata.unauthorized_topics:
  632. raise Errors.TopicAuthorizationFailedError(topic)
  633. else:
  634. log.debug("_wait_on_metadata woke after %s secs.", elapsed)
  635. def _serialize(self, f, topic, data):
  636. if not f:
  637. return data
  638. if isinstance(f, Serializer):
  639. return f.serialize(topic, data)
  640. return f(data)
  641. def _partition(self, topic, partition, key, value,
  642. serialized_key, serialized_value):
  643. if partition is not None:
  644. assert partition >= 0
  645. assert partition in self._metadata.partitions_for_topic(topic), 'Unrecognized partition'
  646. return partition
  647. all_partitions = sorted(self._metadata.partitions_for_topic(topic))
  648. available = list(self._metadata.available_partitions_for_topic(topic))
  649. return self.config['partitioner'](serialized_key,
  650. all_partitions,
  651. available)
  def metrics(self, raw=False):
      """Get metrics on producer performance.

      This is ported from the Java Producer, for details see:
      https://kafka.apache.org/documentation/#producer_monitoring

      Arguments:
          raw (bool): if True, return a copy of the underlying metrics
              mapping instead of the nested dict. Default: False.

      Returns:
          dict: {group: {name: value}} built from each metric's value(),
          or the copied raw mapping when raw is True.

      Warning:
          This is an unstable interface. It may change in future
          releases without warning.
      """
      if raw:
          return self._metrics.metrics.copy()

      grouped = {}
      # Iterate over a copy of the metrics mapping, as the original did.
      for k, v in self._metrics.metrics.copy().items():
          grouped.setdefault(k.group, {})[k.name] = v.value()
      return grouped