图片解析应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

189 lines
5.6 KiB

  1. import redis
  2. from ...asyncio.client import Pipeline as AsyncioPipeline
  3. from .commands import (
  4. AGGREGATE_CMD,
  5. CONFIG_CMD,
  6. INFO_CMD,
  7. PROFILE_CMD,
  8. SEARCH_CMD,
  9. SPELLCHECK_CMD,
  10. SYNDUMP_CMD,
  11. AsyncSearchCommands,
  12. SearchCommands,
  13. )
  14. class Search(SearchCommands):
  15. """
  16. Create a client for talking to search.
  17. It abstracts the API of the module and lets you just use the engine.
  18. """
  19. class BatchIndexer:
  20. """
  21. A batch indexer allows you to automatically batch
  22. document indexing in pipelines, flushing it every N documents.
  23. """
  24. def __init__(self, client, chunk_size=1000):
  25. self.client = client
  26. self.execute_command = client.execute_command
  27. self._pipeline = client.pipeline(transaction=False, shard_hint=None)
  28. self.total = 0
  29. self.chunk_size = chunk_size
  30. self.current_chunk = 0
  31. def __del__(self):
  32. if self.current_chunk:
  33. self.commit()
  34. def add_document(
  35. self,
  36. doc_id,
  37. nosave=False,
  38. score=1.0,
  39. payload=None,
  40. replace=False,
  41. partial=False,
  42. no_create=False,
  43. **fields,
  44. ):
  45. """
  46. Add a document to the batch query
  47. """
  48. self.client._add_document(
  49. doc_id,
  50. conn=self._pipeline,
  51. nosave=nosave,
  52. score=score,
  53. payload=payload,
  54. replace=replace,
  55. partial=partial,
  56. no_create=no_create,
  57. **fields,
  58. )
  59. self.current_chunk += 1
  60. self.total += 1
  61. if self.current_chunk >= self.chunk_size:
  62. self.commit()
  63. def add_document_hash(self, doc_id, score=1.0, replace=False):
  64. """
  65. Add a hash to the batch query
  66. """
  67. self.client._add_document_hash(
  68. doc_id, conn=self._pipeline, score=score, replace=replace
  69. )
  70. self.current_chunk += 1
  71. self.total += 1
  72. if self.current_chunk >= self.chunk_size:
  73. self.commit()
  74. def commit(self):
  75. """
  76. Manually commit and flush the batch indexing query
  77. """
  78. self._pipeline.execute()
  79. self.current_chunk = 0
  80. def __init__(self, client, index_name="idx"):
  81. """
  82. Create a new Client for the given index_name.
  83. The default name is `idx`
  84. If conn is not None, we employ an already existing redis connection
  85. """
  86. self._MODULE_CALLBACKS = {}
  87. self.client = client
  88. self.index_name = index_name
  89. self.execute_command = client.execute_command
  90. self._pipeline = client.pipeline
  91. self._RESP2_MODULE_CALLBACKS = {
  92. INFO_CMD: self._parse_info,
  93. SEARCH_CMD: self._parse_search,
  94. AGGREGATE_CMD: self._parse_aggregate,
  95. PROFILE_CMD: self._parse_profile,
  96. SPELLCHECK_CMD: self._parse_spellcheck,
  97. CONFIG_CMD: self._parse_config_get,
  98. SYNDUMP_CMD: self._parse_syndump,
  99. }
  100. def pipeline(self, transaction=True, shard_hint=None):
  101. """Creates a pipeline for the SEARCH module, that can be used for executing
  102. SEARCH commands, as well as classic core commands.
  103. """
  104. p = Pipeline(
  105. connection_pool=self.client.connection_pool,
  106. response_callbacks=self._MODULE_CALLBACKS,
  107. transaction=transaction,
  108. shard_hint=shard_hint,
  109. )
  110. p.index_name = self.index_name
  111. return p
  112. class AsyncSearch(Search, AsyncSearchCommands):
  113. class BatchIndexer(Search.BatchIndexer):
  114. """
  115. A batch indexer allows you to automatically batch
  116. document indexing in pipelines, flushing it every N documents.
  117. """
  118. async def add_document(
  119. self,
  120. doc_id,
  121. nosave=False,
  122. score=1.0,
  123. payload=None,
  124. replace=False,
  125. partial=False,
  126. no_create=False,
  127. **fields,
  128. ):
  129. """
  130. Add a document to the batch query
  131. """
  132. self.client._add_document(
  133. doc_id,
  134. conn=self._pipeline,
  135. nosave=nosave,
  136. score=score,
  137. payload=payload,
  138. replace=replace,
  139. partial=partial,
  140. no_create=no_create,
  141. **fields,
  142. )
  143. self.current_chunk += 1
  144. self.total += 1
  145. if self.current_chunk >= self.chunk_size:
  146. await self.commit()
  147. async def commit(self):
  148. """
  149. Manually commit and flush the batch indexing query
  150. """
  151. await self._pipeline.execute()
  152. self.current_chunk = 0
  153. def pipeline(self, transaction=True, shard_hint=None):
  154. """Creates a pipeline for the SEARCH module, that can be used for executing
  155. SEARCH commands, as well as classic core commands.
  156. """
  157. p = AsyncPipeline(
  158. connection_pool=self.client.connection_pool,
  159. response_callbacks=self._MODULE_CALLBACKS,
  160. transaction=transaction,
  161. shard_hint=shard_hint,
  162. )
  163. p.index_name = self.index_name
  164. return p
  165. class Pipeline(SearchCommands, redis.client.Pipeline):
  166. """Pipeline for the module."""
  167. class AsyncPipeline(AsyncSearchCommands, AsyncioPipeline, Pipeline):
  168. """AsyncPipeline for the module."""