m2m模型翻译
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

119 lines
4.2 KiB

6 months ago
  1. # SPDX-FileCopyrightText: 2015 Eric Larson
  2. #
  3. # SPDX-License-Identifier: Apache-2.0
  4. from __future__ import annotations
  5. import mmap
  6. from tempfile import NamedTemporaryFile
  7. from typing import TYPE_CHECKING, Any, Callable
  8. if TYPE_CHECKING:
  9. from http.client import HTTPResponse
  10. class CallbackFileWrapper:
  11. """
  12. Small wrapper around a fp object which will tee everything read into a
  13. buffer, and when that file is closed it will execute a callback with the
  14. contents of that buffer.
  15. All attributes are proxied to the underlying file object.
  16. This class uses members with a double underscore (__) leading prefix so as
  17. not to accidentally shadow an attribute.
  18. The data is stored in a temporary file until it is all available. As long
  19. as the temporary files directory is disk-based (sometimes it's a
  20. memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
  21. pressure is high. For small files the disk usually won't be used at all,
  22. it'll all be in the filesystem memory cache, so there should be no
  23. performance impact.
  24. """
  25. def __init__(
  26. self, fp: HTTPResponse, callback: Callable[[bytes], None] | None
  27. ) -> None:
  28. self.__buf = NamedTemporaryFile("rb+", delete=True)
  29. self.__fp = fp
  30. self.__callback = callback
  31. def __getattr__(self, name: str) -> Any:
  32. # The vaguaries of garbage collection means that self.__fp is
  33. # not always set. By using __getattribute__ and the private
  34. # name[0] allows looking up the attribute value and raising an
  35. # AttributeError when it doesn't exist. This stop thigns from
  36. # infinitely recursing calls to getattr in the case where
  37. # self.__fp hasn't been set.
  38. #
  39. # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
  40. fp = self.__getattribute__("_CallbackFileWrapper__fp")
  41. return getattr(fp, name)
  42. def __is_fp_closed(self) -> bool:
  43. try:
  44. return self.__fp.fp is None
  45. except AttributeError:
  46. pass
  47. try:
  48. closed: bool = self.__fp.closed
  49. return closed
  50. except AttributeError:
  51. pass
  52. # We just don't cache it then.
  53. # TODO: Add some logging here...
  54. return False
  55. def _close(self) -> None:
  56. if self.__callback:
  57. if self.__buf.tell() == 0:
  58. # Empty file:
  59. result = b""
  60. else:
  61. # Return the data without actually loading it into memory,
  62. # relying on Python's buffer API and mmap(). mmap() just gives
  63. # a view directly into the filesystem's memory cache, so it
  64. # doesn't result in duplicate memory use.
  65. self.__buf.seek(0, 0)
  66. result = memoryview(
  67. mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
  68. )
  69. self.__callback(result)
  70. # We assign this to None here, because otherwise we can get into
  71. # really tricky problems where the CPython interpreter dead locks
  72. # because the callback is holding a reference to something which
  73. # has a __del__ method. Setting this to None breaks the cycle
  74. # and allows the garbage collector to do it's thing normally.
  75. self.__callback = None
  76. # Closing the temporary file releases memory and frees disk space.
  77. # Important when caching big files.
  78. self.__buf.close()
  79. def read(self, amt: int | None = None) -> bytes:
  80. data: bytes = self.__fp.read(amt)
  81. if data:
  82. # We may be dealing with b'', a sign that things are over:
  83. # it's passed e.g. after we've already closed self.__buf.
  84. self.__buf.write(data)
  85. if self.__is_fp_closed():
  86. self._close()
  87. return data
  88. def _safe_read(self, amt: int) -> bytes:
  89. data: bytes = self.__fp._safe_read(amt) # type: ignore[attr-defined]
  90. if amt == 2 and data == b"\r\n":
  91. # urllib executes this read to toss the CRLF at the end
  92. # of the chunk.
  93. return data
  94. self.__buf.write(data)
  95. if self.__is_fp_closed():
  96. self._close()
  97. return data