m2m模型翻译
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

285 lines
8.5 KiB

6 months ago
  1. import logging
  2. import mimetypes
  3. import os
  4. from collections import defaultdict
  5. from typing import Callable, Dict, Iterable, List, Optional, Tuple
  6. from pip._vendor.packaging.utils import (
  7. InvalidSdistFilename,
  8. InvalidVersion,
  9. InvalidWheelFilename,
  10. canonicalize_name,
  11. parse_sdist_filename,
  12. parse_wheel_filename,
  13. )
  14. from pip._internal.models.candidate import InstallationCandidate
  15. from pip._internal.models.link import Link
  16. from pip._internal.utils.urls import path_to_url, url_to_path
  17. from pip._internal.vcs import is_url
  18. logger = logging.getLogger(__name__)
  19. FoundCandidates = Iterable[InstallationCandidate]
  20. FoundLinks = Iterable[Link]
  21. CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
  22. PageValidator = Callable[[Link], bool]
  23. class LinkSource:
  24. @property
  25. def link(self) -> Optional[Link]:
  26. """Returns the underlying link, if there's one."""
  27. raise NotImplementedError()
  28. def page_candidates(self) -> FoundCandidates:
  29. """Candidates found by parsing an archive listing HTML file."""
  30. raise NotImplementedError()
  31. def file_links(self) -> FoundLinks:
  32. """Links found by specifying archives directly."""
  33. raise NotImplementedError()
  34. def _is_html_file(file_url: str) -> bool:
  35. return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
  36. class _FlatDirectoryToUrls:
  37. """Scans directory and caches results"""
  38. def __init__(self, path: str) -> None:
  39. self._path = path
  40. self._page_candidates: List[str] = []
  41. self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
  42. self._scanned_directory = False
  43. def _scan_directory(self) -> None:
  44. """Scans directory once and populates both page_candidates
  45. and project_name_to_urls at the same time
  46. """
  47. for entry in os.scandir(self._path):
  48. url = path_to_url(entry.path)
  49. if _is_html_file(url):
  50. self._page_candidates.append(url)
  51. continue
  52. # File must have a valid wheel or sdist name,
  53. # otherwise not worth considering as a package
  54. try:
  55. project_filename = parse_wheel_filename(entry.name)[0]
  56. except (InvalidWheelFilename, InvalidVersion):
  57. try:
  58. project_filename = parse_sdist_filename(entry.name)[0]
  59. except (InvalidSdistFilename, InvalidVersion):
  60. continue
  61. self._project_name_to_urls[project_filename].append(url)
  62. self._scanned_directory = True
  63. @property
  64. def page_candidates(self) -> List[str]:
  65. if not self._scanned_directory:
  66. self._scan_directory()
  67. return self._page_candidates
  68. @property
  69. def project_name_to_urls(self) -> Dict[str, List[str]]:
  70. if not self._scanned_directory:
  71. self._scan_directory()
  72. return self._project_name_to_urls
  73. class _FlatDirectorySource(LinkSource):
  74. """Link source specified by ``--find-links=<path-to-dir>``.
  75. This looks the content of the directory, and returns:
  76. * ``page_candidates``: Links listed on each HTML file in the directory.
  77. * ``file_candidates``: Archives in the directory.
  78. """
  79. _paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}
  80. def __init__(
  81. self,
  82. candidates_from_page: CandidatesFromPage,
  83. path: str,
  84. project_name: str,
  85. ) -> None:
  86. self._candidates_from_page = candidates_from_page
  87. self._project_name = canonicalize_name(project_name)
  88. # Get existing instance of _FlatDirectoryToUrls if it exists
  89. if path in self._paths_to_urls:
  90. self._path_to_urls = self._paths_to_urls[path]
  91. else:
  92. self._path_to_urls = _FlatDirectoryToUrls(path=path)
  93. self._paths_to_urls[path] = self._path_to_urls
  94. @property
  95. def link(self) -> Optional[Link]:
  96. return None
  97. def page_candidates(self) -> FoundCandidates:
  98. for url in self._path_to_urls.page_candidates:
  99. yield from self._candidates_from_page(Link(url))
  100. def file_links(self) -> FoundLinks:
  101. for url in self._path_to_urls.project_name_to_urls[self._project_name]:
  102. yield Link(url)
  103. class _LocalFileSource(LinkSource):
  104. """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.
  105. If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
  106. the option, it is converted to a URL first. This returns:
  107. * ``page_candidates``: Links listed on an HTML file.
  108. * ``file_candidates``: The non-HTML file.
  109. """
  110. def __init__(
  111. self,
  112. candidates_from_page: CandidatesFromPage,
  113. link: Link,
  114. ) -> None:
  115. self._candidates_from_page = candidates_from_page
  116. self._link = link
  117. @property
  118. def link(self) -> Optional[Link]:
  119. return self._link
  120. def page_candidates(self) -> FoundCandidates:
  121. if not _is_html_file(self._link.url):
  122. return
  123. yield from self._candidates_from_page(self._link)
  124. def file_links(self) -> FoundLinks:
  125. if _is_html_file(self._link.url):
  126. return
  127. yield self._link
  128. class _RemoteFileSource(LinkSource):
  129. """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.
  130. This returns:
  131. * ``page_candidates``: Links listed on an HTML file.
  132. * ``file_candidates``: The non-HTML file.
  133. """
  134. def __init__(
  135. self,
  136. candidates_from_page: CandidatesFromPage,
  137. page_validator: PageValidator,
  138. link: Link,
  139. ) -> None:
  140. self._candidates_from_page = candidates_from_page
  141. self._page_validator = page_validator
  142. self._link = link
  143. @property
  144. def link(self) -> Optional[Link]:
  145. return self._link
  146. def page_candidates(self) -> FoundCandidates:
  147. if not self._page_validator(self._link):
  148. return
  149. yield from self._candidates_from_page(self._link)
  150. def file_links(self) -> FoundLinks:
  151. yield self._link
  152. class _IndexDirectorySource(LinkSource):
  153. """``--[extra-]index-url=<path-to-directory>``.
  154. This is treated like a remote URL; ``candidates_from_page`` contains logic
  155. for this by appending ``index.html`` to the link.
  156. """
  157. def __init__(
  158. self,
  159. candidates_from_page: CandidatesFromPage,
  160. link: Link,
  161. ) -> None:
  162. self._candidates_from_page = candidates_from_page
  163. self._link = link
  164. @property
  165. def link(self) -> Optional[Link]:
  166. return self._link
  167. def page_candidates(self) -> FoundCandidates:
  168. yield from self._candidates_from_page(self._link)
  169. def file_links(self) -> FoundLinks:
  170. return ()
  171. def build_source(
  172. location: str,
  173. *,
  174. candidates_from_page: CandidatesFromPage,
  175. page_validator: PageValidator,
  176. expand_dir: bool,
  177. cache_link_parsing: bool,
  178. project_name: str,
  179. ) -> Tuple[Optional[str], Optional[LinkSource]]:
  180. path: Optional[str] = None
  181. url: Optional[str] = None
  182. if os.path.exists(location): # Is a local path.
  183. url = path_to_url(location)
  184. path = location
  185. elif location.startswith("file:"): # A file: URL.
  186. url = location
  187. path = url_to_path(location)
  188. elif is_url(location):
  189. url = location
  190. if url is None:
  191. msg = (
  192. "Location '%s' is ignored: "
  193. "it is either a non-existing path or lacks a specific scheme."
  194. )
  195. logger.warning(msg, location)
  196. return (None, None)
  197. if path is None:
  198. source: LinkSource = _RemoteFileSource(
  199. candidates_from_page=candidates_from_page,
  200. page_validator=page_validator,
  201. link=Link(url, cache_link_parsing=cache_link_parsing),
  202. )
  203. return (url, source)
  204. if os.path.isdir(path):
  205. if expand_dir:
  206. source = _FlatDirectorySource(
  207. candidates_from_page=candidates_from_page,
  208. path=path,
  209. project_name=project_name,
  210. )
  211. else:
  212. source = _IndexDirectorySource(
  213. candidates_from_page=candidates_from_page,
  214. link=Link(url, cache_link_parsing=cache_link_parsing),
  215. )
  216. return (url, source)
  217. elif os.path.isfile(path):
  218. source = _LocalFileSource(
  219. candidates_from_page=candidates_from_page,
  220. link=Link(url, cache_link_parsing=cache_link_parsing),
  221. )
  222. return (url, source)
  223. logger.warning(
  224. "Location '%s' is ignored: it is neither a file nor a directory.",
  225. location,
  226. )
  227. return (url, None)