图片解析应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

285 lines
8.5 KiB

  1. import logging
  2. import mimetypes
  3. import os
  4. from collections import defaultdict
  5. from typing import Callable, Dict, Iterable, List, Optional, Tuple
  6. from pip._vendor.packaging.utils import (
  7. InvalidSdistFilename,
  8. InvalidVersion,
  9. InvalidWheelFilename,
  10. canonicalize_name,
  11. parse_sdist_filename,
  12. parse_wheel_filename,
  13. )
  14. from pip._internal.models.candidate import InstallationCandidate
  15. from pip._internal.models.link import Link
  16. from pip._internal.utils.urls import path_to_url, url_to_path
  17. from pip._internal.vcs import is_url
  18. logger = logging.getLogger(__name__)
  19. FoundCandidates = Iterable[InstallationCandidate]
  20. FoundLinks = Iterable[Link]
  21. CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
  22. PageValidator = Callable[[Link], bool]
  23. class LinkSource:
  24. @property
  25. def link(self) -> Optional[Link]:
  26. """Returns the underlying link, if there's one."""
  27. raise NotImplementedError()
  28. def page_candidates(self) -> FoundCandidates:
  29. """Candidates found by parsing an archive listing HTML file."""
  30. raise NotImplementedError()
  31. def file_links(self) -> FoundLinks:
  32. """Links found by specifying archives directly."""
  33. raise NotImplementedError()
  34. def _is_html_file(file_url: str) -> bool:
  35. return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
  36. class _FlatDirectoryToUrls:
  37. """Scans directory and caches results"""
  38. def __init__(self, path: str) -> None:
  39. self._path = path
  40. self._page_candidates: List[str] = []
  41. self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
  42. self._scanned_directory = False
  43. def _scan_directory(self) -> None:
  44. """Scans directory once and populates both page_candidates
  45. and project_name_to_urls at the same time
  46. """
  47. for entry in os.scandir(self._path):
  48. url = path_to_url(entry.path)
  49. if _is_html_file(url):
  50. self._page_candidates.append(url)
  51. continue
  52. # File must have a valid wheel or sdist name,
  53. # otherwise not worth considering as a package
  54. try:
  55. project_filename = parse_wheel_filename(entry.name)[0]
  56. except (InvalidWheelFilename, InvalidVersion):
  57. try:
  58. project_filename = parse_sdist_filename(entry.name)[0]
  59. except (InvalidSdistFilename, InvalidVersion):
  60. continue
  61. self._project_name_to_urls[project_filename].append(url)
  62. self._scanned_directory = True
  63. @property
  64. def page_candidates(self) -> List[str]:
  65. if not self._scanned_directory:
  66. self._scan_directory()
  67. return self._page_candidates
  68. @property
  69. def project_name_to_urls(self) -> Dict[str, List[str]]:
  70. if not self._scanned_directory:
  71. self._scan_directory()
  72. return self._project_name_to_urls
  73. class _FlatDirectorySource(LinkSource):
  74. """Link source specified by ``--find-links=<path-to-dir>``.
  75. This looks the content of the directory, and returns:
  76. * ``page_candidates``: Links listed on each HTML file in the directory.
  77. * ``file_candidates``: Archives in the directory.
  78. """
  79. _paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}
  80. def __init__(
  81. self,
  82. candidates_from_page: CandidatesFromPage,
  83. path: str,
  84. project_name: str,
  85. ) -> None:
  86. self._candidates_from_page = candidates_from_page
  87. self._project_name = canonicalize_name(project_name)
  88. # Get existing instance of _FlatDirectoryToUrls if it exists
  89. if path in self._paths_to_urls:
  90. self._path_to_urls = self._paths_to_urls[path]
  91. else:
  92. self._path_to_urls = _FlatDirectoryToUrls(path=path)
  93. self._paths_to_urls[path] = self._path_to_urls
  94. @property
  95. def link(self) -> Optional[Link]:
  96. return None
  97. def page_candidates(self) -> FoundCandidates:
  98. for url in self._path_to_urls.page_candidates:
  99. yield from self._candidates_from_page(Link(url))
  100. def file_links(self) -> FoundLinks:
  101. for url in self._path_to_urls.project_name_to_urls[self._project_name]:
  102. yield Link(url)
  103. class _LocalFileSource(LinkSource):
  104. """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.
  105. If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
  106. the option, it is converted to a URL first. This returns:
  107. * ``page_candidates``: Links listed on an HTML file.
  108. * ``file_candidates``: The non-HTML file.
  109. """
  110. def __init__(
  111. self,
  112. candidates_from_page: CandidatesFromPage,
  113. link: Link,
  114. ) -> None:
  115. self._candidates_from_page = candidates_from_page
  116. self._link = link
  117. @property
  118. def link(self) -> Optional[Link]:
  119. return self._link
  120. def page_candidates(self) -> FoundCandidates:
  121. if not _is_html_file(self._link.url):
  122. return
  123. yield from self._candidates_from_page(self._link)
  124. def file_links(self) -> FoundLinks:
  125. if _is_html_file(self._link.url):
  126. return
  127. yield self._link
  128. class _RemoteFileSource(LinkSource):
  129. """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.
  130. This returns:
  131. * ``page_candidates``: Links listed on an HTML file.
  132. * ``file_candidates``: The non-HTML file.
  133. """
  134. def __init__(
  135. self,
  136. candidates_from_page: CandidatesFromPage,
  137. page_validator: PageValidator,
  138. link: Link,
  139. ) -> None:
  140. self._candidates_from_page = candidates_from_page
  141. self._page_validator = page_validator
  142. self._link = link
  143. @property
  144. def link(self) -> Optional[Link]:
  145. return self._link
  146. def page_candidates(self) -> FoundCandidates:
  147. if not self._page_validator(self._link):
  148. return
  149. yield from self._candidates_from_page(self._link)
  150. def file_links(self) -> FoundLinks:
  151. yield self._link
  152. class _IndexDirectorySource(LinkSource):
  153. """``--[extra-]index-url=<path-to-directory>``.
  154. This is treated like a remote URL; ``candidates_from_page`` contains logic
  155. for this by appending ``index.html`` to the link.
  156. """
  157. def __init__(
  158. self,
  159. candidates_from_page: CandidatesFromPage,
  160. link: Link,
  161. ) -> None:
  162. self._candidates_from_page = candidates_from_page
  163. self._link = link
  164. @property
  165. def link(self) -> Optional[Link]:
  166. return self._link
  167. def page_candidates(self) -> FoundCandidates:
  168. yield from self._candidates_from_page(self._link)
  169. def file_links(self) -> FoundLinks:
  170. return ()
  171. def build_source(
  172. location: str,
  173. *,
  174. candidates_from_page: CandidatesFromPage,
  175. page_validator: PageValidator,
  176. expand_dir: bool,
  177. cache_link_parsing: bool,
  178. project_name: str,
  179. ) -> Tuple[Optional[str], Optional[LinkSource]]:
  180. path: Optional[str] = None
  181. url: Optional[str] = None
  182. if os.path.exists(location): # Is a local path.
  183. url = path_to_url(location)
  184. path = location
  185. elif location.startswith("file:"): # A file: URL.
  186. url = location
  187. path = url_to_path(location)
  188. elif is_url(location):
  189. url = location
  190. if url is None:
  191. msg = (
  192. "Location '%s' is ignored: "
  193. "it is either a non-existing path or lacks a specific scheme."
  194. )
  195. logger.warning(msg, location)
  196. return (None, None)
  197. if path is None:
  198. source: LinkSource = _RemoteFileSource(
  199. candidates_from_page=candidates_from_page,
  200. page_validator=page_validator,
  201. link=Link(url, cache_link_parsing=cache_link_parsing),
  202. )
  203. return (url, source)
  204. if os.path.isdir(path):
  205. if expand_dir:
  206. source = _FlatDirectorySource(
  207. candidates_from_page=candidates_from_page,
  208. path=path,
  209. project_name=project_name,
  210. )
  211. else:
  212. source = _IndexDirectorySource(
  213. candidates_from_page=candidates_from_page,
  214. link=Link(url, cache_link_parsing=cache_link_parsing),
  215. )
  216. return (url, source)
  217. elif os.path.isfile(path):
  218. source = _LocalFileSource(
  219. candidates_from_page=candidates_from_page,
  220. link=Link(url, cache_link_parsing=cache_link_parsing),
  221. )
  222. return (url, source)
  223. logger.warning(
  224. "Location '%s' is ignored: it is neither a file nor a directory.",
  225. location,
  226. )
  227. return (url, None)