m2m模型翻译
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

351 lines
13 KiB

6 months ago
  1. # Human friendly input/output in Python.
  2. #
  3. # Author: Peter Odding <peter@peterodding.com>
  4. # Last Change: June 11, 2021
  5. # URL: https://humanfriendly.readthedocs.io
  6. """
  7. Parsing and reformatting of usage messages.
  8. The :mod:`~humanfriendly.usage` module parses and reformats usage messages:
  9. - The :func:`format_usage()` function takes a usage message and inserts ANSI
  10. escape sequences that highlight items of special significance like command
  11. line options, meta variables, etc. The resulting usage message is (intended
  12. to be) easier to read on a terminal.
  13. - The :func:`render_usage()` function takes a usage message and rewrites it to
  14. reStructuredText_ suitable for inclusion in the documentation of a Python
  15. package. This provides a DRY solution to keeping a single authoritative
  16. definition of the usage message while making it easily available in
  17. documentation. As a cherry on the cake it's not just a pre-formatted dump of
  18. the usage message but a nicely formatted reStructuredText_ fragment.
  19. - The remaining functions in this module support the two functions above.
  20. Usage messages in general are free format of course, however the functions in
  21. this module assume a certain structure from usage messages in order to
  22. successfully parse and reformat them, refer to :func:`parse_usage()` for
  23. details.
  24. .. _DRY: https://en.wikipedia.org/wiki/Don%27t_repeat_yourself
  25. .. _reStructuredText: https://en.wikipedia.org/wiki/ReStructuredText
  26. """
  27. # Standard library modules.
  28. import csv
  29. import functools
  30. import logging
  31. import re
  32. # Standard library module or external dependency (see setup.py).
  33. from importlib import import_module
  34. # Modules included in our package.
  35. from humanfriendly.compat import StringIO
  36. from humanfriendly.text import dedent, split_paragraphs, trim_empty_lines
# Public identifiers that require documentation.
# NOTE: START_OF_OPTIONS_MARKER, USAGE_PATTERN and OPTION_PATTERN are defined
# below but are not listed here.
__all__ = (
'find_meta_variables',
'format_usage',
'import_module', # previously exported (backwards compatibility)
'inject_usage',
'parse_usage',
'render_usage',
'USAGE_MARKER',
)
# Prefix used to recognize the "Usage: ..." line of a usage message.
USAGE_MARKER = "Usage:"
"""The string that starts the first line of a usage message."""
# Paragraph that separates the introduction from the documented options.
START_OF_OPTIONS_MARKER = "Supported options:"
"""The string that marks the start of the documented command line options."""
# Compiled regular expression used to tokenize usage messages.
# It is compiled with re.VERBOSE, so the whitespace and the "#" comments
# inside the pattern are ignored by the regular expression engine. A match
# is one of: a command line option (optionally with "=VALUE"), an
# environment variable ("$NAME") or a word of two or more characters that
# starts with an upper case letter and continues with upper case letters,
# digits or underscores.
USAGE_PATTERN = re.compile(r'''
# Make sure whatever we're matching isn't preceded by a non-whitespace
# character.
(?<!\S)
(
# A short command line option or a long command line option
# (possibly including a meta variable for a value).
(-\w|--\w+(-\w+)*(=\S+)?)
# Or ...
|
# An environment variable.
\$[A-Za-z_][A-Za-z0-9_]*
# Or ...
|
# Might be a meta variable (usage() will figure it out).
[A-Z][A-Z0-9_]+
)
''', re.VERBOSE)
# Compiled regular expression used to recognize options.
# This is the option alternative of USAGE_PATTERN anchored with "^" and "$",
# so it is meant to be matched against one candidate token at a time.
OPTION_PATTERN = re.compile(r'^(-\w|--\w+(-\w+)*(=\S+)?)$')
# Initialize a logger for this module.
logger = logging.getLogger(__name__)
  74. def format_usage(usage_text):
  75. """
  76. Highlight special items in a usage message.
  77. :param usage_text: The usage message to process (a string).
  78. :returns: The usage message with special items highlighted.
  79. This function highlights the following special items:
  80. - The initial line of the form "Usage: ..."
  81. - Short and long command line options
  82. - Environment variables
  83. - Meta variables (see :func:`find_meta_variables()`)
  84. All items are highlighted in the color defined by
  85. :data:`.HIGHLIGHT_COLOR`.
  86. """
  87. # Ugly workaround to avoid circular import errors due to interdependencies
  88. # between the humanfriendly.terminal and humanfriendly.usage modules.
  89. from humanfriendly.terminal import ansi_wrap, HIGHLIGHT_COLOR
  90. formatted_lines = []
  91. meta_variables = find_meta_variables(usage_text)
  92. for line in usage_text.strip().splitlines(True):
  93. if line.startswith(USAGE_MARKER):
  94. # Highlight the "Usage: ..." line in bold font and color.
  95. formatted_lines.append(ansi_wrap(line, color=HIGHLIGHT_COLOR))
  96. else:
  97. # Highlight options, meta variables and environment variables.
  98. formatted_lines.append(replace_special_tokens(
  99. line, meta_variables,
  100. lambda token: ansi_wrap(token, color=HIGHLIGHT_COLOR),
  101. ))
  102. return ''.join(formatted_lines)
  103. def find_meta_variables(usage_text):
  104. """
  105. Find the meta variables in the given usage message.
  106. :param usage_text: The usage message to parse (a string).
  107. :returns: A list of strings with any meta variables found in the usage
  108. message.
  109. When a command line option requires an argument, the convention is to
  110. format such options as ``--option=ARG``. The text ``ARG`` in this example
  111. is the meta variable.
  112. """
  113. meta_variables = set()
  114. for match in USAGE_PATTERN.finditer(usage_text):
  115. token = match.group(0)
  116. if token.startswith('-'):
  117. option, _, value = token.partition('=')
  118. if value:
  119. meta_variables.add(value)
  120. return list(meta_variables)
  121. def parse_usage(text):
  122. """
  123. Parse a usage message by inferring its structure (and making some assumptions :-).
  124. :param text: The usage message to parse (a string).
  125. :returns: A tuple of two lists:
  126. 1. A list of strings with the paragraphs of the usage message's
  127. "introduction" (the paragraphs before the documentation of the
  128. supported command line options).
  129. 2. A list of strings with pairs of command line options and their
  130. descriptions: Item zero is a line listing a supported command
  131. line option, item one is the description of that command line
  132. option, item two is a line listing another supported command
  133. line option, etc.
  134. Usage messages in general are free format of course, however
  135. :func:`parse_usage()` assume a certain structure from usage messages in
  136. order to successfully parse them:
  137. - The usage message starts with a line ``Usage: ...`` that shows a symbolic
  138. representation of the way the program is to be invoked.
  139. - After some free form text a line ``Supported options:`` (surrounded by
  140. empty lines) precedes the documentation of the supported command line
  141. options.
  142. - The command line options are documented as follows::
  143. -v, --verbose
  144. Make more noise.
  145. So all of the variants of the command line option are shown together on a
  146. separate line, followed by one or more paragraphs describing the option.
  147. - There are several other minor assumptions, but to be honest I'm not sure if
  148. anyone other than me is ever going to use this functionality, so for now I
  149. won't list every intricate detail :-).
  150. If you're curious anyway, refer to the usage message of the `humanfriendly`
  151. package (defined in the :mod:`humanfriendly.cli` module) and compare it with
  152. the usage message you see when you run ``humanfriendly --help`` and the
  153. generated usage message embedded in the readme.
  154. Feel free to request more detailed documentation if you're interested in
  155. using the :mod:`humanfriendly.usage` module outside of the little ecosystem
  156. of Python packages that I have been building over the past years.
  157. """
  158. introduction = []
  159. documented_options = []
  160. # Split the raw usage message into paragraphs.
  161. paragraphs = split_paragraphs(text)
  162. # Get the paragraphs that are part of the introduction.
  163. while paragraphs:
  164. # Check whether we've found the end of the introduction.
  165. end_of_intro = (paragraphs[0] == START_OF_OPTIONS_MARKER)
  166. # Append the current paragraph to the introduction.
  167. introduction.append(paragraphs.pop(0))
  168. # Stop after we've processed the complete introduction.
  169. if end_of_intro:
  170. break
  171. logger.debug("Parsed introduction: %s", introduction)
  172. # Parse the paragraphs that document command line options.
  173. while paragraphs:
  174. documented_options.append(dedent(paragraphs.pop(0)))
  175. description = []
  176. while paragraphs:
  177. # Check if the next paragraph starts the documentation of another
  178. # command line option. We split on a comma followed by a space so
  179. # that our parsing doesn't trip up when the label used for an
  180. # option's value contains commas.
  181. tokens = [t.strip() for t in re.split(r',\s', paragraphs[0]) if t and not t.isspace()]
  182. if all(OPTION_PATTERN.match(t) for t in tokens):
  183. break
  184. else:
  185. description.append(paragraphs.pop(0))
  186. # Join the description's paragraphs back together so we can remove
  187. # common leading indentation.
  188. documented_options.append(dedent('\n\n'.join(description)))
  189. logger.debug("Parsed options: %s", documented_options)
  190. return introduction, documented_options
  191. def render_usage(text):
  192. """
  193. Reformat a command line program's usage message to reStructuredText_.
  194. :param text: The plain text usage message (a string).
  195. :returns: The usage message rendered to reStructuredText_ (a string).
  196. """
  197. meta_variables = find_meta_variables(text)
  198. introduction, options = parse_usage(text)
  199. output = [render_paragraph(p, meta_variables) for p in introduction]
  200. if options:
  201. output.append('\n'.join([
  202. '.. csv-table::',
  203. ' :header: Option, Description',
  204. ' :widths: 30, 70',
  205. '',
  206. ]))
  207. csv_buffer = StringIO()
  208. csv_writer = csv.writer(csv_buffer)
  209. while options:
  210. variants = options.pop(0)
  211. description = options.pop(0)
  212. csv_writer.writerow([
  213. render_paragraph(variants, meta_variables),
  214. ('\n\n'.join(render_paragraph(p, meta_variables) for p in split_paragraphs(description))).rstrip(),
  215. ])
  216. csv_lines = csv_buffer.getvalue().splitlines()
  217. output.append('\n'.join(' %s' % line for line in csv_lines))
  218. logger.debug("Rendered output: %s", output)
  219. return '\n\n'.join(trim_empty_lines(o) for o in output)
  220. def inject_usage(module_name):
  221. """
  222. Use cog_ to inject a usage message into a reStructuredText_ file.
  223. :param module_name: The name of the module whose ``__doc__`` attribute is
  224. the source of the usage message (a string).
  225. This simple wrapper around :func:`render_usage()` makes it very easy to
  226. inject a reformatted usage message into your documentation using cog_. To
  227. use it you add a fragment like the following to your ``*.rst`` file::
  228. .. [[[cog
  229. .. from humanfriendly.usage import inject_usage
  230. .. inject_usage('humanfriendly.cli')
  231. .. ]]]
  232. .. [[[end]]]
  233. The lines in the fragment above are single line reStructuredText_ comments
  234. that are not copied to the output. Their purpose is to instruct cog_ where
  235. to inject the reformatted usage message. Once you've added these lines to
  236. your ``*.rst`` file, updating the rendered usage message becomes really
  237. simple thanks to cog_:
  238. .. code-block:: sh
  239. $ cog.py -r README.rst
  240. This will inject or replace the rendered usage message in your
  241. ``README.rst`` file with an up to date copy.
  242. .. _cog: http://nedbatchelder.com/code/cog/
  243. """
  244. import cog
  245. usage_text = import_module(module_name).__doc__
  246. cog.out("\n" + render_usage(usage_text) + "\n\n")
  247. def render_paragraph(paragraph, meta_variables):
  248. # Reformat the "Usage:" line to highlight "Usage:" in bold and show the
  249. # remainder of the line as pre-formatted text.
  250. if paragraph.startswith(USAGE_MARKER):
  251. tokens = paragraph.split()
  252. return "**%s** `%s`" % (tokens[0], ' '.join(tokens[1:]))
  253. # Reformat the "Supported options:" line to highlight it in bold.
  254. if paragraph == 'Supported options:':
  255. return "**%s**" % paragraph
  256. # Reformat shell transcripts into code blocks.
  257. if re.match(r'^\s*\$\s+\S', paragraph):
  258. # Split the paragraph into lines.
  259. lines = paragraph.splitlines()
  260. # Check if the paragraph is already indented.
  261. if not paragraph[0].isspace():
  262. # If the paragraph isn't already indented we'll indent it now.
  263. lines = [' %s' % line for line in lines]
  264. lines.insert(0, '.. code-block:: sh')
  265. lines.insert(1, '')
  266. return "\n".join(lines)
  267. # The following reformatting applies only to paragraphs which are not
  268. # indented. Yes this is a hack - for now we assume that indented paragraphs
  269. # are code blocks, even though this assumption can be wrong.
  270. if not paragraph[0].isspace():
  271. # Change UNIX style `quoting' so it doesn't trip up DocUtils.
  272. paragraph = re.sub("`(.+?)'", r'"\1"', paragraph)
  273. # Escape asterisks.
  274. paragraph = paragraph.replace('*', r'\*')
  275. # Reformat inline tokens.
  276. paragraph = replace_special_tokens(
  277. paragraph, meta_variables,
  278. lambda token: '``%s``' % token,
  279. )
  280. return paragraph
  281. def replace_special_tokens(text, meta_variables, replace_fn):
  282. return USAGE_PATTERN.sub(functools.partial(
  283. replace_tokens_callback,
  284. meta_variables=meta_variables,
  285. replace_fn=replace_fn
  286. ), text)
  287. def replace_tokens_callback(match, meta_variables, replace_fn):
  288. token = match.group(0)
  289. if not (re.match('^[A-Z][A-Z0-9_]+$', token) and token not in meta_variables):
  290. token = replace_fn(token)
  291. return token