nixos/make-options-doc: split docbook conversion from mergeJSON

this restores mergeJSON to its former glory if…merging json, and extracts the MD rendering into a new script that will run instead of the py+nix+xslt pipeline we previously ran to convert options.json to docbook. this change alone gives a noticable performance boost when building docs (18s instead of 27s to build optionsDocBook). no changes to rendered output, except for a single example in the rsnapshot module that uses hard tabs for indentation instead of spaces. this probably isn't important. docbook warnings remain with mergeJSON since the other processing steps output single files instead of directories. since we'll only keep the check until 23.11 this is probably also not important to fix. also contains a few improvements to error reporting in the MD renderers.
2025-06-10 03:23:29 +03:00 · 2023-01-23 18:37:33 +01:00 · 2023-01-23 18:37:33 +01:00 · 0175a91aa3
commit 0175a91aa3
parent 381dcd7f9d
6 changed files with 383 additions and 500 deletions
--- a/nixos/lib/make-options-doc/mergeJSON.py
+++ b/nixos/lib/make-options-doc/mergeJSON.py
@ -3,19 +3,6 @@ import json
 import os
 import sys
 from typing import Any, Dict, List
-from collections.abc import MutableMapping, Sequence
-import inspect
-
-# for MD conversion
-import markdown_it
-import markdown_it.renderer
-from markdown_it.token import Token
-from markdown_it.utils import OptionsDict
-from mdit_py_plugins.container import container_plugin
-from mdit_py_plugins.deflist import deflist_plugin
-from mdit_py_plugins.myst_role import myst_role_plugin
-import re
-from xml.sax.saxutils import escape, quoteattr

 JSON = Dict[str, Any]

@ -55,236 +42,9 @@ def unpivot(options: Dict[Key, Option]) -> Dict[str, JSON]:
        result[opt.name] = opt.value
    return result

-manpage_urls = json.load(open(os.getenv('MANPAGE_URLS')))
-
-class Renderer(markdown_it.renderer.RendererProtocol):
-    __output__ = "docbook"
-    def __init__(self, parser=None):
-        self.rules = {
-            k: v
-            for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
-            if not (k.startswith("render") or k.startswith("_"))
-        } | {
-            "container_{.note}_open": self._note_open,
-            "container_{.note}_close": self._note_close,
-            "container_{.important}_open": self._important_open,
-            "container_{.important}_close": self._important_close,
-            "container_{.warning}_open": self._warning_open,
-            "container_{.warning}_close": self._warning_close,
-        }
-    def render(self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping) -> str:
-        assert '-link-tag-stack' not in env
-        env['-link-tag-stack'] = []
-        assert '-deflist-stack' not in env
-        env['-deflist-stack'] = []
-        def do_one(i, token):
-            if token.type == "inline":
-                assert token.children is not None
-                return self.renderInline(token.children, options, env)
-            elif token.type in self.rules:
-                return self.rules[token.type](tokens[i], tokens, i, options, env)
-            else:
-                raise NotImplementedError("md token not supported yet", token)
-        return "".join(map(lambda arg: do_one(*arg), enumerate(tokens)))
-    def renderInline(self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping) -> str:
-        # HACK to support docbook links and xrefs. link handling is only necessary because the docbook
-        # manpage stylesheet converts - in urls to a mathematical minus, which may be somewhat incorrect.
-        for i, token in enumerate(tokens):
-            if token.type != 'link_open':
-                continue
-            token.tag = 'link'
-            # turn [](#foo) into xrefs
-            if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close':
-                token.tag = "xref"
-            # turn <x> into links without contents
-            if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
-                tokens[i + 1].content = ''
-
-        def do_one(i, token):
-            if token.type in self.rules:
-                return self.rules[token.type](tokens[i], tokens, i, options, env)
-            else:
-                raise NotImplementedError("md node not supported yet", token)
-        return "".join(map(lambda arg: do_one(*arg), enumerate(tokens)))
-
-    def text(self, token, tokens, i, options, env):
-        return escape(token.content)
-    def paragraph_open(self, token, tokens, i, options, env):
-        return "<para>"
-    def paragraph_close(self, token, tokens, i, options, env):
-        return "</para>"
-    def hardbreak(self, token, tokens, i, options, env):
-        return "<literallayout>\n</literallayout>"
-    def softbreak(self, token, tokens, i, options, env):
-        # should check options.breaks() and emit hard break if so
-        return "\n"
-    def code_inline(self, token, tokens, i, options, env):
-        return f"<literal>{escape(token.content)}</literal>"
-    def code_block(self, token, tokens, i, options, env):
-        return f"<programlisting>{escape(token.content)}</programlisting>"
-    def link_open(self, token, tokens, i, options, env):
-        env['-link-tag-stack'].append(token.tag)
-        (attr, start) = ('linkend', 1) if token.attrs['href'][0] == '#' else ('xlink:href', 0)
-        return f"<{token.tag} {attr}={quoteattr(token.attrs['href'][start:])}>"
-    def link_close(self, token, tokens, i, options, env):
-        return f"</{env['-link-tag-stack'].pop()}>"
-    def list_item_open(self, token, tokens, i, options, env):
-        return "<listitem>"
-    def list_item_close(self, token, tokens, i, options, env):
-        return "</listitem>\n"
-    # HACK open and close para for docbook change size. remove soon.
-    def bullet_list_open(self, token, tokens, i, options, env):
-        return "<para><itemizedlist>\n"
-    def bullet_list_close(self, token, tokens, i, options, env):
-        return "\n</itemizedlist></para>"
-    def em_open(self, token, tokens, i, options, env):
-        return "<emphasis>"
-    def em_close(self, token, tokens, i, options, env):
-        return "</emphasis>"
-    def strong_open(self, token, tokens, i, options, env):
-        return "<emphasis role=\"strong\">"
-    def strong_close(self, token, tokens, i, options, env):
-        return "</emphasis>"
-    def fence(self, token, tokens, i, options, env):
-        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
-        return f"<programlisting{info}>{escape(token.content)}</programlisting>"
-    def blockquote_open(self, token, tokens, i, options, env):
-        return "<para><blockquote>"
-    def blockquote_close(self, token, tokens, i, options, env):
-        return "</blockquote></para>"
-    def _note_open(self, token, tokens, i, options, env):
-        return "<para><note>"
-    def _note_close(self, token, tokens, i, options, env):
-        return "</note></para>"
-    def _important_open(self, token, tokens, i, options, env):
-        return "<para><important>"
-    def _important_close(self, token, tokens, i, options, env):
-        return "</important></para>"
-    def _warning_open(self, token, tokens, i, options, env):
-        return "<para><warning>"
-    def _warning_close(self, token, tokens, i, options, env):
-        return "</warning></para>"
-    # markdown-it emits tokens based on the html syntax tree, but docbook is
-    # slightly different. html has <dl>{<dt/>{<dd/>}}</dl>,
-    # docbook has <variablelist>{<varlistentry><term/><listitem/></varlistentry>}<variablelist>
-    # we have to reject multiple definitions for the same term for time being.
-    def dl_open(self, token, tokens, i, options, env):
-        env['-deflist-stack'].append({})
-        return "<para><variablelist>"
-    def dl_close(self, token, tokens, i, options, env):
-        env['-deflist-stack'].pop()
-        return "</variablelist></para>"
-    def dt_open(self, token, tokens, i, options, env):
-        env['-deflist-stack'][-1]['has-dd'] = False
-        return "<varlistentry><term>"
-    def dt_close(self, token, tokens, i, options, env):
-        return "</term>"
-    def dd_open(self, token, tokens, i, options, env):
-        if env['-deflist-stack'][-1]['has-dd']:
-            raise Exception("multiple definitions per term not supported")
-        env['-deflist-stack'][-1]['has-dd'] = True
-        return "<listitem>"
-    def dd_close(self, token, tokens, i, options, env):
-        return "</listitem></varlistentry>"
-    def myst_role(self, token, tokens, i, options, env):
-        if token.meta['name'] == 'command':
-            return f"<command>{escape(token.content)}</command>"
-        if token.meta['name'] == 'file':
-            return f"<filename>{escape(token.content)}</filename>"
-        if token.meta['name'] == 'var':
-            return f"<varname>{escape(token.content)}</varname>"
-        if token.meta['name'] == 'env':
-            return f"<envar>{escape(token.content)}</envar>"
-        if token.meta['name'] == 'option':
-            return f"<option>{escape(token.content)}</option>"
-        if token.meta['name'] == 'manpage':
-            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
-            section = section[:-1]
-            man = f"{page}({section})"
-            title = f"<refentrytitle>{escape(page)}</refentrytitle>"
-            vol = f"<manvolnum>{escape(section)}</manvolnum>"
-            ref = f"<citerefentry>{title}{vol}</citerefentry>"
-            if man in manpage_urls:
-                return f"<link xlink:href={quoteattr(manpage_urls[man])}>{ref}</link>"
-            else:
-                return ref
-        raise NotImplementedError("md node not supported yet", token)
-
-md = (
-    markdown_it.MarkdownIt(renderer_cls=Renderer)
-    # TODO maybe fork the plugin and have only a single rule for all?
-    .use(container_plugin, name="{.note}")
-    .use(container_plugin, name="{.important}")
-    .use(container_plugin, name="{.warning}")
-    .use(deflist_plugin)
-    .use(myst_role_plugin)
-)
-
-# converts in-place!
-def convertMD(options: Dict[str, Any]) -> str:
-    def convertString(path: str, text: str) -> str:
-        try:
-            rendered = md.render(text)
-            return rendered
-        except:
-            print(f"error in {path}")
-            raise
-
-    def optionIs(option: Dict[str, Any], key: str, typ: str) -> bool:
-        if key not in option: return False
-        if type(option[key]) != dict: return False
-        if '_type' not in option[key]: return False
-        return option[key]['_type'] == typ
-
-    def convertCode(name: str, option: Dict[str, Any], key: str):
-        rendered = f"{key}-db"
-        if optionIs(option, key, 'literalMD'):
-            option[rendered] = convertString(name, f"*{key.capitalize()}:*\n{option[key]['text']}")
-        elif optionIs(option, key, 'literalExpression'):
-            code = option[key]['text']
-            # for multi-line code blocks we only have to count ` runs at the beginning
-            # of a line, but this is much easier.
-            multiline = '\n' in code
-            longest, current = (0, 0)
-            for c in code:
-                current = current + 1 if c == '`' else 0
-                longest = max(current, longest)
-            # inline literals need a space to separate ticks from content, code blocks
-            # need newlines. inline literals need one extra tick, code blocks need three.
-            ticks, sep = ('`' * (longest + (3 if multiline else 1)), '\n' if multiline else ' ')
-            code = f"{ticks}{sep}{code}{sep}{ticks}"
-            option[rendered] = convertString(name, f"*{key.capitalize()}:*\n{code}")
-        elif optionIs(option, key, 'literalDocBook'):
-            option[rendered] = f"<para><emphasis>{key.capitalize()}:</emphasis> {option[key]['text']}</para>"
-        elif key in option:
-            raise Exception(f"{name} {key} has unrecognized type", option[key])
-
-    for (name, option) in options.items():
-        try:
-            if optionIs(option, 'description', 'mdDoc'):
-                option['description'] = convertString(name, option['description']['text'])
-            elif markdownByDefault:
-                option['description'] = convertString(name, option['description'])
-            else:
-                option['description'] = ("<nixos:option-description><para>" +
-                                         option['description'] +
-                                         "</para></nixos:option-description>")
-
-            convertCode(name, option, 'example')
-            convertCode(name, option, 'default')
-
-            if 'relatedPackages' in option:
-                option['relatedPackages'] = convertString(name, option['relatedPackages'])
-        except Exception as e:
-            raise Exception(f"Failed to render option {name}: {str(e)}")
-
-
-    return options
-
 warningsAreErrors = False
 warnOnDocbook = False
 errorOnDocbook = False
-markdownByDefault = False
 optOffset = 0
 for arg in sys.argv[1:]:
    if arg == "--warnings-are-errors":
@ -296,9 +56,6 @@ for arg in sys.argv[1:]:
    elif arg == "--error-on-docbook":
        optOffset += 1
        errorOnDocbook = True
-    if arg == "--markdown-by-default":
-        optOffset += 1
-        markdownByDefault = True

 options = pivot(json.load(open(sys.argv[1 + optOffset], 'r')))
 overrides = pivot(json.load(open(sys.argv[2 + optOffset], 'r')))
@ -404,4 +161,4 @@ if hasWarnings and warningsAreErrors:
        file=sys.stderr)
    sys.exit(1)

-json.dump(convertMD(unpivot(options)), fp=sys.stdout)
+json.dump(unpivot(options), fp=sys.stdout)