import argparse
import html
import json
import re
import xml.sax.saxutils as xml

from abc import abstractmethod
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any, cast, ClassVar, Generic, get_args, NamedTuple

from markdown_it.token import Token

from . import md, options
from .docbook import DocBookRenderer, Heading, make_xml_id
from .html import HTMLRenderer, UnresolvedXrefError
from .manual_structure import check_structure, FragmentType, is_include, TocEntry, TocEntryType, XrefTarget
from .md import Converter, Renderer

class BaseConverter(Converter[md.TR], Generic[md.TR]):
    # per-converter configuration for ns:arg=value arguments to include blocks, following
    # the include type. html converters need something like this to support chunking, or
    # another external method like the chunktocs docbook uses (but block options seem like
    # a much nicer way of doing this).
    INCLUDE_ARGS_NS: ClassVar[str]
    INCLUDE_FRAGMENT_ALLOWED_ARGS: ClassVar[set[str]] = set()
    INCLUDE_OPTIONS_ALLOWED_ARGS: ClassVar[set[str]] = set()

    _base_paths: list[Path]
    _current_type: list[TocEntryType]

    def convert(self, infile: Path, outfile: Path) -> None:
        self._base_paths = [ infile ]
        self._current_type = ['book']
        try:
            tokens = self._parse(infile.read_text())
            self._postprocess(infile, outfile, tokens)
            converted = self._renderer.render(tokens)
            outfile.write_text(converted)
        except Exception as e:
            raise RuntimeError(f"failed to render manual {infile}") from e

    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        pass

    def _parse(self, src: str) -> list[Token]:
        tokens = super()._parse(src)
        check_structure(self._current_type[-1], tokens)
        for token in tokens:
            if not is_include(token):
                continue
            directive = token.info[12:].split()
            if not directive:
                continue
            args = { k: v for k, _sep, v in map(lambda s: s.partition('='), directive[1:]) }
            typ = directive[0]
            if typ == 'options':
                token.type = 'included_options'
                self._process_include_args(token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
                self._parse_options(token, args)
            else:
                fragment_type = typ.removesuffix('s')
                if fragment_type not in get_args(FragmentType):
                    raise RuntimeError(f"unsupported structural include type '{typ}'")
                self._current_type.append(cast(FragmentType, fragment_type))
                token.type = 'included_' + typ
                self._process_include_args(token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
                self._parse_included_blocks(token, args)
                self._current_type.pop()
        return tokens
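
    # illustrative note (added): an include block whose info string is
    #   {=include=} sections html:into-file=//installation.html
    # parses above to directive == ['sections', 'html:into-file=//installation.html']
    # and args == {'html:into-file': '//installation.html'}.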

    def _process_include_args(self, token: Token, args: dict[str, str], allowed: set[str]) -> None:
        ns = self.INCLUDE_ARGS_NS + ":"
        args = { k[len(ns):]: v for k, v in args.items() if k.startswith(ns) }
        if unknown := set(args.keys()) - allowed:
            assert token.map
            raise RuntimeError(f"unrecognized include argument in line {token.map[0] + 1}", unknown)
        token.meta['include-args'] = args
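
    # e.g. (added note) with INCLUDE_ARGS_NS == "html", an argument html:into-file=//foo.html
    # narrows to {'into-file': '//foo.html'}; arguments in other namespaces are dropped
    # here so each converter only validates arguments addressed to it.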

    def _parse_included_blocks(self, token: Token, block_args: dict[str, str]) -> None:
        assert token.map
        included = token.meta['included'] = []
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            line = line.strip()
            path = self._base_paths[-1].parent / line
            if path in self._base_paths:
                raise RuntimeError(f"circular include found in line {lnum}")
            try:
                self._base_paths.append(path)
                with open(path, 'r') as f:
                    tokens = self._parse(f.read())
                included.append((tokens, path))
                self._base_paths.pop()
            except Exception as e:
                raise RuntimeError(f"processing included file {path} from line {lnum}") from e
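
    # the body of a structural include block is one file name per line, resolved
    # relative to the including file, e.g. (illustrative names only):
    #   introduction.chapter.md
    #   installation/installing.chapter.md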

    def _parse_options(self, token: Token, block_args: dict[str, str]) -> None:
        assert token.map

        items = {}
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            if len(args := line.split(":", 1)) != 2:
                raise RuntimeError(f"options directive with no argument in line {lnum}")
            (k, v) = (args[0].strip(), args[1].strip())
            if k in items:
                raise RuntimeError(f"duplicate options directive {k} in line {lnum}")
            items[k] = v
        try:
            id_prefix = items.pop('id-prefix')
            varlist_id = items.pop('list-id')
            source = items.pop('source')
        except KeyError as e:
            raise RuntimeError(f"options directive {e} missing in block at line {token.map[0] + 1}")
        if items.keys():
            raise RuntimeError(
                f"unsupported options directives in block at line {token.map[0] + 1}",
                " ".join(items.keys()))

        try:
            with open(self._base_paths[-1].parent / source, 'r') as f:
                token.meta['id-prefix'] = id_prefix
                token.meta['list-id'] = varlist_id
                token.meta['source'] = json.load(f)
        except Exception as e:
            raise RuntimeError(f"processing options block in line {token.map[0] + 1}") from e
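
    # an options block must contain exactly these three directives, e.g.
    # (illustrative values only):
    #   id-prefix: opt-
    #   list-id: configuration-variable-list
    #   source: ./options.json
    # where source names a JSON options dump relative to the current file.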

class RendererMixin(Renderer):
    _toplevel_tag: str
    _revision: str

    def __init__(self, toplevel_tag: str, revision: str, *args: Any, **kwargs: Any):
        super().__init__(*args, **kwargs)
        self._toplevel_tag = toplevel_tag
        self._revision = revision
        self.rules |= {
            'included_sections': lambda *args: self._included_thing("section", *args),
            'included_chapters': lambda *args: self._included_thing("chapter", *args),
            'included_preface': lambda *args: self._included_thing("preface", *args),
            'included_parts': lambda *args: self._included_thing("part", *args),
            'included_appendix': lambda *args: self._included_thing("appendix", *args),
            'included_options': self.included_options,
        }

    def render(self, tokens: Sequence[Token]) -> str:
        # books get special handling because they have *two* title tags. doing this with
        # generic code is more complicated than it's worth. the checks above have verified
        # that both titles actually exist.
        if self._toplevel_tag == 'book':
            return self._render_book(tokens)

        return super().render(tokens)

    @abstractmethod
    def _render_book(self, tokens: Sequence[Token]) -> str:
        raise NotImplementedError()

    @abstractmethod
    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise NotImplementedError()

    @abstractmethod
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise NotImplementedError()

class ManualDocBookRenderer(RendererMixin, DocBookRenderer):
    def __init__(self, toplevel_tag: str, revision: str, manpage_urls: Mapping[str, str]):
        super().__init__(toplevel_tag, revision, manpage_urls)
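
    # note (added): book token streams start with two headings — [0] heading_open,
    # [1] title inline, [2] heading_close, [3] heading_open, [4] subtitle inline,
    # [5] heading_close — hence the fixed indices and the render of tokens[6:] below.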
    def _render_book(self, tokens: Sequence[Token]) -> str:
        assert tokens[1].children
        assert tokens[4].children
        if (maybe_id := cast(str, tokens[0].attrs.get('id', ""))):
            maybe_id = "xml:id=" + xml.quoteattr(maybe_id)
        return (f'<book xmlns="http://docbook.org/ns/docbook"'
                f' xmlns:xlink="http://www.w3.org/1999/xlink"'
                f' {maybe_id} version="5.0">'
                f' <title>{self.renderInline(tokens[1].children)}</title>'
                f' <subtitle>{self.renderInline(tokens[4].children)}</subtitle>'
                f' {super(DocBookRenderer, self).render(tokens[6:])}'
                f'</book>')

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        (tag, attrs) = super()._heading_tag(token, tokens, i)
        # render() has already verified that we don't have supernumerary headings and since the
        # book tag is handled specially we can leave the check this simple
        if token.tag != 'h1':
            return (tag, attrs)
        return (self._toplevel_tag, attrs | {
            'xmlns': "http://docbook.org/ns/docbook",
            'xmlns:xlink': "http://www.w3.org/1999/xlink",
        })

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        result = []
        # close existing partintro. the generic render doesn't really need this because
        # it doesn't have a concept of structure in the way the manual does.
        if self._headings and self._headings[-1] == Heading('part', 1):
            result.append("</partintro>")
            self._headings[-1] = self._headings[-1]._replace(partintro_closed=True)
        # must nest properly for structural includes. this requires saving at least
        # the headings stack, but creating new renderers is cheap and much easier.
        r = ManualDocBookRenderer(tag, self._revision, self._manpage_urls)
        for (included, path) in token.meta['included']:
            try:
                result.append(r.render(included))
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
        return "".join(result)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        conv = options.DocBookConverter(self._manpage_urls, self._revision, 'fragment',
                                        token.meta['list-id'], token.meta['id-prefix'])
        conv.add_options(token.meta['source'])
        return conv.finalize(fragment=True)

    # TODO minimize docbook diffs with existing conversions. remove soon.
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return super().paragraph_open(token, tokens, i) + "\n "
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n" + super().paragraph_close(token, tokens, i)
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>\n{xml.escape(token.content)}</programlisting>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={xml.quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>\n{xml.escape(token.content)}</programlisting>"

class DocBookConverter(BaseConverter[ManualDocBookRenderer]):
    INCLUDE_ARGS_NS = "docbook"

    def __init__(self, manpage_urls: Mapping[str, str], revision: str):
        super().__init__()
        self._renderer = ManualDocBookRenderer('book', revision, manpage_urls)

class HTMLParameters(NamedTuple):
    generator: str
    stylesheets: Sequence[str]
    scripts: Sequence[str]
    toc_depth: int
    chunk_toc_depth: int
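
# illustrative example (added), mirroring the CLI defaults declared below:
#   HTMLParameters(generator='nixos-render-docs', stylesheets=[], scripts=[],
#                  toc_depth=1, chunk_toc_depth=1)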

class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
    _base_path: Path
    _html_params: HTMLParameters

    def __init__(self, toplevel_tag: str, revision: str, html_params: HTMLParameters,
                 manpage_urls: Mapping[str, str], xref_targets: dict[str, XrefTarget],
                 base_path: Path):
        super().__init__(toplevel_tag, revision, manpage_urls, xref_targets)
        self._base_path, self._html_params = base_path, html_params
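
    # note (added): _push/_pop save and restore the renderer state that a structural
    # include must not leak into its parent: the toplevel tag, the headings stack,
    # the attrspans list, and the heading level offset.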
    def _push(self, tag: str, hlevel_offset: int) -> Any:
        result = (self._toplevel_tag, self._headings, self._attrspans, self._hlevel_offset)
        self._hlevel_offset += hlevel_offset
        self._toplevel_tag, self._headings, self._attrspans = tag, [], []
        return result

    def _pop(self, state: Any) -> None:
        (self._toplevel_tag, self._headings, self._attrspans, self._hlevel_offset) = state

    def _render_book(self, tokens: Sequence[Token]) -> str:
        assert tokens[4].children
        title_id = cast(str, tokens[0].attrs.get('id', ""))
        title = self._xref_targets[title_id].title
        # subtitles don't have IDs, so we can't use xrefs to get them
        subtitle = self.renderInline(tokens[4].children)

        toc = TocEntry.of(tokens[0])
        return "\n".join([
            self._file_header(toc),
            ' <div class="book">',
            '  <div class="titlepage">',
            '   <div>',
            f'    <div><h1 class="title"><a id="{html.escape(title_id, True)}"></a>{title}</h1></div>',
            f'    <div><h2 class="subtitle">{subtitle}</h2></div>',
            '   </div>',
            "   <hr />",
            '  </div>',
            self._build_toc(tokens, 0),
            super(HTMLRenderer, self).render(tokens[6:]),
            ' </div>',
            self._file_footer(toc),
        ])

    def _file_header(self, toc: TocEntry) -> str:
        prev_link, up_link, next_link = "", "", ""
        prev_a, next_a, parent_title = "", "", "&nbsp;"
        home = toc.root
        if toc.prev:
            prev_link = f'<link rel="prev" href="{toc.prev.target.href()}" title="{toc.prev.target.title}" />'
            prev_a = f'<a accesskey="p" href="{toc.prev.target.href()}">Prev</a>'
        if toc.parent:
            up_link = (
                f'<link rel="up" href="{toc.parent.target.href()}" '
                f'title="{toc.parent.target.title}" />'
            )
        if (part := toc.parent) and part.kind != 'book':
            assert part.target.title
            parent_title = part.target.title
        if toc.next:
            next_link = f'<link rel="next" href="{toc.next.target.href()}" title="{toc.next.target.title}" />'
            next_a = f'<a accesskey="n" href="{toc.next.target.href()}">Next</a>'
        return "\n".join([
            '<?xml version="1.0" encoding="utf-8" standalone="no"?>',
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
            '  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
            '<html xmlns="http://www.w3.org/1999/xhtml">',
            ' <head>',
            '  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            f'  <title>{toc.target.title}</title>',
            "".join((f'<link rel="stylesheet" type="text/css" href="{html.escape(style, True)}" />'
                     for style in self._html_params.stylesheets)),
            "".join((f'<script src="{html.escape(script, True)}" type="text/javascript"></script>'
                     for script in self._html_params.scripts)),
            f'  <meta name="generator" content="{html.escape(self._html_params.generator, True)}" />',
            f'  <link rel="home" href="{home.target.href()}" title="{home.target.title}" />',
            f'  {up_link}{prev_link}{next_link}',
            ' </head>',
            ' <body>',
            '  <div class="navheader">',
            '   <table width="100%" summary="Navigation header">',
            '    <tr>',
            f'     <th colspan="3" align="center">{toc.target.title}</th>',
            '    </tr>',
            '    <tr>',
            f'     <td width="20%" align="left">{prev_a}&nbsp;</td>',
            f'     <th width="60%" align="center">{parent_title}</th>',
            f'     <td width="20%" align="right">&nbsp;{next_a}</td>',
            '    </tr>',
            '   </table>',
            '   <hr />',
            '  </div>',
        ])

    def _file_footer(self, toc: TocEntry) -> str:
        # prev, next = self._get_prev_and_next()
        prev_a, up_a, home_a, next_a = "", "&nbsp;", "&nbsp;", ""
        prev_text, up_text, next_text = "", "", ""
        home = toc.root
        if toc.prev:
            prev_a = f'<a accesskey="p" href="{toc.prev.target.href()}">Prev</a>'
            assert toc.prev.target.title
            prev_text = toc.prev.target.title
        if toc.parent:
            home_a = f'<a accesskey="h" href="{home.target.href()}">Home</a>'
            if toc.parent != home:
                up_a = f'<a accesskey="u" href="{toc.parent.target.href()}">Up</a>'
        if toc.next:
            next_a = f'<a accesskey="n" href="{toc.next.target.href()}">Next</a>'
            assert toc.next.target.title
            next_text = toc.next.target.title
        return "\n".join([
            '  <div class="navfooter">',
            '   <hr />',
            '   <table width="100%" summary="Navigation footer">',
            '    <tr>',
            f'     <td width="40%" align="left">{prev_a}&nbsp;</td>',
            f'     <td width="20%" align="center">{up_a}</td>',
            f'     <td width="40%" align="right">&nbsp;{next_a}</td>',
            '    </tr>',
            '    <tr>',
            f'     <td width="40%" align="left" valign="top">{prev_text}&nbsp;</td>',
            f'     <td width="20%" align="center">{home_a}</td>',
            f'     <td width="40%" align="right" valign="top">&nbsp;{next_text}</td>',
            '    </tr>',
            '   </table>',
            '  </div>',
            ' </body>',
            '</html>',
        ])

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.tag == 'h1':
            return self._toplevel_tag
        return super()._heading_tag(token, tokens, i)

    def _build_toc(self, tokens: Sequence[Token], i: int) -> str:
        toc = TocEntry.of(tokens[i])
        if toc.kind == 'section':
            return ""
        def walk_and_emit(toc: TocEntry, depth: int) -> list[str]:
            if depth <= 0:
                return []
            result = []
            for child in toc.children:
                result.append(
                    f'<dt>'
                    f' <span class="{html.escape(child.kind, True)}">'
                    f'  <a href="{child.target.href()}">{child.target.toc_html}</a>'
                    f' </span>'
                    f'</dt>'
                )
                # we want to look straight through parts because docbook-xsl does too, but it
                # also makes for more useful top-level tocs.
                next_level = walk_and_emit(child, depth - (0 if child.kind == 'part' else 1))
                if next_level:
                    result.append(f'<dd><dl>{"".join(next_level)}</dl></dd>')
            return result
        toc_depth = (
            self._html_params.chunk_toc_depth
            if toc.starts_new_chunk and toc.kind != 'book'
            else self._html_params.toc_depth
        )
        if not (items := walk_and_emit(toc, toc_depth)):
            return ""
        examples = ""
        if toc.examples:
            examples_entries = [
                f'<dt>{i + 1}. <a href="{ex.target.href()}">{ex.target.toc_html}</a></dt>'
                for i, ex in enumerate(toc.examples)
            ]
            examples = (
                '<div class="list-of-examples">'
                '<p><strong>List of Examples</strong></p>'
                f'<dl>{"".join(examples_entries)}</dl>'
                '</div>'
            )
        return (
            f'<div class="toc">'
            f' <p><strong>Table of Contents</strong></p>'
            f' <dl class="toc">'
            f'  {"".join(items)}'
            f' </dl>'
            f'</div>'
            f'{examples}'
        )

    def _make_hN(self, level: int) -> tuple[str, str]:
        # for some reason chapters don't increase the hN nesting count in docbook xslts. duplicate
        # this for consistency.
        if self._toplevel_tag == 'chapter':
            level -= 1
        # TODO docbook compat. these are never useful for us, but not having them breaks manual
        # compare workflows while docbook is still allowed.
        style = ""
        if level + self._hlevel_offset < 3 \
           and (self._toplevel_tag == 'section' or (self._toplevel_tag == 'chapter' and level > 0)):
            style = "clear: both"
        tag, hstyle = super()._make_hN(max(1, level))
        return tag, style

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        outer, inner = [], []
        # since books have no non-include content the toplevel book wrapper will not count
        # towards nesting depth. other types will have at least a title+id heading which
        # *does* count towards the nesting depth. chapters give a -1 to included sections
        # mirroring the special handling in _make_hN. sigh.
        hoffset = (
            0 if not self._headings
            else self._headings[-1].level - 1 if self._toplevel_tag == 'chapter'
            else self._headings[-1].level
        )
        outer.append(self._maybe_close_partintro())
        into = token.meta['include-args'].get('into-file')
        fragments = token.meta['included']
        state = self._push(tag, hoffset)
        if into:
            toc = TocEntry.of(fragments[0][0][0])
            inner.append(self._file_header(toc))
            # we do not set _hlevel_offset=0 because docbook doesn't either.
        else:
            inner = outer
        for included, path in fragments:
            try:
                inner.append(self.render(included))
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
        if into:
            inner.append(self._file_footer(toc))
            (self._base_path / into).write_text("".join(inner))
        self._pop(state)
        return "".join(outer)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        conv = options.HTMLConverter(self._manpage_urls, self._revision,
                                     token.meta['list-id'], token.meta['id-prefix'],
                                     self._xref_targets)
        conv.add_options(token.meta['source'])
        return conv.finalize()

def _to_base26(n: int) -> str:
    return (_to_base26(n // 26) if n > 26 else "") + chr(ord("A") + n % 26)
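
# note (added): maps 0 -> "A", 1 -> "B", ..., 25 -> "Z", 27 -> "BB"; used below for
# appendix ids. since the comparison is `n > 26` rather than `n >= 26`, n == 26 also
# yields "A", which is harmless for realistic appendix counts.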

class HTMLConverter(BaseConverter[ManualHTMLRenderer]):
    INCLUDE_ARGS_NS = "html"
    INCLUDE_FRAGMENT_ALLOWED_ARGS = { 'into-file' }

    _revision: str
    _html_params: HTMLParameters
    _manpage_urls: Mapping[str, str]
    _xref_targets: dict[str, XrefTarget]
    _redirection_targets: set[str]
    _appendix_count: int = 0

    def _next_appendix_id(self) -> str:
        self._appendix_count += 1
        return _to_base26(self._appendix_count - 1)

    def __init__(self, revision: str, html_params: HTMLParameters, manpage_urls: Mapping[str, str]):
        super().__init__()
        self._revision, self._html_params, self._manpage_urls = revision, html_params, manpage_urls
        self._xref_targets = {}
        self._redirection_targets = set()
        # renderer not set on purpose since it has a dependency on the output path!

    def convert(self, infile: Path, outfile: Path) -> None:
        self._renderer = ManualHTMLRenderer('book', self._revision, self._html_params,
                                            self._manpage_urls, self._xref_targets, outfile.parent)
        super().convert(infile, outfile)

    def _parse(self, src: str) -> list[Token]:
        tokens = super()._parse(src)
        for token in tokens:
            if not token.type.startswith('included_') \
               or not (into := token.meta['include-args'].get('into-file')):
                continue
            assert token.map
            if len(token.meta['included']) == 0:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is empty!")
            # we use blender-style //path to denote paths relative to the origin file
            # (usually index.html). this makes everything a lot easier and clearer.
            if not into.startswith("//") or '/' in into[2:]:
                raise RuntimeError("html:into-file must be a relative-to-origin //filename", into)
            into = token.meta['include-args']['into-file'] = into[2:]
            if into in self._redirection_targets:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is already in use")
            self._redirection_targets.add(into)
        return tokens
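
    # e.g. (illustrative) a block opened with `{=include=} chapters html:into-file=//options.html`
    # is rendered into options.html next to the main output file instead of inline.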

    def _number_examples(self, tokens: Sequence[Token], start: int = 1) -> int:
        for (i, token) in enumerate(tokens):
            if token.type == "example_title_open":
                title = tokens[i + 1]
                assert title.type == 'inline' and title.children
                # the prefix is split into two tokens because the xref title_html will want
                # only the first of the two, but both must be rendered into the example itself.
                title.children = (
                    [
                        Token('text', '', 0, content=f'Example {start}'),
                        Token('text', '', 0, content='. ')
                    ] + title.children
                )
                start += 1
            elif token.type.startswith('included_') and token.type != 'included_options':
                for sub, _path in token.meta['included']:
                    start = self._number_examples(sub, start)
        return start

    # xref | (id, type, heading inlines, file, starts new file)
    def _collect_ids(self, tokens: Sequence[Token], target_file: str, typ: str, file_changed: bool
                     ) -> list[XrefTarget | tuple[str, str, Token, str, bool]]:
        result: list[XrefTarget | tuple[str, str, Token, str, bool]] = []
        # collect all IDs and their xref substitutions. headings are deferred until everything
        # has been parsed so we can resolve links in headings. if that's even used anywhere.
        for (i, bt) in enumerate(tokens):
            if bt.type == 'heading_open' and (id := cast(str, bt.attrs.get('id', ''))):
                result.append((id, typ if bt.tag == 'h1' else 'section', tokens[i + 1], target_file,
                               i == 0 and file_changed))
            elif bt.type == 'included_options':
                id_prefix = bt.meta['id-prefix']
                for opt in bt.meta['source'].keys():
                    id = make_xml_id(f"{id_prefix}{opt}")
                    name = html.escape(opt)
                    result.append(XrefTarget(id, f'<code class="option">{name}</code>', name, None, target_file))
            elif bt.type.startswith('included_'):
                sub_file = bt.meta['include-args'].get('into-file', target_file)
                subtyp = bt.type.removeprefix('included_').removesuffix('s')
                for si, (sub, _path) in enumerate(bt.meta['included']):
                    result += self._collect_ids(sub, sub_file, subtyp, si == 0 and sub_file != target_file)
            elif bt.type == 'example_open' and (id := cast(str, bt.attrs.get('id', ''))):
                result.append((id, 'example', tokens[i + 2], target_file, False))
            elif bt.type == 'inline':
                assert bt.children
                result += self._collect_ids(bt.children, target_file, typ, False)
            elif id := cast(str, bt.attrs.get('id', '')):
                # anchors and examples have no titles we could use, but we'll have to put
                # *something* here to communicate that there's no title.
                result.append(XrefTarget(id, "???", None, None, target_file))
        return result

    def _render_xref(self, id: str, typ: str, inlines: Token, path: str, drop_fragment: bool) -> XrefTarget:
        assert inlines.children
        title_html = self._renderer.renderInline(inlines.children)
        if typ == 'appendix':
            # NOTE the docbook compat is strong here
            n = self._next_appendix_id()
            prefix = f"Appendix\u00a0{n}.\u00a0"
            # HACK for docbook compat: prefix the title inlines with appendix id if
            # necessary. the alternative is to mess with titlepage rendering in headings,
            # which seems just a lot worse than this
            prefix_tokens = [Token(type='text', tag='', nesting=0, content=prefix)]
            inlines.children = prefix_tokens + list(inlines.children)
            title = prefix + title_html
            toc_html = f"{n}. {title_html}"
            title_html = f"Appendix\u00a0{n}"
        elif typ == 'example':
            # skip the prepended `Example N. ` from _number_examples
            toc_html, title = self._renderer.renderInline(inlines.children[2:]), title_html
            # xref title wants only the prepended text, sans the trailing colon and space
            title_html = self._renderer.renderInline(inlines.children[0:1])
        else:
            toc_html, title = title_html, title_html
            title_html = (
                f"<em>{title_html}</em>"
                if typ == 'chapter'
                else title_html if typ in [ 'book', 'part' ]
                else f'the section called “{title_html}”'
            )
        return XrefTarget(id, title_html, toc_html, re.sub('<.*?>', '', title), path, drop_fragment)

    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        self._number_examples(tokens)
        xref_queue = self._collect_ids(tokens, outfile.name, 'book', True)

        failed = False
        deferred = []
        while xref_queue:
            for item in xref_queue:
                try:
                    target = item if isinstance(item, XrefTarget) else self._render_xref(*item)
                except UnresolvedXrefError:
                    if failed:
                        raise
                    deferred.append(item)
                    continue

                if target.id in self._xref_targets:
                    raise RuntimeError(f"found duplicate id #{target.id}")
                self._xref_targets[target.id] = target
            if len(deferred) == len(xref_queue):
                failed = True # do another round and report the first error
            xref_queue = deferred
            deferred = [] # reset so each round only retries what it actually deferred

        TocEntry.collect_and_link(self._xref_targets, tokens)

def _build_cli_db(p: argparse.ArgumentParser) -> None:
    p.add_argument('--manpage-urls', required=True)
    p.add_argument('--revision', required=True)
    p.add_argument('infile', type=Path)
    p.add_argument('outfile', type=Path)

def _build_cli_html(p: argparse.ArgumentParser) -> None:
    p.add_argument('--manpage-urls', required=True)
    p.add_argument('--revision', required=True)
    p.add_argument('--generator', default='nixos-render-docs')
    p.add_argument('--stylesheet', default=[], action='append')
    p.add_argument('--script', default=[], action='append')
    p.add_argument('--toc-depth', default=1, type=int)
    p.add_argument('--chunk-toc-depth', default=1, type=int)
    p.add_argument('infile', type=Path)
    p.add_argument('outfile', type=Path)

def _run_cli_db(args: argparse.Namespace) -> None:
    with open(args.manpage_urls, 'r') as manpage_urls:
        md = DocBookConverter(json.load(manpage_urls), args.revision)
        md.convert(args.infile, args.outfile)

def _run_cli_html(args: argparse.Namespace) -> None:
    with open(args.manpage_urls, 'r') as manpage_urls:
        md = HTMLConverter(
            args.revision,
            HTMLParameters(args.generator, args.stylesheet, args.script, args.toc_depth,
                           args.chunk_toc_depth),
            json.load(manpage_urls))
        md.convert(args.infile, args.outfile)

def build_cli(p: argparse.ArgumentParser) -> None:
    formats = p.add_subparsers(dest='format', required=True)
    _build_cli_db(formats.add_parser('docbook'))
    _build_cli_html(formats.add_parser('html'))

def run_cli(args: argparse.Namespace) -> None:
    if args.format == 'docbook':
        _run_cli_db(args)
    elif args.format == 'html':
        _run_cli_html(args)
    else:
        raise RuntimeError('format not hooked up', args)
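
# minimal usage sketch (added; assumes this module is wired into a larger CLI):
#   parser = argparse.ArgumentParser()
#   build_cli(parser)
#   run_cli(parser.parse_args())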