mistune
mistune copied to clipboard
Add meta data plugin
The current set of plugins lacks one for document meta data, similar to this: https://python-markdown.github.io/extensions/meta_data/
Some renderers might need this type of document meta information (e.g. a PDFRenderer).
This was previously discussed in #211
@karlcow Thanks for the hint. I did check out https://github.com/lepture/mistune-contrib/blob/master/mistune_contrib/meta.py, but it leaves a lot to be desired and does not seem to be compatible with the current v2 design.
I did implement a meta data plugin. I also designed a Plugin abstract class (as suggested here: https://github.com/lepture/mistune/issues/266). My implementation of the meta data plugin follows the specification of MultiMarkdown.
@karlcow @lepture Please let me know what you think. I can open up a pull request for my implementation but I would like to first discuss the Plugin class design here: https://github.com/lepture/mistune/issues/266.
import html
import re
from abc import ABC, abstractmethod
from re import Match, Pattern
from typing import Any, Callable, Dict

from mistune.block_parser import BlockParser
from mistune.inline_parser import InlineParser
from mistune.markdown import Markdown
from mistune.renderers import BaseRenderer
from mistune.scanner import ScannerParser
class Plugin(ABC):
    """Abstract base class for plugins that attach to a ``Markdown`` object.

    A concrete plugin provides a ``name``, a regex ``pattern``, a ``parse``
    callback and one render method per supported renderer.  Calling the
    plugin instance with a ``Markdown`` object delegates to ``register``.
    """

    def __call__(self, markdown: Markdown):
        """Install the plugin on ``markdown`` by delegating to ``register``."""
        self.register(markdown)

    # ---- hooks a concrete plugin has to implement -------------------------

    @property
    @abstractmethod
    def name(self) -> str:
        """Name under which the parse rule and the render method are registered."""

    @property
    @abstractmethod
    def pattern(self) -> Pattern:
        """Regex pattern passed to the parser together with ``parse``."""

    @abstractmethod
    def register(self, markdown: Markdown) -> None:
        """Attach the plugin to the given ``Markdown`` object."""

    @abstractmethod
    def parse(self, parser: ScannerParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Build the result dict for a successful ``pattern`` match."""

    @abstractmethod
    def render_html(self, *args) -> str:
        """Render method offered under the ``'html'`` renderer name."""

    @abstractmethod
    def render_ast(self, *args) -> Dict[str, Any]:
        """Render method offered under the ``'ast'`` renderer name."""

    # ---- shared machinery --------------------------------------------------

    @property
    def renderers(self) -> Dict[str, Callable]:
        """Map each supported renderer name to the matching bound render method."""
        by_renderer: Dict[str, Callable] = {}
        by_renderer['html'] = self.render_html
        by_renderer['ast'] = self.render_ast
        return by_renderer

    def _get_render_method(self, renderer_name: str) -> Callable:
        """Return the render method for ``renderer_name``.

        Raises:
            NotImplementedError: ``renderer_name`` is not a key of ``renderers``.
        """
        available = self.renderers
        if renderer_name in available:
            return available[renderer_name]
        raise NotImplementedError(
            f"Plugin {self.name} does not implement a render method for {renderer_name}")

    def _register_parser(self, parser: ScannerParser):
        """Register the rule on ``parser`` and append its name to ``parser.rules``."""
        rule_name = self.name
        parser.register_rule(rule_name, self.pattern, self.parse)
        parser.rules.append(rule_name)

    def _register_renderer(self, renderer: BaseRenderer):
        """Register the render method selected by ``renderer.NAME`` on ``renderer``."""
        # Resolve the method first: an unsupported renderer raises before
        # anything is registered.
        render_method = self._get_render_method(renderer.NAME)
        renderer.register(self.name, render_method)
class InlinePlugin(Plugin, ABC):
    """Plugin whose rule is registered on the inline parser of a ``Markdown`` object."""

    def register(self, markdown: Markdown) -> None:
        """Register the rule on ``markdown.inline`` and the render method on ``markdown.renderer``."""
        inline_parser = markdown.inline
        self._register_parser(inline_parser)
        active_renderer = markdown.renderer
        self._register_renderer(active_renderer)

    @abstractmethod
    def parse(self, parser: InlineParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Build the result dict for a match found by the inline parser."""
class BlockPlugin(Plugin, ABC):
    """Plugin whose rule is registered on the block parser of a ``Markdown`` object."""

    def register(self, markdown: Markdown) -> None:
        """Register the rule on ``markdown.block`` and the render method on ``markdown.renderer``."""
        block_parser = markdown.block
        self._register_parser(block_parser)
        active_renderer = markdown.renderer
        self._register_renderer(active_renderer)

    @abstractmethod
    def parse(self, parser: BlockParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Build the result dict for a match found by the block parser."""
class MetaPlugin(BlockPlugin):
    """Block plugin that extracts a leading meta data block from a document.

    The block layout follows the MultiMarkdown meta data syntax: ``key: value``
    lines at the very start of the text, optionally opened by a ``---`` line
    and closed by ``---``, ``...`` or an empty line.  The parsed entries are
    passed on as a ``dict`` and rendered either as HTML ``<meta>`` elements or
    as an AST node.

    NOTE(review): ``Plugin._register_parser`` appends this rule to the end of
    ``parser.rules``.  If the parser tries rules in list order, a document that
    opens with ``---`` may be claimed by an earlier rule -- confirm against the
    block parser.
    """

    # Compiled once at class creation rather than on every property access.
    _BLOCK_PATTERN = re.compile(r'\A(?:-{3}\n)?(.+:(?:.+\n)+?)(?:-{3}|\.{3}|\n)')

    # One ``key: value`` entry per line:
    #   * the key may contain spaces, tabs and hyphens, so a multi-word key
    #     such as ``Base Header Level`` is captured whole instead of only its
    #     last word;
    #   * only spaces/tabs are skipped around the colon (not ``\s``, which
    #     also matches ``\n``), so an entry with an empty value can no longer
    #     swallow the following line as its value;
    #   * the lazy key stops at the first colon, so values may contain colons.
    _ENTRY_PATTERN = re.compile(
        r'(?P<key>\w[\w \t-]*?)[ \t]*:[ \t]*(?P<value>.*)$', re.MULTILINE)

    @property
    def name(self) -> str:
        """Rule name, token type and renderer method name of this plugin."""
        return 'meta'

    @property
    def pattern(self) -> Pattern:
        r"""
        Pattern to match a meta data block as defined by
        [MultiMarkdown](http://fletcher.github.io/MultiMarkdown-5/metadata.html).

        Regex explanation:
        - `\A`: the meta data block has to be at the start of the markdown text
        - `(-{3}\n)?`: the block starts directly or with a `---` line
        - `.+:`: the first line must contain a key followed by a colon
        - `(.+\n)+?`: the rest of the first line and any further non-empty
          lines, matched lazily so the block stops at the first terminator
        - `(-{3}|\.{3}|\n)`: the block ends with `---`, `...`, or an empty line
        - `?:`: marks groups that are not captured; only the entry lines are

        Returns:
            Pattern: Regex pattern matching the meta data block
        """
        return self._BLOCK_PATTERN

    def parse(self, parser: BlockParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Turn a matched meta data block into a token.

        Args:
            parser: The block parser that found the match (unused).
            match: Match of ``pattern``; group 1 holds the ``key: value`` lines.
            state: Parser state (unused).

        Returns:
            A token dict ``{'type': 'meta', 'raw': <entries>, 'params': None}``
            where ``<entries>`` maps normalised keys to their value strings.
            When a key occurs more than once the last occurrence wins.
        """
        meta_block: str = match.group(1)
        meta_data: Dict[str, str] = {}
        for entry in self._ENTRY_PATTERN.finditer(meta_block):
            # MultiMarkdown keys are case-insensitive and ignore whitespace:
            # "Base Header Level" and "baseheaderlevel" name the same entry.
            key = ''.join(entry.group('key').split()).lower()
            meta_data[key] = entry.group('value')
        return {'type': self.name, 'raw': meta_data, 'params': None}

    def render_html_meta_element(self, name: str, content: Any) -> str:
        """Render one entry as an HTML ``<meta>`` element.

        Both ``name`` and ``content`` originate from the markdown document, so
        they are HTML-escaped (including quotes) before being placed inside
        the attribute values; otherwise a value containing ``"`` or ``<``
        could break out of the attribute and inject markup.
        """
        safe_name = html.escape(str(name), quote=True)
        safe_content = html.escape(str(content), quote=True)
        return f' <meta name="{safe_name}" content="{safe_content}">'

    def render_html(self, *args) -> str:
        """Render the meta data dict (``args[0]``) as a ``<head>`` element
        containing one ``<meta>`` element per entry."""
        meta_data: Dict[str, Any] = args[0]
        meta_elements = [
            self.render_html_meta_element(name, content) for name, content in meta_data.items()
        ]
        return '<head>\n' + '\n'.join(meta_elements) + '\n</head>\n'

    def render_ast(self, *args) -> Dict[str, Any]:
        """Render the meta data dict (``args[0]``) as an AST node."""
        meta_data: Dict[str, Any] = args[0]
        return {'type': self.name, 'meta_data': meta_data}