mistune icon indicating copy to clipboard operation
mistune copied to clipboard

Add meta data plugin

Open Spenhouet opened this issue 4 years ago • 2 comments

From the current set of plugins I'm missing a plugin for document meta data similar to this: https://python-markdown.github.io/extensions/meta_data/

Some renderers might need this type of document meta information (e.g. a PDFRenderer).

Spenhouet avatar Jan 03 '21 00:01 Spenhouet

This was previously discussed in #211

karlcow avatar Jan 03 '21 01:01 karlcow

@karlcow Thanks for the hint. I did check out https://github.com/lepture/mistune-contrib/blob/master/mistune_contrib/meta.py, but it leaves a lot to be desired and does not seem to be compatible with the current v2 design.

I did implement a meta data plugin. I also designed a Plugin abstract class (as suggested here: https://github.com/lepture/mistune/issues/266). My implementation of the meta data plugin follows the specification of MultiMarkdown.

@karlcow @lepture Please let me know what you think. I can open up a pull request for my implementation but I would like to first discuss the Plugin class design here: https://github.com/lepture/mistune/issues/266.

import html
import re
from abc import ABC, abstractmethod
from re import Match, Pattern
from typing import Any, Callable, Dict

from mistune.block_parser import BlockParser
from mistune.inline_parser import InlineParser
from mistune.markdown import Markdown
from mistune.renderers import BaseRenderer
from mistune.scanner import ScannerParser


class Plugin(ABC):
    """Abstract base class bundling one parser rule with its render methods.

    A concrete plugin supplies a rule ``name``, a regex ``pattern``, a
    ``parse`` callback and one render method per supported renderer.
    Calling the plugin instance with a ``Markdown`` object runs
    :meth:`register` on it.
    """

    def __call__(self, markdown: Markdown):
        """Make the instance callable: delegate to :meth:`register`."""
        self.register(markdown)

    @property
    @abstractmethod
    def name(self) -> str:
        """Rule name used for both the parser rule and the renderer method."""

    @property
    @abstractmethod
    def pattern(self) -> Pattern:
        """Regex pattern handed to the parser together with :meth:`parse`."""

    @property
    def renderers(self) -> Dict[str, Callable]:
        """Return the render method of this plugin for each renderer name."""
        return dict(html=self.render_html, ast=self.render_ast)

    def _get_render_method(self, renderer_name: str) -> Callable:
        """Look up the render method registered for ``renderer_name``.

        Raises:
            NotImplementedError: If ``renderer_name`` is not a key of
                :attr:`renderers`.
        """
        available = self.renderers
        if renderer_name in available:
            return available[renderer_name]

        raise NotImplementedError(
            f"Plugin {self.name} does not implement a render method for {renderer_name}")

    def _register_parser(self, parser: ScannerParser):
        """Register the rule on ``parser`` and append its name to ``parser.rules``."""
        rule_name = self.name
        parser.register_rule(rule_name, self.pattern, self.parse)
        parser.rules.append(rule_name)

    def _register_renderer(self, renderer: BaseRenderer):
        """Register the render method matching ``renderer.NAME`` on ``renderer``."""
        renderer_name = renderer.NAME
        renderer.register(self.name, self._get_render_method(renderer_name))

    @abstractmethod
    def register(self, markdown: Markdown) -> None:
        """Install this plugin on ``markdown``; implemented by subclasses."""

    @abstractmethod
    def parse(self, parser: ScannerParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Turn a regex ``match`` into a token dictionary."""

    @abstractmethod
    def render_html(self, *args) -> str:
        """Render the parsed token for the ``'html'`` renderer."""

    @abstractmethod
    def render_ast(self, *args) -> Dict[str, Any]:
        """Render the parsed token for the ``'ast'`` renderer."""


class InlinePlugin(Plugin, ABC):
    """Plugin base whose rule is installed on ``markdown.inline``."""

    def register(self, markdown: Markdown) -> None:
        """Register the rule on the inline parser, then the render method."""
        inline_parser = markdown.inline
        self._register_parser(inline_parser)
        self._register_renderer(markdown.renderer)

    @abstractmethod
    def parse(self, parser: InlineParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Turn an inline-level regex ``match`` into a token dictionary."""

class BlockPlugin(Plugin, ABC):
    """Plugin base whose rule is installed on ``markdown.block``."""

    def register(self, markdown: Markdown) -> None:
        """Register the rule on the block parser, then the render method."""
        block_parser = markdown.block
        self._register_parser(block_parser)
        self._register_renderer(markdown.renderer)

    @abstractmethod
    def parse(self, parser: BlockParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Turn a block-level regex ``match`` into a token dictionary."""


class MetaPlugin(BlockPlugin):
    """Block plugin that extracts a leading meta data header from a document.

    The header format follows
    [MultiMarkdown](http://fletcher.github.io/MultiMarkdown-5/metadata.html):
    ``Key: value`` lines at the very start of the document, optionally opened
    by a ``---`` line and closed by ``---``, ``...`` or an empty line.
    """

    # Both patterns are compiled once at class creation rather than on every
    # property access / parse call.
    _BLOCK_PATTERN = re.compile(r'\A(?:-{3}\n)?(.+:(?:.+\n)+?)(?:-{3}|\.{3}|\n)')

    # One ``Key: value`` entry per line:
    # - `^` (MULTILINE): a key must begin at the start of a line, so indented
    #   continuation lines are not mistaken for new keys.
    # - key: a word character followed by word characters, spaces, tabs or
    #   hyphens.
    # - `[ \t]*`: only horizontal whitespace may follow the colon; `\s*` would
    #   cross the newline and make an empty-valued key swallow the next line.
    # - value: must start with a non-space character; entries without a value
    #   are skipped.
    _ENTRY_PATTERN = re.compile(
        r'^(?P<key>\w[\w \t-]*):[ \t]*(?P<value>\S.*)$', re.MULTILINE)

    @property
    def name(self) -> str:
        """Rule name; also used as the token type."""
        return 'meta'

    @property
    def pattern(self) -> Pattern:
        r"""
        Pattern to match meta data block as defined by [MultiMarkdown](http://fletcher.github.io/MultiMarkdown-5/metadata.html).

        Regex explanation:
        - `\A`: Meta data block has to be at the start of the input
        - `(?:-{3}\n)?`: The block starts directly or with a `---` line
        - `.+:`: The first line of the block must contain a key followed by a colon
        - `(?:.+\n)+?`: One or more non-empty lines, as few as possible
        - `(?:-{3}|\.{3}|\n)`: The block ends with a `---`, `...`, or empty line
        - `?:`: Do not extract these groups; only the key/value lines are captured (group 1)

        Returns:
            Pattern: Regex pattern matching meta data block
        """
        return self._BLOCK_PATTERN

    def parse(self, parser: BlockParser, match: Match, state: Dict[str, Any]) -> Dict[str, Any]:
        """Build the ``meta`` token from a match of :attr:`pattern`.

        Args:
            parser: The block parser invoking this rule (unused).
            match: Match of :attr:`pattern`; group 1 holds the key/value lines.
            state: Parser state (unused).

        Returns:
            Token dictionary whose ``'raw'`` entry maps normalised keys to
            their values. Keys are lower-cased and stripped of whitespace;
            if a key occurs more than once the last value wins.
        """
        meta_block: str = match.group(1)

        meta_data = {
            ''.join(key.split()).lower(): value
            for key, value in self._ENTRY_PATTERN.findall(meta_block)
        }

        return {'type': self.name, 'raw': meta_data, 'params': None}

    def render_html_meta_element(self, name: str, content: Any) -> str:
        """Render one ``<meta>`` element.

        Both values originate from the document text, so they are HTML-escaped
        (including quotes) before being placed inside the attributes.
        """
        safe_name = html.escape(str(name))
        safe_content = html.escape(str(content))
        return f'  <meta name="{safe_name}" content="{safe_content}">'

    def render_html(self, *args) -> str:
        """Render the meta data as a ``<head>`` element with one ``<meta>`` per entry.

        Args:
            *args: The first positional argument is the meta data mapping
                (presumably the token's ``'raw'`` value passed by the
                renderer - confirm against the renderer in use).
        """
        meta_data: Dict[str, Any] = args[0]
        meta_elements = [
            self.render_html_meta_element(name, content) for name, content in meta_data.items()
        ]
        return '<head>\n' + '\n'.join(meta_elements) + '\n</head>\n'

    def render_ast(self, *args) -> Dict[str, Any]:
        """Return the AST node for the meta data mapping given as first argument."""
        meta_data: Dict[str, Any] = args[0]
        return {'type': self.name, 'meta_data': meta_data}

Spenhouet avatar Jan 03 '21 21:01 Spenhouet