sparv-pipeline icon indicating copy to clipboard operation
sparv-pipeline copied to clipboard

Add initial testing module

Open kod-kristoff opened this issue 8 months ago • 0 comments

I have written some mock classes to be able to run tests for an annotator function.

Maybe it would be interesting to put it in something like sparv.testing, or shall I put it in a separate repo (maybe sparv-testing) to let it mature there?

Update: I have created https://github.com/spraakbanken/sparv-pipeline-testing to use it in several plugin repos.

# file: testing.py
from typing import Dict, Generator, Generic, List, Optional, Tuple, TypeVar

from sparv.api.classes import Annotation, BaseAnnotation, Output  # type: ignore [import-untyped]
from sparv.core import log_handler  # type: ignore [import-untyped] # noqa: F401


class MockAnnotation(Annotation):
    """In-memory stand-in for ``Annotation``, intended for testing annotator functions.

    The values and the parent/child relations are handed to the constructor and
    served back from memory by ``read``, ``get_children`` and
    ``create_empty_attribute``.
    """

    def __init__(
        self,
        name: str = "",
        source_file: Optional[str] = None,
        values: Optional[List[str]] = None,
        children: Optional[Dict[str, List[List[int]]]] = None,
    ) -> None:
        """Create a mock annotation.

        Args:
            name: Annotation name, forwarded to ``Annotation.__init__``.
            source_file: Accepted but currently unused by the mock.
            values: Values that ``read`` yields, in order. Defaults to no values.
            children: Mapping from a child annotation's name to the list that
                ``get_children`` returns for that child (one list of child
                indices per parent). Defaults to no children.
        """
        super().__init__(name)
        self._values = values or []
        self._children = children or {}

    def read(self, allow_newlines: bool = False) -> Generator[str, None, None]:
        """Yield each stored value in order; ``allow_newlines`` is ignored."""
        yield from self._values

    def get_children(
        self,
        child: BaseAnnotation,
        *,
        orphan_alert: bool = False,
        preserve_parent_annotation_order: bool = False,
    ) -> Tuple[List, List]:
        """Return two lists: the stored children for ``child`` and the orphans.

        The first list is the one registered under ``child.name`` in the
        ``children`` mapping given to the constructor: one element per parent,
        each element being a list of indices in the child annotation.
        The second list (orphans) is always empty in this mock.

        ``orphan_alert`` and ``preserve_parent_annotation_order`` are accepted
        but ignored; the stored lists are returned as given.

        Raises:
            KeyError: If no children were registered for ``child.name``.
        """
        return self._children[child.name], []

    def create_empty_attribute(self) -> List:
        """Return a list of ``None`` sized after the data stored in this mock.

        When children were registered, the size is the largest number of
        parents among them. Otherwise the size is the number of stored values,
        so a mock created with only ``values`` (or with nothing) yields a list
        instead of failing on ``max()`` of an empty sequence.
        """
        if self._children:
            size = max(len(parents) for parents in self._children.values())
        else:
            # No parent/child data: fall back to the number of stored values.
            size = len(self._values)
        return [None] * size


# Type of the individual values stored by the generic MemoryOutput below.
T = TypeVar("T")


class MemoryOutput(Output, Generic[T]):
    """``Output`` replacement that keeps written values in memory.

    After an annotator function has run, the written values can be inspected
    through the ``values`` attribute.
    """

    def __init__(self) -> None:
        """Create an output with no values written yet."""
        # NOTE(review): Output.__init__ is not called here, so this object only
        # carries the ``values`` attribute -- confirm that this is intentional.
        self.values: List[T] = []

    def write(
        self,
        values: List[T],
        *,
        append: bool = False,
        allow_newlines: bool = False,
        source_file: Optional[str] = None,
    ) -> None:
        """Store an annotation in memory.

        Args:
            values: The values to store.
            append: If true, add ``values`` after the values already stored;
                otherwise replace them.
            allow_newlines: Accepted but ignored.
            source_file: Accepted but ignored.
        """
        if append:
            self.values.extend(values)
        else:
            # Store a copy: keeping the caller's list by reference would let a
            # later ``append=True`` write mutate the caller's list in place.
            self.values = list(values)

Example usage:

from sbx_sentence_sentiment_kb_sent.annotations import annotate_sentence_sentiment

from tests.testing import MemoryOutput, MockAnnotation


def test_annotate_sentence_sentiment(snapshot) -> None:  # noqa: ANN001
    """Run the sentiment annotator on two mocked sentences and compare with the snapshot."""
    # Two sentences of four tokens each; the children lists hold the token indices.
    tokens = MockAnnotation(
        name="<token>",
        values=["Han", "var", "glad", ".", "Rihanna", "uppges", "gravid", "."],
    )
    sentences = MockAnnotation(
        name="<sentence>",
        children={"<token>": [[0, 1, 2, 3], [4, 5, 6, 7]]},
    )
    sink: MemoryOutput = MemoryOutput()

    annotate_sentence_sentiment(sink, tokens, sentences)

    assert sink.values == snapshot

kod-kristoff avatar May 28 '24 09:05 kod-kristoff