sparv-pipeline
sparv-pipeline copied to clipboard
Add initial testing module
I have written some mock classes to be able to run tests for an annotator function.
Maybe it is interesting to put it in something like sparv.testing
or shall I put it in a separate repo (maybe sparv-testing) to let it mature there?
Update: I have created https://github.com/spraakbanken/sparv-pipeline-testing to use it in several plugin repos.
# file: testing.py
from typing import Dict, Generator, Generic, List, Optional, Tuple, TypeVar
from sparv.api.classes import Annotation, BaseAnnotation, Output # type: ignore [import-untyped]
from sparv.core import log_handler # type: ignore [import-untyped] # noqa: F401
class MockAnnotation(Annotation):
    """In-memory stand-in for ``Annotation`` used when testing annotator functions.

    Values and parent/child relations are supplied directly to the constructor
    and served back from memory by ``read`` and ``get_children``.
    """

    def __init__(
        self,
        name: str = "",
        source_file: Optional[str] = None,
        values: Optional[List[str]] = None,
        children: Optional[Dict[str, List[List[int]]]] = None,
    ) -> None:
        """Create a mock annotation.

        Args:
            name: Annotation name, forwarded to the base class.
            source_file: Accepted but not used by this mock.
            values: Values yielded by ``read``; defaults to an empty list.
            children: Mapping from child annotation name to a list with one
                entry per parent, each entry being a list of child indices;
                defaults to an empty mapping.
        """
        super().__init__(name)
        self._values = values or []
        self._children = children or {}

    def read(self, allow_newlines: bool = False) -> Generator[str, None, None]:
        """Yield each value given to the constructor, in order.

        ``allow_newlines`` is accepted but ignored by this mock.
        """
        yield from self._values

    def get_children(
        self,
        child: BaseAnnotation,
        *,
        orphan_alert: bool = False,
        preserve_parent_annotation_order: bool = False,
    ) -> Tuple[List, List]:
        """Return two lists.

        The first one is a list with n (= total number of parents) elements where every element is a list
        of indices in the child annotation.
        The second one is a list of orphans, i.e. containing indices in the child annotation that have no parent.

        This mock returns the pre-registered entry for ``child.name`` unchanged together with an empty
        orphan list; ``orphan_alert`` and ``preserve_parent_annotation_order`` are accepted but ignored.

        Raises:
            KeyError: If no children were registered under ``child.name``.
        """
        return self._children[child.name], []

    def create_empty_attribute(self) -> List:
        """Return a list of ``None`` with one slot per element of this annotation.

        The length is the largest number of parent entries among the registered
        children. When no children were registered, the number of values is
        used instead, so a values-only mock does not raise ``ValueError``.
        """
        if self._children:
            size = max(len(parents) for parents in self._children.values())
        else:
            # max() on an empty iterable raises ValueError; fall back to the values.
            size = len(self._values)
        return [None] * size
T = TypeVar("T")


class MemoryOutput(Output, Generic[T]):
    """``Output`` replacement that collects written values in memory.

    The written values are available afterwards in the ``values`` attribute.
    """

    def __init__(self) -> None:
        """Start with an empty list of collected values."""
        # NOTE(review): the base-class initializer is not invoked here — presumably
        # so that no annotation name is needed in tests; confirm against Output.
        self.values: List[T] = []

    def write(
        self,
        values: List[T],
        *,
        append: bool = False,
        allow_newlines: bool = False,
        source_file: Optional[str] = None,
    ) -> None:
        """Store an annotation in memory instead of writing it to file.

        'values' should be a list of values. Previously stored values are
        replaced unless ``append`` is true, in which case the new values are
        added after the existing ones. ``allow_newlines`` and ``source_file``
        are accepted but ignored.
        """
        if append:
            self.values.extend(values)
        else:
            # Copy, so a later append=True write cannot mutate the caller's list.
            self.values = list(values)
Example usage:
from sbx_sentence_sentiment_kb_sent.annotations import annotate_sentence_sentiment
from tests.testing import MemoryOutput, MockAnnotation
def test_annotate_sentence_sentiment(snapshot) -> None:  # noqa: ANN001
    """Run the sentiment annotator on two mocked sentences and compare with the snapshot.

    ``snapshot`` is presumably a snapshot-testing pytest fixture — confirm which plugin provides it.
    """
    tokens = ["Han", "var", "glad", ".", "Rihanna", "uppges", "gravid", "."]
    # One inner list per sentence, holding the indices of its tokens.
    tokens_per_sentence = [[0, 1, 2, 3], [4, 5, 6, 7]]

    word = MockAnnotation(name="<token>", values=tokens)
    sentence = MockAnnotation(name="<sentence>", children={"<token>": tokens_per_sentence})
    output: MemoryOutput = MemoryOutput()

    annotate_sentence_sentiment(output, word, sentence)

    assert output.values == snapshot