openff-evaluator
openff-evaluator copied to clipboard
Validating curation models with "before" breaks CurationWorkflowSchema
See https://github.com/openforcefield/openff-evaluator/pull/584#discussion_r2244495821
For more context: when I adapt the test in test_workflow.py as shown below, it fails. This seems to affect only the selection filters; I'm not sure why. The failure goes away when I change the validation to happen "after".
diff --git a/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py b/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py
index e75459c..e8d67fb 100644
--- a/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py
+++ b/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py
@@ -12,6 +12,7 @@ from openff.evaluator.datasets.curation.components.filtering import (
FilterByPressureSchema,
FilterByTemperatureSchema,
)
+from openff.evaluator.datasets.curation.components.selection import SelectDataPointsSchema
from openff.evaluator.datasets.curation.workflow import (
CurationWorkflow,
CurationWorkflowSchema,
@@ -19,6 +20,7 @@ from openff.evaluator.datasets.curation.workflow import (
from openff.evaluator.properties import Density
from openff.evaluator.substances import Substance
from openff.evaluator.thermodynamics import ThermodynamicState
+from openff.evaluator.datasets.curation.components.selection import State, TargetState
@pytest.fixture(scope="module")
@@ -71,12 +73,28 @@ def data_set(data_frame: pandas.DataFrame) -> PhysicalPropertyDataSet:
def test_workflow_data_frame(data_frame):
"""Test that a simple curation workflow can be applied to a data frame."""
+ TARGET_STATES = [
+ TargetState(
+ property_types=[
+ ("Density", 1),
+ ],
+ states=[
+ State(
+ temperature=298.15,
+ pressure=101.325,
+ mole_fractions=(1.0,),
+ ),
+ ],
+ ),
+ ]
+
schema = CurationWorkflowSchema(
component_schemas=[
FilterByTemperatureSchema(
minimum_temperature=290.0, maximum_temperature=300.0
),
FilterByPressureSchema(minimum_pressure=101.3, maximum_pressure=101.4),
+ SelectDataPointsSchema(target_states=TARGET_STATES)
]
)