openff-evaluator icon indicating copy to clipboard operation
openff-evaluator copied to clipboard

Validating curation models with "before" breaks CurationWorkflowSchema

Open lilyminium opened this issue 3 months ago • 3 comments

See https://github.com/openforcefield/openff-evaluator/pull/584#discussion_r2244495821

For more context: when I adapt the test in test_workflow.py as shown in the diff below, it fails. This seems to affect only the selection filters; I'm not sure why. The failure goes away when I change the validation to happen "after" instead of "before".

diff --git a/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py b/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py
index e75459c..e8d67fb 100644
--- a/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py
+++ b/openff/evaluator/_tests/test_datasets/test_curation/test_workflow.py
@@ -12,6 +12,7 @@ from openff.evaluator.datasets.curation.components.filtering import (
     FilterByPressureSchema,
     FilterByTemperatureSchema,
 )
+from openff.evaluator.datasets.curation.components.selection import SelectDataPointsSchema
 from openff.evaluator.datasets.curation.workflow import (
     CurationWorkflow,
     CurationWorkflowSchema,
@@ -19,6 +20,7 @@ from openff.evaluator.datasets.curation.workflow import (
 from openff.evaluator.properties import Density
 from openff.evaluator.substances import Substance
 from openff.evaluator.thermodynamics import ThermodynamicState
+from openff.evaluator.datasets.curation.components.selection import State, TargetState
 
 
 @pytest.fixture(scope="module")
@@ -71,12 +73,28 @@ def data_set(data_frame: pandas.DataFrame) -> PhysicalPropertyDataSet:
 def test_workflow_data_frame(data_frame):
     """Test that a simple curation workflow can be applied to a data frame."""
 
+    TARGET_STATES = [
+        TargetState(
+            property_types=[
+                ("Density", 1),
+            ],
+            states=[
+                State(
+                    temperature=298.15,
+                    pressure=101.325,
+                    mole_fractions=(1.0,),
+                ),
+            ],
+        ),
+    ]
+
     schema = CurationWorkflowSchema(
         component_schemas=[
             FilterByTemperatureSchema(
                 minimum_temperature=290.0, maximum_temperature=300.0
             ),
             FilterByPressureSchema(minimum_pressure=101.3, maximum_pressure=101.4),
+            SelectDataPointsSchema(target_states=TARGET_STATES)
         ]
     )

lilyminium avatar Jul 31 '25 06:07 lilyminium