evalml
evalml copied to clipboard
Raise an error if the target column has an invalid logical type
trafficstars
- As a user of EvalML, I expect EvalML to check the Logical Type of my target column to determine if it is valid.
- If it is not valid, but can be cast to the correct type, I expect EvalML to change the Logical Type.
- If it is not valid, and cannot be cast to the correct type, I expect EvalML to raise an error.
def check_target_logical_type(y, problem_type):
    """Check that the target's logical type suits the given problem type.

    The logical type is read from ``y.ww.schema.logical_type`` and is only
    inspected for the problem types handled below; any other problem type
    returns ``y`` untouched.

    Args:
        y: Target data exposing a ``ww`` accessor with an initialized schema.
        problem_type: The ``ProblemTypes`` member describing the problem.

    Returns:
        ``y`` itself when its logical type is already acceptable (or the
        problem type is not checked here). For a multiclass problem whose
        target is not ``Categorical``, the value returned by
        ``y.ww.set_logical_type("Categorical")``.

    Raises:
        ValueError: If a regression or time series regression target is not
            ``Integer``, ``IntegerNullable`` or ``Double``, or if a binary
            target is not ``Boolean``, ``BooleanNullable`` or ``Categorical``.
    """
    regression_problems = [
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    ]

    if problem_type in regression_problems:
        numeric_types = (Integer, IntegerNullable, Double)
        if not isinstance(y.ww.schema.logical_type, numeric_types):
            raise ValueError(
                "Regression problem type requires a Integer, IntegerNullable or Double target",
            )
        return y

    if problem_type == ProblemTypes.MULTICLASS:
        if isinstance(y.ww.schema.logical_type, Categorical):
            return y
        # Not categorical yet: coerce rather than reject.
        return y.ww.set_logical_type("Categorical")

    if problem_type == ProblemTypes.BINARY:
        binary_types = (Boolean, BooleanNullable, Categorical)
        if not isinstance(y.ww.schema.logical_type, binary_types):
            raise ValueError(
                "Binary problem type requires a Boolean, BooleanNullable or Categorical target",
            )

    return y
Tests
def test_check_target_logical_type():
    """Exercise check_target_logical_type with Integer and Categorical targets."""
    # An Integer target is accepted for both regression problem types.
    integer_target = pd.Series([1, 2, 2, 3, 3, 1], dtype="int64")
    integer_target.ww.init(logical_type="Integer")
    for regression_problem in (
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    ):
        check_target_logical_type(integer_target, regression_problem)

    # The same Integer target is rejected for a binary problem ...
    with pytest.raises(ValueError, match="Binary problem type requires a"):
        check_target_logical_type(integer_target, ProblemTypes.BINARY)

    # ... and converted to Categorical for a multiclass problem.
    converted_target = check_target_logical_type(
        integer_target,
        ProblemTypes.MULTICLASS,
    )
    converted_logical_type = converted_target.ww.schema.logical_type
    assert converted_logical_type.__class__ == Categorical

    # A Categorical target is rejected for a regression problem.
    categorical_target = pd.Series(["red", "blue", "blue"], dtype="category")
    categorical_target.ww.init(logical_type="Categorical")
    with pytest.raises(ValueError, match="Regression problem type requires a"):
        check_target_logical_type(categorical_target, ProblemTypes.REGRESSION)