evalml Raise an error if the target column has an invalid logical type

Raise an error if the target column has an invalid logical type

Open gsheni opened this issue 2 years ago • 0 comments

trafficstars

As a user of EvalML, I expect EvalML to check the logical type of my target column to determine whether it is valid for the given problem type.
- If it is not valid but can be cast to a valid type, I expect EvalML to change the logical type.
- If it is not valid and cannot be cast to a valid type, I expect EvalML to raise an error.

def check_target_logical_type(y, problem_type):
    """Validate the target's Woodwork logical type against the problem type.

    Rules applied, in order:

    * Regression and time series regression: the logical type must be an
      instance of ``Integer``, ``IntegerNullable`` or ``Double``; otherwise
      a ``ValueError`` is raised.
    * Multiclass: a target whose logical type is not ``Categorical`` is
      passed through ``y.ww.set_logical_type("Categorical")`` and the
      result of that call is returned.
    * Binary: the logical type must be an instance of ``Boolean``,
      ``BooleanNullable`` or ``Categorical``; otherwise a ``ValueError``
      is raised.
    * Any other problem type: ``y`` is returned without being inspected.

    Args:
        y: Target whose ``ww`` accessor exposes ``schema.logical_type``.
        problem_type: The ``ProblemTypes`` member describing the task.

    Returns:
        ``y`` itself, or for a non-Categorical multiclass target, whatever
        ``y.ww.set_logical_type("Categorical")`` returns.

    Raises:
        ValueError: If a regression or binary target has a logical type
            outside the accepted set for that problem type.
    """
    regression_problems = (
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    )

    # The schema is read inside each branch on purpose: problem types that
    # match none of the branches must not touch ``y.ww.schema`` at all.
    if problem_type in regression_problems:
        numeric_types = (Integer, IntegerNullable, Double)
        if not isinstance(y.ww.schema.logical_type, numeric_types):
            raise ValueError(
                "Regression problem type requires a Integer, IntegerNullable or Double target",
            )
        return y

    if problem_type == ProblemTypes.MULTICLASS:
        if isinstance(y.ww.schema.logical_type, Categorical):
            return y
        # Multiclass targets are coerced rather than rejected.
        return y.ww.set_logical_type("Categorical")

    if problem_type == ProblemTypes.BINARY:
        binary_types = (Boolean, BooleanNullable, Categorical)
        if not isinstance(y.ww.schema.logical_type, binary_types):
            raise ValueError(
                "Binary problem type requires a Boolean, BooleanNullable or Categorical target",
            )

    return y

Tests

def test_check_target_logical_type():
    """Exercise check_target_logical_type for each handled problem type."""
    # An Integer target is accepted as-is by both regression problem types.
    int_target = pd.Series([1, 2, 2, 3, 3, 1], dtype="int64")
    int_target.ww.init(logical_type="Integer")
    for regression_problem in (
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    ):
        check_target_logical_type(int_target, regression_problem)

    # Integer is not an accepted logical type for binary problems.
    with pytest.raises(ValueError, match="Binary problem type requires a"):
        check_target_logical_type(int_target, ProblemTypes.BINARY)

    # Multiclass converts the target to Categorical instead of raising.
    converted = check_target_logical_type(int_target, ProblemTypes.MULTICLASS)
    assert type(converted.ww.schema.logical_type) is Categorical

    # A Categorical target is rejected for regression.
    cat_target = pd.Series(["red", "blue", "blue"], dtype="category")
    cat_target.ww.init(logical_type="Categorical")
    with pytest.raises(ValueError, match="Regression problem type requires a"):
        check_target_logical_type(cat_target, ProblemTypes.REGRESSION)

Mar 28 '23 18:03 gsheni

evalml evalml copied to clipboard

Raise an error if the target column has an invalid logical type

Tests

evalml
evalml copied to clipboard