
Custom expectation data docs rendering error with evaluation parameter


Describe the bug I have created a custom expectation that checks whether the column values are True/False, similar to column_value_equal_three. It throws the error below when I use an evaluation parameter.

great_expectations.render.renderer.site_builder - ERROR - An unexpected Exception occurred during data docs rendering.  Because of this error, certain parts of data docs will not be rendered properly and/or may not appear altogether.  Please use the trace, included in this message, to diagnose and repair the underlying issue.  Detailed information follows:
                TypeError: "unhashable type: 'dict'".  Traceback: "Traceback (most recent call last):
  File "/Users/rnayak/.pyenv/versions/venv_gx_17/lib/python3.9/site-packages/great_expectations/render/renderer/site_builder.py", line 474, in build
    rendered_content = self.renderer_class.render(resource)
  File "/Users/rnayak/.pyenv/versions/venv_gx_17/lib/python3.9/site-packages/great_expectations/render/renderer/page_renderer.py", line 674, in render
    ) = expectations.get_grouped_and_ordered_expectations_by_column()
  File "/Users/rnayak/.pyenv/versions/venv_gx_17/lib/python3.9/site-packages/great_expectations/core/expectation_suite.py", line 984, in get_grouped_and_ordered_expectations_by_column
    if column not in expectations_by_column:
TypeError: unhashable type: 'dict'
".

The data docs output looks like the attached image.
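The failing check in the traceback is if column not in expectations_by_column:, where column is still the un-substituted {"$PARAMETER": "column_name"} dict stored in the suite. Dicts are unhashable, so using one in a dictionary membership test raises exactly this error; a minimal illustration:

>>> expectations_by_column = {}
>>> column = {"$PARAMETER": "column_name"}  # kwarg as stored in the suite
>>> column in expectations_by_column
Traceback (most recent call last):
  ...
TypeError: unhashable type: 'dict'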

To Reproduce Below is the custom expectation:

# expect_column_value_to_equal_true.py custom expectation
from typing import Dict, Optional

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.metric_domain_types import MetricDomainTypes
from great_expectations.core.metric_function_types import MetricPartialFunctionTypes
from great_expectations.execution_engine import (
    ExecutionEngine,
    PandasExecutionEngine,
    SqlAlchemyExecutionEngine,
)
from great_expectations.expectations.expectation import (
    ColumnMapExpectation,
    ExpectationValidationResult,
    render_evaluation_parameter_string,
)
from great_expectations.expectations.metrics import (
    ColumnMapMetricProvider,
    column_condition_partial,
)
from great_expectations.compatibility.sqlalchemy import sqlalchemy as sa
from great_expectations.expectations.metrics.metric_provider import metric_partial
from great_expectations.render import (
    CollapseContent,
    RenderedStringTemplateContent,
    RenderedTableContent,
)
from great_expectations.render.renderer.renderer import renderer
from great_expectations.render.util import num_to_str
from great_expectations.validator.metric_configuration import MetricConfiguration


class ColumnValueEqualTrue(ColumnMapMetricProvider):

    condition_metric_name = "column_value.equal_true"

    @column_condition_partial(engine=PandasExecutionEngine)
    def _pandas(cls, column, **kwargs):
        return column == True

    @column_condition_partial(engine=SqlAlchemyExecutionEngine)
    def _sqlalchemy(cls, column, **kwargs):
        return (sa.and_(column.in_([1,0]), column.is_(True)))

    @classmethod
    def _get_evaluation_dependencies(
        cls,
        metric: MetricConfiguration,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[Dict] = None,
    ):
        """Returns a dictionary of given metric names and their corresponding configuration, specifying the metric
        types and their respective domains"""
        dependencies: Dict = super()._get_evaluation_dependencies(
            metric=metric,
            configuration=configuration,
            execution_engine=execution_engine,
            runtime_configuration=runtime_configuration,
        )

        table_domain_kwargs: Dict = {
            k: v for k, v in metric.metric_domain_kwargs.items() if k != "column"
        }
        dependencies["table.column_types"] = MetricConfiguration(
            metric_name="table.column_types",
            metric_domain_kwargs=table_domain_kwargs,
            metric_value_kwargs={
                "include_nested": True,
            },
        )

        return dependencies



class ExpectColumnValueToEqualTrue(ColumnMapExpectation):
    """Expect values in this column to equal Boolean True."""
    examples = [
        {
            "data": {
                "test_true_flag": [True, True, True, True, True],
                "test_false_flag": [False, False, False, False, False]

            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "test_true_flag"},
                    "out": {
                        "success": True,
                    },
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "test_false_flag", "mostly": 0.8},
                    "out": {
                        "success": False,
                    },
                },
            ],
        }
    ]

    map_metric = "column_value.equal_true"

    success_keys = ("mostly",)

    # This dictionary contains default values for any parameters that should have default values
    default_kwarg_values = {
        "mostly": 1.0
    }

    @classmethod
    @renderer(renderer_type="renderer.diagnostic.observed_value")
    @render_evaluation_parameter_string
    def _diagnostic_observed_value_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        runtime_configuration: Optional[dict] = None,
        **kwargs,
    ):
        assert result, "Must provide a result object."

        result_dict = result.result
        if result_dict is None:
            return "--"

        if result_dict.get("observed_value"):
            observed_value = result_dict.get("observed_value")
            if isinstance(observed_value, (int, float)) and not isinstance(
                observed_value, bool
            ):
                return num_to_str(observed_value, precision=10, use_locale=True)
            return str(observed_value)
        elif result_dict.get("unexpected_percent") is not None:
            return (
                num_to_str(result_dict.get("unexpected_percent"), precision=5)
                + "% unexpected"
            )
        else:
            return "--"

    @classmethod
    @renderer(renderer_type="renderer.diagnostic.unexpected_statement")
    @render_evaluation_parameter_string
    def _diagnostic_unexpected_statement_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        runtime_configuration: Optional[dict] = None,
        **kwargs,
    ):
        assert result, "Must provide a result object."

        success = result.success
        result_dict = result.result

        if result.exception_info["raised_exception"]:
            exception_message_template_str = (
                "\n\n$expectation_type raised an exception:\n$exception_message"
            )

            exception_message = RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": exception_message_template_str,
                        "params": {
                            "expectation_type": result.expectation_config.expectation_type,
                            "exception_message": result.exception_info[
                                "exception_message"
                            ],
                        },
                        "tag": "strong",
                        "styling": {
                            "classes": ["text-danger"],
                            "params": {
                                "exception_message": {"tag": "code"},
                                "expectation_type": {
                                    "classes": ["badge", "badge-danger", "mb-2"]
                                },
                            },
                        },
                    },
                }
            )

            exception_traceback_collapse = CollapseContent(
                **{
                    "collapse_toggle_link": "Show exception traceback...",
                    "collapse": [
                        RenderedStringTemplateContent(
                            **{
                                "content_block_type": "string_template",
                                "string_template": {
                                    "template": result.exception_info[
                                        "exception_traceback"
                                    ],
                                    "tag": "code",
                                },
                            }
                        )
                    ],
                }
            )

            return [exception_message, exception_traceback_collapse]

        if success or not result_dict.get("unexpected_count"):
            return []
        else:
            unexpected_count = num_to_str(
                result_dict["unexpected_count"], use_locale=True, precision=20
            )
            unexpected_percent = (
                f"{num_to_str(result_dict['unexpected_percent'], precision=4)}%"
            )
            element_count = num_to_str(
                result_dict["element_count"], use_locale=True, precision=20
            )

            template_str = (
                "\n\n$unexpected_count unexpected values found. "
                "$unexpected_percent of $element_count total rows."
            )

            return [
                RenderedStringTemplateContent(
                    **{
                        "content_block_type": "string_template",
                        "string_template": {
                            "template": template_str,
                            "params": {
                                "unexpected_count": unexpected_count,
                                "unexpected_percent": unexpected_percent,
                                "element_count": element_count,
                            },
                            "tag": "strong",
                            "styling": {"classes": ["text-danger"]},
                        },
                    }
                )
            ]

    @classmethod
    @renderer(renderer_type="renderer.diagnostic.unexpected_table")
    @render_evaluation_parameter_string
    def _diagnostic_unexpected_table_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        runtime_configuration: Optional[dict] = None,
        **kwargs,
    ):
        try:
            result_dict = result.result
        except KeyError:
            return None

        if result_dict is None:
            return None

        if not result_dict.get("partial_unexpected_list") and not result_dict.get(
            "partial_unexpected_counts"
        ):
            return None

        table_rows = []

        if result_dict.get("partial_unexpected_counts"):
            total_count = 0
            for unexpected_count_dict in result_dict.get("partial_unexpected_counts"):
                value = unexpected_count_dict.get("value")
                count = unexpected_count_dict.get("count")
                total_count += count
                if value is not None and value != "":
                    table_rows.append([value, count])
                elif value == "":
                    table_rows.append(["EMPTY", count])
                else:
                    table_rows.append(["null", count])

            if total_count == result_dict.get("unexpected_count"):
                header_row = ["Unexpected Value", "Count"]
            else:
                header_row = ["Sampled Unexpected Values"]
                table_rows = [[row[0]] for row in table_rows]

        else:
            header_row = ["Sampled Unexpected Values"]
            sampled_values_set = set()
            for unexpected_value in result_dict.get("partial_unexpected_list"):
                if unexpected_value:
                    string_unexpected_value = str(unexpected_value)
                elif unexpected_value == "":
                    string_unexpected_value = "EMPTY"
                else:
                    string_unexpected_value = "null"
                if string_unexpected_value not in sampled_values_set:
                    table_rows.append([unexpected_value])
                    sampled_values_set.add(string_unexpected_value)

        unexpected_table_content_block = RenderedTableContent(
            **{
                "content_block_type": "table",
                "table": table_rows,
                "header_row": header_row,
                "styling": {
                    "body": {"classes": ["table-bordered", "table-sm", "mt-3"]}
                },
            }
        )

        return unexpected_table_content_block

    library_metadata = {
        "tags": ["extremely basic math"],
        "contributors": ["@joegargery"],
    }


if __name__ == "__main__":
    ExpectColumnValueToEqualTrue().print_diagnostic_checklist()
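How the custom expectation is loaded isn't shown above, so as an assumption: the module is imported wherever the checkpoint runs, since the ColumnMapExpectation subclass registers itself on import.

# Hypothetical loading step: importing the module is enough, because the
# expectation class registers itself at import time.
from expect_column_value_to_equal_true import ExpectColumnValueToEqualTrue  # noqa: F401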

Create a checkpoint with the batch request below:

      runtime_parameters:
        query: "select count(1)<10 as status from schema.table"
    expectation_suite_name: ce.check_if_true
    evaluation_parameters: { "column_name": "status"}
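For reference, a minimal sketch (the checkpoint name is hypothetical) of triggering this checkpoint from Python and then building data docs, which is the step that raises the TypeError above:

import great_expectations as gx

context = gx.get_context()

# Pass the evaluation parameter that {"$PARAMETER": "column_name"} should resolve to.
result = context.run_checkpoint(
    checkpoint_name="check_if_true_checkpoint",  # hypothetical name
    evaluation_parameters={"column_name": "status"},
)

# Data docs rendering is where the unhashable-dict error surfaces.
context.build_data_docs()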

Expectation suite:

{
    "data_asset_type": null,
    "expectation_suite_name": "ce.check_if_true",
    "expectations": [
        {
            "expectation_type": "expect_column_value_to_equal_true",
            "kwargs": {
                "column": {
                    "$PARAMETER": "column_name"
                }
            },
            "meta": {}
        }
    ],
    "ge_cloud_id": null,
    "meta": {
        "great_expectations_version": "0.17.1"
    }
}
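Equivalently, a sketch of building the same suite in Python; note that the $PARAMETER dict is stored verbatim in kwargs, and that stored value is what the suite page renderer later tries to use as a dictionary key:

from great_expectations.core import ExpectationSuite
from great_expectations.core.expectation_configuration import ExpectationConfiguration

suite = ExpectationSuite(expectation_suite_name="ce.check_if_true")
suite.add_expectation(
    ExpectationConfiguration(
        expectation_type="expect_column_value_to_equal_true",
        # Stored as-is; substitution to "status" only happens at validation time.
        kwargs={"column": {"$PARAMETER": "column_name"}},
    )
)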

Expected behavior Data docs should render without any errors.

Environment (please complete the following information):

  • Operating System: MacOS
  • Great Expectations Version: 0.17.1
  • Data Source: Redshift
  • Cloud environment: Airflow

Additional context I am sure this has to do with the dict created when the evaluation parameter is replaced, but I am not sure where exactly this needs to be fixed.
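Purely as an illustration of the kind of guard that might avoid the crash (a hypothetical helper, not a reviewed patch to great_expectations), the grouping key built in get_grouped_and_ordered_expectations_by_column could be coerced to something hashable when the $PARAMETER dict has not been substituted:

from collections.abc import Hashable

def _group_key(kwargs: dict) -> Hashable:
    """Illustrative helper: return a hashable grouping key even when the
    "column" kwarg is still an un-substituted {"$PARAMETER": ...} dict."""
    column = kwargs.get("column", "_nocolumn")
    return column if isinstance(column, Hashable) else str(column)

print(_group_key({"column": {"$PARAMETER": "column_name"}}))  # "{'$PARAMETER': 'column_name'}"
print(_group_key({"column": "status"}))                       # "status"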

ramananayak, Dec 18 '23