great_expectations
great_expectations copied to clipboard
SimpleCheckpoint RepresenterError: cannot represent an object with BigQuery
Describe the bug
After getting a BatchRequest and an ExpectationSuite, I tried to programmatically create a SimpleCheckpoint and then persist it to a config file. An error occurred when converting the Checkpoint to a YAML string: RepresenterError: cannot represent an object
To Reproduce: the code below reproduces the error.
import datetime
import pandas as pd
import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.data_context.types.resource_identifiers import ExpectationSuiteIdentifier
from great_expectations.exceptions import DataContextError
from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest
from great_expectations.checkpoint import SimpleCheckpoint
from ruamel.yaml import YAML
yaml = YAML()
context = ge.data_context.DataContext()
expectation_suite_name = "taxi.check"
try:
suite = context.get_expectation_suite(expectation_suite_name=expectation_suite_name)
print(f'Loaded ExpectationSuite "{suite.expectation_suite_name}" containing {len(suite.expectations)} expectations.')
except DataContextError:
suite = context.create_expectation_suite(expectation_suite_name=expectation_suite_name)
print(f'Created ExpectationSuite "{suite.expectation_suite_name}".')
batch_request = BatchRequest(
datasource_name="bq_taxi",
data_connector_name="default_inferred_data_connector_name",
data_asset_name="tlc_yellow_trips_2018", # this is the name of the table you want to retrieve
batch_spec_passthrough={
"bigquery_temp_table": "ge_temp"
}, # this is the name of the table you would like to use a 'temp_table'
)
validator = context.get_validator(
batch_request=batch_request, expectation_suite_name=expectation_suite_name
)
validator.expect_table_row_count_to_equal(112234626)
suite_config = validator.get_expectation_suite()
context.save_expectation_suite(expectation_suite=suite_config, expectation_suite_name=expectation_suite_name)
checkpoint_config = {
"class_name": "SimpleCheckpoint",
"validations": [
{
"batch_request": batch_request,
"expectation_suite_name": expectation_suite_name
}
]
}
checkpoint = SimpleCheckpoint(
f"checkpoint_{expectation_suite_name}",
context,
**checkpoint_config
)
# RepresenterError: cannot represent an object
checkpoin_config = checkpoint.get_substituted_config().to_yaml_str()
context.add_checkpoint(**yaml.load(checkpoin_config))
Related configs
datasources:
bq_taxi:
data_connectors:
default_runtime_data_connector_name:
module_name: great_expectations.datasource.data_connector
class_name: RuntimeDataConnector
batch_identifiers:
- default_identifier_name
default_inferred_data_connector_name:
module_name: great_expectations.datasource.data_connector
class_name: InferredAssetSqlDataConnector
execution_engine:
connection_string: bigquery://bigquery-public-data/new_york_taxi_trips
module_name: great_expectations.execution_engine
class_name: SqlAlchemyExecutionEngine
module_name: great_expectations.datasource
class_name: Datasource
// expectations/taxi/check.json
{
"data_asset_type": null,
"expectation_suite_name": "taxi.check",
"expectations": [
{
"expectation_type": "expect_table_row_count_to_equal",
"ge_cloud_id": null,
"kwargs": {
"value": 112234626
},
"meta": {}
}
],
"ge_cloud_id": null,
"meta": {
"great_expectations_version": "0.13.43"
}
}
Expected behavior
When converting the checkpoint to a YAML config using checkpoint.get_substituted_config().to_yaml_str(), a YAML string should be returned.
Instead, the following error is raised.
---------------------------------------------------------------------------
RepresenterError Traceback (most recent call last)
/tmp/ipykernel_15378/3999394519.py in <module>
----> 1 checkpoint.get_substituted_config().to_yaml_str()
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/great_expectations/data_context/types/base.py in to_yaml_str(self)
105 :returns a YAML string containing the project configuration
106 """
--> 107 return object_to_yaml_str(self.commented_map)
108
109 def to_json_dict(self) -> dict:
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/great_expectations/data_context/types/base.py in object_to_yaml_str(obj)
42 output_str: str
43 with StringIO() as string_stream:
---> 44 yaml.dump(obj, string_stream)
45 output_str = string_stream.getvalue()
46 return output_str
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/main.py in dump(self, data, stream, transform)
568 ''.format(self.__class__.__name__)
569 )
--> 570 self._context_manager.dump(data)
571 else: # old style
572 if stream is None:
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/main.py in dump(self, data)
913 self.init_output(data)
914 try:
--> 915 self._yaml.representer.represent(data)
916 except AttributeError:
917 # nprint(dir(dumper._representer))
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent(self, data)
78 def represent(self, data):
79 # type: (Any) -> None
---> 80 node = self.represent_data(data)
81 self.serializer.serialize(node)
82 self.represented_objects = {}
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_data(self, data)
101 data_types = type(data).__mro__
102 if data_types[0] in self.yaml_representers:
--> 103 node = self.yaml_representers[data_types[0]](self, data)
104 else:
105 for data_type in data_types:
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_dict(self, data)
1032 else:
1033 tag = 'tag:yaml.org,2002:map'
-> 1034 return self.represent_mapping(tag, data)
1035
1036 def represent_list(self, data):
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_mapping(self, tag, mapping, flow_style)
867 item_count += 1
868 node_key = self.represent_key(item_key)
--> 869 node_value = self.represent_data(item_value)
870 item_comment = item_comments.get(item_key)
871 if item_comment:
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_data(self, data)
101 data_types = type(data).__mro__
102 if data_types[0] in self.yaml_representers:
--> 103 node = self.yaml_representers[data_types[0]](self, data)
104 else:
105 for data_type in data_types:
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_list(self, data)
309 # break
310 # if not pairs:
--> 311 return self.represent_sequence('tag:yaml.org,2002:seq', data)
312
313 # value = []
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_sequence(self, tag, sequence, flow_style)
775 item_comments = {}
776 for idx, item in enumerate(sequence):
--> 777 node_item = self.represent_data(item)
778 self.merge_comments(node_item, item_comments.get(idx))
779 if not (isinstance(node_item, ScalarNode) and not node_item.style):
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_data(self, data)
101 data_types = type(data).__mro__
102 if data_types[0] in self.yaml_representers:
--> 103 node = self.yaml_representers[data_types[0]](self, data)
104 else:
105 for data_type in data_types:
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_dict(self, data)
319 def represent_dict(self, data):
320 # type: (Any) -> Any
--> 321 return self.represent_mapping('tag:yaml.org,2002:map', data)
322
323 def represent_ordereddict(self, data):
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_mapping(self, tag, mapping, flow_style)
867 item_count += 1
868 node_key = self.represent_key(item_key)
--> 869 node_value = self.represent_data(item_value)
870 item_comment = item_comments.get(item_key)
871 if item_comment:
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_data(self, data)
111 node = self.yaml_multi_representers[None](self, data)
112 elif None in self.yaml_representers:
--> 113 node = self.yaml_representers[None](self, data)
114 else:
115 node = ScalarNode(None, str(data))
~/.pyenv/versions/3.9.0/envs/ge_env/lib/python3.9/site-packages/ruamel/yaml/representer.py in represent_undefined(self, data)
352 def represent_undefined(self, data):
353 # type: (Any) -> None
--> 354 raise RepresenterError(_F('cannot represent an object: {data!s}', data=data))
355
356
RepresenterError: cannot represent an object: {
"datasource_name": "bq_taxi",
"data_connector_name": "default_inferred_data_connector_name",
"data_asset_name": "tlc_yellow_trips_2018",
"batch_spec_passthrough": {
"bigquery_temp_table": "ge_temp"
}
}
Environment:
- Operating System: Ubuntu 21.04
- Great Expectations Version: 0.13.43
- API V3 is used
- BigQuery DataSource used
Hey @xudong-sph, thanks for surfacing this! We'll review internally and be in touch.
@xudong-sph were you able to attempt what you were trying before in a more straightforward way? Is this still an issue?
Is this issue still relevant? If so, what is blocking it? Is there anything you can do to help move it forward?
This issue has been automatically marked as stale because it has not had recent activity.
It will be closed if no further activity occurs. Thank you for your contributions 🙇