onnxmltools
onnxmltools copied to clipboard
ValueError with sparkML convert
Hello all, I've been trying to convert a Spark ML pipeline model to ONNX for a couple of days. I have tried two different versions of Spark:
- 2.4.5, and
- 3.0.1
Both give the same result. Here is my code:
from onnxmltools import convert_sparkml
from onnxmltools.convert.sparkml.utils import buildInitialTypesSimple
sample_input = test.drop('Time', 'Class', 'Amount') #Dropping unused columns for the model. Only retaining features
print(sample_input)
initial_types = buildInitialTypesSimple(sample_input)
print(initial_types)
onnx_model = convert_sparkml(pipelineModel, name='Pyspark model', initial_types=initial_types, spark_session = spark )
And the output:
DataFrame[V1: double, V2: double, V3: double, V4: double, V5: double, V6: double, V7: double, V8: double, V9: double, V10: double, V11: double, V12: double, V13: double, V14: double, V15: double, V16: double, V17: double, V18: double, V19: double, V20: double, V21: double, V22: double, V23: double, V24: double, V25: double, V26: double, V27: double, V28: double]
[('V1', FloatTensorType(shape=[1, 1])), ('V2', FloatTensorType(shape=[1, 1])), ('V3', FloatTensorType(shape=[1, 1])), ('V4', FloatTensorType(shape=[1, 1])), ('V5', FloatTensorType(shape=[1, 1])), ('V6', FloatTensorType(shape=[1, 1])), ('V7', FloatTensorType(shape=[1, 1])), ('V8', FloatTensorType(shape=[1, 1])), ('V9', FloatTensorType(shape=[1, 1])), ('V10', FloatTensorType(shape=[1, 1])), ('V11', FloatTensorType(shape=[1, 1])), ('V12', FloatTensorType(shape=[1, 1])), ('V13', FloatTensorType(shape=[1, 1])), ('V14', FloatTensorType(shape=[1, 1])), ('V15', FloatTensorType(shape=[1, 1])), ('V16', FloatTensorType(shape=[1, 1])), ('V17', FloatTensorType(shape=[1, 1])), ('V18', FloatTensorType(shape=[1, 1])), ('V19', FloatTensorType(shape=[1, 1])), ('V20', FloatTensorType(shape=[1, 1])), ('V21', FloatTensorType(shape=[1, 1])), ('V22', FloatTensorType(shape=[1, 1])), ('V23', FloatTensorType(shape=[1, 1])), ('V24', FloatTensorType(shape=[1, 1])), ('V25', FloatTensorType(shape=[1, 1])), ('V26', FloatTensorType(shape=[1, 1])), ('V27', FloatTensorType(shape=[1, 1])), ('V28', FloatTensorType(shape=[1, 1]))]
Stack trace:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<command-2555591288018935> in <module>
6 initial_types = buildInitialTypesSimple(sample_input)
7 print(initial_types)
----> 8 onnx_model = convert_sparkml(pipelineModel, name='Pyspark model', initial_types=initial_types, spark_session = spark )
9 print(onnx_model)
/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/main.py in convert_sparkml(model, name, initial_types, doc_string, target_opset, targeted_onnx, custom_conversion_functions, custom_shape_calculators, spark_session)
75 from .sparkml.convert import convert
76 return convert(model, name, initial_types, doc_string, target_opset, targeted_onnx,
---> 77 custom_conversion_functions, custom_shape_calculators, spark_session)
78
79
/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/sparkml/convert.py in convert(model, name, initial_types, doc_string, target_opset, targeted_onnx, custom_conversion_functions, custom_shape_calculators, spark_session)
73
74 # Convert our Topology object into ONNX. The outcome is an ONNX model.
---> 75 onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
76
77 return onnx_model
/databricks/python/lib/python3.7/site-packages/onnxconverter_common/topology.py in convert_topology(topology, model_name, doc_string, target_opset, targeted_onnx, channel_first_inputs)
774 else:
775 # Convert the selected operator into some ONNX objects and save them into the container
--> 776 get_converter(operator.type)(scope, operator, container)
777
778 # When calling ModelComponentContainer's add_initializer(...), nothing is added into the input list.
/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/sparkml/operator_converters/gbt_classifier.py in convert_gbt_classifier(scope, operator, container)
27 regressor_output_names.append(regressor_output.full_name)
28 regressor_op.outputs.append(regressor_output)
---> 29 convert_decision_tree_regressor(scope, regressor_op, container)
30 regressor_op.is_evaluated = True
31
/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/sparkml/operator_converters/decision_tree_regressor.py in convert_decision_tree_regressor(scope, operator, container)
26
27 container.add_node(op_type, operator.input_full_names, operator.output_full_names,
---> 28 op_domain='ai.onnx.ml', **attrs)
29
30
/databricks/python/lib/python3.7/site-packages/onnxconverter_common/container.py in add_node(self, op_type, inputs, outputs, op_domain, op_version, **attrs)
169 raise ValueError('Failed to create ONNX node. Undefined attribute pair (%s, %s) found' % (k, v))
170
--> 171 node = helper.make_node(op_type, inputs, outputs, **attrs)
172 node.domain = op_domain
173
/databricks/python/lib/python3.7/site-packages/onnx/helper.py in make_node(op_type, inputs, outputs, name, doc_string, domain, **kwargs)
108 node.attribute.extend(
109 make_attribute(key, value)
--> 110 for key, value in sorted(kwargs.items()))
111 return node
112
/databricks/python/lib/python3.7/site-packages/onnx/helper.py in <genexpr>(.0)
108 node.attribute.extend(
109 make_attribute(key, value)
--> 110 for key, value in sorted(kwargs.items()))
111 return node
112
/databricks/python/lib/python3.7/site-packages/onnx/helper.py in make_attribute(key, value, doc_string)
388 else:
389 raise ValueError(
--> 390 "You passed in an iterable attribute but I cannot figure out "
391 "its applicable type.")
392 else:
ValueError: You passed in an iterable attribute but I cannot figure out its applicable type.
Any help, ideas, or suggestions would be much appreciated.
There has been very limited maintenance on the sparkml converters, and their unit tests were disabled some time ago. Fixing this issue may require quite a lot of work.