
ValueError with sparkML convert

Open GitOP opened this issue 4 years ago • 1 comment

Hello all, I've been trying to convert a SparkML pipeline model to ONNX for a couple of days and have tried two different versions of Spark:

  • 2.4.5 and
  • 3.0.1

Both give the same result. Here is my code:
from onnxmltools import convert_sparkml
from onnxmltools.convert.sparkml.utils import buildInitialTypesSimple

sample_input = test.drop('Time', 'Class', 'Amount') #Dropping unused columns for the model. Only retaining features
print(sample_input)
initial_types = buildInitialTypesSimple(sample_input) 
print(initial_types)
onnx_model = convert_sparkml(pipelineModel, name='Pyspark model', initial_types=initial_types, spark_session=spark)

And the output:
DataFrame[V1: double, V2: double, V3: double, V4: double, V5: double, V6: double, V7: double, V8: double, V9: double, V10: double, V11: double, V12: double, V13: double, V14: double, V15: double, V16: double, V17: double, V18: double, V19: double, V20: double, V21: double, V22: double, V23: double, V24: double, V25: double, V26: double, V27: double, V28: double]
[('V1', FloatTensorType(shape=[1, 1])), ('V2', FloatTensorType(shape=[1, 1])), ('V3', FloatTensorType(shape=[1, 1])), ('V4', FloatTensorType(shape=[1, 1])), ('V5', FloatTensorType(shape=[1, 1])), ('V6', FloatTensorType(shape=[1, 1])), ('V7', FloatTensorType(shape=[1, 1])), ('V8', FloatTensorType(shape=[1, 1])), ('V9', FloatTensorType(shape=[1, 1])), ('V10', FloatTensorType(shape=[1, 1])), ('V11', FloatTensorType(shape=[1, 1])), ('V12', FloatTensorType(shape=[1, 1])), ('V13', FloatTensorType(shape=[1, 1])), ('V14', FloatTensorType(shape=[1, 1])), ('V15', FloatTensorType(shape=[1, 1])), ('V16', FloatTensorType(shape=[1, 1])), ('V17', FloatTensorType(shape=[1, 1])), ('V18', FloatTensorType(shape=[1, 1])), ('V19', FloatTensorType(shape=[1, 1])), ('V20', FloatTensorType(shape=[1, 1])), ('V21', FloatTensorType(shape=[1, 1])), ('V22', FloatTensorType(shape=[1, 1])), ('V23', FloatTensorType(shape=[1, 1])), ('V24', FloatTensorType(shape=[1, 1])), ('V25', FloatTensorType(shape=[1, 1])), ('V26', FloatTensorType(shape=[1, 1])), ('V27', FloatTensorType(shape=[1, 1])), ('V28', FloatTensorType(shape=[1, 1]))]
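
For completeness, the printed initial_types can also be built by hand with FloatTensorType instead of going through buildInitialTypesSimple. The sketch below only mirrors the output above (one [1, 1] float tensor per feature column); it is not a fix for the error:

from onnxmltools.convert.common.data_types import FloatTensorType

# Hand-written equivalent of buildInitialTypesSimple for the 28 feature
# columns V1..V28 shown above: one [1, 1] float tensor per input column.
feature_columns = ['V{}'.format(i) for i in range(1, 29)]
initial_types = [(name, FloatTensorType(shape=[1, 1])) for name in feature_columns]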

Stack trace:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<command-2555591288018935> in <module>
      6 initial_types = buildInitialTypesSimple(sample_input)
      7 print(initial_types)
----> 8 onnx_model = convert_sparkml(pipelineModel, name='Pyspark model', initial_types=initial_types, spark_session = spark )
      9 print(onnx_model)

/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/main.py in convert_sparkml(model, name, initial_types, doc_string, target_opset, targeted_onnx, custom_conversion_functions, custom_shape_calculators, spark_session)
     75     from .sparkml.convert import convert
     76     return convert(model, name, initial_types, doc_string, target_opset, targeted_onnx,
---> 77                    custom_conversion_functions, custom_shape_calculators, spark_session)
     78 
     79 

/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/sparkml/convert.py in convert(model, name, initial_types, doc_string, target_opset, targeted_onnx, custom_conversion_functions, custom_shape_calculators, spark_session)
     73 
     74     # Convert our Topology object into ONNX. The outcome is an ONNX model.
---> 75     onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
     76 
     77     return onnx_model

/databricks/python/lib/python3.7/site-packages/onnxconverter_common/topology.py in convert_topology(topology, model_name, doc_string, target_opset, targeted_onnx, channel_first_inputs)
    774         else:
    775             # Convert the selected operator into some ONNX objects and save them into the container
--> 776             get_converter(operator.type)(scope, operator, container)
    777 
    778     # When calling ModelComponentContainer's add_initializer(...), nothing is added into the input list.

/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/sparkml/operator_converters/gbt_classifier.py in convert_gbt_classifier(scope, operator, container)
     27         regressor_output_names.append(regressor_output.full_name)
     28         regressor_op.outputs.append(regressor_output)
---> 29         convert_decision_tree_regressor(scope, regressor_op, container)
     30         regressor_op.is_evaluated = True
     31 

/databricks/python/lib/python3.7/site-packages/onnxmltools/convert/sparkml/operator_converters/decision_tree_regressor.py in convert_decision_tree_regressor(scope, operator, container)
     26 
     27     container.add_node(op_type, operator.input_full_names, operator.output_full_names,
---> 28                        op_domain='ai.onnx.ml', **attrs)
     29 
     30 

/databricks/python/lib/python3.7/site-packages/onnxconverter_common/container.py in add_node(self, op_type, inputs, outputs, op_domain, op_version, **attrs)
    169                 raise ValueError('Failed to create ONNX node. Undefined attribute pair (%s, %s) found' % (k, v))
    170 
--> 171         node = helper.make_node(op_type, inputs, outputs, **attrs)
    172         node.domain = op_domain
    173 

/databricks/python/lib/python3.7/site-packages/onnx/helper.py in make_node(op_type, inputs, outputs, name, doc_string, domain, **kwargs)
    108         node.attribute.extend(
    109             make_attribute(key, value)
--> 110             for key, value in sorted(kwargs.items()))
    111     return node
    112 

/databricks/python/lib/python3.7/site-packages/onnx/helper.py in <genexpr>(.0)
    108         node.attribute.extend(
    109             make_attribute(key, value)
--> 110             for key, value in sorted(kwargs.items()))
    111     return node
    112 

/databricks/python/lib/python3.7/site-packages/onnx/helper.py in make_attribute(key, value, doc_string)
    388         else:
    389             raise ValueError(
--> 390                 "You passed in an iterable attribute but I cannot figure out "
    391                 "its applicable type.")
    392     else:

ValueError: You passed in an iterable attribute but I cannot figure out its applicable type.
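
For context (not part of the original report): the final ValueError is raised by onnx.helper.make_attribute when a node attribute is an iterable whose element type it cannot map to an ONNX attribute type. The snippet below only reproduces that message in isolation against the helper code shown in the traceback; it does not identify which tree attribute the sparkml GBT converter actually passes here, and newer onnx releases may classify numpy scalars differently:

import numpy as np
from onnx import helper

# A list of plain Python floats is classified as a FLOATS attribute.
ok = helper.make_attribute('values', [1.0, 2.0, 3.0])
print(ok.type)

# A list of numpy.float32 values is neither strings, Python floats, nor
# integrals, so make_attribute (in onnx releases of that era) gives up with
# the same "iterable attribute ... applicable type" ValueError seen above.
try:
    helper.make_attribute('values', [np.float32(1.0), np.float32(2.0)])
except ValueError as exc:
    print(exc)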

Any help/ideas/suggestions would be much appreciated.

GitOP · Dec 08 '20 13:12

The sparkml converters have received very limited maintenance; their unit tests were disabled some time ago. Fixing this issue may require quite some work.

xadupre · Jan 06 '21 16:01