SynapseML
SynapseML copied to clipboard
Cognitive Services functions and latest spark
Using both the released version of the package (rc3) and the snapshot referenced in #1012, I get the following error when calling LanguageDetector():
java.lang.NoClassDefFoundError: org/apache/spark/ml/util/MLWritable$class
To Reproduce: on a Databricks cluster with Runtime Version 8.2 (includes Spark 3.1.1, Scala 2.12), run the following in a Python notebook:
from mmlspark.cognitive import *
COG_API_KEY = "eddaa4f329a44" # your key here!
COG_API_ENDPOINT = "https://uksouth.api.cognitive.microsoft.com/"
language = LanguageDetector()\
.setTextCol("tweet")\
.setUrl(COG_API_ENDPOINT)\
.setSubscriptionKey(COG_API_KEY)\
.setOutputCol("language")
stack trace
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<command-4205613831081899> in <module>
----> 1 language = LanguageDetector()\
2 .setTextCol("tweet")\
3 .setUrl(COG_API_ENDPOINT)\
4 .setSubscriptionKey(COG_API_KEY)\
5 .setOutputCol("language")
/databricks/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
112 raise TypeError("Method %s forces keyword arguments." % func.__name__)
113 self._input_kwargs = kwargs
--> 114 return func(self, **kwargs)
115 return wrapper
116
/local_disk0/spark-4e5a90f8-f6ee-455f-8815-101186961bee/userFiles-92689190-0ad7-4507-aa3e-744a499bfa41/addedFile5625105897818921472mmlspark_2_11_1_0_0_rc3-86e1e.jar/mmlspark/cognitive/LanguageDetector.py in __init__(self, concurrency, concurrentTimeout, errorCol, handler, language, outputCol, subscriptionKey, text, timeout, url)
40 def __init__(self, concurrency=1, concurrentTimeout=100.0, errorCol=None, handler=None, language=None, outputCol=None, subscriptionKey=None, text=None, timeout=60.0, url=None):
41 super(LanguageDetector, self).__init__()
---> 42 self._java_obj = self._new_java_obj("com.microsoft.ml.spark.cognitive.LanguageDetector")
43 self._cache = {}
44 self.concurrency = Param(self, "concurrency", "concurrency: max number of concurrent calls (default: 1)")
/databricks/spark/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
64 java_obj = getattr(java_obj, name)
65 java_args = [_py2java(sc, arg) for arg in args]
---> 66 return java_obj(*java_args)
67
68 @staticmethod
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1566
1567 answer = self._gateway_client.send_command(command)
-> 1568 return_value = get_return_value(
1569 answer, self._gateway_client, None, self._fqn)
1570
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
108 def deco(*a, **kw):
109 try:
--> 110 return f(*a, **kw)
111 except py4j.protocol.Py4JJavaError as e:
112 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling None.com.microsoft.ml.spark.cognitive.LanguageDetector.
: java.lang.NoClassDefFoundError: org/apache/spark/ml/util/MLWritable$class
at com.microsoft.ml.spark.cognitive.CognitiveServicesBaseWithoutHandler.<init>(CognitiveServiceBase.scala:266)
at com.microsoft.ml.spark.cognitive.CognitiveServicesBase.<init>(CognitiveServiceBase.scala:322)
at com.microsoft.ml.spark.cognitive.TextAnalyticsBase.<init>(TextAnalytics.scala:22)
at com.microsoft.ml.spark.cognitive.LanguageDetector.<init>(TextAnalytics.scala:305)
at com.microsoft.ml.spark.cognitive.LanguageDetector.<init>(TextAnalytics.scala:307)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:250)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
👋 Thanks for opening your first issue here! If you're reporting a 🐞 bug, please make sure you include steps to reproduce it.
java.lang.NoClassDefFoundError: org/apache/spark/ml/util/MLWritable$class
The above error occurs due to a version mismatch. According to your description, you are using Scala 2.12, while MMLSpark supports Scala 2.11. Please try to use Spark 2.4.+ to run your code.
@stephlocke, are you still experiencing this with the latest SynapseML version on a Spark 3 / Scala 2.12 cluster?