nlu icon indicating copy to clipboard operation
nlu copied to clipboard

predict() - pyspark IndexError on python 3.11.4

Open sonurdogan opened this issue 1 year ago • 0 comments

Python version: 3.11.4
pyspark version: 3.1.2

model.predict('I love NLU! <3')
sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]

Warning::Spark Session already created, some configs may not take.
Traceback (most recent call last):
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/serializers.py", line 437, in dumps
    return cloudpickle.dumps(obj, pickle_protocol)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 72, in dumps
    cp.dump(obj)
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 540, in dump
    return Pickler.dump(self, obj)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 630, in reducer_override
    return self._function_reduce(obj)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 503, in _function_reduce
    return self._dynamic_function_reduce(obj)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 484, in _dynamic_function_reduce
    state = _function_getstate(func)
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 156, in _function_getstate
    f_globals_ref = _extract_code_globals(func.__code__)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle.py", line 236, in _extract_code_globals
    out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle.py", line 236, in <setcomp>
    out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
                 ~~~~~^^^^^^^
IndexError: tuple index out of range
Traceback (most recent call last):
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/serializers.py", line 437, in dumps
    return cloudpickle.dumps(obj, pickle_protocol)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 72, in dumps
    cp.dump(obj)
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 540, in dump
    return Pickler.dump(self, obj)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 630, in reducer_override
    return self._function_reduce(obj)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 503, in _function_reduce
    return self._dynamic_function_reduce(obj)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 484, in _dynamic_function_reduce
    state = _function_getstate(func)
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle_fast.py", line 156, in _function_getstate
    f_globals_ref = _extract_code_globals(func.__code__)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle.py", line 236, in _extract_code_globals
    out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/cloudpickle/cloudpickle.py", line 236, in <setcomp>
    out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
                 ~~~~~^^^^^^^
IndexError: tuple index out of range

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users//nlu/nlu/pipe/pipeline.py", line 485, in predict
    return __predict__(self, data, output_level, positions, keep_stranger_features, metadata, multithread,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//nlu/nlu/pipe/utils/predict_helper.py", line 267, in __predict__
    pipe.fit()
  File "/Users//nlu/nlu/pipe/pipeline.py", line 204, in fit
    self.vanilla_transformer_pipe = self.spark_estimator_pipe.fit(self.get_sample_spark_dataframe())
                                                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//nlu/nlu/pipe/pipeline.py", line 103, in get_sample_spark_dataframe
    return sparknlp.start().createDataFrame(data=text_df)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/sql/session.py", line 673, in createDataFrame
    return super(SparkSession, self).createDataFrame(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/sql/pandas/conversion.py", line 300, in createDataFrame
    return self._create_dataframe(data, schema, samplingRatio, verifySchema)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/sql/session.py", line 701, in _create_dataframe
    jrdd = self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd())
                                           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/rdd.py", line 2618, in _to_java_object_rdd
    return self.ctx._jvm.SerDeUtil.pythonToJava(rdd._jrdd, True)
                                                ^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/rdd.py", line 2949, in _jrdd
    wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/rdd.py", line 2828, in _wrap_function
    pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
                                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/rdd.py", line 2814, in _prepare_for_python_RDD
    pickled_command = ser.dumps(command)
                      ^^^^^^^^^^^^^^^^^^
  File "/Users//miniconda3/lib/python3.11/site-packages/pyspark/serializers.py", line 447, in dumps
    raise pickle.PicklingError(msg)
_pickle.PicklingError: Could not serialize object: IndexError: tuple index out of range

sonurdogan avatar Oct 27 '23 12:10 sonurdogan