docker-spark
"Exception: Java gateway process exited before sending its port number" error when trying to run a Spark cluster from a Python app
I am trying to create a Python app that uses a Spark cluster built with Docker. This is my docker-compose file:
version: "3.6"
services:
webapp:
container_name: webapp
image: todhm/flask_spark_webapp
build:
context: ./flask_with_spark
working_dir: /app
command: gunicorn -b 0.0.0.0:8000 --reload -w 4 wsgi:app
networks:
- sparknetwork
ports:
- "8000:8000"
volumes:
- ./flask_with_spark:/app
depends_on:
- spark-master
environment:
- SPARK_APPLICATION_PYTHON_LOCATION =/app/wsgi.py
- SPARK_MASTER_NAME=spark-master
- SPARK_MASTER_PORT=7077
spark-master:
image: bde2020/spark-master:2.3.0-hadoop2.7
container_name: spark-master
networks:
- sparknetwork
ports:
- "8080:8080"
- "7077:7077"
environment:
- INIT_DAEMON_STEP=setup_spark
spark-worker-1:
image: bde2020/spark-worker:2.3.0-hadoop2.7
container_name: spark-worker-1
networks:
- sparknetwork
depends_on:
- spark-master
ports:
- "8081:8081"
environment:
- "SPARK_MASTER=spark://spark-master:7077"
networks:
sparknetwork:
However, when I try to create a PySpark session in the webapp container with the following configuration, it gives me an error:
```python
spark = SparkSession.\
    builder.\
    master("spark://spark-master:7077").\
    config("spark.submit.deployMode", "cluster").\
    config("spark.jars.packages", "org.mongodb.spark:mongo-spark-connector_2.11:2.2.3").\
    config("spark.executor.memory", self.executor_memory).\
    getOrCreate()
```
```
Traceback (most recent call last):
  File "/app/spark/tests.py", line 36, in test_write_duplicate_names
    self.sa.return_all_books()
  File "/app/spark/sparkapp.py", line 37, in return_all_books
    self.create_spark_app()
  File "/app/spark/sparkapp.py", line 33, in create_spark_app
    getOrCreate()
  File "/usr/local/lib/python3.4/dist-packages/pyspark/sql/session.py", line 173, in getOrCreate
    sc = SparkContext.getOrCreate(sparkConf)
  File "/usr/local/lib/python3.4/dist-packages/pyspark/context.py", line 343, in getOrCreate
    SparkContext(conf=conf or SparkConf())
  File "/usr/local/lib/python3.4/dist-packages/pyspark/context.py", line 115, in __init__
    SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
  File "/usr/local/lib/python3.4/dist-packages/pyspark/context.py", line 292, in _ensure_initialized
    SparkContext._gateway = gateway or launch_gateway(conf)
  File "/usr/local/lib/python3.4/dist-packages/pyspark/java_gateway.py", line 93, in launch_gateway
    raise Exception("Java gateway process exited before sending its port number")
Exception: Java gateway process exited before sending its port number
```
How can I solve this problem?
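One quick way to narrow this kind of failure down is to check whether the webapp container can see a JVM at all, since pyspark launches a Java gateway through spark-submit. A minimal check, using only the Python standard library, might look like this:

```python
import os
import shutil

# pyspark needs a JVM: either JAVA_HOME must point at a JDK/JRE inside this
# container, or a `java` binary must be on PATH.
java_home = os.environ.get("JAVA_HOME")
java_on_path = shutil.which("java")

print("JAVA_HOME:", java_home)
print("java on PATH:", java_on_path)

if not java_home and not java_on_path:
    raise RuntimeError("No JVM visible in this container; the Java gateway cannot start")
```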
@todhm
In my case, the error output starts with `JAVA_HOME is not set`. I tried setting JAVA_HOME manually, but it was still not working:
```
JAVA_HOME is not set
Traceback (most recent call last):
  File "run.py", line 1, in <module>
    from app import app
  File "./app/__init__.py", line 6, in <module>
    from app import route
  File "./app/route.py", line 4, in <module>
    from app.resources.recommendation import Recommendation, GetRecommendation, Model
  File "./app/resources/recommendation.py", line 5, in <module>
    from app.core import core
  File "./app/core/core.py", line 15, in <module>
    sc = SparkContext.getOrCreate()
  File "/usr/local/lib/python3.7/site-packages/pyspark/context.py", line 367, in getOrCreate
    SparkContext(conf=conf or SparkConf())
  File "/usr/local/lib/python3.7/site-packages/pyspark/context.py", line 133, in __init__
    SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
  File "/usr/local/lib/python3.7/site-packages/pyspark/context.py", line 316, in _ensure_initialized
    SparkContext._gateway = gateway or launch_gateway(conf)
  File "/usr/local/lib/python3.7/site-packages/pyspark/java_gateway.py", line 46, in launch_gateway
    return _launch_gateway(conf)
  File "/usr/local/lib/python3.7/site-packages/pyspark/java_gateway.py", line 108, in _launch_gateway
    raise Exception("Java gateway process exited before sending its port number")
Exception: Java gateway process exited before sending its port number
```
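For reference, forcing JAVA_HOME from Python before the context is created would look roughly like the sketch below. The JDK path is only a placeholder and assumes a JDK is actually installed in the webapp image; if none is installed, it has to be added to the image first.

```python
import os

from pyspark import SparkContext

# Set JAVA_HOME *before* the SparkContext is created, so that the spark-submit
# subprocess that launches the Java gateway inherits it. The path below is a
# placeholder for wherever the image actually installs a JDK.
os.environ.setdefault("JAVA_HOME", "/usr/lib/jvm/java-8-openjdk-amd64")

sc = SparkContext.getOrCreate()
```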
Has the error been solved?