Describe the bug
this error happend occassionally:
org.apache.spark.SparkException: External scheduler cannot be instantiated
at org.apache.spark.SparkContext$.org$apache$spark$SparkContext$$createTaskScheduler(SparkContext.scala:2794)
at org.apache.spark.SparkContext.(SparkContext.scala:493)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:930)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:921)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at cn.yingmi.advisor.spark.asset.util.SparkUtils$.setSparkConf(SparkUtils.scala:24)
at cn.yingmi.advisor.spark.asset.job.AbstractJob$class.main(AbstractJob.scala:56)
at cn.yingmi.advisor.spark.asset.job.DataCleaningJob$.main(DataCleaningJob.scala:10)
at cn.yingmi.advisor.spark.asset.job.DataCleaningJob.main(DataCleaningJob.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: io.fabric8.kubernetes.client.KubernetesClientException: Operation: [get] for kind: [Pod] with name: [isac-asset-data-cleaning-316] in namespace: [tp] failed.
at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:64)
at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:72)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.getMandatory(BaseOperation.java:237)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.get(BaseOperation.java:170)
at org.apache.spark.scheduler.cluster.k8s.ExecutorPodsAllocator$$anonfun$1.apply(ExecutorPodsAllocator.scala:57)
at org.apache.spark.scheduler.cluster.k8s.ExecutorPodsAllocator$$anonfun$1.apply(ExecutorPodsAllocator.scala:55)
at scala.Option.map(Option.scala:146)
at org.apache.spark.scheduler.cluster.k8s.ExecutorPodsAllocator.(ExecutorPodsAllocator.scala:55)
at org.apache.spark.scheduler.cluster.k8s.KubernetesClusterManager.createSchedulerBackend(KubernetesClusterManager.scala:89)
at org.apache.spark.SparkContext$.org$apache$spark$SparkContext$$createTaskScheduler(SparkContext.scala:2788)
... 22 more
Caused by: java.net.SocketException: Broken pipe (Write failed)
at java.net.SocketOutputStream.socketWrite0(Native Method)
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:111)
at java.net.SocketOutputStream.write(SocketOutputStream.java:155)
at sun.security.ssl.OutputRecord.writeBuffer(OutputRecord.java:431)
at sun.security.ssl.OutputRecord.write(OutputRecord.java:417)
at sun.security.ssl.SSLSocketImpl.writeRecordInternal(SSLSocketImpl.java:894)
at sun.security.ssl.SSLSocketImpl.writeRecord(SSLSocketImpl.java:865)
at sun.security.ssl.AppOutputStream.write(AppOutputStream.java:123)
at okio.Okio$1.write(Okio.java:79)
at okio.AsyncTimeout$1.write(AsyncTimeout.java:180)
at okio.RealBufferedSink.flush(RealBufferedSink.java:224)
at okhttp3.internal.http2.Http2Writer.windowUpdate(Http2Writer.java:262)
at okhttp3.internal.http2.Http2Connection.start(Http2Connection.java:518)
at okhttp3.internal.http2.Http2Connection.start(Http2Connection.java:505)
at okhttp3.internal.connection.RealConnection.startHttp2(RealConnection.java:298)
at okhttp3.internal.connection.RealConnection.establishProtocol(RealConnection.java:287)
at okhttp3.internal.connection.RealConnection.connect(RealConnection.java:168)
at okhttp3.internal.connection.StreamAllocation.findConnection(StreamAllocation.java:257)
at okhttp3.internal.connection.StreamAllocation.findHealthyConnection(StreamAllocation.java:135)
at okhttp3.internal.connection.StreamAllocation.newStream(StreamAllocation.java:114)
at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:42)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121)
at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121)
at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147)
at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:126)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121)
at io.fabric8.kubernetes.client.utils.BackwardsCompatibilityInterceptor.intercept(BackwardsCompatibilityInterceptor.java:119)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121)
at io.fabric8.kubernetes.client.utils.ImpersonatorInterceptor.intercept(ImpersonatorInterceptor.java:68)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121)
at io.fabric8.kubernetes.client.utils.HttpClientUtils.lambda$createHttpClient$3(HttpClientUtils.java:110)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121)
at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:254)
at okhttp3.RealCall.execute(RealCall.java:92)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:411)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:372)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleGet(OperationSupport.java:337)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleGet(OperationSupport.java:318)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.handleGet(BaseOperation.java:833)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.getMandatory(BaseOperation.java:226)
... 29 more
Fabric8 Kubernetes Client version
5.4.2
Steps to reproduce
spark version: 3.2
use spark-submit to run pod
occasionally will get unable to get pod error
we followed docs to set default sa will clusterrole edit , sometimes is ok, sometimes not ,why ?
Expected behavior
since rbac is set correctlly ,should not show the error with :Operation: [get] for kind: [Pod] with name: [isac-asset-data-cleaning-316] in namespace: [tp] failed.
Runtime
Kubernetes (vanilla)
Kubernetes API Server version
1.20.12
Environment
Linux
Fabric8 Kubernetes Client Logs
No response
Additional context
No response
Hello @ly123-liu , Thanks for reporting this issue.
It will be very hard for us to analyse that and reproduce this issue. It may be a network issue. Could you file an issue at the spark project? Let us know how things are going,
Thanks
This issue has been automatically marked as stale because it has not had any activity since 90 days. It will be closed if no further activity occurs within 7 days. Thank you for your contributions!