oshdb
oshdb copied to clipboard
Improve error message on remote failures in ignite backend jobs
Problem The Stacktrace of a failure in the .map() step is cryptic:
Apr 13, 2019 7:53:20 PM org.apache.ignite.logger.java.JavaLogger error
SCHWERWIEGEND: Failed to obtain remote job result policy for result fromComputeTask.result(..) method due to undeclared user exception (will fail the whole task): GridJobResultImpl [job=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask$1@a0c5be, sib=GridJobSiblingImpl [sesId=d5512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, nodeId=ae70ffba-a3eb-46ba-b77f-16807812ab2c, isJobDone=false], jobCtx=GridJobContextImpl [jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, timeoutObj=null, attrs={}], node=TcpDiscoveryNode [id=ae70ffba-a3eb-46ba-b77f-16807812ab2c, addrs=[0:0:0:0:0:0:0:1%lo, 10.11.12.121, 127.0.0.1], sockAddrs=[/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500, worker-21/10.11.12.121:47500], discPort=47500, order=1, intOrder=1, lastExchangeTime=1555177807274, loc=false, ver=2.7.0#20181130-sha1:256ae401, isClient=false], ex=class o.a.i.compute.ComputeUserUndeclaredException: Failed to execute job due to unexpected runtime exception [jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, ses=GridJobSessionImpl [ses=GridTaskSessionImpl [taskName=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask, dep=SharedDeployment [rmv=false, super=GridDeployment [ts=1555177817809, depMode=SHARED, clsLdr=GridDeploymentClassLoader [id=7b34d38f961-ae70ffba-a3eb-46ba-b77f-16807812ab2c, singleNode=false, nodeLdrMap={c4ca1a0b-a539-43e6-8eb3-d9b44af32688=bad02d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688}, p2pTimeout=60000, usrVer=0, depMode=SHARED, quiet=false], clsLdrId=7b34d38f961-ae70ffba-a3eb-46ba-b77f-16807812ab2c, userVer=0, loc=false, sampleClsName=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask, pendingUndeploy=false, undeployed=false, usage=1]], taskClsName=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask, sesId=d5512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, startTime=1555177999898, endTime=9223372036854775807, taskNodeId=c4ca1a0b-a539-43e6-8eb3-d9b44af32688, clsLdr=GridDeploymentClassLoader [id=7b34d38f961-ae70ffba-a3eb-46ba-b77f-16807812ab2c, singleNode=false, nodeLdrMap={c4ca1a0b-a539-43e6-8eb3-d9b44af32688=bad02d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688}, p2pTimeout=60000, usrVer=0, depMode=SHARED, quiet=false], closed=false, cpSpi=null, failSpi=null, loadSpi=null, usage=1, fullSup=false, internal=false, topPred=o.a.i.i.cluster.ClusterGroupAdapter$AttributeFilter@2d20f94f, subjId=c4ca1a0b-a539-43e6-8eb3-d9b44af32688, mapFut=IgniteFuture [orig=GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=57460764]], execName=null], jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688], err=found non-noded intersection between LINESTRING ( -1.9373004999999999 53.071713499999994, -1.9374068 53.0715085 ) and LINESTRING ( -1.9374068 53.0715085, -1.937392000000003 53.071490600000004 ) [ (-1.9374068, 53.0715085, NaN) ]], hasRes=true, isCancelled=false, isOccupied=true]
java.lang.NullPointerException
at org.heigit.bigspatialdata.oshdb.api.mapreducer.MapAggregator.lambda$reduce$b43096be$1(MapAggregator.java:864)
at org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask.result(OSHDBIgniteMapReduceComputeTask.java:81)
at org.apache.ignite.internal.processors.task.GridTaskWorker$5.apply(GridTaskWorker.java:1053)
at org.apache.ignite.internal.processors.task.GridTaskWorker$5.apply(GridTaskWorker.java:1046)
at org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6848)
at org.apache.ignite.internal.processors.task.GridTaskWorker.result(GridTaskWorker.java:1046)
at org.apache.ignite.internal.processors.task.GridTaskWorker.onResponse(GridTaskWorker.java:864)
at org.apache.ignite.internal.processors.task.GridTaskProcessor.processJobExecuteResponse(GridTaskProcessor.java:1081)
at org.apache.ignite.internal.processors.task.GridTaskProcessor$JobMessageListener.onMessage(GridTaskProcessor.java:1316)
at org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1569)
at org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1197)
at org.apache.ignite.internal.managers.communication.GridIoManager.access$4200(GridIoManager.java:127)
at org.apache.ignite.internal.managers.communication.GridIoManager$9.run(GridIoManager.java:1093)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Exception in thread "main" class org.apache.ignite.compute.ComputeUserUndeclaredException: Failed to obtain remote job result policy for result fromComputeTask.result(..) method due to undeclared user exception (will fail the whole task): GridJobResultImpl [job=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask$1@a0c5be, sib=GridJobSiblingImpl [sesId=d5512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, nodeId=ae70ffba-a3eb-46ba-b77f-16807812ab2c, isJobDone=false], jobCtx=GridJobContextImpl [jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, timeoutObj=null, attrs={}], node=TcpDiscoveryNode [id=ae70ffba-a3eb-46ba-b77f-16807812ab2c, addrs=[0:0:0:0:0:0:0:1%lo, 10.11.12.121, 127.0.0.1], sockAddrs=[/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500, worker-21/10.11.12.121:47500], discPort=47500, order=1, intOrder=1, lastExchangeTime=1555177807274, loc=false, ver=2.7.0#20181130-sha1:256ae401, isClient=false], ex=class org.apache.ignite.compute.ComputeUserUndeclaredException: Failed to execute job due to unexpected runtime exception [jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, ses=GridJobSessionImpl [ses=GridTaskSessionImpl [taskName=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask, dep=SharedDeployment [rmv=false, super=GridDeployment [ts=1555177817809, depMode=SHARED, clsLdr=GridDeploymentClassLoader [id=7b34d38f961-ae70ffba-a3eb-46ba-b77f-16807812ab2c, singleNode=false, nodeLdrMap={c4ca1a0b-a539-43e6-8eb3-d9b44af32688=bad02d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688}, p2pTimeout=60000, usrVer=0, depMode=SHARED, quiet=false], clsLdrId=7b34d38f961-ae70ffba-a3eb-46ba-b77f-16807812ab2c, userVer=0, loc=false, sampleClsName=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask, pendingUndeploy=false, undeployed=false, usage=1]], taskClsName=org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask, sesId=d5512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688, startTime=1555177999898, endTime=9223372036854775807, taskNodeId=c4ca1a0b-a539-43e6-8eb3-d9b44af32688, clsLdr=GridDeploymentClassLoader [id=7b34d38f961-ae70ffba-a3eb-46ba-b77f-16807812ab2c, singleNode=false, nodeLdrMap={c4ca1a0b-a539-43e6-8eb3-d9b44af32688=bad02d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688}, p2pTimeout=60000, usrVer=0, depMode=SHARED, quiet=false], closed=false, cpSpi=null, failSpi=null, loadSpi=null, usage=1, fullSup=false, internal=false, topPred=org.apache.ignite.internal.cluster.ClusterGroupAdapter$AttributeFilter@2d20f94f, subjId=c4ca1a0b-a539-43e6-8eb3-d9b44af32688, mapFut=IgniteFuture [orig=GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=57460764]], execName=null], jobId=96512d71a61-c4ca1a0b-a539-43e6-8eb3-d9b44af32688], err=found non-noded intersection between LINESTRING ( -1.9373004999999999 53.071713499999994, -1.9374068 53.0715085 ) and LINESTRING ( -1.9374068 53.0715085, -1.937392000000003 53.071490600000004 ) [ (-1.9374068, 53.0715085, NaN) ]], hasRes=true, isCancelled=false, isOccupied=true]
at org.apache.ignite.internal.processors.task.GridTaskWorker$5.apply(GridTaskWorker.java:1112)
at org.apache.ignite.internal.processors.task.GridTaskWorker$5.apply(GridTaskWorker.java:1046)
at org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6848)
at org.apache.ignite.internal.processors.task.GridTaskWorker.result(GridTaskWorker.java:1046)
at org.apache.ignite.internal.processors.task.GridTaskWorker.onResponse(GridTaskWorker.java:864)
at org.apache.ignite.internal.processors.task.GridTaskProcessor.processJobExecuteResponse(GridTaskProcessor.java:1081)
at org.apache.ignite.internal.processors.task.GridTaskProcessor$JobMessageListener.onMessage(GridTaskProcessor.java:1316)
at org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1569)
at org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1197)
at org.apache.ignite.internal.managers.communication.GridIoManager.access$4200(GridIoManager.java:127)
at org.apache.ignite.internal.managers.communication.GridIoManager$9.run(GridIoManager.java:1093)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
at org.heigit.bigspatialdata.oshdb.api.mapreducer.MapAggregator.lambda$reduce$b43096be$1(MapAggregator.java:864)
at org.heigit.bigspatialdata.oshdb.api.mapreducer.backend.OSHDBIgniteMapReduceComputeTask.result(OSHDBIgniteMapReduceComputeTask.java:81)
at org.apache.ignite.internal.processors.task.GridTaskWorker$5.apply(GridTaskWorker.java:1053)
... 13 more
Possible Solution The user should be explicitly informed, that the map-step threw an error. Also the Message thrown on the ignite-worker has more information but never arrives at the client.
Alternative Also one could catch this behaviour and return an incomplete result (while notifying the client!).
@rtroilo will check if this is still the case.