rubix
rubix copied to clipboard
Handle Interrupts better
Today interruptions are thrown as RuntimeException which bubbles up as warning logs. Most of the times these interrupts are expected and not needed to be logged e.g. user cancel a query or LIMIT operator of Presto cancels upstream tasks. In such cases these kind of logs show up:
2020-05-28T09:40:56.013Z DEBUG 20200528_094053_00004_d87z3.2.0-0-59 com.qubole.rubix.core.CachingInputStream Could not get cache status from server
java.lang.RuntimeException: java.lang.InterruptedException
at com.qubole.rubix.spi.fop.ObjectPool.getObject(ObjectPool.java:108)
at com.qubole.rubix.spi.fop.ObjectPool.borrowObject(ObjectPool.java:71)
at com.qubole.rubix.spi.BookKeeperFactory.createBookKeeperClient(BookKeeperFactory.java:75)
at com.qubole.rubix.spi.BookKeeperFactory.createBookKeeperClient(BookKeeperFactory.java:114)
at com.qubole.rubix.core.CachingInputStream.setupReadRequestChains(CachingInputStream.java:330)
at com.qubole.rubix.core.CachingInputStream.readInternal(CachingInputStream.java:251)
at com.qubole.rubix.core.CachingInputStream.read(CachingInputStream.java:181)
at java.base/java.io.BufferedInputStream.fill(BufferedInputStream.java:252)
at java.base/java.io.BufferedInputStream.read1(BufferedInputStream.java:292)
at java.base/java.io.BufferedInputStream.read(BufferedInputStream.java:351)
at java.base/java.io.DataInputStream.read(DataInputStream.java:149)
at java.base/java.io.DataInputStream.read(DataInputStream.java:149)
at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:237)
at org.apache.hadoop.util.LineReader.readLine(LineReader.java:193)
at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:255)
at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:48)
at io.prestosql.plugin.hive.GenericHiveRecordCursor.advanceNextPosition(GenericHiveRecordCursor.java:209)
at io.prestosql.plugin.hive.HiveRecordCursor.advanceNextPosition(HiveRecordCursor.java:175)
at io.prestosql.spi.connector.RecordPageSource.getNextPage(RecordPageSource.java:95)
at io.prestosql.operator.TableScanOperator.getOutput(TableScanOperator.java:306)
at io.prestosql.operator.Driver.processInternal(Driver.java:379)
at io.prestosql.operator.Driver.lambda$processFor$8(Driver.java:283)
at io.prestosql.operator.Driver.tryWithLock(Driver.java:675)
at io.prestosql.operator.Driver.processFor(Driver.java:276)
at io.prestosql.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:1082)
at io.prestosql.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:163)
at io.prestosql.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:493)
at io.prestosql.$gen.Presto_____20200528_092938_1.run(Unknown Source)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.InterruptedException
at java.base/java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireInterruptibly(AbstractQueuedSynchronizer.java:1261)
at java.base/java.util.concurrent.locks.ReentrantLock.lockInterruptibly(ReentrantLock.java:317)
at java.base/java.util.concurrent.ArrayBlockingQueue.poll(ArrayBlockingQueue.java:427)
at com.qubole.rubix.spi.fop.ObjectPool.getObject(ObjectPool.java:101)
... 32 more
We need to find all places where we catch InterruptedException and forward it as RTE. We should forward these as InterruptedException itself and at the top level (client facing methods) we can forward these as InterruptedIOException
Another instance with a LIMIT 10 query:
2020-06-04T06:28:44.795Z ERROR 20200604_062840_00009_vupau.1.1-1-46 com.qubole.rubix.bookkeeper.BookKeeper Could not fetch FileStatus from remote file system for s3a://XXX/jane-presto-benchmark-test-data/XXX_orc/20190221_133439_00001_t4knf_d4294c0a-f465-44ed-8511-5a0f0c26ea71
java.lang.RuntimeException: java.lang.InterruptedException: sleep interrupted
at io.prestosql.plugin.hive.util.RetryDriver.run(RetryDriver.java:154)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.getS3ObjectMetadata(PrestoS3FileSystem.java:631)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.getS3ObjectMetadata(PrestoS3FileSystem.java:615)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.getFileStatus(PrestoS3FileSystem.java:354)
at com.qubole.rubix.core.CachingFileSystem.getFileStatus(CachingFileSystem.java:299)
at com.qubole.rubix.bookkeeper.BookKeeper.getFileInfo(BookKeeper.java:518)
at com.qubole.rubix.spi.LocalBookKeeperClient.getFileInfo(LocalBookKeeperClient.java:76)
at com.qubole.rubix.core.CachingInputStream.<init>(CachingInputStream.java:104)
at com.qubole.rubix.core.CachingFileSystem.open(CachingFileSystem.java:206)
at org.apache.hadoop.fs.PrestoFileSystemCache$FileSystemWrapper.open(PrestoFileSystemCache.java:327)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:899)
at io.prestosql.plugin.hive.orc.OrcPageSourceFactory.lambda$createOrcPageSource$0(OrcPageSourceFactory.java:217)
at io.prestosql.plugin.hive.authentication.NoHdfsAuthentication.doAs(NoHdfsAuthentication.java:23)
at io.prestosql.plugin.hive.HdfsEnvironment.doAs(HdfsEnvironment.java:89)
at io.prestosql.plugin.hive.orc.OrcPageSourceFactory.createOrcPageSource(OrcPageSourceFactory.java:217)
at io.prestosql.plugin.hive.orc.OrcPageSourceFactory.createPageSource(OrcPageSourceFactory.java:160)
at io.prestosql.plugin.hive.HivePageSourceProvider.createHivePageSource(HivePageSourceProvider.java:175)
at io.prestosql.plugin.hive.HivePageSourceProvider.createPageSource(HivePageSourceProvider.java:105)
at io.prestosql.plugin.base.classloader.ClassLoaderSafeConnectorPageSourceProvider.createPageSource(ClassLoaderSafeConnectorPageSourceProvider.java:57)
at io.prestosql.split.PageSourceManager.createPageSource(PageSourceManager.java:64)
at io.prestosql.operator.TableScanOperator.getOutput(TableScanOperator.java:298)
at io.prestosql.operator.Driver.processInternal(Driver.java:379)
at io.prestosql.operator.Driver.lambda$processFor$8(Driver.java:283)
at io.prestosql.operator.Driver.tryWithLock(Driver.java:675)
at io.prestosql.operator.Driver.processFor(Driver.java:276)
at io.prestosql.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:1075)
at io.prestosql.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:163)
at io.prestosql.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:484)
at io.prestosql.$gen.Presto_334____20200604_061259_2.run(Unknown Source)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
Suppressed: com.amazonaws.AbortedException:
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleInterruptedException(AmazonHttpClient.java:862)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:740)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:698)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:680)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:544)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:524)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5054)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5000)
at com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1335)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.lambda$getS3ObjectMetadata$4(PrestoS3FileSystem.java:634)
at io.prestosql.plugin.hive.util.RetryDriver.run(RetryDriver.java:130)
... 31 more
Caused by: com.amazonaws.http.timers.client.SdkInterruptedException
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.checkInterrupted(AmazonHttpClient.java:917)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.checkInterrupted(AmazonHttpClient.java:903)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1097)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:796)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:764)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:738)
... 40 more
Caused by: java.lang.InterruptedException: sleep interrupted
at java.base/java.lang.Thread.sleep(Native Method)
at java.base/java.lang.Thread.sleep(Thread.java:339)
at java.base/java.util.concurrent.TimeUnit.sleep(TimeUnit.java:446)
at io.prestosql.plugin.hive.util.RetryDriver.run(RetryDriver.java:150)
... 31 more
2020-06-04T06:28:44.796Z WARN 20200604_062840_00009_vupau.1.1-1-46 com.qubole.rubix.core.CachingFileSystem Error in opening Caching Input Stream, skipping cache
java.lang.RuntimeException: java.lang.InterruptedException: sleep interrupted
at io.prestosql.plugin.hive.util.RetryDriver.run(RetryDriver.java:154)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.getS3ObjectMetadata(PrestoS3FileSystem.java:631)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.getS3ObjectMetadata(PrestoS3FileSystem.java:615)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.getFileStatus(PrestoS3FileSystem.java:354)
at com.qubole.rubix.core.CachingFileSystem.getFileStatus(CachingFileSystem.java:299)
at com.qubole.rubix.core.CachingInputStream.<init>(CachingInputStream.java:113)
at com.qubole.rubix.core.CachingFileSystem.open(CachingFileSystem.java:206)
at org.apache.hadoop.fs.PrestoFileSystemCache$FileSystemWrapper.open(PrestoFileSystemCache.java:327)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:899)
at io.prestosql.plugin.hive.orc.OrcPageSourceFactory.lambda$createOrcPageSource$0(OrcPageSourceFactory.java:217)
at io.prestosql.plugin.hive.authentication.NoHdfsAuthentication.doAs(NoHdfsAuthentication.java:23)
at io.prestosql.plugin.hive.HdfsEnvironment.doAs(HdfsEnvironment.java:89)
at io.prestosql.plugin.hive.orc.OrcPageSourceFactory.createOrcPageSource(OrcPageSourceFactory.java:217)
at io.prestosql.plugin.hive.orc.OrcPageSourceFactory.createPageSource(OrcPageSourceFactory.java:160)
at io.prestosql.plugin.hive.HivePageSourceProvider.createHivePageSource(HivePageSourceProvider.java:175)
at io.prestosql.plugin.hive.HivePageSourceProvider.createPageSource(HivePageSourceProvider.java:105)
at io.prestosql.plugin.base.classloader.ClassLoaderSafeConnectorPageSourceProvider.createPageSource(ClassLoaderSafeConnectorPageSourceProvider.java:57)
at io.prestosql.split.PageSourceManager.createPageSource(PageSourceManager.java:64)
at io.prestosql.operator.TableScanOperator.getOutput(TableScanOperator.java:298)
at io.prestosql.operator.Driver.processInternal(Driver.java:379)
at io.prestosql.operator.Driver.lambda$processFor$8(Driver.java:283)
at io.prestosql.operator.Driver.tryWithLock(Driver.java:675)
at io.prestosql.operator.Driver.processFor(Driver.java:276)
at io.prestosql.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:1075)
at io.prestosql.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:163)
at io.prestosql.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:484)
at io.prestosql.$gen.Presto_334____20200604_061259_2.run(Unknown Source)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
Suppressed: com.amazonaws.AbortedException:
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleInterruptedException(AmazonHttpClient.java:862)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:740)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:698)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:680)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:544)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:524)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5054)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5000)
at com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1335)
at io.prestosql.plugin.hive.s3.PrestoS3FileSystem.lambda$getS3ObjectMetadata$4(PrestoS3FileSystem.java:634)
at io.prestosql.plugin.hive.util.RetryDriver.run(RetryDriver.java:130)
... 29 more
Caused by: com.amazonaws.http.timers.client.SdkInterruptedException
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.checkInterrupted(AmazonHttpClient.java:917)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.checkInterrupted(AmazonHttpClient.java:903)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1097)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:796)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:764)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:738)
... 38 more
Caused by: java.lang.InterruptedException: sleep interrupted
at java.base/java.lang.Thread.sleep(Native Method)
at java.base/java.lang.Thread.sleep(Thread.java:339)
at java.base/java.util.concurrent.TimeUnit.sleep(TimeUnit.java:446)
at io.prestosql.plugin.hive.util.RetryDriver.run(RetryDriver.java:150)
... 29 more