Browser4 icon indicating copy to clipboard operation
Browser4 copied to clipboard

Handle loadJsoupResource exceptions properly

Open platonai opened this issue 7 months ago • 0 comments

The exception is throw by loadJsoupResource.

Can reproduce by fetch a url with -resource option:

curl -X POST --location "http://localhost:8182/api/x/e" -H "Content-Type: text/plain" -d "
  select
      dom_base_uri(dom) as url
  from load_and_select('https://www.amazon.com/dp/B0C1H26C46 -resource -refresh', 'body');"
       at ai.platon.pulsar.skeleton.crawl.fetch.driver.AbstractWebDriver$loadJsoupResource$2.invokeSuspend(AbstractWebDriver.kt:655)

2025-05-13 16:53:05.267 WARN [r-worker-7] a.p.p.s.c.i.StreamingCrawler - [Unexpected] Exception details: >>>>

java.net.SocketTimeoutException: Read timed out at java.base/sun.nio.ch.NioSocketImpl.timedRead(NioSocketImpl.java:278) at java.base/sun.nio.ch.NioSocketImpl.implRead(NioSocketImpl.java:304) at java.base/sun.nio.ch.NioSocketImpl.read(NioSocketImpl.java:346) at java.base/sun.nio.ch.NioSocketImpl$1.read(NioSocketImpl.java:796) at java.base/java.net.Socket$SocketInputStream.read(Socket.java:1099) at java.base/sun.security.ssl.SSLSocketInputRecord.read(SSLSocketInputRecord.java:489) at java.base/sun.security.ssl.SSLSocketInputRecord.readHeader(SSLSocketInputRecord.java:483) at java.base/sun.security.ssl.SSLSocketInputRecord.bytesInCompletePacket(SSLSocketInputRecord.java:70) at java.base/sun.security.ssl.SSLSocketImpl.readApplicationRecord(SSLSocketImpl.java:1461) at java.base/sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:1066) at java.base/java.io.BufferedInputStream.fill(BufferedInputStream.java:291) at java.base/java.io.BufferedInputStream.read1(BufferedInputStream.java:347) at java.base/java.io.BufferedInputStream.implRead(BufferedInputStream.java:420) at java.base/java.io.BufferedInputStream.read(BufferedInputStream.java:399) at java.base/sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:827) at java.base/sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:759) at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1705) at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1614) at java.base/java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:531) at java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:307) at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:867) at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829) at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366) at ai.platon.pulsar.skeleton.crawl.fetch.driver.AbstractWebDriver$loadJsoupResource$2.invokeSuspend(AbstractWebDriver.kt:655) at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:33) at kotlinx.coroutines.DispatchedTask.run(DispatchedTask.kt:104) at kotlinx.coroutines.internal.LimitedDispatcher$Worker.run(LimitedDispatcher.kt:111) at kotlinx.coroutines.scheduling.TaskImpl.run(Tasks.kt:99) at kotlinx.coroutines.scheduling.CoroutineScheduler.runSafely(CoroutineScheduler.kt:584) at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.executeTask(CoroutineScheduler.kt:811) at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.runWorker(CoroutineScheduler.kt:715) at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.run(CoroutineScheduler.kt:702)

2025-05-13 16:53:08.050 WARN [-worker-16] a.p.p.s.c.i.StreamingCrawler - [Unexpected] Exception details: >>>>

java.net.SocketTimeoutException: Read timed out at java.base/sun.nio.ch.NioSocketImpl.timedRead(NioSocketImpl.java:278) at java.base/sun.nio.ch.NioSocketImpl.implRead(NioSocketImpl.java:304) at java.base/sun.nio.ch.NioSocketImpl.read(NioSocketImpl.java:346) at java.base/sun.nio.ch.NioSocketImpl$1.read(NioSocketImpl.java:796) at java.base/java.net.Socket$SocketInputStream.read(Socket.java:1099) at java.base/sun.security.ssl.SSLSocketInputRecord.read(SSLSocketInputRecord.java:489) at java.base/sun.security.ssl.SSLSocketInputRecord.readHeader(SSLSocketInputRecord.java:483) at java.base/sun.security.ssl.SSLSocketInputRecord.bytesInCompletePacket(SSLSocketInputRecord.java:70) at java.base/sun.security.ssl.SSLSocketImpl.readApplicationRecord(SSLSocketImpl.java:1461) at java.base/sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:1066) at java.base/java.io.BufferedInputStream.fill(BufferedInputStream.java:291) at java.base/java.io.BufferedInputStream.read1(BufferedInputStream.java:347) at java.base/java.io.BufferedInputStream.implRead(BufferedInputStream.java:420) at java.base/java.io.BufferedInputStream.read(BufferedInputStream.java:399) at java.base/sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:827) at java.base/sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:759) at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1705) at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1614) at java.base/java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:531) at java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:307) at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:867) at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829) at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366) at ai.platon.pulsar.skeleton.crawl.fetch.driver.AbstractWebDriver$loadJsoupResource$2.invokeSuspend(AbstractWebDriver.kt:655) at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:33) at kotlinx.coroutines.DispatchedTask.run(DispatchedTask.kt:104) at kotlinx.coroutines.internal.LimitedDispatcher$Worker.run(LimitedDispatcher.kt:111) at kotlinx.coroutines.scheduling.TaskImpl.run(Tasks.kt:99) at kotlinx.coroutines.scheduling.CoroutineScheduler.runSafely(CoroutineScheduler.kt:584) at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.executeTask(CoroutineScheduler.kt:811) at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.runWorker(CoroutineScheduler.kt:715) at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.run(CoroutineScheduler.kt:702)

platonai avatar May 13 '25 09:05 platonai