rdf4j
OutOfMemoryError when creating an enormous IllegalArgumentException
Current Behavior
I got this error when performing a query with a large response (1.5 GB):
SEVERE [http-nio-8080-exec-11] org.apache.catalina.core.StandardWrapperValve.invoke Servlet.service() for servlet [rdf4j-http-server] in context with path [/rdf4j-server] threw exception [Handler processing failed; nested exception is java.lang.OutOfMemoryError: Required array length 2147483638 + 1086650766 is too large] with root cause
java.lang.OutOfMemoryError: Required array length 2147483638 + 1086650766 is too large
at java.base/jdk.internal.util.ArraysSupport.hugeLength(ArraysSupport.java:649)
at java.base/jdk.internal.util.ArraysSupport.newLength(ArraysSupport.java:642)
at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:257)
at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:229)
at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:582)
at java.base/java.lang.StringBuilder.append(StringBuilder.java:179)
at ch.qos.logback.classic.pattern.ThrowableProxyConverter.subjoinExceptionMessage(ThrowableProxyConverter.java:180)
at ch.qos.logback.classic.pattern.ThrowableProxyConverter.subjoinFirstLine(ThrowableProxyConverter.java:176)
at ch.qos.logback.classic.pattern.ThrowableProxyConverter.recursiveAppend(ThrowableProxyConverter.java:159)
at ch.qos.logback.classic.pattern.ThrowableProxyConverter.recursiveAppend(ThrowableProxyConverter.java:168)
at ch.qos.logback.classic.pattern.ThrowableProxyConverter.throwableProxyToString(ThrowableProxyConverter.java:151)
at ch.qos.logback.classic.pattern.ThrowableProxyConverter.convert(ThrowableProxyConverter.java:145)
at ch.qos.logback.classic.pattern.ThrowableProxyConverter.convert(ThrowableProxyConverter.java:35)
at ch.qos.logback.core.pattern.FormattingConverter.write(FormattingConverter.java:36)
at ch.qos.logback.core.pattern.PatternLayoutBase.writeLoopOnConverters(PatternLayoutBase.java:115)
at ch.qos.logback.classic.PatternLayout.doLayout(PatternLayout.java:165)
at ch.qos.logback.classic.PatternLayout.doLayout(PatternLayout.java:39)
at ch.qos.logback.core.encoder.LayoutWrappingEncoder.encode(LayoutWrappingEncoder.java:116)
at ch.qos.logback.core.OutputStreamAppender.subAppend(OutputStreamAppender.java:230)
at ch.qos.logback.core.rolling.RollingFileAppender.subAppend(RollingFileAppender.java:235)
at ch.qos.logback.core.OutputStreamAppender.append(OutputStreamAppender.java:102)
at ch.qos.logback.core.UnsynchronizedAppenderBase.doAppend(UnsynchronizedAppenderBase.java:84)
at ch.qos.logback.core.spi.AppenderAttachableImpl.appendLoopOnAppenders(AppenderAttachableImpl.java:51)
at ch.qos.logback.classic.Logger.appendLoopOnAppenders(Logger.java:270)
at ch.qos.logback.classic.Logger.callAppenders(Logger.java:257)
at ch.qos.logback.classic.Logger.buildLoggingEventAndAppend(Logger.java:421)
at ch.qos.logback.classic.Logger.filterAndLog_0_Or3Plus(Logger.java:383)
at ch.qos.logback.classic.Logger.error(Logger.java:538)
at org.eclipse.rdf4j.http.server.repository.TupleQueryResultView.renderInternal(TupleQueryResultView.java:107)
at org.eclipse.rdf4j.http.server.repository.QueryResultView.render(QueryResultView.java:66)
at org.springframework.web.servlet.DispatcherServlet.render(DispatcherServlet.java:1405)
at org.springframework.web.servlet.DispatcherServlet.processDispatchResult(DispatcherServlet.java:1149)
at org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:1088)
at org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:964)
at org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:1006)
at org.springframework.web.servlet.FrameworkServlet.doPost(FrameworkServlet.java:909)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:555)
at org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:883)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:623)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:209)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:153)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:51)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:178)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:153)
at com.github.ziplet.filter.compression.CompressingFilter.doFilter(CompressingFilter.java:263)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:178)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:153)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:168)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:90)
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:481)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:130)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:93)
at org.apache.catalina.valves.AbstractAccessLogValve.invoke(AbstractAccessLogValve.java:670)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:74)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:342)
at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:390)
at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:63)
at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:928)
at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1794)
at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:52)
at org.apache.tomcat.util.threads.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1191)
at org.apache.tomcat.util.threads.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:659)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.base/java.lang.Thread.run(Thread.java:833)
Expected Behavior
OutOfMemory errors cause the service to become unstable, so I think they should be avoided. Even if the data is corrupted, the IllegalArgumentException shouldn't be created with the complete object value to be printed into a log.
Steps To Reproduce
The basis of this case is a corrupt object in a triple (I don't know why it is corrupted). The object's value doesn't match any type (neither URI, nor bnode, nor literal), and it is read from the value store as a 1.5 GB byte array.
Steps to reproduce:
- Run a query that is expected to return the corrupt value.
- The request reaches the org.eclipse.rdf4j.sail.nativerdf.ValueStore class, invoking the public NativeValue getValue(int id) throws IOException; method. getValue internally reads the enormous data byte array and invokes the private NativeValue data2value(int id, byte[] data) method.
- In data2value the first byte doesn't match any of the switch options, so execution falls through to the default path, which throws an IllegalArgumentException. The constructed exception message contains the enormous byte array.
private NativeValue data2value(int id, byte[] data) throws IOException {
    switch (data[0]) {
    case URI_VALUE:
        return data2uri(id, data);
    case BNODE_VALUE:
        return data2bnode(id, data);
    case LITERAL_VALUE:
        return data2literal(id, data);
    default:
        throw new IllegalArgumentException(
                "Namespaces cannot be converted into values: " + data2namespace(data));
    }
}
The exception is caught in org.eclipse.rdf4j.sail.TripleSourceIterationWrapper.hasNext(TripleSourceIterationWrapper.java:50) and rethrown as an org.eclipse.rdf4j.query.QueryEvaluationException. When that exception is caught at org.eclipse.rdf4j.http.server.repository.TupleQueryResultView.renderInternal(TupleQueryResultView.java:107), it tries to log the error with the byte array in the message, but this fails while formatting the log line, because the required character array exceeds the maximum array size.
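The numbers in the stack trace already show why that log append cannot succeed: the message is so large that the formatter's StringBuilder would need a backing array bigger than the JVM can allocate. A minimal, self-contained illustration (not RDF4J code; the two values are copied from the OutOfMemoryError above):

public class ArrayLengthLimitDemo {
    public static void main(String[] args) {
        // Values from the OutOfMemoryError above: the builder's current length
        // and the minimum growth needed to append the exception message.
        long currentLength = 2_147_483_638L;
        long minGrowth = 1_086_650_766L;
        long required = currentLength + minGrowth;

        // Java arrays are indexed by int, so a char[] larger than Integer.MAX_VALUE
        // can never be allocated; the JDK reports this as
        // "Required array length ... is too large".
        System.out.println("required capacity: " + required);
        System.out.println("exceeds max array size: " + (required > Integer.MAX_VALUE));
    }
}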
Version
4.3.2
Are you interested in contributing a solution yourself?
Perhaps?
Anything else?
No response
@diegoferbp thanks for finding this! If you are up for making a PR to fix this just tag me and I'll review and merge the fix for you.
My idea is to modify the class org.eclipse.rdf4j.sail.nativerdf.ValueStore, changing data2value like this:
private NativeValue data2value(int id, byte[] data) throws IOException {
    switch (data[0]) {
    case URI_VALUE:
        return data2uri(id, data);
    case BNODE_VALUE:
        return data2bnode(id, data);
    case LITERAL_VALUE:
        return data2literal(id, data);
    default:
        throw new IllegalArgumentException(
                "Namespaces cannot be converted into values: " + data2namespace(trimValueBytes(data)));
    }
}
Create the method trimValueBytes:
private byte[] trimValueBytes(byte[] data) {
    // copyOfRange's upper bound is exclusive, so this keeps exactly MAX_PRINTED_BYTES bytes
    return data.length < MAX_PRINTED_BYTES ? data
            : Arrays.copyOfRange(data, 0, MAX_PRINTED_BYTES);
}
and add the constant:
private static final int MAX_PRINTED_BYTES = 1048576; // 1MB
Looks like a good solution. We can probably go with 2048 bytes as the standard. It'll typically end up in someone's log, and it's a good idea to keep each log message below 4kb.
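A minimal, self-contained sketch of that suggestion, reusing the MAX_PRINTED_BYTES and trimValueBytes names from the proposal above (the "... (N bytes total)" suffix is only an illustration, not existing RDF4J behaviour):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class TruncatedMessageDemo {

    // 2048-byte cap as suggested above, keeping the resulting log line well under 4 KB.
    private static final int MAX_PRINTED_BYTES = 2048;

    static byte[] trimValueBytes(byte[] data) {
        return data.length <= MAX_PRINTED_BYTES ? data : Arrays.copyOf(data, MAX_PRINTED_BYTES);
    }

    public static void main(String[] args) {
        // Stand-in for the oversized, corrupt value store entry.
        byte[] corrupt = new byte[10_000_000];
        Arrays.fill(corrupt, (byte) 'x');

        String message = "Namespaces cannot be converted into values: "
                + new String(trimValueBytes(corrupt), StandardCharsets.UTF_8)
                + "... (" + corrupt.length + " bytes total)";

        // The message length is now bounded by MAX_PRINTED_BYTES, not by the entry size.
        System.out.println("message length: " + message.length());
    }
}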