Memory corruption on large responses for ft.search
Bug Report
When the response from Redis to ftSearch is large (~200 KB), memory corruption occurs.
Depending on the netty version, this either crashes the JVM process or simply produces garbage in the returned results.
It seems that the ByteBuffers holding data read at the beginning of processing are overwritten by data read later.
Current Behavior
- netty 4.2.4.Final
  - the JVM crashes in thread "lettuce-nioEventLoop" in the sub-routine jshort_disjoint_arraycopy
- netty 4.1.118.Final
  - with protocol version RESP2 the result is just garbage
  - with protocol version RESP3 the result is empty (the SearchReplyParser cannot find the "results" key in the garbage ByteBuffers)
Input Code
The following code can be used to reproduce the behaviour:
package io.lettuce.core;

import io.lettuce.core.api.StatefulRedisConnection;
import io.lettuce.core.json.JsonPath;
import io.lettuce.core.protocol.DecodeBufferPolicies;
import io.lettuce.core.protocol.ProtocolVersion;
import io.lettuce.core.search.arguments.CreateArgs;
import io.lettuce.core.search.arguments.NumericFieldArgs;
import io.lettuce.core.search.arguments.SearchArgs;
import io.lettuce.core.search.arguments.SortByArgs;

import org.opentest4j.AssertionFailedError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;

public class RediSearchBugProver {

    private static final Logger log = LoggerFactory.getLogger(RediSearchBugProver.class);

    public static void main(String[] args) {
        // System.setProperty("org.slf4j.simpleLogger.log.io.lettuce.core.protocol", "trace");
        var uri = RedisURI.create("localhost", 6379);
        try (var client = RedisClient.create(uri)) {
            client.setOptions(ClientOptions.builder()
                    // increase it drastically just to be sure discardBytesRatio is not the cause
                    .decodeBufferPolicy(DecodeBufferPolicies.ratio(Integer.MAX_VALUE / 2.0f))
                    // ft.search result on RESP2 is an array, on RESP3 a map. So error is different
                    .protocolVersion(ProtocolVersion.RESP2)
                    .build());
            try (var connection = client.connect()) {
                runTest(connection, String.valueOf(System.currentTimeMillis()));
            }
        }
    }

    private static void runTest(StatefulRedisConnection<String, String> con, String space) {
        var prefix = "test-" + space + ":";
        var index = "idx-" + space;
        log.info("===> running with {}, {}", prefix, index);
        con.sync().ftCreate(
                index,
                CreateArgs.<String, String>builder()
                        .on(CreateArgs.TargetType.JSON)
                        .withPrefix(prefix)
                        .build(),
                List.of(NumericFieldArgs.<String>builder().name("pos").build())
        );
        var searchArgs = SearchArgs.<String, String>builder()
                .sortBy(SortByArgs.<String>builder().attribute("pos").build())
                .limit(0, 10_000)
                .build();
        var expected = new ArrayList<>();
        for (int i = 1; i <= 1000; i++) {
            var latest = """
                    {"pos":%d,"ts":%d,"large":"just here to make the response larger to some great extend and overflow the buffers"}
                    """.formatted(i, System.currentTimeMillis()).trim();
            // lettuce<7.x: con.sync().jsonSet(prefix + i, JsonPath.ROOT_PATH, con.sync().getJsonParser().createJsonValue(latest));
            con.sync().jsonSet(prefix + i, JsonPath.ROOT_PATH, latest);
            expected.add(latest);
            if (i >= 924) {
                log.info("=== search {}", i);
                var searchReply = con.sync().ftSearch(index, "*", searchArgs);
                // with RESP3 this simply returns 0 as the map keys in Resp3SearchResultsParser are not found
                assertEquals(expected.size(), searchReply.getCount());
                for (int t = 1; t <= expected.size(); t++) {
                    var fields = searchReply.getResults().get(t - 1).getFields();
                    try {
                        assertEquals(expected.get(t - 1), fields.get("$"));
                    } catch (AssertionFailedError e) {
                        // with RESP2 this shows strange fields instead of the expected '$={"pos":..."'
                        log.info("Fields at pos {}: {}", t - 1, fields);
                        throw new AssertionFailedError("On loop " + i + ": " + e.getMessage(), e);
                    }
                }
            }
        }
    }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>io.lettuce.core</groupId>
    <artifactId>lettuce-redis-search-bug</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>21</maven.compiler.source>
        <maven.compiler.target>21</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <lettuce-core.version>7.1.0.RELEASE</lettuce-core.version>
        <!-- with 4.1 it reports wrong data -->
        <netty.version>4.1.118.Final</netty.version>
        <!-- with 4.2 it crashes the jvm -->
        <!--netty.version>4.2.4.Final</netty.version-->
    </properties>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>io.netty</groupId>
                <artifactId>netty-bom</artifactId>
                <version>${netty.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>
        <dependency>
            <groupId>io.lettuce</groupId>
            <artifactId>lettuce-core</artifactId>
            <version>${lettuce-core.version}</version>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-common</artifactId>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-handler</artifactId>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-transport</artifactId>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.42</version>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <version>6.0.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>2.0.17</version>
        </dependency>
    </dependencies>
</project>
Expected behavior/code
ftSearch() returns the results correctly, without corruption.
Environment
- Lettuce version(s): 7.1.0.RELEASE, 6.8.1.RELEASE
- Redis version: redis/redis-stack-server:7.2.0-v10 (Redis 7.2.4)
- Java: Amazon Corretto 21.0.1
- Netty versions: 4.1.118.Final, 4.2.4.Final
Additional context
Some notes on checks already done:
- the default DecodeBufferPolicy (with buffer.discardReadBytes()) is not the cause of the error
- when debugging io.lettuce.core.protocol.CommandHandler#channelRead, parsing of the response works fine
- the values put into ComplexOutput are correct at the moment they are put, but later become garbage
- the readBuffer contains the correct data when the FT.SEARCH command is completed, so it may have to do with the automatic expansion during readBuffer.writeBytes(input) (see the sketch after this list)
- the old implementation in lettucemod's SearchOutput created the documents on the fly and was therefore not affected
- the new implementation using ComplexOutput keeps all the ByteBuffers and only later transforms them in SearchReplyParser
- I tried replacing buffer.internalNioBuffer with buffer.nioBuffer in RedisStateMachine#readBytes0, but it has no effect
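To illustrate the mechanism suspected in the last points, here is a minimal, self-contained sketch (not Lettuce code; class name, buffer sizes and contents are made up for the demo): a java.nio.ByteBuffer view obtained from a Netty ByteBuf keeps pointing at the old backing memory once the ByteBuf has to grow, so anything that only holds such views can later observe stale or reused memory.

import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class NioViewVsByteBufGrowthDemo {

    public static void main(String[] args) {
        // Small pooled direct buffer standing in for the aggregation read buffer.
        ByteBuf readBuffer = PooledByteBufAllocator.DEFAULT.directBuffer(64);
        readBuffer.writeBytes("first chunk".getBytes(StandardCharsets.US_ASCII));

        // A java.nio.ByteBuffer view into the buffer's current backing memory,
        // similar to what a decoded-but-not-yet-parsed value amounts to.
        ByteBuffer view = readBuffer.nioBuffer(0, readBuffer.writerIndex());

        // Writing past the current capacity forces the ByteBuf onto a new memory
        // region; the old region can go back to the pool and be handed out again.
        readBuffer.writeBytes(new byte[256]);

        // The view still points at the old region. It typically still shows
        // "first chunk" here, but those bytes are no longer owned by readBuffer,
        // so in a busy pipeline they can be overwritten at any time.
        byte[] copy = new byte[view.remaining()];
        view.get(copy);
        System.out.println(new String(copy, StandardCharsets.US_ASCII));

        readBuffer.release();
    }
}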
Hey @garry-mcfly, thanks for reporting this bug. @tishun and I looked into the problem, and it does indeed seem to lie in how the ComplexOutput parsing works. Since the ByteBuffers are not copies but references into direct memory, decoding bigger results (which takes more processing time) leaves a window in which other processing can change that piece of memory, and then the memory corruption occurs. We will address this with a fix in the next release and will probably backport it.
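For context, a minimal sketch of the general direction such a fix tends to take (this is not the actual Lettuce change; copyOf is a hypothetical helper): copy the decoded bytes out of the shared read buffer before the parse is deferred, so the value no longer depends on memory the decoder keeps reusing.

import java.nio.ByteBuffer;

final class DefensiveCopy {

    // Copies the remaining bytes of a decoded view into a heap buffer of its own,
    // so the value stays valid even after the source buffer is grown or reused.
    static ByteBuffer copyOf(ByteBuffer view) {
        ByteBuffer copy = ByteBuffer.allocate(view.remaining());
        copy.put(view.duplicate()); // duplicate() leaves the caller's position untouched
        copy.flip();
        return copy;
    }
}

The trade-off is an extra allocation and copy per field, which only matters for outputs that defer parsing, such as ComplexOutput feeding SearchReplyParser.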
Curious.
I am not able to reproduce the same behaviour using the provided example. What I get instead is a set of misaligned results that I believe are partially an issue in the test case itself.
Perhaps the behavior is tied to the runtime? Have you attempted the same exercise using some other JRE?
I tried it with Amazon Corretto 21.0.1 and OpenJDK 17.0.1 with the same result (running on Ubuntu 22.04). What else can I provide to make it reproducible on your side?
Regarding the misaligned results, here is a version of the test that does not rely on Redis-side sorting:
Test code without Redis sorting
package io.lettuce.core;

import io.lettuce.core.api.StatefulRedisConnection;
import io.lettuce.core.json.JsonPath;
import io.lettuce.core.protocol.DecodeBufferPolicies;
import io.lettuce.core.protocol.ProtocolVersion;
import io.lettuce.core.search.arguments.CreateArgs;
import io.lettuce.core.search.arguments.NumericFieldArgs;
import io.lettuce.core.search.arguments.SearchArgs;

import org.opentest4j.AssertionFailedError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

public class RediSearchBugProver {

    private static final Logger log = LoggerFactory.getLogger(RediSearchBugProver.class);

    public static void main(String[] args) {
        // System.setProperty("org.slf4j.simpleLogger.log.io.lettuce.core.protocol", "trace");
        var uri = RedisURI.create("localhost", 6379);
        try (var client = RedisClient.create(uri)) {
            client.setOptions(ClientOptions.builder()
                    // increase it drastically just to be sure discardBytesRatio is not the cause
                    .decodeBufferPolicy(DecodeBufferPolicies.ratio(Integer.MAX_VALUE / 2.0f))
                    // ft.search result on RESP2 is an array, on RESP3 a map. So error is different
                    .protocolVersion(ProtocolVersion.RESP2)
                    .build());
            try (var connection = client.connect()) {
                runTest(connection, String.valueOf(System.currentTimeMillis()));
            }
        }
    }

    private static void runTest(StatefulRedisConnection<String, String> con, String space) {
        var prefix = "test-" + space + ":";
        var index = "idx-" + space;
        log.info("===> running with {}, {}", prefix, index);
        con.sync().ftCreate(
                index,
                CreateArgs.<String, String>builder()
                        .on(CreateArgs.TargetType.JSON)
                        .withPrefix(prefix)
                        .build(),
                List.of(NumericFieldArgs.<String>builder().name("pos").build())
        );
        var searchArgs = SearchArgs.<String, String>builder()
                //.sortBy(SortByArgs.<String>builder().attribute("pos").build())
                .limit(0, 10_000)
                .build();
        var expected = new ArrayList<>();
        for (int i = 1; i <= 1000; i++) {
            var latest = """
                    {"pos":%d,"ts":%d,"large":"just here to make the response larger to some great extend and overflow the buffers"}
                    """.formatted(i, System.currentTimeMillis()).trim();
            // lettuce<7.x: con.sync().jsonSet(prefix + i, JsonPath.ROOT_PATH, con.sync().getJsonParser().createJsonValue(latest));
            con.sync().jsonSet(prefix + i, JsonPath.ROOT_PATH, latest);
            expected.add(latest);
            if (i >= 924) {
                log.info("=== search {}", i);
                var searchReply = con.sync().ftSearch(index, "*", searchArgs);
                // with RESP3 this simply returns 0 as the map keys in Resp3SearchResultsParser are not found
                assertEquals(expected.size(), searchReply.getCount());
                var unprocessed = new HashSet<>(expected);
                for (int t = 1; t <= expected.size(); t++) {
                    var fields = searchReply.getResults().get(t - 1).getFields();
                    try {
                        var document = fields.get("$");
                        assertTrue(unprocessed.remove(document), () -> "Unexpected document " + document);
                        //assertEquals(expected.get(t - 1), fields.get("$"));
                    } catch (AssertionFailedError e) {
                        // with RESP2 this shows strange fields instead of the expected '$={"pos":..."'
                        var formatted = fields.entrySet().stream()
                                .map(entry -> "Key='" + entry.getKey() + "', Value='" + entry.getValue() + "'\n")
                                .collect(Collectors.joining());
                        log.info("Fields at pos {}:\n{}", t - 1, formatted);
                        throw new AssertionFailedError("On loop " + i + ": " + e.getMessage(), e);
                    }
                }
                assertEquals(0, unprocessed.size());
            }
        }
    }
}
Yes, we were able to reproduce it on Ubuntu:
ERROR
#
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x000077ff9fd6faf5, pid=450054, tid=450093
#
# JRE version: OpenJDK Runtime Environment (21.0.9+10) (build 21.0.9+10-Ubuntu-124.04)
# Java VM: OpenJDK 64-Bit Server VM (21.0.9+10-Ubuntu-124.04, mixed mode, sharing, tiered, compressed oops, compressed class ptrs, serial gc, linux-amd64)
# Problematic frame:
# v ~StubRoutines::jbyte_disjoint_arraycopy_avx3 0x000077ff9fd6faf5
#
# Core dump will be written. Default location: Core dumps may be processed with "/usr/share/apport/apport -p%p -s%s -c%c -d%d -P%P -u%u -g%g -F%F -- %E" (or dumping to /home/ubuntu/memory-corruption/core.450054)
#
# If you would like to submit a bug report, please visit:
# https://bugs.launchpad.net/ubuntu/+source/openjdk-21
#
--------------- S U M M A R Y ------------
Command Line: -Djansi.mode=force -Dclassworlds.conf=/usr/share/maven/bin/m2.conf -Dmaven.home=/usr/share/maven -Dlibrary.jansi.path=/usr/share/maven/lib/jansi-native -Dmaven.multiModuleProjectDirectory=/home/ubuntu/memory-corruption org.codehaus.plexus.classworlds.launcher.Launcher -q org.codehaus.mojo:exec-maven-plugin:3.3.0:java -Dexec.mainClass=RediSearchBugProver
Host: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz, 2 cores, 914M, Ubuntu 24.04.2 LTS
Time: Wed Nov 26 08:45:18 2025 UTC elapsed time: 4.237877 seconds (0d 0h 0m 4s)
--------------- T H R E A D ---------------
Current thread (0x000077ff204c7070): JavaThread "lettuce-nioEventLoop-4-1" daemon [_thread_in_Java, id=450093, stack(0x000077ffb5fd6000,0x000077ffb60d6000) (1024K)]
Stack: [0x000077ffb5fd6000,0x000077ffb60d6000], sp=0x000077ffb60d39f0, free space=1014k
Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
v ~StubRoutines::jbyte_disjoint_arraycopy_avx3 0x000077ff9fd6faf5
siginfo: si_signo: 11 (SIGSEGV), si_code: 1 (SEGV_MAPERR), si_addr: 0x000077ffb50c1125
Registers:
RAX=0x00000000000fffff, RBX=0x00000000f1c2c8b0, RCX=0x0000000000000000, RDX=0x0000000000000014
RSP=0x000077ffb60d39f0, RBP=0x000077ffb60d39f0, RSI=0x00000000f73a5ac8, RDI=0x000077ffb50c1125
R8 =0x0000000000000014, R9 =0x0000000000000000, R10=0x000077ff9fd73e80, R11=0x0000000000000000
R12=0x0000000000000000, R13=0x00000000f73a5ab8, R14=0x0000000000000000, R15=0x000077ff204c7070
RIP=0x000077ff9fd6faf5, EFLAGS=0x0000000000010202, CSGSFS=0x002b000000000033, ERR=0x0000000000000004
TRAPNO=0x000000000000000e