Multithreading leads to file corruption
Describe the bug
When writing JSON files in parallel from many threads (>>100), the files are very likely (>80%) to get corrupted like so:
{
"results": [
{ "key": "val]}
}
Sometimes even data from other files is included randomly, again invalidating the JSON file. This does not happen with FasterXML Jackson nor with Apache Johnzon (the latter also implementing Jakarta JSON-B).
To Reproduce
Here's working code with Jackson:
public class PatchJsonWriter implements AutoCloseable {

    private final Path outputDir;
    private final ExecutorService executorService;
    private final ObjectMapper om;

    public PatchJsonWriter(Path outputDir, ExecutorService executorService) {
        this.outputDir = outputDir;
        this.executorService = executorService;
        this.om = new ObjectMapper().findAndRegisterModules().setDefaultPrettyPrinter(new MinimalPrettyPrinter());
    }

    public void writeParallel(Collection<PatchParameterInfo> parameterInfos) {
        List<CompletableFuture<Void>> futures =
                parameterInfos.stream().map(this::writeAsync).toList();
        CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join();
    }

    private CompletableFuture<Void> writeAsync(PatchParameterInfo patchParameterInfo) {
        return CompletableFuture.runAsync(() -> doWritePatchParameterInfo(patchParameterInfo), this.executorService);
    }

    private void doWritePatchParameterInfo(PatchParameterInfo patchParameterInfo) {
        Path path = this.outputDir
                .resolve(patchParameterInfo.groupName())
                .resolve(patchParameterInfo.appNameName())
                .resolve(patchParameterInfo.version());
        Path file = path.resolve("patch.json");
        try {
            Files.createDirectories(path);
        } catch (IOException ioException) {
            throw new UncheckedIOException(ioException);
        }
        Logger logger = Logger.getLogger(PatchJsonWriter.class.getName());
        PatchParameterWrapper parameterWrapper = PatchParameterWrapper.from(patchParameterInfo.patchParameter());
        logger.info(() -> String.format(Locale.ROOT, "Writing file [%s] with content [%s]", file, parameterWrapper));
        try (OutputStream fos = Files.newOutputStream(
                file, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            this.om.writeValue(fos, parameterWrapper);
        } catch (IOException ioException) {
            throw new UncheckedIOException(ioException);
        } catch (Exception ex) {
            throw new RuntimeException("Problem writing JSON", ex);
        }
    }

    @Override
    public void close() {}
}
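For context, a minimal driver for this writer might look like the following. This is a sketch: PatchJsonWriterDemo and loadParameterInfos() are hypothetical names, and PatchParameterInfo is assumed to be a record with the accessors used above; none of this is part of the original report.

import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class PatchJsonWriterDemo {

    public static void main(String[] args) throws Exception {
        ExecutorService executorService = Executors.newWorkStealingPool();
        try (PatchJsonWriter writer = new PatchJsonWriter(Path.of("out"), executorService)) {
            // loadParameterInfos() stands in for however the real application
            // collects its PatchParameterInfo instances
            writer.writeParallel(loadParameterInfos());
        } finally {
            executorService.shutdown();
        }
    }

    private static List<PatchParameterInfo> loadParameterInfos() {
        return List.of(); // placeholder
    }
}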
This is the version I created with the latest Yasson version (3.0.4 -- where are the release notes btw?):
public class PatchJsonWriter implements AutoCloseable {

    private final Path outputDir;
    private final ExecutorService executorService;
    private final Jsonb jsonb;

    public PatchJsonWriter(Path outputDir, ExecutorService executorService) {
        this.outputDir = outputDir;
        this.executorService = executorService;
        this.jsonb = JsonbBuilder.create();
    }

    // same methods as above omitted

    private void doWritePatchParameterInfo(PatchParameterInfo patchParameterInfo) {
        // ... same
        try (OutputStream fos = Files.newOutputStream(
                file, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            this.jsonb.toJson(PatchParameterWrapper.from(patchParameterInfo.patchParameter()), fos);
        } catch (IOException ioException) {
            throw new UncheckedIOException(ioException);
        }
    }

    @Override
    public void close() throws IOException {
        try {
            this.jsonb.close();
        } catch (IOException ioException) {
            throw ioException;
        } catch (Exception other) {
            throw new UnsupportedOperationException(other);
        }
    }
}
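One way to narrow this down (a sketch, not from the original report): if the shared Jsonb instance is the suspect, a short-lived Jsonb per write isolates any per-instance serializer state, at the cost of throughput. A drop-in replacement for doWritePatchParameterInfo above:

    // Sketch: per-write Jsonb instance to rule out shared-instance state.
    private void doWritePatchParameterInfo(PatchParameterInfo patchParameterInfo) {
        // ... directory setup as above ...
        try (Jsonb localJsonb = JsonbBuilder.create();
             OutputStream fos = Files.newOutputStream(
                     file, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            localJsonb.toJson(PatchParameterWrapper.from(patchParameterInfo.patchParameter()), fos);
        } catch (IOException ioException) {
            throw new UncheckedIOException(ioException);
        } catch (Exception ex) {
            // Jsonb.close() is declared to throw Exception
            throw new RuntimeException("Problem closing the per-write Jsonb instance", ex);
        }
    }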
Using this code, some files are corrupted as shown above:
{
"result": [
{
"name": "app1.service.url"
},
{
"name": "app1.service.ttl"
},
{
"nawr": "app1.jwt.expiration"
},
{
"namwrc."app1.ldap.searchbase"
},
"name"ot"app1.ldap.selfservice_searchba,
n" "erap.alias"
},
{
"name": "otherappport"
}
]
} "app1.jwt.encryptionKeySource"
},
}
]
}
The Jackson variant does not suffer from this behaviour. Yasson is mixing contents from another file into this example file.
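A quick way to detect such corruption programmatically is to re-parse every written file with the Jakarta JSON-P API, which raises JsonParsingException on invalid syntax. A minimal sketch (not part of the original report; the patch.json layout is assumed from the code above):

import jakarta.json.Json;
import jakarta.json.stream.JsonParsingException;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

public class JsonOutputValidator {

    // Walks outputDir and re-parses every patch.json; parse failures flag
    // files corrupted in the way shown above.
    public static void validate(Path outputDir) throws IOException {
        try (Stream<Path> files = Files.walk(outputDir)) {
            files.filter(p -> p.getFileName().toString().equals("patch.json"))
                 .forEach(JsonOutputValidator::parseOrReport);
        }
    }

    private static void parseOrReport(Path file) {
        try (Reader reader = Files.newBufferedReader(file)) {
            Json.createReader(reader).readObject();
        } catch (JsonParsingException e) {
            System.err.println("corrupted JSON in " + file + ": " + e.getMessage());
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}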
Expected behaviour
- Write valid JSON
- Do not leak data from other files into any file
System information:
- OS: Linux
- Java Version: 17
- Yasson Version: 3.0.4 (released, undocumented)
Additional context
- Non-JakartaEE CLI app
- Executor used:
ExecutorService executorService = Executors.newWorkStealingPool();
We at JobRunr have the same issue. We are using Yasson as an option to generate JSON from some objects, and sometimes it generates invalid JSON. This is with really limited parallelism (about 2 simultaneous requests).
The invalid JSON:
[{"deleteSucceededJobsAfter":129600.000000000,"firstHeartbeat":"2024-01-16T08:38:09.971713Z","id":"6e836e6f-3bea-4dc8-9d90-804ce2d3f76c","lastHeartbeat":"2024-01-16T08:38:39.994807Z","name":"Ismailas-MacBook-Pro.local","permanent,{"type":"severe-jobrunr-exception"0000000,"pollIntervalInSeconds":15,"processAllocatedMemory":18048136,"processCpuLoad":0.0020519469633203924,"processFreeMemory":17161821048,"processMaxMemory":17179869184,"running":true,"systemCpuLoad":0.2227047146401985,"systemFreeMemory":20553973760,"systemTotalMemory":68719476736,"workerPoolSize":96}]
Notice the truncated permanent key (permanentlyDeleteDeletedJobsAfter in the valid output) where the invalid JSON begins. Below is the valid JSON:
[{"deleteSucceededJobsAfter":129600.000000000,"firstHeartbeat":"2024-01-16T08:38:09.971713Z","id":"6e836e6f-3bea-4dc8-9d90-804ce2d3f76c","lastHeartbeat":"2024-01-16T08:41:40.032196Z","name":"Ismailas-MacBook-Pro.local","permanentlyDeleteDeletedJobsAfter":259200.000000000,"pollIntervalInSeconds":15,"processAllocatedMemory":48091728,"processCpuLoad":4.15962064712972E-4,"processFreeMemory":17131777456,"processMaxMemory":17179869184,"running":true,"systemCpuLoad":0.17733182589033353,"systemFreeMemory":20528300032,"systemTotalMemory":68719476736,"workerPoolSize":96}]
@rdehuyss: for me, it turned out I was actually writing to the same file twice. Switching to Apache Johnzon only appeared to fix it because it was way faster. For me, this is closed, but I can leave it open for you.
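If overlapping writes to the same path are the suspect, a cheap guard is to check the resolved target paths for duplicates before writing in parallel. A sketch, assuming the PatchParameterInfo accessors from the writer above (it could be called at the top of writeParallel; it additionally needs java.util.Set and java.util.HashSet imports):

    // Sketch: fail fast if two entries resolve to the same patch.json, which
    // would make two threads truncate and write one file concurrently.
    private void assertUniqueTargets(Collection<PatchParameterInfo> parameterInfos) {
        Set<Path> seen = new HashSet<>();
        for (PatchParameterInfo info : parameterInfos) {
            Path file = this.outputDir
                    .resolve(info.groupName())
                    .resolve(info.appNameName())
                    .resolve(info.version())
                    .resolve("patch.json");
            if (!seen.add(file)) {
                throw new IllegalStateException("duplicate target file: " + file);
            }
        }
    }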
@bmarwell: these were two different Strings (so not even files).
Hello @bmarwell and @rdehuyss, I have tried to reproduce this and had no luck so far.
Would you please be so kind and verify whether this still happens with the latest release?
No, I was not able to reproduce it. Maybe it was overlapping writes to the same files after all?
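To rule out overlapping writes entirely, each file can be written to a unique temp file first and then renamed into place atomically. A sketch using only java.nio (not from the original reports; writeAtomically is a hypothetical helper for the Yasson writer above and needs java.nio.file.StandardCopyOption):

    // Sketch: write to a unique temp file in the target directory, then
    // rename it into place; ATOMIC_MOVE means concurrent writers can never
    // interleave bytes in the final file (on POSIX the rename also replaces
    // an existing target atomically).
    private void writeAtomically(Path file, PatchParameterWrapper wrapper) throws IOException {
        Path tmp = Files.createTempFile(file.getParent(), "patch", ".json.tmp");
        try (OutputStream out = Files.newOutputStream(tmp)) {
            this.jsonb.toJson(wrapper, out);
        }
        Files.move(tmp, file, StandardCopyOption.ATOMIC_MOVE);
    }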
Just to add to the discussion: we (the company I am working for) have apparently stumbled upon the very same issue when serialising a relatively simple event POJO. Initially, I suspected that heavy multi-threading leads to some internal string buffer or resource overlap within Eclipse Yasson, but a week ago another customer of ours reported a very similar situation where a simple POJO was serialised with a wrong result almost every SECOND TIME.
The symptoms are the same: broken JSON syntax with either incomplete ("chopped") object key names or misplaced double quotes. But I do not see any apparent intrusion of other POJO data here, just the garbled JSON syntax... Like the following; notice the "step" key, whose string value is not terminated properly.
{"timestamp":1744964625638,"satelliteUid":"64504f02-8d91-4955-a8d8-0bfda15a81d5","cacheEntry":{"documentId":"f15a6316-8dc6-4e00-b5aa-e97313e7ca9c","correlationId":"f15a6316-8dc6-4e00-b5aa-e97313e7ca9c","step":"r,"variables":{"CONTEXT":"Send request to all","FLOW":"DAYS59"}},"cacheKey":"f15a6316-8dc6-4e00-b5aa-e97313e7ca9c:f15a6316-8dc442d75ed-2fc7-4488-96f0-c}
I am still wrapping my head around this, trying to figure out WHY this happens. Sometimes almost 50% of time, sometimes never.
- Version: Eclipse Yasson 3.0.4
- Java: Oracle / Microsoft / RedHat OpenJDK 17.0.13
- OS: Windows Server / RHEL / Ubuntu
Replying to myself (and updating others on the progress of investigation).
Just discovered that the application that has been experiencing these strange corrupted-JSON errors was actually built and packaged in such a way that it included BOTH the Eclipse Parsson (the Jakarta JSON implementation included as a direct dependency since Eclipse Yasson 3.x) and the Eclipse Glassfish Jakarta JSON (another reference implementation of the same Jakarta JSON specification, but part of Oracle/Eclipse Glassfish project). I am not exactly sure which of the two JSON serializers (and generators) Yasson picked at runtime when writing an object into a JSON string; when reading, it definitely "discovered" the Glassfish implementation before the Parsson, so I would assume it did the same for writing.
Once I manipulated the application project dependencies, removed the Eclipse Glassfish implementation and explicitly included Parsson (the same version referenced by the Yasson 3.0.4 descriptor), the corrupted JSON errors seemed to have... magically disappeared! At least we have not been able to get them to pop up their ugly heads again. I will monitor the situation, but it MAY very well be the case that there is "something" in the Eclipse Glassfish Jakarta JSON implementation that makes it less reliable in conjunction with Eclipse Yasson 3.x. Not sure, as the relevant code (in Glassfish vs Parsson) looks almost exactly the same to me...
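To see which implementation actually wins at runtime, one can list every jakarta.json.spi.JsonProvider on the classpath along with the one the API selects. A minimal check (not from the original comment):

import jakarta.json.spi.JsonProvider;
import java.util.ServiceLoader;

public class JsonProviderCheck {

    public static void main(String[] args) {
        // every JSON-P implementation visible via the ServiceLoader
        ServiceLoader.load(JsonProvider.class)
                .forEach(p -> System.out.println("on classpath: " + p.getClass().getName()));
        // the implementation the jakarta.json API actually selects
        System.out.println("selected:     " + JsonProvider.provider().getClass().getName());
    }
}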
P.S. If you are facing similar randomly corrupted JSON files or strings, make sure you do not have multiple Jakarta JSON implementations on your classpath, and if you do, leave only one. Preferably the one that Yasson comes with; with 3.0.4 today, that is Eclipse Parsson 1.1.7.
I tried to build a fully self-contained reproducer. The code tries to minimise the time spent outside of Yasson:
public class Pojo {

    private float deleteSucceededJobsAfter;
    private OffsetDateTime firstHeartbeat;
    private UUID id;
    private OffsetDateTime lastHeartbeat;
    private String name;
    private float permanentlyDeleteDeletedJobsAfter;
    private int pollIntervalInSeconds;
    private long processAllocatedMemory;
    private float processCpuLoad;
    private long processFreeMemory;
    private long processMaxMemory;
    private boolean running;
    private float systemCpuLoad;
    private long systemFreeMemory;
    private long systemTotalMemory;
    private int workerPoolSize;

    public Pojo() {
        super();
    }

    public float getDeleteSucceededJobsAfter() {
        return deleteSucceededJobsAfter;
    }

    public void setDeleteSucceededJobsAfter(float deleteSucceededJobsAfter) {
        this.deleteSucceededJobsAfter = deleteSucceededJobsAfter;
    }

    public OffsetDateTime getFirstHeartbeat() {
        return firstHeartbeat;
    }

    public void setFirstHeartbeat(OffsetDateTime firstHeartbeat) {
        this.firstHeartbeat = firstHeartbeat;
    }

    public UUID getId() {
        return id;
    }

    public void setId(UUID id) {
        this.id = id;
    }

    public OffsetDateTime getLastHeartbeat() {
        return lastHeartbeat;
    }

    public void setLastHeartbeat(OffsetDateTime lastHeartbeat) {
        this.lastHeartbeat = lastHeartbeat;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public float getPermanentlyDeleteDeletedJobsAfter() {
        return permanentlyDeleteDeletedJobsAfter;
    }

    public void setPermanentlyDeleteDeletedJobsAfter(float permanentlyDeleteDeletedJobsAfter) {
        this.permanentlyDeleteDeletedJobsAfter = permanentlyDeleteDeletedJobsAfter;
    }

    public int getPollIntervalInSeconds() {
        return pollIntervalInSeconds;
    }

    public void setPollIntervalInSeconds(int pollIntervalInSeconds) {
        this.pollIntervalInSeconds = pollIntervalInSeconds;
    }

    public long getProcessAllocatedMemory() {
        return processAllocatedMemory;
    }

    public void setProcessAllocatedMemory(long processAllocatedMemory) {
        this.processAllocatedMemory = processAllocatedMemory;
    }

    public long getProcessFreeMemory() {
        return processFreeMemory;
    }

    public void setProcessFreeMemory(long processFreeMemory) {
        this.processFreeMemory = processFreeMemory;
    }

    public long getProcessMaxMemory() {
        return processMaxMemory;
    }

    public void setProcessMaxMemory(long processMaxMemory) {
        this.processMaxMemory = processMaxMemory;
    }

    public boolean isRunning() {
        return running;
    }

    public void setRunning(boolean running) {
        this.running = running;
    }

    public long getSystemFreeMemory() {
        return systemFreeMemory;
    }

    public void setSystemFreeMemory(long systemFreeMemory) {
        this.systemFreeMemory = systemFreeMemory;
    }

    public long getSystemTotalMemory() {
        return systemTotalMemory;
    }

    public void setSystemTotalMemory(long systemTotalMemory) {
        this.systemTotalMemory = systemTotalMemory;
    }

    public int getWorkerPoolSize() {
        return workerPoolSize;
    }

    public void setWorkerPoolSize(int workerPoolSize) {
        this.workerPoolSize = workerPoolSize;
    }

    public float getProcessCpuLoad() {
        return processCpuLoad;
    }

    public void setProcessCpuLoad(float processCpuLoad) {
        this.processCpuLoad = processCpuLoad;
    }

    public float getSystemCpuLoad() {
        return systemCpuLoad;
    }

    public void setSystemCpuLoad(float systemCpuLoad) {
        this.systemCpuLoad = systemCpuLoad;
    }
}
class ThreadingTests {

    private Jsonb jsonb;

    @BeforeEach
    void setUp() {
        this.jsonb = JsonbBuilder.create();
    }

    @Test
    void toJson() throws InterruptedException {
        Random random = new Random();
        int availableProcessors = Runtime.getRuntime().availableProcessors();
        List<Runnable> tasks = new ArrayList<>(availableProcessors);
        AtomicBoolean stop = new AtomicBoolean(false);
        CountDownLatch start = new CountDownLatch(availableProcessors);
        for (int i = 0; i < availableProcessors; i++) {
            Pojo pojo = newRandomPojo(random);
            String expected = this.jsonb.toJson(pojo);
            tasks.add(new Trasher(jsonb, pojo, expected, start, stop));
        }
        List<Thread> threads = new ArrayList<>(availableProcessors);
        for (int i = 0; i < tasks.size(); i++) {
            Runnable task = tasks.get(i);
            Thread thread = new Thread(task, "trasher-" + (i + 1));
            thread.start();
            threads.add(thread);
        }
        // stop the trashers after a fixed duration so the test also
        // terminates when no mismatch occurs; on a mismatch the trashers
        // set the flag themselves
        Thread.sleep(30_000L);
        stop.set(true);
        for (Thread thread : threads) {
            thread.join();
        }
    }

    static final class Trasher implements Runnable {

        private final Jsonb jsonb;
        private final Pojo pojo;
        private final String expected;
        private final StringWriter writer;
        private final CountDownLatch start;
        private final AtomicBoolean stop;

        Trasher(Jsonb jsonb, Pojo pojo, String expected, CountDownLatch start, AtomicBoolean stop) {
            this.jsonb = jsonb;
            this.pojo = pojo;
            this.expected = expected;
            this.start = start;
            this.stop = stop;
            this.writer = new StringWriter();
        }

        @Override
        public void run() {
            this.start.countDown();
            try {
                this.start.await();
            } catch (InterruptedException e) {
                System.err.println("could not start");
                return;
            }
            while (!this.stop.get()) {
                if (!this.runAssertion()) {
                    this.stop.set(true);
                    return;
                }
            }
        }

        private boolean runAssertion() {
            // clear the buffer
            this.writer.getBuffer().setLength(0);
            this.jsonb.toJson(this.pojo, this.writer);
            return checkResult();
        }

        private boolean checkResult() {
            StringBuffer actual = this.writer.getBuffer();
            if (this.expected.length() != actual.length()) {
                reportMismatch(this.expected, actual);
                return false;
            }
            if (actual.indexOf(this.expected) != 0) {
                reportMismatch(this.expected, actual);
                return false;
            }
            return true;
        }

        private static void reportMismatch(String expected, StringBuffer actual) {
            System.err.println("mismatch, expected: '" + expected + "' but got '" + actual + "'");
        }
    }

    private static Pojo newRandomPojo(Random random) {
        Pojo pojo = new Pojo();
        pojo.setDeleteSucceededJobsAfter(random.nextFloat());
        pojo.setFirstHeartbeat(newRandomDate(random));
        pojo.setId(newRandomUuid(random));
        pojo.setName(Long.toString(random.nextLong()));
        pojo.setPermanentlyDeleteDeletedJobsAfter(random.nextFloat());
        pojo.setPollIntervalInSeconds(random.nextInt());
        pojo.setProcessAllocatedMemory(random.nextLong());
        pojo.setProcessCpuLoad(random.nextFloat(100.0f));
        pojo.setProcessFreeMemory(random.nextLong(1_000_000L, 1_000_000_000L));
        pojo.setProcessMaxMemory(random.nextLong(1_000_000L, 1_000_000_000L));
        pojo.setRunning(random.nextBoolean());
        pojo.setSystemCpuLoad(random.nextFloat(400.0f));
        pojo.setSystemFreeMemory(random.nextLong(1_000_000L, 1_000_000_000L));
        pojo.setSystemTotalMemory(random.nextLong(1_000_000L, 1_000_000_000L));
        pojo.setWorkerPoolSize(random.nextInt(1, 100));
        return pojo;
    }

    public static UUID newRandomUuid(Random random) {
        return new UUID(random.nextLong(), random.nextLong());
    }

    public static OffsetDateTime newRandomDate(Random random) {
        LocalDate date = LocalDate.of(random.nextInt(1900, 2100), random.nextInt(1, 12), random.nextInt(1, 28));
        LocalTime time = LocalTime.of(random.nextInt(1, 24), random.nextInt(0, 59));
        return OffsetDateTime.of(date, time, ZoneOffset.UTC);
    }
}
I was unable to reproduce the issue. Maybe other people still observing the issue can adapt the reproducer to their needs.
I will double check later today and get back on this.
Edit: just tested, and the good news is that I'm also unable to reproduce it. For us, the issue can be closed.