node-csvtojson
node-csvtojson copied to clipboard
Stuck at row 1197824
I am using the code below. It gets stuck at row 1197824. Note that the CSV is 50.3 GB, and the JSON output is currently 1.72 GB. My computer has 64 GB of RAM, but Node.js uses at most 150 MB. CPU usage is below 50% most of the time.
I have tried running it with the below command to force it to use more memory.
node --max-old-space-size=8192 index.js
const fs = require("fs");
const { Transform, pipeline } = require("stream");
const csvtojson = require("csvtojson");
// Number of rows seen so far; used only for the progress log below.
let index = 0;

/**
 * Transform stream that joins newline-terminated JSON rows into a single
 * JSON array: "[" before the first row, "," between rows, and "]" at the end
 * (or "[]" when no rows arrived).
 *
 * Assumes every incoming chunk is exactly one serialized row followed by a
 * line terminator — TODO confirm against the upstream parser's output.
 */
const lineToArray = new Transform({
  transform(chunk, encoding, cb) {
    console.log(`Inserting ${index++} row`);
    // Strip only a real trailing line terminator. Blindly dropping two
    // characters assumes CRLF and would cut off the closing brace of a row
    // that ends in a lone "\n".
    const row = chunk.toString("utf-8").replace(/\r?\n$/, "");
    const prefix = this.isNotAtFirstRow ? "," : "[";
    this.push(`${prefix}${row}`);
    this.isNotAtFirstRow = true;
    cb();
  },
  flush(cb) {
    // Close the array, or emit an empty array if no rows were seen.
    this.push(this.isNotAtFirstRow ? "]" : "[]");
    cb();
  },
});
// Stream ./persons.csv through the tab-delimited CSV parser and the
// array-wrapping transform into ./persons.json.
// pipeline() is used instead of chained .pipe() calls because .pipe() does
// not forward errors or destroy the other streams when one of them fails.
pipeline(
  fs.createReadStream("./persons.csv"),
  csvtojson({
    checkType: true,
    downstreamFormat: "line",
    delimiter: "\t",
  }),
  lineToArray,
  fs.createWriteStream("./persons.json"),
  (err) => {
    if (err) {
      // Report the failure and signal it through the exit code.
      console.error("CSV to JSON conversion failed:", err);
      process.exitCode = 1;
    }
  }
);