deepgram-js-sdk
deepgram-js-sdk copied to clipboard
Corrupted buffer on SpeakLiveClient and ListenLiveClient
This doesn't work on my end: https://developers.deepgram.com/docs/streaming-text-to-speech
When a WebSocket message arrives, the client calls the `handleMessage` function.
https://github.com/deepgram/deepgram-js-sdk/blob/main/src/packages/SpeakLiveClient.ts#L139-L162
When `event.data` itself is a `Buffer`, the SDK passes on `event.data.buffer`, which is an `ArrayBufferLike`. However, this loses some crucial metadata held by the Buffer itself: `BYTES_PER_ELEMENT`, `byteLength`, and `byteOffset`.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array
We shouldn't be using `ArrayBuffer` in the event, but just `Buffer` instead.
Perhaps it works for MP3 or some other files, but it definitely fails with linear16
.
To reproduce:
const fs = require("fs");
const { createClient, LiveTTSEvents } = require("../../dist/main/index");
const live = async () => {
// Phrase that the TTS service is asked to synthesize.
const utterance = "Hello, how can I help you today?";

const client = createClient(process.env.DEEPGRAM_API_KEY);
const speakConnection = client.speak.live({ encoding: 'linear16', bit_rate: 16000 });

// Accumulates every audio chunk received until the flush is acknowledged.
let audioBuffer = Buffer.alloc(0);

// Appends one received audio chunk to the accumulated audio.
const onAudio = (data) => {
  console.log("Deepgram audio data received");
  // Buffer.from(data) is no longer the same as the buffer the socket delivered.
  audioBuffer = Buffer.concat([audioBuffer, Buffer.from(data)]);
};

// Persists the accumulated audio once the flush event is received.
const onFlushed = () => {
  console.log("Deepgram Flushed");
  writeFile();
};

speakConnection.on(LiveTTSEvents.Open, () => {
  // Queue the text, then send Flush right after it.
  speakConnection.sendText(utterance);
  speakConnection.flush();

  // The audio and flush listeners are attached only once the connection is open.
  speakConnection.on(LiveTTSEvents.Audio, onAudio);
  speakConnection.on(LiveTTSEvents.Flushed, onFlushed);
});
/**
 * Builds the canonical 44-byte RIFF/WAVE header for an uncompressed audio payload.
 *
 * @param {number} bufferLength - Size of the audio payload in bytes (Subchunk2Size).
 * @param {number} sampleRate - Samples per second.
 * @param {number} numChannels - Channel count (1 = mono, 2 = stereo).
 * @param {number} bitsPerSample - Bits per sample (16 for linear16).
 * @param {number} [audioFormat=1] - WAVE format tag; 1 means PCM.
 * @returns {Buffer} A 44-byte buffer holding the RIFF, "fmt " and "data" chunk headers.
 */
function createWavHeader(bufferLength, sampleRate, numChannels, bitsPerSample, audioFormat = 1) {
  const bytesPerSecond = (sampleRate * numChannels * bitsPerSample) / 8;
  const bytesPerFrame = (numChannels * bitsPerSample) / 8;

  const wav = Buffer.alloc(44);

  // Fields are laid out back to back, so a running cursor replaces fixed offsets.
  let cursor = 0;
  const putTag = (fourCc) => {
    cursor += wav.write(fourCc, cursor);
  };
  const putU32 = (value) => {
    cursor = wav.writeUInt32LE(value, cursor);
  };
  const putU16 = (value) => {
    cursor = wav.writeUInt16LE(value, cursor);
  };

  // RIFF chunk descriptor
  putTag('RIFF');
  putU32(36 + bufferLength); // ChunkSize: rest of the header (36) plus the payload
  putTag('WAVE');

  // "fmt " subchunk
  putTag('fmt ');
  putU32(16); // Subchunk1Size: 16 for PCM
  putU16(audioFormat);
  putU16(numChannels);
  putU32(sampleRate);
  putU32(bytesPerSecond); // ByteRate
  putU16(bytesPerFrame); // BlockAlign
  putU16(bitsPerSample);

  // "data" subchunk
  putTag('data');
  putU32(bufferLength); // Subchunk2Size

  return wav;
}
/**
 * Writes the accumulated audio to "output.wav" as a playable WAV file.
 *
 * Prepends a PCM WAV header to the audio collected in `audioBuffer` and writes
 * header + audio to disk. Does nothing when no audio has been collected.
 * The outcome is reported through the console; nothing is returned.
 */
const writeFile = () => {
  // Nothing was received, so there is nothing worth writing.
  if (audioBuffer.length === 0) {
    return;
  }

  // NOTE(review): these values must describe the audio actually received;
  // confirm the sample rate against what the connection was asked to produce.
  const sampleRate = 8000;
  const numChannels = 1; // Mono
  const bitsPerSample = 16; // 16 bits for linear16 encoding
  const audioFormat = 1; // PCM format

  const wavHeader = createWavHeader(audioBuffer.length, sampleRate, numChannels, bitsPerSample, audioFormat);
  const wavBuffer = Buffer.concat([wavHeader, audioBuffer]);

  // Write header + audio. Writing the bare audio would leave a ".wav" file
  // without a RIFF header, which players cannot interpret.
  fs.writeFile("output.wav", wavBuffer, (err) => {
    if (err) {
      console.error("Error writing audio file:", err);
      return;
    }
    console.log("Audio file saved as output.wav");
  });
};
};
// Run the example. `live` is async, so surface a rejection instead of leaving
// the returned promise unhandled.
live().catch((err) => {
  console.error("Live TTS example failed:", err);
  process.exitCode = 1;
});
This is just to generate the file, nothing wrong with createWavHeader
- because I've also tried this with vonage's streaming API:
https://developer.vonage.com/en/voice/voice-api/concepts/websockets#writing-audio-to-the-websocket
and with Twilio's
https://www.twilio.com/docs/voice/media-streams/websocket-messages#send-websocket-messages-to-twilio
If I don't use the SDK and use my own custom WS client, it works fine. Basically instead of:
} else if (event.data instanceof ArrayBuffer) {
this.handleBinaryMessage(event.data);
} else if (Buffer.isBuffer(event.data)) {
this.handleBinaryMessage(event.data.buffer);
it should be:
} else if (event.data instanceof ArrayBuffer) {
this.handleBinaryMessage(Buffer.from(event.data));
} else if (Buffer.isBuffer(event.data)) {
this.handleBinaryMessage(event.data);