ospeak
ospeak copied to clipboard
Streaming mode
I wanted to build my own (before I saw this), so I wrote my own script, which breaks text into paragraphs and asynchronously converts them (in batches), so that it runs smoothly and starts talking early, then keeps synthesizing new text on the fly as it progresses.
I use this to read long articles from NYTimes or whatever
I'm pasting my rough JS code here in case it's of interest for building a streaming feature. (There's also code that takes just a URL and uses Puppeteer with my Chrome cookies, so it can access sites I'm logged in to, etc.)
#!/usr/bin/env node
import prompt from "prompt";
import clipboardy from "clipboardy";
import fetch from "node-fetch";
import { JSDOM } from "jsdom";
import { Readability } from "@mozilla/readability";
import OpenAI from "openai";
import puppeteer from "puppeteer";
import chrome from "chrome-cookies-secure";
import yargs from "yargs";
import { hideBin } from "yargs/helpers";
import fs from "fs";
import { exec } from "child_process";
import path from "path";
// Maximum characters per TTS chunk; longer paragraphs are split at word
// boundaries (see breakIntoWellSizedParagraphs) before hitting the API.
const MAX_PARAGRAPH_SIZE_CHARS = 1000;
const TEMP_AUDIO_DIR = "/tmp/tts-temp-audio/"; // Directory for temp audio files
const RESPONSE_FORMAT = "mp3"; // Audio format requested from the TTS API
// Fail fast at startup if the OpenAI API key is not configured.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
if (!OPENAI_API_KEY) {
throw new Error("Please set OPENAI_API_KEY");
}
const openai = new OpenAI({ apiKey: OPENAI_API_KEY });
// Verify that an external command is installed by resolving it with `which`.
// Resolves to true when found; rejects with an Error otherwise.
const checkCommandAvailability = (command) =>
  new Promise((resolve, reject) => {
    exec(`which ${command}`, (error, stdout) => {
      const found = !error && Boolean(stdout);
      if (found) {
        resolve(true);
      } else {
        reject(new Error(`${command} not found. Please install it.`));
      }
    });
  });
// Abort early if either external audio tool is missing.
const requiredTools = ["ffmpeg", "ffplay"];
Promise.all(requiredTools.map((tool) => checkCommandAvailability(tool))).catch(
  (error) => {
    console.error(error.message);
    process.exit(1);
  }
);
prompt.start(); // Initialize the `prompt` module (no prompts are issued in this file).
// NOTE(review): this module-level N_LOOKAHEAD is shadowed by a local
// `const N_LOOKAHEAD = 2` inside processText, so this value of 5 is never
// used — confirm which lookahead value is actually intended.
const N_LOOKAHEAD = 5;
// CLI definition: one of several input sources (--text / --clipboard /
// --url / --curl / stdin fallback), plus playback speed and an optional
// summarization pass.
const argv = yargs(hideBin(process.argv))
  .options({
    speed: {
      alias: "s",
      description: "Speed of the speech output",
      type: "number",
      default: 1.5,
    },
    summary: {
      alias: "r",
      description: "Summarize text",
      type: "boolean",
    },
    text: {
      alias: "t",
      description: "Text to process",
      type: "string",
    },
    clipboard: {
      alias: "c",
      description: "Process text from clipboard",
      type: "boolean",
    },
    url: {
      alias: "u",
      description: "URL to process",
      type: "string",
    },
    curl: {
      description: "Fetch content via curl and process it",
      type: "string",
    },
  })
  .help()
  .alias("help", "h").argv;
// Create the temp audio directory if it doesn't exist. Using
// `recursive: true` makes the call idempotent (removing the
// existsSync/mkdirSync TOCTOU race) and creates missing parents.
fs.mkdirSync(TEMP_AUDIO_DIR, { recursive: true });
// Synthesize one paragraph of speech with the OpenAI TTS API and write
// the resulting audio file to outputPath.
const getTTSAudio = async (paragraph, outputPath) => {
  console.log("Processing paragraph:", paragraph);
  const response = await openai.audio.speech.create({
    model: "tts-1",
    voice: "nova",
    input: paragraph,
    response_format: RESPONSE_FORMAT,
  });
  const audioBuffer = Buffer.from(await response.arrayBuffer());
  fs.writeFileSync(outputPath, audioBuffer);
};
// Change the tempo of an audio file with ffmpeg's atempo filter and write
// the result to outputFile. Resolves when ffmpeg exits cleanly; rejects
// with an Error (cause preserved) on failure.
// NOTE: a single atempo stage only accepts factors in [0.5, 2.0] —
// speeds outside that range make ffmpeg fail.
const adjustSpeedWithFFMPEG = (inputFile, outputFile, speed) => {
  return new Promise((resolve, reject) => {
    // Quote the paths so files with spaces don't break the shell command.
    const command = `ffmpeg -i "${inputFile}" -filter:a "atempo=${speed}" -y "${outputFile}"`;
    exec(command, (error) => {
      if (error) {
        // Fix: previously rejected with a bare string; reject with an
        // Error so stack traces and `instanceof Error` checks work.
        reject(
          new Error(`Error adjusting speed: ${error.message}`, { cause: error })
        );
      } else {
        resolve();
      }
    });
  });
};
// Play an audio file and resolve once playback finishes.
// Fix: the startup availability check verifies `ffplay`, but this
// previously shelled out to the macOS-only `afplay`. Use ffplay so the
// check and the player agree (and the script works beyond macOS).
const playAudio = (audioFile) => {
  return new Promise((resolve, reject) => {
    // -nodisp: no video window; -autoexit: return when the file ends.
    // Path is quoted so filenames with spaces work.
    exec(`ffplay -nodisp -autoexit -loglevel error "${audioFile}"`, (error) => {
      if (error) {
        reject(new Error(`Error playing audio: ${error.message}`));
      } else {
        resolve();
      }
    });
  });
};
// Delete the given files, silently skipping any that no longer exist.
const cleanupFiles = (files) => {
  for (const file of files) {
    if (fs.existsSync(file)) {
      fs.unlinkSync(file);
    }
  }
};
// Ask the chat model for a 2-5 paragraph summary of `text` and return it.
const getSummarizedText = async (text) => {
  console.log("Summarizing text...");
  const instructions =
    "You are a professional summarizer. Your task is to concisely and precisely summarize the following text, in 2 to 5 paragraphs:";
  const { choices } = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [
      { role: "system", content: instructions },
      { role: "user", content: text },
    ],
  });
  return choices[0].message.content;
};
// Break text into paragraphs and stream them through TTS -> speed
// adjustment -> playback. Synthesis is prefetched a few paragraphs ahead
// of playback so speech starts early and continues without gaps.
// If `summary` is truthy the text is first condensed via the chat model.
const processText = async (text, summary) => {
  console.log("------------------");
  console.log(text);
  console.log("------------------");
  if (summary) {
    text = await getSummarizedText(text);
    console.log("-----SUMMARY------");
    console.log(text);
    console.log("------------------");
  }
  const paragraphs = breakIntoParagraphs(text);
  const wellSizedParagraphs = paragraphs.flatMap((paragraph) =>
    breakIntoWellSizedParagraphs(paragraph)
  );
  // How many paragraphs to synthesize ahead of playback. Named distinctly
  // to avoid shadowing the (unused) module-level N_LOOKAHEAD.
  const PREFETCH_COUNT = 2;
  const paragraphsAudioPromises = new Map();
  for (let i = 0; i < wellSizedParagraphs.length; i++) {
    // Kick off synthesis for the current paragraph and the lookahead
    // window, skipping any that are already in flight.
    const end = Math.min(i + PREFETCH_COUNT, wellSizedParagraphs.length);
    for (let j = i; j < end; j++) {
      if (!paragraphsAudioPromises.has(j)) {
        paragraphsAudioPromises.set(
          j,
          processParagraph(j, wellSizedParagraphs[j])
        );
      }
    }
    // Wait for the current paragraph's audio to be ready, then play it.
    const { spedUpAudioPath } = await paragraphsAudioPromises.get(i);
    try {
      await playAudio(spedUpAudioPath);
    } finally {
      // Fix: delete the temp file and drop the settled promise even when
      // playback throws, so failures don't leak files or map entries.
      cleanupFiles([spedUpAudioPath]);
      paragraphsAudioPromises.delete(i);
    }
  }
};
// Synthesize one paragraph to audio, apply the speed adjustment, and
// return the path to the sped-up file for playback. Temp filenames are
// keyed by paragraph index so concurrent prefetches never collide.
const processParagraph = async (idx, paragraph) => {
  const tempAudioPath = path.join(
    TEMP_AUDIO_DIR,
    `audio-${idx}.${RESPONSE_FORMAT}`
  );
  const spedUpAudioPath = path.join(
    TEMP_AUDIO_DIR,
    `audio-speed-${idx}.${RESPONSE_FORMAT}`
  );
  await getTTSAudio(paragraph, tempAudioPath);
  try {
    await adjustSpeedWithFFMPEG(tempAudioPath, spedUpAudioPath, argv.speed);
  } finally {
    // Fix: the raw TTS file was previously never deleted, leaking one
    // file per paragraph into TEMP_AUDIO_DIR.
    cleanupFiles([tempAudioPath]);
  }
  return { spedUpAudioPath };
};
// Split text into paragraphs on runs of two or more newlines.
const breakIntoParagraphs = (text) => text.split(/\n{2,}/);
// Split a paragraph into chunks of at most `maxSize` characters, breaking
// only at word boundaries. A single word longer than `maxSize` becomes its
// own (oversized) chunk rather than being split mid-word. The size limit
// is now a backward-compatible parameter defaulting to the module constant.
const breakIntoWellSizedParagraphs = (
  paragraph,
  maxSize = MAX_PARAGRAPH_SIZE_CHARS
) => {
  const parts = [];
  let currentPart = "";
  paragraph.split(" ").forEach((word) => {
    if (currentPart.length + word.length + 1 > maxSize) {
      // Fix: don't emit an empty chunk when the very first word already
      // exceeds the limit (previously pushed "" to the output).
      if (currentPart.length > 0) {
        parts.push(currentPart);
      }
      currentPart = word;
    } else {
      currentPart += (currentPart.length > 0 ? " " : "") + word;
    }
  });
  if (currentPart.length > 0) {
    parts.push(currentPart);
  }
  return parts;
};
// Load a URL in headless Chrome — with the user's Chrome cookies so
// login-gated sites work — extract the readable article text with
// Readability, then run it through the TTS pipeline.
const fetchAndProcessURLPuppeteer = async (url, summary) => {
  const cookies = await chrome.getCookiesPromised(url, "puppeteer");
  const browser = await puppeteer.launch({
    headless: true,
  });
  let article;
  try {
    const page = await browser.newPage();
    await page.setCookie(...cookies);
    await page.setViewport({ width: 1280, height: 800 });
    await page.goto(url, { waitUntil: "networkidle2" });
    const html = await page.content();
    const doc = new JSDOM(html, { url }).window.document;
    article = new Readability(doc).parse();
  } finally {
    // Fix: always close the browser, even when navigation/extraction
    // throws (it previously leaked on any error).
    await browser.close();
  }
  // Fix: Readability.parse() can return null; fail loudly instead of
  // crashing on `article.textContent`. Also removed the stray
  // console.log(doc) debug dump, and the pipeline is now awaited so
  // callers can observe its errors.
  if (!article) {
    throw new Error(`Could not extract a readable article from ${url}`);
  }
  await processText(article.textContent, summary);
};
// Fetch a URL directly (no browser), extract the readable article text
// with Readability, and run it through the TTS pipeline.
const fetchAndProcessURL = async (url, summary) => {
  const response = await fetch(url);
  // Fix: surface HTTP failures instead of feeding an error page to TTS.
  if (!response.ok) {
    throw new Error(`Request failed: ${response.status} ${response.statusText}`);
  }
  const html = await response.text();
  const doc = new JSDOM(html, { url }).window.document;
  const article = new Readability(doc).parse();
  // Fix: Readability.parse() can return null on unextractable pages.
  if (!article) {
    throw new Error(`Could not extract a readable article from ${url}`);
  }
  // Fix: await the pipeline (was fire-and-forget) so errors propagate.
  await processText(article.textContent, summary);
};
// Handle different input methods based on CLI arguments.
// Fix: every dispatch promise previously floated with no rejection
// handler; failures now log and exit non-zero instead of being silent
// unhandled rejections.
const fail = (error) => {
  console.error(error);
  process.exit(1);
};
if (argv.text) {
  processText(argv.text, argv.summary).catch(fail);
} else if (argv.clipboard) {
  processText(clipboardy.readSync(), argv.summary).catch(fail);
} else if (argv.url) {
  fetchAndProcessURLPuppeteer(argv.url, argv.summary).catch(fail);
} else if (argv.curl) {
  fetchAndProcessURL(argv.curl, argv.summary).catch(fail);
} else {
  console.log("Reading from stdin...");
  let data = "";
  process.stdin.on("data", (chunk) => {
    data += chunk;
  });
  process.stdin.on("end", () => {
    // Fix: the stdin path previously dropped the --summary flag,
    // unlike every other input method.
    processText(data, argv.summary).catch(fail);
  });
}