Search code examples
javascriptnode.jsjsontext-to-speechgoogle-text-to-speech

Google Text-to-speech - Loading text from individual lines of a txt file


I am using the Google TextToSpeech API in Node.js to generate speech from text. I was able to get an output file with the same name as the text that is generated for the speech. However, I need to tweak this a bit. I wish I could generate multiple files at the same time. The point is that I have, for example, 5 words (or sentences) to generate, e.g. cat, dog, house, sky, sun. I would like to generate them each to a separate file: cat.wav, dog.wav, etc.

I also want the application to be able to read these words from the * .txt file (each word/sentence on a separate line of the * .txt file).

Is there such a possibility? Below I am pasting the * .js file code and the * .json file code that I am using.

*.js

const textToSpeech = require('@google-cloud/text-to-speech');
const fs = require('fs');
const util = require('util');
// Project id and key file handed to the Text-to-Speech client below.
const projectId = 'forward-dream-295509';
const keyFilename = 'myauth.json';

// Client used for every synthesizeSpeech request in this script.
const client = new textToSpeech.TextToSpeechClient({
  projectId,
  keyFilename,
});

// Raw contents of setting.json (no encoding given, so this is a Buffer);
// Text2Speech runs JSON.parse on it.
const YourSetting = fs.readFileSync('setting.json');
/**
 * Sends the given settings to the Text-to-Speech client and saves the
 * returned audio to a file named after the input text plus ".wav".
 *
 * @param {string|Buffer} YourSetting - JSON text of the request; it is passed
 *   through JSON.parse and its `input.text` is used for the file name.
 * @returns {Promise<void>} Resolves after the file is written and logged.
 */
async function Text2Speech(YourSetting) {
  const [synthesized] = await client.synthesizeSpeech(JSON.parse(YourSetting));

  // The text doubles as the output file name and as the log label.
  const { text } = JSON.parse(YourSetting).input;

  const saveFile = util.promisify(fs.writeFile);
  await saveFile(`${text}.wav`, synthesized.audioContent, 'binary');
  console.log(`Audio content written to file: ${text}`);
}
// Kick off the synthesis. The promise is handled explicitly so that a failed
// API call or file write is reported and reflected in the exit code instead
// of surfacing as an unhandled rejection.
Text2Speech(YourSetting).catch((error) => {
  console.error('Text-to-speech generation failed:', error);
  process.exitCode = 1;
});

*.json

{
  "audioConfig": {
    "audioEncoding": "LINEAR16",
    "pitch": -2,
    "speakingRate": 1
  },
  "input": {
    "text": "Text to Speech" 
  },
  "voice": {
    "languageCode": "en-US",
    "name": "en-US-Wavenet-D"
  }
}

I'm not very good at programming. I found a tutorial on google on how to do this and slightly modified it so that the name of the saved file was the same as the generated text.

I would be very grateful for your help. Arek


Solution

  • Here ya go - I haven't tested it, but this should show how to read a text file, split it into individual lines, then run TTS over each line with a set concurrency. It uses the p-map and filenamify npm packages, which you'll need to add to your project. Note that Google may have API throttling or rate limits that I didn't take into account here - consider using the p-throttle library if that's a concern.

    // https://www.npmjs.com/package/p-map
    const pMap = require('p-map');
    
    // https://github.com/sindresorhus/filenamify
    const filenamify = require('filenamify');
    
    
    const textToSpeech = require('@google-cloud/text-to-speech');
    const fs = require('fs');
    const path = require('path');
    // project id and key file handed to the Text-to-Speech client
    const projectId = 'forward-dream-295509';
    const keyFilename = 'myauth.json';
    const client = new textToSpeech.TextToSpeechClient({
        projectId,
        keyFilename,
    });

    // settings.json is read once, as UTF-8 text
    const rawSettings = fs.readFileSync('setting.json', 'utf8');

    // base data for all requests (voice, etc)
    const yourSetting = JSON.parse(rawSettings);

    // where wav files will be put
    const outputDirectory = '.';
    
    /**
     * Synthesizes `text` using the shared settings and writes the resulting
     * audio to `outputPath`.
     *
     * @param {string} text - text to send as the request's `input.text`
     * @param {string} outputPath - file the audio content is written to
     * @returns {Promise<object>} the first element of the synthesizeSpeech result
     */
    async function Text2Speech(text, outputPath) {
        // everything from settings.json is kept; only `input` is replaced
        const input = { text };
        const [synthesis] = await client.synthesizeSpeech({ ...yourSetting, input });

        await fs.promises.writeFile(outputPath, synthesis.audioContent, 'binary');
        console.log(`Audio content written to file: ${text} = ${outputPath}`);

        // callers may ignore this; it is returned for convenience
        return synthesis;
    }
    
    /**
     * Processes one line of text: synthesizes it to a wav file and reports the
     * outcome. Errors thrown by Text2Speech are logged and captured in the
     * returned object rather than rethrown, so one bad line does not abort
     * the whole batch.
     *
     * @param {string} text - the line to synthesize
     * @param {number} index - position supplied by the caller (the pMap mapper
     *   index in processInputFile); stored as `lineNumber`. NOTE: this is the
     *   position among the lines passed in, not necessarily the line number in
     *   the original file.
     * @returns {Promise<{text: string, lineNumber: number, path: string,
     *   isSuccess: boolean, error: ?Error}>} outcome for this line
     */
    async function processLine(text, index) {
        // create output path based on text input (use library to ensure it's filename safe)
        const outputPath = path.join(outputDirectory, `${filenamify(text)}.wav`);
        const result = {
            text,
            lineNumber: index,
            path: outputPath,
            isSuccess: null,
            error: null
        };
        try {
            // the response itself is not needed here, only success/failure
            await Text2Speech(text, outputPath);
            result.isSuccess = true;
        } catch (error) {
            console.warn(`Failed: ${text}`, error);
            result.isSuccess = false;
            result.error = error;
        }
        return result;
    }
    
    /**
     * Reads a text file, runs processLine on every non-empty line with a
     * bounded number of lines in flight, prints a summary table and returns
     * the per-line results.
     *
     * @param {string} filepath - path of the UTF-8 text file, one entry per line
     * @param {number} [concurrency=3] - passed to pMap as its `concurrency` option
     * @returns {Promise<Array<object>>} the objects returned by processLine,
     *   as collected by pMap
     */
    async function processInputFile(filepath, concurrency = 3) {
        // non-blocking read; a missing/unreadable file rejects the returned promise
        const rawText = await fs.promises.readFile(filepath, { encoding: 'utf8' });
        const lines = rawText
            // split into one item per line (handles \n, \r\n and blank-line runs)
            .split(/[\r\n]+/)
            // remove surrounding whitespace
            .map((line) => line.trim())
            // remove empty lines
            .filter(Boolean);

        const results = await pMap(lines, processLine, { concurrency });
        console.log('Done!');
        console.table(results);

        // hand the outcomes back so callers can inspect failures programmatically
        return results;
    }
    
    // create sample text file
    const sampleText = `Hello World
    cat
    dog
    another line of text`;
    fs.writeFileSync('./my-text-lines.txt', sampleText);
    
    // process each line in the text file, 3 at a time; failures outside the
    // per-line handling (e.g. an unreadable input file) are reported here
    // instead of being left as an unhandled rejection
    processInputFile('./my-text-lines.txt', 3).catch((error) => {
        console.error('Failed to process input file:', error);
        process.exitCode = 1;
    });