Search code examples
javascript, jquery, text-to-speech, speech-synthesis

Javascript - How to get more voices for text to speech?


I made this little script where you can enter some message and it reads it out.

I solved it by using Firefox's SpeechSynthesis API.

How can I get more voices and support for more languages?

Note: There is a "bug" in SO. If I edit my question, edit the script, and test it from there, then 4 voices load. If I run the code snippet normally, then no voices load except the default voice.

// Feature-detect once so every later use of the API can be guarded; the
// original alerted on unsupported browsers but then called speechSynthesis
// anyway, which throws a ReferenceError.
const hasSpeechSynthesis = 'speechSynthesis' in window;

if (!hasSpeechSynthesis) {
  // Speech Synthesis Not Supported
  alert("Sorry, your browser doesn't support text to speech!");
}

// Shared with populateVoiceList(). Declared with `let` because some browsers
// return an empty list here and only deliver the real one via `voiceschanged`.
let voices = hasSpeechSynthesis ? speechSynthesis.getVoices() : [];

// Language tag and list position of the option currently chosen in #voiceSelect.
var selectedLang;
var selectedVoice;

// Copy the chosen option's data-lang / data-pos into the shared variables.
function syncSelectedVoice() {
  const chosen = $("#voiceSelect").find("option:selected");
  selectedLang = chosen.attr("data-lang");
  selectedVoice = chosen.attr("data-pos");
}

// Re-read the browser's voice list and rebuild the dropdown from it.
function refreshVoiceList() {
  voices = speechSynthesis.getVoices();

  // Empty first so repeated `voiceschanged` events don't duplicate options.
  $("#voiceSelect").empty();
  populateVoiceList();

  // Hide the dropdown when there is nothing to choose from (the browser will
  // fall back to its default voice), and show it again once voices arrive.
  $("#voiceSelect").toggle($("#voiceSelect option").length > 0);

  syncSelectedVoice();
}

if (hasSpeechSynthesis) {
  refreshVoiceList();

  if (speechSynthesis.onvoiceschanged !== undefined) {
    speechSynthesis.onvoiceschanged = refreshVoiceList;
  }
} else {
  $("#voiceSelect").hide();
}

$("#voiceSelect").change(syncSelectedVoice);

$("#start").click(function() {
  if (!hasSpeechSynthesis) {
    return;
  }

  // Read the selection at click time: the list may have been filled after
  // page load without the user ever triggering a `change` event.
  syncSelectedVoice();

  const msg = new SpeechSynthesisUtterance();
  msg.text = $("#message").val();

  // Only assign when defined: `msg.lang = undefined` would be stringified to
  // the bogus language tag "undefined".
  if (selectedLang) {
    msg.lang = selectedLang;
  }

  const voice = voices[selectedVoice];
  if (voice) {
    msg.voice = voice;
  }

  console.log(msg);

  window.speechSynthesis.speak(msg);
});

/**
 * Rebuild the #voiceSelect dropdown from the shared `voices` list.
 *
 * One <option> is created per voice, labelled "name (lang)" with a
 * " — DEFAULT" suffix for the default voice. Each option carries data-lang,
 * data-name, data-voice-uri and data-pos (the voice's index in `voices`).
 *
 * Existing options are removed first, so the function is safe to call again
 * (e.g. from a `voiceschanged` handler) without producing duplicate entries.
 * Does nothing when speech synthesis or the select element is unavailable.
 */
function populateVoiceList()
{
  if (typeof speechSynthesis === 'undefined') {
    return;
  }

  // Look the element up once instead of on every loop iteration.
  const select = document.getElementById("voiceSelect");
  if (!select) {
    return;
  }

  // Drop whatever a previous call added.
  select.textContent = '';

  voices.forEach((voice, index) => {
    const option = document.createElement('option');
    const suffix = voice.default ? ' — DEFAULT' : '';

    option.textContent = `${voice.name} (${voice.lang})${suffix}`;

    option.setAttribute('data-lang', voice.lang);
    option.setAttribute('data-name', voice.name);
    option.setAttribute('data-voice-uri', voice.voiceURI);
    option.setAttribute('data-pos', index);

    select.appendChild(option);
  });
}
/* Flex container wrapping the message textarea and the Start button. */
.flex-container {
    display: flex;
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<!-- Filled by populateVoiceList() with one option per available voice -->
<select id="voiceSelect"></select>
<div class="flex-container">
    <!-- <textarea> has no value attribute; its default text is its content -->
    <textarea id="message">test</textarea>
    <button id="start">Start</button>
</div>

JSFiddle


Solution

  • How can I get more voices and support for more languages?

    Currently there isn't a browser vendor that supports loading custom/more voices. It doesn't appear to be part of the nascent specification yet either, as there is an open issue asking how to add more voices.

    The list of voices is dependent upon the user agent/device because that is how it is specified. User agents are using either a native speech engine provided by the device OS or a network based speech engine, each of which works with a set of supported voices. Here is a comment from a chromium issue about not being able to support boundary events because of the missing capabilities in the native/network based speech engines, as an example of the platform the Web Speech API is dependent upon.

    If you want to see what voices are available for various devices/browsers you can go to https://morganney.github.io/tts-react/?path=/story/tts-react--hook and click the "load" button to get an HTML <select> of the supported SpeechSynthesisVoice objects.

    You can also run the snippet below to see what voices are available:

    // Latest voice list reported by the browser; refreshed on `voiceschanged`
    // because some browsers return an empty list on the first synchronous call.
    let voices = []
    const btn = document.getElementById('load')
    const options = document.getElementById('voices')
    const hasSynth = 'speechSynthesis' in window
    
    if (hasSynth) {
      voices = speechSynthesis.getVoices()
      
      speechSynthesis.addEventListener('voiceschanged', () => {
        voices = speechSynthesis.getVoices()
      })
    }
    
    // Build (or rebuild) the <select> with a placeholder plus one option per voice.
    btn.addEventListener('click', () => {
      const initOpt = document.createElement('option')
    
      if (!voices.length && hasSynth) {
        voices = speechSynthesis.getVoices()
      }
    
      // Start from an empty list so clicking "load" again does not append a
      // second placeholder and a duplicate set of voices.
      options.textContent = ''
     
      // Empty value so the placeholder can never match a voice name.
      initOpt.value = ''
      initOpt.append(document.createTextNode('-- select voice --'))
      options.append(initOpt)
    
      voices.forEach(voice => {
        const option = document.createElement('option')
     
        option.value = voice.name
        option.append(document.createTextNode(`${voice.lang} - ${voice.name}`))
        options.append(option)
      })
      
      options.style.display = 'inline'
    })
    
    // Speak a short sample with whichever voice was just selected.
    options.addEventListener('change', (evt) => {
      if (!hasSynth) {
        return
      }
    
      const selected = voices.find(voice => voice.name === evt.target.value)
    
      // The placeholder (or a stale entry) has no matching voice: nothing to demo.
      if (!selected) {
        return
      }
    
      const utterance = new SpeechSynthesisUtterance()
    
      utterance.text = 'Testing the selected voice.'
      utterance.voice = selected
        
      speechSynthesis.speak(utterance)
    })
    /* Hidden initially; the script sets display to 'inline' when the load button is clicked. */
    #voices {
      display: none;
    }
    <!-- Clicking this button fills the select below with the available voices -->
    <button id="load">click to load voices</button>
    <!-- Choosing an option speaks a test phrase with that voice -->
    <select id="voices">
    </select>