Search code examples
javaandroidreal-timespeech-to-text

Web Speech API - Android


I want to implement functionality like the Web Speech API (which only works in Chrome) in an Android app.

I mean a real-time speech-to-text app.

Is there any framework or libraries to achieve this?


Solution

  • I'm currently working on an app that uses Google's speech-to-text; the results are pretty accurate, although I wrote a class that improves upon them. You will need to add the permission to use the Internet in your manifest.

    Here is the class from my app that takes care of STT. By the way, it is possible not to use Google's default dialog; if you are interested, I have an example.

    import java.util.ArrayList;
    import java.util.List;
    
    import android.app.Activity;
    import android.content.Intent;
    import android.content.pm.PackageManager;
    import android.content.pm.ResolveInfo;
    import android.speech.RecognizerIntent;
    /**
     * Helper that launches the system speech-recognition activity on behalf of a
     * caller {@link Activity} and extracts the best transcription from its result.
     *
     * <p>Usage: call {@link #speak()} to start recognition, then forward the
     * arguments of the caller's {@code onActivityResult} to
     * {@link #toPerformInOnActivityResult(int, int, Intent)}.
     */
    public class STT
    {
        /** Activity used to query the package manager and to start the recognizer. */
        Activity theCallerActivity;
        /** Maximum number of candidate transcriptions requested from the recognizer. */
        static final int THREE_RESULTS=3;

        // Status flags: each one is set to false when the most recent recognition
        // attempt finished with the corresponding RecognizerIntent error result code.
        boolean audioOk=true;
        boolean clienOk=true;
        boolean networkOk=true;
        boolean matchOk=true;
        boolean serverOk=true;

        /** Request code used to recognise our own result in onActivityResult. */
        private static final int VOICE_RECOGNITION_REQUEST_CODE = 9999;

        /**
         * @param activity the activity that will receive the recognition result
         */
        public STT(Activity activity)
        {
            theCallerActivity=activity;
        }

        /**
         * Checks whether any installed activity can handle
         * {@link RecognizerIntent#ACTION_RECOGNIZE_SPEECH}.
         *
         * @return {@code true} if at least one recognizer activity is present
         */
        public boolean isVoiceRecognitionAvailable()
        {
            PackageManager pm = theCallerActivity.getPackageManager();
            List<ResolveInfo> activities =
                    pm.queryIntentActivities(new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH), 0);
            return !activities.isEmpty();
        }

        /**
         * Starts the speech-recognition activity. The result is delivered to the
         * caller activity's {@code onActivityResult}.
         */
        public void speak()
        {
            // Start every attempt with a clean slate so stale errors are not reported.
            resetStatusFlags();

            final Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
            // Report the calling application's package; getClass().getPackage() would
            // describe this helper class instead and may be null.
            intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, theCallerActivity.getPackageName());
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH);
            intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, THREE_RESULTS);

            // Activities should be started from the UI thread; runOnUiThread runs
            // immediately when we are already on it and posts otherwise.
            theCallerActivity.runOnUiThread(new Runnable()
            {
                @Override
                public void run()
                {
                    theCallerActivity.startActivityForResult(intent, VOICE_RECOGNITION_REQUEST_CODE);
                }
            });
        }

        /**
         * Interprets the arguments of the caller's {@code onActivityResult}.
         *
         * <p>On an error result code the matching status flag is set to
         * {@code false}.
         *
         * @param requestCode request code passed to {@code onActivityResult}
         * @param resultCode  result code passed to {@code onActivityResult}
         * @param data        result intent passed to {@code onActivityResult}; may be null
         * @return the first transcription, or {@code null} when the request is not
         *         ours, recognition failed, or no transcription was returned
         */
        public String toPerformInOnActivityResult(int requestCode,int resultCode,Intent data)
        {
            if (requestCode != VOICE_RECOGNITION_REQUEST_CODE)
            {
                return null;
            }

            if (resultCode == Activity.RESULT_OK)
            {
                if (data == null)
                {
                    return null;
                }
                List<String> textMatchList = data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
                if (textMatchList != null && !textMatchList.isEmpty())
                {
                    return textMatchList.get(0);
                }
                return null;
            }

            // Error codes are alternatives to RESULT_OK, so they must be checked at
            // this level rather than inside the RESULT_OK branch.
            if (resultCode == RecognizerIntent.RESULT_AUDIO_ERROR)
            {
                audioOk=false;
            }
            else if (resultCode == RecognizerIntent.RESULT_CLIENT_ERROR)
            {
                clienOk=false;
            }
            else if (resultCode == RecognizerIntent.RESULT_NETWORK_ERROR)
            {
                networkOk=false;
            }
            else if (resultCode == RecognizerIntent.RESULT_NO_MATCH)
            {
                matchOk=false;
            }
            else if (resultCode == RecognizerIntent.RESULT_SERVER_ERROR)
            {
                serverOk=false;
            }
            return null;
        }

        /** Marks every status flag as OK again. */
        private void resetStatusFlags()
        {
            audioOk=true;
            clienOk=true;
            networkOk=true;
            matchOk=true;
            serverOk=true;
        }

    }

    have fun ...

    Here is an example of how to use STT without the default dialog (using SpeechRecognizer directly also requires the RECORD_AUDIO permission in your manifest):

    package com.example.speech_to_text_experiments;
    
    import java.util.ArrayList;
    
    import android.os.Bundle;
    import android.app.Activity;
    import android.content.Intent;
    import android.speech.RecognitionListener;
    import android.speech.RecognizerIntent;
    import android.speech.SpeechRecognizer;
    import android.view.Menu;
    import android.view.View;
    import android.widget.ArrayAdapter;
    import android.widget.Button;
    import android.widget.ListView;
    import android.widget.TextView;
    import android.widget.Toast;
    
    public  class MainActivity extends Activity  implements RecognitionListener
    {
    
     private ListView wordsList;
     Button btn;
     TextView tv;
    private SpeechRecognizer mSpeechRecognizer;
    private Intent mSpeechRecognizerIntent; 
    
    
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.main);
        mSpeechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        mSpeechRecognizer.setRecognitionListener(this);
        mSpeechRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                                         RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
                                         this.getPackageName());
        btn=(Button)findViewById(R.id.button1);
    
        wordsList = (ListView) findViewById(R.id.listView1); 
    
       mSpeechRecognizer.setRecognitionListener(new RecognitionListener() {
    
        @Override
        public void onRmsChanged(float rmsdB) {
            // TODO Auto-generated method stub
    
        }
    
        @Override
        public void onResults(Bundle results) {
            // TODO Auto-generated method stub
            ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
    
    
            wordsList.setAdapter(new ArrayAdapter<String>(MainActivity.this, android.R.layout.simple_list_item_1,  matches));
    
                Toast.makeText(MainActivity.this, "Recognision OK!!!", Toast.LENGTH_SHORT).show(); 
    
        }
    
        @Override
        public void onReadyForSpeech(Bundle params) {
            // TODO Auto-generated method stub
            Toast.makeText(MainActivity.this, "Voice recording starts", Toast.LENGTH_SHORT).show();
        }
    
        @Override
        public void onPartialResults(Bundle partialResults) {
            // TODO Auto-generated method stub
    
        }
    
        @Override
        public void onEvent(int eventType, Bundle params) {
            // TODO Auto-generated method stub
    
        }
    
        @Override
        public void onError(int error) {
            // TODO Auto-generated method stub
    
        }
    
        @Override
        public void onEndOfSpeech() {
            // TODO Auto-generated method stub
    
        }
    
        @Override
        public void onBufferReceived(byte[] buffer) {
            // TODO Auto-generated method stub
    
        }
    
        @Override
        public void onBeginningOfSpeech() {
            // TODO Auto-generated method stub
    
        }
    });
    
    
    
        btn.setOnClickListener(new View.OnClickListener() {
    
            @Override
            public void onClick(View v) {
                // TODO Auto-generated method stub
    
                 mSpeechRecognizer.startListening(mSpeechRecognizerIntent);
            }
        });
    
    
    
    }
    
    
        public void onBeginningOfSpeech(){ }
    
            public void onBufferReceived(byte[] buffer){ }
    
            public void onEndOfSpeech(){ }
    
            public void onError(int error){
    
               //mSpeechRecognizer.startListening(mSpeechRecognizerIntent);
    
        }
    
        public void onEvent(int eventType, Bundle params){ }
    
    
        public void onPartialResults(Bundle partialResults){ }
    
    
        public void onReadyForSpeech(Bundle params){
    
    
    
    
        }
    
        public void onResults(Bundle results)
        {
    
            ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
    
    
            wordsList.setAdapter(new ArrayAdapter<String>(this, android.R.layout.simple_list_item_1,  matches));
    
                Toast.makeText(MainActivity.this, "Recognision OK!!!", Toast.LENGTH_SHORT).show(); 
    
        }
    
        public void onRmsChanged(float rmsdB) { }
    
      }