Search code examples
androidspeech-to-textandroid-speech-api

Getting speech to text Android


I have been refering to Android Speech Recognition as a service on Android 4.1 & 4.2 post to try and implement speech recognition in a service.

I guess I have got it right. On running in my device I am getting "Ready for Speech" toast message which I have declared inside onReadyForSpeech() function.

According to Hoan Nguyen the person who gave the ans for the above post, we can start speaking as soon as onReadyForSpeech() function is called.

My problem is I don't know how to get the speech which we are speaking and convert it to text and where to do it.

Does any one know how to do it? I know its a very lame question to ask but its my first time working with speech recognition. So please bear with me.

Any help on this is very much appreciated. Thanks in advance :)

    public class MyService extends Service
    {
      protected AudioManager mAudioManager; 
      protected SpeechRecognizer mSpeechRecognizer;
      protected Intent mSpeechRecognizerIntent;
      protected final Messenger mServerMessenger = new Messenger(new IncomingHandler(this));

      protected boolean mIsListening;
      protected volatile boolean mIsCountDownOn;

      static final int MSG_RECOGNIZER_START_LISTENING = 1;
      static final int MSG_RECOGNIZER_CANCEL = 2;

    @Override
    public void onCreate()
    {
        super.onCreate();
        mAudioManager = (AudioManager) getSystemService(Context.AUDIO_SERVICE); 
        mSpeechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        mSpeechRecognizer.setRecognitionListener(new SpeechRecognitionListener());
        mSpeechRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                                         RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
                                         this.getPackageName());

       mSpeechRecognizer.startListening(mSpeechRecognizerIntent);
        //Toast.makeText(this, "onCreate", Toast.LENGTH_SHORT).show();
        Log.d("onCreate","Entered");
    }


    protected static class IncomingHandler extends Handler
    {
        private WeakReference<MyService> mtarget;

        IncomingHandler(MyService target)
        {
            mtarget = new WeakReference<MyService>(target);

            Log.d("IncomingHandler","Entered");
        }


        @Override
        public void handleMessage(Message msg)
        {
            Log.d("handleMessage","Entered");

            final MyService target = mtarget.get();

            switch (msg.what)
            {
                case MSG_RECOGNIZER_START_LISTENING:

                    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN)
                    {
                        // turn off beep sound  
                        target.mAudioManager.setStreamMute(AudioManager.STREAM_SYSTEM, true);
                    }
                     if (!target.mIsListening)
                     {
                         target.mSpeechRecognizer.startListening(target.mSpeechRecognizerIntent);
                         target.mIsListening = true;
                         Log.d("TAG", "message start listening"); 
                         //$NON-NLS-1$
                     }
                     break;

                 case MSG_RECOGNIZER_CANCEL:
                      target.mSpeechRecognizer.cancel();
                      target.mIsListening = false;
                      Log.d("TAG", "message canceled recognizer"); //$NON-NLS-1$
                      break;
             }
       } 
    } 

    // Count down timer for Jelly Bean work around
    protected CountDownTimer mNoSpeechCountDown = new CountDownTimer(5000, 5000)
    {

        @Override
        public void onTick(long millisUntilFinished)
        {
            // TODO Auto-generated method stub
            Log.d("onTick","Entered");
        }

        @Override
        public void onFinish()
        {
            Log.d("onFinish","Entered");

            mIsCountDownOn = false;
            Message message = Message.obtain(null, MSG_RECOGNIZER_CANCEL);
            try
            {
                mServerMessenger.send(message);
                message = Message.obtain(null, MSG_RECOGNIZER_START_LISTENING);
                mServerMessenger.send(message);
            }
            catch (RemoteException e)
            {

            }
        }
    };

    @Override
    public int onStartCommand(Intent intent, int flags, int startId) {
        // TODO Auto-generated method stub
        //mSpeechRecognizer.startListening(mSpeechRecognizerIntent);

        try
        {
            Message msg = new Message();
            msg.what = MSG_RECOGNIZER_START_LISTENING; 
            mServerMessenger.send(msg);
        }
        catch (RemoteException e)
        {
            Log.d("msg",""+e);
        }
        return  START_NOT_STICKY;
        //return super.onStartCommand(intent, flags, startId);
    }

    @Override
    public void onDestroy()
    {
        super.onDestroy();

        if (mIsCountDownOn)
        {
            mNoSpeechCountDown.cancel();
        }
        if (mSpeechRecognizer != null)
        {
            mSpeechRecognizer.destroy();
        }

        Log.d("onDestroy","Entered");
    }

    protected class SpeechRecognitionListener implements RecognitionListener
    {

        private static final String TAG = "Sppech---->";

        @Override
        public void onBeginningOfSpeech()
        {
            // speech input will be processed, so there is no need for count down anymore
            if (mIsCountDownOn)
            {
                mIsCountDownOn = false;
                mNoSpeechCountDown.cancel();
            }               
            //Log.d(TAG, "onBeginingOfSpeech"); //$NON-NLS-1$
            Log.d("onBeginningOfSpeech","Entered");
        }

        @Override
        public void onBufferReceived(byte[] buffer)
        {
            String sTest = "";
            Log.d("onBufferReceived","Entered");
        }

        @Override
        public void onEndOfSpeech()
        {
            //Log.d(TAG, "onEndOfSpeech"); //$NON-NLS-1$
            Log.d("onEndOfSpeech","Entered");
         }

        @Override
        public void onError(int error)
        {
            if (mIsCountDownOn)
            {
                mIsCountDownOn = false;
                mNoSpeechCountDown.cancel();
            }
             mIsListening = false;
             Message message = Message.obtain(null, MSG_RECOGNIZER_START_LISTENING);
             try
             {
                    mServerMessenger.send(message);
             }
             catch (RemoteException e)
             {

             }
            //Log.d(TAG, "error = " + error); //$NON-NLS-1$
             Log.d("onError","Entered");
        }

        @Override
        public void onEvent(int eventType, Bundle params)
        {

        }

        @Override
        public void onPartialResults(Bundle partialResults)
        {

        }

        @Override
        public void onReadyForSpeech(Bundle params)
        {
            if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN)
            {
                mIsCountDownOn = true;
                mNoSpeechCountDown.start();
                mAudioManager.setStreamMute(AudioManager.STREAM_SYSTEM, false);
            }
            //Log.d("TAG", "onReadyForSpeech"); 
            Toast.makeText(getApplicationContext(), "Ready for Speech", Toast.LENGTH_SHORT).show();
            Log.d("onReadyForSpeech","Entered");//$NON-NLS-1$
        }

        @Override
        public void onResults(Bundle results)
        {
            //Log.d(TAG, "onResults"); //$NON-NLS-1$

        }

        @Override
        public void onRmsChanged(float rmsdB)
        {

        }



    }

    @Override
    public IBinder onBind(Intent intent) {
        // TODO Auto-generated method stub
        return null;
    }
}

Solution

  • You get it in onResult(Bundle result), which you can then get what the user speak into an ArrayList

    ArrayList<String> matches = result.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);