Search code examples
java android artificial-intelligence text-to-speech google-cloud-speech

Google Cloud Speech API for Android


Currently I'm working on a project where I have to use the Google Cloud Speech API and TextToSpeech. I tried working with RecognizerIntent, but I would like to give Cloud Speech a try.

Some tutorial material or a guide would be great. I checked the sample app, but I'm looking for a tutorial, a guide, or anything else that explains how it works.

Here is my current implementation using TTS and RecognizerIntent.

  // Speech engine used for the English prompts (its language is set to Locale.US in onInit).
  private TextToSpeech tts;
// Speech engine used to speak the answer in the language selected via currentLanguage.
private TextToSpeech secondTTS;
// Labels showing the recognized speech, the correct/wrong counters and the current question index.
private TextView speechInputTextView,correctAnswerTextView,wrongAnswerTextView,currentQuestionTextView;
// correctAnswersArrayList / questionArrayList are filled from the database (addGerCorrect / addEngQuestions);
// the remaining lists are filled from string-array resources in onCreate.
private ArrayList<String> correctAnswersArrayList, questionArrayList, sayCorrectArrayList, sayWrongArrayList ,toSay ,toASk;
// Plays the R.raw.unitonemp3 clip; its completion starts the spoken flow.
private MediaPlayer mediaPlayer;
// Database helper and the database handle obtained from it.
private DBHelper dbHelper;
private SQLiteDatabase sqlDB;
// Answer counters, index of the current question, and the unit number used in the SQL WHERE clause.
private int correctACount,wrongACount,currentQuestion, Unit;
// Set to true in onCreate and forwarded to PauseActivity on the "stop" command.
private boolean isStarted;
// Name of the target language; also used as the table and column name in the SQL queries.
private String currentLanguage ;
// Number of loaded questions (questionArrayList.size()), assigned in onCreate.
private static int TOTAL_QUESITONS;
// Request code identifying the speech recognizer result in onActivityResult.
private final static int REQ_CODE_SPEECH_INPUT = 100;
// Voice commands; each is matched with contains() against the lower-cased recognized text.
// NOTE(review): "pos" is presumably how the recognizer transcribes "pause" — confirm.
private final static String PAUSE_COMMAND = "pos";
private final static String STOP_COMMAND = "stop";
private final static String RESTART_COMMNAD = "restart";
private final static String REPEAT_COMMAND = "repeat";
private final static String EXIT_COMMAND = "exit";
/**
 * Builds the screen: looks up the views, wires the button listeners, creates both
 * text-to-speech engines, loads the question/answer lists, starts the intro clip and
 * registers the utterance listeners that drive the question flow.
 *
 * The flow is driven by utterance IDs: each finished utterance triggers the next step
 * in the listener registered on {@code tts} below.
 *
 * @param savedInstanceState forwarded to the superclass; not otherwise read here
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_unit);
    isStarted = true;
    mediaPlayer = MediaPlayer.create(getApplicationContext(), R.raw.unitonemp3);
    currentQuestion = 0;
    speechInputTextView = (TextView) findViewById(R.id.speechInput);
    correctAnswerTextView = (TextView) findViewById(R.id.correctAnswers_TextView);
    currentQuestionTextView = (TextView) findViewById(R.id.currentQuestion_TextView);
    wrongAnswerTextView = (TextView) findViewById(R.id.wrongAnswer_TextView);

    Unit = 1;
    // NOTE(review): throws NullPointerException if the launching intent carries no
    // "resultBundle" extra — confirm every caller supplies it.
    currentLanguage = getIntent().getBundleExtra("resultBundle").getString("language");


    Button next = (Button) findViewById(R.id.nextButton);
    Button changeUnitButton  = (Button) findViewById(R.id.changeUnitButton);
    Button playButton = (Button) findViewById(R.id.playButton);
    Button pauseButton = (Button) findViewById(R.id.pauseButton);

    // Play: speak the current question; the "say" id continues the flow in onDone below.
    playButton.setOnClickListener(new View.OnClickListener() {
        @Override
        public void onClick(View v) {
            startSayWithID(questionArrayList.get(currentQuestion), 1000, "say");
        }
    });

    // Pause: silence both engines and open PauseActivity, passing the current question index as "npc".
    pauseButton.setOnClickListener(new View.OnClickListener() {
        @Override
        public void onClick(View v) {
            tts.stop();
            secondTTS.stop();
            Intent pauseI = new Intent(UnitActivity.this, PauseActivity.class);
            Bundle resultBundle = new Bundle();
            resultBundle.putInt("npc", currentQuestion);
            pauseI.putExtra("resultBundle", resultBundle);
            startActivity(pauseI);

        }
    });

    // Both engines report initialization to this activity's onInit.
    tts = new TextToSpeech(this, this);
    secondTTS = new TextToSpeech(this, this);


 // Change unit: stop speaking, bump the unit number and replay the intro clip.
 // NOTE(review): the question/answer lists are not reloaded here for the new unit.
 changeUnitButton.setOnClickListener(new View.OnClickListener() {
        @Override
        public void onClick(View v) {

            secondTTS.stop();
            tts.stop();
            Unit ++;
            mediaPlayer.start();

        }
    });



    // Restore the question index passed in the "resultBundle" extra under "npc".
    // NOTE(review): the null check covers getExtras(), not getBundleExtra("resultBundle").
    Bundle extras = getIntent().getExtras();
    if (extras != null) {
        currentQuestion = getIntent().getBundleExtra("resultBundle").getInt("npc");

    }

    ImageView micButton = (ImageView) findViewById(R.id.micButton);
    micButton.setOnClickListener(new View.OnClickListener() {
        @Override
        public void onClick(View v) {
            if (!tts.isSpeaking()) {
                // NOTE(review): hard-coded index 13 looks like a leftover debug value — confirm.
                currentQuestion = 13;
                startSayWithID(questionArrayList.get(currentQuestion), 1000, "questionID");
            }
        }

    });

    // Feedback and lead-in phrases come from string-array resources.
    String[] sayCorrectList = getResources().getStringArray(R.array.sayCorrect);
    String[] sayWrongList = getResources().getStringArray(R.array.satWrong);

    String[] listToSay = getResources().getStringArray(R.array.toSay);
    String[] listToAsk = getResources().getStringArray(R.array.toAsk);

    toSay = new ArrayList<>(Arrays.asList(listToSay));
    toASk = new ArrayList<>(Arrays.asList(listToAsk));
    questionArrayList = new ArrayList<>();
    correctAnswersArrayList = new ArrayList<>();


    // Fill correctAnswersArrayList and questionArrayList from the database.
    addGerCorrect();
    addEngQuestions();


    sayCorrectArrayList = new ArrayList<>(Arrays.asList(sayCorrectList));
    sayWrongArrayList = new ArrayList<>(Arrays.asList(sayWrongList));


    TOTAL_QUESITONS = questionArrayList.size();
    mediaPlayer.start();

    // When the intro clip finishes, speak "Welcome" with the "instruction" id to start the flow.
    mediaPlayer.setOnCompletionListener(new MediaPlayer.OnCompletionListener() {
        @Override
        public void onCompletion(MediaPlayer mp) {

            startSayWithID("Welcome",1000,"instruction");
        }
    });



    // Next: log the whole question list, advance the index and restart the flow.
    // NOTE(review): the index is incremented without a bounds check.
    next.setOnClickListener(new View.OnClickListener() {
        @Override
        public void onClick(View v) {

            for(int i = 0 ; i< questionArrayList.size();i++){
                Log.d(" question  List "," item :"+"pisition "+i+ "" +questionArrayList.get(i));

            }
            currentQuestion++;
            tts.stop();
            secondTTS.stop();

            // An empty utterance is spoken only so that onDone fires with the "instruction" id.
            startSayWithID("",1000,"instruction");
        }

    });

    // Flow driven by the id of the utterance that just finished on tts. The checks use
    // contains(), so an id such as "questionID" also matches the "question" step.
    tts.setOnUtteranceProgressListener(new UtteranceProgressListener() {

        @Override
        public void onStart(String utteranceId) {

        }

        @Override
        public void onDone(final String utteranceId) {
            // Continue on the UI thread.
            runOnUiThread(new Runnable() {
                @Override
                public void run() {


                    // After a lead-in ("say"): answers containing the marker "tensa23" are only
                    // spoken and skipped; otherwise the question is spoken with the "question" id.
                    if (utteranceId.contains("say")) {


                        if (correctAnswersArrayList.get(currentQuestion).contains("tensa23")) {
                            startSayWithID(questionArrayList.get(currentQuestion), 1000, "say");
                            currentQuestion++;
                            // NOTE(review): reads the list at the already incremented index;
                            // this can go past the end on the last entry.
                            Log.d("Current ", "current Question" + currentQuestion + "" + correctAnswersArrayList.get(currentQuestion));
                        }else
                            startSayWithID(questionArrayList.get(currentQuestion), 1000, "question");
                    }

                    // After feedback/welcome ("instruction"): pick a random lead-in phrase,
                    // from toASk when the question contains "?" and from toSay otherwise.
                    if (utteranceId.contains("instruction")) {

                        if (correctAnswersArrayList.get(currentQuestion).contains("tensa23")) {
                            startSayWithID(questionArrayList.get(currentQuestion), 1000, "say");
                            currentQuestion++;
                            // NOTE(review): same post-increment index read as above.
                            Log.d("Current ","current Question"+currentQuestion +""+correctAnswersArrayList.get(currentQuestion));
                        } else if (questionArrayList.get(currentQuestion).contains("?")) {
                            startSayWithID(toASk.get(new Random().nextInt(toASk.size())), 1000, "say");

                        } else {
                            startSayWithID(toSay.get(new Random().nextInt(toSay.size())), 1000, "say");
                        }
                    }


                    // After the question itself: announce that the translation follows.
                    // NOTE(review): the phrase says "Spanish" regardless of currentLanguage.
                    if (utteranceId.contains("question")) {

                        if(questionArrayList.get(currentQuestion).contains("?")){
                            startSayWithID("in Spanish you ask",1000,"german");
                        }else{
                            startSayWithID("In Spanish you say",1000,"german");
                        }
                    }


                    // After the announcement: the second engine speaks the correct answer with id "ask".
                    if (utteranceId.contains("german")) {
                        secondTTS.speak(correctAnswersArrayList.get(currentQuestion),TextToSpeech.QUEUE_FLUSH,null,"ask");
                    }
                    // NOTE(review): in the visible code only secondTTS speaks with the "ask" id.
                    if(utteranceId.contains("ask")){
                        startAsk(1000);
                    }


                }

            });
        }


        @Override
        public void onError(String utteranceId) {

        }

    });

    // Once the second engine has spoken the answer ("ask"), open the speech prompt after a delay.
    secondTTS.setOnUtteranceProgressListener(new UtteranceProgressListener() {
        @Override
        public void onStart(String utteranceId) {

        }

        @Override
        public void onDone(final String utteranceId) {
            runOnUiThread(new Runnable() {
                @Override
                public void run() {
                    if(utteranceId.contains("ask")){
                        startAsk(1000);
                    }
                }
            });

        }

        @Override
        public void onError(String utteranceId) {

        }
    });




    // end of onCreate
}


/**
 * Opens the system speech recognizer and asks the user to say the current question
 * in the language being practised. The result is delivered to onActivityResult with
 * request code REQ_CODE_SPEECH_INPUT.
 *
 * Shows a toast instead when no activity can handle the recognizer intent.
 */
private void promptSpeechInput() {
    // Guard the lookup so a question index past the end of the list cannot crash the prompt.
    String question = "";
    if (currentQuestion >= 0 && currentQuestion < questionArrayList.size()) {
        question = questionArrayList.get(currentQuestion);
    }

    Intent prompIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    // ACTION_RECOGNIZE_SPEECH documents EXTRA_LANGUAGE_MODEL as a required extra.
    prompIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    // Listen in the language the user is practising rather than always in Spanish.
    prompIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, recognizerLanguageTag());
    prompIntent.putExtra(RecognizerIntent.EXTRA_PROMPT, "How do you say \n" + question);
    try {
        startActivityForResult(prompIntent, REQ_CODE_SPEECH_INPUT);
    } catch (ActivityNotFoundException a) {

        makeText(getApplicationContext(), "speech not supported", Toast.LENGTH_SHORT).show();
    }
}

/**
 * Maps currentLanguage to the language tag handed to the recognizer.
 * Unknown or missing values fall back to "es-ES", the previously hard-coded tag.
 */
private String recognizerLanguageTag() {
    if (currentLanguage == null) {
        return "es-ES";
    }
    switch (currentLanguage) {
        case "Italian":
            return "it-IT";
        case "German":
            return "de-DE";
        case "French":
            return "fr-FR";
        case "Spanish":
        default:
            return "es-ES";
    }
}

/**
 * TextToSpeech initialization callback shared by both engines.
 *
 * On success the prompt engine is set to US English and the answer engine to the locale
 * matching currentLanguage ("Spanish", "Italian", "German" or "French"). Failures and
 * unsupported languages are logged instead of being silently ignored.
 *
 * @param status TextToSpeech.SUCCESS when the engine is ready, an error code otherwise
 */
@Override
public void onInit(int status) {
    if (status != TextToSpeech.SUCCESS) {
        Log.e("UnitActivity", "TextToSpeech initialization failed, status = " + status);
        return;
    }

    tts.setLanguage(Locale.US);

    // switch on a null String throws NullPointerException, so bail out explicitly.
    if (currentLanguage == null) {
        Log.w("UnitActivity", "No language selected; answer voice keeps its default language");
        return;
    }

    final Locale answerLocale;
    switch (currentLanguage) {
        case "Spanish":
            answerLocale = new Locale("es", "ES");
            break;
        case "Italian":
            answerLocale = Locale.ITALY;
            break;
        case "German":
            answerLocale = Locale.GERMAN;
            break;
        case "French":
            answerLocale = Locale.FRENCH;
            break;
        default:
            // Unknown language name: leave the engine's language untouched, as before.
            answerLocale = null;
            break;
    }
    if (answerLocale == null) {
        return;
    }

    int result = secondTTS.setLanguage(answerLocale);
    if (result == TextToSpeech.LANG_MISSING_DATA || result == TextToSpeech.LANG_NOT_SUPPORTED) {
        Log.w("UnitActivity", "Speech data for " + currentLanguage + " is not available, result = " + result);
    }
}

/**
 * Handles the speech recognizer result.
 *
 * The first recognition hypothesis is shown in speechInputTextView, lower-cased and then
 * checked in this order: correct answer for the current question, the voice commands
 * (stop, pause, restart, repeat, exit) and finally a wrong answer.
 *
 * When the recognizer was cancelled or returned nothing, the input is treated as empty
 * instead of re-evaluating the text left over from the previous attempt.
 *
 * @param requestCode only REQ_CODE_SPEECH_INPUT is handled; other codes are ignored
 * @param resultCode  RESULT_OK when the recognizer produced a result
 * @param data        intent carrying RecognizerIntent.EXTRA_RESULTS, may be null
 */
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
    super.onActivityResult(requestCode, resultCode, data);
    if (requestCode != REQ_CODE_SPEECH_INPUT) {
        return;
    }

    // Use only what was recognized in this round; stale text in the view must not be re-scored.
    String recognizedText = "";
    if (resultCode == RESULT_OK && null != data) {
        ArrayList<String> result = data
                .getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
        if (result != null && !result.isEmpty() && result.get(0) != null) {
            recognizedText = result.get(0);
            speechInputTextView.setText(recognizedText);
        }
    }
    String inputSpeechToString = recognizedText.toLowerCase();

    // True only while currentQuestion addresses an existing entry of the answer list.
    boolean hasCurrentAnswer = currentQuestion >= 0
            && currentQuestion < TOTAL_QUESITONS
            && currentQuestion < correctAnswersArrayList.size();

    if (hasCurrentAnswer && inputSpeechToString.contains(correctAnswersArrayList.get(currentQuestion))) {
        currentQuestion++;
        correctACount++;

        correctAnswerTextView.setText(String.valueOf(correctACount));
        currentQuestionTextView.setText(String.valueOf(currentQuestion));

        Log.d("Onactivity ", "CurrentQ = " + currentQuestion);
        startSayWithID(sayCorrectArrayList.get(new Random().nextInt(sayCorrectArrayList.size())), 1000, "instruction");

    } else if (inputSpeechToString.contains(STOP_COMMAND)) {
        Intent stopIntent = new Intent(UnitActivity.this, PauseActivity.class);
        Bundle resultBundle = new Bundle();
        resultBundle.putBoolean("isStarted", isStarted);
        stopIntent.putExtra("resultBundle", resultBundle);
        startActivity(stopIntent);

    } else if (inputSpeechToString.contains(PAUSE_COMMAND)) {
        Intent pauseI = new Intent(UnitActivity.this, PauseActivity.class);
        Bundle resultBundle = new Bundle();
        resultBundle.putInt("npc", currentQuestion);
        pauseI.putExtra("resultBundle", resultBundle);
        startActivity(pauseI);

    } else if (inputSpeechToString.contains(RESTART_COMMNAD)) {
        currentQuestion = 0;
        startSayWithID("Restarted", 1000, "say");

    } else if (inputSpeechToString.contains(REPEAT_COMMAND)) {
        // Repeating is only possible while the index still points at a loaded question.
        if (currentQuestion >= 0 && currentQuestion < questionArrayList.size()) {
            startSayWithID(questionArrayList.get(currentQuestion), 1000, "question");
        } else {
            Log.w("Onactivity ", "Nothing to repeat, CurrentQ = " + currentQuestion);
        }

    } else if (inputSpeechToString.contains(EXIT_COMMAND)) {
        Intent homeIntent = new Intent(Intent.ACTION_MAIN);
        homeIntent.addCategory(Intent.CATEGORY_HOME);
        homeIntent.setFlags(Intent.FLAG_ACTIVITY_CLEAR_TOP);
        startActivity(homeIntent);

    } else {
        startSayWithID(sayWrongArrayList.get(new Random().nextInt(sayWrongArrayList.size())), 1000, "instruction");
        wrongACount++;
        wrongAnswerTextView.setText(String.valueOf(wrongACount));
        // Past the last question there is no expected answer to log.
        if (hasCurrentAnswer) {
            Log.d("Onactivity ", "CORRECT = " + correctAnswersArrayList.get(currentQuestion));
        }
        Log.d("Onactivity ", "You said :  " + inputSpeechToString);
    }
}


/**
 * Appends the "English" column of every row of the current unit to questionArrayList.
 * The table name is taken from currentLanguage.
 */
private void addEngQuestions() {
    // Reuse an existing helper: replacing it would orphan the previous helper and its open database.
    if (dbHelper == null) {
        dbHelper = new DBHelper(this);
    }
    sqlDB = dbHelper.getReadableDatabase();
    // NOTE(review): the table name is concatenated into the SQL text; identifiers cannot be
    // bound as parameters, so currentLanguage must come from a trusted source.
    // NOTE(review): Unit is constant for the selected rows, so ORDER BY Unit does not define a
    // row order; the pairing with the answer list relies on both queries returning the same order.
    String queryEngQuestion = "SELECT English  FROM " +currentLanguage+ " WHERE " + "Unit = " +Unit+  " ORDER BY Unit ASC";
    Cursor cursor = sqlDB.rawQuery(queryEngQuestion, null);
    try {
        // Resolve the column once; a missing column now fails with a descriptive exception.
        int englishColumn = cursor.getColumnIndexOrThrow("English");
        // A fresh cursor sits before the first row, so moveToNext() visits every row once.
        while (cursor.moveToNext()) {
            questionArrayList.add(cursor.getString(englishColumn));
        }
    } finally {
        cursor.close();
    }
    Log.d("Line 255", " English Arraylist" + questionArrayList.size());
}


/**
 * Appends the expected answers of the current unit to correctAnswersArrayList.
 * Both the table and the column are named after currentLanguage. Each answer is stored
 * with punctuation removed and lower-cased so it can be compared against recognized speech.
 */
private void addGerCorrect() {
    // Reuse an existing helper: replacing it would orphan the previous helper and its open database.
    if (dbHelper == null) {
        dbHelper = new DBHelper(this);
    }
    sqlDB = dbHelper.getReadableDatabase();
    // NOTE(review): table and column names are concatenated into the SQL text; identifiers
    // cannot be bound as parameters, so currentLanguage must come from a trusted source.
    String queryGerCOrrect = "SELECT "+ currentLanguage  +" FROM "+ currentLanguage + " WHERE "+  "Unit = "+Unit+ " ORDER BY Unit ASC";
    Cursor cursor2 = sqlDB.rawQuery(queryGerCOrrect, null);
    try {
        // Resolve the column once; a missing column now fails with a descriptive exception.
        int answerColumn = cursor2.getColumnIndexOrThrow(currentLanguage);
        // A fresh cursor sits before the first row, so moveToNext() visits every row once.
        while (cursor2.moveToNext()) {
            correctAnswersArrayList.add(cursor2.getString(answerColumn)
                    .replaceAll("\\p{P}", "").toLowerCase());
        }
    } finally {
        cursor2.close();
    }
}


/**
 * Speaks the given text on the prompt engine after a delay, replacing anything queued.
 *
 * @param text     text to speak
 * @param mSeconds delay before speaking, in milliseconds
 * @param ID       utterance id reported to the utterance progress listener
 */
private void startSayWithID(final String text, int mSeconds, final String ID) {
    final Runnable speakTask = new Runnable() {
        @Override
        public void run() {
            tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, ID);
        }
    };
    new Handler().postDelayed(speakTask, mSeconds);
}

/**
 * Opens the speech input prompt after a delay.
 *
 * @param seconds delay passed to Handler.postDelayed, i.e. in milliseconds despite the name
 */
private void startAsk(int seconds) {
    final Runnable promptTask = new Runnable() {
        @Override
        public void run() {
            promptSpeechInput();
        }
    };
    new Handler().postDelayed(promptTask, seconds);
}

/**
 * Releases the media player, both text-to-speech engines and the open database
 * before the activity is destroyed.
 */
@Override
protected void onDestroy() {
    if (mediaPlayer != null) {
        mediaPlayer.stop();
        mediaPlayer.release();
    }

    if (tts != null) {
        tts.stop();
        tts.shutdown();
    }
    if (secondTTS != null) {
        secondTTS.stop();
        secondTTS.shutdown();
    }
    // The database opened while loading the questions was never closed.
    if (sqlDB != null && sqlDB.isOpen()) {
        sqlDB.close();
    }
    super.onDestroy();
}

Solution

  • Setting up Google Cloud Speech on Android is not a straightforward 1-2-3 process, but I will give you some guidance.

    1. Download the Sample project from here, use the Speech example. https://github.com/GoogleCloudPlatform/android-docs-samples/tree/master/speech/Speech
    2. Set up a Google Cloud project, enable the Speech API, and link it to your Gmail account's billing (you get 60 minutes of free speech recognition every month).
    3. Generate an authentication json, and put it into the "raw" folder of the sample project.
    4. Set up Google Cloud on your computer and obtain an access token. Insert that access token into your SpeechService.java class.

    *Documentation on steps 3 and 4: https://cloud.google.com/speech/docs/getting-started

    *If you run into problems when trying to replicate the sample project in your own project, check this: Cannot import com.google.cloud.speech.v1.SpeechGrpc in Android

    The exact steps are too long to list, and I can't even remember them all; if you run into specific trouble, let me know.