Search code examples
androidspeech-to-textspeech

Google Speech to Text dialog stuck


I am developing an app in which I want to transliterate Hindi to English from speech input. For that, I am using the Google STT API. Everything works when my voice input is short, but when I give a long voice input, the dialog gets stuck at "Try Saying Something..." and I don't get any results either.

This is my Main Activity:-

/**
 * Screen that launches the system speech-recognition dialog for Hindi input and displays
 * the recognized text together with the output of {@link ConversionTable#transform(String)}.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {

    //  Request code used to match the speech-to-text result in onActivityResult
    private static final int SST_REQUEST_CODE = 101;

    //  Minimum utterance length requested from the recognizer. The extra is expressed in
    //  milliseconds, so 30 seconds must be written as 30 * 1000 (a bare 30 means 30 ms).
    private static final int MIN_SPEECH_LENGTH_MILLIS = 30 * 1000;

    //  Record Button
    AppCompatButton RecordBtn;

    //  TextViews showing the original (recognized) text and the transformed text
    TextView Original,result;

    //  Conversion table, built once and reused for every recognition result
    ConversionTable conversionTable;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        Original = findViewById(R.id.Original_Text);
        RecordBtn = findViewById(R.id.RecordBtn);
        result = findViewById(R.id.Recognized_Text);

        conversionTable = new ConversionTable();

        RecordBtn.setOnClickListener(this);
    }

    /**
     * Starts the speech-recognition activity when the record button is clicked.
     *
     * @param v the view that was clicked
     */
    @Override
    public void onClick(View v) {
        // Plain comparison instead of switch: R.id values are not guaranteed to be
        // compile-time constants in every build configuration.
        if (v.getId() != R.id.RecordBtn) {
            return;
        }

        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        //  The language model extra is documented as required for ACTION_RECOGNIZE_SPEECH
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);

        //  Ask the recognizer to keep listening for at least 30 seconds
        intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS,
                MIN_SPEECH_LENGTH_MILLIS);

        //  Do not force the offline recognition engine
        intent.putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, false);

        //  Use Hindi Speech Recognition Model...
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "hi-IN");

        try {
            startActivityForResult(intent, SST_REQUEST_CODE);
        } catch (ActivityNotFoundException a) {
            // No activity on this device can handle the recognition intent
            Toast.makeText(getApplicationContext(),
                    getString(R.string.error),
                    Toast.LENGTH_SHORT).show();
        }
    }

    /**
     * Receives the recognition result and shows the best match plus its transformed form.
     * Results that are missing, cancelled or empty are ignored.
     */
    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        super.onActivityResult(requestCode, resultCode, data);
        if (requestCode != SST_REQUEST_CODE || resultCode != RESULT_OK || data == null) {
            return;
        }

        ArrayList<String> matches = data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
        // The extra may be absent or empty; get(0) would crash in either case
        if (matches == null || matches.isEmpty()) {
            return;
        }

        String best = matches.get(0);
        Original.setText(best);
        result.setText(conversionTable.transform(best));
    }
}

My ConversionTable class:-

package android.example.com.conversion;

import android.util.Log;

import java.util.ArrayList;
import java.util.Hashtable;

/**
 * Lookup table that rewrites text one character at a time, replacing each character that
 * has an entry (mostly Devanagari signs and letters) with a Latin spelling.
 *
 * <p>The conversion is a plain per-character substitution: no inherent vowel is inserted
 * after a consonant, and characters without an entry are copied through unchanged.
 */
public class ConversionTable
{
    /** Source character(s) mapped to their replacement text. */
    private final Hashtable<String,String> unicode = new Hashtable<>();

    /** Fills {@link #unicode} with every supported mapping. */
    private void populateHashTable()
    {
        // Signs (entries marked * were flagged as doubtful by the original author)
        unicode.put("\u0901","rha"); // anunAsika - cchandra bindu *
        unicode.put("\u0902","n"); // anusvara
        unicode.put("\u0903","ah"); // visarga

        // Dependent vowel signs
        unicode.put("\u0940","ee");
        unicode.put("\u0941","u");
        unicode.put("\u0942","oo");
        unicode.put("\u0943","rhi");
        unicode.put("\u0944","rhee");   //  *
        unicode.put("\u0945","e");
        unicode.put("\u0946","e");
        unicode.put("\u0947","e");
        unicode.put("\u0948","ai");
        unicode.put("\u0949","o");
        unicode.put("\u094a","o");
        unicode.put("\u094b","o");
        unicode.put("\u094c","au");

        unicode.put("\u094d",""); // mapped to nothing, i.e. removed from the output
        unicode.put("\u0950","om");

        unicode.put("\u0958","k");
        unicode.put("\u0959","kh");
        unicode.put("\u095a","gh");
        unicode.put("\u095b","z");
        unicode.put("\u095c","dh");    // *
        unicode.put("\u095d","rh");
        unicode.put("\u095e","f");

        unicode.put("\u095f","y");
        unicode.put("\u0960","ri");
        unicode.put("\u0961","lri");
        unicode.put("\u0962","lr");       //  *
        unicode.put("\u0963","lree");     //  *

        unicode.put("\u093E","aa");
        unicode.put("\u093F","i");

        //  Vowels and Consonants...
        unicode.put("\u0905","a");
        unicode.put("\u0906","a");
        unicode.put("\u0907","i");
        unicode.put("\u0908","ee");
        unicode.put("\u0909","u");
        unicode.put("\u090a","oo");
        unicode.put("\u090b","ri");
        unicode.put("\u090c","lri"); // *
        unicode.put("\u090d","e"); // *
        unicode.put("\u090e","e"); // *
        unicode.put("\u090f","e");
        unicode.put("\u0910","ai");
        unicode.put("\u0911","o");
        unicode.put("\u0912","o");
        unicode.put("\u0913","o");
        unicode.put("\u0914","au");

        unicode.put("\u0915","k");
        unicode.put("\u0916","kh");
        unicode.put("\u0917","g");
        unicode.put("\u0918","gh");
        unicode.put("\u0919","ng");
        unicode.put("\u091a","ch");
        unicode.put("\u091b","chh");
        unicode.put("\u091c","j");
        unicode.put("\u091d","jh");
        unicode.put("\u091e","ny");
        unicode.put("\u091f","t"); // Ta as in Tom
        unicode.put("\u0920","th");
        unicode.put("\u0921","d"); // Da as in David
        unicode.put("\u0922","dh");
        unicode.put("\u0923","n");
        unicode.put("\u0924","t"); // ta as in tamasha
        unicode.put("\u0925","th"); // tha as in thanks
        unicode.put("\u0926","d"); // da as in darvaaza
        unicode.put("\u0927","dh"); // dha as in dhanusha
        unicode.put("\u0928","n");
        unicode.put("\u0929","nn");
        unicode.put("\u092a","p");
        unicode.put("\u092b","ph");
        unicode.put("\u092c","b");
        unicode.put("\u092d","bh");
        unicode.put("\u092e","m");
        unicode.put("\u092f","y");
        unicode.put("\u0930","r");
        unicode.put("\u0931","rr");
        unicode.put("\u0932","l");
        unicode.put("\u0933","ll"); // the Marathi and Vedic 'L'
        unicode.put("\u0934","lll"); // the Marathi and Vedic 'L'
        unicode.put("\u0935","v");
        unicode.put("\u0936","sh");
        unicode.put("\u0937","ss");
        unicode.put("\u0938","s");
        unicode.put("\u0939","h");

        // U+093C and U+093D deliberately have no entry, so they pass through unchanged.
        unicode.put("\u0969","3"); // 3 equals to pluta
        unicode.put("\u014F","Z"); // Z equals to upadhamaniya
        unicode.put("\u0CF1","V"); // V equals to jihvamuliya

        // Replacement values below are Greek tau, theta and sigma, written as escapes so
        // the source does not depend on the compiler's file encoding.
        // NOTE(review): these keys are two characters long, but transform() looks up one
        // character at a time, so they are never matched as written.
        unicode.put("\u0926\u093C","\u03C4"); // Urdu dwad
        unicode.put("\u0924\u093C","\u03B8"); // Urdu toe
        unicode.put("\u0938\u093C","\u03C3"); // Urdu swad, se
    }

    /** Creates a table with all mappings loaded. */
    ConversionTable()
    {
        populateHashTable();
    }

    /**
     * Replaces every character of {@code s1} that has a table entry with its mapped text
     * and copies every other character (spaces, digits, Latin letters, ...) unchanged.
     *
     * <p>Earlier versions routed characters such as {@code "k"} or {@code "a"} through a
     * consonant/vowel branch that looked them up in the table, found nothing, and emitted
     * the text {@code "null"} and an internal {@code "0x0951"} marker. Those characters
     * are now simply passed through.
     *
     * @param s1 the text to convert
     * @return the converted text; empty when {@code s1} is empty
     * @throws NullPointerException if {@code s1} is {@code null}
     */
    public String transform(String s1)
    {
        int strLen = s1.length();
        StringBuilder transformed = new StringBuilder(strLen);

        for (int i = 0; i < strLen; i++)
        {
            String varna = String.valueOf(s1.charAt(i));
            String mapped = unicode.get(varna);

            // No entry: keep the input character itself instead of appending "null"
            transformed.append(mapped != null ? mapped : varna);
        }

        return transformed.toString();
    }

}

My VowelUtil class:-

package android.example.com.conversion;

/**
 * Membership tests against two fixed lists of spellings: one treated as vowels and one
 * treated as consonants. Matching is exact and case-sensitive.
 */
public class VowelUtil {

    /**
     * Reports whether the argument is one of the listed vowel spellings.
     *
     * @param strVowel the string to classify; a {@code null} argument raises
     *                 {@link NullPointerException}
     * @return {@code true} only for an exact match with a listed vowel
     */
    protected static boolean isVowel(String strVowel) {
        switch (strVowel) {
            case "a":
            case "aa":
            case "i":
            case "ee":
            case "u":
            case "oo":
            case "ri":
            case "lri":
            case "e":
            case "ai":
            case "o":
            case "au":
            case "om":
                return true;
            default:
                return false;
        }
    }

    /**
     * Reports whether the argument is one of the listed consonant spellings.
     *
     * @param strConsonant the string to classify; a {@code null} argument raises
     *                     {@link NullPointerException}
     * @return {@code true} only for an exact match with a listed consonant
     */
    protected static boolean isConsonant(String strConsonant) {
        switch (strConsonant) {
            // Latin spellings
            case "k":
            case "kh":
            case "g":
            case "gh":
            case "ng":
            case "ch":
            case "chh":
            case "j":
            case "jh":
            case "ny":
            case "t":
            case "th":
            case "d":
            case "dh":
            case "n":
            case "nn":
            case "p":
            case "ph":
            case "b":
            case "bh":
            case "m":
            case "y":
            case "r":
            case "rr":
            case "l":
            case "ll":
            case "lll":
            case "v":
            case "sh":
            case "ss":
            case "s":
            case "h":
            case "3":
            case "z":
            // Single-symbol entries
            case "Ω":
            case "κ":
            case "K":
            case "γ":
            case "ζ":
            case "φ":
            case "δ":
            case "Δ":
            case "τ":
            case "θ":
            case "σ":
                return true;
            default:
                return false;
        }
    }
}

Outputs :-

for Short voice input :-

Short Input

For long voice input, it gets stuck and I can't get the result:- enter image description here


Solution

  • The problem is in Google's implementation. I was facing the same kind of difficulty and tried everything, but nothing worked.

    So I went another way to solve this problem: implementing the listeners yourself. Here is my code for that; it never pops up the built-in dialog (you can implement your own custom dialog), but it works like a charm.

    Here is how you can do it :

    /**
     * Screen that performs Hindi speech recognition through {@link SpeechRecognizer}
     * directly (no system dialog) and shows the best match in a text view.
     *
     * <p>The recognizer, its intent and its listener are all prepared in
     * {@link #onCreate(Bundle)}; listening starts when the record button is clicked and the
     * audio permission is held.
     */
    public class MainActivity extends AppCompatActivity implements View.OnClickListener {

        //  For TAG
        private static final String TAG = MainActivity.class.getName();

        //  Request Code for Permission
        private static final int REQUEST_CODE_RECORD_AUDIO = 100;

        //  Minimum utterance length. The extra is in milliseconds, so 30 seconds is
        //  30 * 1000 (a bare 30 would mean 30 ms).
        private static final int MIN_SPEECH_LENGTH_MILLIS = 30 * 1000;

        //  Record Button
        AppCompatButton RecordBtn;

        //  TextView to show Original
        TextView Original;

        //  SpeechRecognizer Object...
        private SpeechRecognizer speechRecognizer;

        //  RecognizerIntent
        private Intent recognizerIntent;

        @Override
        protected void onCreate(Bundle savedInstanceState) {
            super.onCreate(savedInstanceState);
            setContentView(R.layout.activity_main);

            Original = findViewById(R.id.Original_Text);
            RecordBtn = findViewById(R.id.RecordBtn);

            recognizerIntent = buildRecognizerIntent();

            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
            // Attach the listener up front so results are delivered regardless of when or
            // how the permission was granted.
            attachRecognitionListener();

            //  Permission Dialog
            askPermission();

            RecordBtn.setOnClickListener(this);
        }

        //  Builds the intent describing how recognition should be performed
        private Intent buildRecognizerIntent() {
            Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

            //  The language model extra is documented as required for this action
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                    RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);

            //  EXTRA_PROMPT takes the prompt text itself, not a resource id
            intent.putExtra(RecognizerIntent.EXTRA_PROMPT, getString(R.string.record));

            if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
                intent.putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, false);
            }

            //  Ask the recognizer to keep listening for at least 30 seconds
            intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS,
                    MIN_SPEECH_LENGTH_MILLIS);

            //  Use Hindi Speech Recognition Model...
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "hi-IN");
            return intent;
        }

        //  Requests the RECORD_AUDIO runtime permission
        private void askPermission() {
            // No explanation needed; request the permission
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO},
                    REQUEST_CODE_RECORD_AUDIO);
        }

        @Override
        public void onRequestPermissionsResult(int requestCode,
                                               @NonNull String permissions[], @NonNull int[] grantResults) {
            super.onRequestPermissionsResult(requestCode, permissions, grantResults);
            if (requestCode != REQUEST_CODE_RECORD_AUDIO) {
                return;
            }

            // If request is cancelled, the result arrays are empty.
            boolean granted = grantResults.length > 0
                    && grantResults[0] == PackageManager.PERMISSION_GRANTED;
            if (!granted) {
                Toast.makeText(MainActivity.this, "Permission denied to record audio",
                        Toast.LENGTH_SHORT).show();
            }
        }

        @Override
        public void onClick(View v) {
            // Plain comparison instead of switch: R.id values are not guaranteed to be
            // compile-time constants in every build configuration.
            if (v.getId() != R.id.RecordBtn) {
                return;
            }

            Log.d(TAG, "onClick: ");
            if (!checkPermission()) {
                askPermission();
                return;
            }

            if (isAvailable(this)) {
                Log.d(TAG, "Speech Recognition Service Available...");
                speechRecognizer.startListening(recognizerIntent);
            } else {
                Toast.makeText(this, "Speech Recognition Service not Available on your device...",
                        Toast.LENGTH_SHORT)
                        .show();
            }
        }

        //  Check if Speech recognition Service is Available on the Smartphone...
        private boolean isAvailable(Context context) {
            return SpeechRecognizer.isRecognitionAvailable(context);
        }

        @Override
        protected void onDestroy() {
            // Release the recognizer before the activity finishes tearing down
            if (speechRecognizer != null) {
                speechRecognizer.destroy();
                speechRecognizer = null;
            }
            super.onDestroy();
        }

        // Check Audio Permission
        private boolean checkPermission() {
            return ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) ==
                    PackageManager.PERMISSION_GRANTED;
        }

        //  Maps a SpeechRecognizer error code to the message shown to the user,
        //  or null when the code has no dedicated message.
        private String describeError(int error) {
            switch (error) {
                case SpeechRecognizer.ERROR_AUDIO:
                    return "Error Recording Audio...";
                case SpeechRecognizer.ERROR_CLIENT:
                    return "Client Side Error...";
                case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                    return "Insufficient permissions...";
                case SpeechRecognizer.ERROR_NETWORK:
                    return "Network Related Error...";
                case SpeechRecognizer.ERROR_NO_MATCH:
                    return "Please Installed Offline Hindi Language Data...";
                case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                    return "Recognition Busy...";
                case SpeechRecognizer.ERROR_SERVER:
                    return "Please Installed Offline Hindi Language Data...";
                case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                    return "Speech Timeout...";
                case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                    return "Network Timeout Error...";
                default:
                    return null;
            }
        }

        //  Registers the callbacks that log progress, report errors and display results
        private void attachRecognitionListener() {
            speechRecognizer.setRecognitionListener(new RecognitionListener() {
                @Override
                public void onReadyForSpeech(Bundle params) {
                    Log.d(TAG, "Audio Service is connected to Servers....");
                    Log.d(TAG, "You can now start your speech...");
                }

                @Override
                public void onBeginningOfSpeech() {
                    Log.d(TAG, "User has started speech...");
                }

                @Override
                public void onRmsChanged(float rmsdB) {
                    // Sound level updates are not used
                }

                @Override
                public void onBufferReceived(byte[] buffer) {
                    // Raw audio is not used
                }

                @Override
                public void onEndOfSpeech() {
                    Log.d(TAG, "User has Finished... speech...");
                }

                @Override
                public void onError(int error) {
                    Log.d(TAG, "onError: " + error);
                    String message = describeError(error);
                    if (message != null) {
                        Toast.makeText(MainActivity.this, message, Toast.LENGTH_SHORT).show();
                    }
                }

                @Override
                public void onResults(Bundle results) {
                    ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer
                            .RESULTS_RECOGNITION);

                    // The list may be missing or empty; get(0) would crash on an empty one
                    if (matches != null && !matches.isEmpty()) {
                        Original.setText(matches.get(0));
                    }
                }

                @Override
                public void onPartialResults(Bundle partialResults) {
                    // Partial results are not displayed
                }

                @Override
                public void onEvent(int eventType, Bundle params) {
                    // Reserved events are ignored
                }
            });
        }

    }