Search code examples
Tags: android, speech-recognition, speech, pocketsphinx, pocketsphinx-android

Having <s> and </s> as an output from android pocketsphinx


We're using pocketsphinx to convert .wav files into text. We don't know why the output is so strange: after the conversion it contains only <s> and </s>. We're using the default dictionary, language model and acoustic model provided by the CMUSphinx community.

Here is the code we're using for the conversion:

package com.example.saling_wika.saling_wika;


import android.app.Activity;
import android.net.Uri;
import android.os.AsyncTask;
import android.os.Bundle;
import android.os.Environment;
import android.util.Log;
import android.widget.Toast;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import edu.cmu.pocketsphinx.Assets;
import edu.cmu.pocketsphinx.Config;
import edu.cmu.pocketsphinx.Decoder;
import edu.cmu.pocketsphinx.Segment;

import static android.support.v7.widget.StaggeredGridLayoutManager.TAG;
import static junit.framework.Assert.fail;





public class ConversionModule extends Activity {
    static {
        System.loadLibrary("pocketsphinx_jni");
    }
    Config c;
    Decoder ps;
    FileInputStream stream;

    @Override
    public void onCreate(Bundle state) {
        super.onCreate(state);
        setContentView(R.layout.main);

        c = Decoder.defaultConfig();

    /*Configuring decoder object*/
        c.setString("-hmm", new File(Environment.getExternalStorageDirectory() + "/Android/data/com.example.saling_wika.saling_wika/files/sync", "en-us-ptm").getPath());
        c.setString("-dict", new File(Environment.getExternalStorageDirectory() + "/Android/data/com.example.saling_wika.saling_wika/files/sync", "cmudict-en-us.dict").getPath());
        c.setString("-lm", new File(Environment.getExternalStorageDirectory() + "/Android/data/com.example.saling_wika.saling_wika/files/sync", "weather.dmp").getPath());
        c.setBoolean("-allphone_ci", true);


        ps = new Decoder(c);


        try {
            final File file = new File(AudioToConvert.pathko);
            Uri uri = Uri.fromFile(file);
            File auxFile = new File(uri.getPath());
            stream = new FileInputStream(auxFile);


        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }




        ps.startUtt();
        byte[] b = new byte[4096];
        try {
            int nbytes;
            while ((nbytes = stream.read(b)) >= 0) {
                ByteBuffer bb = ByteBuffer.wrap(b, 0, nbytes);
                short[] s = new short[nbytes / 2];
                bb.asShortBuffer().get(s);
                ps.processRaw(s, nbytes / 2, false, false);
            }
        } catch (IOException e) {

        }
        ps.endUtt();
        //  System.out.println(ps.hyp().getHypstr());
        Toast.makeText(getApplicationContext(), ps.hyp().getHypstr(), Toast.LENGTH_LONG).show();
        for (Segment seg : ps.seg()) {
            //  System.out.println(seg.getWord());
            Toast.makeText(getApplicationContext(),seg.getWord(), Toast.LENGTH_LONG).show();
        }
        ;


    }
}

Solution

  • As explained on CMUSphinx forum you have multiple issues:

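    1. You need to add bb.order(ByteOrder.LITTLE_ENDIAN); before calling bb.asShortBuffer(), because WAV samples are little-endian while ByteBuffer defaults to big-endian, as described in "Give a file as input to Pocketsphinx on Android".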

    2. Your input file must contain PCM audio: 16 kHz, 16-bit, mono. If you want to submit an encoded file, you need to decode it to raw data first.