Rao's Rao's - 2 months ago 15
Android Question

Offline voice recognition on Android picks up unwanted voices

I have done a lot of research and tried offline PocketSphinx, but it picks up surrounding voices and my app reacts to them incorrectly. Is there any offline Google speech recognition for Android above KitKat (4.4)? I have been trying this for two weeks. Thanks for your valuable answers.

Details: When the activity starts, it first has to read the content aloud (text-to-speech). After reading completes, voice recognition has to listen for a command (e.g. next, previous, forward, option, 1, 2, 3, 4). The app has to recognize the command and react to it in the onResult method.

Error: I get the following error after some speech has been captured:

06-18 19:54:00.159: V/onBeginningOfSpeech(3360): onBeginningOfSpeech
06-18 19:54:01.024: V/onPartialResult(3360): option
06-18 19:54:01.109: I/cmusphinx(3360): INFO: fsg_search.c(843): 105 frames, 5333 HMMs (50/fr), 7748 senones (73/fr), 371 history entries (3/fr)
06-18 19:54:01.110: I/SpeechRecognizer(3360): Stop recognition
06-18 19:54:01.110: E/cmusphinx(3360): ERROR: "fsg_search.c", line 913: Final result does not match the grammar in frame 105
06-18 19:54:01.111: V/onPartialResult-->(3360): option
06-18 19:54:01.111: V/onResult(3360): onResult


Android Hive offline link

But if I turn off the internet, it does not work offline.

With PocketSphinx it does not react to the correct word. If I say "next", it also picks up nearby speech from someone else and appends "key", which causes a lot of problems. Is there any solution or library that works offline? Is there any offline voice support from Google?

Below is the code I have tried:

package com.example.sample1;

import static edu.cmu.pocketsphinx.SpeechRecognizerSetup.defaultSetup;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import edu.cmu.pocketsphinx.RecognitionListener;
import android.app.Activity;
import android.graphics.Color;
import android.os.AsyncTask;
import android.os.Bundle;
import android.os.Handler;
import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeech.OnUtteranceCompletedListener;
import android.util.Log;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.Button;
import android.widget.ExpandableListView;
import android.widget.ExpandableListView.OnChildClickListener;
import android.widget.ExpandableListView.OnGroupClickListener;
import android.widget.ExpandableListView.OnGroupExpandListener;
import android.widget.TextView;
import android.widget.Toast;
import edu.cmu.pocketsphinx.Assets;
import edu.cmu.pocketsphinx.Hypothesis;
import edu.cmu.pocketsphinx.SpeechRecognizer;

public class Sam extends Activity implements RecognitionListener, TextToSpeech.OnInitListener {

/* Named searches allow to quickly reconfigure the decoder */

private static final String DIGITS_SEARCH = "digits";

private SpeechRecognizer recognizer;
private HashMap<String, Integer> captions;
private TextView caption_text;
private TextView result_text;
ArrayList<String> result1;
private Button buttonLeft;
private Button buttonRight;
int count = 0;
private ArrayList<DataAnswer> dummyListTemp;
private ArrayList<DataAnswer> dummyList;
AnswerDataAdapter listAdapter = null;
int conteo = 0;
Handler a = new Handler();
private TextToSpeech tts;
String readIt ="";

HashMap<String, String> params = new HashMap<String, String>();

@Override
public void onCreate(Bundle state) {
super.onCreate(state);
Log.v("onCreate", "onCreate");

// Prepare the data for UI
captions = new HashMap<String, Integer>();

captions.put(DIGITS_SEARCH, R.string.digits_caption);

setContentView(R.layout.quiz);
caption_text = (TextView) findViewById(R.id.caption_text);
result_text = (TextView) findViewById(R.id.result_text);
// listViewAnswer = (ExpandableListView) findViewById(R.id.listViewAnswer);
buttonRight = (Button) findViewById(R.id.buttonRight);
buttonLeft = (Button) findViewById(R.id.buttonLeft);
result_text.setText("Result --->: ");

tts = new TextToSpeech(this, this);

params.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,"stringId");

String text = "World is full of chanllenge";
//Speakes the text first and then after comple reading text voice recoginzation must start
speakOut(text);




buttonRight.setOnClickListener(new OnClickListener() {
@Override
public void onClick(View v) {
try {
//onClickRight();

Toast.makeText(getApplicationContext(), "Right", Toast.LENGTH_SHORT).show();
} catch (Exception e){
e.printStackTrace();
}


}
});

buttonLeft.setOnClickListener(new OnClickListener() {
@Override
public void onClick(View v) {
//onClickLeft();
Toast.makeText(getApplicationContext(), "Left", Toast.LENGTH_SHORT).show();
}
});


// Recognizer initialization is a time-consuming and it involves IO,
// so we execute it in async task
//if(!tts.isSpeaking()) {
new AsyncTask<Void, Void, Exception>() {
@Override
protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(Sam.this);
File assetDir = assets.syncAssets();
Log.v("AsyncTask", "AsyncTask");
setupRecognizer(assetDir);

} catch (IOException e) {
return e;
}
return null;
}

@Override
protected void onPostExecute(Exception result) {
Log.v("onPostExecute", "onPostExecute");
try{
if (result != null) {
//caption_text.setText("Failed to init recognizer " + result);
Toast.makeText(getApplicationContext(), "Failed to init recognizer ", Toast.LENGTH_SHORT).show();
} else {
FireRecognition();
//switchSearch(DIGITS_SEARCH);

}
} catch (Exception e) {
e.printStackTrace();
}
}
}.execute();
}


@Override
public void onDestroy() {
super.onDestroy();
Log.v("onDestroy", "onDestroy");
recognizer.cancel();
recognizer.shutdown();

if (tts != null) {
tts.stop();
tts.shutdown();
}
}

public void FireRecognition(){
Log.d("Recognition","Recognition Started");
//caption_text.setText("Recognition Started!");
//Toast.makeText(getApplicationContext(), "Recognition Started!", Toast.LENGTH_SHORT).show();
recognizer.stop();
//recognizer.startListening("digits");

}

/**
* In partial result we get quick updates about current hypothesis. In
* keyword spotting mode we can react here, in other modes we need to wait
* for final result in onResult.
*/
@Override
public void onPartialResult(Hypothesis hypothesis) {
try {
//Log.v("onPartialResult", "onPartialResult");
if (hypothesis == null)
return;
Log.v("onPartialResult", hypothesis.getHypstr().toString());

String text = hypothesis.getHypstr();
if(recognizer !=null)
recognizer.stop();

caption_text.setText("Partial result -->: " + text);

Log.v("onPartialResult-->", text);

// Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
} catch (Exception e) {
e.printStackTrace();
}
}

/**
* This callback is called when we stop the recognizer.
*/
@Override
public void onResult(Hypothesis hypothesis) {
try{
Log.v("onResult", "onResult");
// result_text.setText("");
if (hypothesis != null) {
String text = hypothesis.getHypstr();
//Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
// ((TextView) findViewById(R.id.result_text)).setText(text);

if(text.toLowerCase().equals("next")) {
result_text.setText("Result --->: " + text);
Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
} else if(text.toLowerCase().equals("previous")) {
result_text.setText("Result --->: " + text);
Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();

} else if(text.toLowerCase().trim().equals("option one".toLowerCase().trim())) {
result_text.setText("Result --->: " + text);


Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
result_text.setText("Result --->: " + text);
} else if(text.toLowerCase().trim().equals("option two".toLowerCase().toString())) {

Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
} else if(text.toLowerCase().trim().equals("option three".toLowerCase().toString())) {
result_text.setText("Result --->: " + text);
Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
} else if(text.toLowerCase().trim().equals("option four".toLowerCase().toString())) {
result_text.setText("Result --->: " + text);
Toast.makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
} else {
Toast.makeText(getApplicationContext(), " No Access:--" + text, Toast.LENGTH_SHORT).show();
}




Log.v("onResult-->", text);
if(recognizer != null)
recognizer.startListening("digits");
}
} catch (Exception e) {
e.printStackTrace();
}

}

@Override
public void onBeginningOfSpeech() {
Log.v("onBeginningOfSpeech", "onBeginningOfSpeech");

}

/**
* We stop recognizer here to get a final result
*/
@Override
public void onEndOfSpeech() {
Log.v("onEndOfSpeech", "onEndOfSpeech");

if (!recognizer.getSearchName().equals(DIGITS_SEARCH))
switchSearch(DIGITS_SEARCH);
}


private void switchSearch(String searchName) {
Log.v("switchSearch", "switchSearch--->" + searchName);
recognizer.stop();

// If we are not spotting, start listening with timeout (10000 ms or 10 seconds).
if (searchName.equals(DIGITS_SEARCH))
recognizer.startListening(searchName, 10000);
/* else
recognizer.startListening(searchName, 10000);*/

/* String caption = getResources().getString(captions.get(searchName));
caption_text.setText(caption);*/
}

private void setupRecognizer(File assetsDir) throws IOException {

// The recognizer can be configured to perform multiple searches
// of different kind and switch between them
Log.v("setupRecognizer", "setupRecognizer");
recognizer = defaultSetup()
.setAcousticModel(new File(assetsDir, "en-us-ptm"))
.setDictionary(new File(assetsDir, "cmudict-en-us.dict"))

// To disable logging of raw audio comment out this call (takes a lot of space on the device)
.setRawLogDir(assetsDir)

// Threshold to tune for keyphrase to balance between false alarms and misses
.setKeywordThreshold(1e-20f) //1e-20f 1e-45f

// Use context-independent phonetic search, context-dependent is too slow for mobile
// .setBoolean("-allphone_ci", true)

.getRecognizer();
recognizer.addListener(this);

/** In your application you might not need to add all those searches.
* They are added here for demonstration. You can leave just one.
*/

// Create keyword-activation search.
// recognizer.addKeyphraseSearch(KWS_SEARCH, KEYPHRASE);



// Create grammar-based search for digit recognition
File digitsGrammar = new File(assetsDir, "digits.gram");
recognizer.addGrammarSearch(DIGITS_SEARCH, digitsGrammar);

}



@Override
public void onError(Exception error) {
Log.v("onError", "onError");
//caption_text.setText(error.getMessage());
Toast.makeText(getApplicationContext(), error.getMessage(), Toast.LENGTH_SHORT).show();
}

@Override
public void onTimeout() {
Log.v("onTimeout", "onTimeout");
switchSearch(DIGITS_SEARCH);
}


@SuppressWarnings("deprecation")
@Override
public void onInit(int status) {

tts.setOnUtteranceCompletedListener(new OnUtteranceCompletedListener() {

@Override
public void onUtteranceCompleted(String utteranceId) {
runOnUiThread(new Runnable() {
@Override
public void run() {
if(recognizer != null)
recognizer.startListening("digits");
//Toast.makeText(getApplicationContext(), "Completed", Toast.LENGTH_LONG).show();
}
});
}
});

if (status == TextToSpeech.SUCCESS) {

int result = tts.setLanguage(Locale.US);


Log.i("Success", "Completed");

if (result == TextToSpeech.LANG_MISSING_DATA || result == TextToSpeech.LANG_NOT_SUPPORTED) {
Log.e("TTS", "This Language is not supported");
} else {
//buttonSpeak.setEnabled(true);
// speakOut();
String text = " No Voice Found".toString();
Log.i("else", "else");
// speakOut(text);
}

} else {
Log.e("TTS", "Initilization Failed!");
}

}


@SuppressWarnings("deprecation")
private void speakOut(String text) {
if(tts.isSpeaking()) {
//recognizer.stop();
}
// String text = ((TextView) findViewById(R.id.caption_text)).getText().toString();
tts.speak(text, TextToSpeech.QUEUE_FLUSH, params);

Log.i("Speaking-->", "****" + tts.isSpeaking());

}



}


And in digits.gram:

#JSGF V1.0;

// Grammar for the "digits" search: the spoken commands the quiz screen understands.
grammar digits;

// A single command phrase.
<digit> = option one |
option two |
option three |
option four |
back |
previous |
next;

// Public rule: one OR MORE command phrases per utterance, so a single
// result may contain several commands in a row (e.g. "next next").
public <digits> = <digit>+;


Above is my code; if anyone has worked on this, please let me know the solution.
Here is my layout XML, quiz.xml:

<?xml version="1.0" encoding="utf-8"?>
<!-- Quiz screen: answer list on top, Previous/Next buttons below it,
     then the recognition result and the partial-result caption. -->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent"
android:layout_height="match_parent" >

<!-- Answer list, anchored to the top of the screen. -->
<LinearLayout
android:id="@+id/linearLayout2"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:layout_alignParentLeft="true"
android:layout_alignParentTop="true"
android:orientation="vertical" >

<ExpandableListView
android:id="@+id/listViewAnswer"
android:layout_width="match_parent"
android:layout_height="301dp"
android:focusable="false"
android:focusableInTouchMode="false"
android:childDivider="#334455" >
</ExpandableListView>
</LinearLayout>

<!-- Navigation buttons. -->
<RelativeLayout
android:id="@+id/relativeLayout1"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:layout_alignParentLeft="true"
android:layout_below="@+id/linearLayout2"
android:layout_marginTop="10dip" >

<Button
android:id="@+id/buttonRight"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_alignParentRight="true"
android:layout_marginRight="10dip"
android:text="Next" />

<Button
android:id="@+id/buttonLeft"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_marginLeft="10dip"
android:text="Previous" />
</RelativeLayout>

<!-- Recognition output: final result above, partial result below. -->
<RelativeLayout
android:id="@+id/relativeLayout2"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:layout_alignParentLeft="true"
android:layout_below="@+id/relativeLayout1" >

<TextView
android:id="@+id/result_text"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:layout_alignParentLeft="true"
android:textAppearance="?android:attr/textAppearanceMedium"
android:textColor="#556677" />

<TextView
android:id="@+id/caption_text"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:layout_below="@+id/result_text"
android:layout_alignParentLeft="true"
android:textColor="#443399" />
</RelativeLayout>

</RelativeLayout>



Update:

If you are using addKeywordSearch:

// Register the keyword list stored in the app's files directory under the "digits" search name.
File keywordList = new File(context.getFilesDir(), "digits.gram");
recognizer.addKeywordSearch(DIGITS_SEARCH, keywordList);


then in digits.gram write only:

option one /1e-1/
option two /1e-1/
option three /1e-1/
option four /1e-1/
back /1e-1/
previous /1e-1/
next /1e-1/


or

If you are using addGrammarSearch:

File digitsGrammar = new File(context.getFilesDir(), "digits.gram");
recognizer.addGrammarSearch(DIGITS_SEARCH, digitsGrammar);


then in digits.gram:

#JSGF V1.0;

// Grammar variant of the command list, for use with addGrammarSearch.
grammar digits;

// A single command phrase.
<digit> = option one |
option two |
option three |
option four |
back |
previous |
next;

// Public rule: one or more command phrases per utterance.
public <digits> = <digit>+;

Answer

You need to use keyword spotting mode instead of grammar mode for continuous listening.

You can find example here:

Recognizing multiple keywords using PocketSphinx