Good Day!
I need help in developing my android app. is this possible to change the default name of speaker label to a custom one like a person name. i would like to create like this conversation as I example..
Jhon: Hi
Marie: Hello
Jhon: Good Day To you marie..
Marie:......
Please help i need help if is this possible to change the default name of speaker label i need your help i created like this
private RecognizeOptions getRecognizeOptions(InputStream captureStream) {
return new RecognizeOptions.Builder()
.audio(captureStream)
.contentType(ContentType.OPUS.toString())
.model("en-US_BroadbandModel")
.interimResults(true)
.inactivityTimeout(2000)
.timestamps(true)
.speakerLabels(true)
.maxAlternatives(3)
.smartFormatting(true)
.timestamps(true)
.wordConfidence(true)
.build();
}
this is the method of .speakerLabels
public class SpeakerLabelsDiarization {
public static class RecoToken {
private Double startTime;
private Double endTime;
private Long speaker;
private String word;
private Boolean spLabelIsFinal;
/**
* Instantiates a new reco token.
*
* @param speechTimestamp the speech timestamp
*/
RecoToken(SpeechTimestamp speechTimestamp) {
startTime = speechTimestamp.getStartTime();
endTime = speechTimestamp.getEndTime();
word = speechTimestamp.getWord();
}
/**
* Instantiates a new reco token.
*
* @param speakerLabel the speaker label
*/
RecoToken(SpeakerLabelsResult speakerLabel) {
startTime = Double.valueOf(speakerLabel.getFrom());
endTime = Double.valueOf(speakerLabel.getTo());
speaker = speakerLabel.getSpeaker();
}
/**
* Update from.
*
* @param speechTimestamp the speech timestamp
*/
public void updateFrom(SpeechTimestamp speechTimestamp) {
word = speechTimestamp.getWord();
}
/**
* Update from.
*
* @param speakerLabel the speaker label
*/
public void updateFrom(SpeakerLabelsResult speakerLabel) {
speaker = speakerLabel.getSpeaker();
}
}
/**
* The Class Utterance.
*/
public static class Utterance {
private Integer speaker;
private String transcript = "";
/**
* Instantiates a new utterance.
*
* @param speaker the speaker
* @param transcript the transcript
*/
public Utterance(final Integer speaker, final String transcript) {
this.speaker = speaker;
this.transcript = transcript;
}
}
/**
* The Class RecoTokens.
*/
public static class RecoTokens {
private Map<Double, RecoToken> recoTokenMap;
/**
* Instantiates a new reco tokens.
*/
public RecoTokens() {
recoTokenMap = new LinkedHashMap<Double, RecoToken>();
}
/**
* Adds the.
*
* @param speechResults the speech results
*/
public void add(SpeechRecognitionResults speechResults) {
if (speechResults.getResults() != null)
for (int i = 0; i < speechResults.getResults().size(); i++) {
SpeechRecognitionResult transcript = speechResults.getResults().get(i);
if (transcript.isFinalResults()) {
SpeechRecognitionAlternative speechAlternative = transcript.getAlternatives().get(0);
for (int ts = 0; ts < speechAlternative.getTimestamps().size(); ts++) {
SpeechTimestamp speechTimestamp = speechAlternative.getTimestamps().get(ts);
add(speechTimestamp);
}
}
}
if (speechResults.getSpeakerLabels() != null)
for (int i = 0; i < speechResults.getSpeakerLabels().size(); i++) {
add(speechResults.getSpeakerLabels().get(i));
}
}
/**
* Adds the.
*
* @param speechTimestamp the speech timestamp
*/
public void add(SpeechTimestamp speechTimestamp) {
RecoToken recoToken = recoTokenMap.get(speechTimestamp.getStartTime());
if (recoToken == null) {
recoToken = new RecoToken(speechTimestamp);
recoTokenMap.put(speechTimestamp.getStartTime(), recoToken);
} else {
recoToken.updateFrom(speechTimestamp);
}
}
/**
* Adds the.
*
* @param speakerLabel the speaker label
*/
public void add(SpeakerLabelsResult speakerLabel) {
RecoToken recoToken = recoTokenMap.get(speakerLabel.getFrom());
if (recoToken == null) {
recoToken = new RecoToken(speakerLabel);
recoTokenMap.put(Double.valueOf(speakerLabel.getFrom()), recoToken);
} else {
recoToken.updateFrom(speakerLabel);
}
if (speakerLabel.isFinalResults()) {
markTokensBeforeAsFinal(speakerLabel.getFrom());
report();
cleanFinal();
}
}
private void markTokensBeforeAsFinal(Float from) {
Map<Double, RecoToken> recoTokenMap = new LinkedHashMap<>();
for (RecoToken rt : recoTokenMap.values()) {
if (rt.startTime <= from)
rt.spLabelIsFinal = true;
}
}
/**
* Report.
*/
public void report() {
List<Utterance> uttterances = new ArrayList<Utterance>();
Utterance currentUtterance = new Utterance(0, "");
for (RecoToken rt : recoTokenMap.values()) {
if (currentUtterance.speaker != Math.toIntExact(rt.speaker)) {
uttterances.add(currentUtterance);
currentUtterance = new Utterance(Math.toIntExact(rt.speaker), "");
}
currentUtterance.transcript = currentUtterance.transcript + rt.word + " ";
}
uttterances.add(currentUtterance);
String result = GsonSingleton.getGson().toJson(uttterances);
System.out.println(result);
}
private void cleanFinal() {
Set<Map.Entry<Double, RecoToken>> set = recoTokenMap.entrySet();
for (Map.Entry<Double, RecoToken> e : set) {
if (e.getValue().spLabelIsFinal) {
recoTokenMap.remove(e.getKey());
}
}
}
}
private static CountDownLatch lock = new CountDownLatch(1);
}
the output in that is like this`
speaker 0: Hi
speaker 1: Hello
speaker 0: Good Day To you marie..
speaker 1:......
and i would like to output like this
Jhon: Hi
Marie: Hello
Jhon: Good Day To you marie..
Marie:......
my question is. Is this possible to create like that in ibm watspon speech to text api because i read in their documentation their are not mentioning on how to change the labels i just want to clarify it if is this possible
There is nothing in the API nor the documentation to suggest that it is possible to modify the labels in the output using the service itself. https://cloud.ibm.com/docs/services/speech-to-text/output.html#speaker_labels