Search code examples

How can I change the speaker label in the IBM Watson Speech to Text API on Android?

Good Day!

I need help developing my Android app. Is it possible to change the default speaker label to a custom one, such as a person's name? For example, I would like to produce a conversation like this:

 Jhon: Hi
 Marie: Hello
 Jhon: Good Day To you marie..

Please help: I need to know whether it is possible to change the default name of the speaker label. This is what I have created so far:

private RecognizeOptions getRecognizeOptions(InputStream captureStream) {
    return new RecognizeOptions.Builder()

This is the code that handles the `.speakerLabels` results:

public class SpeakerLabelsDiarization {
public static class RecoToken {
    private Double startTime;
    private Double endTime;
    private Long speaker;
    private String word;
    private Boolean spLabelIsFinal;

     * Instantiates a new reco token.
     * @param speechTimestamp the speech timestamp
    RecoToken(SpeechTimestamp speechTimestamp) {
        startTime = speechTimestamp.getStartTime();
        endTime = speechTimestamp.getEndTime();
        word = speechTimestamp.getWord();

     * Instantiates a new reco token.
     * @param speakerLabel the speaker label
    RecoToken(SpeakerLabelsResult speakerLabel) {
        startTime = Double.valueOf(speakerLabel.getFrom());
        endTime = Double.valueOf(speakerLabel.getTo());
        speaker = speakerLabel.getSpeaker();

     * Update from.
     * @param speechTimestamp the speech timestamp
    public void updateFrom(SpeechTimestamp speechTimestamp) {
        word = speechTimestamp.getWord();

     * Update from.
     * @param speakerLabel the speaker label
    public void updateFrom(SpeakerLabelsResult speakerLabel) {
        speaker = speakerLabel.getSpeaker();

 * The Class Utterance.
public static class Utterance {
    private Integer speaker;
    private String transcript = "";

     * Instantiates a new utterance.
     * @param speaker    the speaker
     * @param transcript the transcript
    public Utterance(final Integer speaker, final String transcript) {
        this.speaker = speaker;
        this.transcript = transcript;

 * The Class RecoTokens.
public static class RecoTokens {

    private Map<Double, RecoToken> recoTokenMap;

     * Instantiates a new reco tokens.
    public RecoTokens() {
        recoTokenMap = new LinkedHashMap<Double, RecoToken>();

     * Adds the.
     * @param speechResults the speech results
    public void add(SpeechRecognitionResults speechResults) {
        if (speechResults.getResults() != null)
            for (int i = 0; i < speechResults.getResults().size(); i++) {
                SpeechRecognitionResult transcript = speechResults.getResults().get(i);
                if (transcript.isFinalResults()) {
                    SpeechRecognitionAlternative speechAlternative = transcript.getAlternatives().get(0);

                    for (int ts = 0; ts < speechAlternative.getTimestamps().size(); ts++) {
                        SpeechTimestamp speechTimestamp = speechAlternative.getTimestamps().get(ts);
        if (speechResults.getSpeakerLabels() != null)
            for (int i = 0; i < speechResults.getSpeakerLabels().size(); i++) {


     * Adds the.
     * @param speechTimestamp the speech timestamp
    public void add(SpeechTimestamp speechTimestamp) {
        RecoToken recoToken = recoTokenMap.get(speechTimestamp.getStartTime());
        if (recoToken == null) {
            recoToken = new RecoToken(speechTimestamp);
            recoTokenMap.put(speechTimestamp.getStartTime(), recoToken);
        } else {

     * Adds the.
     * @param speakerLabel the speaker label
    public void add(SpeakerLabelsResult speakerLabel) {
        RecoToken recoToken = recoTokenMap.get(speakerLabel.getFrom());
        if (recoToken == null) {
            recoToken = new RecoToken(speakerLabel);
            recoTokenMap.put(Double.valueOf(speakerLabel.getFrom()), recoToken);
        } else {

        if (speakerLabel.isFinalResults()) {

    private void markTokensBeforeAsFinal(Float from) {
        Map<Double, RecoToken> recoTokenMap = new LinkedHashMap<>();

        for (RecoToken rt : recoTokenMap.values()) {
            if (rt.startTime <= from)
                rt.spLabelIsFinal = true;

     * Report.
    public void report() {
        List<Utterance> uttterances = new ArrayList<Utterance>();
        Utterance currentUtterance = new Utterance(0, "");

        for (RecoToken rt : recoTokenMap.values()) {
            if (currentUtterance.speaker != Math.toIntExact(rt.speaker)) {
                currentUtterance = new Utterance(Math.toIntExact(rt.speaker), "");
            currentUtterance.transcript = currentUtterance.transcript + rt.word + " ";

        String result = GsonSingleton.getGson().toJson(uttterances);

    private void cleanFinal() {
        Set<Map.Entry<Double, RecoToken>> set = recoTokenMap.entrySet();
        for (Map.Entry<Double, RecoToken> e : set) {
            if (e.getValue().spLabelIsFinal) {


private static CountDownLatch lock = new CountDownLatch(1);


The output of that looks like this:

 speaker 0: Hi
 speaker 1: Hello
 speaker 0: Good Day To you marie..
 speaker 1:......

and I would like the output to look like this:

 Jhon: Hi
 Marie: Hello
 Jhon: Good Day To you marie..

My question is: is it possible to do this with the IBM Watson Speech to Text API? I have read their documentation, and it does not mention how to change the labels, so I just want to clarify whether this is possible.


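  • There is nothing in either the API or the documentation to suggest that it is possible to modify the labels in the output using the service itself. The service only reports numeric speaker IDs (e.g. `speaker 0`, `speaker 1`), so any mapping from those IDs to names would have to be done in your own code after the results are received.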