Search code examples
ios, objective-c, websocket, speech-recognition, microsoft-translator

How can i send Audio using websocket to Microsoft Translator


I have created an application to translate text to text, text to speech, and speech to text. I have completed the text-to-text and text-to-speech parts, but I have not been able to translate speech to text.

I am using this demo, https://github.com/bitmapdata/MSTranslateVendor, but it only supports text to text and text to speech.

I searched on Stack Overflow and found a suggested solution: send the audio using a websocket. However, I don't know how to do that, and I am new to websocket programming.

Please help me understand how to send audio using a websocket.

I create the audio with the methods below, but I do not know how to send it.

// Configures recording: builds the Linear PCM settings dictionary, resolves
// the output file URL in Documents, configures the shared audio session, and
// prepares an AVAudioRecorder. Errors are logged instead of silently ignored.
- (void)viewDidLoad {
    [super viewDidLoad];

    // Linear PCM settings: 44.1 kHz, 16-bit little-endian integer samples,
    // 2 channels. These must match the WAV header generated later.
    settings = [[NSMutableDictionary alloc] initWithDictionary:@{
        AVFormatIDKey            : @(kAudioFormatLinearPCM),
        AVSampleRateKey          : @44100.0f,
        AVNumberOfChannelsKey    : @2,
        AVLinearPCMBitDepthKey   : @16,
        AVLinearPCMIsBigEndianKey: @NO,
        AVLinearPCMIsFloatKey    : @NO,
        AVEncoderAudioQualityKey : @(AVAudioQualityHigh),  // ignored for LPCM
    }];

    // Record into Documents/Sohil.wav.
    NSString *documentsDir =
        [NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES) lastObject];
    outputFileURL = [NSURL fileURLWithPath:[documentsDir stringByAppendingPathComponent:@"Sohil.wav"]];
    NSLog(@"Record URL : %@", outputFileURL);

    // Setup audio session; the original passed error:nil and ignored failures.
    NSError *sessionError = nil;
    AVAudioSession *session = [AVAudioSession sharedInstance];
    if (![session setCategory:AVAudioSessionCategoryPlayAndRecord error:&sessionError]) {
        NSLog(@"Failed to set audio session category: %@", sessionError);
    }

    // Initiate and prepare the recorder, surfacing any initialization error.
    NSError *recorderError = nil;
    recorder = [[AVAudioRecorder alloc] initWithURL:outputFileURL
                                           settings:settings
                                              error:&recorderError];
    if (!recorder) {
        NSLog(@"Failed to create recorder: %@", recorderError);
        return;
    }
    recorder.delegate = self;
    recorder.meteringEnabled = YES;
    [recorder prepareToRecord];
}

// Activates the shared audio session and starts recording.
// Logs and aborts when activation fails instead of ignoring the error
// (the original passed error:nil, silently swallowing failures).
- (IBAction)recordStart:(id)sender {
    NSError *error = nil;
    AVAudioSession *session = [AVAudioSession sharedInstance];
    if (![session setActive:YES error:&error]) {
        NSLog(@"Could not activate audio session: %@", error);
        return;
    }
    [recorder record];
}

// Stops recording and deactivates the shared audio session.
// Logs deactivation failures instead of ignoring them (was error:nil).
- (IBAction)recordStop:(id)sender {
    [recorder stop];
    NSError *error = nil;
    AVAudioSession *audioSession = [AVAudioSession sharedInstance];
    if (![audioSession setActive:NO error:&error]) {
        NSLog(@"Could not deactivate audio session: %@", error);
    }
}

and convert it:

    // Strips the first 44 bytes (assumed to be the WAV container header —
    // NOTE(review): AVAudioRecorder output may contain extra chunks before
    // the 'data' chunk, so the header is not guaranteed to be exactly 44
    // bytes; confirm against the actual file layout) and prepends a freshly
    // generated canonical header via -addWavHeader:.
    // Returns nil when the input is too short to contain a 44-byte header.
- (NSData *)stripAndAddWavHeader:(NSData *)wav {
    // Guard first: [wav length] - 44 is an unsigned subtraction, so data
    // shorter than 44 bytes would wrap around to a huge length and
    // subdataWithRange: would throw an NSRangeException.
    if ([wav length] <= 44) {
        return nil;
    }
    unsigned long payloadSize = [wav length] - 44;
    NSData *payload = [wav subdataWithRange:NSMakeRange(44, payloadSize)];
    return [self addWavHeader:payload];
}
// Prepends a canonical 44-byte PCM WAV (RIFF) header to raw sample data.
// The format fields now match the AVAudioRecorder settings actually used
// (44100 Hz, 16-bit, 2 channels): the original hard-coded 22050 Hz mono and
// computed byteRate as 8 * 44100 * channels / 8, which mislabels 16-bit
// stereo data so players would decode it at the wrong speed and channel
// count. The original also malloc'd the header buffer and never freed it
// (leak); a stack buffer is used instead.
// @param wavNoheader Raw little-endian 16-bit PCM sample data (no header).
// @return A new NSMutableData containing header + samples.
- (NSMutableData *)addWavHeader:(NSData *)wavNoheader {
    const int headerSize = 44;
    const long totalAudioLen = (long)[wavNoheader length];
    // RIFF chunk size = total file size minus the 8-byte "RIFF"+size prelude.
    const long totalDataLen = totalAudioLen + headerSize - 8;
    const long sampleRate = 44100;      // matches AVSampleRateKey
    const int channels = 2;             // matches AVNumberOfChannelsKey
    const int bitsPerSample = 16;       // matches AVLinearPCMBitDepthKey
    const long byteRate = sampleRate * channels * bitsPerSample / 8;
    const int blockAlign = channels * bitsPerSample / 8;

    Byte header[44];                    // stack buffer — no malloc, no leak
    header[0] = 'R';                    // RIFF/WAVE header
    header[1] = 'I';
    header[2] = 'F';
    header[3] = 'F';
    header[4] = (Byte)(totalDataLen & 0xff);         // chunk size, LE
    header[5] = (Byte)((totalDataLen >> 8) & 0xff);
    header[6] = (Byte)((totalDataLen >> 16) & 0xff);
    header[7] = (Byte)((totalDataLen >> 24) & 0xff);
    header[8] = 'W';
    header[9] = 'A';
    header[10] = 'V';
    header[11] = 'E';
    header[12] = 'f';                   // 'fmt ' sub-chunk
    header[13] = 'm';
    header[14] = 't';
    header[15] = ' ';
    header[16] = 16;                    // 'fmt ' chunk size (16 for PCM)
    header[17] = 0;
    header[18] = 0;
    header[19] = 0;
    header[20] = 1;                     // audio format = 1 (uncompressed PCM)
    header[21] = 0;
    header[22] = (Byte)channels;
    header[23] = 0;
    header[24] = (Byte)(sampleRate & 0xff);          // sample rate, LE
    header[25] = (Byte)((sampleRate >> 8) & 0xff);
    header[26] = (Byte)((sampleRate >> 16) & 0xff);
    header[27] = (Byte)((sampleRate >> 24) & 0xff);
    header[28] = (Byte)(byteRate & 0xff);            // byte rate, LE
    header[29] = (Byte)((byteRate >> 8) & 0xff);
    header[30] = (Byte)((byteRate >> 16) & 0xff);
    header[31] = (Byte)((byteRate >> 24) & 0xff);
    header[32] = (Byte)blockAlign;      // bytes per sample frame
    header[33] = 0;
    header[34] = (Byte)bitsPerSample;   // bits per sample
    header[35] = 0;
    header[36] = 'd';                   // 'data' sub-chunk
    header[37] = 'a';
    header[38] = 't';
    header[39] = 'a';
    header[40] = (Byte)(totalAudioLen & 0xff);       // data size, LE
    header[41] = (Byte)((totalAudioLen >> 8) & 0xff);
    header[42] = (Byte)((totalAudioLen >> 16) & 0xff);
    header[43] = (Byte)((totalAudioLen >> 24) & 0xff);

    NSMutableData *newWavData = [NSMutableData dataWithBytes:header length:headerSize];
    [newWavData appendData:wavNoheader];
    return newWavData;
}

Solution

  • You can use Microsoft's Cognitive-Speech-STT-iOS sample; it works perfectly for speech-to-text.

    1) First, register your app on the Register App page.

    2) Next, subscribe to obtain a key for "Bing Speech - Preview" and use that key in the demo project's setting.plist file; it works fine. You will receive two keys — you can use either one.