Search code examples
azureaudio

Azure TTS with the Go SDK: audio read from the stream is 46 bytes shorter than the synthesized WAV


I use this sample: https://github.com/Microsoft/cognitive-services-speech-sdk-go/blob/1af83b0cf8fb/samples/synthesizer/to_audio_data_stream.go

I write the bytes to a file, but the resulting file cannot be opened.

stream.SaveToWavFileAsync("fun.wav")
// Using this function instead saves a file that can be opened.

output

Synthesis started.
Synthesizing, audio chunk size 65582.
Synthesizing, audio chunk size 294510.
Synthesizing, audio chunk size 56446.
Synthesizing, audio chunk size 65582.
Synthesizing, audio chunk size 1710.
Synthesized, audio length 483646.
Read [483600] bytes from audio data stream.

The data read from the stream is 46 bytes shorter than the synthesized audio length.


Solution

  • The raw PCM data needs a WAV header prepended to it.

    // PcmToWav prepends a 44-byte RIFF/WAVE header to raw PCM data and returns
    // the header followed by the payload as a newly allocated slice; dst itself
    // is not modified.
    //
    // dst is the PCM payload, numChannel is the channel count and sampleRate is
    // the sample rate written to the header. The header always declares
    // format 1 (PCM) with 16 bits per sample, so dst is expected to hold 16-bit
    // samples. Sizes are stored as 32-bit little-endian values.
    func PcmToWav(dst []byte, numChannel int, sampleRate int) (resDst []byte) {
        const (
            headerSize    = 44
            bitsPerSample = 16
        )

        // Bytes per sample frame (one sample for every channel). This must
        // follow numChannel: a fixed value of 4 is only valid for stereo and
        // makes mono files inconsistent with their own byte rate.
        blockAlign := numChannel * bitsPerSample / 8
        byteRate := sampleRate * blockAlign
        dataLen := len(dst)

        // Allocate header and payload in one go so append below never regrows.
        wav := make([]byte, headerSize, headerSize+dataLen)

        // Little-endian writers for the 16- and 32-bit header fields.
        put16 := func(off, v int) {
            wav[off] = byte(v)
            wav[off+1] = byte(v >> 8)
        }
        put32 := func(off, v int) {
            wav[off] = byte(v)
            wav[off+1] = byte(v >> 8)
            wav[off+2] = byte(v >> 16)
            wav[off+3] = byte(v >> 24)
        }

        // RIFF chunk descriptor. The size field counts everything after
        // itself: the remaining 36 header bytes plus the audio data.
        copy(wav[0:], "RIFF")
        put32(4, dataLen+36)
        copy(wav[8:], "WAVE")

        // 'fmt ' sub-chunk.
        copy(wav[12:], "fmt ")
        put32(16, 16) // size of the 'fmt ' chunk body
        put16(20, 1)  // audio format 1 = PCM
        put16(22, numChannel)
        put32(24, sampleRate)
        put32(28, byteRate)
        put16(32, blockAlign)
        put16(34, bitsPerSample)

        // 'data' sub-chunk header, followed by the samples themselves.
        copy(wav[36:], "data")
        put32(40, dataLen)

        return append(wav, dst...)
    }