Search code examples
Tags: python, speech-recognition, google-speech-api, pydub

Split an audio file into parts, but I need to use these parts in speech recognition


I am facing a problem with Google Speech Recognition on long audio files, so I decided to split my audio file into 15-second parts. Each time, I send the first 15 seconds to speech recognition, then the second 15 seconds, and so on.

But when I use the pydub library to split the audio file, the value returned after splitting is not a file name or file-like object, which the API requires as its parameter (I marked the line that raises the error). The error says: "Given audio file must be a filename string or a file-like object"

# Script from the question: cut a WAV file into two halves with pydub and
# send each half to Google Speech Recognition, printing the transcript and
# a running word count.
import speech_recognition as sr
import numpy

from os import path
# NOTE(review): AUDIO_FILE is not referenced again in this snippet; the same
# literal is repeated in the from_wav() call below.
AUDIO_FILE = "OAF_back_happy.wav"

from pydub import AudioSegment
sound = AudioSegment.from_wav("OAF_back_happy.wav")

# Slice the loaded audio at its midpoint, giving two pieces of `sound`.
halfway_point = len(sound) // 2
split = []
split.append(sound[:halfway_point])
split.append(sound[halfway_point:])
r = sr.Recognizer()

# Starts at 1 and is never reset inside the loop, so the printed count
# accumulates across all pieces.
words=1
for x in split:
  # `x` is a slice of `sound`, not a filename or a file-like object. This is
  # the line the question marks as failing with "Given audio file must be a
  # filename string or a file-like object".
  with sr.AudioFile(x) as source:     #<-----
      audio = r.record(source)  # read the entire audio file
  try:
      # for testing purposes, we're just using the default API key
      # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
      # instead of `r.recognize_google(audio)`
      ans = r.recognize_google(audio)
      print("Google Speech Recognition thinks you said " +ans)
      # Count whitespace characters in the transcript as word separators.
      # NOTE(review): this inner loop reuses the outer loop variable name `x`.
      for x in ans:
        if (x.isspace()) == True: 
          words+=1
      print(words) 
  except sr.UnknownValueError:
      print("Google Speech Recognition could not understand audio")
  except sr.RequestError as e:
      print("Could not request results from Google Speech Recognition service; {0}".format(e))

Edit: as described in the comments, I don't want to export the parts to files, because I am running this on a server and I don't want to store the same audio twice.


Solution

  • Untested, as I cannot be bothered to install packages I do not use, but here's what I mean:

    import io  # standard library; provides the in-memory buffer used below

    # Replacement for the `with sr.AudioFile(x)` step of the question's loop:
    # each chunk is exported to an in-memory buffer, so nothing is written
    # to disk and no duplicate file is stored on the server.
    for x in split:
        b = io.BytesIO()
        # pydub's export() defaults to MP3, which sr.AudioFile cannot read
        # (it accepts WAV/AIFF/FLAC), so request WAV explicitly.
        x.export(b, format="wav")
        b.seek(0)  # rewind so AudioFile reads from the start of the buffer
        with sr.AudioFile(b) as source:
            audio = r.record(source)