I have a url to live audio recording that I'm trying to transcribe using Google Speech to Text API. I am using an example code from the Cloud Speech to Text API. However, the problem is that when I pass the live url I do not receive any output. Below is the relevant portion of my code. Any help would be greatly appreciated!
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import io
import os
import time
import requests
import numpy as np
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
from urllib.request import urlopen
from datetime import datetime
from datetime import timedelta
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= "app_creds.json"
def get_stream():
stream = urlopen('streamurl')
duration = 60
begin = datetime.now()
duration = timedelta(seconds=duration)
while datetime.now() - begin < duration:
data = stream.read(8000)
return data
def transcribe_streaming():
"""Streams transcription of the given audio file."""
client = speech.SpeechClient()
content = get_stream()
# In practice, stream should be a generator yielding chunks of audio data.
stream = [content]
requests = (types.StreamingRecognizeRequest(audio_content=chunk)
for chunk in stream)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')
streaming_config = types.StreamingRecognitionConfig(config=config)
# streaming_recognize returns a generator.
responses = client.streaming_recognize(streaming_config, requests)
for response in responses:
# Once the transcription has settled, the first result will contain the
# is_final result. The other results will be for subsequent portions of
# the audio.
for result in response.results:
print('Finished: {}'.format(result.is_final))
print('Stability: {}'.format(result.stability))
alternatives = result.alternatives
# The alternatives are ordered from most likely to least.
for alternative in alternatives:
print('Confidence: {}'.format(alternative.confidence))
print(u'Transcript: {}'.format(alternative.transcript))
When sending audio to the Google Speech service, make sure that the service object setup matches the audio encoding. In your particular case
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')
corresponds to single channel, 16KHz, linear 16 bit PCM encoding. See the list of other supported encodings if you need to transcribe audio in different formats.