I am writing a speech recognition program and have this code so far:
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math
class SpeechDetector:
    """Record phrases from the microphone and decode them with PocketSphinx.

    A phrase starts when the intensity of a chunk rises above
    ``self.THRESHOLD`` and ends once every chunk in the last
    ``self.SILENCE_LIMIT`` seconds is at or below it.  Each phrase is written
    to a temporary WAV file, decoded, printed and the file removed.

    The code is written to run unchanged on Python 2 and Python 3
    (single-argument ``print(...)``, ``//`` and ``b''.join``).
    """

    def __init__(self):
        """Set the capture parameters and build the PocketSphinx decoder."""
        # Microphone stream parameters.
        self.CHUNK = 1024  # frames read from the stream per call
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000  # frames per second

        # Seconds in which no chunk exceeds THRESHOLD before a phrase ends.
        self.SILENCE_LIMIT = 1
        # Seconds of audio from before the phrase start that are prepended.
        self.PREV_AUDIO = 0.5
        # Intensity above which a chunk counts as sound; setup_mic()
        # overwrites this with a value derived from the microphone.
        self.THRESHOLD = 4500
        self.num_phrases = -1

        # Model files are looked up relative to the current directory.
        MODELDIR = "../../tools/pocketsphinx/model"
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/en-us'))
        config.set_string('-lm', os.path.join(MODELDIR, 'en-us/en-us.lm.bin'))
        config.set_string('-dict',
                          os.path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
        self.decoder = Decoder(config)

    def _intensity(self, chunk):
        """Return the intensity measure that is compared with THRESHOLD."""
        # NOTE(review): the stream is opened as paInt16 (2-byte samples) but
        # the average is taken with a sample width of 4.  Kept as-is because
        # the THRESHOLD values are tuned against this measure -- confirm.
        return math.sqrt(abs(audioop.avg(chunk, 4)))

    def setup_mic(self, num_samples=50):
        """Sample the microphone and derive ``self.THRESHOLD`` from it.

        Reads ``num_samples`` chunks, averages the loudest 20% of them and
        sets the threshold to 3500 when that average is below 3000, or to
        the average plus 100 otherwise.
        """
        print("Getting intensity values from mic.")
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=self.FORMAT,
                            channels=self.CHANNELS,
                            rate=self.RATE,
                            input=True,
                            frames_per_buffer=self.CHUNK)
            try:
                values = [self._intensity(stream.read(self.CHUNK))
                          for _ in range(num_samples)]
            finally:
                # Release the device even if a read fails.
                stream.close()
        finally:
            p.terminate()

        values.sort(reverse=True)
        # At least one sample, so a small num_samples cannot divide by zero.
        top_n = max(1, int(num_samples * 0.2))
        r = sum(values[:top_n]) / top_n
        print(" Finished ")
        print(" Average audio intensity is  %s" % (r,))

        if r < 3000:
            self.THRESHOLD = 3500
        else:
            self.THRESHOLD = r + 100

    def save_speech(self, data, p):
        """Write the recorded chunks to a WAV file and return its name.

        ``data`` is a sequence of raw audio chunks as returned by
        ``stream.read``; ``p`` is the ``pyaudio.PyAudio`` instance, used to
        look up the sample width.  The file is created in the current
        directory and named after the current Unix time in seconds.
        """
        filename = 'output_' + str(int(time.time())) + '.wav'
        wf = wave.open(filename, 'wb')
        try:
            # Use the same settings the stream was opened with so the WAV
            # header always matches the captured audio.
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(p.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            # b'' joins str chunks on Python 2 and bytes chunks on Python 3.
            wf.writeframes(b''.join(data))
        finally:
            wf.close()
        return filename

    def decode_phrase(self, wav_file):
        """Feed ``wav_file`` to the decoder and return the recognised words.

        The file is read as raw bytes in 1024-byte pieces, so any file
        header is passed to the decoder along with the samples.
        """
        self.decoder.start_utt()
        with open(wav_file, "rb") as stream:
            while True:
                buf = stream.read(1024)
                if not buf:
                    # End of file: without this the loop never terminates.
                    break
                self.decoder.process_raw(buf, False, False)
        self.decoder.end_utt()
        return [seg.word for seg in self.decoder.seg()]

    def run(self):
        """Listen on the microphone forever, decoding each detected phrase.

        Calibrates the threshold with ``setup_mic()`` first.  The loop only
        ends through an exception (for example ``KeyboardInterrupt``); the
        audio stream and PyAudio instance are released on the way out.
        """
        self.setup_mic()
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=self.FORMAT,
                            channels=self.CHANNELS,
                            rate=self.RATE,
                            input=True,
                            frames_per_buffer=self.CHUNK)
            try:
                print("* Mic set up and listening. ")
                # Chunks per second; deque needs integer lengths.
                rel = self.RATE // self.CHUNK
                slid_win = deque(maxlen=int(self.SILENCE_LIMIT * rel))
                prev_audio = deque(maxlen=int(self.PREV_AUDIO * rel))
                audio2send = []
                started = False
                while True:
                    cur_data = stream.read(self.CHUNK)
                    slid_win.append(self._intensity(cur_data))
                    if any(x > self.THRESHOLD for x in slid_win):
                        if not started:
                            print("Starting recording of phrase")
                            started = True
                        audio2send.append(cur_data)
                    elif started:
                        print("Finished recording, decoding phrase")
                        filename = self.save_speech(
                            list(prev_audio) + audio2send, p)
                        try:
                            r = self.decode_phrase(filename)
                        finally:
                            # Never leave the temporary file behind.
                            os.remove(filename)
                        print("DETECTED:  %s" % (r,))
                        # Reset so the next phrase starts from a clean state
                        # instead of re-sending everything heard so far.
                        started = False
                        slid_win.clear()
                        prev_audio.clear()
                        audio2send = []
                    else:
                        # Quiet and not recording: remember the most recent
                        # audio so the start of the next phrase is not lost.
                        prev_audio.append(cur_data)
            finally:
                stream.close()
        finally:
            p.terminate()
# Script entry point: build a detector and listen until interrupted.
if __name__ == "__main__":
    detector = SpeechDetector()
    detector.run()
But every time I run it, it fails with: ImportError: No module named pyaudio.
When I then run pip install pyaudio in the terminal, the build fails with this:
#include "portaudio.h"
^
1 error generated.
error: command '/usr/bin/clang' failed with exit status 1
I'm using a 2011 MacBook running macOS Sierra 10.12.5. I have tried the suggestions from the related Stack Overflow questions, but none of them worked for me. Please help me!
You need portaudio installed before installing pyaudio: the build fails because the compiler cannot find the portaudio.h header. Install portaudio with Homebrew first.
From the PyAudio website:
Apple Mac OS X: Use Homebrew to install the prerequisite portaudio library, then install PyAudio using pip:
brew install portaudio
pip install pyaudio
Notes:
If Homebrew is not already installed, download and install it first. pip will download the PyAudio source and build it for your version of Python. Both Homebrew and building PyAudio also require the Command Line Tools for Xcode to be installed (more information).