Search code examples
python keyboard-events pyaudio

Output audio file not created correctly, or has unknown duration time


I am trying to record some utterances: the recording session should start when a key is pressed and held down, and stop when the key is released. I wrote the following Python script to record and store the data:

from pynput import keyboard
import time
import pyaudio
import wave

# Capture settings handed to p.open() inside MyListener.__init__.
CHUNK = 8192
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
# Not referenced anywhere in the code shown.
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
# Joined and written to the WAV file by the main loop.
# NOTE(review): nothing in this version ever appends to this list.
frames = []

# Module-level stream callback: returns the buffer unchanged together with
# pyaudio.paContinue.
# NOTE(review): the stream is opened with MyListener.callback instead, so
# this function is not referenced anywhere in the code shown.
def callback(in_data, frame_count, time_info, status):
    return (in_data, pyaudio.paContinue)

# Keyboard listener that also owns the PyAudio input stream.
# key_pressed tracks the left-cmd key: None until a left-cmd event has been
# seen, True after a press, False after a release.
class MyListener(keyboard.Listener):
    def __init__(self):
        super(MyListener, self).__init__(self.on_press, self.on_release)
        self.key_pressed = None

        # Open the input stream in callback mode; self.callback is registered
        # as the stream callback.
        self.stream = p.open(format=FORMAT,
                             channels=CHANNELS,
                             rate=RATE,
                             input=True,
                             frames_per_buffer=CHUNK,
                             stream_callback = self.callback)
        # Reports whether the stream is active right after opening it.
        print self.stream.is_active()

    def on_press(self, key):
        if key == keyboard.Key.cmd_l:
            self.key_pressed = True

    def on_release(self, key):
        if key == keyboard.Key.cmd_l:
            self.key_pressed = False

    # Picks the flag returned to PyAudio from the key state: paContinue while
    # the key is held, paComplete once it has been released, paAbort otherwise.
    # NOTE(review): key_pressed starts as None, so a callback that fires
    # before any left-cmd event takes the else branch and returns paAbort.
    # NOTE(review): no branch stores in_data, so `frames` is never filled.
    def callback(self,in_data, frame_count, time_info, status):
        if self.key_pressed == True:
            return (in_data, pyaudio.paContinue)
        elif self.key_pressed == False:
            return (in_data, pyaudio.paComplete)
        else:
            return (in_data,pyaudio.paAbort)


listener = MyListener()
listener.start()
# True between "key went down" and "key came up" as seen by this loop.
started = False

# Poll the listener's key state every 0.1 s.
# NOTE(review): the loop has no break; after the first release the stream is
# closed and PyAudio terminated, so a later press would call start_stream()
# on a closed stream.
while True:
    time.sleep(0.1)
    if listener.key_pressed == True and started == False:
        started = True
        listener.stream.start_stream()
        print "start Stream"

    elif listener.key_pressed == False and started == True:
        print "Something coocked"
        listener.stream.stop_stream()
        listener.stream.close()
        p.terminate()

        # Write whatever is in `frames` as a WAV file using the capture settings.
        wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
        wf.close()

        started = False

The problem with the script is that the audio file doesn't seem to contain any recorded audio; when I play it, the duration of the file is reported as unknown.

I am not sure I understand what could be wrong here.

Update:

new version with output:

from pynput import keyboard
import time
import pyaudio
import StringIO
import multiprocessing
from multiprocessing import Process, Queue, queues
import wave

# Capture settings handed to p.open() inside MyListener.__init__.
CHUNK = 8192
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
# Not referenced anywhere in the code shown.
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
# Appended to by MyListener.callback; joined and written out by the main loop.
frames = []

# Only referenced from commented-out lines in MyListener.callback.
stream_queue = Queue()



# Keyboard listener that also owns the PyAudio input stream.
# key_pressed tracks the left-cmd key: None until a left-cmd event has been
# seen, True after a press, False after a release.
class MyListener(keyboard.Listener):
    def __init__(self):
        super(MyListener, self).__init__(on_press=self.on_press, on_release=self.on_release)
        self.key_pressed = None


        # Open the input stream in callback mode; self.callback is registered
        # as the stream callback.
        self.stream = p.open(format=FORMAT,
                             channels=CHANNELS,
                             rate=RATE,
                             input=True,
                             frames_per_buffer=CHUNK,
                             stream_callback = self.callback)

        # The logged output below shows this printing True, i.e. the stream is
        # already active right after p.open().
        print ("Stream active? " + str(self.stream.is_active()))

    def on_press(self, key):
        if key == keyboard.Key.cmd_l:
            self.key_pressed = True

    def on_release(self, key):
        if key == keyboard.Key.cmd_l:
            self.key_pressed = False

    # Picks the flag returned to PyAudio from the key state and tries to store
    # the buffer in the module-level `frames` list.
    # NOTE(review): key_pressed starts as None, so a callback that fires
    # before any left-cmd event takes the else branch and returns paAbort;
    # the logged output ("callback" printed once, stream inactive afterwards)
    # is consistent with that.
    # NOTE(review): `data` is not defined in this method or at module level in
    # the code shown (the parameter is named in_data), so both append lines
    # would raise NameError if they were reached.
    def callback(self,in_data, frame_count, time_info, status):
        print "callback"
        if self.key_pressed == True:
            #stream_queue.put(in_data)
            frames.append(data)
            return (in_data, pyaudio.paContinue)

        elif self.key_pressed == False:
            #stream_queue.put(in_data)
            frames.append(data)
            return (in_data, pyaudio.paComplete)

        else:
            return (in_data,pyaudio.paAbort)


listener = MyListener()
listener.start()
# True between "key went down" and "key came up" as seen by this loop.
started = False

# Poll the listener's key state every 0.1 s.
# NOTE(review): the loop has no break; after the first release the stream is
# closed and PyAudio terminated, so a later press would call start_stream()
# on a closed stream.
while True:
    time.sleep(0.1)
    if listener.key_pressed == True and started == False:
        started = True
        listener.stream.start_stream()
        print ("Start stream -  Key is down")

    # Diagnostic branch: dumps the stream state on every poll while the key
    # is still held.
    elif listener.key_pressed == True and started == True:
        print("stream has started and key is still down")
        print("Stream is active? " + str(listener.stream.is_active()))
        print("Stream is stopped? " + str(listener.stream.is_stopped()))
        print("Stream is time? " + str(listener.stream.get_time()))

    elif listener.key_pressed == False and started == True:
        print("Key has been released")
        listener.stream.stop_stream()
        listener.stream.close()
        print("stream has been closed")
        p.terminate()

        # Write whatever is in `frames` as a WAV file using the capture settings.
        wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
        wf.close()

        started = False

output:

python File2.py 
Stream active? True
callback
Start stream -  Key is down
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134638.797766
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134638.902259
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.006739
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.111282
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.215573
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.320448
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.424682
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.528631
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.633699
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.738129
stream has started and key is still down
Stream is active? False
Stream is stopped? False
Stream is time? 134639.842747
Key has been released
stream has been closed
^CTraceback (most recent call last):
  File "File2.py", line 67, in <module>
    time.sleep(0.1)
KeyboardInterrupt
MacBook-Pro:~$ play output.wav 

output.wav:

 File Size: 44        
  Encoding: Signed PCM    
  Channels: 2 @ 16-bit   
Samplerate: 44100Hz      
Replaygain: off         
  Duration: unknown      

In:0.00% 00:00:00.00 [00:00:00.00] Out:0     [      |      ]        Clip:0    
Done.

Things that seem weird to me:

  • The stream is not active after calling listener.stream.start_stream().
  • The callback's print message ("callback") is printed only once, but it should be printed each time the callback stores data in frames, which apparently happens only once.
  • The duration of output.wav is unknown. Why?

Solution

  • Here is a first version of what you asked for. Sorry for all the globals: I am not familiar with pynput, so I just followed the documentation and used its simplest example. The code below works fine for me on Windows 7 with Python 3. Hold the space key to record, and press Esc to exit the script.

    from pynput import keyboard
    import pyaudio
    import wave
    import time
    
    # Capture settings handed to p.open() in the __main__ section.
    CHUNK = 8192
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    # Not referenced by the code shown in this version.
    RECORD_SECONDS = 5
    WAVE_OUTPUT_FILENAME = "output.wav"
    
    # State shared between the key handlers and the audio callback:
    #   record_on    - set to True by on_press when space is pressed
    #   complete_tag - set by on_release; makes callback return paComplete
    #   frames       - buffers appended by callback while record_on is true
    record_on = False
    complete_tag = False
    frames = []
    
    def callback(in_data, frame_count, time_info, status):
        """Stream callback handed to p.open().

        Appends the incoming buffer to the module-level ``frames`` list while
        ``record_on`` is set, and returns ``paComplete`` instead of
        ``paContinue`` once ``complete_tag`` has been set.
        """
        print("callback called")
        if record_on:
            frames.append(in_data)
        next_flag = pyaudio.paComplete if complete_tag else pyaudio.paContinue
        return in_data, next_flag
    
    def on_press(key):
        """Key-press handler: log the current flag, then arm recording on space."""
        global record_on
        print(record_on)
        space_pressed = key == keyboard.Key.space
        if space_pressed:
            record_on = True
    
    def on_release(key):
        """Key-release handler.

        Releasing space stops recording and flags the stream as complete.
        Releasing Esc returns False, which tells pynput to stop the listener.

        The recording flags are only touched for the space key. Previously
        the release of *any* key (for example the Enter that launched the
        script) set ``complete_tag`` and ended the stream before anything
        had been recorded.
        """
        global record_on
        global complete_tag
        if key == keyboard.Key.space:
            record_on = False
            complete_tag = True
        if key == keyboard.Key.esc:
            return False
    
    if __name__ == '__main__':
        # Open the microphone in callback mode: `callback` is registered as the
        # stream callback and receives every captured buffer.
        p = pyaudio.PyAudio()
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK,
                        stream_callback=callback)
        try:
            # Block until on_release returns False (Esc) and the listener stops.
            with keyboard.Listener(
                    on_press=on_press,
                    on_release=on_release) as listener:
                listener.join()
        finally:
            # Release the audio device even if the wait is interrupted
            # (e.g. Ctrl-C), and read the sample width while the PyAudio
            # instance is still alive.
            stream.stop_stream()
            stream.close()
            sample_width = p.get_sample_size(FORMAT)
            p.terminate()

        wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
        try:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))
        finally:
            # Always close the writer so the file handle is released.
            wf.close()
    

    UPDATE:

    I just rewrote your callback; it may work fine for you, but it does not for me.

    def callback(self,in_data, frame_count, time_info, status):
        """Stream callback that stores buffers according to the key state.

        While the key is held the buffer is appended to the module-level
        ``frames`` list and the stream continues. Once the key has been
        released, the buffer is appended one last time and the stream is
        completed. In any other state nothing is stored and the stream is
        kept alive.
        """
        print("callback")
        if self.key_pressed == True:
            print("record")
            frames.append(in_data)
            return in_data, pyaudio.paContinue
        if self.key_pressed == False:
            frames.append(in_data)
            return in_data, pyaudio.paComplete
        # Neither True nor False (the asker's class initialises key_pressed to
        # None): keep the stream running instead of aborting it.
        print("not record")
        return in_data, pyaudio.paContinue
    

    You have misunderstood how the callback works: when you call p.open with a callback, the callback is invoked whenever data arrives from the hardware. So the logic should be written in the callback, as in my version, instead of in a `while 1: time.sleep(0.1)` loop.

    So your whole problem is that on the first call of the callback the stream receives paAbort and then stops. The callback is therefore called only once, so your .wav file contains only the metadata (the 44-byte header) and has no duration.

    I changed your entire code to:

    from pynput import keyboard
    import pyaudio
    import wave
    
    # Capture settings handed to PyAudio.open() in MyListener.__enter__ and
    # reused when the WAV file is written in MyListener.__exit__.
    CHUNK = 8192
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    WAVE_OUTPUT_FILENAME = "output.wav"
    
    class MyListener(keyboard.Listener):
        """Keyboard listener that records microphone audio while space is held.

        Intended to be used as a context manager: entering opens the PyAudio
        input stream and starts the listener; exiting closes the audio
        resources and writes every captured buffer to WAVE_OUTPUT_FILENAME.

        Attributes set in __init__:
            key_pressed:  True while the space key is held down.
            complete_tag: set once space has been released; makes the stream
                          callback return paComplete.
            frames:       buffers captured while key_pressed was true.
        """

        def __init__(self):
            super(MyListener, self).__init__(on_press=self.on_press, on_release=self.on_release)
            self.key_pressed = False
            self.complete_tag = False
            self.frames = []

        def __enter__(self):
            """Open the input stream in callback mode and start listening."""
            self.p = pyaudio.PyAudio()
            self.stream = self.p.open(format=FORMAT,
                                      channels=CHANNELS,
                                      rate=RATE,
                                      input=True,
                                      frames_per_buffer=CHUNK,
                                      stream_callback=self.callback)
            self.start()
            return self

        def on_press(self, key):
            """Start collecting buffers when space goes down."""
            if key == keyboard.Key.space:
                self.key_pressed = True

        def on_release(self, key):
            """Stop collecting on space release; stop the listener on Esc."""
            if key == keyboard.Key.space:
                self.key_pressed = False
                # This must be the attribute that callback() checks. The
                # original assigned `self.complete`, which nothing reads, so
                # the stream was never told to complete.
                self.complete_tag = True
            if key == keyboard.Key.esc:
                # Returning False from a pynput callback stops the listener.
                return False

        def callback(self,in_data, frame_count, time_info, status):
            """Stream callback: keep the buffer while recording.

            Returns paComplete once complete_tag is set, paContinue otherwise.
            """
            callback_flag = pyaudio.paContinue
            if self.key_pressed:
                self.frames.append(in_data)
            if self.complete_tag:
                callback_flag = pyaudio.paComplete
            return in_data, callback_flag

        def __exit__(self, exc_type, exc_value, traceback):
            """Release the audio resources, stop listening and write the WAV file."""
            self.stream.stop_stream()
            self.stream.close()
            # Read the sample width while the PyAudio instance is still alive.
            sample_width = self.p.get_sample_size(FORMAT)
            self.p.terminate()
            self.stop()
            wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
            try:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(sample_width)
                wf.setframerate(RATE)
                wf.writeframes(b''.join(self.frames))
            finally:
                # Always close the writer so the file handle is released.
                wf.close()
    
    # Entering the context opens the audio stream and starts the listener;
    # leaving it writes the captured frames to WAVE_OUTPUT_FILENAME.
    # join() blocks until the listener stops.
    with MyListener() as listener:
        listener.join()