python for-loop glob audio-fingerprinting

Loop for recognizing all files in a folder using Dejavu library for audio fingerprinting

Does anybody have experience with the Dejavu library for audio fingerprinting and recognition in Python? It's working fine, but so far I'm only able to recognize one file at a time using:

print(djv.recognize(FileRecognizer, 'path/song_name.mp3'))

Does anyone have an idea how and where to create a loop that runs the print command above for all the files in a folder? I think the loop should be created somewhere in here:

https://github.com/worldveil/dejavu/blob/master/dejavu/recognize.py

import dejavu.fingerprint as fingerprint
import dejavu.decoder as decoder
import numpy as np
import pyaudio
import time


class BaseRecognizer(object):
    """Common base for recognizers that delegate matching to a Dejavu object.

    Keeps the ``dejavu`` instance used for matching together with ``Fs``
    (presumably the sampling rate -- confirm against ``dejavu.fingerprint``),
    which starts out as ``fingerprint.DEFAULT_FS``.
    """

    def __init__(self, dejavu):
        self.dejavu = dejavu
        self.Fs = fingerprint.DEFAULT_FS

    def _recognize(self, *data):
        """Match every positional argument and return the aligned result.

        Each item in *data* is passed to ``dejavu.find_matches`` along with
        the current ``Fs``; all matches are pooled, in argument order, into
        one list that is then handed to ``dejavu.align_matches``. Whatever
        ``align_matches`` returns is returned unchanged.
        """
        pooled = [
            hit
            for item in data
            for hit in self.dejavu.find_matches(item, Fs=self.Fs)
        ]
        return self.dejavu.align_matches(pooled)

    def recognize(self):
        """Do nothing and return ``None``; subclasses supply the real logic."""
        return None


class FileRecognizer(BaseRecognizer):
    """Recognizer whose input is an audio file read through ``decoder.read``."""

    def __init__(self, dejavu):
        super(FileRecognizer, self).__init__(dejavu)

    def recognize_file(self, filename):
        """Decode *filename*, match its audio, and return the result.

        ``decoder.read`` is called with the file name and ``dejavu.limit``;
        the second value it returns replaces ``self.Fs``. When the match is
        truthy it gets a ``'match_time'`` entry holding the seconds spent in
        ``_recognize`` (decoding time is not included). A falsy match is
        returned as-is.
        """
        chunks, self.Fs = decoder.read(filename, self.dejavu.limit)

        started = time.time()
        result = self._recognize(*chunks)
        elapsed = time.time() - started

        if not result:
            return result

        result['match_time'] = elapsed
        return result

    def recognize(self, filename):
        """Recognize the audio file at *filename*; see ``recognize_file``."""
        return self.recognize_file(filename)

I know it should be done with the glob module, but since I'm a newbie, I don't know where to put it together with that loop so that I don't have to specify the exact file to recognize each time!

Solution

No, just create another .py file in the same directory as the dejavu folder. Don't alter the library code.

You probably want to do something like this:

from dejavu import Dejavu
from dejavu.recognize import FileRecognizer
import os, fnmatch

def find_files(directory, pattern):
    """Yield the path of every file under *directory* matching *pattern*.

    The tree is walked with ``os.walk``, so sub-directories are included.
    *pattern* is a shell-style wildcard (for example ``"*.mp3"``) that is
    tested against each file's base name only; matching files are yielded
    as ``os.path.join(root, basename)``.

    Adapted from http://stackoverflow.com/a/2186673/712997
    """
    for root, _dirs, files in os.walk(directory):
        for basename in fnmatch.filter(files, pattern):
            yield os.path.join(root, basename)

# Settings passed to Dejavu(config).
config = {
    # Connection details for the database named by "database_type".
    # These look like example credentials -- substitute your own.
    "database": {
        "host": "127.0.0.1",
        "user": "root",
        "passwd": "Password123",
        "db": "dejavu_db",
    },
    "database_type": "mysql",
    # NOTE(review): presumably caps how much of each file is fingerprinted --
    # confirm the unit against the Dejavu documentation.
    "fingerprint_limit": 10,
}

# create a dejavu object
djv = Dejavu(config)

# gather the files to recognize (find_files yields the paths one by one)
UNLABELED_AUDIO_DIR = "/home/me/music/unknown/"
PATTERN = "*.mp3"
audio_paths = find_files(UNLABELED_AUDIO_DIR, PATTERN)

# recognize them one at a time, remembering the result for each path
original_file_to_song = {}
for path in audio_paths:
    # print(...) with a single pre-formatted string behaves the same on
    # Python 2 and Python 3, unlike the bare print statement.
    print("Attempting to recognize %s..." % path)
    song = djv.recognize(FileRecognizer, path)
    original_file_to_song[path] = song

# see the songs you've recognized
# .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
for path, song in original_file_to_song.items():
    print("Audio file at: %s was recognized as %s" % (path, song))

Hopefully that is what you intended.