Search code examples
Tags: python, for-loop, glob, audio-fingerprinting

Loop for recognizing all files in a folder using Dejavu library for audio fingerprinting


Does anybody have experience with the Dejavu library for audio fingerprinting and recognition in Python? It works fine, but so far I can only recognize one file at a time, using:

print djv.recognize(FileRecognizer, 'path/song_name.mp3')

Does anyone have an idea how and where to create a loop that runs the print command above for every file in a folder? I think the loop should be created somewhere in here:

https://github.com/worldveil/dejavu/blob/master/dejavu/recognize.py

import dejavu.fingerprint as fingerprint
import dejavu.decoder as decoder
import numpy as np
import pyaudio
import time


class BaseRecognizer(object):
    """Shared base for recognizers that match audio through a Dejavu object."""

    def __init__(self, dejavu):
        """Keep a reference to ``dejavu`` and start with the default rate."""
        self.dejavu = dejavu
        # Initial value comes from the fingerprint module's default.
        self.Fs = fingerprint.DEFAULT_FS

    def _recognize(self, *data):
        """Gather the matches for every item in ``data`` and align them.

        Each positional argument is passed to ``dejavu.find_matches`` along
        with the current ``self.Fs``. All matches are pooled into one list,
        which is handed to ``dejavu.align_matches``; its result is returned.
        """
        pooled = [
            hit
            for chunk in data
            for hit in self.dejavu.find_matches(chunk, Fs=self.Fs)
        ]
        return self.dejavu.align_matches(pooled)

    def recognize(self):
        """Placeholder: the base class performs no recognition."""
        return None


class FileRecognizer(BaseRecognizer):
    """Recognizer that matches audio read from a file."""

    def __init__(self, dejavu):
        super(FileRecognizer, self).__init__(dejavu)

    def recognize_file(self, filename):
        """Decode ``filename`` and match it, timing the matching step.

        ``self.Fs`` is overwritten with the second value returned by
        ``decoder.read``. When the match is truthy, the seconds spent inside
        ``_recognize`` are stored on it under ``'match_time'``. The match is
        returned either way.
        """
        decoded, self.Fs = decoder.read(filename, self.dejavu.limit)

        # Only the matching call is timed; decoding above is excluded.
        started = time.time()
        result = self._recognize(*decoded)
        elapsed = time.time() - started

        if not result:
            return result
        result['match_time'] = elapsed
        return result

    def recognize(self, filename):
        """Recognize the file at ``filename`` by delegating to ``recognize_file``."""
        return self.recognize_file(filename)

I know it should be done with the glob module, but since I'm a newbie I don't know where to put it together with that loop, so that I don't have to specify the exact file to recognize each time!


Solution

  • No, just create another .py file in the same directory as the dejavu folder. Don't alter the library code.

    You probably want to do something like this:

    from dejavu import Dejavu
    from dejavu.recognize import FileRecognizer
    import os, fnmatch
    
    def find_files(directory, pattern):
        """Yield the path of each file under ``directory`` matching ``pattern``.

        The tree is traversed with ``os.walk``; in every visited folder the
        file base names are tested against the shell-style ``pattern`` and
        each match is yielded joined to the folder it was found in.

        Adapted from http://stackoverflow.com/a/2186673/712997
        """
        for folder, _subdirs, names in os.walk(directory):
            for matched in fnmatch.filter(names, pattern):
                yield os.path.join(folder, matched)
    
    # Settings handed to Dejavu.
    # NOTE: the credentials below are placeholders -- substitute your own and
    # keep real passwords out of source control.
    config = {
        "database": {
            "host": "127.0.0.1",
            "user": "root",
            "passwd": "Password123",
            "db": "dejavu_db",
        },
        "database_type": "mysql",
        "fingerprint_limit": 10,
    }

    # create a dejavu object
    djv = Dejavu(config)

    # gather the files to recognize (find_files searches subfolders too)
    UNLABELED_AUDIO_DIR = "/home/me/music/unknown/"
    PATTERN = "*.mp3"
    audio_paths = find_files(UNLABELED_AUDIO_DIR, PATTERN)

    # recognize them one at a time
    # print(...) with a single pre-formatted string behaves the same on
    # Python 2 and Python 3, unlike the bare print statement.
    original_file_to_song = {}
    for path in audio_paths:
        print("Attempting to recognize %s..." % path)
        original_file_to_song[path] = djv.recognize(FileRecognizer, path)

    # see the songs you've recognized
    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    for path, song in original_file_to_song.items():
        if song:
            print("Audio file at: %s was recognized as %s" % (path, song))
        else:
            # A falsy result means no match came back for this file.
            print("Audio file at: %s was not recognized" % path)
    

    Hopefully that is what you intended.