Search code examples
python-3.x, bsddb

bsddb3 cannot read whole file


My code for creating the Berkeley DB file:

def create_bdb_object(filename, is_create=True):
    """Create a new BTree Berkeley DB file and return the open handle.

    The database is configured with ``DB_DUP | DB_DUPSORT`` and opened with
    ``DB_CREATE | DB_EXCL``, so ``open()`` is an exclusive create: it is
    expected to fail if *filename* still exists at that point.

    Args:
        filename: Path of the database file to create.
        is_create: When true (the default), an existing file at *filename*
            is deleted first so that the exclusive create can succeed.

    Returns:
        The opened ``bsddb3.db.DB`` handle; the caller must ``close()`` it.
    """
    bdb = bsddb3.db.DB()
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    open_flags = bsddb3.db.DB_CREATE | bsddb3.db.DB_EXCL
    # `is_create` is a parameter rather than a free variable: as a free
    # variable it was undefined here and raised NameError as soon as the
    # file already existed.
    if is_create and os.path.exists(filename):
        os.remove(filename)
    bdb.open(filename, dbtype=bsddb3.db.DB_BTREE, flags=open_flags)
    return bdb

After that, I wrote some pickled data into this file. The file is created without any problems.

Update#1: Code for writing to the file:

def write_to_the_file(filename, kv_pair_rdd):
    """Write every ``(url, record)`` pair of *kv_pair_rdd* to a Berkeley DB file.

    The data is first written to ``<filename>.new`` and renamed to *filename*
    only after all records were stored and the database was closed, so a
    failed run never replaces an existing *filename* with partial data.

    Args:
        filename: Final path of the database file.
        kv_pair_rdd: Object whose ``toLocalIterator()`` yields
            ``(url, record)`` pairs. ``url.encode()`` is used as the key and
            the pickled ``record`` (protocol 2) as the value.
    """
    bdb_filename = f'{filename}.new'
    bdb = create_bdb_object(bdb_filename)
    try:
        for url, record in kv_pair_rdd.toLocalIterator():
            bdb.put(url.encode(), pickle.dumps(record, protocol=2))
    finally:
        # Release the handle even when iteration, pickling or put() fails;
        # the exception still propagates, so the rename below is skipped.
        bdb.close()
    os.rename(bdb_filename, filename)

But when I try to read this file, I do not get all of the data back. The file should contain 9 records, but after reading I get only 4.

When I run `db_dump -p filename`, I get 9 records.

Code for reading data from file:

# Dump every key/value pair of the Berkeley DB file `filename` to stdout.
bdb = bsddb3.db.DB()
bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
bdb.open(filename)
try:
    bdb_cursor = bdb.cursor()
    try:
        # Walk the database from the first record; the loop ends as soon as
        # the cursor returns a falsy value instead of a (key, value) pair.
        record = bdb_cursor.first()
        while record:
            # NOTE: pickle.loads can execute arbitrary code; only read
            # database files that come from a trusted source.
            print(record[0], pickle.loads(record[1]))
            record = bdb_cursor.next()
    finally:
        # Close the cursor even if unpickling or printing a record fails.
        bdb_cursor.close()
finally:
    bdb.close()

Could anybody explain to me what I'm doing wrong, please?


Solution

  • Investigate what data actually ends up in your file. I used your code and created the following script:

    import bsddb3
    import os
    import pickle
    
    
    def create_bdb_object(filename):
        """Return an open handle to a freshly created BTree database file.

        Any file already present at *filename* is removed before the
        database is opened with ``DB_CREATE | DB_EXCL``. The handle is
        configured with ``DB_DUP | DB_DUPSORT`` and must be closed by the
        caller.
        """
        db_api = bsddb3.db
        handle = db_api.DB()
        handle.set_flags(db_api.DB_DUP | db_api.DB_DUPSORT)
        if os.path.exists(filename):
            # Clear the way for the exclusive create requested below.
            os.remove(filename)
        handle.open(
            filename,
            dbtype=db_api.DB_BTREE,
            flags=db_api.DB_CREATE | db_api.DB_EXCL,
        )
        return handle
    
    
    def write_to_the_file(filename, data):
        """Store every ``url -> record`` entry of *data* in a Berkeley DB file.

        Records are written to ``<filename>.new``; that file is renamed to
        *filename* only after every record was stored and the database was
        closed, so a failed run does not leave partial data at *filename*.

        Args:
            filename: Final path of the database file.
            data: Mapping of ``str`` URLs to picklable records. Each key is
                stored as ``url.encode()`` and each value pickled with
                protocol 2.
        """
        bdb_filename = f'{filename}.new'
        bdb = create_bdb_object(bdb_filename)
        try:
            for url, record in data.items():
                bdb.put(url.encode(), pickle.dumps(record, protocol=2))
        finally:
            # Release the handle even when pickling or put() fails; the
            # exception still propagates, so the rename below is skipped.
            bdb.close()
        os.rename(bdb_filename, filename)
    
    
    def read_bdb(bdb_filename):
        """Print every record stored in the Berkeley DB file *bdb_filename*.

        Each record is printed with a running number starting at 1, its raw
        key and its unpickled value. The cursor and the database handle are
        closed even if reading or unpickling a record fails.

        Args:
            bdb_filename: Path of an existing database file.
        """
        bdb = bsddb3.db.DB()
        bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
        bdb.open(bdb_filename)
        try:
            bdb_cursor = bdb.cursor()
            try:
                record = bdb_cursor.first()
                counter = 1
                # The loop ends as soon as the cursor returns a falsy value
                # instead of a (key, value) pair.
                while record:
                    # NOTE: pickle.loads can execute arbitrary code; only
                    # read database files that come from a trusted source.
                    print('Record num: %s, key: %s, value: %s' % (counter, record[0], pickle.loads(record[1])))
                    record = bdb_cursor.next()
                    counter += 1
            finally:
                bdb_cursor.close()
        finally:
            bdb.close()
    
    
    def main():
        """Round-trip nine sample records through a Berkeley DB file.

        Writes the sample mapping to ``/tmp/bsddb.bdb`` with
        ``write_to_the_file`` and prints it back with ``read_bdb``.
        """
        bdb_filename = '/tmp/bsddb.bdb'
        data = {'www.example1.com': 'lorem ipsum 1',
                'www.example2.com': 'lorem ipsum 2',
                'www.example3.com': 'lorem ipsum 3',
                'www.example4.com': 'lorem ipsum 4',
                'www.example5.com': 'lorem ipsum 5',
                'www.example6.com': 'lorem ipsum 6',
                'www.example7.com': 'lorem ipsum 7',
                'www.example8.com': 'lorem ipsum 8',
                'www.example9.com': 'lorem ipsum 9'}
        write_to_the_file(bdb_filename, data)

        read_bdb(bdb_filename)


    # Run the demo only when executed as a script, so importing this module
    # does not create or overwrite the file under /tmp.
    if __name__ == '__main__':
        main()
    

    It works perfectly and I can't reproduce the issue. Here's the output:

    Record num: 1, key: b'www.example1.com', value: lorem ipsum 1
    Record num: 2, key: b'www.example2.com', value: lorem ipsum 2
    Record num: 3, key: b'www.example3.com', value: lorem ipsum 3
    Record num: 4, key: b'www.example4.com', value: lorem ipsum 4
    Record num: 5, key: b'www.example5.com', value: lorem ipsum 5
    Record num: 6, key: b'www.example6.com', value: lorem ipsum 6
    Record num: 7, key: b'www.example7.com', value: lorem ipsum 7
    Record num: 8, key: b'www.example8.com', value: lorem ipsum 8
    Record num: 9, key: b'www.example9.com', value: lorem ipsum 9
    

    Maybe you have some additional code that somehow modifies your data.