Search code examples
mysqlpython-2.7mysql-connector-python

Bulk update MySql with python


I have to update millions of row into MySQL. I am currently using for loop to execute query. To make the update faster I want to use executemany() of Python MySQL Connector, so that I can update in batches using single query for each batch.


Solution

  • I don't think mysqldb has a way of handling multiple UPDATE queries at one time.

    But you can use an INSERT query with ON DUPLICATE KEY UPDATE condition at the end.

    I written the following example for ease of use and readability.

    import MySQLdb
    
    def update_many(data_list=None, mysql_table=None):
        """
        Updates a mysql table with the data provided. If the key is not unique, the
        data will be inserted into the table.
    
        The dictionaries must have all the same keys due to how the query is built.
    
        Param:
            data_list (List):
                A list of dictionaries where the keys are the mysql table
                column names, and the values are the update values
            mysql_table (String):
                The mysql table to be updated.
        """
    
        # Connection and Cursor
        conn = MySQLdb.connect('localhost', 'jeff', 'atwood', 'stackoverflow')
        cur = conn.cursor()
    
        query = ""
        values = []
    
        for data_dict in data_list:
    
            if not query:
                columns = ', '.join('`{0}`'.format(k) for k in data_dict)
                duplicates = ', '.join('{0}=VALUES({0})'.format(k) for k in data_dict)
                place_holders = ', '.join('%s'.format(k) for k in data_dict)
                query = "INSERT INTO {0} ({1}) VALUES ({2})".format(mysql_table, columns, place_holders)
                query = "{0} ON DUPLICATE KEY UPDATE {1}".format(query, duplicates)
    
            v = data_dict.values()
            values.append(v)
    
        try:
            cur.executemany(query, values)
        except MySQLdb.Error, e:
            try:
                print"MySQL Error [%d]: %s" % (e.args[0], e.args[1])
            except IndexError:
                print "MySQL Error: %s" % str(e)
    
            conn.rollback()
            return False
    
        conn.commit()
        cur.close()
        conn.close()
    

    Explanation of one liners

    columns = ', '.join('`{}`'.format(k) for k in data_dict)
    

    is the same as

    column_list = []
    for k in data_dict:
        column_list.append(k)
    columns = ", ".join(columns)
    

    Here's an example of usage

    test_data_list = []
    test_data_list.append( {'id' : 1, 'name' : 'Marco', 'articles' : 1 } )
    test_data_list.append( {'id' : 2, 'name' : 'Keshaw', 'articles' : 8 } )
    test_data_list.append( {'id' : 3, 'name' : 'Wes', 'articles' : 0 } )
    
    update_many(data_list=test_data_list, mysql_table='writers')
    

    Query output

    INSERT INTO writers (`articles`, `id`, `name`) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE articles=VALUES(articles), id=VALUES(id), name=VALUES(name)
    

    Values output

    [[1, 1, 'Marco'], [8, 2, 'Keshaw'], [0, 3, 'Wes']]