Search code examples
python postgresql sqlalchemy ofbiz

SQLAlchemy cascade delete on automapped Universal Data Model Schema


I have a predefined PostgreSQL database from an OfBiz installation. The database has numerous foreign key components. I am trying to write a python program to copy the data from a production database into staging or development databases.

The last step is to clear some private data out of my data-set that should not be seen by developers.

I have the reflection set up as follows:

def reflectSourceTables():
    """Build the automap base for the source schema and map its classes.

    Rebinds the module-level ``Base`` to a fresh automap base created from
    ``smeta`` and resets the ``baseNum`` counter to 0 before calling
    ``Base.prepare`` with the custom naming and relationship hooks.

    NOTE(review): ``smeta`` is presumably already reflected from the source
    database before this runs -- confirm against the caller.
    """
    global Base, baseNum
    Base = automap_base(metadata=smeta)
    # Reset the counter that the naming hooks bump to keep names unique.
    baseNum = 0
    Base.prepare(
        name_for_collection_relationship=_name_for_collection_relationship,
        name_for_scalar_relationship=_name_for_scalar_relationship,
        generate_relationship=_generate_relationship,
    )

The naming and relationship-generation callbacks passed to `Base.prepare` are defined as follows:

def _name_for_scalar_relationship(base, local_cls, referred_cls, constraint):
    """Return the attribute name for a scalar relationship built from ``constraint``.

    For a named constraint the result is
    ``<referred class, lowercased>.<constraint columns joined by '_'>_scalar_<n>``,
    where ``n`` is the module-level ``baseNum`` counter after it has been
    incremented. For an unnamed constraint the stock
    ``name_for_scalar_relationship`` result is returned and the counter is
    left untouched.
    """
    global baseNum
    if not constraint.name:
        # Nothing to build a custom name from: revert to the default behavior.
        return name_for_scalar_relationship(base, local_cls, referred_cls, constraint)

    baseNum += 1
    column_part = '_'.join(column.name for column in constraint.columns)
    return '{0}.{1}_scalar_{2}'.format(referred_cls.__name__.lower(), column_part, baseNum)

def _name_for_collection_relationship(base, local_cls, referred_cls, constraint):
    """Return the attribute name for a collection relationship built from ``constraint``.

    For a named constraint the result is
    ``<referred class, lowercased>.<constraint columns joined by '_'>_collection_<n>``,
    where ``n`` is the module-level ``baseNum`` counter after it has been
    incremented. For an unnamed constraint the stock
    ``name_for_collection_relationship`` result is returned and the counter
    is left untouched.
    """
    global baseNum
    if not constraint.name:
        # Nothing to build a custom name from: revert to the default behavior.
        return name_for_collection_relationship(base, local_cls, referred_cls, constraint)

    baseNum += 1
    column_part = '_'.join(column.name for column in constraint.columns)
    return '{0}.{1}_collection_{2}'.format(referred_cls.__name__.lower(), column_part, baseNum)

def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Relationship hook that adds cascade options to one-to-many relationships.

    When ``direction`` is ``interfaces.ONETOMANY`` the keyword arguments gain
    ``cascade='all, delete-orphan'`` and ``passive_deletes=True``; any other
    direction is passed through unchanged. The actual construction is
    delegated to ``generate_relationship``.
    """
    if direction is interfaces.ONETOMANY:
        kw.update(cascade='all, delete-orphan', passive_deletes=True)

    return generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw)

I am able to see the relationship between tables with the following code:

def getTableList(smeta):
    """Return the tables to process, expanded with every related table.

    When ``args.tables`` is empty the metadata's ``sorted_tables`` list is
    returned unchanged. Otherwise each requested name is validated against
    ``smeta.tables`` (unknown names are logged and skipped), and the list is
    then extended with the tables reachable through the mapped classes'
    relationships, followed transitively.

    Args:
        smeta: metadata object exposing ``tables`` (name -> table) and
            ``sorted_tables``.

    Returns:
        ``smeta.sorted_tables`` when no tables were requested, otherwise a
        list of table-name strings.
    """
    if not args.tables:
        return smeta.sorted_tables

    tableList = []
    for table in args.tables:
        # Test membership first: indexing smeta.tables with an unknown name
        # raises KeyError, which made the log branch below unreachable.
        if table in smeta.tables:
            tableList.append(str(smeta.tables[table]))
        else:
            log('Table {0} does not exist on source'.format(table))

    # Walk the list by index because it grows while it is scanned: tables
    # discovered through a relationship are appended and visited in turn.
    position = 0
    while position < len(tableList):
        mapped_class = getattr(Base.classes, str(tableList[position]))
        position += 1
        for relationship in mapped_class.__mapper__.relationships:
            # The related table name is the text between the first and the
            # last '.' of the relationship's string form.
            match = re.search(r'\.(.*)\.', str(relationship))
            if match is None:
                continue
            tableName = match.group(1)
            if tableName and tableName not in tableList:
                tableList.append(tableName)

    return tableList
    

But my hope was that the following delete code would also remove the child records:

def cleanData():
    """Bulk-delete the vouchers and invoices belonging to each id in ``partyIds``.

    Opens a session bound to ``db2`` and, for every party id, issues one
    query-level delete for vouchers (matched on ``party_id``) and one for
    invoices (matched on ``party_id_from``), committing after each party.
    """
    log("Clean Data")
    session_factory = sessionmaker()
    destSess = session_factory(bind=db2)

    for partyId in partyIds:
        log("Cleaning data for {0}".format(partyId))
        voucher = Base.classes.voucher
        invoice = Base.classes.invoice

        # Query-level deletes; the session is not synchronized afterwards.
        party_vouchers = destSess.query(voucher).filter_by(party_id=partyId)
        party_vouchers.delete(synchronize_session=False)

        party_invoices = destSess.query(invoice).filter_by(party_id_from=partyId)
        party_invoices.delete(synchronize_session=False)

        destSess.commit()

The delete does delete vouchers and invoices, but does not delete child invoice_item records.

My database setup does not include cascade delete functionality for foreign keys, but I was hoping that I could have the ORM provide the functionality.

Ideally this code would delete the children of a voucher or invoice.

Edit

New relationship generation is as follows:

def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Relationship hook that cascades deletes from the "one" side.

    One-to-many and many-to-many relationships get
    ``cascade='all, delete, delete-orphan'`` with ``passive_deletes=False``;
    many-to-one relationships are marked ``viewonly``. The actual
    construction is delegated to ``generate_relationship``.

    See: https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html#custom-relationship-arguments
    """
    # NOTE(review): delete-orphan on a many-to-many relationship normally
    # needs single_parent=True -- confirm against the SQLAlchemy cascade docs.
    if direction is interfaces.ONETOMANY or direction is interfaces.MANYTOMANY:
        kw['cascade'] = 'all, delete, delete-orphan'
        kw['passive_deletes'] = False

    if direction is interfaces.MANYTOONE:
        kw['viewonly'] = True

    return generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw)

Delete code changed to:

# Fragment: presumably the body of the per-party loop in cleanData, so
# partyId and destSess come from that surrounding code.
voucher = Base.classes.voucher
invoice = Base.classes.invoice

# Load the invoices and delete them one by one through the session, rather
# than with a query-level delete.
invoiceDelete = destSess.query(invoice).filter_by(party_id_from=partyId)
for result in invoiceDelete.all():
    destSess.delete(result)

# Same for the vouchers.
voucherDelete = destSess.query(voucher).filter_by(party_id=partyId)
for result in voucherDelete.all():
    destSess.delete(result)

destSess.commit()

This results in the following error:

sqlalchemy.exc.IntegrityError: (psycopg2.IntegrityError) update or delete on table "invoice_item" violates foreign key constraint "invoice_imat_itm" on table "invoice_item_attribute"
DETAIL:  Key (invoice_id, invoice_item_seq_id)=(19439, 00001) is still referenced from table "invoice_item_attribute".
 [SQL: 'DELETE FROM invoice_item WHERE invoice_item.invoice_id = %(invoice_id)s AND invoice_item.invoice_item_seq_id = %(invoice_item_seq_id)s'] [parameters: ({'invoice_id': '19439', 'invoice_item_seq_id': '00001'}, {'invoice_id': '33674', 'invoice_item_seq_id': '00001'}, {'invoice_id': '49384', 'invoice_item_seq_id': '00001'}, {'invoice_id': '58135', 'invoice_item_seq_id': '00001'}, {'invoice_id': '83457', 'invoice_item_seq_id': '00001'})] (Background on this error at: http://sqlalche.me/e/gkpj)

Solution

  • The following configuration allowed me to delete the child records of vouchers and invoices in my database.

    Relationship auto-mapper set up as follows:

    def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
        """Relationship hook that adds cascade-delete options.

        One-to-many relationships get ``cascade='all, delete, delete-orphan'``,
        ``passive_deletes=False`` and ``lazy='immediate'``; many-to-one and
        many-to-many relationships are marked ``viewonly``. The actual
        construction is delegated to ``generate_relationship``.

        See: https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html#custom-relationship-arguments
        """
        if direction is interfaces.ONETOMANY:
            kw.update(
                cascade='all, delete, delete-orphan',
                passive_deletes=False,
                lazy='immediate',
            )
        elif direction is interfaces.MANYTOONE or direction is interfaces.MANYTOMANY:
            kw['viewonly'] = True

        return generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw)
    

    Delete Code is:

    def cleanData():
        """Delete each party's invoices and vouchers together with their children.

        For every id in ``partyIds`` the matching invoices (``party_id_from``)
        and then the matching vouchers (``party_id``) are loaded; each record's
        children are removed through ``deleteChildren`` before the record
        itself is deleted. The session is committed after each of the two
        passes.
        """
        # (mapped class name, party filter column), handled in this order.
        targets = (('invoice', 'party_id_from'), ('voucher', 'party_id'))

        for partyId in partyIds:
            log("Cleaning data for {0}".format(partyId))

            for class_name, party_column in targets:
                mapped_class = getattr(Base.classes, class_name)
                matches = destSess.query(mapped_class).filter_by(**{party_column: partyId}).all()
                for record in matches:
                    deleteChildren(record, destSess)
                    destSess.delete(record)
                destSess.commit()
    
    def deleteChildren(result, destSess):
        """Recursively delete every one-to-many descendant of ``result``.

        Each one-to-many relationship on ``result``'s mapper is followed. A
        child that has relationships of its own is descended into first, then
        the child is deleted and the session committed. ``result`` itself is
        not deleted; that is left to the caller.

        Args:
            result: mapped instance whose children should be removed.
            destSess: session used for the deletes and commits.
        """
        for relationship in result.__mapper__.relationships:
            if relationship.direction is not interfaces.ONETOMANY:
                continue

            # relationship.key is the attribute name on the instance; use it
            # directly instead of parsing it out of str(relationship).
            # Snapshot the collection so the deletes and commits below cannot
            # disturb the iteration.
            children = list(getattr(result, relationship.key))
            for child in children:
                if child.__mapper__.relationships:
                    deleteChildren(child, destSess)
                    destSess.commit()
                destSess.delete(child)
                destSess.commit()
    

    To explain: I fetch the records for the party id that I want to remove from the database, and a recursive method then walks each record's one-to-many relationships, which are loaded eagerly, to reach its children. If a child record has children of its own, I call the same method on it. Once there are no more one-to-many relationships to follow, I delete the record and return to its parent, which is deleted in turn.