I have a predefined PostgreSQL database from an OFBiz installation. The schema contains numerous foreign key constraints. I am trying to write a Python program that copies the data from a production database into staging or development databases.
The last step is to clear some private data out of my data-set that should not be seen by developers.
I have the reflection set up as follows:
def reflectSourceTables():
    """Reflect the source metadata into automap classes.

    Rebinds the module-level ``Base`` to a new automap base built on
    ``smeta`` and resets the module-level ``baseNum`` counter, then
    prepares the mapped classes using the custom relationship naming and
    generation hooks.
    """
    global Base, baseNum
    # The naming hooks bump this counter while prepare() runs, so it has to
    # be zeroed before prepare() is called.
    baseNum = 0
    Base = automap_base(metadata=smeta)
    Base.prepare(
        generate_relationship=_generate_relationship,
        name_for_scalar_relationship=_name_for_scalar_relationship,
        name_for_collection_relationship=_name_for_collection_relationship,
    )
The relationship naming and generation callbacks passed to `prepare()` are defined as follows:
def _name_for_scalar_relationship(base, local_cls, referred_cls, constraint):
    """Return the attribute name automap should use for a scalar relationship.

    For a named constraint the result is
    ``<referred class, lower-cased>.<constraint columns joined by '_'>_scalar_<n>``
    where ``n`` is the module-level ``baseNum`` counter after incrementing
    it. For an unnamed constraint the automap default naming function is
    used instead.
    """
    global baseNum
    if not constraint.name:
        # Unnamed constraint: defer to the default behavior.
        return name_for_scalar_relationship(base, local_cls, referred_cls, constraint)

    baseNum += 1
    column_part = '_'.join(column.name for column in constraint.columns)
    return "{0}.{1}_scalar_{2}".format(
        referred_cls.__name__.lower(), column_part, baseNum
    )
def _name_for_collection_relationship(base, local_cls, referred_cls, constraint):
    """Return the attribute name automap should use for a collection relationship.

    For a named constraint the result is
    ``<referred class, lower-cased>.<constraint columns joined by '_'>_collection_<n>``
    where ``n`` is the module-level ``baseNum`` counter after incrementing
    it. For an unnamed constraint the automap default naming function is
    used instead.
    """
    global baseNum
    if not constraint.name:
        # Unnamed constraint: defer to the default behavior.
        return name_for_collection_relationship(base, local_cls, referred_cls, constraint)

    baseNum += 1
    column_part = '_'.join(column.name for column in constraint.columns)
    return "{0}.{1}_collection_{2}".format(
        referred_cls.__name__.lower(), column_part, baseNum
    )
def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Create an automap relationship, adding cascade options to one-to-many.

    One-to-many relationships get ``cascade='all, delete-orphan'`` and
    ``passive_deletes=True``; every other direction is passed through to
    the default ``generate_relationship`` unchanged.
    """
    if direction is interfaces.ONETOMANY:
        kw.update(cascade='all, delete-orphan', passive_deletes=True)
    return generate_relationship(
        base, direction, return_fn, attrname, local_cls, referred_cls, **kw
    )
I am able to see the relationship between tables with the following code:
def getTableList(smeta):
    """Return the tables to process, expanded with their related tables.

    When ``args.tables`` is empty, ``smeta.sorted_tables`` is returned as
    is. Otherwise each requested name is validated against ``smeta``;
    unknown names are logged and skipped. The resulting list of table
    names is then extended with every table reachable through the mapped
    relationships of ``Base.classes``.

    :param smeta: reflected source ``MetaData``.
    :return: ``smeta.sorted_tables`` or a list of table-name strings.
    """
    if not args.tables:
        return smeta.sorted_tables

    tableList = []
    ## Validate tables are in database
    for table in args.tables:
        # Membership test instead of indexing: smeta.tables[table] raises
        # KeyError for an unknown name, which made the log branch unreachable.
        if table in smeta.tables:
            tableList.append(str(smeta.tables[table]))
        else:
            log('Table {0} does not exist on source'.format(table))

    # tableList intentionally grows while it is being iterated so that
    # newly discovered tables are themselves scanned for relationships.
    for table in tableList:
        for relationship in getattr(Base.classes, str(table)).__mapper__.relationships:
            # The custom naming hooks produce "<class>.<table>.<cols>_..." so
            # the text between the first and last dot is the related table.
            match = re.search(r'\.(.*)\.', str(relationship))
            if match is None:
                # Relationship named by the default hooks: no table segment.
                continue
            tableName = match.group(1)
            if tableName and tableName not in tableList:
                tableList.append(tableName)
    return tableList
My hope was that the following delete code would also remove the child records:
def cleanData():
    """Bulk-delete vouchers and invoices belonging to each id in ``partyIds``.

    Opens a session bound to ``db2`` and, for every party id, issues a
    query-level delete against ``voucher`` (matching ``party_id``) and then
    ``invoice`` (matching ``party_id_from``), committing afterwards.
    """
    log("Clean Data")
    destSess = sessionmaker()(bind=db2)
    for partyId in partyIds:
        log("Cleaning data for {0}".format(partyId))
        # (mapped class, filter criteria) in the order they are deleted.
        targets = (
            (Base.classes.voucher, {'party_id': partyId}),
            (Base.classes.invoice, {'party_id_from': partyId}),
        )
        for model, criteria in targets:
            matching = destSess.query(model).filter_by(**criteria)
            matching.delete(synchronize_session=False)
        destSess.commit()
The delete does delete vouchers and invoices, but does not delete child invoice_item records.
My database setup does not include cascade delete functionality for foreign keys, but I was hoping that I could have the ORM provide the functionality.
Ideally this code would delete the children of a voucher or invoice.
New relationship generation is as follows:
def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Create an automap relationship with per-direction overrides.

    One-to-many and many-to-many relationships get
    ``cascade='all, delete, delete-orphan'`` with ``passive_deletes=False``;
    many-to-one relationships are marked ``viewonly``. The keyword
    arguments are then handed to the default ``generate_relationship``.
    """
    ## Cascade delete options, see: https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html#custom-relationship-arguments
    cascades = direction is interfaces.ONETOMANY or direction is interfaces.MANYTOMANY
    if cascades:
        kw.update(cascade='all, delete, delete-orphan', passive_deletes=False)
    if direction is interfaces.MANYTOONE:
        kw.update(viewonly=True)
    return generate_relationship(
        base, direction, return_fn, attrname, local_cls, referred_cls, **kw
    )
Delete code changed to:
# Load the matching rows and delete them one by one through the session,
# invoices first and vouchers second, then commit once at the end.
voucher = Base.classes.voucher
invoice = Base.classes.invoice
deletionOrder = (
    (invoice, {'party_id_from': partyId}),
    (voucher, {'party_id': partyId}),
)
for model, criteria in deletionOrder:
    for result in destSess.query(model).filter_by(**criteria).all():
        destSess.delete(result)
destSess.commit()
This results in the following error:
sqlalchemy.exc.IntegrityError: (psycopg2.IntegrityError) update or delete on table "invoice_item" violates foreign key constraint "invoice_imat_itm" on table "invoice_item_attribute"
DETAIL: Key (invoice_id, invoice_item_seq_id)=(19439, 00001) is still referenced from table "invoice_item_attribute".
[SQL: 'DELETE FROM invoice_item WHERE invoice_item.invoice_id = %(invoice_id)s AND invoice_item.invoice_item_seq_id = %(invoice_item_seq_id)s'] [parameters: ({'invoice_id': '19439', 'invoice_item_seq_id': '00001'}, {'invoice_id': '33674', 'invoice_item_seq_id': '00001'}, {'invoice_id': '49384', 'invoice_item_seq_id': '00001'}, {'invoice_id': '58135', 'invoice_item_seq_id': '00001'}, {'invoice_id': '83457', 'invoice_item_seq_id': '00001'})] (Background on this error at: http://sqlalche.me/e/gkpj)
The following configuration allowed me to delete children records of Vouchers and Invoices within my database.
Relationship auto-mapper set up as follows:
def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Create an automap relationship with per-direction overrides.

    One-to-many relationships get ``cascade='all, delete, delete-orphan'``,
    ``passive_deletes=False`` and ``lazy='immediate'``; many-to-one and
    many-to-many relationships are marked ``viewonly``. The keyword
    arguments are then handed to the default ``generate_relationship``.
    """
    ## Cascade delete options, see: https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html#custom-relationship-arguments
    if direction is interfaces.ONETOMANY:
        kw.update(
            cascade='all, delete, delete-orphan',
            passive_deletes=False,
            lazy='immediate',
        )
    readOnly = direction is interfaces.MANYTOONE or direction is interfaces.MANYTOMANY
    if readOnly:
        kw.update(viewonly=True)
    return generate_relationship(
        base, direction, return_fn, attrname, local_cls, referred_cls, **kw
    )
Delete Code is:
def cleanData():
    """Delete invoices and vouchers, with their children, for each party id.

    For every id in ``partyIds`` the matching ``invoice`` rows (by
    ``party_id_from``) and then the matching ``voucher`` rows (by
    ``party_id``) are loaded; each row has its children removed via
    ``deleteChildren`` before being deleted itself. The session is
    committed after each of the two groups.
    """
    # (mapped class name, party filter column) in deletion order.
    targets = (('invoice', 'party_id_from'), ('voucher', 'party_id'))
    for partyId in partyIds:
        log("Cleaning data for {0}".format(partyId))
        for className, partyColumn in targets:
            model = getattr(Base.classes, className)
            rows = destSess.query(model).filter_by(**{partyColumn: partyId}).all()
            for row in rows:
                deleteChildren(row, destSess)
                destSess.delete(row)
            destSess.commit()
def deleteChildren(result, destSess):
    """Recursively delete every one-to-many descendant of ``result``.

    Walks the one-to-many relationships of ``result``'s mapper depth-first:
    a child's own children are deleted (and committed) before the child
    itself is deleted, so rows are removed from the bottom of the
    hierarchy upwards. ``result`` itself is not deleted.

    :param result: mapped instance whose children should be removed.
    :param destSess: session used for the deletes and commits.
    """
    for relationship in result.__mapper__.relationships:
        if relationship.direction is not interfaces.ONETOMANY:
            continue
        # relationship.key is the mapped attribute name; it replaces parsing
        # the name out of str(relationship) with a regular expression.
        # Copy the collection so the deletes and commits below cannot
        # disturb the sequence being iterated.
        children = list(getattr(result, relationship.key))
        for child in children:
            if child.__mapper__.relationships:
                deleteChildren(child, destSess)
                destSess.commit()
            destSess.delete(child)
            destSess.commit()
To explain: I fetch the records related to the party id that I want to remove from the database, and then use a recursive method to get each record's children through the relationships I defined, which are loaded eagerly. If a child record has children of its own, I call the same method on it. When there are no more one-to-many relationships to follow, I delete the record and return to its parent, which is then deleted as well.