Search code examples
pythonsqlflasksqlalchemyorm

SQL query returning additional values while trying to sort based on a foreign key using SQL ORM


I have two tables here.

class BusinessRole:
    __tablename__ = 'businessrole'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    created_on = db.Column(db.DateTime, default=db.func.now())
    updated_on = db.Column(db.DateTime, default=db.func.now(), onupdate=db.func.now())
    name = db.Column(db.String(50), nullable=False)
    department = db.Column(db.String(50), nullable=False)
    app_config = db.Column(db.Text(TextLength.LONGTEXT.value))
    okta_group_name = db.Column(db.String(50), nullable=False)
    status = db.Column(db.String(50), nullable=False)
    version = db.Column(db.Integer, nullable=False)
    created_by = db.Column(db.String(120), nullable=False)
    modified_by = db.Column(db.String(120))
    approval_status = db.Column(db.String(50), nullable=False, default=BRApprovalStatus.VOID.value)
    owners = db.relationship('BusinessRoleOwners', backref='owners')
    approval = db.relationship('Approval', backref='approval')
    last_certified_date = db.Column(db.DateTime, default=db.func.now())
    certification_status = db.Column(db.String(50), nullable=False, default=CertificationStatus.VOID.value)
    operation_by = db.Column(db.String(50))
    business_role_audit = db.relationship('BusinessRoleAudit', backref='business_role_audit',
                                          foreign_keys='BusinessRoleAudit.business_role_id')
    prev_business_role_audit = db.relationship('BusinessRoleAudit', backref='prev_business_role_audit',
                                               foreign_keys='BusinessRoleAudit.prev_business_role_id')
    final_status = db.Column(db.String(50), nullable=True)


class Approval():
    __tablename__ = 'approval'
    
    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    created_on = db.Column(db.DateTime, default=db.func.now())
    updated_on = db.Column(db.DateTime, default=db.func.now(), onupdate=db.func.now())
    requester = db.Column(db.String(120), nullable=False)
    approver = db.Column(db.String(120), nullable=False)
    status = db.Column(db.String(50), nullable=False)
    created_by = db.Column(db.String(120), nullable=False)
    modified_by = db.Column(db.String(120), nullable=False, default='')
    comment = db.Column(db.Text, nullable=False)
    business_role_id = db.Column(db.String(36), db.ForeignKey(BusinessRole.id), nullable=False)
    approval_type = db.Column(db.String(36), nullable=False, default=ApprovalType.BUSINESS_ROLE_APPROVAL.value)

These are the enums

class BRStatus:
    APPROVED = "APPROVED"
    DRAFT = "DRAFT"
    NEW = "NEW"
    DENIED = "DENIED"

class BRApprovalStatus:
    VOID = "VOID"
    SENT = "SENT"
    DENIED = "DENIED"
    COMPLETED = "COMPLETED"
    OVERDUE = "OVERDUE"

class ApprovalStatus:
    APPROVED = "APPROVED"
    DENIED = "DENIED"
    PENDING = "PENDING"

class ApprovalType:
    CERTIFICATION_APPROVAL = "CERTIFICATION_APPROVAL"
    BUSINESS_ROLE_APPROVAL = "BUSINESS_ROLE_APPROVAL"

Here is the function I have written

def all_latest_business_roles(order_field_name='modified_on', order_type='desc'):
    from models import Approval
    


    sub_query = db.session.query(
        BusinessRole.name,
        func.max(BusinessRole.version).label('max_version'),
        func.max(BusinessRole.updated_on).label('latest_updated_on')
    ).filter(
        or_(and_(BusinessRole.status == BRStatus.APPROVED.value,
                 BusinessRole.approval_status == BRApprovalStatus.COMPLETED.value),
            and_(BusinessRole.status.in_([BRStatus.DRAFT.value, BRStatus.NEW.value, BRStatus.DENIED.value])))
    ).group_by(BusinessRole.name).subquery()

    # Comparing max version and role
    business_roles = db.session.query(BusinessRole).join(
        sub_query,
        and_(
            BusinessRole.version == sub_query.c.max_version,
            BusinessRole.updated_on == sub_query.c.latest_updated_on,
            BusinessRole.name == sub_query.c.name
        ))

    business_roles = business_roles.outerjoin(
        Approval, BusinessRole.approval)

    business_roles = order_results_from_query(business_roles, BRModel.BusinessRole, order_field_name, order_type)

    return business_roles

Now this returns to me 494 results from my database which is correct Now I want to sort based on the approver column of the Approval table

so in the query itself I add this change

business_roles = db.session.query(BusinessRole, Approval).join(
        sub_query,
        and_(
            BusinessRole.version == sub_query.c.max_version,
            BusinessRole.updated_on == sub_query.c.latest_updated_on,
            BusinessRole.name == sub_query.c.name
        ))

business_roles = business_roles.outerjoin(Approval, BusinessRole.approval)

But I now get 959 results, I want the same 494 results when I add the Approval field in the select ORM query.

In the order_results_from_query function I pass the order_field name = Approval.approver and order_type as asc() or desc()

Please can someone suggest the change to me


Solution

  • The outer join duplicates rows when there are multiple approvals for the same business role. Lets update all_latest_business_roles so that it include only the latest approval for each business role.

    def all_latest_business_roles(order_field_name='modified_on', order_type='desc'):
        from models import Approval
    
        sub_query = db.session.query(
            BusinessRole.name,
            func.max(BusinessRole.version).label('max_version'),
            func.max(BusinessRole.updated_on).label('latest_updated_on')
        ).filter(
            or_(and_(BusinessRole.status == BRStatus.APPROVED.value,
                     BusinessRole.approval_status == BRApprovalStatus.COMPLETED.value),
                and_(BusinessRole.status.in_([BRStatus.DRAFT.value, BRStatus.NEW.value, BRStatus.DENIED.value])))
        ).group_by(BusinessRole.name).subquery()
    
        latest_approval_sub_query = db.session.query(
            Approval.business_role_id,
            func.max(Approval.updated_on).label('latest_approval_updated_on')
        ).group_by(Approval.business_role_id).subquery()
    
        business_roles = db.session.query(BusinessRole, Approval).join(
            sub_query,
            and_(
                BusinessRole.version == sub_query.c.max_version,
                BusinessRole.updated_on == sub_query.c.latest_updated_on,
                BusinessRole.name == sub_query.c.name
            ))
    
        business_roles = business_roles.join(
            latest_approval_sub_query,
            BusinessRole.id == latest_approval_sub_query.c.business_role_id)
    
        business_roles = business_roles.join(
            Approval,
            and_(
                Approval.business_role_id == latest_approval_sub_query.c.business_role_id,
                Approval.updated_on == latest_approval_sub_query.c.latest_approval_updated_on
            ))
    
        business_roles = order_results_from_query(business_roles, BRModel.BusinessRole, order_field_name, order_type)
    
        return business_roles