Search code examples
pythonsqlalchemysnowflake-cloud-data-platformsubquery

Create nested queries with SQLAlchemy


I have a nested query that I need to write in a Pyhton project using SQLAlchemy, but everything I tried doesn't seem to work and I keep getting errors. This is the SQL query (I'm using Snowflake) and it's returning all accounts that have the max order count (there are multiple accounts that the same max count):

SELECT ACCOUNT_ID, COUNT(*) as MAX_ORDERS
FROM ORDERS
GROUP BY ACCOUNT_ID
HAVING COUNT(*) = (
    SELECT MAX(COUNT_ORDERS)
    FROM (
        SELECT ACCOUNT_ID, COUNT(ORDER_ID) as COUNT_ORDERS
        FROM ORDERS
        GROUP BY ACCOUNT_ID
        ORDER BY COUNT(ORDER_ID) DESC
    )
);

This is the Orders class written in Python:

class Order(Base):
    __tablename__ = "ORDERS"

    order_id = db.Column(db.INTEGER, primary_key=True)
    account_id = db.Column(db.INTEGER)
    bank_to = db.Column(db.String(16777216))
    account_to = db.Column(db.INTEGER)
    amount = db.Column(db.Float)
    k_symbol = db.Column(db.String(16777216))

I tried separating into 3 queries tied to each other, but I'm going from one error to another. Would anyone please help me figure out this nested query?


Solution

  • You can use two subqueries to do what you want.

    from sqlalchemy import create_engine, String, select, func, text, column
    from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session
    
    class Base(DeclarativeBase):
        pass
    
    class Order(Base):
        __tablename__ = "ORDERS"
        order_id: Mapped[int] = mapped_column(primary_key=True)
        account_id: Mapped[int]
        bank_to: Mapped[str] = mapped_column(String(16777216))
        account_to: Mapped[int]
        amount: Mapped[float]
        k_symbol: Mapped[str] = mapped_column(String(16777216))
    
    engine = create_engine("sqlite:///temp.sqlite", echo=True)
    Base.metadata.create_all(engine)
    
    with Session(engine) as session:
        subq_1 = (
            select(Order.account_id, func.count(Order.order_id).label("COUNT_ORDERS"))
            .group_by(Order.account_id)
            .order_by(func.count(Order.order_id).desc())
        )
        subq_2 = select(func.max(column("COUNT_ORDERS"))).select_from(subq_1.subquery())
        statement = (
            select(Order.account_id, func.count(text("*")).label("MAX_ORDERS"))
            .group_by(Order.account_id)
            .having(func.count(text("*")) == subq_2.scalar_subquery())
        )
        for i in session.scalars(statement):
            print(i)
    

    This generates the query

    SELECT "ORDERS".account_id, count(*) AS "MAX_ORDERS" 
    FROM "ORDERS" GROUP BY "ORDERS".account_id 
    HAVING count(*) = (SELECT max("COUNT_ORDERS") AS max_1 
    FROM (SELECT "ORDERS".account_id AS account_id, count("ORDERS".order_id) AS "COUNT_ORDERS" 
    FROM "ORDERS" GROUP BY "ORDERS".account_id ORDER BY count("ORDERS".order_id) DESC) AS anon_1)