Search code examples
pythonsqlalchemy

How to query from a joined table in SQLAlchemy


I have 2 tables with a many-to-many relationship, so I have a join table between them. They are defined like this

from sqlalchemy import String, Integer, Boolean
from sqlalchemy.orm import mapped_column, relationship

from root.db.ModelBase import ModelBase


class Module(ModelBase):
    __tablename__ = 'module'
    pk = mapped_column(Integer, primary_key=True)
    description = mapped_column(String)
    is_active = mapped_column(Boolean)
    name = mapped_column(String, unique=True, nullable=False)
    ngen_cal_active = mapped_column(String)
    groups = relationship("ModuleGroup", secondary="module_group_member", back_populates="modules", lazy="joined")
---

from sqlalchemy import String, Integer, Boolean
from sqlalchemy.orm import mapped_column, relationship

from root.db.ModelBase import ModelBase


class ModuleGroup(ModelBase):
    __tablename__ = 'module_group'
    pk = mapped_column(Integer, primary_key=True)
    description = mapped_column(String)
    is_active = mapped_column(Boolean)
    name = mapped_column(String, unique=True, nullable=False)
    modules = relationship("Module", secondary="module_group_member", back_populates="groups", lazy="joined")
---

from sqlalchemy import String, Integer, Boolean, ForeignKey
from sqlalchemy.orm import mapped_column

from root.db.ModelBase import ModelBase


class ModuleGroupMember(ModelBase):
    __tablename__ = 'module_group_member'
    description = mapped_column(String)
    is_active = mapped_column(Boolean)
    module_pk = mapped_column(ForeignKey('module.pk'), primary_key=True,)
    module_group_pk = mapped_column(ForeignKey('module_group.pk'), primary_key=True)

If I query like this

query = select(Module).where(Module.name == 'Module3')  # type: ignore
results = session.execute(query).unique().all()
print('results', results)

It appears to work (although I don't know why it forces me to use unique() since there should only be 1 result) In the result, I see a groups object, which is defined by the relationship, so it all works fine.

However, if I want to get just the name and the associated groups, I tried something like this:

query = select(Module.name, Module.groups).where(Module.name == 'Module3') # type: ignore

and I get this error SAWarning: SELECT statement has a cartesian product between FROM element(s) "module_group", "module_group_member_1" and FROM element "module". Apply join condition(s) between each element to resolve.

I'm not sure what the problem is. I thought that SQLAlchemy would take care of any JOINS that need to be done

What I really want to do is get all the names and groups for every module

query = select(Module.name, Module.groups)

Solution

  • What I really want to do is get all the names and groups for every module

    Rather than construct your own query, you can use a simple ORM query with load_only() to only retrieve Module.name, and eager-load the groups as usual.

        query = select(Module).options(load_only(Module.name))
        results = sess.scalars(query).unique().all()
        """
    SELECT module.pk, module.name, module_group_1.pk AS pk_1, module_group_1.name AS name_1 
    FROM module 
        LEFT OUTER JOIN (
            module_group_member AS module_group_member_1 
            JOIN module_group AS module_group_1 
            ON module_group_1.pk = module_group_member_1.module_group_pk
        ) ON module.pk = module_group_member_1.module_pk
        """
        pprint(results)
        """
    [Module(name=Module1, groups=[ModuleGroup(name=Group11), ModuleGroup(name=Group12)]),
     Module(name=Module3, groups=[ModuleGroup(name=Group31), ModuleGroup(name=Group32)])]
         """
    

    You can then pull Module.name out as a scalar (str) value

        list_of_tuples = [(mod.name, mod.groups) for mod in results]
        pprint(list_of_tuples)
        """
    [('Module1', [ModuleGroup(name=Group11), ModuleGroup(name=Group12)]),
     ('Module3', [ModuleGroup(name=Group31), ModuleGroup(name=Group32)])]
        """