Search code examples
pythonsqlalchemymany-to-manypython-asyncio

How to properly handle many to many in async sqlalchemy?


I was trying to implement many to many relationship between tables. When I use backpopulates all tags for a specific user must be in the tags field.

The tables are successfully created.

Users and tags are added. Link table too.

import asyncio
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.util import await_only, greenlet_spawn

from sqlalchemy import Column, Table, ForeignKey
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy.dialects.postgresql import VARCHAR, INTEGER

Base = declarative_base()

user_tag = Table('user_tag', Base.metadata,
                 Column('user_id', INTEGER, ForeignKey('users.id')),
                 Column('tag_id', INTEGER, ForeignKey('tags.id'))
                 )


class User(Base):
    __tablename__ = 'users'
    id = Column(INTEGER, primary_key=True)
    name = Column(VARCHAR(32), nullable=False, unique=True)
    tags = relationship("Tag",
                        secondary=user_tag,
                        back_populates="users")


class Tag(Base):
    __tablename__ = 'tags'
    id = Column(INTEGER, primary_key=True)
    tag = Column(VARCHAR(255), nullable=False, unique=True)
    users = relationship("User",
                         secondary=user_tag,
                         back_populates="tags")


async def main():
    engine = create_async_engine(
        "postgresql+asyncpg://postgres:pgs12345@localhost/test",
        echo=False,
    )

    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.drop_all)
        await conn.run_sync(Base.metadata.create_all)

    users = [User(name="p1"), User(name="p2"), User(name="p3")]
    tags = [Tag(tag="tag1"), Tag(tag="tag2"), Tag(tag="tag3")]

    async with AsyncSession(engine) as session:
        async with session.begin():
            session.add_all(users)
            session.add_all(tags)

        for user in users:
            await session.refresh(user)
        for tag in tags:
            await session.refresh(tag)

        for user in users:
            for i in range(3, user.id - 1, -1):
                await session.execute(user_tag.insert().values(user_id=user.id, tag_id=i))
        await session.commit()

        for user in users:
            await session.refresh(user)
        for tag in tags:
            await session.refresh(tag)

        tags = await greenlet_spawn(users[0].tags)
        print(tags)


loop = asyncio.get_event_loop()
loop.run_until_complete(main())

When I run the program, it crashes with:

 File "C:\Sources\asyncSQLAl test\main.py", line 48, in <module>
    loop.run_until_complete(main())
  File "C:\Users\Stanislav\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 
642, in run_until_complete
    return future.result()
  File "C:\Sources\asyncSQLAl test\main.py", line 41, in main
    tags = await greenlet_spawn(await users[0].tags)
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\orm\attributes.py", line 480, in __get__
    return self.impl.get(state, dict_)
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\orm\attributes.py", line 931, in get
    value = self.callable_(state, passive)
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\orm\strategies.py", line 879, in _load_for_state
    return self._emit_lazyload(
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\orm\strategies.py", line 1036, 
in _emit_lazyload
    result = session.execute(
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\orm\session.py", line 1689, in 
execute
    result = conn._execute_20(statement, params or {}, execution_options)
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\engine\base.py", line 1582, in 
_execute_20
    return meth(self, args_10style, kwargs_10style, execution_options)
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\sql\lambdas.py", line 481, in _execute_on_connection
    return connection._execute_clauseelement(
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\engine\base.py", line 1451, in 
_execute_clauseelement
    ret = self._execute_context(
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\engine\base.py", line 1813, in 
_execute_context
    self._handle_dbapi_exception(
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\engine\base.py", line 1998, in 
_handle_dbapi_exception
    util.raise_(exc_info[1], with_traceback=exc_info[2])
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\util\compat.py", line 207, in raise_
    raise exception
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\engine\base.py", line 1770, in 
_execute_context
    self.dialect.do_execute(
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\engine\default.py", line 717, in do_execute
    cursor.execute(statement, parameters)
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\dialects\postgresql\asyncpg.py", line 449, in execute
    self._adapt_connection.await_(
  File "C:\Sources\asyncSQLAl test\venv\lib\site-packages\sqlalchemy\util\_concurrency_py3k.py", line 60, in await_only
    raise exc.MissingGreenlet(
sqlalchemy.exc.MissingGreenlet: greenlet_spawn has not been called; can't call await_() here. Was IO attempted in an unexpected place? (Background on this error at: http://sqlalche.me/e/14/xd2s)      
sys:1: RuntimeWarning: coroutine 'AsyncAdapt_asyncpg_cursor._prepare_and_execute' was never awaited

I don't quite understand how greenlet_spawn works here and where it should be used in this example.

For example, same program, but in sync style

from sqlalchemy import Column, Table, ForeignKey
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
from sqlalchemy import create_engine
from sqlalchemy.dialects.postgresql import VARCHAR, INTEGER

Base = declarative_base()

user_tag = Table('user_tag', Base.metadata,
                 Column('user_id', INTEGER, ForeignKey('users.id')),
                 Column('tag_id', INTEGER, ForeignKey('tags.id'))
                 )


class User(Base):
    __tablename__ = 'users'
    id = Column(INTEGER, primary_key=True)
    name = Column(VARCHAR(32), nullable=False, unique=True)
    tags = relationship("Tag",
                        secondary=user_tag,
                        back_populates="users")


class Tag(Base):
    __tablename__ = 'tags'
    id = Column(INTEGER, primary_key=True)
    tag = Column(VARCHAR(255), nullable=False, unique=True)
    users = relationship("User",
                         secondary=user_tag,
                         back_populates="tags")
    
    def __str__(self):
        return self.tag


def main():
    engine = create_engine(
        "postgresql+psycopg2://postgres:pgs12345@localhost/test",
        echo=False,
    )

    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)

    Session = sessionmaker(bind=engine)
    session = Session()

    users = [User(name="p1"), User(name="p2"), User(name="p3")]
    tags = [Tag(tag="tag1"), Tag(tag="tag2"), Tag(tag="tag3")]

    with session.begin():
        session.add_all(users)
        session.add_all(tags)

    for user in users:
        for i in range(3, user.id - 1, -1):
            session.execute(user_tag.insert().values(
                user_id=user.id, tag_id=i))
    session.commit()

    for tag in users[0].tags:
        print(tag, end=" ")

main()

Gives me:

tag1 tag2 tag3 

Solution

  • I've been stuck on this today too and I've narrowed it down to the fact that a lazyload is attempted, which GreenLet is not happy about. I wasn't sure whether this was just my lack of skill but I've found this article that details some of the common errors: https://matt.sh/sqlalchemy-the-async-ening, where it's mentioned that this very issue will occur in this way. Furthermore, the docs go into detail about needing to avoid lazyloading: https://docs.sqlalchemy.org/en/14/orm/extensions/asyncio.html.

    My solution at the moment is to effectively prefetch the Child relation upon the initial query of the Parent object and then manipulate it from there. Whether this is a true bug, in the sense that it should work in async when it already works in sync or simply a limitation of the async method, I've no idea.

    Edit 06/08/21, here is how I am prefetching relationships:

    import sqlalchemy as sa
    from sqlalchemy.ext.asyncio import AsyncSession
    from sqlalchemy.orm import selectinload
    from . import models
    
    async def get_parent_prefetch_children(db: AsyncSession, parent_id: int) -> models.Parent:
        result = await db.execute(
            sa.select(models.Parent).where(models.Parent.id == parent_id).options(
                selectinload(models.Parent.children)
            )
        )
        return result.scalar()
    

    In your case, you call users[0].tags, which makes a lazyload and fails. In order for you to avoid this, you must refetch users with their tags eagerly loaded.