Search code examples
sql-serversqlalchemyflask-sqlalchemypyodbc

<column_name> is not contained in either an aggregate function or the GROUP BY clause. (8120)


I have a flask app whose DB back end is an MS SQL Server.
Please note that I am not crafting pyodbc queries, I am trying to craft an SqlAlchemy query.

    active_with_schools = db.query(
        Registration.id,
        Registration.uuid,
        Registration.last_name,
        Registration.first_name,
        Registration.email,
        Registration.phone,
        Registration.date_added,
        Registration.other_heard_from,
        Registration.other_feedback,
        func.coalesce(
            Registration.date_expired,
            datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S')
        ),
        func.coalesce(
            Registration.expired_by, ''
        ),
        func.count(School.registration_id)
    ).join(
        User,
        User.email == Registration.email
    ).join(
        School,
        School.registration_id == Registration.id,
        isouter=True
    ).filter(
        func.coalesce(
            Registration.date_expired,
            datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S')
        ) < datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S')
    ).group_by(
        Registration.id,
        Registration.uuid,
        Registration.last_name,
        Registration.first_name,
        Registration.email,
        Registration.phone,
        Registration.date_added,
        Registration.other_heard_from,
        Registration.other_feedback,
        func.coalesce(
            Registration.date_expired,
            datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S')
        ),
        func.coalesce(
            Registration.expired_by, ''
        )
    ).having(
        func.count(
            func.coalesce(
                School.registration_id, -1
            )
        ) > 0
    ).order_by(
        Registration.date_added.desc()
    ).all()

This throws a "ProgrammingError" exception (slightly edited):

(pyodbc.ProgrammingError) ('42000', "[42000] [Microsoft][ODBC Driver 
18 for SQL Server][SQL Server]Column 'MySchema.registration.date_expired'
is invalid in the select list because it is not contained in either an 
aggregate function or the GROUP BY clause. (8120) (SQLExecDirectW); 
[42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Column
'MySchema.registration.date_expired' is invalid in the select list because
it is not contained in either an aggregate function or the GROUP BY clause.
(8120); [42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]
Statement(s) could not be prepared. (8180)")
[SQL: SELECT [MySchema].registration.id AS [MySchema_registration_id],
            [MySchema].registration.uuid AS [MySchema_registration_uuid],
            [MySchema].registration.last_name AS [MySchema_registration_last_name], 
            [MySchema].registration.first_name AS [MySchema_registration_first_name], 
            [MySchema].registration.email AS [MySchema_registration_email],
            [MySchema].registration.phone AS [MySchema_registration_phone],
            [MySchema].registration.date_added AS [MySchema_registration_date_added], 
            [MySchema].registration.other_heard_from AS [MySchema_registration_other_heard_from], 
            [MySchema].registration.other_feedback AS [MySchema_registration_other_feedback], 
            coalesce([MySchema].registration.date_expired, ?) AS coalesce_1,
            coalesce([MySchema].registration.expired_by, ?) AS coalesce_3,
            count([MySchema].school.registration_id) AS count_1 
FROM [MySchema].registration 
    JOIN [MySchema].[user] 
        ON [MySchema].[user].email = [MySchema].registration.email 
    LEFT OUTER JOIN [MySchema].school 
        ON [MySchema].school.registration_id = [MySchema].registration.id 
WHERE coalesce([MySchema].registration.date_expired, ?) < ? 
GROUP BY [MySchema].registration.id, 
    [MySchema].registration.uuid, 
    [MySchema].registration.last_name, 
    [MySchema].registration.first_name, 
    [MySchema].registration.email, 
    [MySchema].registration.phone, 
    [MySchema].registration.date_added, 
    [MySchema].registration.other_heard_from, 
    [MySchema].registration.other_feedback, 
    coalesce([MySchema].registration.date_expired, ?),
    coalesce([MySchema].registration.expired_by, ?) 
HAVING count(coalesce([MySchema].school.registration_id, ?)) > ? 
ORDER BY [MySchema].registration.date_added DESC]
[parameters: (
    '2024-05-16 16:36:39', 
    '', 
    '2024-05-16 16:36:39', 
    '2024-05-16 16:36:39', 
    '2024-05-16 16:36:39', 
    '', 
    -1, 
    0
)]
(Background on this error at: https://sqlalche.me/e/20/f405)

However, If I open a DB Console window in PyCharm; paste in the SQL in the exception; and add in the parameter values it works:

SELECT [MySchema].registration.id                        AS [MySchema_registration_id],
       [MySchema].registration.uuid                      AS [MySchema_registration_uuid],
       [MySchema].registration.last_name                 AS [MySchema_registration_last_name],
       [MySchema].registration.first_name                AS [MySchema_registration_first_name],
       [MySchema].registration.email                     AS [MySchema_registration_email],
       [MySchema].registration.phone                     AS [MySchema_registration_phone],
       [MySchema].registration.date_added                AS [MySchema_registration_date_added],
       [MySchema].registration.other_heard_from          AS [MySchema_registration_other_heard_from],
       [MySchema].registration.other_feedback            AS [MySchema_registration_other_feedback],
       COALESCE([MySchema].registration.date_expired, '2024-05-16 16:36:39') AS coalesce_1,
       COALESCE([MySchema].registration.expired_by, '')   AS coalesce_3,
       COUNT([MySchema].school.registration_id)          AS count_1
    FROM [MySchema].registration
             JOIN [MySchema].[user]
                  ON [MySchema].[user].email = [MySchema].registration.email
             LEFT OUTER JOIN [MySchema].school
                             ON [MySchema].school.registration_id = [MySchema].registration.id
    WHERE COALESCE([MySchema].registration.date_expired, '2024-05-16 16:36:39') < '2024-05-16 16:36:39'
    GROUP BY [MySchema].registration.id, 
             [MySchema].registration.uuid, 
             [MySchema].registration.last_name,
             [MySchema].registration.first_name, 
             [MySchema].registration.email, 
             [MySchema].registration.phone,
             [MySchema].registration.date_added, 
             [MySchema].registration.other_heard_from,
             [MySchema].registration.other_feedback, 
             COALESCE([MySchema].registration.date_expired, '2024-05-16 16:36:39'),
             COALESCE([MySchema].registration.expired_by, '')
    HAVING COUNT(COALESCE([MySchema].school.registration_id, -1)) > 0
    ORDER BY [MySchema].registration.date_added DESC

What am I messing up with the SqlAlchemy?


Solution

  • What am I messing up with the SqlAlchemy?

    Nothing. You're hitting a limitation of parameterized queries in SQL Server ODBC. It has been discussed on GitHub here.

    To prove that it is not the fault of SQLAlchemy or pyodbc, here is repro code in VBA.

    Const default_expiry = "2024-05-16 16:36:39"
    cmd.CommandText = _
        "SELECT id, COALESCE(registration.date_expired, ?) AS coalesce_1 " & _
        "FROM registration " & _
        "GROUP BY id, COALESCE(registration.date_expired, ?)"
    cmd.Parameters.Append cmd.CreateParameter("", adVarChar, adParamInput, 30, default_expiry)
    cmd.Parameters.Append cmd.CreateParameter("", adVarChar, adParamInput, 30, default_expiry)
        
    Dim rst As ADODB.Recordset
    Set rst = cmd.Execute
    

    fails with

    Column 'registration.date_expired' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.

    However, this code runs without error:

    cmd.CommandText = _
        "SELECT id, COALESCE(registration.date_expired, '2024-05-16 16:36:39') AS coalesce_1 " & _
        "FROM registration " & _
        "GROUP BY id, COALESCE(registration.date_expired, '2024-05-16 16:36:39')"
        
    Dim rst As ADODB.Recordset
    Set rst = cmd.Execute
    

    Translating the first query into SQLAlchemy

    default_expiry = "2024-05-16 16:36:39"
    qry = select(
        Registration.id,
        func.coalesce(Registration.date_expired, default_expiry).label(
            "coalesce_1"
        ),
    ).group_by(
        Registration.id, func.coalesce(Registration.date_expired, default_expiry)
    )
    print(qry)
    """
    SELECT registration.id, coalesce(registration.date_expired, :coalesce_1) AS coalesce_1 
    FROM registration GROUP BY registration.id, coalesce(registration.date_expired, :coalesce_2)
    """
    

    produces the error when executed. We can reproduce the second (working) example by compiling the query with literal_binds

    sql = str(qry.compile(engine, compile_kwargs={"literal_binds": True}))
    print(sql)
    """
    SELECT registration.id, coalesce(registration.date_expired, '2024-05-16 16:36:39') AS coalesce_1 
    FROM registration GROUP BY registration.id, coalesce(registration.date_expired, '2024-05-16 16:36:39')
    """
    

    or we could also use a subquery instead

    default_expiry = "2024-05-16 16:36:39"
    subq = select(
        Registration.id,
        func.coalesce(Registration.date_expired, default_expiry).label(
            "coalesce_1"
        ),
    ).subquery()
    qry = select(subq.c.id, subq.c.coalesce_1).group_by(
        subq.c.id, subq.c.coalesce_1
    )