So, lets say I've got some table with 2 fields - first_col
is int
and second_col
is array[int]
:
from sqlalchemy import (
Table,
select,
Integer,
)
from sqlalchemy.dialects.postgresql import ARRAY
some_table = Table(
"some_table",
Column("id", Integer, primary_key=True),
Column("first_col", Integer),
Column("second_col", ARRAY(Integer)),
)
And, I want to take all objects from the db
with both first_col
and second_col
and like aggregate them, I can show what I want in python code:
def get_some_stuff_and_aggregate() -> dict:
query = (
select(
[
some_table.c.first_col,
some_table.c.second_col,
]
)
.select_from(some_table)
)
# execute query here... it's not important here
query_res = ...
res_dict = {}
for some_obj in query_res:
for some_ids in (
[some_obj.first_col],
some_obj.second_col,
):
for some_id in some_ids:
if some_id in res_dict:
res_dict[some_id] += 1
else:
res_dict[some_id] = 1
return res_dict
But, I guess I can do this at the SQL (ORM) level without this dirty and slow python aggregation, can you please help me? How to do it better, using ORM
?
There is probably a better SQL, but this seems to do what I think you ask for.
from sqlalchemy import Column, Integer, MetaData, Table, create_engine, func, insert, select
from sqlalchemy.dialects.postgresql import ARRAY, array
from sqlalchemy.orm import Session
engine = create_engine("postgresql+psycopg://")
metadata = MetaData()
some_table = Table(
"some_table",
metadata,
Column("id", Integer, primary_key=True),
Column("first_col", Integer),
Column("second_col", ARRAY(Integer)),
)
metadata.create_all(engine)
dummy_data = [
{"first_col": 1, "second_col": [1, 2, 1, 4, 5]},
{"first_col": 3, "second_col": [2, 4, 6, 2, 10]},
{"first_col": 6, "second_col": [3, 6, 9, 3, 15]},
{"first_col": 4, "second_col": [4, 8, 4, 16, 20]},
{"first_col": 2, "second_col": [5, 10, 15, 5, 25]},
]
with Session(engine) as session:
session.execute(insert(some_table).values(dummy_data))
session.commit()
with Session(engine) as session:
subq = select(
func.unnest(
some_table.c.second_col.op("||")(array([some_table.c.first_col]))
).label("element")
).subquery()
statement = select(subq.c.element, func.count("*")).group_by(subq.c.element)
for i in session.execute(statement):
print(i)
This generates the following sql and output.
SELECT
anon_1.element,
count(% (count_2)
s::VARCHAR) AS count_1
FROM (
SELECT
unnest(some_table.second_col || ARRAY [some_table.first_col]) AS element
FROM
some_table) AS anon_1
GROUP BY
anon_1.element
(9, 1)
(15, 2)
(3, 3)
(5, 3)
(4, 5)
(10, 2)
(6, 3)
(2, 4)
(16, 1)
(25, 1)
(20, 1)
(1, 3)
(8, 1)