Search code examples
pythonsql-serversqlalchemywindow-functions

How can I do windowed query on multiple columns primary key?


Based on example found here but I guess I'm not understanding it. This works for single column primary keys but fails on multiple ones. This is my code

@classmethod
    def column_windows(cls, q, columns, windowsize, where = None):
        """Return a series of WHERE clauses against 
        a given column that break it into windows.

        Result is an iterable of tuples, consisting of
        ((start, end), whereclause), where (start, end) are the ids.

        Requires a database that supports window functions, 
        i.e. Postgresql, SQL Server, Oracle.

        Enhance this yourself !  Add a "where" argument
        so that windows of just a subset of rows can
        be computed.
        """
             #Here is the thing... how to compare...
        def int_for_range(start_id, end_id):
            if end_id:
                return and_(
                    columns>=start_id,
                    columns<end_id
                )
            else:
                return columns>=start_id

        if isinstance(columns, Column):
            columns_k=(columns,)
        else:
            columns_k=tuple(columns)
        q2=None
        cols=()
        for c in columns:
            cols = cols + (c,)
            if not q2:
                q2=q.session.query(c)
            else:
                q2=q2.add_column(c)
        q2 = q2.add_column(func.row_number().over(order_by=columns_k).label('rownum'))
        q2=q2.filter(q._criterion).from_self(cols)
        if windowsize > 1:
            q2 = q2.filter("rownum %% %d=1" % windowsize)
        for res in q2:
            print res
        intervals = [id for id, in q2]
        while intervals:
            start = intervals.pop(0)
            if intervals:
                end = intervals[0]
            else:
                end = None
            yield int_for_range(start, end)

    @classmethod
    def windowed_query(cls, q, columns, windowsize):
        """"Break a Query into windows on a given column."""

        for whereclause in cls.column_windows(q,columns, windowsize):
            for row in q.filter(whereclause).order_by(columns):
                yield row

Now I have the problem when comparing the set of columns of the primary key. Well I guess kind of recursive clause generating function should do it... Let's try it...


Solution

  • Well, result is not what expected but got it to work: Now it really windows any query keeping all in place, multi column unique ordering, and so on: Here is my code, hope it may be usefull for someone else:

    @classmethod
        def window_query(cls, q, windowsize, windows=None):
            """
                q=Query object we want to window results
                windowsize=The number of elements each window has
                windows=The window, or window list, numbers: 1-based to query
            """
            windowselect=False
            if windows:
                if not isinstance(windows,list):
                    windows=list(windows)
                windowselect=True
            #Appending u_columns to ordered counting subquery will ensure unique ordering
            u_columns=list([col for col in cls.getBestUniqueColumns()])
            #o_columns is the list of order by columns for the query
            o_columns=list([col for col in q._order_by])
            #we append columns from u_columns not in o_columns to ensure unique ordering but keeping the desired one
            sq_o_columns=list(o_columns)
            for col in u_columns:
                if not col in sq_o_columns:
                    sq_o_columns.append(col)
    
            sub=None
            #we select unique columns in subquery that we'll need to join in parent query
            for col in u_columns:
                if not sub:
                    sub=q.session.query(col)
                else:
                    sub=sub.add_column(col)
    
            #Generate a tuple from sq_o_columns list (I don't know why over() won't accept list itself TODO: more elegant
            sq_o_col_tuple=()
            for col in sq_o_columns:
                sq_o_col_tuple=sq_o_col_tuple + (col,)
            #we add row counting column, counting on generated combined ordering+unique columns tuple
            sub = sub.add_column(func.row_number().over(order_by=sq_o_col_tuple).label('rownum')).filter(q._criterion)
    
            #Prepare sub query to use as subquery (LOL)
            sub=sub.subquery('lacrn')
    
            #Prepare join ON clauses epxression comparing unique columns defined by u_columns 
            joinclause=expression.BooleanClauseList()
            for col in u_columns:
                joinclause=joinclause.__and__(col == sub.c[col.key])
            #Make the joining
            q=q.join(sub,joinclause
                     )
            i=-1
            while True:
                #We try to query windows defined by windows list
                if windowselect:
                    #We want selected-windows-results to returned
                    if windows:
                        i=windows.pop(0)-1
                    else:
                        break
                else:
                    #We want all-windows-results to be returned
                    i=i+1
                res=q.filter(and_(sub.c.rownum > (i*windowsize), sub.c.rownum <= ((i+1)*windowsize))).all()
                if not (res or windowselect):
                    #We end an all-windows-results because of no more results, we must check if is selected-window-query
                    #because of selected-window-results may not exist and the are unordered
                    #EX: [1,2,9999999999999,3] : Assuming the third page required has no results it will return pages 1, 2, and 3 
                    break
                for row in res:
                    yield row