Search code examples
sqlclickhouse

How to collapse multiple call of the same function in clickhouse?


I have a table with data:

user items
1 9
2 8
3 7
4 2

And I need to find how many users have more than 1 item, more than 2 and until inf. I can use SELECT uniqExactIf(user, items >= 1), uniqExactIf(user, items >= 2)..., but how to make it shorter without calling uniqExactIf for infinite times?


Solution

  • create table data (user int, items int) Engine = Memory;
    insert into data values (1,     9)(2,   8)(3,   7)(4,   2);
    insert into data values (11,    9)(12,  8)(13,  7)(14,  2);
    insert into data values (111,   9)(112,     8);
    
    --slow
    
    SELECT
        items,
        uniqExactMerge(s) OVER (ORDER BY items DESC Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS _uniq
    FROM
    (
        SELECT
            items,
            uniqExactState(user) AS s
        FROM data
        GROUP BY items
    )
    ┌─items─┬─_uniq─┐
    │     9 │     3 │
    │     8 │     6 │
    │     7 │     8 │
    │     2 │    10 │
    └───────┴───────┘
    
    --faster but could be incorrect on blocks bounds
    
    SELECT
        items,
        runningAccumulate(s) AS _uniq
    FROM
    (
        SELECT
            items,
            uniqExactState(user) AS s
        FROM data
        GROUP BY items
        ORDER BY items DESC
    )
    ┌─items─┬─_uniq─┐
    │     9 │     3 │
    │     8 │     6 │
    │     7 │     8 │
    │     2 │    10 │
    └───────┴───────┘
    
    -- as a single column
    SELECT CAST(((groupArray((items, _uniq)) AS a).1, a.2), 'Map(Int64, Int64)') AS r
    FROM
    (
        SELECT
            items,
            runningAccumulate(s) AS _uniq
        FROM
        (
            SELECT
                items,
                uniqExactState(user) AS s
            FROM data
            GROUP BY items
            ORDER BY items DESC
        )
    )
    ┌─r──────────────────┐
    │ {9:3,8:6,7:8,2:10} │
    └────────────────────┘