Search code examples
clickhouse

Clickhouse - How can I get distinct values from all values inside an array type column


On a clickhouse database, I've an array type as column and I want to make an distinct for all elements inside them

Instead of getting this

Select distinct errors.message_grouping_fingerprint
FROM views
WHERE  (session_date >= toDate('2022-07-21')) and (session_date < toDate('2022-07-22'))
      and notEmpty(errors.message) = 1
      and project_id = 162
SETTINGS distributed_group_by_no_merge=0


[-8964675922652096680,-8964675922652096680]
[-8964675922652096680]
[-8964675922652096680,-8964675922652096680,-8964675922652096680,-8964675922652096680,-8964675922652096680,-8964675922652096680,-8964675922652096680,-827009490898812590,-8964675922652096680,-8964675922652096680,-8964675922652096680,-8964675922652096680]
[-8964675922652096680,-8964675922652096680,-8964675922652096680]
[-827009490898812590]
[-1660275624223727714,-1660275624223727714]
[1852265010681444046]
[-2552644061611887546]
[-7142229185866234523]
[-7142229185866234523,-7142229185866234523]

To get this

-8964675922652096680
-827009490898812590
-1660275624223727714
1852265010681444046
-2552644061611887546
-7142229185866234523

and finally, to make a count of all them

as 6


Solution

  • groupUniqArrayArray

    select arrayMap( i-> rand()%10, range(rand()%3+1)) arr from numbers(10);
    ┌─arr─────┐
    │ [0]     │
    │ [1]     │
    │ [7,7,7] │
    │ [8,8]   │
    │ [9,9,9] │
    │ [6,6,6] │
    │ [2,2]   │
    │ [8,8,8] │
    │ [2]     │
    │ [8,8,8] │
    └─────────┘
    
    SELECT
        groupUniqArrayArray(arr) AS uarr,
        length(uarr)
    FROM
    (
        SELECT arrayMap(i -> (rand() % 10), range((rand() % 3) + 1)) AS arr
        FROM numbers(10)
    )
    
    ┌─uarr──────────────┬─length(groupUniqArrayArray(arr))─┐
    │ [0,5,9,4,2,8,7,3] │                                8 │
    └───────────────────┴──────────────────────────────────┘
    

    ARRAY JOIN

    SELECT A
    FROM
    (
        SELECT arrayMap(i -> (rand() % 10), range((rand() % 3) + 1)) AS arr
        FROM numbers(10)
    )
    ARRAY JOIN arr AS A
    GROUP BY A
    
    
    ┌─A─┐
    │ 0 │
    │ 1 │
    │ 4 │
    │ 5 │
    │ 6 │
    │ 9 │
    └───┘