Search code examples
sqlclickhouse

Nearest matching text search using the same table in ClickHouse


CREATE TABLE DB.table
(
    `date` Date,
    `path` String,
    `visits` UInt64,
    `users` UInt64
) ENGINE = MergeTree
PARTITION BY toYYYYMM( `date`)
ORDER BY  `date`
SETTINGS index_granularity = 8192

Insert Data

INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','sofa',14837,12444);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','regalo',11917,8268);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','puertas',7498,6279);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','armario',7078,6230);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','sofa-cama',6771,5442);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','sofas',6182,5161);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','tractor-cortacesped',5740,3407);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','cocina',5634,4721);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','sillas',5448,4738);
INSERT INTO DB.Table (`date`,`path`,visits,users) VALUES ('2022-04-01','silla',448,478);

I want get the nearest matching text for each path field using the same table

I read this example, but in this case "clickhouse" isn't a value of the same table...


Solution

  • You do understand that it's a cross join and in case of 10 rows, it's 100 ngramDistance calls ?

    SELECT DISTINCT
        arraySort([t1.path, t2.path]) AS pair,
        ngramDistance(t1.path, t2.path) AS dist
    FROM t AS t1, t AS t2
    WHERE t1.path != t2.path
    ORDER BY dist ASC
    
    Query id: 3cdeb2af-3628-4289-9d83-91d571aa70e7
    
    ┌─pair────────────────────────────────┬───────dist─┐
    │ ['silla','sillas']                  │        0.2 │
    │ ['sofa','sofas']                    │ 0.33333334 │
    │ ['sofa','sofa-cama']                │ 0.71428573 │
    │ ['sofa-cama','sofas']               │       0.75 │
    │ ['puertas','sillas']                │          1 │
    │ ['sofas','tractor-cortacesped']     │          1 │
    │ ['cocina','sofa']                   │          1 │
    │ ['sillas','sofa']                   │          1 │
    │ ['silla','sofa']                    │          1 │
    │ ['puertas','regalo']                │          1 │
    │ ['armario','regalo']                │          1 │
    │ ['regalo','sofa-cama']              │          1 │
    │ ['regalo','sofas']                  │          1 │
    │ ['regalo','tractor-cortacesped']    │          1 │
    │ ['cocina','regalo']                 │          1 │
    │ ['sofa','tractor-cortacesped']      │          1 │
    │ ['regalo','silla']                  │          1 │
    │ ['armario','puertas']               │          1 │
    │ ['puertas','sofa-cama']             │          1 │
    │ ['puertas','sofas']                 │          1 │
    │ ['puertas','tractor-cortacesped']   │          1 │
    │ ['cocina','puertas']                │          1 │
    │ ['armario','sofa']                  │          1 │
    │ ['puertas','silla']                 │          1 │
    │ ['armario','sofa-cama']             │          1 │
    │ ['armario','sofas']                 │          1 │
    │ ['armario','tractor-cortacesped']   │          1 │
    │ ['armario','cocina']                │          1 │
    │ ['armario','sillas']                │          1 │
    │ ['armario','silla']                 │          1 │
    │ ['puertas','sofa']                  │          1 │
    │ ['sofa-cama','tractor-cortacesped'] │          1 │
    │ ['cocina','sofa-cama']              │          1 │
    │ ['sillas','sofa-cama']              │          1 │
    │ ['silla','sofa-cama']               │          1 │
    │ ['regalo','sofa']                   │          1 │
    │ ['cocina','sofas']                  │          1 │
    │ ['sillas','sofas']                  │          1 │
    │ ['silla','sofas']                   │          1 │
    │ ['cocina','tractor-cortacesped']    │          1 │
    │ ['sillas','tractor-cortacesped']    │          1 │
    │ ['silla','tractor-cortacesped']     │          1 │
    │ ['cocina','sillas']                 │          1 │
    │ ['cocina','silla']                  │          1 │
    │ ['regalo','sillas']                 │          1 │
    └─────────────────────────────────────┴────────────┘