Search code examples
greenplum

GPDB: do distribution key columns need to be declared in the same order across tables?


create table schema.table1 (
    col1 text,
    col2 text,
    ...,
    coln
)
distributed by (col1, col2);

create table schema.table2 (
    col1 text,
    col2 text,
    ...,
    colm
)
distributed by (col2, col1);

Q1. If 'table1' and 'table2' share the same columns (col1, col2) but declare them in a different order in the distribution key, are 'table1' and 'table2' still distributed compatibly (i.e., are matching rows co-located on the same segments)?

Q2. If the distributions are not compatible, will the following statement fix it:

alter table schema.table2 set distributed by (col1, col2);

Does this statement produce the correct (compatible) distribution?

Q3. Are there any tips for choosing a distribution key?

I tested joins with different distribution keys, but I couldn't find any advantage to using the same key order.


Solution

  • I've run tests on a Greenplum database, and I believe the column order must be the same in order to benefit from performant local joins within the same segments.

    As you can see in the "EXPLAIN ANALYZE" query plans below, when the second table uses the order (col2, col1), a Broadcast Motion occurs, whereas after altering the table so the distribution key order matches (col1, col2), the join runs locally within each segment with no motion.


    warehouse=# create schema schema;
    CREATE SCHEMA
    warehouse=# create table schema.table1 ( col1 text, col2 text ) distributed by (col1, col2); create table schema.table2 ( col1 text, col2 text, col3 text ) distributed by (col2, col1);
    CREATE TABLE
    CREATE TABLE
    warehouse=# insert into schema.table1 values ('a','b'),('c','d'),('e','f');
    INSERT 0 3
    warehouse=# insert into schema.table2 values ('a','b'),('c','d'),('x','y');
    INSERT 0 3
    warehouse=# analyze schema.table1;
    ANALYZE
    warehouse=# analyze schema.table2;
    ANALYZE
    warehouse=# select * from schema.table1 a left join schema.table2 b on a.col1 = b.col1 and a.col2 = b.col2;
     col1 | col2 | col1 | col2 | col3 
    ------+------+------+------+------
     c    | d    | c    | d    | 
     e    | f    |      |      | 
     a    | b    | a    | b    | 
    (3 rows)
    
    warehouse=# explain analyze select * from schema.table1 a left join schema.table2 b on a.col1 = b.col1 and a.col2 = b.col2;
                                                                       QUERY PLAN                                                       
                
    ------------------------------------------------------------------------------------------------------------------------------------
    ------------
     Gather Motion 24:1  (slice2; segments: 24)  (cost=0.00..862.00 rows=6 width=8) (actual time=6.303..9.118 rows=3 loops=1)
       ->  Hash Left Join  (cost=0.00..862.00 rows=1 width=8) (actual time=5.279..6.320 rows=1 loops=1)
             Hash Cond: ((table1.col1 = table2.col1) AND (table1.col2 = table2.col2))
             Extra Text: (seg13)  Hash chain length 1.0 avg, 1 max, using 3 of 524288 buckets.
             ->  Seq Scan on table1  (cost=0.00..431.00 rows=1 width=4) (actual time=0.007..0.007 rows=1 loops=1)
             ->  Hash  (cost=431.00..431.00 rows=3 width=4) (actual time=0.028..0.028 rows=3 loops=1)
                   ->  Broadcast Motion 24:24  (slice1; segments: 24)  (cost=0.00..431.00 rows=3 width=4) (actual time=0.010..0.022 rows
    =3 loops=1)
                         ->  Seq Scan on table2  (cost=0.00..431.00 rows=1 width=4) (actual time=0.012..0.013 rows=1 loops=1)
     Planning time: 8.386 ms
       (slice0)    Executor memory: 127K bytes.
       (slice1)    Executor memory: 58K bytes avg x 24 workers, 58K bytes max (seg0).
       (slice2)    Executor memory: 4176K bytes avg x 24 workers, 4176K bytes max (seg0).  Work_mem: 1K bytes max.
     Memory used:  128000kB
     Optimizer: Pivotal Optimizer (GPORCA)
     Execution time: 30.778 ms
    (15 rows)
    
    warehouse=# create table schema.table3 ( col1 text, col2 text ) distributed by (col1, col2);
    CREATE TABLE
    warehouse=# analyze schema.table3;
    ANALYZE
    warehouse=# insert into schema.table3 values ('a','b'),('c','d'),('x','y');
    INSERT 0 3
    warehouse=# explain analyze select * from schema.table1 a left join schema.table3 b on a.col1 = b.col1 and a.col2 = b.col2;
                                                            QUERY PLAN                                                         
    ---------------------------------------------------------------------------------------------------------------------------
     Gather Motion 24:1  (slice1; segments: 24)  (cost=0.00..862.00 rows=3 width=20) (actual time=2.533..2.592 rows=3 loops=1)
       ->  Hash Left Join  (cost=0.00..862.00 rows=1 width=20) (actual time=0.897..2.012 rows=1 loops=1)
             Hash Cond: ((table1.col1 = table3.col1) AND (table1.col2 = table3.col2))
             Extra Text: (seg13)  Hash chain length 0.0 avg, 0 max, using 0 of 524288 buckets.
             ->  Seq Scan on table1  (cost=0.00..431.00 rows=1 width=4) (actual time=0.007..0.008 rows=1 loops=1)
             ->  Hash  (cost=431.00..431.00 rows=1 width=16) (actual time=0.035..0.035 rows=1 loops=1)
                   ->  Seq Scan on table3  (cost=0.00..431.00 rows=1 width=16) (actual time=0.029..0.029 rows=1 loops=1)
     Planning time: 17.175 ms
       (slice0)    Executor memory: 63K bytes.
       (slice1)    Executor memory: 4172K bytes avg x 24 workers, 4172K bytes max (seg0).  Work_mem: 1K bytes max.
     Memory used:  128000kB
     Optimizer: Pivotal Optimizer (GPORCA)
     Execution time: 3.682 ms
    (13 rows)
    
    warehouse=# create table schema.table4 ( col1 text, col2 text, col3 text ) distributed by (col1,col2);
    CREATE TABLE
    warehouse=# analyze schema.table4;
    ANALYZE
    warehouse=# insert into schema.table4 values ('a','b','aa'),('c','d','bb'),('x','y','cc');
    INSERT 0 3
    warehouse=# analyze schema.table4;
    ANALYZE
    warehouse=# explain analyze select * from schema.table1 a left join schema.table4 b on a.col1 = b.col1 and a.col2 = b.col2;
                                                            QUERY PLAN                                                         
    ---------------------------------------------------------------------------------------------------------------------------
     Gather Motion 24:1  (slice1; segments: 24)  (cost=0.00..862.00 rows=6 width=11) (actual time=2.631..2.792 rows=3 loops=1)
       ->  Hash Left Join  (cost=0.00..862.00 rows=1 width=11) (actual time=0.873..1.939 rows=1 loops=1)
             Hash Cond: ((table1.col1 = table4.col1) AND (table1.col2 = table4.col2))
             Extra Text: (seg13)  Hash chain length 0.0 avg, 0 max, using 0 of 524288 buckets.
             ->  Seq Scan on table1  (cost=0.00..431.00 rows=1 width=4) (actual time=0.006..0.007 rows=1 loops=1)
             ->  Hash  (cost=431.00..431.00 rows=1 width=7) (actual time=0.019..0.019 rows=1 loops=1)
                   ->  Seq Scan on table4  (cost=0.00..431.00 rows=1 width=7) (actual time=0.010..0.011 rows=1 loops=1)
     Planning time: 26.950 ms
       (slice0)    Executor memory: 127K bytes.
       (slice1)    Executor memory: 4172K bytes avg x 24 workers, 4172K bytes max (seg0).  Work_mem: 1K bytes max.
     Memory used:  128000kB
     Optimizer: Pivotal Optimizer (GPORCA)
     Execution time: 3.886 ms
    (13 rows)
    
    warehouse=# alter table schema.table4 set distributed by (col2,col1);
    ALTER TABLE
    warehouse=# explain analyze select * from schema.table1 a left join schema.table4 b on a.col1 = b.col1 and a.col2 = b.col2;
                                                                     QUERY PLAN                                                         
             
    ------------------------------------------------------------------------------------------------------------------------------------
    ---------
     Gather Motion 24:1  (slice2; segments: 24)  (cost=0.00..862.00 rows=6 width=11) (actual time=3.772..4.564 rows=3 loops=1)
       ->  Hash Left Join  (cost=0.00..862.00 rows=1 width=11) (actual time=1.863..3.460 rows=1 loops=1)
             Hash Cond: ((table1.col1 = table4.col1) AND (table1.col2 = table4.col2))
             Extra Text: (seg4)   Hash chain length 1.0 avg, 1 max, using 1 of 524288 buckets.
             ->  Redistribute Motion 24:24  (slice1; segments: 24)  (cost=0.00..431.00 rows=1 width=4) (actual time=0.736..1.179 rows=1 
    loops=1)
                   Hash Key: table1.col2, table1.col1
                   ->  Seq Scan on table1  (cost=0.00..431.00 rows=1 width=4) (actual time=0.012..0.013 rows=1 loops=1)
             ->  Hash  (cost=431.00..431.00 rows=1 width=7) (actual time=0.029..0.029 rows=1 loops=1)
                   ->  Seq Scan on table4  (cost=0.00..431.00 rows=1 width=7) (actual time=0.022..0.022 rows=1 loops=1)
     Planning time: 27.195 ms
       (slice0)    Executor memory: 127K bytes.
       (slice1)    Executor memory: 42K bytes avg x 24 workers, 42K bytes max (seg0).
       (slice2)    Executor memory: 4176K bytes avg x 24 workers, 4176K bytes max (seg0).  Work_mem: 1K bytes max.
     Memory used:  128000kB
     Optimizer: Pivotal Optimizer (GPORCA)
     Execution time: 6.069 ms
    (16 rows)