Search code examples
postgresqlconcatenationstring-agg

Concatenate few rows into a single row with comma seperated using string_agg in postgreSQL


clique_bait.page_hierarchy

CREATE TABLE clique_bait.page_hierarchy (
  "page_id" INTEGER,
  "page_name" VARCHAR(14),
  "product_category" VARCHAR(9),
  "product_id" INTEGER
);
sample input 
  ('1', 'Home Page', null, null),
  ('2', 'All Products', null, null),
  ('3', 'Salmon', 'Fish', '1'),
  ('4', 'Kingfish', 'Fish', '2'),
  ('5', 'Tuna', 'Fish', '3'),
  ('6', 'Russian Caviar', 'Luxury', '4'),
  ('7', 'Black Truffle', 'Luxury', '5'),

clique_bait.events

CREATE TABLE clique_bait.events (
  "visit_id" VARCHAR(6),
  "cookie_id" VARCHAR(6),
  "page_id" INTEGER,
  "event_type" INTEGER,
  "sequence_number" INTEGER,
  "event_time" TIMESTAMP
);

sample input: 
  ('ccf365', 'c4ca42', '1', '1', '1', '2020-02-04 19:16:09.182546'),
  ('ccf365', 'c4ca42', '2', '1', '2', '2020-02-04 19:16:17.358191'),
  ('ccf365', 'c4ca42', '6', '1', '3', '2020-02-04 19:16:58.454669'),
  ('ccf365', 'c4ca42', '9', '1', '4', '2020-02-04 19:16:58.609142'),
  ('ccf365', 'c4ca42', '9', '2', '5', '2020-02-04 19:17:51.72942'),
  ('ccf365', 'c4ca42', '10', '1', '6', '2020-02-04 19:18:11.605815'),
  ('ccf365', 'c4ca42', '10', '2', '7', '2020-02-04 19:19:10.570786'),
  ('ccf365', 'c4ca42', '11', '1', '8', '2020-02-04 19:19:46.911728'),
  ('ccf365', 'c4ca42', '11', '2', '9', '2020-02-04 19:20:45.27469'),
  ('ccf365', 'c4ca42', '12', '1', '10', '2020-02-04 19:20:52.307244'),
  ('ccf365', 'c4ca42', '13', '3', '11', '2020-02-04 19:21:26.242563'),
  ('d58cbd', 'c81e72', '1', '1', '1', '2020-01-18 23:40:54.761906'),
  ('d58cbd', 'c81e72', '2', '1', '2', '2020-01-18 23:41:06.391027'),
  ('d58cbd', 'c81e72', '4', '1', '3', '2020-01-18 23:42:02.213001'),
  ('d58cbd', 'c81e72', '4', '2', '4', '2020-01-18 23:42:02.370046'),
  ('d58cbd', 'c81e72', '5', '1', '5', '2020-01-18 23:42:44.717024'),
  ('d58cbd', 'c81e72', '5', '2', '6', '2020-01-18 23:43:11.121855'),
  ('d58cbd', 'c81e72', '7', '1', '7', '2020-01-18 23:43:25.806239'),
  ('d58cbd', 'c81e72', '8', '1', '8', '2020-01-18 23:43:40.537995'),
  ('d58cbd', 'c81e72', '8', '2', '9', '2020-01-18 23:44:14.026393'),
  ('d58cbd', 'c81e72', '10', '1', '10', '2020-01-18 23:44:22.103768'),
  ('d58cbd', 'c81e72', '10', '2', '11', '2020-01-18 23:45:00.004781'),
  ('d58cbd', 'c81e72', '12', '1', '12', '2020-01-18 23:45:38.186554')

clique_bait.event_identifier

CREATE TABLE clique_bait.event_identifier (
  "event_type" INTEGER,
  "event_name" VARCHAR(13)
);

Sample input
  ('1', 'Page View'),
  ('2', 'Add to Cart'),
  ('3', 'Purchase'),
  ('4', 'Ad Impression'),
  ('5', 'Ad Click');

The output i need is Visit_id, Page_name which are added to cart concatenated with commas

my query

select visit_id, string_agg(page_name::character varying, ',') 
within group (order by sequence_number) as cart_items                        
from clique_bait.events e 
join clique_bait.page_hierarchy ph 
on e.page_id = ph.page_id 
join clique_bait.event_identifier ei 
on ei.event_type = e.event_type
where event_name = 'Add to Cart'
group by visit_id

is not working, error: function string_agg(character varying, unknown, integer) does not exist


Solution

  • Give this a shot:

    select visit_id, string_agg(page_name, ',' order by sequence_number) as cart_items                        
    from events e 
    join page_hierarchy ph on e.page_id = ph.page_id 
    join event_identifier ei on ei.event_type = e.event_type
    where event_name = 'Add to Cart'
    group by visit_id
    

    The order by sequence_number within the string_agg(...) function will sort your comma separated output based on sequence number.

    Here's an example with the sample data you provided: https://dbfiddle.uk/?rdbms=postgres_13&fiddle=52077d5be605a51d3a7bb14152a392df

    Here're the results of that:

    visit_id | cart_items   
    :------- | :------------
    d58cbd   | Kingfish,Tuna