Search code examples
sqlprestotrino

Unable to format trino sql query in expected format


I Have a query-

WITH dataset(ns, tid, nid, type) AS ( values ('PQR',  'ITKT20254',  'A','X'),
                                        ('PQR', 'ITKT20223',    'A','X'), 
                                        ('PQR', 'ABCD23456',    'B','X'), 
                                         ('PQR', 'ABCD54321',   'B','X'), 
                                        ('PQR', 'ITKT21111',    'A','X'),
                                        ('PQR', 'ITKT20000',    'A','Y')
                                         )
 select ns,nid, 
    cast(row(tradelist , res1) as row(tid array(varchar),res varchar)) as finalMap 
from
  (select ns,nid,tradelist,res1 
   from (
      select ns,nid,
          array_agg(cast(tid  as varchar)) as tradelist,
          'not include' as  res1 from
          (select ns,nid,tid from  dataset where type='X' )
       
      group by ns, nid 
     union 
      select ns,nid, 
          array_agg(cast(tid  as varchar)) as tradelist,
          'include' as  res1  from
           (select ns,nid, tid from  dataset where type='Y' )
       
      group by ns, nid
    )
   )

Getting the result-

ns      nid      finalMap
PQR     A   {tid=[ITKT20254, ITKT20223, ITKT21111], res=not include}
PQR     A   {tid=[ITKT20000], res=include}
PQR     B   {tid=[ABCD23456, ABCD54321], res=not include}

Expected output-

ns      nid      finalMap
PQR     A   [{tid=[ITKT20254, ITKT20223, ITKT21111], res=not include},{tid=[ITKT20000], res=include}]
PQR     B   [{tid=[ABCD23456, ABCD54321], res=not include}]

I'm trying to modify the query to get above format. but getting errors in array_agg function


Solution

  • Not sure why you have not used my previous answer but for this one try:

    select ns, nid,
      transform(array['include', 'not include'] -- "generated" values 
          ,t -> cast(row(tids, t) as row(tid array(varchar), res varchar))) as finalMap
    from (select ns,
                 nid,
                 array_agg(tid) tids
          from dataset
          group by ns, nid);
    

    Since you generate the include/not include pair you can just use transform to generate corresponding rows

    Output:

    ns nid finalMap
    PQR A [{tid=[ITKT20254, ITKT20223, ITKT21111], res=include}, {tid=[ITKT20254, ITKT20223, ITKT21111], res=not include}]
    PQR B [{tid=[ABCD23456, ABCD54321], res=include}, {tid=[ABCD23456, ABCD54321], res=not include}]

    UPD

    Just add grouping and array_agg:

     select ns,nid,
        array_agg(cast(row(tradelist , res1) as row(tid array(varchar),res varchar))) as finalMap
    from
      (select ns,nid,tradelist,res1
       from (
          select ns,nid,
              array_agg(cast(tid  as varchar)) as tradelist,
              'not include' as  res1 from
              (select ns,nid,tid from  dataset where type='X' )
    
          group by ns, nid
         union
          select ns,nid,
              array_agg(cast(tid  as varchar)) as tradelist,
              'include' as  res1  from
               (select ns,nid, tid from  dataset where type='Y' )
    
          group by ns, nid
        )
       )
    group by ns, nid
    

    Output:

    ns nid finalMap
    PQR A [{tid=[ITKT20254, ITKT20223, ITKT21111], res=not include}, {tid=[ITKT20000], res=include}]
    PQR B [{tid=[ABCD23456, ABCD54321], res=not include}]