Search code examples
apache-pig

Why can't generate from UNION operation


My pig Script is:

S22_JOIN = JOIN S0 BY (CD_SI,IDT_ETT_CTR), S7 BY (CD_SI,IDT_ETT_CTR);

    S22 = FOREACH S22_JOIN GENERATE
    S7::CD_SI                      AS CD_SI,
    S7::IDT_ETT_CTR                AS IDT_ETT_CTR,
    S7::CD_EFS                        AS CD_EFS,
    S7::MT_DSP                     AS MT_DSP,
    S7::MT_NAL                     AS MT_NAL,
    S7::MT_ENC_MOY                 AS MT_ENC_MOY,
    S7::MT_UTI                     AS MT_UTI,
    S7::MT_CAP_RST_DU              AS MT_CAP_RST_DU,
    S7::MT_ITT_CRU                 AS MT_ITT_CRU,
    S7::MT_CAP_ECN_IMP             AS MT_CAP_ECN_IMP,
    S7::MT_ITT_IMP                 AS MT_ITT_IMP,
    S7::MT_DNR_ECN                 AS MT_DNR_ECN,
    S7::CD_ETT_ORI                 AS CD_ETT_ORI,
    S7::MT_DSP_CVE                 AS MT_DSP_CVE,
    S7::MT_NAL_CVE                 AS MT_NAL_CVE,
    S7::MT_ENC_MOY_CVE             AS MT_ENC_MOY_CVE,
    S7::MT_CAP_IMP_CVE             AS MT_CAP_IMP_CVE,
    S7::MT_ITT_IMP_CVE             AS MT_ITT_IMP_CVE,
    S7::MT_GLB_IMP                 AS MT_GLB_IMP,
    S7::MT_GLB_IMP_CVE             AS MT_GLB_IMP_CVE,
    S7::MT_BN_INST                 AS MT_BN_INST,
    S7::MT_BN_INST_CVE             AS MT_BN_INST_CVE,
    S7::MT_BN_NV                   AS MT_BN_NV,
    S7::MT_BN_NV_CVE               AS MT_BN_NV_CVE,
    S7::MT_IMP                     AS MT_IMP,
    S7::MT_PROR                    AS MT_PROR,
    S7::MT_DEM                     AS MT_DEM,
    S7::MT_ITT_M                   AS MT_ITT_M;   

-- Grouper by

S23 = FOREACH S21 GENERATE

    CD_SI                      AS CD_SI,
    IDT_ETT_CTR                AS IDT_ETT_CTR,
    CD_EFS                         AS CD_EFS,
   (IDT_ETT_CTR_LIG == '' ? (MT_NAL_BIL - MT_CAP_RST_DU):99)   AS MT_DSP,
    MT_NAL                     AS MT_NAL,
    MT_ENC_MOY                 AS MT_ENC_MOY,
    MT_UTI                     AS MT_UTI,
    MT_CAP_RST_DU              AS MT_CAP_RST_DU,
    MT_ITT_CRU                 AS MT_ITT_CRU,
    MT_CAP_ECN_IMP             AS MT_CAP_ECN_IMP,
    MT_ITT_IMP                 AS MT_ITT_IMP,
    MT_DNR_ECN                 AS MT_DNR_ECN,
    CD_ETT_ORI                 AS CD_ETT_ORI,
    (IDT_ETT_CTR_LIG == '' ? (MT_NAL_BIL - MT_CAP_RST_DU):99)     AS 
    MT_DSP_CVE,
    MT_NAL_CVE                 AS MT_NAL_CVE,
    MT_ENC_MOY_CVE             AS MT_ENC_MOY_CVE,
    MT_CAP_IMP_CVE             AS MT_CAP_IMP_CVE,
    MT_ITT_IMP_CVE             AS MT_ITT_IMP_CVE,
    MT_GLB_IMP                 AS MT_GLB_IMP,
    MT_GLB_IMP_CVE             AS MT_GLB_IMP_CVE,
    MT_BN_INST                 AS MT_BN_INST,
    MT_BN_INST_CVE             AS MT_BN_INST_CVE,
    MT_BN_NV                   AS MT_BN_NV,
    MT_BN_NV_CVE               AS MT_BN_NV_CVE,
    MT_IMP                     AS MT_IMP,
    MT_PROR                    AS MT_PROR,
    MT_DEM                     AS MT_DEM,
    MT_ITT_M                   AS MT_ITT_M;               

S24 = GROUP S22 BY IDT_ETT_CTR;
S25 = GROUP S23 BY IDT_ETT_CTR;

S26 = UNION S24, S25;

S20 = GROUP S19 BY IDT_ETT_CTR;

S27 = UNION S20, S26;

Note that the result of the UNION operation (S27) loook like :

TE7420183801,"{(001,TE7420183801,29,0.0,60000.0,0.0,,34137.88,0.0,0.0,0.0,,ECRFI,0.0,60000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7424155609,"{(001,TE7424155609,29,0.0,616084.0,0.0,,407423.56,0.0,0.0,0.0,,ECRFI,0.0,616084.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7433106206,"{(001,TE7433106206,29,0.0,1241357.04,0.0,,1241357.04,0.0,0.0,0.0,,ECRFI,0.0,1241357.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7439115501,"{(001,TE7439115501,29,0.0,1044900.0,0.0,,776071.78,0.0,0.0,0.0,,ECRFI,0.0,1044900.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7439115502,"{(001,TE7439115502,29,0.0,116100.0,0.0,,86230.23,0.0,0.0,0.0,,ECRFI,0.0,116100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7439115503,"{(001,TE7439115503,29,0.0,-116100.0,0.0,,-83626.92,0.0,0.0,0.0,,ECRFI,0.0,-116100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7444031506,"{(001,TE7444031506,29,0.0,16802484.21,0.0,,14153891.85,0.0,0.0,0.0,,ECRFI,0.0,16802484.21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7446142004,"{(001,TE7446142004,29,0.0,47341.96,0.0,,35618.57,0.0,0.0,0.0,,ECRFI,0.0,47341.96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7458036405,"{(001,TE7458036405,29,0.0,3395911.0,0.0,,2576480.4,0.0,0.0,0.0,,ECRFI,0.0,3395911.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7461072603,"{(001,TE7461072603,29,0.0,1192162.0,0.0,,708855.6,0.0,0.0,0.0,,ECRFI,0.0,1192162.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7461072701,"{(001,TE7461072701,29,0.0,3700000.0,0.0,,2311590.89,0.0,0.0,0.0,,ECRFI,0.0,3700000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"
TE7474079601,"{(001,TE7474079601,29,0.0,9570000.0,0.0,,8010700.07,0.0,0.0,0.0,,ECRFI,0.0,9570000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,)}"

Now when I want to generate the final entity from it :

S28 = FOREACH S27 GENERATE
        CD_SI                        AS CD_SI,  
     IDT_ETT_CTR                            AS IDT_ETT_CTR,
     CD_EFS                         AS CD_EFS,
     MT_DSP                     AS MT_DSP,
     MT_NAL                       AS MT_NAL,
     MT_ENC_MOY                      AS MT_ENC_MOY,
     MT_UTI                      AS MT_UTI,
     MT_CAP_RST_DU                    AS MT_CAP_RST_DU,
     MT_ITT_CRU               AS MT_ITT_CRU,
     MT_CAP_ECN_IMP                 AS MT_CAP_ECN_IMP,
     MT_ITT_IMP               AS MT_ITT_IMP,
    MT_DNR_ECN                  AS MT_DNR_ECN,
    CD_ETT_ORI                  AS CD_ETT_ORI,
    MT_DSP_CVE                  AS MT_DSP_CVE,
    MT_NAL_CVE                  AS MT_NAL_CVE,
    MT_ENC_MOY_CVE              AS MT_ENC_MOY_CVE,
    MT_CAP_IMP_CVE              AS MT_CAP_IMP_CVE,
    MT_ITT_IMP_CVE              AS MT_ITT_IMP_CVE,
    MT_GLB_IMP                  AS MT_GLB_IMP,
    MT_GLB_IMP_CVE              AS MT_GLB_IMP_CVE,
    MT_BN_INST                  AS MT_BN_INST,
    MT_BN_INST_CVE              AS MT_BN_INST_CVE,
    MT_BN_NV                    AS MT_BN_NV,
    MT_BN_NV_CVE                AS MT_BN_NV_CVE,
    S10.MT_ENMO + S5.MT_ENC_MOY AS MT_ENC_EFF,
    MT_IMP                      AS MT_IMP,
    MT_PROR                     AS MT_PROR,
    MT_DEM                      AS MT_DEM,
    MT_ITT_M                    AS MT_ITT_M,
    (S6.IDT_ETT_CTR_LIG == '' ? (S6.MT_NAL_BIL + S6.MT_AUT) :'99') AS MT_NAL_LIG;

    STORE S28 INTO '$PathDataWorkingFile' USING CSVExcelStorage(',', 'YES_MULTILINE');

I get this error :

Invalid field reference. Referenced field [CD_SI] does not exist in schema: .

Note that there is no problem about the resources S6, S7, S20 ,... because I verified them and they generated the S27 data without problem.

BUt in reality CD_SI exist it has as values 001 as you can see in first line second column

It's really wired no ?


Solution

  • Since you are using union on grouped relations. You can just store the 2nd field using positional notation

    S24 = GROUP S22 BY IDT_ETT_CTR;
    S25 = GROUP S23 BY IDT_ETT_CTR;
    
    S26 = UNION S24, S25;
    
    S20 = GROUP S19 BY IDT_ETT_CTR;
    
    S27 = UNION S20, S26;
    
    S28 = FOREACH S27 GENERATE S27.$1
    
    STORE S28 INTO '$PathDataWorkingFile' USING CSVExcelStorage(',', 'YES_MULTILINE');
    

    Or

    Enumerate the groups and then join.Since you are using UNION on grouped relations, you really have only two fields i.e. the grouped column and the the rest in the form of a bag.

    S24 = GROUP S22 BY IDT_ETT_CTR;
    S25 = GROUP S23 BY IDT_ETT_CTR;
    
    S24_1 = FOREACH S24 GENERATE group as IDT_ETT_CTR,
              CD_SI                        AS CD_SI,  
     IDT_ETT_CTR                            AS IDT_ETT_CTR,
     CD_EFS                         AS CD_EFS,
     MT_DSP                     AS MT_DSP,
     MT_NAL                       AS MT_NAL,
     MT_ENC_MOY                      AS MT_ENC_MOY,
     MT_UTI                      AS MT_UTI,
     MT_CAP_RST_DU                    AS MT_CAP_RST_DU,
     MT_ITT_CRU               AS MT_ITT_CRU,
     MT_CAP_ECN_IMP                 AS MT_CAP_ECN_IMP,
     MT_ITT_IMP               AS MT_ITT_IMP,
    MT_DNR_ECN                  AS MT_DNR_ECN,
    CD_ETT_ORI                  AS CD_ETT_ORI,
    MT_DSP_CVE                  AS MT_DSP_CVE,
    MT_NAL_CVE                  AS MT_NAL_CVE,
    MT_ENC_MOY_CVE              AS MT_ENC_MOY_CVE,
    MT_CAP_IMP_CVE              AS MT_CAP_IMP_CVE,
    MT_ITT_IMP_CVE              AS MT_ITT_IMP_CVE,
    MT_GLB_IMP                  AS MT_GLB_IMP,
    MT_GLB_IMP_CVE              AS MT_GLB_IMP_CVE,
    MT_BN_INST                  AS MT_BN_INST,
    MT_BN_INST_CVE              AS MT_BN_INST_CVE,
    MT_BN_NV                    AS MT_BN_NV,
    MT_BN_NV_CVE                AS MT_BN_NV_CVE,
    S10.MT_ENMO + S5.MT_ENC_MOY AS MT_ENC_EFF,
    MT_IMP                      AS MT_IMP,
    MT_PROR                     AS MT_PROR,
    MT_DEM                      AS MT_DEM,
    MT_ITT_M                    AS MT_ITT_M,
    (S6.IDT_ETT_CTR_LIG == '' ? (S6.MT_NAL_BIL + S6.MT_AUT) :'99') AS MT_NAL_LIG;
    
    S25_1 = FOREACH S25 GENERATE group as IDT_ETT_CTR,
              CD_SI                        AS CD_SI,  
     IDT_ETT_CTR                            AS IDT_ETT_CTR,
     CD_EFS                         AS CD_EFS,
     MT_DSP                     AS MT_DSP,
     MT_NAL                       AS MT_NAL,
     MT_ENC_MOY                      AS MT_ENC_MOY,
     MT_UTI                      AS MT_UTI,
     MT_CAP_RST_DU                    AS MT_CAP_RST_DU,
     MT_ITT_CRU               AS MT_ITT_CRU,
     MT_CAP_ECN_IMP                 AS MT_CAP_ECN_IMP,
     MT_ITT_IMP               AS MT_ITT_IMP,
    MT_DNR_ECN                  AS MT_DNR_ECN,
    CD_ETT_ORI                  AS CD_ETT_ORI,
    MT_DSP_CVE                  AS MT_DSP_CVE,
    MT_NAL_CVE                  AS MT_NAL_CVE,
    MT_ENC_MOY_CVE              AS MT_ENC_MOY_CVE,
    MT_CAP_IMP_CVE              AS MT_CAP_IMP_CVE,
    MT_ITT_IMP_CVE              AS MT_ITT_IMP_CVE,
    MT_GLB_IMP                  AS MT_GLB_IMP,
    MT_GLB_IMP_CVE              AS MT_GLB_IMP_CVE,
    MT_BN_INST                  AS MT_BN_INST,
    MT_BN_INST_CVE              AS MT_BN_INST_CVE,
    MT_BN_NV                    AS MT_BN_NV,
    MT_BN_NV_CVE                AS MT_BN_NV_CVE,
    S10.MT_ENMO + S5.MT_ENC_MOY AS MT_ENC_EFF,
    MT_IMP                      AS MT_IMP,
    MT_PROR                     AS MT_PROR,
    MT_DEM                      AS MT_DEM,
    MT_ITT_M                    AS MT_ITT_M,
    (S6.IDT_ETT_CTR_LIG == '' ? (S6.MT_NAL_BIL + S6.MT_AUT) :'99') AS MT_NAL_LIG;
    
    S26 = UNION S24_1, S25_1;
    
    S20 = GROUP S19 BY IDT_ETT_CTR;
    
    S20_1 = FOREACH S20 GENERATE group as IDT_ETT_CTR,
              CD_SI                        AS CD_SI,  
     IDT_ETT_CTR                            AS IDT_ETT_CTR,
     CD_EFS                         AS CD_EFS,
     MT_DSP                     AS MT_DSP,
     MT_NAL                       AS MT_NAL,
     MT_ENC_MOY                      AS MT_ENC_MOY,
     MT_UTI                      AS MT_UTI,
     MT_CAP_RST_DU                    AS MT_CAP_RST_DU,
     MT_ITT_CRU               AS MT_ITT_CRU,
     MT_CAP_ECN_IMP                 AS MT_CAP_ECN_IMP,
     MT_ITT_IMP               AS MT_ITT_IMP,
    MT_DNR_ECN                  AS MT_DNR_ECN,
    CD_ETT_ORI                  AS CD_ETT_ORI,
    MT_DSP_CVE                  AS MT_DSP_CVE,
    MT_NAL_CVE                  AS MT_NAL_CVE,
    MT_ENC_MOY_CVE              AS MT_ENC_MOY_CVE,
    MT_CAP_IMP_CVE              AS MT_CAP_IMP_CVE,
    MT_ITT_IMP_CVE              AS MT_ITT_IMP_CVE,
    MT_GLB_IMP                  AS MT_GLB_IMP,
    MT_GLB_IMP_CVE              AS MT_GLB_IMP_CVE,
    MT_BN_INST                  AS MT_BN_INST,
    MT_BN_INST_CVE              AS MT_BN_INST_CVE,
    MT_BN_NV                    AS MT_BN_NV,
    MT_BN_NV_CVE                AS MT_BN_NV_CVE,
    S10.MT_ENMO + S5.MT_ENC_MOY AS MT_ENC_EFF,
    MT_IMP                      AS MT_IMP,
    MT_PROR                     AS MT_PROR,
    MT_DEM                      AS MT_DEM,
    MT_ITT_M                    AS MT_ITT_M,
    (S6.IDT_ETT_CTR_LIG == '' ? (S6.MT_NAL_BIL + S6.MT_AUT) :'99') AS MT_NAL_LIG;
    
    S27 = UNION S20_1, S26;
    
    S28 = FOREACH S27 GENERATE
        CD_SI                        AS CD_SI,  
     IDT_ETT_CTR                            AS IDT_ETT_CTR,
     CD_EFS                         AS CD_EFS,
     MT_DSP                     AS MT_DSP,
     MT_NAL                       AS MT_NAL,
     MT_ENC_MOY                      AS MT_ENC_MOY,
     MT_UTI                      AS MT_UTI,
     MT_CAP_RST_DU                    AS MT_CAP_RST_DU,
     MT_ITT_CRU               AS MT_ITT_CRU,
     MT_CAP_ECN_IMP                 AS MT_CAP_ECN_IMP,
     MT_ITT_IMP               AS MT_ITT_IMP,
    MT_DNR_ECN                  AS MT_DNR_ECN,
    CD_ETT_ORI                  AS CD_ETT_ORI,
    MT_DSP_CVE                  AS MT_DSP_CVE,
    MT_NAL_CVE                  AS MT_NAL_CVE,
    MT_ENC_MOY_CVE              AS MT_ENC_MOY_CVE,
    MT_CAP_IMP_CVE              AS MT_CAP_IMP_CVE,
    MT_ITT_IMP_CVE              AS MT_ITT_IMP_CVE,
    MT_GLB_IMP                  AS MT_GLB_IMP,
    MT_GLB_IMP_CVE              AS MT_GLB_IMP_CVE,
    MT_BN_INST                  AS MT_BN_INST,
    MT_BN_INST_CVE              AS MT_BN_INST_CVE,
    MT_BN_NV                    AS MT_BN_NV,
    MT_BN_NV_CVE                AS MT_BN_NV_CVE,
    S10.MT_ENMO + S5.MT_ENC_MOY AS MT_ENC_EFF,
    MT_IMP                      AS MT_IMP,
    MT_PROR                     AS MT_PROR,
    MT_DEM                      AS MT_DEM,
    MT_ITT_M                    AS MT_ITT_M,
    (S6.IDT_ETT_CTR_LIG == '' ? (S6.MT_NAL_BIL + S6.MT_AUT) :'99') AS MT_NAL_LIG;
    
    STORE S28 INTO '$PathDataWorkingFile' USING CSVExcelStorage(',', 'YES_MULTILINE');