Search code examples
awk, substring, string-matching, vcf-variant-call-format

awk print 4 columns and a substring of $8


I have a file in VCF format with many pieces of information in the 8th column:

# ... rest of file ...
1   11850891    rs753917964 C   T   22276.39    PASS    non_cancer_nhomalt_nfe_seu=0;AC_eas=0;AN_eas=18390;AF_eas=0.00000e+00;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=1;non_neuro_AN_nfe_female=39830;non_neuro_AF_nfe_female=2.51067e-05;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=16214;non_neuro_AF_afr=0.00000e+00;non_neuro_nhomalt_afr=0;controls_AC_raw=2;controls_AN_raw=109408;controls_AF_raw=1.82802e-05;controls_nhomalt_raw=0;non_cancer_AC_eas=0;non_cancer_AN_eas=17690;non_cancer_AF_eas=0.00000e+00;non_cancer_nhomalt_eas=0;non_cancer_AC_amr_female=0;non_cancer_AN_amr_female=20086;non_cancer_AF_amr_female=0.00000e+00;non_cancer_nhomalt_amr_female=0;non_neuro_AC_nfe_swe=0;non_neuro_AN_nfe_swe=14780;non_neuro_AF_nfe_swe=0.00000e+00;non_neuro_nhomalt_nfe_swe=0;controls_AC_male=2;controls_AN_male=58114;controls_AF_male=3.44151e-05;controls_nhomalt_male=0;non_topmed_AC_male=5;non_topmed_AN_male=133538;non_topmed_AF_male=3.74425e-05;non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=114;controls_AF_eas_jpn=0.00000e+00;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=0;controls_AN_nfe_female=19148;controls_AF_nfe_female=0.00000e+00;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=30522;non_neuro_AF_amr=0.00000e+00;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=6838;non_neuro_AF_eas_female=0.00000e+00;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=5180;AF_asj_male=0.00000e+00;nhomalt_asj_male=0;controls_AC_nfe_male=1;controls_AN_nfe_male=23620;controls_AF_nfe_male=4.23370e-05;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=16732;non_neuro_AF_fin=0.00000e+00;non_neuro_nhomalt_fin=0;non_topmed_AC_sas=2;non_topmed_AN_sas=30616;non_topmed_AF_sas=6.53253e-05;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=1;non_cancer_AN_nfe_female=44296;non_cancer_AF_nfe_female=2.25754e-05;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=2928;AF_oth_female=0.00000e
+00;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=9572;non_cancer_AF_asj=0.00000e+00;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=26134;AF_nfe_swe=0.00000e+00;nhomalt_nfe_swe=0;controls_AC_nfe=1;controls_AN_nfe=42768;controls_AF_nfe=2.33820e-05;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=986;controls_AF_oth_female=0.00000e+00;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=2320;controls_AF_asj=0.00000e+00;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=12256;non_neuro_AF_amr_male=0.00000e+00;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=14452;controls_AF_nfe_nwe=0.00000e+00;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=42210;AF_nfe_nwe=0.00000e+00;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=4760;controls_AF_nfe_seu=0.00000e+00;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=0;controls_AN_sas_female=4214;controls_AF_sas_female=0.00000e+00;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=18266;non_neuro_AF_amr_female=0.00000e+00;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=124;non_cancer_AF_eas_jpn=0.00000e+00;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=2;non_neuro_AN_nfe_onf=27808;non_neuro_AF_nfe_onf=7.19217e-05;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=9062;non_topmed_AF_eas_male=0.00000e+00;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=152;AF_eas_jpn=0.00000e+00;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=5672;non_cancer_AF_afr_male=0.00000e+00;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=0;non_cancer_AN_afr=14902;non_cancer_AF_afr=0.00000e+00;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=10226;controls_AF_amr_female=0.00000e+00;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=9082;non_neuro_AF_fin_male=0.00000e+00;non_neuro_nhomalt_fin_male=0;AC_fema
le=1;AN_female=115558;AF_female=8.65366e-06;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=452;non_neuro_AF_nfe_bgr=0.00000e+00;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=2476;non_neuro_AF_oth_male=0.00000e+00;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=238;non_topmed_AF_nfe_est=0.00000e+00;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=41090;non_topmed_AF_nfe_nwe=0.00000e+00;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=14262;non_topmed_AF_amr_male=0.00000e+00;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=0;non_cancer_AN_amr=34258;non_cancer_AF_amr=0.00000e+00;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=26072;non_topmed_AF_nfe_swe=0.00000e+00;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=2;non_topmed_AN_nfe_onf=30190;non_topmed_AF_nfe_onf=6.62471e-05;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=1888;controls_AF_eas_kor=0.00000e+00;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=0;non_topmed_AN_eas_oea=14416;non_topmed_AF_eas_oea=0.00000e+00;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=0;controls_AN_eas_male=4258;controls_AF_eas_male=0.00000e+00;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=928;controls_AF_oth_male=0.00000e+00;controls_nhomalt_oth_male=0;non_topmed_AC=6;non_topmed_AN=244846;non_topmed_AF=2.45052e-05;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=13392;controls_AF_fin=0.00000e+00;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=3818;AF_eas_kor=0.00000e+00;nhomalt_eas_kor=0;non_neuro_AC_nfe=4;non_neuro_AN_nfe=89556;non_neuro_AF_nfe=4.46648e-05;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=7650;non_neuro_AF_fin_female=0.00000e+00;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=3;non_cancer_AN_nfe_male=58440;non_cancer_AF_nfe_male=5.13347e-05;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea
=0;controls_AN_eas_oea=7044;controls_AF_eas_oea=0.00000e+00;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=2;non_topmed_AN_nfe_seu=11408;non_topmed_AF_nfe_seu=1.75316e-04;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=4788;controls_AF_eas_female=0.00000e+00;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=9998;non_topmed_AF_asj=0.00000e+00;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=1;controls_AN_nfe_onf=9998;controls_AF_nfe_onf=1.00020e-04;controls_nhomalt_nfe_onf=0;non_neuro_AC=6;non_neuro_AN=208122;non_neuro_AF=2.88292e-05;non_neuro_nhomalt=0;AC_eas_oea=0;AN_eas_oea=14420;AF_eas_oea=0.00000e+00;nhomalt_eas_oea=0;non_topmed_AC_nfe=4;non_topmed_AN_nfe=111660;non_topmed_AF_nfe=3.58230e-05;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=5620;non_cancer_AF_oth=0.00000e+00;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=6;non_topmed_AN_raw=244878;non_topmed_AF_raw=2.45020e-05;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=216;non_neuro_AF_nfe_est=0.00000e+00;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=3186;non_topmed_AF_oth_male=0.00000e+00;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=2938;non_cancer_AF_oth_male=0.00000e+00;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=242;AF_nfe_est=0.00000e+00;nhomalt_nfe_est=0;non_cancer_AC_afr_female=0;non_cancer_AN_afr_female=9230;non_cancer_AF_afr_female=0.00000e+00;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=4686;non_topmed_AF_afr_male=0.00000e+00;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=9066;AF_eas_male=0.00000e+00;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=9046;controls_AF_eas=0.00000e+00;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=6574;non_neuro_AF_eas_male=0.00000e+00;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=0;non_cancer_AN_nfe_nwe=39490;non_cancer_AF_nfe_nwe=0.00000e+00;non_c
ancer_nhomalt_nfe_nwe=0;controls_AC_sas=1;controls_AN_sas=15690;controls_AF_sas=6.37349e-05;controls_nhomalt_sas=0;non_neuro_AC_sas_male=2;non_neuro_AN_sas_male=23066;non_neuro_AF_sas_male=8.67077e-05;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=3126;non_neuro_AF_asj_male=0.00000e+00;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bgr=0;non_cancer_AN_nfe_bgr=2526;non_cancer_AF_nfe_bgr=0.00000e+00;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=1914;controls_AF_oth=0.00000e+00;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=8946;non_cancer_AF_eas_female=0.00000e+00;non_cancer_nhomalt_eas_female=0;AC_nfe=4;AN_nfe=113750;AF_nfe=3.51648e-05;nhomalt_nfe=0;non_topmed_AC_female=1;non_topmed_AN_female=111308;non_topmed_AF_female=8.98408e-06;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=6212;non_neuro_AF_asj=0.00000e+00;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=9324;non_topmed_AF_eas_female=0.00000e+00;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=6;non_neuro_AN_raw=208136;non_neuro_AF_raw=2.88273e-05;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=18386;non_topmed_AF_eas=0.00000e+00;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=11272;non_topmed_AF_fin_male=0.00000e+00;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=4976;non_cancer_AF_asj_male=0.00000e+00;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=21646;AF_fin=0.00000e+00;nhomalt_fin=0;AC_nfe_male=3;AN_nfe_male=63598;AF_nfe_male=4.71713e-05;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=3818;non_topmed_AF_eas_kor=0.00000e+00;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=6884;controls_AF_amr_male=0.00000e+00;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=0;non_neuro_AN_eas_oea=9446;non_neuro_AF_eas_oea=0.00000e+00;non_neuro_nhomalt_eas_oea=0;AC_sas_female=0;AN_sas_female=7544;AF_sas_
female=0.00000e+00;nhomalt_sas_female=0;controls_AC_afr_female=0;controls_AN_afr_female=4240;controls_AF_afr_female=0.00000e+00;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=17110;controls_AF_amr=0.00000e+00;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=152;non_topmed_AF_eas_jpn=0.00000e+00;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=4900;AF_asj_female=0.00000e+00;nhomalt_asj_female=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=2662;non_topmed_AF_nfe_bgr=0.00000e+00;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=158;non_cancer_AF_nfe_est=0.00000e+00;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=0;non_neuro_AN_eas=13412;non_neuro_AF_eas=0.00000e+00;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=4;non_cancer_AN_nfe=102736;non_cancer_AF_nfe=3.89347e-05;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=5;non_neuro_AN_male=112470;non_neuro_AF_male=4.44563e-05;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=0;non_neuro_AN_sas_female=7542;non_neuro_AF_sas_female=0.00000e+00;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=10080;AF_asj=0.00000e+00;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=70;controls_AF_nfe_est=0.00000e+00;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=4854;non_topmed_AF_asj_female=0.00000e+00;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=25290;non_cancer_AF_nfe_swe=0.00000e+00;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=6;non_cancer_AN=236934;non_cancer_AF=2.53235e-05;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=6064;non_topmed_AF_oth=0.00000e+00;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=10372;non_topmed_AF_fin_female=0.00000e+00;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=10362;non_cancer_AF_fin_female=0.00000e+00;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=6140;AF_oth=0.00000e+00;nhomalt_oth=0;non_neuro_AC_nfe_male=
3;non_neuro_AN_nfe_male=49726;non_neuro_AF_nfe_male=6.03306e-05;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=51290;controls_AF_female=0.00000e+00;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=21630;non_cancer_AF_fin=0.00000e+00;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=21644;non_topmed_AF_fin=0.00000e+00;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=0;non_cancer_AN_eas_oea=13792;non_cancer_AF_eas_oea=0.00000e+00;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=1;non_topmed_AN_nfe_female=48806;non_topmed_AF_nfe_female=2.04893e-05;non_topmed_nhomalt_nfe_female=0;non_cancer_AC_sas_male=2;non_cancer_AN_sas_male=23032;non_cancer_AF_sas_male=8.68357e-05;non_cancer_nhomalt_sas_male=0;controls_AC_asj_male=0;controls_AN_asj_male=1106;controls_AF_asj_male=0.00000e+00;controls_nhomalt_asj_male=0;non_cancer_AC_raw=6;non_cancer_AN_raw=236958;non_cancer_AF_raw=2.53209e-05;non_cancer_nhomalt_raw=0;non_cancer_AC_eas_male=0;non_cancer_AN_eas_male=8744;non_cancer_AF_eas_male=0.00000e+00;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=5144;non_topmed_AF_asj_male=0.00000e+00;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=4866;non_neuro_AF_oth=0.00000e+00;non_neuro_nhomalt_oth=0;AC_male=5;AN_male=135906;AF_male=3.67901e-05;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=6474;controls_AF_fin_female=0.00000e+00;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=678;controls_AF_nfe_bgr=0.00000e+00;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=1214;controls_AF_asj_female=0.00000e+00;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=14320;AF_amr_male=0.00000e+00;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=20270;AF_amr_female=0.00000e+00;nhomalt_amr_female=0;non_topmed_AC_sas_male=2;non_topmed_AN_sas_male=23072;non_topmed_AF_sas_male=8.66852e-05;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=3
212;AF_oth_male=0.00000e+00;nhomalt_oth_male=0;non_cancer_AC_sas=2;non_cancer_AN_sas=30526;non_cancer_AF_sas=6.55179e-05;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=2;non_neuro_AN_nfe_seu=10942;non_neuro_AF_nfe_seu=1.82782e-04;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=3774;controls_AF_sas_male=8.71384e-05;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=0;non_topmed_AN_sas_female=7544;non_topmed_AF_sas_female=0.00000e+00;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=0;non_topmed_AN_afr=12022;non_topmed_AF_afr=0.00000e+00;non_topmed_nhomalt_afr=0;controls_AC=2;controls_AN=109404;controls_AF=1.82809e-05;AN_popmax=30616;AF_popmax=6.53253e-05;nhomalt_popmax=0;non_cancer_AF_eas_kor=0.00000e+00;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=7336;non_topmed_AF_afr_female=0.00000e+00;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=1;controls_AN_sas_male=11476
1   11850892    rs373747884 G   A   34745.78    PASS    non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=114;controls_AF_eas_jpn=0.00000e+00;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=2;controls_AN_nfe_female=19148;controls_AF_nfe_female=1.04450e-04;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=1;non_neuro_AN_amr=30522;non_neuro_AF_amr=3.27633e-05;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=6840;non_neuro_AF_eas_female=0.00000e+00;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=5180;AF_asj_male=0.00000e+00;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=23620;controls_AF_nfe_male=0.00000e+00;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=16734;non_neuro_AF_fin=0.00000e+00;non_neuro_nhomalt_fin=0;non_topmed_AC_sas=2;non_topmed_AN_sas=30614;non_topmed_AF_sas=6.53296e-05;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=4;non_cancer_AN_nfe_female=44298;non_cancer_AF_nfe_female=9.02975e-05;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=2928;AF_oth_female=0.00000e+00;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=9572;non_cancer_AF_asj=0.00000e+00;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=26134;AF_nfe_swe=0.00000e+00;nhomalt_nfe_swe=0;controls_AC_nfe=2;controls_AN_nfe=42768;controls_AF_nfe=4.67639e-05;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=986;controls_AF_oth_female=0.00000e+00;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=2320;controls_AF_asj=0.00000e+00;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=12256;non_neuro_AF_amr_male=0.00000e+00;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=2;controls_AN_nfe_nwe=14452;controls_AF_nfe_nwe=1.38389e-04;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=4;AN_nfe_nwe=42212;AF_nfe_nwe=9.47598e-05;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=4760;controls_AF_nfe_seu=0.00000e+00;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=
1;controls_AN_sas_female=4212;controls_AF_sas_female=2.37417e-04;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=1;non_neuro_AN_amr_female=18266;non_neuro_AF_amr_female=5.47465e-05;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=124;non_cancer_AF_eas_jpn=0.00000e+00;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=1;non_neuro_AN_nfe_onf=27808;non_neuro_AF_nfe_onf=3.59609e-05;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=2;non_topmed_AN_eas_male=9062;non_topmed_AF_eas_male=2.20702e-04;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=152;AF_eas_jpn=0.00000e+00;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=5672;non_cancer_AF_afr_male=0.00000e+00;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=1;non_cancer_AN_afr=14902;non_cancer_AF_afr=6.71051e-05;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=10226;controls_AF_amr_female=0.00000e+00;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=9084;non_neuro_AF_fin_male=0.00000e+00;non_neuro_nhomalt_fin_male=0;AC_female=7;AN_female=115558;AF_female=6.05756e-05;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=452;non_neuro_AF_nfe_bgr=0.00000e+00;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=2476;non_neuro_AF_oth_male=0.00000e+00;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=238;non_topmed_AF_nfe_est=0.00000e+00;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=4;non_topmed_AN_nfe_nwe=41092;non_topmed_AF_nfe_nwe=9.73425e-05;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=14262;non_topmed_AF_amr_male=0.00000e+00;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=1;non_cancer_AN_amr=34258;non_cancer_AF_amr=2.91903e-05;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=26072;non_topmed_AF_nfe_swe=0.00000e+00;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=1;non_topmed_AN_nfe_onf=30190;non_topmed_AF_nfe
_onf=3.31236e-05;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=1888;controls_AF_eas_kor=0.00000e+00;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=2;non_topmed_AN_eas_oea=14418;non_topmed_AF_eas_oea=1.38715e-04;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=2;controls_AN_eas_male=4258;controls_AF_eas_male=4.69704e-04;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=928;controls_AF_oth_male=0.00000e+00;controls_nhomalt_oth_male=0;non_topmed_AC=11;non_topmed_AN=244848;non_topmed_AF=4.49258e-05;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=13394;controls_AF_fin=0.00000e+00;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=3818;AF_eas_kor=0.00000e+00;nhomalt_eas_kor=0;non_neuro_AC_nfe=4;non_neuro_AN_nfe=89554;non_neuro_AF_nfe=4.46658e-05;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=7650;non_neuro_AF_fin_female=0.00000e+00;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=1;non_cancer_AN_nfe_male=58442;non_cancer_AF_nfe_male=1.71110e-05;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=2;controls_AN_eas_oea=7044;controls_AF_eas_oea=2.83930e-04;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=11408;non_topmed_AF_nfe_seu=0.00000e+00;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=4788;controls_AF_eas_female=0.00000e+00;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=9998;non_topmed_AF_asj=0.00000e+00;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=9998;controls_AF_nfe_onf=0.00000e+00;controls_nhomalt_nfe_onf=0;non_neuro_AC=9;non_neuro_AN=208120;non_neuro_AF=4.32443e-05;non_neuro_nhomalt=0;AC_eas_oea=2;AN_eas_oea=14422;AF_eas_oea=1.38677e-04;nhomalt_eas_oea=0;non_topmed_AC_nfe=5;non_topmed_AN_nfe=111662;non_topmed_AF_nfe=4.47780e-05;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=5620;non_cancer_AF_oth=0.00000e+00;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=11;non_topmed_AN_raw=2
44878;non_topmed_AF_raw=4.49203e-05;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=216;non_neuro_AF_nfe_est=0.00000e+00;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=3186;non_topmed_AF_oth_male=0.00000e+00;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=2938;non_cancer_AF_oth_male=0.00000e+00;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=242;AF_nfe_est=0.00000e+00;nhomalt_nfe_est=0;non_cancer_AC_afr_female=1;non_cancer_AN_afr_female=9230;non_cancer_AF_afr_female=1.08342e-04;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=4686;non_topmed_AF_afr_male=0.00000e+00;non_topmed_nhomalt_afr_male=0;AC_eas_male=2;AN_eas_male=9066;AF_eas_male=2.20604e-04;nhomalt_eas_male=0;controls_AC_eas=2;controls_AN_eas=9046;controls_AF_eas=2.21092e-04;controls_nhomalt_eas=0;non_neuro_AC_eas_male=1;non_neuro_AN_eas_male=6574;non_neuro_AF_eas_male=1.52114e-04;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=4;non_cancer_AN_nfe_nwe=39494;non_cancer_AF_nfe_nwe=1.01281e-04;non_cancer_nhomalt_nfe_nwe=0;controls_AC_sas=1;controls_AN_sas=15688;controls_AF_sas=6.37430e-05;controls_nhomalt_sas=0;non_neuro_AC_sas_male=1;non_neuro_AN_sas_male=23066;non_neuro_AF_sas_male=4.33539e-05;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=3126;non_neuro_AF_asj_male=0.00000e+00;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bgr=0;non_cancer_AN_nfe_bgr=2526;non_cancer_AF_nfe_bgr=0.00000e+00;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=1914;controls_AF_oth=0.00000e+00;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=8948;non_cancer_AF_eas_female=0.00000e+00;non_cancer_nhomalt_eas_female=0;AC_nfe=5;AN_nfe=113752;AF_nfe=4.39553e-05;nhomalt_nfe=0;non_topmed_AC_female=7;non_topmed_AN_female=111308;non_topmed_AF_female=6.28886e-05;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=6212;non_neuro_AF_asj=0.00000e+00;
non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=9326;non_topmed_AF_eas_female=0.00000e+00;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=9;non_neuro_AN_raw=208136;non_neuro_AF_raw=4.32410e-05;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=2;non_topmed_AN_eas=18388;non_topmed_AF_eas=1.08767e-04;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=11274;non_topmed_AF_fin_male=0.00000e+00;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=4976;non_cancer_AF_asj_male=0.00000e+00;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=21648;AF_fin=0.00000e+00;nhomalt_fin=0;AC_nfe_male=1;AN_nfe_male=63598;AF_nfe_male=1.57238e-05;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=3818;non_topmed_AF_eas_kor=0.00000e+00;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=6884;controls_AF_amr_male=0.00000e+00;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=1;non_neuro_AN_eas_oea=9448;non_neuro_AF_eas_oea=1.05843e-04;non_neuro_nhomalt_eas_oea=0;AC_sas_female=1;AN_sas_female=7542;AF_sas_female=1.32591e-04;nhomalt_sas_female=0;controls_AC_afr_female=1;controls_AN_afr_female=4240;controls_AF_afr_female=2.35849e-04;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=17110;controls_AF_amr=0.00000e+00;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=152;non_topmed_AF_eas_jpn=0.00000e+00;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=4900;AF_asj_female=0.00000e+00;nhomalt_asj_female=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=2662;non_topmed_AF_nfe_bgr=0.00000e+00;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=158;non_cancer_AF_nfe_est=0.00000e+00;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=1;non_neuro_AN_eas=13414;non_neuro_AF_eas=7.45490e-05;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=5;non_cancer_AN_nfe=102740;non_cancer_AF_nfe=4.86665e-05;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=3;non_neuro_AN_male=112470;n
on_neuro_AF_male=2.66738e-05;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=1;non_neuro_AN_sas_female=7540;non_neuro_AF_sas_female=1.32626e-04;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=10080;AF_asj=0.00000e+00;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=70;controls_AF_nfe_est=0.00000e+00;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=4854;non_topmed_AF_asj_female=0.00000e+00;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=25290;non_cancer_AF_nfe_swe=0.00000e+00;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=11;non_cancer_AN=236940;non_cancer_AF=4.64253e-05;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=6064;non_topmed_AF_oth=0.00000e+00;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=10372;non_topmed_AF_fin_female=0.00000e+00;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=10362;non_cancer_AF_fin_female=0.00000e+00;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=6140;AF_oth=0.00000e+00;nhomalt_oth=0;non_neuro_AC_nfe_male=1;non_neuro_AN_nfe_male=49724;non_neuro_AF_nfe_male=2.01110e-05;non_neuro_nhomalt_nfe_male=0;controls_AC_female=4;controls_AN_female=51288;controls_AF_female=7.79910e-05;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=21632;non_cancer_AF_fin=0.00000e+00;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=21646;non_topmed_AF_fin=0.00000e+00;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=2;non_cancer_AN_eas_oea=13794;non_cancer_AF_eas_oea=1.44991e-04;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=4;non_topmed_AN_nfe_female=48808;non_topmed_AF_nfe_female=8.19538e-05;non_topmed_nhomalt_nfe_female=0;non_cancer_AC_sas_male=1;non_cancer_AN_sas_male=23032;non_cancer_AF_sas_male=4.34179e-05;non_cancer_nhomalt_sas_male=0;controls_AC_asj_male=0;controls_AN_asj_male=1106;controls_AF_asj_male=0.00000e+00;controls_nhomalt_asj_male=0;non_cancer_AC_raw=11;non_cancer_AN_raw=236958;
non_cancer_AF_raw=4.64217e-05;non_cancer_nhomalt_raw=0;non_cancer_AC_eas_male=2;non_cancer_AN_eas_male=8744;non_cancer_AF_eas_male=2.28728e-04;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=5144;non_topmed_AF_asj_male=0.00000e+00;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=4866;non_neuro_AF_oth=0.00000e+00;non_neuro_nhomalt_oth=0;AC_male=4;AN_male=135908;AF_male=2.94317e-05;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=6474;controls_AF_fin_female=0.00000e+00;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=678;controls_AF_nfe_bgr=0.00000e+00;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=1214;controls_AF_asj_female=0.00000e+00;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=14320;AF_amr_male=0.00000e+00;nhomalt_amr_male=0;AC_amr_female=1;AN_amr_female=20270;AF_amr_female=4.93340e-05;nhomalt_amr_female=0;non_topmed_AC_sas_male=1;non_topmed_AN_sas_male=23072;non_topmed_AF_sas_male=4.33426e-05;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=3212;AF_oth_male=0.00000e+00;nhomalt_oth_male=0;non_cancer_AC_sas=2;non_cancer_AN_sas=30524;non_cancer_AF_sas=6.55222e-05;controls_AF_sas_male=0.00000e+00;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=1;non_topmed_AN_sas_female=7542;non_topmed_AF_sas_female=1.32591e-04;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=1;non_topmed_AN_afr=12020;non_topmed_AF_afr=8.31947e-05;non_topmed_nhomalt_afr=0;controls_AC=6;controls_AN=109404;controls_AF=5.48426e-05;AF_popmax=1.08743e-04;nhomalt_popmax=0;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=10942;non_neuro_AF_nfe_seu=0.00000e+00;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=3774;non_cancer_AF_eas_kor=0.00000e+00;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=1;non_topmed_AN_afr_female=7334;non_topmed_AF_afr_female=1.36351e-04;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=0;controls
_AN_sas_male=11476
# ... rest of file ...

I need to combine a tabix command with awk to print $1, $2, $4, $5 and a substring of $8 (only the number after AF_popmax= and the number after nhomalt_popmax=), for example:

# ...
1   11850891    C   T    AF_popmax=6.53253e-05;nhomalt_popmax=0
1   11850892    G   A    AF_popmax=1.08743e-04;nhomalt_popmax=0
# ...

I tried this command:

tabix file_input.vcf.bgz 1:11850891-55525202 | awk '{$8=substr(/;AF_popmax=[^;]*/,""); print $1,$2,$4,$5,$8}'

but I only get a 0/1 in column $8:

1 11850891 C T 1
1 11850892 G A 1

Does anybody have a clue?

Thank you very much in advance for any help (other approaches are welcome).


Solution

  • I will try to explain a very generic way to do this, which you can easily adapt if you want to print other fields.

    Assume you have a string which has a format like this:

    key1=value1;key2=value2;key3=value3
    

    and you would like to make a selection from, or perform some operations on, these values. The easiest approach is to store them in an associative array such that we have:

    array["key1"] => value1
    array["key2"] => value2
    array["key3"] => value3
    array["key1","full"] => "key1=value1"
    array["key2","full"] => "key2=value2"
    array["key3","full"] => "key3=value3"
    

    you can use the following function for that:

    # str2map: parse a delimited key/value list into an associative array.
    #   str - input string, e.g. "key1=value1;key2=value2"
    #   fs1 - separator between entries, passed to split()
    #   fs2 - separator between key and value, passed to split()
    #   map - output array; emptied first, then filled with
    #           map[key]        = value
    #           map[key,"full"] = the whole "key=value" entry
    #   n, pairs, kv - locals (declared as extra parameters)
    # If a key occurs more than once, the first occurrence wins.
    # An entry without fs2 is stored with an empty value; only the text
    # between the first and second fs2 is kept as the value.
    function str2map(str,fs1,fs2,map,   n,pairs,kv) {
       split("",map)                # portable way to empty the array
       # Split into a scratch array rather than into map itself, so a
       # key that looks like a split index (e.g. "1") cannot overwrite
       # or delete an entry that has not been processed yet.
       n=split(str,pairs,fs1)
       # Walk backwards so earlier entries overwrite later duplicates.
       for (;n>0;n--) {
         split(pairs[n],kv,fs2)
         map[kv[1]]=kv[2]; map[kv[1],"full"]=pairs[n]
       }
    }
    

    And this leads to the following awk program:

    # Print CHROM, POS, REF and ALT followed by the AF_popmax and
    # nhomalt_popmax entries taken from the INFO column (field 8) of "file".
    awk '
        # str2map: parse a delimited key/value list into an associative array.
        #   str - input string, e.g. "key1=value1;key2=value2"
        #   fs1 - separator between entries, passed to split()
        #   fs2 - separator between key and value, passed to split()
        #   map - output array; emptied first, then filled with
        #           map[key]        = value
        #           map[key,"full"] = the whole "key=value" entry
        #   n, pairs, kv - locals (declared as extra parameters)
        # If a key occurs more than once, the first occurrence wins.
        function str2map(str,fs1,fs2,map,   n,pairs,kv) {
           split("",map)                # portable way to empty the array
           # Split into a scratch array rather than into map itself, so a
           # key that looks like a split index (e.g. "1") cannot overwrite
           # or delete an entry that has not been processed yet.
           n=split(str,pairs,fs1)
           # Walk backwards so earlier entries overwrite later duplicates.
           for (;n>0;n--) {
             split(pairs[n],kv,fs2)
             map[kv[1]]=kv[2]; map[kv[1],"full"]=pairs[n]
           }
        }
        # Rebuild the map for every record from the INFO column.
        { str2map($8,";","=",map) }
        # A key that is missing from INFO prints as an empty string.
        { print $1,$2,$4,$5,map["AF_popmax","full"] ";" map["nhomalt_popmax","full"] }
       ' file
    

    This outputs

    1 11850891 C T AF_popmax=6.53253e-05;nhomalt_popmax=0
    1 11850892 G A AF_popmax=1.08743e-04;nhomalt_popmax=0
    

    The advantage of this method is that you can easily adapt your code to print any other key you are interested in, or even make selections based on this.