Search code examples
r dataframe anova

Splitting up a big data-frame into smaller subset column wise


I'm trying to run ANOVA on multiple principal components against different categorical as well as continuous variables, using all possible combinations of those variables.

The dimensions of my data frame are

dim(tcga_mrna.pcs55)
[1] 147  67

The number of model combinations I have to test is 112585,

which was generated from this

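# Cross every entry of dv with every entry of rhs and glue each pair into a
# "lhs ~ rhs" formula string.
frms <- with(expand.grid(dv, rhs), paste(Var1, Var2, sep = ' ~ '))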

I tried to run it once, but it was stuck for quite a while, so I had to abort it given my limited computational resources.

Therefore I think it would help to split my data frame into smaller data frames, where I keep all the predictors constant but break the other columns into small subsets.

A small subset of my data:

 dput(head(tcga_mrna_pcs55))
structure(list(Sample_ID = c("TCGA-AB-2856", "TCGA-AB-2849", 
"TCGA-AB-2971", "TCGA-AB-2930", "TCGA-AB-2891", "TCGA-AB-2872"
), FAB = c("M4", "M0", "M4", "M2", "M1", "M3"), prior_malignancy = c("no", 
"no", "no", "no", "no", "no"), Age = c(63, 39, 76, 62, 42, 42
), BM_percentage = c(82, 83, 91, 72, 68, 88), Cytogenetic_Code = c("Normal Karyotype", 
"Complex Cytogenetics", "Normal Karyotype", "Normal Karyotype", 
"Complex Cytogenetics", "PML-RARA"), Histologic_Subtype = c("NUP98 Translocation", 
"Complex Cytogenetics", "Normal Karyotype", "NUP98 Translocation", 
"Complex Cytogenetics", "PML-RARA"), Risk_Cyto = c("Intermediate", 
"Poor", "Intermediate", "Intermediate", "Poor", "Good"), Risk_Molecular = c("Poor", 
"Poor", "Intermediate", "Poor", "Poor", "Good"), Sex = c("Male", 
"Male", "Female", "Female", "Male", "Male"), TMB = c(0, 0.733333333333, 
0.3, 0.266666666667, 0.466666666667, 0.333333333333), WBC = c(76.7, 
5, 5, 27.7, 10.7, 2.1), PC1 = c(-25.4243169876343, 38.5584419151387, 
-18.8838255683554, 3.773812175371, -5.02868029999407, 21.4658284982092
), PC2 = c(14.4895578447888, -27.8233346053999, -0.318074813205288, 
6.17043126174388, -9.29150756229324, 35.1156168048889), PC3 = c(-10.6509445605983, 
28.0996432599761, 5.88270605324811, -26.4971717145656, -0.896362785151599, 
23.2794429531062), PC4 = c(1.18248804745738, -21.0145760152975, 
-13.6652202316835, 4.64544888299446, 6.10552116611012, 1.085498115633
), PC5 = c(-14.8325881422899, 17.8653710387376, 8.90002489087104, 
-0.550793434039587, 5.90790796345414, 13.7446793572887), PC6 = c(0.695367268633542, 
-7.46255391237719, -9.48973541984696, 5.27626778248046, 2.85645531301921, 
-2.5417697261715), PC7 = c(-16.7000152968204, 14.3887321471474, 
16.0657716315069, -9.86610587188809, -8.27832660111485, -3.14876491002283
), PC8 = c(2.79822148585397, -6.63528657940777, -12.8725509038156, 
-2.17579923819722, -12.5781664467208, -2.90943809569856), PC9 = c(-7.05331558116121, 
-12.1985749853038, 4.10613337565274, -20.0374908146072, -13.4276520442583, 
-2.77032899744962), PC10 = c(13.2132444645362, -2.82152344784948, 
-8.00771994862333, 5.3333694628255, -6.78114804624295, -5.63354620465723
), PC11 = c(-1.79050241538047, -6.57822316228283, -4.20132241912175, 
4.51589800987586, -1.67953673784626, 3.75349242056027), PC12 = c(7.83152902157972, 
-19.5950183628134, -9.38164109885085, 16.3690122002304, 0.0735031667926224, 
2.32446981112219), PC13 = c(-5.25219547328429, -7.13380025578665, 
6.09600053996671, -7.11925980557811, -5.61967462665635, -9.80647746645279
), PC14 = c(1.45188764160216, -25.5978607332207, 18.3643001800981, 
4.7265900178811, -15.071134439125, 11.3956478391763), PC15 = c(-7.3393199774991, 
-33.112294903764, -4.10920083616075, -11.3366588668303, 2.5968258382962, 
14.4766162599917), PC16 = c(0.529278749351839, -20.0921377085554, 
9.88228975185339, -0.264632117869371, 4.39109257712349, 17.8403742741107
), PC17 = c(-5.79919206631477, -34.4597935232432, -0.284077310829092, 
-1.45723530362592, 8.066297152665, -4.36479763922708), PC18 = c(6.16739223066386, 
-0.668191107754327, 7.17864592583405, 1.10258322969635, -2.88635363509576, 
-3.55077626222531), PC19 = c(-2.46075725680638, 11.2317147986833, 
10.7210109810505, -1.86175537360617, 9.00649577117842, -5.20964171868026
), PC20 = c(0.447290924483848, 0.882697730068387, -1.64992531160428, 
3.69926682756107, -8.45636279736397, 12.0178514144455), PC21 = c(7.77512402052619, 
-13.723689855566, 0.929876575603838, 7.20400850159562, -0.614055839592973, 
-6.15633968149479), PC22 = c(-1.56535673338356, -13.2971868706006, 
1.87562172644287, -3.28771663165701, -5.64722916304599, 0.636358407474463
), PC23 = c(0.164107670637167, -15.2249958235848, 8.00555210033773, 
2.0662276295149, 7.73028430813706, -2.32179860594496), PC24 = c(-1.8934805361982, 
8.21971891071679, 3.08512611513449, -0.628702548440314, -0.233105377199397, 
2.87674317483379), PC25 = c(0.893451809081066, 6.60513492724147, 
8.88171627539804, 2.97249584622476, -17.4778489423161, -4.58539478100194
), PC26 = c(-1.32955071985976, 11.9145713692928, -3.79820868194203, 
4.91276198192432, 1.14456788292366, 9.69280466752626), PC27 = c(5.80488907470531, 
-9.84420624259338, 2.14543167774679, -3.04254310413812, 5.7902970935943, 
-3.75331337674036), PC28 = c(-8.18472344420157, 1.65255506997329, 
7.07760527456274, -6.32026527255729, -4.33442214041778, -6.65351307662841
), PC29 = c(1.75032780020844, 15.5611773097845, -2.52903882532741, 
2.53566972972068, 6.44542594461733, -2.73677227120317), PC30 = c(-0.862387620806526, 
-14.0405815436268, -7.08059737134561, -0.429947697667332, -4.93506927070922, 
-7.24877851150857), PC31 = c(5.04914290995488, 1.94876316261089, 
-1.44943546186944, 0.589695885543367, 7.55928674782029, -2.70932468259665
), PC32 = c(-0.331134735300882, 6.19579420256524, -1.11785338261286, 
-1.29691032897408, 20.2001081109543, 7.8570225951223), PC33 = c(4.89375087245026, 
6.48463626836495, 6.73612277868434, 4.24109357290756, 1.02817278604743, 
0.680027817141749), PC34 = c(-0.800041139194579, -1.88905732488826, 
1.7772915935601, -0.499932283505083, 10.7430548643924, -6.53775164240871
), PC35 = c(5.12118821250308, -3.98313005901599, -4.52005990894197, 
-3.07369863487262, 3.92078873433114, -2.18933519508166), PC36 = c(-2.54985917927219, 
-1.70921978278497, -2.44961274490961, 1.56802927495698, 7.08687990990386, 
-0.604700521943517), PC37 = c(5.1747232970747, -5.34247962945995, 
-1.83839184464979, 6.70262336281884, -1.10932786180704, -3.25652639774021
), PC38 = c(-4.18410989825183, -6.98950710609193, 0.866526234992652, 
-0.0950366191443256, 3.35399502292955, 2.90766983495248), PC39 = c(2.46730811173428, 
-0.455543469604487, -4.63050936679246, -1.34675190382428, -6.1200022250839, 
-3.40619104956874), PC40 = c(-0.731471474196848, -4.24515300461387, 
-3.43245666463953, 3.70020703587818, -8.76472221293956, -1.1281798870577
), PC41 = c(-3.79301551015471, -5.25686203441764, 6.76297802293118, 
-3.68970972173239, 4.35055761452324, -18.4180107861132), PC42 = c(4.83388024710314, 
-0.25083519933247, -3.21152818097955, 5.96597185780427, 4.19254774340514, 
-8.18426155110418), PC43 = c(-0.217047959384719, -1.13621909801165, 
-4.4592933756817, -6.96360564960356, 2.27400449542372, -2.86813634075033
), PC44 = c(-3.33545179774935, 6.11834882717519, -0.264585462886141, 
-7.6792938724774, -3.99915221656525, -2.5294702493956), PC45 = c(2.77954857939566, 
7.82470034842594, -3.52534065178766, -2.56221337540028, 7.09562358045148, 
-1.49373245991455), PC46 = c(-1.60423065922446, -0.428508391589366, 
4.03490498808649, 2.12844259167901, -1.3678347436909, -6.13180626071563
), PC47 = c(-3.20068124812043, 5.06644140525654, 7.37963017443048, 
-4.84325578581087, -17.680506272578, 0.560814898057312), PC48 = c(2.91858197345977, 
-1.11915083153502, 3.47278363466071, 1.21240736359339, -5.58511090848592, 
5.52652026954627), PC49 = c(3.84744380211926, 0.861663719832773, 
-1.40060221851844, 1.62791310594578, -2.52243080963911, 0.361029214307694
), PC50 = c(5.15785104158866, -0.319668135009027, 4.80115302565519, 
4.45746767521537, 2.76979916871901, -10.7678984312634), PC51 = c(-6.22760710964996, 
-3.55897006680048, -1.68421228474145, -1.51499187118043, 4.69802013777757, 
-7.25050359857057), PC52 = c(-2.26345921059907, 3.60461592062774, 
-1.37792205061882, 8.69053064558714, -10.7983766769631, -2.63687558522692
), PC53 = c(-1.65172511606967, 0.118920655863908, 6.29953754003559, 
-3.16092526827426, -3.64199764016276, -6.98013560579073), PC54 = c(6.17213064069784, 
3.78913668381605, 5.94121227070784, 1.6838389802013, 2.47727981128471, 
1.71804579216696), PC55 = c(-3.7893860872842, -0.325634230487849, 
-5.98312342448493, -5.37971579967361, -6.71876005026094, -4.19058766854014
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))

So here I want to keep the first 12 columns constant while adding PC1 to PC10 in my first subset. Similarly, I would keep the first 12 constant again and add PC11 to PC20, and so on through my last column, producing small subset data frames that each contain the following 11 predictors (plus Sample_ID) as the constant columns.

[1] "FAB"                "prior_malignancy"   "Age"                "BM_percentage"      "Cytogenetic_Code"   "Histologic_Subtype"
 [7] "Risk_Cyto"          "Risk_Molecular"     "Sex"                "TMB"                "WBC" 

Sample_ID FAB   prior_malignancy   Age BM_percentage Cytogenetic_Code Histologic_Subt… Risk_Cyto Risk_Molecular Sex     TMB   WBC    PC1     PC2
  <chr>     <chr> <chr>            <dbl>         <dbl> <chr>            <chr>            <chr>     <chr>          <chr> <dbl> <dbl>  <dbl>   <dbl>
1 TCGA-AB-… M4    no                  63            82 Normal Karyotype NUP98 Transloca… Intermed… Poor           Male  0      76.7 -25.4   14.5  
2 TCGA-AB-… M0    no                  39            83 Complex Cytogen… Complex Cytogen… Poor      Poor           Male  0.733   5    38.6  -27.8  
3 TCGA-AB-… M4    no                  76            91 Normal Karyotype Normal Karyotype Intermed… Intermediate   Fema… 0.3     5   -18.9   -0.318
4 TCGA-AB-… M2    no                  62            72 Normal Karyotype NUP98 Transloca… Intermed… Poor           Fema… 0.267  27.7   3.77   6.17 
5 TCGA-AB-… M1    no                  42            68 Complex Cytogen… Complex Cytogen… Poor      Poor           Male  0.467  10.7  -5.03  -9.29 
6 TCGA-AB-… M3    no                  42            88 PML-RARA         PML-RARA         Good      Good           Male  0.333   2.1  21.5   35.1 

My objective is to run the code below. With such a huge number of combinations it takes a lot of time, so, as a crude workaround, I thought it would be easier to run if the data frame were split. If there is a faster way to execute the code below, I would be glad to know.

Any help or suggestions are really appreciated.

# Fit one linear model per formula string in `frms` against
# `tcga_mrna.pcs55` and keep the ANOVA table of each fit.
models <- lapply(
  frms,
  function(frm) {
    fit <- lm(frm, data = tcga_mrna.pcs55)
    anova(fit)
  }
)

Solution

  • Here is a try! I searched a lot but was not able to find a simple solution, so this is a suggestion for how you could collect your shorter data frames in a list. It is tedious, but once you have the list you can apply your operations to each element of it:

    The nearest solution I found was here: R: Splitting dataframe columnwise. But here only one column is added to the constant columns!

    library(dplyr)

    # Columns 1-12 (Sample_ID plus the predictor columns) are the part that
    # every subset keeps.
    col1_12 <- df %>%
      select(1:12)

    # Each subset selects Sample_ID (column 1) together with one run of PC
    # columns, then joins the constant columns back on by Sample_ID.
    # right_join() keeps every row of col1_12; the PC columns come first in
    # the result, followed by the constant columns.
    PC1_PC10 <- df %>%
      select(1, 13:22) %>%
      right_join(col1_12, by = "Sample_ID")
    PC11_PC20 <- df %>%
      select(1, 23:32) %>%
      right_join(col1_12, by = "Sample_ID")
    PC21_PC30 <- df %>%
      select(1, 33:42) %>%
      right_join(col1_12, by = "Sample_ID")
    PC31_PC40 <- df %>%
      select(1, 43:52) %>%
      right_join(col1_12, by = "Sample_ID")
    PC41_PC50 <- df %>%
      select(1, 53:62) %>%
      right_join(col1_12, by = "Sample_ID")
    # The last run is shorter: only PC51-PC55 remain (columns 63:67).
    PC51_PC55 <- df %>%
      select(1, 63:67) %>%
      right_join(col1_12, by = "Sample_ID")

    # Collect the subsets in order. The fourth element is PC31_PC40, the
    # object defined above (there is no PC31_PC41 in this script).
    list_of_dfs <- list(PC1_PC10, PC11_PC20, PC21_PC30,
                        PC31_PC40, PC41_PC50, PC51_PC55)

    list_of_dfs
    

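    output (note: element [[4]] as printed here has 25 columns and spans PC31–PC43, apparently from an object named `PC31_PC41`; a subset built from columns 43:52 as `PC31_PC40` would have 22 columns, PC31–PC40):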

    > list_of_dfs
    [[1]]
    # A tibble: 6 x 22
      Sample_ID       PC1     PC2     PC3    PC4     PC5    PC6    PC7    PC8    PC9  PC10 FAB   prior_malignancy   Age BM_percentage Cytogenetic_Code     Histologic_Subtype  Risk_Cyto Risk_Molecular Sex     TMB   WBC
      <chr>         <dbl>   <dbl>   <dbl>  <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <chr> <chr>            <dbl>         <dbl> <chr>                <chr>               <chr>     <chr>          <chr> <dbl> <dbl>
    1 TCGA-AB-2856 -25.4   14.5   -10.7     1.18 -14.8    0.695 -16.7    2.80  -7.05 13.2  M4    no                  63            82 Normal Karyotype     NUP98 Translocation Intermed~ Poor           Male  0      76.7
    2 TCGA-AB-2849  38.6  -27.8    28.1   -21.0   17.9   -7.46   14.4   -6.64 -12.2  -2.82 M0    no                  39            83 Complex Cytogenetics Complex Cytogeneti~ Poor      Poor           Male  0.733   5  
    3 TCGA-AB-2971 -18.9   -0.318   5.88  -13.7    8.90  -9.49   16.1  -12.9    4.11 -8.01 M4    no                  76            91 Normal Karyotype     Normal Karyotype    Intermed~ Intermediate   Fema~ 0.3     5  
    4 TCGA-AB-2930   3.77   6.17  -26.5     4.65  -0.551  5.28   -9.87  -2.18 -20.0   5.33 M2    no                  62            72 Normal Karyotype     NUP98 Translocation Intermed~ Poor           Fema~ 0.267  27.7
    5 TCGA-AB-2891  -5.03  -9.29   -0.896   6.11   5.91   2.86   -8.28 -12.6  -13.4  -6.78 M1    no                  42            68 Complex Cytogenetics Complex Cytogeneti~ Poor      Poor           Male  0.467  10.7
    6 TCGA-AB-2872  21.5   35.1    23.3     1.09  13.7   -2.54   -3.15  -2.91  -2.77 -5.63 M3    no                  42            88 PML-RARA             PML-RARA            Good      Good           Male  0.333   2.1
    
    [[2]]
    # A tibble: 6 x 22
      Sample_ID     PC11     PC12  PC13   PC14   PC15    PC16    PC17   PC18  PC19   PC20 FAB   prior_malignancy   Age BM_percentage Cytogenetic_Code     Histologic_Subtype   Risk_Cyto Risk_Molecular Sex     TMB   WBC
      <chr>        <dbl>    <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>  <dbl> <dbl>  <dbl> <chr> <chr>            <dbl>         <dbl> <chr>                <chr>                <chr>     <chr>          <chr> <dbl> <dbl>
    1 TCGA-AB-2856 -1.79   7.83   -5.25   1.45  -7.34   0.529  -5.80   6.17  -2.46  0.447 M4    no                  63            82 Normal Karyotype     NUP98 Translocation  Intermed~ Poor           Male  0      76.7
    2 TCGA-AB-2849 -6.58 -19.6    -7.13 -25.6  -33.1  -20.1   -34.5   -0.668 11.2   0.883 M0    no                  39            83 Complex Cytogenetics Complex Cytogenetics Poor      Poor           Male  0.733   5  
    3 TCGA-AB-2971 -4.20  -9.38    6.10  18.4   -4.11   9.88   -0.284  7.18  10.7  -1.65  M4    no                  76            91 Normal Karyotype     Normal Karyotype     Intermed~ Intermediate   Fema~ 0.3     5  
    4 TCGA-AB-2930  4.52  16.4    -7.12   4.73 -11.3   -0.265  -1.46   1.10  -1.86  3.70  M2    no                  62            72 Normal Karyotype     NUP98 Translocation  Intermed~ Poor           Fema~ 0.267  27.7
    5 TCGA-AB-2891 -1.68   0.0735 -5.62 -15.1    2.60   4.39    8.07  -2.89   9.01 -8.46  M1    no                  42            68 Complex Cytogenetics Complex Cytogenetics Poor      Poor           Male  0.467  10.7
    6 TCGA-AB-2872  3.75   2.32   -9.81  11.4   14.5   17.8    -4.36  -3.55  -5.21 12.0   M3    no                  42            88 PML-RARA             PML-RARA             Good      Good           Male  0.333   2.1
    
    [[3]]
    # A tibble: 6 x 22
      Sample_ID       PC21    PC22    PC23   PC24    PC25  PC26  PC27  PC28  PC29    PC30 FAB   prior_malignancy   Age BM_percentage Cytogenetic_Code     Histologic_Subtype   Risk_Cyto Risk_Molecular Sex     TMB   WBC
      <chr>          <dbl>   <dbl>   <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <chr> <chr>            <dbl>         <dbl> <chr>                <chr>                <chr>     <chr>          <chr> <dbl> <dbl>
    1 TCGA-AB-2856   7.78   -1.57    0.164 -1.89    0.893 -1.33  5.80 -8.18  1.75  -0.862 M4    no                  63            82 Normal Karyotype     NUP98 Translocation  Intermed~ Poor           Male  0      76.7
    2 TCGA-AB-2849 -13.7   -13.3   -15.2    8.22    6.61  11.9  -9.84  1.65 15.6  -14.0   M0    no                  39            83 Complex Cytogenetics Complex Cytogenetics Poor      Poor           Male  0.733   5  
    3 TCGA-AB-2971   0.930   1.88    8.01   3.09    8.88  -3.80  2.15  7.08 -2.53  -7.08  M4    no                  76            91 Normal Karyotype     Normal Karyotype     Intermed~ Intermediate   Fema~ 0.3     5  
    4 TCGA-AB-2930   7.20   -3.29    2.07  -0.629   2.97   4.91 -3.04 -6.32  2.54  -0.430 M2    no                  62            72 Normal Karyotype     NUP98 Translocation  Intermed~ Poor           Fema~ 0.267  27.7
    5 TCGA-AB-2891  -0.614  -5.65    7.73  -0.233 -17.5    1.14  5.79 -4.33  6.45  -4.94  M1    no                  42            68 Complex Cytogenetics Complex Cytogenetics Poor      Poor           Male  0.467  10.7
    6 TCGA-AB-2872  -6.16    0.636  -2.32   2.88   -4.59   9.69 -3.75 -6.65 -2.74  -7.25  M3    no                  42            88 PML-RARA             PML-RARA             Good      Good           Male  0.333   2.1
    
    [[4]]
    # A tibble: 6 x 25
      Sample_ID      PC31   PC32  PC33   PC34  PC35   PC36  PC37    PC38   PC39   PC40   PC41   PC42   PC43 FAB   prior_malignancy   Age BM_percentage Cytogenetic_Code   Histologic_Subt~ Risk_Cyto Risk_Molecular Sex  
      <chr>         <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <chr> <chr>            <dbl>         <dbl> <chr>              <chr>            <chr>     <chr>          <chr>
    1 TCGA-AB-2856  5.05  -0.331 4.89  -0.800  5.12 -2.55   5.17 -4.18    2.47  -0.731  -3.79  4.83  -0.217 M4    no                  63            82 Normal Karyotype   NUP98 Transloca~ Intermed~ Poor           Male 
    2 TCGA-AB-2849  1.95   6.20  6.48  -1.89  -3.98 -1.71  -5.34 -6.99   -0.456 -4.25   -5.26 -0.251 -1.14  M0    no                  39            83 Complex Cytogenet~ Complex Cytogen~ Poor      Poor           Male 
    3 TCGA-AB-2971 -1.45  -1.12  6.74   1.78  -4.52 -2.45  -1.84  0.867  -4.63  -3.43    6.76 -3.21  -4.46  M4    no                  76            91 Normal Karyotype   Normal Karyotype Intermed~ Intermediate   Fema~
    4 TCGA-AB-2930  0.590 -1.30  4.24  -0.500 -3.07  1.57   6.70 -0.0950 -1.35   3.70   -3.69  5.97  -6.96  M2    no                  62            72 Normal Karyotype   NUP98 Transloca~ Intermed~ Poor           Fema~
    5 TCGA-AB-2891  7.56  20.2   1.03  10.7    3.92  7.09  -1.11  3.35   -6.12  -8.76    4.35  4.19   2.27  M1    no                  42            68 Complex Cytogenet~ Complex Cytogen~ Poor      Poor           Male 
    6 TCGA-AB-2872 -2.71   7.86  0.680 -6.54  -2.19 -0.605 -3.26  2.91   -3.41  -1.13  -18.4  -8.18  -2.87  M3    no                  42            88 PML-RARA           PML-RARA         Good      Good           Male 
    # ... with 2 more variables: TMB <dbl>, WBC <dbl>
    
    [[5]]
    # A tibble: 6 x 22
      Sample_ID      PC41   PC42   PC43   PC44  PC45   PC46    PC47  PC48   PC49    PC50 FAB   prior_malignancy   Age BM_percentage Cytogenetic_Code     Histologic_Subtype   Risk_Cyto  Risk_Molecular Sex     TMB   WBC
      <chr>         <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>   <dbl> <dbl>  <dbl>   <dbl> <chr> <chr>            <dbl>         <dbl> <chr>                <chr>                <chr>      <chr>          <chr> <dbl> <dbl>
    1 TCGA-AB-2856  -3.79  4.83  -0.217 -3.34   2.78 -1.60   -3.20   2.92  3.85    5.16  M4    no                  63            82 Normal Karyotype     NUP98 Translocation  Intermedi~ Poor           Male  0      76.7
    2 TCGA-AB-2849  -5.26 -0.251 -1.14   6.12   7.82 -0.429   5.07  -1.12  0.862  -0.320 M0    no                  39            83 Complex Cytogenetics Complex Cytogenetics Poor       Poor           Male  0.733   5  
    3 TCGA-AB-2971   6.76 -3.21  -4.46  -0.265 -3.53  4.03    7.38   3.47 -1.40    4.80  M4    no                  76            91 Normal Karyotype     Normal Karyotype     Intermedi~ Intermediate   Fema~ 0.3     5  
    4 TCGA-AB-2930  -3.69  5.97  -6.96  -7.68  -2.56  2.13   -4.84   1.21  1.63    4.46  M2    no                  62            72 Normal Karyotype     NUP98 Translocation  Intermedi~ Poor           Fema~ 0.267  27.7
    5 TCGA-AB-2891   4.35  4.19   2.27  -4.00   7.10 -1.37  -17.7   -5.59 -2.52    2.77  M1    no                  42            68 Complex Cytogenetics Complex Cytogenetics Poor       Poor           Male  0.467  10.7
    6 TCGA-AB-2872 -18.4  -8.18  -2.87  -2.53  -1.49 -6.13    0.561  5.53  0.361 -10.8   M3    no                  42            88 PML-RARA             PML-RARA             Good       Good           Male  0.333   2.1
    
    [[6]]
    # A tibble: 6 x 17
      Sample_ID     PC51   PC52   PC53  PC54   PC55 FAB   prior_malignancy   Age BM_percentage Cytogenetic_Code     Histologic_Subtype   Risk_Cyto    Risk_Molecular Sex      TMB   WBC
      <chr>        <dbl>  <dbl>  <dbl> <dbl>  <dbl> <chr> <chr>            <dbl>         <dbl> <chr>                <chr>                <chr>        <chr>          <chr>  <dbl> <dbl>
    1 TCGA-AB-2856 -6.23  -2.26 -1.65   6.17 -3.79  M4    no                  63            82 Normal Karyotype     NUP98 Translocation  Intermediate Poor           Male   0      76.7
    2 TCGA-AB-2849 -3.56   3.60  0.119  3.79 -0.326 M0    no                  39            83 Complex Cytogenetics Complex Cytogenetics Poor         Poor           Male   0.733   5  
    3 TCGA-AB-2971 -1.68  -1.38  6.30   5.94 -5.98  M4    no                  76            91 Normal Karyotype     Normal Karyotype     Intermediate Intermediate   Female 0.3     5  
    4 TCGA-AB-2930 -1.51   8.69 -3.16   1.68 -5.38  M2    no                  62            72 Normal Karyotype     NUP98 Translocation  Intermediate Poor           Female 0.267  27.7
    5 TCGA-AB-2891  4.70 -10.8  -3.64   2.48 -6.72  M1    no                  42            68 Complex Cytogenetics Complex Cytogenetics Poor         Poor           Male   0.467  10.7
    6 TCGA-AB-2872 -7.25  -2.64 -6.98   1.72 -4.19  M3    no                  42            88 PML-RARA             PML-RARA             Good         Good           Male   0.333   2.1