Search code examples
rdcast

Reshape long to wide with dcast


I need to reshape a dataframe csf_antibio (sample below) from long to wide format, separating all values per patient by row.

study_id,proanbx_dt,proanbx_tm,name,othername,route,dosage,units,doses,freq
CHLA_0001,2021-07-22,20:01:00,ceftriaxone,,IV,1250,mg,4,13
CHLA_0001,2021-07-22,20:19:00,metronidazole,,IV,250,mg,5,9
CHLA_0001,2021-07-22,23:17:00,vancomycin,,IV,350,mg,3,6
CHLA_0001,2021-08-09,19:34:00,cefazolin,,IV,738,mg,1,8
CHLA_0002,2020-12-18,0:30:00,cefepime,,IV,75,mg,5,8
CHLA_0002,2020-12-18,1:03:00,vancomycin,,IV,23,mg,4,13
CHLA_0002,2020-12-19,18:15:00,cefepime,,IV,60,mg,6,8
CHLA_0002,2020-12-20,4:18:00,vancomycin,,IV,24,mg,4,12
CHLA_0003,2021-04-20,15:17:00,meropenem,,IV,200,mg,2,1
CHLA_0003,2021-04-21,2:20:00,meropenem,,IV,400,mg,17,8
CHLA_0003,2021-04-22,14:16:00,Other,sulfamethoxazole-trimethoprim,IV,50,mg,9,12

I tried the following without success:

csfmelt <- melt(csf_antibio, id.vars=1:1)
csf <- dcast(csfmelt, study_id ~ variable, value.var = "value", fun.aggregate = sum)

I want the final dataframe to have each study id per row with variables

study_id,proanbx_dt1,proanbx_tm1,name1,othername1,route1,dosage1,units1,doses1,freq1,proanbx_dt2,proanbx_tm2,name2,othername2,route2,dosage2,units2,doses2,freq2,proanbx_dt3,proanbx_tm3,name3,othername3,route3,dosage3,units3,doses3,freq3,proanbx_dt4,proanbx_tm4,name4,othername4,route4,dosage4,units4,doses4,freq4
CHLA_0001,2021-07-22,20:01:00,ceftriaxone,,IV,1250,mg,4,13, 2021-07-22,20:19:00,metronidazole,,IV,250,mg,5,9, 2021-07-22,23:17:00,vancomycin,,IV,350,mg,3,6,2021-08-09,19:34:00,cefazolin,,IV,738,mg,1,8
CHLA_0002,2020-12-18,0:30:00,cefepime,,IV,75,mg,5,8,2020-12-18,1:03:00,vancomycin,,IV,23,mg,4,13,2020-12-19,18:15:00,cefepime,,IV,60,mg,6,8,2020-12-20,4:18:00,vancomycin,,IV,24,mg,4,12,2021-04-20,15:17:00,meropenem,,IV,200,mg,2,1,2021-04-21,2:20:00,meropenem,,IV,400,mg,17,8,2021-04-22,14:16:00,Other,sulfamethoxazole-trimethoprim,IV,50,mg,9,12

Thanks in advance!


Solution

  • Your desired output has a "number" component that is not naturally inferred by dcast. We can add it relatively easily with ave (base R, certainly this can be done just as easily in data.table or dplyr groupings).

    reshape2 and base R

    csfmelt$num <- ave(seq(nrow(csfmelt)), csfmelt[c("study_id","variable")], FUN = seq_along)
    head(csfmelt)
    #    study_id   variable      value num
    # 1 CHLA_0001 proanbx_dt 2021-07-22   1
    # 2 CHLA_0001 proanbx_dt 2021-07-22   2
    # 3 CHLA_0001 proanbx_dt 2021-07-22   3
    # 4 CHLA_0001 proanbx_dt 2021-08-09   4
    # 5 CHLA_0002 proanbx_dt 2020-12-18   1
    # 6 CHLA_0002 proanbx_dt 2020-12-18   2
    csfwide <- reshape2::dcast(csfmelt, study_id ~ variable + num, value.var = "value")
    csfwide
    #    study_id proanbx_dt_1 proanbx_dt_2 proanbx_dt_3 proanbx_dt_4 proanbx_tm_1 proanbx_tm_2 proanbx_tm_3 proanbx_tm_4      name_1        name_2     name_3     name_4 othername_1 othername_2                   othername_3 othername_4 route_1 route_2 route_3 route_4 dosage_1 dosage_2 dosage_3 dosage_4 units_1 units_2 units_3 units_4 doses_1 doses_2 doses_3 doses_4 freq_1 freq_2 freq_3 freq_4
    # 1 CHLA_0001   2021-07-22   2021-07-22   2021-07-22   2021-08-09     20:01:00     20:19:00     23:17:00     19:34:00 ceftriaxone metronidazole vancomycin  cefazolin                                                                        IV      IV      IV      IV     1250      250      350      738      mg      mg      mg      mg       4       5       3       1     13      9      6      8
    # 2 CHLA_0002   2020-12-18   2020-12-18   2020-12-19   2020-12-20      0:30:00      1:03:00     18:15:00      4:18:00    cefepime    vancomycin   cefepime vancomycin                                                                        IV      IV      IV      IV       75       23       60       24      mg      mg      mg      mg       5       4       6       4      8     13      8     12
    # 3 CHLA_0003   2021-04-20   2021-04-21   2021-04-22         <NA>     15:17:00      2:20:00     14:16:00         <NA>   meropenem     meropenem      Other       <NA>                         sulfamethoxazole-trimethoprim        <NA>      IV      IV      IV    <NA>      200      400       50     <NA>      mg      mg      mg    <NA>       2      17       9    <NA>      1      8     12   <NA>
    

    The column order is not what you requested, but it can be conformed a bit with this:

    variables <- as.character(unique(csfmelt$variable))
    sub(".*_", "", names(csfwide)[-(1:2)])
    #  [1] "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4"
    sub("_[^_]*$", "", names(csfwide)[-(1:2)])
    #  [1] "proanbx_dt" "proanbx_dt" "proanbx_dt" "proanbx_tm" "proanbx_tm" "proanbx_tm" "proanbx_tm" "name"       "name"       "name"       "name"       "othername"  "othername"  "othername"  "othername"  "route"      "route"      "route"      "route"      "dosage"    
    # [21] "dosage"     "dosage"     "dosage"     "units"      "units"      "units"      "units"      "doses"      "doses"      "doses"      "doses"      "freq"       "freq"       "freq"       "freq"      
    
    nms <- names(csfwide)[-(1:2)]
    newnms <- nms[order(sub(".*_", "", nms), match(nms, variables))]
    csfwide2 <- subset(csfwide, select = c(names(csfwide)[1:2], newnms))
    csfwide2
    #    study_id proanbx_dt_1 proanbx_tm_1      name_1 othername_1 route_1 dosage_1 units_1 doses_1 freq_1 proanbx_dt_2 proanbx_tm_2        name_2 othername_2 route_2 dosage_2 units_2 doses_2 freq_2 proanbx_dt_3 proanbx_tm_3     name_3                   othername_3 route_3 dosage_3 units_3 doses_3 freq_3 proanbx_dt_4 proanbx_tm_4     name_4 othername_4 route_4 dosage_4 units_4 doses_4 freq_4
    # 1 CHLA_0001   2021-07-22     20:01:00 ceftriaxone                  IV     1250      mg       4     13   2021-07-22     20:19:00 metronidazole                  IV      250      mg       5      9   2021-07-22     23:17:00 vancomycin                                    IV      350      mg       3      6   2021-08-09     19:34:00  cefazolin                  IV      738      mg       1      8
    # 2 CHLA_0002   2020-12-18      0:30:00    cefepime                  IV       75      mg       5      8   2020-12-18      1:03:00    vancomycin                  IV       23      mg       4     13   2020-12-19     18:15:00   cefepime                                    IV       60      mg       6      8   2020-12-20      4:18:00 vancomycin                  IV       24      mg       4     12
    # 3 CHLA_0003   2021-04-20     15:17:00   meropenem                  IV      200      mg       2      1   2021-04-21      2:20:00     meropenem                  IV      400      mg      17      8   2021-04-22     14:16:00      Other sulfamethoxazole-trimethoprim      IV       50      mg       9     12         <NA>         <NA>       <NA>        <NA>    <NA>     <NA>    <NA>    <NA>   <NA>