I would appreciate anyone's help and advice with this question
The 1st dataframe contains drugs given to patients along with the dates administered. I need to run through this dataframe and calculate the daily dose for each drug per id so that the dataframe outputted resembles 2nd dataframe.
1st dataframe
structure(list(id = c(1010001, 1010001, 1010001, 1010001, 1010001,
1010001, 1010001, 1010001, 1010001, 1010001, 1010001, 1010001,
1010001, 1010001, 1010001, 1010001, 1010001, 1010001, 1010001,
1010001, 1010001, 1010001, 1010001, 1010001, 1010001, 1010001,
1010001, 1010001, 1010001, 1010001, 1010001, 1010001, 1010001,
1010001, 1010001, 1010001, 1010001, 1010001, 1010001, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002), drug = c("Furosemide",
"Furosemide", "Furosemide", "Furosemide", "Furosemide", "Magnesium Sulfate",
"Methylprednisolone", "Methylprednisolone", "Morphine", "Acetaminophen",
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Cefazolin",
"Cefazolin", "Furosemide", "Ganciclovir", "Ganciclovir", "Methylprednisolone",
"Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Mycophenolate Mofetil", "Mycophenolate Mofetil", "Nystatin Mouthwash",
"Nystatin Mouthwash", "Nystatin Mouthwash", "Nystatin Mouthwash",
"Nystatin Mouthwash", "Nystatin Mouthwash", "Nystatin Mouthwash",
"Nystatin Mouthwash", "Ranitidine", "Ranitidine", "Ranitidine",
"Ranitidine", "Furosemide", "Lorazepam", "Magnesium Sulfate",
"Sodium Phosphate", "Acetaminophen", "Dimenhydrinate", "Dimenhydrinate",
"Dimenhydrinate", "Dimenhydrinate", "Fentanyl", "Fentanyl", "Hydralazine",
"Hydralazine", "Hydralazine", "Hydralazine", "Hydralazine", "Hydralazine",
"Nifedipine", "Nifedipine", "Nifedipine", "Nifedipine", "Nifedipine",
"Nifedipine", "Nifedipine", "Nifedipine", "Ondansetron", "Ondansetron",
"Ondansetron", "Ondansetron", "Ondansetron", "Ondansetron", "Ondansetron",
"CMV Immune Globulin", "CMV Immune Globulin", "Cefazolin", "Cefazolin",
"Cefazolin", "Cefazolin", "Cefazolin", "Cefazolin", "Cefazolin",
"Cefazolin", "Cefazolin", "Cefazolin", "Cefazolin", "Cefazolin",
"Furosemide", "Ganciclovir", "Ganciclovir", "Ganciclovir", "Ganciclovir",
"Ganciclovir", "Ganciclovir", "Ganciclovir", "Ganciclovir", "Ganciclovir",
"Ganciclovir", "Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Methylprednisolone", "Methylprednisolone", "Methylprednisolone",
"Mycophenolate Mofetil", "Mycophenolate Mofetil", "Mycophenolate Mofetil",
"Mycophenolate Mofetil", "Mycophenolate Mofetil", "Mycophenolate Mofetil",
"Mycophenolate Mofetil", "Mycophenolate Mofetil", "Mycophenolate Mofetil",
"Nystatin Mouthwash", "Nystatin Mouthwash", "Nystatin Mouthwash",
"Nystatin Mouthwash", "Nystatin Mouthwash", "Nystatin Mouthwash",
"Nystatin Mouthwash", "Nystatin Mouthwash", "Nystatin Mouthwash",
"Nystatin Mouthwash", "Nystatin Mouthwash", "Nystatin Mouthwash",
"Nystatin Mouthwash", "Nystatin Mouthwash", "Nystatin Mouthwash",
"Ranitidine", "Ranitidine", "Ranitidine", "Ranitidine", "Ranitidine",
"Ranitidine", "Ranitidine", "Ranitidine", "Ranitidine", "Ranitidine",
"Ranitidine", "Ranitidine", "Ranitidine", "Ranitidine", "Tacrolimus",
"Tacrolimus", "Tacrolimus", "Tacrolimus", "Tacrolimus", "Tacrolimus",
"Tacrolimus", "Tacrolimus", "Tacrolimus", "Furosemide", "Lorazepam",
"Midazolam", "Midazolam", "Propofol", "Propofol", "Acetaminophen",
"Midazolam", "Midazolam", "Midazolam", "Ondansetron", "Propofol",
"Propofol", "Propofol", "Calcium Carbonate", "Calcium Carbonate",
"Calcium Carbonate", "Cotrimoxazole", "Cotrimoxazole", "Magnesium Hydroxide",
"Mycophenolate Mofetil", "Mycophenolate Mofetil", "Mycophenolate Mofetil",
"Mycophenolate Mofetil", "Omeprazole", "Omeprazole", "Omeprazole",
"Omeprazole", "Oxybutynin", "Oxybutynin", "Oxybutynin", "Oxybutynin",
"Prednisone", "Prednisone", "Prednisone", "Tacrolimus", "Tacrolimus",
"Tacrolimus"), date = structure(c(1145404800, 1145404800, 1145404800,
1145491200, 1145491200, 1145318400, 1145318400, 1145404800, 1145318400,
1145404800, 1145404800, 1145491200, 1145491200, 1145404800, 1145491200,
1145491200, 1145318400, 1145404800, 1145404800, 1145404800, 1145404800,
1145491200, 1145491200, 1145491200, 1145491200, 1145404800, 1145491200,
1145318400, 1145404800, 1145404800, 1145404800, 1145404800, 1145491200,
1145491200, 1145491200, 1145404800, 1145404800, 1145491200, 1145491200,
1212105600, 1212019200, 1211932800, 1212364800, 1211932800, 1211932800,
1212019200, 1212105600, 1212278400, 1211932800, 1211932800, 1212105600,
1212192000, 1212192000, 1212192000, 1212192000, 1212278400, 1212192000,
1212278400, 1212278400, 1212278400, 1212278400, 1212278400, 1212364800,
1212364800, 1211932800, 1212019200, 1212105600, 1212105600, 1212192000,
1212278400, 1212364800, 1212019200, 1212105600, 1211932800, 1212019200,
1212105600, 1212105600, 1212192000, 1212192000, 1212192000, 1212278400,
1212278400, 1212278400, 1212364800, 1212364800, 1212105600, 1212019200,
1212105600, 1212105600, 1212105600, 1212105600, 1212192000, 1212278400,
1212278400, 1212364800, 1212364800, 1211932800, 1212019200, 1212019200,
1212019200, 1212105600, 1212105600, 1212105600, 1212105600, 1212192000,
1212192000, 1212192000, 1212192000, 1212278400, 1212278400, 1212278400,
1212364800, 1212364800, 1212364800, 1212105600, 1212105600, 1212105600,
1212192000, 1212192000, 1212192000, 1212278400, 1212278400, 1212364800,
1212019200, 1212105600, 1212105600, 1212105600, 1212105600, 1212192000,
1212192000, 1212192000, 1212192000, 1212278400, 1212278400, 1212278400,
1212278400, 1212364800, 1212364800, 1211932800, 1212019200, 1212019200,
1212105600, 1212105600, 1212192000, 1212192000, 1212192000, 1212278400,
1212278400, 1212278400, 1212364800, 1212364800, 1212364800, 1212105600,
1212105600, 1212105600, 1212105600, 1212192000, 1212192000, 1212278400,
1212278400, 1212364800, 1212192000, 1223942400, 1224028800, 1224028800,
1224028800, 1224028800, 1224115200, 1224028800, 1224028800, 1224028800,
1224115200, 1224028800, 1224028800, 1224028800, 1224028800, 1224028800,
1224115200, 1224028800, 1224115200, 1224028800, 1224028800, 1224028800,
1224028800, 1224115200, 1224028800, 1224028800, 1224028800, 1224115200,
1224028800, 1224028800, 1224028800, 1224115200, 1224028800, 1224028800,
1224115200, 1224028800, 1224028800, 1224115200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), dose = c(40, 10, 12, 20, 20, 6, 46,
46, 60, 500, 500, 500, 500, 1000, 1000, 20, 60, 60, 23, 23, 23,
23, 23, 23, 23, 500, 500, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05,
1e+05, 1e+05, 1e+05, 50, 50, 50, 50, 5, 2, 10, 10, 500, 20, 20,
20, 20, 50, 50, 5, 10, 10, 10, 10, 10, 5, 5, 5, 5, 5, 5, 5, 5,
4, 4, 4, 4, 4, 4, 4, 7500, 7500, 1000, 1000, 1000, 1000, 1000,
1000, 1000, 1000, 1000, 1000, 1000, 1000, 10, 56, 56, 112, 112,
112, 112, 112, 112, 112, 112, 45, 45, 23, 23, 23, 23, 23, 23,
20, 23, 20, 23, 20, 20, 20, 20, 20, 20, 400, 400, 400, 400, 400,
400, 400, 400, 400, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05,
1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05,
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 1, 1,
4, 4, 1, 1.3, 1.3, 1.3, 1.3, 5, 4, 4, 4, 25, 25, 650, 4, 4, 4,
4, 25, 25, 25, 200, 200, 200, 80, 80, 130, 750, 750, 750, 750,
20, 20, 20, 20, 5, 5, 5, 5, 12.5, 12.5, 12.5, 4, 4, 4), units = c("mg",
"mg", "mg", "mg", "mg", "mmol", "mg", "mg", "mcg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", NA, "mg",
"mg", "mg", "mg", "mg", "mg", NA, "U", "U", "U", "U", "U", "U",
"U", "mg", "mg", "mg", "mg", "mg", "mg", "mmol", "mmol", "mg",
"mg", "mg", "mg", "mg", "mcg", "mcg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", NA, "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", NA, "mg", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", NA, "mg",
"mg", "mg", "mg", "U", "U", "U", "U", "U", "U", "U", "U", "U",
"U", "U", "U", "U", "U", "U", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "ng", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg",
NA, "mg", "mg", NA, "mg", "mg", "mg", "mg", "mg", "mg", NA, "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg")), row.names = c(NA,
-199L), class = c("tbl_df", "tbl", "data.frame"))
2nd dataframe
structure(list(id = c(1010001, 1010001, 1010001, 1010001, 1010001,
1010001, 1010001, 1010001, 1010001, 1010001, 1010001, 1010001,
1010001), drug = c("Furosemide", "Furosemide", "Magnesium Sulfate",
"Methylprednisolone", "Methylprednisolone", "Morphine", "Acetaminophen",
"Acetaminophen", "Cefazolin", "Cefazolin", "Ganciclovir", "Ganciclovir",
"Methylprednisolone"), date = structure(c(1145404800, 1145491200,
1145318400, 1145318400, 1145404800, 1145318400, 1145404800, 1145491200,
1145404800, 1145491200, 1145318400, 1145404800, 1145404800), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), dailydose = c(62, 60, 6, 46, 46, 60,
1000, 1000, 1000, 1000, 60, 60, 69), units = c("mg", "mg", "mmol",
"mg", "mg", "mcg", "mg", "mg", "mg", "mg", "mg", "mg", "mg")), row.names = c(NA,
-13L), class = c("tbl_df", "tbl", "data.frame"))
If any more information is required please feel free to leave a comment and I will get back to you.
library(dplyr)
df1 %>%
group_by(id, drug, date) %>%
summarise(dailydose = sum(dose, na.rm = T),
units = first(units))