for some reason this very basic subset is not working. Can anyone replicate the error that I find?
I simply trying to summarize a variable that meets a condition.
library(tidyverse)
dat |>
summarise(
invoice_usd=sum(invoice_usd,na.rm=TRUE)
,invoice_usd_overdue = sum(invoice_usd[overdue_indicator==0],na.rm = TRUE)
)
I get the following results
Invoice_usd
<dbl>
invoice_usd_overdue
<dbl>
3525924 3525924
Both are equal to eachother?
here is the data
structure(list(invoice_usd = c(768.3, 33116.65, 29801.35, 15548.67,
748.8, 43506.0981401951, 34126.59, 110.88, 32519.93, 3814.10450994275,
123.910066653647, 9056.81686889635, 5280.35381654576, 1497.6,
25694.7, 34762.41, 30708.78, 30266.71, 5642.53296230612, 23046.73,
29233.7, 31234.7372533846, 38239.4645517168, 1191.25, 23819.49,
32331.53, 30703.51, 13821.3265705642, 62096.6980502484, 32225.3,
26647.2826958931, 35083.8675795201, 74097.9979256714, 34446.39,
43377.5, 33340.4, 31485.24, 46739.5144071148, 31732.517590235,
22900.47, 32635.43, 20213.4230960573, 1884.68, 296.25, 1001.52,
31112.93, 27884.8, 31015.68, 27337.04, 33645.83, 955, 218.85,
14113.8, 18407.59, 29783.1196862457, 44048.0748905081, 64022.9197862025,
32710.63, 32199.65, 21611.697431022, 1014229.17, 11063.370500554,
8283.50645227789, 6189.65, 823.67, 61586.7668701702, 70256.7541546641,
89136.0099085984, 8735.09297677894, 17974.67, 3552, 7684.67,
28476.61, 35574.43, 16096.93, 6199.98162780545, 44263.8256750224,
12617.6797342677, 23698.2896894991, 32507.4, 12675.12, 16298.58,
30472, 35333.2, 33603.44, 29568.23, 31927.3, 43898.4205715362,
25534.61, 34515.19, 29304.43, 33724.18, 25941.2, 171.697042400774,
29855.3747983556, 1225.08, 31576.81, 30970.71, 19705.21, 30684.18
), overdue_indicator = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0)), row.names = c(NA, -100L), class = c("tbl_df", "tbl",
"data.frame"))
Well, the problem is updated value of invoice_usd
in summarise
call is used which is a single value. If you calculate invoice_usd_overdue
first it would work as expected.
library(dplyr)
dat |>
summarise(
invoice_usd_overdue = sum(invoice_usd[overdue_indicator == 0],na.rm = TRUE),
invoice_usd =sum(invoice_usd,na.rm=TRUE)
)
# A tibble: 1 × 2
# invoice_usd_overdue invoice_usd
# <dbl> <dbl>
#1 3239980. 3525924.
Or change the name of first value.
dat |>
summarise(
invoice_usd_new =sum(invoice_usd,na.rm=TRUE),
invoice_usd_overdue = sum(invoice_usd[overdue_indicator == 0],na.rm = TRUE)
)