I have a dataframe that is arranged based on the minimum value for each id
(high to low). What I want to achieve is that the minimum value of each id
coincides with the position of the corresponding value in the next id,
and so on. A new vector, Time, is then created, which is 1:nrow(df).
I have done this manually and it looks like this:
I wondered how to automate this process?
This is an example of my data:
# Example data: one data frame per recession id. Each frame has the id in
# R_id (a single value recycled to every row) and that recession's volumes in
# MA_vol, kept in the order listed.
df1 <- data.frame(
  R_id = 15,
  MA_vol = c(
    0.2486667, 0.2463333, 0.2426667, 0.2423333, 0.2376667, 0.2323333,
    0.2270000, 0.2246667, 0.2216667, 0.2203333, 0.2183333, 0.2126667,
    0.2076667, 0.2060000
  )
)
df2 <- data.frame(
  R_id = 13,
  MA_vol = c(
    0.2073333, 0.2053333, 0.2013333, 0.1993333, 0.1973333, 0.1970000,
    0.1966667, 0.1946667, 0.1920000, 0.1890000, 0.1883333, 0.1866667,
    0.1843333, 0.1823333, 0.1810000
  )
)
df3 <- data.frame(
  R_id = 4,
  MA_vol = c(
    0.2016667, 0.1996667, 0.1980000, 0.1970000, 0.1963333, 0.1956667,
    0.1930000, 0.1913333, 0.1900000, 0.1893333, 0.1890000, 0.1863333,
    0.1853333, 0.1820000, 0.1800000, 0.1780000, 0.1763333
  )
)
df4 <- data.frame(
  R_id = 8,
  MA_vol = c(
    0.2180000, 0.2146667, 0.2126667, 0.2103333, 0.2070000, 0.2040000,
    0.2010000, 0.1993333, 0.1956667, 0.1950000, 0.1926667, 0.1920000,
    0.1896667, 0.1890000, 0.1856667, 0.1830000, 0.1786667, 0.1763333,
    0.1733333, 0.1720000, 0.1700000, 0.1686667, 0.1670000
  )
)
df5 <- data.frame(
  R_id = 2,
  MA_vol = c(
    0.2096667, 0.2063333, 0.2030000, 0.1993333, 0.1953333, 0.1916667,
    0.1880000, 0.1870000, 0.1850000, 0.1830000, 0.1783333, 0.1753333,
    0.1726667, 0.1716667, 0.1673333, 0.1666667, 0.1656667
  )
)
# Stack the five per-recession frames, in this order, into one long frame.
df <- bind_rows(list(df1, df2, df3, df4, df5))
Thanks for your help!
# Rank the recession ids by their minimum MA_vol, highest minimum first.
R_minvalues <- df %>%
  group_by(R_id) %>% # one group per recession id
  slice(which.min(MA_vol)) %>% # keep the row holding each group's minimum
  select(R_id, MA_vol)
x <- R_minvalues[order(-R_minvalues$MA_vol), ] # sort minima high to low
R_id_order <- as.numeric(x$R_id)

# Reorder the long data so the recessions follow R_id_order, then number the
# rows with a time index t.
MRC_DF <- df %>%
  arrange(match(R_id, R_id_order)) %>% # rows sorted by position in R_id_order
  transform(t = seq_len(nrow(df))) %>% # seq_len() stays valid for 0 rows
  select(t, R_id, MA_vol)

# Column names are character, so the id order is needed as strings too.
R_order_chr <- as.character(R_id_order)

# Reshape to wide: one column per recession id, one row per t.
# NOTE: setDT() converts MRC_DF to a data.table by reference.
# The select() keeps only the id columns (in R_id_order) and so drops t.
MRC_DF_wide <- dcast(setDT(MRC_DF), t ~ R_id, value.var = "MA_vol") %>%
  select(all_of(R_order_chr))

# Prefix every column name with "R" (e.g. "15" -> "R15") so the names are
# syntactically valid.
colnames(MRC_DF_wide) <- paste0("R", colnames(MRC_DF_wide))
# Align one column against the previous, already aligned, column.
#
# .x  Reference vector (may contain NA). Its minimum and the position of that
#     minimum define the anchor row.
# .y  Vector to align. Its NA entries are removed before alignment.
#
# Returns a vector of length(.x): the non-NA values of .y shifted so that the
# value of .y closest to min(.x) sits on the row where .x has its minimum,
# padded with NA above and below.
#
# Stops with an error when the shifted values do not fit inside length(.x),
# or when either vector has no non-NA values.
matching.strip.fn <- function(.x, .y) {
  .y <- .y[!is.na(.y)]
  anchor_value <- min(.x, na.rm = TRUE)
  # Number of NA rows to put above .y: row of the minimum of .x minus the
  # index of the value in .y that is closest to that minimum.
  pos <- which.min(.x) - which.min(abs(anchor_value - .y))
  n_after <- length(.x) - pos - length(.y)
  # rep() rejects negative or zero-length counts with an opaque message, so
  # check the padding explicitly first.
  if (length(pos) != 1L || pos < 0L || n_after < 0L) {
    stop(
      "cannot align `.y` with `.x`: the shifted values do not fit within ",
      "length(.x), or one of the inputs has no non-NA values",
      call. = FALSE
    )
  }
  c(rep(NA, pos), .y, rep(NA, n_after))
}
# Align the columns left to right: accumulate() passes each aligned column on
# as the reference (.x) for the next one; the first column is kept as it is.
MRC_DF_wide <- MRC_DF_wide %>%
  accumulate(matching.strip.fn) %>%
  set_names(names(MRC_DF_wide)) %>% # restore the column names
  as.data.frame()

# Drop the rows that are NA in every column.
# drop = FALSE keeps a data frame even when only one column is present.
is_empty_row <- rowSums(is.na(MRC_DF_wide)) == ncol(MRC_DF_wide)
MRC_DF_wide <- MRC_DF_wide[!is_empty_row, , drop = FALSE]

# Add a fresh time index t as the first column.
# seq_len() stays valid when no rows remain.
MRC_DF_wide <- MRC_DF_wide %>%
  transform(t = seq_len(nrow(MRC_DF_wide))) %>%
  select(t, everything())