Search code examples
r, dataframe, sorting, match, reshape

Arrange and match minimum value of vector to corresponding value in the next vector


I have a dataframe in which the ids are arranged by their minimum value (high to low). What I want to achieve is that the minimum value of each id lines up with the position of the corresponding value in the next id, and so on for every following id. A new vector, Time, is then created as 1:nrow(df).

I have done this manually and it looks like this: [image: enter image description here]

I wondered how to automate this process?

This is an example of my data:

# Example data: one decreasing MA_vol series per id (R_id).
# The scalar R_id is recycled by data.frame() to the length of each series,
# so no scratch vectors are left behind in the workspace.
df1 <- data.frame(
  R_id = 15,
  MA_vol = c(
    0.2486667, 0.2463333, 0.2426667, 0.2423333, 0.2376667, 0.2323333,
    0.2270000, 0.2246667, 0.2216667, 0.2203333, 0.2183333, 0.2126667,
    0.2076667, 0.2060000
  )
)

df2 <- data.frame(
  R_id = 13,
  MA_vol = c(
    0.2073333, 0.2053333, 0.2013333, 0.1993333, 0.1973333, 0.1970000,
    0.1966667, 0.1946667, 0.1920000, 0.1890000, 0.1883333, 0.1866667,
    0.1843333, 0.1823333, 0.1810000
  )
)

df3 <- data.frame(
  R_id = 4,
  MA_vol = c(
    0.2016667, 0.1996667, 0.1980000, 0.1970000, 0.1963333, 0.1956667,
    0.1930000, 0.1913333, 0.1900000, 0.1893333, 0.1890000, 0.1863333,
    0.1853333, 0.1820000, 0.1800000, 0.1780000, 0.1763333
  )
)

df4 <- data.frame(
  R_id = 8,
  MA_vol = c(
    0.2180000, 0.2146667, 0.2126667, 0.2103333, 0.2070000, 0.2040000,
    0.2010000, 0.1993333, 0.1956667, 0.1950000, 0.1926667, 0.1920000,
    0.1896667, 0.1890000, 0.1856667, 0.1830000, 0.1786667, 0.1763333,
    0.1733333, 0.1720000, 0.1700000, 0.1686667, 0.1670000
  )
)

df5 <- data.frame(
  R_id = 2,
  MA_vol = c(
    0.2096667, 0.2063333, 0.2030000, 0.1993333, 0.1953333, 0.1916667,
    0.1880000, 0.1870000, 0.1850000, 0.1830000, 0.1783333, 0.1753333,
    0.1726667, 0.1716667, 0.1673333, 0.1666667, 0.1656667
  )
)

# Stack the five series in long format. Base rbind() keeps the example
# runnable without attaching any package; all inputs share the same columns.
df <- rbind(df1, df2, df3, df4, df5)

Thanks for your help!


Solution

  • # Order based on each min value (high to low)
    # Minimum MA_vol per id; which.min() keeps the first row when the minimum
    # is tied within a group.
    R_minvalues <- df %>%
      group_by(R_id) %>%
      slice(which.min(MA_vol)) %>%
      ungroup() %>%                 # do not leak the grouping into later steps
      select(R_id, MA_vol)

    # Ids ranked by their minimum, highest first.
    x <- arrange(R_minvalues, desc(MA_vol))
    R_id_order <- as.numeric(x$R_id)

    # Long table with the id blocks laid out in that rank order, plus a
    # running row index t.
    MRC_DF <- df %>%
      arrange(match(R_id, R_id_order)) %>%   # block order follows R_id_order
      mutate(t = row_number()) %>%           # 1..n of the piped data; safe if empty
      select(t, R_id, MA_vol)

    # Column names after dcast() are the ids as strings; keep them in rank order.
    R_order_chr <- as.character(R_id_order)

    # Wide format: one row per t, one column per id.
    # NOTE: setDT() converts MRC_DF to a data.table by reference.
    MRC_DF_wide <- dcast(setDT(MRC_DF), t ~ R_id, value.var = "MA_vol") %>%
      select(all_of(R_order_chr))            # drops t, reorders the id columns

    # Prefix each id with "R" (e.g. "15" -> "R15") so the names are syntactic.
    colnames(MRC_DF_wide) <- paste0("R", colnames(MRC_DF_wide))
    
    # Shift a series so that its value closest to the minimum of the previous
    # column sits on the row where that minimum occurs.
    #
    # Used in this script as the reducer passed to accumulate() over the
    # columns of the wide table.
    #
    # Arguments:
    #   .x  Previous (already positioned) numeric column, padded with NA.
    #   .y  Next numeric column; its NA entries are dropped before positioning.
    #
    # Returns a vector of length(.x): leading NAs, the non-NA values of .y,
    # then trailing NAs.
    #
    # Errors (instead of failing inside rep() with "invalid 'times' argument")
    # when either input has no non-NA value, or when the shifted series would
    # start before row 1 or run past the last row of .x.
    matching.strip.fn <- function(.x, .y) {
      .y <- .y[!is.na(.y)]
      anchor_row <- which.min(.x)   # row holding the minimum of .x (NAs skipped)

      if (length(anchor_row) == 0L || length(.y) == 0L) {
        stop(
          "matching.strip.fn: `.x` and `.y` each need at least one non-NA value",
          call. = FALSE
        )
      }

      # Index within .y of the value nearest to the minimum of .x.
      nearest_idx <- which.min(abs(.x[anchor_row] - .y))

      pos <- anchor_row - nearest_idx                  # leading NA count
      n_trailing <- length(.x) - pos - length(.y)      # trailing NA count

      if (pos < 0L || n_trailing < 0L) {
        stop(
          "matching.strip.fn: aligned `.y` does not fit in length(.x) rows ",
          "(leading = ", pos, ", trailing = ", n_trailing, ")",
          call. = FALSE
        )
      }

      c(rep(NA, pos), .y, rep(NA, n_trailing))
    }
    
    # Walk the columns left to right: each column is repositioned against the
    # already repositioned column before it (the first column is kept as is).
    wide_names <- names(MRC_DF_wide)
    MRC_DF_wide <- MRC_DF_wide %>%
      accumulate(matching.strip.fn) %>%
      set_names(wide_names) %>%
      as.data.frame()

    # Drop rows where every column is NA. drop = FALSE keeps a data frame even
    # when there is only a single id column.
    all_na_row <- rowSums(is.na(MRC_DF_wide)) == ncol(MRC_DF_wide)
    MRC_DF_wide <- MRC_DF_wide[!all_na_row, , drop = FALSE]

    # Add the time index as the first column; seq_len() is safe for zero rows.
    MRC_DF_wide <- MRC_DF_wide %>%
      transform(t = seq_len(nrow(MRC_DF_wide))) %>%
      select(t, everything())