Subset dataframe based on a specific number of unique values in a column in R

I have a dataframe:

group_eg <- data.frame(
  CID = c(091, 091, 091, 091,
          101, 101, 101,
          102, 102, 102, 
          103, 103,
          104, 104, 104),
  PID_A = c(1091, 1091, 1091, 1091,
            1101, 1101, 1101,
            1102, 1102, 1102,
            1103, 1103,
            1104, 1104, 1104),
  PID_B = c(2091, 2091, 2091, 2091,
            2101, 2101, 2101, 
            2102, 2102, 2102,
            2103, 2103,
            2104, 2104, 2104),
  text = c("eg1", "eg2", "eg3", "eg3", 
           "eg4","eg5", "eg6", 
           "eg7", "eg8","eg9", 
           "eg10", "eg11", 
           "eg12", "eg13", "eg14")
)

I want to divide this dataframe into a list of smaller dataframes. Each of the smaller dataframes should only contain rows that belong to 2 unique CIDs in ascending order. If the number of CIDs is not divisible by 2, the last dataframe can contain the rows that belong to just 1 CID. Notice that there are different number of observations for each unique CID, so I'm a bit stuck.

Here are the example outputs:

output1 <- data.frame(
  CID = c(091, 091, 091, 091,
          101, 101, 101),
  PID_A = c(1091, 1091, 1091, 1091,
            1101, 1101, 1101),
  PID_B = c(2091, 2091, 2091, 2091,
            2101, 2101, 2101),
  text = c("eg1", "eg2", "eg3", "eg3", 
           "eg4","eg5", "eg6")
)

output2 <- data.frame(
  CID = c(102, 102, 102, 
          103, 103),
  PID_A = c(1102, 1102, 1102,
            1103, 1103),
  PID_B = c(2102, 2102, 2102,
            2103, 2103),
  text = c( "eg7", "eg8","eg9", 
           "eg10", "eg11")
)

output3 <- data.frame(
  CID = c(104, 104, 104),
  PID_A = c(1104, 1104, 1104),
  PID_B = c(2104, 2104, 2104),
  text = c("eg12", "eg13", "eg14")
)

Does anyone know how to do this? Thank you!

Solution

Using dplyr::consecutive_id and integer division %/% you could do:

library(dplyr, warn = FALSE)

group_eg |>
  group_by(group = (consecutive_id(CID) + 1) %/% 2) |>
  group_split()
#> <list_of<
#>   tbl_df<
#>     CID  : double
#>     PID_A: double
#>     PID_B: double
#>     text : character
#>     group: double
#>   >
#> >[3]>
#> [[1]]
#> # A tibble: 7 × 5
#>     CID PID_A PID_B text  group
#>   <dbl> <dbl> <dbl> <chr> <dbl>
#> 1    91  1091  2091 eg1       1
#> 2    91  1091  2091 eg2       1
#> 3    91  1091  2091 eg3       1
#> 4    91  1091  2091 eg3       1
#> 5   101  1101  2101 eg4       1
#> 6   101  1101  2101 eg5       1
#> 7   101  1101  2101 eg6       1
#> 
#> [[2]]
#> # A tibble: 5 × 5
#>     CID PID_A PID_B text  group
#>   <dbl> <dbl> <dbl> <chr> <dbl>
#> 1   102  1102  2102 eg7       2
#> 2   102  1102  2102 eg8       2
#> 3   102  1102  2102 eg9       2
#> 4   103  1103  2103 eg10      2
#> 5   103  1103  2103 eg11      2
#> 
#> [[3]]
#> # A tibble: 3 × 5
#>     CID PID_A PID_B text  group
#>   <dbl> <dbl> <dbl> <chr> <dbl>
#> 1   104  1104  2104 eg12      3
#> 2   104  1104  2104 eg13      3
#> 3   104  1104  2104 eg14      3