Search code examples
rassignment-problem

Coding an assignment problem with interconnected ('nested') constraints


I am trying to write some code to optimally assign students to projects based on their preferred choices. The aim is to find the best allocation so that the most number of students get their highest ranked project. I have based my attempts so far mainly on the very useful example posted here: https://dirkschumacher.github.io/ompr/articles/problem-course-assignment.html

This works fine as long as I just want to assign students to projects and take into account that each project has a limited capacity.

However, I would like to extend this to also take into account that:

  1. each project has a supervisor
  2. supervisors may offer more than one project
  3. supervisors can only supervise a limited number of students

In the example below, supervisor 1 may offer 2 projects and each project could be assigned to up to 4 students. However, overall the supervisor can not supervise more than 4 students, i.e. the sum of the students across the 2 projects supervised by supervisor 1 can not excede 4.

Below is the R code I have got so far that assigns 15 students to 6 possiblle projects based on their preferences.

In the example, the solution assigns supervisor 2 5 students (1 to project 3 and 4 to project 4) exceeding their capacity of 4 students maximum as their is no constraint that limits the number of students that can be assigned to a supervisor.

Despite lots of searching and trying, I just don't seem to be able to figure out how I can include an additional constraint in the model that limits the total number of students assigned to a supervisor across all projects supervised by the same supervisor.

Any suggestions would be very much appreciated.

Example Code:

library(purrr)
library(dplyr)
library(ompr)
library(ompr.roi)
library(ROI.plugin.glpk)


#Create model data
n <- 15    #Students
m <- 6    #Projects
o <- 4    #Supervisors

proj_capacity <- rep.int(4, m) # all have equal capacities
proj_info <- matrix(c(1:6,1,1,2,2,3,4,proj_capacity,4,4,4,4,4,4),6,4, dimnames = list(c(1,2,3,4,5,6), c("Project","Supervisor","Proj.Cap","Super.Cap")))

df_proj_info <- as.data.frame(proj_info)

set.seed(1234)
preference_data <- lapply(seq_len(n), function(x) sample(seq_len(m), 3))

##Functions

#Function that extracts vector for preferences of a single student
preferences <- function(student) preference_data[[student]]
preferences(1)

# Function that assigns each project a weight according to a student's choices
# if the project is not among the preferences, the weight is 10000
funcWeight <- function(student, project) {
  p <- which(as.numeric(project) == preferences(as.numeric(student)))
  as.integer(if (length(p) == 0) {
    10000
  } else {
    p
  })
}


#Build model
model <- MIPModel() %>%
  
  # 1 iff student i is assigned to project j
  add_variable(x[i, j], i = 1:n, j = 1:m, type = "binary") %>%
  
  # maximize the preferences
  set_objective(sum_over(funcWeight(i, j) * x[i, j], i = 1:n, j = 1:m),"min") %>%
  
  # we cannot exceed the capacity of a project
  add_constraint(sum_over(x[i, j], i = 1:n) <= proj_capacity[j], j = 1:m) %>%
  
  # we cannot exceed the supervisor capacity
  # add_constraint(students assigned to supervisor <= super_capacity[k], k = 1:o) %>%
  
  # each student needs to be assigned to one project
  add_constraint(sum_over(x[i, j], j = 1:m) == 1, i = 1:n)

#Model definition
model

#Solve model
result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))

#Extract Student-Project pairs
df_matching <- result %>% 
  get_solution(x[i,j]) %>%
  filter(value > .9) %>%  
  select(i, j) %>% 
  rowwise() %>% 
  mutate(weight = funcWeight(as.numeric(i), as.numeric(j)), 
         preferences = paste0(preferences(as.numeric(i)), collapse = ",")) %>% ungroup

##Decode pairs
#And merge with Student-Project pairs info
df_proj_allocation<-merge(df_matching,df_proj_info, by.x=c("j"), by.y=c("Project"),all.x=TRUE)
df_proj_allocation<-df_proj_allocation %>%
  rename(
    Student=i,
    Project=j
  )

head(df_proj_allocation)

Solution

  • Following some further reading and trying various things, I have come up with a function that extracts a vector of all projects supervised by one supervisor:

    func_find_supervisor_projects <- function(proj) {
      #Find project supervisor
      super <- df_proj_info %>%
        filter(Project==proj) %>%
        select(Supervisor)
      #Find all projects supervised by supervisor
      p1 <- df_proj_info %>%
        filter(Supervisor %in% super) %>%
        select(Project)`enter code here`
      p1<-unlist(p1)
      p1<-as.vector(p1,'numeric')
    }
    

    This is then be used to filter the model variable x, count all allocations to each supervisor and constraint it to less than the supervisor's capacity. Here is the modified model:

    model <- MIPModel() %>%
      
      # 1 iff student i is assigned to project j
      add_variable(x[i, j], i = 1:n, j = 1:m, type = "binary") %>%
      
      # maximize the preferences
      set_objective(sum_over(funcWeight(i, j) * x[i, j], i = 1:n, j = 1:m),"min") %>%
      
      # we cannot exceed the capacity of a project
      add_constraint(sum_over(x[i, j], i = 1:n) <= df_proj_info$Proj.Cap[j], j = 1:m) %>%
      
      # we cannot exceed the supervisor capacity
      add_constraint(sum_over(x[i, j1], i = 1:n, j1 = func_find_supervisor_projects(j)) <= df_proj_info$Super.Cap[j], j = 1:m) %>%
      
      # each student needs to be assigned to one project
      add_constraint(sum_over(x[i, j], j = 1:m) == 1, i = 1:n)enter code here
    

    There may be more elegant solutions, but it is working for my purposes.

    Here is the complete modified code:

    library(purrr)
    library(dplyr)
    library(ompr)
    library(ompr.roi)
    library(ROI.plugin.glpk)
    
    
    #Create model data
    n <- 15    #Students
    m <- 6    #Projects
    o <- 4    #Supervisors
    
    proj_capacity <- rep.int(4, m) # all have equal capacities
    proj_info <- matrix(c(1:6,1,1,2,2,3,4,proj_capacity,4,4,4,4,4,4),6,4, dimnames = list(c(1,2,3,4,5,6), c("Project","Supervisor","Proj.Cap","Super.Cap")))
    
    df_proj_info <- as.data.frame(proj_info)
    
    set.seed(1234)
    preference_data <- lapply(seq_len(n), function(x) sample(seq_len(m), 3))
    
    ##Functions
    
    #Function that extracts vector for preferences of a single student
    preferences <- function(student) preference_data[[student]]
    preferences(1)
    
    # Function that assigns each project a weight according to a student's choices
    # if the project is not among the preferences, the weight is 10000
    funcWeight <- function(student, project) {
      p <- which(as.numeric(project) == preferences(as.numeric(student)))
      as.integer(if (length(p) == 0) {
        10000
      } else {
        p
      })
    }
    
    #Function to create vector of all projects with same supervisor
    func_find_supervisor_projects <- function(proj) {
      #Find project supervisor
      super <- df_proj_info %>%
        filter(Project==proj) %>%
        select(Supervisor)
      #Find all projects supervised by supervisor
      p1 <- df_proj_info %>%
        filter(Supervisor %in% super) %>%
        select(Project)
      p1<-unlist(p1)
      p1<-as.vector(p1,'numeric')
    }
    
    #Build model
    model <- MIPModel() %>%
      
      # 1 iff student i is assigned to project j
      add_variable(x[i, j], i = 1:n, j = 1:m, type = "binary") %>%
      
      # maximize the preferences
      set_objective(sum_over(funcWeight(i, j) * x[i, j], i = 1:n, j = 1:m),"min") %>%
      
      # we cannot exceed the capacity of a project
      add_constraint(sum_over(x[i, j], i = 1:n) <= df_proj_info$Proj.Cap[j], j = 1:m) %>%
      
      # we cannot exceed the supervisor capacity
      add_constraint(sum_over(x[i, j1], i = 1:n, j1 = func_find_supervisor_projects(j)) <= df_proj_info$Super.Cap[j], j = 1:m) %>%
      
      # each student needs to be assigned to one project
      add_constraint(sum_over(x[i, j], j = 1:m) == 1, i = 1:n)
    
     
    
    #Model definition
    model
    
    
    #Solve model
    result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
    
    #Extract Student-Project pairs
    df_matching <- result %>% 
      get_solution(x[i,j]) %>%
      filter(value > .9) %>%  
      select(i, j) %>% 
      rowwise() %>% 
      mutate(weight = funcWeight(as.numeric(i), as.numeric(j)), 
             preferences = paste0(preferences(as.numeric(i)), collapse = ",")) %>% ungroup
    
    ##Decode pairs
    #And merge with Student-Project pairs info
    df_proj_allocation<-merge(df_matching,df_proj_info, by.x=c("j"), by.y=c("Project"),all.x=TRUE)
    df_proj_allocation<-df_proj_allocation %>%
      rename(
        Student=i,
        Project=j
      )
    
    head(df_proj_allocation)