python python-3.x optimization or-tools cp-sat

Is there any way to import "jobs_data" from a file (typically a .csv) for the Job Shop Scheduling example provided by Google OR-Tools?

Good day all. I am trying to load the data for the list 'jobs_data' from a file (usually a .csv). I have tried the common approaches, such as open() with the csv module, pandas, etc., but nothing has worked so far. The error I received most of the time was "TypeError: unsupported operand type(s) for +: 'int' and 'str'". I also tried converting the data into a list, but it loses its expected format:

# One inner list per job; every task is a (machine_id, processing_time) pair.
jobs_data = [
    [(0, 3), (1, 2), (2, 2)],  # job 0
    [(0, 2), (2, 1), (1, 4)],  # job 1
    [(1, 4), (2, 3)],  # job 2
]

I am not very experienced with Python programming, so I was hoping someone could help. Thank you.

After the answer given by @Ashok Arora, the code works like a charm:

import collections
import csv
import tkinter as tk

from ortools.sat.python import cp_model



# Load the job definitions: each CSV row is one job, written as alternating
# machine_id,processing_time fields (e.g. "0,3,1,2,2,2").
# newline="" is what the csv module asks for when opening files for reading.
with open("schedule.csv", newline="") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=",")
    # csv.reader yields strings; convert every field to int so the solver can
    # do arithmetic on it (string fields cause
    # "TypeError: unsupported operand type(s) for +: 'int' and 'str'").
    # Blank lines come back as empty rows and are skipped.
    jd = [
        [tuple(map(int, row[i : i + 2])) for i in range(0, len(row), 2)]
        for row in csv_reader
        if row
    ]

def MinimalJobshopSat():
    """Solve the job shop problem stored in the module-level ``jd`` and print it.

    ``jd`` is a list of jobs; each job is a list of
    ``(machine_id, processing_time)`` pairs that must run in the listed order.
    The values may be ints or numeric strings (as produced by ``csv.reader``);
    they are coerced to ``int`` here.

    Prints the minimal schedule length followed by a per-machine timeline when
    the solver reports an optimal solution, otherwise prints a short notice.
    Returns ``None``.
    """
    # Fields read from a CSV file are strings; arithmetic on them raises
    # "TypeError: unsupported operand type(s) for +: 'int' and 'str'".
    jobs_data = [
        [(int(task[0]), int(task[1])) for task in job] for job in jd
    ]
    if not any(jobs_data):
        # max() below would fail on an empty sequence.
        print('No tasks to schedule.')
        return

    model = cp_model.CpModel()
    machines_count = 1 + max(task[0] for job in jobs_data for task in job)
    all_machines = range(machines_count)
    # Running every task back to back is an upper bound on the makespan.
    horizon = sum(task[1] for job in jobs_data for task in job)

    task_type = collections.namedtuple('task_type', 'start end interval')
    assigned_task_type = collections.namedtuple(
        'assigned_task_type', 'start job index duration')

    # Create one interval variable per task and group them by machine.
    all_tasks = {}
    machine_to_intervals = collections.defaultdict(list)
    for job_id, job in enumerate(jobs_data):
        for task_id, (machine, duration) in enumerate(job):
            suffix = '_%i_%i' % (job_id, task_id)
            start_var = model.NewIntVar(0, horizon, 'start' + suffix)
            end_var = model.NewIntVar(0, horizon, 'end' + suffix)
            interval_var = model.NewIntervalVar(start_var, duration, end_var,
                                                'interval' + suffix)
            all_tasks[job_id, task_id] = task_type(start=start_var,
                                                   end=end_var,
                                                   interval=interval_var)
            machine_to_intervals[machine].append(interval_var)

    # A machine can only work on one task at a time.
    for machine in all_machines:
        model.AddNoOverlap(machine_to_intervals[machine])

    # Tasks of the same job run in the order they are listed.
    for job_id, job in enumerate(jobs_data):
        for task_id in range(len(job) - 1):
            model.Add(all_tasks[job_id, task_id + 1].start >=
                      all_tasks[job_id, task_id].end)

    # The makespan is the latest end among the last task of every job.
    # Jobs without tasks have no last task and are left out.
    obj_var = model.NewIntVar(0, horizon, 'makespan')
    model.AddMaxEquality(obj_var, [
        all_tasks[job_id, len(job) - 1].end
        for job_id, job in enumerate(jobs_data)
        if job
    ])
    model.Minimize(obj_var)

    solver = cp_model.CpSolver()
    status = solver.Solve(model)
    if status != cp_model.OPTIMAL:
        print('No optimal solution found.')
        return

    # Collect the solved start time of every task, grouped by machine.
    assigned_jobs = collections.defaultdict(list)
    for job_id, job in enumerate(jobs_data):
        for task_id, (machine, duration) in enumerate(job):
            assigned_jobs[machine].append(
                assigned_task_type(
                    start=solver.Value(all_tasks[job_id, task_id].start),
                    job=job_id,
                    index=task_id,
                    duration=duration))

    # Build two text lines per machine: task names, then [start,end] spans.
    output = ''
    for machine in all_machines:
        # Tuples compare field by field, so this orders tasks by start time.
        assigned_jobs[machine].sort()
        sol_line_tasks = 'Machine ' + str(machine) + ': '
        # Pad the second line so its columns align under the first one.
        sol_line = ' ' * len(sol_line_tasks)

        for assigned_task in assigned_jobs[machine]:
            name = 'job_%i_%i' % (assigned_task.job, assigned_task.index)
            sol_line_tasks += '%-10s' % name
            start = assigned_task.start
            duration = assigned_task.duration
            sol_tmp = '[%i,%i]' % (start, start + duration)
            sol_line += '%-10s' % sol_tmp

        sol_line += '\n'
        sol_line_tasks += '\n'
        output += sol_line_tasks
        output += sol_line

    print('Optimal Schedule Length: %i' % solver.ObjectiveValue())
    print(output)


MinimalJobshopSat()

Solution

Assume jobs.csv to be as follows:

0,3,1,2,2,2
0,2,2,1,1,4
1,4,2,3

wherein each row represents a job and each row has len(row)/2 tuples of the form (m, p) where m = machine id and p = processing time. This follows the convention used by Google OR-Tools.

One way to read the CSV into a list of lists of string tuples:

import csv

# Read jobs.csv row by row; consecutive field pairs become (machine, time)
# tuples. The fields are kept as strings, exactly as csv.reader returns them.
with open("jobs.csv") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=",")
    jobs_data = []
    for row in csv_reader:
        pairs = [tuple(row[start : start + 2]) for start in range(0, len(row), 2)]
        jobs_data.append(pairs)
    print(jobs_data)

will print:

[[('0', '3'), ('1', '2'), ('2', '2')], [('0', '2'), ('2', '1'), ('1', '4')], [('1', '4'), ('2', '3')]]

Edit: To get tuples of int instead of str, convert each field with the map() function.

# Same CSV layout as before, but every field is converted to int so the
# resulting (machine, time) tuples hold numbers rather than strings.
with open("jobs.csv") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=",")
    jobs_data = [
        [
            tuple(int(field) for field in row[i : i + 2])
            for i in range(0, len(row), 2)
        ]
        for row in csv_reader
    ]
    print(jobs_data)