Search code examples
pythondna-sequence

Iterating through a list where I have to grab data from the first item for use in the last item


This is a follow-up to a previous question I had asked: Processing a sub-list of variable size within a larger list.

I managed to use itertools to get groups of DNA fragments out, but now I'm faced with a different problem.

I need to design primers based on these groups of DNA fragments. Primers are designed by including overlaps from different DNA fragments. Let's say I have three DNA fragments in a list, fragments A, B, and C. I need to extract:

  • the last 20 nucleotides (n.t.) of C to concatenate (in order) with the first 40 n.t. of A,
  • the reverse complement (RC) of the first 20 n.t. of B to concatenate in order with the RC of the last 40 n.t. of A,
  • the last 20 n.t. of A to concatenate with the first 40 n.t. of B,
  • the RC of the first 20 n.t. of C to concatenate with the RC of the last 40 n.t. of B,
  • the last 20 n.t. of C to concatenate with the first 40 n.t. of A,
  • the RC of the first 20 n.t. of A to concatenate with the RC of the last 40 n.t. of C.

I can't seem to solve this problem, and I'm not sure where the best place to start would be.

Code that I've already written so far outputs just "group 1" (on purpose, so I can minimize the amount of visual output I'm dealing with). Here it is:

#import BioPython Tools
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

#import csv tools
import csv
import sys
import os
import itertools

with open('constructs-to-make.csv', 'rU') as constructs:
    # DictReader yields one dict per CSV row, keyed by the header names.
    construct_list = csv.DictReader(constructs)

    def get_construct_number(row):
        """Return the row's "Construct" field (used as the grouping key)."""
        return row["Construct"]

    def get_strategy(row):
        """Return the row's "Strategy" field."""
        return row["Strategy"]

    primer_list = []
    groups = []

    # itertools.groupby only merges *consecutive* rows sharing a key, so the
    # grouping follows the row order of the CSV file.
    for key, items in itertools.groupby(construct_list, key=get_construct_number):
        for subitems in items:
            # Output is deliberately limited to the Gibson rows of construct '1'.
            if subitems['Strategy'] != 'Gibson' or subitems['Construct'] != '1':
                continue

            print(subitems['Construct'])
            sequence = subitems['Sequence']

            # Annealing portions: first 40 characters as-is, last 40 characters
            # reverse-complemented.
            fw_anneal = Seq(sequence[:40], IUPAC.unambiguous_dna)
            print(fw_anneal)
            re_anneal = Seq(sequence[-40:], IUPAC.unambiguous_dna).reverse_complement()
            print(re_anneal)

            # Overhang portions: first 20 characters reverse-complemented, last
            # 20 characters as-is.
            fw_overhang = Seq(sequence[:20], IUPAC.unambiguous_dna).reverse_complement()
            print(fw_overhang)
            re_overhang = Seq(sequence[-20:], IUPAC.unambiguous_dna)
            print(re_overhang)

Any help would be greatly appreciated!


Solution

  • I ended up using a bunch of conditionals to solve this problem.

    The code is inelegant, and involves a lot of repetition, but for a quick-and-dirty script that I'll use over and over, I think it suffices.

    ##here, i process all the gibson primers to get the final list of primers##
    ##=======================================================================##
        # Walk the flat list of primer pieces and join them into full primers,
        # appending [sequence, construct number, part number, label] entries to
        # primer_list.
        #
        # Row layout as it is used below (inferred from the indexing in this
        # fragment -- confirm against the code that builds gibson_primer_temp_list):
        #   row[0]  sequence text that gets concatenated
        #   row[1]  construct number
        #   row[2]  part number within the construct
        #   row[3]  piece type: 'fp_anneal', 'tp_anneal', 'fp_overhang', 'tp_overhang'
        #   row[4]  sequence number within the construct
        construct_num = 1      # construct currently being processed
        temp = []              # every row recorded so far; never cleared in this fragment
        part_num = 1           # part counter, reset to 1 when a new construct starts
        temp_row_num = 1       # bumped with every temp.append, so it stays at len(temp) + 1
        max_seq_num = 0        # number of rows that belong to construct_num
    
        for row in gibson_primer_temp_list:
    
            # Count the rows belonging to construct_num. This rescans the whole
            # list for every row, so the pass is quadratic in the list length.
            # NOTE(review): the count is taken before construct_num is advanced
            # further down, so on the first row of a new construct it still
            # describes the previous construct.
            max_seq_num = 0
    
            for x in gibson_primer_temp_list:
                # No-op branch: rows of later constructs are simply not counted.
                if int(x[1]) > construct_num:
                    pass
                if int(x[1]) == construct_num:
                    max_seq_num += 1
    ##        print('Const. number counter is at ' + str(construct_num) + ' and current maximum known number of sequences is ' + str(max_seq_num))
    
    ##        print(row[1])
    
    ##        if int(row[1]) < construct_num:
    ##            while construct_num < int(row[1]):
    ##        print(max_seq_num)
    ##        for row in gibson_primer_temp_list:
    ##            if int(row[1]) == construct_num:
    ##                max_seq_num += 1
    ##            if int(row[1]) > construct_num:
    ##                break
    
            #print('Construct number is ' + str(row[1]) + ' and seq. number is ' + str(row[4]))
            #print('Const. number counter is ' + str(construct_num) + ' and max. seq. number is ' + str(max_seq_num) + '.')
    
            # The row belongs to a later construct: restart the part numbering and
            # step construct_num forward until it matches the row.
            if int(row[1]) > construct_num:
                part_num = 1
                while construct_num < int(row[1]):
                    #print('Construct number is ' + str(construct_num))
                    construct_num += 1
    ##                temp_row_num += 1 #do not uncomment
                #continue - not to be added back again!
    
            if int(row[1]) == construct_num:
    
                # A row whose sequence number equals the row count is treated as
                # the last piece of the construct; it triggers the two primers
                # that join the last and first parts.
                if int(row[4]) == max_seq_num:
    
                    #print(row)
                    temp.append(row)
                    temp_row_num += 1
                    #print('We are going to make primers that join the first and last part in construct ' + str(construct_num))
                    #print('Grabbing overhang portion from part ' + str(part_num) + ', which is sequence ' + str(row[4]) + '. It has the sequence ' + str(row[0]))
                    overhang = row
                    #print('Grabbing the first sequence...')
                    # Forward primer: this (last) piece followed by sequence
                    # number 1 of the same construct.
                    # NOTE(review): x[4] is compared with the int 1 directly,
                    # whereas the check above wraps row[4] in int(). If the field
                    # is a string this never matches -- confirm the element type.
                    for x in gibson_primer_temp_list:
                        #print(row[1] == x[1] and x[4] == 1)
                        if row[1] == x[1] and x[4] == 1:
                            #print(x[0])
                            anneal = x
                            #print('The first sequence is ' + str(anneal))
                            fw_primer = overhang[0] + anneal [0]
                            #print('The forward primer on the first part is: ' + str(fw_primer))
                            primer_list.append([fw_primer, construct_num, x[2], 'fw primer'])
                            break
    
                    #print('Grabbing the third sequence...')
                    # Reverse primer, first half: sequence number 3 of the same
                    # construct. If no row matches, overhang keeps the current row.
                    for y in gibson_primer_temp_list:
                        #print(row[1] == y[1] and y[4] == 3)
                        if row[1] == y[1] and y[4] == 3:
                            #print(y[0])
                            overhang = y
                            #print('The third sequence is ' + str(overhang))
                            break
    
                    #print('Grabbing the (n-2)th sequence...')
                    # Reverse primer, second half: the piece two sequence numbers
                    # before the last one.
                    steps_backward = 2
                    target_seq_num = max_seq_num - steps_backward
                    for z in gibson_primer_temp_list:
                        #print(row[1] == z[1] and z[4] == target_seq_num)
                        if row[1] == z[1] and z[4] == target_seq_num:
                            #print(z[0])
                            anneal = z
                            #print('The n-2th sequence is ' + str(anneal))
                            break
    
                    # NOTE(review): if the loop above found no match, anneal keeps
                    # whatever it last held and z is the final row of the list, so
                    # the primer and its part label may be wrong -- verify.
                    re_primer = overhang[0] + anneal[0]
                    primer_list.append([re_primer, construct_num, z[2], 're primer'])
                    continue
    
                # Rows of part 1 (while the part counter is still 1) are only
                # recorded; no primer is built from them here.
                if part_num == int(row[2]) and part_num == 1: #if the part number counter = part number
                    #print(row)
                    temp.append(row)
                    temp_row_num += 1
                    continue #do NOT delete this continue
    
                # The row belongs to a later part: advance the counter by one and
                # fall through so the row is handled by the branches below.
                if part_num < int(row[2]):
                    #print('Current part is: ' + str(part_num) + '. Upping part number.' + '\n')
                    part_num += 1
                    #do NOT add in a "continue" here
    
    
                # fp_anneal: build the forward primer of the current part from the
                # previously recorded row plus this row.
                if part_num == int(row[2]) and row[3] == 'fp_anneal':
                    #print(row)
                    temp.append(row)
                    temp_row_num += 1
                    #print('Current part is: ' + str(part_num))
                    #print('Grabbing tp_overhang from part ' + str(part_num - 1) + '...')
                    # x is 2 for temp[0], so x == temp_row_num - 1 selects temp[-2],
                    # the row recorded immediately before this one.
                    # This loop reuses the name `row`; when it finishes, row is the
                    # last element of temp, i.e. the row appended just above, so
                    # row[0] below is still the current row's sequence.
                    # NOTE(review): if nothing matches, prev_tp_overhang is stale
                    # or undefined.
                    x = 1
                    for row in temp:
                        x += 1
                        if x == temp_row_num - 1:
                            prev_tp_overhang = row
                    #print('Sequence of tp_overhang from part ' + str(part_num - 1) + ' is: ' + prev_tp_overhang[0])
                    fw_primer_current = prev_tp_overhang[0] + row[0]
                    #print('Appending to master primer list...')
                    primer_list.append([fw_primer_current, construct_num, part_num, 'fw primer'])
                    #print('Forward primer is: ' + str(fw_primer_current) + '\n')
                    continue
    
                # tp_anneal: only recorded; it is consumed later by the
                # fp_overhang branch.
                if part_num == int(row[2]) and row[3] == 'tp_anneal':
                    #print(row)
                    temp.append(row)
                    temp_row_num += 1
                    continue
    
    
                # fp_overhang: build the reverse primer of the *previous* part
                # (labelled part_num - 1) from this row plus an earlier row.
                if part_num == int(row[2]) and row[3] == 'fp_overhang':
                    #print(row)
                    temp.append(row)
                    temp_row_num += 1
                    #print('Current temp_row_num is ' + str(temp_row_num))
                    #print('Current part is: ' + str(part_num))
                    #print('Grabbing tp_anneal from part ' + str(part_num - 1) + '...')
                    # Same counting scheme as in the fp_anneal branch:
                    # x == temp_row_num - 5 selects temp[-6]. This presumably is
                    # the previous part's tp_anneal row -- it relies on the pieces
                    # arriving in a fixed order; confirm against the input.
                    # As above, `row` ends the loop as the row appended just above.
                    x = 1
                    for row in temp:
                        x += 1
                        if x == temp_row_num - 5:
                            prev_tp_anneal = row
                            #print(row)
                            pass
                    #print('Sequence of tp_anneal from part ' + str(part_num - 1) + ' is: ' + prev_tp_anneal[0])
                    re_primer_prev = row[0] + prev_tp_anneal[0]
                    #print('Appending to master primer list...')
                    primer_list.append([re_primer_prev, construct_num, part_num - 1, 're primer'])
                    #print('Reverse primer for previous part is: ' + str(re_primer_prev) + '\n')
                    part_num += 1
                    continue
    
                # tp_overhang: only recorded; the fp_anneal branch picks it up as
                # the row recorded just before the next fp_anneal.
                if part_num == int(row[2]) and row[3] == 'tp_overhang':
                    #print(row)
                    temp.append(row)
                    temp_row_num += 1
                    continue
    
                # Rows that matched none of the branches above are skipped.
                continue
    

    Thanks everybody for the help!