This is a follow-up to a previous question I had asked: Processing a sub-list of variable size within a larger list.
I managed to use itertools to get groups of DNA fragments out, but now I'm faced with a different problem.
I need to design primers based on these groups of DNA fragments. Primers are designed by including overlaps from different DNA fragments. Let's say I have three DNA fragments in a list, fragments A, B, and C. I need to extract:
I can't seem to solve this problem, and I'm not sure where the best place to start is.
The code I've written so far outputs only "group 1" (on purpose, to minimize the amount of visual output I have to deal with). Here it is:
#import BioPython Tools
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
#import csv tools
import csv
import sys
import os
import itertools
# Read the construct table and, for every Gibson row of construct '1', print
# the four 5'/3' fragments that the primers will be assembled from.
# NOTE(review): the 'rU' open mode is kept as written; it is deprecated in
# Python 3 and rejected by Python 3.11+ -- confirm the target interpreter.
with open('constructs-to-make.csv', 'rU') as constructs:
    construct_list = csv.DictReader(constructs)

    def get_construct_number(row):
        """Return the "Construct" column of a CSV row."""
        return row["Construct"]

    def get_strategy(row):
        """Return the "Strategy" column of a CSV row."""
        return row["Strategy"]

    primer_list = []
    groups = []

    # DictReader is lazy, so the rows have to be consumed while the file is
    # still open. groupby only merges *adjacent* rows with the same construct
    # number; it does not sort the input.
    for key, items in itertools.groupby(construct_list, key=get_construct_number):
        for subitems in items:
            # Output is deliberately limited to the Gibson rows of construct '1'.
            if subitems['Strategy'] != 'Gibson' or subitems['Construct'] != '1':
                continue

            print(subitems['Construct'])
            sequence = subitems['Sequence']

            # Annealing portions: first 40 bases as-is, last 40 bases
            # reverse-complemented.
            fw_anneal = Seq(sequence[0:40], IUPAC.unambiguous_dna)
            print(fw_anneal)
            re_anneal = Seq(sequence[-40:], IUPAC.unambiguous_dna).reverse_complement()
            print(re_anneal)

            # Overhang portions: first 20 bases reverse-complemented, last 20
            # bases as-is.
            fw_overhang = Seq(sequence[0:20], IUPAC.unambiguous_dna).reverse_complement()
            print(fw_overhang)
            re_overhang = Seq(sequence[-20:], IUPAC.unambiguous_dna)
            print(re_overhang)
Any help would be greatly appreciated!
I ended up using a bunch of conditionals to solve this problem.
The code is inelegant, and involves a lot of repetition, but for a quick-and-dirty script that I'll use over and over, I think it suffices.
##here, i process all the gibson primers to get the final list of primers##
##=======================================================================##
# Walks gibson_primer_temp_list row by row and appends entries of the form
# [primer sequence, construct number, part number, 'fw primer' | 're primer']
# to primer_list.
# NOTE(review): the leading indentation of this script was lost when it was
# pasted; the nesting has to be restored before it can run. The comments below
# describe individual statements only and do not assert the block structure.
# Row fields as they are used here: row[0] is concatenated as sequence text,
# row[1] is compared with construct_num, row[2] with part_num, row[3] with
# labels such as 'fp_anneal', and row[4] with max_seq_num -- presumably
# [sequence, construct, part, role, sequence index]; TODO confirm against the
# code that builds gibson_primer_temp_list (not visible here).
# Running state: current construct counter, rows seen so far, current part
# counter, 1 + number of rows appended to temp, and the per-construct row count.
construct_num = 1
temp = []
part_num = 1
temp_row_num = 1
max_seq_num = 0
for row in gibson_primer_temp_list:
# Recount, for every row, how many rows carry the current construct_num.
max_seq_num = 0
for x in gibson_primer_temp_list:
# Rows belonging to a later construct are a no-op (the scan is not stopped).
if int(x[1]) > construct_num:
pass
if int(x[1]) == construct_num:
max_seq_num += 1
## print('Const. number counter is at ' + str(construct_num) + ' and current maximum known number of sequences is ' + str(max_seq_num))
## print(row[1])
## if int(row[1]) < construct_num:
## while construct_num < int(row[1]):
## print(max_seq_num)
## for row in gibson_primer_temp_list:
## if int(row[1]) == construct_num:
## max_seq_num += 1
## if int(row[1]) > construct_num:
## break
#print('Construct number is ' + str(row[1]) + ' and seq. number is ' + str(row[4]))
#print('Const. number counter is ' + str(construct_num) + ' and max. seq. number is ' + str(max_seq_num) + '.')
# The row belongs to a later construct than the counter: restart the part
# counter and advance construct_num until it matches the row.
# NOTE(review): max_seq_num was counted above using the old construct_num and
# is not recounted after this advance -- confirm that this is intended.
if int(row[1]) > construct_num:
part_num = 1
while construct_num < int(row[1]):
#print('Construct number is ' + str(construct_num))
construct_num += 1
## temp_row_num += 1 #do not uncomment
#continue - not to be added back again!
if int(row[1]) == construct_num:
# Row whose sequence index equals the row count of the construct: used to
# build the primers that join the first and last part of the construct.
if int(row[4]) == max_seq_num:
#print(row)
temp.append(row)
temp_row_num += 1
#print('We are going to make primers that join the first and last part in construct ' + str(construct_num))
#print('Grabbing overhang portion from part ' + str(part_num) + ', which is sequence ' + str(row[4]) + '. It has the sequence ' + str(row[0]))
overhang = row
#print('Grabbing the first sequence...')
# Look up the row with sequence index 1 of the same construct; forward primer
# = this row's sequence followed by that row's sequence.
# NOTE(review): x[4] is compared with the int 1 here, while row[4] is passed
# through int() above; if the field is stored as a string this never matches
# -- confirm the element type.
for x in gibson_primer_temp_list:
#print(row[1] == x[1] and x[4] == 1)
if row[1] == x[1] and x[4] == 1:
#print(x[0])
anneal = x
#print('The first sequence is ' + str(anneal))
fw_primer = overhang[0] + anneal [0]
#print('The forward primer on the first part is: ' + str(fw_primer))
primer_list.append([fw_primer, construct_num, x[2], 'fw primer'])
break
#print('Grabbing the third sequence...')
# Look up the row with sequence index 3 of the same construct; it replaces
# overhang for the reverse primer.
for y in gibson_primer_temp_list:
#print(row[1] == y[1] and y[4] == 3)
if row[1] == y[1] and y[4] == 3:
#print(y[0])
overhang = y
#print('The third sequence is ' + str(overhang))
break
#print('Grabbing the (n-2)th sequence...')
# Look up the row whose sequence index is two below max_seq_num.
steps_backward = 2
target_seq_num = max_seq_num - steps_backward
for z in gibson_primer_temp_list:
#print(row[1] == z[1] and z[4] == target_seq_num)
if row[1] == z[1] and z[4] == target_seq_num:
#print(z[0])
anneal = z
#print('The n-2th sequence is ' + str(anneal))
break
# Reverse primer = overhang sequence followed by anneal sequence; z[2] reads
# the loop variable left over from the scan above.
re_primer = overhang[0] + anneal[0]
primer_list.append([re_primer, construct_num, z[2], 're primer'])
continue
# Rows of part 1 are only recorded in temp; no primer is emitted for them here.
if part_num == int(row[2]) and part_num == 1: #if the part number counter = part number
#print(row)
temp.append(row)
temp_row_num += 1
continue #do NOT delete this continue
# The row belongs to a later part: advance the part counter by one.
if part_num < int(row[2]):
#print('Current part is: ' + str(part_num) + '. Upping part number.' + '\n')
part_num += 1
#do NOT add in a "continue" here
# 'fp_anneal' row of the current part: forward primer = an earlier row picked
# from temp (prev_tp_overhang) followed by row[0].
if part_num == int(row[2]) and row[3] == 'fp_anneal':
#print(row)
temp.append(row)
temp_row_num += 1
#print('Current part is: ' + str(part_num))
#print('Grabbing tp_overhang from part ' + str(part_num - 1) + '...')
# x starts at 1 and is incremented before the comparison, so the match below
# selects a row by its position in temp relative to temp_row_num.
# NOTE(review): this loop reuses the name `row`, rebinding the outer loop
# variable; the row[0] used afterwards is whatever the loop last assigned.
x = 1
for row in temp:
x += 1
if x == temp_row_num - 1:
prev_tp_overhang = row
#print('Sequence of tp_overhang from part ' + str(part_num - 1) + ' is: ' + prev_tp_overhang[0])
fw_primer_current = prev_tp_overhang[0] + row[0]
#print('Appending to master primer list...')
primer_list.append([fw_primer_current, construct_num, part_num, 'fw primer'])
#print('Forward primer is: ' + str(fw_primer_current) + '\n')
continue
# 'tp_anneal' rows are only recorded in temp.
if part_num == int(row[2]) and row[3] == 'tp_anneal':
#print(row)
temp.append(row)
temp_row_num += 1
continue
# 'fp_overhang' row of the current part: reverse primer for the previous part
# (recorded with part_num - 1) = row[0] followed by an earlier row picked from
# temp (prev_tp_anneal); afterwards the part counter is advanced.
if part_num == int(row[2]) and row[3] == 'fp_overhang':
#print(row)
temp.append(row)
temp_row_num += 1
#print('Current temp_row_num is ' + str(temp_row_num))
#print('Current part is: ' + str(part_num))
#print('Grabbing tp_anneal from part ' + str(part_num - 1) + '...')
# Same positional lookup as for the forward primer, with an offset of 5.
# NOTE(review): `row` is rebound by this loop as well.
x = 1
for row in temp:
x += 1
if x == temp_row_num - 5:
prev_tp_anneal = row
#print(row)
pass
#print('Sequence of tp_anneal from part ' + str(part_num - 1) + ' is: ' + prev_tp_anneal[0])
re_primer_prev = row[0] + prev_tp_anneal[0]
#print('Appending to master primer list...')
primer_list.append([re_primer_prev, construct_num, part_num - 1, 're primer'])
#print('Reverse primer for previous part is: ' + str(re_primer_prev) + '\n')
part_num += 1
continue
# 'tp_overhang' rows are only recorded in temp.
if part_num == int(row[2]) and row[3] == 'tp_overhang':
#print(row)
temp.append(row)
temp_row_num += 1
continue
continue
Thanks everybody for the help!