Search code examples
python python-3.x csv neural-network nested-loops

Nested for loop : List - Not getting the desired output


What I want to do is to estimate the score of each peptide i.e. row

My code is as follows:

import csv, math

def train_data(fname):
    """Read a tab-delimited training file into three parallel column lists.

    The first line of the file is treated as the header and must name the
    columns 'peptide', 'allele' and 'score'.  Values are returned exactly
    as read from the file, i.e. as strings (scores are not converted).

    Returns:
        A list ``[peptide, allele, score]`` holding one list per column,
        all in file order.
    """
    with open(fname) as handle:
        # Materialise the rows so each column can be pulled out in turn.
        records = list(csv.DictReader(handle, delimiter='\t'))

    peptide = [record['peptide'] for record in records]
    allele = [record['allele'] for record in records]
    score = [record['score'] for record in records]
    return [peptide, allele, score]

def ff():
        """Load 'sample.txt' through train_data() and print peptide characters.

        This is the version the question is about.  It takes no arguments,
        returns None, and only prints.  `allele` and `score` are unpacked but
        never used, and the table `p` is only referenced by the commented-out
        est_score line.
        """
        peptide, allele, score = train_data('sample.txt')
        # Lookup table keyed by one-letter codes; each value is the mean of two constants.
        p={'A':(0.074+0.077)/2, 'R':(0.052+0.053)/2, 'N':(0.045+0.044)/2, 'D':(0.054+0.051)/2, 'C':(0.025+0.022)/2, 'Q':(0.034+0.035)/2, 'E':(0.054+0.056)/2, 'G':(0.074+0.074)/2, 'H':(0.026+0.025)/2, 'I':(0.068+0.064)/2, 'L':(0.099+0.096)/2, 'K':(0.058+0.058)/2, 'M':(0.025+0.024)/2, 'F':(0.047+0.048)/2, 'P':(0.039+0.041)/2, 'S':(0.057+0.059)/2, 'T':(0.051+0.053)/2, 'W':(0.013+0.014)/2, 'Y':(0.032+0.033)/2, 'V':(0.073+0.072)/2}
        # range(len(peptide)) is evaluated once, so the loop still runs once per
        # original row even though the list grows inside the body.
        for i in range(len(peptide)):
#                peptide[i]=list(peptide[i])
                # NOTE: this appends row i onto the end of the same list, so
                # `peptide` is twice its original length once the loop finishes.
                peptide.append(peptide[i])
                for j in range(len(peptide[i])):
                        # NOTE: the row index is hard-coded to 2, so the characters
                        # of row 2 are printed on every pass of the outer loop,
                        # whatever `i` is.  This raises IndexError if there are
                        # fewer than three rows or row 2 is shorter than row i.
                        print(peptide[2][j])
                        #est_score+=p[peptide[i][j]]
                print ('---')
        # Second character of the third row.
        print(peptide[2][1])

# Entry point: call ff() only when this file is run as a script, not when it is imported.
if __name__=='__main__':

        ff()

When I run this code, the print statement inside the loop outputs the characters of every peptide (i.e. peptide[i][j]), but what I want is only the peptide[2][j] values. Outside the loop it works fine: print(peptide[2][1]) gives the expected output, i.e. the value 'A'.

my csv file is like this:

peptide score   allele  
AAAGAEAGKATTEEQ 0.190842    DRB1_0101
AAAGAEAGKATTEEQ 0.006301    DRB1_0301
AAAGAEAGKATTEEQ 0.066851    DRB1_0401
AAAGAEAGKATTEEQ 0.006344    DRB1_0405
AAAGAEAGKATTEEQ 0.035130    DRB1_0701
AAAGAEAGKATTEEQ 0.006288    DRB1_0802
AAAGAEAGKATTEEQ 0.176268    DRB1_0901
AAAGAEAGKATTEEQ 0.042555    DRB1_1101
AAAGAEAGKATTEEQ 0.114855    DRB1_1302
AAAGAEAGKATTEEQ 0.006377    DRB1_1501
AAAGAEAGKATTEEQ 0.006296    DRB3_0101
AAAGAEAGKATTEEQ 0.006313    DRB4_0101
AAAGAEAGKATTEEQ 0.070413    DRB5_0101

What I want to do is estimate the score of each peptide separately (i.e. one score per row, not one total over all the rows together), using: est_score+=p[peptide[i][j]]


Solution

  • import csv, math
    
    # Per-letter weight table: each one-letter code maps to the mean of its
    # two listed values.  (Presumably amino-acid frequencies from two
    # reference sets -- confirm against the original data source.)
    p = {
        residue: (first + second) / 2
        for residue, (first, second) in {
            'A': (0.074, 0.077),
            'R': (0.052, 0.053),
            'N': (0.045, 0.044),
            'D': (0.054, 0.051),
            'C': (0.025, 0.022),
            'Q': (0.034, 0.035),
            'E': (0.054, 0.056),
            'G': (0.074, 0.074),
            'H': (0.026, 0.025),
            'I': (0.068, 0.064),
            'L': (0.099, 0.096),
            'K': (0.058, 0.058),
            'M': (0.025, 0.024),
            'F': (0.047, 0.048),
            'P': (0.039, 0.041),
            'S': (0.057, 0.059),
            'T': (0.051, 0.053),
            'W': (0.013, 0.014),
            'Y': (0.032, 0.033),
            'V': (0.073, 0.072),
        }.items()
    }
    
    def train_data(fname):
        """Load a tab-delimited training file into three parallel column lists.

        The first line of the file is the header and must contain the columns
        'peptide', 'allele' and 'score'.  Values are kept as the strings read
        from the file; scores are not converted to numbers.

        Args:
            fname: Path of the tab-separated file to read.

        Returns:
            A list ``[peptide, allele, score]`` of three lists, one entry per
            data row, in file order.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If one of the three required columns is missing from
                the header.
        """
        peptide = []
        allele = []
        score = []
        # newline='' lets the csv module do its own line-ending handling, as
        # its documentation requires; without it, line breaks inside quoted
        # fields are rewritten by the text layer before csv sees them.
        with open(fname, newline='') as train:
            reader = csv.DictReader(train, delimiter='\t')
            for row in reader:
                peptide.append(row['peptide'])
                allele.append(row['allele'])
                score.append(row['score'])

        return [peptide, allele, score]
    
    def ff():
        """Print an estimated score beside the recorded score for each row.

        Rows come from train_data('peptide.txt').  A row's estimate is the
        running total of p[letter] over the letters of its peptide string,
        starting from 0.  Each row prints one result line followed by a
        '---' separator; finally the second character of the third peptide
        is printed.  Takes no arguments and returns None.
        """
        peptide, _, score = train_data('peptide.txt')
        for row_idx, sequence in enumerate(peptide):
            # Accumulate letter by letter so the floating-point additions
            # happen in sequence order.
            running_total = 0
            for residue in sequence:
                running_total = running_total + p[residue]
            estimate_text = "est_score: " + str(running_total)
            recorded_text = "\t: read_score: " + str(score[row_idx])
            print(estimate_text, recorded_text)
            print('---')
        print(peptide[2][1])
    
    # Entry point: call ff() only when this file is run as a script, not when it is imported.
    if __name__=='__main__':
    
            ff()
    

    The est_score is always the same because in the file you have provided, the peptide is identical in each row. This prints:

    est_score: 0.9625000000000001   : read_score: 0.190842
    ---
    est_score: 0.9625000000000001   : read_score: 0.006301
    ---
    est_score: 0.9625000000000001   : read_score: 0.066851
    ---
    est_score: 0.9625000000000001   : read_score: 0.006344
    ---
    est_score: 0.9625000000000001   : read_score: 0.035130
    ---
    est_score: 0.9625000000000001   : read_score: 0.006288
    ---
    est_score: 0.9625000000000001   : read_score: 0.176268
    ---
    est_score: 0.9625000000000001   : read_score: 0.042555
    ---
    est_score: 0.9625000000000001   : read_score: 0.114855
    ---
    est_score: 0.9625000000000001   : read_score: 0.006377
    ---
    est_score: 0.9625000000000001   : read_score: 0.006296
    ---
    est_score: 0.9625000000000001   : read_score: 0.006313
    ---
    est_score: 0.9625000000000001   : read_score: 0.070413
    ---
    A