Search code examples
pythonbioinformaticsargparsefile-handlingdna-sequence

Cannot output file: no file created


I'm brand new to python, and am struggling to understand why my program will not print despite my best efforts to understand I/O and file handling.

The below code should take in a fastQ or fasta file (for DNA or protein sequences) and prune the sequences according to user-specified quality, then create a new file with the pruned sequences.

The trouble comes when I attempt to run the program from the command line:

python endtrim --min_q 35 --in_33 fQ.txt --out_33 fQ_out.txt

The program runs without incident (no errors or trace issues), but I don't see the file fQ_out.txt being created. Methinks the problem lies somewhere with argparse, since I don't get a help message when running:

python endtrim --help

Can someone please point me in the right direction?

from __future__ import division, print_function
import argparse
import collections
import sys
import re
from string import punctuation
from fastRead import *
ready2trim = ()

def parse_arguments():
    """Creates a bevvy of possible sort arguments from command line and
    binds them to their respective names"""
    parser = argparse.ArgumentParser("--h", "--help", description=__doc__, \
                                     formatter_class=argparse.\
                                     RawDescriptionHelpFormatter)
    options = parse_arguments()

    #quality argument
    parser.add_argument("--min_qual", action='store', default=30, \
                        dest='min_qual', help="""Lowest quality value 
                        that can appear in the output""")
    #input arguments
    parser.add_argument("--in_33", action='store', default=sys.stdin, \
                        dest='in_33', nargs='?', help="""Input file in fastq format, using Phred+33 coding""")
    parser.add_argument("--in_64", action='store', default=sys.stdin, \
                        dest='in_64', nargs='?', help="""Input file in fastq format, using Phred+64 coding""")
    parser.add_argument("--in_fasta", action='store', default=sys.stdin, \
                        dest='in_fasta', nargs='?', help="""Input fasta format, requires concurrent --in_qual argument""")
    parser.add_argument("--in_qual", action='store', default=sys.stdin, \
                        dest='in_qual', nargs='?', help="""Input quality format, requires concurrent --in_fasta argument""")                    
    #output arguments
    parser.add_argument("--out_33", action='store', default=sys.stdout, \
                        dest='out_33', nargs='?', help="""Output file in fastq format, 
                        using Phred+33 coding""")
    parser.add_argument("--out_64", action='store', default=sys.stdout, \
                        dest='out_64', nargs='?', help="""Output file in fastq format, 
                        using Phred+33 coding""")                   
    parser.add_argument("--out_fasta", action='store', default=sys.stdout, \
                        dest='out_fasta', nargs='?', help="""Output fasta format, 
                        """)
    parser.add_argument("--out_qual", action='store', default=False, \
                        dest='out_qual', nargs='?', help="""Output quality format, 
                        """)    
    args = parser.parse_args()
    return args

def incoming(args):
    """interprets argparse command and assigns appropriate format for
    incoming file"""
    if options.in_fasta and options.in_qual:
        #ready2trim is the input after being read by fastRead.py
        ready2trim = read_fasta_with_quality(open(options.in_fasta), \
        open(options.in_qual))
        return ready2trim
    elif options.in_33:
        ready2trim = read_fastq(open(options.in_33))
        #phredCode_in specifies the Phred coding of the input fastQ
        phredCode_in = 33
        return ready2trim
    elif options.in_64:
        ready2trim = read_fastq(open(options.in_64))
        phredCode_in = 64
        return ready2trim
    else: sys.stderr.write("ERR: insufficient input arguments")

def print_output(seqID, seq, comm, qual):
    """interprets argparse command and creates appropriate format for
    outgoing file"""
    #Printing a fastQ
    if options.out_33 or options.out_64:
        if options.out_33:
            #phredCode_out specifies the Phred coding of the output fastQ
            phredCode_out = 33
            if comm:
                #outputfh is the file handle of new output file
                with open(options.out_33,'a') as outputfh:
                    outputfh.write("@{}\n{}\n{}\n+".format(seqID, seq, comm))
            else:
                with open(options.out_33,'a') as outputfh:
                    outputfh.write("@{}\n{}\n+".format(seqID, seq))             
        else: 
            phredCode_out = 64
            if comm:
                #outputfh is the file handle of new output file
                with open(options.out_33,'a') as outputfh:
                    outputfh.write("@{}\n{}\n{}\n+".format(seqID, seq, comm))
            else:
                with open(options.out_33,'a') as outputfh:
                    outputfh.write("@{}\n{}\n+".format(seqID, seq))
        print(''.join(str(chr(q+phredCode_out)) for q in qual))
    #Print a fasta
    if options.out_fasta:
        outputfh = open(options.out_fasta, "a")
        if(comment == ''):
            output.write('>{}\n{}\n'.format(seqID, seq))
        else: output.write('>{} {}\n{}\n'.format(seqID, comm, seq))
    #Print a qual
    if options.out_qual:
        outputfh = open(options.out_qual, "a")
        if(comment == ''):
            output.write('>{}\n{}\n'.format(seqID, seq))
        else: output.write('>{} {}\n{}\n'.format(seqID, comm, seq))

def main(args):
    """Prints combined fastq sequence from separate fasta and quality
    files according to user-generated arguments """
    for (seqID, seq, comm, qual) in ready2trim:
        for q in qual:
            #i counts satisfactory bases to later print that number of
            i = 0
            if ord(q) - phredCode_in >= min_qual:
                i += 1
            print_output(seqID, seq[0:i], comm, qual[0:i])  
        sys.stderr.write("ERR: sys.stdin is without sequence data")

if __name__ == "__main__" :
    sys.exit(main(sys.argv))

Solution

  • parse_arguments seems to be calling itself recursively, while it is not called at all from anywhere else in the program

    def parse_arguments():
        """Creates a bevvy of possible sort arguments from command line and
        binds them to their respective names"""
        parser = argparse.ArgumentParser("--h", "--help", description=__doc__, \
                                         formatter_class=argparse.\
                                         RawDescriptionHelpFormatter)
        options = parse_arguments()
    

    Perhaps this options line should be in the main function, or global?