Search code examples
python regex list function regex-group

Correcting python code for using functions


So, a huge disclaimer first: I am very new to Python and programming in general, and this is my first time using functions. I would be very glad for any help, but the end goal here is not to have the prettiest or the most efficient code. I just want it to somehow work.😅

I have written the program below but I can't get it to work properly.

Because this is a graded exercise for Uni I have certain restrictions that I am supposed to follow:

This is my code:


import re
import sys
# get the filename from the first command-line argument
filename = sys.argv[1]

# open the file for reading; this module-level handle is the `infile`
# that create_email_address() iterates over
infile = open(filename, 'r')


########################################################################################
# function to return just the consonants in a given name
def get_consonants(y):
    """Return the consonants of *y* joined into a single string.

    A character is kept when it is a regex word character that is neither
    an underscore nor one of the lowercase vowels a, e, i, o, u. The
    original order is preserved. Digits and uppercase vowels are word
    characters outside the excluded set, so they are kept as well.

    Parameters:
        y: the string to filter.

    Returns:
        The kept characters as one string ('' if nothing is kept).
    """
    # Raw string: in a plain literal the backslash-W is an invalid string
    # escape, which current Python versions warn about.
    only_consonants_sep = re.findall(r'[^\W_aeiou]', y)
    return ''.join(only_consonants_sep)


########################################################################################
# function to return first and last name according to all rules
def parse_name(input_y):
    # Derives a first-name part and a last name from each element of input_y.
    #
    # NOTE(review): the block is incomplete as shown -- `consonant_first_middle`
    # is never assigned, and the `else` further down has no matching `if` at
    # its indentation level, so the function does not compile in this form.
    #
    # NOTE(review): the `return` sits after the loop, so only the values bound
    # during the last iteration are handed back to the caller.
        
    # Parse the names to first (meaning first+middle, if it exists) and last names
    for element in input_y:
    
        # last name: the run of word characters directly in front of the tab
        last_namegroup = re.search('(\w*)\t', element)
        last_name = last_namegroup.group(1)
        
        # "middle name" initial: the first word character that follows the
        # first whitespace character in the element.
        # NOTE(review): for an element of the form 'First Last<TAB>domain' this
        # is the first letter of the last name -- confirm that is intended.
        middle_namegroup = re.search('\s(\w)', element)
        middle_name = middle_namegroup.group(1)
        
        # first name: everything from the start up to the first whitespace
        first_namegroup = re.search('^(\S*)', element)
        first_name1 = first_namegroup.group(1) 
            
        
                # if the length of consonant_first_middle is still larger than 8,
                # take the consonants of only the first name
                # and add the first letter of the middle name defined above
                if len(consonant_first_middle) > 8:
                    consonant_first_name = get_consonants(first_name1)
                    first_name = (f'{consonant_first_name}{middle_name}')
        
        # if there is no middle name, i.e. only one whitespace in the line,
        # then just take the word up to the space and store it as first_name
        else:
            first_namegroup = re.search('^(\S*)', element)
            first_name = first_namegroup.group(1)   
        
                    
    return(first_name, last_name)
    

########################################################################################
# creating the email addresses
def create_email_address(first_name, last_name):
    """Build an address from the given names for the lines of `infile`.

    `infile` is not a parameter; it is looked up as a module-level name.
    Every line is examined in turn: a line containing a tab followed by
    'stu' yields '<first>.<last>@uzh.ch', any other line uses the text
    after the tab as a sub-domain of 'uzh.ch'.

    Each line overwrites the previous result, so the value returned is the
    address computed for the last line only.
    """
    for entry in infile:
        is_student = re.search('\tstu', entry) is not None
        if is_student:
            address = f'{first_name}.{last_name}@uzh.ch'
            continue

        # everything between the tab and the end of the line is the sub-domain
        domain_part = re.search('\t(.*)$', entry)
        address = f'{first_name}.{last_name}@{domain_part.group(1)}.uzh.ch'

    return address

########################################################################################
# function as the "top level" of the program that calls all other functions as needed.

def main():
    """Top-level driver: open the file named on the command line, run the
    helper functions, and print each line of the file with the address.

    NOTE(review): `normalize` is not defined in the code shown here.
    The function contains no `return` statement, so it returns None.
    """
  
    ####### Receive a file name from the command line
    # get the filename
    filename = sys.argv[1]
    # open a file for reading; this local `infile` shadows the module-level one
    infile = open(filename, 'r')
    
    ####### Normalise the input
    input_normalized = normalize(infile)
    
    ####### Parse the names as needed
    # parse_name hands back a single (first_name, last_name) pair
    input_first, input_last = parse_name(input_normalized)
    
    ####### Create email addresses
    # create_email_address reads the module-level `infile`, not the local one
    email_output = create_email_address(input_first, input_last)
    
    ####### Print the result
    # NOTE(review): the local `infile` was already passed to normalize(); if
    # that call read the file to its end, nothing is left to iterate here --
    # confirm. The same `email_output` value is printed for every line.
    for line in infile:
        print(f'{line} --> {email_output}')

# main() has no return statement, so main_function is None
# and the print below outputs "None"
main_function = main()
print(main_function)


As you can see, I think I did something wrong with how my code iterates through the file, but I simply can't figure out what I am doing wrong.

I appreciate everything you could point out to me!





Solution

  • In my first answer, I tried not to change too much code, but I think it will be easier to re-think the logic, and rewrite the code. Here's what I came up with, I think this does what you need?

    import sys, re
    
    
    def normalize(name):
        """Lowercase *name* and spell out a fixed set of accented letters.

        After lowercasing, the substitutions are applied in this order:
        ä -> ae, ü -> ue, ö -> oe, and both é and ë -> e. Every other
        character is left as it is.
        """
        # (pattern, replacement) pairs, applied one after another
        substitutions = (
            ('ä', 'ae'),
            ('ü', 'ue'),
            ('ö', 'oe'),
            ('é|ë', 'e'),
        )

        result = name.lower()
        for pattern, replacement in substitutions:
            result = re.sub(pattern, replacement, result)
        return result
    
    
    def remove_vowels(name):
        """Return *name* with every lowercase a, e, i, o and u deleted.

        The pattern lists only those five lowercase letters, so uppercase
        or accented vowels stay in place.
        """
        vowel_pattern = 'a|e|i|o|u'
        stripped = re.sub(vowel_pattern, '', name)
        return stripped
        
    
    def main(filename):
        """Build an e-mail address for every person listed in *filename*.

        Each non-blank line must have the form ``<name><TAB><domain>``, where
        the name consists of two or more space-separated words: a first name,
        any number of middle names, and a last name.

        The part in front of the dot is the normalized first and middle names
        joined together. If that is longer than 8 characters its vowels are
        removed; if it is still longer than 8, it becomes the vowel-less first
        name plus the first letter of the middle name (when there is one).

        Parameters:
            filename: path of the text file to read.

        Returns:
            A list of ``(name, address)`` tuples in file order, where ``name``
            is the name exactly as it is written in the file.

        Raises:
            ValueError: if a non-blank line does not contain exactly one tab,
                or if its name part has fewer than two words.
        """
        # a list to store our output
        people = []

        # `with` closes the file even when a line fails to parse
        with open(filename, "r") as infile:
            # look at just one person (one line) at a time
            for line in infile:
                # remove the trailing \n from the line
                line = line.rstrip("\n")

                # a blank line (e.g. at the end of the file) holds no person
                if not line.strip():
                    continue

                # separate the name and domain
                name, domain = line.split("\t")

                # remove the accents and capitals
                normalized_name = normalize(name)

                # the starred target collects every word between the first
                # and the last one, i.e. the middle name(s), possibly none
                first_name, *middle_names, last_name = normalized_name.split(" ")
                middle_name = "".join(middle_names)

                full_first_name = first_name + middle_name

                # shorten the first part step by step while it exceeds 8 chars
                if len(full_first_name) > 8:
                    full_first_name = remove_vowels(full_first_name)

                    if len(full_first_name) > 8:
                        full_first_name = remove_vowels(first_name)

                        # only people with a middle name get its initial
                        if middle_name:
                            full_first_name += middle_name[0]

                # "stu" means no sub-domain; anything else is used as one
                subdomain = "" if domain == "stu" else domain + "."
                host = subdomain + "uzh.ch"

                address = f"{full_first_name}.{last_name}@{host}"
                people.append((name, address))

        return people
    
    
    
    # Only run when executed as a script, so that importing this module for
    # its functions does not read sys.argv or print anything.
    if __name__ == "__main__":
        # a missing argument gets a usage message instead of an IndexError
        if len(sys.argv) < 2:
            sys.exit(f"usage: {sys.argv[0]} FILE")

        filename = sys.argv[1]
        people = main(filename)

        # one "name --> address" line per person, in file order
        for name, address in people:
            print(name, "-->", address)
    

    Output:

    Raphael Fritz Bernasconi --> rphlfrtz.bernasconi@uzh.ch
    Frédéric Piero --> frederic.piero@cl.uzh.ch
    Sören Thaddäus Favre --> srnthdds.favre@uzh.ch
    Regula Aegerter --> regula.aegerter@uzh.ch
    Noël Äbiger --> noel.aebiger@cl.uzh.ch
    Inés Desirée Muff --> nsdsr.muff@rom.uzh.ch
    Sébastien Merian --> sbstn.merian@uzh.ch
    Liam Cereghetti --> liam.cereghetti@uzh.ch
    Björn Michael Crivelli --> bjrnmchl.crivelli@ds.uzh.ch
    Joëlle Fürrer --> joelle.fuerrer@uzh.ch
    

    Hopefully the comments in the code do a good job of explaining what's going on.