Search code examples
python position enumeration

Positions of words within a sentence not working properly


I'm having a problem with my code:

def sentence_recreation(grammar_choice, sentence):
    """Remove unwanted characters from the sentence and compress the result.

    grammar_choice -- string holding every character that must be removed.
    sentence       -- the sentence typed in by the user.

    The filtered sentence is split on whitespace and the resulting word
    list is passed to compression().
    """
    # Filter in a single pass and split exactly once afterwards. This keeps
    # sentence_list defined even when every character is removed (or the
    # sentence is empty), and avoids re-splitting the string per character.
    new_sentence = ''.join(
        char for char in sentence if char not in grammar_choice
    )
    sentence_list = new_sentence.split()
    compression(sentence_list)

def validation(sentence):
    """Validate the sentence, ask what to strip from it, then compress it.

    sentence -- the sentence typed in by the user.

    An empty sentence restarts the prompt via compress_sentence(). Otherwise
    the user is asked (repeatedly, until the answer is recognised) whether
    punctuation, numbers, both or nothing should be removed, and the sentence
    is forwarded to sentence_recreation() or compression() accordingly.
    """
    if sentence == '':
        print('Input invalid. Please enter a sentence: ')
        compress_sentence()
        return

    # The backslash is escaped explicitly; the character set is unchanged.
    punctuation = '''!()-[]{};:'"\\,<>./?@#$%^&*_~'''
    numbers = '0123456789'
    removable = {
        'punctuation': punctuation,
        'numbers': numbers,
        'both': punctuation + numbers,
    }

    while True:
        grammar_choice = input("Would you like to remove any punctuation or numbers in your sentence?('None', 'Both', 'Punctuation' or 'Numbers'): ")
        # str.lower() returns a new string, so the result has to be kept;
        # otherwise the capitalised options shown in the prompt never match.
        grammar_choice = grammar_choice.strip().lower()
        if grammar_choice == 'none':
            compression(sentence.split())
            return
        if grammar_choice in removable:
            sentence_recreation(removable[grammar_choice], sentence)
            return
        # Empty or unrecognised answer: ask again.
        print('Input invalid. Please try again.')

def compression(sentence_list):
    """Print and save the unique words of a sentence and their positions.

    sentence_list -- list of the words making up the sentence, in order.

    Two lists are built: `words`, the unique words in order of first
    appearance, and `positions`, one entry per word of the sentence. Both are
    printed and written to 'positions and words.txt', after which the program
    exits via sys.exit().
    """
    import sys

    words = []
    positions = []
    first_seen = {}
    for index, word in enumerate(sentence_list):
        if word in first_seen:
            positions.append(first_seen[word])
        else:
            # NOTE: the value recorded here is the word's index within the
            # sentence (the enumerate index), not its index within `words`.
            first_seen[word] = index
            positions.append(index)
            words.append(word)
    print(words)
    print(positions)
    # The with-block guarantees the file is flushed and closed before exit.
    with open('positions and words.txt', 'w') as output_file:
        output_file.write(str(words))
        output_file.write(str(positions))
    print('Goodbye')
    sys.exit()

def compress_sentence():
    """Prompt the user for a sentence and hand it straight to validation()."""
    validation(input('Please enter your desired sentence: '))

# Entry point: start the interactive prompt as soon as the script is run.
compress_sentence()

The code works until it outputs the positions of the words within the sentence; those positions come out wrong, for example:

>>> 
Please enter your desired sentence: When you crack the code, you don't just crack the code, you crack all the codes 1.048596
Would you like to remove any punctuation or numbers in your sentence?('None', 'Both', 'Punctuation' or 'Numbers'): none
['When', 'you', 'crack', 'the', 'code,', "don't", 'just', 'all', 'codes', '1.048596']
[0, 1, 2, 3, 4, 1, 6, 7, 2, 3, 4, 1, 2, 13, 3, 15, 16]
Goodbye
>>> 

The program is supposed to output the positions [0,1,2,3,4,1,5,6,2,3,4,1,2,7,3,8,9]; however, it does not. I'd really appreciate some help with this, as I'm not sure what I have to do to fix it and I have only a vague idea as to why it's doing it.


Solution

  • Here is the source of your problem:

    positions.append(i)
    

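    This appends the index produced by enumerate, i.e. the position at which each unique word first occurs in the original sentence, so the numbers keep growing as repeated words are skipped. What you want instead is a number that goes up by one for each new unique word. Because the new word has just been added to y, len(y) - 1 is exactly that number. (Note that the line before it, y[x]=i, still stores the sentence index, so a word that first appears after a repeat and is then repeated itself would still get the old number; storing y[x]=len(y) there and appending y[x] avoids this.) For the example sentence, this can be accomplished by changing that line to the following: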

    positions.append(len(y) -1)
    

    Output:

    Please enter your desired sentence:  When you crack the code, you don't just crack the code, you crack all the codes 1.048596
    Would you like to remove any punctuation or numbers in your sentence?('None', 'Both', 'Punctuation' or 'Numbers'):  none
    ['When', 'you', 'crack', 'the', 'code,', "don't", 'just', 'all', 'codes', '1.048596']
    [0, 1, 2, 3, 4, 1, 5, 6, 2, 3, 4, 1, 2, 7, 3, 8, 9]
    Goodbye