Search code examples
Tags: python, list, anaconda, spyder

Finding a position of a random word in a text file full of random letters


I've been stuck on this for a few hours and can't seem to crack it. I just want to find every occurrence of a word in the attached text file, along with the index (position) of each occurrence.

I thought converting the word and the text file to lists might help, but alas, it did not. Here is what I have now. The text file I am working with is on Pastebin: https://pastebin.com/MtjkvHaf

import string
import random
import os.path
def findWord(filename:str, word:str) -> None:
    """Print the index of every occurrence of ``word`` in a text file.

    The file is read as one string, so each reported index is a character
    offset into the whole file contents (newline characters included).
    Occurrences may overlap: searching for "aa" in "aaaa" reports
    ``[0, 1, 2]``.

    Args:
        filename: Path of the text file to search.
        word: The substring to look for. An empty word reports no matches.

    Prints:
        The list of starting indexes, or "File not found" when ``filename``
        is not an existing file.
    """
    if not os.path.isfile(filename):
        print("File not found")
        return

    # The context manager closes the file even if reading fails.
    with open(filename, 'r') as f:
        fStr = f.read()

    location = []
    # An empty word would "match" at every position; treat it as no match.
    if word:
        pos = fStr.find(word)
        while pos != -1:
            location.append(pos)
            # Resume one character later so overlapping hits are found too.
            pos = fStr.find(word, pos + 1)
    print(location)

findWord("random_letters_05292022_1902.txt", "potato")

Solution

  • Here's a way to do what your question asks:

    import re
    import os.path
    def findWord(filename:str, word:str):
        """Print the start index of each non-overlapping occurrence of ``word``.

        The whole file is read into one string, so each index is a character
        offset into the file contents (newline characters included). After a
        match, the search resumes just past the end of that match, so
        occurrences never overlap.

        Args:
            filename: Path of the text file to search.
            word: The substring to look for. An empty word reports no matches.

        Prints:
            The list of starting indexes, or "File not found" when
            ``filename`` is not an existing file.
        """
        if not os.path.isfile(filename):
            print("File not found")
            return
        # Read everything up front; the file need not stay open for the search.
        with open(filename, 'r') as f:
            fStr = f.read()
        locs = []
        # str.find('') always succeeds without advancing, which would loop
        # forever, so an empty word is treated as having no occurrences.
        if word:
            loc = fStr.find(word)
            while loc != -1:
                locs.append(loc)
                # Skip past the whole match before searching again.
                loc = fStr.find(word, loc + len(word))
        print(locs)
    
    findWord('foo.txt', 'foo')
    

    Input file foo.txt:

    barbarfoobarbarbarfoobarbarbarfoobar
    barbarfoobarbarbarfoobarbarbarfoobar
    barbarfoobarbarbarfoobarbarbarfoobar
    barbarfoobarbarbarfoobarbarbarfoobar
    barbarfoobarbarbarfoobarbarbarfoobar
    barbarfoobarbarbarfoobarbarbarfoobar
    

    Output:

    [6, 18, 30, 43, 55, 67, 80, 92, 104, 117, 129, 141, 154, 166, 178, 191, 203, 215]