Search code examples
python tkinter tkinter-scrolledtext

Tkinter ScrolledText search() function not working beyond the first line


I'm creating a tkinter application that highlights misspelled words in a ScrolledText() widget. To find the index of the misspelled word I use the check() function. However, when I move to a new line in the scrolled text, the index returned by check() is stuck on the index of the last word in the previous line.

import nltk
from nltk.corpus import words
from nltk.corpus import wordnet
import re
import tkinter as tk
from tkinter.scrolledtext import ScrolledText


# Fetch the two NLTK corpora that serve as the spelling dictionary.
nltk.download("words")
nltk.download("wordnet")

# Known-good vocabulary: every entry of both corpora plus "typing".
w = [*words.words(), *wordnet.words(), "typing"]
word_set = set(w)

# Main window holding a single scrollable text area.
root = tk.Tk()
root.geometry("600x600")
text = ScrolledText(root, font=("Arial", 14))

# Space count seen at the previous check; check() compares against it.
old_spaces = 0

def check(self):
    """Re-tag misspelled words in ``text`` whenever the space count changes.

    The function is bound to ``<KeyRelease>``, so ``self`` receives the Tk
    event object rather than an instance; the body never uses it.

    Behaviour that matters for the problem described in the question:
    every ``text.search`` call starts at ``'1.0'``, so it reports the first
    place the token occurs in the widget, and the content is split on
    ``" "`` only, so a line break does not separate tokens.
    """
    global old_spaces
    # old_lines and prev_len are declared global but never assigned or read here
    global old_lines
    global prev_len
    # whole buffer as one string
    content= text.get("1.0", tk.END)
    space_count= content.count(" ")

    # only re-scan when the number of spaces changed (e.g. the spacebar was hit)
    if space_count != old_spaces:
        old_spaces = space_count
        # debug output: the buffer split on any whitespace
        print(content.split())
        # delete every existing tag so highlighting starts from scratch
        for tag in text.tag_names():
            text.tag_delete(tag)


        # splits on single spaces only; newlines stay inside the tokens
        for word in content.split(" "):
            # lowercase and strip non-word characters before the dictionary lookup
            # NOTE(review): "[^\w]" is a non-raw string; newer Python versions
            # warn about the "\w" escape - confirm and consider a raw string
            if  re.sub("[^\w]", "", word.lower()) not in word_set:

                # count_var receives the length of the match from search()
                count_var= tk.StringVar()
                # always searches from '1.0', i.e. from the top of the widget
                start_pos= text.search(word, '1.0',  count= count_var)
                # Tk index expression "<start>+<n>c": n characters past the start
                end_pos = str(start_pos) + "+" + count_var.get() + "c"
                # debug output: the computed end index
                print(end_pos)
                # the word itself is used as the tag name, one tag per word
                text.tag_add(word, f"{start_pos}", f"{end_pos}")
                text.tag_config(word, foreground= "red")

                
                    

                                  
# Lay out the widget, then run check() after every key release.
text.pack()
text.bind("<KeyRelease>", check)


# Hand control to the Tk event loop.
root.mainloop()

[Tkinter output: "First" on the first line and "Second" on the second line](https://i.sstatic.net/HqwbJ.png)

Terminal window output: the word "Second", though it is on the second line, is reported at the same index as "First".


Solution

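  • You can get rid of all the globals and the regex. Also, create just one tag and reuse it instead of creating a tag per word. You search for every word starting from position 1.0, so the search keeps returning the first occurrence it finds; each search should start from the last end_pos instead.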

    import nltk
    from nltk.corpus import words
    from nltk.corpus import wordnet
    import re
    import tkinter as tk
    from tkinter.scrolledtext import ScrolledText
    
    
    #nltk.download("words")
    #nltk.download("wordnet")
    # Dictionary of accepted spellings: both NLTK corpora plus "typing".
    w = [*words.words(), *wordnet.words(), "typing"]
    word_set = set(w)

    # Main window with one scrollable text area.
    root = tk.Tk()
    root.geometry("600x600")
    text = ScrolledText(root, font=("Arial", 14))
    
    
    def check(self):
        """Highlight every misspelled word currently in ``text``.

        Bound to ``<KeyRelease-Return>`` / ``<KeyRelease-space>``, so ``self``
        receives the Tk event object; it is not used.

        The whole buffer is re-scanned on each call. Every
        whitespace-separated token is located with ``text.search`` starting
        where the previous token ended, so repeated words and words on later
        lines get their own index instead of that of an earlier occurrence.
        """
        #start over: clear the tag from the whole buffer (its colour is
        #configured once at startup and is not affected by tag_remove)
        text.tag_remove('misspelled', '1.0', tk.END)

        #split() without an argument splits on spaces, tabs and newlines
        #alike and never yields empty tokens, so nothing empty is searched for
        tokens = text.get('1.0', 'end-1c').split()

        count_var = tk.StringVar()  #receives the length of each match
        end_pos = '1.0'

        for token in tokens:
            #note that we start at the last end position instead of '1.0';
            #advancing past every token (not only the misspelled ones) keeps
            #a misspelled word from matching inside an earlier, longer word
            start_pos = text.search(token, end_pos, tk.END, count=count_var)
            if not start_pos:
                #search() returns '' when nothing matches; an index built
                #from it would be invalid, so stop instead of raising
                break
            end_pos = f"{start_pos}+{count_var.get()}c"

            if token.lower() not in word_set:
                text.tag_add('misspelled', start_pos, end_pos)
            
    # lay out the widget and define the highlight style once
    text.pack()
    text.tag_configure('misspelled', foreground="red")

    # re-run the check each time space or Return is released
    for key in ('Return', 'space'):
        text.bind(f"<KeyRelease-{key}>", check)


    root.mainloop()
    

    This approach is not very efficient, though: every check re-scans the entire contents. Another way to go is to check only the word that was just typed. One way to do that is with a mark.

    import nltk
    from nltk.corpus import words
    from nltk.corpus import wordnet
    import re
    import tkinter as tk
    from tkinter.scrolledtext import ScrolledText
    
    
    #nltk.download("words")
    #nltk.download("wordnet")
    # Accepted spellings: the NLTK "words" and "wordnet" entries plus "typing".
    w = [*words.words(), *wordnet.words(), "typing"]
    word_set = set(w)

    # Application window containing the scrollable text widget.
    root = tk.Tk()
    root.geometry("600x600")
    text = ScrolledText(root, font=("Arial", 14))
    
    
    def check(self):
        """Spell-check only the word that was just finished.

        Bound to ``<KeyRelease-Return>`` / ``<KeyRelease-space>``, so ``self``
        receives the Tk event object; it is not used.

        The ``lpos`` mark remembers where the previous check stopped. The
        text between that mark and the character before the caret (the
        separator that was just typed) is taken as the word to check.
        """
        #end just before the space/newline that triggered this check
        word_end = f'{tk.INSERT}-1c'

        #get characters from mark to just before last space or return
        word = text.get("lpos", word_end)

        #if that word is not in the word list, tag it; an empty range
        #(e.g. two separators in a row) is not a word, so it is skipped
        if word and word.lower() not in word_set:
            text.tag_add('misspelled', "lpos", word_end)

        #move mark to caret position
        text.mark_set('lpos', tk.INSERT)
    
    text.pack()

    #make 1 tag and reuse it
    text.tag_configure('misspelled', foreground="red")

    #init mark at first position with gravity set to left so it wont move as you type
    text.mark_set('lpos', "1.0")
    text.mark_gravity('lpos', tk.LEFT)

    #check on return and space
    for key in ('Return', 'space'):
        text.bind(f"<KeyRelease-{key}>", check)

    root.mainloop()
    

    You will need to play with this more to handle situations like backspace, maybe tab, delete and any other situation where the text is changed in a non-linear way.

    enter image description here