Search code examples
python nlp nltk wordnet

NLTK's Spell checker is not working correctly


I want to check the spelling of a sentence in Python using NLTK. The built-in spell checker is not working correctly: it reports 'with' and 'and' as wrong spellings.

def tokens(sent):
    """Tokenize ``sent`` by delegating to ``nltk.word_tokenize``."""
    words = nltk.word_tokenize(sent)
    return words

def SpellChecker(line):
    """Print a per-token verdict for ``line``.

    Each token produced by ``tokens(line)`` is right-stripped and looked up
    with ``WN.synsets``. An empty lookup result is reported as a wrong
    spelling, anything else as no mistake. Nothing is returned.
    """
    for word in tokens(line):
        candidate = word.rstrip()
        if WN.synsets(candidate):
            verdict = "No mistakes :"
        else:
            verdict = "Wrong spellings : "
        print(verdict + word)

def removePunct(str):
    """Return the items of ``str`` joined together, minus '!', '.', ':' and ','."""
    # NOTE: the parameter name shadows the ``str`` builtin inside this function.
    dropped = ('!', '.', ':', ',')
    kept = []
    for ch in str:
        if ch in dropped:
            continue
        kept.append(ch)
    return "".join(kept)

# Sample review sentence used to exercise the checker.
l = "Attempting artiness With black & white and clever camera angles, the movie disappointed - became even more ridiculous - as the acting was poor and the plot and lines almost non-existent. "
# Lower-case the sentence, then drop '!', '.', ':' and ',' before checking.
noPunct = removePunct(l.lower())
# NOTE(review): SpellChecker has no return statement, so this condition is
# always None (falsy) and the two prints below never run; all visible output
# comes from the print calls inside SpellChecker itself.
if(SpellChecker(noPunct)):
        print(l)
        print(noPunct)

Can someone give me the reason?


Solution

  • It reports those words as wrong spellings because they are stopwords, which are not contained in WordNet (check the FAQs).

    So, you can instead use the stopwords list from the NLTK corpus to check for such words.

    #Add these lines:
    import nltk
    from nltk.corpus import wordnet as WN
    from nltk.corpus import stopwords
    # English stopwords from the NLTK stopwords corpus, held in a set so the
    # membership test in SpellChecker does not scan a list for every token.
    stop_words_en = set(stopwords.words('english'))
    
    def tokens(sent):
        """Tokenize ``sent`` by delegating to ``nltk.word_tokenize``."""
        words = nltk.word_tokenize(sent)
        return words
    
    def SpellChecker(line):
        """Print, for every token of ``line``, whether it looks correctly spelled.

        A token counts as correct when it is an English stopword or when
        ``WN.synsets`` returns at least one synset for it; otherwise it is
        reported as a wrong spelling. Nothing is returned.

        Args:
            line: Text to check; it is split with ``tokens``.
        """
        for i in tokens(line):
            strip = i.rstrip()
            # The NLTK stopword entries are lower-case, so compare
            # case-insensitively; otherwise "With" or "And" would be flagged
            # unless the caller lower-cased the text first. The cheap set
            # lookup runs before the WordNet query.
            if strip.lower() in stop_words_en or WN.synsets(strip):
                print("No mistakes :" + i)
            else:
                print("Wrong spellings : " + i)
    
    
    def removePunct(str):
        """Return the items of ``str`` joined together, minus '!', '.', ':' and ','."""
        # NOTE: the parameter name shadows the ``str`` builtin inside this function.
        excluded = ('!', '.', ':', ',')
        return "".join(filter(lambda ch: ch not in excluded, str))
    
    # Sample review sentence used to exercise the checker.
    l = "Attempting artiness With black & white and clever camera angles, the movie disappointed - became even more ridiculous - as the acting was poor and the plot and lines almost non-existent. "
    
    # Lower-case the sentence, then drop '!', '.', ':' and ',' before checking.
    noPunct = removePunct(l.lower())
    # NOTE(review): SpellChecker has no return statement, so this condition is
    # always None (falsy) and the two prints below never run; all visible
    # output comes from the print calls inside SpellChecker itself.
    if(SpellChecker(noPunct)):
            print(l)
            print(noPunct)