Search code examples
python, numpy, word-cloud

adding words to stopwords in wordcloud does not work


I want to add some stopwords to my word cloud, but the resulting image still shows the exact words I added to the stopwords. Am I doing something wrong? How can I add a word to the stopwords?

from wordcloud import STOPWORDS as EN_STOPWORDS
from wordcloud import ImageColorGenerator
from stopword_persian import stopword_persian as STOPWORDS
from wordcloud_fa import WordCloudFa


# Add another stopword
# NOTE: .add() mutates the object imported from stopword_persian in place
# (presumably a set, given the .add/.union calls below).
# NOTE(review): if this word is typed with Arabic letter forms while
# persian_normalize=True rewrites the input text to Persian forms, the two
# strings may never compare equal -- confirm the exact code points used here.
STOPWORDS.add('ميساخته')
# Combine the Persian stopwords with wordcloud's English ones into one collection.
stopwords = STOPWORDS.union(EN_STOPWORDS)


# Generate a word cloud image
# `text` and `twitter_mask` are not defined in this snippet; they must be
# provided by the surrounding code.

wordcloud = WordCloudFa(
    persian_normalize=True,
    include_numbers=True,
    max_words=300,
    stopwords=stopwords,  # the combined Persian + English collection built above
    margin=0,
    width=3000,
    height=3000,
    min_font_size=1,
    max_font_size=500,
    # NOTE(review): random_state is normally an integer seed; True presumably
    # behaves like seed 1 -- confirm this is intended.
    random_state=True,
    background_color="black",
    mask=twitter_mask
).generate(text)


Solution

  • You can do something like this.

    import matplotlib.pyplot as plt
    import nltk # Natural Language ToolKit
    nltk.download('stopwords')
    from nltk.corpus import stopwords # to get rid of StopWords 
    
    from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator # to create a Word Cloud
    from PIL import Image # Pillow with WordCloud to image manipulation
    
    text = 'New stop words are bad for this text.'

    # Build the stopword set: NLTK's English list plus our own additions.
    # The NLTK stopwords corpus is keyed by language name ('english'), not by
    # a short code such as 'en'.
    stop_words = set(stopwords.words('english'))
    new_stopwords = ['new', 'stop', 'words']
    stop_words.update(new_stopwords)

    # Drop the stopwords. The stopword list is lowercase, so compare the
    # lowercased token; otherwise 'New' would survive the filter.
    clean_text = [word for word in text.split() if word.lower() not in stop_words]

    # Join the remaining tokens back into a single string
    text = ' '.join(clean_text)

    # Generate the word cloud from the filtered text
    wordcloud = WordCloud(background_color="black").generate(text)

    # Display the generated image:
    plt.figure(figsize=(15, 10))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
    

    Here is a helpful link if you want to explore more about the interpolation options.