Search code examples
nltk sentiment-analysis nlp vader

Cannot update VADER lexicon


print(news['title'][5])
Magnitude 7.5 quake hits Peru-Ecuador border region - The Hindu

print(analyser.polarity_scores(news['title'][5]))
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

from nltk.tokenize import word_tokenize, RegexpTokenizer

import pandas as pd

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


# Analyzer instance used for every score computed in this snippet.
analyzer = SentimentIntensityAnalyzer()

# Headline under test (`news` is a table defined outside this snippet).
sentence = news['title'][5]

# Use the `word_tokenize` name imported above: the visible imports never
# bind `nltk` itself, so `nltk.word_tokenize` would be an unresolved name.
tokenized_sentence = word_tokenize(sentence)
pos_word_list = []
neu_word_list = []
neg_word_list = []

# Bucket each token by its own compound score: >= 0.1 is positive,
# <= -0.1 is negative, anything in between is neutral.
for word in tokenized_sentence:
    # Score the token once instead of once per branch.
    compound = analyzer.polarity_scores(word)['compound']
    if compound >= 0.1:
        pos_word_list.append(word)
    elif compound <= -0.1:
        neg_word_list.append(word)
    else:
        neu_word_list.append(word)

print('Positive:', pos_word_list)
print('Neutral:', neu_word_list)
print('Negative:', neg_word_list)

# Score for the whole sentence, as opposed to the per-token scores above.
score = analyzer.polarity_scores(sentence)
print('\nScores:', score)

Positive: []
Neutral: ['Magnitude', '7.5', 'quake', 'hits', 'Peru-Ecuador', 'border', 'region', '-', 'The', 'Hindu']
Negative: []

Scores: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

# Custom entries to add to the lexicon: token -> score.
new_words = {
    'Peru-Ecuador': -2.0,
    'quake': -3.4,
}

# NOTE(review): the update below targets `analyser` (with an "s"), while the
# scores printed on the next line come from `analyzer` (with a "z"). These
# look like two different objects, so the new words may never reach the
# instance that is actually doing the scoring — confirm which one is intended.
analyser.lexicon.update(new_words)
print(analyzer.polarity_scores(sentence))

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

from nltk.tokenize import word_tokenize, RegexpTokenizer

import pandas as pd

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


# Analyzer instance used for every score computed in this snippet.
analyzer = SentimentIntensityAnalyzer()

# Headline under test (`news` is a table defined outside this snippet).
sentence = news['title'][5]

# Use the `word_tokenize` name imported above: the visible imports never
# bind `nltk` itself, so `nltk.word_tokenize` would be an unresolved name.
tokenized_sentence = word_tokenize(sentence)
pos_word_list = []
neu_word_list = []
neg_word_list = []

# Bucket each token by its own compound score: >= 0.1 is positive,
# <= -0.1 is negative, anything in between is neutral.
for word in tokenized_sentence:
    # Score the token once instead of once per branch.
    compound = analyzer.polarity_scores(word)['compound']
    if compound >= 0.1:
        pos_word_list.append(word)
    elif compound <= -0.1:
        neg_word_list.append(word)
    else:
        neu_word_list.append(word)

print('Positive:', pos_word_list)
print('Neutral:', neu_word_list)
print('Negative:', neg_word_list)

# Score for the whole sentence, as opposed to the per-token scores above.
score = analyzer.polarity_scores(sentence)
print('\nScores:', score)

Positive: []
Neutral: ['Magnitude', '7.5', 'quake', 'hits', 'Peru-Ecuador', 'border', 'region', '-', 'The', 'Hindu']
Negative: []

Scores: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


Solution

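  • The code you are using is fine apart from one typo: while updating the dictionary you used `analyser` instead of `analyzer`. (Not sure why you didn't get an error — most likely an `analyser` object was still defined from earlier in your session, as in your first `print` call, so the update went to that other instance.)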

    # Custom entries to add to the lexicon: token -> score.
    new_words = {
        'Peru-Ecuador': -2.0,
        'quake': -3.4,
    }

    # Update the same `analyzer` instance that does the scoring below, so
    # the new entries are visible to `polarity_scores`.
    analyzer.lexicon.update(new_words)
    print(analyzer.polarity_scores(sentence))
    

    Output:

    {'neg': 0.355, 'neu': 0.645, 'pos': 0.0, 'compound': -0.6597}
    

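    One more caution (not sure whether you are making this mistake): after updating the lexicon, you shouldn't import the library and create the analyzer again, because a newly created `SentimentIntensityAnalyzer()` does not contain your updated words. Steps should be: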

    1. Import the library and the dictionary
    2. Update the dictionary (You shouldn't import the library again after this step)
    3. Calculate the sentiment scores