I am trying to change the sentiment assigned to a few specific words in my data frame so that it suits my context: the words were used with a negative connotation but have been classified as positive. The words are "talent" and "prefer".
Here is my code:
# Load packages ----
# library() stops with an error when a package is missing, whereas require()
# only warns and returns FALSE, which would let the script fail later on.
library(dplyr)
library(ggplot2)
library(readxl)
library(tidytext)
library(writexl)
data example:
# Emit the first 20 rows of columns 7 to 9 as a reproducible structure() call
dput(sentiment_words[seq_len(20L), 7:9])
data output:
# dput() dump of a grouped tibble (class "grouped_df"): 20 rows with columns
# word, sentiment and count, grouped by word.
# NOTE(review): the group keys "ftw" and "lost" do not occur in the word
# column; their .rows entries (2L and 1L) point at the rows holding "prefer"
# and "talent", so the word column looks hand-edited after grouping. The
# groups attribute should be regenerated (e.g. by re-running group_by(word))
# before relying on it -- TODO confirm.
structure(list(word = c("talent", "prefer", "lies", "hard", "worsen",
"addicts", "obnoxious", "unbearable", "sickening", "irritating",
"weird", "inconsiderate", "weird", "overwhelming", "issue", "complaints",
"confined", "love", "confined", "idiots"), sentiment = c("positive",
"positive", "negative", "negative", "negative", "negative", "negative",
"negative", "negative", "negative", "negative", "negative", "negative",
"negative", "negative", "negative", "negative", "positive", "negative",
"negative"), count = c(79L, 3L, 53L, 316L, 2L, 2L, 3L, 2L, 2L,
7L, 24L, 2L, 24L, 2L, 198L, 21L, 4L, 52L, 4L, 19L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
word = c("addicts", "complaints", "confined", "ftw", "hard",
"idiots", "inconsiderate", "irritating", "issue", "lies",
"lost", "love", "obnoxious", "overwhelming", "sickening",
"unbearable", "weird", "worsen"), .rows = structure(list(
6L, 16L, c(17L, 19L), 2L, 4L, 20L, 12L, 10L, 15L, 3L,
1L, 18L, 7L, 14L, 9L, 8L, c(11L, 13L), 5L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -18L), .drop = TRUE))
###### Sentiment Analysis by Word ######
## Using "TIDYTEXT" sentiment dictionary
# Tokenise each post into words, drop stop words, and keep only the words
# that appear in the Bing lexicon (the join adds the `sentiment` column).
# Join keys are spelled out so that the joins match on `word` only and do not
# emit "Joining with `by = ...`" messages.
sentiment_words <- df |>
  tidytext::unnest_tokens(output = "word", input = "post") |>
  dplyr::anti_join(tidytext::stop_words, by = "word") |>
  dplyr::inner_join(tidytext::get_sentiments("bing"), by = "word")
# Frequency of each matched word, most frequent first
dplyr::count(sentiment_words, word, sort = TRUE)
# Check the Most common positive and negative words
# Attach each word's total number of occurrences as a `count` column.
# The result remains grouped by `word`.
sentiment_words <- sentiment_words |>
  dplyr::group_by(word) |>
  dplyr::mutate(count = dplyr::n())
# Tally how often each word occurs under each sentiment label, most frequent
# first. count() has to run on the joined posts: when it is nested inside the
# inner_join() call it counts the lexicon's own rows instead of the posts.
bing_word_counts <- sentiment_words %>%
  dplyr::inner_join(
    tidytext::get_sentiments("bing"),
    by = c("word", "sentiment")
  ) %>%
  dplyr::count(word, sentiment, sort = TRUE) %>%
  dplyr::ungroup()
`get_sentiments("bing")` returns a regular tibble with two character columns that you can filter and wrangle as you see fit:
library(tidytext)
library(dplyr)
library(stringr)
# Print the Bing lexicon
tidytext::get_sentiments(lexicon = "bing")
#> # A tibble: 6,786 × 2
#> word sentiment
#> <chr> <chr>
#> 1 2-faces negative
#> 2 abnormal negative
#> 3 abolish negative
#> 4 abominable negative
#> 5 abominably negative
#> 6 abominate negative
#> 7 abomination negative
#> 8 abort negative
#> 9 aborted negative
#> 10 aborts negative
#> # ℹ 6,776 more rows
# modified sentiments tibble
# Copy of the Bing lexicon in which "talent" and "prefer" are relabelled as
# negative; every other word keeps its original label.
sentiments_mod <- mutate(
  get_sentiments("bing"),
  sentiment = if_else(word %in% c("talent", "prefer"), "negative", sentiment)
)
There's no magic involved, though: only exact matches are changed, so "prefers" and "talents" are still classified as positive, which may or may not be what you are after:
# Show every lexicon entry that starts with "talent" or "prefer"
sentiments_mod |>
  filter(str_starts(word, "talent|prefer"))
#> # A tibble: 10 × 2
#> word sentiment
#> <chr> <chr>
#> 1 prefer negative
#> 2 preferable positive
#> 3 preferably positive
#> 4 prefered positive
#> 5 preferes positive
#> 6 preferring positive
#> 7 prefers positive
#> 8 talent negative
#> 9 talented positive
#> 10 talents positive
Once you have applied all required modifications to your sentiment table, use it (`sentiments_mod`) in your workflow:
# One-row example input holding the 20 words from the question
df <- tibble(
  post = "talent prefer lies hard worsen addicts obnoxious
unbearable sickening irritating weird inconsiderate weird
overwhelming issue complaints confined love confined idiots"
)

# Tokenise, drop stop words, and look sentiments up in the modified lexicon
df |>
  unnest_tokens(output = word, input = post) |>
  anti_join(stop_words) |>
  inner_join(sentiments_mod)
#> Joining with `by = join_by(word)`
#> Joining with `by = join_by(word)`
#> # A tibble: 20 × 2
#> word sentiment
#> <chr> <chr>
#> 1 talent negative
#> 2 prefer negative
#> 3 lies negative
#> 4 hard negative
#> 5 worsen negative
#> 6 addicts negative
#> 7 obnoxious negative
#> 8 unbearable negative
#> 9 sickening negative
#> 10 irritating negative
#> 11 weird negative
#> 12 inconsiderate negative
#> 13 weird negative
#> 14 overwhelming negative
#> 15 issue negative
#> 16 complaints negative
#> 17 confined negative
#> 18 love positive
#> 19 confined negative
#> 20 idiots negative
Created on 2023-11-28 with reprex v2.0.2