Usage of bind_tf_idf in R

    library(janeaustenr)
    library(tidytext)
    library(tidyverse)
    library(tm)
    library(corpus)

   # Strip digits from the novel's lines and hold them in a one-column data frame.
   text <- data.frame(text = removeNumbers(sensesensibility))

# Tokenise the text column into bigrams (two-word n-grams).
tidy_text <- unnest_tokens(text, bigram, text, token = "ngrams", n = 2)

# Show the most frequent raw bigrams.
count(tidy_text, bigram, sort = TRUE)

# Split each bigram into its two component words.
tidy_text <- separate(
  tidy_text,
  col = bigram,
  into = c("word1", "word2"),
  sep = " "
)

# Keep only the pairs in which neither word is a stop word.
tidy_text_filtered <- filter(
  tidy_text,
  !(word1 %in% stop_words$word),
  !(word2 %in% stop_words$word)
)

# Frequency of each remaining word pair (these are bigram counts, despite the
# variable name).
trigram_count <- count(tidy_text_filtered, word1, word2, sort = TRUE)

# Glue the words back into one bigram column and keep pairs seen more than once.
united <- trigram_count %>%
  unite(col = bigram, word1, word2, sep = " ") %>%
  filter(n > 1)

# This is the call that raises the error quoted below: no document column is
# supplied to bind_tf_idf().
united <- bind_tf_idf(united, bigram, n)

However, I am getting this error: "Error in tapply(n, documents, sum) : arguments must have same length"

What could be wrong in my usage of bind_tf_idf?

Solution

The bind_tf_idf function takes three column arguments: 'term', 'document' and 'n'. The call above does not supply 'document', so we can create a 'document' column and pass it:

# Label every row with one document name so that bind_tf_idf() receives the
# term, document and count columns it needs.
out <- bind_tf_idf(
  mutate(united, book = "sensesensibility"),
  term = bigram,
  document = book,
  n = n
)

If we need to treat chapters as the documents instead, create a 'chapter' column by checking for the word 'chapter' in the original 'text' data.frame:

library(stringr)

# Number the chapters: every line containing "chapter" (any case) increments a
# running counter, so each following line carries that chapter number.
# NOTE(review): the pattern matches "chapter" anywhere in a line; anchor it
# (e.g. "^chapter") if body text can contain the word -- confirm on the data.
text <- text %>%
  mutate(
    chapter = cumsum(str_detect(text, regex("chapter", ignore_case = TRUE)))
  )

# Tokenise into bigrams; the chapter column stays attached to each bigram.
tidy_text <- text %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2)

# Split each bigram into its two words ...
tidy_text <- tidy_text %>%
  separate(bigram, c("word1", "word2"), sep = " ")

# ... and drop every pair in which either word is a stop word.
tidy_text_filtered <- tidy_text %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word)

# Count each word pair within its chapter (bigram counts, despite the name).
trigram_count <- tidy_text_filtered %>%
  count(chapter, word1, word2, sort = TRUE)

# Rebuild the bigram column and keep pairs seen more than once in a chapter.
united <- trigram_count %>%
  unite(bigram, word1, word2, sep = " ") %>%
  filter(n > 1)

# Each chapter is one document for the tf-idf computation.
out <- united %>%
  bind_tf_idf(bigram, document = chapter, n)
head(out)
#  chapter          bigram  n        tf       idf    tf_idf
#1      21        sir john 12 0.2068966 0.9162907 0.1895774
#2      21    miss steeles 11 0.1896552 2.1202635 0.4021189
#3       9        sir john  9 0.6000000 0.9162907 0.5497744
#4      13        sir john  9 0.3750000 0.9162907 0.3436090
#5      23  lady middleton  9 0.4090909 1.0788097 0.4413312
#6      40 colonel brandon  9 0.4736842 0.6931472 0.3283329