I am trying scrape data from Google Reviews (stars, commentary, date, etc.).
I tried to adapt a code that I found available online but am having problems to make it work. Apparently, R is not managing to scroll down google reviews and only returns the first ten reviews (that are the ones that Google displays without scrolling)
Has someone came across the same issue? Thanks!
#install.packages("rvest")
#install.packages("xml2")
#install.packages("RSelenium")
library(rvest)
library(xml2)
library(RSelenium)
rmDr=rsDriver(port = 4444L, browser=c("firefox"))
myclient= rmDr$client
myclient$navigate("https://www.google.com/search?client=firefox-b-d&q=emporio+santa+maria#lrd=0x94ce576a4e45ed99:0xa36a342d3ceb06c3,1,,,")
#click on the snippet to switch focus----------
webEle <- myclient$findElement(using = "css",value = ".review-snippet")
webEle$clickElement()
#simulate scroll down for several times-------------
scroll_down_times=1000
for(i in 1 :scroll_down_times){
webEle$sendKeysToActiveElement(sendKeys = list(key="page_down"))
#the content needs time to load,wait 1 second every 5 scroll downs
if(i%%5==0){
Sys.sleep(3)
}
}
#loop and simulate clicking on all "click on more" elements-------------
webEles <- myclient$findElements(using = "css",value = ".review-more-link")
for(webEle in webEles){
tryCatch(webEle$clickElement(),error=function(e){print(e)}) # trycatch to prevent any error from stopping the loop
}
pagesource= myclient$getPageSource()[[1]]
#this should get you the full review, including translation and original text-------------
reviews=read_html(pagesource) %>%
html_nodes(".review-full-text") %>%
html_text()
#number of stars
stars <- read_html(pagesource) %>%
html_node(".review-dialog-list") %>%
html_nodes("g-review-stars > span") %>%
html_attr("aria-label")
#time posted
post_time <- read_html(pagesource) %>%
html_node(".review-dialog-list") %>%
html_nodes(".dehysf") %>%
html_text()`enter code here`
The codes are all correct, but you didn't target the correct element, use .review-dialog-list
in css instead. That element is where the scroll bar resides.
library(RSelenium)
rmDr <- rsDriver(browser = "firefox")
driver <- rmDr$client
driver$navigate("https://www.google.com/search?client=firefox-b-d&q=emporio+santa+maria#lrd=0x94ce576a4e45ed99:0xa36a342d3ceb06c3,1,,,")
Sys.sleep(3) # wait a couple of seconds to let browser render the review window.
webEle <- driver$findElement(using = "css",value = ".review-dialog-list")
for(i in 1 : 5){
webEle$sendKeysToElement(sendKeys = list(key = "page_down"))
Sys.sleep(1)
}