I have this code that should iterate through each object in "days." However, when I run the loop it only returns the dates found on the last day.
# Build the vector of dates to scrape: five consecutive days.
days = seq(as.Date("2004-09-21"),as.Date("2004-09-25"),by = 1)
# Visit each day's search page and pull out the article fields.
# NOTE(review): `for` over a Date vector appears to hand back the underlying
# numeric day counts rather than positions 1..5, so `days[i]` below would index
# out of range and give NA -- confirm by printing `link` inside the loop.
for (i in days){
# Search URL for the current day.
link = paste0("https://alrai.com/search?date-from=", days[i])
readlink <- read_html(link)
# Text of the ".roboto-b" elements; the 1-3 digit runs are then extracted from
# it. Presumably this holds the page count -- TODO confirm. The result is not
# used anywhere else in this snippet.
link_maxpagenumbers_full <- readlink %>%
html_elements(".roboto-b") %>%
html_text2()
link_maxpagenumbers_cut <- str_extract_all(link_maxpagenumbers_full,'\\d{1,3}')
# Same URL fetched a second time; `link` has not changed since the first read.
readlink <- read_html(link)
# href attribute and visible text of every ".font-700" element.
article_links <- readlink %>%
html_elements(".font-700") %>%
html_attr("href")
link_title <- readlink %>%
html_elements(".font-700") %>%
html_text2()
# Text of the ".size-13" and ".size-14" elements.
link_date <- readlink %>%
html_elements(".size-13")%>%
html_text2()
link_pagesummary <- readlink %>%
html_elements(".size-14") %>%
html_text2()
# Every object assigned above is overwritten on the next iteration.
}
# Runs once, after the loop has finished, so it only sees the values left
# behind by the final iteration.
alrai <- data.frame(article_links, link_title, link_date, link_pagesummary)
I'm genuinely not sure why - I would expect this to return a link for each day, and the article_links for each day.
Your loop does go through all of the days, but each iteration overwrites the objects created by the previous one, so only the values from the last iteration remain.
Instead, you need to add each iteration's results to an output object (I call it Out here): put your data.frame code inside the loop and use rbind() to append to the output:
# Scrape the search-result page for every date in `days` and stack the
# per-day results into a single data frame, `Out`.
#
# Expects `days` (a Date vector) to exist already, and rvest / stringr (plus
# the %>% pipe) to be attached.
Out <- NULL # initiate empty object; rbind(NULL, df) simply returns df

# Iterate over positions, not over the Date vector itself: `for (i in days)`
# yields the underlying numeric day counts, so `days[i]` would index far out
# of range and paste "NA" into the URL.
for (i in seq_along(days)) {
  link <- paste0("https://alrai.com/search?date-from=", days[i])

  # Fetch the page once and reuse the parsed document for every selector.
  readlink <- read_html(link)

  # Digit runs (1-3 digits) found in the ".roboto-b" text. Not used further
  # in this snippet; kept so later code that relies on it still works.
  link_maxpagenumbers_full <- readlink %>%
    html_elements(".roboto-b") %>%
    html_text2()
  link_maxpagenumbers_cut <- str_extract_all(
    link_maxpagenumbers_full, '\\d{1,3}'
  )

  # href attribute and visible text of every ".font-700" element.
  article_links <- readlink %>%
    html_elements(".font-700") %>%
    html_attr("href")
  link_title <- readlink %>%
    html_elements(".font-700") %>%
    html_text2()

  # Text of the ".size-13" and ".size-14" elements.
  link_date <- readlink %>%
    html_elements(".size-13") %>%
    html_text2()
  link_pagesummary <- readlink %>%
    html_elements(".size-14") %>%
    html_text2()

  # NOTE(review): data.frame() needs the four vectors to have compatible
  # lengths; if the selectors match different numbers of elements on some
  # page this line will error -- verify against the live markup.
  alrai <- data.frame(article_links, link_title, link_date, link_pagesummary) # create a data frame
  Out <- rbind(Out, alrai) # add this iteration's data to the output
}