Search code examples
r web-scraping rvest

Webscraping with rvest : https://www.sports-reference.com


I am trying to get the tables from this link: https://www.sports-reference.com/cfb/boxscores/2022-09-02-charlotte.html but it's not working for me.

I can scrape game detail, scores, and scoring summary but I cannot get the team_stats.

Any help is greatly appreciated.

Thanks

library(tidyverse)
library(rvest)
# Box-score page to scrape.
url <- "https://www.sports-reference.com/cfb/boxscores/2022-09-02-charlotte.html"

# Download and parse the page once; the queries that follow all read `webpage`.
webpage <- read_html(url)

# Team names: text of the links inside the bold headings of the scorebox.
team_names <- html_text(html_elements(webpage, 'div.scorebox strong a'))
team_names

# Raw text of every element matching 'div.score'.
scores <- html_text(html_elements(webpage, 'div.score'))
scores

# Label the names by position: first entry 'away', second entry 'home'.
team_names <- setNames(team_names, c('away', 'home'))
team_names



# Game headline: for each '#content' node under '#wrap', take the text of
# its first <h1>.
game_detail <- webpage %>%
  html_elements('#wrap') %>%
  html_elements('#content') %>%
  html_element('h1') %>%
  html_text()
game_detail


# Scores again, this time walking down through the scorebox and reading the
# text with html_text2(); this replaces any earlier value of `scores`.
scores <- webpage %>%
  html_elements('#wrap') %>%
  html_elements('#content') %>%
  html_elements('.scorebox') %>%
  html_elements('div') %>%
  html_elements('.score') %>%
  html_text2()
scores

# Scoring summary: convert every table container with id 'div_scoring' into
# a data frame (the result is a list, one entry per matched container).
scoring_summary <- webpage %>%
  html_elements('#wrap') %>%
  html_elements('#content') %>%
  html_elements('.table_container#div_scoring') %>%
  html_table()
scoring_summary


# Team stats. The direct lookup below finds nothing in the downloaded HTML,
# although a browser-rendered copy of the same page does contain the table.
# NOTE(review): presumably the site ships this table inside an HTML comment
# that JavaScript un-comments after load -- confirm against the page source.
team_stats_nodes <- webpage %>%
  html_nodes('#wrap') %>%
  html_nodes('#content') %>%
  html_nodes(xpath = '//*[@id="div_team_stats"]')

if (length(team_stats_nodes) == 0) {
  # Fallback: gather the text of every comment node, parse it as HTML and
  # look for the table (id "team_stats") in that markup instead.
  commented_markup <- webpage %>%
    html_nodes(xpath = '//comment()') %>%
    html_text() %>%
    paste(collapse = '')
  # Wrapping in <html><body> keeps read_html() from treating an empty or
  # tag-less string as a file path.
  team_stats_nodes <- paste0('<html><body>', commented_markup, '</body></html>') %>%
    read_html() %>%
    html_nodes('#team_stats')
}

# Still a list of data frames, as before; empty if neither lookup matched.
(team_stats <- html_table(team_stats_nodes))


Solution

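  • I was able to get the content of the tables by loading the page in a real browser (RSelenium driving a Selenium Firefox container started with Docker) and parsing the rendered page source with rvest: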

    library(RSelenium)
    library(rvest)
    url <- "https://www.sports-reference.com/cfb/boxscores/2022-09-02-charlotte.html"

    # Start a Selenium Firefox container, mapped to local port 4445.
    # system() is used instead of shell() because shell() only exists on
    # Windows builds of R.
    system("docker run -d -p 4445:4444 selenium/standalone-firefox")
    # The container is started detached, so give the Selenium server a moment
    # to come up before connecting to it.
    Sys.sleep(5)

    remDr <- remoteDriver(remoteServerAddr = "localhost", port = 4445L, browserName = "firefox")
    remDr$open()
    remDr$navigate(url)

    # Grab the browser-rendered HTML, then release the browser session before
    # parsing. (The Docker container itself keeps running until it is stopped.)
    page_source <- remDr$getPageSource()[[1]]
    remDr$close()

    # Parse every <table> of the rendered page into a list of tibbles.
    page_source %>% read_html() %>% html_table()
    
    [[1]]
    # A tibble: 2 x 3
      X1          X2 X3     
      <chr>    <int> <chr>  
    1 Duquesne    14 "Final"
    2 Hawaii      24 ""     
    
    [[2]]
    # A tibble: 2 x 7
      ``                                        ``               `1`   `2`   `3`   `4` Final
      <chr>                                     <chr>          <int> <int> <int> <int> <int>
    1 "via Sports Logos.net\n\t\t\tAbout logos" William & Mary    10     7     3    21    41
    2 "via Sports Logos.net\n\t\t\tAbout logos" Charlotte          3    14     7     0    24
    
    [[3]]
    # A tibble: 11 x 6
       Quarter Time  Team  Description                                                      `W&M`  CHAR
         <int> <chr> <chr> <chr>                                                            <int> <int>
     1       1 12:17 W&M   Ethan Chang 36 yard field goal                                       3     0
     2      NA 5:19  CHAR  Antonio Zita 32 yard field goal                                      3     3
     3      NA 0:00  W&M   Tyler Rose 31 yard pass from Darius Wilson (Ethan Chang kick)       10     3
     4       2 0:00  W&M   Donavyn Lester 22 yard run (Ethan Chang kick)                       17     3
     5      NA 6:34  CHAR  Chavon McEachern 3 yard run (Antonio Zita kick)                     17    10
     6      NA 0:13  CHAR  Xavier Williams 2 yard run (Antonio Zita kick)                      17    17
     7       3 4:30  W&M   Ethan Chang 28 yard field goal                                      20    17
     8      NA 2:18  CHAR  Xavier Williams 67 yard run (Antonio Zita kick)                     20    24
     9       4 12:31 W&M   Bronson Yoder 1 yard run (Ethan Chang kick)                         27    24
    10      NA 0:00  W&M   Lachlan Pitts 65 yard pass from Darius Wilson (Ethan Chang kick)    34    24
    11      NA 7:38  W&M   Malachi Imoh 17 yard run (Ethan Chang kick)                         41    24
    
    [[4]]
    # A tibble: 7 x 3
      Stat              `W&M`         CHAR         
      <chr>             <chr>         <chr>        
    1 First Downs       25            22           
    2 Rush-Yds-TDs      47-303-3      37-131-3     
    3 Cmp-Att-Yd-TD-INT 13-19-256-2-0 17-31-248-0-0
    4 Total Yards       559           379          
    5 Fumbles-Lost      0-0           0-0          
    6 Turnovers         0             0            
    7 Penalties-Yards   7-80          9-83         
    
    [[5]]
    # A tibble: 3 x 11
      ``              ``        Passing Passing Passing Passing Passing Passing Passing Passing Passing
      <chr>           <chr>     <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
    1 Player          School    Cmp     Att     Pct     Yds     Y/A     AY/A    TD      Int     Rate   
    2 Xavier Williams Charlotte 12      23      52.2    201     8.7     8.7     0       0       125.6  
    3 James Foster    Charlotte 5       8       62.5    47      5.9     5.9     0       0       111.9  
    
    [[6]]
    # A tibble: 10 x 14
       ``               ``        Rushing Rushing Rushing Rushing Receiving Receiving Receiving Receiving Scrimmage Scrimmage Scrimmage Scrimmage
       <chr>            <chr>     <chr>   <chr>   <chr>   <chr>   <chr>     <chr>     <chr>     <chr>     <chr>     <chr>     <chr>     <chr>    
     1 Player           School    "Att"   "Yds"   "Avg"   "TD"    "Rec"     "Yds"     "Avg"     "TD"      Plays     Yds       Avg       TD       
     2 Shadrick Byrd    Charlotte "13"    "46"    "3.5"   "0"     "2"       "5"       "2.5"     "0"       15        51        3.4       0        
     3 Chavon McEachern Charlotte "11"    "30"    "2.7"   "1"     ""        ""        ""        ""        11        30        2.7       1        
     4 Xavier Williams  Charlotte "10"    "48"    "4.8"   "2"     ""        ""        ""        ""        10        48        4.8       2        
     5 James Foster     Charlotte "3"     "7"     "2.3"   "0"     ""        ""        ""        ""        3         7         2.3       0        
     6 Elijah Spencer   Charlotte ""      ""      ""      ""      "5"       "107"     "21.4"    "0"       5         107       21.4      0        
     7 Grant Dubose     Charlotte ""      ""      ""      ""      "4"       "67"      "16.8"    "0"       4         67        16.8      0        
     8 Victor Tucker    Charlotte ""      ""      ""      ""      "4"       "58"      "14.5"    "0"       4         58        14.5      0        
     9 Nolan Groulx     Charlotte ""      ""      ""      ""      "1"       "6"       "6.0"     "0"       1         6         6.0       0        
    10 Taylor Thompson  Charlotte ""      ""      ""      ""      "1"       "5"       "5.0"     "0"       1         5         5.0       0        
    
    [[7]]
    # A tibble: 25 x 16
       ``                ``        Tackles Tackles Tackles Tackles Tackles `Def Int` `Def Int` `Def Int` `Def Int` `Def Int` Fumbles Fumbles Fumbles Fumbles
       <chr>             <chr>     <chr>   <chr>   <chr>   <chr>   <chr>   <chr>     <chr>     <chr>     <chr>     <chr>     <chr>   <chr>   <chr>   <chr>  
     1 Player            School    Solo    Ast     Tot     Loss    Sk      "Int"     "Yds"     "Avg"     "TD"      "PD"      "FR"    "Yds"   "TD"    "FF"   
     2 Wayne Jones       Charlotte 5       6       11      0.0     0.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
     3 Jalar Holley      Charlotte 2       4       6       0.0     0.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
     4 Davondre Robinson Charlotte 5       1       6       0.0     0.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
     5 Kofi Wardlow      Charlotte 2       4       6       1.0     1.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
     6 Chase Monroe      Charlotte 3       2       5       1.0     0.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
     7 Cam Burden        Charlotte 3       1       4       1.0     0.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
     8 Geovonte' Howard  Charlotte 2       2       4       0.0     0.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
     9 Solomon Rogers    Charlotte 1       3       4       0.0     0.0     ""        ""        ""        ""        ""        ""      ""      ""      ""     
    10 Jordan Anderson   Charlotte 0       3       3       0.0     0.0     ""        ""        ""        ""        "1"       ""      ""      ""      ""     
    # ... with 15 more rows
    # i Use `print(n = ...)` to see more rows
    
    [[8]]
    # A tibble: 4 x 10
      ``             ``        `Kick Ret` `Kick Ret` `Kick Ret` `Kick Ret` `Punt Ret` `Punt Ret` `Punt Ret` `Punt Ret`
      <chr>          <chr>     <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>     
    1 Player         School    "Ret"      "Yds"      "Avg"      "TD"       "Ret"      "Yds"      "Avg"      "TD"      
    2 Henry Rutledge Charlotte "4"        "72"       "18.0"     "0"        ""         ""         ""         ""        
    3 Shadrick Byrd  Charlotte "2"        "31"       "15.5"     "0"        ""         ""         ""         ""        
    4 Victor Tucker  Charlotte ""         ""         ""         ""         "1"        "5"        "5.0"      "0"       
    
    [[9]]
    # A tibble: 3 x 12
      ``           ``        Kicking Kicking Kicking Kicking Kicking Kicking Kicking Punting Punting Punting
      <chr>        <chr>     <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
    1 Player       School    "XPM"   "XPA"   "XP%"   "FGM"   "FGA"   "FG%"   "Pts"   "Punts" "Yds"   "Avg"  
    2 Antonio Zita Charlotte "3"     "3"     "100.0" "1"     "2"     "50.0"  "6"     ""      ""      ""     
    3 Bailey Rice  Charlotte ""      ""      ""      ""      ""      ""      ""      "4"     "189"   "47.3" 
    
    [[10]]
    # A tibble: 4 x 15
      ``               ``        Touchdowns Touchdowns Touchdowns Touchdowns Touchdowns Touchdowns Touchdowns Touchdowns Kicking Kicking ``    ``     ``   
      <chr>            <chr>     <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>   <chr>   <chr> <chr>  <chr>
    1 Player           School    "Rush"     "Rec"      "Int"      "FR"       "PR"       "KR"       "Oth"      "Tot"      "XPM"   "FGM"   "2PM" "Sfty" Pts  
    2 Xavier Williams  Charlotte "2"        ""         ""         ""         ""         ""         ""         "2"        ""      ""      ""    ""     12   
    3 Chavon McEachern Charlotte "1"        ""         ""         ""         ""         ""         ""         "1"        ""      ""      ""    ""     6    
    4 Antonio Zita     Charlotte ""         ""         ""         ""         ""         ""         ""         ""         "3"     "1"     ""    ""     6    
    
    

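    Here is another approach, which drives Internet Explorer through COM with RDCOMClient (Windows only) and pulls the cells out of the table's HTML with regular expressions: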

    library(RDCOMClient)
    library(stringr)
    url <- "https://www.sports-reference.com/cfb/boxscores/2022-09-02-charlotte.html"

    # Open the page in a visible Internet Explorer window controlled via COM.
    IEApp <- COMCreate("InternetExplorer.Application")
    IEApp[['Visible']] <- TRUE
    IEApp$Navigate(url)
    Sys.sleep(5)  # fixed pause to let the page finish loading
    doc <- IEApp$Document()

    # Inner HTML of the element with id "team_stats".
    web_Obj <- doc$GetElementByID("team_stats")
    table_Content <- web_Obj$innerHtml()
    IEApp$Quit()  # the markup is now held in R; close the browser

    # Pull each column out of the markup by its data-stat attribute. The
    # patterns are non-greedy so a match stops at the first closing tag even
    # if several cells share one line.
    first_Col <- unlist(stringr::str_extract_all(table_Content, "data-stat=\"stat\">.*?</th>"))
    first_Col <- stringr::str_replace_all(first_Col, "(data-stat=\"stat\">)(.*?)(</th>)", "\\2")

    second_Col <- unlist(stringr::str_extract_all(table_Content, "data-stat=\"vis_stat\">.*?</td><td class=\"center"))
    second_Col <- stringr::str_replace_all(second_Col, "(data-stat=\"vis_stat\">)(.*?)(</td><td class=\"center)", "\\2")

    third_Col <- unlist(stringr::str_extract_all(table_Content, "data-stat=\"home_stat\">.*?</td>"))
    third_Col <- stringr::str_replace_all(third_Col, "(data-stat=\"home_stat\">)(.*?)(</td>)", "\\2")

    # The label pattern also picks up the header cell ("Stat"), whereas the two
    # value patterns return body cells only, so first_Col is one element longer.
    # Left as is, cbind() recycles the shorter columns and every label lands
    # next to the following row's values. Keep only the trailing labels that
    # pair up with the value cells.
    stopifnot(
      length(second_Col) == length(third_Col),
      length(first_Col) >= length(second_Col)
    )
    first_Col <- utils::tail(first_Col, length(second_Col))

    table <- cbind(first_Col, second_Col, third_Col)
    table

    #>      first_Col           second_Col      third_Col
    #> [1,] "First Downs"       "25"            "22"
    #> [2,] "Rush-Yds-TDs"      "47-303-3"      "37-131-3"
    #> [3,] "Cmp-Att-Yd-TD-INT" "13-19-256-2-0" "17-31-248-0-0"
    #> [4,] "Total Yards"       "559"           "379"
    #> [5,] "Fumbles-Lost"      "0-0"           "0-0"
    #> [6,] "Turnovers"         "0"             "0"
    #> [7,] "Penalties-Yards"   "7-80"          "9-83"