I am trying to extract the data points from the graphic...without any luck. And no clue, why not.
I get always returned "NAs introduced by coercion"
My idea was to read the URL and look through the date in order to identify the part where the values from the graphic are listed. However, it seems I am not able to get hold of them.
url <- "https://www.transfermarkt.com/manuel-neuer/marktwertverlauf/spieler/17259"
data <- readLines(con = url)
data <- paste0(data, collapse = "\n")
start.index <- gregexpr(pattern = "'data':\\[", text = data)
end.index <- gregexpr(pattern = "\\]", text = data)
end.index[[1]] <- end.index[[1]][which(end.index[[1]] > start.index[[1]])[1]]
data <- substring(
text = data,
first = start.index[[1]],
last = end.index[[1]]
)
#date
#market value
#club
#age
# Get Dates
date.s.index <- gregexpr(pattern = "'datum_mw':'", text = data)
date.e.index <- gregexpr(pattern = "',", text = data)
date.index <- rbind(
cbind(date.s.index[[1]], 0),
cbind(date.e.index[[1]], 1)
)
date.index <- date.index[order(date.index[, 1]), ]
d.index <- date.index[, 2] == 0
d.next.index <- c(FALSE, d.index[-length(d.index)])
date.start.index <- date.index[d.index, 1]
date.end.index <- date.index[d.next.index, 1]
date.res <- substring(
text = data,
first = date.start.index + attr(date.s.index[[1]], "match.length")[1],
last = date.end.index - 1
)
date.res <- gsub(pattern = "\\\\x20", replacement = " ", x = date.res)
# Get Values
value.s.index <- gregexpr(pattern = "'y':", text = data)
value.e.index <- gregexpr(pattern = ",", text = data)
value.index <- rbind(
cbind(value.s.index[[1]], 0),
cbind(value.e.index[[1]], 1)
)
value.index <- value.index[order(value.index[, 1]), ]
v.index <- value.index[, 2] == 0
v.next.index <- c(FALSE, v.index[-length(v.index)])
value.start.index <- value.index[v.index, 1]
value.end.index <- value.index[v.next.index, 1]
value.res <- substring(
text = data,
first = value.start.index + attr(value.s.index[[1]], "match.length")[1],
last = value.end.index - 1
)
# Get Clubs
club.s.index <- gregexpr(pattern = "'verein':'", text = data)
club.e.index <- gregexpr(pattern = "',", text = data)
club.index <- rbind(
cbind(club.s.index[[1]], 0),
cbind(club.e.index[[1]], 1)
)
club.index <- club.index[order(club.index[, 1]), ]
c.index <- club.index[, 2] == 0
c.next.index <- c(FALSE, c.index[-length(c.index)])
club.start.index <- club.index[c.index, 1]
club.end.index <- club.index[c.next.index, 1]
club.res <- substring(
text = data,
first = club.start.index + attr(club.s.index[[1]], "match.length")[1],
last = club.end.index - 1
)
club.res <- gsub(pattern = "\\\\x20", replacement = " ", x = club.res)
# Get Age
age.s.index <- gregexpr(pattern = "'age':", text = data)
age.e.index <- gregexpr(pattern = ",", text = data)
age.index <- rbind(
cbind(age.s.index[[1]], 0),
cbind(age.e.index[[1]], 1)
)
age.index <- age.index[order(age.index[, 1]), ]
a.index <- age.index[, 2] == 0
a.next.index <- c(FALSE, a.index[-length(a.index)])
age.start.index <- age.index[a.index, 1]
age.end.index <- age.index[a.next.index, 1]
age.res <- substring(
text = data,
first = age.start.index + attr(age.s.index[[1]], "match.length")[1],
last = age.end.index - 1
)
res <- data.frame(
Date = date.res,
MarketValue = as.numeric(value.res),
Club = club.res,
Age = as.numeric(age.res)
)
Chart data is pulled from a different URL, https://www.transfermarkt.com/ceapi/marketValueDevelopment/graph/17259
. As it's JSON, we could parse it directly with jsonlite::read_json(url)
. Though they seem to be bit picky regarding user agent (jsonlite
request works in interactive RStudio session, gets 403
error when called through reprex), so let's just use httr2
for more convenient user agent config:
library(httr2)
request("https://www.transfermarkt.com/ceapi/marketValueDevelopment/graph/17259") |>
req_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64)") |>
req_perform() |>
resp_body_json(simplifyVector = TRUE) |>
getElement(1) |>
tibble::as_tibble()
#> # A tibble: 43 × 7
#> x y mw datum_mw verein age wappen
#> <dbl> <int> <chr> <chr> <chr> <chr> <chr>
#> 1 1111014000000 75000 €75k Mar 17, 2005 FC Schalke 04 U19 18 "https:/…
#> 2 1126821600000 150000 €150k Sep 16, 2005 FC Schalke 04 19 "https:/…
#> 3 1156111200000 350000 €350k Aug 21, 2006 FC Schalke 04 20 ""
#> 4 1168815600000 1500000 €1.50m Jan 15, 2007 FC Schalke 04 20 ""
#> 5 1182376800000 3000000 €3.00m Jun 21, 2007 FC Schalke 04 21 ""
#> 6 1199142000000 4500000 €4.50m Jan 1, 2008 FC Schalke 04 21 ""
#> 7 1212530400000 7000000 €7.00m Jun 4, 2008 FC Schalke 04 22 ""
#> 8 1232492400000 9000000 €9.00m Jan 21, 2009 FC Schalke 04 22 ""
#> 9 1244584800000 11000000 €11.00m Jun 10, 2009 FC Schalke 04 23 ""
#> 10 1263682800000 15000000 €15.00m Jan 17, 2010 FC Schalke 04 23 ""
#> # ℹ 33 more rows
Created on 2023-12-31 with reprex v2.0.2