baseballr
baseballr copied to clipboard
bbref_id incorrect in daily_batter_bref()
Calling daily_batter_bref(today-14, today) returns data, but the bbref_id values do not match the player names.
The player name data is sorted by PAs then OPS
df <- arrange_(df, ~desc(PA), ~desc(OPS))
and the bbref_id is sorted by last name. When these are joined together, the names and the ids are out of sync. `playerids <- payload %>% html_nodes("table") %>% html_nodes("a") %>% html_attr("href") %>% as.data.frame() %>% rename(slug = ".") %>% filter(grepl("redirect", slug)) %>% mutate(playerid = gsub("/redirect.fcgi\?player=1&mlb_ID=", "", slug))
df <- df %>% mutate(bbref_id = playerids$playerid) %>% select(bbref_id, everything())`
This also does not appear to be the baseball reference id but rather the mlb id.
Suggested edit that moves the sort to the end and changes the HTML scan to pick up the bbref ID instead of the MLB ID:
#' Scrape daily batter results from Baseball Reference
#'
#' Downloads the Baseball Reference daily leaderboard page for the window
#' `t1` to `t2`, parses the table whose id is "daily", derives `X1B`, `uBB`
#' and `season`, and attaches the id found in each player's
#' `/players/gl.fcgi` link as `bbref_id`.
#'
#' @param t1 First date of the window. It is pasted into the request URL
#'   as-is, and its first four characters are used as the season
#'   (presumably a "YYYY-MM-DD" string -- confirm against callers).
#' @param t2 Last date of the window; pasted into the request URL as-is.
#' @return The batter table with `bbref_id` as the first column, sorted by
#'   descending `PA` and then descending `OPS`, passed through
#'   `make_baseballr_data()`. If the page cannot be read or parsed, a
#'   message is emitted and `NULL` is returned.
daily_batter_bref <- function(t1, t2) {
  # The result is the value of tryCatch() itself, so a failure part-way
  # through can neither leave `df` undefined nor return a half-built frame.
  # Warnings are deliberately not caught: a warning handler would unwind on
  # the first warning and discard the rest of the processing.
  df <- tryCatch(
    expr = {
      payload <- xml2::read_html(paste0(
        "http://www.baseball-reference.com/leagues/daily.cgi?user_team=&bust_cache=&type=b&lastndays=7&dates=fromandto&fromandto=",
        t1, ".", t2, "&level=mlb&franch=&stat=&stat_value=0"
      ))

      batters <- payload %>%
        rvest::html_elements(xpath = "//*[@id=\"daily\"]") %>%
        rvest::html_table(fill = TRUE)
      # Drop the 1st, 3rd and 5th scraped columns, then label the first four
      # that remain.
      batters <- as.data.frame(batters)[-c(1, 3, 5)]
      names(batters)[1:4] <- c("Name", "Age", "Level", "Team")

      # Non-numeric cells (e.g. repeated header rows) become NA here; the
      # resulting coercion warnings are expected and suppressed.
      stat_cols <- c(2, 5:26)
      suppressWarnings(
        batters[, stat_cols] <- lapply(batters[, stat_cols], as.numeric)
      )

      batters$X1B <- with(batters, H - (X2B + X3B + HR))
      batters$season <- as.integer(substr(t1, 1, 4))
      batters$uBB <- with(batters, BB - IBB)
      # Reorder so season comes first and X1B / uBB sit among the other
      # counting stats.
      batters <- batters[, c(28, 1:9, 27, 10:15, 29, 16:26)]
      batters$Team <- gsub(" $", "", batters$Team, perl = TRUE)
      # Remove header rows that the page repeats inside the table body.
      batters <- batters %>% dplyr::filter(.data$Name != "Name")

      # Collect the id from every "players" link found inside a table.
      hrefs <- payload %>%
        rvest::html_elements("table") %>%
        rvest::html_elements("a") %>%
        rvest::html_attr("href")
      player_links <- hrefs[grepl("players", hrefs)]
      bbref_ids <- gsub("/players/gl.fcgi\\?id=", "", player_links)
      bbref_ids <- gsub("&t.*", "", bbref_ids)

      # The ids are attached by position, so this must happen while the rows
      # are still in page order; the sort is applied only afterwards.
      batters %>%
        dplyr::mutate(bbref_id = bbref_ids) %>%
        dplyr::select("bbref_id", dplyr::everything()) %>%
        dplyr::arrange(dplyr::desc(.data$PA), dplyr::desc(.data$OPS)) %>%
        make_baseballr_data(
          "MLB Daily Batter data from baseball-reference.com",
          Sys.time()
        )
    },
    error = function(e) {
      message(glue::glue("{Sys.time()}: Invalid arguments or no daily batter data available!"))
      NULL
    }
  )
  df
}