# Scrape the daily 2016 standings pages from baseball-reference.com, keep the
# rows for BAL, BOS, TOR, NYY and TBR, and chart wins minus losses per team
# over time as a highcharter widget saved to "teams.html".

library(rvest)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(htmlwidgets)
library(dygraphs)
library(xts) # was library(xls); as.xts() used below is provided by xts
library(highcharter)

# Build URLs ----

# Every month is given 31 days; impossible dates (e.g. April 31) end up with
# an NA date further down and are removed by the date filter.
day <- 1:31

# Standings page URL(s) for the given month and day(s); vectorised over `day`.
standings_url <- function(month, day, year = 2016) {
  paste0(
    "http://www.baseball-reference.com/games/standings.cgi?year=", year,
    "&month=", month, "&day=", day, "&submit=Submit+Date"
  )
}

`04` <- standings_url(4, day)
`05` <- standings_url(5, day)
`06` <- standings_url(6, day)
`07` <- standings_url(7, day)
`08` <- standings_url(8, day)
`09` <- standings_url(9, day)
`10` <- standings_url(10, day)

# One column of URLs per month. stringsAsFactors = FALSE keeps the columns
# character on R < 4.0 as well.
all <- as.data.frame(
  cbind(`04`, `05`, `06`, `07`, `08`, `09`, `10`),
  stringsAsFactors = FALSE
)
all$day <- rownames(all) # row names "1".."31" double as the day of month

# Long format: one row per (day, month, url).
all.l <- all %>%
  gather(month, url, 1:7)

# Scrape ----

# Download one standings page and return its first four table columns as a
# data frame. The page is fetched once and reused for every column.
scrape_standings <- function(x) {
  page <- read_html(x[1])

  cell_text <- function(n) {
    page %>%
      html_nodes(css = paste0(".p402_hide td:nth-child(", n, ")")) %>%
      html_text()
  }

  teams <- cell_text(1)

  data.frame(
    # Repeat the URL per row found so a page without matching cells gives a
    # zero-row frame instead of a length-mismatch error.
    url = rep(x, length(teams)),
    teams = teams,
    wins = cell_text(2),
    losses = cell_text(3),
    pct = cell_text(4),
    stringsAsFactors = FALSE
  )
}

df <- bind_rows(lapply(all.l$url, scrape_standings))

# Attach day and month to every scraped row via the URL.
dfn <- merge(all.l, df, by = "url")

# Filter teams and build dates ----

ale <- dfn %>%
  filter(teams %in% c("BAL", "BOS", "TOR", "NYY", "TBR"))

ale$dayn <- as.numeric(ale$day)
ale$monthn <- as.numeric(ale$month)
ale$year <- as.numeric(2016)
ale$datenw <- paste0(ale$dayn, "_", ale$monthn, "_", ale$year)
ale$dt <- paste0(ale$year, "/", ale$monthn, "/", ale$dayn)
# An explicit format makes impossible dates NA regardless of which element
# comes first; without it as.Date() guesses the format from the first value.
ale$date <- as.Date(ale$dt, format = "%Y/%m/%d")

# Wins minus losses per team and date, up to and including 2016-08-07.
dat <- ale %>%
  select(date, wins, losses, teams) %>%
  mutate(game = as.numeric(wins) - as.numeric(losses)) %>%
  filter(date <= "2016-08-07") %>%
  select(date, teams, game) %>%
  arrange(date)

# Wide format: one row per date, one column per team.
teams <- dat %>%
  spread(teams, game)
rownames(teams) <- teams$date

ts <- as.xts(teams)

# Chart ----

# NOTE(review): hc_add_series_times_values() appears to belong to older
# highcharter releases -- confirm the installed version still exports it.
hchart <- highchart() %>%
  hc_add_series_times_values(
    teams$date, teams$BAL,
    name = "Baltimore", color = "#DF4601"
  ) %>%
  hc_add_series_times_values(
    teams$date, teams$TOR,
    name = "Toronto", color = "#134A8E"
  ) %>%
  hc_add_series_times_values(
    teams$date, teams$NYY,
    name = "New York", color = "#E4002B"
  ) %>%
  hc_add_series_times_values(
    teams$date, teams$BOS,
    name = "Boston", color = "#BD3039"
  ) %>%
  hc_add_series_times_values(
    teams$date, teams$TBR,
    name = "Tampa Bay", color = "#8FBCE6"
  ) %>%
  hc_add_theme(hc_theme_google())

saveWidget(hchart, file = "teams.html")