# Ontario baby names, 1917-2010: static charts (ggplot2) plus one interactive
# chart (dygraphs).
#
# The script downloads the female and male name tables, draws seven charts
# and saves each one as a JPEG, then writes an interactive HTML widget for a
# handful of boys' names.

library(foreign)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(htmlwidgets)
library(dygraphs)

# Settings ----

# Folder that receives every saved chart.
output_dir <- "~/Dropbox/Apps/asocialfolder/davidtingle/files"

# Decade marks used as x-axis breaks and vertical guide lines.
decades <- seq(1920, 2010, by = 10)

# Years dropped from the unique-name counts.
early_years <- c(1917, 1918, 1919, 1920)

# Helpers ----

# Full path of a file inside `output_dir`.
output_path <- function(file_name) {
  file.path(output_dir, file_name)
}

# Read one name table (the first line of the file is skipped) and make sure
# `Frequency` is numeric, so both tables are treated the same way.
read_names <- function(url) {
  names_df <- read.csv(url, skip = 1, stringsAsFactors = FALSE)
  names_df$Frequency <- as.numeric(names_df$Frequency)
  names_df
}

# Faint vertical guide lines at the given years.
year_rules <- function(years) {
  geom_vline(xintercept = years, lty = 1, lwd = .25, colour = "grey90")
}

# Faint horizontal guide lines at the given counts.
count_rules <- function(counts) {
  geom_hline(yintercept = counts, lty = 1, lwd = .25, colour = "grey90")
}

# Title layer; the subtitle argument is only passed on when one is supplied.
title_layer <- function(title, subtitle = NULL) {
  if (is.null(subtitle)) {
    ggtitle(title)
  } else {
    ggtitle(title, subtitle = subtitle)
  }
}

# Line chart of yearly frequency for the names in `selected`.
#
# data            name table with Year, Name and Frequency columns
# selected        character vector of names to keep
# title, subtitle chart title and optional subtitle
# legend_position numeric legend position passed to theme()
# rule_xend       right-hand end (year) of the horizontal guides at
#                 4000, 3000, 2000 and 1000; the upper guides stop early,
#                 presumably to stay clear of the legend
#
# The guides are drawn once with annotate() rather than once per data row.
name_trend_plot <- function(data, selected, title, subtitle = NULL,
                            legend_position = c(.93, .77),
                            rule_xend = c(2000, 2000, 2010, 2010)) {
  rule_heights <- c(4000, 3000, 2000, 1000)

  ggplot(filter(data, Name %in% selected)) +
    year_rules(seq(1920, 2000, by = 20)) +
    annotate(
      "segment",
      x = 1915, xend = rule_xend,
      y = rule_heights, yend = rule_heights,
      lwd = .25, colour = "grey90"
    ) +
    geom_line(aes(Year, Frequency, group = Name, colour = Name)) +
    scale_color_brewer(palette = "Set2") +
    labs(x = "", y = "") +
    title_layer(title, subtitle) +
    theme_tufte(base_family = "Helvetica") +
    theme(
      axis.ticks = element_blank(),
      legend.title = element_blank(),
      legend.position = legend_position
    )
}

# Two-line chart comparing a yearly value for girls (pink) and boys (navy).
#
# female_values, male_values  data frames with a Year column and `value_col`
# value_col                   name of the column to plot on the y axis
# y_breaks                    y-axis breaks; also the guide lines and, via
#                             their range, the axis limits
yearly_comparison_plot <- function(female_values, male_values, value_col,
                                   y_breaks, title, subtitle) {
  as_series <- function(df) {
    data.frame(Year = df$Year, value = df[[value_col]])
  }

  ggplot() +
    year_rules(decades) +
    count_rules(y_breaks) +
    geom_line(data = as_series(female_values), aes(Year, value),
              color = "pink3") +
    geom_line(data = as_series(male_values), aes(Year, value),
              color = "navyblue") +
    scale_y_continuous(breaks = y_breaks, limits = range(y_breaks)) +
    scale_x_continuous(breaks = decades) +
    labs(x = "", y = "") +
    title_layer(title, subtitle) +
    theme_tufte(base_family = "Helvetica") +
    theme(legend.position = "none", axis.ticks = element_blank())
}

# Rows holding the highest-frequency name(s) of each year after 1920.
# top_n() keeps ties, so a year can contribute more than one row.
top_name_per_year <- function(names_df) {
  names_df %>%
    filter(Year > 1920) %>%
    group_by(Year) %>%
    top_n(1, Frequency) %>%
    ungroup() %>%
    arrange(Year, -Frequency)
}

# Chart that prints each year's top name at the height of its frequency.
top_name_plot <- function(top_names, title, subtitle = NULL) {
  ggplot() +
    year_rules(decades) +
    count_rules(seq(0, 4000, by = 1000)) +
    geom_text(data = top_names,
              aes(Year, Frequency, label = Name, color = Name),
              size = 1.95) +
    scale_y_continuous(breaks = c(1000, 2000, 3000, 4000),
                       limits = c(0, 4000)) +
    scale_x_continuous(breaks = decades) +
    labs(x = "", y = "") +
    title_layer(title, subtitle) +
    theme_tufte(base_family = "Helvetica") +
    theme(legend.position = "none", axis.ticks = element_blank())
}

# Importing ----

female <- read_names(
  "https://d2khazk8e83rdv.cloudfront.net/opendata/ontariotopbabynames_female_1917-2010_english.csv"
)
male <- read_names(
  "https://d2khazk8e83rdv.cloudfront.net/opendata/ontariotopbabynames_male_1917-2010_english.csv"
)

# Names of some significance ----
#
# Each chart is left as a bare top-level expression so it is displayed as
# before; the ggsave() call that follows saves the most recent plot.

name_trend_plot(
  male,
  c("DAVID", "JORDAN", "MALCOLM", "NIGEL", "PETER", "KEN", "GLENN"),
  title = "Boys' names of some personal significance",
  subtitle = "More popular names peaked in 1960; Jordan is the exception"
)
ggsave(filename = output_path("boys.jpg"))

name_trend_plot(
  female,
  c("EMMA", "MARY", "ISABELLE", "MIRIAM", "KELLY", "CATHY", "KRISTEN"),
  title = "Girls' names of some personal significance",
  subtitle = "Total registrations lower than the boys; Emma is recently popular"
)
ggsave(filename = output_path("girls.jpg"))

# Legend sits higher on this chart, so only the 4000 guide is cut short.
name_trend_plot(
  female,
  c("CATHY", "CATHARINE", "KATHERINE", "CATHERINE"),
  title = "Different ways of being a Cathy",
  legend_position = c(.93, .87),
  rule_xend = c(2000, 2010, 2010, 2010)
)
ggsave(filename = output_path("cathy.jpg"))

# Registrations per year ----

agg_male <- aggregate(Frequency ~ Year, data = male, sum)
agg_female <- aggregate(Frequency ~ Year, data = female, sum)

# NOTE(review): "througout" in the subtitle is a typo in the published
# chart text; it is left unchanged here.
yearly_comparison_plot(
  agg_female, agg_male,
  value_col = "Frequency",
  y_breaks = c(0, 20000, 40000, 60000, 80000),
  title = "Number of babies registered in Ontario by year",
  subtitle = "Boys outnumber girls slightly througout; peak hit in 1960"
)
ggsave(filename = output_path("total_names.jpg"))

# Unique names per year ----

# `%in%` tests each year against the whole set. The previous `!=` against a
# four-element vector compared row by row with recycling, which only
# dropped the intended rows because the years were sorted from 1917.
total_female <- female %>%
  count(Year) %>%
  filter(!Year %in% early_years)
total_male <- male %>%
  count(Year) %>%
  filter(!Year %in% early_years)

yearly_comparison_plot(
  total_female, total_male,
  value_col = "n",
  y_breaks = c(0, 500, 1000, 1500, 2000),
  title = "The number of unique names registered in Ontario per year",
  subtitle = "More girls names than boys; steady increase since ~1945"
)
ggsave(filename = output_path("names.jpg"))

# Most popular name per year ----

top_female <- top_name_per_year(female)
top_name_plot(
  top_female,
  title = "Ontario's most popular girls' names over time",
  subtitle = "Total registrations have decreased steadily since 1975"
)
ggsave(filename = output_path("top_girl.jpg"))

top_male <- top_name_per_year(male)
top_name_plot(
  top_male,
  title = "Ontario's most popular boys' names over time"
)
ggsave(filename = output_path("top_boy.jpg"))

# Interactive chart ----

# One column per name. spread() is kept on purpose: it fixes the column
# order, and the palette below is applied to the series in that order.
boys_wide <- male %>%
  filter(Name %in% c("DAVID", "PETER", "JORDAN", "GLENN")) %>%
  spread(Name, Frequency)

boys <- dygraph(data = boys_wide, ylab = "Frequency", main = "") %>%
  dySeries("DAVID", label = "David") %>%
  dySeries("PETER", label = "Peter") %>%
  dySeries("JORDAN", label = "Jordan") %>%
  dySeries("GLENN", label = "Glenn") %>%
  dyAxis("x", drawGrid = FALSE) %>%
  dyAxis("y", drawGrid = FALSE) %>%
  dyOptions(colors = RColorBrewer::brewer.pal(4, "Set2")) %>%
  dyRangeSelector()

saveWidget(boys, file = output_path("boys.html"))