tryCatch({
data(montreal.bikes, package="animint2")
}, warning=function(w){
devtools::install_github("tdhock/animint2")
})9 Montreal bikes data viz
In this chapter we will explore several data visualizations of the Montreal bike data set.
Chapter outline:
- We begin with some static data visualizations.
- We create an interactive visualization of accident frequency over time.
- We create a interactive data viz with four plots, showing monthly accident trends, daily details, and a map of counter locations.
9.1 Static figures
We begin by loading the montreal.bikes data set, which is not available in the CRAN release of animint2, in order to save space on CRAN. Therefore to access this data set, you will need to install animint2 from GitHub:
We begin by examining the accidents data table.
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
data(montreal.bikes) #only present if installed from github
Sys.setlocale("LC_ALL", "")[1] "LC_CTYPE=C.UTF-8;LC_NUMERIC=C;LC_TIME=C.UTF-8;LC_COLLATE=C.UTF-8;LC_MONETARY=C.UTF-8;LC_MESSAGES=C.UTF-8;LC_PAPER=C.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=C.UTF-8;LC_IDENTIFICATION=C"
Berri Brébeuf Côte-Sainte-Catherine
1666 730 1666
Maisonneuve 1 Maisonneuve 2 du Parc
1666 1666 1666
Pierre-Dupuy Rachel Saint-Urbain
1666 1666 730
Totem_Laurier
261
Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':
between, first, last
accidents.dt <- data.table(montreal.bikes$accidents)
str(accidents.dt)Classes 'data.table' and 'data.frame': 5595 obs. of 12 variables:
$ date.str : chr "2012-01-02" "2012-01-05" "2012-01-09" "2012-01-10" ...
$ time.str : chr "18:35" "21:50" "21:15" "15:40" ...
$ deaths : int 0 0 0 0 0 0 0 1 0 0 ...
$ people.severely.injured: int 0 0 0 0 0 0 0 0 0 0 ...
$ people.slightly.injured: int 1 1 1 1 1 1 1 0 1 1 ...
$ street.number : int NA NA NA NA NA 2330 NA NA 4160 NA ...
$ street : chr "ST JEAN BAPTISTE O" "FOSTER" "ROSEMONT" "ST ANTOINE" ...
$ cross.street : chr "AV ROULEAU" "JANELLE" "DES ERABLES" "MANSFIELD" ...
$ location.int : int 32 34 NA 32 32 34 32 32 33 31 ...
$ position.int : int 6 6 6 6 6 6 6 6 6 5 ...
$ position : chr "Voie de circulation" "Voie de circulation" "Voie de circulation" "Voie de circulation" ...
$ location : chr "En intersection (moins de 5 mètres)" "Entre intersections (100 mètres et +)" NA "En intersection (moins de 5 mètres)" ...
- attr(*, ".internal.selfref")=<externalptr>
Each accident has data about its date, time, location, and counts of death and slight/severe injury. Some of the values are in French (e.g. position Voie de circulation, location En intersection, etc).
We calculate the time period of the accidents below.
(accidents.dt[
, date.POSIXct := as.POSIXct(strptime(date.str, "%Y-%m-%d"))
][
, month.str := strftime(date.POSIXct, "%Y-%m")
][]) date.str time.str deaths people.severely.injured
1: 2012-01-02 18:35 0 0
2: 2012-01-05 21:50 0 0
3: 2012-01-09 21:15 0 0
4: 2012-01-10 15:40 0 0
5: 2012-01-10 0:15 0 0
---
5591: 2014-12-19 12:22 0 0
5592: 2014-12-19 19:50 0 0
5593: 2014-12-26 19:56 0 0
5594: 2014-12-27 12:35 0 0
5595: 2014-12-30 11:55 0 0
10 variables not shown: [people.slightly.injured, street.number, street, cross.street, location.int, position.int, position, location, date.POSIXct, month.str]
range(accidents.dt$month.str)[1] "2012-01" "2014-12"
Below we also compute the range of months for the bike counter data table.
(counts.dt <- data.table(montreal.bikes$counter.counts)) location date count
1: Berri 2009-01-01 05:00:00 29
2: Berri 2009-01-02 05:00:00 19
3: Berri 2009-01-03 05:00:00 24
4: Berri 2009-01-04 05:00:00 24
5: Berri 2009-01-05 05:00:00 120
---
13379: Totem_Laurier 2013-09-14 04:00:00 2456
13380: Totem_Laurier 2013-09-15 04:00:00 2527
13381: Totem_Laurier 2013-09-16 04:00:00 3012
13382: Totem_Laurier 2013-09-17 04:00:00 3745
13383: Totem_Laurier 2013-09-18 04:00:00 3921
[1] "2009-01" "2013-09"
The bike counts are time series data which we visualize below.
counts.dt[, loc.lines := gsub("[- _]", "\n", location)]
ggplot()+
theme_bw()+
theme(panel.margin=grid::unit(0, "lines"))+
facet_grid(loc.lines ~ .)+
geom_point(aes(
date, count, color=count==0),
shape=21,
data=counts.dt)+
scale_color_manual(values=c("TRUE"="grey", "FALSE"="black"))Warning: Removed 407 rows containing missing values (geom_point).

Plotting with geom_point makes it easy to see the difference between zeros and missing values.
We will compute a summary of all accidents per month in this time period, so we first create a data table for each month below. (and make sure to set the locale to C for English month names)
uniq.month.vec <- unique(c(
accidents.dt$month.str,
counts.dt$month.str))
one.day <- 60 * 60 * 24
months <- data.table(month.str=uniq.month.vec)[
, month01.str := paste0(month.str, "-01")
][
, month01.POSIXct := as.POSIXct(strptime(month01.str, "%Y-%m-%d"))
][, let(
next.POSIXct = month01.POSIXct + one.day * 31,
month.str = strftime(month01.POSIXct, "%B %Y")
)][
, next01.str := paste0(strftime(next.POSIXct, "%Y-%m"), "-01")
][
, next01.POSIXct := as.POSIXct(strptime(next01.str, "%Y-%m-%d"))
]
month.levs <- months[order(month01.POSIXct), month.str]
(months[, month := factor(month.str, month.levs)][]) month.str month01.str month01.POSIXct next.POSIXct next01.str
1: January 2012 2012-01-01 2012-01-01 2012-02-01 2012-02-01
2: February 2012 2012-02-01 2012-02-01 2012-03-03 2012-03-01
3: March 2012 2012-03-01 2012-03-01 2012-04-01 2012-04-01
4: April 2012 2012-04-01 2012-04-01 2012-05-02 2012-05-01
5: May 2012 2012-05-01 2012-05-01 2012-06-01 2012-06-01
---
68: August 2011 2011-08-01 2011-08-01 2011-09-01 2011-09-01
69: September 2011 2011-09-01 2011-09-01 2011-10-02 2011-10-01
70: October 2011 2011-10-01 2011-10-01 2011-11-01 2011-11-01
71: November 2011 2011-11-01 2011-11-01 2011-12-02 2011-12-01
72: December 2011 2011-12-01 2011-12-01 2012-01-01 2012-01-01
2 variables not shown: [next01.POSIXct, month]
Note that we created a month column which is a factor ordered by month.levs.
(accidents.dt[
, month.text := strftime(date.POSIXct, "%B %Y")
][
, month := factor(month.text, month.levs)
][
, month.POSIXct := as.POSIXct(strptime(paste0(month.str, "-15"), "%Y-%m-%d"))
][]) date.str time.str deaths people.severely.injured
1: 2012-01-02 18:35 0 0
2: 2012-01-05 21:50 0 0
3: 2012-01-09 21:15 0 0
4: 2012-01-10 15:40 0 0
5: 2012-01-10 0:15 0 0
---
5591: 2014-12-19 12:22 0 0
5592: 2014-12-19 19:50 0 0
5593: 2014-12-26 19:56 0 0
5594: 2014-12-27 12:35 0 0
5595: 2014-12-30 11:55 0 0
13 variables not shown: [people.slightly.injured, street.number, street, cross.street, location.int, position.int, position, location, date.POSIXct, month.str, ...]
stopifnot(!is.na(accidents.dt$month.POSIXct))
(accidents.per.month <- accidents.dt[, list(
total.accidents=.N,
total.people=sum(deaths+people.severely.injured+people.slightly.injured),
deaths=sum(deaths),
people.severely.injured=sum(people.severely.injured),
people.slightly.injured=sum(people.slightly.injured),
next.POSIXct = month.POSIXct + one.day * 30,
month01.str = paste0(strftime(month.POSIXct, "%Y-%m"), "-01")
), by=.(month, month.str, month.text, month.POSIXct)][, let(
month01.POSIXct = as.POSIXct(strptime(month01.str, "%Y-%m-%d")),
next01.str = paste0(strftime(next.POSIXct, "%Y-%m"), "-01")
)][
, next01.POSIXct := as.POSIXct(strptime(next01.str, "%Y-%m-%d"))
][]) month month.str month.text month.POSIXct total.accidents
1: January 2012 2012-01 January 2012 2012-01-15 11
2: February 2012 2012-02 February 2012 2012-02-15 19
3: March 2012 2012-03 March 2012 2012-03-15 76
4: April 2012 2012-04 April 2012 2012-04-15 113
5: May 2012 2012-05 May 2012 2012-05-15 224
---
32: August 2014 2014-08 August 2014 2014-08-15 308
33: September 2014 2014-09 September 2014 2014-09-15 279
34: October 2014 2014-10 October 2014 2014-10-15 166
35: November 2014 2014-11 November 2014 2014-11-15 71
36: December 2014 2014-12 December 2014 2014-12-15 10
9 variables not shown: [total.people, deaths, people.severely.injured, people.slightly.injured, next.POSIXct, month01.str, month01.POSIXct, next01.str, next01.POSIXct]
We plot the accidents per month below.
accidents.tall <- melt(
accidents.per.month,
measure.vars=c(
"deaths", "people.severely.injured", "people.slightly.injured"),
variable.name="severity",
value.name="people")
severity.colors <- c(
"people.slightly.injured"="#FEE0D2",#lite red
"people.severely.injured"="#FB6A4A",
deaths="#A50F15")#dark red
ggplot()+
theme_bw()+
geom_bar(aes(
month.POSIXct, people, fill=severity),
stat="identity",
data=accidents.tall)+
scale_fill_manual(values=severity.colors)
In each accident, there are counts of people who died, along with people who suffered severe and slight injuries. Below we classify the severity of each accident according to the worst outcome among the people affected.
severity
people.slightly.injured people.severely.injured deaths
5262 289 44
The output above shows that accidents with only slight injuries are most frequent, and accidents with at least one death are least frequent. Below we compute counts per month.
(counts.per.month <- counts.dt[, let(
month.POSIXct = as.POSIXct(strptime(paste0(month.str, "-15"), "%Y-%m-%d")),
month.text = strftime(date, "%B %Y"),
day.of.the.month = as.integer(strftime(date, "%d"))
)][
, month := factor(month.text, month.levs)
][, list(
days=.N,
mean.per.day=mean(count),
count=sum(count),
month01.str = paste0(month.str, "-01")
), by=.(location, month, month.str, month.POSIXct)][
0 < count
][
, month01.POSIXct := as.POSIXct(strptime(month01.str, "%Y-%m-%d"))
][
, next.POSIXct := month01.POSIXct + one.day * 31
][
, next01.str := paste0(strftime(next.POSIXct, "%Y-%m"), "-01")
][
, next01.POSIXct := as.POSIXct(strptime(next01.str, "%Y-%m-%d"))
][
, days.in.month := as.integer(round(difftime(next01.POSIXct,month01.POSIXct,units="days")))
][]) location month month.str month.POSIXct days mean.per.day
1: Berri January 2009 2009-01 2009-01-15 31 100.3226
2: Berri February 2009 2009-02 2009-02-15 28 159.6786
3: Berri March 2009 2009-03 2009-03-15 31 271.3226
4: Berri May 2009 2009-05 2009-05-15 31 2972.8710
5: Berri June 2009 2009-06 2009-06-15 30 3909.9333
---
321: Totem_Laurier May 2013 2013-05 2013-05-15 31 2746.4194
322: Totem_Laurier June 2013 2013-06 2013-06-15 30 2828.6000
323: Totem_Laurier July 2013 2013-07 2013-07-15 31 3238.3871
324: Totem_Laurier August 2013 2013-08 2013-08-15 31 3162.7097
325: Totem_Laurier September 2013 2013-09 2013-09-15 18 2888.7778
7 variables not shown: [count, month01.str, month01.POSIXct, next.POSIXct, next01.str, next01.POSIXct, days.in.month]
counts.per.month[days < days.in.month, {
list(location, month, days, days.in.month)
}] location month days days.in.month
1: Berri November 2012 5 30
2: Côte-Sainte-Catherine November 2012 5 30
3: Maisonneuve 1 November 2012 5 30
4: Maisonneuve 2 November 2012 5 30
5: du Parc November 2012 5 30
---
11: Maisonneuve 2 September 2013 18 30
12: du Parc September 2013 18 30
13: Pierre-Dupuy September 2013 18 30
14: Rachel September 2013 18 30
15: Totem_Laurier September 2013 18 30
As shown above, some months do not have observations for all days.
9.2 Interactive viz of accident frequency
complete.months <- counts.per.month[days == days.in.month]
month.labels <- counts.per.month[, {
.SD[which.max(count), ]
}, by=location]
day.labels <- counts.dt[, {
.SD[which.max(count), ]
}, by=.(location, month)]
(city.wide.cyclists <- counts.per.month[0 < count, list(
locations=.N,
count=sum(count),
month01.str = paste0(month.str, "-01")
), by=.(month, month.str, month.POSIXct)][
, month01.POSIXct := as.POSIXct(strptime(month01.str, "%Y-%m-%d"))
][
, next.POSIXct := month01.POSIXct + one.day * 31
][
, next01.str := paste0(strftime(next.POSIXct, "%Y-%m"), "-01")
][
, next01.POSIXct := as.POSIXct(strptime(next01.str, "%Y-%m-%d"))
][]) month month.str month.POSIXct locations count month01.str
1: January 2009 2009-01 2009-01-15 2 14245 2009-01-01
2: February 2009 2009-02 2009-02-15 2 24002 2009-02-01
3: March 2009 2009-03 2009-03-15 2 57980 2009-03-01
4: May 2009 2009-05 2009-05-15 2 149327 2009-05-01
5: June 2009 2009-06 2009-06-15 4 305555 2009-06-01
---
50: May 2013 2013-05 2013-05-15 8 811126 2013-05-01
51: June 2013 2013-06 2013-06-15 8 767951 2013-06-01
52: July 2013 2013-07 2013-07-15 8 916662 2013-07-01
53: August 2013 2013-08 2013-08-15 8 856066 2013-08-01
54: September 2013 2013-09 2013-09-15 8 450513 2013-09-01
4 variables not shown: [month01.POSIXct, next.POSIXct, next01.str, next01.POSIXct]
month.str.vec <- strftime(seq(
strptime("2012-01-15", "%Y-%m-%d"),
strptime("2013-01-15", "%Y-%m-%d"),
by="month"), "%Y-%m")
city.wide.complete <- complete.months[0 < count, list(
locations=.N,
count=sum(count),
month01.str = paste0(month.str, "-01")
), by=.(month, month.str, month.POSIXct)]
setkey(city.wide.complete, month.str)
scatter.cyclists <- city.wide.complete[month.str.vec]
scatter.accidents <- accidents.per.month[scatter.cyclists, on=list(month.str)]
scatter.not.na <- scatter.accidents[!is.na(locations),]
scatter.max <- scatter.not.na[locations==max(locations)]
fit <- lm(total.accidents ~ count - 1, scatter.max)
scatter.max[
, pred.accidents := predict(fit)
][
, mean(total.accidents/count)
][1] 0.0004201563
animint(
regression=ggplot()+
theme_bw()+
ggtitle("Numbers of accidents and cyclists")+
geom_line(aes(
count, pred.accidents),
color="grey",
data=scatter.max)+
geom_point(aes(
count, total.accidents),
shape=1,
clickSelects="month",
size=5,
alpha=0.75,
data=scatter.max)+
ylab("Total bike accidents (all Montreal locations)")+
xlab("Total cyclists (all Montreal locations)"),
timeSeries=ggplot()+
theme_bw()+
ggtitle("Time series of accident frequency")+
xlab("Month")+
geom_point(aes(
month.POSIXct, total.accidents/count),
clickSelects="month",
size=5,
alpha=0.75,
data=scatter.max))The data viz above shows two data visualizations of city-wide accident frequency over time. The plot on the left shows that the number of accidents grows with the number of cyclists. The plot on the right shows the frequency of accidents over time.
9.3 Interactive viz with map and details
The plot below is a dotplot of accidents for each month. Each dot represents one person who got in an accident.
(accidents.cumsum <- accidents.dt[
order(date.POSIXct, month, severity)
][
, accident.i := seq_along(severity)
, by=.(date.POSIXct, month)
][
, day.of.the.month := as.integer(strftime(date.POSIXct, "%d"))
][]) date.str time.str deaths people.severely.injured
1: 2012-01-02 18:35 0 0
2: 2012-01-05 21:50 0 0
3: 2012-01-09 21:15 0 0
4: 2012-01-10 15:40 0 0
5: 2012-01-10 0:15 0 0
---
5591: 2014-12-19 12:22 0 0
5592: 2014-12-19 19:50 0 0
5593: 2014-12-26 19:56 0 0
5594: 2014-12-27 12:35 0 0
5595: 2014-12-30 11:55 0 0
17 variables not shown: [people.slightly.injured, street.number, street, cross.street, location.int, position.int, position, location, date.POSIXct, month.str, ...]
ggplot()+
theme_bw()+
theme(panel.margin=grid::unit(0, "cm"))+
facet_wrap("month")+
geom_text(aes(15, 25, label=month), data=accidents.per.month)+
scale_fill_manual(values=severity.colors)+
scale_x_continuous("day of the month", breaks=c(1, 10, 20, 30))+
geom_point(aes(
day.of.the.month, accident.i, fill=severity),
shape=21,
data=accidents.cumsum)
(counter.locations <- data.table(montreal.bikes$counter.locations)[, let(
lon = coord_X,
lat = coord_Y
)][]) id nom nom_comptage Etat Type
1: 1 St-Urbain_1 Saint-Urbain Existant compteur
2: 2 Brebeuf_1 Brebeuf Existant compteur
3: 4 Maisonneuve_1 Maisonneuve_1 À réinstaller compteur
4: 5 Maisonneuve_2 Maisonneuve_2 Existant compteur
5: 6 Rachel/Papineau Rachel/Papineau Existant compteur
---
17: 36 René-Lévesque_2 René-Lévesque Existant compteur
18: 37 Totem_Laurier Totem_Laurier Existant totem
19: 3 Berri_1 Berri1 Existant compteur
20: 38 Parc_2 Parc U-Zelt Test Existant Projet-pilot
21: 39 St-Laurent Saint-Laurent U-Zelt Test Existant Projet-pilot
5 variables not shown: [Annee_implante, coord_X, coord_Y, lon, lat]
loc.name.code <- c(
"Berri1"="Berri",
"Brebeuf"="Brébeuf",
"CSC"="Côte-Sainte-Catherine",
"Maisonneuve_1"="Maisonneuve 1",
"Maisonneuve_2"="Maisonneuve 2",
"Parc"="du Parc",
"PierDup"="Pierre-Dupuy",
"Rachel/Papineau"="Rachel",
"Saint-Urbain"="Saint-Urbain",
"Totem_Laurier"="Totem_Laurier")
counter.locations[, location := loc.name.code[nom_comptage] ]
velo.counts <- table(counts.dt$location)
(show.locations <- counter.locations[names(velo.counts), on=list(location)]) id nom nom_comptage Etat Type
1: 3 Berri_1 Berri1 Existant compteur
2: 2 Brebeuf_1 Brebeuf Existant compteur
3: 8 Cote-Ste-Catherine_1 CSC Existant compteur
4: 4 Maisonneuve_1 Maisonneuve_1 À réinstaller compteur
5: 5 Maisonneuve_2 Maisonneuve_2 Existant compteur
6: 22 Parc_1 Parc Existant compteur
7: 12 Pierre-Dupuy_1 PierDup Existant compteur
8: 6 Rachel/Papineau Rachel/Papineau Existant compteur
9: 1 St-Urbain_1 Saint-Urbain Existant compteur
10: 37 Totem_Laurier Totem_Laurier Existant totem
6 variables not shown: [Annee_implante, coord_X, coord_Y, lon, lat, location]
The counter locations above will be plotted below. Note that we use showSelected=month and clickSelects=location.
map.lim <- show.locations[, list(
range.lat=range(lat),
range.lon=range(lon)
)]
diff.vec <- sapply(map.lim, diff)
diff.mat <- c(-1, 1) * matrix(diff.vec, 2, 2, byrow=TRUE)
scale.mat <- as.matrix(map.lim) + diff.mat
location.colors <-
c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462",
"#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F")
names(location.colors) <- show.locations$location
counts.per.month.loc <- counts.per.month[show.locations, on=list(location)]
bike.paths <- data.table(montreal.bikes$path.locations)
some.paths <- bike.paths[
scale.mat[1, "range.lat"] < lat &
scale.mat[1, "range.lon"] < lon &
lat < scale.mat[2, "range.lat"] &
lon < scale.mat[2, "range.lon"]]
mtl.map <- ggplot()+
theme_bw()+
theme(
panel.margin=grid::unit(0, "lines"),
axis.line=element_blank(), axis.text=element_blank(),
axis.ticks=element_blank(), axis.title=element_blank(),
panel.background = element_blank(),
panel.border = element_blank())+
coord_equal(xlim=map.lim$range.lon, ylim=map.lim$range.lat)+
scale_color_manual(values=location.colors)+
scale_x_continuous(limits=scale.mat[, "range.lon"])+
scale_y_continuous(limits=scale.mat[, "range.lat"])+
geom_path(aes(
lon, lat,
tooltip=TYPE_VOIE,
group=paste(feature.i, path.i)),
color="grey",
data=some.paths)+
guides(color="none")+
geom_text(aes(
lon, lat,
label=location),
clickSelects="location",
data=show.locations)
mtl.map
The plot below shows the time period that each counter was in operation. Note that we use geom_tallrect with clickSelects to select the month.
location.ranges <- counts.per.month[0 < count, list(
min=min(month.POSIXct),
max=max(month.POSIXct)
), by=location]
accidents.range <- accidents.dt[, data.table(
location="accidents",
min=min(date.POSIXct),
max=max(date.POSIXct))]
MonthSummary <- ggplot()+
theme_bw()+
theme_animint(width=450, height=250)+
xlab("range of dates in data")+
ylab("data type")+
scale_color_manual(values=location.colors)+
guides(color="none")+
geom_segment(aes(
min, location,
xend=max, yend=location,
color=location),
clickSelects="location",
data=location.ranges, alpha=3/4, size=10)+
geom_segment(aes(
min, location,
xend=max, yend=location),
color=severity.colors[["deaths"]],
data=accidents.range,
size=10)
print(MonthSummary)
The plot below shows the bike counts at each location and day.
date min.date max.date locations
1: 2009-01-01 05:00:00 2008-12-31 17:00:00 2009-01-01 17:00:00 9
2: 2009-01-02 05:00:00 2009-01-01 17:00:00 2009-01-02 17:00:00 9
3: 2009-01-03 05:00:00 2009-01-02 17:00:00 2009-01-03 17:00:00 9
4: 2009-01-04 05:00:00 2009-01-03 17:00:00 2009-01-04 17:00:00 9
5: 2009-01-05 05:00:00 2009-01-04 17:00:00 2009-01-05 17:00:00 9
---
1604: 2013-09-14 04:00:00 2013-09-13 16:00:00 2013-09-14 16:00:00 8
1605: 2013-09-15 04:00:00 2013-09-14 16:00:00 2013-09-15 16:00:00 8
1606: 2013-09-16 04:00:00 2013-09-15 16:00:00 2013-09-16 16:00:00 8
1607: 2013-09-17 04:00:00 2013-09-16 16:00:00 2013-09-17 16:00:00 8
1608: 2013-09-18 04:00:00 2013-09-17 16:00:00 2013-09-18 16:00:00 8
location date count month.str
1: Berri 2010-06-15 04:00:00 7495 2010-06
2: Brébeuf 2010-06-04 04:00:00 9235 2010-06
3: Côte-Sainte-Catherine 2013-09-18 04:00:00 3330 2013-09
4: Maisonneuve 1 2011-06-17 04:00:00 5355 2011-06
5: Maisonneuve 2 2011-06-07 04:00:00 8332 2011-06
6: du Parc 2011-09-27 04:00:00 4577 2011-09
7: Pierre-Dupuy 2013-07-21 04:00:00 4841 2013-07
8: Rachel 2013-05-31 04:00:00 8555 2013-05
9: Saint-Urbain 2010-04-27 04:00:00 3856 2010-04
10: Totem_Laurier 2013-08-21 04:00:00 4293 2013-08
5 variables not shown: [loc.lines, month.POSIXct, month.text, day.of.the.month, month]
TimeSeries <- ggplot()+
theme_bw()+
geom_tallrect(aes(
xmin=date-one.day/2, xmax=date+one.day/2,
clickSelects=date),
data=dates, alpha=1/2)+
geom_line(aes(
date, count, group=location,
showSelected=location,
clickSelects=location),
data=counts.dt)+
scale_color_manual(values=location.colors)+
geom_point(aes(
date, count, color=location,
showSelected=location,
clickSelects=location),
data=counts.dt)+
geom_text(aes(
date, count+200, color=location, label=location,
showSelected=location,
clickSelects=location),
data=location.labels)
print(TimeSeries)Warning: Removed 407 rows containing missing values (geom_point).

The plot below shows the same data but for each month.
MonthSeries <- ggplot()+
guides(color="none", fill="none")+
theme_bw()+
geom_tallrect(aes(
xmin=month01.POSIXct, xmax=next01.POSIXct),
clickSelects="month",
data=months,
alpha=1/2)+
geom_line(aes(
month.POSIXct, count, group=location,
color=location),
showSelected="location",
clickSelects="location",
data=counts.per.month)+
scale_color_manual(values=location.colors)+
scale_fill_manual(values=location.colors)+
xlab("month")+
ylab("bike counts per month")+
geom_point(aes(
month.POSIXct, count, fill=location,
tooltip=paste(
count, "bikers counted at",
location, "in", month)),
showSelected="location",
clickSelects="location",
size=5,
color="black",
data=counts.per.month)+
geom_text(aes(
month.POSIXct, count+5000, color=location, label=location),
showSelected="location",
clickSelects="location",
data=month.labels)
print(MonthSeries)
counter.title <- "mean cyclists per day"
accidents.title <- "city-wide accidents"
MonthFacet <- ggplot()+
ggtitle("All data, select month")+
guides(color="none", fill="none")+
theme_bw()+
facet_grid(facet ~ ., scales="free")+
theme(panel.margin=grid::unit(0, "lines"))+
geom_tallrect(aes(
xmin=month01.POSIXct, xmax=next01.POSIXct),
clickSelects="month",
data=data.table(
city.wide.cyclists,
facet=counter.title),
alpha=1/2)+
geom_line(aes(
month.POSIXct, mean.per.day, group=location,
color=location),
showSelected="location",
clickSelects="location",
data=data.table(counts.per.month, facet=counter.title))+
scale_color_manual(values=location.colors)+
xlab("month")+
ylab("")+
geom_point(aes(
month.POSIXct, mean.per.day, color=location,
tooltip=paste(
count, "cyclists counted at",
location, "in",
days, "days of", month,
sprintf("(mean %d cyclists/day)", as.integer(mean.per.day)))),
showSelected="location",
clickSelects="location",
size=5,
fill="grey",
data=data.table(counts.per.month, facet=counter.title))+
geom_text(aes(
month.POSIXct, mean.per.day+300, color=location, label=location),
showSelected="location",
clickSelects="location",
data=data.table(month.labels, facet=counter.title))+
scale_fill_manual(values=severity.colors, breaks=names(severity.colors))+
geom_bar(aes(
month.POSIXct, people,
fill=severity),
showSelected="severity",
stat="identity",
position="identity",
color=NA,
data=data.table(accidents.tall, facet=accidents.title))+
geom_tallrect(aes(
xmin=month01.POSIXct, xmax=next01.POSIXct,
tooltip=paste(
ifelse(deaths==0, "",
ifelse(deaths==1,
"1 death,",
paste(deaths, "deaths,"))),
ifelse(people.severely.injured==0, "",
ifelse(people.severely.injured==1,
"1 person severely injured,",
paste(people.severely.injured,
"people severely injured,"))),
people.slightly.injured,
"people slightly injured in",
month)),
clickSelects="month",
alpha=0.5,
data=data.table(accidents.per.month,
facet=accidents.title))
MonthFacet
day.POSIXct day.of.the.week
1: 2009-01-01 Thu
2: 2009-01-02 Fri
3: 2009-01-03 Sat
4: 2009-01-04 Sun
5: 2009-01-05 Mon
---
2188: 2014-12-28 Sun
2189: 2014-12-29 Mon
2190: 2014-12-30 Tue
2191: 2014-12-31 Wed
2192: 2015-01-01 Thu
day.POSIXct day.of.the.week month.text day.of.the.month month
1: 2009-01-03 Sat January 2009 3 January 2009
2: 2009-01-04 Sun January 2009 4 January 2009
3: 2009-01-10 Sat January 2009 10 January 2009
4: 2009-01-11 Sun January 2009 11 January 2009
5: 2009-01-17 Sat January 2009 17 January 2009
---
622: 2014-12-14 Sun December 2014 14 December 2014
623: 2014-12-20 Sat December 2014 20 December 2014
624: 2014-12-21 Sun December 2014 21 December 2014
625: 2014-12-27 Sat December 2014 27 December 2014
626: 2014-12-28 Sun December 2014 28 December 2014
counter.title <- "cyclists per day"
DaysFacet <- ggplot()+
ggtitle("Selected month (weekends in grey)")+
geom_tallrect(aes(
xmin=day.of.the.month-0.5, xmax=day.of.the.month+0.5,
key=paste(day.POSIXct)),
showSelected="month",
fill="grey",
color="white",
data=weekend.dt)+
guides(color="none")+
theme_bw()+
facet_grid(facet ~ ., scales="free")+
geom_line(aes(
day.of.the.month, count, group=location,
key=location,
color=location),
showSelected=c("location", "month"),
clickSelects="location",
chunk_vars=c("month"),
data=data.table(counts.dt, facet=counter.title))+
scale_color_manual(values=location.colors)+
ylab("")+
geom_point(aes(
day.of.the.month, count, color=location,
key=paste(day.of.the.month, location),
tooltip=paste(
count, "cyclists counted at",
location, "on",
date)),
showSelected=c("location", "month"),
clickSelects="location",
size=5,
chunk_vars=c("month"),
fill="white",
data=data.table(counts.dt, facet=counter.title))+
scale_fill_manual(values=severity.colors, breaks=names(severity.colors))+
geom_text(aes(
15, 23, label=month, key=1),
showSelected="month",
data=data.table(months, facet=accidents.title))+
scale_x_continuous("day of the month", breaks=c(1, 10, 20, 30))+
geom_text(aes(
day.of.the.month, count+500, color=location, label=location,
key=location),
showSelected=c("location", "month"),
clickSelects="location",
data=data.table(day.labels, facet=counter.title))+
geom_point(aes(
day.of.the.month, accident.i,
key=paste(date.str, accident.i),
tooltip=paste(
ifelse(deaths==0, "",
ifelse(deaths==1,
"1 death,",
paste(deaths, "deaths,"))),
ifelse(people.severely.injured==0, "",
ifelse(people.severely.injured==1,
"1 person severely injured,",
paste(people.severely.injured,
"people severely injured,"))),
people.slightly.injured,
"people slightly injured at",
ifelse(is.na(street.number), "", street.number),
street, "/", cross.street,
date.str, time.str),
fill=severity),
showSelected="month",
size=4,
chunk_vars=c("month"),
data=data.table(accidents.cumsum, facet=accidents.title))
DaysFacetWarning: Removed 407 rows containing missing values (geom_point).

9.4 Chapter summary and exercises
Exercises:
- Change location to a multiple selection variable.
- Add a plot for the map to the data viz.
- On the map, draw a circle for each location, with size that changes based on the
countof the accidents in the currently selectedmonth. - On the
MonthSummaryplot, add a background rectangle that can be used to select themonth. - Remove the
MonthSummaryplot and add a similar visualization as a third panel in theMonthFacetplot.
Next, Chapter 10 explains how to visualize the K-Nearest-Neighbors machine learning model.