ggplot2 graphics companion
Last updated: 09 July 2020
The Graphics Companion provides the R code for different data visualisations that can be created using the ggplot2 package.
The Companion adopts the structure of the Financial Times’ Visual Vocabulary by categorising different chart types by the data relationships that they best illustrate.
The data used throughout the Companion derive from a subset of Hans Rosling’s Gapminder World which are available in the gapminder R package. Data on life expectancy at birth, GDP per capita and total population are provided for 142 countries between 1952 and 2007.
Setup
You need to install - but only once - the tidyverse package and load it into your R session. ggplot2 is part of the tidyverse suite of R tools for data science.
# install.packages('tidyverse')
library(tidyverse)
All of the example plots below use data contained in the gapminder R package which also needs to be installed / loaded:
# install.packages('gapminder')
library(gapminder)
Lastly, we need to load the Trafford Data Lab’s ggplot2 theme.
source("https://raw.githubusercontent.com/traffordDataLab/assets/601e80334e0d78dfe913685561196b8b6fc278a7/theme/ggplot2/theme_lab.R")
If you wish to use an alternative theme simply swap out the theme_lab()
function with a different ggplot2 theme or use one from the ggthemes package.
Change over time
Single line chart
# Single line chart: life expectancy in Argentina over time.
# `year` is turned into a Date (1 January of each year) so it can be placed
# on a date scale. The format belongs to as.Date(), not paste(): passing it
# to paste() appends the format text to every date string.
df <- gapminder %>%
  filter(country == "Argentina") %>%
  mutate(year = as.Date(paste0(year, "-01-01"), format = "%Y-%m-%d"))

ggplot(df, aes(x = year, y = lifeExp)) +
  geom_line(colour = "#fc6721", size = 1) +
  geom_point(colour = "#fc6721", size = 2) +
  # one axis break per observed year, labelled with the year only
  scale_x_date(breaks = df$year, date_labels = "%Y") +
  # start the y axis at zero
  scale_y_continuous(limits = c(0, max(df$lifeExp)), labels = scales::comma) +
  labs(
    title = "",
    subtitle = "Life expectancy in Argentina, 1952-2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "",
    y = "Age (years)"
  ) +
  theme_lab() +
  theme(panel.grid.major.x = element_blank())
Life expectancy in Argentina, 1952-2007 single line chart.
Multiple line chart
# Multiple line chart: life expectancy in Argentina and Italy over time.
# `year` is turned into a Date (1 January of each year) so it can be placed
# on a date scale. The format belongs to as.Date(), not paste(): passing it
# to paste() appends the format text to every date string.
df <- gapminder %>%
  filter(country %in% c("Argentina", "Italy")) %>%
  mutate(year = as.Date(paste0(year, "-01-01"), format = "%Y-%m-%d"))

ggplot(df, aes(x = year, y = lifeExp, colour = country)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  # fixed colour per country
  scale_colour_manual(values = c("Argentina" = "#fc6721", "Italy" = "#E7B800")) +
  # one axis break per observed year, labelled with the year only
  scale_x_date(breaks = df$year, date_labels = "%Y") +
  # start the y axis at zero; upper limit is taken from the data
  scale_y_continuous(limits = c(0, NA), labels = scales::comma) +
  labs(
    title = "",
    subtitle = "Life expectancy in Argentina and Italy, 1952-2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "",
    y = "Age (years)",
    colour = NULL
  ) +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )
Life expectancy in Argentina and Italy, 1952-2007 multiple line chart.
Bar chart (vertical)
# Vertical bar chart: GDP per capita in Egypt over time.
# `year` is turned into a Date (1 January of each year) so it can be placed
# on a date scale. The format belongs to as.Date(), not paste(): passing it
# to paste() appends the format text to every date string.
df <- gapminder %>%
  filter(country == "Egypt") %>%
  mutate(year = as.Date(paste0(year, "-01-01"), format = "%Y-%m-%d"))

ggplot(df, aes(x = year, y = gdpPercap)) +
  geom_col(fill = "#fc6721", alpha = 0.8) +
  # one axis break per observed year, labelled with the year only
  scale_x_date(breaks = df$year, date_labels = "%Y") +
  # no padding, so the bars sit directly on the x axis
  scale_y_continuous(expand = c(0, 0), labels = scales::dollar) +
  labs(
    title = "",
    subtitle = "GDP per capita in Egypt, 1952-2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = "GDP per capita ($)",
    fill = NULL
  ) +
  theme_lab() +
  theme(panel.grid.major.x = element_blank())
GDP per capita in Egypt, 1952-2007 vertical bar chart.
Slope chart
# Slope chart: change in GDP per capita between 1952 and 2007 for
# Iceland and Norway.
df <- gapminder %>%
  filter(country %in% c("Iceland", "Norway") & year %in% c(1952, 2007))

# The aesthetics shared by every layer are declared once here; `year` is
# treated as a factor so the two years become two discrete x positions.
ggplot(df, aes(x = as.factor(year), y = gdpPercap,
               group = country, colour = country)) +
  geom_line(size = 2, alpha = 0.8) +
  geom_point(size = 5, alpha = 0.8) +
  # left-hand labels: country name and 1952 value.
  # `size` and `hjust` are constants, so they are set outside aes();
  # inside aes() `size` would be mapped through a size scale instead of
  # being drawn at 4, and would not match the right-hand labels.
  geom_text(
    data = subset(df, year == 1952),
    aes(label = paste(country, scales::dollar(round(gdpPercap, 0)), sep = ", ")),
    size = 4, hjust = 1.2
  ) +
  # right-hand labels: 2007 value only
  geom_text(
    data = subset(df, year == 2007),
    aes(label = scales::dollar(round(gdpPercap, 0))),
    size = 4, hjust = -0.3
  ) +
  scale_colour_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "GDP per capita change, 1952 - 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL,
    colour = NULL
  ) +
  theme_lab() +
  # the values are printed as labels, so grid lines, y-axis text and the
  # legend are all removed
  theme(
    panel.grid.major = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "none"
  )
GDP per capita change, 1952-2007 slope chart.
Stacked area chart
# Stacked area chart: GDP per capita for four European countries over time.
# `year` is turned into a Date (1 January of each year) so it can be placed
# on a date scale. The format belongs to as.Date(), not paste(): passing it
# to paste() appends the format text to every date string.
df <- gapminder %>%
  filter(country %in% c("France", "Germany", "Ireland", "Italy")) %>%
  mutate(year = as.Date(paste0(year, "-01-01"), format = "%Y-%m-%d"))

ggplot(df, aes(x = year, y = gdpPercap, fill = country)) +
  geom_area(color = "white", alpha = 0.4) +
  scale_fill_brewer(palette = "Set2") +
  # one axis break per observed year, labelled with the year only
  scale_x_date(breaks = df$year, date_labels = "%Y") +
  scale_y_continuous(expand = c(0, 0), labels = scales::dollar) +
  labs(
    title = "",
    subtitle = "GDP per capita by country, 1952-2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = "GDP per capita ($)",
    fill = NULL
  ) +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )
GDP per capita by country, 1952-2007 stacked area chart.
Correlation
Scatterplot
# Scatterplot: life expectancy against GDP per capita for 2007.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(colour = "#fc6721") +
  # GDP per capita is shown on a log10 axis with dollar labels
  scale_x_log10(labels = scales::dollar) +
  labs(
    title = "",
    subtitle = "Relationship between life expectancy and income, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "GDP per capita ($)",
    y = "Age (years)"
  ) +
  theme_lab() +
  theme(panel.grid.major.x = element_blank())
Relationship between life expectancy and income, 2007 scatterplot.
Bubble chart
# Bubble chart: life expectancy against GDP per capita for 2007, with
# bubble area mapped to population and fill mapped to continent.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10(labels = scales::dollar) +
  # shape 21 is a filled circle with a separate outline colour
  geom_point(aes(size = pop, fill = continent),
             shape = 21, colour = "white", alpha = 0.6) +
  scale_fill_brewer(palette = "Set2") +
  scale_size_continuous(range = c(1, 20)) +
  labs(
    title = "",
    subtitle = "Relationship between life expectancy and income, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "GDP per capita ($)",
    y = "Age (years)"
  ) +
  # hide the size legend; "none" replaces the deprecated `FALSE`
  guides(size = "none") +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "right",
    legend.title = element_blank()
  )
Relationship between life expectancy and income, 2007 bubble chart.
Deviation
Diverging bar chart
# Diverging bar chart: each European country's 2007 GDP per capita
# relative to the European median.
df <- gapminder %>%
  filter(year == 2007, continent == "Europe") %>%
  mutate(
    median = median(gdpPercap),
    diff = gdpPercap - median,
    type = ifelse(gdpPercap < median, "Below", "Above")
  ) %>%
  # fix the factor levels in order of `diff` so the bars are drawn sorted
  arrange(diff) %>%
  mutate(country = factor(country, levels = country))

ggplot(data = df, mapping = aes(x = country, y = diff, label = country)) +
  geom_col(mapping = aes(fill = type), width = 0.5, alpha = 0.8) +
  scale_y_continuous(labels = scales::dollar, expand = c(0, 0)) +
  scale_fill_manual(
    values = c("Above" = "#31a354", "Below" = "#de2d26"),
    labels = c("Above median", "Below median")
  ) +
  labs(
    title = "",
    subtitle = "GDP per capita, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  # flip so that countries run down the vertical axis
  coord_flip() +
  theme_lab() +
  theme(panel.grid.major.y = element_blank())
GDP per capita, 2007 diverging bar chart.
Distribution
Histogram
# Histogram: distribution of life expectancy across countries in 2007,
# using bins one year wide.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = lifeExp)) +
  geom_histogram(
    binwidth = 1,
    fill = "#fc6721",
    colour = "white",
    alpha = 0.8
  ) +
  scale_y_continuous(breaks = scales::pretty_breaks()) +
  labs(
    title = "",
    subtitle = "Life expectancy distribution, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "Age (years)",
    y = ""
  ) +
  theme_lab() +
  theme(panel.grid.major.x = element_blank())
Life expectancy distribution, 2007 histogram.
Density plot
# Density plot: distribution of life expectancy in 2007, one
# semi-transparent density curve per continent.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = lifeExp)) +
  geom_density(aes(fill = continent), size = 0.1, alpha = 0.5) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "Life expectancy distribution, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "Age (years)",
    y = "",
    fill = NULL
  ) +
  theme_lab() +
  theme(panel.grid.major.x = element_blank())
Life expectancy distribution, 2007 density plot.
Boxplot
# Boxplot: distribution of life expectancy in 2007 by continent.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot(colour = "#757575", alpha = 0.8) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "Life expectancy distributions, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "",
    y = "Age (years)"
  ) +
  theme_lab() +
  # the x axis already names each continent, so the fill legend is hidden
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
Life expectancy distribution, 2007 boxplot.
Violin plot
# Violin plot: distribution of life expectancy in 2007 by continent.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp, fill = continent)) +
  geom_violin(colour = "#757575", alpha = 0.8) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "Life expectancy distribution, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "",
    y = "Age (years)"
  ) +
  theme_lab() +
  # the x axis already names each continent, so the fill legend is hidden
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
Life expectancy distribution, 2007 violin plot.
Ridgeline plot
# Ridgeline plot: distribution of life expectancy in 2007, one ridge per
# continent. geom_density_ridges() comes from the ggridges package.
library(ggridges)

# Oceania is excluded from this chart
df <- filter(gapminder, year == 2007 & continent != "Oceania")

ggplot(data = df,
       mapping = aes(x = lifeExp, y = fct_rev(continent), fill = continent)) +
  geom_density_ridges(colour = "#bdbdbd", size = 0.5, alpha = 0.5) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "Life expectancy distribution, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = "Age (years)",
    y = ""
  ) +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
Life expectancy distribution, 2007 ridgeline plot.
Magnitude
Bar chart (vertical)
# Vertical bar chart: median GDP per capita of each continent in 2007.
df <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(median = median(gdpPercap))

ggplot(data = df, mapping = aes(x = continent, y = median, fill = continent)) +
  geom_col(alpha = 0.8) +
  scale_fill_brewer(palette = "Set2") +
  # no padding, so the bars sit directly on the x axis
  scale_y_continuous(expand = c(0, 0), labels = scales::dollar) +
  labs(
    title = "",
    subtitle = "Median GDP per capita by continent, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = "GDP per capita",
    fill = NULL
  ) +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
Median GDP per capita by continent, 2007 vertical bar chart.
Grouped bar chart
# Grouped bar chart: total population of each continent for every
# Gapminder year after 1990, with one dodged bar per continent.
df <- gapminder %>%
  filter(year > 1990) %>%
  group_by(year, continent) %>%
  # `pop` is converted to double before summing.
  # NOTE(review): presumably to avoid integer overflow in the totals.
  # `.groups = "drop"` returns an ungrouped result; by default summarise()
  # would leave the data grouped by `year`.
  summarise(totalpop = sum(as.double(pop)), .groups = "drop")

ggplot(df, aes(x = year, y = totalpop, group = continent, fill = continent)) +
  geom_col(position = "dodge", colour = "#757575", size = 0.2, alpha = 0.8) +
  scale_x_continuous(breaks = seq(1992, 2007, 5), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::comma, expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "Total population by continent, 1990-2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )
Total population by continent, 1990-2007 grouped bar chart.
Stacked bar chart
# Stacked bar chart: total population for every Gapminder year after 1990,
# with one stacked segment per continent.
df <- gapminder %>%
  filter(year > 1990) %>%
  group_by(year, continent) %>%
  # `pop` is converted to double before summing.
  # NOTE(review): presumably to avoid integer overflow in the totals.
  # `.groups = "drop"` returns an ungrouped result; by default summarise()
  # would leave the data grouped by `year`.
  summarise(totalpop = sum(as.double(pop)), .groups = "drop")

ggplot(df, aes(x = year, y = totalpop, fill = continent)) +
  geom_col(colour = "white", size = 0.2, alpha = 0.8) +
  scale_x_continuous(breaks = seq(1992, 2007, 5), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::comma, expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  # reverse the order of the legend keys; TRUE is spelled out because `T`
  # is an ordinary variable that can be reassigned
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(
    title = "",
    subtitle = "Total population by continent, 1990-2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = "Population",
    fill = NULL
  ) +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "right"
  )
Total population by continent, 1990-2007 stacked bar chart.
Part-to-whole
100% stacked bar chart
# 100% stacked bar chart: each continent's share of total population for
# every Gapminder year after 1990.
df <- gapminder %>%
  filter(year > 1990) %>%
  group_by(year, continent) %>%
  # `pop` is converted to double before summing.
  # NOTE(review): presumably to avoid integer overflow in the totals.
  # `.groups = "drop"` returns an ungrouped result; by default summarise()
  # would leave the data grouped by `year`.
  summarise(totalpop = sum(as.double(pop)), .groups = "drop")

ggplot(df, aes(x = year, y = totalpop, fill = continent)) +
  # position = "fill" rescales every bar to the same total height, so the
  # segments show proportions rather than absolute totals
  geom_col(position = "fill", colour = "#757575", size = 0.2, alpha = 0.8) +
  scale_x_continuous(breaks = seq(1992, 2007, 5), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  # reverse the order of the legend keys; TRUE is spelled out because `T`
  # is an ordinary variable that can be reassigned
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(
    title = "",
    subtitle = "Proportion of total population by continent, 1990-2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_lab() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "right"
  )
Proportion of total population by continent, 1990-2007 100% stacked bar chart.
Treemap
# Treemap: country GDP in 2007, with tiles grouped and coloured by
# continent. The geom_treemap*() layers come from the treemapify package.
library(treemapify)

# total GDP = population x GDP per capita
df <- mutate(filter(gapminder, year == 2007), gdp = pop * gdpPercap)

ggplot(
  data = df,
  mapping = aes(area = gdp, fill = continent, subgroup = continent, label = country)
) +
  geom_treemap() +
  geom_treemap_subgroup_border(colour = "black") +
  # continent name in the bottom-left corner of each subgroup
  geom_treemap_subgroup_text(
    fontface = "bold",
    colour = "#f0f0f0",
    alpha = 0.7,
    place = "bottomleft"
  ) +
  # country name centred in each tile, wrapped to fit
  geom_treemap_text(colour = "white", place = "centre", reflow = TRUE) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "Country GDP by continent, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_lab() +
  theme(legend.position = "none")
Country GDP by continent, 2007 treemap.
Waffle chart
# Waffle chart: each G7 country's share of combined G7 GDP in 2007.
library(waffle)
library(RColorBrewer)

g7 <- c(
  "Canada", "France", "Germany", "Italy",
  "Japan", "United Kingdom", "United States"
)

# total GDP = population x GDP per capita
df <- gapminder %>%
  filter(year == 2007, country %in% g7) %>%
  mutate(gdp = pop * gdpPercap) %>%
  select(country, gdp)

# GDP values named by country, as passed to waffle() below
vec <- set_names(df[["gdp"]], df$country)

# shares are rounded to whole percentages; one palette colour per country
waffle(
  round((vec / sum(df$gdp)) * 100, 0),
  rows = 5,
  size = 1,
  colors = brewer.pal(length(vec), "Set2")
) +
  labs(
    title = "GDP in G7 countries, 2007",
    subtitle = "1 square = 1% of total GDP",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_lab() +
  theme(
    axis.text = element_blank(),
    legend.position = "bottom"
  )
GDP in G7 countries, 2007 waffle chart.
Ranking
Ordered bar chart (horizontal)
# Ordered horizontal bar chart: median GDP per capita of each continent
# in 2007.
df <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(median = median(gdpPercap))

# reorder() sets the continent order from the negated median
ggplot(data = df,
       mapping = aes(x = reorder(continent, -median, FUN = sum), y = median)) +
  geom_col(fill = "#fc6721", alpha = 0.8) +
  scale_y_continuous(labels = scales::dollar, expand = c(0, 0)) +
  # flip so that the bars run horizontally
  coord_flip() +
  labs(
    title = "",
    subtitle = "Median GDP per capita by continent, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = "GDP per capita",
    fill = NULL
  ) +
  theme_lab() +
  theme(panel.grid.major.y = element_blank())
Median GDP per capita by continent, 2007 ordered bar chart.
Lollipop chart
# Lollipop chart: GDP per capita of European countries in 2007.
df <- gapminder %>%
  filter(year == 2007, continent == "Europe") %>%
  # fix the factor levels in ascending GDP order so the rows are drawn sorted
  arrange(gdpPercap) %>%
  mutate(country = factor(country, levels = country))

ggplot(data = df, mapping = aes(x = gdpPercap, y = country)) +
  # the "stick": a segment from zero to the country's value
  geom_segment(
    mapping = aes(x = 0, xend = gdpPercap, y = country, yend = country),
    colour = "#f0f0f0"
  ) +
  # the "head" of the lollipop
  geom_point(colour = "#fc6721", size = 3, alpha = 0.8) +
  # upper limit is 10% above the largest value
  scale_x_continuous(
    limits = c(0, max(df$gdpPercap) * 1.1),
    expand = c(0, 0),
    labels = scales::dollar
  ) +
  labs(
    title = "",
    subtitle = "GDP per capita in European countries, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_lab() +
  theme(
    panel.grid.major = element_blank(),
    axis.text.y = element_text(hjust = 0)
  )
GDP per capita in European countries, 2007 lollipop chart.
Spatial
Choropleth map
# Choropleth map: life expectancy by country in 2007.
# The packages are loaded before their functions and classes are used.
library(rnaturalearth)
library(sf)
library(RColorBrewer)

# 2007 Gapminder values with the ISO alpha-3 code attached from
# `country_codes`; the code column is renamed to match the `iso_a3`
# column used as the join key against the boundary data below.
df <- gapminder %>%
  filter(year == 2007) %>%
  left_join(country_codes, by = "country") %>%
  rename("iso_a3" = "iso_alpha")

# Country boundaries, fetched once and reused for both the grey base
# layer and the joined data layer.
world <- ne_countries(type = "countries", returnclass = "sf")

# Boundaries that have matching Gapminder data.
# `sort = FALSE` was dropped from the join: it is an argument of
# base::merge(), not of dplyr's left_join().
sf <- world %>%
  left_join(df, by = "iso_a3") %>%
  filter(!is.na(country)) %>%
  select("country", "continent" = "continent.y", "year", "lifeExp", "pop", "gdpPercap", "geometry")

ggplot(sf, aes(fill = lifeExp)) +
  # base layer: every country in light grey
  geom_sf(data = world, fill = "#f0f0f0", colour = "white") +
  # data layer: countries with Gapminder values, filled by life expectancy
  geom_sf(alpha = 0.8, colour = "white", size = 0.1) +
  scale_fill_gradientn(
    colours = brewer.pal(5, "Oranges"),
    name = "Age (Years)",
    guide = guide_colourbar(
      direction = "horizontal",
      barheight = unit(2, units = "mm"),
      barwidth = unit(50, units = "mm"),
      title.position = "top",
      title.hjust = 0.5,
      label.hjust = 0.5
    )
  ) +
  labs(
    title = "",
    subtitle = "Life expectancy, 2007",
    caption = "Source: Gapminder.org | @traffordDataLab",
    x = NULL,
    y = NULL
  ) +
  theme_lab() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "bottom"
  ) +
  # Lambert azimuthal equal-area projection (+proj=laea); the string
  # literal spans two lines and is kept exactly as written.
  # datum = NA is passed through unchanged.
  coord_sf(
    crs = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000
+datum=WGS84 +units=m +no_defs",
    datum = NA
  )
Life expectancy, 2007 choropleth map.
Useful resources
Chang, W. (2012). R Graphics Cookbook: Practical Recipes for Visualizing Data. O’Reilly Media, Inc.
Wickham, H., & Grolemund, G. (2016). R for data science: import, tidy, transform, visualize, and model data. O’Reilly Media, Inc. Available online via http://r4ds.had.co.nz/
Wickham, H. (2016). ggplot2: elegant graphics for data analysis. Springer.