library(MASS) # Datasets
library(mice) # Boys dataset
library(dplyr) # Data manipulation
library(magrittr) # Pipes
library(ggplot2) # Plotting suite
Statistical Programming in R
library(MASS) # Datasets
library(mice) # Boys dataset
library(dplyr) # Data manipulation
library(magrittr) # Pipes
library(ggplot2) # Plotting suite
New functions:
hist(): histogram
plot(): R’s plotting device
barplot(): bar plot function
boxplot(): box plot function
density(): function that calculates the density
ggplot(): ggplot’s plotting device
Source: Anscombe, F. J. (1973). “Graphs in Statistical Analysis”. American Statistician. 27 (1): 17–21.
Source: https://www.autodeskresearch.com/publications/samestats
base graphics in R
ggplot2 graphics
hist(boys$hgt, main = "Histogram", xlab = "Height")
dens <- density(boys$hgt, na.rm = TRUE)
plot(dens, main = "Density plot", xlab = "Height", bty = "L")
plot(x = boys$hgt, y = boys$wgt, main = "Scatter plot", xlab = "Height", ylab = "Weight", bty = "L")
boxplot(boys$hgt ~ boys$reg, main = "Boxplot", xlab = "Region", ylab = "Height")
boxplot(hgt ~ reg, boys, main = "Boxplot", xlab = "Region", ylab = "Height", lwd = 2, notch = TRUE, col = rainbow(5))
boys %>% md.pattern() # from mice
##     age reg wgt hgt bmi hc gen phb  tv
## 223   1   1   1   1   1  1   1   1   1    0
## 19    1   1   1   1   1  1   1   1   0    1
## 1     1   1   1   1   1  1   1   0   1    1
## 1     1   1   1   1   1  1   0   1   0    2
## 437   1   1   1   1   1  1   0   0   0    3
## 43    1   1   1   1   1  0   0   0   0    4
## 16    1   1   1   0   0  1   0   0   0    5
## 1     1   1   1   0   0  0   0   0   0    6
## 1     1   1   0   1   0  1   0   0   0    5
## 1     1   1   0   0   0  1   1   1   1    3
## 1     1   1   0   0   0  0   1   1   1    4
## 1     1   1   0   0   0  0   0   0   0    7
## 3     1   0   1   1   1  1   0   0   0    4
##       0   3   4  20  21 46 503 503 522 1622
plot() method
result <- lm(age~wgt, boys)
plot(result, which = 1)
ggplot2?
Layered plotting based on the book The Grammar of Graphics by Leland Wilkinson.
With ggplot2 you provide the data, map variables to aesthetics, and state which geometric object to display; ggplot2 then takes care of the details.
1: Provide the data
boys %>% ggplot()
2: Map variables to aesthetics
boys %>% ggplot(aes(x = age, y = bmi))
3: State which geometric object to display
boys %>% ggplot(aes(x = age, y = bmi)) + geom_point()
Create the plot
gg <- boys %>% ggplot(aes(x = age, y = bmi)) + geom_point(col = "dark green")
Add another layer (smooth fit line)
gg <- gg + geom_smooth(col = "dark blue")
Give it some labels and a nice look
gg <- gg + labs(x = "Age", y = "BMI", title = "BMI trend for boys") + theme_minimal()
plot(gg)
gg <- boys %>% filter(!is.na(reg)) %>% ggplot(aes(x = age, y = bmi, size = hc, colour = reg)) + geom_point(alpha = 0.5) + labs(title = "BMI trend for boys", x = "Age", y = "BMI", size = "Head circumference", colour = "Region") + theme_minimal()
plot(gg)
geom_point
geom_bar
geom_line
geom_smooth
geom_histogram
geom_boxplot
geom_density
theme_minimal(), theme_classic(), theme_bw(), …
ggthemes
theme()