ggplot2
R
basegraphs
grid
and lattice
ggplot2
+
operator
data.frame
ggplot()
function
geom
: points, lines, histogram
install.packages("ggplot2")
Use the built-in dataset faithful
# Open the help page of the built-in Old Faithful dataset.
help("faithful")
# Show the first six observations.
head(faithful, n = 6L)
eruptions waiting
1 3.600 79
2 1.800 54
3 3.333 74
4 2.283 62
5 4.533 85
6 2.883 55
Any variables that come from the source data frame must be supplied inside the aes()
function.
# Attach ggplot2, then build the bare plot: data plus aesthetic mappings,
# no layers added yet.
library(ggplot2)
ggplot(data = faithful, mapping = aes(x = waiting, y = eruptions))
Additional layers are “added” to the plot with the +
operator.
# Scatter plot: one point per observation in `faithful`.
ggplot(data = faithful, mapping = aes(x = waiting, y = eruptions)) +
  geom_point()
# Scatter plot with a linear-model smoother layered on top.
ggplot(data = faithful, mapping = aes(x = waiting, y = eruptions)) +
  geom_point() +
  geom_smooth(method = "lm")
# Same two layers, now with fixed (non-mapped) colours and a smaller point size.
ggplot(data = faithful, mapping = aes(x = waiting, y = eruptions)) +
  geom_point(colour = "steelblue", size = 0.9) +
  geom_smooth(method = "lm", colour = "black")
# Styled scatter plot plus a title, a subtitle and descriptive axis labels.
ggplot(data = faithful, mapping = aes(x = waiting, y = eruptions)) +
  geom_point(colour = "steelblue", size = 0.9) +
  geom_smooth(method = "lm", colour = "black") +
  labs(
    title = "Old Faithful Geyser Data",
    subtitle = "Waiting time between eruptions and the duration of the eruption",
    x = "waiting time in mins",
    y = "eruption time in mins"
  )
# Labelled scatter plot rendered with the minimal theme.
ggplot(data = faithful, mapping = aes(x = waiting, y = eruptions)) +
  geom_point(colour = "steelblue", size = 0.9) +
  geom_smooth(method = "lm", colour = "black") +
  labs(
    title = "Old Faithful Geyser Data",
    subtitle = "Waiting time between eruptions and the duration of the eruption",
    x = "waiting time in mins",
    y = "eruption time in mins"
  ) +
  theme_minimal()
# Regress eruption duration on waiting time and keep only the model summary
# in `m` (later code reads m$r.squared), then print it.
m <- summary(lm(eruptions ~ waiting, data = faithful))
print(m)
Call:
lm(formula = eruptions ~ waiting, data = faithful)
Residuals:
Min 1Q Median 3Q Max
-1.29917 -0.37689 0.03508 0.34909 1.19329
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.874016 0.160143 -11.70 <2e-16 ***
waiting 0.075628 0.002219 34.09 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4965 on 270 degrees of freedom
Multiple R-squared: 0.8115, Adjusted R-squared: 0.8108
F-statistic: 1162 on 1 and 270 DF, p-value: < 2.2e-16
# R-squared taken from `m` (expected to hold the lm summary), rounded to two
# decimals for display in the plot.
rSquared <- round(m$r.squared, 2)

# Labelled scatter plot with the R-squared value printed inside the panel.
ggplot(faithful, aes(x = waiting, y = eruptions)) +
  geom_point(col = "steelblue", size = 0.9) +
  geom_smooth(method = "lm", color = "black") +
  labs(
    title = "Old Faithful Geyser Data",
    subtitle = "Waiting time between eruptions and the duration of the eruption",
    x = "waiting time in mins",
    y = "eruption time in mins"
  ) +
  # annotate() draws the label exactly once. geom_label() with constant x/y
  # inherits the plot data and stacks one identical label per data row.
  annotate(
    "label",
    x = 90, y = 1.5, size = 4,
    label = paste("Bestimmtheitsmaß:", rSquared)
  )
# Load the example datasets; strings stay character vectors (no factors).
# titanic.csv is read with read.csv (comma-separated, "." decimal mark).
titanic <- read.csv(file = "./www/titanic.csv", stringsAsFactors = FALSE)
# football.csv is read with read.csv2 (semicolon-separated, "," decimal mark).
soccer <- read.csv2(file = "./www/football.csv", stringsAsFactors = FALSE)
# Age histogram in 5-year bins, one panel per sex; rows with missing age are
# dropped silently (na.rm = TRUE).
ggplot(data = titanic, mapping = aes(x = age)) +
  geom_histogram(binwidth = 5, na.rm = TRUE) +
  facet_grid(. ~ sex)
# Mean age per passenger class, split by sex and drawn as side-by-side bars.
# `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
# Rows with a missing age are still dropped with a warning.
ggplot(titanic, aes(x = pclass, y = age, fill = sex)) +
  geom_bar(stat = "summary", fun = "mean", position = "dodge")
Warning: Removed 263 rows containing non-finite values (stat_summary).
# Store passenger class as character so it is mapped as a discrete x axis,
# then draw the age distribution per class as box plots.
titanic[["pclass"]] <- as.character(titanic[["pclass"]])
ggplot(data = titanic, mapping = aes(x = pclass, y = age)) +
  geom_boxplot()
Warning: Removed 263 rows containing non-finite values (stat_boxplot).
# Count plot: point size encodes the number of passengers in each
# class/sex combination.
ggplot(data = titanic, mapping = aes(x = pclass, y = sex)) +
  geom_count()
# Convert the date column to Date, derive the calendar year, and plot the
# number of rows per year as a line.
library(lubridate)
soccer$date <- lubridate::as_date(soccer$date)
soccer$year <- lubridate::year(soccer$date)
ggplot(data = soccer, mapping = aes(x = year)) +
  geom_line(stat = "count")