layout | title | subtitle |
---|---|---|
slides | R for reproducible scientific analysis | Why Use R? |
- Powerful statistical analysis
- and powerful visualisation
- integrated elegantly
- Get to know R and RStudio
- Analyze a meaningful data set
- Extract insights and deliver them visually
- Leave ready to learn more R independently
# Read the gapminder CSV (with a header row, comma-separated) into a data frame.
gapminder <- read.csv(
  "data/gapminder-FiveYearData.csv",
  header = TRUE,
  sep = ","
)
head(gapminder, n = 1) # Show me the first row
      country year     pop continent lifeExp gdpPercap
1 Afghanistan 1952 8425333      Asia  28.801  779.4453
# Scatter plot: life expectancy on the x axis, GDP per capita on the y axis.
ggplot(data = gapminder, mapping = aes(x = lifeExp, y = gdpPercap)) +
  geom_point()
# Life expectancy over time: one line (plus points) per country, coloured by
# continent. `group` is ggplot2's documented aesthetic for per-country series.
# Each `+` must END a line: a line that starts with `+` is parsed as a new
# statement, so the layer after it would never be added to the plot.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, group = country, colour = continent)
) +
  geom_line() +
  geom_point()
library(dplyr)

# For every year, compute the pairwise correlations between GDP per capita,
# life expectancy and population (cor() uses Pearson by default).
cors <- gapminder %>%
  group_by(year) %>%
  summarise(
    gdpPercap.lifeExp = cor(gdpPercap, lifeExp),
    gdpPercap.pop = cor(gdpPercap, pop),
    pop.lifeExp = cor(pop, lifeExp)
  )
head(cors, n = 1)
Source: local data frame [1 x 4]
year gdpPercap.lifeExp gdpPercap.pop pop.lifeExp
1 1952 0.2780236 -0.02526041 -0.002724782
library(tidyr)
Error in library(tidyr): there is no package called 'tidyr'
# Reshape wide -> long: stack the three correlation columns into a
# `variables` (former column name) / `correlation` (value) pair per year.
tidy.cors <- cors %>%
  gather(
    key = variables,
    value = correlation,
    gdpPercap.lifeExp, gdpPercap.pop, pop.lifeExp
  )
Error in function_list[[k]](value): could not find function "gather"
head(tidy.cors, n = 1) # First row of the long-format correlations
Source: local data frame [1 x 3]
year variables correlation
1 1952 gdpPercap.lifeExp 0.2780236