# Show the source of every chunk in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

# Read the raw coal consumption table from CSV.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
coalConsumptionData <- read.csv(
  "C:/Users/sudhi/Downloads/BIS_581/CoalConsumption_R_Project/annual-coal-consumption-by-country-1980-2009-2.csv"
)
head(coalConsumptionData, 8)

# Rename the first column to "Region"; later steps select columns by this name.
names(coalConsumptionData)[1] <- "Region"
head(coalConsumptionData, 8)
summary(coalConsumptionData)
library(tidyverse)
library(dplyr)
# Reshape from wide to long: every column except Region becomes a
# (Year, Consumption) pair, giving one row per Region/Year combination.
coalLongData <- pivot_longer(
  coalConsumptionData,
  cols = !Region,
  names_to = "Year",
  values_to = "Consumption"
)
coalLongData

# Strip the leading "X" from the former column names (e.g. "X1980" -> "1980";
# presumably added by read.csv() when it made the year headers syntactic).
# The column is assigned directly instead of going through transform():
# transform() rebuilds its result with data.frame(), which drops the tibble
# class and, on R < 4.0, turns character columns into factors -- after which
# as.numeric() would return factor codes instead of the real values.
# The pattern is anchored so only the prefix is removed.
coalLongData$Year <- sub("^X", "", coalLongData$Year)
head(coalLongData, n = 8)

coalLongData$Year <- as.numeric(coalLongData$Year)
is.numeric(coalLongData$Year)

# Report how Consumption was parsed before coercing it to numeric. Entries
# that cannot be parsed as numbers become NA (as.numeric() warns about this).
is.numeric(coalLongData$Consumption)
is.character(coalLongData$Consumption)
coalLongData$Consumption <- as.numeric(coalLongData$Consumption)
is.numeric(coalLongData$Consumption)

# Column classes after cleaning; vapply() guarantees a character vector.
vapply(coalLongData, function(column) class(column)[1], character(1))
head(coalLongData, n = 10)

# Total number of missing cells, followed by a per-column summary.
sum(is.na(coalLongData))
summary(coalLongData)
# Work on a separately named copy of the cleaned long-format data.
processedCoalData <- coalLongData

# Labels treated as aggregate (multi-country) regions rather than countries.
# "Central African Republic" was removed from this list: it is a single
# country, so it belongs in countrySpecificData, not in the region plots.
continentLabels <- c(
  "Africa", "Asia", "Europe", "North America", "Central & South America",
  "Former U.S.S.R.", "Middle East", "Asia & Oceania", "Antarctica"
)

# Rows whose Region is one of the aggregate labels.
continentData <- processedCoalData %>%
  filter(Region %in% continentLabels)
head(continentData, n = 10)

# View() needs an interactive session; skip it when the script is run or
# knitted non-interactively so rendering does not fail.
if (interactive()) {
  View(continentData)
}

# Everything that is not an aggregate region ...
otherRegionData <- processedCoalData %>%
  filter(!(Region %in% continentLabels))
head(otherRegionData, n = 10)

# ... and, from that, everything except the global "World" total.
countrySpecificData <- otherRegionData[!(otherRegionData$Region %in% "World"), ]
head(countrySpecificData, n = 10)
library(ggplot2)
# Line chart of consumption against year, one coloured line per region.
continentData %>%
  ggplot(mapping = aes(x = as.numeric(Year), y = Consumption, colour = Region)) +
  geom_line(linetype = "solid", alpha = 0.8, size = 1.2) +
  ggtitle("Coal Consumption Over Years by Region") +
  xlab("Year") +
  ylab("Consumption") +
  theme_minimal()
# Histogram of consumption values (10 bins), drawn in one panel per region.
continentData %>%
  ggplot(mapping = aes(Consumption)) +
  geom_histogram(bins = 10) +
  facet_wrap(vars(Region)) +
  ggtitle("Distribution of Coal Consumption by Region") +
  xlab("Consumption") +
  ylab("Frequency") +
  theme_minimal()
# Sum consumption for every Year/Region combination.
totalConsumptionByYear <- aggregate(
  Consumption ~ Year + Region,
  data = continentData,
  FUN = sum
)

# Stacked bars: one bar per year, split by region.
totalConsumptionByYear %>%
  ggplot(mapping = aes(x = factor(Year), y = Consumption, fill = Region)) +
  geom_col() +
  ggtitle("Total Coal Consumption by Year") +
  xlab("Year") +
  ylab("Total Consumption") +
  theme_minimal()
# Violin plot of the consumption values for each year. The discrete fill
# (Year as a factor) is what splits the data into one violin per year.
continentData %>%
  ggplot(
    mapping = aes(
      x = as.numeric(Year),
      y = Consumption,
      fill = as.factor(Year)
    )
  ) +
  geom_violin(trim = FALSE) +
  ggtitle("Violin Plot of Coal Consumption by Year") +
  xlab("Year") +
  ylab("Consumption") +
  theme_minimal()
# Scatter plot of consumption against year, points coloured by region.
ggplot(data = continentData) +
  geom_point(
    mapping = aes(x = as.numeric(Year), y = Consumption, colour = Region)
  ) +
  ggtitle("Scatter Plot of Coal Consumption Over Years") +
  xlab("Year") +
  ylab("Consumption") +
  theme_minimal()
library(ggplot2)
library(dplyr)
# Total consumption per region over all years, largest first.
# na.rm = TRUE: Consumption can contain NA after numeric coercion, and a
# single NA would otherwise turn a region's whole total into NA and drop its
# bar from the chart. This also matches the formula-based aggregate() used
# for the yearly totals, which omits NA rows by default.
totalConsumptionByRegion <- continentData %>%
  group_by(Region) %>%
  summarise(TotalConsumption = sum(Consumption, na.rm = TRUE)) %>%
  arrange(desc(TotalConsumption))

# Bar chart of the totals; reorder() sorts the bars from largest to smallest.
ggplot(
  totalConsumptionByRegion,
  aes(x = reorder(Region, -TotalConsumption), y = TotalConsumption)
) +
  geom_col(fill = "violet") +
  labs(
    title = "Total Consumption by Region",
    x = "Region",
    y = "Total Consumption"
  ) +
  theme_minimal() +
  # Tilt the region names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))