Merge pull request #284 from Eddy-JV/Transport_input_data
Add Rule prepare_transport_data_input
hazemakhalek authored Mar 6, 2024
2 parents 871f79d + 973aad8 commit 552dcd4
Showing 3 changed files with 143 additions and 42 deletions.
9 changes: 7 additions & 2 deletions Snakefile
@@ -120,6 +120,13 @@ rule prepare_urban_percent:
         "scripts/prepare_urban_percent.py"
 
 
+rule prepare_transport_data_input:
+    output:
+        transport_data_input="resources/transport_data.csv",
+    script:
+        "scripts/prepare_transport_data_input.py"
+
+
 if not config["custom_data"]["gas_network"]:
 
     rule prepare_gas_network:
@@ -353,10 +360,8 @@ rule build_population_layouts:
 rule move_hardcoded_files_temp:
     input:
         "data/temp_hard_coded/energy_totals.csv",
-        "data/temp_hard_coded/transport_data.csv",
     output:
         "resources/energy_totals.csv",
-        "resources/transport_data.csv",
     shell:
         "cp -a data/temp_hard_coded/. resources"

40 changes: 0 additions & 40 deletions data/temp_hard_coded/transport_data.csv

This file was deleted.
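
For orientation: the new rule replaces the hard-coded CSV by handing the script below a snakemake object whose output attribute mirrors the rule declaration, so the script writes directly to snakemake.output.transport_data_input. A minimal sketch of that wiring (illustrative only; the real object is injected by Snakemake, or built by mock_snakemake for standalone runs, and is not a SimpleNamespace):

# Rough stand-in for the object Snakemake injects; .output mirrors the rule declaration.
from types import SimpleNamespace

snakemake = SimpleNamespace(
    output=SimpleNamespace(transport_data_input="resources/transport_data.csv")
)
print(snakemake.output.transport_data_input)  # resources/transport_data.csv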

136 changes: 136 additions & 0 deletions scripts/prepare_transport_data_input.py
@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
import logging
import os
from pathlib import Path

import country_converter as coco
import helpers
import numpy as np
import pandas as pd

# from helpers import configure_logging


# logger = logging.getLogger(__name__)


def download_number_of_vehicles():
    """
    Download the number of registered vehicles as a CSV file.

    The data are downloaded from the WHO Global Health Observatory:
    https://apps.who.int/gho/data/node.main.A995
    """
    fn = "https://apps.who.int/gho/athena/data/GHO/RS_194?filter=COUNTRY:*&ead=&x-sideaxis=COUNTRY;YEAR;DATASOURCE&x-topaxis=GHO&profile=crosstable&format=csv"
    storage_options = {"User-Agent": "Mozilla/5.0"}
    Nbr_vehicles_csv = pd.read_csv(fn, storage_options=storage_options, encoding="utf8")

    Nbr_vehicles_csv = Nbr_vehicles_csv.rename(
        columns={
            "Countries, territories and areas": "Country",
            "Number of registered vehicles": "number cars",
        }
    )

    # Add ISO2 country code for each country
    cc = coco.CountryConverter()
    Country = pd.Series(Nbr_vehicles_csv["Country"])
    Nbr_vehicles_csv["country"] = cc.pandas_convert(
        series=Country, to="ISO2", not_found="not found"
    )

    # Remove spaces and replace empty values with NaN
    Nbr_vehicles_csv["number cars"] = (
        Nbr_vehicles_csv["number cars"].str.replace(" ", "").replace("", np.nan)
    )

    # Drop rows with NaN values in 'number cars'
    Nbr_vehicles_csv = Nbr_vehicles_csv.dropna(subset=["number cars"])

    # Convert 'number cars' to integer
    Nbr_vehicles_csv["number cars"] = Nbr_vehicles_csv["number cars"].astype(int)

    return Nbr_vehicles_csv
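
For context, a small self-contained sketch of the ISO2 mapping and number clean-up performed above, on made-up rows (illustrative values only; the real data come from the WHO export):

import country_converter as coco
import numpy as np
import pandas as pd

# Made-up rows; the WHO export formats counts with spaces between digit groups.
df = pd.DataFrame({"Country": ["Germany", "Nigeria"], "number cars": ["48 500 000", ""]})
df["country"] = coco.CountryConverter().pandas_convert(
    series=df["Country"], to="ISO2", not_found="not found"
)
df["number cars"] = df["number cars"].str.replace(" ", "").replace("", np.nan)
df = df.dropna(subset=["number cars"])
df["number cars"] = df["number cars"].astype(int)
print(df)  # one row remains: Germany / DE / 48500000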


def download_CO2_emissions():
    """
    Download CO2 emissions from transport (% of total fuel combustion).

    The dataset is downloaded as an Excel file from the World Bank:
    https://data.worldbank.org/indicator/EN.CO2.TRAN.ZS?view=map
    Data are only available up to 2014 (TODO: check for more recent years).
    """
    url = (
        "https://api.worldbank.org/v2/en/indicator/EN.CO2.TRAN.ZS?downloadformat=excel"
    )

    # Read the 'Data' sheet directly from the Excel file at the provided URL
    try:
        CO2_emissions = pd.read_excel(url, sheet_name="Data", skiprows=[0, 1, 2])
        print("File read successfully.")
    except Exception as e:
        print("Failed to read the file:", e)
        raise  # re-raise so the failure is not hidden by a later NameError

    CO2_emissions = CO2_emissions[
        ["Country Name", "Country Code", "Indicator Name", "2014"]
    ]

    # Calculate an average fuel efficiency proxy based on CO2 emissions from
    # transport (% of total fuel combustion)
    CO2_emissions["average fuel efficiency"] = (100 - CO2_emissions["2014"]) / 100

    # Add ISO2 country code for each country
    CO2_emissions = CO2_emissions.rename(columns={"Country Name": "Country"})
    cc = coco.CountryConverter()
    Country = pd.Series(CO2_emissions["Country"])
    CO2_emissions["country"] = cc.pandas_convert(
        series=Country, to="ISO2", not_found="not found"
    )

    # Drop region names that have no ISO2 code
    CO2_emissions = CO2_emissions[CO2_emissions.country != "not found"]

    return CO2_emissions
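
A one-line worked example of the proxy computed above: if transport accounts for 20% of a country's CO2 emissions from fuel combustion, the derived 'average fuel efficiency' is (100 - 20) / 100 = 0.8 (illustrative share, not a value from the dataset):

co2_transport_share_pct = 20.0  # illustrative share, not from the World Bank data
average_fuel_efficiency = (100 - co2_transport_share_pct) / 100
print(average_fuel_efficiency)  # 0.8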


if __name__ == "__main__":
    if "snakemake" not in globals():
        from helpers import mock_snakemake, sets_path_to_root

        os.chdir(os.path.dirname(os.path.abspath(__file__)))
        snakemake = mock_snakemake("prepare_transport_data_input")
        sets_path_to_root("pypsa-earth-sec")
    # configure_logging(snakemake)

    # run = snakemake.config.get("run", {})
    # RDIR = run["name"] + "/" if run.get("name") else ""
    # store_path_data = Path.joinpath(Path().cwd(), "data")
    # country_list = country_list_to_geofk(snakemake.config["countries"])

    # Download and prepare vehicles_csv:
    vehicles_csv = download_number_of_vehicles().copy()

    # Download and prepare CO2_emissions_csv:
    CO2_emissions_csv = download_CO2_emissions().copy()

    # Join the DataFrames on the 'country' column
    merged_df = pd.merge(vehicles_csv, CO2_emissions_csv, on="country")
    merged_df = merged_df[["country", "number cars", "average fuel efficiency"]]

    # Drop rows with NaN values in 'average fuel efficiency'
    merged_df = merged_df.dropna(subset=["average fuel efficiency"])

    # Convert 'average fuel efficiency' to float
    merged_df["average fuel efficiency"] = merged_df["average fuel efficiency"].astype(
        float
    )

    # Round 'average fuel efficiency' to three decimal places
    merged_df.loc[:, "average fuel efficiency"] = merged_df[
        "average fuel efficiency"
    ].round(3)

    # Save the prepared data to the rule's output path
    merged_df.to_csv(
        snakemake.output.transport_data_input,
        sep=",",
        encoding="utf-8",
        header=True,
        index=False,
    )
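
Once the rule has run, the produced resources/transport_data.csv should carry exactly the three columns selected above. A minimal sketch for reading it back (assumes the working directory is the project root and the file already exists):

import pandas as pd

transport_data = pd.read_csv("resources/transport_data.csv")
print(transport_data.columns.tolist())  # ['country', 'number cars', 'average fuel efficiency']
print(transport_data.head())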
