Merge pull request #284 from Eddy-JV/Transport_input_data
Add Rule prepare_transport_data_input
hazemakhalek authored Mar 6, 2024
2 parents 871f79d + 973aad8 commit 552dcd4
Showing 3 changed files with 143 additions and 42 deletions.
9 changes: 7 additions & 2 deletions Snakefile
@@ -120,6 +120,13 @@ rule prepare_urban_percent:
         "scripts/prepare_urban_percent.py"
 
 
+rule prepare_transport_data_input:
+    output:
+        transport_data_input="resources/transport_data.csv",
+    script:
+        "scripts/prepare_transport_data_input.py"
+
+
 if not config["custom_data"]["gas_network"]:
 
     rule prepare_gas_network:
@@ -353,10 +360,8 @@ rule build_population_layouts:
 rule move_hardcoded_files_temp:
     input:
         "data/temp_hard_coded/energy_totals.csv",
-        "data/temp_hard_coded/transport_data.csv",
     output:
         "resources/energy_totals.csv",
-        "resources/transport_data.csv",
     shell:
         "cp -a data/temp_hard_coded/. resources"

40 changes: 0 additions & 40 deletions data/temp_hard_coded/transport_data.csv

This file was deleted.
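
For orientation: the new rule replaces the hard-coded CSV by handing the script below a snakemake object whose output attribute mirrors the rule declaration, so the script writes directly to snakemake.output.transport_data_input. A minimal sketch of that wiring (illustrative only; the real object is injected by Snakemake, or built by mock_snakemake for standalone runs, and is not a SimpleNamespace):

# Rough stand-in for the object Snakemake injects; .output mirrors the rule declaration.
from types import SimpleNamespace

snakemake = SimpleNamespace(
    output=SimpleNamespace(transport_data_input="resources/transport_data.csv")
)
print(snakemake.output.transport_data_input)  # resources/transport_data.csv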

136 changes: 136 additions & 0 deletions scripts/prepare_transport_data_input.py
@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
import logging
import os
from pathlib import Path

import country_converter as coco
import helpers
import numpy as np
import pandas as pd

# from helpers import configure_logging


# logger = logging.getLogger(__name__)


def download_number_of_vehicles():
    """
    Download the number of registered vehicles as a CSV file.

    The data are downloaded from the WHO Global Health Observatory:
    https://apps.who.int/gho/data/node.main.A995
    """
    fn = "https://apps.who.int/gho/athena/data/GHO/RS_194?filter=COUNTRY:*&ead=&x-sideaxis=COUNTRY;YEAR;DATASOURCE&x-topaxis=GHO&profile=crosstable&format=csv"
    storage_options = {"User-Agent": "Mozilla/5.0"}
    Nbr_vehicles_csv = pd.read_csv(fn, storage_options=storage_options, encoding="utf8")

    Nbr_vehicles_csv = Nbr_vehicles_csv.rename(
        columns={
            "Countries, territories and areas": "Country",
            "Number of registered vehicles": "number cars",
        }
    )

    # Add ISO2 country code for each country
    cc = coco.CountryConverter()
    Country = pd.Series(Nbr_vehicles_csv["Country"])
    Nbr_vehicles_csv["country"] = cc.pandas_convert(
        series=Country, to="ISO2", not_found="not found"
    )

    # Remove spaces and replace empty values with NaN
    Nbr_vehicles_csv["number cars"] = (
        Nbr_vehicles_csv["number cars"].str.replace(" ", "").replace("", np.nan)
    )

    # Drop rows with NaN values in 'number cars'
    Nbr_vehicles_csv = Nbr_vehicles_csv.dropna(subset=["number cars"])

    # Convert 'number cars' to integer
    Nbr_vehicles_csv["number cars"] = Nbr_vehicles_csv["number cars"].astype(int)

    return Nbr_vehicles_csv
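
For context, a small self-contained sketch of the ISO2 mapping and number clean-up performed above, on made-up rows (illustrative values only; the real data come from the WHO export):

import country_converter as coco
import numpy as np
import pandas as pd

# Made-up rows; the WHO export formats counts with spaces between digit groups.
df = pd.DataFrame({"Country": ["Germany", "Nigeria"], "number cars": ["48 500 000", ""]})
df["country"] = coco.CountryConverter().pandas_convert(
    series=df["Country"], to="ISO2", not_found="not found"
)
df["number cars"] = df["number cars"].str.replace(" ", "").replace("", np.nan)
df = df.dropna(subset=["number cars"])
df["number cars"] = df["number cars"].astype(int)
print(df)  # one row remains: Germany / DE / 48500000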


def download_CO2_emissions():
    """
    Download CO2 emissions from transport (% of total fuel combustion).

    The dataset is downloaded as an Excel file from the World Bank:
    https://data.worldbank.org/indicator/EN.CO2.TRAN.ZS?view=map
    Data are only available up to 2014 (TODO: check for more recent years).
    """
    url = (
        "https://api.worldbank.org/v2/en/indicator/EN.CO2.TRAN.ZS?downloadformat=excel"
    )

    # Read the 'Data' sheet directly from the Excel file at the provided URL
    try:
        CO2_emissions = pd.read_excel(url, sheet_name="Data", skiprows=[0, 1, 2])
        print("File read successfully.")
    except Exception as e:
        print("Failed to read the file:", e)
        raise  # re-raise so the failure is not hidden by a later NameError

    CO2_emissions = CO2_emissions[
        ["Country Name", "Country Code", "Indicator Name", "2014"]
    ]

    # Calculate an average fuel efficiency proxy based on CO2 emissions from
    # transport (% of total fuel combustion)
    CO2_emissions["average fuel efficiency"] = (100 - CO2_emissions["2014"]) / 100

    # Add ISO2 country code for each country
    CO2_emissions = CO2_emissions.rename(columns={"Country Name": "Country"})
    cc = coco.CountryConverter()
    Country = pd.Series(CO2_emissions["Country"])
    CO2_emissions["country"] = cc.pandas_convert(
        series=Country, to="ISO2", not_found="not found"
    )

    # Drop region names that have no ISO2 code
    CO2_emissions = CO2_emissions[CO2_emissions.country != "not found"]

    return CO2_emissions
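
A one-line worked example of the proxy computed above: if transport accounts for 20% of a country's CO2 emissions from fuel combustion, the derived 'average fuel efficiency' is (100 - 20) / 100 = 0.8 (illustrative share, not a value from the dataset):

co2_transport_share_pct = 20.0  # illustrative share, not from the World Bank data
average_fuel_efficiency = (100 - co2_transport_share_pct) / 100
print(average_fuel_efficiency)  # 0.8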


if __name__ == "__main__":
    if "snakemake" not in globals():
        from helpers import mock_snakemake, sets_path_to_root

        os.chdir(os.path.dirname(os.path.abspath(__file__)))
        snakemake = mock_snakemake("prepare_transport_data_input")
        sets_path_to_root("pypsa-earth-sec")
    # configure_logging(snakemake)

    # run = snakemake.config.get("run", {})
    # RDIR = run["name"] + "/" if run.get("name") else ""
    # store_path_data = Path.joinpath(Path().cwd(), "data")
    # country_list = country_list_to_geofk(snakemake.config["countries"])

    # Download and prepare vehicles_csv:
    vehicles_csv = download_number_of_vehicles().copy()

    # Download and prepare CO2_emissions_csv:
    CO2_emissions_csv = download_CO2_emissions().copy()

    # Join the DataFrames on the 'country' column
    merged_df = pd.merge(vehicles_csv, CO2_emissions_csv, on="country")
    merged_df = merged_df[["country", "number cars", "average fuel efficiency"]]

    # Drop rows with NaN values in 'average fuel efficiency'
    merged_df = merged_df.dropna(subset=["average fuel efficiency"])

    # Convert 'average fuel efficiency' to float
    merged_df["average fuel efficiency"] = merged_df["average fuel efficiency"].astype(
        float
    )

    # Round 'average fuel efficiency' to three decimal places
    merged_df.loc[:, "average fuel efficiency"] = merged_df[
        "average fuel efficiency"
    ].round(3)

    # Save the prepared data to the rule's output path
    merged_df.to_csv(
        snakemake.output.transport_data_input,
        sep=",",
        encoding="utf-8",
        header=True,
        index=False,
    )
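
Once the rule has run, the produced resources/transport_data.csv should carry exactly the three columns selected above. A minimal sketch for reading it back (assumes the working directory is the project root and the file already exists):

import pandas as pd

transport_data = pd.read_csv("resources/transport_data.csv")
print(transport_data.columns.tolist())  # ['country', 'number cars', 'average fuel efficiency']
print(transport_data.head())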
