From 3298572cedf955392359b04933471671edf2d6c3 Mon Sep 17 00:00:00 2001
From: toniseibold <tseibold.tub@gmail.com>
Date: Fri, 23 Feb 2024 13:53:28 +0100
Subject: [PATCH 01/14] new energy totals from Eurostat 2023 report, rescaling
 JRC IDEES data from 2015

---
 config/config.default.yaml                |   4 +-
 data/switzerland-new_format-all_years.csv |  50 ++--
 rules/common.smk                          |   9 +-
 scripts/build_energy_totals.py            | 313 ++++++++++++++++++----
 4 files changed, 295 insertions(+), 81 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index ca2d1ba3d..093b1aad1 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -314,9 +314,9 @@ pypsa_eur:
 
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#energy
 energy:
-  energy_totals_year: 2013
+  energy_totals_year: 2019
   base_emissions_year: 1990
-  eurostat_report_year: 2016
+  eurostat_report_year: 2023
   emissions: CO2
 
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass
diff --git a/data/switzerland-new_format-all_years.csv b/data/switzerland-new_format-all_years.csv
index 93123009a..d083e8a88 100644
--- a/data/switzerland-new_format-all_years.csv
+++ b/data/switzerland-new_format-all_years.csv
@@ -1,25 +1,25 @@
-country,item,2010,2011,2012,2013,2014,2015
-CH,total residential,268.2,223.4,243.4,261.3,214.2,229.1
-CH,total residential space,192.2,149.0,168.1,185.5,139.7,154.4
-CH,total residential water,32.2,31.6,31.9,32.2,31.7,31.9
-CH,total residential cooking,9.3,9.3,9.3,9.4,9.5,9.6
-CH,electricity residential,67.9,63.7,65.7,67.6,63.0,64.4
-CH,electricity residential space,15.9,12.8,14.3,15.8,12.3,13.5
-CH,electricity residential water,8.8,8.5,8.5,8.6,8.5,8.6
-CH,electricity residential cooking,4.9,4.9,4.9,4.9,5.0,5.0
-CH,total services,145.9,127.4,136.7,144.0,124.5,132.5
-CH,total services space,80.0,62.2,70.8,77.4,58.3,64.3
-CH,total services water,10.1,10.0,10.1,10.1,10.0,10.0
-CH,total services cooking,2.5,2.4,2.3,2.3,2.4,2.3
-CH,electricity services,60.5,59.2,60.3,61.4,60.3,62.6
-CH,electricity services space,4.0,3.2,3.8,4.2,3.3,3.6
-CH,electricity services water,0.7,0.7,0.7,0.7,0.7,0.7
-CH,electricity services cooking,2.5,2.4,2.3,2.3,2.4,2.3
-CH,total rail,11.5,11.1,11.2,11.4,11.1,11.4
-CH,total road,199.4,200.4,200.4,201.2,202.0,203.1
-CH,electricity road,0.,0.,0.,0.,0.,0.
-CH,electricity rail,11.5,11.1,11.2,11.4,11.1,11.4
-CH,total domestic aviation,3.3,3.2,3.4,3.4,3.5,3.5
-CH,total international aviation,58.0,62.0,63.5,64.2,64.5,66.8
-CH,total domestic navigation,1.6,1.6,1.6,1.6,1.6,1.6
-CH,total international navigation,0.,0.,0.,0.,0.,0.
+country,item,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
+CH,total residential,268.2,223.4,243.4,261.3,214.2,229.1,241.2,236.5,223.7,226.5,219.1,241.2,211.3
+CH,total residential space,192.2,149,168.1,185.5,139.7,154.4,167.3,161.5,147.2,150.4,140.2,166.2,131.9
+CH,total residential water,32.2,31.6,31.9,32.2,31.7,31.9,31.8,31.8,31.8,31.7,33.3,32.5,32.5
+CH,total residential cooking,9.3,9.3,9.3,9.4,9.5,9.6,9.9,10,10.1,10.2,10.5,10.3,10.3
+CH,electricity residential,67.9,63.7,65.7,67.6,63,64.4,69.7,69.2,67.7,68.1,68.7,70.8,66.8
+CH,electricity residential space,15.9,12.8,14.3,15.8,12.3,13.5,15.8,15.6,14.7,15.3,14.8,17.8,14.8
+CH,electricity residential water,8.8,8.5,8.5,8.6,8.5,8.6,8.9,9,9.2,9.3,9.7,9.5,9.5
+CH,electricity residential cooking,4.9,4.9,4.9,4.9,5,5,5,5.1,5.1,5.1,5.4,5.2,5.3
+CH,total services,145.9,127.4,136.7,144,124.5,132.5,150.5,147.7,141.5,143.1,129.7,144.2,122.5
+CH,total services space,80,62.2,70.8,77.4,58.3,64.3,77,74.4,68.2,69.8,64.3,75.7,58.7
+CH,total services water,10.1,10,10.1,10.1,10,10,11.4,11.3,11.2,11.1,9.7,10.4,12
+CH,total services cooking,2.5,2.4,2.3,2.3,2.4,2.3,3.1,3.1,3.2,3.3,2.1,2.6,3.2
+CH,electricity services,60.5,59.2,60.3,61.4,60.3,62.6,65.9,65.7,65.5,65.6,58.8,61.6,61.6
+CH,electricity services space,4,3.2,3.8,4.2,3.3,3.6,2.7,2.5,2.3,2.3,2.2,2.5,2.5
+CH,electricity services water,0.7,0.7,0.7,0.7,0.7,0.7,1.2,1.1,1.1,1.1,0.9,1,1
+CH,electricity services cooking,2.5,2.4,2.3,2.3,2.4,2.3,3.1,3.1,3.1,3.2,3.3,2.1,3.2
+CH,total rail,11.5,11.1,11.2,11.4,11.1,11.4,11.6,11.4,11.2,11,10.2,10.6,10.8
+CH,total road,199.4,200.4,200.4,201.2,202,203.1,203.9,203.7,202.6,200.5,182.6,188.3,193.3
+CH,electricity road,0,0,0,0,0,0,0.1,0.2,0.3,0.4,0.5,0.8,1.3
+CH,electricity rail,11.5,11.1,11.2,11.4,11.1,11.4,11.5,11.3,11.1,11,10.1,10.6,10.7
+CH,total domestic aviation,3.3,3.2,3.4,3.4,3.5,3.5,3.6,3.1,3.1,2.9,2.5,2.8,3
+CH,total international aviation,58,62,63.5,64.2,64.5,66.8,70.6,72.8,77.2,78.2,28.2,31.2,56.8
+CH,total domestic navigation,1.6,1.6,1.6,1.6,1.6,1.6,1.4,1.4,1.4,1.4,1.4,1.4,1.4
+CH,total international navigation,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/rules/common.smk b/rules/common.smk
index 618d746bf..c3ce845c5 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -131,10 +131,11 @@ def has_internet_access(url="www.zenodo.org") -> bool:
 
 
 def input_eurostat(w):
-    # 2016 includes BA, 2017 does not
-    report_year = config_provider("energy", "eurostat_report_year")(w)
-    return f"data/bundle-sector/eurostat-energy_balances-june_{report_year}_edition"
-
+    if config["energy"]["eurostat_report_year"] != 2023:
+        report_year = config["energy"]["eurostat_report_year"]
+        return f"data/bundle-sector/eurostat-energy_balances-june_{report_year}_edition"
+    else:
+        return "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
 
 def solved_previous_horizon(w):
     planning_horizons = config_provider("scenario", "planning_horizons")(w)
diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index 960d1bbe8..13a8c30da 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -16,6 +16,7 @@
 import pandas as pd
 from _helpers import configure_logging, mute_print, set_scenario_config
 from tqdm import tqdm
+import os
 
 cc = coco.CountryConverter()
 logger = logging.getLogger(__name__)
@@ -120,36 +121,93 @@ def build_eurostat(input_eurostat, countries, report_year, year):
     """
     Return multi-index for all countries' energy data in TWh/a.
     """
-    filenames = {
+    if report_year != 2023:
+        filenames = {
         2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
         2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
     }
 
-    with mute_print():
-        dfs = pd.read_excel(
-            input_eurostat + filenames[report_year],
-            sheet_name=None,
-            skiprows=1,
-            index_col=list(range(4)),
-        )
-
-    # sorted_index necessary for slicing
-    lookup = eurostat_codes
-    labelled_dfs = {
-        lookup[df.columns[0]]: df
-        for df in dfs.values()
-        if lookup[df.columns[0]] in countries
-    }
-    df = pd.concat(labelled_dfs, sort=True).sort_index()
-
-    # drop non-numeric and country columns
-    non_numeric_cols = df.columns[df.dtypes != float]
-    country_cols = df.columns.intersection(lookup.keys())
-    to_drop = non_numeric_cols.union(country_cols)
-    df.drop(to_drop, axis=1, inplace=True)
+        with mute_print():
+            dfs = pd.read_excel(
+                input_eurostat + filenames[report_year],
+                sheet_name=None,
+                skiprows=1,
+                index_col=list(range(4)),
+            )
 
-    # convert ktoe/a to TWh/a
-    df *= 11.63 / 1e3
+        # sorted_index necessary for slicing
+        lookup = eurostat_codes
+        labelled_dfs = {
+            lookup[df.columns[0]]: df
+            for df in dfs.values()
+            if lookup[df.columns[0]] in countries
+        }
+        df = pd.concat(labelled_dfs, sort=True).sort_index()
+        # drop non-numeric and country columns
+        non_numeric_cols = df.columns[df.dtypes != float]
+        country_cols = df.columns.intersection(lookup.keys())
+        to_drop = non_numeric_cols.union(country_cols)
+        df.drop(to_drop, axis=1, inplace=True)
+
+        # convert ktoe/a to TWh/a
+        df *= 11.63 / 1e3
+    
+    else:
+        # read in every country file in countries
+        eurostat = pd.DataFrame()
+        countries = [country if country != 'GB' else 'UK' for country in countries]
+        countries = [country if country != 'GR' else 'EL' for country in countries]
+        for country in countries:
+            filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
+            if os.path.exists(input_eurostat + filename):
+                df = pd.read_excel(
+                    input_eurostat + filename,
+                    engine='pyxlsb',
+                    sheet_name=str(year),
+                    skiprows=4,
+                    index_col=list(range(4)))
+                # replace entry 'Z' with 0
+                df.replace('Z', 0, inplace=True)
+                # write 'International aviation' to the 2nd level of the multiindex
+                index_number = (df.index.get_level_values(1) == 'International aviation').argmax()
+                new_index = ('-', 'International aviation', 'International aviation', 'ktoe')
+                modified_index = list(df.index)
+                modified_index[index_number] = new_index
+                df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
+                # drop the annoying subhead line
+                df.drop(df[df[year] == year].index, inplace=True)
+                # replace 'Z' with 0
+                df = df.replace('Z', 0)
+                # add country to the multiindex
+                new_tuple = [(country, *idx) for idx in df.index]
+                new_mindex = pd.MultiIndex.from_tuples(new_tuple, names=['country', None, 'name', None, 'unit'])
+                df.index = new_mindex
+                # make numeric values where possible
+                df = df.apply(pd.to_numeric, errors='coerce')
+                # drop non-numeric columns
+                non_numeric_cols = df.columns[df.dtypes != float]
+                df.drop(non_numeric_cols, axis=1, inplace=True)
+                # concatenate the dataframes
+                eurostat = pd.concat([eurostat, df], axis=0)
+        
+        eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
+        # Renaming some indices
+        rename = {
+            'Households': 'Residential',
+            'Commercial & public services': 'Services',
+            'Domestic navigation': 'Domestic Navigation'
+        }
+        for name, rename in rename.items():
+            eurostat.index = eurostat.index.set_levels(
+            eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
+            level=3)
+        new_index = eurostat.index.set_levels(eurostat.index.levels[2].where(eurostat.index.levels[2] != 'International maritime bunkers', 'Bunkers'), level=2)
+        eurostat.index = new_index
+
+        eurostat.rename(columns={'Total': 'Total all products'}, inplace=True)
+        eurostat.index = eurostat.index.set_levels(eurostat.index.levels[0].where(eurostat.index.levels[0] != 'UK', 'GB'), level=0)
+        
+        df = eurostat * 11.63 / 1e3
 
     return df
 
@@ -669,31 +727,49 @@ def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
     return eurostat.multiply(specific_emissions).sum(axis=1)
 
 
-def build_co2_totals(countries, eea_co2, eurostat_co2):
+def build_co2_totals(countries, eea_co2, eurostat_co2, report_year):
     co2 = eea_co2.reindex(countries)
 
     for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]):
-        mappings = {
-            "electricity": (
-                ct,
-                "+",
-                "Conventional Thermal Power Stations",
-                "of which From Coal",
-            ),
-            "residential non-elec": (ct, "+", "+", "Residential"),
-            "services non-elec": (ct, "+", "+", "Services"),
-            "road non-elec": (ct, "+", "+", "Road"),
-            "rail non-elec": (ct, "+", "+", "Rail"),
-            "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
-            "international navigation": (ct, "-", "Bunkers"),
-            "domestic aviation": (ct, "+", "+", "Domestic aviation"),
-            "international aviation": (ct, "+", "+", "International aviation"),
-            # does not include industrial process emissions or fuel processing/refining
-            "industrial non-elec": (ct, "+", "Industry"),
-            # does not include non-energy emissions
-            "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
-            & eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
-        }
+        if report_year != 2023:
+            mappings = {
+                "electricity": (
+                    ct,
+                    "+",
+                    "Conventional Thermal Power Stations",
+                    "of which From Coal",
+                ),
+                "residential non-elec": (ct, "+", "+", "Residential"),
+                "services non-elec": (ct, "+", "+", "Services"),
+                "road non-elec": (ct, "+", "+", "Road"),
+                "rail non-elec": (ct, "+", "+", "Rail"),
+                "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
+                "international navigation": (ct, "-", "Bunkers"),
+                "domestic aviation": (ct, "+", "+", "Domestic aviation"),
+                "international aviation": (ct, "+", "+", "International aviation"),
+                # does not include industrial process emissions or fuel processing/refining
+                "industrial non-elec": (ct, "+", "Industry"),
+                # does not include non-energy emissions
+                "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
+                & eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
+            }
+        else:
+            mappings = {
+                "electricity": (ct, "+", "Electricity & heat generation", np.nan),
+                "residential non-elec": (ct, "+", "+", "Residential"),
+                "services non-elec": (ct, "+", "+", "Services"),
+                "road non-elec": (ct, "+", "+", "Road"),
+                "rail non-elec": (ct, "+", "+", "Rail"),
+                "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
+                "international navigation": (ct, "-", "Bunkers"),
+                "domestic aviation": (ct, "+", "+", "Domestic aviation"),
+                "international aviation": (ct, "-", "International aviation"),
+                # does not include industrial process emissions or fuel processing/refining
+                "industrial non-elec": (ct, "+", "Industry sector"),
+                # does not include non-energy emissions
+                "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
+                & eurostat_co2.index.isin(["Agriculture & forestry", "Fishing"], level=3),
+            }
 
         for i, mi in mappings.items():
             co2.at[ct, i] = eurostat_co2.loc[mi].sum()
@@ -736,6 +812,133 @@ def build_transport_data(countries, population, idees):
 
     return transport_data
 
+def rescale(idees_countries, energy, eurostat):
+    '''
+    Takes JRC IDEES data from 2015 and rescales it by the ratio of the 
+    eurostat data and the 2015 eurostat data.
+    missing data: ['passenger car efficiency', 'passenger cars']
+    '''
+    # read in the eurostat data for 2015
+    eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
+    # eurostat_2015 = eurostat_2015.rename(index={'GB': 'UK'}, level=0)
+    eurostat_year = eurostat[["Total all products", "Electricity"]]
+    # calculate the ratio of the two data sets
+    ratio = eurostat_year / eurostat_2015
+    ratio = ratio.droplevel([1,4])
+    ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
+    ratio = ratio.rename(index={"GB": "UK"}, level=0)
+
+    residential_total = [
+        "total residential space",
+        "total residential water",
+        "total residential cooking",
+        "total residential",
+        "derived heat residential",
+        "thermal uses residential",
+    ]
+    residential_ele = [
+        "electricity residential space",
+        "electricity residential water",
+        "electricity residential cooking",
+        "electricity residential",
+    ]
+
+    service_total = [
+        "total services space",
+        "total services water",
+        "total services cooking",
+        "total services",
+        "derived heat services",
+        "thermal uses services",
+    ]
+    service_ele = [
+        "electricity services space",
+        "electricity services water",
+        "electricity services cooking",
+        "electricity services",
+    ]
+
+    agri_total = [
+        "total agriculture heat",
+        "total agriculture machinery",
+        "total agriculture",
+    ]
+    agri_ele = [
+        "total agriculture electricity",
+    ]
+
+    road_total = [
+        "total road",
+        "total passenger cars",
+        "total other road passenger",
+        "total light duty road freight",
+    ]
+    road_ele = [
+        "electricity road",
+        "electricity passenger cars",
+        "electricity other road passenger",
+        "electricity light duty road freight",
+    ]
+
+    rail_total = [
+        "total rail",
+        "total rail passenger",
+        "total rail freight",
+    ]
+    rail_ele = [
+        "electricity rail",
+        "electricity rail passenger",
+        "electricity rail freight",
+    ]
+
+    avia_inter = [
+        'total aviation passenger',
+        'total aviation freight',
+        'total international aviation passenger',
+        'total international aviation freight',
+        'total international aviation'
+    ]
+    avia_domestic = [
+        'total domestic aviation passenger',
+        'total domestic aviation freight',
+        'total domestic aviation',
+    ]
+    navigation = [
+        "total domestic navigation",
+    ]
+    
+    idees_countries = idees_countries.repalce({'GB': 'UK', 'GR': 'EL'})
+
+    for country in idees_countries:
+        res = ratio.loc[(country, slice(None), 'Residential')]
+        energy.loc[country, residential_total] *= res[['total']].iloc[0,0]
+        energy.loc[country, residential_ele] *= res[['ele']].iloc[0,0]
+
+        ser = ratio.loc[(country, slice(None), 'Services')]
+        energy.loc[country, service_total] *= ser[['total']].iloc[0,0]
+        energy.loc[country, service_ele] *= ser[['ele']].iloc[0,0]
+
+        agri = ratio.loc[(country, slice(None), 'Agriculture & forestry')]
+        energy.loc[country, agri_total] *= agri[['total']].iloc[0,0]
+        energy.loc[country, agri_ele] *= agri[['ele']].iloc[0,0]
+
+        road = ratio.loc[(country, slice(None), 'Road')]
+        energy.loc[country, road_total] *= road[['total']].iloc[0,0]
+        energy.loc[country, road_ele] *= road[['ele']].iloc[0,0]
+
+        rail = ratio.loc[(country, slice(None), 'Rail')]
+        energy.loc[country, rail_total] *= rail[['total']].iloc[0,0]
+        energy.loc[country, rail_ele] *= rail[['ele']].iloc[0,0]
+
+        avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
+        avi_i = ratio.loc[(country, 'International aviation', slice(None))]
+        energy.loc[country, avia_inter] *= avi_i[['total']].iloc[0,0]
+        energy.loc[country, avia_domestic] *= avi_d[['total']].iloc[0,0]
+
+        nav = ratio.loc[(country, slice(None), 'Domestic Navigation')]
+        energy.loc[country, navigation] *= nav[['total']].iloc[0,0]
+
+    return energy
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
@@ -759,12 +962,22 @@ def build_transport_data(countries, population, idees):
     input_eurostat = snakemake.input.eurostat
     eurostat = build_eurostat(input_eurostat, countries, report_year, data_year)
     swiss = build_swiss(data_year)
-    idees = build_idees(idees_countries, data_year)
+    # data from idees only exists for 2015
+    if data_year > 2015:
+        # read in latest data and rescale later
+        idees = build_idees(idees_countries, 2015)
+    else:
+        idees = build_idees(idees_countries, data_year)
 
     energy = build_energy_totals(countries, eurostat, swiss, idees)
+    
+    if data_year > 2015:
+        energy = rescale(idees_countries, energy, eurostat)
+    
     energy.to_csv(snakemake.output.energy_name)
 
-    district_heat_share = build_district_heat_share(countries, idees)
+    # use rescaled idees data to calculate district heat share
+    district_heat_share = build_district_heat_share(countries, energy.loc[idees_countries])
     district_heat_share.to_csv(snakemake.output.district_heat_share)
 
     base_year_emissions = params["base_emissions_year"]
@@ -774,7 +987,7 @@ def build_transport_data(countries, population, idees):
         input_eurostat, countries, report_year, base_year_emissions
     )
 
-    co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
+    co2 = build_co2_totals(countries, eea_co2, eurostat_co2, report_year)
     co2.to_csv(snakemake.output.co2_name)
 
     transport = build_transport_data(countries, population, idees)

From ace51b9e5547ec9c23730c832f4d9a7b5212d5a2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 23 Feb 2024 12:56:43 +0000
Subject: [PATCH 02/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 rules/common.smk               |   1 +
 scripts/build_energy_totals.py | 174 ++++++++++++++++++++-------------
 2 files changed, 106 insertions(+), 69 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index c3ce845c5..3484c2075 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -137,6 +137,7 @@ def input_eurostat(w):
     else:
         return "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
 
+
 def solved_previous_horizon(w):
     planning_horizons = config_provider("scenario", "planning_horizons")(w)
     i = planning_horizons.index(int(w.planning_horizons))
diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index 13a8c30da..a17721433 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -8,6 +8,7 @@
 
 import logging
 import multiprocessing as mp
+import os
 from functools import partial
 
 import country_converter as coco
@@ -16,7 +17,6 @@
 import pandas as pd
 from _helpers import configure_logging, mute_print, set_scenario_config
 from tqdm import tqdm
-import os
 
 cc = coco.CountryConverter()
 logger = logging.getLogger(__name__)
@@ -123,9 +123,9 @@ def build_eurostat(input_eurostat, countries, report_year, year):
     """
     if report_year != 2023:
         filenames = {
-        2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
-        2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
-    }
+            2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
+            2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
+        }
 
         with mute_print():
             dfs = pd.read_excel(
@@ -151,62 +151,85 @@ def build_eurostat(input_eurostat, countries, report_year, year):
 
         # convert ktoe/a to TWh/a
         df *= 11.63 / 1e3
-    
+
     else:
         # read in every country file in countries
         eurostat = pd.DataFrame()
-        countries = [country if country != 'GB' else 'UK' for country in countries]
-        countries = [country if country != 'GR' else 'EL' for country in countries]
+        countries = [country if country != "GB" else "UK" for country in countries]
+        countries = [country if country != "GR" else "EL" for country in countries]
         for country in countries:
             filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
             if os.path.exists(input_eurostat + filename):
                 df = pd.read_excel(
                     input_eurostat + filename,
-                    engine='pyxlsb',
+                    engine="pyxlsb",
                     sheet_name=str(year),
                     skiprows=4,
-                    index_col=list(range(4)))
+                    index_col=list(range(4)),
+                )
                 # replace entry 'Z' with 0
-                df.replace('Z', 0, inplace=True)
+                df.replace("Z", 0, inplace=True)
                 # write 'International aviation' to the 2nd level of the multiindex
-                index_number = (df.index.get_level_values(1) == 'International aviation').argmax()
-                new_index = ('-', 'International aviation', 'International aviation', 'ktoe')
+                index_number = (
+                    df.index.get_level_values(1) == "International aviation"
+                ).argmax()
+                new_index = (
+                    "-",
+                    "International aviation",
+                    "International aviation",
+                    "ktoe",
+                )
                 modified_index = list(df.index)
                 modified_index[index_number] = new_index
-                df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
+                df.index = pd.MultiIndex.from_tuples(
+                    modified_index, names=df.index.names
+                )
                 # drop the annoying subhead line
                 df.drop(df[df[year] == year].index, inplace=True)
                 # replace 'Z' with 0
-                df = df.replace('Z', 0)
+                df = df.replace("Z", 0)
                 # add country to the multiindex
                 new_tuple = [(country, *idx) for idx in df.index]
-                new_mindex = pd.MultiIndex.from_tuples(new_tuple, names=['country', None, 'name', None, 'unit'])
+                new_mindex = pd.MultiIndex.from_tuples(
+                    new_tuple, names=["country", None, "name", None, "unit"]
+                )
                 df.index = new_mindex
                 # make numeric values where possible
-                df = df.apply(pd.to_numeric, errors='coerce')
+                df = df.apply(pd.to_numeric, errors="coerce")
                 # drop non-numeric columns
                 non_numeric_cols = df.columns[df.dtypes != float]
                 df.drop(non_numeric_cols, axis=1, inplace=True)
                 # concatenate the dataframes
                 eurostat = pd.concat([eurostat, df], axis=0)
-        
+
         eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
         # Renaming some indices
         rename = {
-            'Households': 'Residential',
-            'Commercial & public services': 'Services',
-            'Domestic navigation': 'Domestic Navigation'
+            "Households": "Residential",
+            "Commercial & public services": "Services",
+            "Domestic navigation": "Domestic Navigation",
         }
         for name, rename in rename.items():
             eurostat.index = eurostat.index.set_levels(
-            eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
-            level=3)
-        new_index = eurostat.index.set_levels(eurostat.index.levels[2].where(eurostat.index.levels[2] != 'International maritime bunkers', 'Bunkers'), level=2)
+                eurostat.index.levels[3].where(
+                    eurostat.index.levels[3] != name, rename
+                ),
+                level=3,
+            )
+        new_index = eurostat.index.set_levels(
+            eurostat.index.levels[2].where(
+                eurostat.index.levels[2] != "International maritime bunkers", "Bunkers"
+            ),
+            level=2,
+        )
         eurostat.index = new_index
 
-        eurostat.rename(columns={'Total': 'Total all products'}, inplace=True)
-        eurostat.index = eurostat.index.set_levels(eurostat.index.levels[0].where(eurostat.index.levels[0] != 'UK', 'GB'), level=0)
-        
+        eurostat.rename(columns={"Total": "Total all products"}, inplace=True)
+        eurostat.index = eurostat.index.set_levels(
+            eurostat.index.levels[0].where(eurostat.index.levels[0] != "UK", "GB"),
+            level=0,
+        )
+
         df = eurostat * 11.63 / 1e3
 
     return df
@@ -751,7 +774,9 @@ def build_co2_totals(countries, eea_co2, eurostat_co2, report_year):
                 "industrial non-elec": (ct, "+", "Industry"),
                 # does not include non-energy emissions
                 "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
-                & eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
+                & eurostat_co2.index.isin(
+                    ["Agriculture / Forestry", "Fishing"], level=3
+                ),
             }
         else:
             mappings = {
@@ -768,7 +793,9 @@ def build_co2_totals(countries, eea_co2, eurostat_co2, report_year):
                 "industrial non-elec": (ct, "+", "Industry sector"),
                 # does not include non-energy emissions
                 "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
-                & eurostat_co2.index.isin(["Agriculture & forestry", "Fishing"], level=3),
+                & eurostat_co2.index.isin(
+                    ["Agriculture & forestry", "Fishing"], level=3
+                ),
             }
 
         for i, mi in mappings.items():
@@ -812,20 +839,26 @@ def build_transport_data(countries, population, idees):
 
     return transport_data
 
+
 def rescale(idees_countries, energy, eurostat):
-    '''
-    Takes JRC IDEES data from 2015 and rescales it by the ratio of the 
-    eurostat data and the 2015 eurostat data.
+    """
+    Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat
+    data and the 2015 eurostat data.
+
     missing data: ['passenger car efficiency', 'passenger cars']
-    '''
+    """
     # read in the eurostat data for 2015
-    eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
+    eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[
+        ["Total all products", "Electricity"]
+    ]
     # eurostat_2015 = eurostat_2015.rename(index={'GB': 'UK'}, level=0)
     eurostat_year = eurostat[["Total all products", "Electricity"]]
     # calculate the ratio of the two data sets
     ratio = eurostat_year / eurostat_2015
-    ratio = ratio.droplevel([1,4])
-    ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
+    ratio = ratio.droplevel([1, 4])
+    ratio.rename(
+        columns={"Total all products": "total", "Electricity": "ele"}, inplace=True
+    )
     ratio = ratio.rename(index={"GB": "UK"}, level=0)
 
     residential_total = [
@@ -892,54 +925,55 @@ def rescale(idees_countries, energy, eurostat):
     ]
 
     avia_inter = [
-        'total aviation passenger',
-        'total aviation freight',
-        'total international aviation passenger',
-        'total international aviation freight',
-        'total international aviation'
+        "total aviation passenger",
+        "total aviation freight",
+        "total international aviation passenger",
+        "total international aviation freight",
+        "total international aviation",
     ]
     avia_domestic = [
-        'total domestic aviation passenger',
-        'total domestic aviation freight',
-        'total domestic aviation',
+        "total domestic aviation passenger",
+        "total domestic aviation freight",
+        "total domestic aviation",
     ]
     navigation = [
         "total domestic navigation",
     ]
-    
-    idees_countries = idees_countries.repalce({'GB': 'UK', 'GR': 'EL'})
+
+    idees_countries = idees_countries.repalce({"GB": "UK", "GR": "EL"})
 
     for country in idees_countries:
-        res = ratio.loc[(country, slice(None), 'Residential')]
-        energy.loc[country, residential_total] *= res[['total']].iloc[0,0]
-        energy.loc[country, residential_ele] *= res[['ele']].iloc[0,0]
+        res = ratio.loc[(country, slice(None), "Residential")]
+        energy.loc[country, residential_total] *= res[["total"]].iloc[0, 0]
+        energy.loc[country, residential_ele] *= res[["ele"]].iloc[0, 0]
 
-        ser = ratio.loc[(country, slice(None), 'Services')]
-        energy.loc[country, service_total] *= ser[['total']].iloc[0,0]
-        energy.loc[country, service_ele] *= ser[['ele']].iloc[0,0]
+        ser = ratio.loc[(country, slice(None), "Services")]
+        energy.loc[country, service_total] *= ser[["total"]].iloc[0, 0]
+        energy.loc[country, service_ele] *= ser[["ele"]].iloc[0, 0]
 
-        agri = ratio.loc[(country, slice(None), 'Agriculture & forestry')]
-        energy.loc[country, agri_total] *= agri[['total']].iloc[0,0]
-        energy.loc[country, agri_ele] *= agri[['ele']].iloc[0,0]
+        agri = ratio.loc[(country, slice(None), "Agriculture & forestry")]
+        energy.loc[country, agri_total] *= agri[["total"]].iloc[0, 0]
+        energy.loc[country, agri_ele] *= agri[["ele"]].iloc[0, 0]
 
-        road = ratio.loc[(country, slice(None), 'Road')]
-        energy.loc[country, road_total] *= road[['total']].iloc[0,0]
-        energy.loc[country, road_ele] *= road[['ele']].iloc[0,0]
+        road = ratio.loc[(country, slice(None), "Road")]
+        energy.loc[country, road_total] *= road[["total"]].iloc[0, 0]
+        energy.loc[country, road_ele] *= road[["ele"]].iloc[0, 0]
 
-        rail = ratio.loc[(country, slice(None), 'Rail')]
-        energy.loc[country, rail_total] *= rail[['total']].iloc[0,0]
-        energy.loc[country, rail_ele] *= rail[['ele']].iloc[0,0]
+        rail = ratio.loc[(country, slice(None), "Rail")]
+        energy.loc[country, rail_total] *= rail[["total"]].iloc[0, 0]
+        energy.loc[country, rail_ele] *= rail[["ele"]].iloc[0, 0]
 
-        avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
-        avi_i = ratio.loc[(country, 'International aviation', slice(None))]
-        energy.loc[country, avia_inter] *= avi_i[['total']].iloc[0,0]
-        energy.loc[country, avia_domestic] *= avi_d[['total']].iloc[0,0]
+        avi_d = ratio.loc[(country, slice(None), "Domestic aviation")]
+        avi_i = ratio.loc[(country, "International aviation", slice(None))]
+        energy.loc[country, avia_inter] *= avi_i[["total"]].iloc[0, 0]
+        energy.loc[country, avia_domestic] *= avi_d[["total"]].iloc[0, 0]
 
-        nav = ratio.loc[(country, slice(None), 'Domestic Navigation')]
-        energy.loc[country, navigation] *= nav[['total']].iloc[0,0]
+        nav = ratio.loc[(country, slice(None), "Domestic Navigation")]
+        energy.loc[country, navigation] *= nav[["total"]].iloc[0, 0]
 
     return energy
 
+
 if __name__ == "__main__":
     if "snakemake" not in globals():
         from _helpers import mock_snakemake
@@ -970,14 +1004,16 @@ def rescale(idees_countries, energy, eurostat):
         idees = build_idees(idees_countries, data_year)
 
     energy = build_energy_totals(countries, eurostat, swiss, idees)
-    
+
     if data_year > 2015:
         energy = rescale(idees_countries, energy, eurostat)
-    
+
     energy.to_csv(snakemake.output.energy_name)
 
     # use rescaled idees data to calculate district heat share
-    district_heat_share = build_district_heat_share(countries, energy.loc[idees_countries])
+    district_heat_share = build_district_heat_share(
+        countries, energy.loc[idees_countries]
+    )
     district_heat_share.to_csv(snakemake.output.district_heat_share)
 
     base_year_emissions = params["base_emissions_year"]

From d363aeb57dac2370199b1a260e7c31c1014c074d Mon Sep 17 00:00:00 2001
From: toniseibold <tseibold.tub@gmail.com>
Date: Tue, 27 Feb 2024 12:04:07 +0100
Subject: [PATCH 03/14] removing old eurostat data reports as an option,
 cleaning up code

---
 config/config.default.yaml            |   1 -
 rules/build_sector.smk                |   4 +-
 rules/common.smk                      |   8 -
 rules/postprocess.smk                 |   2 +-
 rules/retrieve.smk                    |   3 +
 scripts/build_energy_totals.py        | 356 ++++++++++----------------
 scripts/retrieve_sector_databundle.py |  14 +
 7 files changed, 153 insertions(+), 235 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index 093b1aad1..b2828a443 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -316,7 +316,6 @@ pypsa_eur:
 energy:
   energy_totals_year: 2019
   base_emissions_year: 1990
-  eurostat_report_year: 2023
   emissions: CO2
 
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass
diff --git a/rules/build_sector.smk b/rules/build_sector.smk
index 466d17137..6c542f4e7 100644
--- a/rules/build_sector.smk
+++ b/rules/build_sector.smk
@@ -270,7 +270,7 @@ rule build_energy_totals:
         swiss="data/switzerland-new_format-all_years.csv",
         idees="data/bundle-sector/jrc-idees-2015",
         district_heat_share="data/district_heat_share.csv",
-        eurostat=input_eurostat,
+        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
     output:
         energy_name=resources("energy_totals.csv"),
         co2_name=resources("co2_totals.csv"),
@@ -865,7 +865,7 @@ rule prepare_sector_network:
         ),
         network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"),
         energy_totals_name=resources("energy_totals.csv"),
-        eurostat=input_eurostat,
+        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
         pop_weighted_energy_totals=resources(
             "pop_weighted_energy_totals_s{simpl}_{clusters}.csv"
         ),
diff --git a/rules/common.smk b/rules/common.smk
index c3ce845c5..8e0e1e662 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -129,14 +129,6 @@ def has_internet_access(url="www.zenodo.org") -> bool:
     finally:
         conn.close()
 
-
-def input_eurostat(w):
-    if config["energy"]["eurostat_report_year"] != 2023:
-        report_year = config["energy"]["eurostat_report_year"]
-        return f"data/bundle-sector/eurostat-energy_balances-june_{report_year}_edition"
-    else:
-        return "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
-
 def solved_previous_horizon(w):
     planning_horizons = config_provider("scenario", "planning_horizons")(w)
     i = planning_horizons.index(int(w.planning_horizons))
diff --git a/rules/postprocess.smk b/rules/postprocess.smk
index dc08699fb..5f93540ff 100644
--- a/rules/postprocess.smk
+++ b/rules/postprocess.smk
@@ -247,7 +247,7 @@ rule plot_summary:
         costs=RESULTS + "csvs/costs.csv",
         energy=RESULTS + "csvs/energy.csv",
         balances=RESULTS + "csvs/supply_energy.csv",
-        eurostat=input_eurostat,
+        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
         co2="data/bundle-sector/eea/UNFCCC_v23.csv",
     output:
         costs=RESULTS + "graphs/costs.pdf",
diff --git a/rules/retrieve.smk b/rules/retrieve.smk
index 29d050abb..8ef373d1d 100644
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -142,6 +142,9 @@ if config["enable"]["retrieve"] and config["enable"].get(
         protected(
             directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
         ),
+        protected(
+            directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
+        ),
         protected(directory("data/bundle-sector/jrc-idees-2015")),
     ]
 
diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index 13a8c30da..16c99d8fc 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -117,97 +117,65 @@ def reverse(dictionary):
 }
 
 
-def build_eurostat(input_eurostat, countries, report_year, year):
+def build_eurostat(input_eurostat, countries, year):
     """
     Return multi-index for all countries' energy data in TWh/a.
     """
-    if report_year != 2023:
-        filenames = {
-        2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
-        2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
+    # read in every country file in countries
+    eurostat = pd.DataFrame()
+    countries = [country if country != 'GB' else 'UK' for country in countries]
+    countries = [country if country != 'GR' else 'EL' for country in countries]
+    for country in countries:
+        filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
+        if os.path.exists(input_eurostat + filename):
+            df = pd.read_excel(
+                input_eurostat + filename,
+                engine='pyxlsb',
+                sheet_name=str(year),
+                skiprows=4,
+                index_col=list(range(4)))
+            # replace entry 'Z' with 0
+            df.replace('Z', 0, inplace=True)
+            # write 'International aviation' to the 2nd level of the multiindex
+            index_number = (df.index.get_level_values(1) == 'International aviation').argmax()
+            new_index = ('-', 'International aviation', 'International aviation', 'ktoe')
+            modified_index = list(df.index)
+            modified_index[index_number] = new_index
+            df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
+            # drop the annoying subhead line
+            df.drop(df[df[year] == year].index, inplace=True)
+            # replace 'Z' with 0
+            df = df.replace('Z', 0)
+            # add country to the multiindex
+            new_tuple = [(country, *idx) for idx in df.index]
+            new_mindex = pd.MultiIndex.from_tuples(new_tuple, names=['country', None, 'name', None, 'unit'])
+            df.index = new_mindex
+            # make numeric values where possible
+            df = df.apply(pd.to_numeric, errors='coerce')
+            # drop non-numeric columns
+            non_numeric_cols = df.columns[df.dtypes != float]
+            df.drop(non_numeric_cols, axis=1, inplace=True)
+            # concatenate the dataframes
+            eurostat = pd.concat([eurostat, df], axis=0)
+    
+    eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
+    # Renaming some indices
+    rename = {
+        'Households': 'Residential',
+        'Commercial & public services': 'Services',
+        'Domestic navigation': 'Domestic Navigation'
     }
-
-        with mute_print():
-            dfs = pd.read_excel(
-                input_eurostat + filenames[report_year],
-                sheet_name=None,
-                skiprows=1,
-                index_col=list(range(4)),
-            )
-
-        # sorted_index necessary for slicing
-        lookup = eurostat_codes
-        labelled_dfs = {
-            lookup[df.columns[0]]: df
-            for df in dfs.values()
-            if lookup[df.columns[0]] in countries
-        }
-        df = pd.concat(labelled_dfs, sort=True).sort_index()
-        # drop non-numeric and country columns
-        non_numeric_cols = df.columns[df.dtypes != float]
-        country_cols = df.columns.intersection(lookup.keys())
-        to_drop = non_numeric_cols.union(country_cols)
-        df.drop(to_drop, axis=1, inplace=True)
-
-        # convert ktoe/a to TWh/a
-        df *= 11.63 / 1e3
+    for name, rename in rename.items():
+        eurostat.index = eurostat.index.set_levels(
+        eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
+        level=3)
+    new_index = eurostat.index.set_levels(eurostat.index.levels[2].where(eurostat.index.levels[2] != 'International maritime bunkers', 'Bunkers'), level=2)
+    eurostat.index = new_index
+
+    eurostat.rename(columns={'Total': 'Total all products'}, inplace=True)
+    eurostat.index = eurostat.index.set_levels(eurostat.index.levels[0].where(eurostat.index.levels[0] != 'UK', 'GB'), level=0)
     
-    else:
-        # read in every country file in countries
-        eurostat = pd.DataFrame()
-        countries = [country if country != 'GB' else 'UK' for country in countries]
-        countries = [country if country != 'GR' else 'EL' for country in countries]
-        for country in countries:
-            filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
-            if os.path.exists(input_eurostat + filename):
-                df = pd.read_excel(
-                    input_eurostat + filename,
-                    engine='pyxlsb',
-                    sheet_name=str(year),
-                    skiprows=4,
-                    index_col=list(range(4)))
-                # replace entry 'Z' with 0
-                df.replace('Z', 0, inplace=True)
-                # write 'International aviation' to the 2nd level of the multiindex
-                index_number = (df.index.get_level_values(1) == 'International aviation').argmax()
-                new_index = ('-', 'International aviation', 'International aviation', 'ktoe')
-                modified_index = list(df.index)
-                modified_index[index_number] = new_index
-                df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
-                # drop the annoying subhead line
-                df.drop(df[df[year] == year].index, inplace=True)
-                # replace 'Z' with 0
-                df = df.replace('Z', 0)
-                # add country to the multiindex
-                new_tuple = [(country, *idx) for idx in df.index]
-                new_mindex = pd.MultiIndex.from_tuples(new_tuple, names=['country', None, 'name', None, 'unit'])
-                df.index = new_mindex
-                # make numeric values where possible
-                df = df.apply(pd.to_numeric, errors='coerce')
-                # drop non-numeric columns
-                non_numeric_cols = df.columns[df.dtypes != float]
-                df.drop(non_numeric_cols, axis=1, inplace=True)
-                # concatenate the dataframes
-                eurostat = pd.concat([eurostat, df], axis=0)
-        
-        eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
-        # Renaming some indices
-        rename = {
-            'Households': 'Residential',
-            'Commercial & public services': 'Services',
-            'Domestic navigation': 'Domestic Navigation'
-        }
-        for name, rename in rename.items():
-            eurostat.index = eurostat.index.set_levels(
-            eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
-            level=3)
-        new_index = eurostat.index.set_levels(eurostat.index.levels[2].where(eurostat.index.levels[2] != 'International maritime bunkers', 'Bunkers'), level=2)
-        eurostat.index = new_index
-
-        eurostat.rename(columns={'Total': 'Total all products'}, inplace=True)
-        eurostat.index = eurostat.index.set_levels(eurostat.index.levels[0].where(eurostat.index.levels[0] != 'UK', 'GB'), level=0)
-        
-        df = eurostat * 11.63 / 1e3
+    df = eurostat * 11.63 / 1e3
 
     return df
 
@@ -709,8 +677,8 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
     return emissions / 1e3
 
 
-def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
-    eurostat = build_eurostat(input_eurostat, countries, report_year, year)
+def build_eurostat_co2(input_eurostat, countries, year=1990):
+    eurostat = build_eurostat(input_eurostat, countries, year)
 
     specific_emissions = pd.Series(index=eurostat.columns, dtype=float)
 
@@ -727,49 +695,26 @@ def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
     return eurostat.multiply(specific_emissions).sum(axis=1)
 
 
-def build_co2_totals(countries, eea_co2, eurostat_co2, report_year):
+def build_co2_totals(countries, eea_co2, eurostat_co2):
     co2 = eea_co2.reindex(countries)
 
     for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]):
-        if report_year != 2023:
-            mappings = {
-                "electricity": (
-                    ct,
-                    "+",
-                    "Conventional Thermal Power Stations",
-                    "of which From Coal",
-                ),
-                "residential non-elec": (ct, "+", "+", "Residential"),
-                "services non-elec": (ct, "+", "+", "Services"),
-                "road non-elec": (ct, "+", "+", "Road"),
-                "rail non-elec": (ct, "+", "+", "Rail"),
-                "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
-                "international navigation": (ct, "-", "Bunkers"),
-                "domestic aviation": (ct, "+", "+", "Domestic aviation"),
-                "international aviation": (ct, "+", "+", "International aviation"),
-                # does not include industrial process emissions or fuel processing/refining
-                "industrial non-elec": (ct, "+", "Industry"),
-                # does not include non-energy emissions
-                "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
-                & eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
-            }
-        else:
-            mappings = {
-                "electricity": (ct, "+", "Electricity & heat generation", np.nan),
-                "residential non-elec": (ct, "+", "+", "Residential"),
-                "services non-elec": (ct, "+", "+", "Services"),
-                "road non-elec": (ct, "+", "+", "Road"),
-                "rail non-elec": (ct, "+", "+", "Rail"),
-                "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
-                "international navigation": (ct, "-", "Bunkers"),
-                "domestic aviation": (ct, "+", "+", "Domestic aviation"),
-                "international aviation": (ct, "-", "International aviation"),
-                # does not include industrial process emissions or fuel processing/refining
-                "industrial non-elec": (ct, "+", "Industry sector"),
-                # does not include non-energy emissions
-                "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
-                & eurostat_co2.index.isin(["Agriculture & forestry", "Fishing"], level=3),
-            }
+        mappings = {
+            "electricity": (ct, "+", "Electricity & heat generation", np.nan),
+            "residential non-elec": (ct, "+", "+", "Residential"),
+            "services non-elec": (ct, "+", "+", "Services"),
+            "road non-elec": (ct, "+", "+", "Road"),
+            "rail non-elec": (ct, "+", "+", "Rail"),
+            "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
+            "international navigation": (ct, "-", "Bunkers"),
+            "domestic aviation": (ct, "+", "+", "Domestic aviation"),
+            "international aviation": (ct, "-", "International aviation"),
+            # does not include industrial process emissions or fuel processing/refining
+            "industrial non-elec": (ct, "+", "Industry sector"),
+            # does not include non-energy emissions
+            "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
+            & eurostat_co2.index.isin(["Agriculture & forestry", "Fishing"], level=3),
+        }
 
         for i, mi in mappings.items():
             co2.at[ct, i] = eurostat_co2.loc[mi].sum()
@@ -820,83 +765,65 @@ def rescale(idees_countries, energy, eurostat):
     '''
     # read in the eurostat data for 2015
     eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
-    # eurostat_2015 = eurostat_2015.rename(index={'GB': 'UK'}, level=0)
     eurostat_year = eurostat[["Total all products", "Electricity"]]
     # calculate the ratio of the two data sets
     ratio = eurostat_year / eurostat_2015
     ratio = ratio.droplevel([1,4])
     ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
-    ratio = ratio.rename(index={"GB": "UK"}, level=0)
-
-    residential_total = [
-        "total residential space",
-        "total residential water",
-        "total residential cooking",
-        "total residential",
-        "derived heat residential",
-        "thermal uses residential",
-    ]
-    residential_ele = [
-        "electricity residential space",
-        "electricity residential water",
-        "electricity residential cooking",
-        "electricity residential",
-    ]
-
-    service_total = [
-        "total services space",
-        "total services water",
-        "total services cooking",
-        "total services",
-        "derived heat services",
-        "thermal uses services",
-    ]
-    service_ele = [
-        "electricity services space",
-        "electricity services water",
-        "electricity services cooking",
-        "electricity services",
-    ]
-
-    agri_total = [
-        "total agriculture heat",
-        "total agriculture machinery",
-        "total agriculture",
-    ]
-    agri_ele = [
-        "total agriculture electricity",
-    ]
-
-    road_total = [
-        "total road",
-        "total passenger cars",
-        "total other road passenger",
-        "total light duty road freight",
-    ]
-    road_ele = [
-        "electricity road",
-        "electricity passenger cars",
-        "electricity other road passenger",
-        "electricity light duty road freight",
-    ]
-
-    rail_total = [
-        "total rail",
-        "total rail passenger",
-        "total rail freight",
-    ]
-    rail_ele = [
-        "electricity rail",
-        "electricity rail passenger",
-        "electricity rail freight",
-    ]
+    ratio = ratio.rename(index={"EL": "GR"}, level=0)
+
+    mappings = {
+        "Residential": {
+            "total": ["total residential space",
+                      "total residential water",
+                      "total residential cooking",
+                      "total residential",
+                      "derived heat residential",
+                      "thermal uses residential",],
+            "elec": ["electricity residential space",
+                     "electricity residential water",
+                     "electricity residential cooking",
+                     "electricity residential",]},
+        "Services": {
+            "total": ["total services space",
+                      "total services water",
+                      "total services cooking",
+                      "total services",
+                      "derived heat services",
+                      "thermal uses services",],
+            "elec": ["electricity services space",
+                     "electricity services water",
+                     "electricity services cooking",
+                     "electricity services",]},
+        "Agriculture & forestry": {
+            "total": ["total agriculture heat",
+                      "total agriculture machinery",
+                      "total agriculture",],
+            "elec": ["total agriculture electricity",]},
+        "Road": {
+            "total": ["total road",
+                      "total passenger cars",
+                      "total other road passenger",
+                      "total light duty road freight",],
+            "elec": ["electricity road",
+                     "electricity passenger cars",
+                     "electricity other road passenger",
+                     "electricity light duty road freight",]},
+        "Rail": {
+            "total": ["total rail",
+                      "total rail passenger",
+                      "total rail freight",],
+            "elec": ["electricity rail",
+                     "electricity rail passenger",
+                     "electricity rail freight",]},
+    }
 
     avia_inter = [
-        'total aviation passenger',
-        'total aviation freight',
-        'total international aviation passenger',
-        'total international aviation freight',
-        'total international aviation'
+    'total aviation passenger',
+    'total aviation freight',
+    'total international aviation passenger',
+    'total international aviation freight',
+    'total international aviation'
     ]
     avia_domestic = [
         'total domestic aviation passenger',
@@ -906,30 +833,14 @@ def rescale(idees_countries, energy, eurostat):
     navigation = [
         "total domestic navigation",
     ]
-    
-    idees_countries = idees_countries.repalce({'GB': 'UK', 'GR': 'EL'})
 
     for country in idees_countries:
-        res = ratio.loc[(country, slice(None), 'Residential')]
-        energy.loc[country, residential_total] *= res[['total']].iloc[0,0]
-        energy.loc[country, residential_ele] *= res[['ele']].iloc[0,0]
-
-        ser = ratio.loc[(country, slice(None), 'Services')]
-        energy.loc[country, service_total] *= ser[['total']].iloc[0,0]
-        energy.loc[country, service_ele] *= ser[['ele']].iloc[0,0]
-
-        agri = ratio.loc[(country, slice(None), 'Agriculture & forestry')]
-        energy.loc[country, agri_total] *= agri[['total']].iloc[0,0]
-        energy.loc[country, agri_ele] *= agri[['ele']].iloc[0,0]
-
-        road = ratio.loc[(country, slice(None), 'Road')]
-        energy.loc[country, road_total] *= road[['total']].iloc[0,0]
-        energy.loc[country, road_ele] *= road[['ele']].iloc[0,0]
-
-        rail = ratio.loc[(country, slice(None), 'Rail')]
-        energy.loc[country, rail_total] *= rail[['total']].iloc[0,0]
-        energy.loc[country, rail_ele] *= rail[['ele']].iloc[0,0]
+        for sector, mapping in mappings.items():
+            sector_ratio = ratio.loc[(country, slice(None), sector)]
 
+            energy.loc[country, mapping["total"]] *= sector_ratio[['total']].iloc[0,0]
+            energy.loc[country, mapping["elec"]] *= sector_ratio[['ele']].iloc[0,0]
+        
         avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
         avi_i = ratio.loc[(country, 'International aviation', slice(None))]
         energy.loc[country, avia_inter] *= avi_i[['total']].iloc[0,0]
@@ -958,9 +869,8 @@ def rescale(idees_countries, energy, eurostat):
     idees_countries = pd.Index(countries).intersection(eu28)
 
     data_year = params["energy_totals_year"]
-    report_year = snakemake.params.energy["eurostat_report_year"]
     input_eurostat = snakemake.input.eurostat
-    eurostat = build_eurostat(input_eurostat, countries, report_year, data_year)
+    eurostat = build_eurostat(input_eurostat, countries, data_year)
     swiss = build_swiss(data_year)
     # data from idees only exists for 2015
     if data_year > 2015:
@@ -984,10 +894,10 @@ def rescale(idees_countries, energy, eurostat):
     emissions_scope = snakemake.params.energy["emissions"]
     eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
     eurostat_co2 = build_eurostat_co2(
-        input_eurostat, countries, report_year, base_year_emissions
+        input_eurostat, countries, base_year_emissions
     )
 
-    co2 = build_co2_totals(countries, eea_co2, eurostat_co2, report_year)
+    co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
     co2.to_csv(snakemake.output.co2_name)
 
     transport = build_transport_data(countries, population, idees)
diff --git a/scripts/retrieve_sector_databundle.py b/scripts/retrieve_sector_databundle.py
index 3b825da29..a075119ec 100644
--- a/scripts/retrieve_sector_databundle.py
+++ b/scripts/retrieve_sector_databundle.py
@@ -8,6 +8,7 @@
 
 import logging
 import tarfile
+import zipfile
 from pathlib import Path
 
 from _helpers import (
@@ -47,3 +48,16 @@
     tarball_fn.unlink()
 
     logger.info(f"Databundle available in '{to_fn}'.")
+
+    url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
+    tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
+    to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
+
+    logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
+    progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
+
+    logger.info("Extracting Eurostat data.")
+    with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
+        zip_ref.extractall(to_fn)
+
+    logger.info(f"Eurostat data available in '{to_fn}'.")
\ No newline at end of file

From 9182d6d667df59f435cd7d5d9c9f651665cd3df8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 27 Feb 2024 12:14:41 +0000
Subject: [PATCH 04/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 rules/common.smk                      |   1 +
 scripts/build_energy_totals.py        | 197 ++++++++++++++++----------
 scripts/retrieve_sector_databundle.py |   8 +-
 3 files changed, 127 insertions(+), 79 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index 8e0e1e662..2b8495e1c 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -129,6 +129,7 @@ def has_internet_access(url="www.zenodo.org") -> bool:
     finally:
         conn.close()
 
+
 def solved_previous_horizon(w):
     planning_horizons = config_provider("scenario", "planning_horizons")(w)
     i = planning_horizons.index(int(w.planning_horizons))
diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index 24e414519..d6c63f004 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -123,58 +123,76 @@ def build_eurostat(input_eurostat, countries, year):
     """
     # read in every country file in countries
     eurostat = pd.DataFrame()
-    countries = [country if country != 'GB' else 'UK' for country in countries]
-    countries = [country if country != 'GR' else 'EL' for country in countries]
+    countries = [country if country != "GB" else "UK" for country in countries]
+    countries = [country if country != "GR" else "EL" for country in countries]
     for country in countries:
         filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
         if os.path.exists(input_eurostat + filename):
             df = pd.read_excel(
                 input_eurostat + filename,
-                engine='pyxlsb',
+                engine="pyxlsb",
                 sheet_name=str(year),
                 skiprows=4,
-                index_col=list(range(4)))
+                index_col=list(range(4)),
+            )
             # replace entry 'Z' with 0
-            df.replace('Z', 0, inplace=True)
+            df.replace("Z", 0, inplace=True)
             # write 'International aviation' to the 2nd level of the multiindex
-            index_number = (df.index.get_level_values(1) == 'International aviation').argmax()
-            new_index = ('-', 'International aviation', 'International aviation', 'ktoe')
+            index_number = (
+                df.index.get_level_values(1) == "International aviation"
+            ).argmax()
+            new_index = (
+                "-",
+                "International aviation",
+                "International aviation",
+                "ktoe",
+            )
             modified_index = list(df.index)
             modified_index[index_number] = new_index
             df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
             # drop the annoying subhead line
             df.drop(df[df[year] == year].index, inplace=True)
             # replace 'Z' with 0
-            df = df.replace('Z', 0)
+            df = df.replace("Z", 0)
             # add country to the multiindex
             new_tuple = [(country, *idx) for idx in df.index]
-            new_mindex = pd.MultiIndex.from_tuples(new_tuple, names=['country', None, 'name', None, 'unit'])
+            new_mindex = pd.MultiIndex.from_tuples(
+                new_tuple, names=["country", None, "name", None, "unit"]
+            )
             df.index = new_mindex
             # make numeric values where possible
-            df = df.apply(pd.to_numeric, errors='coerce')
+            df = df.apply(pd.to_numeric, errors="coerce")
             # drop non-numeric columns
             non_numeric_cols = df.columns[df.dtypes != float]
             df.drop(non_numeric_cols, axis=1, inplace=True)
             # concatenate the dataframes
             eurostat = pd.concat([eurostat, df], axis=0)
-    
+
     eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
     # Renaming some indices
     rename = {
-        'Households': 'Residential',
-        'Commercial & public services': 'Services',
-        'Domestic navigation': 'Domestic Navigation'
+        "Households": "Residential",
+        "Commercial & public services": "Services",
+        "Domestic navigation": "Domestic Navigation",
     }
     for name, rename in rename.items():
         eurostat.index = eurostat.index.set_levels(
-        eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
-        level=3)
-    new_index = eurostat.index.set_levels(eurostat.index.levels[2].where(eurostat.index.levels[2] != 'International maritime bunkers', 'Bunkers'), level=2)
+            eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
+            level=3,
+        )
+    new_index = eurostat.index.set_levels(
+        eurostat.index.levels[2].where(
+            eurostat.index.levels[2] != "International maritime bunkers", "Bunkers"
+        ),
+        level=2,
+    )
     eurostat.index = new_index
 
-    eurostat.rename(columns={'Total': 'Total all products'}, inplace=True)
-    eurostat.index = eurostat.index.set_levels(eurostat.index.levels[0].where(eurostat.index.levels[0] != 'UK', 'GB'), level=0)
-    
+    eurostat.rename(columns={"Total": "Total all products"}, inplace=True)
+    eurostat.index = eurostat.index.set_levels(
+        eurostat.index.levels[0].where(eurostat.index.levels[0] != "UK", "GB"), level=0
+    )
+
     df = eurostat * 11.63 / 1e3
 
     return df
@@ -766,66 +784,95 @@ def rescale(idees_countries, energy, eurostat):
     missing data: ['passenger car efficiency', 'passenger cars']
     """
     # read in the eurostat data for 2015
-    eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
+    eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[
+        ["Total all products", "Electricity"]
+    ]
     eurostat_year = eurostat[["Total all products", "Electricity"]]
     # calculate the ratio of the two data sets
     ratio = eurostat_year / eurostat_2015
-    ratio = ratio.droplevel([1,4])
-    ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
+    ratio = ratio.droplevel([1, 4])
+    ratio.rename(
+        columns={"Total all products": "total", "Electricity": "ele"}, inplace=True
+    )
     ratio = ratio.rename(index={"EL": "GR"}, level=0)
 
     mappings = {
         "Residential": {
-            "total": ["total residential space",
-                      "total residential water",
-                      "total residential cooking",
-                      "total residential",
-                      "derived heat residential",
-                      "thermal uses residential",],
-            "elec": ["electricity residential space",
-                     "electricity residential water",
-                     "electricity residential cooking",
-                     "electricity residential",]},
+            "total": [
+                "total residential space",
+                "total residential water",
+                "total residential cooking",
+                "total residential",
+                "derived heat residential",
+                "thermal uses residential",
+            ],
+            "elec": [
+                "electricity residential space",
+                "electricity residential water",
+                "electricity residential cooking",
+                "electricity residential",
+            ],
+        },
         "Services": {
-            "total": ["total services space",
-                      "total services water",
-                      "total services cooking",
-                      "total services",
-                      "derived heat services",
-                      "thermal uses services",],
-            "elec": ["electricity services space",
-                     "electricity services water",
-                     "electricity services cooking",
-                     "electricity services",]},
+            "total": [
+                "total services space",
+                "total services water",
+                "total services cooking",
+                "total services",
+                "derived heat services",
+                "thermal uses services",
+            ],
+            "elec": [
+                "electricity services space",
+                "electricity services water",
+                "electricity services cooking",
+                "electricity services",
+            ],
+        },
         "Agriculture & forestry": {
-            "total": ["total agriculture heat",
-                      "total agriculture machinery",
-                      "total agriculture",],
-            "elec": ["total agriculture electricity",]},
+            "total": [
+                "total agriculture heat",
+                "total agriculture machinery",
+                "total agriculture",
+            ],
+            "elec": [
+                "total agriculture electricity",
+            ],
+        },
         "Road": {
-            "total": ["total road",
-                      "total passenger cars",
-                      "total other road passenger",
-                      "total light duty road freight",],
-            "elec": ["electricity road",
-                     "electricity passenger cars",
-                     "electricity other road passenger",
-                     "electricity light duty road freight",]},
+            "total": [
+                "total road",
+                "total passenger cars",
+                "total other road passenger",
+                "total light duty road freight",
+            ],
+            "elec": [
+                "electricity road",
+                "electricity passenger cars",
+                "electricity other road passenger",
+                "electricity light duty road freight",
+            ],
+        },
         "Rail": {
-            "total": ["total rail",
-                      "total rail passenger",
-                      "total rail freight",],
-            "elec": ["electricity rail",
-                     "electricity rail passenger",
-                     "electricity rail freight",]},
+            "total": [
+                "total rail",
+                "total rail passenger",
+                "total rail freight",
+            ],
+            "elec": [
+                "electricity rail",
+                "electricity rail passenger",
+                "electricity rail freight",
+            ],
+        },
     }
 
     avia_inter = [
-    'total aviation passenger',
-    'total aviation freight',
-    'total international aviation passenger',
-    'total international aviation freight',
-    'total international aviation'
+        "total aviation passenger",
+        "total aviation freight",
+        "total international aviation passenger",
+        "total international aviation freight",
+        "total international aviation",
     ]
     avia_domestic = [
         "total domestic aviation passenger",
@@ -840,13 +887,13 @@ def rescale(idees_countries, energy, eurostat):
         for sector, mapping in mappings.items():
             sector_ratio = ratio.loc[(country, slice(None), sector)]
 
-            energy.loc[country, mapping["total"]] *= sector_ratio[['total']].iloc[0,0]
-            energy.loc[country, mapping["elec"]] *= sector_ratio[['ele']].iloc[0,0]
-        
-        avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
-        avi_i = ratio.loc[(country, 'International aviation', slice(None))]
-        energy.loc[country, avia_inter] *= avi_i[['total']].iloc[0,0]
-        energy.loc[country, avia_domestic] *= avi_d[['total']].iloc[0,0]
+            energy.loc[country, mapping["total"]] *= sector_ratio[["total"]].iloc[0, 0]
+            energy.loc[country, mapping["elec"]] *= sector_ratio[["ele"]].iloc[0, 0]
+
+        avi_d = ratio.loc[(country, slice(None), "Domestic aviation")]
+        avi_i = ratio.loc[(country, "International aviation", slice(None))]
+        energy.loc[country, avia_inter] *= avi_i[["total"]].iloc[0, 0]
+        energy.loc[country, avia_domestic] *= avi_d[["total"]].iloc[0, 0]
 
         nav = ratio.loc[(country, slice(None), "Domestic Navigation")]
         energy.loc[country, navigation] *= nav[["total"]].iloc[0, 0]
@@ -898,9 +945,7 @@ def rescale(idees_countries, energy, eurostat):
     base_year_emissions = params["base_emissions_year"]
     emissions_scope = snakemake.params.energy["emissions"]
     eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
-    eurostat_co2 = build_eurostat_co2(
-        input_eurostat, countries, base_year_emissions
-    )
+    eurostat_co2 = build_eurostat_co2(input_eurostat, countries, base_year_emissions)
 
     co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
     co2.to_csv(snakemake.output.co2_name)
diff --git a/scripts/retrieve_sector_databundle.py b/scripts/retrieve_sector_databundle.py
index a075119ec..2c9fea92b 100644
--- a/scripts/retrieve_sector_databundle.py
+++ b/scripts/retrieve_sector_databundle.py
@@ -51,13 +51,15 @@
 
     url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
     tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
-    to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
+    to_fn = Path(
+        f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/"
+    )
 
     logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
     progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
 
     logger.info("Extracting Eurostat data.")
-    with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
+    with zipfile.ZipFile(tarball_fn, "r") as zip_ref:
         zip_ref.extractall(to_fn)
 
-    logger.info(f"Eurostat data available in '{to_fn}'.")
\ No newline at end of file
+    logger.info(f"Eurostat data available in '{to_fn}'.")

From 5bcecc62c6abcfa78df6c2833bb22b0e40aab88a Mon Sep 17 00:00:00 2001
From: toniseibold <tseibold.tub@gmail.com>
Date: Tue, 27 Feb 2024 13:32:07 +0100
Subject: [PATCH 05/14] retrieve eurostat data is now outside of
 retrieve_sector_databundle

---
 rules/retrieve.smk                    | 13 ++++++--
 scripts/retrieve_eurostat_data.py     | 45 +++++++++++++++++++++++++++
 scripts/retrieve_sector_databundle.py | 16 +---------
 3 files changed, 56 insertions(+), 18 deletions(-)
 create mode 100644 scripts/retrieve_eurostat_data.py

diff --git a/rules/retrieve.smk b/rules/retrieve.smk
index 8ef373d1d..9250e7455 100644
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -142,9 +142,6 @@ if config["enable"]["retrieve"] and config["enable"].get(
         protected(
             directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
         ),
-        protected(
-            directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
-        ),
         protected(directory("data/bundle-sector/jrc-idees-2015")),
     ]
 
@@ -160,6 +157,16 @@ if config["enable"]["retrieve"] and config["enable"].get(
         script:
             "../scripts/retrieve_sector_databundle.py"
 
+    rule retrieve_eurostat_data:
+        output:
+            protected(
+            directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
+        ),
+        log:
+            "logs/retrieve_eurostat_data.log",
+        retries: 2
+        script:
+            "../scripts/retrieve_eurostat_data.py"
 
 if config["enable"]["retrieve"]:
     datafiles = [
diff --git a/scripts/retrieve_eurostat_data.py b/scripts/retrieve_eurostat_data.py
new file mode 100644
index 000000000..aedc43c90
--- /dev/null
+++ b/scripts/retrieve_eurostat_data.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: : 2024- The PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: MIT
+"""
+Retrieve and extract eurostat energy balances data.
+"""
+
+
+import logging
+import zipfile
+from pathlib import Path
+
+from _helpers import (
+    configure_logging,
+    progress_retrieve,
+    set_scenario_config,
+)
+
+logger = logging.getLogger(__name__)
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        from _helpers import mock_snakemake
+
+        snakemake = mock_snakemake("retrieve_eurostat_data")
+        rootpath = ".."
+    else:
+        rootpath = "."
+    configure_logging(snakemake)
+    set_scenario_config(snakemake)
+
+    disable_progress = snakemake.config["run"].get("disable_progressbar", False)
+    url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
+    tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
+    to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
+
+    logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
+    progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
+
+    logger.info("Extracting Eurostat data.")
+    with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
+        zip_ref.extractall(to_fn)
+
+    logger.info(f"Eurostat data available in '{to_fn}'.")
\ No newline at end of file
diff --git a/scripts/retrieve_sector_databundle.py b/scripts/retrieve_sector_databundle.py
index a075119ec..d311dac75 100644
--- a/scripts/retrieve_sector_databundle.py
+++ b/scripts/retrieve_sector_databundle.py
@@ -8,7 +8,6 @@
 
 import logging
 import tarfile
-import zipfile
 from pathlib import Path
 
 from _helpers import (
@@ -47,17 +46,4 @@
 
     tarball_fn.unlink()
 
-    logger.info(f"Databundle available in '{to_fn}'.")
-
-    url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
-    tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
-    to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
-
-    logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
-    progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
-
-    logger.info("Extracting Eurostat data.")
-    with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
-        zip_ref.extractall(to_fn)
-
-    logger.info(f"Eurostat data available in '{to_fn}'.")
\ No newline at end of file
+    logger.info(f"Databundle available in '{to_fn}'.")
\ No newline at end of file

From 8dfab454537fa3892cce1a939625e671aca3091e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 27 Feb 2024 12:35:55 +0000
Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 rules/retrieve.smk                    |  7 +++++--
 scripts/retrieve_eurostat_data.py     | 14 ++++++--------
 scripts/retrieve_sector_databundle.py |  2 +-
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/rules/retrieve.smk b/rules/retrieve.smk
index 9250e7455..cfe698fc6 100644
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -160,14 +160,17 @@ if config["enable"]["retrieve"] and config["enable"].get(
     rule retrieve_eurostat_data:
         output:
             protected(
-            directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
-        ),
+                directory(
+                    "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
+                )
+            ),
         log:
             "logs/retrieve_eurostat_data.log",
         retries: 2
         script:
             "../scripts/retrieve_eurostat_data.py"
 
+
 if config["enable"]["retrieve"]:
     datafiles = [
         "IGGIELGN_LNGs.geojson",
diff --git a/scripts/retrieve_eurostat_data.py b/scripts/retrieve_eurostat_data.py
index aedc43c90..daee4fc45 100644
--- a/scripts/retrieve_eurostat_data.py
+++ b/scripts/retrieve_eurostat_data.py
@@ -11,11 +11,7 @@
 import zipfile
 from pathlib import Path
 
-from _helpers import (
-    configure_logging,
-    progress_retrieve,
-    set_scenario_config,
-)
+from _helpers import configure_logging, progress_retrieve, set_scenario_config
 
 logger = logging.getLogger(__name__)
 
@@ -33,13 +29,15 @@
     disable_progress = snakemake.config["run"].get("disable_progressbar", False)
     url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
     tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
-    to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
+    to_fn = Path(
+        f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/"
+    )
 
     logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
     progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
 
     logger.info("Extracting Eurostat data.")
-    with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
+    with zipfile.ZipFile(tarball_fn, "r") as zip_ref:
         zip_ref.extractall(to_fn)
 
-    logger.info(f"Eurostat data available in '{to_fn}'.")
\ No newline at end of file
+    logger.info(f"Eurostat data available in '{to_fn}'.")
diff --git a/scripts/retrieve_sector_databundle.py b/scripts/retrieve_sector_databundle.py
index d311dac75..3b825da29 100644
--- a/scripts/retrieve_sector_databundle.py
+++ b/scripts/retrieve_sector_databundle.py
@@ -46,4 +46,4 @@
 
     tarball_fn.unlink()
 
-    logger.info(f"Databundle available in '{to_fn}'.")
\ No newline at end of file
+    logger.info(f"Databundle available in '{to_fn}'.")

From bd0880c02255e6214ba94a4b2ba2b8da7519c75e Mon Sep 17 00:00:00 2001
From: toniseibold <tseibold.tub@gmail.com>
Date: Tue, 27 Feb 2024 14:32:09 +0100
Subject: [PATCH 07/14] correcting misleading comment

---
 scripts/build_energy_totals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index d6c63f004..c114b7072 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -922,7 +922,7 @@ def rescale(idees_countries, energy, eurostat):
     input_eurostat = snakemake.input.eurostat
     eurostat = build_eurostat(input_eurostat, countries, data_year)
     swiss = build_swiss(data_year)
-    # data from idees only exists for 2015
+    # data from idees only exists from 2000-2015
     if data_year > 2015:
         # read in latest data and rescale later
         idees = build_idees(idees_countries, 2015)

From 41f23f9589669147f5dfef96e42fe68bceabd595 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 5 Mar 2024 18:40:06 +0100
Subject: [PATCH 08/14] change eurostat year in
 build_industrial_production_per_country

---
 rules/build_sector.smk                        |  2 +-
 ...build_industrial_production_per_country.py | 50 +++++++------------
 2 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/rules/build_sector.smk b/rules/build_sector.smk
index 6c542f4e7..5e19a4bb8 100644
--- a/rules/build_sector.smk
+++ b/rules/build_sector.smk
@@ -468,7 +468,7 @@ rule build_industrial_production_per_country:
     input:
         ammonia_production=resources("ammonia_production.csv"),
         jrc="data/bundle-sector/jrc-idees-2015",
-        eurostat="data/bundle-sector/eurostat-energy_balances-may_2018_edition",
+        eurostat="data/eurostat/eurostat-energy_balances-april_2023_edition",
     output:
         industrial_production_per_country=resources(
             "industrial_production_per_country.csv"
diff --git a/scripts/build_industrial_production_per_country.py b/scripts/build_industrial_production_per_country.py
index 2ad37d3f9..5c14b065e 100644
--- a/scripts/build_industrial_production_per_country.py
+++ b/scripts/build_industrial_production_per_country.py
@@ -97,33 +97,18 @@
     "Other Industrial Sectors": "Physical output (index)",
 }
 
-eb_names = {
-    "NO": "Norway",
-    "AL": "Albania",
-    "BA": "Bosnia and Herzegovina",
-    "MK": "FYR of Macedonia",
-    "GE": "Georgia",
-    "IS": "Iceland",
-    "KO": "Kosovo",
-    "MD": "Moldova",
-    "ME": "Montenegro",
-    "RS": "Serbia",
-    "UA": "Ukraine",
-    "TR": "Turkey",
-}
-
 eb_sectors = {
-    "Iron & steel industry": "Iron and steel",
-    "Chemical and Petrochemical industry": "Chemicals Industry",
-    "Non-ferrous metal industry": "Non-metallic mineral products",
-    "Paper, Pulp and Print": "Pulp, paper and printing",
-    "Food and Tabacco": "Food, beverages and tobacco",
-    "Non-metallic Minerals (Glass, pottery & building mat. Industry)": "Non Ferrous Metals",
-    "Transport Equipment": "Transport Equipment",
+    "Iron & steel": "Iron and steel",
+    "Chemical & petrochemical": "Chemicals Industry",
+    "Non-ferrous metals": "Non-metallic mineral products",
+    "Paper, pulp & printing": "Pulp, paper and printing",
+    "Food, beverages & tobacco": "Food, beverages and tobacco",
+    "Non-metallic minerals": "Non Ferrous Metals",
+    "Transport equipment": "Transport Equipment",
     "Machinery": "Machinery Equipment",
-    "Textile and Leather": "Textiles and leather",
-    "Wood and Wood Products": "Wood and wood products",
-    "Non-specified (Industry)": "Other Industrial Sectors",
+    "Textile & leather": "Textiles and leather",
+    "Wood & wood products": "Wood and wood products",
+    "Not elsewhere specified (industry)": "Other Industrial Sectors",
 }
 
 # TODO: this should go in a csv in `data`
@@ -160,12 +145,15 @@ def get_energy_ratio(country, eurostat_dir, jrc_dir, year):
         e_country = e_switzerland * tj_to_ktoe
     else:
         # estimate physical output, energy consumption in the sector and country
-        fn = f"{eurostat_dir}/{eb_names[country]}.XLSX"
-        with mute_print():
-            df = pd.read_excel(
-                fn, sheet_name="2016", index_col=2, header=0, skiprows=1
-            ).squeeze("columns")
-        e_country = df.loc[eb_sectors.keys(), "Total all products"].rename(eb_sectors)
+        fn = f"{eurostat_dir}/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
+        df = pd.read_excel(
+            fn,
+            sheet_name=str(min(2021, year)),
+            index_col=2,
+            header=0,
+            skiprows=4,
+        )
+        e_country = df.loc[eb_sectors.keys(), "Total"].rename(eb_sectors)
 
     fn = f"{jrc_dir}/JRC-IDEES-2015_Industry_EU28.xlsx"
 

From 5b513f81db7b52d79eb4f316df08d7bbe232ec9c Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 5 Mar 2024 18:42:15 +0100
Subject: [PATCH 09/14] move eurostat into data/eurostat subdirectory from
 sector-bundle

---
 doc/configtables/energy.csv       |  3 ---
 rules/build_sector.smk            |  4 ++--
 rules/postprocess.smk             |  2 +-
 rules/retrieve.smk                | 18 ++----------------
 scripts/retrieve_eurostat_data.py |  4 ++--
 5 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/doc/configtables/energy.csv b/doc/configtables/energy.csv
index 8718d75ed..3d13b9c38 100644
--- a/doc/configtables/energy.csv
+++ b/doc/configtables/energy.csv
@@ -1,7 +1,4 @@
 ,Unit,Values,Description
 energy_totals_year ,--,"{1990,1995,2000,2005,2010,2011,…} ",The year for the sector energy use. The year must be avaliable in the Eurostat report
 base_emissions_year ,--,"YYYY; e.g. 1990","The base year for the sector emissions. See `European Environment Agency (EEA) <https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16>`_."
-
-eurostat_report_year ,--,"{2016,2017,2018}","The publication year of the Eurostat report. 2016 includes Bosnia and Herzegovina, 2017 does not"
-
 emissions ,--,"{CO2, All greenhouse gases - (CO2 equivalent)}","Specify which sectoral emissions are taken into account. Data derived from EEA. Currently only CO2 is implemented."
diff --git a/rules/build_sector.smk b/rules/build_sector.smk
index 5e19a4bb8..2ca0d9a42 100644
--- a/rules/build_sector.smk
+++ b/rules/build_sector.smk
@@ -270,7 +270,7 @@ rule build_energy_totals:
         swiss="data/switzerland-new_format-all_years.csv",
         idees="data/bundle-sector/jrc-idees-2015",
         district_heat_share="data/district_heat_share.csv",
-        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
+        eurostat="data/eurostat/eurostat-energy_balances-april_2023_edition",
     output:
         energy_name=resources("energy_totals.csv"),
         co2_name=resources("co2_totals.csv"),
@@ -865,7 +865,7 @@ rule prepare_sector_network:
         ),
         network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"),
         energy_totals_name=resources("energy_totals.csv"),
-        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
+        eurostat="data/eurostat/eurostat-energy_balances-april_2023_edition",
         pop_weighted_energy_totals=resources(
             "pop_weighted_energy_totals_s{simpl}_{clusters}.csv"
         ),
diff --git a/rules/postprocess.smk b/rules/postprocess.smk
index 5f93540ff..ecfc5b743 100644
--- a/rules/postprocess.smk
+++ b/rules/postprocess.smk
@@ -247,7 +247,7 @@ rule plot_summary:
         costs=RESULTS + "csvs/costs.csv",
         energy=RESULTS + "csvs/energy.csv",
         balances=RESULTS + "csvs/supply_energy.csv",
-        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
+        eurostat="data/eurostat/eurostat-energy_balances-april_2023_edition",
         co2="data/bundle-sector/eea/UNFCCC_v23.csv",
     output:
         costs=RESULTS + "graphs/costs.pdf",
diff --git a/rules/retrieve.smk b/rules/retrieve.smk
index cfe698fc6..b516a0cdc 100644
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -135,20 +135,10 @@ if config["enable"]["retrieve"] and config["enable"].get(
         "h2_salt_caverns_GWh_per_sqkm.geojson",
     ]
 
-    datafolders = [
-        protected(
-            directory("data/bundle-sector/eurostat-energy_balances-june_2016_edition")
-        ),
-        protected(
-            directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
-        ),
-        protected(directory("data/bundle-sector/jrc-idees-2015")),
-    ]
-
     rule retrieve_sector_databundle:
         output:
             protected(expand("data/bundle-sector/{files}", files=datafiles)),
-            *datafolders,
+            protected(directory("data/bundle-sector/jrc-idees-2015")),
         log:
             "logs/retrieve_sector_databundle.log",
         retries: 2
@@ -159,11 +149,7 @@ if config["enable"]["retrieve"] and config["enable"].get(
 
     rule retrieve_eurostat_data:
         output:
-            protected(
-                directory(
-                    "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
-                )
-            ),
+            directory("data/eurostat/eurostat-energy_balances-april_2023_edition"),
         log:
             "logs/retrieve_eurostat_data.log",
         retries: 2
diff --git a/scripts/retrieve_eurostat_data.py b/scripts/retrieve_eurostat_data.py
index daee4fc45..4b4cea4ac 100644
--- a/scripts/retrieve_eurostat_data.py
+++ b/scripts/retrieve_eurostat_data.py
@@ -28,9 +28,9 @@
 
     disable_progress = snakemake.config["run"].get("disable_progressbar", False)
     url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
-    tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
+    tarball_fn = Path(f"{rootpath}/data/eurostat/eurostat_2023.zip")
     to_fn = Path(
-        f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/"
+        f"{rootpath}/data/eurostat/eurostat-energy_balances-april_2023_edition/"
     )
 
     logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")

From bf60da973b0370dcce3b5b7d7fd166cd73839351 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 5 Mar 2024 18:43:24 +0100
Subject: [PATCH 10/14] build_energy_totals: revision of eurostat report
 upgrade

---
 scripts/build_energy_totals.py | 202 +++++++++++----------------------
 1 file changed, 64 insertions(+), 138 deletions(-)

diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index c114b7072..1ffc4ae2c 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -37,54 +37,6 @@ def reverse(dictionary):
     return {v: k for k, v in dictionary.items()}
 
 
-eurostat_codes = {
-    "EU28": "EU",
-    "EA19": "EA",
-    "Belgium": "BE",
-    "Bulgaria": "BG",
-    "Czech Republic": "CZ",
-    "Denmark": "DK",
-    "Germany": "DE",
-    "Estonia": "EE",
-    "Ireland": "IE",
-    "Greece": "GR",
-    "Spain": "ES",
-    "France": "FR",
-    "Croatia": "HR",
-    "Italy": "IT",
-    "Cyprus": "CY",
-    "Latvia": "LV",
-    "Lithuania": "LT",
-    "Luxembourg": "LU",
-    "Hungary": "HU",
-    "Malta": "MA",
-    "Netherlands": "NL",
-    "Austria": "AT",
-    "Poland": "PL",
-    "Portugal": "PT",
-    "Romania": "RO",
-    "Slovenia": "SI",
-    "Slovakia": "SK",
-    "Finland": "FI",
-    "Sweden": "SE",
-    "United Kingdom": "GB",
-    "Iceland": "IS",
-    "Norway": "NO",
-    "Montenegro": "ME",
-    "FYR of Macedonia": "MK",
-    "Albania": "AL",
-    "Serbia": "RS",
-    "Turkey": "TU",
-    "Bosnia and Herzegovina": "BA",
-    "Kosovo\n(UNSCR 1244/99)": "KO",  # 2017 version
-    # 2016 version
-    "Kosovo\n(under United Nations Security Council Resolution 1244/99)": "KO",
-    "Moldova": "MO",
-    "Ukraine": "UK",
-    "Switzerland": "CH",
-}
-
-
 idees_rename = {"GR": "EL", "GB": "UK"}
 
 eu28 = cc.EU28as("ISO2").ISO2.tolist()
@@ -121,79 +73,54 @@ def build_eurostat(input_eurostat, countries, year):
     """
     Return multi-index for all countries' energy data in TWh/a.
     """
-    # read in every country file in countries
-    eurostat = pd.DataFrame()
-    countries = [country if country != "GB" else "UK" for country in countries]
-    countries = [country if country != "GR" else "EL" for country in countries]
+    df = {}
+    countries = {idees_rename.get(country, country) for country in countries} - {"CH"}
     for country in countries:
-        filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
-        if os.path.exists(input_eurostat + filename):
-            df = pd.read_excel(
-                input_eurostat + filename,
-                engine="pyxlsb",
-                sheet_name=str(year),
-                skiprows=4,
-                index_col=list(range(4)),
-            )
-            # replace entry 'Z' with 0
-            df.replace("Z", 0, inplace=True)
-            # write 'International aviation' to the 2nd level of the multiindex
-            index_number = (
-                df.index.get_level_values(1) == "International aviation"
-            ).argmax()
-            new_index = (
-                "-",
-                "International aviation",
-                "International aviation",
-                "ktoe",
-            )
-            modified_index = list(df.index)
-            modified_index[index_number] = new_index
-            df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
-            # drop the annoying subhead line
-            df.drop(df[df[year] == year].index, inplace=True)
-            # replace 'Z' with 0
-            df = df.replace("Z", 0)
-            # add country to the multiindex
-            new_tuple = [(country, *idx) for idx in df.index]
-            new_mindex = pd.MultiIndex.from_tuples(
-                new_tuple, names=["country", None, "name", None, "unit"]
-            )
-            df.index = new_mindex
-            # make numeric values where possible
-            df = df.apply(pd.to_numeric, errors="coerce")
-            # drop non-numeric columns
-            non_numeric_cols = df.columns[df.dtypes != float]
-            df.drop(non_numeric_cols, axis=1, inplace=True)
-            # concatenate the dataframes
-            eurostat = pd.concat([eurostat, df], axis=0)
-
-    eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
+        filename = (
+            f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
+        )
+        sheet = pd.read_excel(
+            filename,
+            engine="pyxlsb",
+            sheet_name=str(year),
+            skiprows=4,
+            index_col=list(range(4)),
+        )
+        df[country] = sheet
+    df = pd.concat(df, axis=0)
+
+    # drop the 'Unnamed' columns and the column labelled with the year
+    unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")]
+    df.drop(unnamed_cols, axis=1, inplace=True)
+    df.drop(year, axis=1, inplace=True)
+
+    # make numeric values where possible
+    df.replace("Z", 0, inplace=True)
+    df = df.apply(pd.to_numeric, errors="coerce")
+    df = df.select_dtypes(include=[np.number])
+
+    # fill empty index levels of 'International aviation' rows with that label
+    int_avia = df.index.get_level_values(2) == "International aviation"
+    temp = df.loc[int_avia]
+    temp.index = pd.MultiIndex.from_frame(
+        temp.index.to_frame().fillna("International aviation")
+    )
+    df = pd.concat([temp, df.loc[~int_avia]])
+
     # Renaming some indices
-    rename = {
+    index_rename = {
         "Households": "Residential",
         "Commercial & public services": "Services",
         "Domestic navigation": "Domestic Navigation",
+        "International maritime bunkers": "Bunkers",
+        "UK": "GB",  # country codes are index labels, not column labels
     }
-    for name, rename in rename.items():
-        eurostat.index = eurostat.index.set_levels(
-            eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
-            level=3,
-        )
-    new_index = eurostat.index.set_levels(
-        eurostat.index.levels[2].where(
-            eurostat.index.levels[2] != "International maritime bunkers", "Bunkers"
-        ),
-        level=2,
-    )
-    eurostat.index = new_index
-
-    eurostat.rename(columns={"Total": "Total all products"}, inplace=True)
-    eurostat.index = eurostat.index.set_levels(
-        eurostat.index.levels[0].where(eurostat.index.levels[0] != "UK", "GB"), level=0
-    )
+    df.rename(index=index_rename, columns={"Total": "Total all products"}, inplace=True)
+    df.sort_index(inplace=True)
+    df.index.names = [None] * len(df.index.names)
 
-    df = eurostat * 11.63 / 1e3
+    # convert to TWh/a from ktoe/a
+    df *= 11.63 / 1e3
 
     return df
 
@@ -776,25 +703,25 @@ def build_transport_data(countries, population, idees):
     return transport_data
 
 
-def rescale(idees_countries, energy, eurostat):
+def rescale_idees_from_eurostat(
+    idees_countries, energy, eurostat, input_eurostat, countries
+):
     """
     Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat
     data and the 2015 eurostat data.
 
     missing data: ['passenger car efficiency', 'passenger cars']
     """
+    main_cols = ["Total all products", "Electricity"]
     # read in the eurostat data for 2015
-    eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[
-        ["Total all products", "Electricity"]
-    ]
-    eurostat_year = eurostat[["Total all products", "Electricity"]]
+    eurostat_2015 = build_eurostat(input_eurostat, countries, 2015)[main_cols]
+    eurostat_year = eurostat[main_cols]
     # calculate the ratio of the two data sets
     ratio = eurostat_year / eurostat_2015
     ratio = ratio.droplevel([1, 4])
-    ratio.rename(
-        columns={"Total all products": "total", "Electricity": "ele"}, inplace=True
-    )
-    ratio = ratio.rename(index={"EL": "GR"}, level=0)
+    cols_rename = {"Total all products": "total", "Electricity": "ele"}
+    index_rename = {v: k for k, v in idees_rename.items()}
+    ratio.rename(columns=cols_rename, index=index_rename, inplace=True)
 
     mappings = {
         "Residential": {
@@ -887,16 +814,16 @@ def rescale(idees_countries, energy, eurostat):
         for sector, mapping in mappings.items():
             sector_ratio = ratio.loc[(country, slice(None), sector)]
 
-            energy.loc[country, mapping["total"]] *= sector_ratio[["total"]].iloc[0, 0]
-            energy.loc[country, mapping["elec"]] *= sector_ratio[["ele"]].iloc[0, 0]
+            energy.loc[country, mapping["total"]] *= sector_ratio["total"].iloc[0]
+            energy.loc[country, mapping["elec"]] *= sector_ratio["ele"].iloc[0]
 
-        avi_d = ratio.loc[(country, slice(None), "Domestic aviation")]
-        avi_i = ratio.loc[(country, "International aviation", slice(None))]
-        energy.loc[country, avia_inter] *= avi_i[["total"]].iloc[0, 0]
-        energy.loc[country, avia_domestic] *= avi_d[["total"]].iloc[0, 0]
+        avi_d = ratio.loc[(country, slice(None), "Domestic aviation"), "total"]
+        avi_i = ratio.loc[(country, "International aviation", slice(None)), "total"]
+        energy.loc[country, avia_inter] *= avi_i.iloc[0]
+        energy.loc[country, avia_domestic] *= avi_d.iloc[0]
 
-        nav = ratio.loc[(country, slice(None), "Domestic Navigation")]
-        energy.loc[country, navigation] *= nav[["total"]].iloc[0, 0]
+        nav = ratio.loc[(country, slice(None), "Domestic Navigation"), "total"]
+        energy.loc[country, navigation] *= nav.iloc[0]
 
     return energy
 
@@ -922,17 +849,16 @@ def rescale(idees_countries, energy, eurostat):
     input_eurostat = snakemake.input.eurostat
     eurostat = build_eurostat(input_eurostat, countries, data_year)
     swiss = build_swiss(data_year)
-    # data from idees only exists from 2000-2015
-    if data_year > 2015:
-        # read in latest data and rescale later
-        idees = build_idees(idees_countries, 2015)
-    else:
-        idees = build_idees(idees_countries, data_year)
+    # IDEES data only exists for 2000-2015; for later years, read 2015 and rescale later
+    idees = build_idees(idees_countries, min(2015, data_year))
 
     energy = build_energy_totals(countries, eurostat, swiss, idees)
 
     if data_year > 2015:
-        energy = rescale(idees_countries, energy, eurostat)
+        logger.info("Data year is after 2015. Rescaling IDEES data based on eurostat.")
+        energy = rescale_idees_from_eurostat(
+            idees_countries, energy, eurostat, input_eurostat, countries
+        )
 
     energy.to_csv(snakemake.output.energy_name)
 

From c13e0b83cff1f8a7ac2d6c9cd07ba01bd2b235f8 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 5 Mar 2024 18:55:59 +0100
Subject: [PATCH 11/14] remove remaining references of eurostat report year

---
 rules/build_sector.smk            |  1 -
 rules/postprocess.smk             |  1 -
 scripts/plot_summary.py           |  2 --
 scripts/prepare_sector_network.py | 14 ++++----------
 4 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/rules/build_sector.smk b/rules/build_sector.smk
index 2ca0d9a42..9147a6231 100644
--- a/rules/build_sector.smk
+++ b/rules/build_sector.smk
@@ -834,7 +834,6 @@ rule prepare_sector_network:
         countries=config_provider("countries"),
         adjustments=config_provider("adjustments", "sector"),
         emissions_scope=config_provider("energy", "emissions"),
-        eurostat_report_year=config_provider("energy", "eurostat_report_year"),
         RDIR=RDIR,
     input:
         unpack(input_profile_offwind),
diff --git a/rules/postprocess.smk b/rules/postprocess.smk
index ecfc5b743..1b188829f 100644
--- a/rules/postprocess.smk
+++ b/rules/postprocess.smk
@@ -237,7 +237,6 @@ rule plot_summary:
         countries=config_provider("countries"),
         planning_horizons=config_provider("scenario", "planning_horizons"),
         emissions_scope=config_provider("energy", "emissions"),
-        eurostat_report_year=config_provider("energy", "eurostat_report_year"),
         plotting=config_provider("plotting"),
         foresight=config_provider("foresight"),
         co2_budget=config_provider("co2_budget"),
diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py
index c2fd7e04d..bfe9995fe 100644
--- a/scripts/plot_summary.py
+++ b/scripts/plot_summary.py
@@ -462,7 +462,6 @@ def plot_carbon_budget_distribution(input_eurostat, options):
     plt.rcParams["ytick.labelsize"] = 20
 
     emissions_scope = snakemake.params.emissions_scope
-    report_year = snakemake.params.eurostat_report_year
     input_co2 = snakemake.input.co2
 
     # historic emissions
@@ -472,7 +471,6 @@ def plot_carbon_budget_distribution(input_eurostat, options):
         input_eurostat,
         options,
         emissions_scope,
-        report_year,
         input_co2,
         year=1990,
     )
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 9a9db36fa..8b001da37 100755
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -248,7 +248,7 @@ def get(item, investment_year=None):
 
 
 def co2_emissions_year(
-    countries, input_eurostat, options, emissions_scope, report_year, input_co2, year
+    countries, input_eurostat, options, emissions_scope, input_co2, year
 ):
     """
     Calculate CO2 emissions in one specific year (e.g. 1990 or 2018).
@@ -258,11 +258,9 @@ def co2_emissions_year(
     # TODO: read Eurostat data from year > 2014
     # this only affects the estimation of CO2 emissions for BA, RS, AL, ME, MK
     if year > 2014:
-        eurostat_co2 = build_eurostat_co2(
-            input_eurostat, countries, report_year, year=2014
-        )
+        eurostat_co2 = build_eurostat_co2(input_eurostat, countries, 2014)
     else:
-        eurostat_co2 = build_eurostat_co2(input_eurostat, countries, report_year, year)
+        eurostat_co2 = build_eurostat_co2(input_eurostat, countries, year)
 
     co2_totals = build_co2_totals(countries, eea_co2, eurostat_co2)
 
@@ -278,7 +276,7 @@ def co2_emissions_year(
 
 # TODO: move to own rule with sector-opts wildcard?
 def build_carbon_budget(
-    o, input_eurostat, fn, emissions_scope, report_year, input_co2, options
+    o, input_eurostat, fn, emissions_scope, input_co2, options
 ):
     """
     Distribute carbon budget following beta or exponential transition path.
@@ -300,7 +298,6 @@ def build_carbon_budget(
         input_eurostat,
         options,
         emissions_scope,
-        report_year,
         input_co2,
         year=1990,
     )
@@ -311,7 +308,6 @@ def build_carbon_budget(
         input_eurostat,
         options,
         emissions_scope,
-        report_year,
         input_co2,
         year=2018,
     )
@@ -3669,14 +3665,12 @@ def lossy_bidirectional_links(n, carrier, efficiencies={}):
         fn = "results/" + snakemake.params.RDIR + "/csvs/carbon_budget_distribution.csv"
         if not os.path.exists(fn):
             emissions_scope = snakemake.params.emissions_scope
-            report_year = snakemake.params.eurostat_report_year
             input_co2 = snakemake.input.co2
             build_carbon_budget(
                 co2_budget,
                 snakemake.input.eurostat,
                 fn,
                 emissions_scope,
-                report_year,
                 input_co2,
                 options,
             )

From ca91c02bf77e5d33c787d2e4b78f65e2c694dfce Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 5 Mar 2024 17:56:39 +0000
Subject: [PATCH 12/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/prepare_sector_network.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 8b001da37..2e8bf6fdb 100755
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -275,9 +275,7 @@ def co2_emissions_year(
 
 
 # TODO: move to own rule with sector-opts wildcard?
-def build_carbon_budget(
-    o, input_eurostat, fn, emissions_scope, input_co2, options
-):
+def build_carbon_budget(o, input_eurostat, fn, emissions_scope, input_co2, options):
     """
     Distribute carbon budget following beta or exponential transition path.
     """

From 85ceb9ad54875f84fd52ea643fd0f0f333d19ed1 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 5 Mar 2024 19:03:48 +0100
Subject: [PATCH 13/14] add release notes

---
 doc/release_notes.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 8167f6dd2..de08873dc 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -9,6 +9,19 @@ Release Notes
 
 Upcoming Release
 ================
+
+* The Eurostat data was updated to the 2023 version in :mod:`build_energy_totals`.
+
+* The latest `Swiss energy totals
+  <https://www.bfe.admin.ch/bfe/de/home/versorgung/statistik-und-geodaten/energiestatistiken/energieverbrauch-nach-verwendungszweck.html/>`_
+  have been updated to the 2023 version.
+
+* The JRC-IDEES data is only available until 2015. For energy totals years
+  (``energy: energy_totals_year``) after 2015, the 2015 data is scaled using the
+  ratio of the Eurostat data reported for the energy totals year to that for 2015.
+
+* The default energy totals year (``energy: energy_totals_year``) was updated to 2019.
+
 * Upgrade default techno-economic assumptions to ``technology-data`` v0.8.1.
 
 * Linearly interpolate missing investment periods in year-dependent

From da8119e47ee2e686fe063d31ac9ff3e79b2f6508 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 5 Mar 2024 19:08:22 +0100
Subject: [PATCH 14/14] reset CI data cache

---
 .github/workflows/ci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index bad6039f9..c17c0425f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -19,7 +19,7 @@ on:
   - cron: "0 5 * * TUE"
 
 env:
-  DATA_CACHE_NUMBER: 2
+  DATA_CACHE_NUMBER: 1
 
 jobs:
   build: