Skip to content

Commit

Permalink
PR fixes: better naming, better rules, more assert checks
Browse files Browse the repository at this point in the history
  • Loading branch information
irm-codebase committed Jul 4, 2024
1 parent de2ba7f commit f734975
Show file tree
Hide file tree
Showing 8 changed files with 172 additions and 178 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

### Added (models)

* **ADD** industry module and steel industry energy demand processing. NOT CONNECTED TO THE MAIN WORKFLOW. Industry sectors pending: chemical. (Fixes #308, #309, #310, #347, #345 and #346)
* **ADD** Industry module: iron and steel, "default" combined categories. NOT CONNECTED TO THE MAIN WORKFLOW. (Fixes #308, #309, #310, #347, #345 and #346)

* **ADD** Spatial resolution that aligns with the regions defined by the [e-Highway 2050 project](https://cordis.europa.eu/project/id/308908/reporting) (`ehighways`) (#370).

Expand Down
20 changes: 10 additions & 10 deletions modules/industry/config.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
industry:
inputs:
path-energy-balances: build/data/annual-energy-balances.csv
path-cat-names: config/energy-balances/energy-balance-category-names.csv
path-carrier-names: config/energy-balances/energy-balance-carrier-names.csv
path-jrc-industry-energy: build/data/jrc-idees/industry/processed-energy.nc
path-jrc-industry-production: build/data/jrc-idees/industry/processed-production.nc
input-paths:
energy-balances: build/data/annual-energy-balances.csv
cat-names: config/energy-balances/energy-balance-category-names.csv
carrier-names: config/energy-balances/energy-balance-carrier-names.csv
jrc-industry-energy: build/data/jrc-idees/industry/processed-energy.nc
jrc-industry-production: build/data/jrc-idees/industry/processed-production.nc
outputs:
placeholder-out1:
placeholder-out2:
params:
non-generic-categories: ["Iron and steel", "Chemicals Industry"]
steel-config:
recycled-steel-share: 0.5 # share (0-1) of recycled scrap steel for H-DRI
generic-config:
specific-categories: ["Iron and steel", "Chemicals Industry"]
config-combined-categories:
final-energy-method: "by priority"
final-energy-carriers: ["Electricity", "Natural gas (incl. biogas)", "Diesel oil (incl. biofuels)"]
useful-demands: ["Low enthalpy heat"]
config-iron-and-steel:
recycled-steel-share: 0.5 # share (0-1) of recycled scrap steel for H-DRI
84 changes: 43 additions & 41 deletions modules/industry/industry.smk
Original file line number Diff line number Diff line change
@@ -1,66 +1,68 @@
from snakemake.utils import validate

# Paths dependent on main Snakefile
MODULE_PATH = "modules/industry"
MODULE_PATH = "modules/industry" # TODO: remove if the module becomes an imported external workflow
BUILD_PATH = f"{MODULE_PATH}/build"
DATA_PATH = f"{MODULE_PATH}/raw_data"

# Paths relative to this snakefile (snakemake behaviour is inconsistent)
SCRIPT_PATH = "scripts" # scripts are called relative to this file
CONDA_PATH = "./env_industry.yaml"

configfile: "./config.yaml"
validate(config, "./schema.yaml")

# Ensure rules are defined in order.
# Otherwise commands like "rules.rulename.output" won't work!
if "Iron and steel" in config["params"]["non-generic-categories"]:
rule steel_processing:
message: "Calculate energy demand for the 'Iron and steel' sector in JRC-IDEES."
conda: CONDA_PATH
params:
steel_config = config["params"]["steel-config"]
input:
path_energy_balances = config["inputs"]["path-energy-balances"],
path_cat_names = config["inputs"]["path-cat-names"],
path_carrier_names = config["inputs"]["path-carrier-names"],
path_jrc_industry_energy = config["inputs"]["path-jrc-industry-energy"],
path_jrc_industry_production = config["inputs"]["path-jrc-industry-production"],
output:
path_output = f"{BUILD_PATH}/annual_demand_steel.nc"
script: f"{SCRIPT_PATH}/steel_processing.py"
rule iron_and_steel:
message: "Calculate energy demand for the 'Iron and steel' sector in JRC-IDEES."
conda: CONDA_PATH
params:
config = config["params"]["config-iron-and-steel"]
input:
energy_balances = config["input-paths"]["energy-balances"],
cat_names = config["input-paths"]["cat-names"],
carrier_names = config["input-paths"]["carrier-names"],
jrc_industry_energy = config["input-paths"]["jrc-industry-energy"],
jrc_industry_production = config["input-paths"]["jrc-industry-production"],
output:
path_output = f"{BUILD_PATH}/annual_demand_iron_and_steel.nc"
script: f"{SCRIPT_PATH}/steel_processing.py"

if "Chemicals Industry" in config["params"]["non-generic-categories"]:
rule chemicals_processing:
message: "."
conda: CONDA_PATH
params:
input:
output:
script: f"{SCRIPT_PATH}/chemicals_processing.py"
rule chemicals_industry:
message: "."
# conda: CONDA_PATH
params:
input:
output: f"{BUILD_PATH}/annual_demand_chemicals_industry.nc"
shell:
"touch {output}"
# script: f"{SCRIPT_PATH}/annual_demand_chemicals_industry.py"

rule generic_processing:
rule combined_categories:
message: "Calculate energy demand for all other industry sectors in JRC-IDEES."
conda: CONDA_PATH
params:
non_generic_categories = config["params"]["non-generic-categories"],
generic_config = config["params"]["generic-config"],
specific_categories = config["params"]["specific-categories"],
config = config["params"]["config-combined-categories"],
input:
path_energy_balances = config["inputs"]["path-energy-balances"],
path_cat_names = config["inputs"]["path-cat-names"],
path_carrier_names = config["inputs"]["path-carrier-names"],
path_jrc_industry_energy = config["inputs"]["path-jrc-industry-energy"],
path_jrc_industry_production = config["inputs"]["path-jrc-industry-production"],
output:
path_output = f"{BUILD_PATH}/annual_demand_generic.nc"
energy_balances = config["input-paths"]["energy-balances"],
cat_names = config["input-paths"]["cat-names"],
carrier_names = config["input-paths"]["carrier-names"],
jrc_industry_energy = config["input-paths"]["jrc-industry-energy"],
jrc_industry_production = config["input-paths"]["jrc-industry-production"],
output: f"{BUILD_PATH}/annual_demand_combined_categories.nc"
script: f"{SCRIPT_PATH}/generic_processing.py"

# rule combine_and_scale:
# message: "."
# conda: CONDA_PATH
# params:
# input:
# output:
# script:
SUFFIXES = [i.lower().replace(" ", "_") for i in config["params"]["specific-categories"]]
rule combine_and_scale:
message: "Identify the category scripts to run based on the configuration."
conda: CONDA_PATH
input:
expand("{path}/annual_demand_{sample}.nc", path=[BUILD_PATH], sample=SUFFIXES),
rules.combined_categories.output
# output: "{BUILD_PATH}/annual_demand_aggregated.nc"


# rule verify:
# message: "."
Expand Down
40 changes: 21 additions & 19 deletions modules/industry/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,31 @@ $schema: https://json-schema.org/draft/2020-12/schema
type: object
additionalProperties: false
properties:
inputs:
input-paths:
type: object
additionalProperties: false
description: Inputs are paths of prerequired files.
properties:
path-energy-balances:
energy-balances:
type: string
description: |
Annual energy balance file.
Columns [cat_code,carrier_code,unit,country,year,value].
path-cat-names:
cat-names:
type: string
description: |
Category mapping file.
Columns [cat_code,top_cat,sub_cat_contribution,sub_cat_1,sub_cat_2,jrc_idees].
path-carrier-names:
carrier-names:
type: string
description: |
Carrier mapping file.
Columns [carrier_code,carrier_name,hh_carrier_name,com_carrier_name,ind_carrier_name,oth_carrier_name].
path-jrc-industry-energy:
jrc-industry-energy:
type: string
description: |
JRC processed industry energy demand .nc file.
path-jrc-industry-production:
jrc-industry-production:
type: string
description: |
JRC processed industrial production .nc file.
Expand All @@ -38,24 +38,16 @@ properties:
additionalProperties: false
description: Parameters allow users to configure module behaviour.
properties:
non-generic-categories:
specific-categories:
type: array
description: "Specifies which JRC industry categories will be processed separately."
description: |
Specifies which JRC industry categories will be processed through category-specific rules.
Omitted categories will instead be processed through the "combined" category rule.
uniqueItems: true
items:
type: string
enum: ["Iron and steel", "Chemicals Industry"]
steel-config:
type: object
additionalProperties: false
description: "Parameters specific to the 'Iron and steel' industry category."
properties:
recycled-steel-share:
type: number
description: "Share of recycled metal in the H-DRI steel process."
minimum: 0
maximum: 1
generic-config:
config-combined-categories:
type: object
additionalProperties: false
description: "Parameters for default/generic category processing."
Expand All @@ -82,3 +74,13 @@ properties:
uniqueItems: true
items:
type: string
config-iron-and-steel:
type: object
additionalProperties: false
description: "Parameters specific to the 'Iron and steel' industry category."
properties:
recycled-steel-share:
type: number
description: "Share of recycled metal in the H-DRI steel process."
minimum: 0
maximum: 1
92 changes: 44 additions & 48 deletions modules/industry/scripts/generic_processing.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,84 @@
from typing import Optional

import pandas as pd
import xarray as xr
from utils import filling
from utils import jrc_idees_parser as jrc


def get_generic_demand(
non_generic_categories: list,
generic_config: dict,
path_energy_balances: str,
path_cat_names: str,
path_carrier_names: str,
path_jrc_industry_energy: str,
path_jrc_industry_production: str,
path_output: Optional[str] = None,
) -> xr.DataArray:
"""Processing of industry categories not selected for individual processing.
specific_categories: list,
config: dict,
energy_balances: str,
cat_names: str,
carrier_names: str,
jrc_industry_energy: str,
jrc_industry_production: str,
output_path: str,
):
"""Processing of industry categories not selected for specific processing.
Merges all energy demand into a single `generic` category using a configurable data processing pipeline.
Args:
non_generic_categories (list): categories with separate processing (will be ignored).
generic_config (dict): configuration for generic category processing.
path_energy_balances (str): country energy balances (usually from eurostat).
path_cat_names (str): eurostat category mapping file.
path_carrier_names (str): eurostat carrier name mapping file.
path_jrc_industry_energy (str): jrc country-specific industrial energy demand file.
path_jrc_industry_production (str): jrc country-specific industrial production file.
path_output (str): location of the generic demand output file.
specific_categories (list): categories with separate processing (will be ignored).
config (dict): configuration for generic category processing.
energy_balances (str): country energy balances (usually from eurostat).
cat_names (str): eurostat category mapping file.
carrier_names (str): eurostat carrier name mapping file.
jrc_industry_energy (str): jrc country-specific industrial energy demand file.
jrc_industry_production (str): jrc country-specific industrial production file.
output_path (str): location of the generic demand output file.
Returns:
pd.DataFrame: dataframe with industrial demand per country.
"""
# Load data
energy_balances_df = pd.read_csv(
path_energy_balances, index_col=[0, 1, 2, 3, 4]
energy_balances, index_col=[0, 1, 2, 3, 4]
).squeeze("columns")
cat_names_df = pd.read_csv(path_cat_names, header=0, index_col=0)
carrier_names_df = pd.read_csv(path_carrier_names, header=0, index_col=0)
jrc_energy = xr.open_dataset(path_jrc_industry_energy)
jrc_prod = xr.open_dataarray(path_jrc_industry_production)
cat_names_df = pd.read_csv(cat_names, header=0, index_col=0)
carrier_names_df = pd.read_csv(carrier_names, header=0, index_col=0)
jrc_energy = xr.open_dataset(jrc_industry_energy)
jrc_prod = xr.open_dataarray(jrc_industry_production)
jrc.check_units(jrc_energy, jrc_prod)

# Remove data from all specifically processed industries
cat_names_df = cat_names_df[~cat_names_df["jrc_idees"].isin(non_generic_categories)]
jrc_energy = jrc_energy.drop_sel(cat_name=non_generic_categories)
jrc_prod = jrc_prod.drop_sel(cat_name=non_generic_categories)
cat_names_df = cat_names_df[~cat_names_df["jrc_idees"].isin(specific_categories)]
jrc_energy = jrc_energy.drop_sel(cat_name=specific_categories)
jrc_prod = jrc_prod.drop_sel(cat_name=specific_categories)

# Process data:
# Extract useful dem. -> remove useful dem. from rest -> extract final dem.
selected_useful = generic_config["useful-demands"]
# Extract useful demand -> remove useful demand from rest -> extract final demand
selected_useful = config["useful-demands"]
other_useful_demand = jrc.convert_subsection_demand_to_carrier(
jrc_energy, selected_useful
)

final_method = generic_config["final-energy-method"]
final_method = config["final-energy-method"]
jrc_energy = jrc_energy.drop_sel(subsection=selected_useful)

match final_method:
case "by priority":
other_final_demand = transform_final_demand_by_priority(
jrc_energy, generic_config["final-energy-carriers"]
jrc_energy, config["final-energy-carriers"]
)
case "keep everything":
other_final_demand = jrc_energy["final"].sum(["section", "subsection"])
other_final_demand = jrc.standardize(other_final_demand, "twh")
case _:
raise ValueError(f"Unsupported final energy method: {final_method}.")

# Combine and fill missing countries
other_demand = xr.concat(
[other_useful_demand, other_final_demand], dim="carrier_name"
)

assert other_demand.sum() < jrc_energy["final"].sum(), "Potential double counting!"

other_demand = filling.fill_missing_countries_years(
energy_balances_df, cat_names_df, carrier_names_df, other_demand
)

other_demand = jrc.standardize(other_demand, "twh")

if path_output:
other_demand.to_netcdf(path_output)

return other_demand
other_demand = jrc.standardize(other_demand, "twh", "demand")
other_demand.to_netcdf(output_path)


def transform_final_demand_by_priority(
Expand Down Expand Up @@ -110,7 +106,7 @@ def transform_final_demand_by_priority(
carrier_final_dem = {}

for carrier in carrier_priority:
dem_replaced = jrc.replace_final_demand_by_carrier(carrier, jrc_energy)
dem_replaced = jrc.replace_carrier_final_demand(carrier, jrc_energy)
dem_replaced = dem_replaced.to_dataframe().dropna()
for dem_replaced_prev in carrier_final_dem.values():
dem_replaced = dem_replaced.drop(dem_replaced_prev.index, errors="ignore")
Expand All @@ -131,12 +127,12 @@ def transform_final_demand_by_priority(

if __name__ == "__main__":
get_generic_demand(
non_generic_categories=snakemake.params.non_generic_categories,
generic_config=snakemake.params.generic_config,
path_energy_balances=snakemake.input.path_energy_balances,
path_cat_names=snakemake.input.path_cat_names,
path_carrier_names=snakemake.input.path_carrier_names,
path_jrc_industry_energy=snakemake.input.path_jrc_industry_energy,
path_jrc_industry_production=snakemake.input.path_jrc_industry_production,
path_output=snakemake.output.path_output,
specific_categories=snakemake.params.specific_categories,
config=snakemake.params.config,
energy_balances=snakemake.input.energy_balances,
cat_names=snakemake.input.cat_names,
carrier_names=snakemake.input.carrier_names,
jrc_industry_energy=snakemake.input.jrc_industry_energy,
jrc_industry_production=snakemake.input.jrc_industry_production,
output_path=snakemake.output[0],
)
Loading

0 comments on commit f734975

Please sign in to comment.