diff --git a/Snakefile b/Snakefile index be2545ada..eb99437bf 100644 --- a/Snakefile +++ b/Snakefile @@ -54,6 +54,7 @@ include: "rules/build_sector.smk" include: "rules/solve_electricity.smk" include: "rules/postprocess.smk" include: "rules/validate.smk"
+include: "rules/development.smk"
 if config["foresight"] == "overnight":
diff --git a/config/config.default.yaml b/config/config.default.yaml index 9d565b633..0fd6e9869 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -74,7 +74,6 @@ enable: custom_busmap: false drop_leap_day: true
-
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: 2020: 0.701 @@ -87,7 +86,8 @@ co2_budget: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity:
- voltages: [220., 300., 380., 500., 750.]
+ voltages: [200., 220., 300., 380., 500., 750.]
+ base_network: entsoegridkit
 gaslimit_enable: false gaslimit: false co2limit_enable: false @@ -276,6 +276,7 @@ conventional: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines lines: types:
+ 200.: "Al/St 240/40 2-bundle 200.0"
 220.: "Al/St 240/40 2-bundle 220.0" 300.: "Al/St 240/40 3-bundle 300.0" 380.: "Al/St 240/40 4-bundle 380.0"
diff --git a/doc/configtables/electricity.csv b/doc/configtables/electricity.csv index ee733660c..9bad7bfc6 100644 --- a/doc/configtables/electricity.csv +++ b/doc/configtables/electricity.csv @@ -1,5 +1,6 @@ ,Unit,Values,Description
-voltages,kV,"Any subset of {220., 300., 380.}",Voltage levels to consider
+voltages,kV,"Any subset of {200., 220., 300., 380., 500., 750.}",Voltage levels to consider
+base_network, --, "Any value in {'entsoegridkit', 'osm-prebuilt', 'osm-raw'}", "Specify the underlying base network, i.e. GridKit (based on an ENTSO-E web map extract), OpenStreetMap (OSM) prebuilt, or OSM raw (built from raw OSM data; takes longer)."
 gaslimit_enable,bool,true or false,Add an overall absolute gas limit configured in ``electricity: gaslimit``. gaslimit,MWhth,float or false,Global gas usage limit co2limit_enable,bool,true or false,Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit`` in :mod:`prepare_network`. **Warning:** This option should currently only be used with electricity-only networks, not for sector-coupled networks..
diff --git a/doc/configtables/enable.csv b/doc/configtables/enable.csv index 0268319eb..5359131de 100644 --- a/doc/configtables/enable.csv +++ b/doc/configtables/enable.csv @@ -4,5 +4,5 @@ retrieve_databundle,bool,"{true, false}","Switch to retrieve databundle from zen retrieve_cost_data,bool,"{true, false}","Switch to retrieve technology cost data from `technology-data repository `_." build_cutout,bool,"{true, false}","Switch to enable the building of cutouts via the rule :mod:`build_cutout`." retrieve_cutout,bool,"{true, false}","Switch to enable the retrieval of cutouts from zenodo with :mod:`retrieve_cutout`."
-custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/custom_busmap_elec_s{simpl}_{clusters}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``."
+custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated, the rule looks for provided busmaps at ``data/busmaps/elec_s{simpl}_{clusters}_{base_network}.csv``, which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``. {base_network} is the name of the selected base network in ``electricity``, e.g. ``entsoegridkit``, ``osm-prebuilt``, or ``osm-raw``."
 drop_leap_day,bool,"{true, false}","Switch to drop February 29 from all time-dependent data in leap years"
diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 3ee042840..4df38b019 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -74,6 +74,8 @@ Upcoming Release * Enable parallelism in :mod:`determine_availability_matrix_MD_UA.py` and remove plots. This requires the use of temporary files.
+* Added a new major feature to create the base network from OpenStreetMap (OSM) data (PR https://github.com/PyPSA/pypsa-eur/pull/1079). Note that a heuristics-based cleaning process is used for lines and links where electrical parameters are incomplete, missing, or ambiguous. Through ``electricity["base_network"]``, the base network can be set to "entsoegridkit" (the original default setting, deprecated soon), "osm-prebuilt" (which downloads the latest prebuilt snapshot based on OSM data from Zenodo), or "osm-raw" (which retrieves the raw OSM data once, cleans it, and subsequently builds the network). Note that this process may take a few minutes.
+
 * Updated pre-built `weather data cutouts `__. These are now merged cutouts with solar irradiation from the new SARAH-3 dataset while taking all other @@ -89,6 +91,8 @@ Upcoming Release * In :mod:`base_network`, replace own voronoi polygon calculation function with Geopandas `gdf.voronoi_polygons` method.
+* Move custom busmaps to ``data/busmaps/elec_s{simpl}_{clusters}_{base_network}.csv`` (if enabled). This allows for different busmaps depending on the base network and scenario.
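For reference, the busmap file the rule now expects is a plain two-column CSV indexed by the bus IDs of the simplified network. A minimal sketch of writing such a file; bus IDs, cluster labels, and the ``{clusters}`` value are hypothetical, not taken from a real network:

```python
# Hypothetical sketch of a custom busmap for base_network "osm-prebuilt".
# Bus IDs and cluster labels are illustrative only.
import pandas as pd
import pypsa

n = pypsa.Network("networks/elec_s.nc")  # simplified network the busmap refers to

busmap = pd.Series(
    {"bus-0001": "DE0 1", "bus-0002": "DE0 1", "bus-0003": "FR0 2"}, name="busmap"
)
busmap.index.name = "Bus"

# the index must cover the buses of the simplified network
assert busmap.index.isin(n.buses.index).all()

busmap.to_csv("data/busmaps/elec_s_10_osm-prebuilt.csv")
```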
+ PyPSA-Eur 0.11.0 (25th May 2024) ===================================== diff --git a/envs/environment.yaml b/envs/environment.yaml index 9115ccd97..98cf858d6 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -47,6 +47,7 @@ dependencies: - pyxlsb - graphviz - pre-commit +- geojson # Keep in conda environment when calling ipython - ipython diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index f35050917..1f568e99c 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -50,6 +50,19 @@ rule build_powerplants: "../scripts/build_powerplants.py" +def input_base_network(w): + base_network = config_provider("electricity", "base_network")(w) + components = {"buses", "lines", "links", "converters", "transformers"} + if base_network == "osm-raw": + inputs = {c: resources(f"osm-raw/build/{c}.csv") for c in components} + else: + inputs = {c: f"data/{base_network}/{c}.csv" for c in components} + if base_network == "entsoegridkit": + inputs["parameter_corrections"] = "data/parameter_corrections.yaml" + inputs["links_p_nom"] = "data/links_p_nom.csv" + return inputs + + rule base_network: params: countries=config_provider("countries"), @@ -58,13 +71,7 @@ rule base_network: lines=config_provider("lines"), transformers=config_provider("transformers"), input: - eg_buses="data/entsoegridkit/buses.csv", - eg_lines="data/entsoegridkit/lines.csv", - eg_links="data/entsoegridkit/links.csv", - eg_converters="data/entsoegridkit/converters.csv", - eg_transformers="data/entsoegridkit/transformers.csv", - parameter_corrections="data/parameter_corrections.yaml", - links_p_nom="data/links_p_nom.csv", + unpack(input_base_network), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), @@ -523,6 +530,15 @@ rule simplify_network: "../scripts/simplify_network.py" +# Optional input when using custom busmaps - Needs to be tailored to selected base_network +def input_cluster_network(w): + if config_provider("enable", "custom_busmap", default=False)(w): + base_network = config_provider("electricity", "base_network")(w) + custom_busmap = f"data/busmaps/elec_s{w.simpl}_{w.clusters}_{base_network}.csv" + return {"custom_busmap": custom_busmap} + return {"custom_busmap": []} + + rule cluster_network: params: cluster_network=config_provider("clustering", "cluster_network"), @@ -539,15 +555,11 @@ rule cluster_network: length_factor=config_provider("lines", "length_factor"), costs=config_provider("costs"), input: + unpack(input_cluster_network), network=resources("networks/elec_s{simpl}.nc"), regions_onshore=resources("regions_onshore_elec_s{simpl}.geojson"), regions_offshore=resources("regions_offshore_elec_s{simpl}.geojson"), busmap=ancient(resources("busmap_elec_s{simpl}.csv")), - custom_busmap=lambda w: ( - "data/custom_busmap_elec_s{simpl}_{clusters}.csv" - if config_provider("enable", "custom_busmap", default=False)(w) - else [] - ), tech_costs=lambda w: resources( f"costs_{config_provider('costs', 'year')(w)}.csv" ), @@ -629,3 +641,79 @@ rule prepare_network: "../envs/environment.yaml" script: "../scripts/prepare_network.py" + + +if config["electricity"]["base_network"] == "osm-raw": + + rule clean_osm_data: + input: + cables_way=expand( + "data/osm-raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + lines_way=expand( + "data/osm-raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + links_relation=expand( + 
"data/osm-raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + substations_way=expand( + "data/osm-raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + substations_relation=expand( + "data/osm-raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), + offshore_shapes=resources("offshore_shapes.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + substations=resources("osm-raw/clean/substations.geojson"), + substations_polygon=resources("osm-raw/clean/substations_polygon.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), + log: + logs("clean_osm_data.log"), + benchmark: + benchmarks("clean_osm_data") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" + script: + "../scripts/clean_osm_data.py" + + +if config["electricity"]["base_network"] == "osm-raw": + + rule build_osm_network: + input: + substations=resources("osm-raw/clean/substations.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + lines=resources("osm-raw/build/lines.csv"), + links=resources("osm-raw/build/links.csv"), + converters=resources("osm-raw/build/converters.csv"), + transformers=resources("osm-raw/build/transformers.csv"), + substations=resources("osm-raw/build/buses.csv"), + lines_geojson=resources("osm-raw/build/geojson/lines.geojson"), + links_geojson=resources("osm-raw/build/geojson/links.geojson"), + converters_geojson=resources("osm-raw/build/geojson/converters.geojson"), + transformers_geojson=resources("osm-raw/build/geojson/transformers.geojson"), + substations_geojson=resources("osm-raw/build/geojson/buses.geojson"), + log: + logs("build_osm_network.log"), + benchmark: + benchmarks("build_osm_network") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" + script: + "../scripts/build_osm_network.py" diff --git a/rules/development.smk b/rules/development.smk new file mode 100644 index 000000000..465490258 --- /dev/null +++ b/rules/development.smk @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +if config["electricity"]["base_network"] == "osm-raw": + + rule prepare_osm_network_release: + input: + base_network=resources("networks/base.nc"), + output: + buses=resources("osm-raw/release/buses.csv"), + converters=resources("osm-raw/release/converters.csv"), + lines=resources("osm-raw/release/lines.csv"), + links=resources("osm-raw/release/links.csv"), + transformers=resources("osm-raw/release/transformers.csv"), + log: + logs("prepare_osm_network_release.log"), + benchmark: + benchmarks("prepare_osm_network_release") + threads: 1 + resources: + mem_mb=1000, + conda: + "../envs/environment.yaml" + script: + "../scripts/prepare_osm_network_release.py" diff --git a/rules/retrieve.smk b/rules/retrieve.smk index de84b2fae..c30696ccf 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -390,3 +390,85 @@ if config["enable"]["retrieve"]: "../envs/retrieve.yaml" script: "../scripts/retrieve_monthly_fuel_prices.py" + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-prebuilt" +): + + rule retrieve_osm_prebuilt: + input: + buses=storage("https://zenodo.org/records/13358976/files/buses.csv"), + converters=storage( + 
"https://zenodo.org/records/13358976/files/converters.csv" + ), + lines=storage("https://zenodo.org/records/13358976/files/lines.csv"), + links=storage("https://zenodo.org/records/13358976/files/links.csv"), + transformers=storage( + "https://zenodo.org/records/13358976/files/transformers.csv" + ), + output: + buses="data/osm-prebuilt/buses.csv", + converters="data/osm-prebuilt/converters.csv", + lines="data/osm-prebuilt/lines.csv", + links="data/osm-prebuilt/links.csv", + transformers="data/osm-prebuilt/transformers.csv", + log: + "logs/retrieve_osm_prebuilt.log", + threads: 1 + resources: + mem_mb=500, + retries: 2 + run: + for key in input.keys(): + move(input[key], output[key]) + validate_checksum(output[key], input[key]) + + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-raw" +): + + rule retrieve_osm_data: + output: + cables_way="data/osm-raw/{country}/cables_way.json", + lines_way="data/osm-raw/{country}/lines_way.json", + links_relation="data/osm-raw/{country}/links_relation.json", + substations_way="data/osm-raw/{country}/substations_way.json", + substations_relation="data/osm-raw/{country}/substations_relation.json", + log: + "logs/retrieve_osm_data_{country}.log", + threads: 1 + conda: + "../envs/retrieve.yaml" + script: + "../scripts/retrieve_osm_data.py" + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-raw" +): + + rule retrieve_osm_data_all: + input: + expand( + "data/osm-raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), diff --git a/scripts/base_network.py b/scripts/base_network.py index 2b0fee63b..afb66387e 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -5,7 +5,11 @@ # coding: utf-8 """ -Creates the network topology from an `ENTSO-E map extract `_ (March 2022) as a PyPSA network. +Creates the network topology from a `ENTSO-E map extract. +`_ (March 2022) +or `OpenStreetMap data `_ (Aug 2024) +as a PyPSA +network. 
Relevant Settings ----------------- @@ -131,45 +135,50 @@ def _find_closest_links(links, new_links, distance_upper_bound=1.5): ) -def _load_buses_from_eg(eg_buses, europe_shape, config_elec): +def _load_buses(buses, europe_shape, config): buses = ( pd.read_csv( - eg_buses, + buses, quotechar="'", true_values=["t"], false_values=["f"], dtype=dict(bus_id="str"), ) .set_index("bus_id") - .drop(["station_id"], axis=1) .rename(columns=dict(voltage="v_nom")) ) + if "station_id" in buses.columns: + buses.drop("station_id", axis=1, inplace=True) + buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) buses["under_construction"] = buses.under_construction.where( lambda s: s.notnull(), False ).astype(bool) - - # remove all buses outside of all countries including exclusive economic zones (offshore) europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] europe_shape_prepped = shapely.prepared.prep(europe_shape) buses_in_europe_b = buses[["x", "y"]].apply( lambda p: europe_shape_prepped.contains(Point(p)), axis=1 ) + v_nom_min = min(config["electricity"]["voltages"]) + v_nom_max = max(config["electricity"]["voltages"]) + buses_with_v_nom_to_keep_b = ( - buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() - ) - logger.info( - f'Removing buses with voltages {pd.Index(buses.v_nom.unique()).dropna().difference(config_elec["voltages"])}' + (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) + | (buses.v_nom.isnull()) + | ( + buses.carrier == "DC" + ) # Keeping all DC buses from the input dataset independent of voltage (e.g. 150 kV connections) ) + logger.info(f"Removing buses outside of range AC {v_nom_min} - {v_nom_max} V") return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) -def _load_transformers_from_eg(buses, eg_transformers): +def _load_transformers(buses, transformers): transformers = pd.read_csv( - eg_transformers, + transformers, quotechar="'", true_values=["t"], false_values=["f"], @@ -181,9 +190,9 @@ def _load_transformers_from_eg(buses, eg_transformers): return transformers -def _load_converters_from_eg(buses, eg_converters): +def _load_converters_from_eg(buses, converters): converters = pd.read_csv( - eg_converters, + converters, quotechar="'", true_values=["t"], false_values=["f"], @@ -197,9 +206,25 @@ def _load_converters_from_eg(buses, eg_converters): return converters -def _load_links_from_eg(buses, eg_links): +def _load_converters_from_osm(buses, converters): + converters = pd.read_csv( + converters, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict(converter_id="str", bus0="str", bus1="str"), + ).set_index("converter_id") + + converters = _remove_dangling_branches(converters, buses) + + converters["carrier"] = "" + + return converters + + +def _load_links_from_eg(buses, links): links = pd.read_csv( - eg_links, + links, quotechar="'", true_values=["t"], false_values=["f"], @@ -208,7 +233,7 @@ def _load_links_from_eg(buses, eg_links): links["length"] /= 1e3 - # Skagerrak Link is connected to 132kV bus which is removed in _load_buses_from_eg. + # Skagerrak Link is connected to 132kV bus which is removed in _load_buses. 
# Connect to neighboring 380kV bus links.loc[links.bus1 == "6396", "bus1"] = "6398" @@ -220,10 +245,35 @@ def _load_links_from_eg(buses, eg_links): return links -def _load_lines_from_eg(buses, eg_lines): +def _load_links_from_osm(buses, links): + links = pd.read_csv( + links, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict( + link_id="str", + bus0="str", + bus1="str", + voltage="int", + p_nom="float", + ), + ).set_index("link_id") + + links["length"] /= 1e3 + + links = _remove_dangling_branches(links, buses) + + # Add DC line parameters + links["carrier"] = "DC" + + return links + + +def _load_lines(buses, lines): lines = ( pd.read_csv( - eg_lines, + lines, quotechar="'", true_values=["t"], false_values=["f"], @@ -240,6 +290,7 @@ def _load_lines_from_eg(buses, eg_lines): ) lines["length"] /= 1e3 + lines["carrier"] = "AC" lines = _remove_dangling_branches(lines, buses) @@ -290,7 +341,7 @@ def _reconnect_crimea(lines): return pd.concat([lines, lines_to_crimea]) -def _set_electrical_parameters_lines(lines, config): +def _set_electrical_parameters_lines_eg(lines, config): v_noms = config["electricity"]["voltages"] linetypes = config["lines"]["types"] @@ -302,16 +353,36 @@ def _set_electrical_parameters_lines(lines, config): return lines +def _set_electrical_parameters_lines_osm(lines, config): + if lines.empty: + lines["type"] = [] + return lines + + v_noms = config["electricity"]["voltages"] + linetypes = _get_linetypes_config(config["lines"]["types"], v_noms) + + lines["carrier"] = "AC" + lines["dc"] = False + + lines.loc[:, "type"] = lines.v_nom.apply( + lambda x: _get_linetype_by_voltage(x, linetypes) + ) + + lines["s_max_pu"] = config["lines"]["s_max_pu"] + + return lines + + def _set_lines_s_nom_from_linetypes(n): n.lines["s_nom"] = ( np.sqrt(3) * n.lines["type"].map(n.line_types.i_nom) * n.lines["v_nom"] - * n.lines.num_parallel + * n.lines["num_parallel"] ) -def _set_electrical_parameters_links(links, config, links_p_nom): +def _set_electrical_parameters_links_eg(links, config, links_p_nom): if links.empty: return links @@ -343,12 +414,27 @@ def _set_electrical_parameters_links(links, config, links_p_nom): return links +def _set_electrical_parameters_links_osm(links, config): + if links.empty: + return links + + p_max_pu = config["links"].get("p_max_pu", 1.0) + links["p_max_pu"] = p_max_pu + links["p_min_pu"] = -p_max_pu + links["carrier"] = "DC" + links["dc"] = True + + return links + + def _set_electrical_parameters_converters(converters, config): p_max_pu = config["links"].get("p_max_pu", 1.0) converters["p_max_pu"] = p_max_pu converters["p_min_pu"] = -p_max_pu - converters["p_nom"] = 2000 + # if column "p_nom" does not exist, set to 2000 + if "p_nom" not in converters: + converters["p_nom"] = 2000 # Converters are combined with links converters["under_construction"] = False @@ -463,7 +549,7 @@ def prefer_voltage(x, which): buses["substation_lv"] = ( lv_b & onshore_b & (~buses["under_construction"]) & has_connections_b ) - buses["substation_off"] = ((hv_b & offshore_b) | (hv_b & onshore_b)) & ( + buses["substation_off"] = (offshore_b | (hv_b & onshore_b)) & ( ~buses["under_construction"] ) @@ -617,11 +703,11 @@ def _set_shapes(n, country_shapes, offshore_shapes): def base_network( - eg_buses, - eg_converters, - eg_transformers, - eg_lines, - eg_links, + buses, + converters, + transformers, + lines, + links, links_p_nom, europe_shape, country_shapes, @@ -629,29 +715,59 @@ def base_network( parameter_corrections, config, ): - buses = 
_load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) - links = _load_links_from_eg(buses, eg_links) + base_network = config["electricity"].get("base_network") + assert base_network in { + "entsoegridkit", + "osm-raw", + "osm-prebuilt", + }, f"base_network must be either 'entsoegridkit', 'osm-raw' or 'osm-prebuilt', but got '{base_network}'" + if base_network == "entsoegridkit": + warnings.warn( + "The 'entsoegridkit' base network is deprecated and will be removed in future versions. Please use 'osm-raw' or 'osm-prebuilt' instead.", + DeprecationWarning, + ) + + logger.info(f"Creating base network using {base_network}.") + + buses = _load_buses(buses, europe_shape, config) + transformers = _load_transformers(buses, transformers) + lines = _load_lines(buses, lines) - converters = _load_converters_from_eg(buses, eg_converters) + if base_network == "entsoegridkit": + links = _load_links_from_eg(buses, links) + converters = _load_converters_from_eg(buses, converters) - lines = _load_lines_from_eg(buses, eg_lines) - transformers = _load_transformers_from_eg(buses, eg_transformers) + # Optionally reconnect Crimea + if (config["lines"].get("reconnect_crimea", True)) & ( + "UA" in config["countries"] + ): + lines = _reconnect_crimea(lines) - if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: - lines = _reconnect_crimea(lines) + # Set electrical parameters of lines and links + lines = _set_electrical_parameters_lines_eg(lines, config) + links = _set_electrical_parameters_links_eg(links, config, links_p_nom) + elif base_network in {"osm-prebuilt", "osm-raw"}: + links = _load_links_from_osm(buses, links) + converters = _load_converters_from_osm(buses, converters) + + # Set electrical parameters of lines and links + lines = _set_electrical_parameters_lines_osm(lines, config) + links = _set_electrical_parameters_links_osm(links, config) + else: + raise ValueError( + "base_network must be either 'entsoegridkit', 'osm-raw', or 'osm-prebuilt'" + ) - lines = _set_electrical_parameters_lines(lines, config) + # Set electrical parameters of transformers and converters transformers = _set_electrical_parameters_transformers(transformers, config) - links = _set_electrical_parameters_links(links, config, links_p_nom) converters = _set_electrical_parameters_converters(converters, config) n = pypsa.Network() - n.name = "PyPSA-Eur" + n.name = f"PyPSA-Eur ({base_network})" time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) n.set_snapshots(time) - n.madd("Carrier", ["AC", "DC"]) n.import_components_from_dataframe(buses, "Bus") n.import_components_from_dataframe(lines, "Line") @@ -660,8 +776,8 @@ def base_network( n.import_components_from_dataframe(converters, "Link") _set_lines_s_nom_from_linetypes(n) - - _apply_parameter_corrections(n, parameter_corrections) + if config["electricity"].get("base_network") == "entsoegridkit": + _apply_parameter_corrections(n, parameter_corrections) n = _remove_unconnected_components(n) @@ -675,9 +791,64 @@ def base_network( _set_shapes(n, country_shapes, offshore_shapes) + # Add carriers if they are present in buses.carriers + carriers_in_buses = set(n.buses.carrier.dropna().unique()) + carriers = carriers_in_buses.intersection({"AC", "DC"}) + + if carriers: + n.madd("Carrier", carriers) + return n +def _get_linetypes_config(line_types, voltages): + """ + Return the dictionary of linetypes for selected voltages. The dictionary is + a subset of the dictionary line_types, whose keys match the selected + voltages. 
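The two helpers introduced here work as a pair: ``_get_linetypes_config`` restricts the configured ``lines: types`` mapping to the selected voltages, and ``_get_linetype_by_voltage`` (defined just below) maps any remaining line voltage to the nearest available type. A worked illustration using the default values from this diff; for example, a 400 kV OSM line falls back to the 380 kV type:

```python
# Worked illustration of the nearest-voltage linetype lookup, using the
# default config values shown earlier in this diff.
linetypes = {
    220.0: "Al/St 240/40 2-bundle 220.0",
    300.0: "Al/St 240/40 3-bundle 300.0",
    380.0: "Al/St 240/40 4-bundle 380.0",
}

def get_linetype_by_voltage(v_nom: float, d_linetypes: dict) -> str:
    # pick the entry whose nominal voltage is closest to v_nom
    _, line_type = min(d_linetypes.items(), key=lambda kv: abs(kv[0] - v_nom))
    return line_type

print(get_linetype_by_voltage(400.0, linetypes))  # Al/St 240/40 4-bundle 380.0
print(get_linetype_by_voltage(225.0, linetypes))  # Al/St 240/40 2-bundle 220.0
```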
+ + Parameters + ---------- + line_types : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + voltages : list + List of selected voltages. + + Returns + ------- + Dictionary of linetypes for selected voltages. + """ + # get voltages value that are not available in the line types + vnoms_diff = set(voltages).symmetric_difference(set(line_types.keys())) + if vnoms_diff: + logger.warning( + f"Voltages {vnoms_diff} not in the {line_types} or {voltages} list." + ) + return {k: v for k, v in line_types.items() if k in voltages} + + +def _get_linetype_by_voltage(v_nom, d_linetypes): + """ + Return the linetype of a specific line based on its voltage v_nom. + + Parameters + ---------- + v_nom : float + The voltage of the line. + d_linetypes : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + + Returns + ------- + The linetype of the line whose nominal voltage is closest to the line voltage. + """ + v_nom_min, line_type_min = min( + d_linetypes.items(), + key=lambda x: abs(x[0] - v_nom), + ) + return line_type_min + + def voronoi(points, outline, crs=4326): """ Create Voronoi polygons from a set of points within an outline. @@ -793,25 +964,47 @@ def append_bus_shapes(n, shapes, type): configure_logging(snakemake) set_scenario_config(snakemake) + countries = snakemake.params.countries + + buses = snakemake.input.buses + converters = snakemake.input.converters + transformers = snakemake.input.transformers + lines = snakemake.input.lines + links = snakemake.input.links + europe_shape = snakemake.input.europe_shape + country_shapes = snakemake.input.country_shapes + offshore_shapes = snakemake.input.offshore_shapes + config = snakemake.config + + if "links_p_nom" in snakemake.input.keys(): + links_p_nom = snakemake.input.links_p_nom + else: + links_p_nom = None + + if "parameter_corrections" in snakemake.input.keys(): + parameter_corrections = snakemake.input.parameter_corrections + else: + parameter_corrections = None + n = base_network( - snakemake.input.eg_buses, - snakemake.input.eg_converters, - snakemake.input.eg_transformers, - snakemake.input.eg_lines, - snakemake.input.eg_links, - snakemake.input.links_p_nom, - snakemake.input.europe_shape, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.input.parameter_corrections, - snakemake.config, + buses, + converters, + transformers, + lines, + links, + links_p_nom, + europe_shape, + country_shapes, + offshore_shapes, + parameter_corrections, + config, ) onshore_regions, offshore_regions, shapes, offshore_shapes = build_bus_shapes( n, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.params.countries, + country_shapes, + offshore_shapes, + countries, ) shapes.to_file(snakemake.output.regions_onshore) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py new file mode 100644 index 000000000..83461a98d --- /dev/null +++ b/scripts/build_osm_network.py @@ -0,0 +1,863 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur and PyPSA-Earth Authors +# +# SPDX-License-Identifier: MIT + +import logging +import os +import string + +import geopandas as gpd +import numpy as np +import pandas as pd +from _helpers import configure_logging, set_scenario_config +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, nearest_points, split +from tqdm import tqdm + +logger = logging.getLogger(__name__) + + +GEO_CRS = "EPSG:4326" +DISTANCE_CRS = "EPSG:3035" +BUS_TOL = ( 
+ 5000 # unit: meters, default 5000 - Buses within this distance are grouped together +) +LINES_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", +] +LINKS_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "underground", + "under_construction", + "geometry", +] +TRANSFORMERS_COLUMNS = [ + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", +] + + +def line_endings_to_bus_conversion(lines): + """ + Converts line endings to bus connections. + + This function takes a df of lines and converts the line endings to bus + connections. It performs the necessary operations to ensure that the line + endings are properly connected to the buses in the network. + + Parameters: + lines (DataFrame) + + Returns: + lines (DataFrame) + """ + # Assign to every line a start and end point + + lines["bounds"] = lines["geometry"].boundary # create start and end point + + lines["bus_0_coors"] = lines["bounds"].map(lambda p: p.geoms[0]) + lines["bus_1_coors"] = lines["bounds"].map(lambda p: p.geoms[-1]) + + # splits into coordinates + lines["bus0_lon"] = lines["bus_0_coors"].x + lines["bus0_lat"] = lines["bus_0_coors"].y + lines["bus1_lon"] = lines["bus_1_coors"].x + lines["bus1_lat"] = lines["bus_1_coors"].y + + return lines + + +# tol in m +def set_substations_ids(buses, distance_crs, tol=5000): + """ + Function to set substations ids to buses, accounting for location + tolerance. + + The algorithm is as follows: + + 1. initialize all substation ids to -1 + 2. if the current substation has been already visited [substation_id < 0], then skip the calculation + 3. otherwise: + 1. identify the substations within the specified tolerance (tol) + 2. when all the substations in tolerance have substation_id < 0, then specify a new substation_id + 3. 
otherwise, if one of the substations in tolerance has a substation_id >= 0, then set that substation_id to all the others;
+ in case of multiple substations with substation_ids >= 0, the first value is picked for all
+ """
+
+ buses["station_id"] = -1
+
+ # create temporary series to execute distance calculations using m as reference distances
+ temp_bus_geom = buses.geometry.to_crs(distance_crs)
+
+ # set tqdm options for substation ids
+ tqdm_kwargs_substation_ids = dict(
+ ascii=False,
+ unit=" buses",
+ total=buses.shape[0],
+ desc="Set substation ids ",
+ )
+
+ station_id = 0
+ for i, row in tqdm(buses.iterrows(), **tqdm_kwargs_substation_ids):
+ if buses.loc[i, "station_id"] >= 0:
+ continue
+
+ # get substations within tolerance
+ close_nodes = np.flatnonzero(
+ temp_bus_geom.distance(temp_bus_geom.loc[i]) <= tol
+ )
+
+ if len(close_nodes) == 1:
+ # if only one substation is within tolerance, then the substation is the current one
+ # Note that the node cannot have substation_id >= 0, given the preliminary check
+ # at the beginning of the for loop
+ buses.loc[buses.index[i], "station_id"] = station_id
+ # update station id
+ station_id += 1
+ else:
+ # several substations within tolerance
+ # get their ids
+ subset_substation_ids = buses.loc[buses.index[close_nodes], "station_id"]
+ # check if all substation_ids are negative (<0)
+ all_neg = subset_substation_ids.max() < 0
+ # check if at least one substation_id is negative (<0)
+ some_neg = subset_substation_ids.min() < 0
+
+ if all_neg:
+ # when all substation_ids are negative, then this is a new substation id
+ # set the current station_id and increment the counter
+ buses.loc[buses.index[close_nodes], "station_id"] = station_id
+ station_id += 1
+ elif some_neg:
+ # otherwise, when at least one substation_id is non-negative, pick the first such value
+ # and set it to all the other substations within tolerance
+ sub_id = -1
+ for substation_id in subset_substation_ids:
+ if substation_id >= 0:
+ sub_id = substation_id
+ break
+ buses.loc[buses.index[close_nodes], "station_id"] = sub_id
+
+
+def set_lines_ids(lines, buses, distance_crs):
+ """
+ Function to set line bus ids to the closest bus in the list.
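The matching in the body below reduces to a nearest-neighbour search per line endpoint, restricted to buses of the same voltage and current type (AC/DC), with the line geometry stitched to the winning bus if it does not already touch it. A condensed sketch of the distance step; GeoDataFrames and CRS constants are as in the surrounding module, and the geometry rerouting is omitted:

```python
# Condensed sketch of the endpoint-to-bus matching in set_lines_ids.
# `buses` and `lines` are GeoDataFrames as in the surrounding code.
import geopandas as gpd

DISTANCE_CRS = "EPSG:3035"  # metric CRS, as defined at the top of the module

def closest_bus_id(line_row, buses):
    # candidate buses must match the line's voltage and current type (AC/DC)
    candidates = buses[
        (buses["voltage"] == line_row["voltage"]) & (buses["dc"] == line_row["dc"])
    ].to_crs(DISTANCE_CRS)
    # first endpoint of the line, projected to the metric CRS
    endpoint = (
        gpd.GeoSeries([line_row.geometry.boundary.geoms[0]], crs=buses.crs)
        .to_crs(DISTANCE_CRS)
        .iloc[0]
    )
    # index label of the geometrically closest candidate bus
    return candidates.geometry.distance(endpoint).idxmin()
```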
+ """ + # set tqdm options for set lines ids + tqdm_kwargs_line_ids = dict( + ascii=False, + unit=" lines", + total=lines.shape[0], + desc="Set line/link bus ids ", + ) + + # initialization + lines["bus0"] = -1 + lines["bus1"] = -1 + + busesepsg = buses.to_crs(distance_crs) + linesepsg = lines.to_crs(distance_crs) + + for i, row in tqdm(linesepsg.iterrows(), **tqdm_kwargs_line_ids): + # select buses having the voltage level of the current line + buses_sel = busesepsg[ + (buses["voltage"] == row["voltage"]) & (buses["dc"] == row["dc"]) + ] + + # find the closest node of the bus0 of the line + bus0_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[0]).idxmin() + lines.loc[i, "bus0"] = buses.loc[bus0_id, "bus_id"] + + # check if the line starts exactly in the node, otherwise modify the linestring + distance_bus0 = busesepsg.geometry.loc[bus0_id].distance( + row.geometry.boundary.geoms[0] + ) + + if distance_bus0 > 0: + # the line does not start in the node, thus modify the linestring + line_start_point = lines.geometry.loc[i].boundary.geoms[0] + new_segment = LineString([buses.geometry.loc[bus0_id], line_start_point]) + modified_line = linemerge([new_segment, lines.geometry.loc[i]]) + lines.loc[i, "geometry"] = modified_line + + # find the closest node of the bus1 of the line + bus1_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[1]).idxmin() + lines.loc[i, "bus1"] = buses.loc[bus1_id, "bus_id"] + + # check if the line ends exactly in the node, otherwise modify the linestring + distance_bus1 = busesepsg.geometry.loc[bus1_id].distance( + row.geometry.boundary.geoms[1] + ) + + if distance_bus1 > 0: + # the line does not start in the node, thus modify the linestring + line_end_point = lines.geometry.loc[i].boundary.geoms[1] + new_segment = LineString([line_end_point, buses.geometry.loc[bus1_id]]) + modified_line = linemerge([lines.geometry.loc[i], new_segment]) + lines.loc[i, "geometry"] = modified_line + + return lines, buses + + +def merge_stations_same_station_id( + buses, delta_lon=0.001, delta_lat=0.001, precision=4 +): + """ + Function to merge buses with same voltage and station_id This function + iterates over all substation ids and creates a bus_id for every substation + and voltage level. 
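In short, buses sharing a ``station_id`` are collapsed onto one averaged location, and each (voltage, dc) combination at that station becomes its own bus, offset by ``delta_lon``/``delta_lat`` so the points remain distinct. A toy illustration of the grouping for a single station; coordinates are made up:

```python
# Toy illustration of the station merging: one averaged location per station,
# one bus per (voltage, dc) group, offset per group. Coordinates are made up.
import pandas as pd

buses = pd.DataFrame(
    {
        "station_id": [0, 0, 0],
        "voltage": [380000, 380000, 220000],
        "dc": [False, False, False],
        "x": [10.00, 10.02, 10.01],
        "y": [50.00, 50.00, 50.01],
    }
)

delta_lon = delta_lat = 0.001
for station_id, station in buses.groupby("station_id"):
    x0, y0 = station.x.mean(), station.y.mean()  # averaged station location
    for v_it, ((voltage, dc), grp) in enumerate(station.groupby(["voltage", "dc"])):
        lon, lat = round(x0 + v_it * delta_lon, 4), round(y0 + v_it * delta_lat, 4)
        print(f"station {station_id}: {voltage} V, dc={dc} -> bus at ({lon}, {lat})")
```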
Therefore, a substation with multiple voltage levels is represented
+ with different buses, one per voltage level
+ """
+ # initialize list of cleaned buses
+ buses_clean = []
+
+ # initialize the number of buses
+ n_buses = 0
+
+ for g_name, g_value in buses.groupby(by="station_id"):
+ # average location of the buses having the same station_id
+ station_point_x = np.round(g_value.geometry.x.mean(), precision)
+ station_point_y = np.round(g_value.geometry.y.mean(), precision)
+ # is_dclink_boundary_point = any(g_value["is_dclink_boundary_point"])
+
+ # loop for every voltage level in the bus
+ # The location of the buses is averaged; in the case of multiple voltage levels for the same station_id,
+ # each bus corresponding to a voltage level and each polarity is located at a distance regulated by delta_lon/delta_lat
+ v_it = 0
+ for v_name, bus_row in g_value.groupby(by=["voltage", "dc"]):
+ lon_bus = np.round(station_point_x + v_it * delta_lon, precision)
+ lat_bus = np.round(station_point_y + v_it * delta_lat, precision)
+
+ bus_data = [
+ n_buses, # "bus_id"
+ g_name, # "station_id"
+ v_name[0], # "voltage"
+ bus_row["dc"].all(), # "dc"
+ "|".join(bus_row["symbol"].unique()), # "symbol"
+ bus_row["under_construction"].any(), # "under_construction"
+ "|".join(bus_row["tag_substation"].unique()), # "tag_substation"
+ bus_row["tag_area"].sum(), # "tag_area"
+ lon_bus, # "lon"
+ lat_bus, # "lat"
+ bus_row["country"].iloc[0], # "country"
+ Point(lon_bus, lat_bus), # "geometry"
+ ]
+
+ # add the bus
+ buses_clean.append(bus_data)
+
+ # increase counters
+ v_it += 1
+ n_buses += 1
+
+ # names of the columns
+ buses_clean_columns = [
+ "bus_id",
+ "station_id",
+ "voltage",
+ "dc",
+ "symbol",
+ "under_construction",
+ "tag_substation",
+ "tag_area",
+ "x",
+ "y",
+ "country",
+ # "is_dclink_boundary_point",
+ "geometry",
+ ]
+
+ gdf_buses_clean = gpd.GeoDataFrame(
+ buses_clean, columns=buses_clean_columns
+ ).set_crs(crs=buses.crs, inplace=True)
+
+ return gdf_buses_clean
+
+
+def get_ac_frequency(df, fr_col="tag_frequency"):
+ """
+ Function to define a default frequency value.
+
+ Attempts to find the most common non-zero frequency across the
+ dataframe; 50 Hz is assumed as a back-up value
+ """
+
+ # Initialize a default frequency value
+ ac_freq_default = 50
+
+ grid_freq_levels = df[fr_col].value_counts(sort=True, dropna=True)
+ if not grid_freq_levels.empty:
+ # AC lines frequency shouldn't be 0Hz
+ ac_freq_levels = grid_freq_levels.loc[
+ grid_freq_levels.index.get_level_values(0) != "0"
+ ]
+ ac_freq_default = ac_freq_levels.index.get_level_values(0)[0]
+
+ return ac_freq_default
+
+
+def get_transformers(buses, lines):
+ """
+ Function to create fake transformer lines that connect buses of the same
+ station_id at different voltage levels.
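Since each voltage level of a station is a separate bus after merging, transformers are synthesized between consecutive voltage levels: a station with n voltage levels yields n-1 transformer edges, as the body below implements. A toy sketch of that pairing; bus IDs are made up:

```python
# Toy sketch: one synthetic transformer per adjacent pair of voltage levels
# at the same station, mirroring the loop in get_transformers.
station_buses = [  # (bus_id, voltage), already sorted by voltage
    ("bus_7_220", 220000),
    ("bus_7_380", 380000),
    ("bus_7_750", 750000),
]

transformers = [
    (f"transf_7_{i}", station_buses[i][0], station_buses[i + 1][0])
    for i in range(len(station_buses) - 1)
]
print(transformers)
# [('transf_7_0', 'bus_7_220', 'bus_7_380'), ('transf_7_1', 'bus_7_380', 'bus_7_750')]
```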
+ """ + + ac_freq = get_ac_frequency(lines) + df_transformers = [] + + # Transformers should be added between AC buses only + buses_ac = buses[buses["dc"] != True] + + for g_name, g_value in buses_ac.sort_values("voltage", ascending=True).groupby( + by="station_id" + ): + # note: by construction there cannot be more that two buses with the same station_id and same voltage + n_voltages = len(g_value) + + if n_voltages > 1: + for id in range(n_voltages - 1): + # when g_value has more than one node, it means that there are multiple voltages for the same bus + transformer_geometry = LineString( + [g_value.geometry.iloc[id], g_value.geometry.iloc[id + 1]] + ) + + transformer_data = [ + f"transf_{g_name}_{id}", # "line_id" + g_value["bus_id"].iloc[id], # "bus0" + g_value["bus_id"].iloc[id + 1], # "bus1" + g_value.voltage.iloc[id], # "voltage_bus0" + g_value.voltage.iloc[id + 1], # "voltage_bus0" + g_value.country.iloc[id], # "country" + transformer_geometry, # "geometry" + ] + + df_transformers.append(transformer_data) + + # name of the columns + transformers_columns = [ + "transformer_id", + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", + ] + + df_transformers = gpd.GeoDataFrame(df_transformers, columns=transformers_columns) + if not df_transformers.empty: + init_index = 0 if lines.empty else lines.index[-1] + 1 + df_transformers.set_index(init_index + df_transformers.index, inplace=True) + # update line endings + df_transformers = line_endings_to_bus_conversion(df_transformers) + df_transformers.drop(columns=["bounds", "bus_0_coors", "bus_1_coors"], inplace=True) + + gdf_transformers = gpd.GeoDataFrame(df_transformers) + gdf_transformers.crs = GEO_CRS + + return gdf_transformers + + +def _find_closest_bus(row, buses, distance_crs, tol=5000): + """ + Find the closest bus to a given bus based on geographical distance and + country. + + Parameters: + - row: The bus_id of the bus to find the closest bus for. + - buses: A GeoDataFrame containing information about all the buses. + - distance_crs: The coordinate reference system to use for distance calculations. + - tol: The tolerance distance within which a bus is considered closest (default: 5000). + Returns: + - closest_bus_id: The bus_id of the closest bus, or None if no bus is found within the distance and same country. + """ + gdf_buses = buses.copy() + gdf_buses = gdf_buses.to_crs(distance_crs) + # Get the geometry of the bus with bus_id = link_bus_id + bus = gdf_buses[gdf_buses["bus_id"] == row] + bus_geom = bus.geometry.values[0] + + gdf_buses_filtered = gdf_buses[gdf_buses["dc"] == False] + + # Find the closest point in the filtered buses + nearest_geom = nearest_points(bus_geom, gdf_buses_filtered.union_all())[1] + + # Get the bus_id of the closest bus + closest_bus = gdf_buses_filtered.loc[gdf_buses["geometry"] == nearest_geom] + + # check if closest_bus_id is within the distance + within_distance = ( + closest_bus.to_crs(distance_crs).distance(bus.to_crs(distance_crs), align=False) + ).values[0] <= tol + + in_same_country = closest_bus.country.values[0] == bus.country.values[0] + + if within_distance and in_same_country: + closest_bus_id = closest_bus.bus_id.values[0] + else: + closest_bus_id = None + + return closest_bus_id + + +def _get_converters(buses, links, distance_crs): + """ + Get the converters for the given buses and links. Connecting link endings + to closest AC bus. + + Parameters: + - buses (pandas.DataFrame): DataFrame containing information about buses. 
+ - links (pandas.DataFrame): DataFrame containing information about links. + Returns: + - gdf_converters (geopandas.GeoDataFrame): GeoDataFrame containing information about converters. + """ + converters = [] + for idx, row in links.iterrows(): + for conv in range(2): + link_end = row[f"bus{conv}"] + # HVDC Gotland is connected to 130 kV grid, closest HVAC bus is further away + + closest_bus = _find_closest_bus(link_end, buses, distance_crs, tol=40000) + + if closest_bus is None: + continue + + converter_id = f"converter/{row['link_id']}_{conv}" + converter_geometry = LineString( + [ + buses[buses["bus_id"] == link_end].geometry.values[0], + buses[buses["bus_id"] == closest_bus].geometry.values[0], + ] + ) + + logger.info( + f"Added converter #{conv+1}/2 for link {row['link_id']}:{converter_id}." + ) + + converter_data = [ + converter_id, # "line_id" + link_end, # "bus0" + closest_bus, # "bus1" + row["voltage"], # "voltage" + row["p_nom"], # "p_nom" + False, # "underground" + False, # "under_construction" + buses[buses["bus_id"] == closest_bus].country.values[0], # "country" + converter_geometry, # "geometry" + ] + + # Create the converter + converters.append(converter_data) + + conv_columns = [ + "converter_id", + "bus0", + "bus1", + "voltage", + "p_nom", + "underground", + "under_construction", + "country", + "geometry", + ] + + gdf_converters = gpd.GeoDataFrame( + converters, columns=conv_columns, crs=GEO_CRS + ).reset_index() + + return gdf_converters + + +def connect_stations_same_station_id(lines, buses): + """ + Function to create fake links between substations with the same + substation_id. + """ + ac_freq = get_ac_frequency(lines) + station_id_list = buses.station_id.unique() + + add_lines = [] + from shapely.geometry import LineString + + for s_id in station_id_list: + buses_station_id = buses[buses.station_id == s_id] + + if len(buses_station_id) > 1: + for b_it in range(1, len(buses_station_id)): + line_geometry = LineString( + [ + buses_station_id.geometry.iloc[0], + buses_station_id.geometry.iloc[b_it], + ] + ) + line_bounds = line_geometry.bounds + + line_data = [ + f"link{buses_station_id}_{b_it}", # "line_id" + buses_station_id.index[0], # "bus0" + buses_station_id.index[b_it], # "bus1" + 400000, # "voltage" + 1, # "circuits" + 0.0, # "length" + False, # "underground" + False, # "under_construction" + "transmission", # "tag_type" + ac_freq, # "tag_frequency" + buses_station_id.country.iloc[0], # "country" + line_geometry, # "geometry" + line_bounds, # "bounds" + buses_station_id.geometry.iloc[0], # "bus_0_coors" + buses_station_id.geometry.iloc[b_it], # "bus_1_coors" + buses_station_id.lon.iloc[0], # "bus0_lon" + buses_station_id.lat.iloc[0], # "bus0_lat" + buses_station_id.lon.iloc[b_it], # "bus1_lon" + buses_station_id.lat.iloc[b_it], # "bus1_lat" + ] + + add_lines.append(line_data) + + # name of the columns + add_lines_columns = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "tag_type", + "tag_frequency", + "country", + "geometry", + "bounds", + "bus_0_coors", + "bus_1_coors", + "bus0_lon", + "bus0_lat", + "bus1_lon", + "bus1_lat", + ] + + df_add_lines = gpd.GeoDataFrame(pd.concat(add_lines), columns=add_lines_columns) + lines = pd.concat([lines, df_add_lines], ignore_index=True) + + return lines + + +def set_lv_substations(buses): + """ + Function to set what nodes are lv, thereby setting substation_lv The + current methodology is to set lv nodes to buses where multiple voltage + level are found, 
hence when the station_id is duplicated. + """ + # initialize column substation_lv to true + buses["substation_lv"] = True + + # For each station number with multiple buses make lowest voltage `substation_lv = TRUE` + bus_with_stations_duplicates = buses[ + buses.station_id.duplicated(keep=False) + ].sort_values(by=["station_id", "voltage"]) + lv_bus_at_station_duplicates = ( + buses[buses.station_id.duplicated(keep=False)] + .sort_values(by=["station_id", "voltage"]) + .drop_duplicates(subset=["station_id"]) + ) + # Set all buses with station duplicates "False" + buses.loc[bus_with_stations_duplicates.index, "substation_lv"] = False + # Set lv_buses with station duplicates "True" + buses.loc[lv_bus_at_station_duplicates.index, "substation_lv"] = True + + return buses + + +def merge_stations_lines_by_station_id_and_voltage( + lines, links, buses, distance_crs, tol=5000 +): + """ + Function to merge close stations and adapt the line datasets to adhere to + the merged dataset. + """ + + logger.info(" - Setting substation ids with tolerance of %.2f m" % (tol)) + + # bus types (AC != DC) + buses_ac = buses[buses["dc"] == False].reset_index() + buses_dc = buses[buses["dc"] == True].reset_index() + + # set_substations_ids(buses, distance_crs, tol=tol) + set_substations_ids(buses_ac, distance_crs, tol=tol) + set_substations_ids(buses_dc, distance_crs, tol=tol) + + logger.info(" - Merging substations with the same id") + + # merge buses with same station id and voltage + if not buses.empty: + buses_ac = merge_stations_same_station_id(buses_ac) + buses_dc = merge_stations_same_station_id(buses_dc) + buses_dc["bus_id"] = buses_ac["bus_id"].max() + buses_dc["bus_id"] + 1 + buses = pd.concat([buses_ac, buses_dc], ignore_index=True) + set_substations_ids(buses, distance_crs, tol=tol) + + logger.info(" - Specifying the bus ids of the line endings") + + # set the bus ids to the line dataset + lines, buses = set_lines_ids(lines, buses, distance_crs) + links, buses = set_lines_ids(links, buses, distance_crs) + + # drop lines starting and ending in the same node + lines.drop(lines[lines["bus0"] == lines["bus1"]].index, inplace=True) + links.drop(links[links["bus0"] == links["bus1"]].index, inplace=True) + # update line endings + lines = line_endings_to_bus_conversion(lines) + links = line_endings_to_bus_conversion(links) + + # set substation_lv + set_lv_substations(buses) + + # reset index + lines.reset_index(drop=True, inplace=True) + links.reset_index(drop=True, inplace=True) + + return lines, links, buses + + +def _split_linestring_by_point(linestring, points): + """ + Function to split a linestring geometry by multiple inner points. + + Parameters + ---------- + lstring : LineString + Linestring of the line to be split + points : list + List of points to split the linestring + + Return + ------ + list_lines : list + List of linestring to split the line + """ + + list_linestrings = [linestring] + + for p in points: + # execute split to all lines and store results + temp_list = [split(l, p) for l in list_linestrings] + # nest all geometries + list_linestrings = [lstring for tval in temp_list for lstring in tval.geoms] + + return list_linestrings + + +def fix_overpassing_lines(lines, buses, distance_crs, tol=1): + """ + Fix overpassing lines by splitting them at nodes within a given tolerance, + to include the buses being overpassed. + + Parameters: + - lines (GeoDataFrame): The lines to be fixed. + - buses (GeoDataFrame): The buses representing nodes. 
+ - distance_crs (str): The coordinate reference system (CRS) for distance calculations.
+ - tol (float): The tolerance distance in meters for determining if a bus is within a line.
+ Returns:
+ - lines (GeoDataFrame): The fixed lines.
+ - buses (GeoDataFrame): The buses representing nodes.
+ """
+
+ lines_to_add = [] # list of lines to be added
+ lines_to_split = [] # list of lines that have been split
+
+ lines_epsgmod = lines.to_crs(distance_crs)
+ buses_epsgmod = buses.to_crs(distance_crs)
+
+ # set tqdm options for the overpassing-lines check
+ tqdm_kwargs_substation_ids = dict(
+ ascii=False,
+ unit=" lines",
+ total=lines.shape[0],
+ desc="Verify lines overpassing nodes ",
+ )
+
+ for l in tqdm(lines.index, **tqdm_kwargs_substation_ids):
+ # bus indices being within tolerance from the line
+ bus_in_tol_epsg = buses_epsgmod[
+ buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol
+ ]
+
+ # Get boundary points
+ endpoint0 = lines_epsgmod.geometry.loc[l].boundary.geoms[0]
+ endpoint1 = lines_epsgmod.geometry.loc[l].boundary.geoms[1]
+
+ # Calculate distances
+ dist_to_ep0 = bus_in_tol_epsg.geometry.distance(endpoint0)
+ dist_to_ep1 = bus_in_tol_epsg.geometry.distance(endpoint1)
+
+ # exclude endings of the lines
+ bus_in_tol_epsg = bus_in_tol_epsg[(dist_to_ep0 > tol) | (dist_to_ep1 > tol)]
+
+ if not bus_in_tol_epsg.empty:
+ # add index of line to split
+ lines_to_split.append(l)
+
+ buses_locs = buses.geometry.loc[bus_in_tol_epsg.index]
+
+ # get new line geometries
+ new_geometries = _split_linestring_by_point(lines.geometry[l], buses_locs)
+ n_geoms = len(new_geometries)
+
+ # create temporary copies of the line
+ df_append = gpd.GeoDataFrame([lines.loc[l]] * n_geoms)
+ # update geometries
+ df_append["geometry"] = new_geometries
+ # update name of the line if there are multiple line segments
+ df_append["line_id"] = [
+ str(df_append["line_id"].iloc[0])
+ + (f"-{letter}" if n_geoms > 1 else "")
+ for letter in string.ascii_lowercase[:n_geoms]
+ ]
+
+ lines_to_add.append(df_append)
+
+ if not lines_to_add:
+ return lines, buses
+
+ df_to_add = gpd.GeoDataFrame(pd.concat(lines_to_add, ignore_index=True))
+ df_to_add.set_crs(lines.crs, inplace=True)
+ df_to_add.set_index(lines.index[-1] + df_to_add.index, inplace=True)
+
+ # update length
+ df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length
+
+ # update line endings
+ df_to_add = line_endings_to_bus_conversion(df_to_add)
+
+ # remove original lines
+ lines.drop(lines_to_split, inplace=True)
+
+ lines = df_to_add if lines.empty else pd.concat([lines, df_to_add])
+
+ lines = gpd.GeoDataFrame(lines.reset_index(drop=True), crs=lines.crs)
+
+ return lines, buses
+
+
+def build_network(inputs, outputs):
+
+ logger.info("Reading input data.")
+ buses = gpd.read_file(inputs["substations"])
+ lines = gpd.read_file(inputs["lines"])
+ links = gpd.read_file(inputs["links"])
+
+ lines = line_endings_to_bus_conversion(lines)
+ links = line_endings_to_bus_conversion(links)
+
+ logger.info(
+ "Fixing lines overpassing nodes: Connecting nodes and splitting lines."
+ ) + lines, buses = fix_overpassing_lines(lines, buses, DISTANCE_CRS, tol=1) + + # Merge buses with same voltage and within tolerance + logger.info(f"Aggregating close substations with a tolerance of {BUS_TOL} m") + + lines, links, buses = merge_stations_lines_by_station_id_and_voltage( + lines, links, buses, DISTANCE_CRS, BUS_TOL + ) + + # Recalculate lengths of lines + utm = lines.estimate_utm_crs(datum_name="WGS 84") + lines["length"] = lines.to_crs(utm).length + links["length"] = links.to_crs(utm).length + + transformers = get_transformers(buses, lines) + converters = _get_converters(buses, links, DISTANCE_CRS) + + logger.info("Saving outputs") + + ### Convert output to pypsa-eur friendly format + # Rename "substation" in buses["symbol"] to "Substation" + buses["symbol"] = buses["symbol"].replace({"substation": "Substation"}) + + # Drop unnecessary index column and set respective element ids as index + lines.set_index("line_id", inplace=True) + if not links.empty: + links.set_index("link_id", inplace=True) + converters.set_index("converter_id", inplace=True) + transformers.set_index("transformer_id", inplace=True) + buses.set_index("bus_id", inplace=True) + + # Convert voltages from V to kV + lines["voltage"] = lines["voltage"] / 1000 + if not links.empty: + links["voltage"] = links["voltage"] / 1000 + if not converters.empty: + converters["voltage"] = converters["voltage"] / 1000 + transformers["voltage_bus0"], transformers["voltage_bus1"] = ( + transformers["voltage_bus0"] / 1000, + transformers["voltage_bus1"] / 1000, + ) + buses["voltage"] = buses["voltage"] / 1000 + + # Convert 'true' and 'false' to 't' and 'f' + lines = lines.replace({True: "t", False: "f"}) + links = links.replace({True: "t", False: "f"}) + converters = converters.replace({True: "t", False: "f"}) + buses = buses.replace({True: "t", False: "f"}) + + # Change column orders + lines = lines[LINES_COLUMNS] + if not links.empty: + links = links[LINKS_COLUMNS] + else: + links = pd.DataFrame(columns=["link_id"] + LINKS_COLUMNS) + links.set_index("link_id", inplace=True) + transformers = transformers[TRANSFORMERS_COLUMNS] + + # Export to csv for base_network + buses.to_csv(outputs["substations"], quotechar="'") + lines.to_csv(outputs["lines"], quotechar="'") + links.to_csv(outputs["links"], quotechar="'") + converters.to_csv(outputs["converters"], quotechar="'") + transformers.to_csv(outputs["transformers"], quotechar="'") + + # Export to GeoJSON for quick validations + buses.to_file(outputs["substations_geojson"]) + lines.to_file(outputs["lines_geojson"]) + links.to_file(outputs["links_geojson"]) + converters.to_file(outputs["converters_geojson"]) + transformers.to_file(outputs["transformers_geojson"]) + + return None + + +if __name__ == "__main__": + # Detect running outside of snakemake and mock snakemake for testing + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("build_osm_network") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + countries = snakemake.config["countries"] + + build_network(snakemake.input, snakemake.output) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py new file mode 100644 index 000000000..8669d0af5 --- /dev/null +++ b/scripts/clean_osm_data.py @@ -0,0 +1,1807 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" +This script is used to clean OpenStreetMap (OSM) data for creating a PyPSA-Eur +ready network. 
+ +The script performs various cleaning operations on the OSM data, including: +- Cleaning voltage, circuits, cables, wires, and frequency columns +- Splitting semicolon-separated cells into new rows +- Distributing values to circuits based on the number of splits +- Adding line endings to substations based on line data +""" + +import json +import logging +import os +import re + +import geopandas as gpd +import numpy as np +import pandas as pd +from _helpers import configure_logging, set_scenario_config +from shapely.geometry import LineString, MultiLineString, Point, Polygon +from shapely.ops import linemerge, unary_union + +logger = logging.getLogger(__name__) + + +def _create_linestring(row): + """ + Create a LineString object from the given row. + + Args: + row (dict): A dictionary containing the row data. + + Returns: + LineString: A LineString object representing the geometry. + """ + coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] + return LineString(coords) + + +def _create_polygon(row): + """ + Create a Shapely Polygon from a list of coordinate dictionaries. + + Parameters: + coords (list): List of dictionaries with 'lat' and 'lon' keys + representing coordinates. + + Returns: + shapely.geometry.Polygon: The constructed polygon object. + """ + # Extract coordinates as tuples + point_coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] + + # Ensure closure by repeating the first coordinate as the last coordinate + if point_coords[0] != point_coords[-1]: + point_coords.append(point_coords[0]) + + # Create Polygon object + polygon = Polygon(point_coords) + + return polygon + + +def _find_closest_polygon(gdf, point): + """ + Find the closest polygon in a GeoDataFrame to a given point. + + Parameters: + gdf (GeoDataFrame): A GeoDataFrame containing polygons. + point (Point): A Point object representing the target point. + + Returns: + int: The index of the closest polygon in the GeoDataFrame. 
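The lookup is a plain distance-and-``idxmin`` pattern over the GeoDataFrame; a minimal self-contained example with made-up geometries:

```python
# Minimal sketch of the closest-polygon lookup: distance from the point to
# every polygon, then idxmin to get the winning row label.
import geopandas as gpd
from shapely.geometry import Point, Polygon

gdf = gpd.GeoDataFrame(
    {"name": ["A", "B"]},
    geometry=[Polygon([(0, 0), (1, 0), (1, 1)]), Polygon([(5, 5), (6, 5), (6, 6)])],
)
point = Point(4.5, 4.5)
closest_idx = gdf.geometry.distance(point).idxmin()
print(gdf.loc[closest_idx, "name"])  # "B"
```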
+ """ + # Compute the distance to each polygon + gdf["distance"] = gdf["geometry"].apply(lambda geom: point.distance(geom)) + + # Find the index of the closest polygon + closest_idx = gdf["distance"].idxmin() + + # Get the closest polygon's row + closest_polygon = gdf.loc[closest_idx] + + return closest_idx + + +def _clean_voltage(column): + """ + Function to clean the raw voltage column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning voltages.") + column = column.copy() + + column = ( + column.astype(str) + .str.lower() + .str.replace("400/220/110 kV'", "400000;220000;110000") + .str.replace("400/220/110/20_kv", "400000;220000;110000;20000") + .str.replace("2x25000", "25000;25000") + .str.replace("Ć©", ";") + ) + + column = ( + column.astype(str) + .str.lower() + .str.replace("(temp 150000)", "") + .str.replace("low", "1000") + .str.replace("minor", "1000") + .str.replace("medium", "33000") + .str.replace("med", "33000") + .str.replace("m", "33000") + .str.replace("high", "150000") + .str.replace("23000-109000", "109000") + .str.replace("380000>220000", "380000;220000") + .str.replace(":", ";") + .str.replace("<", ";") + .str.replace(",", ";") + .str.replace("kv", "000") + .str.replace("kva", "000") + .str.replace("/", ";") + .str.replace("nan", "") + .str.replace("", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", str(x))) + + column.dropna(inplace=True) + return column + + +def _clean_circuits(column): + """ + Function to clean the raw circuits column: manual fixing and drop nan + values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning circuits.") + column = column.copy() + column = ( + column.astype(str) + .str.replace("partial", "") + .str.replace("1operator=RTE operator:wikidata=Q2178795", "") + .str.lower() + .str.replace("1,5", "3") + .str.replace("1/3", "1") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_cables(column): + """ + Function to clean the raw cables column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning cables.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_wires(column): + """ + Function to clean the raw wires column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning wires.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("?", "") + .str.replace("trzyprzewodowe", "3") + .str.replace("pojedyńcze", "1") + .str.replace("single", "1") + .str.replace("double", "2") + .str.replace("triple", 
"3") + .str.replace("quad", "4") + .str.replace("fivefold", "5") + .str.replace("yes", "3") + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("_", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _check_voltage(voltage, list_voltages): + """ + Check if the given voltage is present in the list of allowed voltages. + + Parameters: + voltage (str): The voltage to check. + list_voltages (list): A list of allowed voltages. + + Returns: + bool: True if the voltage is present in the list of allowed voltages, + False otherwise. + """ + voltages = voltage.split(";") + for v in voltages: + if v in list_voltages: + return True + return False + + +def _clean_frequency(column): + """ + Function to clean the raw frequency column: manual fixing and drop nan + values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning frequencies.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("16.67", "16.7") + .str.replace("16,7", "16.7") + .str.replace("?", "") + .str.replace("hz", "") + .str.replace(" ", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;.]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_rating(column): + """ + Function to clean and sum the rating columns: + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning ratings.") + column = column.copy() + column = column.astype(str).str.replace("MW", "") + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + # Sum up all ratings if there are multiple entries + column = column.str.split(";").apply(lambda x: sum([int(i) for i in x])) + + column.dropna(inplace=True) + return column.astype(str) + + +def _split_cells(df, cols=["voltage"]): + """ + Split semicolon separated cells i.e. [66000;220000] and create new + identical rows. 
+ + Parameters + ---------- + df : dataframe + Dataframe under analysis + cols : list + List of target columns over which to perform the analysis + + Example + ------- + Original data: + row 1: '66000;220000', '50' + + After applying split_cells(): + row 1, '66000', '50', 2 + row 2, '220000', '50', 2 + """ + if df.empty: + return df + + # Create a dictionary to store the suffix count for each original ID + suffix_counts = {} + # Create a dictionary to store the number of splits associated with each + # original ID + num_splits = {} + + # Split cells and create new rows + x = df.assign(**{col: df[col].str.split(";") for col in cols}) + x = x.explode(cols, ignore_index=True) + + # Count the number of splits associated with each original ID + num_splits = x.groupby("id").size().to_dict() + + # Update the 'split_elements' column + x["split_elements"] = x["id"].map(num_splits) + + # Function to generate the new ID with suffix and update the number of + # splits + def generate_new_id(row): + original_id = row["id"] + if row["split_elements"] == 1: + return original_id + else: + suffix_counts[original_id] = suffix_counts.get(original_id, 0) + 1 + return f"{original_id}-{suffix_counts[original_id]}" + + # Update the ID column with the new IDs + x["id"] = x.apply(generate_new_id, axis=1) + + return x + + +def _distribute_to_circuits(row): + """ + Distributes the number of circuits or cables to individual circuits based + on the given row data. + + Parameters: + - row: A dictionary representing a row of data containing information about + circuits and cables. + + Returns: + - single_circuit: The number of circuits to be assigned to each individual + circuit. + """ + if row["circuits"] != "": + circuits = int(row["circuits"]) + else: + cables = int(row["cables"]) + circuits = cables / 3 + + single_circuit = int(max(1, np.floor_divide(circuits, row["split_elements"]))) + single_circuit = str(single_circuit) + + return single_circuit + + +def _add_line_endings_to_substations( + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, + prefix, +): + """ + Add line endings to substations. + + This function takes two pandas DataFrames, `substations` and `lines`, and + adds line endings to the substations based on the information from the + lines DataFrame. + + Parameters: + - substations (pandas DataFrame): DataFrame containing information about + substations. + - lines (pandas DataFrame): DataFrame containing information about lines. + + Returns: + - buses (pandas DataFrame): DataFrame containing the updated information + about substations with line endings. 
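+
+    The ``prefix`` (e.g. "line-end" or "link-end") is used to generate the new
+    bus ids; country codes of buses that were matched to several country
+    datasets are fixed afterwards via a spatial join with the country and
+    offshore shapes.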
+ """ + if gdf_lines.empty: + return df_substations + + logger.info("Adding line endings to substations") + # extract columns from df_substations + bus_s = pd.DataFrame(columns=df_substations.columns) + bus_e = pd.DataFrame(columns=df_substations.columns) + + # TODO pypsa-eur: fix country code to contain single country code + # Read information from gdf_lines + bus_s[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_s.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( + lambda p: p.geoms[0] if len(p.geoms) >= 2 else None + ) + bus_s.loc[:, "lon"] = bus_s["geometry"].map(lambda p: p.x if p != None else None) + bus_s.loc[:, "lat"] = bus_s["geometry"].map(lambda p: p.y if p != None else None) + bus_s.loc[:, "dc"] = gdf_lines["dc"] + + bus_e[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_e.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( + lambda p: p.geoms[1] if len(p.geoms) >= 2 else None + ) + bus_e.loc[:, "lon"] = bus_e["geometry"].map(lambda p: p.x if p != None else None) + bus_e.loc[:, "lat"] = bus_e["geometry"].map(lambda p: p.y if p != None else None) + bus_e.loc[:, "dc"] = gdf_lines["dc"] + + bus_all = pd.concat([bus_s, bus_e], ignore_index=True) + + # Group gdf_substations by voltage and and geometry (dropping duplicates) + bus_all = bus_all.groupby(["voltage", "lon", "lat", "dc"]).first().reset_index() + bus_all = bus_all[df_substations.columns] + bus_all.loc[:, "bus_id"] = bus_all.apply( + lambda row: f"{prefix}/{row.name + 1}", axis=1 + ) + + # Initialize default values + bus_all["station_id"] = None + # Assuming substations completed for installed lines + bus_all["under_construction"] = False + bus_all["tag_area"] = None + bus_all["symbol"] = "substation" + # TODO: this tag may be improved, maybe depending on voltage levels + bus_all["tag_substation"] = "transmission" + bus_all["tag_source"] = prefix + + buses = pd.concat([df_substations, bus_all], ignore_index=True) + buses.set_index("bus_id", inplace=True) + + # Fix country codes + # TODO pypsa-eur: Temporary solution as long as the shapes have a low, + # incomplete resolution (cf. 
2500 meters for buffering) + bool_multiple_countries = buses["country"].str.contains(";") + gdf_offshore = gpd.read_file(path_offshore_shapes).set_index("name")["geometry"] + gdf_offshore = gpd.GeoDataFrame( + gdf_offshore, geometry=gdf_offshore, crs=gdf_offshore.crs + ) + gdf_countries = gpd.read_file(path_country_shapes).set_index("name")["geometry"] + # reproject to enable buffer + gdf_countries = gpd.GeoDataFrame(geometry=gdf_countries, crs=gdf_countries.crs) + gdf_union = gdf_countries.merge( + gdf_offshore, how="outer", left_index=True, right_index=True + ) + gdf_union["geometry"] = gdf_union.apply( + lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]).union_all(), + axis=1, + ) + gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs=crs) + gdf_buses_tofix = gpd.GeoDataFrame( + buses[bool_multiple_countries], geometry="geometry", crs=crs + ) + joined = gpd.sjoin( + gdf_buses_tofix, gdf_union.reset_index(), how="left", predicate="within" + ) + + # For all remaining rows where the country/index_right column is NaN, find + # find the closest polygon index + joined.loc[joined["name"].isna(), "name"] = joined.loc[ + joined["name"].isna(), "geometry" + ].apply(lambda x: _find_closest_polygon(gdf_union, x)) + + joined.reset_index(inplace=True) + joined = joined.drop_duplicates(subset="bus_id") + joined.set_index("bus_id", inplace=True) + + buses.loc[bool_multiple_countries, "country"] = joined.loc[ + bool_multiple_countries, "name" + ] + + return buses.reset_index() + + +def _import_lines_and_cables(path_lines): + """ + Import lines and cables from the given input paths. + + Parameters: + - path_lines (dict): A dictionary containing the input paths for lines and + cables data. + + Returns: + - df_lines (DataFrame): A DataFrame containing the imported lines and + cables data. + """ + columns = [ + "id", + "bounds", + "nodes", + "geometry", + "country", + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] + df_lines = pd.DataFrame(columns=columns) + + logger.info("Importing lines and cables") + for key in path_lines: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_lines[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_lines[key][idx])) + + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + df["country"] = country + + col_tags = [ + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + df.drop(columns=["type", "tags"], inplace=True) + + df_lines = pd.concat([df_lines, df], axis="rows") + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + + return df_lines + + +def _import_links(path_links): + """ + Import links from the given input paths. + + Parameters: + - path_links (dict): A dictionary containing the input paths for links. + + Returns: + - df_links (DataFrame): A DataFrame containing the imported links data. 
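+
+    Example (with a hypothetical input path):
+        >>> df_links = _import_links(
+        ...     {"links": ["data/osm/raw/BE/links_relation.json"]}
+        ... )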
+ """ + columns = [ + "id", + "bounds", + "nodes", + "geometry", + "country", + "circuits", + "frequency", + "rating", + "voltage", + ] + df_links = pd.DataFrame(columns=columns) + + logger.info("Importing links") + for key in path_links: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_links[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_links[key][idx])) + + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + df["id"] = df["id"].apply(lambda x: (f"relation/{x}")) + df["country"] = country + + col_tags = [ + "circuits", + "frequency", + "rating", + "voltage", + ] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + df.drop(columns=["type", "tags"], inplace=True) + + df_links = pd.concat([df_links, df], axis="rows") + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + logger.info("Dropping lines without rating.") + len_before = len(df_links) + df_links = df_links.dropna(subset=["rating"]) + len_after = len(df_links) + logger.info( + f"Dropped {len_before-len_after} elements without rating. " + + f"Imported {len_after} elements." + ) + + return df_links + + +def _create_single_link(row): + """ + Create a single link from multiple rows within a OSM link relation. + + Parameters: + - row: A row of OSM data containing information about the link. + + Returns: + - single_link: A single LineString representing the link. + + This function takes a row of OSM data and extracts the relevant information + to create a single link. It filters out elements (substations, electrodes) + with invalid roles and finds the longest link based on its endpoints. + If the longest link is a MultiLineString, it extracts the longest + linestring from it. The resulting single link is returned. + """ + valid_roles = ["line", "cable"] + df = pd.json_normalize(row["members"]) + df = df[df["role"].isin(valid_roles)] + df.loc[:, "geometry"] = df.apply(_create_linestring, axis=1) + df.loc[:, "length"] = df["geometry"].apply(lambda x: x.length) + + list_endpoints = [] + for idx, row in df.iterrows(): + tuple = sorted([row["geometry"].coords[0], row["geometry"].coords[-1]]) + # round tuple to 3 decimals + tuple = ( + round(tuple[0][0], 3), + round(tuple[0][1], 3), + round(tuple[1][0], 3), + round(tuple[1][1], 3), + ) + list_endpoints.append(tuple) + + df.loc[:, "endpoints"] = list_endpoints + df_longest = df.loc[df.groupby("endpoints")["length"].idxmin()] + + single_link = linemerge(df_longest["geometry"].values.tolist()) + + # If the longest component is a MultiLineString, extract the longest linestring from it + if isinstance(single_link, MultiLineString): + # Find connected components + components = list(single_link.geoms) + + # Find the longest connected linestring + single_link = max(components, key=lambda x: x.length) + + return single_link + + +def _drop_duplicate_lines(df_lines): + """ + Drop duplicate lines from the given dataframe. 
Duplicates are usually lines + cross-border lines or slightly outside the country border of focus. + + Parameters: + - df_lines (pandas.DataFrame): The dataframe containing lines data. + + Returns: + - df_lines (pandas.DataFrame): The dataframe with duplicate lines removed + and cleaned data. + + This function drops duplicate lines from the given dataframe based on the + 'id' column. It groups the duplicate rows by 'id' and aggregates the + 'country' column to a string split by semicolon, as they appear in multiple + country datasets. One example of the duplicates is kept, accordingly. + Finally, the updated dataframe without multiple duplicates is returned. + """ + logger.info("Dropping duplicate lines.") + duplicate_rows = df_lines[df_lines.duplicated(subset=["id"], keep=False)].copy() + + # Group rows by id and aggregate the country column to a string split by semicolon + grouped_duplicates = ( + duplicate_rows.groupby("id")["country"].agg(lambda x: ";".join(x)).reset_index() + ) + duplicate_rows.drop_duplicates(subset="id", inplace=True) + duplicate_rows.drop(columns=["country"], inplace=True) + duplicate_rows = duplicate_rows.join( + grouped_duplicates.set_index("id"), on="id", how="left" + ) + + len_before = len(df_lines) + # Drop duplicates and update the df_lines dataframe with the cleaned data + df_lines = df_lines[~df_lines["id"].isin(duplicate_rows["id"])] + df_lines = pd.concat([df_lines, duplicate_rows], axis="rows") + len_after = len(df_lines) + + logger.info( + f"Dropped {len_before - len_after} duplicate elements. " + + f"Keeping {len_after} elements." + ) + + return df_lines + + +def _filter_by_voltage(df, min_voltage=200000): + """ + Filter rows in the DataFrame based on the voltage in V. + + Parameters: + - df (pandas.DataFrame): The DataFrame containing the substations or lines data. + - min_voltage (int, optional): The minimum voltage value to filter the + rows. Defaults to 200000 [unit: V]. + + Returns: + - filtered df (pandas.DataFrame): The filtered DataFrame containing + the lines or substations above min_voltage. + - list_voltages (list): A list of unique voltage values above min_voltage. + The type of the list elements is string. + """ + if df.empty: + return df, [] + + logger.info( + f"Filtering dataframe by voltage. Only keeping rows above and including {min_voltage} V." + ) + list_voltages = df["voltage"].str.split(";").explode().unique().astype(str) + # Keep numeric strings + list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)] + list_voltages = list_voltages.astype(int) + list_voltages = list_voltages[list_voltages >= int(min_voltage)] + list_voltages = list_voltages.astype(str) + + bool_voltages = df["voltage"].apply(_check_voltage, list_voltages=list_voltages) + len_before = len(df) + df = df[bool_voltages] + len_after = len(df) + logger.info( + f"Dropped {len_before - len_after} elements with voltage below {min_voltage}. " + + f"Keeping {len_after} elements." + ) + + return df, list_voltages + + +def _clean_substations(df_substations, list_voltages): + """ + Clean the substation data by performing the following steps: + - Split cells in the dataframe. + - Filter substation data based on specified voltages. + - Update the frequency values based on the split count. + - Split cells in the 'frequency' column. + - Set remaining invalid frequency values that are not in ['0', '50'] + to '50'. + + Parameters: + - df_substations (pandas.DataFrame): The input dataframe containing + substation data. 
+ - list_voltages (list): A list of voltages above min_voltage to filter the + substation data. + + Returns: + - df_substations (pandas.DataFrame): The cleaned substation dataframe. + """ + df_substations = df_substations.copy() + + df_substations = _split_cells(df_substations) + + bool_voltages = df_substations["voltage"].apply( + _check_voltage, list_voltages=list_voltages + ) + df_substations = df_substations[bool_voltages] + df_substations.loc[:, "split_count"] = df_substations["id"].apply( + lambda x: x.split("-")[1] if "-" in x else "0" + ) + df_substations.loc[:, "split_count"] = df_substations["split_count"].astype(int) + + bool_split = df_substations["split_elements"] > 1 + bool_frequency_len = ( + df_substations["frequency"].apply(lambda x: len(x.split(";"))) + == df_substations["split_elements"] + ) + + op_freq = lambda row: row["frequency"].split(";")[row["split_count"] - 1] + + df_substations.loc[bool_frequency_len & bool_split, "frequency"] = ( + df_substations.loc[bool_frequency_len & bool_split,].apply(op_freq, axis=1) + ) + + df_substations = _split_cells(df_substations, cols=["frequency"]) + bool_invalid_frequency = df_substations["frequency"].apply( + lambda x: x not in ["50", "0"] + ) + df_substations.loc[bool_invalid_frequency, "frequency"] = "50" + + return df_substations + + +def _clean_lines(df_lines, list_voltages): + """ + Cleans and processes the `df_lines` DataFrame heuristically based on the + information available per respective line and cable. Further checks to + ensure data consistency and completeness. + + Parameters + ---------- + df_lines : pandas.DataFrame + The input DataFrame containing line information with columns such as + 'voltage', 'circuits', 'frequency', 'cables', 'split_elements', 'id', + etc. + list_voltages : list + A list of unique voltage values above a certain threshold. (type: str) + + Returns + ------- + df_lines : pandas.DataFrame + The cleaned DataFrame with updated columns 'circuits', 'frequency', and + 'cleaned' to reflect the applied transformations. + + Description + ----------- + This function performs the following operations: + + - Initializes a 'cleaned' column with False, step-wise updates to True + following the respective cleaning step. + - Splits the voltage cells in the DataFrame at semicolons using a helper + function `_split_cells`. + - Filters the DataFrame to only include rows with valid voltages. + - Sets circuits of remaining lines without any applicable heuristic equal + to 1. + + The function ensures that the resulting DataFrame has consistent and + complete information for further processing or analysis while maintaining + the data of the original OSM data set wherever possible. + """ + logger.info("Cleaning lines and determining circuits.") + # Initiate boolean with False, only set to true if all cleaning steps are + # passed + df_lines = df_lines.copy() + df_lines["cleaned"] = False + + df_lines["voltage_original"] = df_lines["voltage"] + df_lines["circuits_original"] = df_lines["circuits"] + + df_lines = _split_cells(df_lines) + bool_voltages = df_lines["voltage"].apply( + _check_voltage, list_voltages=list_voltages + ) + df_lines = df_lines[bool_voltages] + + bool_ac = df_lines["frequency"] != "0" + bool_dc = ~bool_ac + valid_frequency = ["50", "0"] + bool_invalid_frequency = df_lines["frequency"].apply( + lambda x: x not in valid_frequency + ) + + bool_noinfo = (df_lines["cables"] == "") & (df_lines["circuits"] == "") + # Fill in all values where cables info and circuits does not exist. 
Assuming 1 circuit + df_lines.loc[bool_noinfo, "circuits"] = "1" + df_lines.loc[bool_noinfo & bool_invalid_frequency, "frequency"] = "50" + df_lines.loc[bool_noinfo, "cleaned"] = True + + # Fill in all values where cables info exists and split_elements == 1 + bool_cables_ac = ( + (df_lines["cables"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["cables"] != "0") + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"] == "") + & (df_lines["cleaned"] == False) + & bool_ac + ) + + df_lines.loc[bool_cables_ac, "circuits"] = df_lines.loc[ + bool_cables_ac, "cables" + ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 3))))) + + df_lines.loc[bool_cables_ac, "frequency"] = "50" + df_lines.loc[bool_cables_ac, "cleaned"] = True + + bool_cables_dc = ( + (df_lines["cables"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["cables"] != "0") + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"] == "") + & (df_lines["cleaned"] == False) + & bool_dc + ) + + df_lines.loc[bool_cables_dc, "circuits"] = df_lines.loc[ + bool_cables_dc, "cables" + ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 2))))) + + df_lines.loc[bool_cables_dc, "frequency"] = "0" + df_lines.loc[bool_cables_dc, "cleaned"] = True + + # Fill in all values where circuits info exists and split_elements == 1 + bool_lines = ( + (df_lines["circuits"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["circuits"] != "0") + & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["cleaned"] == False) + ) + + df_lines.loc[bool_lines & bool_ac, "frequency"] = "50" + df_lines.loc[bool_lines & bool_dc, "frequency"] = "0" + df_lines.loc[bool_lines, "cleaned"] = True + + # Clean those values where number of voltages split by semicolon is larger + # than no cables or no circuits + bool_cables = ( + (df_lines["voltage_original"].apply(lambda x: len(x.split(";")) > 1)) + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["cleaned"] == False) + ) + + df_lines.loc[bool_cables, "circuits"] = df_lines[bool_cables].apply( + _distribute_to_circuits, axis=1 + ) + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # Clean those values where multiple circuit values are present, divided by + # semicolon + has_multiple_circuits = df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1) + circuits_match_split_elements = df_lines.apply( + lambda row: len(row["circuits"].split(";")) == row["split_elements"], + axis=1, + ) + is_not_cleaned = df_lines["cleaned"] == False + bool_cables = has_multiple_circuits & circuits_match_split_elements & is_not_cleaned + + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( + lambda row: str(row["circuits"].split(";")[int(row["id"].split("-")[-1]) - 1]), + axis=1, + ) + + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # Clean those values where multiple cables values are present, divided by + # semicolon + has_multiple_cables = df_lines["cables"].apply(lambda x: len(x.split(";")) > 1) + cables_match_split_elements = df_lines.apply( + lambda row: len(row["cables"].split(";")) == row["split_elements"], + axis=1, + ) + is_not_cleaned = df_lines["cleaned"] == 
False + bool_cables = has_multiple_cables & cables_match_split_elements & is_not_cleaned + + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( + lambda row: str( + max( + 1, + np.floor_divide( + int(row["cables"].split(";")[int(row["id"].split("-")[-1]) - 1]), 3 + ), + ) + ), + axis=1, + ) + + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # All remaining lines to circuits == 1 + bool_leftover = df_lines["cleaned"] == False + if sum(bool_leftover) > 0: + str_id = "; ".join(str(id) for id in df_lines.loc[bool_leftover, "id"]) + logger.info(f"Setting circuits of remaining {sum(bool_leftover)} lines to 1...") + logger.info(f"Lines affected: {str_id}") + + df_lines.loc[bool_leftover, "circuits"] = "1" + df_lines.loc[bool_leftover & bool_ac, "frequency"] = "50" + df_lines.loc[bool_leftover & bool_dc, "frequency"] = "0" + df_lines.loc[bool_leftover, "cleaned"] = True + + return df_lines + + +def _create_substations_geometry(df_substations): + """ + Creates geometries. + + Parameters: + df_substations (DataFrame): The input DataFrame containing the substations + data. + + Returns: + df_substations (DataFrame): A new DataFrame with the + polygons ["polygon"] of the substations geometries. + """ + logger.info("Creating substations geometry.") + df_substations = df_substations.copy() + + # Create centroids from geometries and keep the original polygons + df_substations.loc[:, "polygon"] = df_substations["geometry"] + + return df_substations + + +def _create_substations_centroid(df_substations): + """ + Creates centroids from geometries and keeps the original polygons. + + Parameters: + df_substations (DataFrame): The input DataFrame containing the substations + data. + + Returns: + df_substations (DataFrame): A new DataFrame with the centroids ["geometry"] + and polygons ["polygon"] of the substations geometries. + """ + logger.info("Creating substations geometry.") + df_substations = df_substations.copy() + + df_substations.loc[:, "geometry"] = df_substations["polygon"].apply( + lambda x: x.centroid + ) + + df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) + df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) + + return df_substations + + +def _create_lines_geometry(df_lines): + """ + Create line geometry for the given DataFrame of lines. + + Parameters: + - df_lines (pandas.DataFrame): DataFrame containing lines data. + + Returns: + - df_lines (pandas.DataFrame): DataFrame with transformed 'geometry' + column (type: shapely LineString). + + Notes: + - This function transforms 'geometry' column in the input DataFrame by + applying the '_create_linestring' function to each row. + - It then drops rows where the geometry has equal start and end points, + as these are usually not lines but outlines of areas. + """ + logger.info("Creating lines geometry.") + df_lines = df_lines.copy() + df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1) + + bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) + df_lines = df_lines[~bool_circle] + + return df_lines + + +def _add_bus_centroid_to_line(linestring, point): + """ + Adds the centroid of a substation to a linestring by extending the + linestring with a new segment. + + Parameters: + linestring (LineString): The original linestring to extend. + point (Point): The centroid of the bus. 
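+
+    The new segment is attached to whichever end of the linestring is closer
+    to the given centroid and merged with the original geometry via
+    ``linemerge``.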
+ + Returns: + merged (LineString): The extended linestring with the new segment. + """ + start = linestring.coords[0] + end = linestring.coords[-1] + + dist_to_start = point.distance(Point(start)) + dist_to_end = point.distance(Point(end)) + + if dist_to_start < dist_to_end: + new_segment = LineString([point.coords[0], start]) + else: + new_segment = LineString([point.coords[0], end]) + + merged = linemerge([linestring, new_segment]) + + return merged + + +def _finalise_substations(df_substations): + """ + Finalises the substations column types. + + Args: + df_substations (pandas.DataFrame): The input DataFrame + containing substations data. + + Returns: + df_substations (pandas.DataFrame(): The DataFrame with finalised column + types and transformed data. + """ + logger.info("Finalising substations column types.") + df_substations = df_substations.copy() + # rename columns + df_substations.rename( + columns={ + "id": "bus_id", + "power": "symbol", + "substation": "tag_substation", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step + df_substations.loc[:, "symbol"] = "substation" + df_substations.loc[:, "tag_substation"] = "transmission" + df_substations.loc[:, "dc"] = False + df_substations.loc[df_substations["frequency"] == "0", "dc"] = True + df_substations.loc[:, "under_construction"] = False + df_substations.loc[:, "station_id"] = None + df_substations.loc[:, "tag_area"] = None + df_substations.loc[:, "tag_source"] = df_substations["bus_id"] + + # Only included needed columns + df_substations = df_substations[ + [ + "bus_id", + "symbol", + "tag_substation", + "voltage", + "lon", + "lat", + "dc", + "under_construction", + "station_id", + "tag_area", + "country", + "geometry", + "polygon", + "tag_source", + ] + ] + + # Substation data types + df_substations["voltage"] = df_substations["voltage"].astype(int) + + return df_substations + + +def _finalise_lines(df_lines): + """ + Finalises the lines column types. + + Args: + df_lines (pandas.DataFrame): The input DataFrame containing lines data. + + Returns: + df_lines (pandas.DataFrame(): The DataFrame with finalised column types + and transformed data. + """ + logger.info("Finalising lines column types.") + df_lines = df_lines.copy() + # Rename columns + df_lines.rename( + columns={ + "id": "line_id", + "power": "tag_type", + "frequency": "tag_frequency", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step + df_lines.loc[:, "bus0"] = None + df_lines.loc[:, "bus1"] = None + df_lines.loc[:, "length"] = None + df_lines.loc[:, "underground"] = False + df_lines.loc[df_lines["tag_type"] == "line", "underground"] = False + df_lines.loc[df_lines["tag_type"] == "cable", "underground"] = True + df_lines.loc[:, "under_construction"] = False + df_lines.loc[:, "dc"] = False + df_lines.loc[df_lines["tag_frequency"] == "50", "dc"] = False + df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True + + # Only include needed columns + df_lines = df_lines[ + [ + "line_id", + "circuits", + "tag_type", + "voltage", + "tag_frequency", + "bus0", + "bus1", + "length", + "underground", + "under_construction", + "dc", + "country", + "geometry", + ] + ] + + df_lines["circuits"] = df_lines["circuits"].astype(int) + df_lines["voltage"] = df_lines["voltage"].astype(int) + df_lines["tag_frequency"] = df_lines["tag_frequency"].astype(int) + + return df_lines + + +def _finalise_links(df_links): + """ + Finalises the links column types. 
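+    All links are treated as underground HVDC connections (``underground`` and
+    ``dc`` are set to True) and the cleaned ``rating`` is used as ``p_nom``.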
+ + Args: + df_links (pandas.DataFrame): The input DataFrame containing links data. + + Returns: + df_links (pandas.DataFrame(): The DataFrame with finalised column types + and transformed data. + """ + logger.info("Finalising links column types.") + df_links = df_links.copy() + # Rename columns + df_links.rename( + columns={ + "id": "link_id", + "rating": "p_nom", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step + df_links["bus0"] = None + df_links["bus1"] = None + df_links["length"] = None + df_links["underground"] = True + df_links["under_construction"] = False + df_links["dc"] = True + + # Only include needed columns + df_links = df_links[ + [ + "link_id", + "voltage", + "p_nom", + "bus0", + "bus1", + "length", + "underground", + "under_construction", + "dc", + "country", + "geometry", + ] + ] + + df_links["p_nom"] = df_links["p_nom"].astype(int) + df_links["voltage"] = df_links["voltage"].astype(int) + + return df_links + + +def _import_substations(path_substations): + """ + Import substations from the given input paths. This function imports both + substations from OSM ways as well as relations that contain nested + information on the substations shape and electrical parameters. Ways and + relations are subsequently concatenated to form a single DataFrame + containing unique bus ids. + + Args: + path_substations (dict): A dictionary containing input paths for + substations. + + Returns: + pd.DataFrame: A DataFrame containing the imported substations data. + """ + cols_substations_way = [ + "id", + "geometry", + "country", + "power", + "substation", + "voltage", + "frequency", + ] + cols_substations_relation = [ + "id", + "country", + "power", + "substation", + "voltage", + "frequency", + ] + df_substations_way = pd.DataFrame(columns=cols_substations_way) + df_substations_relation = pd.DataFrame(columns=cols_substations_relation) + + logger.info("Importing substations") + for key in path_substations: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_substations[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_substations[key][idx])) + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + # new string that adds "way/" to id + df["id"] = df["id"].apply( + lambda x: ( + f"way/{x}" if key == "substations_way" else f"relation/{x}" + ) + ) + df["country"] = country + + col_tags = ["power", "substation", "voltage", "frequency"] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + + if key == "substations_way": + df.drop(columns=["type", "tags", "bounds", "nodes"], inplace=True) + df_substations_way = pd.concat( + [df_substations_way, df], axis="rows" + ) + elif key == "substations_relation": + df.drop(columns=["type", "tags", "bounds"], inplace=True) + df_substations_relation = pd.concat( + [df_substations_relation, df], axis="rows" + ) + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + + df_substations_way.drop_duplicates(subset="id", 
keep="first", inplace=True) + df_substations_relation.drop_duplicates(subset="id", keep="first", inplace=True) + + df_substations_way["geometry"] = df_substations_way.apply(_create_polygon, axis=1) + + # Normalise the members column of df_substations_relation + cols_members = ["id", "type", "ref", "role", "geometry"] + df_substations_relation_members = pd.DataFrame(columns=cols_members) + + for index, row in df_substations_relation.iterrows(): + col_members = ["type", "ref", "role", "geometry"] + df = pd.json_normalize(row["members"]) + + for cm in col_members: + if cm not in df.columns: + df[cm] = pd.NA + + df = df.loc[:, col_members] + df["id"] = str(row["id"]) + df["ref"] = df["ref"].astype(str) + df = df[df["type"] != "node"] + df = df.dropna(subset=["geometry"]) + df = df[~df["role"].isin(["", "incoming_line", "substation", "inner"])] + df_substations_relation_members = pd.concat( + [df_substations_relation_members, df], axis="rows" + ) + + df_substations_relation_members.reset_index(inplace=True) + df_substations_relation_members["linestring"] = ( + df_substations_relation_members.apply(_create_linestring, axis=1) + ) + df_substations_relation_members_grouped = ( + df_substations_relation_members.groupby("id")["linestring"] + .apply(lambda x: linemerge(x.tolist())) + .reset_index() + ) + df_substations_relation_members_grouped["geometry"] = ( + df_substations_relation_members_grouped["linestring"].apply( + lambda x: x.convex_hull + ) + ) + + df_substations_relation = ( + df_substations_relation.join( + df_substations_relation_members_grouped.set_index("id"), on="id", how="left" + ) + .drop(columns=["members", "linestring"]) + .dropna(subset=["geometry"]) + ) + + # reorder columns and concatenate + df_substations_relation = df_substations_relation[cols_substations_way] + df_substations = pd.concat( + [df_substations_way, df_substations_relation], axis="rows" + ) + + return df_substations + + +def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): + """ + Removes lines that are within substation polygons from the given + GeoDataFrame of lines. These are not needed to create network (e.g. bus + bars, switchgear, etc.) + + Parameters: + - gdf_lines (GeoDataFrame): A GeoDataFrame containing lines with 'line_id' + and 'geometry' columns. + - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing + substation polygons. + + Returns: + GeoDataFrame: A new GeoDataFrame without lines within substation polygons. + """ + logger.info("Identifying and removing lines within substation polygons...") + gdf = gpd.sjoin( + gdf_lines[["line_id", "geometry"]], + gdf_substations_polygon, + how="inner", + predicate="within", + )["line_id"] + + logger.info( + f"Removed {len(gdf)} lines within substations of original {len(gdf_lines)} lines." + ) + gdf_lines = gdf_lines[~gdf_lines["line_id"].isin(gdf)] + + return gdf_lines + + +def _merge_touching_polygons(df): + """ + Merge touching polygons in a GeoDataFrame. + + Parameters: + - df: pandas.DataFrame or geopandas.GeoDataFrame + The input DataFrame containing the polygons to be merged. + + Returns: + - gdf: geopandas.GeoDataFrame + The GeoDataFrame with merged polygons. 
+ """ + + gdf = gpd.GeoDataFrame(df, geometry="polygon", crs=crs) + combined_polygons = unary_union(gdf.geometry) + if combined_polygons.geom_type == "MultiPolygon": + gdf_combined = gpd.GeoDataFrame( + geometry=[poly for poly in combined_polygons.geoms], crs=crs + ) + else: + gdf_combined = gpd.GeoDataFrame(geometry=[combined_polygons], crs=crs) + + gdf.reset_index(drop=True, inplace=True) + + for i, combined_geom in gdf_combined.iterrows(): + mask = gdf.intersects(combined_geom.geometry) + gdf.loc[mask, "polygon_merged"] = combined_geom.geometry + + gdf.drop(columns=["polygon"], inplace=True) + gdf.rename(columns={"polygon_merged": "polygon"}, inplace=True) + + return gdf + + +def _add_endpoints_to_line(linestring, polygon_dict): + """ + Adds endpoints to a line by removing any overlapping areas with polygons. + + Parameters: + linestring (LineString): The original line to add endpoints to. + polygon_dict (dict): A dictionary of polygons, where the keys are bus IDs and the values are the corresponding polygons. + + Returns: + LineString: The modified line with added endpoints. + """ + if not polygon_dict: + return linestring + polygon_centroids = { + bus_id: polygon.centroid for bus_id, polygon in polygon_dict.items() + } + polygon_unary = polygons = unary_union(list(polygon_dict.values())) + + # difference with polygon + linestring_new = linestring.difference(polygon_unary) + + if type(linestring_new) == MultiLineString: + # keep the longest line in the multilinestring + linestring_new = max(linestring_new.geoms, key=lambda x: x.length) + + for p in polygon_centroids: + linestring_new = _add_bus_centroid_to_line(linestring_new, polygon_centroids[p]) + + return linestring_new + + +def _get_polygons_at_endpoints(linestring, polygon_dict): + """ + Get the polygons that contain the endpoints of a given linestring. + + Parameters: + linestring (LineString): The linestring for which to find the polygons at the endpoints. + polygon_dict (dict): A dictionary containing polygons as values, with bus_ids as keys. + + Returns: + dict: A dictionary containing bus_ids as keys and polygons as values, where the polygons contain the endpoints of the linestring. + """ + # Get the endpoints of the linestring + start_point = Point(linestring.coords[0]) + end_point = Point(linestring.coords[-1]) + + # Initialize dictionary to store bus_ids as keys and polygons as values + bus_id_polygon_dict = {} + + for bus_id, polygon in polygon_dict.items(): + if polygon.contains(start_point) or polygon.contains(end_point): + bus_id_polygon_dict[bus_id] = polygon + + return bus_id_polygon_dict + + +def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): + """ + Extends the lines in the given GeoDataFrame `gdf_lines` to the centroid of + the nearest substations represented by the polygons in the + `gdf_substations_polygon` GeoDataFrame. + + Parameters: + gdf_lines (GeoDataFrame): A GeoDataFrame containing the lines to be extended. + gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing the polygons representing substations. + + Returns: + GeoDataFrame: A new GeoDataFrame with the lines extended to the substations. 
+ """ + gdf = gpd.sjoin( + gdf_lines, + gdf_substations_polygon.drop_duplicates(subset="polygon", inplace=False), + how="left", + lsuffix="line", + rsuffix="bus", + predicate="intersects", + ).drop(columns="index_bus") + + # Group by 'line_id' and create a dictionary mapping 'bus_id' to 'geometry_bus', excluding the grouping columns + gdf = ( + gdf.groupby("line_id") + .apply( + lambda x: x[["bus_id", "geometry_bus"]] + .dropna() + .set_index("bus_id")["geometry_bus"] + .to_dict(), + include_groups=False, + ) + .reset_index() + ) + gdf.columns = ["line_id", "bus_dict"] + + gdf["intersects_bus"] = gdf.apply(lambda row: len(row["bus_dict"]) > 0, axis=1) + + gdf.loc[:, "line_geometry"] = gdf.join( + gdf_lines.set_index("line_id")["geometry"], on="line_id" + )["geometry"] + + # Polygons at the endpoints of the linestring + gdf["bus_endpoints"] = gdf.apply( + lambda row: _get_polygons_at_endpoints(row["line_geometry"], row["bus_dict"]), + axis=1, + ) + + gdf.loc[:, "line_geometry_new"] = gdf.apply( + lambda row: _add_endpoints_to_line(row["line_geometry"], row["bus_endpoints"]), + axis=1, + ) + + gdf.set_index("line_id", inplace=True) + gdf_lines.set_index("line_id", inplace=True) + + gdf_lines.loc[:, "geometry"] = gdf["line_geometry_new"] + + return gdf_lines + + +# Function to bridge gaps between all lines + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("clean_osm_data") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + # Parameters + crs = "EPSG:4326" # Correct crs for OSM data + min_voltage_ac = 200000 # [unit: V] Minimum voltage value to filter AC lines. + min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. + + logger.info("---") + logger.info("SUBSTATIONS") + # Input + path_substations = { + "substations_way": snakemake.input.substations_way, + "substations_relation": snakemake.input.substations_relation, + } + + # Cleaning process + df_substations = _import_substations(path_substations) + df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) + df_substations, list_voltages = _filter_by_voltage( + df_substations, min_voltage=min_voltage_ac + ) + df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) + df_substations = _clean_substations(df_substations, list_voltages) + df_substations = _create_substations_geometry(df_substations) + + # Merge touching polygons + df_substations = _merge_touching_polygons(df_substations) + df_substations = _create_substations_centroid(df_substations) + df_substations = _finalise_substations(df_substations) + + # Create polygon GeoDataFrame to remove lines within substations + gdf_substations_polygon = gpd.GeoDataFrame( + df_substations[["bus_id", "polygon", "voltage"]], + geometry="polygon", + crs=crs, + ) + + gdf_substations_polygon["geometry"] = gdf_substations_polygon.polygon.copy() + + logger.info("---") + logger.info("LINES AND CABLES") + path_lines = { + "lines": snakemake.input.lines_way, + "cables": snakemake.input.cables_way, + } + + # Cleaning process + df_lines = _import_lines_and_cables(path_lines) + df_lines = _drop_duplicate_lines(df_lines) + df_lines.loc[:, "voltage"] = _clean_voltage(df_lines["voltage"]) + df_lines, list_voltages = _filter_by_voltage(df_lines, min_voltage=min_voltage_ac) + df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) + df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) + df_lines.loc[:, "frequency"] = 
_clean_frequency(df_lines["frequency"]) + df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) + df_lines = _clean_lines(df_lines, list_voltages) + + # Drop DC lines, will be added through relations later + len_before = len(df_lines) + df_lines = df_lines[df_lines["frequency"] == "50"] + len_after = len(df_lines) + logger.info( + f"Dropped {len_before - len_after} DC lines. Keeping {len_after} AC lines." + ) + + df_lines = _create_lines_geometry(df_lines) + df_lines = _finalise_lines(df_lines) + + # Create GeoDataFrame + gdf_lines = gpd.GeoDataFrame(df_lines, geometry="geometry", crs=crs) + gdf_lines = _remove_lines_within_substations(gdf_lines, gdf_substations_polygon) + gdf_lines = _extend_lines_to_substations(gdf_lines, gdf_substations_polygon) + + logger.info("---") + logger.info("HVDC LINKS") + path_links = { + "links": snakemake.input.links_relation, + } + + df_links = _import_links(path_links) + + df_links = _drop_duplicate_lines(df_links) + df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) + df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) + # Keep only highest voltage of split string + df_links.loc[:, "voltage"] = df_links["voltage"].apply( + lambda x: str(max(map(int, x.split(";")))) + ) + df_links.loc[:, "frequency"] = _clean_frequency(df_links["frequency"]) + df_links.loc[:, "rating"] = _clean_rating(df_links["rating"]) + + df_links.loc[:, "geometry"] = df_links.apply(_create_single_link, axis=1) + df_links = _finalise_links(df_links) + gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs).set_index( + "link_id" + ) + + # Add line endings to substations + path_country_shapes = snakemake.input.country_shapes + path_offshore_shapes = snakemake.input.offshore_shapes + + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, + prefix="line-end", + ) + + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_links, + path_country_shapes, + path_offshore_shapes, + prefix="link-end", + ) + + # Drop polygons and create GDF + gdf_substations = gpd.GeoDataFrame( + df_substations.drop(columns=["polygon"]), geometry="geometry", crs=crs + ) + + output_substations_polygon = snakemake.output["substations_polygon"] + output_substations = snakemake.output["substations"] + output_lines = snakemake.output["lines"] + output_links = snakemake.output["links"] + + logger.info( + f"Exporting clean substations with polygon shapes to {output_substations_polygon}" + ) + gdf_substations_polygon.drop(columns=["geometry"]).to_file( + output_substations_polygon, driver="GeoJSON" + ) + logger.info(f"Exporting clean substations to {output_substations}") + gdf_substations.to_file(output_substations, driver="GeoJSON") + logger.info(f"Exporting clean lines to {output_lines}") + gdf_lines.to_file(output_lines, driver="GeoJSON") + logger.info(f"Exporting clean links to {output_links}") + gdf_links.to_file(output_links, driver="GeoJSON") + + logger.info("Cleaning OSM data completed.") diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 79184167e..b63747c24 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -36,7 +36,7 @@ - ``resources/regions_offshore_elec_s{simpl}.geojson``: confer :ref:`simplify` - ``resources/busmap_elec_s{simpl}.csv``: confer :ref:`simplify` - ``networks/elec_s{simpl}.nc``: confer :ref:`simplify` -- ``data/custom_busmap_elec_s{simpl}_{clusters}.csv``: optional input +- 
``data/custom_busmap_elec_s{simpl}_{clusters}_{base_network}.csv``: optional input Outputs ------- @@ -511,9 +511,7 @@ def plot_busmap_for_n_clusters(n, n_clusters, solver_name="scip", fn=None): # Fast-path if no clustering is necessary busmap = n.buses.index.to_series() linemap = n.lines.index.to_series() - clustering = pypsa.clustering.spatial.Clustering( - n, busmap, linemap, linemap, pd.Series(dtype="O") - ) + clustering = pypsa.clustering.spatial.Clustering(n, busmap, linemap) else: Nyears = n.snapshot_weightings.objective.sum() / 8760 diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py new file mode 100644 index 000000000..ac6b25354 --- /dev/null +++ b/scripts/prepare_osm_network_release.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +import logging +import os + +import pandas as pd +import pypsa +from _helpers import configure_logging, set_scenario_config + +logger = logging.getLogger(__name__) + + +BUSES_COLUMNS = [ + "bus_id", + "voltage", + "dc", + "symbol", + "under_construction", + "x", + "y", + "country", + "geometry", +] +LINES_COLUMNS = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", +] +LINKS_COLUMNS = [ + "link_id", + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "underground", + "under_construction", + "geometry", +] +TRANSFORMERS_COLUMNS = [ + "transformer_id", + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "geometry", +] +CONVERTERS_COLUMNS = [ + "converter_id", + "bus0", + "bus1", + "voltage", + "geometry", +] + + +def export_clean_csv(df, columns, output_file): + """ + Export a cleaned DataFrame to a CSV file. + + Args: + df (pandas.DataFrame): The DataFrame to be exported. + columns (list): A list of column names to include in the exported CSV file. + output_file (str): The path to the output CSV file. 
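+
+    Note:
+        PyPSA component indices (e.g. "Bus", "Line", "Link") are renamed to
+        the release schema (e.g. "bus_id", "line_id") and booleans are
+        written as "t"/"f".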
+ + Returns: + None + """ + rename_dict = { + "Bus": "bus_id", + "Line": "line_id", + "Link": "link_id", + "Transformer": "transformer_id", + "v_nom": "voltage", + "num_parallel": "circuits", + } + + if "converter_id" in columns: + rename_dict["Link"] = "converter_id" + + df.reset_index().rename(columns=rename_dict).loc[:, columns].replace( + {True: "t", False: "f"} + ).to_csv(output_file, index=False, quotechar="'") + + return None + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("prepare_osm_network_release") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + network = pypsa.Network(snakemake.input.base_network) + + network.buses["dc"] = network.buses.pop("carrier").map({"DC": "t", "AC": "f"}) + network.lines.length = network.lines.length * 1e3 + network.links.length = network.links.length * 1e3 + + # Export to clean csv for release + logger.info(f"Exporting {len(network.buses)} buses to %s", snakemake.output.buses) + export_clean_csv(network.buses, BUSES_COLUMNS, snakemake.output.buses) + + logger.info( + f"Exporting {len(network.transformers)} transformers to %s", + snakemake.output.transformers, + ) + export_clean_csv( + network.transformers, TRANSFORMERS_COLUMNS, snakemake.output.transformers + ) + + logger.info(f"Exporting {len(network.lines)} lines to %s", snakemake.output.lines) + export_clean_csv(network.lines, LINES_COLUMNS, snakemake.output.lines) + + # Boolean that specifies if link element is a converter + is_converter = network.links.index.str.startswith("conv") == True + + logger.info( + f"Exporting {len(network.links[~is_converter])} links to %s", + snakemake.output.links, + ) + export_clean_csv( + network.links[~is_converter], LINKS_COLUMNS, snakemake.output.links + ) + + logger.info( + f"Exporting {len(network.links[is_converter])} converters to %s", + snakemake.output.converters, + ) + export_clean_csv( + network.links[is_converter], CONVERTERS_COLUMNS, snakemake.output.converters + ) + + logger.info("Export of OSM network for release complete.") diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py index 71b504391..b6644d6dc 100644 --- a/scripts/prepare_sector_network.py +++ b/scripts/prepare_sector_network.py @@ -539,7 +539,24 @@ def add_carrier_buses(n, carrier, nodes=None): unit = "MWh_LHV" if carrier == "gas" else "MWh_th" # preliminary value for non-gas carriers to avoid zeros - capital_cost = costs.at["gas storage", "fixed"] if carrier == "gas" else 0.02 + if carrier == "gas": + capital_cost = costs.at["gas storage", "fixed"] + elif carrier == "oil": + # based on https://www.engineeringtoolbox.com/fuels-higher-calorific-values-d_169.html + mwh_per_m3 = 44.9 * 724 * 0.278 * 1e-3 # MJ/kg * kg/m3 * kWh/MJ * MWh/kWh + capital_cost = ( + costs.at["General liquid hydrocarbon storage (product)", "fixed"] + / mwh_per_m3 + ) + elif carrier == "methanol": + # based on https://www.engineeringtoolbox.com/fossil-fuels-energy-content-d_1298.html + mwh_per_m3 = 5.54 * 791 * 1e-3 # kWh/kg * kg/m3 * MWh/kWh + capital_cost = ( + costs.at["General liquid hydrocarbon storage (product)", "fixed"] + / mwh_per_m3 + ) + else: + capital_cost = 0.1 n.madd("Bus", nodes, location=location, carrier=carrier, unit=unit) @@ -2999,24 +3016,7 @@ def add_industry(n, costs): # methanol for industry - n.madd( - "Bus", - spatial.methanol.nodes, - carrier="methanol", - location=spatial.methanol.locations, - unit="MWh_LHV", - ) - - n.madd( - "Store", - 
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 71b504391..b6644d6dc 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -539,7 +539,24 @@ def add_carrier_buses(n, carrier, nodes=None):
     unit = "MWh_LHV" if carrier == "gas" else "MWh_th"

     # preliminary value for non-gas carriers to avoid zeros
-    capital_cost = costs.at["gas storage", "fixed"] if carrier == "gas" else 0.02
+    if carrier == "gas":
+        capital_cost = costs.at["gas storage", "fixed"]
+    elif carrier == "oil":
+        # based on https://www.engineeringtoolbox.com/fuels-higher-calorific-values-d_169.html
+        mwh_per_m3 = 44.9 * 724 * 0.278 * 1e-3  # MJ/kg * kg/m3 * kWh/MJ * MWh/kWh
+        capital_cost = (
+            costs.at["General liquid hydrocarbon storage (product)", "fixed"]
+            / mwh_per_m3
+        )
+    elif carrier == "methanol":
+        # based on https://www.engineeringtoolbox.com/fossil-fuels-energy-content-d_1298.html
+        mwh_per_m3 = 5.54 * 791 * 1e-3  # kWh/kg * kg/m3 * MWh/kWh
+        capital_cost = (
+            costs.at["General liquid hydrocarbon storage (product)", "fixed"]
+            / mwh_per_m3
+        )
+    else:
+        capital_cost = 0.1

     n.madd("Bus", nodes, location=location, carrier=carrier, unit=unit)
@@ -2999,24 +3016,7 @@ def add_industry(n, costs):

     # methanol for industry

-    n.madd(
-        "Bus",
-        spatial.methanol.nodes,
-        carrier="methanol",
-        location=spatial.methanol.locations,
-        unit="MWh_LHV",
-    )
-
-    n.madd(
-        "Store",
-        spatial.methanol.nodes,
-        suffix=" Store",
-        bus=spatial.methanol.nodes,
-        e_nom_extendable=True,
-        e_cyclic=True,
-        carrier="methanol",
-        capital_cost=0.02,
-    )
+    add_carrier_buses(n, "methanol")

     n.madd(
         "Bus",
@@ -3187,38 +3187,10 @@ def add_industry(n, costs):
         ],  # CO2 intensity methanol based on stoichiometric calculation with 22.7 GJ/t methanol (32 g/mol), CO2 (44 g/mol), 277.78 MWh/TJ = 0.218 t/MWh
     )

-    if "oil" not in n.buses.carrier.unique():
-        n.madd(
-            "Bus",
-            spatial.oil.nodes,
-            location=spatial.oil.locations,
-            carrier="oil",
-            unit="MWh_LHV",
-        )
-
-    if "oil" not in n.stores.carrier.unique():
-        # could correct to e.g. 0.001 EUR/kWh * annuity and O&M
-        n.madd(
-            "Store",
-            spatial.oil.nodes,
-            suffix=" Store",
-            bus=spatial.oil.nodes,
-            e_nom_extendable=True,
-            e_cyclic=True,
-            carrier="oil",
-        )
+    if shipping_oil_share:

-    if options.get("fossil_fuels", True) and "oil" not in n.generators.carrier.unique():
-        n.madd(
-            "Generator",
-            spatial.oil.nodes,
-            bus=spatial.oil.nodes,
-            p_nom_extendable=True,
-            carrier="oil",
-            marginal_cost=costs.at["oil", "fuel"],
-        )
+        add_carrier_buses(n, "oil")

-    if shipping_oil_share:
         p_set_oil = shipping_oil_share * p_set.rename(lambda x: x + " shipping oil")

         if not options["regional_oil_demand"]:
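As a sanity check on the unit conversions above, a worked version with a hypothetical cost input (storage_cost_per_m3 stands in for the technology-data entry "General liquid hydrocarbon storage (product)"; its value is a placeholder, not taken from the PR):

# Placeholder for costs.at["General liquid hydrocarbon storage (product)", "fixed"]
storage_cost_per_m3 = 0.1  # EUR/m3/a, hypothetical value for illustration

# oil: 44.9 MJ/kg * 724 kg/m3 * 0.278 kWh/MJ * 1e-3 MWh/kWh
mwh_per_m3_oil = 44.9 * 724 * 0.278 * 1e-3  # ~9.04 MWh/m3
# methanol: 5.54 kWh/kg * 791 kg/m3 * 1e-3 MWh/kWh
mwh_per_m3_methanol = 5.54 * 791 * 1e-3  # ~4.38 MWh/m3

# Dividing EUR/m3/a by MWh/m3 yields the capital cost in EUR/MWh/a
capital_cost_oil = storage_cost_per_m3 / mwh_per_m3_oil
capital_cost_methanol = storage_cost_per_m3 / mwh_per_m3_methanol
print(f"oil: {mwh_per_m3_oil:.2f} MWh/m3, methanol: {mwh_per_m3_methanol:.2f} MWh/m3")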
diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py
new file mode 100644
index 000000000..745533cff
--- /dev/null
+++ b/scripts/retrieve_osm_data.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: MIT
+"""
+Retrieve OSM data for the specified country using the Overpass API and save it
+to the specified output files.
+
+Note that Overpass requests are subject to a fair-use policy.
+`retrieve_osm_data` is meant to respect this policy by fetching the needed
+data only once.
+"""
+
+import json
+import logging
+import os
+import time
+
+import requests
+from _helpers import configure_logging, set_scenario_config
+
+logger = logging.getLogger(__name__)
+
+
+def retrieve_osm_data(
+    country,
+    output,
+    features=[
+        "cables_way",
+        "lines_way",
+        "links_relation",
+        "substations_way",
+        "substations_relation",
+    ],
+):
+    """
+    Retrieve OSM data for the specified country and save it to the specified
+    output files.
+
+    Parameters
+    ----------
+    country : str
+        The country code for which the OSM data should be retrieved.
+    output : dict
+        A dictionary mapping feature names to the corresponding output file
+        paths, to which the OSM data is saved as .json files.
+    features : list, optional
+        A list of OSM features to retrieve. The default is [
+        "cables_way",
+        "lines_way",
+        "links_relation",
+        "substations_way",
+        "substations_relation",
+        ].
+    """
+    # Overpass API endpoint URL
+    overpass_url = "https://overpass-api.de/api/interpreter"
+
+    features_dict = {
+        "cables_way": 'way["power"="cable"]',
+        "lines_way": 'way["power"="line"]',
+        "links_relation": 'relation["route"="power"]["frequency"="0"]',
+        "substations_way": 'way["power"="substation"]',
+        "substations_relation": 'relation["power"="substation"]',
+    }
+
+    wait_time = 5
+
+    for f in features:
+        if f not in features_dict:
+            raise ValueError(
+                f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}"
+            )
+
+        retries = 3
+        for attempt in range(retries):
+            logger.info(
+                f" - Fetching OSM data for feature '{f}' in {country} (Attempt {attempt+1})..."
+            )
+
+            # Build the Overpass query
+            op_area = f'area["ISO3166-1"="{country}"]'
+            op_query = f"""
+                [out:json];
+                {op_area}->.searchArea;
+                (
+                  {features_dict[f]}(area.searchArea);
+                );
+                out body geom;
+            """
+            response = None
+            try:
+                # Send the request
+                response = requests.post(overpass_url, data=op_query)
+                response.raise_for_status()  # Raise HTTPError for bad responses
+                data = response.json()
+
+                filepath = output[f]
+                parentfolder = os.path.dirname(filepath)
+                os.makedirs(parentfolder, exist_ok=True)
+
+                # Do not shadow the feature name f with the file handle
+                with open(filepath, mode="w") as json_file:
+                    json.dump(data, json_file, indent=2)
+                logger.info(" - Done.")
+                break  # Exit the retry loop on success
+            except (json.JSONDecodeError, requests.exceptions.RequestException) as e:
+                logger.error(f"Error for feature '{f}' in country {country}: {e}")
+                logger.debug(
+                    f"Response text: {response.text if response else 'No response'}"
+                )
+                if attempt < retries - 1:
+                    wait_time += 15
+                    logger.info(f"Waiting {wait_time} seconds before retrying...")
+                    time.sleep(wait_time)
+                else:
+                    logger.error(
+                        f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts."
+                    )
+            except Exception as e:
+                # Catch any other exception, log it and retry in the same way
+                # as for a RequestException.
+                logger.error(
+                    f"Unexpected error for feature '{f}' in country {country}: {e}"
+                )
+                if attempt < retries - 1:
+                    wait_time += 10
+                    logger.info(f"Waiting {wait_time} seconds before retrying...")
+                    time.sleep(wait_time)
+                else:
+                    logger.error(
+                        f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts."
+                    )
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        from _helpers import mock_snakemake
+
+        snakemake = mock_snakemake("retrieve_osm_data", country="BE")
+    configure_logging(snakemake)
+    set_scenario_config(snakemake)
+
+    # Retrieve the OSM data
+    country = snakemake.wildcards.country
+    output = snakemake.output
+
+    retrieve_osm_data(country, output)
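For reference, the Overpass request pattern used by retrieve_osm_data boils down to the following standalone sketch (country "LU" and the timeout value are illustrative; kept to a single small query in line with the fair-use policy):

import requests

overpass_url = "https://overpass-api.de/api/interpreter"
# Area-scoped query: restrict way["power"="line"] elements to one country
query = """
[out:json];
area["ISO3166-1"="LU"]->.searchArea;
(
  way["power"="line"](area.searchArea);
);
out body geom;
"""

response = requests.post(overpass_url, data=query, timeout=180)
response.raise_for_status()
data = response.json()
print(f"Retrieved {len(data.get('elements', []))} elements.")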
diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py
index 9db3435bf..741fd324c 100644
--- a/scripts/simplify_network.py
+++ b/scripts/simplify_network.py
@@ -108,7 +108,7 @@
 logger = logging.getLogger(__name__)


-def simplify_network_to_380(n):
+def simplify_network_to_380(n, linetype_380):
     """
     Fix all lines to a voltage level of 380 kV and remove all transformers.
@@ -132,8 +132,8 @@ def simplify_network_to_380(n):
     trafo_map = pd.Series(n.transformers.bus1.values, n.transformers.bus0.values)
     trafo_map = trafo_map[~trafo_map.index.duplicated(keep="first")]
-    several_trafo_b = trafo_map.isin(trafo_map.index)
-    trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map)
+    # Iterate until the mapping is a fixpoint, so that chains of transformers
+    # across several voltage levels are collapsed to a single target bus.
+    while (several_trafo_b := trafo_map.isin(trafo_map.index)).any():
+        trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map)
     missing_buses_i = n.buses.index.difference(trafo_map.index)
     missing = pd.Series(missing_buses_i, missing_buses_i)
     trafo_map = pd.concat([trafo_map, missing])
@@ -600,7 +600,8 @@ def find_closest_bus(n, x, y, tol=2000):

     # remove integer outputs for compatibility with PyPSA v0.26.0
     n.generators.drop("n_mod", axis=1, inplace=True, errors="ignore")

-    n, trafo_map = simplify_network_to_380(n)
+    linetype_380 = snakemake.config["lines"]["types"][380]
+    n, trafo_map = simplify_network_to_380(n, linetype_380)

     technology_costs = load_costs(
         snakemake.input.tech_costs,