diff --git a/Snakefile b/Snakefile index be2545ada..eb99437bf 100644 --- a/Snakefile +++ b/Snakefile @@ -54,6 +54,7 @@ include: "rules/build_sector.smk" include: "rules/solve_electricity.smk" include: "rules/postprocess.smk" include: "rules/validate.smk"
+include: "rules/development.smk"
 if config["foresight"] == "overnight":
diff --git a/config/config.default.yaml b/config/config.default.yaml index 9d565b633..0fd6e9869 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -74,7 +74,6 @@ enable: custom_busmap: false drop_leap_day: true
-
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: 2020: 0.701 @@ -87,7 +86,8 @@ co2_budget: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity:
- voltages: [220., 300., 380., 500., 750.]
+ voltages: [200., 220., 300., 380., 500., 750.]
+ base_network: entsoegridkit
 gaslimit_enable: false gaslimit: false co2limit_enable: false @@ -276,6 +276,7 @@ conventional: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines lines: types:
+ 200.: "Al/St 240/40 2-bundle 200.0"
 220.: "Al/St 240/40 2-bundle 220.0" 300.: "Al/St 240/40 3-bundle 300.0" 380.: "Al/St 240/40 4-bundle 380.0"
diff --git a/doc/configtables/electricity.csv b/doc/configtables/electricity.csv index ee733660c..9bad7bfc6 100644 --- a/doc/configtables/electricity.csv +++ b/doc/configtables/electricity.csv @@ -1,5 +1,6 @@ ,Unit,Values,Description
-voltages,kV,"Any subset of {220., 300., 380.}",Voltage levels to consider
+voltages,kV,"Any subset of {200., 220., 300., 380., 500., 750.}",Voltage levels to consider
+base_network, --, "Any value in {'entsoegridkit', 'osm-prebuilt', 'osm-raw'}", "Specify the underlying base network, i.e. GridKit (based on an ENTSO-E web map extract), OpenStreetMap (OSM) prebuilt, or OSM raw (built from raw OSM data; takes longer)."
 gaslimit_enable,bool,true or false,Add an overall absolute gas limit configured in ``electricity: gaslimit``. gaslimit,MWhth,float or false,Global gas usage limit co2limit_enable,bool,true or false,Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit`` in :mod:`prepare_network`. **Warning:** This option should currently only be used with electricity-only networks, not for sector-coupled networks..
diff --git a/doc/configtables/enable.csv b/doc/configtables/enable.csv index 0268319eb..5359131de 100644 --- a/doc/configtables/enable.csv +++ b/doc/configtables/enable.csv @@ -4,5 +4,5 @@ retrieve_databundle,bool,"{true, false}","Switch to retrieve databundle from zen retrieve_cost_data,bool,"{true, false}","Switch to retrieve technology cost data from `technology-data repository `_." build_cutout,bool,"{true, false}","Switch to enable the building of cutouts via the rule :mod:`build_cutout`." retrieve_cutout,bool,"{true, false}","Switch to enable the retrieval of cutouts from zenodo with :mod:`retrieve_cutout`."
-custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/custom_busmap_elec_s{simpl}_{clusters}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``."
+custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated, the rule looks for provided busmaps at ``data/busmaps/elec_s{simpl}_{clusters}_{base_network}.csv``, which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``. {base_network} is the name of the selected base network in ``electricity``, e.g. ``entsoegridkit``, ``osm-prebuilt``, or ``osm-raw``."
 drop_leap_day,bool,"{true, false}","Switch to drop February 29 from all time-dependent data in leap years"
diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 3ee042840..4df38b019 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -74,6 +74,8 @@ Upcoming Release * Enable parallelism in :mod:`determine_availability_matrix_MD_UA.py` and remove plots. This requires the use of temporary files.
+* Added a new major feature to create the base network from OpenStreetMap (OSM) data (PR https://github.com/PyPSA/pypsa-eur/pull/1079). Note that a heuristics-based cleaning process is used for lines and links where electrical parameters are incomplete, missing, or ambiguous. Through ``electricity["base_network"]``, the base network can be set to "entsoegridkit" (the original default setting, deprecated soon), "osm-prebuilt" (which downloads the latest prebuilt snapshot based on OSM data from Zenodo), or "osm-raw" (which retrieves the raw OSM data once, cleans it, and subsequently builds the network). Note that this process may take a few minutes.
+
 * Updated pre-built `weather data cutouts `__. These are now merged cutouts with solar irradiation from the new SARAH-3 dataset while taking all other @@ -89,6 +91,8 @@ Upcoming Release * In :mod:`base_network`, replace own voronoi polygon calculation function with Geopandas `gdf.voronoi_polygons` method.
+* Move custom busmaps to ``data/busmaps/elec_s{simpl}_{clusters}_{base_network}.csv`` (if enabled). This allows for different busmaps depending on the base network and scenario.
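For reference, the busmap file the rule now expects is a plain two-column CSV indexed by the bus IDs of the simplified network. A minimal sketch of writing such a file; bus IDs, cluster labels, and the ``{clusters}`` value are hypothetical, not taken from a real network:

```python
# Hypothetical sketch of a custom busmap for base_network "osm-prebuilt".
# Bus IDs and cluster labels are illustrative only.
import pandas as pd
import pypsa

n = pypsa.Network("networks/elec_s.nc")  # simplified network the busmap refers to

busmap = pd.Series(
    {"bus-0001": "DE0 1", "bus-0002": "DE0 1", "bus-0003": "FR0 2"}, name="busmap"
)
busmap.index.name = "Bus"

# the index must cover the buses of the simplified network
assert busmap.index.isin(n.buses.index).all()

busmap.to_csv("data/busmaps/elec_s_10_osm-prebuilt.csv")
```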
+ PyPSA-Eur 0.11.0 (25th May 2024) ===================================== diff --git a/envs/environment.yaml b/envs/environment.yaml index 9115ccd97..98cf858d6 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -47,6 +47,7 @@ dependencies: - pyxlsb - graphviz - pre-commit +- geojson # Keep in conda environment when calling ipython - ipython diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index f35050917..1f568e99c 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -50,6 +50,19 @@ rule build_powerplants: "../scripts/build_powerplants.py" +def input_base_network(w): + base_network = config_provider("electricity", "base_network")(w) + components = {"buses", "lines", "links", "converters", "transformers"} + if base_network == "osm-raw": + inputs = {c: resources(f"osm-raw/build/{c}.csv") for c in components} + else: + inputs = {c: f"data/{base_network}/{c}.csv" for c in components} + if base_network == "entsoegridkit": + inputs["parameter_corrections"] = "data/parameter_corrections.yaml" + inputs["links_p_nom"] = "data/links_p_nom.csv" + return inputs + + rule base_network: params: countries=config_provider("countries"), @@ -58,13 +71,7 @@ rule base_network: lines=config_provider("lines"), transformers=config_provider("transformers"), input: - eg_buses="data/entsoegridkit/buses.csv", - eg_lines="data/entsoegridkit/lines.csv", - eg_links="data/entsoegridkit/links.csv", - eg_converters="data/entsoegridkit/converters.csv", - eg_transformers="data/entsoegridkit/transformers.csv", - parameter_corrections="data/parameter_corrections.yaml", - links_p_nom="data/links_p_nom.csv", + unpack(input_base_network), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), @@ -523,6 +530,15 @@ rule simplify_network: "../scripts/simplify_network.py" +# Optional input when using custom busmaps - Needs to be tailored to selected base_network +def input_cluster_network(w): + if config_provider("enable", "custom_busmap", default=False)(w): + base_network = config_provider("electricity", "base_network")(w) + custom_busmap = f"data/busmaps/elec_s{w.simpl}_{w.clusters}_{base_network}.csv" + return {"custom_busmap": custom_busmap} + return {"custom_busmap": []} + + rule cluster_network: params: cluster_network=config_provider("clustering", "cluster_network"), @@ -539,15 +555,11 @@ rule cluster_network: length_factor=config_provider("lines", "length_factor"), costs=config_provider("costs"), input: + unpack(input_cluster_network), network=resources("networks/elec_s{simpl}.nc"), regions_onshore=resources("regions_onshore_elec_s{simpl}.geojson"), regions_offshore=resources("regions_offshore_elec_s{simpl}.geojson"), busmap=ancient(resources("busmap_elec_s{simpl}.csv")), - custom_busmap=lambda w: ( - "data/custom_busmap_elec_s{simpl}_{clusters}.csv" - if config_provider("enable", "custom_busmap", default=False)(w) - else [] - ), tech_costs=lambda w: resources( f"costs_{config_provider('costs', 'year')(w)}.csv" ), @@ -629,3 +641,79 @@ rule prepare_network: "../envs/environment.yaml" script: "../scripts/prepare_network.py" + + +if config["electricity"]["base_network"] == "osm-raw": + + rule clean_osm_data: + input: + cables_way=expand( + "data/osm-raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + lines_way=expand( + "data/osm-raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + links_relation=expand( + 
"data/osm-raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + substations_way=expand( + "data/osm-raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + substations_relation=expand( + "data/osm-raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), + offshore_shapes=resources("offshore_shapes.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + substations=resources("osm-raw/clean/substations.geojson"), + substations_polygon=resources("osm-raw/clean/substations_polygon.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), + log: + logs("clean_osm_data.log"), + benchmark: + benchmarks("clean_osm_data") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" + script: + "../scripts/clean_osm_data.py" + + +if config["electricity"]["base_network"] == "osm-raw": + + rule build_osm_network: + input: + substations=resources("osm-raw/clean/substations.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + lines=resources("osm-raw/build/lines.csv"), + links=resources("osm-raw/build/links.csv"), + converters=resources("osm-raw/build/converters.csv"), + transformers=resources("osm-raw/build/transformers.csv"), + substations=resources("osm-raw/build/buses.csv"), + lines_geojson=resources("osm-raw/build/geojson/lines.geojson"), + links_geojson=resources("osm-raw/build/geojson/links.geojson"), + converters_geojson=resources("osm-raw/build/geojson/converters.geojson"), + transformers_geojson=resources("osm-raw/build/geojson/transformers.geojson"), + substations_geojson=resources("osm-raw/build/geojson/buses.geojson"), + log: + logs("build_osm_network.log"), + benchmark: + benchmarks("build_osm_network") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" + script: + "../scripts/build_osm_network.py" diff --git a/rules/development.smk b/rules/development.smk new file mode 100644 index 000000000..465490258 --- /dev/null +++ b/rules/development.smk @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +if config["electricity"]["base_network"] == "osm-raw": + + rule prepare_osm_network_release: + input: + base_network=resources("networks/base.nc"), + output: + buses=resources("osm-raw/release/buses.csv"), + converters=resources("osm-raw/release/converters.csv"), + lines=resources("osm-raw/release/lines.csv"), + links=resources("osm-raw/release/links.csv"), + transformers=resources("osm-raw/release/transformers.csv"), + log: + logs("prepare_osm_network_release.log"), + benchmark: + benchmarks("prepare_osm_network_release") + threads: 1 + resources: + mem_mb=1000, + conda: + "../envs/environment.yaml" + script: + "../scripts/prepare_osm_network_release.py" diff --git a/rules/retrieve.smk b/rules/retrieve.smk index de84b2fae..c30696ccf 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -390,3 +390,85 @@ if config["enable"]["retrieve"]: "../envs/retrieve.yaml" script: "../scripts/retrieve_monthly_fuel_prices.py" + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-prebuilt" +): + + rule retrieve_osm_prebuilt: + input: + buses=storage("https://zenodo.org/records/13358976/files/buses.csv"), + converters=storage( + 
"https://zenodo.org/records/13358976/files/converters.csv" + ), + lines=storage("https://zenodo.org/records/13358976/files/lines.csv"), + links=storage("https://zenodo.org/records/13358976/files/links.csv"), + transformers=storage( + "https://zenodo.org/records/13358976/files/transformers.csv" + ), + output: + buses="data/osm-prebuilt/buses.csv", + converters="data/osm-prebuilt/converters.csv", + lines="data/osm-prebuilt/lines.csv", + links="data/osm-prebuilt/links.csv", + transformers="data/osm-prebuilt/transformers.csv", + log: + "logs/retrieve_osm_prebuilt.log", + threads: 1 + resources: + mem_mb=500, + retries: 2 + run: + for key in input.keys(): + move(input[key], output[key]) + validate_checksum(output[key], input[key]) + + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-raw" +): + + rule retrieve_osm_data: + output: + cables_way="data/osm-raw/{country}/cables_way.json", + lines_way="data/osm-raw/{country}/lines_way.json", + links_relation="data/osm-raw/{country}/links_relation.json", + substations_way="data/osm-raw/{country}/substations_way.json", + substations_relation="data/osm-raw/{country}/substations_relation.json", + log: + "logs/retrieve_osm_data_{country}.log", + threads: 1 + conda: + "../envs/retrieve.yaml" + script: + "../scripts/retrieve_osm_data.py" + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-raw" +): + + rule retrieve_osm_data_all: + input: + expand( + "data/osm-raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm-raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), diff --git a/scripts/base_network.py b/scripts/base_network.py index 2b0fee63b..afb66387e 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -5,7 +5,11 @@ # coding: utf-8 """ -Creates the network topology from an `ENTSO-E map extract `_ (March 2022) as a PyPSA network. +Creates the network topology from a `ENTSO-E map extract. +`_ (March 2022) +or `OpenStreetMap data `_ (Aug 2024) +as a PyPSA +network. 
Relevant Settings ----------------- @@ -131,45 +135,50 @@ def _find_closest_links(links, new_links, distance_upper_bound=1.5): ) -def _load_buses_from_eg(eg_buses, europe_shape, config_elec): +def _load_buses(buses, europe_shape, config): buses = ( pd.read_csv( - eg_buses, + buses, quotechar="'", true_values=["t"], false_values=["f"], dtype=dict(bus_id="str"), ) .set_index("bus_id") - .drop(["station_id"], axis=1) .rename(columns=dict(voltage="v_nom")) ) + if "station_id" in buses.columns: + buses.drop("station_id", axis=1, inplace=True) + buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) buses["under_construction"] = buses.under_construction.where( lambda s: s.notnull(), False ).astype(bool) - - # remove all buses outside of all countries including exclusive economic zones (offshore) europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] europe_shape_prepped = shapely.prepared.prep(europe_shape) buses_in_europe_b = buses[["x", "y"]].apply( lambda p: europe_shape_prepped.contains(Point(p)), axis=1 ) + v_nom_min = min(config["electricity"]["voltages"]) + v_nom_max = max(config["electricity"]["voltages"]) + buses_with_v_nom_to_keep_b = ( - buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() - ) - logger.info( - f'Removing buses with voltages {pd.Index(buses.v_nom.unique()).dropna().difference(config_elec["voltages"])}' + (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) + | (buses.v_nom.isnull()) + | ( + buses.carrier == "DC" + ) # Keeping all DC buses from the input dataset independent of voltage (e.g. 150 kV connections) ) + logger.info(f"Removing buses outside of range AC {v_nom_min} - {v_nom_max} V") return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) -def _load_transformers_from_eg(buses, eg_transformers): +def _load_transformers(buses, transformers): transformers = pd.read_csv( - eg_transformers, + transformers, quotechar="'", true_values=["t"], false_values=["f"], @@ -181,9 +190,9 @@ def _load_transformers_from_eg(buses, eg_transformers): return transformers -def _load_converters_from_eg(buses, eg_converters): +def _load_converters_from_eg(buses, converters): converters = pd.read_csv( - eg_converters, + converters, quotechar="'", true_values=["t"], false_values=["f"], @@ -197,9 +206,25 @@ def _load_converters_from_eg(buses, eg_converters): return converters -def _load_links_from_eg(buses, eg_links): +def _load_converters_from_osm(buses, converters): + converters = pd.read_csv( + converters, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict(converter_id="str", bus0="str", bus1="str"), + ).set_index("converter_id") + + converters = _remove_dangling_branches(converters, buses) + + converters["carrier"] = "" + + return converters + + +def _load_links_from_eg(buses, links): links = pd.read_csv( - eg_links, + links, quotechar="'", true_values=["t"], false_values=["f"], @@ -208,7 +233,7 @@ def _load_links_from_eg(buses, eg_links): links["length"] /= 1e3 - # Skagerrak Link is connected to 132kV bus which is removed in _load_buses_from_eg. + # Skagerrak Link is connected to 132kV bus which is removed in _load_buses. 
# Connect to neighboring 380kV bus links.loc[links.bus1 == "6396", "bus1"] = "6398" @@ -220,10 +245,35 @@ def _load_links_from_eg(buses, eg_links): return links -def _load_lines_from_eg(buses, eg_lines): +def _load_links_from_osm(buses, links): + links = pd.read_csv( + links, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict( + link_id="str", + bus0="str", + bus1="str", + voltage="int", + p_nom="float", + ), + ).set_index("link_id") + + links["length"] /= 1e3 + + links = _remove_dangling_branches(links, buses) + + # Add DC line parameters + links["carrier"] = "DC" + + return links + + +def _load_lines(buses, lines): lines = ( pd.read_csv( - eg_lines, + lines, quotechar="'", true_values=["t"], false_values=["f"], @@ -240,6 +290,7 @@ def _load_lines_from_eg(buses, eg_lines): ) lines["length"] /= 1e3 + lines["carrier"] = "AC" lines = _remove_dangling_branches(lines, buses) @@ -290,7 +341,7 @@ def _reconnect_crimea(lines): return pd.concat([lines, lines_to_crimea]) -def _set_electrical_parameters_lines(lines, config): +def _set_electrical_parameters_lines_eg(lines, config): v_noms = config["electricity"]["voltages"] linetypes = config["lines"]["types"] @@ -302,16 +353,36 @@ def _set_electrical_parameters_lines(lines, config): return lines +def _set_electrical_parameters_lines_osm(lines, config): + if lines.empty: + lines["type"] = [] + return lines + + v_noms = config["electricity"]["voltages"] + linetypes = _get_linetypes_config(config["lines"]["types"], v_noms) + + lines["carrier"] = "AC" + lines["dc"] = False + + lines.loc[:, "type"] = lines.v_nom.apply( + lambda x: _get_linetype_by_voltage(x, linetypes) + ) + + lines["s_max_pu"] = config["lines"]["s_max_pu"] + + return lines + + def _set_lines_s_nom_from_linetypes(n): n.lines["s_nom"] = ( np.sqrt(3) * n.lines["type"].map(n.line_types.i_nom) * n.lines["v_nom"] - * n.lines.num_parallel + * n.lines["num_parallel"] ) -def _set_electrical_parameters_links(links, config, links_p_nom): +def _set_electrical_parameters_links_eg(links, config, links_p_nom): if links.empty: return links @@ -343,12 +414,27 @@ def _set_electrical_parameters_links(links, config, links_p_nom): return links +def _set_electrical_parameters_links_osm(links, config): + if links.empty: + return links + + p_max_pu = config["links"].get("p_max_pu", 1.0) + links["p_max_pu"] = p_max_pu + links["p_min_pu"] = -p_max_pu + links["carrier"] = "DC" + links["dc"] = True + + return links + + def _set_electrical_parameters_converters(converters, config): p_max_pu = config["links"].get("p_max_pu", 1.0) converters["p_max_pu"] = p_max_pu converters["p_min_pu"] = -p_max_pu - converters["p_nom"] = 2000 + # if column "p_nom" does not exist, set to 2000 + if "p_nom" not in converters: + converters["p_nom"] = 2000 # Converters are combined with links converters["under_construction"] = False @@ -463,7 +549,7 @@ def prefer_voltage(x, which): buses["substation_lv"] = ( lv_b & onshore_b & (~buses["under_construction"]) & has_connections_b ) - buses["substation_off"] = ((hv_b & offshore_b) | (hv_b & onshore_b)) & ( + buses["substation_off"] = (offshore_b | (hv_b & onshore_b)) & ( ~buses["under_construction"] ) @@ -617,11 +703,11 @@ def _set_shapes(n, country_shapes, offshore_shapes): def base_network( - eg_buses, - eg_converters, - eg_transformers, - eg_lines, - eg_links, + buses, + converters, + transformers, + lines, + links, links_p_nom, europe_shape, country_shapes, @@ -629,29 +715,59 @@ def base_network( parameter_corrections, config, ): - buses = 
_load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) - links = _load_links_from_eg(buses, eg_links) + base_network = config["electricity"].get("base_network") + assert base_network in { + "entsoegridkit", + "osm-raw", + "osm-prebuilt", + }, f"base_network must be either 'entsoegridkit', 'osm-raw' or 'osm-prebuilt', but got '{base_network}'" + if base_network == "entsoegridkit": + warnings.warn( + "The 'entsoegridkit' base network is deprecated and will be removed in future versions. Please use 'osm-raw' or 'osm-prebuilt' instead.", + DeprecationWarning, + ) + + logger.info(f"Creating base network using {base_network}.") + + buses = _load_buses(buses, europe_shape, config) + transformers = _load_transformers(buses, transformers) + lines = _load_lines(buses, lines) - converters = _load_converters_from_eg(buses, eg_converters) + if base_network == "entsoegridkit": + links = _load_links_from_eg(buses, links) + converters = _load_converters_from_eg(buses, converters) - lines = _load_lines_from_eg(buses, eg_lines) - transformers = _load_transformers_from_eg(buses, eg_transformers) + # Optionally reconnect Crimea + if (config["lines"].get("reconnect_crimea", True)) & ( + "UA" in config["countries"] + ): + lines = _reconnect_crimea(lines) - if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: - lines = _reconnect_crimea(lines) + # Set electrical parameters of lines and links + lines = _set_electrical_parameters_lines_eg(lines, config) + links = _set_electrical_parameters_links_eg(links, config, links_p_nom) + elif base_network in {"osm-prebuilt", "osm-raw"}: + links = _load_links_from_osm(buses, links) + converters = _load_converters_from_osm(buses, converters) + + # Set electrical parameters of lines and links + lines = _set_electrical_parameters_lines_osm(lines, config) + links = _set_electrical_parameters_links_osm(links, config) + else: + raise ValueError( + "base_network must be either 'entsoegridkit', 'osm-raw', or 'osm-prebuilt'" + ) - lines = _set_electrical_parameters_lines(lines, config) + # Set electrical parameters of transformers and converters transformers = _set_electrical_parameters_transformers(transformers, config) - links = _set_electrical_parameters_links(links, config, links_p_nom) converters = _set_electrical_parameters_converters(converters, config) n = pypsa.Network() - n.name = "PyPSA-Eur" + n.name = f"PyPSA-Eur ({base_network})" time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) n.set_snapshots(time) - n.madd("Carrier", ["AC", "DC"]) n.import_components_from_dataframe(buses, "Bus") n.import_components_from_dataframe(lines, "Line") @@ -660,8 +776,8 @@ def base_network( n.import_components_from_dataframe(converters, "Link") _set_lines_s_nom_from_linetypes(n) - - _apply_parameter_corrections(n, parameter_corrections) + if config["electricity"].get("base_network") == "entsoegridkit": + _apply_parameter_corrections(n, parameter_corrections) n = _remove_unconnected_components(n) @@ -675,9 +791,64 @@ def base_network( _set_shapes(n, country_shapes, offshore_shapes) + # Add carriers if they are present in buses.carriers + carriers_in_buses = set(n.buses.carrier.dropna().unique()) + carriers = carriers_in_buses.intersection({"AC", "DC"}) + + if carriers: + n.madd("Carrier", carriers) + return n +def _get_linetypes_config(line_types, voltages): + """ + Return the dictionary of linetypes for selected voltages. The dictionary is + a subset of the dictionary line_types, whose keys match the selected + voltages. 
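The two helpers introduced here work as a pair: ``_get_linetypes_config`` restricts the configured ``lines: types`` mapping to the selected voltages, and ``_get_linetype_by_voltage`` (defined just below) maps any remaining line voltage to the nearest available type. A worked illustration using the default values from this diff; for example, a 400 kV OSM line falls back to the 380 kV type:

```python
# Worked illustration of the nearest-voltage linetype lookup, using the
# default config values shown earlier in this diff.
linetypes = {
    220.0: "Al/St 240/40 2-bundle 220.0",
    300.0: "Al/St 240/40 3-bundle 300.0",
    380.0: "Al/St 240/40 4-bundle 380.0",
}

def get_linetype_by_voltage(v_nom: float, d_linetypes: dict) -> str:
    # pick the entry whose nominal voltage is closest to v_nom
    _, line_type = min(d_linetypes.items(), key=lambda kv: abs(kv[0] - v_nom))
    return line_type

print(get_linetype_by_voltage(400.0, linetypes))  # Al/St 240/40 4-bundle 380.0
print(get_linetype_by_voltage(225.0, linetypes))  # Al/St 240/40 2-bundle 220.0
```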
+ + Parameters + ---------- + line_types : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + voltages : list + List of selected voltages. + + Returns + ------- + Dictionary of linetypes for selected voltages. + """ + # get voltages value that are not available in the line types + vnoms_diff = set(voltages).symmetric_difference(set(line_types.keys())) + if vnoms_diff: + logger.warning( + f"Voltages {vnoms_diff} not in the {line_types} or {voltages} list." + ) + return {k: v for k, v in line_types.items() if k in voltages} + + +def _get_linetype_by_voltage(v_nom, d_linetypes): + """ + Return the linetype of a specific line based on its voltage v_nom. + + Parameters + ---------- + v_nom : float + The voltage of the line. + d_linetypes : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + + Returns + ------- + The linetype of the line whose nominal voltage is closest to the line voltage. + """ + v_nom_min, line_type_min = min( + d_linetypes.items(), + key=lambda x: abs(x[0] - v_nom), + ) + return line_type_min + + def voronoi(points, outline, crs=4326): """ Create Voronoi polygons from a set of points within an outline. @@ -793,25 +964,47 @@ def append_bus_shapes(n, shapes, type): configure_logging(snakemake) set_scenario_config(snakemake) + countries = snakemake.params.countries + + buses = snakemake.input.buses + converters = snakemake.input.converters + transformers = snakemake.input.transformers + lines = snakemake.input.lines + links = snakemake.input.links + europe_shape = snakemake.input.europe_shape + country_shapes = snakemake.input.country_shapes + offshore_shapes = snakemake.input.offshore_shapes + config = snakemake.config + + if "links_p_nom" in snakemake.input.keys(): + links_p_nom = snakemake.input.links_p_nom + else: + links_p_nom = None + + if "parameter_corrections" in snakemake.input.keys(): + parameter_corrections = snakemake.input.parameter_corrections + else: + parameter_corrections = None + n = base_network( - snakemake.input.eg_buses, - snakemake.input.eg_converters, - snakemake.input.eg_transformers, - snakemake.input.eg_lines, - snakemake.input.eg_links, - snakemake.input.links_p_nom, - snakemake.input.europe_shape, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.input.parameter_corrections, - snakemake.config, + buses, + converters, + transformers, + lines, + links, + links_p_nom, + europe_shape, + country_shapes, + offshore_shapes, + parameter_corrections, + config, ) onshore_regions, offshore_regions, shapes, offshore_shapes = build_bus_shapes( n, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.params.countries, + country_shapes, + offshore_shapes, + countries, ) shapes.to_file(snakemake.output.regions_onshore) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py new file mode 100644 index 000000000..83461a98d --- /dev/null +++ b/scripts/build_osm_network.py @@ -0,0 +1,863 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur and PyPSA-Earth Authors +# +# SPDX-License-Identifier: MIT + +import logging +import os +import string + +import geopandas as gpd +import numpy as np +import pandas as pd +from _helpers import configure_logging, set_scenario_config +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, nearest_points, split +from tqdm import tqdm + +logger = logging.getLogger(__name__) + + +GEO_CRS = "EPSG:4326" +DISTANCE_CRS = "EPSG:3035" +BUS_TOL = ( 
+ 5000 # unit: meters, default 5000 - Buses within this distance are grouped together +) +LINES_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", +] +LINKS_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "underground", + "under_construction", + "geometry", +] +TRANSFORMERS_COLUMNS = [ + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", +] + + +def line_endings_to_bus_conversion(lines): + """ + Converts line endings to bus connections. + + This function takes a df of lines and converts the line endings to bus + connections. It performs the necessary operations to ensure that the line + endings are properly connected to the buses in the network. + + Parameters: + lines (DataFrame) + + Returns: + lines (DataFrame) + """ + # Assign to every line a start and end point + + lines["bounds"] = lines["geometry"].boundary # create start and end point + + lines["bus_0_coors"] = lines["bounds"].map(lambda p: p.geoms[0]) + lines["bus_1_coors"] = lines["bounds"].map(lambda p: p.geoms[-1]) + + # splits into coordinates + lines["bus0_lon"] = lines["bus_0_coors"].x + lines["bus0_lat"] = lines["bus_0_coors"].y + lines["bus1_lon"] = lines["bus_1_coors"].x + lines["bus1_lat"] = lines["bus_1_coors"].y + + return lines + + +# tol in m +def set_substations_ids(buses, distance_crs, tol=5000): + """ + Function to set substations ids to buses, accounting for location + tolerance. + + The algorithm is as follows: + + 1. initialize all substation ids to -1 + 2. if the current substation has been already visited [substation_id < 0], then skip the calculation + 3. otherwise: + 1. identify the substations within the specified tolerance (tol) + 2. when all the substations in tolerance have substation_id < 0, then specify a new substation_id + 3. 
otherwise, if one of the substations in tolerance has a substation_id >= 0, then set that substation_id to all the others;
+ in case of multiple substations with substation_ids >= 0, the first value is picked for all
+ """
+
+ buses["station_id"] = -1
+
+ # create temporary series to execute distance calculations using m as reference distances
+ temp_bus_geom = buses.geometry.to_crs(distance_crs)
+
+ # set tqdm options for substation ids
+ tqdm_kwargs_substation_ids = dict(
+ ascii=False,
+ unit=" buses",
+ total=buses.shape[0],
+ desc="Set substation ids ",
+ )
+
+ station_id = 0
+ for i, row in tqdm(buses.iterrows(), **tqdm_kwargs_substation_ids):
+ if buses.loc[i, "station_id"] >= 0:
+ continue
+
+ # get substations within tolerance
+ close_nodes = np.flatnonzero(
+ temp_bus_geom.distance(temp_bus_geom.loc[i]) <= tol
+ )
+
+ if len(close_nodes) == 1:
+ # if only one substation is within tolerance, then the substation is the current one
+ # Note that the node cannot have substation_id >= 0, given the preliminary check
+ # at the beginning of the for loop
+ buses.loc[buses.index[i], "station_id"] = station_id
+ # update station id
+ station_id += 1
+ else:
+ # several substations within tolerance
+ # get their ids
+ subset_substation_ids = buses.loc[buses.index[close_nodes], "station_id"]
+ # check if all substation_ids are negative (<0)
+ all_neg = subset_substation_ids.max() < 0
+ # check if at least one substation_id is negative (<0)
+ some_neg = subset_substation_ids.min() < 0
+
+ if all_neg:
+ # when all substation_ids are negative, then this is a new substation id
+ # set the current station_id and increment the counter
+ buses.loc[buses.index[close_nodes], "station_id"] = station_id
+ station_id += 1
+ elif some_neg:
+ # otherwise, when at least one substation_id is non-negative, pick the first such value
+ # and set it to all the other substations within tolerance
+ sub_id = -1
+ for substation_id in subset_substation_ids:
+ if substation_id >= 0:
+ sub_id = substation_id
+ break
+ buses.loc[buses.index[close_nodes], "station_id"] = sub_id
+
+
+def set_lines_ids(lines, buses, distance_crs):
+ """
+ Function to set line bus ids to the closest bus in the list.
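The matching in the body below reduces to a nearest-neighbour search per line endpoint, restricted to buses of the same voltage and current type (AC/DC), with the line geometry stitched to the winning bus if it does not already touch it. A condensed sketch of the distance step; GeoDataFrames and CRS constants are as in the surrounding module, and the geometry rerouting is omitted:

```python
# Condensed sketch of the endpoint-to-bus matching in set_lines_ids.
# `buses` and `lines` are GeoDataFrames as in the surrounding code.
import geopandas as gpd

DISTANCE_CRS = "EPSG:3035"  # metric CRS, as defined at the top of the module

def closest_bus_id(line_row, buses):
    # candidate buses must match the line's voltage and current type (AC/DC)
    candidates = buses[
        (buses["voltage"] == line_row["voltage"]) & (buses["dc"] == line_row["dc"])
    ].to_crs(DISTANCE_CRS)
    # first endpoint of the line, projected to the metric CRS
    endpoint = (
        gpd.GeoSeries([line_row.geometry.boundary.geoms[0]], crs=buses.crs)
        .to_crs(DISTANCE_CRS)
        .iloc[0]
    )
    # index label of the geometrically closest candidate bus
    return candidates.geometry.distance(endpoint).idxmin()
```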
+ """ + # set tqdm options for set lines ids + tqdm_kwargs_line_ids = dict( + ascii=False, + unit=" lines", + total=lines.shape[0], + desc="Set line/link bus ids ", + ) + + # initialization + lines["bus0"] = -1 + lines["bus1"] = -1 + + busesepsg = buses.to_crs(distance_crs) + linesepsg = lines.to_crs(distance_crs) + + for i, row in tqdm(linesepsg.iterrows(), **tqdm_kwargs_line_ids): + # select buses having the voltage level of the current line + buses_sel = busesepsg[ + (buses["voltage"] == row["voltage"]) & (buses["dc"] == row["dc"]) + ] + + # find the closest node of the bus0 of the line + bus0_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[0]).idxmin() + lines.loc[i, "bus0"] = buses.loc[bus0_id, "bus_id"] + + # check if the line starts exactly in the node, otherwise modify the linestring + distance_bus0 = busesepsg.geometry.loc[bus0_id].distance( + row.geometry.boundary.geoms[0] + ) + + if distance_bus0 > 0: + # the line does not start in the node, thus modify the linestring + line_start_point = lines.geometry.loc[i].boundary.geoms[0] + new_segment = LineString([buses.geometry.loc[bus0_id], line_start_point]) + modified_line = linemerge([new_segment, lines.geometry.loc[i]]) + lines.loc[i, "geometry"] = modified_line + + # find the closest node of the bus1 of the line + bus1_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[1]).idxmin() + lines.loc[i, "bus1"] = buses.loc[bus1_id, "bus_id"] + + # check if the line ends exactly in the node, otherwise modify the linestring + distance_bus1 = busesepsg.geometry.loc[bus1_id].distance( + row.geometry.boundary.geoms[1] + ) + + if distance_bus1 > 0: + # the line does not start in the node, thus modify the linestring + line_end_point = lines.geometry.loc[i].boundary.geoms[1] + new_segment = LineString([line_end_point, buses.geometry.loc[bus1_id]]) + modified_line = linemerge([lines.geometry.loc[i], new_segment]) + lines.loc[i, "geometry"] = modified_line + + return lines, buses + + +def merge_stations_same_station_id( + buses, delta_lon=0.001, delta_lat=0.001, precision=4 +): + """ + Function to merge buses with same voltage and station_id This function + iterates over all substation ids and creates a bus_id for every substation + and voltage level. 
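In short, buses sharing a ``station_id`` are collapsed onto one averaged location, and each (voltage, dc) combination at that station becomes its own bus, offset by ``delta_lon``/``delta_lat`` so the points remain distinct. A toy illustration of the grouping for a single station; coordinates are made up:

```python
# Toy illustration of the station merging: one averaged location per station,
# one bus per (voltage, dc) group, offset per group. Coordinates are made up.
import pandas as pd

buses = pd.DataFrame(
    {
        "station_id": [0, 0, 0],
        "voltage": [380000, 380000, 220000],
        "dc": [False, False, False],
        "x": [10.00, 10.02, 10.01],
        "y": [50.00, 50.00, 50.01],
    }
)

delta_lon = delta_lat = 0.001
for station_id, station in buses.groupby("station_id"):
    x0, y0 = station.x.mean(), station.y.mean()  # averaged station location
    for v_it, ((voltage, dc), grp) in enumerate(station.groupby(["voltage", "dc"])):
        lon, lat = round(x0 + v_it * delta_lon, 4), round(y0 + v_it * delta_lat, 4)
        print(f"station {station_id}: {voltage} V, dc={dc} -> bus at ({lon}, {lat})")
```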
Therefore, a substation with multiple voltage levels is represented
+ with different buses, one per voltage level
+ """
+ # initialize list of cleaned buses
+ buses_clean = []
+
+ # initialize the number of buses
+ n_buses = 0
+
+ for g_name, g_value in buses.groupby(by="station_id"):
+ # average location of the buses having the same station_id
+ station_point_x = np.round(g_value.geometry.x.mean(), precision)
+ station_point_y = np.round(g_value.geometry.y.mean(), precision)
+ # is_dclink_boundary_point = any(g_value["is_dclink_boundary_point"])
+
+ # loop for every voltage level in the bus
+ # The location of the buses is averaged; in the case of multiple voltage levels for the same station_id,
+ # each bus corresponding to a voltage level and each polarity is located at a distance regulated by delta_lon/delta_lat
+ v_it = 0
+ for v_name, bus_row in g_value.groupby(by=["voltage", "dc"]):
+ lon_bus = np.round(station_point_x + v_it * delta_lon, precision)
+ lat_bus = np.round(station_point_y + v_it * delta_lat, precision)
+
+ bus_data = [
+ n_buses, # "bus_id"
+ g_name, # "station_id"
+ v_name[0], # "voltage"
+ bus_row["dc"].all(), # "dc"
+ "|".join(bus_row["symbol"].unique()), # "symbol"
+ bus_row["under_construction"].any(), # "under_construction"
+ "|".join(bus_row["tag_substation"].unique()), # "tag_substation"
+ bus_row["tag_area"].sum(), # "tag_area"
+ lon_bus, # "lon"
+ lat_bus, # "lat"
+ bus_row["country"].iloc[0], # "country"
+ Point(lon_bus, lat_bus), # "geometry"
+ ]
+
+ # add the bus
+ buses_clean.append(bus_data)
+
+ # increase counters
+ v_it += 1
+ n_buses += 1
+
+ # names of the columns
+ buses_clean_columns = [
+ "bus_id",
+ "station_id",
+ "voltage",
+ "dc",
+ "symbol",
+ "under_construction",
+ "tag_substation",
+ "tag_area",
+ "x",
+ "y",
+ "country",
+ # "is_dclink_boundary_point",
+ "geometry",
+ ]
+
+ gdf_buses_clean = gpd.GeoDataFrame(
+ buses_clean, columns=buses_clean_columns
+ ).set_crs(crs=buses.crs, inplace=True)
+
+ return gdf_buses_clean
+
+
+def get_ac_frequency(df, fr_col="tag_frequency"):
+ """
+ Function to define a default frequency value.
+
+ Attempts to find the most common non-zero frequency across the
+ dataframe; 50 Hz is assumed as a back-up value
+ """
+
+ # Initialize a default frequency value
+ ac_freq_default = 50
+
+ grid_freq_levels = df[fr_col].value_counts(sort=True, dropna=True)
+ if not grid_freq_levels.empty:
+ # AC lines frequency shouldn't be 0Hz
+ ac_freq_levels = grid_freq_levels.loc[
+ grid_freq_levels.index.get_level_values(0) != "0"
+ ]
+ ac_freq_default = ac_freq_levels.index.get_level_values(0)[0]
+
+ return ac_freq_default
+
+
+def get_transformers(buses, lines):
+ """
+ Function to create fake transformer lines that connect buses of the same
+ station_id at different voltage levels.
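Since each voltage level of a station is a separate bus after merging, transformers are synthesized between consecutive voltage levels: a station with n voltage levels yields n-1 transformer edges, as the body below implements. A toy sketch of that pairing; bus IDs are made up:

```python
# Toy sketch: one synthetic transformer per adjacent pair of voltage levels
# at the same station, mirroring the loop in get_transformers.
station_buses = [  # (bus_id, voltage), already sorted by voltage
    ("bus_7_220", 220000),
    ("bus_7_380", 380000),
    ("bus_7_750", 750000),
]

transformers = [
    (f"transf_7_{i}", station_buses[i][0], station_buses[i + 1][0])
    for i in range(len(station_buses) - 1)
]
print(transformers)
# [('transf_7_0', 'bus_7_220', 'bus_7_380'), ('transf_7_1', 'bus_7_380', 'bus_7_750')]
```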
+ """ + + ac_freq = get_ac_frequency(lines) + df_transformers = [] + + # Transformers should be added between AC buses only + buses_ac = buses[buses["dc"] != True] + + for g_name, g_value in buses_ac.sort_values("voltage", ascending=True).groupby( + by="station_id" + ): + # note: by construction there cannot be more that two buses with the same station_id and same voltage + n_voltages = len(g_value) + + if n_voltages > 1: + for id in range(n_voltages - 1): + # when g_value has more than one node, it means that there are multiple voltages for the same bus + transformer_geometry = LineString( + [g_value.geometry.iloc[id], g_value.geometry.iloc[id + 1]] + ) + + transformer_data = [ + f"transf_{g_name}_{id}", # "line_id" + g_value["bus_id"].iloc[id], # "bus0" + g_value["bus_id"].iloc[id + 1], # "bus1" + g_value.voltage.iloc[id], # "voltage_bus0" + g_value.voltage.iloc[id + 1], # "voltage_bus0" + g_value.country.iloc[id], # "country" + transformer_geometry, # "geometry" + ] + + df_transformers.append(transformer_data) + + # name of the columns + transformers_columns = [ + "transformer_id", + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", + ] + + df_transformers = gpd.GeoDataFrame(df_transformers, columns=transformers_columns) + if not df_transformers.empty: + init_index = 0 if lines.empty else lines.index[-1] + 1 + df_transformers.set_index(init_index + df_transformers.index, inplace=True) + # update line endings + df_transformers = line_endings_to_bus_conversion(df_transformers) + df_transformers.drop(columns=["bounds", "bus_0_coors", "bus_1_coors"], inplace=True) + + gdf_transformers = gpd.GeoDataFrame(df_transformers) + gdf_transformers.crs = GEO_CRS + + return gdf_transformers + + +def _find_closest_bus(row, buses, distance_crs, tol=5000): + """ + Find the closest bus to a given bus based on geographical distance and + country. + + Parameters: + - row: The bus_id of the bus to find the closest bus for. + - buses: A GeoDataFrame containing information about all the buses. + - distance_crs: The coordinate reference system to use for distance calculations. + - tol: The tolerance distance within which a bus is considered closest (default: 5000). + Returns: + - closest_bus_id: The bus_id of the closest bus, or None if no bus is found within the distance and same country. + """ + gdf_buses = buses.copy() + gdf_buses = gdf_buses.to_crs(distance_crs) + # Get the geometry of the bus with bus_id = link_bus_id + bus = gdf_buses[gdf_buses["bus_id"] == row] + bus_geom = bus.geometry.values[0] + + gdf_buses_filtered = gdf_buses[gdf_buses["dc"] == False] + + # Find the closest point in the filtered buses + nearest_geom = nearest_points(bus_geom, gdf_buses_filtered.union_all())[1] + + # Get the bus_id of the closest bus + closest_bus = gdf_buses_filtered.loc[gdf_buses["geometry"] == nearest_geom] + + # check if closest_bus_id is within the distance + within_distance = ( + closest_bus.to_crs(distance_crs).distance(bus.to_crs(distance_crs), align=False) + ).values[0] <= tol + + in_same_country = closest_bus.country.values[0] == bus.country.values[0] + + if within_distance and in_same_country: + closest_bus_id = closest_bus.bus_id.values[0] + else: + closest_bus_id = None + + return closest_bus_id + + +def _get_converters(buses, links, distance_crs): + """ + Get the converters for the given buses and links. Connecting link endings + to closest AC bus. + + Parameters: + - buses (pandas.DataFrame): DataFrame containing information about buses. 
+ - links (pandas.DataFrame): DataFrame containing information about links. + Returns: + - gdf_converters (geopandas.GeoDataFrame): GeoDataFrame containing information about converters. + """ + converters = [] + for idx, row in links.iterrows(): + for conv in range(2): + link_end = row[f"bus{conv}"] + # HVDC Gotland is connected to 130 kV grid, closest HVAC bus is further away + + closest_bus = _find_closest_bus(link_end, buses, distance_crs, tol=40000) + + if closest_bus is None: + continue + + converter_id = f"converter/{row['link_id']}_{conv}" + converter_geometry = LineString( + [ + buses[buses["bus_id"] == link_end].geometry.values[0], + buses[buses["bus_id"] == closest_bus].geometry.values[0], + ] + ) + + logger.info( + f"Added converter #{conv+1}/2 for link {row['link_id']}:{converter_id}." + ) + + converter_data = [ + converter_id, # "line_id" + link_end, # "bus0" + closest_bus, # "bus1" + row["voltage"], # "voltage" + row["p_nom"], # "p_nom" + False, # "underground" + False, # "under_construction" + buses[buses["bus_id"] == closest_bus].country.values[0], # "country" + converter_geometry, # "geometry" + ] + + # Create the converter + converters.append(converter_data) + + conv_columns = [ + "converter_id", + "bus0", + "bus1", + "voltage", + "p_nom", + "underground", + "under_construction", + "country", + "geometry", + ] + + gdf_converters = gpd.GeoDataFrame( + converters, columns=conv_columns, crs=GEO_CRS + ).reset_index() + + return gdf_converters + + +def connect_stations_same_station_id(lines, buses): + """ + Function to create fake links between substations with the same + substation_id. + """ + ac_freq = get_ac_frequency(lines) + station_id_list = buses.station_id.unique() + + add_lines = [] + from shapely.geometry import LineString + + for s_id in station_id_list: + buses_station_id = buses[buses.station_id == s_id] + + if len(buses_station_id) > 1: + for b_it in range(1, len(buses_station_id)): + line_geometry = LineString( + [ + buses_station_id.geometry.iloc[0], + buses_station_id.geometry.iloc[b_it], + ] + ) + line_bounds = line_geometry.bounds + + line_data = [ + f"link{buses_station_id}_{b_it}", # "line_id" + buses_station_id.index[0], # "bus0" + buses_station_id.index[b_it], # "bus1" + 400000, # "voltage" + 1, # "circuits" + 0.0, # "length" + False, # "underground" + False, # "under_construction" + "transmission", # "tag_type" + ac_freq, # "tag_frequency" + buses_station_id.country.iloc[0], # "country" + line_geometry, # "geometry" + line_bounds, # "bounds" + buses_station_id.geometry.iloc[0], # "bus_0_coors" + buses_station_id.geometry.iloc[b_it], # "bus_1_coors" + buses_station_id.lon.iloc[0], # "bus0_lon" + buses_station_id.lat.iloc[0], # "bus0_lat" + buses_station_id.lon.iloc[b_it], # "bus1_lon" + buses_station_id.lat.iloc[b_it], # "bus1_lat" + ] + + add_lines.append(line_data) + + # name of the columns + add_lines_columns = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "tag_type", + "tag_frequency", + "country", + "geometry", + "bounds", + "bus_0_coors", + "bus_1_coors", + "bus0_lon", + "bus0_lat", + "bus1_lon", + "bus1_lat", + ] + + df_add_lines = gpd.GeoDataFrame(pd.concat(add_lines), columns=add_lines_columns) + lines = pd.concat([lines, df_add_lines], ignore_index=True) + + return lines + + +def set_lv_substations(buses): + """ + Function to set what nodes are lv, thereby setting substation_lv The + current methodology is to set lv nodes to buses where multiple voltage + level are found, 
hence when the station_id is duplicated. + """ + # initialize column substation_lv to true + buses["substation_lv"] = True + + # For each station number with multiple buses make lowest voltage `substation_lv = TRUE` + bus_with_stations_duplicates = buses[ + buses.station_id.duplicated(keep=False) + ].sort_values(by=["station_id", "voltage"]) + lv_bus_at_station_duplicates = ( + buses[buses.station_id.duplicated(keep=False)] + .sort_values(by=["station_id", "voltage"]) + .drop_duplicates(subset=["station_id"]) + ) + # Set all buses with station duplicates "False" + buses.loc[bus_with_stations_duplicates.index, "substation_lv"] = False + # Set lv_buses with station duplicates "True" + buses.loc[lv_bus_at_station_duplicates.index, "substation_lv"] = True + + return buses + + +def merge_stations_lines_by_station_id_and_voltage( + lines, links, buses, distance_crs, tol=5000 +): + """ + Function to merge close stations and adapt the line datasets to adhere to + the merged dataset. + """ + + logger.info(" - Setting substation ids with tolerance of %.2f m" % (tol)) + + # bus types (AC != DC) + buses_ac = buses[buses["dc"] == False].reset_index() + buses_dc = buses[buses["dc"] == True].reset_index() + + # set_substations_ids(buses, distance_crs, tol=tol) + set_substations_ids(buses_ac, distance_crs, tol=tol) + set_substations_ids(buses_dc, distance_crs, tol=tol) + + logger.info(" - Merging substations with the same id") + + # merge buses with same station id and voltage + if not buses.empty: + buses_ac = merge_stations_same_station_id(buses_ac) + buses_dc = merge_stations_same_station_id(buses_dc) + buses_dc["bus_id"] = buses_ac["bus_id"].max() + buses_dc["bus_id"] + 1 + buses = pd.concat([buses_ac, buses_dc], ignore_index=True) + set_substations_ids(buses, distance_crs, tol=tol) + + logger.info(" - Specifying the bus ids of the line endings") + + # set the bus ids to the line dataset + lines, buses = set_lines_ids(lines, buses, distance_crs) + links, buses = set_lines_ids(links, buses, distance_crs) + + # drop lines starting and ending in the same node + lines.drop(lines[lines["bus0"] == lines["bus1"]].index, inplace=True) + links.drop(links[links["bus0"] == links["bus1"]].index, inplace=True) + # update line endings + lines = line_endings_to_bus_conversion(lines) + links = line_endings_to_bus_conversion(links) + + # set substation_lv + set_lv_substations(buses) + + # reset index + lines.reset_index(drop=True, inplace=True) + links.reset_index(drop=True, inplace=True) + + return lines, links, buses + + +def _split_linestring_by_point(linestring, points): + """ + Function to split a linestring geometry by multiple inner points. + + Parameters + ---------- + lstring : LineString + Linestring of the line to be split + points : list + List of points to split the linestring + + Return + ------ + list_lines : list + List of linestring to split the line + """ + + list_linestrings = [linestring] + + for p in points: + # execute split to all lines and store results + temp_list = [split(l, p) for l in list_linestrings] + # nest all geometries + list_linestrings = [lstring for tval in temp_list for lstring in tval.geoms] + + return list_linestrings + + +def fix_overpassing_lines(lines, buses, distance_crs, tol=1): + """ + Fix overpassing lines by splitting them at nodes within a given tolerance, + to include the buses being overpassed. + + Parameters: + - lines (GeoDataFrame): The lines to be fixed. + - buses (GeoDataFrame): The buses representing nodes. 
+ - distance_crs (str): The coordinate reference system (CRS) for distance calculations.
+ - tol (float): The tolerance distance in meters for determining if a bus is within a line.
+ Returns:
+ - lines (GeoDataFrame): The fixed lines.
+ - buses (GeoDataFrame): The buses representing nodes.
+ """
+
+ lines_to_add = [] # list of lines to be added
+ lines_to_split = [] # list of lines that have been split
+
+ lines_epsgmod = lines.to_crs(distance_crs)
+ buses_epsgmod = buses.to_crs(distance_crs)
+
+ # set tqdm options for the overpassing-lines check
+ tqdm_kwargs_substation_ids = dict(
+ ascii=False,
+ unit=" lines",
+ total=lines.shape[0],
+ desc="Verify lines overpassing nodes ",
+ )
+
+ for l in tqdm(lines.index, **tqdm_kwargs_substation_ids):
+ # bus indices being within tolerance from the line
+ bus_in_tol_epsg = buses_epsgmod[
+ buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol
+ ]
+
+ # Get boundary points
+ endpoint0 = lines_epsgmod.geometry.loc[l].boundary.geoms[0]
+ endpoint1 = lines_epsgmod.geometry.loc[l].boundary.geoms[1]
+
+ # Calculate distances
+ dist_to_ep0 = bus_in_tol_epsg.geometry.distance(endpoint0)
+ dist_to_ep1 = bus_in_tol_epsg.geometry.distance(endpoint1)
+
+ # exclude endings of the lines
+ bus_in_tol_epsg = bus_in_tol_epsg[(dist_to_ep0 > tol) | (dist_to_ep1 > tol)]
+
+ if not bus_in_tol_epsg.empty:
+ # add index of line to split
+ lines_to_split.append(l)
+
+ buses_locs = buses.geometry.loc[bus_in_tol_epsg.index]
+
+ # get new line geometries
+ new_geometries = _split_linestring_by_point(lines.geometry[l], buses_locs)
+ n_geoms = len(new_geometries)
+
+ # create temporary copies of the line
+ df_append = gpd.GeoDataFrame([lines.loc[l]] * n_geoms)
+ # update geometries
+ df_append["geometry"] = new_geometries
+ # update name of the line if there are multiple line segments
+ df_append["line_id"] = [
+ str(df_append["line_id"].iloc[0])
+ + (f"-{letter}" if n_geoms > 1 else "")
+ for letter in string.ascii_lowercase[:n_geoms]
+ ]
+
+ lines_to_add.append(df_append)
+
+ if not lines_to_add:
+ return lines, buses
+
+ df_to_add = gpd.GeoDataFrame(pd.concat(lines_to_add, ignore_index=True))
+ df_to_add.set_crs(lines.crs, inplace=True)
+ df_to_add.set_index(lines.index[-1] + df_to_add.index, inplace=True)
+
+ # update length
+ df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length
+
+ # update line endings
+ df_to_add = line_endings_to_bus_conversion(df_to_add)
+
+ # remove original lines
+ lines.drop(lines_to_split, inplace=True)
+
+ lines = df_to_add if lines.empty else pd.concat([lines, df_to_add])
+
+ lines = gpd.GeoDataFrame(lines.reset_index(drop=True), crs=lines.crs)
+
+ return lines, buses
+
+
+def build_network(inputs, outputs):
+
+ logger.info("Reading input data.")
+ buses = gpd.read_file(inputs["substations"])
+ lines = gpd.read_file(inputs["lines"])
+ links = gpd.read_file(inputs["links"])
+
+ lines = line_endings_to_bus_conversion(lines)
+ links = line_endings_to_bus_conversion(links)
+
+ logger.info(
+ "Fixing lines overpassing nodes: Connecting nodes and splitting lines."
+ ) + lines, buses = fix_overpassing_lines(lines, buses, DISTANCE_CRS, tol=1) + + # Merge buses with same voltage and within tolerance + logger.info(f"Aggregating close substations with a tolerance of {BUS_TOL} m") + + lines, links, buses = merge_stations_lines_by_station_id_and_voltage( + lines, links, buses, DISTANCE_CRS, BUS_TOL + ) + + # Recalculate lengths of lines + utm = lines.estimate_utm_crs(datum_name="WGS 84") + lines["length"] = lines.to_crs(utm).length + links["length"] = links.to_crs(utm).length + + transformers = get_transformers(buses, lines) + converters = _get_converters(buses, links, DISTANCE_CRS) + + logger.info("Saving outputs") + + ### Convert output to pypsa-eur friendly format + # Rename "substation" in buses["symbol"] to "Substation" + buses["symbol"] = buses["symbol"].replace({"substation": "Substation"}) + + # Drop unnecessary index column and set respective element ids as index + lines.set_index("line_id", inplace=True) + if not links.empty: + links.set_index("link_id", inplace=True) + converters.set_index("converter_id", inplace=True) + transformers.set_index("transformer_id", inplace=True) + buses.set_index("bus_id", inplace=True) + + # Convert voltages from V to kV + lines["voltage"] = lines["voltage"] / 1000 + if not links.empty: + links["voltage"] = links["voltage"] / 1000 + if not converters.empty: + converters["voltage"] = converters["voltage"] / 1000 + transformers["voltage_bus0"], transformers["voltage_bus1"] = ( + transformers["voltage_bus0"] / 1000, + transformers["voltage_bus1"] / 1000, + ) + buses["voltage"] = buses["voltage"] / 1000 + + # Convert 'true' and 'false' to 't' and 'f' + lines = lines.replace({True: "t", False: "f"}) + links = links.replace({True: "t", False: "f"}) + converters = converters.replace({True: "t", False: "f"}) + buses = buses.replace({True: "t", False: "f"}) + + # Change column orders + lines = lines[LINES_COLUMNS] + if not links.empty: + links = links[LINKS_COLUMNS] + else: + links = pd.DataFrame(columns=["link_id"] + LINKS_COLUMNS) + links.set_index("link_id", inplace=True) + transformers = transformers[TRANSFORMERS_COLUMNS] + + # Export to csv for base_network + buses.to_csv(outputs["substations"], quotechar="'") + lines.to_csv(outputs["lines"], quotechar="'") + links.to_csv(outputs["links"], quotechar="'") + converters.to_csv(outputs["converters"], quotechar="'") + transformers.to_csv(outputs["transformers"], quotechar="'") + + # Export to GeoJSON for quick validations + buses.to_file(outputs["substations_geojson"]) + lines.to_file(outputs["lines_geojson"]) + links.to_file(outputs["links_geojson"]) + converters.to_file(outputs["converters_geojson"]) + transformers.to_file(outputs["transformers_geojson"]) + + return None + + +if __name__ == "__main__": + # Detect running outside of snakemake and mock snakemake for testing + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("build_osm_network") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + countries = snakemake.config["countries"] + + build_network(snakemake.input, snakemake.output) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py new file mode 100644 index 000000000..8669d0af5 --- /dev/null +++ b/scripts/clean_osm_data.py @@ -0,0 +1,1807 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" +This script is used to clean OpenStreetMap (OSM) data for creating a PyPSA-Eur +ready network. 
+ +The script performs various cleaning operations on the OSM data, including: +- Cleaning voltage, circuits, cables, wires, and frequency columns +- Splitting semicolon-separated cells into new rows +- Distributing values to circuits based on the number of splits +- Adding line endings to substations based on line data +""" + +import json +import logging +import os +import re + +import geopandas as gpd +import numpy as np +import pandas as pd +from _helpers import configure_logging, set_scenario_config +from shapely.geometry import LineString, MultiLineString, Point, Polygon +from shapely.ops import linemerge, unary_union + +logger = logging.getLogger(__name__) + + +def _create_linestring(row): + """ + Create a LineString object from the given row. + + Args: + row (dict): A dictionary containing the row data. + + Returns: + LineString: A LineString object representing the geometry. + """ + coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] + return LineString(coords) + + +def _create_polygon(row): + """ + Create a Shapely Polygon from a list of coordinate dictionaries. + + Parameters: + coords (list): List of dictionaries with 'lat' and 'lon' keys + representing coordinates. + + Returns: + shapely.geometry.Polygon: The constructed polygon object. + """ + # Extract coordinates as tuples + point_coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] + + # Ensure closure by repeating the first coordinate as the last coordinate + if point_coords[0] != point_coords[-1]: + point_coords.append(point_coords[0]) + + # Create Polygon object + polygon = Polygon(point_coords) + + return polygon + + +def _find_closest_polygon(gdf, point): + """ + Find the closest polygon in a GeoDataFrame to a given point. + + Parameters: + gdf (GeoDataFrame): A GeoDataFrame containing polygons. + point (Point): A Point object representing the target point. + + Returns: + int: The index of the closest polygon in the GeoDataFrame. 
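The lookup is a plain distance-and-``idxmin`` pattern over the GeoDataFrame; a minimal self-contained example with made-up geometries:

```python
# Minimal sketch of the closest-polygon lookup: distance from the point to
# every polygon, then idxmin to get the winning row label.
import geopandas as gpd
from shapely.geometry import Point, Polygon

gdf = gpd.GeoDataFrame(
    {"name": ["A", "B"]},
    geometry=[Polygon([(0, 0), (1, 0), (1, 1)]), Polygon([(5, 5), (6, 5), (6, 6)])],
)
point = Point(4.5, 4.5)
closest_idx = gdf.geometry.distance(point).idxmin()
print(gdf.loc[closest_idx, "name"])  # "B"
```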
+ """ + # Compute the distance to each polygon + gdf["distance"] = gdf["geometry"].apply(lambda geom: point.distance(geom)) + + # Find the index of the closest polygon + closest_idx = gdf["distance"].idxmin() + + # Get the closest polygon's row + closest_polygon = gdf.loc[closest_idx] + + return closest_idx + + +def _clean_voltage(column): + """ + Function to clean the raw voltage column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning voltages.") + column = column.copy() + + column = ( + column.astype(str) + .str.lower() + .str.replace("400/220/110 kV'", "400000;220000;110000") + .str.replace("400/220/110/20_kv", "400000;220000;110000;20000") + .str.replace("2x25000", "25000;25000") + .str.replace("Ć©", ";") + ) + + column = ( + column.astype(str) + .str.lower() + .str.replace("(temp 150000)", "") + .str.replace("low", "1000") + .str.replace("minor", "1000") + .str.replace("medium", "33000") + .str.replace("med", "33000") + .str.replace("m", "33000") + .str.replace("high", "150000") + .str.replace("23000-109000", "109000") + .str.replace("380000>220000", "380000;220000") + .str.replace(":", ";") + .str.replace("<", ";") + .str.replace(",", ";") + .str.replace("kv", "000") + .str.replace("kva", "000") + .str.replace("/", ";") + .str.replace("nan", "") + .str.replace("", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", str(x))) + + column.dropna(inplace=True) + return column + + +def _clean_circuits(column): + """ + Function to clean the raw circuits column: manual fixing and drop nan + values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning circuits.") + column = column.copy() + column = ( + column.astype(str) + .str.replace("partial", "") + .str.replace("1operator=RTE operator:wikidata=Q2178795", "") + .str.lower() + .str.replace("1,5", "3") + .str.replace("1/3", "1") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_cables(column): + """ + Function to clean the raw cables column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning cables.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_wires(column): + """ + Function to clean the raw wires column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning wires.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("?", "") + .str.replace("trzyprzewodowe", "3") + .str.replace("pojedyńcze", "1") + .str.replace("single", "1") + .str.replace("double", "2") + .str.replace("triple", 
"3") + .str.replace("quad", "4") + .str.replace("fivefold", "5") + .str.replace("yes", "3") + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("_", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _check_voltage(voltage, list_voltages): + """ + Check if the given voltage is present in the list of allowed voltages. + + Parameters: + voltage (str): The voltage to check. + list_voltages (list): A list of allowed voltages. + + Returns: + bool: True if the voltage is present in the list of allowed voltages, + False otherwise. + """ + voltages = voltage.split(";") + for v in voltages: + if v in list_voltages: + return True + return False + + +def _clean_frequency(column): + """ + Function to clean the raw frequency column: manual fixing and drop nan + values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning frequencies.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("16.67", "16.7") + .str.replace("16,7", "16.7") + .str.replace("?", "") + .str.replace("hz", "") + .str.replace(" ", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;.]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_rating(column): + """ + Function to clean and sum the rating columns: + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning ratings.") + column = column.copy() + column = column.astype(str).str.replace("MW", "") + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + # Sum up all ratings if there are multiple entries + column = column.str.split(";").apply(lambda x: sum([int(i) for i in x])) + + column.dropna(inplace=True) + return column.astype(str) + + +def _split_cells(df, cols=["voltage"]): + """ + Split semicolon separated cells i.e. [66000;220000] and create new + identical rows. 
+ + Parameters + ---------- + df : dataframe + Dataframe under analysis + cols : list + List of target columns over which to perform the analysis + + Example + ------- + Original data: + row 1: '66000;220000', '50' + + After applying split_cells(): + row 1, '66000', '50', 2 + row 2, '220000', '50', 2 + """ + if df.empty: + return df + + # Create a dictionary to store the suffix count for each original ID + suffix_counts = {} + # Create a dictionary to store the number of splits associated with each + # original ID + num_splits = {} + + # Split cells and create new rows + x = df.assign(**{col: df[col].str.split(";") for col in cols}) + x = x.explode(cols, ignore_index=True) + + # Count the number of splits associated with each original ID + num_splits = x.groupby("id").size().to_dict() + + # Update the 'split_elements' column + x["split_elements"] = x["id"].map(num_splits) + + # Function to generate the new ID with suffix and update the number of + # splits + def generate_new_id(row): + original_id = row["id"] + if row["split_elements"] == 1: + return original_id + else: + suffix_counts[original_id] = suffix_counts.get(original_id, 0) + 1 + return f"{original_id}-{suffix_counts[original_id]}" + + # Update the ID column with the new IDs + x["id"] = x.apply(generate_new_id, axis=1) + + return x + + +def _distribute_to_circuits(row): + """ + Distributes the number of circuits or cables to individual circuits based + on the given row data. + + Parameters: + - row: A dictionary representing a row of data containing information about + circuits and cables. + + Returns: + - single_circuit: The number of circuits to be assigned to each individual + circuit. + """ + if row["circuits"] != "": + circuits = int(row["circuits"]) + else: + cables = int(row["cables"]) + circuits = cables / 3 + + single_circuit = int(max(1, np.floor_divide(circuits, row["split_elements"]))) + single_circuit = str(single_circuit) + + return single_circuit + + +def _add_line_endings_to_substations( + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, + prefix, +): + """ + Add line endings to substations. + + This function takes two pandas DataFrames, `substations` and `lines`, and + adds line endings to the substations based on the information from the + lines DataFrame. + + Parameters: + - substations (pandas DataFrame): DataFrame containing information about + substations. + - lines (pandas DataFrame): DataFrame containing information about lines. + + Returns: + - buses (pandas DataFrame): DataFrame containing the updated information + about substations with line endings. 
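+
+    The ``prefix`` (e.g. "line-end" or "link-end") is used to generate the new
+    bus ids; country codes of buses that were matched to several country
+    datasets are fixed afterwards via a spatial join with the country and
+    offshore shapes.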
+ """ + if gdf_lines.empty: + return df_substations + + logger.info("Adding line endings to substations") + # extract columns from df_substations + bus_s = pd.DataFrame(columns=df_substations.columns) + bus_e = pd.DataFrame(columns=df_substations.columns) + + # TODO pypsa-eur: fix country code to contain single country code + # Read information from gdf_lines + bus_s[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_s.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( + lambda p: p.geoms[0] if len(p.geoms) >= 2 else None + ) + bus_s.loc[:, "lon"] = bus_s["geometry"].map(lambda p: p.x if p != None else None) + bus_s.loc[:, "lat"] = bus_s["geometry"].map(lambda p: p.y if p != None else None) + bus_s.loc[:, "dc"] = gdf_lines["dc"] + + bus_e[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_e.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( + lambda p: p.geoms[1] if len(p.geoms) >= 2 else None + ) + bus_e.loc[:, "lon"] = bus_e["geometry"].map(lambda p: p.x if p != None else None) + bus_e.loc[:, "lat"] = bus_e["geometry"].map(lambda p: p.y if p != None else None) + bus_e.loc[:, "dc"] = gdf_lines["dc"] + + bus_all = pd.concat([bus_s, bus_e], ignore_index=True) + + # Group gdf_substations by voltage and and geometry (dropping duplicates) + bus_all = bus_all.groupby(["voltage", "lon", "lat", "dc"]).first().reset_index() + bus_all = bus_all[df_substations.columns] + bus_all.loc[:, "bus_id"] = bus_all.apply( + lambda row: f"{prefix}/{row.name + 1}", axis=1 + ) + + # Initialize default values + bus_all["station_id"] = None + # Assuming substations completed for installed lines + bus_all["under_construction"] = False + bus_all["tag_area"] = None + bus_all["symbol"] = "substation" + # TODO: this tag may be improved, maybe depending on voltage levels + bus_all["tag_substation"] = "transmission" + bus_all["tag_source"] = prefix + + buses = pd.concat([df_substations, bus_all], ignore_index=True) + buses.set_index("bus_id", inplace=True) + + # Fix country codes + # TODO pypsa-eur: Temporary solution as long as the shapes have a low, + # incomplete resolution (cf. 
2500 meters for buffering) + bool_multiple_countries = buses["country"].str.contains(";") + gdf_offshore = gpd.read_file(path_offshore_shapes).set_index("name")["geometry"] + gdf_offshore = gpd.GeoDataFrame( + gdf_offshore, geometry=gdf_offshore, crs=gdf_offshore.crs + ) + gdf_countries = gpd.read_file(path_country_shapes).set_index("name")["geometry"] + # reproject to enable buffer + gdf_countries = gpd.GeoDataFrame(geometry=gdf_countries, crs=gdf_countries.crs) + gdf_union = gdf_countries.merge( + gdf_offshore, how="outer", left_index=True, right_index=True + ) + gdf_union["geometry"] = gdf_union.apply( + lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]).union_all(), + axis=1, + ) + gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs=crs) + gdf_buses_tofix = gpd.GeoDataFrame( + buses[bool_multiple_countries], geometry="geometry", crs=crs + ) + joined = gpd.sjoin( + gdf_buses_tofix, gdf_union.reset_index(), how="left", predicate="within" + ) + + # For all remaining rows where the country/index_right column is NaN, find + # find the closest polygon index + joined.loc[joined["name"].isna(), "name"] = joined.loc[ + joined["name"].isna(), "geometry" + ].apply(lambda x: _find_closest_polygon(gdf_union, x)) + + joined.reset_index(inplace=True) + joined = joined.drop_duplicates(subset="bus_id") + joined.set_index("bus_id", inplace=True) + + buses.loc[bool_multiple_countries, "country"] = joined.loc[ + bool_multiple_countries, "name" + ] + + return buses.reset_index() + + +def _import_lines_and_cables(path_lines): + """ + Import lines and cables from the given input paths. + + Parameters: + - path_lines (dict): A dictionary containing the input paths for lines and + cables data. + + Returns: + - df_lines (DataFrame): A DataFrame containing the imported lines and + cables data. + """ + columns = [ + "id", + "bounds", + "nodes", + "geometry", + "country", + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] + df_lines = pd.DataFrame(columns=columns) + + logger.info("Importing lines and cables") + for key in path_lines: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_lines[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_lines[key][idx])) + + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + df["country"] = country + + col_tags = [ + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + df.drop(columns=["type", "tags"], inplace=True) + + df_lines = pd.concat([df_lines, df], axis="rows") + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + + return df_lines + + +def _import_links(path_links): + """ + Import links from the given input paths. + + Parameters: + - path_links (dict): A dictionary containing the input paths for links. + + Returns: + - df_links (DataFrame): A DataFrame containing the imported links data. 
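+
+    Example (with a hypothetical input path):
+        >>> df_links = _import_links(
+        ...     {"links": ["data/osm/raw/BE/links_relation.json"]}
+        ... )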
+ """ + columns = [ + "id", + "bounds", + "nodes", + "geometry", + "country", + "circuits", + "frequency", + "rating", + "voltage", + ] + df_links = pd.DataFrame(columns=columns) + + logger.info("Importing links") + for key in path_links: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_links[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_links[key][idx])) + + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + df["id"] = df["id"].apply(lambda x: (f"relation/{x}")) + df["country"] = country + + col_tags = [ + "circuits", + "frequency", + "rating", + "voltage", + ] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + df.drop(columns=["type", "tags"], inplace=True) + + df_links = pd.concat([df_links, df], axis="rows") + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + logger.info("Dropping lines without rating.") + len_before = len(df_links) + df_links = df_links.dropna(subset=["rating"]) + len_after = len(df_links) + logger.info( + f"Dropped {len_before-len_after} elements without rating. " + + f"Imported {len_after} elements." + ) + + return df_links + + +def _create_single_link(row): + """ + Create a single link from multiple rows within a OSM link relation. + + Parameters: + - row: A row of OSM data containing information about the link. + + Returns: + - single_link: A single LineString representing the link. + + This function takes a row of OSM data and extracts the relevant information + to create a single link. It filters out elements (substations, electrodes) + with invalid roles and finds the longest link based on its endpoints. + If the longest link is a MultiLineString, it extracts the longest + linestring from it. The resulting single link is returned. + """ + valid_roles = ["line", "cable"] + df = pd.json_normalize(row["members"]) + df = df[df["role"].isin(valid_roles)] + df.loc[:, "geometry"] = df.apply(_create_linestring, axis=1) + df.loc[:, "length"] = df["geometry"].apply(lambda x: x.length) + + list_endpoints = [] + for idx, row in df.iterrows(): + tuple = sorted([row["geometry"].coords[0], row["geometry"].coords[-1]]) + # round tuple to 3 decimals + tuple = ( + round(tuple[0][0], 3), + round(tuple[0][1], 3), + round(tuple[1][0], 3), + round(tuple[1][1], 3), + ) + list_endpoints.append(tuple) + + df.loc[:, "endpoints"] = list_endpoints + df_longest = df.loc[df.groupby("endpoints")["length"].idxmin()] + + single_link = linemerge(df_longest["geometry"].values.tolist()) + + # If the longest component is a MultiLineString, extract the longest linestring from it + if isinstance(single_link, MultiLineString): + # Find connected components + components = list(single_link.geoms) + + # Find the longest connected linestring + single_link = max(components, key=lambda x: x.length) + + return single_link + + +def _drop_duplicate_lines(df_lines): + """ + Drop duplicate lines from the given dataframe. 
Duplicates are usually lines + cross-border lines or slightly outside the country border of focus. + + Parameters: + - df_lines (pandas.DataFrame): The dataframe containing lines data. + + Returns: + - df_lines (pandas.DataFrame): The dataframe with duplicate lines removed + and cleaned data. + + This function drops duplicate lines from the given dataframe based on the + 'id' column. It groups the duplicate rows by 'id' and aggregates the + 'country' column to a string split by semicolon, as they appear in multiple + country datasets. One example of the duplicates is kept, accordingly. + Finally, the updated dataframe without multiple duplicates is returned. + """ + logger.info("Dropping duplicate lines.") + duplicate_rows = df_lines[df_lines.duplicated(subset=["id"], keep=False)].copy() + + # Group rows by id and aggregate the country column to a string split by semicolon + grouped_duplicates = ( + duplicate_rows.groupby("id")["country"].agg(lambda x: ";".join(x)).reset_index() + ) + duplicate_rows.drop_duplicates(subset="id", inplace=True) + duplicate_rows.drop(columns=["country"], inplace=True) + duplicate_rows = duplicate_rows.join( + grouped_duplicates.set_index("id"), on="id", how="left" + ) + + len_before = len(df_lines) + # Drop duplicates and update the df_lines dataframe with the cleaned data + df_lines = df_lines[~df_lines["id"].isin(duplicate_rows["id"])] + df_lines = pd.concat([df_lines, duplicate_rows], axis="rows") + len_after = len(df_lines) + + logger.info( + f"Dropped {len_before - len_after} duplicate elements. " + + f"Keeping {len_after} elements." + ) + + return df_lines + + +def _filter_by_voltage(df, min_voltage=200000): + """ + Filter rows in the DataFrame based on the voltage in V. + + Parameters: + - df (pandas.DataFrame): The DataFrame containing the substations or lines data. + - min_voltage (int, optional): The minimum voltage value to filter the + rows. Defaults to 200000 [unit: V]. + + Returns: + - filtered df (pandas.DataFrame): The filtered DataFrame containing + the lines or substations above min_voltage. + - list_voltages (list): A list of unique voltage values above min_voltage. + The type of the list elements is string. + """ + if df.empty: + return df, [] + + logger.info( + f"Filtering dataframe by voltage. Only keeping rows above and including {min_voltage} V." + ) + list_voltages = df["voltage"].str.split(";").explode().unique().astype(str) + # Keep numeric strings + list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)] + list_voltages = list_voltages.astype(int) + list_voltages = list_voltages[list_voltages >= int(min_voltage)] + list_voltages = list_voltages.astype(str) + + bool_voltages = df["voltage"].apply(_check_voltage, list_voltages=list_voltages) + len_before = len(df) + df = df[bool_voltages] + len_after = len(df) + logger.info( + f"Dropped {len_before - len_after} elements with voltage below {min_voltage}. " + + f"Keeping {len_after} elements." + ) + + return df, list_voltages + + +def _clean_substations(df_substations, list_voltages): + """ + Clean the substation data by performing the following steps: + - Split cells in the dataframe. + - Filter substation data based on specified voltages. + - Update the frequency values based on the split count. + - Split cells in the 'frequency' column. + - Set remaining invalid frequency values that are not in ['0', '50'] + to '50'. + + Parameters: + - df_substations (pandas.DataFrame): The input dataframe containing + substation data. 
+ - list_voltages (list): A list of voltages above min_voltage to filter the + substation data. + + Returns: + - df_substations (pandas.DataFrame): The cleaned substation dataframe. + """ + df_substations = df_substations.copy() + + df_substations = _split_cells(df_substations) + + bool_voltages = df_substations["voltage"].apply( + _check_voltage, list_voltages=list_voltages + ) + df_substations = df_substations[bool_voltages] + df_substations.loc[:, "split_count"] = df_substations["id"].apply( + lambda x: x.split("-")[1] if "-" in x else "0" + ) + df_substations.loc[:, "split_count"] = df_substations["split_count"].astype(int) + + bool_split = df_substations["split_elements"] > 1 + bool_frequency_len = ( + df_substations["frequency"].apply(lambda x: len(x.split(";"))) + == df_substations["split_elements"] + ) + + op_freq = lambda row: row["frequency"].split(";")[row["split_count"] - 1] + + df_substations.loc[bool_frequency_len & bool_split, "frequency"] = ( + df_substations.loc[bool_frequency_len & bool_split,].apply(op_freq, axis=1) + ) + + df_substations = _split_cells(df_substations, cols=["frequency"]) + bool_invalid_frequency = df_substations["frequency"].apply( + lambda x: x not in ["50", "0"] + ) + df_substations.loc[bool_invalid_frequency, "frequency"] = "50" + + return df_substations + + +def _clean_lines(df_lines, list_voltages): + """ + Cleans and processes the `df_lines` DataFrame heuristically based on the + information available per respective line and cable. Further checks to + ensure data consistency and completeness. + + Parameters + ---------- + df_lines : pandas.DataFrame + The input DataFrame containing line information with columns such as + 'voltage', 'circuits', 'frequency', 'cables', 'split_elements', 'id', + etc. + list_voltages : list + A list of unique voltage values above a certain threshold. (type: str) + + Returns + ------- + df_lines : pandas.DataFrame + The cleaned DataFrame with updated columns 'circuits', 'frequency', and + 'cleaned' to reflect the applied transformations. + + Description + ----------- + This function performs the following operations: + + - Initializes a 'cleaned' column with False, step-wise updates to True + following the respective cleaning step. + - Splits the voltage cells in the DataFrame at semicolons using a helper + function `_split_cells`. + - Filters the DataFrame to only include rows with valid voltages. + - Sets circuits of remaining lines without any applicable heuristic equal + to 1. + + The function ensures that the resulting DataFrame has consistent and + complete information for further processing or analysis while maintaining + the data of the original OSM data set wherever possible. + """ + logger.info("Cleaning lines and determining circuits.") + # Initiate boolean with False, only set to true if all cleaning steps are + # passed + df_lines = df_lines.copy() + df_lines["cleaned"] = False + + df_lines["voltage_original"] = df_lines["voltage"] + df_lines["circuits_original"] = df_lines["circuits"] + + df_lines = _split_cells(df_lines) + bool_voltages = df_lines["voltage"].apply( + _check_voltage, list_voltages=list_voltages + ) + df_lines = df_lines[bool_voltages] + + bool_ac = df_lines["frequency"] != "0" + bool_dc = ~bool_ac + valid_frequency = ["50", "0"] + bool_invalid_frequency = df_lines["frequency"].apply( + lambda x: x not in valid_frequency + ) + + bool_noinfo = (df_lines["cables"] == "") & (df_lines["circuits"] == "") + # Fill in all values where cables info and circuits does not exist. 
Assuming 1 circuit + df_lines.loc[bool_noinfo, "circuits"] = "1" + df_lines.loc[bool_noinfo & bool_invalid_frequency, "frequency"] = "50" + df_lines.loc[bool_noinfo, "cleaned"] = True + + # Fill in all values where cables info exists and split_elements == 1 + bool_cables_ac = ( + (df_lines["cables"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["cables"] != "0") + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"] == "") + & (df_lines["cleaned"] == False) + & bool_ac + ) + + df_lines.loc[bool_cables_ac, "circuits"] = df_lines.loc[ + bool_cables_ac, "cables" + ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 3))))) + + df_lines.loc[bool_cables_ac, "frequency"] = "50" + df_lines.loc[bool_cables_ac, "cleaned"] = True + + bool_cables_dc = ( + (df_lines["cables"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["cables"] != "0") + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"] == "") + & (df_lines["cleaned"] == False) + & bool_dc + ) + + df_lines.loc[bool_cables_dc, "circuits"] = df_lines.loc[ + bool_cables_dc, "cables" + ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 2))))) + + df_lines.loc[bool_cables_dc, "frequency"] = "0" + df_lines.loc[bool_cables_dc, "cleaned"] = True + + # Fill in all values where circuits info exists and split_elements == 1 + bool_lines = ( + (df_lines["circuits"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["circuits"] != "0") + & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["cleaned"] == False) + ) + + df_lines.loc[bool_lines & bool_ac, "frequency"] = "50" + df_lines.loc[bool_lines & bool_dc, "frequency"] = "0" + df_lines.loc[bool_lines, "cleaned"] = True + + # Clean those values where number of voltages split by semicolon is larger + # than no cables or no circuits + bool_cables = ( + (df_lines["voltage_original"].apply(lambda x: len(x.split(";")) > 1)) + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["cleaned"] == False) + ) + + df_lines.loc[bool_cables, "circuits"] = df_lines[bool_cables].apply( + _distribute_to_circuits, axis=1 + ) + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # Clean those values where multiple circuit values are present, divided by + # semicolon + has_multiple_circuits = df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1) + circuits_match_split_elements = df_lines.apply( + lambda row: len(row["circuits"].split(";")) == row["split_elements"], + axis=1, + ) + is_not_cleaned = df_lines["cleaned"] == False + bool_cables = has_multiple_circuits & circuits_match_split_elements & is_not_cleaned + + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( + lambda row: str(row["circuits"].split(";")[int(row["id"].split("-")[-1]) - 1]), + axis=1, + ) + + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # Clean those values where multiple cables values are present, divided by + # semicolon + has_multiple_cables = df_lines["cables"].apply(lambda x: len(x.split(";")) > 1) + cables_match_split_elements = df_lines.apply( + lambda row: len(row["cables"].split(";")) == row["split_elements"], + axis=1, + ) + is_not_cleaned = df_lines["cleaned"] == 
False + bool_cables = has_multiple_cables & cables_match_split_elements & is_not_cleaned + + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( + lambda row: str( + max( + 1, + np.floor_divide( + int(row["cables"].split(";")[int(row["id"].split("-")[-1]) - 1]), 3 + ), + ) + ), + axis=1, + ) + + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # All remaining lines to circuits == 1 + bool_leftover = df_lines["cleaned"] == False + if sum(bool_leftover) > 0: + str_id = "; ".join(str(id) for id in df_lines.loc[bool_leftover, "id"]) + logger.info(f"Setting circuits of remaining {sum(bool_leftover)} lines to 1...") + logger.info(f"Lines affected: {str_id}") + + df_lines.loc[bool_leftover, "circuits"] = "1" + df_lines.loc[bool_leftover & bool_ac, "frequency"] = "50" + df_lines.loc[bool_leftover & bool_dc, "frequency"] = "0" + df_lines.loc[bool_leftover, "cleaned"] = True + + return df_lines + + +def _create_substations_geometry(df_substations): + """ + Creates geometries. + + Parameters: + df_substations (DataFrame): The input DataFrame containing the substations + data. + + Returns: + df_substations (DataFrame): A new DataFrame with the + polygons ["polygon"] of the substations geometries. + """ + logger.info("Creating substations geometry.") + df_substations = df_substations.copy() + + # Create centroids from geometries and keep the original polygons + df_substations.loc[:, "polygon"] = df_substations["geometry"] + + return df_substations + + +def _create_substations_centroid(df_substations): + """ + Creates centroids from geometries and keeps the original polygons. + + Parameters: + df_substations (DataFrame): The input DataFrame containing the substations + data. + + Returns: + df_substations (DataFrame): A new DataFrame with the centroids ["geometry"] + and polygons ["polygon"] of the substations geometries. + """ + logger.info("Creating substations geometry.") + df_substations = df_substations.copy() + + df_substations.loc[:, "geometry"] = df_substations["polygon"].apply( + lambda x: x.centroid + ) + + df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) + df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) + + return df_substations + + +def _create_lines_geometry(df_lines): + """ + Create line geometry for the given DataFrame of lines. + + Parameters: + - df_lines (pandas.DataFrame): DataFrame containing lines data. + + Returns: + - df_lines (pandas.DataFrame): DataFrame with transformed 'geometry' + column (type: shapely LineString). + + Notes: + - This function transforms 'geometry' column in the input DataFrame by + applying the '_create_linestring' function to each row. + - It then drops rows where the geometry has equal start and end points, + as these are usually not lines but outlines of areas. + """ + logger.info("Creating lines geometry.") + df_lines = df_lines.copy() + df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1) + + bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) + df_lines = df_lines[~bool_circle] + + return df_lines + + +def _add_bus_centroid_to_line(linestring, point): + """ + Adds the centroid of a substation to a linestring by extending the + linestring with a new segment. + + Parameters: + linestring (LineString): The original linestring to extend. + point (Point): The centroid of the bus. 
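+
+    The new segment is attached to whichever end of the linestring is closer
+    to the given centroid and merged with the original geometry via
+    ``linemerge``.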
+ + Returns: + merged (LineString): The extended linestring with the new segment. + """ + start = linestring.coords[0] + end = linestring.coords[-1] + + dist_to_start = point.distance(Point(start)) + dist_to_end = point.distance(Point(end)) + + if dist_to_start < dist_to_end: + new_segment = LineString([point.coords[0], start]) + else: + new_segment = LineString([point.coords[0], end]) + + merged = linemerge([linestring, new_segment]) + + return merged + + +def _finalise_substations(df_substations): + """ + Finalises the substations column types. + + Args: + df_substations (pandas.DataFrame): The input DataFrame + containing substations data. + + Returns: + df_substations (pandas.DataFrame(): The DataFrame with finalised column + types and transformed data. + """ + logger.info("Finalising substations column types.") + df_substations = df_substations.copy() + # rename columns + df_substations.rename( + columns={ + "id": "bus_id", + "power": "symbol", + "substation": "tag_substation", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step + df_substations.loc[:, "symbol"] = "substation" + df_substations.loc[:, "tag_substation"] = "transmission" + df_substations.loc[:, "dc"] = False + df_substations.loc[df_substations["frequency"] == "0", "dc"] = True + df_substations.loc[:, "under_construction"] = False + df_substations.loc[:, "station_id"] = None + df_substations.loc[:, "tag_area"] = None + df_substations.loc[:, "tag_source"] = df_substations["bus_id"] + + # Only included needed columns + df_substations = df_substations[ + [ + "bus_id", + "symbol", + "tag_substation", + "voltage", + "lon", + "lat", + "dc", + "under_construction", + "station_id", + "tag_area", + "country", + "geometry", + "polygon", + "tag_source", + ] + ] + + # Substation data types + df_substations["voltage"] = df_substations["voltage"].astype(int) + + return df_substations + + +def _finalise_lines(df_lines): + """ + Finalises the lines column types. + + Args: + df_lines (pandas.DataFrame): The input DataFrame containing lines data. + + Returns: + df_lines (pandas.DataFrame(): The DataFrame with finalised column types + and transformed data. + """ + logger.info("Finalising lines column types.") + df_lines = df_lines.copy() + # Rename columns + df_lines.rename( + columns={ + "id": "line_id", + "power": "tag_type", + "frequency": "tag_frequency", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step + df_lines.loc[:, "bus0"] = None + df_lines.loc[:, "bus1"] = None + df_lines.loc[:, "length"] = None + df_lines.loc[:, "underground"] = False + df_lines.loc[df_lines["tag_type"] == "line", "underground"] = False + df_lines.loc[df_lines["tag_type"] == "cable", "underground"] = True + df_lines.loc[:, "under_construction"] = False + df_lines.loc[:, "dc"] = False + df_lines.loc[df_lines["tag_frequency"] == "50", "dc"] = False + df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True + + # Only include needed columns + df_lines = df_lines[ + [ + "line_id", + "circuits", + "tag_type", + "voltage", + "tag_frequency", + "bus0", + "bus1", + "length", + "underground", + "under_construction", + "dc", + "country", + "geometry", + ] + ] + + df_lines["circuits"] = df_lines["circuits"].astype(int) + df_lines["voltage"] = df_lines["voltage"].astype(int) + df_lines["tag_frequency"] = df_lines["tag_frequency"].astype(int) + + return df_lines + + +def _finalise_links(df_links): + """ + Finalises the links column types. 
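+    All links are treated as underground HVDC connections (``underground`` and
+    ``dc`` are set to True) and the cleaned ``rating`` is used as ``p_nom``.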
+ + Args: + df_links (pandas.DataFrame): The input DataFrame containing links data. + + Returns: + df_links (pandas.DataFrame(): The DataFrame with finalised column types + and transformed data. + """ + logger.info("Finalising links column types.") + df_links = df_links.copy() + # Rename columns + df_links.rename( + columns={ + "id": "link_id", + "rating": "p_nom", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step + df_links["bus0"] = None + df_links["bus1"] = None + df_links["length"] = None + df_links["underground"] = True + df_links["under_construction"] = False + df_links["dc"] = True + + # Only include needed columns + df_links = df_links[ + [ + "link_id", + "voltage", + "p_nom", + "bus0", + "bus1", + "length", + "underground", + "under_construction", + "dc", + "country", + "geometry", + ] + ] + + df_links["p_nom"] = df_links["p_nom"].astype(int) + df_links["voltage"] = df_links["voltage"].astype(int) + + return df_links + + +def _import_substations(path_substations): + """ + Import substations from the given input paths. This function imports both + substations from OSM ways as well as relations that contain nested + information on the substations shape and electrical parameters. Ways and + relations are subsequently concatenated to form a single DataFrame + containing unique bus ids. + + Args: + path_substations (dict): A dictionary containing input paths for + substations. + + Returns: + pd.DataFrame: A DataFrame containing the imported substations data. + """ + cols_substations_way = [ + "id", + "geometry", + "country", + "power", + "substation", + "voltage", + "frequency", + ] + cols_substations_relation = [ + "id", + "country", + "power", + "substation", + "voltage", + "frequency", + ] + df_substations_way = pd.DataFrame(columns=cols_substations_way) + df_substations_relation = pd.DataFrame(columns=cols_substations_relation) + + logger.info("Importing substations") + for key in path_substations: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_substations[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_substations[key][idx])) + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + # new string that adds "way/" to id + df["id"] = df["id"].apply( + lambda x: ( + f"way/{x}" if key == "substations_way" else f"relation/{x}" + ) + ) + df["country"] = country + + col_tags = ["power", "substation", "voltage", "frequency"] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + + if key == "substations_way": + df.drop(columns=["type", "tags", "bounds", "nodes"], inplace=True) + df_substations_way = pd.concat( + [df_substations_way, df], axis="rows" + ) + elif key == "substations_relation": + df.drop(columns=["type", "tags", "bounds"], inplace=True) + df_substations_relation = pd.concat( + [df_substations_relation, df], axis="rows" + ) + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + + df_substations_way.drop_duplicates(subset="id", 
keep="first", inplace=True) + df_substations_relation.drop_duplicates(subset="id", keep="first", inplace=True) + + df_substations_way["geometry"] = df_substations_way.apply(_create_polygon, axis=1) + + # Normalise the members column of df_substations_relation + cols_members = ["id", "type", "ref", "role", "geometry"] + df_substations_relation_members = pd.DataFrame(columns=cols_members) + + for index, row in df_substations_relation.iterrows(): + col_members = ["type", "ref", "role", "geometry"] + df = pd.json_normalize(row["members"]) + + for cm in col_members: + if cm not in df.columns: + df[cm] = pd.NA + + df = df.loc[:, col_members] + df["id"] = str(row["id"]) + df["ref"] = df["ref"].astype(str) + df = df[df["type"] != "node"] + df = df.dropna(subset=["geometry"]) + df = df[~df["role"].isin(["", "incoming_line", "substation", "inner"])] + df_substations_relation_members = pd.concat( + [df_substations_relation_members, df], axis="rows" + ) + + df_substations_relation_members.reset_index(inplace=True) + df_substations_relation_members["linestring"] = ( + df_substations_relation_members.apply(_create_linestring, axis=1) + ) + df_substations_relation_members_grouped = ( + df_substations_relation_members.groupby("id")["linestring"] + .apply(lambda x: linemerge(x.tolist())) + .reset_index() + ) + df_substations_relation_members_grouped["geometry"] = ( + df_substations_relation_members_grouped["linestring"].apply( + lambda x: x.convex_hull + ) + ) + + df_substations_relation = ( + df_substations_relation.join( + df_substations_relation_members_grouped.set_index("id"), on="id", how="left" + ) + .drop(columns=["members", "linestring"]) + .dropna(subset=["geometry"]) + ) + + # reorder columns and concatenate + df_substations_relation = df_substations_relation[cols_substations_way] + df_substations = pd.concat( + [df_substations_way, df_substations_relation], axis="rows" + ) + + return df_substations + + +def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): + """ + Removes lines that are within substation polygons from the given + GeoDataFrame of lines. These are not needed to create network (e.g. bus + bars, switchgear, etc.) + + Parameters: + - gdf_lines (GeoDataFrame): A GeoDataFrame containing lines with 'line_id' + and 'geometry' columns. + - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing + substation polygons. + + Returns: + GeoDataFrame: A new GeoDataFrame without lines within substation polygons. + """ + logger.info("Identifying and removing lines within substation polygons...") + gdf = gpd.sjoin( + gdf_lines[["line_id", "geometry"]], + gdf_substations_polygon, + how="inner", + predicate="within", + )["line_id"] + + logger.info( + f"Removed {len(gdf)} lines within substations of original {len(gdf_lines)} lines." + ) + gdf_lines = gdf_lines[~gdf_lines["line_id"].isin(gdf)] + + return gdf_lines + + +def _merge_touching_polygons(df): + """ + Merge touching polygons in a GeoDataFrame. + + Parameters: + - df: pandas.DataFrame or geopandas.GeoDataFrame + The input DataFrame containing the polygons to be merged. + + Returns: + - gdf: geopandas.GeoDataFrame + The GeoDataFrame with merged polygons. 
+ """ + + gdf = gpd.GeoDataFrame(df, geometry="polygon", crs=crs) + combined_polygons = unary_union(gdf.geometry) + if combined_polygons.geom_type == "MultiPolygon": + gdf_combined = gpd.GeoDataFrame( + geometry=[poly for poly in combined_polygons.geoms], crs=crs + ) + else: + gdf_combined = gpd.GeoDataFrame(geometry=[combined_polygons], crs=crs) + + gdf.reset_index(drop=True, inplace=True) + + for i, combined_geom in gdf_combined.iterrows(): + mask = gdf.intersects(combined_geom.geometry) + gdf.loc[mask, "polygon_merged"] = combined_geom.geometry + + gdf.drop(columns=["polygon"], inplace=True) + gdf.rename(columns={"polygon_merged": "polygon"}, inplace=True) + + return gdf + + +def _add_endpoints_to_line(linestring, polygon_dict): + """ + Adds endpoints to a line by removing any overlapping areas with polygons. + + Parameters: + linestring (LineString): The original line to add endpoints to. + polygon_dict (dict): A dictionary of polygons, where the keys are bus IDs and the values are the corresponding polygons. + + Returns: + LineString: The modified line with added endpoints. + """ + if not polygon_dict: + return linestring + polygon_centroids = { + bus_id: polygon.centroid for bus_id, polygon in polygon_dict.items() + } + polygon_unary = polygons = unary_union(list(polygon_dict.values())) + + # difference with polygon + linestring_new = linestring.difference(polygon_unary) + + if type(linestring_new) == MultiLineString: + # keep the longest line in the multilinestring + linestring_new = max(linestring_new.geoms, key=lambda x: x.length) + + for p in polygon_centroids: + linestring_new = _add_bus_centroid_to_line(linestring_new, polygon_centroids[p]) + + return linestring_new + + +def _get_polygons_at_endpoints(linestring, polygon_dict): + """ + Get the polygons that contain the endpoints of a given linestring. + + Parameters: + linestring (LineString): The linestring for which to find the polygons at the endpoints. + polygon_dict (dict): A dictionary containing polygons as values, with bus_ids as keys. + + Returns: + dict: A dictionary containing bus_ids as keys and polygons as values, where the polygons contain the endpoints of the linestring. + """ + # Get the endpoints of the linestring + start_point = Point(linestring.coords[0]) + end_point = Point(linestring.coords[-1]) + + # Initialize dictionary to store bus_ids as keys and polygons as values + bus_id_polygon_dict = {} + + for bus_id, polygon in polygon_dict.items(): + if polygon.contains(start_point) or polygon.contains(end_point): + bus_id_polygon_dict[bus_id] = polygon + + return bus_id_polygon_dict + + +def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): + """ + Extends the lines in the given GeoDataFrame `gdf_lines` to the centroid of + the nearest substations represented by the polygons in the + `gdf_substations_polygon` GeoDataFrame. + + Parameters: + gdf_lines (GeoDataFrame): A GeoDataFrame containing the lines to be extended. + gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing the polygons representing substations. + + Returns: + GeoDataFrame: A new GeoDataFrame with the lines extended to the substations. 
+ """ + gdf = gpd.sjoin( + gdf_lines, + gdf_substations_polygon.drop_duplicates(subset="polygon", inplace=False), + how="left", + lsuffix="line", + rsuffix="bus", + predicate="intersects", + ).drop(columns="index_bus") + + # Group by 'line_id' and create a dictionary mapping 'bus_id' to 'geometry_bus', excluding the grouping columns + gdf = ( + gdf.groupby("line_id") + .apply( + lambda x: x[["bus_id", "geometry_bus"]] + .dropna() + .set_index("bus_id")["geometry_bus"] + .to_dict(), + include_groups=False, + ) + .reset_index() + ) + gdf.columns = ["line_id", "bus_dict"] + + gdf["intersects_bus"] = gdf.apply(lambda row: len(row["bus_dict"]) > 0, axis=1) + + gdf.loc[:, "line_geometry"] = gdf.join( + gdf_lines.set_index("line_id")["geometry"], on="line_id" + )["geometry"] + + # Polygons at the endpoints of the linestring + gdf["bus_endpoints"] = gdf.apply( + lambda row: _get_polygons_at_endpoints(row["line_geometry"], row["bus_dict"]), + axis=1, + ) + + gdf.loc[:, "line_geometry_new"] = gdf.apply( + lambda row: _add_endpoints_to_line(row["line_geometry"], row["bus_endpoints"]), + axis=1, + ) + + gdf.set_index("line_id", inplace=True) + gdf_lines.set_index("line_id", inplace=True) + + gdf_lines.loc[:, "geometry"] = gdf["line_geometry_new"] + + return gdf_lines + + +# Function to bridge gaps between all lines + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("clean_osm_data") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + # Parameters + crs = "EPSG:4326" # Correct crs for OSM data + min_voltage_ac = 200000 # [unit: V] Minimum voltage value to filter AC lines. + min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. + + logger.info("---") + logger.info("SUBSTATIONS") + # Input + path_substations = { + "substations_way": snakemake.input.substations_way, + "substations_relation": snakemake.input.substations_relation, + } + + # Cleaning process + df_substations = _import_substations(path_substations) + df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) + df_substations, list_voltages = _filter_by_voltage( + df_substations, min_voltage=min_voltage_ac + ) + df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) + df_substations = _clean_substations(df_substations, list_voltages) + df_substations = _create_substations_geometry(df_substations) + + # Merge touching polygons + df_substations = _merge_touching_polygons(df_substations) + df_substations = _create_substations_centroid(df_substations) + df_substations = _finalise_substations(df_substations) + + # Create polygon GeoDataFrame to remove lines within substations + gdf_substations_polygon = gpd.GeoDataFrame( + df_substations[["bus_id", "polygon", "voltage"]], + geometry="polygon", + crs=crs, + ) + + gdf_substations_polygon["geometry"] = gdf_substations_polygon.polygon.copy() + + logger.info("---") + logger.info("LINES AND CABLES") + path_lines = { + "lines": snakemake.input.lines_way, + "cables": snakemake.input.cables_way, + } + + # Cleaning process + df_lines = _import_lines_and_cables(path_lines) + df_lines = _drop_duplicate_lines(df_lines) + df_lines.loc[:, "voltage"] = _clean_voltage(df_lines["voltage"]) + df_lines, list_voltages = _filter_by_voltage(df_lines, min_voltage=min_voltage_ac) + df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) + df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) + df_lines.loc[:, "frequency"] = 
_clean_frequency(df_lines["frequency"]) + df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) + df_lines = _clean_lines(df_lines, list_voltages) + + # Drop DC lines, will be added through relations later + len_before = len(df_lines) + df_lines = df_lines[df_lines["frequency"] == "50"] + len_after = len(df_lines) + logger.info( + f"Dropped {len_before - len_after} DC lines. Keeping {len_after} AC lines." + ) + + df_lines = _create_lines_geometry(df_lines) + df_lines = _finalise_lines(df_lines) + + # Create GeoDataFrame + gdf_lines = gpd.GeoDataFrame(df_lines, geometry="geometry", crs=crs) + gdf_lines = _remove_lines_within_substations(gdf_lines, gdf_substations_polygon) + gdf_lines = _extend_lines_to_substations(gdf_lines, gdf_substations_polygon) + + logger.info("---") + logger.info("HVDC LINKS") + path_links = { + "links": snakemake.input.links_relation, + } + + df_links = _import_links(path_links) + + df_links = _drop_duplicate_lines(df_links) + df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) + df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) + # Keep only highest voltage of split string + df_links.loc[:, "voltage"] = df_links["voltage"].apply( + lambda x: str(max(map(int, x.split(";")))) + ) + df_links.loc[:, "frequency"] = _clean_frequency(df_links["frequency"]) + df_links.loc[:, "rating"] = _clean_rating(df_links["rating"]) + + df_links.loc[:, "geometry"] = df_links.apply(_create_single_link, axis=1) + df_links = _finalise_links(df_links) + gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs).set_index( + "link_id" + ) + + # Add line endings to substations + path_country_shapes = snakemake.input.country_shapes + path_offshore_shapes = snakemake.input.offshore_shapes + + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, + prefix="line-end", + ) + + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_links, + path_country_shapes, + path_offshore_shapes, + prefix="link-end", + ) + + # Drop polygons and create GDF + gdf_substations = gpd.GeoDataFrame( + df_substations.drop(columns=["polygon"]), geometry="geometry", crs=crs + ) + + output_substations_polygon = snakemake.output["substations_polygon"] + output_substations = snakemake.output["substations"] + output_lines = snakemake.output["lines"] + output_links = snakemake.output["links"] + + logger.info( + f"Exporting clean substations with polygon shapes to {output_substations_polygon}" + ) + gdf_substations_polygon.drop(columns=["geometry"]).to_file( + output_substations_polygon, driver="GeoJSON" + ) + logger.info(f"Exporting clean substations to {output_substations}") + gdf_substations.to_file(output_substations, driver="GeoJSON") + logger.info(f"Exporting clean lines to {output_lines}") + gdf_lines.to_file(output_lines, driver="GeoJSON") + logger.info(f"Exporting clean links to {output_links}") + gdf_links.to_file(output_links, driver="GeoJSON") + + logger.info("Cleaning OSM data completed.") diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 79184167e..b63747c24 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -36,7 +36,7 @@ - ``resources/regions_offshore_elec_s{simpl}.geojson``: confer :ref:`simplify` - ``resources/busmap_elec_s{simpl}.csv``: confer :ref:`simplify` - ``networks/elec_s{simpl}.nc``: confer :ref:`simplify` -- ``data/custom_busmap_elec_s{simpl}_{clusters}.csv``: optional input +- 
``data/custom_busmap_elec_s{simpl}_{clusters}_{base_network}.csv``: optional input Outputs ------- @@ -511,9 +511,7 @@ def plot_busmap_for_n_clusters(n, n_clusters, solver_name="scip", fn=None): # Fast-path if no clustering is necessary busmap = n.buses.index.to_series() linemap = n.lines.index.to_series() - clustering = pypsa.clustering.spatial.Clustering( - n, busmap, linemap, linemap, pd.Series(dtype="O") - ) + clustering = pypsa.clustering.spatial.Clustering(n, busmap, linemap) else: Nyears = n.snapshot_weightings.objective.sum() / 8760 diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py new file mode 100644 index 000000000..ac6b25354 --- /dev/null +++ b/scripts/prepare_osm_network_release.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +import logging +import os + +import pandas as pd +import pypsa +from _helpers import configure_logging, set_scenario_config + +logger = logging.getLogger(__name__) + + +BUSES_COLUMNS = [ + "bus_id", + "voltage", + "dc", + "symbol", + "under_construction", + "x", + "y", + "country", + "geometry", +] +LINES_COLUMNS = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", +] +LINKS_COLUMNS = [ + "link_id", + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "underground", + "under_construction", + "geometry", +] +TRANSFORMERS_COLUMNS = [ + "transformer_id", + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "geometry", +] +CONVERTERS_COLUMNS = [ + "converter_id", + "bus0", + "bus1", + "voltage", + "geometry", +] + + +def export_clean_csv(df, columns, output_file): + """ + Export a cleaned DataFrame to a CSV file. + + Args: + df (pandas.DataFrame): The DataFrame to be exported. + columns (list): A list of column names to include in the exported CSV file. + output_file (str): The path to the output CSV file. 
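+
+    Note:
+        PyPSA component indices (e.g. "Bus", "Line", "Link") are renamed to
+        the release schema (e.g. "bus_id", "line_id") and booleans are
+        written as "t"/"f".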
+ + Returns: + None + """ + rename_dict = { + "Bus": "bus_id", + "Line": "line_id", + "Link": "link_id", + "Transformer": "transformer_id", + "v_nom": "voltage", + "num_parallel": "circuits", + } + + if "converter_id" in columns: + rename_dict["Link"] = "converter_id" + + df.reset_index().rename(columns=rename_dict).loc[:, columns].replace( + {True: "t", False: "f"} + ).to_csv(output_file, index=False, quotechar="'") + + return None + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("prepare_osm_network_release") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + network = pypsa.Network(snakemake.input.base_network) + + network.buses["dc"] = network.buses.pop("carrier").map({"DC": "t", "AC": "f"}) + network.lines.length = network.lines.length * 1e3 + network.links.length = network.links.length * 1e3 + + # Export to clean csv for release + logger.info(f"Exporting {len(network.buses)} buses to %s", snakemake.output.buses) + export_clean_csv(network.buses, BUSES_COLUMNS, snakemake.output.buses) + + logger.info( + f"Exporting {len(network.transformers)} transformers to %s", + snakemake.output.transformers, + ) + export_clean_csv( + network.transformers, TRANSFORMERS_COLUMNS, snakemake.output.transformers + ) + + logger.info(f"Exporting {len(network.lines)} lines to %s", snakemake.output.lines) + export_clean_csv(network.lines, LINES_COLUMNS, snakemake.output.lines) + + # Boolean that specifies if link element is a converter + is_converter = network.links.index.str.startswith("conv") == True + + logger.info( + f"Exporting {len(network.links[~is_converter])} links to %s", + snakemake.output.links, + ) + export_clean_csv( + network.links[~is_converter], LINKS_COLUMNS, snakemake.output.links + ) + + logger.info( + f"Exporting {len(network.links[is_converter])} converters to %s", + snakemake.output.converters, + ) + export_clean_csv( + network.links[is_converter], CONVERTERS_COLUMNS, snakemake.output.converters + ) + + logger.info("Export of OSM network for release complete.") diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py index 71b504391..b6644d6dc 100644 --- a/scripts/prepare_sector_network.py +++ b/scripts/prepare_sector_network.py @@ -539,7 +539,24 @@ def add_carrier_buses(n, carrier, nodes=None): unit = "MWh_LHV" if carrier == "gas" else "MWh_th" # preliminary value for non-gas carriers to avoid zeros - capital_cost = costs.at["gas storage", "fixed"] if carrier == "gas" else 0.02 + if carrier == "gas": + capital_cost = costs.at["gas storage", "fixed"] + elif carrier == "oil": + # based on https://www.engineeringtoolbox.com/fuels-higher-calorific-values-d_169.html + mwh_per_m3 = 44.9 * 724 * 0.278 * 1e-3 # MJ/kg * kg/m3 * kWh/MJ * MWh/kWh + capital_cost = ( + costs.at["General liquid hydrocarbon storage (product)", "fixed"] + / mwh_per_m3 + ) + elif carrier == "methanol": + # based on https://www.engineeringtoolbox.com/fossil-fuels-energy-content-d_1298.html + mwh_per_m3 = 5.54 * 791 * 1e-3 # kWh/kg * kg/m3 * MWh/kWh + capital_cost = ( + costs.at["General liquid hydrocarbon storage (product)", "fixed"] + / mwh_per_m3 + ) + else: + capital_cost = 0.1 n.madd("Bus", nodes, location=location, carrier=carrier, unit=unit) @@ -2999,24 +3016,7 @@ def add_industry(n, costs): # methanol for industry - n.madd( - "Bus", - spatial.methanol.nodes, - carrier="methanol", - location=spatial.methanol.locations, - unit="MWh_LHV", - ) - - n.madd( - "Store", - 
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 71b504391..b6644d6dc 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -539,7 +539,24 @@ def add_carrier_buses(n, carrier, nodes=None):
     unit = "MWh_LHV" if carrier == "gas" else "MWh_th"

     # preliminary value for non-gas carriers to avoid zeros
-    capital_cost = costs.at["gas storage", "fixed"] if carrier == "gas" else 0.02
+    if carrier == "gas":
+        capital_cost = costs.at["gas storage", "fixed"]
+    elif carrier == "oil":
+        # based on https://www.engineeringtoolbox.com/fuels-higher-calorific-values-d_169.html
+        mwh_per_m3 = 44.9 * 724 * 0.278 * 1e-3  # MJ/kg * kg/m3 * kWh/MJ * MWh/kWh
+        capital_cost = (
+            costs.at["General liquid hydrocarbon storage (product)", "fixed"]
+            / mwh_per_m3
+        )
+    elif carrier == "methanol":
+        # based on https://www.engineeringtoolbox.com/fossil-fuels-energy-content-d_1298.html
+        mwh_per_m3 = 5.54 * 791 * 1e-3  # kWh/kg * kg/m3 * MWh/kWh
+        capital_cost = (
+            costs.at["General liquid hydrocarbon storage (product)", "fixed"]
+            / mwh_per_m3
+        )
+    else:
+        capital_cost = 0.1

     n.madd("Bus", nodes, location=location, carrier=carrier, unit=unit)
@@ -2999,24 +3016,7 @@ def add_industry(n, costs):

     # methanol for industry

-    n.madd(
-        "Bus",
-        spatial.methanol.nodes,
-        carrier="methanol",
-        location=spatial.methanol.locations,
-        unit="MWh_LHV",
-    )
-
-    n.madd(
-        "Store",
-        spatial.methanol.nodes,
-        suffix=" Store",
-        bus=spatial.methanol.nodes,
-        e_nom_extendable=True,
-        e_cyclic=True,
-        carrier="methanol",
-        capital_cost=0.02,
-    )
+    add_carrier_buses(n, "methanol")

     n.madd(
         "Bus",
@@ -3187,38 +3187,10 @@ def add_industry(n, costs):
         ],  # CO2 intensity methanol based on stoichiometric calculation with 22.7 GJ/t methanol (32 g/mol), CO2 (44 g/mol), 277.78 MWh/TJ = 0.218 t/MWh
     )

-    if "oil" not in n.buses.carrier.unique():
-        n.madd(
-            "Bus",
-            spatial.oil.nodes,
-            location=spatial.oil.locations,
-            carrier="oil",
-            unit="MWh_LHV",
-        )
-
-    if "oil" not in n.stores.carrier.unique():
-        # could correct to e.g. 0.001 EUR/kWh * annuity and O&M
-        n.madd(
-            "Store",
-            spatial.oil.nodes,
-            suffix=" Store",
-            bus=spatial.oil.nodes,
-            e_nom_extendable=True,
-            e_cyclic=True,
-            carrier="oil",
-        )
+    if shipping_oil_share:

-    if options.get("fossil_fuels", True) and "oil" not in n.generators.carrier.unique():
-        n.madd(
-            "Generator",
-            spatial.oil.nodes,
-            bus=spatial.oil.nodes,
-            p_nom_extendable=True,
-            carrier="oil",
-            marginal_cost=costs.at["oil", "fuel"],
-        )
+        add_carrier_buses(n, "oil")

-    if shipping_oil_share:
         p_set_oil = shipping_oil_share * p_set.rename(lambda x: x + " shipping oil")

         if not options["regional_oil_demand"]:
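As a sanity check on the unit conversions above, a worked version with a hypothetical cost input (storage_cost_per_m3 stands in for the technology-data entry "General liquid hydrocarbon storage (product)"; its value is a placeholder, not taken from the PR):

# Placeholder for costs.at["General liquid hydrocarbon storage (product)", "fixed"]
storage_cost_per_m3 = 0.1  # EUR/m3/a, hypothetical value for illustration

# oil: 44.9 MJ/kg * 724 kg/m3 * 0.278 kWh/MJ * 1e-3 MWh/kWh
mwh_per_m3_oil = 44.9 * 724 * 0.278 * 1e-3  # ~9.04 MWh/m3
# methanol: 5.54 kWh/kg * 791 kg/m3 * 1e-3 MWh/kWh
mwh_per_m3_methanol = 5.54 * 791 * 1e-3  # ~4.38 MWh/m3

# Dividing EUR/m3/a by MWh/m3 yields the capital cost in EUR/MWh/a
capital_cost_oil = storage_cost_per_m3 / mwh_per_m3_oil
capital_cost_methanol = storage_cost_per_m3 / mwh_per_m3_methanol
print(f"oil: {mwh_per_m3_oil:.2f} MWh/m3, methanol: {mwh_per_m3_methanol:.2f} MWh/m3")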
diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py
new file mode 100644
index 000000000..745533cff
--- /dev/null
+++ b/scripts/retrieve_osm_data.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: MIT
+"""
+Retrieve OSM data for the specified country using the Overpass API and save it
+to the specified output files.
+
+Note that Overpass requests are subject to a fair-use policy.
+`retrieve_osm_data` is meant to respect this policy by fetching the needed
+data only once.
+"""
+
+import json
+import logging
+import os
+import time
+
+import requests
+from _helpers import configure_logging, set_scenario_config
+
+logger = logging.getLogger(__name__)
+
+
+def retrieve_osm_data(
+    country,
+    output,
+    features=[
+        "cables_way",
+        "lines_way",
+        "links_relation",
+        "substations_way",
+        "substations_relation",
+    ],
+):
+    """
+    Retrieve OSM data for the specified country and save it to the specified
+    output files.
+
+    Parameters
+    ----------
+    country : str
+        The country code for which the OSM data should be retrieved.
+    output : dict
+        A dictionary mapping feature names to the corresponding output file
+        paths, to which the OSM data is saved as .json files.
+    features : list, optional
+        A list of OSM features to retrieve. The default is [
+        "cables_way",
+        "lines_way",
+        "links_relation",
+        "substations_way",
+        "substations_relation",
+        ].
+    """
+    # Overpass API endpoint URL
+    overpass_url = "https://overpass-api.de/api/interpreter"
+
+    features_dict = {
+        "cables_way": 'way["power"="cable"]',
+        "lines_way": 'way["power"="line"]',
+        "links_relation": 'relation["route"="power"]["frequency"="0"]',
+        "substations_way": 'way["power"="substation"]',
+        "substations_relation": 'relation["power"="substation"]',
+    }
+
+    wait_time = 5
+
+    for f in features:
+        if f not in features_dict:
+            raise ValueError(
+                f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}"
+            )
+
+        retries = 3
+        for attempt in range(retries):
+            logger.info(
+                f" - Fetching OSM data for feature '{f}' in {country} (Attempt {attempt+1})..."
+            )
+
+            # Build the Overpass query
+            op_area = f'area["ISO3166-1"="{country}"]'
+            op_query = f"""
+                [out:json];
+                {op_area}->.searchArea;
+                (
+                  {features_dict[f]}(area.searchArea);
+                );
+                out body geom;
+            """
+            response = None
+            try:
+                # Send the request
+                response = requests.post(overpass_url, data=op_query)
+                response.raise_for_status()  # Raise HTTPError for bad responses
+                data = response.json()
+
+                filepath = output[f]
+                parentfolder = os.path.dirname(filepath)
+                os.makedirs(parentfolder, exist_ok=True)
+
+                # Do not shadow the feature name f with the file handle
+                with open(filepath, mode="w") as json_file:
+                    json.dump(data, json_file, indent=2)
+                logger.info(" - Done.")
+                break  # Exit the retry loop on success
+            except (json.JSONDecodeError, requests.exceptions.RequestException) as e:
+                logger.error(f"Error for feature '{f}' in country {country}: {e}")
+                logger.debug(
+                    f"Response text: {response.text if response else 'No response'}"
+                )
+                if attempt < retries - 1:
+                    wait_time += 15
+                    logger.info(f"Waiting {wait_time} seconds before retrying...")
+                    time.sleep(wait_time)
+                else:
+                    logger.error(
+                        f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts."
+                    )
+            except Exception as e:
+                # Catch any other exception, log it and retry in the same way
+                # as for a RequestException.
+                logger.error(
+                    f"Unexpected error for feature '{f}' in country {country}: {e}"
+                )
+                if attempt < retries - 1:
+                    wait_time += 10
+                    logger.info(f"Waiting {wait_time} seconds before retrying...")
+                    time.sleep(wait_time)
+                else:
+                    logger.error(
+                        f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts."
+                    )
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        from _helpers import mock_snakemake
+
+        snakemake = mock_snakemake("retrieve_osm_data", country="BE")
+    configure_logging(snakemake)
+    set_scenario_config(snakemake)
+
+    # Retrieve the OSM data
+    country = snakemake.wildcards.country
+    output = snakemake.output
+
+    retrieve_osm_data(country, output)
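For reference, the Overpass request pattern used by retrieve_osm_data boils down to the following standalone sketch (country "LU" and the timeout value are illustrative; kept to a single small query in line with the fair-use policy):

import requests

overpass_url = "https://overpass-api.de/api/interpreter"
# Area-scoped query: restrict way["power"="line"] elements to one country
query = """
[out:json];
area["ISO3166-1"="LU"]->.searchArea;
(
  way["power"="line"](area.searchArea);
);
out body geom;
"""

response = requests.post(overpass_url, data=query, timeout=180)
response.raise_for_status()
data = response.json()
print(f"Retrieved {len(data.get('elements', []))} elements.")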
diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py
index 9db3435bf..741fd324c 100644
--- a/scripts/simplify_network.py
+++ b/scripts/simplify_network.py
@@ -108,7 +108,7 @@
 logger = logging.getLogger(__name__)


-def simplify_network_to_380(n):
+def simplify_network_to_380(n, linetype_380):
     """
     Fix all lines to a voltage level of 380 kV and remove all transformers.
@@ -132,8 +132,8 @@ def simplify_network_to_380(n):
     trafo_map = pd.Series(n.transformers.bus1.values, n.transformers.bus0.values)
     trafo_map = trafo_map[~trafo_map.index.duplicated(keep="first")]
-    several_trafo_b = trafo_map.isin(trafo_map.index)
-    trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map)
+    # Iterate until the mapping is a fixpoint, so that chains of transformers
+    # across several voltage levels are collapsed to a single target bus.
+    while (several_trafo_b := trafo_map.isin(trafo_map.index)).any():
+        trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map)
     missing_buses_i = n.buses.index.difference(trafo_map.index)
     missing = pd.Series(missing_buses_i, missing_buses_i)
     trafo_map = pd.concat([trafo_map, missing])
@@ -600,7 +600,8 @@ def find_closest_bus(n, x, y, tol=2000):

     # remove integer outputs for compatibility with PyPSA v0.26.0
     n.generators.drop("n_mod", axis=1, inplace=True, errors="ignore")

-    n, trafo_map = simplify_network_to_380(n)
+    linetype_380 = snakemake.config["lines"]["types"][380]
+    n, trafo_map = simplify_network_to_380(n, linetype_380)

     technology_costs = load_costs(
         snakemake.input.tech_costs,