fixup connection establishment
maurerle committed Oct 22, 2024
1 parent 33bded0 · commit 37b59db
Showing 1 changed file with 20 additions and 16 deletions.
36 changes: 20 additions & 16 deletions crawler/vea_industrial_load_profiles.py
@@ -22,9 +22,9 @@
 Local electricity generation was excluded from the data as far as it could be discovered (no guarantee of completeness).
 Together with load profiles comes respective master data of the industrial sites as well as the information wether each quarterhour was a high load time of the connected German grid operator in 2016.
 The data was collected by the VEA.
-The dataset as a whole was assembled by Paul Hendrik Tieman in 2017 by selectin complete load profiles without effects of renewable generation from a VEA internal database.
-It is a research dataset and was used for master theses and publications.""",
-    "contact": "",
+The dataset as a whole was assembled by Paul Hendrik Tiemann in 2017 by selecting complete load profiles without effects of renewable generation from a VEA internal database.
+It is a research dataset and was used for research publications.""",
+    "contact": "komanns@fh-aachen.de",
     "temporal_start": "2016-01-01 00:00:00",
     "temporal_end": "2016-12-31 23:45:00",
     "concave_hull_geometry": None,
@@ -48,7 +48,7 @@ def request_zip_archive() -> requests.Response:
 
     response.raise_for_status()
 
-    log.info("Succesfully requested zip archive from zenodo")
+    log.info("Successfully requested zip archive from zenodo")
 
     return response

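Only two lines of request_zip_archive are visible in this hunk. For orientation, a sketch of the presumed download pattern; the Zenodo record URL is a placeholder and is not taken from the diff:

import requests

# Placeholder: the actual record id is not visible in this diff.
ZENODO_ZIP_URL = "https://zenodo.org/api/records/<record-id>/files-archive"


def request_zip_archive() -> requests.Response:
    # Download the archive and fail loudly on HTTP errors.
    response = requests.get(ZENODO_ZIP_URL, timeout=300)
    response.raise_for_status()
    return response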
@@ -92,7 +92,7 @@ def read_file(file: zipfile.ZipExtFile, filename: str | None = None) -> pd.DataFrame:
 
     df = pd.read_csv(file, sep="\t")
 
-    log.info("Succesfully read file into pd.DataFrame")
+    log.info("Successfully read file into pd.DataFrame")
 
     return df

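A hedged usage sketch of read_file; the archive handle and the member name are illustrative assumptions (main() below passes load_file, hlt_file and master_file, but their origin lies outside the shown hunks):

import io
import zipfile

# Assumption: `response` is the requests.Response returned by
# request_zip_archive(); the member name "load.txt" is hypothetical.
archive = zipfile.ZipFile(io.BytesIO(response.content))
with archive.open("load.txt") as load_file:
    # zipfile.ZipFile.open() yields the zipfile.ZipExtFile handle that
    # read_file's signature expects.
    load_data = read_file(load_file, filename="load")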
Expand Down Expand Up @@ -125,7 +125,7 @@ def create_timestep_datetime_dict(columns: list[str]) -> dict[str : pd.Timestamp
idx = int(timestep.split("time")[1])
timestep_timestamp_map[timestep] = timestamps[idx]

log.info("Succesfully created dictionary")
log.info("Successfully created dictionary")

return timestep_timestamp_map

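Only the loop body of create_timestep_datetime_dict is visible above. A self-contained sketch of the presumed whole, with the quarter-hour bounds taken from the metadata block above (2016 is a leap year, so 366 × 96 = 35,136 steps); the date_range construction is an assumption:

import pandas as pd


def create_timestep_datetime_dict(columns: list[str]) -> dict[str, pd.Timestamp]:
    # Fixed 15-minute grid over 2016, matching temporal_start/temporal_end.
    timestamps = pd.date_range(
        start="2016-01-01 00:00:00", end="2016-12-31 23:45:00", freq="15min"
    )
    timestep_timestamp_map: dict[str, pd.Timestamp] = {}
    # Assumption: every entry in `columns` has the form "time<N>".
    for timestep in columns:
        idx = int(timestep.split("time")[1])  # "time42" -> 42
        timestep_timestamp_map[timestep] = timestamps[idx]
    return timestep_timestamp_map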
@@ -155,12 +155,12 @@ def transform_load_hlt_data(
     # map timestamps onto timestamp column
     df["timestamp"] = df["timestamp"].map(timestep_datetime_map)
 
-    log.info("Succesfully converted hlt / load profile")
+    log.info("Successfully converted hlt / load profile")
 
     return df
 
 
-def write_to_database(data: pd.DataFrame, name: str) -> None:
+def write_to_database(db_conn: str, data: pd.DataFrame, name: str) -> None:
     """Writes dataframe to database.
     Args:
@@ -171,7 +171,7 @@ def write_to_database(data: pd.DataFrame, name: str) -> None:
 
     log.info(f"Trying to write {name} to database")
 
-    engine = create_engine(db_uri)
+    engine = create_engine(db_conn)
 
     rows = 200000
     list_df = [data[i : i + rows] for i in range(0, data.shape[0], rows)]
@@ -185,7 +185,7 @@ def write_to_database(data: pd.DataFrame, name: str) -> None:
             index=False,
         )
 
-    log.info("Succesfully inserted into databse")
+    log.info("Successfully inserted into databse")
 
 
 def create_schema():
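The two hunks above change write_to_database piecemeal; read together, the function follows a common chunked-insert pattern. A self-contained sketch under stated assumptions (if_exists="append" and the bare table name are guesses; the diff only shows index=False):

import pandas as pd
from sqlalchemy import create_engine


def write_to_database(db_conn: str, data: pd.DataFrame, name: str) -> None:
    # One engine per call, built from the connection string now passed in.
    engine = create_engine(db_conn)
    rows = 200000
    # Slice the frame into 200k-row chunks; the last chunk may be shorter.
    list_df = [data[i : i + rows] for i in range(0, data.shape[0], rows)]
    for chunk in list_df:
        # Assumption: append semantics; only index=False is visible above.
        chunk.to_sql(name, engine, if_exists="append", index=False)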
@@ -210,19 +210,20 @@ def convert_to_hypertable(relation_name: str):
 
     log.info("Trying to create hypertable")
 
-    engine = create_engine(db_uri)
+    engine = create_engine(db_conn)
     try:
         with engine.begin() as conn:
             query = text(
                 f"SELECT public.create_hypertable('{relation_name}', 'timestamp', if_not_exists => TRUE, migrate_data => TRUE);"
             )
             conn.execute(query)
-        log.info("Succesfully create hypertable")
+        log.info("Successfully create hypertable")
     except Exception as e:
         log.error(f"could not create hypertable: {e}")
 
 
-def main(db_uri):
+def main(schema_name):
 
     # request zip archive
     response = request_zip_archive()

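In the next hunk, main resolves its own connection string via db_uri(schema_name). That helper is imported from outside the shown hunks; a hypothetical stand-in to make the data flow concrete (credentials and the search_path mechanism are placeholders, not taken from the repository):

def db_uri(schema_name: str) -> str:
    # Placeholder credentials; selects the crawler's schema via search_path.
    return (
        "postgresql+psycopg2://user:password@localhost:5432/opendata"
        f"?options=-csearch_path%3D{schema_name}"
    )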
@@ -245,23 +246,26 @@
     load_data = transform_load_hlt_data(
         df=load_data, timestep_datetime_map=timestep_dt_map, name=""
     )
-    write_to_database(data=load_data, name="load")
+    db_conn = db_uri(schema_name)
+    write_to_database(db_conn=db_conn, data=load_data, name="load")
     del load_data
 
     # read, transform and write hlt data
     hlt_data = read_file(hlt_file, filename="hlt")
     hlt_data = transform_load_hlt_data(
         df=hlt_data, timestep_datetime_map=timestep_dt_map, name=""
     )
-    write_to_database(data=hlt_data, name="high_load_times")
+    write_to_database(db_conn=db_conn, data=hlt_data, name="high_load_times")
     del hlt_data
 
     # read in master data and write to database
     master_data = read_file(master_file, filename="master")
-    write_to_database(data=master_data, name="master")
+    write_to_database(db_conn=db_conn, data=master_data, name="master")
     del master_data
 
     # convert to hypertable
-    convert_to_hypertable(db_conn=db_conn, "high_load_times")
-    convert_to_hypertable(db_conn=db_conn, "load")
+    convert_to_hypertable("high_load_times")
+    convert_to_hypertable("load")
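With main now keyed on a schema name, the crawler is presumably invoked with that name rather than a ready-made URI. A minimal usage sketch; the schema-name string and the guard are assumptions, since the file's actual entry point lies beyond the end of this diff:

if __name__ == "__main__":
    # Hypothetical invocation; the real entry point is not shown above.
    main("vea_industrial_load_profiles")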
