FRESNA · martacki · Jan 21, 2020 · Jan 21, 2020 · Jan 22, 2020 · Jan 22, 2020
diff --git a/vresutils/load.py b/vresutils/load.py
@@ -109,7 +109,6 @@ def read_all_excel(fns):
 
     return data
 
-
 def timeseries_opsd(years=slice("2011", "2015"), fn=None):
     """
     Read load data from OPSD time-series package.
@@ -130,34 +129,55 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None):
         fn = toDataDir('time_series_60min_singleindex_filtered.csv')
 
     load = (pd.read_csv(fn, index_col=0, parse_dates=True)
-            .loc[:, lambda df: df.columns.to_series().str.endswith('_load_old')]
-            .rename(columns=lambda s: s[:-len('_load_old')])
+            .loc[:, lambda df: df.columns.to_series().str.endswith('_load_actual_entsoe_power_statistics')]
+            .rename(columns=lambda s: s[:-len('_load_actual_entsoe_power_statistics')])
             .dropna(how="all", axis=0))
 
     if years is not None:
         load = load.loc[years]
 
     # manual alterations:
+    load = manual_alterations_opsd(load)
+
+    return load
+
+def copy_timeslice(load, cntry, start, stop, delta):  # overwrite load[start:stop, cntry] with the same column's values from `delta` earlier
+    start = pd.Timestamp(start)
+    stop = pd.Timestamp(stop)
+    if start in load.index and stop in load.index:  # no-op unless both endpoints of the target window are index labels
+        load.loc[start:stop, cntry] = load.loc[start-delta:stop-delta, cntry].values  # .values gives a positional copy; NOTE(review): the shifted source window is not checked - presumably it must have the same length, confirm
+    return load  # the same object that was passed in, modified in place
+
+
+def manual_alterations_opsd(load):  # apply the hand-picked fixes below to the OPSD load frame and return it
+    # GB appears in the input as three columns: GB_GBN (Great Britain)
+    # and GB_NIR (Northern Ireland), which together form GB_UKM
+    # (United Kingdom). We keep only GB_UKM, renamed to GB, because the
+    # sum of GB_GBN and GB_NIR seems incomplete (more data is missing).
+    # Remaining gaps are interpolated below (copying from previous weeks might be better).
+    load['GB'] = load['GB_UKM']
+    load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM'])
+
+    # Fill load gaps longer than 4 hours by copying a nearby period (offset by the given Timedelta) into them
+    load = copy_timeslice(load, 'GR', '2015-08-11 21:00', '2015-08-15 20:00', pd.Timedelta(weeks=1))
+    load = copy_timeslice(load, 'AT', '2018-12-31 22:00', '2019-01-01 22:00', pd.Timedelta(days=2))
+    load = copy_timeslice(load, 'CH', '2010-01-19 07:00', '2010-01-19 22:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'CH', '2010-03-28 00:00', '2010-03-28 21:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(weeks=1)) # gap spans a weekend, so copy the weekend before
+    load = copy_timeslice(load, 'CH', '2010-11-04 04:00', '2010-11-04 22:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'NO', '2010-12-09 11:00', '2010-12-09 18:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(days=-364)) # whole January 2010 is missing; the negative delta copies from 364 days later
+
     # Kosovo gets the same load curve as Serbia
     # scaled by energy consumption ratio from IEA 2012
     load['KV'] = load['RS'] * (4.8 / 27.)
     # Albania gets the same load curve as Macedonia
     load['AL'] = load['MK'] * (4.1 / 7.4)
-
-    # To fill the half week gap in Greece from start to stop,
-    # we copy the week before into it
-    start = pd.Timestamp('2015-08-11 21:00')
-    stop = pd.Timestamp('2015-08-15 20:00')
-    w = pd.Timedelta(weeks=1)
-
-    if start in load.index and stop in load.index:
-        load.loc[start:stop, 'GR'] = load.loc[start-w:stop-w, 'GR'].values
-
-    # There are three missing hours in 2014 and four in 2015
-    # we interpolate linearly (copying from the previous week
-    # might be better)
-    load['EE'] = load['EE'].interpolate()
-
+    
+    # Interpolate the remaining short gaps for the listed countries; limit=4 fills at most 4 consecutive missing hours
+    interpolate_countries = ['AT', 'EE', 'GR', 'IE', 'KV', 'IS', 'LU', 'NO', 'PL', 'PT', 'RS', 'SE', 'SI', 'GB']
+    load[interpolate_countries] = load[interpolate_countries].interpolate(limit=4)
+    # Sometimes the last hour of 2009 is missing. We ignore this (alternatively, copy the first hour of 2010).
     return load
 
 def _upsampling_fitfunc(weights, gdp, pop):