Catalogue sigma #444

Closed · wants to merge 7 commits
59 changes: 38 additions & 21 deletions openquake/cat/hmg/hmg.py
@@ -126,19 +126,18 @@ def process_magnitude_keep_all(work, mag_rules, msig=0.2):
flag = rows["eventID"].duplicated(keep='first')

if any(flag):
# this line is so the larger M is taken - experimental based on MEX issue
rows = rows.sort_values("value", ascending=False).drop_duplicates('eventID').sort_index()
#tmp = sorted_rows[~flag].copy()
#rows = tmp
# this line is so the larger M is taken - experimental based
# on MEX issue
rows = rows.sort_values(
"value", ascending=False).drop_duplicates('eventID').sort_index()

# Magnitude conversion
if len(rows) > 0:
low_mags = mag_rules[agency][mag_type]['low_mags']
conv_eqs = mag_rules[agency][mag_type]['conv_eqs']
conv_sigma = mag_rules[agency][mag_type]['sigma']
save = apply_mag_conversion_rule_keep_all(low_mags, conv_eqs,
conv_sigma,
rows, save)
save = apply_mag_conversion_rule_keep_all(
low_mags, conv_eqs, conv_sigma, rows, save)
print(")")

return save
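Note: the duplicate handling above keeps the largest magnitude per event. A minimal, self-contained sketch of that pandas pattern, using made-up data and the column names of this dataframe (`eventID`, `value`):

```python
import pandas as pd

# Hypothetical duplicated magnitude rows for the same agency/magnitude type
rows = pd.DataFrame({"eventID": ["ev1", "ev1", "ev2"],
                     "value": [5.1, 5.4, 6.0]})

flag = rows["eventID"].duplicated(keep='first')
if any(flag):
    # Sort so the largest magnitude comes first, keep one row per event,
    # then restore the original row order
    rows = (rows.sort_values("value", ascending=False)
                .drop_duplicates("eventID")
                .sort_index())

print(rows)  # ev1 -> 5.4, ev2 -> 6.0
```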
@@ -190,7 +189,7 @@ def apply_mag_conversion_rule(low_mags, conv_eqs, conv_sigs, rows, save, work, m
m_inds = m >= mlow
if conversion == 'm':
tmp[m_inds] = m[m_inds]
tmpsig[m_inds] = sig[m_inds]
tmpsig[m_inds] = sig[m_inds]
else:
tmpstr = re.sub('m', fmt2.format(mlow), conversion)
tmpstrP = re.sub('m', '(' + fmt2.format(mlow)+'+ 0.001)', conversion)
@@ -203,7 +202,7 @@
exec(cmd)
exec(cmdsp)
exec(cmdsm)
deriv = [(ta-tb)/0.002 for ta, tb in zip(tmpsiga, tmpsigb)]
deriv = [(ta-tb)/0.002 for ta, tb in zip(tmpsiga, tmpsigb)]
sig_new = np.array([np.sqrt(s**2 + d**2 * sigma**2) for s, d in zip(sig, deriv)])
tmpsig[m_inds] = sig_new[m_inds]
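Note: the block above propagates magnitude uncertainty through the conversion equation by estimating its derivative with a central difference over ±0.001 and combining the sigma terms in quadrature. A standalone sketch of that idea, with an invented linear conversion and hypothetical names (not the project's API):

```python
import numpy as np

def propagate_sigma(conv, m, sig_rule, sig_m, dm=0.001):
    """Sketch of first-order error propagation of the kind used above:
    sigma_new**2 = sig_rule**2 + (d conv / d m)**2 * sig_m**2, with the
    derivative of the conversion estimated numerically."""
    deriv = (conv(m + dm) - conv(m - dm)) / (2.0 * dm)
    return np.sqrt(sig_rule**2 + deriv**2 * sig_m**2)

# Example: an invented conversion Mw = 0.85 * Ms + 1.03
conv = lambda m: 0.85 * m + 1.03
print(propagate_sigma(conv, np.array([5.0, 6.5]), sig_rule=0.2, sig_m=0.1))
```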

@@ -304,11 +303,15 @@ def process_origin(odf, ori_rules):

def process_magnitude(work, mag_rules, msig=0.2):
"""
This function applies the magnitude conversion rules

:param work:
A :class:`pandas.DataFrame` instance obtained by joining the origin
and magnitude dataframes
:param mag_rules:
A dictionary with the rules to be used for processing the catalogue
:param msig:
Default standard deviation of the magnitude, used when a rule does not define 'mag_sigma'.
:return:
Two :class:`pandas.DataFrame` instances. The first one with the
homogenised catalogue, the second one with the information not
Expand Down Expand Up @@ -354,16 +357,18 @@ def process_magnitude(work, mag_rules, msig=0.2):

# Magnitude conversion
if len(rows) > 0:

low_mags = mag_rules[agency][mag_type]['low_mags']
conv_eqs = mag_rules[agency][mag_type]['conv_eqs']
conv_sigma = mag_rules[agency][mag_type]['sigma']

if 'mag_sigma' in mag_rules[agency][mag_type]:
m_sigma = mag_rules[agency][mag_type]['mag_sigma']
else:
m_sigma = msig
save, work = apply_mag_conversion_rule(low_mags, conv_eqs,
conv_sigma, rows,
save, work, m_sigma)

save, work = apply_mag_conversion_rule(
low_mags, conv_eqs, conv_sigma, rows, save, work, m_sigma)
print(")")

return save, work
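Note: a hypothetical `mag_rules` dictionary showing the keys this function consumes (`low_mags`, `conv_eqs`, `sigma`, and the optional per-rule `mag_sigma` introduced in this PR). Agency names, magnitude types and equations are illustrative only:

```python
# Illustrative only: real rules come from the settings TOML file
mag_rules = {
    "ISC": {
        "Ms": {
            "low_mags": [0.0, 6.0],                # lower bound of each magnitude bin
            "conv_eqs": ["0.67 * m + 2.07", "m"],  # one expression per bin ('m' = keep as is)
            "sigma": [0.2, 0.2],                   # sigma of each conversion relation
            "mag_sigma": 0.15,                     # optional: overrides the default msig
        }
    }
}

# How the per-rule magnitude sigma is selected (mirrors the logic above)
msig = 0.2
rule = mag_rules["ISC"]["Ms"]
m_sigma = rule["mag_sigma"] if "mag_sigma" in rule else msig
```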
@@ -383,38 +388,50 @@ def process_dfs(odf_fname, mdf_fname, settings_fname=None):
save = None
work = None

# Reading settings
# Reading settings. These include the priority agencies for the origin and
# the rules for magnitude conversion.
rules = toml.load(settings_fname)

mag_n_sigma = 0.0
if 'default' in rules.keys():
mag_n_sigma = rules['default'].get('mag_sigma', mag_n_sigma)
else:
rules['default'] = {'mag_sigma': mag_n_sigma}

for agency in rules['magnitude'].keys():
for mag_type in rules['magnitude'][agency].keys():
if 'sigma' not in rules['magnitude'][agency][mag_type].keys():
n_mags = len(rules['magnitude'][agency][mag_type]['low_mags'])
tmp = [mag_n_sigma for i in range(n_mags)]
rules['magnitude'][agency][mag_type]['sigma'] = tmp

# Checking input
if not('origin' in rules.keys() or 'magnitude' in rules.keys()):
if not ('origin' in rules.keys() or 'magnitude' in rules.keys()):
raise ValueError('At least one set of settings must be defined')

# These are the tables with origins and magnitudes
odf = pd.read_hdf(odf_fname)
mdf = pd.read_hdf(mdf_fname)

print("Number of EventIDs {:d}\n".format(len(odf["eventID"].unique())))

# Processing origins
if 'origin' in rules.keys():
print('Selecting origins')
odf = process_origin(odf, rules['origin'])

print("Number of origins selected {:d}\n".format(len(odf)))

if 'default' in rules.keys():
mag_n_sigma = rules['default']['mag_sigma']

# Processing magnitudes
if 'magnitude' in rules.keys():
print('Homogenising magnitudes')

# Creating a single dataframe by joining
work = pd.merge(odf, mdf, on=["eventID"])
save, work = process_magnitude(work, rules['magnitude'], msig=mag_n_sigma)
save, work = process_magnitude(
work, rules['magnitude'], msig=mag_n_sigma)

work_all_m = pd.merge(odf, mdf, on=["eventID"])
save_all_m = process_magnitude_keep_all(work_all_m, rules['magnitude'],msig=mag_n_sigma)
save_all_m = process_magnitude_keep_all(
work_all_m, rules['magnitude'], msig=mag_n_sigma)

print("Number of origins with final mag type {:d}\n".format(len(save)))

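Note: a minimal, hypothetical settings file for `process_dfs`, written as an inline TOML string so the defaulting above can be exercised. Section and key names follow the code (`default.mag_sigma`, `magnitude.<agency>.<type>`); agencies, equations and values are invented:

```python
import toml

settings = toml.loads("""
[default]
mag_sigma = 0.2

[magnitude.ISC.Ms]
low_mags = [0.0, 6.0]
conv_eqs = ["0.67 * m + 2.07", "m"]
""")

# As in process_dfs: rules without an explicit 'sigma' get one entry of the
# default mag_sigma per 'low_mags' threshold.
mag_n_sigma = settings.get('default', {}).get('mag_sigma', 0.0)
for agency in settings['magnitude']:
    for mag_type, rule in settings['magnitude'][agency].items():
        rule.setdefault('sigma', [mag_n_sigma] * len(rule['low_mags']))
```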
15 changes: 9 additions & 6 deletions openquake/cat/hmg/merge.py
@@ -165,8 +165,8 @@ def load_catalogue(fname: str, cat_type: str, cat_code: str, cat_name: str):
cat = parser.parse(cat_code, cat_name)
else:
raise ValueError("Unsupported catalogue type")
fmt = ' The original catalogue contains {:d} events'
print(fmt.format(len(cat.events)))
print(f' The original catalogue contains {len(cat.events):d} events')

return cat


@@ -199,7 +199,7 @@ def process_catalogues(settings_fname: str) -> None:
# Process the catalogue. `tdict` is dictionary with the info
# required to merge one specific catalogue.
for icat, tdict in enumerate(settings["catalogues"]):

# Get settings
fname = os.path.join(path, tdict["filename"])
cat_type = tdict["type"]
@@ -215,6 +215,9 @@ def process_catalogues(settings_fname: str) -> None:
nev = catroot.get_number_events()
print(f" Catalogue contains: {nev:d} events")

if nev == 0:
raise ValueError(f'Empty catalogue in {fname}')

select_flag = tdict.get("select_region", False)
if select_flag:
msg = "Selecting earthquakes within the region shapefile"
@@ -292,11 +295,11 @@ def process_catalogues(settings_fname: str) -> None:
else:
fle = tempfile.NamedTemporaryFile(mode = 'w', delete=False)
logfle=fle.name


else:
logfle = tdict["log_file"]

print(f" Log file: {logfle:s}".format())
# Perform the merge
meth = catroot.add_external_idf_formatted_catalogue
@@ -306,7 +309,7 @@ def process_catalogues(settings_fname: str) -> None:
# Update the spatial index
print(" Updating index")
catroot._create_spatial_index()

nev = catroot.get_number_events()
print(f" Whole catalogue contains: {nev:d} events")

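Note: a sketch of the log-file fallback used above; when a catalogue entry has no "log_file", a named temporary file is created so the merge log still ends up somewhere. Only keys visible in this diff are used, and the entry below is a made-up stand-in for `tdict`:

```python
import tempfile

# Hypothetical entry of settings["catalogues"]
tdict = {"filename": "catalogue_b.csv",
         "type": "csv",
         "select_region": False,
         # "log_file": "merge_b.log",  # optional; a temp file is used when absent
         }

if "log_file" in tdict:
    logfle = tdict["log_file"]
else:
    # delete=False so the file survives after the handle is closed
    fle = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.log')
    logfle = fle.name

print(f"    Log file: {logfle}")
```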
37 changes: 20 additions & 17 deletions openquake/cat/parsers/generic_catalogue.py
@@ -49,7 +49,8 @@ class GeneralCsvCatalogue(object):
'SemiMajor90', 'SemiMinor90', 'ErrorStrike',
'depth', 'depthError', 'magnitude',
'sigmaMagnitude', 'moment', 'mpp', 'mpr', 'mrr',
'mrt', 'mtp', 'mtt', 'dip1', 'str1', 'rake1', 'dip2', 'str2', 'rake2']
'mrt', 'mtp', 'mtt', 'dip1', 'str1', 'rake1',
'dip2', 'str2', 'rake2']

INT_ATTRIBUTE_LIST = ['year', 'month', 'day', 'hour', 'minute',
'flag', 'scaling']
@@ -59,8 +60,7 @@ class GeneralCsvCatalogue(object):

TOTAL_ATTRIBUTE_LIST = list(
(set(FLOAT_ATTRIBUTE_LIST).union(
set(INT_ATTRIBUTE_LIST))).union(
set(STRING_ATTRIBUTE_LIST)))
set(INT_ATTRIBUTE_LIST))).union(set(STRING_ATTRIBUTE_LIST)))

def __init__(self):
"""
@@ -89,7 +89,6 @@ def parse_csv(self, filename):
# Replace any whitespace with nan
df.replace(r'^\s*$', np.nan, regex=True)


# Checking information included in the original
if 'day' in df.columns:
# Fixing day
@@ -101,30 +100,31 @@ def parse_csv(self, filename):
df.drop(df[df.minute > 59.599999].index, inplace=True)
if 'hour' in df.columns:
df.drop(df[df.hour > 23.99999].index, inplace=True)

if 'str1' in df.columns:
df['str1'] = pd.to_numeric(df['str1'], errors='coerce')

if 'dip1' in df.columns:
df['dip1'] = pd.to_numeric(df['dip1'], errors='coerce')

if 'rake1' in df.columns:
df['rake1'] = pd.to_numeric(df['rake1'], errors='coerce')

if 'str2' in df.columns:
df['str2'] = pd.to_numeric(df['str2'], errors='coerce')

if 'dip2' in df.columns:
df['dip2'] = pd.to_numeric(df['dip2'], errors='coerce')

if 'rake2' in df.columns:
df['rake2'] = pd.to_numeric(df['rake2'], errors='coerce')

if 'SemiMinor90' in df.columns:
df['SemiMinor90']= pd.to_numeric(df['SemiMinor90'], errors='coerce')
df['SemiMinor90']= pd.to_numeric(df['SemiMinor90'], errors='coerce')

if 'SemiMajor90' in df.columns:
df['SemiMajor90']= pd.to_numeric(df['SemiMajor90'], errors='coerce')
df['SemiMajor90']= pd.to_numeric(df['SemiMajor90'], errors='coerce')

# Processing columns and updating the catalogue
for col in df.columns:
if col in self.TOTAL_ATTRIBUTE_LIST:
@@ -134,7 +134,7 @@ def parse_csv(self, filename):

else:
self.data[col] = df[col].to_list()

def get_number_events(self):
"""
Returns the number of events
@@ -227,12 +227,15 @@ def write_to_isf_catalogue(self, catalogue_id, name):
isf_cat = ISFCatalogue(catalogue_id, name)

for iloc in range(0, self.get_number_events()):

# Origin ID
if len(self.data['eventID']) > 0:
event_id = str(self.data['eventID'][iloc])
else:
raise ValueError('Unknown key. Line: {:d}'.format(iloc))
msg = 'Event ID column is empty'
raise ValueError(msg)
origin_id = event_id

# Create Magnitude
sigma_mag = None
if ('sigmaMagnitude' in self.data and
@@ -436,7 +439,7 @@ def write_to_isf_catalogue(self, catalogue_id, name):
semiminor90,
error_strike,
depth_error)

# Create Origin
# Date
eq_date = datetime.date(self.data['year'][iloc],
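Note: the repeated `pd.to_numeric(..., errors='coerce')` blocks in `parse_csv` could equivalently be written as a loop; a sketch of that form, shown only as a reading aid (the PR keeps the explicit per-column version):

```python
import pandas as pd

df = pd.DataFrame({"str1": ["210", "n/a"], "dip1": ["45", ""], "rake1": ["-90", "90"]})

# Coerce focal-mechanism and error-ellipse columns to numbers;
# unparsable entries become NaN instead of raising
for col in ("str1", "dip1", "rake1", "str2", "dip2", "rake2",
            "SemiMajor90", "SemiMinor90"):
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")
```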
13 changes: 11 additions & 2 deletions openquake/cat/parsers/isf_catalogue_reader.py
@@ -206,7 +206,9 @@ def read_file(self, identifier, name):
is_origin = False
is_magnitude = False
comment_str = ""

for row in f.readlines():

# Strip newline carriage
if row.endswith("\r\n"):
# If the file was compiled on windows machines
@@ -273,7 +275,9 @@ def read_file(self, identifier, name):
else:
pass

if is_magnitude and len(row) == 38:
rsplt = re.split('\\s+', row)

if is_magnitude and len(rsplt) == 4:
# Is a magnitude row
mag = get_event_magnitude(row,
event.id,
@@ -283,14 +287,17 @@ def read_file(self, identifier, name):
magnitudes.append(mag)
continue

if is_origin and len(row) == 136:
chk = re.search('^\\d{4}/\\d{2}/\\d{2}', rsplt[0])
if is_origin and chk:
# Is an origin row
orig = get_event_origin_row(row,
self.selected_origin_agencies)
if orig:
origins.append(orig)

if event is not None:
self._build_event(event, origins, magnitudes, comment_str)

if len(self.rejected_catalogue):
# Turn list of rejected events into its own instance of
# ISFCatalogue
@@ -299,7 +306,9 @@ def read_file(self, identifier, name):
name + " - Rejected",
events=self.rejected_catalogue)
f.close()

self.catalogue.ids = [e.id for e in self.catalogue.events]

return self.catalogue

def _build_event(self, event, origins, magnitudes, comment_str):
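Note: the reader no longer relies on fixed line widths (38 and 136 characters) to classify rows; it splits each row on whitespace and uses the token count for magnitude rows and a leading YYYY/MM/DD date for origin rows. A simplified sketch of that classification, with invented rows that are not real ISF records and without the `is_origin`/`is_magnitude` gating:

```python
import re

rows = ["2001/03/24 06:27:53.50 35.10 140.20 55.0 123 ISC",  # origin-like row
        "mb 5.2 23 ISC"]                                      # magnitude-like row

for row in rows:
    rsplt = re.split('\\s+', row.strip())
    if len(rsplt) == 4:
        print("magnitude row:", rsplt)
    elif re.search('^\\d{4}/\\d{2}/\\d{2}', rsplt[0]):
        print("origin row:", rsplt[:2])
```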
4 changes: 2 additions & 2 deletions openquake/ghm/utils.py
@@ -101,9 +101,9 @@ def read_hazard_map_csv(fname):
pass
else:
if re.search('lon', line):
labels = re.split('\,', line)
labels = re.split('\\,', line)
else:
aa = re.split('\,', line)
aa = re.split('\\,', line)
for l, d in zip(labels, aa):
if l in data:
data[l].append(float(d))
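Note on the `'\\,'` change: the comma is not a regex metacharacter, so the backslash exists only to avoid Python's invalid-escape-sequence warning for `'\,'`; all three forms below split identically:

```python
import re

line = "lon,lat,PGA-0.1"
assert re.split('\\,', line) == re.split(r'\,', line) == re.split(',', line)
print(re.split(',', line))  # ['lon', 'lat', 'PGA-0.1']
```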
5 changes: 4 additions & 1 deletion openquake/mbi/sub/create_ruptures.py
@@ -9,6 +9,9 @@


def main(config_fname: str, only_plt: bool = False):
"""
Creates inslab ruptures
"""

# Parsing config
model = toml.load(config_fname)
@@ -24,7 +27,7 @@ def main(config_fname: str, only_plt: bool = False):
calculate_ruptures(ini_fname, ref_fdr=ref_fdr, agr=agr, bgr=bgr,
mmin=mmin, mmax=mmax)

descr = 'The path to the .ini file containing info to build the ruptures'
descr = 'The path to the .toml file containing info to build the ruptures'
main.config_fname = descr

if __name__ == '__main__':