feat(susie_finemapper): allow for extraction of the log file from man…

…ifest (#859) * feat(susie_finemapper): allow for extraction of the log file from manifest * fix: add missing extension to the log file * fix: row istead of column --------- Co-authored-by: Szymon Szyszkowski <ss60@mib117351s.internal.sanger.ac.uk>
opentargets · Oct 18, 2024 · d650a29 · d650a29
1 parent c68a144
commit d650a29
Showing 1 changed file with 12 additions and 3 deletions.
diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py
@@ -63,15 +63,21 @@ def __init__(
     ) -> None:
         """Run fine-mapping on a studyLocusId from a collected studyLocus table.
 
+        Method require a `study_locus_manifest_path` file that will contain ["study_locus_input", "study_locus_output", "log_file"]. `log_file`
+        is optional parameter to the manifest. In case it does not exist, the logs from the finemapper are saved under the same directory
+        as the `study_locus_output` with `.log` suffix.
+        Each execution of the method will only evaluate a single row from the `study_locus_manifest` that is inferred from the `study_locus_index`
+        variable.
+
         Args:
             session (Session): Spark session
             study_index_path (str): path to the study index
             study_locus_manifest_path (str): Path to the CSV manifest containing all study locus input and output locations. Should contain two columns: study_locus_input and study_locus_output
             study_locus_index (int): Index (0-based) of the locus in the manifest to process in this call
             max_causal_snps (int): Maximum number of causal variants in locus, default is 10
             lead_pval_threshold (float): p-value threshold for the lead variant from CS, default is 1e-5
-            purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets, default is 0
-            purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets, default is 0.25
+            purity_mean_r2_threshold (float): threshold for purity mean r2 qc metrics for filtering credible sets, default is 0
+            purity_min_r2_threshold (float): threshold for purity min r2 qc metrics for filtering credible sets, default is 0.25
             cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2
             sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set)
             susie_est_tausq (bool): estimate tau squared, default is False
@@ -88,6 +94,9 @@ def __init__(
         row = study_locus_manifest.loc[study_locus_index]
         study_locus_input = row["study_locus_input"]
         study_locus_output = row["study_locus_output"]
+        log_output = study_locus_output + ".log"
+        if "log_output" in study_locus_manifest.columns:
+            log_output = row["log_output"] + ".log"
 
         # Read studyLocus
         study_locus = (
@@ -140,7 +149,7 @@ def __init__(
             if result_logging["log"] is not None:
                 # Write log
                 result_logging["log"].to_parquet(
-                    study_locus_output + ".log",
+                    log_output,
                     engine="pyarrow",
                     index=False,
                 )