From 6a9601f46adb053882ab86b681826f1f5c8fd46f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 18 Nov 2024 12:22:48 +0000
Subject: [PATCH] chore: pre-commit auto fixes [...]

---
 .../20241114_out_of_sample_validation.ipynb | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/notebooks/20241114_out_of_sample_validation.ipynb b/notebooks/20241114_out_of_sample_validation.ipynb
index 85ef1bc72..217eb04ea 100644
--- a/notebooks/20241114_out_of_sample_validation.ipynb
+++ b/notebooks/20241114_out_of_sample_validation.ipynb
@@ -122,21 +122,15 @@
     "import os\n",
     "\n",
     "import hail as hl\n",
-    "import numpy as np\n",
     "import pyspark.sql.functions as f\n",
-    "from pyspark.sql import DataFrame\n",
     "\n",
     "from gentropy.common.session import Session\n",
-    "from gentropy.dataset.study_index import StudyIndex\n",
-    "from gentropy.dataset.summary_statistics import SummaryStatistics\n",
-    "from gentropy.dataset.study_locus import StudyLocus\n",
-    "from gentropy.susie_finemapper import SusieFineMapperStep\n",
     "\n",
     "hail_dir = os.path.dirname(hl.__file__)\n",
     "session = Session(hail_home=hail_dir, start_hail=True, extended_spark_conf={\n",
     "    \"spark.driver.memory\": \"12g\",\"spark.kryoserializer.buffer.max\": \"500m\",\"spark.driver.maxResultSize\":\"2g\",\n",
-    "    'spark.hadoop.fs.gs.requester.pays.buckets': 'requester-pays-bucket1,requester-pays-bucket2',\n",
-    "    'spark.hadoop.fs.gs.requester.pays.project.id': 'open-targets-genetics-dev',\n",
+    "    \"spark.hadoop.fs.gs.requester.pays.buckets\": \"requester-pays-bucket1,requester-pays-bucket2\",\n",
+    "    \"spark.hadoop.fs.gs.requester.pays.project.id\": \"open-targets-genetics-dev\",\n",
     "    \"spark.hadoop.fs.gs.requester.pays.mode\":\"AUTO\"})"
    ]
   },
@@ -173,7 +167,7 @@
     "        .cache())\n",
     "\n",
     "    counts_df = df.groupBy(\"gsp_status\", \"score_to_use\").agg(f.count(\"*\").alias(\"count\"))\n",
-    "    counts = { (row.gsp_status, row.score_to_use): row['count'] for row in counts_df.collect() }\n",
+    "    counts = { (row.gsp_status, row.score_to_use): row[\"count\"] for row in counts_df.collect() }\n",
     "    count_1_1 = counts.get((1, 1), 0)\n",
     "    count_1_0 = counts.get((1, 0), 0)\n",
     "    count_0_1 = counts.get((0, 1), 0)\n",
@@ -273,7 +267,7 @@
     "        valid_set_to_remove.select(\"studyLocusId\", \"geneId\"),\n",
     "        on=[\"studyLocusId\", \"geneId\"],\n",
     "        how=\"left_anti\"\n",
-    "    ) \n",
+    "    )\n",
     "    validate_pred(\n",
     "        valid_set=filtered_valid_set,\n",
     "        prediction_set=filtered_l2g,\n",
@@ -318,7 +312,7 @@
     "        gsp_colum=\"goldStandardSet\",\n",
     "        positive=\"positive\",\n",
     "        score_thr=thr,\n",
-    "    ) \n",
+    "    )\n",
     "\n",
     "    print(\"\")\n",
     "    print(\"\")\n",
@@ -333,7 +327,7 @@
     "        gsp_colum=\"Goldstandard_positive\",\n",
     "        positive=1,\n",
     "        score_thr=thr,\n",
-    "    ) \n"
+    "    )\n"
    ]
   },
  {