Skip to content

Commit

Permalink
fix: updating cohort parser to new GWAS Catalog format
Browse files: browse the repository at this point in the history
  • Loading branch information
DSuveges committed Feb 28, 2024
1 parent ba956c7 commit 655a5f3
Showing 1 changed file with 2 additions and 12 deletions.
14 changes: 2 additions & 12 deletions src/gentropy/datasource/gwas_catalog/study_index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -305,6 +305,7 @@ def _parse_study_table(
parse_efos(f.col("MAPPED BACKGROUND TRAIT URI")).alias(
"backgroundTraitFromSourceMappedIds"
),
cls.parse_cohorts(f.col("COHORTS")).alias("cohorts"),
),
_schema=StudyIndexGWASCatalog.get_schema(),
)
Expand Down Expand Up @@ -548,14 +549,6 @@ def annotate_ancestries(
) # studyId has not been split yet
)

# Parsing cohort information:
# cohorts = ancestry_lut.select(
# f.col("STUDY ACCESSION").alias("studyId"),
# GWASCatalogStudyIndexParser.parse_cohorts(f.col("COHORT(S)")).alias(
# "cohorts"
# ),
# ).distinct()

# Get a high resolution dataset on experimental stage:
ancestry_stages = (
ancestry.groupBy("studyId")
Expand Down Expand Up @@ -644,10 +637,7 @@ def annotate_ancestries(
).select(
"studyId", "discoverySamples", "ldPopulationStructure", "replicationSamples"
)
self.df = (
self.df.join(parsed_ancestry_lut, on="studyId", how="left")
# .join(cohorts, on="studyId", how="left")
)
self.df = self.df.join(parsed_ancestry_lut, on="studyId", how="left")
return self

def annotate_sumstats_info(
Expand Down

0 comments on commit 655a5f3

Please sign in to comment.