test: increase modularity of test directories (#502)
d0choa authored Feb 21, 2024
1 parent faa99be commit 08e6ff6
Showing 112 changed files with 57 additions and 45 deletions.
2 changes: 1 addition & 1 deletion docs/development/contributing.md
@@ -82,5 +82,5 @@ For more details on each of these steps, see the sections below.
### Tests

- Test study fixture in `tests/conftest.py` (example: `mock_study_index_finngen` in that module)
-- Test sample data in `tests/data_samples` (example: `tests/data_samples/finngen_studies_sample.json`)
+- Test sample data in `tests/data_samples` (example: `tests/gentropy/data_samples/finngen_studies_sample.json`)
- Test definition in `tests/` (example: `tests/dataset/test_study_index.py` → `test_study_index_finngen_creation`)
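
As an aside, a minimal sketch of how these three pieces fit together (a conftest fixture, the sample file it reads, and a test that consumes it). The names below are hypothetical, and a `spark` session fixture like the one in `tests/gentropy/conftest.py` is assumed:

from __future__ import annotations

import pytest
from pyspark.sql import DataFrame, SparkSession


@pytest.fixture()
def mock_example_studies(spark: SparkSession) -> DataFrame:
    """Read a small JSON sample; multiLine=True handles a top-level JSON array."""
    return spark.read.json(
        "tests/gentropy/data_samples/finngen_studies_sample.json", multiLine=True
    )


def test_example_studies_load(mock_example_studies: DataFrame) -> None:
    """The sample file parses into a non-empty DataFrame."""
    assert mock_example_studies.count() > 0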
6 changes: 3 additions & 3 deletions docs/src_snippets/howto/python_api/b_create_dataset.py
@@ -17,7 +17,7 @@ def create_from_parquet(session: Session) -> SummaryStatistics:

# --8<-- [end:create_from_parquet_import]

-path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet"
+path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet"
# --8<-- [start:create_from_parquet]
summary_stats = SummaryStatistics.from_parquet(session, path)
# --8<-- [end:create_from_parquet]
@@ -31,7 +31,7 @@ def create_from_source(session: Session) -> SummaryStatistics:
from gentropy.datasource.finngen.summary_stats import FinnGenSummaryStats

# --8<-- [end:create_from_source_import]
-path = "tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz"
+path = "tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz"
# --8<-- [start:create_from_source]
summary_stats = FinnGenSummaryStats.from_source(session.spark, path)
# --8<-- [end:create_from_source]
@@ -46,7 +46,7 @@ def create_from_pandas() -> SummaryStatistics:

# --8<-- [end:create_from_pandas_import]

-path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet"
+path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet"
custom_summary_stats_pandas_df = ps.read_parquet(path)
# --8<-- [start:create_from_pandas]

1 change: 1 addition & 0 deletions pyproject.toml
@@ -127,6 +127,7 @@ exclude = ["dist"]
[tool.pytest.ini_options]
addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml"
pythonpath = [".", "./src/airflow/dags"]
+testpaths = ["tests/gentropy", "src/gentropy/"]

# Semi-strict mode for mypy
[tool.mypy]
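
With `testpaths` covering both `tests/gentropy` and `src/gentropy/`, and `--doctest-modules` already set in `addopts`, a bare `pytest` run now collects the unit tests plus any doctests embedded in the source tree. A sketch of the kind of doctest this picks up (the function below is hypothetical, not from gentropy):

def add_one(x: int) -> int:
    """Add one to x.

    Examples:
        >>> add_one(1)
        2
    """
    return x + 1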
4 changes: 1 addition & 3 deletions tests/__init__.py
@@ -1,3 +1 @@
"""Unit tests."""

from __future__ import annotations
"""Gentropy tests package."""
3 changes: 3 additions & 0 deletions tests/gentropy/__init__.py
@@ -0,0 +1,3 @@
"""Unit tests."""

from __future__ import annotations
File renamed without changes.
52 changes: 28 additions & 24 deletions tests/conftest.py → tests/gentropy/conftest.py
@@ -411,7 +411,7 @@ def mock_ld_index(spark: SparkSession) -> LDIndex:
def sample_gwas_catalog_studies(spark: SparkSession) -> DataFrame:
"""Sample GWAS Catalog studies."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv",
"tests/gentropy/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv",
sep="\t",
header=True,
)
@@ -421,7 +421,7 @@ def sample_gwas_catalog_studies(spark: SparkSession) -> DataFrame:
def sample_gwas_catalog_ancestries_lut(spark: SparkSession) -> DataFrame:
"""Sample GWAS ancestries sample data."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv",
"tests/gentropy/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv",
sep="\t",
header=True,
)
@@ -431,7 +431,7 @@ def sample_gwas_catalog_ancestries_lut(spark: SparkSession) -> DataFrame:
def sample_gwas_catalog_harmonised_sumstats_list(spark: SparkSession) -> DataFrame:
"""Sample GWAS harmonised sumstats sample data."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_harmonised_list.txt",
"tests/gentropy/data_samples/gwas_catalog_harmonised_list.txt",
sep="\t",
header=False,
)
@@ -441,7 +441,7 @@ def sample_gwas_catalog_harmonised_sumstats_list(spark: SparkSession) -> DataFrame:
def sample_gwas_catalog_associations(spark: SparkSession) -> DataFrame:
"""Sample GWAS raw associations sample data."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv",
"tests/gentropy/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv",
sep="\t",
header=True,
)
@@ -451,7 +451,7 @@ def sample_gwas_catalog_associations(spark: SparkSession) -> DataFrame:
def sample_summary_statistics(spark: SparkSession) -> SummaryStatistics:
"""Sample GWAS raw associations sample data."""
return SummaryStatistics(
-_df=spark.read.parquet("tests/data_samples/sumstats_sample"),
+_df=spark.read.parquet("tests/gentropy/data_samples/sumstats_sample"),
_schema=SummaryStatistics.get_schema(),
)

@@ -460,8 +460,10 @@ def sample_summary_statistics(spark: SparkSession) -> SummaryStatistics:
def sample_finngen_studies(spark: SparkSession) -> DataFrame:
"""Sample FinnGen studies."""
# For reference, the sample file was generated with the following command:
-# curl https://r9.finngen.fi/api/phenos | jq '.[:10]' > tests/data_samples/finngen_studies_sample.json
-with open("tests/data_samples/finngen_studies_sample.json") as finngen_studies:
+# curl https://r9.finngen.fi/api/phenos | jq '.[:10]' > tests/gentropy/data_samples/finngen_studies_sample.json
+with open(
+    "tests/gentropy/data_samples/finngen_studies_sample.json"
+) as finngen_studies:
json_data = finngen_studies.read()
rdd = spark.sparkContext.parallelize([json_data])
return spark.read.json(rdd)
@@ -471,8 +473,10 @@ def sample_finngen_studies(spark: SparkSession) -> DataFrame:
def sample_eqtl_catalogue_studies(spark: SparkSession) -> DataFrame:
"""Sample eQTL Catalogue studies."""
# For reference, the sample file was generated with the following command:
-# curl https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths_imported.tsv | head -n11 > tests/data_samples/eqtl_catalogue_studies_sample.tsv
-with open("tests/data_samples/eqtl_catalogue_studies_sample.tsv") as eqtl_catalogue:
+# curl https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths_imported.tsv | head -n11 > tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv
+with open(
+    "tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv"
+) as eqtl_catalogue:
tsv = eqtl_catalogue.read()
rdd = spark.sparkContext.parallelize([tsv])
return spark.read.csv(rdd, sep="\t", header=True)
@@ -482,11 +486,11 @@ def sample_eqtl_catalogue_studies(spark: SparkSession) -> DataFrame:
def sample_eqtl_catalogue_summary_stats(spark: SparkSession) -> DataFrame:
"""Sample eQTL Catalogue summary stats."""
# For reference, the sample file was generated with the following commands:
-# mkdir -p tests/data_samples/imported/GTEx_V8/ge
-# curl ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz | gzip -cd | head -n11 | gzip -c > tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz
+# mkdir -p tests/gentropy/data_samples/imported/GTEx_V8/ge
+# curl ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz | gzip -cd | head -n11 | gzip -c > tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz
# It's important for the test file to be named in exactly this way, because eQTL Catalogue study ID is populated based on input file name.
return spark.read.option("delimiter", "\t").csv(
"tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz",
"tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz",
header=True,
)

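The naming caveat above matters because the eQTL Catalogue study ID is derived from the input file name. A purely illustrative sketch of that kind of derivation follows; the actual gentropy implementation is not part of this diff and may differ:

from pathlib import Path


def study_id_from_filename(path: str) -> str:
    """Hypothetical helper: build an identifier from the directory layout and file stem."""
    parts = Path(path).parts  # e.g. (..., "imported", "GTEx_V8", "ge", "Adipose_Subcutaneous.tsv.gz")
    stem = Path(path).name.split(".")[0]
    return "_".join([parts[-3], parts[-2], stem])


# Prints "GTEx_V8_ge_Adipose_Subcutaneous" for the sample file above.
print(study_id_from_filename(
    "tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz"
))
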
@@ -496,7 +500,7 @@ def sample_ukbiobank_studies(spark: SparkSession) -> DataFrame:
"""Sample UKBiobank manifest."""
# Sampled 10 rows of the UKBB manifest tsv
return spark.read.csv(
"tests/data_samples/neale2_saige_study_manifest.samples.tsv",
"tests/gentropy/data_samples/neale2_saige_study_manifest.samples.tsv",
sep="\t",
header=True,
inferSchema=True,
@@ -507,7 +511,7 @@ def sample_target_index(spark: SparkSession) -> DataFrame:
def sample_target_index(spark: SparkSession) -> DataFrame:
"""Sample target index sample data."""
return spark.read.parquet(
"tests/data_samples/target_sample.parquet",
"tests/gentropy/data_samples/target_sample.parquet",
)


@@ -539,22 +543,22 @@ def mock_gene_index(spark: SparkSession) -> GeneIndex:
@pytest.fixture()
def liftover_chain_37_to_38(spark: SparkSession) -> LiftOverSpark:
"""Sample liftover chain file."""
-return LiftOverSpark("tests/data_samples/grch37_to_grch38.over.chain")
+return LiftOverSpark("tests/gentropy/data_samples/grch37_to_grch38.over.chain")


@pytest.fixture()
def sample_l2g_gold_standard(spark: SparkSession) -> DataFrame:
"""Sample L2G gold standard curation."""
return spark.read.json(
"tests/data_samples/l2g_gold_standard_curation_sample.json.gz",
"tests/gentropy/data_samples/l2g_gold_standard_curation_sample.json.gz",
)


@pytest.fixture()
def sample_otp_interactions(spark: SparkSession) -> DataFrame:
"""Sample OTP gene-gene interactions dataset."""
return spark.read.parquet(
"tests/data_samples/otp_interactions_sample.parquet",
"tests/gentropy/data_samples/otp_interactions_sample.parquet",
)


@@ -618,21 +622,21 @@ def mock_l2g_predictions(spark: SparkSession) -> L2GPrediction:
@pytest.fixture()
def sample_data_for_carma() -> list[np.ndarray]:
"""Sample data for fine-mapping by CARMA."""
-ld = pd.read_csv("tests/data_samples/01_test_ld.csv", header=None)
+ld = pd.read_csv("tests/gentropy/data_samples/01_test_ld.csv", header=None)
ld = np.array(ld)
-z = pd.read_csv("tests/data_samples/01_test_z.csv")
+z = pd.read_csv("tests/gentropy/data_samples/01_test_z.csv")
z = np.array(z.iloc[:, 1])
-pips = pd.read_csv("tests/data_samples/01_test_PIPs.txt")
+pips = pd.read_csv("tests/gentropy/data_samples/01_test_PIPs.txt")
pips = np.array(pips.iloc[:, 0])
return [ld, z, pips]


@pytest.fixture()
def sample_data_for_susie_inf() -> list[np.ndarray]:
"""Sample data for fine-mapping by SuSiE-inf."""
-ld = np.loadtxt("tests/data_samples/01_test_ld.csv", delimiter=",")
-z = pd.read_csv("tests/data_samples/01_test_z.csv")
+ld = np.loadtxt("tests/gentropy/data_samples/01_test_ld.csv", delimiter=",")
+z = pd.read_csv("tests/gentropy/data_samples/01_test_z.csv")
z = np.array(z.iloc[:, 1])
-lbf_moments = np.loadtxt("tests/data_samples/01_test_lbf_moments.csv")
-lbf_mle = np.loadtxt("tests/data_samples/01_test_lbf_mle.csv")
+lbf_moments = np.loadtxt("tests/gentropy/data_samples/01_test_lbf_moments.csv")
+lbf_mle = np.loadtxt("tests/gentropy/data_samples/01_test_lbf_mle.csv")
return [ld, z, lbf_moments, lbf_mle]
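
Several fixtures above load a small sample that was fetched as a single string, by parallelizing it into an RDD and handing that to the JSON or CSV reader. A minimal self-contained sketch of that pattern, with the sample content inlined instead of read from a file:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# One JSON document containing an array: Spark emits one row per element,
# which is why the fixtures can read jq '.[:10]' output directly.
json_data = '[{"phenocode": "AB1_ACTINOMYCOSIS"}, {"phenocode": "AB1_EBV"}]'
rdd = spark.sparkContext.parallelize([json_data])
df = spark.read.json(rdd)
df.show()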
24 files renamed without changes.
@@ -14,8 +14,8 @@ def test_finngen_finemapping_from_finngen_susie_finemapping(
assert isinstance(
FinnGenFinemapping.from_finngen_susie_finemapping(
spark=spark,
-finngen_finemapping_df="tests/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz",
-finngen_finemapping_summaries="tests/data_samples/finngen_credset_summary_sample.tsv",
+finngen_finemapping_df="tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz",
+finngen_finemapping_summaries="tests/gentropy/data_samples/finngen_credset_summary_sample.tsv",
),
StudyLocus,
)
@@ -12,7 +12,7 @@ def test_finngen_summary_stats_from_source(spark: SparkSession) -> None:
assert isinstance(
FinnGenSummaryStats.from_source(
spark=spark,
-raw_file="tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz",
+raw_file="tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz",
),
SummaryStatistics,
)
@@ -97,9 +97,9 @@ def _setup(self: TestGnomADLDMatrixVariants, spark: SparkSession) -> None:
ld_test_population = "test-pop"

gnomad_ld_matrix = GnomADLDMatrix(
-ld_matrix_template="tests/data_samples/example_{POP}.bm",
-ld_index_raw_template="tests/data_samples/example_{POP}.ht",
-grch37_to_grch38_chain_path="tests/data_samples/grch37_to_grch38.over.chain",
+ld_matrix_template="tests/gentropy/data_samples/example_{POP}.bm",
+ld_index_raw_template="tests/gentropy/data_samples/example_{POP}.ht",
+grch37_to_grch38_chain_path="tests/gentropy/data_samples/grch37_to_grch38.over.chain",
)
self.ld_slice = gnomad_ld_matrix.get_ld_variants(
gnomad_ancestry=ld_test_population,
@@ -173,7 +173,7 @@ def _setup(self: TestGnomADLDMatrixSlice, spark: SparkSession) -> None:
"""Prepares fixtures for the test."""
hl.init(sc=spark.sparkContext, log="/dev/null", idempotent=True)
gnomad_ld_matrix = GnomADLDMatrix(
-ld_matrix_template="tests/data_samples/example_{POP}.bm"
+ld_matrix_template="tests/gentropy/data_samples/example_{POP}.bm"
)
test_ld_population: str = "test-pop"
self.slice_start_index: int = 1
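The `{POP}` placeholder in these templates stands for the ancestry label and is filled in at lookup time; the exact call site is not shown in this diff, but the substitution itself is plain string formatting:

ld_matrix_template = "tests/gentropy/data_samples/example_{POP}.bm"
path = ld_matrix_template.format(POP="test-pop")  # the ancestry used by the tests above
assert path == "tests/gentropy/data_samples/example_test-pop.bm"
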
@@ -26,7 +26,7 @@ def gwas_catalog_summary_statistics__new_format(
) -> GWASCatalogSummaryStatistics:
"""Test GWASCatalogSummaryStatistics creation with mock data."""
return GWASCatalogSummaryStatistics.from_gwas_harmonized_summary_stats(
spark, "tests/data_samples/new_format_GCST90293086.h.tsv.gz"
spark, "tests/gentropy/data_samples/new_format_GCST90293086.h.tsv.gz"
)

@pytest.fixture(scope="class")
@@ -36,7 +36,7 @@ def gwas_catalog_summary_statistics__old_format(
) -> GWASCatalogSummaryStatistics:
"""Test GWASCatalogSummaryStatistics creation with mock data."""
return GWASCatalogSummaryStatistics.from_gwas_harmonized_summary_stats(
spark, "tests/data_samples/old_format_GCST006090.h.tsv.gz"
spark, "tests/gentropy/data_samples/old_format_GCST006090.h.tsv.gz"
)

@pytest.fixture(scope="class")
@@ -12,7 +12,9 @@
@pytest.fixture(scope="module")
def sample_intervals_andersson(spark: SparkSession) -> DataFrame:
"""Sample Andersson intervals."""
-return IntervalsAndersson.read(spark, "tests/data_samples/andersson_sample.bed")
+return IntervalsAndersson.read(
+    spark, "tests/gentropy/data_samples/andersson_sample.bed"
+)


def test_read_andersson(sample_intervals_andersson: DataFrame) -> None:
@@ -12,7 +12,9 @@
@pytest.fixture(scope="module")
def sample_intervals_javierre(spark: SparkSession) -> DataFrame:
"""Sample Javierre intervals."""
-return IntervalsJavierre.read(spark, "tests/data_samples/javierre_sample.parquet")
+return IntervalsJavierre.read(
+    spark, "tests/gentropy/data_samples/javierre_sample.parquet"
+)


def test_read_javierre(sample_intervals_javierre: DataFrame) -> None:
@@ -12,7 +12,7 @@
@pytest.fixture(scope="module")
def sample_intervals_jung(spark: SparkSession) -> DataFrame:
"""Sample Jung intervals."""
-return IntervalsJung.read(spark, "tests/data_samples/jung_sample.bed")
+return IntervalsJung.read(spark, "tests/gentropy/data_samples/jung_sample.bed")


def test_read_jung(sample_intervals_jung: DataFrame) -> None:
@@ -12,7 +12,9 @@
@pytest.fixture(scope="module")
def sample_intervals_thurman(spark: SparkSession) -> DataFrame:
"""Sample Thurman intervals."""
-return IntervalsThurman.read(spark, "tests/data_samples/thurman_sample.bed8")
+return IntervalsThurman.read(
+    spark, "tests/gentropy/data_samples/thurman_sample.bed8"
+)


def test_read_thurman(sample_intervals_thurman: DataFrame) -> None:
13 files renamed without changes.
@@ -20,7 +20,7 @@ def test_clumpstep_summary_stats(self, session: Session) -> None:
clumped_study_locus_path = Path(temp_dir, "GCST005523_chr18_clumped")
WindowBasedClumpingStep(
session=session,
-summary_statistics_input_path="tests/data_samples/sumstats_sample",
+summary_statistics_input_path="tests/gentropy/data_samples/sumstats_sample",
study_locus_output_path=str(clumped_study_locus_path),
)
assert Path(clumped_study_locus_path).exists(), "Output directory exists."
3 files renamed without changes.
