test: increase modularity of test directories (#502)
d0choa authored Feb 21, 2024
1 parent faa99be commit 08e6ff6
Showing 112 changed files with 57 additions and 45 deletions.
2 changes: 1 addition & 1 deletion docs/development/contributing.md
@@ -82,5 +82,5 @@ For more details on each of these steps, see the sections below.
### Tests

- Test study fixture in `tests/conftest.py` (example: `mock_study_index_finngen` in that module)
-- Test sample data in `tests/data_samples` (example: `tests/data_samples/finngen_studies_sample.json`)
+- Test sample data in `tests/data_samples` (example: `tests/gentropy/data_samples/finngen_studies_sample.json`)
- Test definition in `tests/` (example: `tests/dataset/test_study_index.py` → `test_study_index_finngen_creation`)
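
As an aside, a minimal sketch of how these three pieces fit together (a conftest fixture, the sample file it reads, and a test that consumes it). The names below are hypothetical, and a `spark` session fixture like the one in `tests/gentropy/conftest.py` is assumed:

from __future__ import annotations

import pytest
from pyspark.sql import DataFrame, SparkSession


@pytest.fixture()
def mock_example_studies(spark: SparkSession) -> DataFrame:
    """Read a small JSON sample; multiLine=True handles a top-level JSON array."""
    return spark.read.json(
        "tests/gentropy/data_samples/finngen_studies_sample.json", multiLine=True
    )


def test_example_studies_load(mock_example_studies: DataFrame) -> None:
    """The sample file parses into a non-empty DataFrame."""
    assert mock_example_studies.count() > 0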
6 changes: 3 additions & 3 deletions docs/src_snippets/howto/python_api/b_create_dataset.py
@@ -17,7 +17,7 @@ def create_from_parquet(session: Session) -> SummaryStatistics:

# --8<-- [end:create_from_parquet_import]

-path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet"
+path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet"
# --8<-- [start:create_from_parquet]
summary_stats = SummaryStatistics.from_parquet(session, path)
# --8<-- [end:create_from_parquet]
@@ -31,7 +31,7 @@ def create_from_source(session: Session) -> SummaryStatistics:
from gentropy.datasource.finngen.summary_stats import FinnGenSummaryStats

# --8<-- [end:create_from_source_import]
-path = "tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz"
+path = "tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz"
# --8<-- [start:create_from_source]
summary_stats = FinnGenSummaryStats.from_source(session.spark, path)
# --8<-- [end:create_from_source]
@@ -46,7 +46,7 @@ def create_from_pandas() -> SummaryStatistics:

# --8<-- [end:create_from_pandas_import]

-path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet"
+path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet"
custom_summary_stats_pandas_df = ps.read_parquet(path)
# --8<-- [start:create_from_pandas]

1 change: 1 addition & 0 deletions pyproject.toml
@@ -127,6 +127,7 @@ exclude = ["dist"]
[tool.pytest.ini_options]
addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml"
pythonpath = [".", "./src/airflow/dags"]
+testpaths = ["tests/gentropy", "src/gentropy/"]

# Semi-strict mode for mypy
[tool.mypy]
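
With `testpaths` covering both `tests/gentropy` and `src/gentropy/`, and `--doctest-modules` already set in `addopts`, a bare `pytest` run now collects the unit tests plus any doctests embedded in the source tree. A sketch of the kind of doctest this picks up (the function below is hypothetical, not from gentropy):

def add_one(x: int) -> int:
    """Add one to x.

    Examples:
        >>> add_one(1)
        2
    """
    return x + 1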
4 changes: 1 addition & 3 deletions tests/__init__.py
@@ -1,3 +1 @@
"""Unit tests."""

from __future__ import annotations
"""Gentropy tests package."""
3 changes: 3 additions & 0 deletions tests/gentropy/__init__.py
@@ -0,0 +1,3 @@
"""Unit tests."""

from __future__ import annotations
File renamed without changes.
52 changes: 28 additions & 24 deletions tests/conftest.py → tests/gentropy/conftest.py
@@ -411,7 +411,7 @@ def mock_ld_index(spark: SparkSession) -> LDIndex:
def sample_gwas_catalog_studies(spark: SparkSession) -> DataFrame:
"""Sample GWAS Catalog studies."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv",
"tests/gentropy/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv",
sep="\t",
header=True,
)
@@ -421,7 +421,7 @@ def sample_gwas_catalog_studies(spark: SparkSession) -> DataFrame:
def sample_gwas_catalog_ancestries_lut(spark: SparkSession) -> DataFrame:
"""Sample GWAS ancestries sample data."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv",
"tests/gentropy/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv",
sep="\t",
header=True,
)
@@ -431,7 +431,7 @@ def sample_gwas_catalog_ancestries_lut(spark: SparkSession) -> DataFrame:
def sample_gwas_catalog_harmonised_sumstats_list(spark: SparkSession) -> DataFrame:
"""Sample GWAS harmonised sumstats sample data."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_harmonised_list.txt",
"tests/gentropy/data_samples/gwas_catalog_harmonised_list.txt",
sep="\t",
header=False,
)
@@ -441,7 +441,7 @@ def sample_gwas_catalog_harmonised_sumstats_list(spark: SparkSession) -> DataFrame:
def sample_gwas_catalog_associations(spark: SparkSession) -> DataFrame:
"""Sample GWAS raw associations sample data."""
return spark.read.csv(
"tests/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv",
"tests/gentropy/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv",
sep="\t",
header=True,
)
@@ -451,7 +451,7 @@ def sample_gwas_catalog_associations(spark: SparkSession) -> DataFrame:
def sample_summary_statistics(spark: SparkSession) -> SummaryStatistics:
"""Sample GWAS raw associations sample data."""
return SummaryStatistics(
-_df=spark.read.parquet("tests/data_samples/sumstats_sample"),
+_df=spark.read.parquet("tests/gentropy/data_samples/sumstats_sample"),
_schema=SummaryStatistics.get_schema(),
)

@@ -460,8 +460,10 @@ def sample_summary_statistics(spark: SparkSession) -> SummaryStatistics:
def sample_finngen_studies(spark: SparkSession) -> DataFrame:
"""Sample FinnGen studies."""
# For reference, the sample file was generated with the following command:
-# curl https://r9.finngen.fi/api/phenos | jq '.[:10]' > tests/data_samples/finngen_studies_sample.json
-with open("tests/data_samples/finngen_studies_sample.json") as finngen_studies:
+# curl https://r9.finngen.fi/api/phenos | jq '.[:10]' > tests/gentropy/data_samples/finngen_studies_sample.json
+with open(
+    "tests/gentropy/data_samples/finngen_studies_sample.json"
+) as finngen_studies:
json_data = finngen_studies.read()
rdd = spark.sparkContext.parallelize([json_data])
return spark.read.json(rdd)
@@ -471,8 +473,10 @@ def sample_finngen_studies(spark: SparkSession) -> DataFrame:
def sample_eqtl_catalogue_studies(spark: SparkSession) -> DataFrame:
"""Sample eQTL Catalogue studies."""
# For reference, the sample file was generated with the following command:
-# curl https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths_imported.tsv | head -n11 > tests/data_samples/eqtl_catalogue_studies_sample.tsv
-with open("tests/data_samples/eqtl_catalogue_studies_sample.tsv") as eqtl_catalogue:
+# curl https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths_imported.tsv | head -n11 > tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv
+with open(
+    "tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv"
+) as eqtl_catalogue:
tsv = eqtl_catalogue.read()
rdd = spark.sparkContext.parallelize([tsv])
return spark.read.csv(rdd, sep="\t", header=True)
@@ -482,11 +486,11 @@ def sample_eqtl_catalogue_studies(spark: SparkSession) -> DataFrame:
def sample_eqtl_catalogue_summary_stats(spark: SparkSession) -> DataFrame:
"""Sample eQTL Catalogue summary stats."""
# For reference, the sample file was generated with the following commands:
-# mkdir -p tests/data_samples/imported/GTEx_V8/ge
-# curl ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz | gzip -cd | head -n11 | gzip -c > tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz
+# mkdir -p tests/gentropy/data_samples/imported/GTEx_V8/ge
+# curl ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz | gzip -cd | head -n11 | gzip -c > tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz
# It's important for the test file to be named in exactly this way, because eQTL Catalogue study ID is populated based on input file name.
return spark.read.option("delimiter", "\t").csv(
"tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz",
"tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz",
header=True,
)

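The naming caveat above matters because the eQTL Catalogue study ID is derived from the input file name. A purely illustrative sketch of that kind of derivation follows; the actual gentropy implementation is not part of this diff and may differ:

from pathlib import Path


def study_id_from_filename(path: str) -> str:
    """Hypothetical helper: build an identifier from the directory layout and file stem."""
    parts = Path(path).parts  # e.g. (..., "imported", "GTEx_V8", "ge", "Adipose_Subcutaneous.tsv.gz")
    stem = Path(path).name.split(".")[0]
    return "_".join([parts[-3], parts[-2], stem])


# Prints "GTEx_V8_ge_Adipose_Subcutaneous" for the sample file above.
print(study_id_from_filename(
    "tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz"
))
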
@@ -496,7 +500,7 @@ def sample_ukbiobank_studies(spark: SparkSession) -> DataFrame:
"""Sample UKBiobank manifest."""
# Sampled 10 rows of the UKBB manifest tsv
return spark.read.csv(
"tests/data_samples/neale2_saige_study_manifest.samples.tsv",
"tests/gentropy/data_samples/neale2_saige_study_manifest.samples.tsv",
sep="\t",
header=True,
inferSchema=True,
@@ -507,7 +511,7 @@ def sample_target_index(spark: SparkSession) -> DataFrame:
def sample_target_index(spark: SparkSession) -> DataFrame:
"""Sample target index sample data."""
return spark.read.parquet(
"tests/data_samples/target_sample.parquet",
"tests/gentropy/data_samples/target_sample.parquet",
)


@@ -539,22 +543,22 @@ def mock_gene_index(spark: SparkSession) -> GeneIndex:
@pytest.fixture()
def liftover_chain_37_to_38(spark: SparkSession) -> LiftOverSpark:
"""Sample liftover chain file."""
-return LiftOverSpark("tests/data_samples/grch37_to_grch38.over.chain")
+return LiftOverSpark("tests/gentropy/data_samples/grch37_to_grch38.over.chain")


@pytest.fixture()
def sample_l2g_gold_standard(spark: SparkSession) -> DataFrame:
"""Sample L2G gold standard curation."""
return spark.read.json(
"tests/data_samples/l2g_gold_standard_curation_sample.json.gz",
"tests/gentropy/data_samples/l2g_gold_standard_curation_sample.json.gz",
)


@pytest.fixture()
def sample_otp_interactions(spark: SparkSession) -> DataFrame:
"""Sample OTP gene-gene interactions dataset."""
return spark.read.parquet(
"tests/data_samples/otp_interactions_sample.parquet",
"tests/gentropy/data_samples/otp_interactions_sample.parquet",
)


@@ -618,21 +622,21 @@ def mock_l2g_predictions(spark: SparkSession) -> L2GPrediction:
@pytest.fixture()
def sample_data_for_carma() -> list[np.ndarray]:
"""Sample data for fine-mapping by CARMA."""
-ld = pd.read_csv("tests/data_samples/01_test_ld.csv", header=None)
+ld = pd.read_csv("tests/gentropy/data_samples/01_test_ld.csv", header=None)
ld = np.array(ld)
-z = pd.read_csv("tests/data_samples/01_test_z.csv")
+z = pd.read_csv("tests/gentropy/data_samples/01_test_z.csv")
z = np.array(z.iloc[:, 1])
-pips = pd.read_csv("tests/data_samples/01_test_PIPs.txt")
+pips = pd.read_csv("tests/gentropy/data_samples/01_test_PIPs.txt")
pips = np.array(pips.iloc[:, 0])
return [ld, z, pips]


@pytest.fixture()
def sample_data_for_susie_inf() -> list[np.ndarray]:
"""Sample data for fine-mapping by SuSiE-inf."""
-ld = np.loadtxt("tests/data_samples/01_test_ld.csv", delimiter=",")
-z = pd.read_csv("tests/data_samples/01_test_z.csv")
+ld = np.loadtxt("tests/gentropy/data_samples/01_test_ld.csv", delimiter=",")
+z = pd.read_csv("tests/gentropy/data_samples/01_test_z.csv")
z = np.array(z.iloc[:, 1])
-lbf_moments = np.loadtxt("tests/data_samples/01_test_lbf_moments.csv")
-lbf_mle = np.loadtxt("tests/data_samples/01_test_lbf_mle.csv")
+lbf_moments = np.loadtxt("tests/gentropy/data_samples/01_test_lbf_moments.csv")
+lbf_mle = np.loadtxt("tests/gentropy/data_samples/01_test_lbf_mle.csv")
return [ld, z, lbf_moments, lbf_mle]
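
Several fixtures above load a small sample that was fetched as a single string, by parallelizing it into an RDD and handing that to the JSON or CSV reader. A minimal self-contained sketch of that pattern, with the sample content inlined instead of read from a file:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# One JSON document containing an array: Spark emits one row per element,
# which is why the fixtures can read jq '.[:10]' output directly.
json_data = '[{"phenocode": "AB1_ACTINOMYCOSIS"}, {"phenocode": "AB1_EBV"}]'
rdd = spark.sparkContext.parallelize([json_data])
df = spark.read.json(rdd)
df.show()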
24 files renamed without changes.
@@ -14,8 +14,8 @@ def test_finngen_finemapping_from_finngen_susie_finemapping(
assert isinstance(
FinnGenFinemapping.from_finngen_susie_finemapping(
spark=spark,
-finngen_finemapping_df="tests/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz",
-finngen_finemapping_summaries="tests/data_samples/finngen_credset_summary_sample.tsv",
+finngen_finemapping_df="tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz",
+finngen_finemapping_summaries="tests/gentropy/data_samples/finngen_credset_summary_sample.tsv",
),
StudyLocus,
)
@@ -12,7 +12,7 @@ def test_finngen_summary_stats_from_source(spark: SparkSession) -> None:
assert isinstance(
FinnGenSummaryStats.from_source(
spark=spark,
-raw_file="tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz",
+raw_file="tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz",
),
SummaryStatistics,
)
@@ -97,9 +97,9 @@ def _setup(self: TestGnomADLDMatrixVariants, spark: SparkSession) -> None:
ld_test_population = "test-pop"

gnomad_ld_matrix = GnomADLDMatrix(
-ld_matrix_template="tests/data_samples/example_{POP}.bm",
-ld_index_raw_template="tests/data_samples/example_{POP}.ht",
-grch37_to_grch38_chain_path="tests/data_samples/grch37_to_grch38.over.chain",
+ld_matrix_template="tests/gentropy/data_samples/example_{POP}.bm",
+ld_index_raw_template="tests/gentropy/data_samples/example_{POP}.ht",
+grch37_to_grch38_chain_path="tests/gentropy/data_samples/grch37_to_grch38.over.chain",
)
self.ld_slice = gnomad_ld_matrix.get_ld_variants(
gnomad_ancestry=ld_test_population,
@@ -173,7 +173,7 @@ def _setup(self: TestGnomADLDMatrixSlice, spark: SparkSession) -> None:
"""Prepares fixtures for the test."""
hl.init(sc=spark.sparkContext, log="/dev/null", idempotent=True)
gnomad_ld_matrix = GnomADLDMatrix(
-ld_matrix_template="tests/data_samples/example_{POP}.bm"
+ld_matrix_template="tests/gentropy/data_samples/example_{POP}.bm"
)
test_ld_population: str = "test-pop"
self.slice_start_index: int = 1
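The `{POP}` placeholder in these templates stands for the ancestry label and is filled in at lookup time; the exact call site is not shown in this diff, but the substitution itself is plain string formatting:

ld_matrix_template = "tests/gentropy/data_samples/example_{POP}.bm"
path = ld_matrix_template.format(POP="test-pop")  # the ancestry used by the tests above
assert path == "tests/gentropy/data_samples/example_test-pop.bm"
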
@@ -26,7 +26,7 @@ def gwas_catalog_summary_statistics__new_format(
) -> GWASCatalogSummaryStatistics:
"""Test GWASCatalogSummaryStatistics creation with mock data."""
return GWASCatalogSummaryStatistics.from_gwas_harmonized_summary_stats(
spark, "tests/data_samples/new_format_GCST90293086.h.tsv.gz"
spark, "tests/gentropy/data_samples/new_format_GCST90293086.h.tsv.gz"
)

@pytest.fixture(scope="class")
@@ -36,7 +36,7 @@ def gwas_catalog_summary_statistics__old_format(
) -> GWASCatalogSummaryStatistics:
"""Test GWASCatalogSummaryStatistics creation with mock data."""
return GWASCatalogSummaryStatistics.from_gwas_harmonized_summary_stats(
spark, "tests/data_samples/old_format_GCST006090.h.tsv.gz"
spark, "tests/gentropy/data_samples/old_format_GCST006090.h.tsv.gz"
)

@pytest.fixture(scope="class")
@@ -12,7 +12,9 @@
@pytest.fixture(scope="module")
def sample_intervals_andersson(spark: SparkSession) -> DataFrame:
"""Sample Andersson intervals."""
-return IntervalsAndersson.read(spark, "tests/data_samples/andersson_sample.bed")
+return IntervalsAndersson.read(
+    spark, "tests/gentropy/data_samples/andersson_sample.bed"
+)


def test_read_andersson(sample_intervals_andersson: DataFrame) -> None:
@@ -12,7 +12,9 @@
@pytest.fixture(scope="module")
def sample_intervals_javierre(spark: SparkSession) -> DataFrame:
"""Sample Javierre intervals."""
-return IntervalsJavierre.read(spark, "tests/data_samples/javierre_sample.parquet")
+return IntervalsJavierre.read(
+    spark, "tests/gentropy/data_samples/javierre_sample.parquet"
+)


def test_read_javierre(sample_intervals_javierre: DataFrame) -> None:
@@ -12,7 +12,7 @@
@pytest.fixture(scope="module")
def sample_intervals_jung(spark: SparkSession) -> DataFrame:
"""Sample Jung intervals."""
-return IntervalsJung.read(spark, "tests/data_samples/jung_sample.bed")
+return IntervalsJung.read(spark, "tests/gentropy/data_samples/jung_sample.bed")


def test_read_jung(sample_intervals_jung: DataFrame) -> None:
@@ -12,7 +12,9 @@
@pytest.fixture(scope="module")
def sample_intervals_thurman(spark: SparkSession) -> DataFrame:
"""Sample Thurman intervals."""
-return IntervalsThurman.read(spark, "tests/data_samples/thurman_sample.bed8")
+return IntervalsThurman.read(
+    spark, "tests/gentropy/data_samples/thurman_sample.bed8"
+)


def test_read_thurman(sample_intervals_thurman: DataFrame) -> None:
13 files renamed without changes.
@@ -20,7 +20,7 @@ def test_clumpstep_summary_stats(self, session: Session) -> None:
clumped_study_locus_path = Path(temp_dir, "GCST005523_chr18_clumped")
WindowBasedClumpingStep(
session=session,
-summary_statistics_input_path="tests/data_samples/sumstats_sample",
+summary_statistics_input_path="tests/gentropy/data_samples/sumstats_sample",
study_locus_output_path=str(clumped_study_locus_path),
)
assert Path(clumped_study_locus_path).exists(), "Output directory exists."
3 files renamed without changes.
