Skip to content

Commit

Permalink
ENH: add clinical_index
Browse files Browse the repository at this point in the history
also added checks for existence of the URLs containing remote indices
  • Loading branch information
fedorov committed Aug 8, 2024
1 parent dfafae1 commit 986e004
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
9 changes: 8 additions & 1 deletion idc_index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class IDCClient:
CITATION_FORMAT_BIBTEX = "application/x-bibtex"

# Singleton pattern
# NOTE: In the future, one may want to use multiple clients e.g. for sub-datasets so a attribute-singleton as shown bewlo seems a better option.
# NOTE: In the future, one may want to use multiple clients, e.g. for sub-datasets, so an attribute-singleton as shown below seems a better option.
# _instance: IDCClient
# def __new__(cls):
# if not hasattr(cls, "_instance") or getattr(cls, "_instance") is None:
Expand Down Expand Up @@ -74,6 +74,8 @@ def __init__(self):
{"Modality": pd.Series.unique, "series_size_MB": "sum"}
)

idc_version = f"v{Version(idc_index_data.__version__).major}"

self.indices_overview = {
"index": {
"description": "Main index containing one row per DICOM series.",
Expand All @@ -95,6 +97,11 @@ def __init__(self):
"installed": False,
"url": f"{asset_endpoint_url}/sm_instance_index.parquet",
},
"clinical_index": {
"description": "Index of clinical data accompanying the available images.",
"installed": False,
"url": f"https://idc-open-metadata.s3.amazonaws.com/bigquery_export/idc_{idc_version}_clinical/column_metadata/000000000000.parquet",
},
}

# Lookup s5cmd
Expand Down
18 changes: 18 additions & 0 deletions tests/idcindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import pandas as pd
import pytest
import requests
from click.testing import CliRunner
from idc_index import IDCClient, cli

Expand All @@ -18,6 +19,17 @@
logging.basicConfig(level=logging.DEBUG)


def remote_file_exists(url, timeout=30):
    """Return True if a HEAD request to ``url`` answers with HTTP 200.

    Redirects are followed before the status code is inspected. Any
    ``requests.RequestException`` raised while contacting the server
    (including a timeout) is printed and reported as ``False`` rather than
    propagated.

    Args:
        url: Address of the remote file to probe.
        timeout: Seconds to wait for the server before giving up. Without an
            explicit timeout ``requests`` may block indefinitely on an
            unresponsive host, which would hang the whole test run.

    Returns:
        bool: True only when the final response status code is 200.
    """
    try:
        # Keep the try body limited to the call that can actually raise.
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.RequestException as e:
        # Handle any request failure (e.g., network issues, timeout)
        print(f"An error occurred: {e}")
        return False
    # Only a plain 200 counts as "exists"; other codes are treated as missing.
    return response.status_code == 200


@pytest.fixture(autouse=True)
def _change_test_dir(request, monkeypatch):
monkeypatch.chdir(request.fspath.dirname)
Expand Down Expand Up @@ -494,6 +506,12 @@ def test_fetch_index(self):
assert i.indices_overview["sm_index"]["installed"] is True
assert hasattr(i, "sm_index")

def test_indices_urls(self):
    """Check that every index with a configured URL points to an existing file.

    Entries of ``indices_overview`` whose ``"url"`` is ``None`` are skipped.
    """
    client = IDCClient()
    for name, entry in client.indices_overview.items():
        url = entry["url"]
        if url is None:
            continue
        # Name the index and URL so a failure is immediately actionable
        # instead of a bare "assert False".
        assert remote_file_exists(
            url
        ), f"URL for index '{name}' is not reachable: {url}"


# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
unittest.main()

0 comments on commit 986e004

Please sign in to comment.