From cbd072ff56dc7ba86cc5976a10ed993dc42c23ed Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Tue, 19 Nov 2024 16:22:36 +0100 Subject: [PATCH 1/8] tests --- .../continuous_coverage_phenotype.py | 107 ------------- phenex/phenotypes/sex_phenotype.py | 9 +- phenex/test/phenotype_test_generator.py | 2 +- .../test_continuous_coverage_phenotype.py | 141 ------------------ phenex/test/phenotypes/test_sex_phenotype.py | 2 +- 5 files changed, 7 insertions(+), 254 deletions(-) delete mode 100644 phenex/phenotypes/continuous_coverage_phenotype.py delete mode 100644 phenex/test/phenotypes/test_continuous_coverage_phenotype.py diff --git a/phenex/phenotypes/continuous_coverage_phenotype.py b/phenex/phenotypes/continuous_coverage_phenotype.py deleted file mode 100644 index b832ebf..0000000 --- a/phenex/phenotypes/continuous_coverage_phenotype.py +++ /dev/null @@ -1,107 +0,0 @@ -from typing import Union, List, Dict, Optional -from phenex.phenotypes.phenotype import Phenotype -from phenex.filters.value import Value -from phenex.filters.codelist_filter import CodelistFilter -from phenex.filters.relative_time_range_filter import RelativeTimeRangeFilter -from phenex.filters.date_range_filter import DateRangeFilter -from phenex.filters.aggregator import First, Last -from phenex.codelists import Codelist -from phenex.tables import is_phenex_code_table, PHENOTYPE_TABLE_COLUMNS, PhenotypeTable -from phenex.phenotypes.functions import select_phenotype_columns -from ibis import _ -from ibis.expr.types.relations import Table -import ibis - - -class ContinuousCoveragePhenotype(Phenotype): - """ - A phenotype based on continuous coverage within an observation period. - - This class helps generate SQL queries to filter a population based on - continuous coverage criteria within the observation period. - - :param domain: The domain of the phenotype, default is 'observation_period'. The domain - key is used at runtime to determine which table to run on. - :param coverage_period_min: The minimum coverage period for the phenotype with a default - of 0 days. The operator must be '>=' or '>'. - :param return_date: An optional return date for the phenotype result. Possible values are - "first" and "last", where "first" is the beginning of the coverage period containing - the index date and "last" in the end of the coverage period containing the index date. - - Example usage: Find all patients with at least 90 days of continuous coverage - -------------- - >>> coverage_min_filter = ValueFilter(">=", 90) - >>> phenotype = ContinuousCoveragePhenotype(coverage_period_min=coverage_min_filter) - """ - - def __init__(self, - name:Optional[str] = 'continuous_coverage', - domain:Optional[str] = 'OBSERVATION_PERIOD', - relative_time_range:Optional[RelativeTimeRangeFilter] = None, - min_days : Optional[Value] = None, - anchor_phenotype:Optional[Phenotype] = None, - ): - super().__init__() - self.name = name - self.domain = domain - self.relative_time_range = relative_time_range - self.min_days = min_days - - def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable: - coverage_table = tables[self.domain] - # first perform time range filter on observation period start date - coverage_table = coverage_table.mutate(EVENT_DATE = coverage_table.OBSERVATION_PERIOD_START_DATE) - coverage_table = self._perform_time_filtering(coverage_table) - # ensure that coverage end extends past the anchor date - coverage_table = self._filter_observation_period_end(coverage_table) - coverage_table = self._filter_coverage_period(coverage_table) - - coverage_table = coverage_table.mutate(EVENT_DATE = ibis.null()) - return coverage_table - - def _perform_time_filtering(self, coverage_table): - ''' - Filter the observation period start - ''' - if self.relative_time_range is not None: - coverage_table = self.relative_time_range.filter(coverage_table) - return coverage_table - - def _filter_observation_period_end(self, coverage_table): - ''' - Get only rows where the observation period end date is after the anchor date - ''' - if self.relative_time_range is not None: - if self.relative_time_range.anchor_phenotype is not None: - reference_column = self.relative_time_range.anchor_phenotype.table.EVENT_DATE - else: - reference_column = coverage_table.INDEX_DATE - - coverage_table = coverage_table.filter( - coverage_table.OBSERVATION_PERIOD_END_DATE >= reference_column - ) - return coverage_table - - - def _filter_coverage_period(self, coverage_table: Table) -> Table: - if self.min_days.operator == '>': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] > self.min_days.value) - ) - elif self.min_days.operator == '>=': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] >= self.min_days.value) - ) - elif self.min_days.operator == '<': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] < self.min_days.value) - ) - elif self.min_days.operator == '<=': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] <= self.min_days.value) - ) - return coverage_table - - - def get_codelists(self): - return [] diff --git a/phenex/phenotypes/sex_phenotype.py b/phenex/phenotypes/sex_phenotype.py index 77b1204..15d0873 100644 --- a/phenex/phenotypes/sex_phenotype.py +++ b/phenex/phenotypes/sex_phenotype.py @@ -24,7 +24,7 @@ class SexPhenotype(Phenotype): def __init__( self, name: str = "sex", - allowed_values: List[str] = ["male", "female"], + allowed_values: Optional[List[str]] = None, domain: str = "PERSON", ): self.name = name @@ -37,7 +37,8 @@ def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable: person_table = tables[self.domain] assert is_phenex_person_table(person_table) - sex_filter = CategoricalFilter(column_name="SEX", allowed_values=self.allowed_values) - filtered_table = sex_filter._filter(person_table) + if self.allowed_values is not None: + sex_filter = CategoricalFilter(column_name="SEX", allowed_values=self.allowed_values) + person_table = sex_filter._filter(person_table) - return filtered_table.mutate(VALUE=filtered_table.SEX, EVENT_DATE= ibis.null()) + return person_table.mutate(VALUE=person_table.SEX, EVENT_DATE= ibis.null()) diff --git a/phenex/test/phenotype_test_generator.py b/phenex/test/phenotype_test_generator.py index d846f53..989dc20 100644 --- a/phenex/test/phenotype_test_generator.py +++ b/phenex/test/phenotype_test_generator.py @@ -139,7 +139,7 @@ def df_from_test_info(test_info): if "date" in col.lower(): schema[col] = datetime.date elif "value" in col.lower(): - schema[col] = float + schema[col] = str if isinstance(df[col].iloc[0], str) else float elif "boolean" in col.lower(): schema[col] = bool else: diff --git a/phenex/test/phenotypes/test_continuous_coverage_phenotype.py b/phenex/test/phenotypes/test_continuous_coverage_phenotype.py deleted file mode 100644 index df7b62a..0000000 --- a/phenex/test/phenotypes/test_continuous_coverage_phenotype.py +++ /dev/null @@ -1,141 +0,0 @@ -import datetime, os -import pandas as pd - -from phenex.phenotypes.continuous_coverage_phenotype import ContinuousCoveragePhenotype -from phenex.codelists import LocalCSVCodelistFactory -from phenex.filters.date_range_filter import DateRangeFilter -from phenex.filters.relative_time_range_filter import RelativeTimeRangeFilter - -from phenex.test.phenotype_test_generator import PhenotypeTestGenerator -from phenex.filters.value import * - - - -class ContinuousCoveragePhenotypeTestGenerator(PhenotypeTestGenerator): - name_space = "continuouscoverage" - - def define_input_tables(self): - oneday = datetime.timedelta(days=1) - index_date = datetime.datetime.strptime("01-01-2022", "%m-%d-%Y") - - observation_period_min = 90 * oneday - possible_start_dates = [ - index_date - 4 * observation_period_min, - index_date - 2 * observation_period_min, - index_date - observation_period_min - oneday, - index_date - observation_period_min, - index_date - observation_period_min + oneday, - index_date, - index_date + oneday, - ] - - intervals = [ - observation_period_min, - observation_period_min - oneday, - observation_period_min + oneday, - 2 * observation_period_min, - ] - - start_dates = [] - end_dates = [] - for s in possible_start_dates: - for i in intervals: - start_dates.append(s) - end_dates.append(s + i) - - N = len(end_dates) - df_observation_period = pd.DataFrame() - df_observation_period["PERSON_ID"] = [ - f"P{x}" for x in list(range(N)) - ] - df_observation_period["INDEX_DATE"] = index_date - df_observation_period["observation_period_start_date"] = start_dates - df_observation_period["observation_period_end_date"] = end_dates - - - self.df_input = df_observation_period - input_info_observation_period = { - "name": "observation_period", - "df": df_observation_period, - } - - return [input_info_observation_period] - - def define_phenotype_tests(self): - t1 = { - "name": "coverage_min_geq_90", - "coverage_period_min": Value(value=90, operator=">="), - "persons": ["P7", "P10", "P11", "P12", "P14", "P15"], - } - t2 = { - "name": "coverage_min_gt_90", - "coverage_period_min": Value(value=90, operator=">"), - "persons": ["P7", "P10", "P11"], - } - test_infos = [t1, t2] - - for test_info in test_infos: - test_info["phenotype"] = ContinuousCoveragePhenotype( - name=test_info["name"], - domain="observation_period", - coverage_period_min=test_info.get("coverage_period_min"), - ) - test_info["refactor"] = True # TODO remove once refactored - - return test_infos - - -class ContinuousCoverageReturnLastPhenotypeTestGenerator( - ContinuousCoveragePhenotypeTestGenerator -): - name_space = "ccpt_returnlast" - - def define_phenotype_tests(self): - persons = ["P7", "P10", "P11", "P12", "P14", "P15"] - - t1 = { - "name": "coverage_min_geq_90", - "coverage_period_min": Value(value=90, operator=">="), - "persons": persons, - "dates": list( - self.df_input[self.df_input["PERSON_ID"].isin(persons)][ - "observation_period_end_date" - ].values - ), - } - - persons = ["P7", "P10", "P11"] - t2 = { - "name": "coverage_min_gt_90", - "coverage_period_min": Value(value=90, operator=">"), - "persons": ["P7", "P10", "P11"], - "dates": list( - self.df_input[self.df_input["PERSON_ID"].isin(persons)][ - "observation_period_end_date" - ].values - ), - } - test_infos = [t1, t2] - - for test_info in test_infos: - test_info["phenotype"] = ContinuousCoveragePhenotype( - name=test_info["name"], - domain="observation_period", - return_date="last", - coverage_period_min=test_info.get("coverage_period_min"), - ) - test_info["column_types"] = {f"{test_info['name']}_date": "date"} - - return test_infos - - -def test_continuous_coverage_phenotypes(): - spg = ContinuousCoveragePhenotypeTestGenerator() - spg.run_tests() - - spg = ContinuousCoverageReturnLastPhenotypeTestGenerator() - spg.run_tests() - - -if __name__ == "__main__": - test_continuous_coverage_phenotypes() diff --git a/phenex/test/phenotypes/test_sex_phenotype.py b/phenex/test/phenotypes/test_sex_phenotype.py index 385d089..a265ea9 100644 --- a/phenex/test/phenotypes/test_sex_phenotype.py +++ b/phenex/test/phenotypes/test_sex_phenotype.py @@ -29,7 +29,7 @@ def define_input_tables(self): column_types_person = {} input_info_person = { - "name": "person", + "name": "PERSON", "df": df_person, "column_types": column_types_person, } From ea67dea280a07d5f0d5ad2dd779cac56a030667b Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Tue, 19 Nov 2024 16:34:34 +0100 Subject: [PATCH 2/8] fix docs build --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index b23d0ab..d75f74d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -18,7 +18,7 @@ nav: - CodelistPhenotype: api/phenotypes/codelist_phenotype.md - MeasurementPhenotype: api/phenotypes/measurement_phenotype.md - AgePhenotype: api/phenotypes/age_phenotype.md - - SexPhenotype: api/phenotypes/sec_phenotype.md + - SexPhenotype: api/phenotypes/sex_phenotype.md - DeathPhenotype: api/phenotypes/death_phenotype.md - AgePhenotype: api/phenotypes/age_phenotype.md - ArithmeticPhenotype: api/phenotypes/arithmetic_phenotype.md From 292353fbf187e10445b66d89fe0877c5ae48a768 Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Tue, 19 Nov 2024 16:38:20 +0100 Subject: [PATCH 3/8] fix docs build --- docs/api/phenotypes/continuous_coverage_phenotype.md | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 docs/api/phenotypes/continuous_coverage_phenotype.md diff --git a/docs/api/phenotypes/continuous_coverage_phenotype.md b/docs/api/phenotypes/continuous_coverage_phenotype.md deleted file mode 100644 index a5b619f..0000000 --- a/docs/api/phenotypes/continuous_coverage_phenotype.md +++ /dev/null @@ -1,3 +0,0 @@ -# ContinuousCoveragePhenotype - -::: phenex.phenotypes.continuous_coverage_phenotype From ed1e67ebcff957bf4ac307bab830686c347256c4 Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Wed, 20 Nov 2024 09:26:00 +0100 Subject: [PATCH 4/8] docstrings for ibis connect --- mkdocs.yml | 1 + phenex/ibis_connect.py | 81 ++++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index d75f74d..549a8c1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,6 +27,7 @@ nav: - Cohort: api/phenotypes/cohort.md - Codelists: - Codelist: api/codelists/codelists.md + - Connectors: api/ibis_connect.md - License: LICENSE.md plugins: diff --git a/phenex/ibis_connect.py b/phenex/ibis_connect.py index f364973..c48c49b 100644 --- a/phenex/ibis_connect.py +++ b/phenex/ibis_connect.py @@ -1,10 +1,20 @@ +from typing import Optional import os import ibis from ibis.backends import BaseBackend # Snowflake connection function -def check_env_vars(*vars): +def _check_env_vars(*vars: str) -> None: + """ + Check if the required environment variables are set. + + Args: + *vars: Variable length argument list of environment variable names. + + Raises: + EnvironmentError: If any of the required environment variables are missing. + """ missing_vars = [var for var in vars if os.getenv(var) is None] if missing_vars: raise EnvironmentError( @@ -12,7 +22,33 @@ def check_env_vars(*vars): ) -def ibis_snowflake_connect() -> BaseBackend: +def ibis_snowflake_connect( + SNOWFLAKE_USER: Optional[str] = None, + SNOWFLAKE_ACCOUNT: Optional[str] = None, + SNOWFLAKE_WAREHOUSE: Optional[str] = None, + SNOWFLAKE_DATABASE: Optional[str] = None, + SNOWFLAKE_SCHEMA: Optional[str] = None, + SNOWFLAKE_ROLE: Optional[str] = None, + SNOWFLAKE_PASSWORD: Optional[str] = None) -> BaseBackend: + """ + Establish a connection to Snowflake using Ibis. Variables for the connection can + be passed either via this function call or as environment variables of the same name. + All arguments are required to be specified by one of these two methods except + SNOWFLAKE_PASSWORD. + + + Args: + SNOWFLAKE_USER: Snowflake user name. + SNOWFLAKE_ACCOUNT: Snowflake account identifier. + SNOWFLAKE_WAREHOUSE: Snowflake warehouse name. + SNOWFLAKE_DATABASE: Snowflake database name. + SNOWFLAKE_SCHEMA : Snowflake schema name. + SNOWFLAKE_ROLE: Snowflake role name. + SNOWFLAKE_PASSWORD: Snowflake password. If not specified, will attempt to authenticate with externalbrowser. + + Returns: + BaseBackend: An Ibis backend connection to Snowflake. + """ required_vars = [ "SNOWFLAKE_USER", "SNOWFLAKE_ACCOUNT", @@ -21,32 +57,39 @@ def ibis_snowflake_connect() -> BaseBackend: "SNOWFLAKE_SCHEMA", "SNOWFLAKE_ROLE", ] - check_env_vars(*required_vars) + _check_env_vars(*required_vars) if "SNOWFLAKE_PASSWORD" in os.environ: return ibis.snowflake.connect( - user=os.getenv("SNOWFLAKE_USER"), - password=os.getenv("SNOWFLAKE_PASSWORD"), - account=os.getenv("SNOWFLAKE_ACCOUNT"), - warehouse=os.getenv("SNOWFLAKE_WAREHOUSE"), - database=os.getenv("SNOWFLAKE_DATABASE"), - role=os.getenv("SNOWFLAKE_ROLE"), - schema=os.getenv("SNOWFLAKE_SCHEMA"), + user=os.getenv("SNOWFLAKE_USER", SNOWFLAKE_USER), + password=os.getenv("SNOWFLAKE_PASSWORD", SNOWFLAKE_PASSWORD), + account=os.getenv("SNOWFLAKE_ACCOUNT", SNOWFLAKE_ACCOUNT), + warehouse=os.getenv("SNOWFLAKE_WAREHOUSE", SNOWFLAKE_WAREHOUSE), + database=os.getenv("SNOWFLAKE_DATABASE", SNOWFLAKE_DATABASE), + role=os.getenv("SNOWFLAKE_ROLE", SNOWFLAKE_ROLE), + schema=os.getenv("SNOWFLAKE_SCHEMA", SNOWFLAKE_SCHEMA), ) else: return ibis.snowflake.connect( - user=os.getenv("SNOWFLAKE_USER"), + user=os.getenv("SNOWFLAKE_USER", SNOWFLAKE_USER), authenticator="externalbrowser", - account=os.getenv("SNOWFLAKE_ACCOUNT"), - warehouse=os.getenv("SNOWFLAKE_WAREHOUSE"), - database=os.getenv("SNOWFLAKE_DATABASE"), - role=os.getenv("SNOWFLAKE_ROLE"), - schema=os.getenv("SNOWFLAKE_SCHEMA"), + account=os.getenv("SNOWFLAKE_ACCOUNT", SNOWFLAKE_ACCOUNT), + warehouse=os.getenv("SNOWFLAKE_WAREHOUSE", SNOWFLAKE_WAREHOUSE), + database=os.getenv("SNOWFLAKE_DATABASE", SNOWFLAKE_DATABASE), + role=os.getenv("SNOWFLAKE_ROLE", SNOWFLAKE_ROLE), + schema=os.getenv("SNOWFLAKE_SCHEMA", SNOWFLAKE_SCHEMA), ) # DuckDB connection function -def ibis_duckdb_connect() -> BaseBackend: +def ibis_duckdb_connect(DUCKDB_PATH: Optional[str] = ":memory") -> BaseBackend: + """ + Establish a connection to DuckDB using Ibis. Variables for the connection can + be passed either via this function call or as environment variables of the same name. + + Returns: + BaseBackend: An Ibis backend connection to DuckDB. + """ required_vars = ["DUCKDB_PATH"] - check_env_vars(*required_vars) + _check_env_vars(*required_vars) - return ibis.connect(backend="duckdb", path=os.getenv("DUCKDB_PATH", ":memory:")) + return ibis.connect(backend="duckdb", path=os.getenv("DUCKDB_PATH", DUCKDB_PATH)) From d29801a0e35e94812fb1c6967f0a9dc70e03fcf4 Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Wed, 20 Nov 2024 10:47:34 +0100 Subject: [PATCH 5/8] documentation --- docs/api/ibis_connect.md | 3 ++ phenex/codelists/codelists.py | 96 +++++++++++++++++++++++++++++------ 2 files changed, 84 insertions(+), 15 deletions(-) create mode 100644 docs/api/ibis_connect.md diff --git a/docs/api/ibis_connect.md b/docs/api/ibis_connect.md new file mode 100644 index 0000000..e8b3101 --- /dev/null +++ b/docs/api/ibis_connect.md @@ -0,0 +1,3 @@ +# Ibis Connectors + +::: phenex.ibis_connect diff --git a/phenex/codelists/codelists.py b/phenex/codelists/codelists.py index 705a9ed..648e62f 100644 --- a/phenex/codelists/codelists.py +++ b/phenex/codelists/codelists.py @@ -7,27 +7,36 @@ class Codelist: """ A Codelist has two fields: - :param name: Descriptive name of codelist - :param codelist: User can enter codelists as either a string, a list of strings + Parameters: + name: Descriptive name of codelist + codelist: User can enter codelists as either a string, a list of strings or a dictionary keyed by code type. In first two cases, the class will convert the input to a dictionary with a single key None. All consumers of the Codelist instance can then assume the codelist in that format. + Example: + ```python # Initialize with a list - >> cl = Codelist( + cl = Codelist( ['x', 'y', 'z'], 'mycodelist' ) - >> print(cl.codelist) + print(cl.codelist) {None: ['x', 'y', 'z']} + ``` + Example: + ```python # Initialize with string - >> cl = Codelist( + cl = Codelist( 'SBP' ) - >> print(cl.codelist) + print(cl.codelist) {None: ['SBP']} - + ``` + + Example: + ```python # Initialize with a dictionary >> atrial_fibrillation_icd_codes = { "ICD-9": [ @@ -40,11 +49,11 @@ class Codelist: "I48.91", # Unspecified atrial fibrillation ] } - >> cl = Codelist( + cl = Codelist( atrial_fibrillation_icd_codes, 'atrial_fibrillation', ) - >> print(cl.codelist) + print(cl.codelist) { "ICD-9": [ "427.31" # Atrial fibrillation @@ -56,6 +65,7 @@ class Codelist: "I48.91", # Unspecified atrial fibrillation ] } + ``` """ def __init__( @@ -77,6 +87,26 @@ def __init__( def from_yaml(cls, path: str) -> "Codelist": """ Load a codelist from a yaml file. + + The YAML file should contain a dictionary where the keys are code types + (e.g., "ICD-9", "ICD-10") and the values are lists of codes for each type. + + Example: + ```yaml + ICD-9: + - "427.31" # Atrial fibrillation + ICD-10: + - "I48.0" # Paroxysmal atrial fibrillation + - "I48.1" # Persistent atrial fibrillation + - "I48.2" # Chronic atrial fibrillation + - "I48.91" # Unspecified atrial fibrillation + ``` + + Parameters: + path: Path to the YAML file. + + Returns: + Codelist instance. """ import yaml @@ -99,13 +129,49 @@ def from_excel( """ Load a codelist from an Excel file. + The Excel file should contain columns for code types, codes, and optionally + codelist names. Each row represents a code entry. + + The codelists can be in one sheet or spread across multiple sheets: + + 1. Single Sheet: + If all codelists are in one sheet, the sheet should have a column for codelist names. + Use codelist_name to point to the specific codelist of interest. + + Example: + ```markdown + | code_type | code | codelist | + |-----------|--------|--------------------| + | ICD-9 | 427.31 | atrial_fibrillation| + | ICD-10 | I48.0 | atrial_fibrillation| + | ICD-10 | I48.1 | atrial_fibrillation| + | ICD-10 | I48.2 | atrial_fibrillation| + | ICD-10 | I48.91 | atrial_fibrillation| + ``` + + 2. Multiple Sheets: + If codelists are spread across multiple sheets, each sheet should represent a single codelist. + Example: + ```markdown + | code_type | code | + |-----------|--------| + | ICD-9 | 427.31 | + | ICD-10 | I48.0 | + | ICD-10 | I48.1 | + | ICD-10 | I48.2 | + | ICD-10 | I48.91 | + ``` + Parameters: - path: path to the excel file. - sheet_name: an optional label for the sheet to read from. If defined, the codelist will be taken from that sheet. If no sheet_name is defined, the first sheet is taken. - codelist_name: an optional name of the codelist which to extract. If defined, codelist_column must be present and the codelist_name must occur within the codelist_column. - code_column: the name of the column containing the codes. - code_type_column: the name of the column containing the code types. - codelist_column: the name of the column containing the codelist names. + path: Path to the Excel file. + sheet_name: An optional label for the sheet to read from. If defined, the codelist will be taken from that sheet. If no sheet_name is defined, the first sheet is taken. + codelist_name: An optional name of the codelist which to extract. If defined, codelist_column must be present and the codelist_name must occur within the codelist_column. + code_column: The name of the column containing the codes. + code_type_column: The name of the column containing the code types. + codelist_column: The name of the column containing the codelist names. + + Returns: + Codelist instance. """ import pandas as pd From 22cdf62250c4a2f429e86f15b510f02dc532c8c2 Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Wed, 20 Nov 2024 10:51:41 +0100 Subject: [PATCH 6/8] documentation --- phenex/codelists/codelists.py | 1 + 1 file changed, 1 insertion(+) diff --git a/phenex/codelists/codelists.py b/phenex/codelists/codelists.py index 648e62f..d0ca08b 100644 --- a/phenex/codelists/codelists.py +++ b/phenex/codelists/codelists.py @@ -151,6 +151,7 @@ def from_excel( 2. Multiple Sheets: If codelists are spread across multiple sheets, each sheet should represent a single codelist. + Use sheet_name to point to the specific codelist of interest. Example: ```markdown | code_type | code | From 0febb905bc593074ec47f3c96ce670fcfd20e454 Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Wed, 20 Nov 2024 10:52:01 +0100 Subject: [PATCH 7/8] documentation --- phenex/codelists/codelists.py | 1 + 1 file changed, 1 insertion(+) diff --git a/phenex/codelists/codelists.py b/phenex/codelists/codelists.py index d0ca08b..5a1e276 100644 --- a/phenex/codelists/codelists.py +++ b/phenex/codelists/codelists.py @@ -152,6 +152,7 @@ def from_excel( 2. Multiple Sheets: If codelists are spread across multiple sheets, each sheet should represent a single codelist. Use sheet_name to point to the specific codelist of interest. + Example: ```markdown | code_type | code | From 843be1a3f6f1a648151cbf346a8d0e0fad6fedfc Mon Sep 17 00:00:00 2001 From: a-hartens Date: Wed, 20 Nov 2024 14:12:49 +0100 Subject: [PATCH 8/8] updated docstrings --- .../codelists/local_csv_codelist_factory.md | 3 + mkdocs.yml | 1 + phenex/codelists/codelists.py | 114 +++++++++--------- phenex/filters/aggregator.py | 7 +- phenex/filters/categorical_filter.py | 9 +- phenex/ibis_connect.py | 15 +-- phenex/mappers.py | 33 +++-- phenex/phenotypes/categorical_phenotype.py | 9 +- phenex/phenotypes/death_phenotype.py | 13 +- phenex/phenotypes/sex_phenotype.py | 7 +- .../test/phenotypes/test_death_phenotype.py | 68 +++-------- phenex/test/phenotypes/test_sex_phenotype.py | 1 + 12 files changed, 144 insertions(+), 136 deletions(-) create mode 100644 docs/api/codelists/local_csv_codelist_factory.md diff --git a/docs/api/codelists/local_csv_codelist_factory.md b/docs/api/codelists/local_csv_codelist_factory.md new file mode 100644 index 0000000..af90d35 --- /dev/null +++ b/docs/api/codelists/local_csv_codelist_factory.md @@ -0,0 +1,3 @@ +# LocalCSVCodelistFactory + +::: phenex.codelists.codelists.LocalCSVCodelistFactory diff --git a/mkdocs.yml b/mkdocs.yml index 549a8c1..e79ba95 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,6 +27,7 @@ nav: - Cohort: api/phenotypes/cohort.md - Codelists: - Codelist: api/codelists/codelists.md + - LocalCSVCodelistFactory: api/codelists/local_csv_codelist_factory.md - Connectors: api/ibis_connect.md - License: LICENSE.md diff --git a/phenex/codelists/codelists.py b/phenex/codelists/codelists.py index 5a1e276..23a1c92 100644 --- a/phenex/codelists/codelists.py +++ b/phenex/codelists/codelists.py @@ -5,16 +5,15 @@ class Codelist: """ - A Codelist has two fields: + Codelist is a class that allows us to conveniently work with medical codes used in RWD analyses. A Codelist represents a (single) specific medical concept, such as 'atrial fibrillation' or 'myocardial infarction'. A Codelist is associated with a set of medical codes from one or multiple source vocabularies (such as ICD10CM or CPT); we call these vocabularies 'code types'. Code type is important, as there are no assurances that codes from different vocabularies (different code types) do not overlap. It is therefore highly recommended to always specify the code type when using a codelist. + + Codelist is a simple class that stores the codelist as a dictionary. The dictionary is keyed by code type and the value is a list of codes. Codelist also has various convenience methods such as read from excel, csv or yaml files, and export to excel files. Parameters: - name: Descriptive name of codelist - codelist: User can enter codelists as either a string, a list of strings - or a dictionary keyed by code type. In first two cases, the class will convert - the input to a dictionary with a single key None. All consumers of the Codelist - instance can then assume the codelist in that format. + name: Descriptive name of codelist + codelist: User can enter codelists as either a string, a list of strings or a dictionary keyed by code type. In first two cases, the class will convert the input to a dictionary with a single key None. All consumers of the Codelist instance can then assume the codelist in that format. - Example: + Example: ```python # Initialize with a list cl = Codelist( @@ -34,7 +33,7 @@ class Codelist: print(cl.codelist) {None: ['SBP']} ``` - + Example: ```python # Initialize with a dictionary @@ -127,53 +126,50 @@ def from_excel( codelist_column: Optional[str] = "codelist", ) -> "Codelist": """ - Load a codelist from an Excel file. + Load a single codelist located in an Excel file. - The Excel file should contain columns for code types, codes, and optionally - codelist names. Each row represents a code entry. + It is required that the Excel file contains a minimum of two columns for code and code_type. The actual columnnames can be specified using the code_column and code_type_column parameters. - The codelists can be in one sheet or spread across multiple sheets: + If multiple codelists exist in the same excel table, the codelist_column and codelist_name are required to point to the specific codelist of interest. - 1. Single Sheet: - If all codelists are in one sheet, the sheet should have a column for codelist names. - Use codelist_name to point to the specific codelist of interest. + It is possible to specify the sheet name if the codelist is in a specific sheet. - Example: - ```markdown - | code_type | code | codelist | - |-----------|--------|--------------------| - | ICD-9 | 427.31 | atrial_fibrillation| - | ICD-10 | I48.0 | atrial_fibrillation| - | ICD-10 | I48.1 | atrial_fibrillation| - | ICD-10 | I48.2 | atrial_fibrillation| - | ICD-10 | I48.91 | atrial_fibrillation| - ``` - - 2. Multiple Sheets: - If codelists are spread across multiple sheets, each sheet should represent a single codelist. - Use sheet_name to point to the specific codelist of interest. - - Example: - ```markdown - | code_type | code | - |-----------|--------| - | ICD-9 | 427.31 | - | ICD-10 | I48.0 | - | ICD-10 | I48.1 | - | ICD-10 | I48.2 | - | ICD-10 | I48.91 | - ``` - - Parameters: - path: Path to the Excel file. - sheet_name: An optional label for the sheet to read from. If defined, the codelist will be taken from that sheet. If no sheet_name is defined, the first sheet is taken. - codelist_name: An optional name of the codelist which to extract. If defined, codelist_column must be present and the codelist_name must occur within the codelist_column. - code_column: The name of the column containing the codes. - code_type_column: The name of the column containing the code types. - codelist_column: The name of the column containing the codelist names. + 1. Single table, single codelist : The table (whether an entire excel file, or a single sheet in an excel file) contains only one codelist. The table should have columns for code and code_type. - Returns: - Codelist instance. + ```markdown + | code_type | code | + |-----------|--------| + | ICD-9 | 427.31 | + | ICD-10 | I48.0 | + | ICD-10 | I48.1 | + | ICD-10 | I48.2 | + | ICD-10 | I48.91 | + ``` + + 2. Single table, multiple codelists: A single table (whether an entire file, or a single sheet in an excel file) contains multiple codelists. A column for the name of each codelist is required. Use codelist_name to point to the specific codelist of interest. + + ```markdown + | code_type | code | codelist | + |-----------|--------|--------------------| + | ICD-9 | 427.31 | atrial_fibrillation| + | ICD-10 | I48.0 | atrial_fibrillation| + | ICD-10 | I48.1 | atrial_fibrillation| + | ICD-10 | I48.2 | atrial_fibrillation| + | ICD-10 | I48.91 | atrial_fibrillation| + ``` + + + + Parameters: + path: Path to the Excel file. + sheet_name: An optional label for the sheet to read from. If defined, the codelist will be taken from that sheet. If no sheet_name is defined, the first sheet is taken. + codelist_name: An optional name of the codelist which to extract. If defined, codelist_column must be present and the codelist_name must occur within the codelist_column. + code_column: The name of the column containing the codes. + code_type_column: The name of the column containing the code types. + codelist_column: The name of the column containing the codelist names. + + Returns: + Codelist instance. """ import pandas as pd @@ -220,17 +216,20 @@ def __repr__(self): def to_pandas(self) -> pd.DataFrame: """ - Convert the codelist to a pandas DataFrame. + Export the codelist to a pandas DataFrame. The DataFrame will have three columns: code_type, code, and codelist. """ _df = pd.DataFrame(self.to_tuples(), columns=["code_type", "code"]) - _df['codelist'] = self.name + _df["codelist"] = self.name return _df - class LocalCSVCodelistFactory: - """ """ + """ + LocalCSVCodelistFactory allows for the creation of multiple codelists from a single CSV file. Use this class when you have a single CSV file that contains multiple codelists. + + To use, create an instance of the class and then call the `create_codelist` method with the name of the codelist you want to create; this codelist name must be an entry in the name_code_type_column. + """ def __init__( self, @@ -239,6 +238,13 @@ def __init__( name_codelist_column: str = "codelist", name_code_type_column: str = "code_type", ) -> None: + """ + Parameters: + path: Path to the CSV file. + name_code_column: The name of the column containing the codes. + name_codelist_column: The name of the column containing the codelist names. + name_code_type_column: The name of the column containing the code types. + """ self.path = path self.name_code_column = name_code_column self.name_codelist_column = name_codelist_column diff --git a/phenex/filters/aggregator.py b/phenex/filters/aggregator.py index 2ca75d0..d6f88c3 100644 --- a/phenex/filters/aggregator.py +++ b/phenex/filters/aggregator.py @@ -8,7 +8,7 @@ def __init__( aggregation_index=["PERSON_ID"], aggregation_function="sum", event_date_column="EVENT_DATE", - reduce=False + reduce=False, ): self.aggregation_index = aggregation_index self.aggregation_function = aggregation_function @@ -40,7 +40,9 @@ def aggregate(self, input_table: Table): input_table = input_table.mutate(aggregated_date=aggregated_date) # Filter rows where the original date matches the aggregated date - input_table = input_table.filter(input_table[self.event_date_column] == input_table.aggregated_date) + input_table = input_table.filter( + input_table[self.event_date_column] == input_table.aggregated_date + ) # Select the necessary columns @@ -52,6 +54,7 @@ def aggregate(self, input_table: Table): return input_table + class Nearest(VerticalDateAggregator): def __init__(self, **kwargs): super().__init__(aggregation_function="max", **kwargs) diff --git a/phenex/filters/categorical_filter.py b/phenex/filters/categorical_filter.py index f44039a..a92eda2 100644 --- a/phenex/filters/categorical_filter.py +++ b/phenex/filters/categorical_filter.py @@ -2,6 +2,7 @@ from typing import List, Optional, Union from ibis.expr.types.relations import Table + class CategoricalFilter(Filter): """ This class filters events in an EventTable based on specified categorical values @@ -19,10 +20,10 @@ class CategoricalFilter(Filter): """ def __init__( - self, - column_name: str, - allowed_values: List[Union[str, int]], - domain: Optional[str] = None + self, + column_name: str, + allowed_values: List[Union[str, int]], + domain: Optional[str] = None, ): self.column_name = column_name self.allowed_values = allowed_values diff --git a/phenex/ibis_connect.py b/phenex/ibis_connect.py index c48c49b..98080e8 100644 --- a/phenex/ibis_connect.py +++ b/phenex/ibis_connect.py @@ -23,13 +23,14 @@ def _check_env_vars(*vars: str) -> None: def ibis_snowflake_connect( - SNOWFLAKE_USER: Optional[str] = None, - SNOWFLAKE_ACCOUNT: Optional[str] = None, - SNOWFLAKE_WAREHOUSE: Optional[str] = None, - SNOWFLAKE_DATABASE: Optional[str] = None, - SNOWFLAKE_SCHEMA: Optional[str] = None, - SNOWFLAKE_ROLE: Optional[str] = None, - SNOWFLAKE_PASSWORD: Optional[str] = None) -> BaseBackend: + SNOWFLAKE_USER: Optional[str] = None, + SNOWFLAKE_ACCOUNT: Optional[str] = None, + SNOWFLAKE_WAREHOUSE: Optional[str] = None, + SNOWFLAKE_DATABASE: Optional[str] = None, + SNOWFLAKE_SCHEMA: Optional[str] = None, + SNOWFLAKE_ROLE: Optional[str] = None, + SNOWFLAKE_PASSWORD: Optional[str] = None, +) -> BaseBackend: """ Establish a connection to Snowflake using Ibis. Variables for the connection can be passed either via this function call or as environment variables of the same name. diff --git a/phenex/mappers.py b/phenex/mappers.py index db7314a..7e8aeca 100644 --- a/phenex/mappers.py +++ b/phenex/mappers.py @@ -58,7 +58,13 @@ def rename(self, table: Table) -> Table: mapping = copy.deepcopy(asdict(self)) mapping.pop("NAME_TABLE") # delete optional params from mapping - for key in ["DATE_OF_BIRTH", "DATE_OF_DEATH", "YEAR_OF_BIRTH", "SEX", "ETHNICITY"]: + for key in [ + "DATE_OF_BIRTH", + "DATE_OF_DEATH", + "YEAR_OF_BIRTH", + "SEX", + "ETHNICITY", + ]: if getattr(self, key) is None: del mapping[key] return table.rename(**mapping) @@ -118,6 +124,7 @@ class MeasurementTableColumnMapper(CodeTableColumnMapper): VALUE: str = "VALUE" + @dataclass class ObservationPeriodTableMapper: NAME_TABLE: str = "OBSERVATION_PERIOD" @@ -139,26 +146,30 @@ def rename(self, table: Table) -> Table: mapping.pop("NAME_TABLE") return table.rename(**mapping) + # # OMOP Column Mappers # OMOPPersonTableColumnMapper = PersonTableColumnMapper( - NAME_TABLE="PERSON", PERSON_ID="PERSON_ID", + NAME_TABLE="PERSON", + PERSON_ID="PERSON_ID", DATE_OF_BIRTH="BIRTH_DATETIME", YEAR_OF_BIRTH="YEAR_OF_BIRTH", - SEX="GENDER_CONCEPT_ID", ETHNICITY="ETHNICITY_CONCEPT_ID" + SEX="GENDER_CONCEPT_ID", + ETHNICITY="ETHNICITY_CONCEPT_ID", ) OMOPDeathTableColumnMapper = PersonTableColumnMapper( - NAME_TABLE="DEATH", PERSON_ID="PERSON_ID", - DATE_OF_DEATH="DEATH_DATE" + NAME_TABLE="DEATH", PERSON_ID="PERSON_ID", DATE_OF_DEATH="DEATH_DATE" ) OMOPPersonTableSourceColumnMapper = PersonTableColumnMapper( - NAME_TABLE="PERSON", PERSON_ID="PERSON_ID", + NAME_TABLE="PERSON", + PERSON_ID="PERSON_ID", DATE_OF_BIRTH="BIRTH_DATETIME", YEAR_OF_BIRTH="YEAR_OF_BIRTH", - SEX="GENDER_SOURCE_VALUE", ETHNICITY="ETHNICITY_SOURCE_VALUE" + SEX="GENDER_SOURCE_VALUE", + ETHNICITY="ETHNICITY_SOURCE_VALUE", ) OMOPConditionOccurrenceColumnMapper = CodeTableColumnMapper( @@ -223,12 +234,14 @@ def rename(self, table: Table) -> Table: OMOPDomains = DomainsDictionary(**OMOPColumnMappers) - # # Vera Column Mappers # VeraPersonTableColumnMapper = PersonTableColumnMapper( - NAME_TABLE="PERSON", PERSON_ID="PERSON_ID", DATE_OF_BIRTH="BIRTH_DATETIME", DATE_OF_DEATH="DEATH_DATETIME" + NAME_TABLE="PERSON", + PERSON_ID="PERSON_ID", + DATE_OF_BIRTH="BIRTH_DATETIME", + DATE_OF_DEATH="DEATH_DATETIME", ) VeraConditionOccurrenceColumnMapper = CodeTableColumnMapper( @@ -268,4 +281,4 @@ def rename(self, table: Table) -> Table: # # Domains # -VeraDomains = DomainsDictionary(**VeraColumnMappers) \ No newline at end of file +VeraDomains = DomainsDictionary(**VeraColumnMappers) diff --git a/phenex/phenotypes/categorical_phenotype.py b/phenex/phenotypes/categorical_phenotype.py index fcbc3ae..d74f625 100644 --- a/phenex/phenotypes/categorical_phenotype.py +++ b/phenex/phenotypes/categorical_phenotype.py @@ -29,12 +29,13 @@ class HospitalizationPhenotype(Phenotype): _execute(tables: Dict[str, Table]) -> PhenotypeTable: Executes the filtering process on the provided tables and returns the filtered phenotype table. """ + def __init__( self, domain, column_name: str, allowed_values: List[str], - name = None, + name=None, date_range: DateRangeFilter = None, relative_time_range: Union[ RelativeTimeRangeFilter, List[RelativeTimeRangeFilter] @@ -43,7 +44,9 @@ def __init__( ): super(HospitalizationPhenotype, self).__init__() - self.categorical_filter = CategoricalFilter(column_name=column_name, allowed_values=allowed_values) + self.categorical_filter = CategoricalFilter( + column_name=column_name, allowed_values=allowed_values + ) self.name = name self.date_range = date_range self.return_date = return_date @@ -93,4 +96,4 @@ def _perform_date_selection(self, code_table): aggregator = Last() else: raise ValueError(f"Unknown return_date: {self.return_date}") - return aggregator.aggregate(code_table) \ No newline at end of file + return aggregator.aggregate(code_table) diff --git a/phenex/phenotypes/death_phenotype.py b/phenex/phenotypes/death_phenotype.py index a4a2fed..a8020b2 100644 --- a/phenex/phenotypes/death_phenotype.py +++ b/phenex/phenotypes/death_phenotype.py @@ -3,7 +3,8 @@ from ibis.expr.types.relations import Table from phenex.phenotypes.phenotype import Phenotype from phenex.tables import PhenotypeTable, is_phenex_person_table -import ibis +import ibis + class DeathPhenotype(Phenotype): """ @@ -20,10 +21,14 @@ class DeathPhenotype(Phenotype): Executes the phenotype calculation and returns a table with the filtered individuals. """ - def __init__(self, name: str = "death", domain: str = "PERSON", + def __init__( + self, + name: str = "death", + domain: str = "PERSON", relative_time_range: Union[ RelativeTimeRangeFilter, List[RelativeTimeRangeFilter] - ] = None): + ] = None, + ): self.name = name self.domain = domain self.children = [] @@ -46,4 +51,4 @@ def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable: for rtr in self.relative_time_range: death_table = rtr.filter(death_table) death_table = death_table.mutate(VALUE=ibis.null()) - return death_table.mutate(EVENT_DATE=death_table.DATE_OF_DEATH) \ No newline at end of file + return death_table.mutate(EVENT_DATE=death_table.DATE_OF_DEATH) diff --git a/phenex/phenotypes/sex_phenotype.py b/phenex/phenotypes/sex_phenotype.py index 15d0873..5544302 100644 --- a/phenex/phenotypes/sex_phenotype.py +++ b/phenex/phenotypes/sex_phenotype.py @@ -5,6 +5,7 @@ from phenex.filters.categorical_filter import CategoricalFilter from phenex.tables import PhenotypeTable, is_phenex_person_table + class SexPhenotype(Phenotype): """ SexPhenotype is a class that represents a sex-based phenotype. It filters individuals @@ -38,7 +39,9 @@ def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable: assert is_phenex_person_table(person_table) if self.allowed_values is not None: - sex_filter = CategoricalFilter(column_name="SEX", allowed_values=self.allowed_values) + sex_filter = CategoricalFilter( + column_name="SEX", allowed_values=self.allowed_values + ) person_table = sex_filter._filter(person_table) - return person_table.mutate(VALUE=person_table.SEX, EVENT_DATE= ibis.null()) + return person_table.mutate(VALUE=person_table.SEX, EVENT_DATE=ibis.null()) diff --git a/phenex/test/phenotypes/test_death_phenotype.py b/phenex/test/phenotypes/test_death_phenotype.py index 4550404..d082d6f 100644 --- a/phenex/test/phenotypes/test_death_phenotype.py +++ b/phenex/test/phenotypes/test_death_phenotype.py @@ -44,15 +44,11 @@ def define_phenotype_tests(self): idx_persons = [1, 2, 3, 4, 5] t1 = { "name": "death_prior_including_index", - "time_range_filter": RelativeTimeRangeFilter( - when="before" - ), + "time_range_filter": RelativeTimeRangeFilter(when="before"), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -61,15 +57,12 @@ def define_phenotype_tests(self): t2 = { "name": "death_prior_index", "time_range_filter": RelativeTimeRangeFilter( - when="before", - min_days=GreaterThan(0) + when="before", min_days=GreaterThan(0) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -78,15 +71,12 @@ def define_phenotype_tests(self): t3 = { "name": "death_prior_including_index_max20", "time_range_filter": RelativeTimeRangeFilter( - when="before", - max_days=Value("<=", 30) + when="before", max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -95,16 +85,12 @@ def define_phenotype_tests(self): t4 = { "name": "death_prior_index_max_20", "time_range_filter": RelativeTimeRangeFilter( - when="before", - min_days=GreaterThan(0), - max_days=Value("<=", 30) + when="before", min_days=GreaterThan(0), max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -112,15 +98,11 @@ def define_phenotype_tests(self): idx_persons = [1, 5, 6, 7, 8] t5 = { "name": "death_post_including_index", - "time_range_filter": RelativeTimeRangeFilter( - when="after" - ), + "time_range_filter": RelativeTimeRangeFilter(when="after"), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -129,15 +111,12 @@ def define_phenotype_tests(self): t6 = { "name": "death_post_index", "time_range_filter": RelativeTimeRangeFilter( - when="after", - min_days=GreaterThan(0) + when="after", min_days=GreaterThan(0) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -146,15 +125,12 @@ def define_phenotype_tests(self): t7 = { "name": "death_post_including_index_max20", "time_range_filter": RelativeTimeRangeFilter( - when="after", - max_days=Value("<=", 30) + when="after", max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -162,16 +138,12 @@ def define_phenotype_tests(self): t8 = { "name": "death_post_index_max_20", "time_range_filter": RelativeTimeRangeFilter( - when="after", - min_days=GreaterThan(0), - max_days=Value("<=", 30) + when="after", min_days=GreaterThan(0), max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -179,16 +151,12 @@ def define_phenotype_tests(self): t9 = { "name": "death_post_min_30_max_50", "time_range_filter": RelativeTimeRangeFilter( - when="after", - min_days=Value(">", 30), - max_days=Value("<=", 50) + when="after", min_days=Value(">", 30), max_days=Value("<=", 50) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } diff --git a/phenex/test/phenotypes/test_sex_phenotype.py b/phenex/test/phenotypes/test_sex_phenotype.py index a265ea9..951546e 100644 --- a/phenex/test/phenotypes/test_sex_phenotype.py +++ b/phenex/test/phenotypes/test_sex_phenotype.py @@ -9,6 +9,7 @@ from phenex.test.phenotype_test_generator import PhenotypeTestGenerator from phenex.filters.value import * + class SexPhenotypeTestGenerator(PhenotypeTestGenerator): name_space = "sex_phenotype"