Skip to content

Commit

Permalink
clean up, comments, edit make
Browse files Browse the repository at this point in the history
  • Loading branch information
leokim-l committed Sep 18, 2024
1 parent 653291d commit 98e05b7
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 5 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pytest:

test-docs:
$(DOCTEST) src/malco/*.py src/malco/*/*.py
$(DOCTEST) src/malco/runner.py src/malco/run/*.py src/malco/prepare/*.py src/malco/post_process/*.py

%-doctest: %
$(DOCTEST) $<
1 change: 1 addition & 0 deletions src/malco/analysis/disease_avail_knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# (1) Parse out disease genes discovered after 2008/9 (First thing in HPOA)
# Look for a correlation between date annotated and disease correctly diagnosed.
# Hypothesis: the older the easier to diagnose
# PNR suggests: for each ppkt we have a date
# (2) To start, looking at the two broad categories found/not-found, count average number of all links
# After that, count average number of links of some kind
# Then, something more graph-based, such as centrality? Maybe we need to project out something first to find signal in the noise...
Expand Down
5 changes: 3 additions & 2 deletions src/malco/prepare/setup_run_pars.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import csv
import sys

def import_inputdata(self):
"""Example inputfile is located in input_dir and named run_parameters.csv
def import_inputdata(self) -> None:
"""
Example input file is located in ``self.input_dir`` and named run_parameters.csv
It should contain something like:
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"en"
Expand Down
20 changes: 18 additions & 2 deletions src/malco/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,25 @@
from malco.run.search_ppkts import search_ppkts

def call_ontogpt(
lang, raw_results_dir, input_dir, model,
lang: str,
raw_results_dir: Path,
input_dir: Path,
model: str,
modality: typing.Literal['several_languages', 'several_models'],
):
)-> None:
"""
Wrapper used for parallel execution of ontogpt.
Args:
lang (str): Two-letter language code, for example "en" for English.
raw_results_dir (Path): Path to the raw results directory.
input_dir (Path): Path to the input directory; prompts are read from its ``prompts/`` subdirectory.
model (str): Name of the model to be run, e.g. "gpt-4-turbo".
modality (str): Either 'several_languages' or 'several_models'. Determines whether gpt-4o and several languages or English and several models are being run.
Returns:
None
"""
prompt_dir = f'{input_dir}/prompts/'
if modality == 'several_languages':
lang_or_model_dir = lang
Expand Down
6 changes: 5 additions & 1 deletion src/malco/run/search_ppkts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@
def search_ppkts(input_dir, prompt_dir, raw_results_dir, lang_or_model):
"""
Check what ppkts have already been computed in current output dir, for current run parameters.
ontogpt will run every .txt that is in inputdir, we need a tmp inputdir
excluding already run cases.
excluding already run cases. Source of truth is the results.yaml output by ontogpt.
Only extracted_object containing terms is considered successfully run.
Note that rerunning
"""

# List of "labels" that are already present in results.yaml iff terms is not None
Expand Down

0 comments on commit 98e05b7

Please sign in to comment.