-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* if run step is manually aborted it can be restarted without recomputing what has already been computed. Needs more testing and works only if post_process has not started yet, but seems to do the trick * added somewhat dangerous cleanup, does the job but make it safer * finished modularization in theory: only not run ppkts are run, ontogpt output is appended, the full dataframe save location has been moved, plots and data dir separated. BUT all of this needs to be tested * lifted buggy is_file(), tested adding a model, then adding a phenopacket, in both cases successful run * added possibility of ppkts not being correctly run. If output file is present but empty, rerun it. Tested. * significant cleanup of run with Daniel and Peter, input is now manually given through csv file, extended analysis significantly * added safe saving of dfs and some analysis scripts
- Loading branch information
Showing
12 changed files
with
373 additions
and
144 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import pandas as pd | ||
from typing import List | ||
|
||
import pandas as pd | ||
import yaml | ||
#from malco.post_process.post_process_results_format import read_raw_result_yaml | ||
from pathlib import Path | ||
import sys | ||
|
||
def read_raw_result_yaml(raw_result_path: Path) -> List[dict]:
    """Parse a multi-document YAML results file into a list of documents.

    Args:
        raw_result_path (Path): Path to the raw result file.

    Returns:
        List[dict]: One entry per YAML document found in the file.
    """
    with open(raw_result_path, 'r') as fh:
        # Strip stray EOT (0x04) control characters before parsing the stream.
        cleaned = fh.read().replace('\x04', '')
    return [doc for doc in yaml.safe_load_all(cleaned)]
||
# Compare per-model result coverage across the OpenAI runs: for each model,
# count prompts with any entry in results.yaml, prompts with a non-empty
# differential, and unique ppkts in the results.tsv summary.
unique_ppkts = {}
#model=str(sys.argv[1])
models = ["gpt-3.5-turbo", "gpt-4-turbo", "gpt-4", "gpt-4o"]


def _report_missing(reference_model: str, other_model: str) -> None:
    """Print each label present in reference_model's results but absent from other_model's."""
    for label in unique_ppkts[reference_model]:
        if label not in unique_ppkts[other_model]:
            print(f"Missing ppkt in {other_model} is:\t", label)


for model in models:
    print("===" * 10, "\nEvaluating now: ", model, "\n" + "===" * 10)

    yamlfile = f"out_openAI_models/raw_results/multimodel/{model}/results.yaml"
    all_results = read_raw_result_yaml(yamlfile)

    counter = 0
    labelvec = []

    # Cannot have further files in raw_result_path!
    for this_result in all_results:
        extracted_object = this_result.get("extracted_object")
        if extracted_object:
            labelvec.append(extracted_object.get('label'))
            # A non-None 'terms' means the model produced a differential.
            if extracted_object.get('terms'):
                counter += 1

    full_df_file = f"out_openAI_models/multimodel/{model}/results.tsv"
    df = pd.read_csv(full_df_file, sep='\t')
    num_ppkts = df['label'].nunique()
    unique_ppkts[model] = df['label'].unique()
    # The first should be equivalent to grepping "raw_" in some results.yaml
    print("The number of prompts that have something in results.yaml are: ", len(labelvec))
    print("The number of prompts that have a non-empty differential (i.e. term is not None) is:", counter)
    print("The number of unique prompts/ppkts with a non-empty differential in results.tsv are:", num_ppkts, "\n")

# This we know a posteriori, gpt-4o and gpt-4-turbo both have 5213 phenopackets
# Thus, let's print out what is missing in the others
_report_missing("gpt-4-turbo", "gpt-4")
print("\n")
_report_missing("gpt-4-turbo", "gpt-3.5-turbo")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Manual experiment: check whether a shelved cache persisted with one maxsize
# can be reopened later with a larger one.

from cachetools import LRUCache
from cachetools.keys import hashkey
from shelved_cache import PersistentCache

file_name = "test_increasing_cache"

# Open the persistent cache with a small capacity and store a single entry.
small_cache = PersistentCache(LRUCache, file_name, maxsize=4096)
small_cache["a"] = 42
small_cache.close()

# Drop into the debugger to inspect the on-disk state between the two opens.
breakpoint()

# Reopen the same shelf file, this time requesting a larger capacity.
bigger_cache = PersistentCache(LRUCache, file_name, maxsize=16384)
breakpoint()
bigger_cache.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import shutil | ||
import os | ||
import pandas as pd | ||
|
||
def safe_save_tsv(path, filename, df):
    """Write *df* to ``path/filename`` as TSV, keeping one backup of a prior version.

    If ``path/filename`` already exists, it is renamed to ``"old_" + filename``
    (replacing any previous backup) before the new file is written. It's the
    user's responsibility to know only up to 2 versions can exist; older data
    is lost.

    Args:
        path: Directory (a ``pathlib.Path``) to write into.
        filename: Name of the TSV file.
        df: pandas DataFrame to save.
    """
    full_path = path / filename
    if os.path.isfile(full_path):
        old_full_path = path / ("old_" + filename)
        # os.replace renames atomically and overwrites any existing backup,
        # avoiding the original copy-then-delete round trip (and the risk of
        # losing data if the process dies between the copy and the delete).
        os.replace(full_path, old_full_path)
    df.to_csv(full_path, sep='\t', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.