diff --git a/caches/cache_log.txt b/caches/cache_log.txt
new file mode 100644
index 00000000..c58bcb90
--- /dev/null
+++ b/caches/cache_log.txt
@@ -0,0 +1,166 @@
+Timestamp: 20240821-140112
+
+gpt-4o/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=15970, misses=8315, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=90813, misses=14568, maxsize=16384, currsize=14568
+
+gpt-4/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=29929, misses=17045, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=189862, misses=18113, maxsize=16384, currsize=16384
+
+gpt-4-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=36854, misses=19422, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=220689, misses=18252, maxsize=16384, currsize=16384
+
+gpt-3.5-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=56160, misses=25978, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=326321, misses=19439, maxsize=16384, currsize=16384
+
+Timestamp: 20240822-193603
+
+gpt-4o/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=23718, misses=10045, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=118177, misses=5610, maxsize=16384, currsize=16384
+
+gpt-4/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=53220, misses=27955, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=316509, misses=9986, maxsize=16384, currsize=16384
+
+gpt-4-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=81216, misses=36794, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=425550, misses=10637, maxsize=16384, currsize=16384
+
+gpt-3.5-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=100499, misses=43378, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=532067, misses=11525, maxsize=16384, currsize=16384
+
+Timestamp: 20240828-114052
+
+gpt-4o/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=23726, misses=10037, maxsize=4096, currsize=4096
+omim_mappings cache info:
+CacheInfo: hits=116389, misses=7706, maxsize=16384, currsize=16384
+
+Timestamp: 20240903-201528
+
+Timestamp: 20240904-111909
+
+gpt-4o/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=33763, misses=0, maxsize=524288, currsize=12774
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+gpt-4/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=74660, misses=6556, maxsize=524288, currsize=19330
+omim_mappings cache info:
+CacheInfo: hits=64985, misses=8, maxsize=524288, currsize=20618
+
+gpt-4-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=109269, misses=8782, maxsize=524288, currsize=21556
+omim_mappings cache info:
+CacheInfo: hits=90157, misses=8, maxsize=524288, currsize=20618
+
+gpt-3.5-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=134134, misses=9936, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=115413, misses=10, maxsize=524288, currsize=20620
+
+Timestamp: 20240904-115833
+
+gpt-4o/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=33763, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+gpt-4/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=81216, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+gpt-4-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=118051, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+gpt-3.5-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=144070, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+Timestamp: 20240904-121924
+
+gpt-4o/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=33763, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+gpt-4/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=81216, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+gpt-4-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=118051, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+gpt-3.5-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=144070, misses=0, maxsize=524288, currsize=22710
+omim_mappings cache info:
+CacheInfo: hits=0, misses=0, maxsize=524288, currsize=0
+
+Timestamp: 20240905-132835
+
+gpt-4o/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=39813, misses=2626, maxsize=524288, currsize=25336
+omim_mappings cache info:
+CacheInfo: hits=21842, misses=279, maxsize=524288, currsize=20899
+
+gpt-4/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=97434, misses=5307, maxsize=524288, currsize=28017
+omim_mappings cache info:
+CacheInfo: hits=53565, misses=936, maxsize=524288, currsize=21556
+
+gpt-4-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=143644, misses=6097, maxsize=524288, currsize=28807
+omim_mappings cache info:
+CacheInfo: hits=62521, misses=1074, maxsize=524288, currsize=21694
+
+gpt-3.5-turbo/results.tsv
+score_grounded_result cache info:
+CacheInfo: hits=176206, misses=6703, maxsize=524288, currsize=29413
+omim_mappings cache info:
+CacheInfo: hits=80073, misses=2993, maxsize=524288, currsize=23613
+
diff --git a/caches/omim_mappings_cache.db b/caches/omim_mappings_cache.db
new file mode 100644
index 00000000..74e30758
Binary files /dev/null and b/caches/omim_mappings_cache.db differ
diff --git a/caches/score_grounded_result_cache.db b/caches/score_grounded_result_cache.db
new file mode 100644
index 00000000..628ddb71
Binary files /dev/null and b/caches/score_grounded_result_cache.db differ
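A note on reading cache_log.txt: each CacheInfo line carries the same four counters that functools.lru_cache exposes via cache_info() (hits, misses, maxsize, currsize), and the jump from maxsize 4096/16384 to 524288 between the 20240828 and 20240904 entries shows the caches being enlarged, while the new *.db files suggest they are now also persisted to disk. A minimal sketch of how one such log block could be appended per model run; omim_mappings and append_cache_stats below are hypothetical stand-ins, not the actual malco implementation:

import functools

@functools.lru_cache(maxsize=524288)
def omim_mappings(term: str):
    # Hypothetical stand-in for the expensive OMIM lookup being cached.
    ...

def append_cache_stats(log_path: str, label: str) -> None:
    # Append one block shaped like the entries in cache_log.txt above.
    ci = omim_mappings.cache_info()
    with open(log_path, "a") as log:
        log.write(f"{label}\nomim_mappings cache info:\n")
        log.write(
            f"CacheInfo: hits={ci.hits}, misses={ci.misses}, "
            f"maxsize={ci.maxsize}, currsize={ci.currsize}\n\n"
        )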
diff --git a/src/malco/analysis/disease_avail_knowledge.py b/src/malco/analysis/disease_avail_knowledge.py
new file mode 100644
index 00000000..28a35c13
--- /dev/null
+++ b/src/malco/analysis/disease_avail_knowledge.py
@@ -0,0 +1,92 @@
+# Let us try to parametrize how much is known about each disease. Beyond
+# eval_diagnose_category, which looks at the MONDO categories, there are two further ideas:
+# Idea (0): is (number of HPOs present, number of HPOs excluded) correlated with a disease being found?
+# (1) HPOA and (2) Monarch KG.
+# (1) Parse out disease genes discovered after 2008/9 (the first entry in HPOA).
+# Look for a correlation between annotation date and whether the disease is correctly diagnosed.
+# Hypothesis: the older the annotation, the easier the disease is to diagnose.
+# (2) To start, for the two broad categories found/not-found, count the average number of all links.
+# After that, count the average number of links of some kind.
+# Then something more graphy, such as centrality? Maybe we first need to project something out to find signal in the noise...
+import sys
+import pandas as pd
+import numpy as np
+import datetime as dt
+
+hpoa_file_path = "/Users/leonardo/IdeaProjects/maxodiff/data/phenotype.hpoa"
+hpoa_df = pd.read_csv(
+    hpoa_file_path, sep="\t", header=4
+)
+
+hpoa_cleaned = pd.DataFrame()
+hpoa_cleaned["database_id"] = hpoa_df["database_id"]
+# Biocuration entries look like e.g. "HPO:probinson[2009-02-17]"; grab the bracketed date.
+hpoa_cleaned['date'] = hpoa_df["biocuration"].str.extract(r'\[(.*?)\]')
+#string_dates = str(hpoa_df["biocuration"].str.extract(r'\[(.*?)\]'))
+# I am getting a bit tangled up here; it would be simpler to just do a color coding.
+#hpoa_cleaned['date'] = [dt.datetime.strptime(day, '%Y-%m-%d').date() for day in string_dates]
+hpoa_cleaned = hpoa_cleaned[hpoa_cleaned['database_id'].str.startswith("OMIM")]
+
+model = str(sys.argv[1])
+ranking_results_filename = f"out_openAI_models/multimodel/{model}/full_df_results.tsv"
+rank_results_df = pd.read_csv(
+    ranking_results_filename, sep="\t"
+)
+
+found_diseases = []
+not_found_diseases = []
+ppkts = rank_results_df.groupby("label")[["term", "correct_term", "is_correct"]]
+for ppkt in ppkts:
+    # ppkt is a tuple ("filename", dataframe) --> ppkt[1] is a dataframe.
+    # Was the correct disease ranked at all for this phenopacket?
+    disease = ppkt[1].iloc[0]['correct_term']
+    if any(ppkt[1]["is_correct"]):
+        found_diseases.append(disease)
+    else:
+        not_found_diseases.append(disease)
+
+found_set = set(found_diseases)
+notfound_set = set(not_found_diseases)
+overlap = found_set & notfound_set
+
+print(f"Number of diseases found by {model}: {len(found_set)}.")
+print(f"Number of diseases not found by {model}: {len(notfound_set)}.")
+print(f"Diseases in both the found and not-found sets for {model}: {len(overlap)}.\n")
+# TODO: needs some more rigorous statistics.
+
+# header = ["disease_id", "found", "date"]
+
+# Problematic: deduplication drops the table from ~27k rows to 8.2k unique database_ids.
+hpoa_cleaned = hpoa_cleaned.drop_duplicates(subset='database_id')
+# Idea here could be to look at the 263-129 (gpt-4o) found diseases not present in the not-found set, and the opposite,
+# namely the never-found diseases, and look for a correlation with date.
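+# For example, if found_set = {"OMIM:101200", "OMIM:143100"} and
+# notfound_set = {"OMIM:143100"}, then always_found = {"OMIM:101200"} and
+# never_found = set(): the set differences below keep only diseases that
+# land on the same side for every phenopacket in which they occur.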
+always_found = found_set - notfound_set  # 134
+never_found = notfound_set - found_set  # 213
+
+results_dict = {}  # ends up with 281 entries
+found_dict = {}
+notfound_dict = {}
+
+# TODO: populate this dataframe directly instead of going through results_dict.
+results_df = pd.DataFrame(columns=["disease", "found", "date"])
+
+for af in always_found:
+    try:
+        # .item() raises ValueError when the disease is absent from the deduplicated HPOA.
+        results_dict[af] = [True, hpoa_cleaned.loc[hpoa_cleaned['database_id'] == af, 'date'].item()]
+        found_dict[af] = hpoa_cleaned.loc[hpoa_cleaned['database_id'] == af, 'date'].item()
+    except ValueError:
+        print(f"No HPOA for {af}.")
+for nf in never_found:
+    try:
+        results_dict[nf] = [False, hpoa_cleaned.loc[hpoa_cleaned['database_id'] == nf, 'date'].item()]
+        notfound_dict[nf] = hpoa_cleaned.loc[hpoa_cleaned['database_id'] == nf, 'date'].item()
+    except ValueError:
+        print(f"No HPOA for {nf}.")
+
+res_to_clean = pd.DataFrame.from_dict(results_dict).transpose()
+res_to_clean.columns = ["found", "date"]
+# Cast dates to int64 nanoseconds so a groupwise mean is well defined.
+res_to_clean.date = pd.to_datetime(res_to_clean.date).values.astype(np.int64)
+final_avg = pd.DataFrame(pd.to_datetime(res_to_clean.groupby('found').mean().date))
+print(final_avg)
\ No newline at end of file
diff --git a/src/malco/post_process/ranking_utils.py b/src/malco/post_process/ranking_utils.py
index b19ee02f..0049914c 100644
--- a/src/malco/post_process/ranking_utils.py
+++ b/src/malco/post_process/ranking_utils.py
@@ -43,7 +43,8 @@ def compute_mrr_and_ranks(
 ) -> Path:
 
     # Read in results TSVs from self.output_dir that match glob results*tsv
-    out_caches = output_dir / "caches"
+    out_caches = Path("caches")
+    #out_caches = output_dir / "caches"
     out_caches.mkdir(exist_ok=True)
     output_dir = output_dir / out_subdir
     results_data = []
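The closing aggregation in disease_avail_knowledge.py uses a common pandas idiom: dates are cast to integer nanoseconds since the epoch, averaged per group, and converted back to datetimes (older pandas versions could not take a mean of datetime columns directly). A toy demonstration of the same technique on made-up dates, not project data:

import numpy as np
import pandas as pd

toy = pd.DataFrame({
    "found": [True, True, False, False],
    "date": pd.to_datetime(["2009-02-17", "2011-06-01", "2019-03-08", "2020-10-23"]),
})
# Cast to int64 nanoseconds since the epoch so that mean() is well defined...
toy["date"] = toy["date"].values.astype(np.int64)
# ...then take the per-group mean and convert back to datetimes (ns is the default unit).
mean_dates = pd.to_datetime(toy.groupby("found")["date"].mean())
print(mean_dates)  # one mean annotation date per found/not-found group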