From ebaacb2a34cd607432a56860addd79e45cb71271 Mon Sep 17 00:00:00 2001
From: Ben Gyori <ben.gyori@gmail.com>
Date: Tue, 25 Jun 2024 14:08:16 -0400
Subject: [PATCH 1/2] Update footer

---
 src/indra_cogex/apps/chat_page/app/public/index.html |  8 ++++----
 src/indra_cogex/apps/templates/base.html             |  8 ++++----
 src/indra_cogex/apps/templates/downtime/index.html   | 12 +++++-------
 3 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/src/indra_cogex/apps/chat_page/app/public/index.html b/src/indra_cogex/apps/chat_page/app/public/index.html
index 0ad659d1b..7c98f081c 100644
--- a/src/indra_cogex/apps/chat_page/app/public/index.html
+++ b/src/indra_cogex/apps/chat_page/app/public/index.html
@@ -448,11 +448,11 @@ <h1>IndraLab</h1>
                     <a href="#">Back to top</a>
                 </span>
                 <p class="small text-center text-muted">
-                    Developed by the <a href="https://indralab.github.io">INDRA Lab</a> in the
-                    <a href="https://hits.harvard.edu">Harvard Program in Therapeutic Science (HiTS)</a>.<br/>
+                    Developed by the <a href="https://gyorilab.github.io">Gyori Lab</a> at
+                    Northeastern University.<br/>
                     INDRA CoGEx (Context Graph Extension) is an automatically assembled biomedical knowledge graph which integrates
-                    causal mechanisms from INDRA with non-causal contextual relations including properties, ontology, and data.<br/>
-                    INDRA CoGEx is funded by the <b>DARPA Young Faculty Award W911NF2010255 (PI: Benjamin M. Gyori).</b><br/>
+                    causal mechanisms from INDRA with non-causal contextual relations including properties, ontologies, and data.<br/>
+                    INDRA CoGEx is funded by grant HR00112220036 under the DARPA ASKEM / ARPA-H BDF programs.<br/>
                 </p>
             </div>
             <div class="col-md-4">
diff --git a/src/indra_cogex/apps/templates/base.html b/src/indra_cogex/apps/templates/base.html
index bc2f1dfc9..a10db858e 100644
--- a/src/indra_cogex/apps/templates/base.html
+++ b/src/indra_cogex/apps/templates/base.html
@@ -154,11 +154,11 @@
                     <a href="#">Back to top</a>
                 </span>
                 <p class="small text-center text-muted">
-                    Developed by the <a href="https://indralab.github.io">INDRA Lab</a> in the
-                    <a href="https://hits.harvard.edu">Harvard Program in Therapeutic Science (HiTS)</a>.<br/>
+                    Developed by the <a href="https://gyorilab.github.io">Gyori Lab</a> at
+                    Northeastern University.<br/>
                     INDRA CoGEx (Context Graph Extension) is an automatically assembled biomedical knowledge graph which integrates
-                    causal mechanisms from INDRA with non-causal contextual relations including properties, ontology, and data.<br/>
-                    INDRA CoGEx is funded by the <b>DARPA Young Faculty Award W911NF2010255 (PI: Benjamin M. Gyori).</b><br/>
+                    causal mechanisms from INDRA with non-causal contextual relations including properties, ontologies, and data.<br/>
+                    INDRA CoGEx is funded by grant HR00112220036 under the DARPA ASKEM / ARPA-H BDF programs.<br/>
                 </p>
             </div>
             <div class="col-md-4">
diff --git a/src/indra_cogex/apps/templates/downtime/index.html b/src/indra_cogex/apps/templates/downtime/index.html
index a1c527092..321250380 100644
--- a/src/indra_cogex/apps/templates/downtime/index.html
+++ b/src/indra_cogex/apps/templates/downtime/index.html
@@ -47,13 +47,11 @@ <h2 class="text-center">The INDRA Discovery service for INDRA CoGEx is currently
         <a href="#">Back to top</a>
     </span>
         <p class="small text-center text-muted">
-            Developed by the <a href="https://indralab.github.io">INDRA Lab</a> in the
-            <a href="https://hits.harvard.edu">Harvard Program in Therapeutic Science (HiTS)</a>.<br/>
-            INDRA CoGEx (Context Graph Extension) is an automatically assembled biomedical knowledge graph which
-            integrates
-            causal mechanisms from INDRA with non-causal contextual relations including properties, ontology, and
-            data.<br/>
-            INDRA CoGEx is funded by the <b>DARPA Young Faculty Award W911NF2010255 (PI: Benjamin M. Gyori).</b><br/>
+            Developed by the <a href="https://gyorilab.github.io">Gyori Lab</a> at
+            Northeastern University.<br/>
+            INDRA CoGEx (Context Graph Extension) is an automatically assembled biomedical knowledge graph which integrates
+            causal mechanisms from INDRA with non-causal contextual relations including properties, ontologies, and data.<br/>
+            INDRA CoGEx is funded by grant HR00112220036 under the DARPA ASKEM / ARPA-H BDF programs.<br/>
         </p>
     </div>
 </footer>

From 30aec0d2e1a6e8a6bd1f50735c5b3f27275afeb3 Mon Sep 17 00:00:00 2001
From: Ben Gyori <ben.gyori@gmail.com>
Date: Tue, 25 Jun 2024 14:18:43 -0400
Subject: [PATCH 2/2] Add notebook

---
 notebooks/phosphoproteomics_geffen/README.md  |    3 +
 .../phosphoprot_explanation.ipynb             | 1569 +++++++++++++++++
 2 files changed, 1572 insertions(+)
 create mode 100644 notebooks/phosphoproteomics_geffen/README.md
 create mode 100644 notebooks/phosphoproteomics_geffen/phosphoprot_explanation.ipynb

diff --git a/notebooks/phosphoproteomics_geffen/README.md b/notebooks/phosphoproteomics_geffen/README.md
new file mode 100644
index 000000000..3d69739b0
--- /dev/null
+++ b/notebooks/phosphoproteomics_geffen/README.md
@@ -0,0 +1,3 @@
+This notebook demonstrates finding explanations for phosphorylation
+changes detected in https://www.cell.com/cell/fulltext/S0092-8674(23)00781-X#gr2
+between homologous recombination deficient (HRD) and non-deficient samples.
diff --git a/notebooks/phosphoproteomics_geffen/phosphoprot_explanation.ipynb b/notebooks/phosphoproteomics_geffen/phosphoprot_explanation.ipynb
new file mode 100644
index 000000000..df3d5520f
--- /dev/null
+++ b/notebooks/phosphoproteomics_geffen/phosphoprot_explanation.ipynb
@@ -0,0 +1,1569 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "38f360f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the URL for the supplementary table\n",
+    "url = 'https://www.cell.com/cms/10.1016/j.cell.2023.07.013/attachment/b342834f-0ab7-4d68-be07-66e9ba38e3df/mmc3.xlsx'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "938eb46b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "7cb32c76",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Naively trying to load the table from the URL errors with 403: Forbidden\n",
+    "# sheets = pd.read_excel(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "44277396",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>gene_name</th>\n",
+       "      <th>logFC</th>\n",
+       "      <th>AveExpr</th>\n",
+       "      <th>t</th>\n",
+       "      <th>P.Value</th>\n",
+       "      <th>adj.P.Val</th>\n",
+       "      <th>B</th>\n",
+       "      <th>qval</th>\n",
+       "      <th>propMissing</th>\n",
+       "      <th>...</th>\n",
+       "      <th>propMissingOut</th>\n",
+       "      <th>id</th>\n",
+       "      <th>id.description</th>\n",
+       "      <th>variableSites</th>\n",
+       "      <th>accession_number</th>\n",
+       "      <th>feature</th>\n",
+       "      <th>gsea_rank</th>\n",
+       "      <th>gsea_rank_p</th>\n",
+       "      <th>causalpath_adjusted_id</th>\n",
+       "      <th>prot_residue</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>NP_001269315.1_K345k_1_1_345_345</td>\n",
+       "      <td>IDH1</td>\n",
+       "      <td>-3.904846</td>\n",
+       "      <td>-0.941172</td>\n",
+       "      <td>-6.944246</td>\n",
+       "      <td>8.017521e-07</td>\n",
+       "      <td>0.000565</td>\n",
+       "      <td>4.015043</td>\n",
+       "      <td>0.000440</td>\n",
+       "      <td>0.685185</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.583333</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>isocitrate dehydrogenase [NADP] cytoplasmic G...</td>\n",
+       "      <td>['K345k', 'K345k']</td>\n",
+       "      <td>NP_001269315.1</td>\n",
+       "      <td>acetylome</td>\n",
+       "      <td>-7.854981e+00</td>\n",
+       "      <td>-23.803785</td>\n",
+       "      <td>NP_001269315.1_K345k_1_1_345_345</td>\n",
+       "      <td>IDH1_K345k_1_1_345_345</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>NP_006752.1_K142k_1_1_142_142</td>\n",
+       "      <td>YWHAE</td>\n",
+       "      <td>-1.019937</td>\n",
+       "      <td>0.148372</td>\n",
+       "      <td>-4.070577</td>\n",
+       "      <td>1.611649e-04</td>\n",
+       "      <td>0.049597</td>\n",
+       "      <td>0.820275</td>\n",
+       "      <td>0.038650</td>\n",
+       "      <td>0.111111</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.125000</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>14-3-3 protein epsilon GN=YWHAE</td>\n",
+       "      <td>['K142k', 'K142k']</td>\n",
+       "      <td>NP_006752.1</td>\n",
+       "      <td>acetylome</td>\n",
+       "      <td>-1.209124e+00</td>\n",
+       "      <td>-3.868344</td>\n",
+       "      <td>NP_006752.1_K142k_1_1_142_142</td>\n",
+       "      <td>YWHAE_K142k_1_1_142_142</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>NP_001609.2_K105k_1_1_105_105</td>\n",
+       "      <td>PARP1</td>\n",
+       "      <td>1.236072</td>\n",
+       "      <td>-0.516186</td>\n",
+       "      <td>3.981427</td>\n",
+       "      <td>2.110521e-04</td>\n",
+       "      <td>0.049597</td>\n",
+       "      <td>0.579085</td>\n",
+       "      <td>0.038650</td>\n",
+       "      <td>0.092593</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.083333</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>poly [ADP-ribose] polymerase 1 GN=PARP1</td>\n",
+       "      <td>['K105k', 'K105k']</td>\n",
+       "      <td>NP_001609.2</td>\n",
+       "      <td>acetylome</td>\n",
+       "      <td>1.428431e+00</td>\n",
+       "      <td>4.543317</td>\n",
+       "      <td>NP_001609.2_K105k_1_1_105_105</td>\n",
+       "      <td>PARP1_K105k_1_1_105_105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>NP_001122321.1_K455k_1_1_455_455</td>\n",
+       "      <td>SMARCA4</td>\n",
+       "      <td>0.913719</td>\n",
+       "      <td>-0.796626</td>\n",
+       "      <td>3.485168</td>\n",
+       "      <td>9.466467e-04</td>\n",
+       "      <td>0.118682</td>\n",
+       "      <td>-0.768856</td>\n",
+       "      <td>0.092486</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>transcription activator BRG1 isoform A GN=SMA...</td>\n",
+       "      <td>['K455k', 'K455k']</td>\n",
+       "      <td>NP_001122321.1</td>\n",
+       "      <td>acetylome</td>\n",
+       "      <td>8.427154e-01</td>\n",
+       "      <td>2.762915</td>\n",
+       "      <td>NP_001122321.1_K455k_1_1_455_455</td>\n",
+       "      <td>SMARCA4_K455k_1_1_455_455</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>NP_001609.2_K621k_1_1_621_621</td>\n",
+       "      <td>PARP1</td>\n",
+       "      <td>0.734708</td>\n",
+       "      <td>-0.190376</td>\n",
+       "      <td>3.490196</td>\n",
+       "      <td>9.621225e-04</td>\n",
+       "      <td>0.118682</td>\n",
+       "      <td>-0.770646</td>\n",
+       "      <td>0.092486</td>\n",
+       "      <td>0.055556</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.041667</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>poly [ADP-ribose] polymerase 1 GN=PARP1</td>\n",
+       "      <td>['K621k', 'K621k']</td>\n",
+       "      <td>NP_001609.2</td>\n",
+       "      <td>acetylome</td>\n",
+       "      <td>6.776145e-01</td>\n",
+       "      <td>2.216444</td>\n",
+       "      <td>NP_001609.2_K621k_1_1_621_621</td>\n",
+       "      <td>PARP1_K621k_1_1_621_621</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5388</th>\n",
+       "      <td>ENSG00000097007.19</td>\n",
+       "      <td>ABL1</td>\n",
+       "      <td>0.002431</td>\n",
+       "      <td>5.878027</td>\n",
+       "      <td>0.017314</td>\n",
+       "      <td>9.862142e-01</td>\n",
+       "      <td>0.993643</td>\n",
+       "      <td>-6.474316</td>\n",
+       "      <td>0.406492</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>transcriptome</td>\n",
+       "      <td>5.029325e-06</td>\n",
+       "      <td>0.000015</td>\n",
+       "      <td>ENSG00000097007.19</td>\n",
+       "      <td>ENSG00000097007.19</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5389</th>\n",
+       "      <td>ENSG00000102977.17</td>\n",
+       "      <td>ACD</td>\n",
+       "      <td>-0.001802</td>\n",
+       "      <td>2.935979</td>\n",
+       "      <td>-0.012782</td>\n",
+       "      <td>9.898221e-01</td>\n",
+       "      <td>0.994550</td>\n",
+       "      <td>-6.062690</td>\n",
+       "      <td>0.406863</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>transcriptome</td>\n",
+       "      <td>-2.292426e-06</td>\n",
+       "      <td>-0.000008</td>\n",
+       "      <td>ENSG00000102977.17</td>\n",
+       "      <td>ENSG00000102977.17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5390</th>\n",
+       "      <td>ENSG00000167325.15</td>\n",
+       "      <td>RRM1</td>\n",
+       "      <td>-0.001353</td>\n",
+       "      <td>6.557619</td>\n",
+       "      <td>-0.011514</td>\n",
+       "      <td>9.908319e-01</td>\n",
+       "      <td>0.994550</td>\n",
+       "      <td>-6.503115</td>\n",
+       "      <td>0.406863</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>transcriptome</td>\n",
+       "      <td>-1.463072e-06</td>\n",
+       "      <td>-0.000005</td>\n",
+       "      <td>ENSG00000167325.15</td>\n",
+       "      <td>ENSG00000167325.15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5391</th>\n",
+       "      <td>ENSG00000161036.13</td>\n",
+       "      <td>LRWD1</td>\n",
+       "      <td>-0.001006</td>\n",
+       "      <td>3.850618</td>\n",
+       "      <td>-0.007013</td>\n",
+       "      <td>9.944159e-01</td>\n",
+       "      <td>0.996278</td>\n",
+       "      <td>-6.224021</td>\n",
+       "      <td>0.407570</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>transcriptome</td>\n",
+       "      <td>-7.332363e-07</td>\n",
+       "      <td>-0.000002</td>\n",
+       "      <td>ENSG00000161036.13</td>\n",
+       "      <td>ENSG00000161036.13</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5392</th>\n",
+       "      <td>ENSG00000127616.18</td>\n",
+       "      <td>SMARCA4</td>\n",
+       "      <td>0.000826</td>\n",
+       "      <td>6.543572</td>\n",
+       "      <td>0.004348</td>\n",
+       "      <td>9.965375e-01</td>\n",
+       "      <td>0.996538</td>\n",
+       "      <td>-6.502125</td>\n",
+       "      <td>0.407676</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>HRD</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>transcriptome</td>\n",
+       "      <td>3.851904e-07</td>\n",
+       "      <td>0.000001</td>\n",
+       "      <td>ENSG00000127616.18</td>\n",
+       "      <td>ENSG00000127616.18</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5393 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                            Unnamed: 0 gene_name     logFC   AveExpr  \\\n",
+       "0     NP_001269315.1_K345k_1_1_345_345      IDH1 -3.904846 -0.941172   \n",
+       "1        NP_006752.1_K142k_1_1_142_142     YWHAE -1.019937  0.148372   \n",
+       "2        NP_001609.2_K105k_1_1_105_105     PARP1  1.236072 -0.516186   \n",
+       "3     NP_001122321.1_K455k_1_1_455_455   SMARCA4  0.913719 -0.796626   \n",
+       "4        NP_001609.2_K621k_1_1_621_621     PARP1  0.734708 -0.190376   \n",
+       "...                                ...       ...       ...       ...   \n",
+       "5388                ENSG00000097007.19      ABL1  0.002431  5.878027   \n",
+       "5389                ENSG00000102977.17       ACD -0.001802  2.935979   \n",
+       "5390                ENSG00000167325.15      RRM1 -0.001353  6.557619   \n",
+       "5391                ENSG00000161036.13     LRWD1 -0.001006  3.850618   \n",
+       "5392                ENSG00000127616.18   SMARCA4  0.000826  6.543572   \n",
+       "\n",
+       "             t       P.Value  adj.P.Val         B      qval  propMissing  ...  \\\n",
+       "0    -6.944246  8.017521e-07   0.000565  4.015043  0.000440     0.685185  ...   \n",
+       "1    -4.070577  1.611649e-04   0.049597  0.820275  0.038650     0.111111  ...   \n",
+       "2     3.981427  2.110521e-04   0.049597  0.579085  0.038650     0.092593  ...   \n",
+       "3     3.485168  9.466467e-04   0.118682 -0.768856  0.092486     0.000000  ...   \n",
+       "4     3.490196  9.621225e-04   0.118682 -0.770646  0.092486     0.055556  ...   \n",
+       "...        ...           ...        ...       ...       ...          ...  ...   \n",
+       "5388  0.017314  9.862142e-01   0.993643 -6.474316  0.406492          NaN  ...   \n",
+       "5389 -0.012782  9.898221e-01   0.994550 -6.062690  0.406863          NaN  ...   \n",
+       "5390 -0.011514  9.908319e-01   0.994550 -6.503115  0.406863          NaN  ...   \n",
+       "5391 -0.007013  9.944159e-01   0.996278 -6.224021  0.407570          NaN  ...   \n",
+       "5392  0.004348  9.965375e-01   0.996538 -6.502125  0.407676          NaN  ...   \n",
+       "\n",
+       "      propMissingOut   id                                     id.description  \\\n",
+       "0           0.583333  HRD   isocitrate dehydrogenase [NADP] cytoplasmic G...   \n",
+       "1           0.125000  HRD                    14-3-3 protein epsilon GN=YWHAE   \n",
+       "2           0.083333  HRD            poly [ADP-ribose] polymerase 1 GN=PARP1   \n",
+       "3           0.000000  HRD   transcription activator BRG1 isoform A GN=SMA...   \n",
+       "4           0.041667  HRD            poly [ADP-ribose] polymerase 1 GN=PARP1   \n",
+       "...              ...  ...                                                ...   \n",
+       "5388             NaN  HRD                                                NaN   \n",
+       "5389             NaN  HRD                                                NaN   \n",
+       "5390             NaN  HRD                                                NaN   \n",
+       "5391             NaN  HRD                                                NaN   \n",
+       "5392             NaN  HRD                                                NaN   \n",
+       "\n",
+       "           variableSites accession_number        feature     gsea_rank  \\\n",
+       "0     ['K345k', 'K345k']   NP_001269315.1      acetylome -7.854981e+00   \n",
+       "1     ['K142k', 'K142k']      NP_006752.1      acetylome -1.209124e+00   \n",
+       "2     ['K105k', 'K105k']      NP_001609.2      acetylome  1.428431e+00   \n",
+       "3     ['K455k', 'K455k']   NP_001122321.1      acetylome  8.427154e-01   \n",
+       "4     ['K621k', 'K621k']      NP_001609.2      acetylome  6.776145e-01   \n",
+       "...                  ...              ...            ...           ...   \n",
+       "5388                 NaN              NaN  transcriptome  5.029325e-06   \n",
+       "5389                 NaN              NaN  transcriptome -2.292426e-06   \n",
+       "5390                 NaN              NaN  transcriptome -1.463072e-06   \n",
+       "5391                 NaN              NaN  transcriptome -7.332363e-07   \n",
+       "5392                 NaN              NaN  transcriptome  3.851904e-07   \n",
+       "\n",
+       "      gsea_rank_p            causalpath_adjusted_id               prot_residue  \n",
+       "0      -23.803785  NP_001269315.1_K345k_1_1_345_345     IDH1_K345k_1_1_345_345  \n",
+       "1       -3.868344     NP_006752.1_K142k_1_1_142_142    YWHAE_K142k_1_1_142_142  \n",
+       "2        4.543317     NP_001609.2_K105k_1_1_105_105    PARP1_K105k_1_1_105_105  \n",
+       "3        2.762915  NP_001122321.1_K455k_1_1_455_455  SMARCA4_K455k_1_1_455_455  \n",
+       "4        2.216444     NP_001609.2_K621k_1_1_621_621    PARP1_K621k_1_1_621_621  \n",
+       "...           ...                               ...                        ...  \n",
+       "5388     0.000015                ENSG00000097007.19         ENSG00000097007.19  \n",
+       "5389    -0.000008                ENSG00000102977.17         ENSG00000102977.17  \n",
+       "5390    -0.000005                ENSG00000167325.15         ENSG00000167325.15  \n",
+       "5391    -0.000002                ENSG00000161036.13         ENSG00000161036.13  \n",
+       "5392     0.000001                ENSG00000127616.18         ENSG00000127616.18  \n",
+       "\n",
+       "[5393 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load a local copy of the table and select Table 3G\n",
+    "df = pd.read_excel('mmc3.xlsx', sheet_name='Table 3G')\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5e41eac5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Filter the table to adjusted p-values less than 0.055 to retain significant results\n",
+    "df = df[df['adj.P.Val'] < 0.055]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "608b94bc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Counter({'phosphoproteome': 206,\n",
+       "         'transcriptome': 124,\n",
+       "         'phosphoproteome_res': 69,\n",
+       "         'proteome': 64,\n",
+       "         'acetylome': 3,\n",
+       "         'acetylome_res': 1})"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Count the significant rows per feature type (e.g., phosphoproteome, transcriptome, proteome)\n",
+    "from collections import Counter\n",
+    "Counter(df.feature)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "1f852e08",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{('ATAD5', 'hgnc', 'S', '44'),\n",
+       " ('ATR', 'hgnc', 'T', '1989'),\n",
+       " ('ATRIP', 'hgnc', 'S', '224'),\n",
+       " ('ATRIP', 'hgnc', 'S', '239'),\n",
+       " ('ATRIP', 'hgnc', 'S', '518'),\n",
+       " ('BAZ1B', 'hgnc', 'S', '330'),\n",
+       " ('BAZ1B', 'hgnc', 'S', '349'),\n",
+       " ('BLM', 'hgnc', 'S', '28'),\n",
+       " ('BOD1L1', 'hgnc', 'S', '2905'),\n",
+       " ('BRCA2', 'hgnc', 'S', '93'),\n",
+       " ('BRIP1', 'hgnc', 'S', '226'),\n",
+       " ('CDC25B', 'hgnc', 'S', '321'),\n",
+       " ('CDC25B', 'hgnc', 'S', '353'),\n",
+       " ('CDC25B', 'hgnc', 'S', '375'),\n",
+       " ('CDC6', 'hgnc', 'S', '54'),\n",
+       " ('CDK1', 'hgnc', 'T', '14'),\n",
+       " ('CDK1', 'hgnc', 'T', '161'),\n",
+       " ('CDK1', 'hgnc', 'Y', '15'),\n",
+       " ('CHAF1A', 'hgnc', 'S', '775'),\n",
+       " ('CHEK2', 'hgnc', 'S', '303'),\n",
+       " ('CHTF18', 'hgnc', 'S', '225'),\n",
+       " ('CHTF18', 'hgnc', 'S', '64'),\n",
+       " ('CHTF18', 'hgnc', 'S', '871'),\n",
+       " ('CLSPN', 'hgnc', 'S', '225'),\n",
+       " ('CLSPN', 'hgnc', 'S', '83'),\n",
+       " ('CLSPN', 'hgnc', 'S', '846'),\n",
+       " ('CLSPN', 'hgnc', 'T', '1287'),\n",
+       " ('CUL4B', 'hgnc', 'S', '180'),\n",
+       " ('DBF4', 'hgnc', 'S', '359'),\n",
+       " ('DBF4', 'hgnc', 'S', '381'),\n",
+       " ('DBF4', 'hgnc', 'S', '508'),\n",
+       " ('DBF4', 'hgnc', 'T', '345'),\n",
+       " ('DBF4', 'hgnc', 'T', '553'),\n",
+       " ('DONSON', 'hgnc', 'S', '34'),\n",
+       " ('DTL', 'hgnc', 'S', '485'),\n",
+       " ('DTL', 'hgnc', 'S', '490'),\n",
+       " ('DTL', 'hgnc', 'S', '512'),\n",
+       " ('DTL', 'hgnc', 'S', '679'),\n",
+       " ('DTL', 'hgnc', 'S', '697'),\n",
+       " ('DTL', 'hgnc', 'T', '429'),\n",
+       " ('ERCC6L', 'hgnc', 'S', '1028'),\n",
+       " ('ERCC6L', 'hgnc', 'S', '14'),\n",
+       " ('ERCC6L', 'hgnc', 'S', '820'),\n",
+       " ('EXO1', 'hgnc', 'S', '598'),\n",
+       " ('EXO1', 'hgnc', 'S', '610'),\n",
+       " ('EXO1', 'hgnc', 'S', '639'),\n",
+       " ('EXO1', 'hgnc', 'S', '700'),\n",
+       " ('EXO1', 'hgnc', 'S', '702'),\n",
+       " ('EXO1', 'hgnc', 'S', '714'),\n",
+       " ('EXO1', 'hgnc', 'S', '746'),\n",
+       " ('EXO1', 'hgnc', 'S', '815'),\n",
+       " ('EXO1', 'hgnc', 'T', '475'),\n",
+       " ('FANCD2', 'hgnc', 'S', '1435'),\n",
+       " ('FANCE', 'hgnc', 'S', '249'),\n",
+       " ('FANCM', 'hgnc', 'S', '34'),\n",
+       " ('INO80B', 'hgnc', 'T', '60'),\n",
+       " ('KPNA2', 'hgnc', 'S', '490'),\n",
+       " ('LIG1', 'hgnc', 'S', '881'),\n",
+       " ('LIG1', 'hgnc', 'S', '883'),\n",
+       " ('LIG1', 'hgnc', 'T', '165'),\n",
+       " ('LIG1', 'hgnc', 'T', '203'),\n",
+       " ('LRWD1', 'hgnc', 'S', '243'),\n",
+       " ('LRWD1', 'hgnc', 'S', '259'),\n",
+       " ('MBD4', 'hgnc', 'S', '422'),\n",
+       " ('MCM2', 'hgnc', 'S', '139'),\n",
+       " ('MCM2', 'hgnc', 'S', '381'),\n",
+       " ('MCM2', 'hgnc', 'S', '40'),\n",
+       " ('MCM2', 'hgnc', 'S', '41'),\n",
+       " ('MCM3', 'hgnc', 'S', '756'),\n",
+       " ('MCM3', 'hgnc', 'T', '758'),\n",
+       " ('MCM3', 'hgnc', 'T', '767'),\n",
+       " ('MCM4', 'hgnc', 'S', '120'),\n",
+       " ('MCM4', 'hgnc', 'S', '131'),\n",
+       " ('MCM4', 'hgnc', 'T', '110'),\n",
+       " ('MCM6', 'hgnc', 'S', '13'),\n",
+       " ('MCM6', 'hgnc', 'S', '762'),\n",
+       " ('MCMBP', 'hgnc', 'T', '160'),\n",
+       " ('MDC1', 'hgnc', 'S', '1820'),\n",
+       " ('MDC1', 'hgnc', 'S', '453'),\n",
+       " ('MDC1', 'hgnc', 'T', '1157'),\n",
+       " ('MDC1', 'hgnc', 'T', '1239'),\n",
+       " ('MDC1', 'hgnc', 'T', '455'),\n",
+       " ('MPLKIP', 'hgnc', 'S', '66'),\n",
+       " ('MSH6', 'hgnc', 'S', '227'),\n",
+       " ('MSH6', 'hgnc', 'S', '309'),\n",
+       " ('MSH6', 'hgnc', 'S', '830'),\n",
+       " ('MSH6', 'hgnc', 'S', '91'),\n",
+       " ('NSD2', 'hgnc', 'T', '110'),\n",
+       " ('NSD2', 'hgnc', 'T', '114'),\n",
+       " ('NSD2', 'hgnc', 'T', '115'),\n",
+       " ('NSD2', 'hgnc', 'T', '544'),\n",
+       " ('NUDT5', 'hgnc', 'S', '3'),\n",
+       " ('ORC1', 'hgnc', 'S', '201'),\n",
+       " ('ORC1', 'hgnc', 'S', '273'),\n",
+       " ('ORC1', 'hgnc', 'S', '287'),\n",
+       " ('ORC2', 'hgnc', 'S', '280'),\n",
+       " ('ORC2', 'hgnc', 'T', '226'),\n",
+       " ('ORC6', 'hgnc', 'T', '195'),\n",
+       " ('PALB2', 'hgnc', 'S', '781'),\n",
+       " ('PARG', 'hgnc', 'S', '68'),\n",
+       " ('PARP1', 'hgnc', 'S', '179'),\n",
+       " ('PARP1', 'hgnc', 'S', '257'),\n",
+       " ('PARP1', 'hgnc', 'S', '782'),\n",
+       " ('PCLAF', 'hgnc', 'S', '72'),\n",
+       " ('PKMYT1', 'hgnc', 'S', '143'),\n",
+       " ('PLK1', 'hgnc', 'T', '210'),\n",
+       " ('PMS2', 'hgnc', 'T', '573'),\n",
+       " ('POLD3', 'hgnc', 'T', '277'),\n",
+       " ('POLQ', 'hgnc', 'S', '1587'),\n",
+       " ('POLR2C', 'hgnc', 'S', '124'),\n",
+       " ('RAD18', 'hgnc', 'S', '103'),\n",
+       " ('RAD18', 'hgnc', 'S', '99'),\n",
+       " ('RAD50', 'hgnc', 'S', '635'),\n",
+       " ('RAD51AP1', 'hgnc', 'S', '19'),\n",
+       " ('RAD51AP1', 'hgnc', 'S', '21'),\n",
+       " ('RAD51AP1', 'hgnc', 'S', '294'),\n",
+       " ('RAD51AP1', 'hgnc', 'T', '66'),\n",
+       " ('RECQL5', 'hgnc', 'S', '727'),\n",
+       " ('REV1', 'hgnc', 'S', '1144'),\n",
+       " ('RFC1', 'hgnc', 'S', '156'),\n",
+       " ('RIF1', 'hgnc', 'S', '1454'),\n",
+       " ('RIF1', 'hgnc', 'S', '1542'),\n",
+       " ('RIF1', 'hgnc', 'S', '1579'),\n",
+       " ('RIF1', 'hgnc', 'S', '1616'),\n",
+       " ('RIF1', 'hgnc', 'S', '1688'),\n",
+       " ('RIF1', 'hgnc', 'S', '1873'),\n",
+       " ('RIF1', 'hgnc', 'S', '2157'),\n",
+       " ('RIF1', 'hgnc', 'S', '2172'),\n",
+       " ('RIF1', 'hgnc', 'S', '2176'),\n",
+       " ('RIF1', 'hgnc', 'S', '2205'),\n",
+       " ('RIF1', 'hgnc', 'S', '2243'),\n",
+       " ('RIF1', 'hgnc', 'S', '2265'),\n",
+       " ('RIF1', 'hgnc', 'S', '2348'),\n",
+       " ('RIF1', 'hgnc', 'S', '2393'),\n",
+       " ('RIF1', 'hgnc', 'S', '782'),\n",
+       " ('RNF168', 'hgnc', 'S', '134'),\n",
+       " ('RPA1', 'hgnc', 'S', '384'),\n",
+       " ('SAMHD1', 'hgnc', 'T', '592'),\n",
+       " ('SLF2', 'hgnc', 'S', '710'),\n",
+       " ('SLF2', 'hgnc', 'T', '711'),\n",
+       " ('SMARCA5', 'hgnc', 'S', '755'),\n",
+       " ('SMARCC1', 'hgnc', 'T', '398'),\n",
+       " ('SMC6', 'hgnc', 'S', '11'),\n",
+       " ('TERF2', 'hgnc', 'S', '365'),\n",
+       " ('TERF2IP', 'hgnc', 'S', '36'),\n",
+       " ('TICRR', 'hgnc', 'S', '1750'),\n",
+       " ('TICRR', 'hgnc', 'S', '599'),\n",
+       " ('TICRR', 'hgnc', 'S', '865'),\n",
+       " ('TICRR', 'hgnc', 'S', '923'),\n",
+       " ('TICRR', 'hgnc', 'T', '1678'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1106'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1213'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1247'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1374'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1377'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1474'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1504'),\n",
+       " ('TOP2A', 'hgnc', 'S', '1525'),\n",
+       " ('TOP2B', 'hgnc', 'S', '1236'),\n",
+       " ('TOPBP1', 'hgnc', 'S', '1504'),\n",
+       " ('TOPBP1', 'hgnc', 'S', '888'),\n",
+       " ('TP53BP1', 'hgnc', 'S', '1623'),\n",
+       " ('TP53BP1', 'hgnc', 'S', '1670'),\n",
+       " ('TP53BP1', 'hgnc', 'S', '1683'),\n",
+       " ('TP53BP1', 'hgnc', 'S', '1706'),\n",
+       " ('TP53BP1', 'hgnc', 'S', '1763'),\n",
+       " ('TP53BP1', 'hgnc', 'S', '1764'),\n",
+       " ('TTK', 'hgnc', 'S', '281'),\n",
+       " ('TTK', 'hgnc', 'S', '436'),\n",
+       " ('TTK', 'hgnc', 'S', '821'),\n",
+       " ('TTK', 'hgnc', 'T', '33'),\n",
+       " ('UBE2T', 'hgnc', 'S', '184'),\n",
+       " ('UFD1', 'hgnc', 'S', '245'),\n",
+       " ('UFD1', 'hgnc', 'S', '247'),\n",
+       " ('UFD1', 'hgnc', 'S', '299'),\n",
+       " ('USP1', 'hgnc', 'S', '313'),\n",
+       " ('USP1', 'hgnc', 'S', '327'),\n",
+       " ('USP1', 'hgnc', 'S', '475'),\n",
+       " ('USP10', 'hgnc', 'S', '547'),\n",
+       " ('USP37', 'hgnc', 'S', '650'),\n",
+       " ('USP37', 'hgnc', 'S', '652'),\n",
+       " ('WDHD1', 'hgnc', 'S', '333'),\n",
+       " ('WDHD1', 'hgnc', 'S', '868'),\n",
+       " ('WDHD1', 'hgnc', 'S', '958'),\n",
+       " ('WRN', 'hgnc', 'S', '1133'),\n",
+       " ('XRCC1', 'hgnc', 'S', '266'),\n",
+       " ('XRCC1', 'hgnc', 'T', '257'),\n",
+       " ('XRCC6', 'hgnc', 'S', '2'),\n",
+       " ('XRCC6', 'hgnc', 'T', '455')}"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Construct a list of all phosphorylation sites that have significantly\n",
+    "# increased compared to control, and represent these as tuples compatible\n",
+    "# with Protmapper (gene_name, 'hgnc', residue, position).\n",
+    "import re\n",
+    "sites = set()\n",
+    "for _, row in df.iterrows():\n",
+    "    if row['feature'] == 'phosphoproteome' and row['logFC'] > 0:\n",
+    "        matches = re.findall(r'([STY]\\d+)', row['variableSites'])\n",
+    "        sites |= {(row['gene_name'], 'hgnc', match[0], match[1:]) for match in matches}\n",
+    "sites"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "b15177e1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Mapping sites: 0it [00:00, ?it/s]INFO: [2024-06-25 12:55:01] protmapper.uniprot_client - Loading Swissprot sequences...\n",
+      "INFO: [2024-06-25 12:55:03] protmapper.uniprot_client - Loading Uniprot isoform sequences...\n",
+      "Mapping sites: 189it [00:11, 16.96it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Use Protmapper to map the sites to the human reference sequence\n",
+    "import protmapper\n",
+    "mapped_sites = protmapper.default_mapper.map_sitelist_to_human_ref(sites)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "c5904b00",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Counter({'VALID': 174, 'NO_MAPPING_FOUND': 15})"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Tally the mapping results by their description\n",
+    "Counter([ms.description for ms in mapped_sites])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "26a813ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only sites that are valid as given or that have a mapped ID\n",
+    "valid_sites = [ms for ms in mapped_sites if ms.valid or ms.mapped_id]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "93f16f6a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Query the INDRA DB for Phosphorylation statements whose substrate is one\n",
+    "# of the proteins whose phosphorylation appears in the site list\n",
+    "from indra.sources.indra_db_rest import get_statements_from_query\n",
+    "from indra.sources.indra_db_rest.query import HasAgent, HasType"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "7f816be1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stmts_by_target = {}\n",
+    "unique_genes = {ms.gene_name for ms in valid_sites}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "1b294e7b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|                                                   | 0/78 [00:00<?, ?it/s]INFO: [2024-06-25 12:57:00] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CHTF18 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:00] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:00] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:00] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "  1%|▌                                          | 1/78 [00:01<01:39,  1.29s/it]INFO: [2024-06-25 12:57:02] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RIF1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:02] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:02] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:02] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "  3%|█                                          | 2/78 [00:03<02:01,  1.60s/it]INFO: [2024-06-25 12:57:03] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CDK1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:03] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:03] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:03] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "INFO: [2024-06-25 12:57:13] indra_db_rest.request_logs - Running 1st request for statements\n",
+      "INFO: [2024-06-25 12:57:13] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:13] indra_db_rest.request_logs -   OFFSET: 500\n",
+      "  4%|█▋                                         | 3/78 [00:14<07:28,  5.98s/it]INFO: [2024-06-25 12:57:15] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=POLD3 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:15] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:15] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:15] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "  5%|██▏                                        | 4/78 [00:15<04:59,  4.05s/it]INFO: [2024-06-25 12:57:16] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PMS2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:16] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:16] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:16] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "  6%|██▊                                        | 5/78 [00:16<03:48,  3.13s/it]INFO: [2024-06-25 12:57:17] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCM2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:17] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:17] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:17] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "  8%|███▎                                       | 6/78 [00:19<03:22,  2.82s/it]INFO: [2024-06-25 12:57:19] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=USP1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:19] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:19] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:19] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "  9%|███▊                                       | 7/78 [00:19<02:34,  2.18s/it]INFO: [2024-06-25 12:57:20] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TERF2IP with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:20] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:20] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:20] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 10%|████▍                                      | 8/78 [00:21<02:19,  2.00s/it]INFO: [2024-06-25 12:57:22] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MPLKIP with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:22] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:22] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:22] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 12%|████▉                                      | 9/78 [00:22<01:56,  1.69s/it]INFO: [2024-06-25 12:57:23] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=DBF4 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:23] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:23] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:23] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 13%|█████▍                                    | 10/78 [00:23<01:48,  1.59s/it]INFO: [2024-06-25 12:57:24] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PARG with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:24] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:24] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:24] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 14%|█████▉                                    | 11/78 [00:25<01:37,  1.46s/it]INFO: [2024-06-25 12:57:25] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=NSD2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:25] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:25] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:25] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 15%|██████▍                                   | 12/78 [00:26<01:37,  1.48s/it]INFO: [2024-06-25 12:57:27] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=UFD1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:27] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:27] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:27] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 17%|███████                                   | 13/78 [00:27<01:26,  1.33s/it]INFO: [2024-06-25 12:57:28] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=INO80B with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:28] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:28] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:28] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 18%|███████▌                                  | 14/78 [00:29<01:27,  1.36s/it]INFO: [2024-06-25 12:57:29] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=FANCD2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:29] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:29] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:29] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 19%|████████                                  | 15/78 [00:31<01:44,  1.65s/it]INFO: [2024-06-25 12:57:32] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=XRCC1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:32] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:32] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:32] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 21%|████████▌                                 | 16/78 [00:35<02:25,  2.35s/it]INFO: [2024-06-25 12:57:36] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=LRWD1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:36] indra_db_rest.request_logs - Running 0th request for statements\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO: [2024-06-25 12:57:36] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:36] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 22%|█████████▏                                | 17/78 [00:36<01:58,  1.95s/it]INFO: [2024-06-25 12:57:37] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=POLR2C with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 23%|█████████▋                                | 18/78 [00:37<01:37,  1.63s/it]INFO: [2024-06-25 12:57:37] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RECQL5 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 24%|██████████▏                               | 19/78 [00:38<01:25,  1.46s/it]INFO: [2024-06-25 12:57:39] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=WDHD1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:39] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:39] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:39] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 26%|██████████▊                               | 20/78 [00:39<01:21,  1.41s/it]INFO: [2024-06-25 12:57:40] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RNF168 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:40] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:40] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:40] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 27%|███████████▎                              | 21/78 [00:40<01:17,  1.36s/it]INFO: [2024-06-25 12:57:41] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ERCC6L with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:41] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:41] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:41] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 28%|███████████▊                              | 22/78 [00:41<01:12,  1.29s/it]INFO: [2024-06-25 12:57:42] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCM6 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:42] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:42] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:42] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 29%|████████████▍                             | 23/78 [00:43<01:16,  1.39s/it]INFO: [2024-06-25 12:57:44] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BRCA2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:44] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:44] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:44] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 31%|████████████▉                             | 24/78 [00:46<01:35,  1.78s/it]INFO: [2024-06-25 12:57:46] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RAD51AP1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:46] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:46] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:46] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 32%|█████████████▍                            | 25/78 [00:47<01:22,  1.55s/it]INFO: [2024-06-25 12:57:48] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PLK1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:48] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:48] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:48] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 33%|██████████████                            | 26/78 [00:51<02:07,  2.46s/it]INFO: [2024-06-25 12:57:52] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RFC1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:52] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:52] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:52] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 35%|██████████████▌                           | 27/78 [00:53<01:49,  2.15s/it]INFO: [2024-06-25 12:57:54] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ATRIP with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:54] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:54] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:54] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 36%|███████████████                           | 28/78 [00:54<01:33,  1.87s/it]INFO: [2024-06-25 12:57:55] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TOP2A with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:55] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:55] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:55] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 37%|███████████████▌                          | 29/78 [00:56<01:36,  1.97s/it]INFO: [2024-06-25 12:57:57] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ATR with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:57:57] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:57:57] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:57:57] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 38%|████████████████▏                         | 30/78 [01:02<02:22,  2.97s/it]INFO: [2024-06-25 12:58:02] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RAD50 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:02] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:02] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:02] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 40%|████████████████▋                         | 31/78 [01:03<02:05,  2.66s/it]INFO: [2024-06-25 12:58:04] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PALB2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:04] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:04] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:04] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 41%|█████████████████▏                        | 32/78 [01:05<01:48,  2.36s/it]INFO: [2024-06-25 12:58:06] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BRIP1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:06] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:06] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:06] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 42%|█████████████████▊                        | 33/78 [01:07<01:34,  2.11s/it]INFO: [2024-06-25 12:58:07] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ORC6 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:07] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:07] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:07] indra_db_rest.request_logs -   OFFSET: 0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " 44%|██████████████████▎                       | 34/78 [01:08<01:17,  1.76s/it]INFO: [2024-06-25 12:58:08] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SMARCC1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:08] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:08] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:08] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 45%|██████████████████▊                       | 35/78 [01:09<01:08,  1.59s/it]INFO: [2024-06-25 12:58:09] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=KPNA2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:09] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:09] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:09] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 46%|███████████████████▍                      | 36/78 [01:11<01:11,  1.69s/it]INFO: [2024-06-25 12:58:11] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=WRN with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:11] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:11] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:11] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 47%|███████████████████▉                      | 37/78 [01:13<01:11,  1.75s/it]INFO: [2024-06-25 12:58:13] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SLF2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:13] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:13] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:13] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 49%|████████████████████▍                     | 38/78 [01:13<00:59,  1.49s/it]INFO: [2024-06-25 12:58:14] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CLSPN with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:14] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:14] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:14] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 50%|█████████████████████                     | 39/78 [01:16<01:05,  1.67s/it]INFO: [2024-06-25 12:58:16] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=USP37 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:16] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:16] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:16] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 51%|█████████████████████▌                    | 40/78 [01:17<00:57,  1.51s/it]INFO: [2024-06-25 12:58:17] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MSH6 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:17] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:17] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:17] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 53%|██████████████████████                    | 41/78 [01:19<00:59,  1.60s/it]INFO: [2024-06-25 12:58:19] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=FANCE with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:19] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:19] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:19] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 54%|██████████████████████▌                   | 42/78 [01:19<00:50,  1.41s/it]INFO: [2024-06-25 12:58:20] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=UBE2T with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:20] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:20] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:20] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 55%|███████████████████████▏                  | 43/78 [01:21<00:45,  1.29s/it]INFO: [2024-06-25 12:58:21] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=USP10 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:21] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:21] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:21] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 56%|███████████████████████▋                  | 44/78 [01:22<00:46,  1.36s/it]INFO: [2024-06-25 12:58:23] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BLM with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:23] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:23] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:23] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 58%|████████████████████████▏                 | 45/78 [01:26<01:07,  2.03s/it]INFO: [2024-06-25 12:58:26] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCM4 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:26] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:26] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:26] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 59%|████████████████████████▊                 | 46/78 [01:27<01:03,  1.97s/it]INFO: [2024-06-25 12:58:28] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SMC6 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:28] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:28] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:28] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 60%|█████████████████████████▎                | 47/78 [01:29<00:52,  1.70s/it]INFO: [2024-06-25 12:58:29] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BAZ1B with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:29] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:29] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:29] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 62%|█████████████████████████▊                | 48/78 [01:30<00:50,  1.68s/it]INFO: [2024-06-25 12:58:31] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BOD1L1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:31] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:31] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:31] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 63%|██████████████████████████▍               | 49/78 [01:31<00:43,  1.50s/it]INFO: [2024-06-25 12:58:32] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CDC6 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:32] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:32] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:32] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 64%|██████████████████████████▉               | 50/78 [01:34<00:51,  1.85s/it]INFO: [2024-06-25 12:58:35] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TOP2B with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:35] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:35] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:35] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 65%|███████████████████████████▍              | 51/78 [01:35<00:44,  1.65s/it]INFO: [2024-06-25 12:58:36] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TTK with role=OBJECT and have type Phosphorylation.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO: [2024-06-25 12:58:36] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:36] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:36] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 67%|████████████████████████████              | 52/78 [01:38<00:49,  1.92s/it]INFO: [2024-06-25 12:58:38] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CHAF1A with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:38] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:38] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:38] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 68%|████████████████████████████▌             | 53/78 [01:40<00:54,  2.20s/it]INFO: [2024-06-25 12:58:41] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=DTL with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:41] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:41] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:41] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 69%|█████████████████████████████             | 54/78 [01:42<00:46,  1.96s/it]INFO: [2024-06-25 12:58:43] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PCLAF with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:43] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:43] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:43] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 71%|█████████████████████████████▌            | 55/78 [01:44<00:49,  2.14s/it]INFO: [2024-06-25 12:58:45] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TERF2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:45] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:45] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:45] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 72%|██████████████████████████████▏           | 56/78 [01:46<00:43,  1.97s/it]INFO: [2024-06-25 12:58:47] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CDC25B with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:47] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:47] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:47] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 73%|██████████████████████████████▋           | 57/78 [01:48<00:41,  1.97s/it]INFO: [2024-06-25 12:58:49] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TICRR with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:49] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:49] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:49] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 74%|███████████████████████████████▏          | 58/78 [01:49<00:33,  1.69s/it]INFO: [2024-06-25 12:58:50] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=FANCM with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:50] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:50] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:50] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 76%|███████████████████████████████▊          | 59/78 [01:50<00:28,  1.52s/it]INFO: [2024-06-25 12:58:51] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RAD18 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:51] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:51] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:51] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 77%|████████████████████████████████▎         | 60/78 [01:52<00:27,  1.54s/it]INFO: [2024-06-25 12:58:52] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MDC1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:52] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:52] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:52] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 78%|████████████████████████████████▊         | 61/78 [01:54<00:28,  1.69s/it]INFO: [2024-06-25 12:58:54] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=XRCC6 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:54] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:55] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:55] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 79%|█████████████████████████████████▍        | 62/78 [01:56<00:27,  1.72s/it]INFO: [2024-06-25 12:58:56] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PKMYT1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:56] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:56] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:56] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 81%|█████████████████████████████████▉        | 63/78 [01:57<00:23,  1.54s/it]INFO: [2024-06-25 12:58:57] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCMBP with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:57] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:57] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:57] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 82%|██████████████████████████████████▍       | 64/78 [01:58<00:20,  1.47s/it]INFO: [2024-06-25 12:58:59] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=LIG1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:58:59] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:58:59] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:58:59] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 83%|███████████████████████████████████       | 65/78 [01:59<00:18,  1.39s/it]INFO: [2024-06-25 12:59:00] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SMARCA5 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:00] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:00] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:00] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 85%|███████████████████████████████████▌      | 66/78 [02:00<00:16,  1.35s/it]INFO: [2024-06-25 12:59:01] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RPA1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:01] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:01] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:01] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 86%|████████████████████████████████████      | 67/78 [02:03<00:17,  1.62s/it]INFO: [2024-06-25 12:59:03] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ATAD5 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:03] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:03] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:03] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 87%|████████████████████████████████████▌     | 68/78 [02:04<00:14,  1.42s/it]INFO: [2024-06-25 12:59:04] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=DONSON with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:04] indra_db_rest.request_logs - Running 0th request for statements\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO: [2024-06-25 12:59:04] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:04] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 88%|█████████████████████████████████████▏    | 69/78 [02:04<00:11,  1.23s/it]INFO: [2024-06-25 12:59:05] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=NUDT5 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:05] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:05] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:05] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 90%|█████████████████████████████████████▋    | 70/78 [02:05<00:09,  1.13s/it]INFO: [2024-06-25 12:59:06] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ORC1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:06] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:06] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:06] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 91%|██████████████████████████████████████▏   | 71/78 [02:07<00:08,  1.18s/it]INFO: [2024-06-25 12:59:07] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=POLQ with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:07] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:07] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:07] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 92%|██████████████████████████████████████▊   | 72/78 [02:08<00:07,  1.18s/it]INFO: [2024-06-25 12:59:09] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ORC2 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:09] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:09] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:09] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 94%|███████████████████████████████████████▎  | 73/78 [02:09<00:06,  1.29s/it]INFO: [2024-06-25 12:59:10] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PARP1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:10] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:10] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:10] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 95%|███████████████████████████████████████▊  | 74/78 [02:18<00:13,  3.38s/it]INFO: [2024-06-25 12:59:18] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SAMHD1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:18] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:18] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:18] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 96%|████████████████████████████████████████▍ | 75/78 [02:20<00:08,  2.97s/it]INFO: [2024-06-25 12:59:20] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TOPBP1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:20] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:20] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:20] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 97%|████████████████████████████████████████▉ | 76/78 [02:21<00:04,  2.49s/it]INFO: [2024-06-25 12:59:22] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=EXO1 with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:22] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:22] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:22] indra_db_rest.request_logs -   OFFSET: 0\n",
+      " 99%|█████████████████████████████████████████▍| 77/78 [02:23<00:02,  2.31s/it]INFO: [2024-06-25 12:59:24] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CUL4B with role=OBJECT and have type Phosphorylation.\n",
+      "INFO: [2024-06-25 12:59:24] indra_db_rest.request_logs - Running 0th request for statements\n",
+      "INFO: [2024-06-25 12:59:24] indra_db_rest.request_logs -   LIMIT: None\n",
+      "INFO: [2024-06-25 12:59:24] indra_db_rest.request_logs -   OFFSET: 0\n",
+      "100%|██████████████████████████████████████████| 78/78 [02:24<00:00,  1.86s/it]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import tqdm\n",
+    "for gene in tqdm.tqdm(unique_genes):\n",
+    "    q = HasAgent(gene, role='OBJECT') & HasType('Phosphorylation')\n",
+    "    ip = get_statements_from_query(q)\n",
+    "    stmts_by_target[gene] = ip.statements"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "9480d8b9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{('TICRR', 'S', '599'): [],\n",
+       " ('DBF4', 'T', '345'): [],\n",
+       " ('ATR', 'T', '1989'): [Phosphorylation(ATR(), ATR(), T, 1989),\n",
+       "  Phosphorylation(KDM5A(), ATR(), T, 1989),\n",
+       "  Phosphorylation(IVNS1ABP(), ATR(), T, 1989),\n",
+       "  Phosphorylation(SSB(), ATR(), T, 1989),\n",
+       "  Phosphorylation(KDM5B(), ATR(), T, 1989),\n",
+       "  Phosphorylation(PRPF19(), ATR(), T, 1989)],\n",
+       " ('UFD1', 'S', '299'): [],\n",
+       " ('UBE2T', 'S', '184'): [],\n",
+       " ('XRCC6', 'S', '2'): [],\n",
+       " ('EXO1', 'S', '714'): [Phosphorylation(ATM(), EXO1(), S, 714),\n",
+       "  Phosphorylation(ATR(), EXO1(), S, 714)],\n",
+       " ('RAD51AP1', 'S', '19'): [],\n",
+       " ('POLR2C', 'S', '124'): [],\n",
+       " ('CDC25B', 'S', '321'): [Phosphorylation(CDK1(), CDC25B(), S, 321),\n",
+       "  Phosphorylation(MELK(), CDC25B(), S, 321),\n",
+       "  Phosphorylation(PRKCA(), CDC25B(), S, 321),\n",
+       "  Phosphorylation(CDC25B(), CDC25B(), S, 321),\n",
+       "  Phosphorylation(PRKACA(), CDC25B(), S, 321)],\n",
+       " ('BAZ1B', 'S', '330'): [],\n",
+       " ('MDC1', 'T', '455'): [Phosphorylation(CSNK2A1(), MDC1(), T, 455)],\n",
+       " ('RIF1', 'S', '2205'): [Phosphorylation(CDK1(), RIF1(), S, 2205),\n",
+       "  Phosphorylation(CHEK1(), RIF1(), S, 2205),\n",
+       "  Phosphorylation(AURKB(), RIF1(), S, 2205),\n",
+       "  Phosphorylation(ATR(), RIF1(), S, 2205)],\n",
+       " ('MCM4', 'S', '120'): [],\n",
+       " ('ORC1', 'S', '287'): [],\n",
+       " ('DTL', 'S', '697'): [],\n",
+       " ('TOP2A', 'S', '1377'): [Phosphorylation(CSNK2A1(), TOP2A(), S, 1377),\n",
+       "  Phosphorylation(CSNK2A2(), TOP2A(), S, 1377)],\n",
+       " ('DTL', 'S', '512'): [],\n",
+       " ('PARG', 'S', '68'): [],\n",
+       " ('DBF4', 'S', '508'): [],\n",
+       " ('RIF1', 'S', '1688'): [],\n",
+       " ('CHTF18', 'S', '871'): [],\n",
+       " ('UFD1', 'S', '245'): [],\n",
+       " ('RIF1', 'S', '2243'): [],\n",
+       " ('TERF2', 'S', '365'): [Phosphorylation(CDK2(), TERF2(), S, 365),\n",
+       "  Phosphorylation(MAPK1(), TERF2(), S, 365)],\n",
+       " ('BRIP1', 'S', '226'): [],\n",
+       " ('RIF1', 'S', '1454'): [],\n",
+       " ('RIF1', 'S', '782'): [],\n",
+       " ('RIF1', 'S', '1873'): [],\n",
+       " ('PARP1', 'S', '179'): [Phosphorylation(ATR(), PARP1(), S, 179)],\n",
+       " ('MDC1', 'S', '1820'): [],\n",
+       " ('LIG1', 'T', '165'): [],\n",
+       " ('MCM4', 'S', '131'): [],\n",
+       " ('FANCD2', 'S', '1435'): [],\n",
+       " ('WDHD1', 'S', '868'): [],\n",
+       " ('RAD51AP1', 'S', '294'): [],\n",
+       " ('XRCC6', 'T', '455'): [Phosphorylation(CDK2(), XRCC6(), T, 455)],\n",
+       " ('TTK', 'S', '436'): [Phosphorylation(TTK(), TTK(), S, 436),\n",
+       "  Phosphorylation(CDK2(), TTK(), S, 436),\n",
+       "  Phosphorylation(BRAF(), TTK(), S, 436)],\n",
+       " ('FANCE', 'S', '249'): [],\n",
+       " ('CDC25B', 'S', '353'): [Phosphorylation(AURKA(), CDC25B(), S, 353),\n",
+       "  Phosphorylation(MAPKAPK2(), CDC25B(), S, 353),\n",
+       "  Phosphorylation(RPS6KA1(), CDC25B(), S, 353),\n",
+       "  Phosphorylation(AURKA(kinase), CDC25B(), S, 353),\n",
+       "  Phosphorylation(PLK1(), CDC25B(), S, 353),\n",
+       "  Phosphorylation(AKT1(), CDC25B(), S, 353)],\n",
+       " ('MCM4', 'T', '110'): [Phosphorylation(CDK2(), MCM4(), T, 110),\n",
+       "  Phosphorylation(CDK1(), MCM4(), T, 110),\n",
+       "  Phosphorylation(CCNA2(bound: [CDK2, True]), MCM4(), T, 110),\n",
+       "  Phosphorylation(CDK2(kinase), MCM4(), T, 110)],\n",
+       " ('RAD51AP1', 'S', '21'): [],\n",
+       " ('ORC2', 'T', '226'): [Phosphorylation(CDK2(), ORC2(), T, 226),\n",
+       "  Phosphorylation(CCNA2(bound: [CDK2, True]), ORC2(), T, 226)],\n",
+       " ('CHTF18', 'S', '64'): [],\n",
+       " ('XRCC1', 'S', '266'): [],\n",
+       " ('PMS2', 'T', '573'): [],\n",
+       " ('TICRR', 'S', '865'): [Phosphorylation(CHEK1(), TICRR(), S, 865)],\n",
+       " ('MCM2', 'S', '41'): [Phosphorylation(CDC7(), MCM2(), S, 41),\n",
+       "  Phosphorylation(CDK2(), MCM2(), S, 41),\n",
+       "  Phosphorylation(CDK7(), MCM2(), S, 41),\n",
+       "  Phosphorylation(CSNK2A1(), MCM2(), S, 41)],\n",
+       " ('DTL', 'S', '490'): [],\n",
+       " ('MCM2', 'S', '381'): [],\n",
+       " ('TOP2A', 'S', '1504'): [],\n",
+       " ('SLF2', 'S', '710'): [],\n",
+       " ('LRWD1', 'S', '243'): [],\n",
+       " ('RNF168', 'S', '134'): [],\n",
+       " ('NSD2', 'T', '114'): [],\n",
+       " ('CLSPN', 'S', '846'): [],\n",
+       " ('MCM2', 'S', '40'): [Phosphorylation(CDC7(), MCM2(), S, 40),\n",
+       "  Phosphorylation(CDK2(), MCM2(), S, 40),\n",
+       "  Phosphorylation(CSNK2A1(), MCM2(), S, 40),\n",
+       "  Phosphorylation(CDK7(), MCM2(), S, 40)],\n",
+       " ('CDC6', 'S', '54'): [Phosphorylation(CDK2(), CDC6(), S, 54),\n",
+       "  Phosphorylation(CCNA2(bound: [CDK2, True]), CDC6(), S, 54)],\n",
+       " ('PCLAF', 'S', '72'): [],\n",
+       " ('MCM2', 'S', '139'): [Phosphorylation(CDC7(), MCM2(), S, 139),\n",
+       "  Phosphorylation(CDK7(), MCM2(), S, 139),\n",
+       "  Phosphorylation(CDK2(), MCM2(), S, 139),\n",
+       "  Phosphorylation(SIK1(), MCM2(), S, 139),\n",
+       "  Phosphorylation(CSNK2A1(), MCM2(), S, 139)],\n",
+       " ('RIF1', 'S', '2265'): [],\n",
+       " ('MPLKIP', 'S', '66'): [],\n",
+       " ('CUL4B', 'S', '180'): [],\n",
+       " ('MDC1', 'T', '1239'): [],\n",
+       " ('ERCC6L', 'S', '14'): [],\n",
+       " ('CDK1', 'T', '161'): [Phosphorylation(CDK7(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CDK1(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CDK2(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CDK12(), CDK1(), T, 161),\n",
+       "  Phosphorylation(RGCC(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CDK4(), CDK1(), T, 161),\n",
+       "  Phosphorylation(TGFBR2(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CDKN1A(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CXADR(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CDKN1B(), CDK1(), T, 161),\n",
+       "  Phosphorylation(PKMYT1(), CDK1(), T, 161),\n",
+       "  Phosphorylation(MAP3K8(), CDK1(), T, 161),\n",
+       "  Phosphorylation(CCNH(), CDK1(), T, 161)],\n",
+       " ('PLK1', 'T', '210'): [Phosphorylation(AURKA(), PLK1(), T, 210),\n",
+       "  Phosphorylation(AURKB(), PLK1(), T, 210),\n",
+       "  Phosphorylation(BORA(), PLK1(), T, 210),\n",
+       "  Phosphorylation(CHEK1(), PLK1(), T, 210),\n",
+       "  Phosphorylation(PLK1(), PLK1(), T, 210),\n",
+       "  Phosphorylation(PIM1(), PLK1(), T, 210),\n",
+       "  Phosphorylation(VRK2(), PLK1(), T, 210),\n",
+       "  Phosphorylation(PPP1R12A(), PLK1(), T, 210),\n",
+       "  Phosphorylation(MAP3K8(), PLK1(), T, 210),\n",
+       "  Phosphorylation(OPTN(), PLK1(), T, 210),\n",
+       "  Phosphorylation(STK10(), PLK1(), T, 210),\n",
+       "  Phosphorylation(SLK(), PLK1(), T, 210),\n",
+       "  Phosphorylation(PDPK1(), PLK1(), T, 210),\n",
+       "  Phosphorylation(CDK2(), PLK1(), T, 210),\n",
+       "  Phosphorylation(FRY(), PLK1(), T, 210),\n",
+       "  Phosphorylation(ATR(), PLK1(), T, 210),\n",
+       "  Phosphorylation(CCNA2(), PLK1(), T, 210),\n",
+       "  Phosphorylation(AURKA(mods: (phosphorylation, T, 288)), PLK1(), T, 210),\n",
+       "  Phosphorylation(MELK(), PLK1(), T, 210)],\n",
+       " ('FANCM', 'S', '34'): [],\n",
+       " ('EXO1', 'S', '610'): [],\n",
+       " ('TOP2A', 'S', '1106'): [],\n",
+       " ('DTL', 'S', '485'): [],\n",
+       " ('CLSPN', 'S', '83'): [],\n",
+       " ('SMARCA5', 'S', '755'): [],\n",
+       " ('RECQL5', 'S', '727'): [Phosphorylation(CDK1(), RECQL5(), S, 727)],\n",
+       " ('USP1', 'S', '475'): [],\n",
+       " ('ATRIP', 'S', '224'): [Phosphorylation(CDK2(), ATRIP(), S, 224),\n",
+       "  Phosphorylation(CDK2(kinase), ATRIP(), S, 224)],\n",
+       " ('USP37', 'S', '652'): [],\n",
+       " ('RAD18', 'S', '99'): [Phosphorylation(CDK2(), RAD18(), S, 99)],\n",
+       " ('CHTF18', 'S', '225'): [],\n",
+       " ('DBF4', 'T', '553'): [],\n",
+       " ('TOP2B', 'S', '1236'): [],\n",
+       " ('KPNA2', 'S', '490'): [],\n",
+       " ('UFD1', 'S', '247'): [],\n",
+       " ('MSH6', 'S', '309'): [],\n",
+       " ('MSH6', 'S', '91'): [],\n",
+       " ('RIF1', 'S', '2393'): [],\n",
+       " ('EXO1', 'T', '475'): [],\n",
+       " ('PALB2', 'S', '781'): [],\n",
+       " ('RAD50', 'S', '635'): [Phosphorylation(ATM(), RAD50(), S, 635),\n",
+       "  Phosphorylation(ATR(), RAD50(), S, 635)],\n",
+       " ('SAMHD1', 'T', '592'): [Phosphorylation(CDK1(), SAMHD1(), T, 592),\n",
+       "  Phosphorylation(CDK2(), SAMHD1(), T, 592),\n",
+       "  Phosphorylation(CCNA2(), SAMHD1(), T, 592),\n",
+       "  Phosphorylation(IL7(), SAMHD1(), T, 592),\n",
+       "  Phosphorylation(IL2(), SAMHD1(), T, 592),\n",
+       "  Phosphorylation(CDK6(), SAMHD1(), T, 592),\n",
+       "  Phosphorylation(DBF4(), SAMHD1(), T, 592),\n",
+       "  Phosphorylation(CD4(), SAMHD1(), T, 592)],\n",
+       " ('NSD2', 'T', '544'): [],\n",
+       " ('TOP2A', 'S', '1247'): [Phosphorylation(CDK1(), TOP2A(), S, 1247),\n",
+       "  Phosphorylation(MAPK3(), TOP2A(), S, 1247),\n",
+       "  Phosphorylation(MAPK1(), TOP2A(), S, 1247),\n",
+       "  Phosphorylation(RAC1(), TOP2A(), S, 1247)],\n",
+       " ('CHAF1A', 'S', '775'): [],\n",
+       " ('USP37', 'S', '650'): [],\n",
+       " ('RIF1', 'S', '1542'): [],\n",
+       " ('CDC25B', 'S', '375'): [Phosphorylation(BRSK1(), CDC25B(), S, 375),\n",
+       "  Phosphorylation(MAPKAPK2(), CDC25B(), S, 375),\n",
+       "  Phosphorylation(PLK1(), CDC25B(), S, 375)],\n",
+       " ('MCM6', 'S', '762'): [],\n",
+       " ('ATAD5', 'S', '44'): [],\n",
+       " ('TOPBP1', 'S', '1504'): [],\n",
+       " ('ATRIP', 'S', '518'): [],\n",
+       " ('TTK', 'T', '33'): [Phosphorylation(TTK(), TTK(), T, 33),\n",
+       "  Phosphorylation(PLK1(), TTK(), T, 33)],\n",
+       " ('XRCC1', 'T', '257'): [],\n",
+       " ('CLSPN', 'S', '225'): [],\n",
+       " ('MSH6', 'S', '830'): [],\n",
+       " ('ORC6', 'T', '195'): [Phosphorylation(CDK1(), ORC6(), T, 195)],\n",
+       " ('DONSON', 'S', '34'): [],\n",
+       " ('EXO1', 'S', '598'): [],\n",
+       " ('WDHD1', 'S', '333'): [],\n",
+       " ('SMC6', 'S', '11'): [],\n",
+       " ('ATRIP', 'S', '239'): [Phosphorylation(CDK2(), ATRIP(), S, 239)],\n",
+       " ('RIF1', 'S', '1579'): [],\n",
+       " ('TTK', 'S', '281'): [Phosphorylation(BRAF(), TTK(), S, 281),\n",
+       "  Phosphorylation(BRAF(muts: (V, 600, E)), TTK(), S, 281),\n",
+       "  Phosphorylation(CDK1(), TTK(), S, 281)],\n",
+       " ('CDK1', 'T', '14'): [Phosphorylation(MYT1(), CDK1(), T, 14),\n",
+       "  Phosphorylation(PKMYT1(), CDK1(), T, 14),\n",
+       "  Phosphorylation(WEE1(), CDK1(), T, 14),\n",
+       "  Phosphorylation(CDK1(), CDK1(), T, 14),\n",
+       "  Phosphorylation(CHEK1(), CDK1(), T, 14),\n",
+       "  Phosphorylation(IRS1(), CDK1(), T, 14),\n",
+       "  Phosphorylation(ID1(), CDK1(), T, 14),\n",
+       "  Phosphorylation(HGF(), CDK1(), T, 14),\n",
+       "  Phosphorylation(WEE2(), CDK1(), T, 14),\n",
+       "  Phosphorylation(CHEK2(), CDK1(), T, 14),\n",
+       "  Phosphorylation(MASTL(), CDK1(), T, 14),\n",
+       "  Phosphorylation(TGFBR2(), CDK1(), T, 14)],\n",
+       " ('TICRR', 'T', '1678'): [],\n",
+       " ('TICRR', 'S', '923'): [],\n",
+       " ('ERCC6L', 'S', '1028'): [],\n",
+       " ('ORC1', 'S', '273'): [Phosphorylation(CDK2(), ORC1(), S, 273),\n",
+       "  Phosphorylation(CDK1(), ORC1(), S, 273)],\n",
+       " ('DBF4', 'S', '381'): [],\n",
+       " ('DBF4', 'S', '359'): [],\n",
+       " ('RIF1', 'S', '1616'): [],\n",
+       " ('TOP2A', 'S', '1213'): [Phosphorylation(CDK1(), TOP2A(), S, 1213),\n",
+       "  Phosphorylation(MAPK1(), TOP2A(), S, 1213),\n",
+       "  Phosphorylation(MAPK3(), TOP2A(), S, 1213),\n",
+       "  Phosphorylation(CDC7(), TOP2A(), S, 1213)],\n",
+       " ('RIF1', 'S', '2176'): [],\n",
+       " ('EXO1', 'S', '702'): [],\n",
+       " ('NSD2', 'T', '115'): [],\n",
+       " ('CDK1', 'Y', '15'): [Phosphorylation(WEE1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(MYT1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(PKMYT1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(LYN(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(BDNF(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CHEK1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(ERBB2(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDC25C(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDK1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(LCK(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(WEE1(), CDK1(mods: (phosphorylation, T, 161)), Y, 15),\n",
+       "  Phosphorylation(CHEK2(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(PEF1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(WEE1(), CDK1(mods: (phosphorylation, T, 161), (phosphorylation, T, 14)), Y, 15),\n",
+       "  Phosphorylation(FHL1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDKN1A(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(ATM(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(WEE2(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDK2(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(MMP8(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(TAP1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(MASTL(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDC25A(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(NR1D1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(NBN(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(PRTN3(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDK7(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(IRS1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(TAP2(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(PRKN(mods: (modification)), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDKN1A(mods: (modification)), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDC6(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(ERBB2(mods: (modification)), CDK1(), Y, 15),\n",
+       "  Phosphorylation(DDX39A(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(FLNB(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(FHL2(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CASP3(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(SRC(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(FGF2(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(TP53(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(SRC(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CAT(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CDK1(mods: (modification)), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CHEK1(mods: (modification)), CDK1(), Y, 15),\n",
+       "  Phosphorylation(SPPL2B(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(CIB1(), CDK1(), Y, 15),\n",
+       "  Phosphorylation(PTS(), CDK1(), Y, 15)],\n",
+       " ('USP10', 'S', '547'): [],\n",
+       " ('MSH6', 'S', '227'): [],\n",
+       " ('TOP2A', 'S', '1374'): [],\n",
+       " ('TOPBP1', 'S', '888'): [],\n",
+       " ('DTL', 'T', '429'): [],\n",
+       " ('TOP2A', 'S', '1525'): [Phosphorylation(CSNK2A1(), TOP2A(), S, 1525),\n",
+       "  Phosphorylation(PLK1(), TOP2A(), S, 1525),\n",
+       "  Phosphorylation(CSNK2A2(), TOP2A(), S, 1525),\n",
+       "  Phosphorylation(CDC7(), TOP2A(), S, 1525)],\n",
+       " ('RPA1', 'S', '384'): [],\n",
+       " ('MCM6', 'S', '13'): [],\n",
+       " ('PARP1', 'S', '257'): [],\n",
+       " ('USP1', 'S', '313'): [Phosphorylation(CDK1(), USP1(), S, 313)],\n",
+       " ('RFC1', 'S', '156'): [],\n",
+       " ('EXO1', 'S', '639'): [],\n",
+       " ('RIF1', 'S', '2172'): [],\n",
+       " ('BLM', 'S', '28'): [],\n",
+       " ('EXO1', 'S', '815'): [],\n",
+       " ('DTL', 'S', '679'): [],\n",
+       " ('RIF1', 'S', '2157'): [],\n",
+       " ('PKMYT1', 'S', '143'): [],\n",
+       " ('USP1', 'S', '327'): [],\n",
+       " ('WDHD1', 'S', '958'): [],\n",
+       " ('INO80B', 'T', '60'): [],\n",
+       " ('CLSPN', 'T', '1287'): [],\n",
+       " ('TOP2A', 'S', '1474'): [],\n",
+       " ('RAD51AP1', 'T', '66'): [],\n",
+       " ('RIF1', 'S', '2348'): [],\n",
+       " ('ORC1', 'S', '201'): [],\n",
+       " ('SLF2', 'T', '711'): [],\n",
+       " ('POLD3', 'T', '277'): [],\n",
+       " ('MCMBP', 'T', '160'): [],\n",
+       " ('NUDT5', 'S', '3'): [],\n",
+       " ('WRN', 'S', '1133'): [Phosphorylation(CDK1(), WRN(), S, 1133),\n",
+       "  Phosphorylation(CDK2(), WRN(), S, 1133)],\n",
+       " ('TTK', 'S', '821'): [Phosphorylation(CDK1(), TTK(), S, 821),\n",
+       "  Phosphorylation(TTK(), TTK(), S, 821),\n",
+       "  Phosphorylation(MAPK3(), TTK(), S, 821),\n",
+       "  Phosphorylation(MAPK1(), TTK(), S, 821)],\n",
+       " ('BRCA2', 'S', '93'): [],\n",
+       " ('TICRR', 'S', '1750'): [],\n",
+       " ('POLQ', 'S', '1587'): [],\n",
+       " ('MDC1', 'T', '1157'): [],\n",
+       " ('SMARCC1', 'T', '398'): [],\n",
+       " ('RAD18', 'S', '103'): [],\n",
+       " ('PARP1', 'S', '782'): [Phosphorylation(CDK5(), PARP1(), S, 782),\n",
+       "  Phosphorylation(PRKACA(), PARP1(), S, 782)],\n",
+       " ('TERF2IP', 'S', '36'): [],\n",
+       " ('EXO1', 'S', '700'): [],\n",
+       " ('NSD2', 'T', '110'): [],\n",
+       " ('EXO1', 'S', '746'): [Phosphorylation(CHEK1(), EXO1(), S, 746),\n",
+       "  Phosphorylation(PRKAA1(), EXO1(), S, 746)],\n",
+       " ('BAZ1B', 'S', '349'): [],\n",
+       " ('ERCC6L', 'S', '820'): [],\n",
+       " ('ORC2', 'S', '280'): [],\n",
+       " ('LRWD1', 'S', '259'): [],\n",
+       " ('MDC1', 'S', '453'): [Phosphorylation(CSNK2A1(), MDC1(), S, 453)],\n",
+       " ('BOD1L1', 'S', '2905'): []}"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Now construct a dictionary of statements organized by specific sites making\n",
+    "# sure that phosphorylation of that specific site is described in the list\n",
+    "# of statements as values\n",
+    "stmts_by_site = {}\n",
+    "for site in valid_sites:\n",
+    "    stmts = stmts_by_target[site.gene_name]\n",
+    "    stmts = [s for s in stmts if s.enz and 'HGNC' in s.enz.db_refs]\n",
+    "    stmts = [s for s in stmts\n",
+    "             if s.residue == site.orig_res and s.position == site.orig_pos]\n",
+    "    stmts_by_site[(site.gene_name, site.orig_res, site.orig_pos)] = stmts\n",
+    "stmts_by_site"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "acc350d6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(174, 42)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Print statistics of the number of sites and the number with\n",
+    "# any known annotations\n",
+    "len(stmts_by_site), len([k for k, v in stmts_by_site.items() if v])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "b22dcd50",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[(('EXO1', 'S', '714'),\n",
+       "  [Phosphorylation(ATM(), EXO1(), S, 714),\n",
+       "   Phosphorylation(ATR(), EXO1(), S, 714)]),\n",
+       " (('EXO1', 'S', '610'), []),\n",
+       " (('EXO1', 'T', '475'), []),\n",
+       " (('EXO1', 'S', '598'), []),\n",
+       " (('EXO1', 'S', '702'), []),\n",
+       " (('EXO1', 'S', '639'), []),\n",
+       " (('EXO1', 'S', '815'), []),\n",
+       " (('EXO1', 'S', '700'), []),\n",
+       " (('EXO1', 'S', '746'),\n",
+       "  [Phosphorylation(CHEK1(), EXO1(), S, 746),\n",
+       "   Phosphorylation(PRKAA1(), EXO1(), S, 746)])]"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Explore specific examples of site annotations\n",
+    "[s for s in stmts_by_site.items() if s[0][0] == 'EXO1']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

	Unnamed: 0	gene_name	logFC	AveExpr	t	P.Value	adj.P.Val	B	qval	propMissing	...	propMissingOut	id	id.description	variableSites	accession_number	feature	gsea_rank	gsea_rank_p	causalpath_adjusted_id	prot_residue
0	NP_001269315.1_K345k_1_1_345_345	IDH1	-3.904846	-0.941172	-6.944246	8.017521e-07	0.000565	4.015043	0.000440	0.685185	...	0.583333	HRD	isocitrate dehydrogenase [NADP] cytoplasmic G...	['K345k', 'K345k']	NP_001269315.1	acetylome	-7.854981e+00	-23.803785	NP_001269315.1_K345k_1_1_345_345	IDH1_K345k_1_1_345_345
1	NP_006752.1_K142k_1_1_142_142	YWHAE	-1.019937	0.148372	-4.070577	1.611649e-04	0.049597	0.820275	0.038650	0.111111	...	0.125000	HRD	14-3-3 protein epsilon GN=YWHAE	['K142k', 'K142k']	NP_006752.1	acetylome	-1.209124e+00	-3.868344	NP_006752.1_K142k_1_1_142_142	YWHAE_K142k_1_1_142_142
2	NP_001609.2_K105k_1_1_105_105	PARP1	1.236072	-0.516186	3.981427	2.110521e-04	0.049597	0.579085	0.038650	0.092593	...	0.083333	HRD	poly [ADP-ribose] polymerase 1 GN=PARP1	['K105k', 'K105k']	NP_001609.2	acetylome	1.428431e+00	4.543317	NP_001609.2_K105k_1_1_105_105	PARP1_K105k_1_1_105_105
3	NP_001122321.1_K455k_1_1_455_455	SMARCA4	0.913719	-0.796626	3.485168	9.466467e-04	0.118682	-0.768856	0.092486	0.000000	...	0.000000	HRD	transcription activator BRG1 isoform A GN=SMA...	['K455k', 'K455k']	NP_001122321.1	acetylome	8.427154e-01	2.762915	NP_001122321.1_K455k_1_1_455_455	SMARCA4_K455k_1_1_455_455
4	NP_001609.2_K621k_1_1_621_621	PARP1	0.734708	-0.190376	3.490196	9.621225e-04	0.118682	-0.770646	0.092486	0.055556	...	0.041667	HRD	poly [ADP-ribose] polymerase 1 GN=PARP1	['K621k', 'K621k']	NP_001609.2	acetylome	6.776145e-01	2.216444	NP_001609.2_K621k_1_1_621_621	PARP1_K621k_1_1_621_621
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
5388	ENSG00000097007.19	ABL1	0.002431	5.878027	0.017314	9.862142e-01	0.993643	-6.474316	0.406492	NaN	...	NaN	HRD	NaN	NaN	NaN	transcriptome	5.029325e-06	0.000015	ENSG00000097007.19	ENSG00000097007.19
5389	ENSG00000102977.17	ACD	-0.001802	2.935979	-0.012782	9.898221e-01	0.994550	-6.062690	0.406863	NaN	...	NaN	HRD	NaN	NaN	NaN	transcriptome	-2.292426e-06	-0.000008	ENSG00000102977.17	ENSG00000102977.17
5390	ENSG00000167325.15	RRM1	-0.001353	6.557619	-0.011514	9.908319e-01	0.994550	-6.503115	0.406863	NaN	...	NaN	HRD	NaN	NaN	NaN	transcriptome	-1.463072e-06	-0.000005	ENSG00000167325.15	ENSG00000167325.15
5391	ENSG00000161036.13	LRWD1	-0.001006	3.850618	-0.007013	9.944159e-01	0.996278	-6.224021	0.407570	NaN	...	NaN	HRD	NaN	NaN	NaN	transcriptome	-7.332363e-07	-0.000002	ENSG00000161036.13	ENSG00000161036.13
5392	ENSG00000127616.18	SMARCA4	0.000826	6.543572	0.004348	9.965375e-01	0.996538	-6.502125	0.407676	NaN	...	NaN	HRD	NaN	NaN	NaN	transcriptome	3.851904e-07	0.000001	ENSG00000127616.18	ENSG00000127616.18