diff --git a/notebooks/phosphoproteomics_geffen/README.md b/notebooks/phosphoproteomics_geffen/README.md
new file mode 100644
index 000000000..3d69739b0
--- /dev/null
+++ b/notebooks/phosphoproteomics_geffen/README.md
@@ -0,0 +1,3 @@
+This notebook demonstrates finding explanations for phosphorylation
+changes detected in https://www.cell.com/cell/fulltext/S0092-8674(23)00781-X#gr2
+between homologous recombination repair deficient (HRD) and non-deficient samples.
diff --git a/notebooks/phosphoproteomics_geffen/phosphoprot_explanation.ipynb b/notebooks/phosphoproteomics_geffen/phosphoprot_explanation.ipynb
new file mode 100644
index 000000000..df3d5520f
--- /dev/null
+++ b/notebooks/phosphoproteomics_geffen/phosphoprot_explanation.ipynb
@@ -0,0 +1,1569 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "38f360f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This is the URL for the supplementary table\n",
+ "url = 'https://www.cell.com/cms/10.1016/j.cell.2023.07.013/attachment/b342834f-0ab7-4d68-be07-66e9ba38e3df/mmc3.xlsx'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "938eb46b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "7cb32c76",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Naively trying to load the table from the URL errors with 403: Forbidden\n",
+ "# sheets = pd.read_excel(url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "44277396",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " gene_name | \n",
+ " logFC | \n",
+ " AveExpr | \n",
+ " t | \n",
+ " P.Value | \n",
+ " adj.P.Val | \n",
+ " B | \n",
+ " qval | \n",
+ " propMissing | \n",
+ " ... | \n",
+ " propMissingOut | \n",
+ " id | \n",
+ " id.description | \n",
+ " variableSites | \n",
+ " accession_number | \n",
+ " feature | \n",
+ " gsea_rank | \n",
+ " gsea_rank_p | \n",
+ " causalpath_adjusted_id | \n",
+ " prot_residue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NP_001269315.1_K345k_1_1_345_345 | \n",
+ " IDH1 | \n",
+ " -3.904846 | \n",
+ " -0.941172 | \n",
+ " -6.944246 | \n",
+ " 8.017521e-07 | \n",
+ " 0.000565 | \n",
+ " 4.015043 | \n",
+ " 0.000440 | \n",
+ " 0.685185 | \n",
+ " ... | \n",
+ " 0.583333 | \n",
+ " HRD | \n",
+ " isocitrate dehydrogenase [NADP] cytoplasmic G... | \n",
+ " ['K345k', 'K345k'] | \n",
+ " NP_001269315.1 | \n",
+ " acetylome | \n",
+ " -7.854981e+00 | \n",
+ " -23.803785 | \n",
+ " NP_001269315.1_K345k_1_1_345_345 | \n",
+ " IDH1_K345k_1_1_345_345 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " NP_006752.1_K142k_1_1_142_142 | \n",
+ " YWHAE | \n",
+ " -1.019937 | \n",
+ " 0.148372 | \n",
+ " -4.070577 | \n",
+ " 1.611649e-04 | \n",
+ " 0.049597 | \n",
+ " 0.820275 | \n",
+ " 0.038650 | \n",
+ " 0.111111 | \n",
+ " ... | \n",
+ " 0.125000 | \n",
+ " HRD | \n",
+ " 14-3-3 protein epsilon GN=YWHAE | \n",
+ " ['K142k', 'K142k'] | \n",
+ " NP_006752.1 | \n",
+ " acetylome | \n",
+ " -1.209124e+00 | \n",
+ " -3.868344 | \n",
+ " NP_006752.1_K142k_1_1_142_142 | \n",
+ " YWHAE_K142k_1_1_142_142 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NP_001609.2_K105k_1_1_105_105 | \n",
+ " PARP1 | \n",
+ " 1.236072 | \n",
+ " -0.516186 | \n",
+ " 3.981427 | \n",
+ " 2.110521e-04 | \n",
+ " 0.049597 | \n",
+ " 0.579085 | \n",
+ " 0.038650 | \n",
+ " 0.092593 | \n",
+ " ... | \n",
+ " 0.083333 | \n",
+ " HRD | \n",
+ " poly [ADP-ribose] polymerase 1 GN=PARP1 | \n",
+ " ['K105k', 'K105k'] | \n",
+ " NP_001609.2 | \n",
+ " acetylome | \n",
+ " 1.428431e+00 | \n",
+ " 4.543317 | \n",
+ " NP_001609.2_K105k_1_1_105_105 | \n",
+ " PARP1_K105k_1_1_105_105 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NP_001122321.1_K455k_1_1_455_455 | \n",
+ " SMARCA4 | \n",
+ " 0.913719 | \n",
+ " -0.796626 | \n",
+ " 3.485168 | \n",
+ " 9.466467e-04 | \n",
+ " 0.118682 | \n",
+ " -0.768856 | \n",
+ " 0.092486 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " HRD | \n",
+ " transcription activator BRG1 isoform A GN=SMA... | \n",
+ " ['K455k', 'K455k'] | \n",
+ " NP_001122321.1 | \n",
+ " acetylome | \n",
+ " 8.427154e-01 | \n",
+ " 2.762915 | \n",
+ " NP_001122321.1_K455k_1_1_455_455 | \n",
+ " SMARCA4_K455k_1_1_455_455 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " NP_001609.2_K621k_1_1_621_621 | \n",
+ " PARP1 | \n",
+ " 0.734708 | \n",
+ " -0.190376 | \n",
+ " 3.490196 | \n",
+ " 9.621225e-04 | \n",
+ " 0.118682 | \n",
+ " -0.770646 | \n",
+ " 0.092486 | \n",
+ " 0.055556 | \n",
+ " ... | \n",
+ " 0.041667 | \n",
+ " HRD | \n",
+ " poly [ADP-ribose] polymerase 1 GN=PARP1 | \n",
+ " ['K621k', 'K621k'] | \n",
+ " NP_001609.2 | \n",
+ " acetylome | \n",
+ " 6.776145e-01 | \n",
+ " 2.216444 | \n",
+ " NP_001609.2_K621k_1_1_621_621 | \n",
+ " PARP1_K621k_1_1_621_621 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 5388 | \n",
+ " ENSG00000097007.19 | \n",
+ " ABL1 | \n",
+ " 0.002431 | \n",
+ " 5.878027 | \n",
+ " 0.017314 | \n",
+ " 9.862142e-01 | \n",
+ " 0.993643 | \n",
+ " -6.474316 | \n",
+ " 0.406492 | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " HRD | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " transcriptome | \n",
+ " 5.029325e-06 | \n",
+ " 0.000015 | \n",
+ " ENSG00000097007.19 | \n",
+ " ENSG00000097007.19 | \n",
+ "
\n",
+ " \n",
+ " 5389 | \n",
+ " ENSG00000102977.17 | \n",
+ " ACD | \n",
+ " -0.001802 | \n",
+ " 2.935979 | \n",
+ " -0.012782 | \n",
+ " 9.898221e-01 | \n",
+ " 0.994550 | \n",
+ " -6.062690 | \n",
+ " 0.406863 | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " HRD | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " transcriptome | \n",
+ " -2.292426e-06 | \n",
+ " -0.000008 | \n",
+ " ENSG00000102977.17 | \n",
+ " ENSG00000102977.17 | \n",
+ "
\n",
+ " \n",
+ " 5390 | \n",
+ " ENSG00000167325.15 | \n",
+ " RRM1 | \n",
+ " -0.001353 | \n",
+ " 6.557619 | \n",
+ " -0.011514 | \n",
+ " 9.908319e-01 | \n",
+ " 0.994550 | \n",
+ " -6.503115 | \n",
+ " 0.406863 | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " HRD | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " transcriptome | \n",
+ " -1.463072e-06 | \n",
+ " -0.000005 | \n",
+ " ENSG00000167325.15 | \n",
+ " ENSG00000167325.15 | \n",
+ "
\n",
+ " \n",
+ " 5391 | \n",
+ " ENSG00000161036.13 | \n",
+ " LRWD1 | \n",
+ " -0.001006 | \n",
+ " 3.850618 | \n",
+ " -0.007013 | \n",
+ " 9.944159e-01 | \n",
+ " 0.996278 | \n",
+ " -6.224021 | \n",
+ " 0.407570 | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " HRD | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " transcriptome | \n",
+ " -7.332363e-07 | \n",
+ " -0.000002 | \n",
+ " ENSG00000161036.13 | \n",
+ " ENSG00000161036.13 | \n",
+ "
\n",
+ " \n",
+ " 5392 | \n",
+ " ENSG00000127616.18 | \n",
+ " SMARCA4 | \n",
+ " 0.000826 | \n",
+ " 6.543572 | \n",
+ " 0.004348 | \n",
+ " 9.965375e-01 | \n",
+ " 0.996538 | \n",
+ " -6.502125 | \n",
+ " 0.407676 | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " HRD | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " transcriptome | \n",
+ " 3.851904e-07 | \n",
+ " 0.000001 | \n",
+ " ENSG00000127616.18 | \n",
+ " ENSG00000127616.18 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5393 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 gene_name logFC AveExpr \\\n",
+ "0 NP_001269315.1_K345k_1_1_345_345 IDH1 -3.904846 -0.941172 \n",
+ "1 NP_006752.1_K142k_1_1_142_142 YWHAE -1.019937 0.148372 \n",
+ "2 NP_001609.2_K105k_1_1_105_105 PARP1 1.236072 -0.516186 \n",
+ "3 NP_001122321.1_K455k_1_1_455_455 SMARCA4 0.913719 -0.796626 \n",
+ "4 NP_001609.2_K621k_1_1_621_621 PARP1 0.734708 -0.190376 \n",
+ "... ... ... ... ... \n",
+ "5388 ENSG00000097007.19 ABL1 0.002431 5.878027 \n",
+ "5389 ENSG00000102977.17 ACD -0.001802 2.935979 \n",
+ "5390 ENSG00000167325.15 RRM1 -0.001353 6.557619 \n",
+ "5391 ENSG00000161036.13 LRWD1 -0.001006 3.850618 \n",
+ "5392 ENSG00000127616.18 SMARCA4 0.000826 6.543572 \n",
+ "\n",
+ " t P.Value adj.P.Val B qval propMissing ... \\\n",
+ "0 -6.944246 8.017521e-07 0.000565 4.015043 0.000440 0.685185 ... \n",
+ "1 -4.070577 1.611649e-04 0.049597 0.820275 0.038650 0.111111 ... \n",
+ "2 3.981427 2.110521e-04 0.049597 0.579085 0.038650 0.092593 ... \n",
+ "3 3.485168 9.466467e-04 0.118682 -0.768856 0.092486 0.000000 ... \n",
+ "4 3.490196 9.621225e-04 0.118682 -0.770646 0.092486 0.055556 ... \n",
+ "... ... ... ... ... ... ... ... \n",
+ "5388 0.017314 9.862142e-01 0.993643 -6.474316 0.406492 NaN ... \n",
+ "5389 -0.012782 9.898221e-01 0.994550 -6.062690 0.406863 NaN ... \n",
+ "5390 -0.011514 9.908319e-01 0.994550 -6.503115 0.406863 NaN ... \n",
+ "5391 -0.007013 9.944159e-01 0.996278 -6.224021 0.407570 NaN ... \n",
+ "5392 0.004348 9.965375e-01 0.996538 -6.502125 0.407676 NaN ... \n",
+ "\n",
+ " propMissingOut id id.description \\\n",
+ "0 0.583333 HRD isocitrate dehydrogenase [NADP] cytoplasmic G... \n",
+ "1 0.125000 HRD 14-3-3 protein epsilon GN=YWHAE \n",
+ "2 0.083333 HRD poly [ADP-ribose] polymerase 1 GN=PARP1 \n",
+ "3 0.000000 HRD transcription activator BRG1 isoform A GN=SMA... \n",
+ "4 0.041667 HRD poly [ADP-ribose] polymerase 1 GN=PARP1 \n",
+ "... ... ... ... \n",
+ "5388 NaN HRD NaN \n",
+ "5389 NaN HRD NaN \n",
+ "5390 NaN HRD NaN \n",
+ "5391 NaN HRD NaN \n",
+ "5392 NaN HRD NaN \n",
+ "\n",
+ " variableSites accession_number feature gsea_rank \\\n",
+ "0 ['K345k', 'K345k'] NP_001269315.1 acetylome -7.854981e+00 \n",
+ "1 ['K142k', 'K142k'] NP_006752.1 acetylome -1.209124e+00 \n",
+ "2 ['K105k', 'K105k'] NP_001609.2 acetylome 1.428431e+00 \n",
+ "3 ['K455k', 'K455k'] NP_001122321.1 acetylome 8.427154e-01 \n",
+ "4 ['K621k', 'K621k'] NP_001609.2 acetylome 6.776145e-01 \n",
+ "... ... ... ... ... \n",
+ "5388 NaN NaN transcriptome 5.029325e-06 \n",
+ "5389 NaN NaN transcriptome -2.292426e-06 \n",
+ "5390 NaN NaN transcriptome -1.463072e-06 \n",
+ "5391 NaN NaN transcriptome -7.332363e-07 \n",
+ "5392 NaN NaN transcriptome 3.851904e-07 \n",
+ "\n",
+ " gsea_rank_p causalpath_adjusted_id prot_residue \n",
+ "0 -23.803785 NP_001269315.1_K345k_1_1_345_345 IDH1_K345k_1_1_345_345 \n",
+ "1 -3.868344 NP_006752.1_K142k_1_1_142_142 YWHAE_K142k_1_1_142_142 \n",
+ "2 4.543317 NP_001609.2_K105k_1_1_105_105 PARP1_K105k_1_1_105_105 \n",
+ "3 2.762915 NP_001122321.1_K455k_1_1_455_455 SMARCA4_K455k_1_1_455_455 \n",
+ "4 2.216444 NP_001609.2_K621k_1_1_621_621 PARP1_K621k_1_1_621_621 \n",
+ "... ... ... ... \n",
+ "5388 0.000015 ENSG00000097007.19 ENSG00000097007.19 \n",
+ "5389 -0.000008 ENSG00000102977.17 ENSG00000102977.17 \n",
+ "5390 -0.000005 ENSG00000167325.15 ENSG00000167325.15 \n",
+ "5391 -0.000002 ENSG00000161036.13 ENSG00000161036.13 \n",
+ "5392 0.000001 ENSG00000127616.18 ENSG00000127616.18 \n",
+ "\n",
+ "[5393 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load a local copy of the table and select Table 3G\n",
+ "df = pd.read_excel('mmc3.xlsx', sheet_name='Table 3G')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "5e41eac5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Filter the table to adjusted p-values less than 0.055 to retain significant results\n",
+ "df = df[df['adj.P.Val'] < 0.055]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "608b94bc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Counter({'phosphoproteome': 206,\n",
+ " 'transcriptome': 124,\n",
+ " 'phosphoproteome_res': 69,\n",
+ " 'proteome': 64,\n",
+ " 'acetylome': 3,\n",
+ " 'acetylome_res': 1})"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count the significant entries per feature type (data modality or modification type)\n",
+ "from collections import Counter\n",
+ "Counter(df.feature)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1f852e08",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{('ATAD5', 'hgnc', 'S', '44'),\n",
+ " ('ATR', 'hgnc', 'T', '1989'),\n",
+ " ('ATRIP', 'hgnc', 'S', '224'),\n",
+ " ('ATRIP', 'hgnc', 'S', '239'),\n",
+ " ('ATRIP', 'hgnc', 'S', '518'),\n",
+ " ('BAZ1B', 'hgnc', 'S', '330'),\n",
+ " ('BAZ1B', 'hgnc', 'S', '349'),\n",
+ " ('BLM', 'hgnc', 'S', '28'),\n",
+ " ('BOD1L1', 'hgnc', 'S', '2905'),\n",
+ " ('BRCA2', 'hgnc', 'S', '93'),\n",
+ " ('BRIP1', 'hgnc', 'S', '226'),\n",
+ " ('CDC25B', 'hgnc', 'S', '321'),\n",
+ " ('CDC25B', 'hgnc', 'S', '353'),\n",
+ " ('CDC25B', 'hgnc', 'S', '375'),\n",
+ " ('CDC6', 'hgnc', 'S', '54'),\n",
+ " ('CDK1', 'hgnc', 'T', '14'),\n",
+ " ('CDK1', 'hgnc', 'T', '161'),\n",
+ " ('CDK1', 'hgnc', 'Y', '15'),\n",
+ " ('CHAF1A', 'hgnc', 'S', '775'),\n",
+ " ('CHEK2', 'hgnc', 'S', '303'),\n",
+ " ('CHTF18', 'hgnc', 'S', '225'),\n",
+ " ('CHTF18', 'hgnc', 'S', '64'),\n",
+ " ('CHTF18', 'hgnc', 'S', '871'),\n",
+ " ('CLSPN', 'hgnc', 'S', '225'),\n",
+ " ('CLSPN', 'hgnc', 'S', '83'),\n",
+ " ('CLSPN', 'hgnc', 'S', '846'),\n",
+ " ('CLSPN', 'hgnc', 'T', '1287'),\n",
+ " ('CUL4B', 'hgnc', 'S', '180'),\n",
+ " ('DBF4', 'hgnc', 'S', '359'),\n",
+ " ('DBF4', 'hgnc', 'S', '381'),\n",
+ " ('DBF4', 'hgnc', 'S', '508'),\n",
+ " ('DBF4', 'hgnc', 'T', '345'),\n",
+ " ('DBF4', 'hgnc', 'T', '553'),\n",
+ " ('DONSON', 'hgnc', 'S', '34'),\n",
+ " ('DTL', 'hgnc', 'S', '485'),\n",
+ " ('DTL', 'hgnc', 'S', '490'),\n",
+ " ('DTL', 'hgnc', 'S', '512'),\n",
+ " ('DTL', 'hgnc', 'S', '679'),\n",
+ " ('DTL', 'hgnc', 'S', '697'),\n",
+ " ('DTL', 'hgnc', 'T', '429'),\n",
+ " ('ERCC6L', 'hgnc', 'S', '1028'),\n",
+ " ('ERCC6L', 'hgnc', 'S', '14'),\n",
+ " ('ERCC6L', 'hgnc', 'S', '820'),\n",
+ " ('EXO1', 'hgnc', 'S', '598'),\n",
+ " ('EXO1', 'hgnc', 'S', '610'),\n",
+ " ('EXO1', 'hgnc', 'S', '639'),\n",
+ " ('EXO1', 'hgnc', 'S', '700'),\n",
+ " ('EXO1', 'hgnc', 'S', '702'),\n",
+ " ('EXO1', 'hgnc', 'S', '714'),\n",
+ " ('EXO1', 'hgnc', 'S', '746'),\n",
+ " ('EXO1', 'hgnc', 'S', '815'),\n",
+ " ('EXO1', 'hgnc', 'T', '475'),\n",
+ " ('FANCD2', 'hgnc', 'S', '1435'),\n",
+ " ('FANCE', 'hgnc', 'S', '249'),\n",
+ " ('FANCM', 'hgnc', 'S', '34'),\n",
+ " ('INO80B', 'hgnc', 'T', '60'),\n",
+ " ('KPNA2', 'hgnc', 'S', '490'),\n",
+ " ('LIG1', 'hgnc', 'S', '881'),\n",
+ " ('LIG1', 'hgnc', 'S', '883'),\n",
+ " ('LIG1', 'hgnc', 'T', '165'),\n",
+ " ('LIG1', 'hgnc', 'T', '203'),\n",
+ " ('LRWD1', 'hgnc', 'S', '243'),\n",
+ " ('LRWD1', 'hgnc', 'S', '259'),\n",
+ " ('MBD4', 'hgnc', 'S', '422'),\n",
+ " ('MCM2', 'hgnc', 'S', '139'),\n",
+ " ('MCM2', 'hgnc', 'S', '381'),\n",
+ " ('MCM2', 'hgnc', 'S', '40'),\n",
+ " ('MCM2', 'hgnc', 'S', '41'),\n",
+ " ('MCM3', 'hgnc', 'S', '756'),\n",
+ " ('MCM3', 'hgnc', 'T', '758'),\n",
+ " ('MCM3', 'hgnc', 'T', '767'),\n",
+ " ('MCM4', 'hgnc', 'S', '120'),\n",
+ " ('MCM4', 'hgnc', 'S', '131'),\n",
+ " ('MCM4', 'hgnc', 'T', '110'),\n",
+ " ('MCM6', 'hgnc', 'S', '13'),\n",
+ " ('MCM6', 'hgnc', 'S', '762'),\n",
+ " ('MCMBP', 'hgnc', 'T', '160'),\n",
+ " ('MDC1', 'hgnc', 'S', '1820'),\n",
+ " ('MDC1', 'hgnc', 'S', '453'),\n",
+ " ('MDC1', 'hgnc', 'T', '1157'),\n",
+ " ('MDC1', 'hgnc', 'T', '1239'),\n",
+ " ('MDC1', 'hgnc', 'T', '455'),\n",
+ " ('MPLKIP', 'hgnc', 'S', '66'),\n",
+ " ('MSH6', 'hgnc', 'S', '227'),\n",
+ " ('MSH6', 'hgnc', 'S', '309'),\n",
+ " ('MSH6', 'hgnc', 'S', '830'),\n",
+ " ('MSH6', 'hgnc', 'S', '91'),\n",
+ " ('NSD2', 'hgnc', 'T', '110'),\n",
+ " ('NSD2', 'hgnc', 'T', '114'),\n",
+ " ('NSD2', 'hgnc', 'T', '115'),\n",
+ " ('NSD2', 'hgnc', 'T', '544'),\n",
+ " ('NUDT5', 'hgnc', 'S', '3'),\n",
+ " ('ORC1', 'hgnc', 'S', '201'),\n",
+ " ('ORC1', 'hgnc', 'S', '273'),\n",
+ " ('ORC1', 'hgnc', 'S', '287'),\n",
+ " ('ORC2', 'hgnc', 'S', '280'),\n",
+ " ('ORC2', 'hgnc', 'T', '226'),\n",
+ " ('ORC6', 'hgnc', 'T', '195'),\n",
+ " ('PALB2', 'hgnc', 'S', '781'),\n",
+ " ('PARG', 'hgnc', 'S', '68'),\n",
+ " ('PARP1', 'hgnc', 'S', '179'),\n",
+ " ('PARP1', 'hgnc', 'S', '257'),\n",
+ " ('PARP1', 'hgnc', 'S', '782'),\n",
+ " ('PCLAF', 'hgnc', 'S', '72'),\n",
+ " ('PKMYT1', 'hgnc', 'S', '143'),\n",
+ " ('PLK1', 'hgnc', 'T', '210'),\n",
+ " ('PMS2', 'hgnc', 'T', '573'),\n",
+ " ('POLD3', 'hgnc', 'T', '277'),\n",
+ " ('POLQ', 'hgnc', 'S', '1587'),\n",
+ " ('POLR2C', 'hgnc', 'S', '124'),\n",
+ " ('RAD18', 'hgnc', 'S', '103'),\n",
+ " ('RAD18', 'hgnc', 'S', '99'),\n",
+ " ('RAD50', 'hgnc', 'S', '635'),\n",
+ " ('RAD51AP1', 'hgnc', 'S', '19'),\n",
+ " ('RAD51AP1', 'hgnc', 'S', '21'),\n",
+ " ('RAD51AP1', 'hgnc', 'S', '294'),\n",
+ " ('RAD51AP1', 'hgnc', 'T', '66'),\n",
+ " ('RECQL5', 'hgnc', 'S', '727'),\n",
+ " ('REV1', 'hgnc', 'S', '1144'),\n",
+ " ('RFC1', 'hgnc', 'S', '156'),\n",
+ " ('RIF1', 'hgnc', 'S', '1454'),\n",
+ " ('RIF1', 'hgnc', 'S', '1542'),\n",
+ " ('RIF1', 'hgnc', 'S', '1579'),\n",
+ " ('RIF1', 'hgnc', 'S', '1616'),\n",
+ " ('RIF1', 'hgnc', 'S', '1688'),\n",
+ " ('RIF1', 'hgnc', 'S', '1873'),\n",
+ " ('RIF1', 'hgnc', 'S', '2157'),\n",
+ " ('RIF1', 'hgnc', 'S', '2172'),\n",
+ " ('RIF1', 'hgnc', 'S', '2176'),\n",
+ " ('RIF1', 'hgnc', 'S', '2205'),\n",
+ " ('RIF1', 'hgnc', 'S', '2243'),\n",
+ " ('RIF1', 'hgnc', 'S', '2265'),\n",
+ " ('RIF1', 'hgnc', 'S', '2348'),\n",
+ " ('RIF1', 'hgnc', 'S', '2393'),\n",
+ " ('RIF1', 'hgnc', 'S', '782'),\n",
+ " ('RNF168', 'hgnc', 'S', '134'),\n",
+ " ('RPA1', 'hgnc', 'S', '384'),\n",
+ " ('SAMHD1', 'hgnc', 'T', '592'),\n",
+ " ('SLF2', 'hgnc', 'S', '710'),\n",
+ " ('SLF2', 'hgnc', 'T', '711'),\n",
+ " ('SMARCA5', 'hgnc', 'S', '755'),\n",
+ " ('SMARCC1', 'hgnc', 'T', '398'),\n",
+ " ('SMC6', 'hgnc', 'S', '11'),\n",
+ " ('TERF2', 'hgnc', 'S', '365'),\n",
+ " ('TERF2IP', 'hgnc', 'S', '36'),\n",
+ " ('TICRR', 'hgnc', 'S', '1750'),\n",
+ " ('TICRR', 'hgnc', 'S', '599'),\n",
+ " ('TICRR', 'hgnc', 'S', '865'),\n",
+ " ('TICRR', 'hgnc', 'S', '923'),\n",
+ " ('TICRR', 'hgnc', 'T', '1678'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1106'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1213'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1247'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1374'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1377'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1474'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1504'),\n",
+ " ('TOP2A', 'hgnc', 'S', '1525'),\n",
+ " ('TOP2B', 'hgnc', 'S', '1236'),\n",
+ " ('TOPBP1', 'hgnc', 'S', '1504'),\n",
+ " ('TOPBP1', 'hgnc', 'S', '888'),\n",
+ " ('TP53BP1', 'hgnc', 'S', '1623'),\n",
+ " ('TP53BP1', 'hgnc', 'S', '1670'),\n",
+ " ('TP53BP1', 'hgnc', 'S', '1683'),\n",
+ " ('TP53BP1', 'hgnc', 'S', '1706'),\n",
+ " ('TP53BP1', 'hgnc', 'S', '1763'),\n",
+ " ('TP53BP1', 'hgnc', 'S', '1764'),\n",
+ " ('TTK', 'hgnc', 'S', '281'),\n",
+ " ('TTK', 'hgnc', 'S', '436'),\n",
+ " ('TTK', 'hgnc', 'S', '821'),\n",
+ " ('TTK', 'hgnc', 'T', '33'),\n",
+ " ('UBE2T', 'hgnc', 'S', '184'),\n",
+ " ('UFD1', 'hgnc', 'S', '245'),\n",
+ " ('UFD1', 'hgnc', 'S', '247'),\n",
+ " ('UFD1', 'hgnc', 'S', '299'),\n",
+ " ('USP1', 'hgnc', 'S', '313'),\n",
+ " ('USP1', 'hgnc', 'S', '327'),\n",
+ " ('USP1', 'hgnc', 'S', '475'),\n",
+ " ('USP10', 'hgnc', 'S', '547'),\n",
+ " ('USP37', 'hgnc', 'S', '650'),\n",
+ " ('USP37', 'hgnc', 'S', '652'),\n",
+ " ('WDHD1', 'hgnc', 'S', '333'),\n",
+ " ('WDHD1', 'hgnc', 'S', '868'),\n",
+ " ('WDHD1', 'hgnc', 'S', '958'),\n",
+ " ('WRN', 'hgnc', 'S', '1133'),\n",
+ " ('XRCC1', 'hgnc', 'S', '266'),\n",
+ " ('XRCC1', 'hgnc', 'T', '257'),\n",
+ " ('XRCC6', 'hgnc', 'S', '2'),\n",
+ " ('XRCC6', 'hgnc', 'T', '455')}"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Construct the set of all phosphorylation sites that are significantly\n",
+ "# increased (logFC > 0) compared to control, represented as tuples compatible\n",
+ "# with Protmapper: (gene_name, 'hgnc', residue, position).\n",
+ "import re\n",
+ "sites = set()\n",
+ "for _, row in df.iterrows():\n",
+ " if row['feature'] == 'phosphoproteome' and row['logFC'] > 0:\n",
+ " matches = re.findall(r'([STY]\\d+)', row['variableSites'])\n",
+ " sites |= {(row['gene_name'], 'hgnc', match[0], match[1:]) for match in matches}\n",
+ "sites"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "b15177e1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Mapping sites: 0it [00:00, ?it/s]INFO: [2024-06-25 12:55:01] protmapper.uniprot_client - Loading Swissprot sequences...\n",
+ "INFO: [2024-06-25 12:55:03] protmapper.uniprot_client - Loading Uniprot isoform sequences...\n",
+ "Mapping sites: 189it [00:11, 16.96it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Use Protmapper to map the sites to the human reference sequence\n",
+ "import protmapper\n",
+ "mapped_sites = protmapper.default_mapper.map_sitelist_to_human_ref(sites)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "c5904b00",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Counter({'VALID': 174, 'NO_MAPPING_FOUND': 15})"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Print counts of mapping results\n",
+ "Counter([ms.description for ms in mapped_sites])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "26a813ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep sites that are valid as given or that could be mapped to a valid site\n",
+ "valid_sites = [ms for ms in mapped_sites if ms.valid or ms.mapped_id]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "93f16f6a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import the INDRA DB REST API functions used to query for Phosphorylation\n",
+ "# statements whose substrate is one of the proteins appearing in the site list\n",
+ "from indra.sources.indra_db_rest import get_statements_from_query\n",
+ "from indra.sources.indra_db_rest.query import HasAgent, HasType"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "7f816be1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "stmts_by_target = {}\n",
+ "unique_genes = {ms.gene_name for ms in valid_sites}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "1b294e7b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 0%| | 0/78 [00:00, ?it/s]INFO: [2024-06-25 12:57:00] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CHTF18 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:00] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:00] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:00] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 1%|▌ | 1/78 [00:01<01:39, 1.29s/it]INFO: [2024-06-25 12:57:02] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RIF1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:02] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:02] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:02] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 3%|█ | 2/78 [00:03<02:01, 1.60s/it]INFO: [2024-06-25 12:57:03] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CDK1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:03] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:03] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:03] indra_db_rest.request_logs - OFFSET: 0\n",
+ "INFO: [2024-06-25 12:57:13] indra_db_rest.request_logs - Running 1st request for statements\n",
+ "INFO: [2024-06-25 12:57:13] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:13] indra_db_rest.request_logs - OFFSET: 500\n",
+ " 4%|█▋ | 3/78 [00:14<07:28, 5.98s/it]INFO: [2024-06-25 12:57:15] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=POLD3 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:15] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:15] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:15] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 5%|██▏ | 4/78 [00:15<04:59, 4.05s/it]INFO: [2024-06-25 12:57:16] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PMS2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:16] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:16] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:16] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 6%|██▊ | 5/78 [00:16<03:48, 3.13s/it]INFO: [2024-06-25 12:57:17] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCM2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:17] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:17] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:17] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 8%|███▎ | 6/78 [00:19<03:22, 2.82s/it]INFO: [2024-06-25 12:57:19] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=USP1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:19] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:19] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:19] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 9%|███▊ | 7/78 [00:19<02:34, 2.18s/it]INFO: [2024-06-25 12:57:20] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TERF2IP with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:20] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:20] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:20] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 10%|████▍ | 8/78 [00:21<02:19, 2.00s/it]INFO: [2024-06-25 12:57:22] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MPLKIP with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:22] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:22] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:22] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 12%|████▉ | 9/78 [00:22<01:56, 1.69s/it]INFO: [2024-06-25 12:57:23] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=DBF4 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:23] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:23] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:23] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 13%|█████▍ | 10/78 [00:23<01:48, 1.59s/it]INFO: [2024-06-25 12:57:24] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PARG with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:24] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:24] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:24] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 14%|█████▉ | 11/78 [00:25<01:37, 1.46s/it]INFO: [2024-06-25 12:57:25] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=NSD2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:25] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:25] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:25] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 15%|██████▍ | 12/78 [00:26<01:37, 1.48s/it]INFO: [2024-06-25 12:57:27] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=UFD1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:27] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:27] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:27] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 17%|███████ | 13/78 [00:27<01:26, 1.33s/it]INFO: [2024-06-25 12:57:28] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=INO80B with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:28] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:28] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:28] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 18%|███████▌ | 14/78 [00:29<01:27, 1.36s/it]INFO: [2024-06-25 12:57:29] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=FANCD2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:29] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:29] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:29] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 19%|████████ | 15/78 [00:31<01:44, 1.65s/it]INFO: [2024-06-25 12:57:32] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=XRCC1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:32] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:32] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:32] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 21%|████████▌ | 16/78 [00:35<02:25, 2.35s/it]INFO: [2024-06-25 12:57:36] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=LRWD1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:36] indra_db_rest.request_logs - Running 0th request for statements\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO: [2024-06-25 12:57:36] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:36] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 22%|█████████▏ | 17/78 [00:36<01:58, 1.95s/it]INFO: [2024-06-25 12:57:37] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=POLR2C with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 23%|█████████▋ | 18/78 [00:37<01:37, 1.63s/it]INFO: [2024-06-25 12:57:37] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RECQL5 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:37] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 24%|██████████▏ | 19/78 [00:38<01:25, 1.46s/it]INFO: [2024-06-25 12:57:39] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=WDHD1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:39] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:39] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:39] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 26%|██████████▊ | 20/78 [00:39<01:21, 1.41s/it]INFO: [2024-06-25 12:57:40] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RNF168 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:40] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:40] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:40] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 27%|███████████▎ | 21/78 [00:40<01:17, 1.36s/it]INFO: [2024-06-25 12:57:41] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ERCC6L with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:41] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:41] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:41] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 28%|███████████▊ | 22/78 [00:41<01:12, 1.29s/it]INFO: [2024-06-25 12:57:42] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCM6 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:42] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:42] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:42] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 29%|████████████▍ | 23/78 [00:43<01:16, 1.39s/it]INFO: [2024-06-25 12:57:44] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BRCA2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:44] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:44] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:44] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 31%|████████████▉ | 24/78 [00:46<01:35, 1.78s/it]INFO: [2024-06-25 12:57:46] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RAD51AP1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:46] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:46] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:46] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 32%|█████████████▍ | 25/78 [00:47<01:22, 1.55s/it]INFO: [2024-06-25 12:57:48] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PLK1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:48] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:48] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:48] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 33%|██████████████ | 26/78 [00:51<02:07, 2.46s/it]INFO: [2024-06-25 12:57:52] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RFC1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:52] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:52] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:52] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 35%|██████████████▌ | 27/78 [00:53<01:49, 2.15s/it]INFO: [2024-06-25 12:57:54] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ATRIP with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:54] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:54] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:54] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 36%|███████████████ | 28/78 [00:54<01:33, 1.87s/it]INFO: [2024-06-25 12:57:55] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TOP2A with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:55] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:55] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:55] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 37%|███████████████▌ | 29/78 [00:56<01:36, 1.97s/it]INFO: [2024-06-25 12:57:57] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ATR with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:57:57] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:57:57] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:57:57] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 38%|████████████████▏ | 30/78 [01:02<02:22, 2.97s/it]INFO: [2024-06-25 12:58:02] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RAD50 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:02] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:02] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:02] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 40%|████████████████▋ | 31/78 [01:03<02:05, 2.66s/it]INFO: [2024-06-25 12:58:04] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PALB2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:04] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:04] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:04] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 41%|█████████████████▏ | 32/78 [01:05<01:48, 2.36s/it]INFO: [2024-06-25 12:58:06] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BRIP1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:06] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:06] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:06] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 42%|█████████████████▊ | 33/78 [01:07<01:34, 2.11s/it]INFO: [2024-06-25 12:58:07] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ORC6 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:07] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:07] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:07] indra_db_rest.request_logs - OFFSET: 0\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 44%|██████████████████▎ | 34/78 [01:08<01:17, 1.76s/it]INFO: [2024-06-25 12:58:08] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SMARCC1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:08] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:08] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:08] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 45%|██████████████████▊ | 35/78 [01:09<01:08, 1.59s/it]INFO: [2024-06-25 12:58:09] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=KPNA2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:09] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:09] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:09] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 46%|███████████████████▍ | 36/78 [01:11<01:11, 1.69s/it]INFO: [2024-06-25 12:58:11] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=WRN with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:11] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:11] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:11] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 47%|███████████████████▉ | 37/78 [01:13<01:11, 1.75s/it]INFO: [2024-06-25 12:58:13] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SLF2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:13] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:13] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:13] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 49%|████████████████████▍ | 38/78 [01:13<00:59, 1.49s/it]INFO: [2024-06-25 12:58:14] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CLSPN with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:14] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:14] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:14] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 50%|█████████████████████ | 39/78 [01:16<01:05, 1.67s/it]INFO: [2024-06-25 12:58:16] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=USP37 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:16] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:16] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:16] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 51%|█████████████████████▌ | 40/78 [01:17<00:57, 1.51s/it]INFO: [2024-06-25 12:58:17] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MSH6 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:17] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:17] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:17] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 53%|██████████████████████ | 41/78 [01:19<00:59, 1.60s/it]INFO: [2024-06-25 12:58:19] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=FANCE with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:19] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:19] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:19] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 54%|██████████████████████▌ | 42/78 [01:19<00:50, 1.41s/it]INFO: [2024-06-25 12:58:20] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=UBE2T with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:20] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:20] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:20] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 55%|███████████████████████▏ | 43/78 [01:21<00:45, 1.29s/it]INFO: [2024-06-25 12:58:21] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=USP10 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:21] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:21] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:21] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 56%|███████████████████████▋ | 44/78 [01:22<00:46, 1.36s/it]INFO: [2024-06-25 12:58:23] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BLM with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:23] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:23] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:23] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 58%|████████████████████████▏ | 45/78 [01:26<01:07, 2.03s/it]INFO: [2024-06-25 12:58:26] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCM4 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:26] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:26] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:26] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 59%|████████████████████████▊ | 46/78 [01:27<01:03, 1.97s/it]INFO: [2024-06-25 12:58:28] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SMC6 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:28] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:28] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:28] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 60%|█████████████████████████▎ | 47/78 [01:29<00:52, 1.70s/it]INFO: [2024-06-25 12:58:29] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BAZ1B with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:29] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:29] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:29] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 62%|█████████████████████████▊ | 48/78 [01:30<00:50, 1.68s/it]INFO: [2024-06-25 12:58:31] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=BOD1L1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:31] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:31] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:31] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 63%|██████████████████████████▍ | 49/78 [01:31<00:43, 1.50s/it]INFO: [2024-06-25 12:58:32] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CDC6 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:32] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:32] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:32] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 64%|██████████████████████████▉ | 50/78 [01:34<00:51, 1.85s/it]INFO: [2024-06-25 12:58:35] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TOP2B with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:35] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:35] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:35] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 65%|███████████████████████████▍ | 51/78 [01:35<00:44, 1.65s/it]INFO: [2024-06-25 12:58:36] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TTK with role=OBJECT and have type Phosphorylation.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO: [2024-06-25 12:58:36] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:36] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:36] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 67%|████████████████████████████ | 52/78 [01:38<00:49, 1.92s/it]INFO: [2024-06-25 12:58:38] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CHAF1A with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:38] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:38] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:38] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 68%|████████████████████████████▌ | 53/78 [01:40<00:54, 2.20s/it]INFO: [2024-06-25 12:58:41] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=DTL with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:41] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:41] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:41] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 69%|█████████████████████████████ | 54/78 [01:42<00:46, 1.96s/it]INFO: [2024-06-25 12:58:43] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PCLAF with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:43] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:43] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:43] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 71%|█████████████████████████████▌ | 55/78 [01:44<00:49, 2.14s/it]INFO: [2024-06-25 12:58:45] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TERF2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:45] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:45] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:45] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 72%|██████████████████████████████▏ | 56/78 [01:46<00:43, 1.97s/it]INFO: [2024-06-25 12:58:47] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CDC25B with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:47] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:47] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:47] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 73%|██████████████████████████████▋ | 57/78 [01:48<00:41, 1.97s/it]INFO: [2024-06-25 12:58:49] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TICRR with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:49] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:49] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:49] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 74%|███████████████████████████████▏ | 58/78 [01:49<00:33, 1.69s/it]INFO: [2024-06-25 12:58:50] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=FANCM with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:50] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:50] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:50] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 76%|███████████████████████████████▊ | 59/78 [01:50<00:28, 1.52s/it]INFO: [2024-06-25 12:58:51] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RAD18 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:51] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:51] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:51] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 77%|████████████████████████████████▎ | 60/78 [01:52<00:27, 1.54s/it]INFO: [2024-06-25 12:58:52] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MDC1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:52] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:52] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:52] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 78%|████████████████████████████████▊ | 61/78 [01:54<00:28, 1.69s/it]INFO: [2024-06-25 12:58:54] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=XRCC6 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:54] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:55] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:55] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 79%|█████████████████████████████████▍ | 62/78 [01:56<00:27, 1.72s/it]INFO: [2024-06-25 12:58:56] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PKMYT1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:56] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:56] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:56] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 81%|█████████████████████████████████▉ | 63/78 [01:57<00:23, 1.54s/it]INFO: [2024-06-25 12:58:57] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=MCMBP with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:57] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:57] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:57] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 82%|██████████████████████████████████▍ | 64/78 [01:58<00:20, 1.47s/it]INFO: [2024-06-25 12:58:59] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=LIG1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:58:59] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:58:59] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:58:59] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 83%|███████████████████████████████████ | 65/78 [01:59<00:18, 1.39s/it]INFO: [2024-06-25 12:59:00] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SMARCA5 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:00] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:00] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:00] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 85%|███████████████████████████████████▌ | 66/78 [02:00<00:16, 1.35s/it]INFO: [2024-06-25 12:59:01] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=RPA1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:01] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:01] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:01] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 86%|████████████████████████████████████ | 67/78 [02:03<00:17, 1.62s/it]INFO: [2024-06-25 12:59:03] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ATAD5 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:03] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:03] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:03] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 87%|████████████████████████████████████▌ | 68/78 [02:04<00:14, 1.42s/it]INFO: [2024-06-25 12:59:04] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=DONSON with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:04] indra_db_rest.request_logs - Running 0th request for statements\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO: [2024-06-25 12:59:04] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:04] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 88%|█████████████████████████████████████▏ | 69/78 [02:04<00:11, 1.23s/it]INFO: [2024-06-25 12:59:05] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=NUDT5 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:05] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:05] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:05] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 90%|█████████████████████████████████████▋ | 70/78 [02:05<00:09, 1.13s/it]INFO: [2024-06-25 12:59:06] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ORC1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:06] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:06] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:06] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 91%|██████████████████████████████████████▏ | 71/78 [02:07<00:08, 1.18s/it]INFO: [2024-06-25 12:59:07] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=POLQ with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:07] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:07] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:07] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 92%|██████████████████████████████████████▊ | 72/78 [02:08<00:07, 1.18s/it]INFO: [2024-06-25 12:59:09] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=ORC2 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:09] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:09] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:09] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 94%|███████████████████████████████████████▎ | 73/78 [02:09<00:06, 1.29s/it]INFO: [2024-06-25 12:59:10] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PARP1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:10] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:10] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:10] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 95%|███████████████████████████████████████▊ | 74/78 [02:18<00:13, 3.38s/it]INFO: [2024-06-25 12:59:18] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=SAMHD1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:18] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:18] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:18] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 96%|████████████████████████████████████████▍ | 75/78 [02:20<00:08, 2.97s/it]INFO: [2024-06-25 12:59:20] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=TOPBP1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:20] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:20] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:20] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 97%|████████████████████████████████████████▉ | 76/78 [02:21<00:04, 2.49s/it]INFO: [2024-06-25 12:59:22] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=EXO1 with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:22] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:22] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:22] indra_db_rest.request_logs - OFFSET: 0\n",
+ " 99%|█████████████████████████████████████████▍| 77/78 [02:23<00:02, 2.31s/it]INFO: [2024-06-25 12:59:24] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=CUL4B with role=OBJECT and have type Phosphorylation.\n",
+ "INFO: [2024-06-25 12:59:24] indra_db_rest.request_logs - Running 0th request for statements\n",
+ "INFO: [2024-06-25 12:59:24] indra_db_rest.request_logs - LIMIT: None\n",
+ "INFO: [2024-06-25 12:59:24] indra_db_rest.request_logs - OFFSET: 0\n",
+ "100%|██████████████████████████████████████████| 78/78 [02:24<00:00, 1.86s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import tqdm\n",
+ "\n",
+ "# One INDRA DB REST query per gene: fetch every Phosphorylation statement in\n",
+ "# which the gene appears as the OBJECT agent, and file the resulting\n",
+ "# statement list under that gene's name in stmts_by_target.\n",
+ "for gene in tqdm.tqdm(unique_genes):\n",
+ "    # Build the two constraints separately, then AND them into one query.\n",
+ "    as_object = HasAgent(gene, role='OBJECT')\n",
+ "    is_phosphorylation = HasType('Phosphorylation')\n",
+ "    q = as_object & is_phosphorylation\n",
+ "    ip = get_statements_from_query(q)\n",
+ "    stmts_by_target[gene] = ip.statements"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "9480d8b9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{('TICRR', 'S', '599'): [],\n",
+ " ('DBF4', 'T', '345'): [],\n",
+ " ('ATR', 'T', '1989'): [Phosphorylation(ATR(), ATR(), T, 1989),\n",
+ " Phosphorylation(KDM5A(), ATR(), T, 1989),\n",
+ " Phosphorylation(IVNS1ABP(), ATR(), T, 1989),\n",
+ " Phosphorylation(SSB(), ATR(), T, 1989),\n",
+ " Phosphorylation(KDM5B(), ATR(), T, 1989),\n",
+ " Phosphorylation(PRPF19(), ATR(), T, 1989)],\n",
+ " ('UFD1', 'S', '299'): [],\n",
+ " ('UBE2T', 'S', '184'): [],\n",
+ " ('XRCC6', 'S', '2'): [],\n",
+ " ('EXO1', 'S', '714'): [Phosphorylation(ATM(), EXO1(), S, 714),\n",
+ " Phosphorylation(ATR(), EXO1(), S, 714)],\n",
+ " ('RAD51AP1', 'S', '19'): [],\n",
+ " ('POLR2C', 'S', '124'): [],\n",
+ " ('CDC25B', 'S', '321'): [Phosphorylation(CDK1(), CDC25B(), S, 321),\n",
+ " Phosphorylation(MELK(), CDC25B(), S, 321),\n",
+ " Phosphorylation(PRKCA(), CDC25B(), S, 321),\n",
+ " Phosphorylation(CDC25B(), CDC25B(), S, 321),\n",
+ " Phosphorylation(PRKACA(), CDC25B(), S, 321)],\n",
+ " ('BAZ1B', 'S', '330'): [],\n",
+ " ('MDC1', 'T', '455'): [Phosphorylation(CSNK2A1(), MDC1(), T, 455)],\n",
+ " ('RIF1', 'S', '2205'): [Phosphorylation(CDK1(), RIF1(), S, 2205),\n",
+ " Phosphorylation(CHEK1(), RIF1(), S, 2205),\n",
+ " Phosphorylation(AURKB(), RIF1(), S, 2205),\n",
+ " Phosphorylation(ATR(), RIF1(), S, 2205)],\n",
+ " ('MCM4', 'S', '120'): [],\n",
+ " ('ORC1', 'S', '287'): [],\n",
+ " ('DTL', 'S', '697'): [],\n",
+ " ('TOP2A', 'S', '1377'): [Phosphorylation(CSNK2A1(), TOP2A(), S, 1377),\n",
+ " Phosphorylation(CSNK2A2(), TOP2A(), S, 1377)],\n",
+ " ('DTL', 'S', '512'): [],\n",
+ " ('PARG', 'S', '68'): [],\n",
+ " ('DBF4', 'S', '508'): [],\n",
+ " ('RIF1', 'S', '1688'): [],\n",
+ " ('CHTF18', 'S', '871'): [],\n",
+ " ('UFD1', 'S', '245'): [],\n",
+ " ('RIF1', 'S', '2243'): [],\n",
+ " ('TERF2', 'S', '365'): [Phosphorylation(CDK2(), TERF2(), S, 365),\n",
+ " Phosphorylation(MAPK1(), TERF2(), S, 365)],\n",
+ " ('BRIP1', 'S', '226'): [],\n",
+ " ('RIF1', 'S', '1454'): [],\n",
+ " ('RIF1', 'S', '782'): [],\n",
+ " ('RIF1', 'S', '1873'): [],\n",
+ " ('PARP1', 'S', '179'): [Phosphorylation(ATR(), PARP1(), S, 179)],\n",
+ " ('MDC1', 'S', '1820'): [],\n",
+ " ('LIG1', 'T', '165'): [],\n",
+ " ('MCM4', 'S', '131'): [],\n",
+ " ('FANCD2', 'S', '1435'): [],\n",
+ " ('WDHD1', 'S', '868'): [],\n",
+ " ('RAD51AP1', 'S', '294'): [],\n",
+ " ('XRCC6', 'T', '455'): [Phosphorylation(CDK2(), XRCC6(), T, 455)],\n",
+ " ('TTK', 'S', '436'): [Phosphorylation(TTK(), TTK(), S, 436),\n",
+ " Phosphorylation(CDK2(), TTK(), S, 436),\n",
+ " Phosphorylation(BRAF(), TTK(), S, 436)],\n",
+ " ('FANCE', 'S', '249'): [],\n",
+ " ('CDC25B', 'S', '353'): [Phosphorylation(AURKA(), CDC25B(), S, 353),\n",
+ " Phosphorylation(MAPKAPK2(), CDC25B(), S, 353),\n",
+ " Phosphorylation(RPS6KA1(), CDC25B(), S, 353),\n",
+ " Phosphorylation(AURKA(kinase), CDC25B(), S, 353),\n",
+ " Phosphorylation(PLK1(), CDC25B(), S, 353),\n",
+ " Phosphorylation(AKT1(), CDC25B(), S, 353)],\n",
+ " ('MCM4', 'T', '110'): [Phosphorylation(CDK2(), MCM4(), T, 110),\n",
+ " Phosphorylation(CDK1(), MCM4(), T, 110),\n",
+ " Phosphorylation(CCNA2(bound: [CDK2, True]), MCM4(), T, 110),\n",
+ " Phosphorylation(CDK2(kinase), MCM4(), T, 110)],\n",
+ " ('RAD51AP1', 'S', '21'): [],\n",
+ " ('ORC2', 'T', '226'): [Phosphorylation(CDK2(), ORC2(), T, 226),\n",
+ " Phosphorylation(CCNA2(bound: [CDK2, True]), ORC2(), T, 226)],\n",
+ " ('CHTF18', 'S', '64'): [],\n",
+ " ('XRCC1', 'S', '266'): [],\n",
+ " ('PMS2', 'T', '573'): [],\n",
+ " ('TICRR', 'S', '865'): [Phosphorylation(CHEK1(), TICRR(), S, 865)],\n",
+ " ('MCM2', 'S', '41'): [Phosphorylation(CDC7(), MCM2(), S, 41),\n",
+ " Phosphorylation(CDK2(), MCM2(), S, 41),\n",
+ " Phosphorylation(CDK7(), MCM2(), S, 41),\n",
+ " Phosphorylation(CSNK2A1(), MCM2(), S, 41)],\n",
+ " ('DTL', 'S', '490'): [],\n",
+ " ('MCM2', 'S', '381'): [],\n",
+ " ('TOP2A', 'S', '1504'): [],\n",
+ " ('SLF2', 'S', '710'): [],\n",
+ " ('LRWD1', 'S', '243'): [],\n",
+ " ('RNF168', 'S', '134'): [],\n",
+ " ('NSD2', 'T', '114'): [],\n",
+ " ('CLSPN', 'S', '846'): [],\n",
+ " ('MCM2', 'S', '40'): [Phosphorylation(CDC7(), MCM2(), S, 40),\n",
+ " Phosphorylation(CDK2(), MCM2(), S, 40),\n",
+ " Phosphorylation(CSNK2A1(), MCM2(), S, 40),\n",
+ " Phosphorylation(CDK7(), MCM2(), S, 40)],\n",
+ " ('CDC6', 'S', '54'): [Phosphorylation(CDK2(), CDC6(), S, 54),\n",
+ " Phosphorylation(CCNA2(bound: [CDK2, True]), CDC6(), S, 54)],\n",
+ " ('PCLAF', 'S', '72'): [],\n",
+ " ('MCM2', 'S', '139'): [Phosphorylation(CDC7(), MCM2(), S, 139),\n",
+ " Phosphorylation(CDK7(), MCM2(), S, 139),\n",
+ " Phosphorylation(CDK2(), MCM2(), S, 139),\n",
+ " Phosphorylation(SIK1(), MCM2(), S, 139),\n",
+ " Phosphorylation(CSNK2A1(), MCM2(), S, 139)],\n",
+ " ('RIF1', 'S', '2265'): [],\n",
+ " ('MPLKIP', 'S', '66'): [],\n",
+ " ('CUL4B', 'S', '180'): [],\n",
+ " ('MDC1', 'T', '1239'): [],\n",
+ " ('ERCC6L', 'S', '14'): [],\n",
+ " ('CDK1', 'T', '161'): [Phosphorylation(CDK7(), CDK1(), T, 161),\n",
+ " Phosphorylation(CDK1(), CDK1(), T, 161),\n",
+ " Phosphorylation(CDK2(), CDK1(), T, 161),\n",
+ " Phosphorylation(CDK12(), CDK1(), T, 161),\n",
+ " Phosphorylation(RGCC(), CDK1(), T, 161),\n",
+ " Phosphorylation(CDK4(), CDK1(), T, 161),\n",
+ " Phosphorylation(TGFBR2(), CDK1(), T, 161),\n",
+ " Phosphorylation(CDKN1A(), CDK1(), T, 161),\n",
+ " Phosphorylation(CXADR(), CDK1(), T, 161),\n",
+ " Phosphorylation(CDKN1B(), CDK1(), T, 161),\n",
+ " Phosphorylation(PKMYT1(), CDK1(), T, 161),\n",
+ " Phosphorylation(MAP3K8(), CDK1(), T, 161),\n",
+ " Phosphorylation(CCNH(), CDK1(), T, 161)],\n",
+ " ('PLK1', 'T', '210'): [Phosphorylation(AURKA(), PLK1(), T, 210),\n",
+ " Phosphorylation(AURKB(), PLK1(), T, 210),\n",
+ " Phosphorylation(BORA(), PLK1(), T, 210),\n",
+ " Phosphorylation(CHEK1(), PLK1(), T, 210),\n",
+ " Phosphorylation(PLK1(), PLK1(), T, 210),\n",
+ " Phosphorylation(PIM1(), PLK1(), T, 210),\n",
+ " Phosphorylation(VRK2(), PLK1(), T, 210),\n",
+ " Phosphorylation(PPP1R12A(), PLK1(), T, 210),\n",
+ " Phosphorylation(MAP3K8(), PLK1(), T, 210),\n",
+ " Phosphorylation(OPTN(), PLK1(), T, 210),\n",
+ " Phosphorylation(STK10(), PLK1(), T, 210),\n",
+ " Phosphorylation(SLK(), PLK1(), T, 210),\n",
+ " Phosphorylation(PDPK1(), PLK1(), T, 210),\n",
+ " Phosphorylation(CDK2(), PLK1(), T, 210),\n",
+ " Phosphorylation(FRY(), PLK1(), T, 210),\n",
+ " Phosphorylation(ATR(), PLK1(), T, 210),\n",
+ " Phosphorylation(CCNA2(), PLK1(), T, 210),\n",
+ " Phosphorylation(AURKA(mods: (phosphorylation, T, 288)), PLK1(), T, 210),\n",
+ " Phosphorylation(MELK(), PLK1(), T, 210)],\n",
+ " ('FANCM', 'S', '34'): [],\n",
+ " ('EXO1', 'S', '610'): [],\n",
+ " ('TOP2A', 'S', '1106'): [],\n",
+ " ('DTL', 'S', '485'): [],\n",
+ " ('CLSPN', 'S', '83'): [],\n",
+ " ('SMARCA5', 'S', '755'): [],\n",
+ " ('RECQL5', 'S', '727'): [Phosphorylation(CDK1(), RECQL5(), S, 727)],\n",
+ " ('USP1', 'S', '475'): [],\n",
+ " ('ATRIP', 'S', '224'): [Phosphorylation(CDK2(), ATRIP(), S, 224),\n",
+ " Phosphorylation(CDK2(kinase), ATRIP(), S, 224)],\n",
+ " ('USP37', 'S', '652'): [],\n",
+ " ('RAD18', 'S', '99'): [Phosphorylation(CDK2(), RAD18(), S, 99)],\n",
+ " ('CHTF18', 'S', '225'): [],\n",
+ " ('DBF4', 'T', '553'): [],\n",
+ " ('TOP2B', 'S', '1236'): [],\n",
+ " ('KPNA2', 'S', '490'): [],\n",
+ " ('UFD1', 'S', '247'): [],\n",
+ " ('MSH6', 'S', '309'): [],\n",
+ " ('MSH6', 'S', '91'): [],\n",
+ " ('RIF1', 'S', '2393'): [],\n",
+ " ('EXO1', 'T', '475'): [],\n",
+ " ('PALB2', 'S', '781'): [],\n",
+ " ('RAD50', 'S', '635'): [Phosphorylation(ATM(), RAD50(), S, 635),\n",
+ " Phosphorylation(ATR(), RAD50(), S, 635)],\n",
+ " ('SAMHD1', 'T', '592'): [Phosphorylation(CDK1(), SAMHD1(), T, 592),\n",
+ " Phosphorylation(CDK2(), SAMHD1(), T, 592),\n",
+ " Phosphorylation(CCNA2(), SAMHD1(), T, 592),\n",
+ " Phosphorylation(IL7(), SAMHD1(), T, 592),\n",
+ " Phosphorylation(IL2(), SAMHD1(), T, 592),\n",
+ " Phosphorylation(CDK6(), SAMHD1(), T, 592),\n",
+ " Phosphorylation(DBF4(), SAMHD1(), T, 592),\n",
+ " Phosphorylation(CD4(), SAMHD1(), T, 592)],\n",
+ " ('NSD2', 'T', '544'): [],\n",
+ " ('TOP2A', 'S', '1247'): [Phosphorylation(CDK1(), TOP2A(), S, 1247),\n",
+ " Phosphorylation(MAPK3(), TOP2A(), S, 1247),\n",
+ " Phosphorylation(MAPK1(), TOP2A(), S, 1247),\n",
+ " Phosphorylation(RAC1(), TOP2A(), S, 1247)],\n",
+ " ('CHAF1A', 'S', '775'): [],\n",
+ " ('USP37', 'S', '650'): [],\n",
+ " ('RIF1', 'S', '1542'): [],\n",
+ " ('CDC25B', 'S', '375'): [Phosphorylation(BRSK1(), CDC25B(), S, 375),\n",
+ " Phosphorylation(MAPKAPK2(), CDC25B(), S, 375),\n",
+ " Phosphorylation(PLK1(), CDC25B(), S, 375)],\n",
+ " ('MCM6', 'S', '762'): [],\n",
+ " ('ATAD5', 'S', '44'): [],\n",
+ " ('TOPBP1', 'S', '1504'): [],\n",
+ " ('ATRIP', 'S', '518'): [],\n",
+ " ('TTK', 'T', '33'): [Phosphorylation(TTK(), TTK(), T, 33),\n",
+ " Phosphorylation(PLK1(), TTK(), T, 33)],\n",
+ " ('XRCC1', 'T', '257'): [],\n",
+ " ('CLSPN', 'S', '225'): [],\n",
+ " ('MSH6', 'S', '830'): [],\n",
+ " ('ORC6', 'T', '195'): [Phosphorylation(CDK1(), ORC6(), T, 195)],\n",
+ " ('DONSON', 'S', '34'): [],\n",
+ " ('EXO1', 'S', '598'): [],\n",
+ " ('WDHD1', 'S', '333'): [],\n",
+ " ('SMC6', 'S', '11'): [],\n",
+ " ('ATRIP', 'S', '239'): [Phosphorylation(CDK2(), ATRIP(), S, 239)],\n",
+ " ('RIF1', 'S', '1579'): [],\n",
+ " ('TTK', 'S', '281'): [Phosphorylation(BRAF(), TTK(), S, 281),\n",
+ " Phosphorylation(BRAF(muts: (V, 600, E)), TTK(), S, 281),\n",
+ " Phosphorylation(CDK1(), TTK(), S, 281)],\n",
+ " ('CDK1', 'T', '14'): [Phosphorylation(MYT1(), CDK1(), T, 14),\n",
+ " Phosphorylation(PKMYT1(), CDK1(), T, 14),\n",
+ " Phosphorylation(WEE1(), CDK1(), T, 14),\n",
+ " Phosphorylation(CDK1(), CDK1(), T, 14),\n",
+ " Phosphorylation(CHEK1(), CDK1(), T, 14),\n",
+ " Phosphorylation(IRS1(), CDK1(), T, 14),\n",
+ " Phosphorylation(ID1(), CDK1(), T, 14),\n",
+ " Phosphorylation(HGF(), CDK1(), T, 14),\n",
+ " Phosphorylation(WEE2(), CDK1(), T, 14),\n",
+ " Phosphorylation(CHEK2(), CDK1(), T, 14),\n",
+ " Phosphorylation(MASTL(), CDK1(), T, 14),\n",
+ " Phosphorylation(TGFBR2(), CDK1(), T, 14)],\n",
+ " ('TICRR', 'T', '1678'): [],\n",
+ " ('TICRR', 'S', '923'): [],\n",
+ " ('ERCC6L', 'S', '1028'): [],\n",
+ " ('ORC1', 'S', '273'): [Phosphorylation(CDK2(), ORC1(), S, 273),\n",
+ " Phosphorylation(CDK1(), ORC1(), S, 273)],\n",
+ " ('DBF4', 'S', '381'): [],\n",
+ " ('DBF4', 'S', '359'): [],\n",
+ " ('RIF1', 'S', '1616'): [],\n",
+ " ('TOP2A', 'S', '1213'): [Phosphorylation(CDK1(), TOP2A(), S, 1213),\n",
+ " Phosphorylation(MAPK1(), TOP2A(), S, 1213),\n",
+ " Phosphorylation(MAPK3(), TOP2A(), S, 1213),\n",
+ " Phosphorylation(CDC7(), TOP2A(), S, 1213)],\n",
+ " ('RIF1', 'S', '2176'): [],\n",
+ " ('EXO1', 'S', '702'): [],\n",
+ " ('NSD2', 'T', '115'): [],\n",
+ " ('CDK1', 'Y', '15'): [Phosphorylation(WEE1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(MYT1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(PKMYT1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(LYN(), CDK1(), Y, 15),\n",
+ " Phosphorylation(BDNF(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CHEK1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(ERBB2(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDC25C(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDK1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(LCK(), CDK1(), Y, 15),\n",
+ " Phosphorylation(WEE1(), CDK1(mods: (phosphorylation, T, 161)), Y, 15),\n",
+ " Phosphorylation(CHEK2(), CDK1(), Y, 15),\n",
+ " Phosphorylation(PEF1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(WEE1(), CDK1(mods: (phosphorylation, T, 161), (phosphorylation, T, 14)), Y, 15),\n",
+ " Phosphorylation(FHL1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDKN1A(), CDK1(), Y, 15),\n",
+ " Phosphorylation(ATM(), CDK1(), Y, 15),\n",
+ " Phosphorylation(WEE2(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDK2(), CDK1(), Y, 15),\n",
+ " Phosphorylation(MMP8(), CDK1(), Y, 15),\n",
+ " Phosphorylation(TAP1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(MASTL(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDC25A(), CDK1(), Y, 15),\n",
+ " Phosphorylation(NR1D1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(NBN(), CDK1(), Y, 15),\n",
+ " Phosphorylation(PRTN3(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDK7(), CDK1(), Y, 15),\n",
+ " Phosphorylation(IRS1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(TAP2(), CDK1(), Y, 15),\n",
+ " Phosphorylation(PRKN(mods: (modification)), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDKN1A(mods: (modification)), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDC6(), CDK1(), Y, 15),\n",
+ " Phosphorylation(ERBB2(mods: (modification)), CDK1(), Y, 15),\n",
+ " Phosphorylation(DDX39A(), CDK1(), Y, 15),\n",
+ " Phosphorylation(FLNB(), CDK1(), Y, 15),\n",
+ " Phosphorylation(FHL2(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CASP3(), CDK1(), Y, 15),\n",
+ " Phosphorylation(SRC(), CDK1(), Y, 15),\n",
+ " Phosphorylation(FGF2(), CDK1(), Y, 15),\n",
+ " Phosphorylation(TP53(), CDK1(), Y, 15),\n",
+ " Phosphorylation(SRC(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CAT(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CDK1(mods: (modification)), CDK1(), Y, 15),\n",
+ " Phosphorylation(CHEK1(mods: (modification)), CDK1(), Y, 15),\n",
+ " Phosphorylation(SPPL2B(), CDK1(), Y, 15),\n",
+ " Phosphorylation(CIB1(), CDK1(), Y, 15),\n",
+ " Phosphorylation(PTS(), CDK1(), Y, 15)],\n",
+ " ('USP10', 'S', '547'): [],\n",
+ " ('MSH6', 'S', '227'): [],\n",
+ " ('TOP2A', 'S', '1374'): [],\n",
+ " ('TOPBP1', 'S', '888'): [],\n",
+ " ('DTL', 'T', '429'): [],\n",
+ " ('TOP2A', 'S', '1525'): [Phosphorylation(CSNK2A1(), TOP2A(), S, 1525),\n",
+ " Phosphorylation(PLK1(), TOP2A(), S, 1525),\n",
+ " Phosphorylation(CSNK2A2(), TOP2A(), S, 1525),\n",
+ " Phosphorylation(CDC7(), TOP2A(), S, 1525)],\n",
+ " ('RPA1', 'S', '384'): [],\n",
+ " ('MCM6', 'S', '13'): [],\n",
+ " ('PARP1', 'S', '257'): [],\n",
+ " ('USP1', 'S', '313'): [Phosphorylation(CDK1(), USP1(), S, 313)],\n",
+ " ('RFC1', 'S', '156'): [],\n",
+ " ('EXO1', 'S', '639'): [],\n",
+ " ('RIF1', 'S', '2172'): [],\n",
+ " ('BLM', 'S', '28'): [],\n",
+ " ('EXO1', 'S', '815'): [],\n",
+ " ('DTL', 'S', '679'): [],\n",
+ " ('RIF1', 'S', '2157'): [],\n",
+ " ('PKMYT1', 'S', '143'): [],\n",
+ " ('USP1', 'S', '327'): [],\n",
+ " ('WDHD1', 'S', '958'): [],\n",
+ " ('INO80B', 'T', '60'): [],\n",
+ " ('CLSPN', 'T', '1287'): [],\n",
+ " ('TOP2A', 'S', '1474'): [],\n",
+ " ('RAD51AP1', 'T', '66'): [],\n",
+ " ('RIF1', 'S', '2348'): [],\n",
+ " ('ORC1', 'S', '201'): [],\n",
+ " ('SLF2', 'T', '711'): [],\n",
+ " ('POLD3', 'T', '277'): [],\n",
+ " ('MCMBP', 'T', '160'): [],\n",
+ " ('NUDT5', 'S', '3'): [],\n",
+ " ('WRN', 'S', '1133'): [Phosphorylation(CDK1(), WRN(), S, 1133),\n",
+ " Phosphorylation(CDK2(), WRN(), S, 1133)],\n",
+ " ('TTK', 'S', '821'): [Phosphorylation(CDK1(), TTK(), S, 821),\n",
+ " Phosphorylation(TTK(), TTK(), S, 821),\n",
+ " Phosphorylation(MAPK3(), TTK(), S, 821),\n",
+ " Phosphorylation(MAPK1(), TTK(), S, 821)],\n",
+ " ('BRCA2', 'S', '93'): [],\n",
+ " ('TICRR', 'S', '1750'): [],\n",
+ " ('POLQ', 'S', '1587'): [],\n",
+ " ('MDC1', 'T', '1157'): [],\n",
+ " ('SMARCC1', 'T', '398'): [],\n",
+ " ('RAD18', 'S', '103'): [],\n",
+ " ('PARP1', 'S', '782'): [Phosphorylation(CDK5(), PARP1(), S, 782),\n",
+ " Phosphorylation(PRKACA(), PARP1(), S, 782)],\n",
+ " ('TERF2IP', 'S', '36'): [],\n",
+ " ('EXO1', 'S', '700'): [],\n",
+ " ('NSD2', 'T', '110'): [],\n",
+ " ('EXO1', 'S', '746'): [Phosphorylation(CHEK1(), EXO1(), S, 746),\n",
+ " Phosphorylation(PRKAA1(), EXO1(), S, 746)],\n",
+ " ('BAZ1B', 'S', '349'): [],\n",
+ " ('ERCC6L', 'S', '820'): [],\n",
+ " ('ORC2', 'S', '280'): [],\n",
+ " ('LRWD1', 'S', '259'): [],\n",
+ " ('MDC1', 'S', '453'): [Phosphorylation(CSNK2A1(), MDC1(), S, 453)],\n",
+ " ('BOD1L1', 'S', '2905'): []}"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build a lookup keyed by (gene name, residue, position) for every valid\n",
+ "# site. Each value lists the statements for that gene whose enzyme has an\n",
+ "# 'HGNC' entry in its db_refs and whose residue and position both match\n",
+ "# the site exactly; sites without any such statement map to an empty list.\n",
+ "stmts_by_site = {}\n",
+ "for site in valid_sites:\n",
+ "    stmts = [\n",
+ "        stmt for stmt in stmts_by_target[site.gene_name]\n",
+ "        if stmt.enz and 'HGNC' in stmt.enz.db_refs\n",
+ "        and stmt.residue == site.orig_res\n",
+ "        and stmt.position == site.orig_pos\n",
+ "    ]\n",
+ "    stmts_by_site[site.gene_name, site.orig_res, site.orig_pos] = stmts\n",
+ "stmts_by_site"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "acc350d6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(174, 42)"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the total number of sites next to the number of sites that\n",
+ "# have at least one matching statement\n",
+ "len(stmts_by_site), sum(1 for site_stmts in stmts_by_site.values() if site_stmts)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "b22dcd50",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[(('EXO1', 'S', '714'),\n",
+ " [Phosphorylation(ATM(), EXO1(), S, 714),\n",
+ " Phosphorylation(ATR(), EXO1(), S, 714)]),\n",
+ " (('EXO1', 'S', '610'), []),\n",
+ " (('EXO1', 'T', '475'), []),\n",
+ " (('EXO1', 'S', '598'), []),\n",
+ " (('EXO1', 'S', '702'), []),\n",
+ " (('EXO1', 'S', '639'), []),\n",
+ " (('EXO1', 'S', '815'), []),\n",
+ " (('EXO1', 'S', '700'), []),\n",
+ " (('EXO1', 'S', '746'),\n",
+ " [Phosphorylation(CHEK1(), EXO1(), S, 746),\n",
+ " Phosphorylation(PRKAA1(), EXO1(), S, 746)])]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Inspect the per-site statements for a single gene (EXO1) as an example\n",
+ "[((gene, res, pos), site_stmts)\n",
+ " for (gene, res, pos), site_stmts in stmts_by_site.items()\n",
+ " if gene == 'EXO1']"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}