From a11723e34ef225347b7fb059c44412f8f06cc18d Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 7 Jun 2024 16:14:28 -0700 Subject: [PATCH 01/23] Remove constraint on flask-jwt-extended --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ea0581ea2..b56af24b5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,7 +70,7 @@ web = flask-restx flask-wtf bootstrap-flask>=2.0.0 - flask_jwt_extended<4.0.0 + flask_jwt_extended docstring-parser indralab_auth_tools @ git+https://github.com/indralab/ui_util.git#egg=indralab_auth_tools&subdirectory=indralab_auth_tools pusher From a3f4af80ab1365c7956681f9b6058103aab89fc1 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 7 Jun 2024 16:18:36 -0700 Subject: [PATCH 02/23] Update flask_jwt_extended decorator --- .../apps/curator/curator_blueprint.py | 34 +++++++++---------- src/indra_cogex/apps/data_display/__init__.py | 8 ++--- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/indra_cogex/apps/curator/curator_blueprint.py b/src/indra_cogex/apps/curator/curator_blueprint.py index 6b5725c7f..37d8df69b 100644 --- a/src/indra_cogex/apps/curator/curator_blueprint.py +++ b/src/indra_cogex/apps/curator/curator_blueprint.py @@ -6,7 +6,7 @@ import flask from flask import Response, abort, redirect, render_template, url_for -from flask_jwt_extended import jwt_optional +from flask_jwt_extended import jwt_required from flask_wtf import FlaskForm from indra.statements import Statement from wtforms import BooleanField, StringField, SubmitField, TextAreaField @@ -81,7 +81,7 @@ def gene_ontology(): @curator_blueprint.route("/go/", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def curate_go(term: str): stmts = indra_subnetwork_go( go_term=("GO", term), @@ -163,7 +163,7 @@ def mesh(): @curator_blueprint.route("/mesh/", methods=["GET"]) @curator_blueprint.route("/mesh//", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def curate_mesh(term: str, subset: Optional[str] = None): """Curate all statements for papers with a given MeSH annotation.""" if subset is None: @@ -311,7 +311,7 @@ def _render_evidence_counts( @curator_blueprint.route("/ppi", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def ppi(): """The PPI curator looks for the highest evidences for PPIs that don't appear in a database.""" return _render_func( @@ -329,7 +329,7 @@ def ppi(): @curator_blueprint.route("/goa", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def goa(): """The GO Annotation curator looks for the highest evidence gene-GO term relations that don't appear in GOA.""" return _render_func( @@ -348,7 +348,7 @@ def goa(): @curator_blueprint.route("/conflicts", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def conflicts(): """Curate statements with conflicting prior curations.""" return _render_func( @@ -363,7 +363,7 @@ def conflicts(): @curator_blueprint.route("/tf", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def tf(): """Curate transcription factors.""" return _render_func( @@ -380,7 +380,7 @@ def tf(): @curator_blueprint.route("/kinase", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def kinase(): """Curate kinases.""" return _render_func( @@ -397,7 +397,7 @@ def kinase(): @curator_blueprint.route("/phosphatase", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def phosphatase(): """Curate phosphatases.""" return _render_func( @@ -414,7 +414,7 @@ def phosphatase(): @curator_blueprint.route("/dub", methods=["GET"]) 
-@jwt_optional +@jwt_required(optional=True) def deubiquitinase(): """Curate deubiquitinases.""" return _render_func( @@ -431,7 +431,7 @@ def deubiquitinase(): @curator_blueprint.route("/mirna", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def mirna(): """Curate miRNAs.""" return _render_func( @@ -449,7 +449,7 @@ def mirna(): @curator_blueprint.route("/disprot", methods=["GET"]) @curator_blueprint.route("/disprot/", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def disprot(object_prefix: Optional[str] = None): """Curate intrensically disordered proteins.""" assert object_prefix in {None, "hgnc", "go", "chebi"} @@ -466,14 +466,14 @@ def disprot(object_prefix: Optional[str] = None): @curator_blueprint.route("/modulator/", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def modulator(): """Get small molecule modulators for the given protein.""" raise NotImplementedError @curator_blueprint.route("/entity/:", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def entity(prefix: str, identifier: str): """Get all statements about the given entity.""" if prefix in {"pubmed", "pmc", "doi", "trid"}: @@ -548,7 +548,7 @@ def get_term(self) -> Tuple[str, str]: @curator_blueprint.route("/paper", methods=["GET", "POST"]) -@jwt_optional +@jwt_required(optional=True) def paper(): """Get all statements for the given paper.""" form = PaperForm() @@ -622,7 +622,7 @@ def get_nodes(self) -> List[Tuple[str, str]]: @curator_blueprint.route("/subnetwork", methods=["GET", "POST"]) -@jwt_optional +@jwt_required(optional=True) def subnetwork(): """Get all statements induced by the nodes.""" form = NodesForm() @@ -659,7 +659,7 @@ def subnetwork(): @curator_blueprint.route("/statement/", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def curate_statement(stmt_hash: int): """Curate all evidences for the statement.""" start_time = time.time() diff --git a/src/indra_cogex/apps/data_display/__init__.py b/src/indra_cogex/apps/data_display/__init__.py index 5b7a3c4e9..f07e1342f 100644 --- a/src/indra_cogex/apps/data_display/__init__.py +++ b/src/indra_cogex/apps/data_display/__init__.py @@ -14,7 +14,7 @@ import requests from flask import Blueprint, Response, abort, jsonify, render_template, request -from flask_jwt_extended import jwt_optional +from flask_jwt_extended import jwt_required from indra.assemblers.english import EnglishAssembler from indra.sources.indra_db_rest import IndraDBRestAPIError @@ -223,7 +223,7 @@ def biolookup(curie): # Endpoint for getting evidence @data_display_blueprint.route("/expand/", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def get_evidence(stmt_hash): try: # Todo: @@ -297,7 +297,7 @@ def get_evidence(stmt_hash): # Serve the statement display template @data_display_blueprint.route("/statement_display", methods=["GET"]) -@jwt_optional +@jwt_required(optional=True) def statement_display(): user, roles = resolve_auth(dict(request.args)) email = user.email if user else "" @@ -400,7 +400,7 @@ def _get_user(): @data_display_blueprint.route("/curate/", methods=["POST"]) -@jwt_optional +@jwt_required(optional=True) def submit_curation_endpoint(hash_val: str): email = _get_user() if not isinstance(email, str): From 853ac003f48fd1e6e099201bc1bf50cfa5572282 Mon Sep 17 00:00:00 2001 From: kkaris Date: Thu, 27 Jun 2024 11:28:55 -0700 Subject: [PATCH 03/23] Temporarily refer to branch for ui_util install --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 
b56af24b5..09b0e49e6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -72,7 +72,7 @@ web = bootstrap-flask>=2.0.0 flask_jwt_extended docstring-parser - indralab_auth_tools @ git+https://github.com/indralab/ui_util.git#egg=indralab_auth_tools&subdirectory=indralab_auth_tools + indralab_auth_tools @ git+https://github.com/indralab/ui_util.git@flask3#egg=indralab_auth_tools&subdirectory=indralab_auth_tools pusher markupsafe<2.1.0 gunicorn = From cf3859dc36f94389a97decc5536453b0dfe202c5 Mon Sep 17 00:00:00 2001 From: kkaris Date: Thu, 27 Jun 2024 13:10:57 -0700 Subject: [PATCH 04/23] Temporarily refer to branch for indra install as well --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 09b0e49e6..e1f8bba82 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,7 @@ keywords = [options] install_requires = - indra @ git+https://github.com/sorgerlab/indra.git + indra @ git+https://github.com/sorgerlab/indra.git@gilda-flask-deps neo4j click more_click>=0.1.1 From 3105c988b83fd8a14137d51995bb76f315d7a4ce Mon Sep 17 00:00:00 2001 From: kkaris Date: Thu, 27 Jun 2024 15:15:04 -0700 Subject: [PATCH 05/23] Fix pydantic dependency --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index e1f8bba82..54013a2aa 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,7 +48,7 @@ install_requires = statsmodels pandas python-dateutil - pydantic<2.0.0 + pydantic>=1.10,<2.0.0 include_package_data = True python_requires = >=3.7 From 9ac0cef290344c9fc27f4c6be4b552c33e6c9fdd Mon Sep 17 00:00:00 2001 From: kkaris Date: Thu, 27 Jun 2024 17:06:29 -0700 Subject: [PATCH 06/23] Force update of Flask-JWT-Extended and Flask-WTF --- docker/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 482ad21a7..31a524847 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -8,6 +8,8 @@ ARG RGD_VERSION RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea] && \ python -m indra.ontology.bio build +RUN pip3 install -U Flask-JWT-Extended && \ + pip3 install -U Flask-WTF # Copy the names.tsv files for the pre-built pyobo caches to this directory as # mg_names.tsv and rgd_names.tsv, respectively. This is necessary to avoid running the From b172b406fd740aed91d4f85c622bce651f092d77 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 28 Jun 2024 14:09:59 -0700 Subject: [PATCH 07/23] Also restrict SQLAlchemy --- docker/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 31a524847..c8cda2239 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,7 +9,8 @@ ARG RGD_VERSION RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea] && \ python -m indra.ontology.bio build RUN pip3 install -U Flask-JWT-Extended && \ - pip3 install -U Flask-WTF + pip3 install -U Flask-WTF && \ + pip3 install -U 'SQLAlchemy<2' # Copy the names.tsv files for the pre-built pyobo caches to this directory as # mg_names.tsv and rgd_names.tsv, respectively. 
This is necessary to avoid running the From 95c6833880e9f6ba9a81a3019baf46508b3a994d Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 28 Jun 2024 15:07:16 -0700 Subject: [PATCH 08/23] Working new Dockerfile --- docker/Dockerfile | 65 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index c8cda2239..12d0dbab5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM labsyspharm/indra +FROM ubuntu:20.04 ARG INDRA_NEO4J_URL ARG INDRA_NEO4J_USER @@ -6,8 +6,67 @@ ARG INDRA_NEO4J_PASSWORD ARG MGI_VERSION ARG RGD_VERSION -RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea] && \ - python -m indra.ontology.bio build +# Setup copied and slightly modified from indra_docker Dockerfile +ARG INDRA_BUILD_BRANCH + +# Set working folder +ENV DIRPATH /sw +WORKDIR $DIRPATH + +RUN apt-get update && \ + # Install Java + apt-get install -y openjdk-8-jdk && \ + # jnius-indra requires cython which requires gcc + apt-get install -y git wget zip unzip bzip2 gcc graphviz graphviz-dev \ + pkg-config python3-dev python3-pip && \ + ln -s /usr/bin/python3 /usr/bin/python + +# Set default character encoding +# See http://stackoverflow.com/questions/27931668/encoding-problems-when-running-an-app-in-docker-python-java-ruby-with-u/27931669 +# See http://stackoverflow.com/questions/39760663/docker-ubuntu-bin-sh-1-locale-gen-not-found +RUN apt-get install -y locales && \ + locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + +# Set environment variables +ENV BNGPATH $DIRPATH/BioNetGen-2.4.0 +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 + +RUN git clone https://github.com/sorgerlab/indra.git && \ + cd indra && \ + git checkout $INDRA_BUILD_BRANCH && \ + git branch && \ + mkdir /root/.config && \ + mkdir /root/.config/indra && \ + echo "[indra]" > /root/.config/indra/config.ini && \ + # Install Python dependencies + pip install --upgrade pip && \ + # Install cython first for pyjnius + pip install cython && \ + pip install -e .[all] && \ + pip uninstall -y enum34 && \ + # Pre-build the bio ontology + python -m indra.ontology.bio build && \ + # Download Adeft models + python -m adeft.download && \ + # Download protmapper resources + python -m protmapper.resources && \ + # Install BioNetGen + cd $DIRPATH && \ + wget "https://github.com/RuleWorld/bionetgen/releases/download/BioNetGen-2.4.0/BioNetGen-2.4.0-Linux.tgz" \ + -O bionetgen.tar.gz -nv && \ + tar xzf bionetgen.tar.gz && \ + # Install things related to API deployment + pip install gunicorn + + +# CoGEx specific setup +RUN pip3 install git+https://github.com/bgyori/indra_cogex.git@flask3#egg=indra_cogex[web,gunicorn,gsea] + +# todo: Figure out how to get the installs working in setup.cfg instead of haiving to do this +RUN pip3 install --upgrade --force-reinstall git+https://github.com/gyorilab/ui_util.git@flask3#egg=indralab_auth_tools&subdirectory=indralab_auth_tools RUN pip3 install -U Flask-JWT-Extended && \ pip3 install -U Flask-WTF && \ pip3 install -U 'SQLAlchemy<2' From ac93f00531ceb5f03fa0b7e86dd8acfcaf285212 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 28 Jun 2024 15:42:34 -0700 Subject: [PATCH 09/23] Reference gyorilab instead of indralab --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 54013a2aa..949d788d9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -72,7 +72,7 @@ web = bootstrap-flask>=2.0.0 
flask_jwt_extended docstring-parser - indralab_auth_tools @ git+https://github.com/indralab/ui_util.git@flask3#egg=indralab_auth_tools&subdirectory=indralab_auth_tools + indralab_auth_tools @ git+https://github.com/gyorilab/ui_util.git@flask3#egg=indralab_auth_tools&subdirectory=indralab_auth_tools pusher markupsafe<2.1.0 gunicorn = From 40edfc5ec60c03939edc4b4592fb34149c0f802c Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 28 Jun 2024 16:04:35 -0700 Subject: [PATCH 10/23] Rely on configs to install correct packages --- docker/Dockerfile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 12d0dbab5..a8e1d80a7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -65,12 +65,6 @@ RUN git clone https://github.com/sorgerlab/indra.git && \ # CoGEx specific setup RUN pip3 install git+https://github.com/bgyori/indra_cogex.git@flask3#egg=indra_cogex[web,gunicorn,gsea] -# todo: Figure out how to get the installs working in setup.cfg instead of haiving to do this -RUN pip3 install --upgrade --force-reinstall git+https://github.com/gyorilab/ui_util.git@flask3#egg=indralab_auth_tools&subdirectory=indralab_auth_tools -RUN pip3 install -U Flask-JWT-Extended && \ - pip3 install -U Flask-WTF && \ - pip3 install -U 'SQLAlchemy<2' - # Copy the names.tsv files for the pre-built pyobo caches to this directory as # mg_names.tsv and rgd_names.tsv, respectively. This is necessary to avoid running the # full cache build process for these resources (which can take hours). From 05b51b53a4a813b6e99b374921793fa92e0e9ad0 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 28 Jun 2024 16:22:16 -0700 Subject: [PATCH 11/23] Try removing markupsafe pin --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 949d788d9..bb0dbf99c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -74,7 +74,7 @@ web = docstring-parser indralab_auth_tools @ git+https://github.com/gyorilab/ui_util.git@flask3#egg=indralab_auth_tools&subdirectory=indralab_auth_tools pusher - markupsafe<2.1.0 + markupsafe gunicorn = gunicorn gsea = From e072917c5c295779a7b9d455cace7e77aa5fbdb2 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 28 Jun 2024 16:52:34 -0700 Subject: [PATCH 12/23] Put file copy in the beginning --- docker/Dockerfile | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a8e1d80a7..8e535fa3c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,6 +6,14 @@ ARG INDRA_NEO4J_PASSWORD ARG MGI_VERSION ARG RGD_VERSION +# Copy the names.tsv files for the pre-built pyobo caches to this directory as +# mg_names.tsv and rgd_names.tsv, respectively. This is necessary to avoid running the +# full cache build process for these resources (which can take hours). 
+RUN mkdir -p /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/ && \ + mkdir -p /root/.data/pyobo/raw/rgd/${RGD_VERSION}/cache/ +COPY mgi_names.tsv /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/names.tsv +COPY rgd_names.tsv /root/.data/pyobo/raw/rgd/${RGD_VERSION}/cache/names.tsv + # Setup copied and slightly modified from indra_docker Dockerfile ARG INDRA_BUILD_BRANCH @@ -61,18 +69,9 @@ RUN git clone https://github.com/sorgerlab/indra.git && \ # Install things related to API deployment pip install gunicorn - # CoGEx specific setup RUN pip3 install git+https://github.com/bgyori/indra_cogex.git@flask3#egg=indra_cogex[web,gunicorn,gsea] -# Copy the names.tsv files for the pre-built pyobo caches to this directory as -# mg_names.tsv and rgd_names.tsv, respectively. This is necessary to avoid running the -# full cache build process for these resources (which can take hours). -RUN mkdir -p /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/ && \ - mkdir -p /root/.data/pyobo/raw/rgd/${RGD_VERSION}/cache/ -COPY mgi_names.tsv /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/names.tsv -COPY rgd_names.tsv /root/.data/pyobo/raw/rgd/${RGD_VERSION}/cache/names.tsv - # Force rebuild the pickle caches for the GSEA and MSEA apps RUN python -m indra_cogex.client.enrichment.utils --force From 414075fea27ecbd3b14c47cdc4cd5a14f64c74c5 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 28 Jun 2024 17:03:39 -0700 Subject: [PATCH 13/23] Try removing pydantic --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index bb0dbf99c..941afdd7f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,7 +48,6 @@ install_requires = statsmodels pandas python-dateutil - pydantic>=1.10,<2.0.0 include_package_data = True python_requires = >=3.7 From 2420565b44a8b58d1439437c7fb4d7085b0b3a1b Mon Sep 17 00:00:00 2001 From: kkaris Date: Mon, 1 Jul 2024 11:27:45 -0700 Subject: [PATCH 14/23] Build from Ubuntu 22 to use python 3.10 --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 8e535fa3c..a02ce5b79 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:22.04 ARG INDRA_NEO4J_URL ARG INDRA_NEO4J_USER From ee4ada508d44f5cf54cd086e7d82ecd86b6f7f8e Mon Sep 17 00:00:00 2001 From: kkaris Date: Mon, 1 Jul 2024 14:20:24 -0700 Subject: [PATCH 15/23] Skip python3-dev install --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a02ce5b79..89a4b1dc8 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,7 +26,7 @@ RUN apt-get update && \ apt-get install -y openjdk-8-jdk && \ # jnius-indra requires cython which requires gcc apt-get install -y git wget zip unzip bzip2 gcc graphviz graphviz-dev \ - pkg-config python3-dev python3-pip && \ + pkg-config python3-pip && \ ln -s /usr/bin/python3 /usr/bin/python # Set default character encoding From ad4861971d7e38057438437e29eb0eef47ae7036 Mon Sep 17 00:00:00 2001 From: kkaris Date: Mon, 1 Jul 2024 14:52:23 -0700 Subject: [PATCH 16/23] Revert "Build from Ubuntu 22 to use python 3.10" This reverts commit 2420565b44a8b58d1439437c7fb4d7085b0b3a1b. 
--- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 89a4b1dc8..d14a929de 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 +FROM ubuntu:20.04 ARG INDRA_NEO4J_URL ARG INDRA_NEO4J_USER From 324bcea48490462788c235702b1b0565e9b685de Mon Sep 17 00:00:00 2001 From: kkaris Date: Tue, 2 Jul 2024 13:49:37 -0700 Subject: [PATCH 17/23] Restore installs to non-branch --- docker/Dockerfile | 2 +- setup.cfg | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d14a929de..9cee9bf05 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -70,7 +70,7 @@ RUN git clone https://github.com/sorgerlab/indra.git && \ pip install gunicorn # CoGEx specific setup -RUN pip3 install git+https://github.com/bgyori/indra_cogex.git@flask3#egg=indra_cogex[web,gunicorn,gsea] +RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea] # Force rebuild the pickle caches for the GSEA and MSEA apps RUN python -m indra_cogex.client.enrichment.utils --force diff --git a/setup.cfg b/setup.cfg index 941afdd7f..dff2e2e2e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,7 @@ keywords = [options] install_requires = - indra @ git+https://github.com/sorgerlab/indra.git@gilda-flask-deps + indra @ git+https://github.com/sorgerlab/indra.git neo4j click more_click>=0.1.1 @@ -71,7 +71,7 @@ web = bootstrap-flask>=2.0.0 flask_jwt_extended docstring-parser - indralab_auth_tools @ git+https://github.com/gyorilab/ui_util.git@flask3#egg=indralab_auth_tools&subdirectory=indralab_auth_tools + indralab_auth_tools @ git+https://github.com/gyorilab/ui_util.git#egg=indralab_auth_tools&subdirectory=indralab_auth_tools pusher markupsafe gunicorn = From d44347c6572b5593bda25b7bc2cf27b702416b36 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Fri, 19 Jul 2024 21:41:04 -0400 Subject: [PATCH 18/23] Add optional source/target name for Relation --- src/indra_cogex/client/neo4j_client.py | 5 ++++- src/indra_cogex/representation.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/indra_cogex/client/neo4j_client.py b/src/indra_cogex/client/neo4j_client.py index 5a3f22f1d..a03ccd4b2 100644 --- a/src/indra_cogex/client/neo4j_client.py +++ b/src/indra_cogex/client/neo4j_client.py @@ -868,8 +868,11 @@ def neo4j_to_relations(neo4j_path: neo4j.graph.Path) -> List[Relation]: rel_type = neo4j_relation.type props = dict(neo4j_relation) source_ns, source_id = process_identifier(neo4j_relation.start_node["id"]) + source_name = neo4j_relation.start_node.get("name") target_ns, target_id = process_identifier(neo4j_relation.end_node["id"]) - rel = Relation(source_ns, source_id, target_ns, target_id, rel_type, props) + target_name = neo4j_relation.end_node.get("name") + rel = Relation(source_ns, source_id, target_ns, target_id, rel_type, props, + source_name=source_name, target_name=target_name) relations.append(rel) return relations diff --git a/src/indra_cogex/representation.py b/src/indra_cogex/representation.py index 7e3e76786..2cb3a621b 100644 --- a/src/indra_cogex/representation.py +++ b/src/indra_cogex/representation.py @@ -164,6 +164,8 @@ def __init__( target_id: str, rel_type: str, data: Optional[Mapping[str, Any]] = None, + source_name: Optional[str] = None, + target_name: Optional[str] = None, ): """Initialize the relation. @@ -181,6 +183,10 @@ def __init__( The type of relation. 
data : An optional data dictionary associated with the relation. + source_name : + An optional name for the source node. + target_name : + An optional name for the target node. """ self.source_ns = source_ns self.source_id = source_id @@ -188,6 +194,8 @@ def __init__( self.target_id = target_id self.rel_type = rel_type self.data = data if data else {} + self.source_name = source_name + self.target_name = target_name def to_json(self) -> RelJson: """Serialize the relation to JSON format. @@ -204,6 +212,8 @@ def to_json(self) -> RelJson: "target_id": self.target_id, "rel_type": self.rel_type, "data": self.data, + "source_name": self.source_name, + "target_name": self.target_name, } def __str__(self): # noqa:D105 From 6500fe1bdaac88eb17bfde0da8d2c35ba6fab496 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Fri, 19 Jul 2024 22:02:48 -0400 Subject: [PATCH 19/23] Add test for names in relations --- tests/test_neo4j_client.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_neo4j_client.py b/tests/test_neo4j_client.py index 371dea0d0..b74f55cb1 100644 --- a/tests/test_neo4j_client.py +++ b/tests/test_neo4j_client.py @@ -59,3 +59,16 @@ def test_process_identifier(): assert process_identifier("hgnc:6871") == ("HGNC", "6871") assert process_identifier("chebi:1234") == ("CHEBI", "CHEBI:1234") assert process_identifier("uploc:SL-0086") == ("UPLOC", "SL-0086") + + +@pytest.mark.nonpublic +def test_get_source_relations(): + nc = _get_client() + relations = nc.get_source_relations( + target=("HGNC", "9875"), + relation="indra_rel", + source_type='BioEntity', + target_type='BioEntity', + ) + + assert relations[0].target_name == "RASGRF1" From cb0710aa934d7a138bb78f8d5e8291dfd398d211 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 21 Jul 2024 20:03:51 -0400 Subject: [PATCH 20/23] Implement DepMap source --- README.md | 1 + src/indra_cogex/sources/depmap/__init__.py | 82 ++++++++++++++++++++++ src/indra_cogex/sources/depmap/__main__.py | 5 ++ 3 files changed, 88 insertions(+) create mode 100644 src/indra_cogex/sources/depmap/__init__.py create mode 100644 src/indra_cogex/sources/depmap/__main__.py diff --git a/README.md b/README.md index 724a47b69..df5828a1e 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ non-causal contextual relations including properties, ontology, and data. | [PubMed](https://ncbi.nlm.nih.gov/) | published_in | The publication represented by the source is published in the journal represented by the target. | | [CellMarker](http://xteam.xbio.top/) | has_marker | The cell type represented by the source node has the gene marker represented by the target node. | | [InterPro](https://www.ebi.ac.uk/interpro/) | has_domain | The gene represented by the source node has a protein domain represented by the target node. | +| [DepMap](https://depmap.org/portal/) | codependent_with | The gene represented by the source node is codependent with the gene represented by the target node in cancer cell lines. 
| ## Installation diff --git a/src/indra_cogex/sources/depmap/__init__.py b/src/indra_cogex/sources/depmap/__init__.py new file mode 100644 index 000000000..25c2aac8f --- /dev/null +++ b/src/indra_cogex/sources/depmap/__init__.py @@ -0,0 +1,82 @@ +"""Process DepMap, a resource for gene-gene dependencies in cancer cell lines.""" + +import logging +import pickle +import tqdm +from collections import defaultdict + +import click +import pystow +from indra.databases import hgnc_client + +from indra_cogex.representation import Node, Relation +from indra_cogex.sources.processor import Processor + + +__all__ = [ + "DepmapProcessor", +] + +logger = logging.getLogger(__name__) + +SUBMODULE = pystow.module("indra", "cogex", "depmap") + +DEPMAP_SIGS = pystow.join('depmap_analysis', 'depmap', '21q2', + name='dep_stouffer_signif.pkl') + + +def load_sigs(): + with open(DEPMAP_SIGS, 'rb') as f: + df = pickle.load(f) + sig_by_gene = defaultdict(dict) + for row in tqdm.tqdm(df.itertuples(), total=len(df), + desc='Processing DepMap significant pairs'): + a, b = sorted(row.Index) + a_hgnc_id = hgnc_client.get_current_hgnc_id(a) + b_hgnc_id = hgnc_client.get_current_hgnc_id(b) + if a_hgnc_id is None or b_hgnc_id is None: + continue + if isinstance(a_hgnc_id, list): + a_hgnc_id = a_hgnc_id[0] + if isinstance(b_hgnc_id, list): + b_hgnc_id = b_hgnc_id[0] + a_current = hgnc_client.get_hgnc_name(a_hgnc_id) + b_current = hgnc_client.get_hgnc_name(b_hgnc_id) + sig_by_gene[(a_current, a_hgnc_id)][(b_current, b_hgnc_id)] = row.logp + sigs_by_gene = dict(sig_by_gene) + return sigs_by_gene + + +class DepmapProcessor(Processor): + """Processor for the DepMap dataset.""" + + name = "depmap" + node_types = ["BioEntity"] + depmap_relation = "codependent_with" + + def __init__(self): + """Initialize the DisGeNet processor.""" + self.sigs_by_gene = load_sigs() + + def get_nodes(self): # noqa:D102 + all_genes = set(self.sigs_by_gene) + for genes in self.sigs_by_gene.values(): + all_genes |= set(genes) + + for gene_name, hgnc_id in all_genes: + yield Node(db_ns="HGNC", db_id=hgnc_id, labels=["BioEntity"], + data={'name': gene_name}) + + def get_relations(self): # noqa:D102 + for (a, a_hgnc_id), genes in \ + tqdm.tqdm(self.sigs_by_gene.items(), + desc='Processing DepMap into relations'): + for (b, b_hgnc_id), logp in genes.items(): + yield Relation( + source_ns="HGNC", + source_id=a_hgnc_id, + target_ns="HGNC", + target_id=b_hgnc_id, + rel_type=self.depmap_relation, + data={"logp": logp}, + ) diff --git a/src/indra_cogex/sources/depmap/__main__.py b/src/indra_cogex/sources/depmap/__main__.py new file mode 100644 index 000000000..b574663de --- /dev/null +++ b/src/indra_cogex/sources/depmap/__main__.py @@ -0,0 +1,5 @@ +from . 
import DepmapProcessor + + +if __name__ == "__main__": + DepmapProcessor.cli() From 0057363e2df237b65c632d13ca6f0a51acc2287b Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 21 Jul 2024 20:17:57 -0400 Subject: [PATCH 21/23] Implement adjusted significance cutoffs --- src/indra_cogex/sources/depmap/__init__.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/indra_cogex/sources/depmap/__init__.py b/src/indra_cogex/sources/depmap/__init__.py index 25c2aac8f..27a4e15e4 100644 --- a/src/indra_cogex/sources/depmap/__init__.py +++ b/src/indra_cogex/sources/depmap/__init__.py @@ -21,13 +21,32 @@ SUBMODULE = pystow.module("indra", "cogex", "depmap") +# This is an intermediate processed file created using +# https://github.com/sorgerlab/indra_assembly_paper/blob/master/bioexp/depmap/data_processing.py DEPMAP_SIGS = pystow.join('depmap_analysis', 'depmap', '21q2', name='dep_stouffer_signif.pkl') +CORRECTION_METHODS = { + 'bonferroni': 'bc_cutoff', + 'benjamini-hochberg': 'bh_crit_val', + 'benjamini-yekutieli': 'by_crit_val', +} -def load_sigs(): +CORRECTION_METHOD = 'benjamini-yekutieli' + + +def load_sigs(correction_method=CORRECTION_METHOD): + # Load the significance data frame with open(DEPMAP_SIGS, 'rb') as f: df = pickle.load(f) + + # Apply correction method filter + if correction_method is not None: + crit_col = CORRECTION_METHODS[correction_method] + df = df[df.logp < df[crit_col]] + + # Get the current HGNC IDs for the genes since + # some are outdated and organize them by pairs sig_by_gene = defaultdict(dict) for row in tqdm.tqdm(df.itertuples(), total=len(df), desc='Processing DepMap significant pairs'): From 0d0bfc2083b1cebd6cd95810661bd88fd4ca39ce Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 21 Jul 2024 20:20:47 -0400 Subject: [PATCH 22/23] Add more comments --- src/indra_cogex/sources/depmap/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/indra_cogex/sources/depmap/__init__.py b/src/indra_cogex/sources/depmap/__init__.py index 27a4e15e4..875a3a151 100644 --- a/src/indra_cogex/sources/depmap/__init__.py +++ b/src/indra_cogex/sources/depmap/__init__.py @@ -50,6 +50,9 @@ def load_sigs(correction_method=CORRECTION_METHOD): sig_by_gene = defaultdict(dict) for row in tqdm.tqdm(df.itertuples(), total=len(df), desc='Processing DepMap significant pairs'): + # Note that we are sorting the genes here since + # we will generate a single directed edge a->b + # and this makes that process deterministic a, b = sorted(row.Index) a_hgnc_id = hgnc_client.get_current_hgnc_id(a) b_hgnc_id = hgnc_client.get_current_hgnc_id(b) @@ -87,6 +90,8 @@ def get_nodes(self): # noqa:D102 data={'name': gene_name}) def get_relations(self): # noqa:D102 + # Note that we have previously sorted a and b and + # we are generating a single directed edge a->b here for (a, a_hgnc_id), genes in \ tqdm.tqdm(self.sigs_by_gene.items(), desc='Processing DepMap into relations'): From ec4951226ac6678ea9d04d365daf0d0a88d99d41 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 21 Jul 2024 20:26:31 -0400 Subject: [PATCH 23/23] Register the processor --- src/indra_cogex/sources/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/indra_cogex/sources/__init__.py b/src/indra_cogex/sources/__init__.py index a5d2b6c6b..01767f38a 100644 --- a/src/indra_cogex/sources/__init__.py +++ b/src/indra_cogex/sources/__init__.py @@ -27,6 +27,7 @@ from .sider import SIDERSideEffectProcessor from .wikidata import JournalPublisherProcessor from .gwas import 
GWASProcessor +from .depmap import DepmapProcessor __all__ = [ "processor_resolver", @@ -55,6 +56,7 @@ "DisgenetProcessor", "GWASProcessor", "HGNCEnzymeProcessor", + "DepmapProcessor", ] processor_resolver: Resolver[Processor] = Resolver.from_subclasses(Processor)
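The curator and data-display views in this series switch from the removed `@jwt_optional` decorator to `@jwt_required(optional=True)`, the flask-jwt-extended 4.x spelling of an optionally-authenticated route. A minimal sketch of that pattern; the app, secret key, and route below are illustrative and not part of the patches:

```python
from flask import Flask, jsonify
from flask_jwt_extended import JWTManager, get_jwt_identity, jwt_required

app = Flask(__name__)
app.config["JWT_SECRET_KEY"] = "change-me"  # placeholder secret
jwt = JWTManager(app)


@app.route("/example", methods=["GET"])
@jwt_required(optional=True)  # replaces the 3.x-era @jwt_optional
def example():
    # With optional=True the view also runs for anonymous requests;
    # get_jwt_identity() then returns None instead of raising an error.
    identity = get_jwt_identity()
    return jsonify(logged_in_as=identity)
```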
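PATCH 18 adds optional `source_name`/`target_name` attributes to `Relation` and has `neo4j_to_relations` fill them from the start and end nodes of each Neo4j relationship. A sketch of constructing and serializing such a relation; the identifiers and data payload here are illustrative:

```python
from indra_cogex.representation import Relation

rel = Relation(
    source_ns="HGNC",
    source_id="6871",      # MAPK1
    target_ns="HGNC",
    target_id="9875",      # RASGRF1
    rel_type="indra_rel",
    data={"stmt_type": "Activation"},  # made-up payload for illustration
    source_name="MAPK1",
    target_name="RASGRF1",
)
# The names round-trip through to_json() alongside the existing fields.
print(rel.to_json()["target_name"])  # -> "RASGRF1"
```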
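The DepMap processor added in PATCHES 20-22 keeps only gene pairs whose Stouffer-combined log p-value passes a multiple-testing cutoff, Benjamini-Yekutieli by default. A self-contained sketch of that filter on a toy DataFrame; the column names follow `load_sigs`, but the numbers and gene pairs are made up:

```python
import pandas as pd

# Toy stand-in for dep_stouffer_signif.pkl: one row per gene pair, with the
# combined log p-value and per-method critical values (toy numbers only).
df = pd.DataFrame(
    {
        "logp": [-12.0, -3.0, -25.0],
        "bc_cutoff": [-9.0, -9.0, -9.0],    # Bonferroni
        "bh_crit_val": [-7.5, -6.0, -5.0],  # Benjamini-Hochberg
        "by_crit_val": [-8.5, -7.0, -6.0],  # Benjamini-Yekutieli
    },
    index=[("BRAF", "MAP2K1"), ("TP53", "EGFR"), ("CDK4", "CCND1")],
)

# Same filter as load_sigs with the default correction method: keep pairs
# whose logp falls below the Benjamini-Yekutieli critical value.
significant = df[df.logp < df["by_crit_val"]]
print(list(significant.index))  # [('BRAF', 'MAP2K1'), ('CDK4', 'CCND1')]
```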
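Once registered in PATCH 23, the processor can be driven like any other CoGEx source. A short usage sketch, assuming the pre-processed `dep_stouffer_signif.pkl` already sits in the pystow location the module points at:

```python
from indra_cogex.sources.depmap import DepmapProcessor

proc = DepmapProcessor()          # loads and filters the significance table
rel = next(proc.get_relations())  # relations are generated one pair at a time
print(rel.rel_type)               # "codependent_with"
print(rel.data["logp"])           # combined log p-value for the gene pair
```

It can also be run as a module (`python -m indra_cogex.sources.depmap`), which calls `DepmapProcessor.cli()`.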