Merge branch 'gyorilab:main' into Protein-Analysis

gyorilab · Jul 22, 2024 · 4dcc2ee · 4dcc2ee
2 parents efb9ad4 + aad5a25
commit 4dcc2ee
Show file tree

Hide file tree

Showing 11 changed files with 224 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -40,6 +40,7 @@ non-causal contextual relations including properties, ontology, and data.
 | [PubMed](https://ncbi.nlm.nih.gov/)                                 | published_in                        | The publication represented by the source is published in the journal represented by the target.                                                   |
 | [CellMarker](http://xteam.xbio.top/)                                | has_marker                          | The cell type represented by the source node has the gene marker represented by the target node.                                                   |
 | [InterPro](https://www.ebi.ac.uk/interpro/)                         | has_domain                          | The gene represented by the source node has a protein domain represented by the target node.                                                       |
+| [DepMap](https://depmap.org/portal/) | codependent_with | The gene represented by the source node is codependent with the gene represented by the target node in cancer cell lines.                          |
 
 ## Installation
 

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,14 +1,11 @@
-FROM labsyspharm/indra
+FROM ubuntu:20.04
 
 ARG INDRA_NEO4J_URL
 ARG INDRA_NEO4J_USER
 ARG INDRA_NEO4J_PASSWORD
 ARG MGI_VERSION
 ARG RGD_VERSION
 
-RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea] && \
-    python -m indra.ontology.bio build
-
 # Copy the names.tsv files for the pre-built pyobo caches to this directory as
 # mg_names.tsv and rgd_names.tsv, respectively. This is necessary to avoid running the
 # full cache build process for these resources (which can take hours).
@@ -17,6 +14,64 @@ RUN mkdir -p /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/ && \
 COPY mgi_names.tsv /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/names.tsv
 COPY rgd_names.tsv /root/.data/pyobo/raw/rgd/${RGD_VERSION}/cache/names.tsv
 
+# Setup copied and slightly modified from indra_docker Dockerfile
+ARG INDRA_BUILD_BRANCH
+
+# Set working folder
+ENV DIRPATH /sw
+WORKDIR $DIRPATH
+
+RUN apt-get update && \
+    # Install Java
+    apt-get install -y openjdk-8-jdk && \
+    # jnius-indra requires cython which requires gcc
+    apt-get install -y git wget zip unzip bzip2 gcc graphviz graphviz-dev \
+        pkg-config python3-pip && \
+    ln -s /usr/bin/python3 /usr/bin/python
+
+# Set default character encoding
+# See http://stackoverflow.com/questions/27931668/encoding-problems-when-running-an-app-in-docker-python-java-ruby-with-u/27931669
+# See http://stackoverflow.com/questions/39760663/docker-ubuntu-bin-sh-1-locale-gen-not-found
+RUN apt-get install -y locales && \
+    locale-gen en_US.UTF-8
+ENV LANG en_US.UTF-8
+ENV LANGUAGE en_US:en
+ENV LC_ALL en_US.UTF-8
+
+# Set environment variables
+ENV BNGPATH $DIRPATH/BioNetGen-2.4.0
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
+
+RUN git clone https://github.com/sorgerlab/indra.git && \
+    cd indra && \
+    git checkout $INDRA_BUILD_BRANCH && \
+    git branch && \
+    mkdir /root/.config && \
+    mkdir /root/.config/indra && \
+    echo "[indra]" > /root/.config/indra/config.ini && \
+    # Install Python dependencies
+    pip install --upgrade pip && \
+    # Install cython first for pyjnius
+    pip install cython && \
+    pip install -e .[all] && \
+    pip uninstall -y enum34 && \
+    # Pre-build the bio ontology
+    python -m indra.ontology.bio build && \
+    # Download Adeft models
+    python -m adeft.download && \
+    # Download protmapper resources
+    python -m protmapper.resources && \
+    # Install BioNetGen
+    cd $DIRPATH && \
+    wget "https://github.com/RuleWorld/bionetgen/releases/download/BioNetGen-2.4.0/BioNetGen-2.4.0-Linux.tgz" \
+        -O bionetgen.tar.gz -nv && \
+    tar xzf bionetgen.tar.gz && \
+    # Install things related to API deployment
+    pip install gunicorn
+
+# CoGEx specific setup
+RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea]
+
 # Force rebuild the pickle caches for the GSEA and MSEA apps
 RUN python -m indra_cogex.client.enrichment.utils --force
 

diff --git a/setup.cfg b/setup.cfg
@@ -48,7 +48,6 @@ install_requires =
     statsmodels
     pandas
     python-dateutil
-    pydantic<2.0.0
 
 include_package_data = True
 python_requires = >=3.7
@@ -70,11 +69,11 @@ web =
     flask-restx
     flask-wtf
     bootstrap-flask>=2.0.0
-    flask_jwt_extended<4.0.0
+    flask_jwt_extended
     docstring-parser
-    indralab_auth_tools @ git+https://github.com/indralab/ui_util.git#egg=indralab_auth_tools&subdirectory=indralab_auth_tools
+    indralab_auth_tools @ git+https://github.com/gyorilab/ui_util.git#egg=indralab_auth_tools&subdirectory=indralab_auth_tools
     pusher
-    markupsafe<2.1.0
+    markupsafe
 gunicorn =
     gunicorn
 gsea =

diff --git a/src/indra_cogex/apps/curator/curator_blueprint.py b/src/indra_cogex/apps/curator/curator_blueprint.py
@@ -6,7 +6,7 @@
 
 import flask
 from flask import Response, abort, redirect, render_template, url_for
-from flask_jwt_extended import jwt_optional
+from flask_jwt_extended import jwt_required
 from flask_wtf import FlaskForm
 from indra.statements import Statement
 from wtforms import BooleanField, StringField, SubmitField, TextAreaField
@@ -81,7 +81,7 @@ def gene_ontology():
 
 
 @curator_blueprint.route("/go/<term>", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def curate_go(term: str):
     stmts = indra_subnetwork_go(
         go_term=("GO", term),
@@ -163,7 +163,7 @@ def mesh():
 
 @curator_blueprint.route("/mesh/<term>", methods=["GET"])
 @curator_blueprint.route("/mesh/<term>/<subset>", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def curate_mesh(term: str, subset: Optional[str] = None):
     """Curate all statements for papers with a given MeSH annotation."""
     if subset is None:
@@ -311,7 +311,7 @@ def _render_evidence_counts(
 
 
 @curator_blueprint.route("/ppi", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def ppi():
     """The PPI curator looks for the highest-evidence PPIs that don't appear in a database."""
     return _render_func(
@@ -329,7 +329,7 @@ def ppi():
 
 
 @curator_blueprint.route("/goa", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def goa():
     """The GO Annotation curator looks for the highest evidence gene-GO term relations that don't appear in GOA."""
     return _render_func(
@@ -348,7 +348,7 @@ def goa():
 
 
 @curator_blueprint.route("/conflicts", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def conflicts():
     """Curate statements with conflicting prior curations."""
     return _render_func(
@@ -363,7 +363,7 @@ def conflicts():
 
 
 @curator_blueprint.route("/tf", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def tf():
     """Curate transcription factors."""
     return _render_func(
@@ -380,7 +380,7 @@ def tf():
 
 
 @curator_blueprint.route("/kinase", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def kinase():
     """Curate kinases."""
     return _render_func(
@@ -397,7 +397,7 @@ def kinase():
 
 
 @curator_blueprint.route("/phosphatase", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def phosphatase():
     """Curate phosphatases."""
     return _render_func(
@@ -414,7 +414,7 @@ def phosphatase():
 
 
 @curator_blueprint.route("/dub", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def deubiquitinase():
     """Curate deubiquitinases."""
     return _render_func(
@@ -431,7 +431,7 @@ def deubiquitinase():
 
 
 @curator_blueprint.route("/mirna", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def mirna():
     """Curate miRNAs."""
     return _render_func(
@@ -449,7 +449,7 @@ def mirna():
 
 @curator_blueprint.route("/disprot", methods=["GET"])
 @curator_blueprint.route("/disprot/<object_prefix>", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def disprot(object_prefix: Optional[str] = None):
     """Curate intrinsically disordered proteins."""
     assert object_prefix in {None, "hgnc", "go", "chebi"}
@@ -466,14 +466,14 @@ def disprot(object_prefix: Optional[str] = None):
 
 
 @curator_blueprint.route("/modulator/", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def modulator():
     """Get small molecule modulators for the given protein."""
     raise NotImplementedError
 
 
 @curator_blueprint.route("/entity/<prefix>:<identifier>", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def entity(prefix: str, identifier: str):
     """Get all statements about the given entity."""
     if prefix in {"pubmed", "pmc", "doi", "trid"}:
@@ -548,7 +548,7 @@ def get_term(self) -> Tuple[str, str]:
 
 
 @curator_blueprint.route("/paper", methods=["GET", "POST"])
-@jwt_optional
+@jwt_required(optional=True)
 def paper():
     """Get all statements for the given paper."""
     form = PaperForm()
@@ -622,7 +622,7 @@ def get_nodes(self) -> List[Tuple[str, str]]:
 
 
 @curator_blueprint.route("/subnetwork", methods=["GET", "POST"])
-@jwt_optional
+@jwt_required(optional=True)
 def subnetwork():
     """Get all statements induced by the nodes."""
     form = NodesForm()
@@ -659,7 +659,7 @@ def subnetwork():
 
 
 @curator_blueprint.route("/statement/<int:stmt_hash>", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def curate_statement(stmt_hash: int):
     """Curate all evidences for the statement."""
     start_time = time.time()

diff --git a/src/indra_cogex/apps/data_display/__init__.py b/src/indra_cogex/apps/data_display/__init__.py
@@ -14,7 +14,7 @@
 
 import requests
 from flask import Blueprint, Response, abort, jsonify, render_template, request
-from flask_jwt_extended import jwt_optional
+from flask_jwt_extended import jwt_required
 
 from indra.assemblers.english import EnglishAssembler
 from indra.sources.indra_db_rest import IndraDBRestAPIError
@@ -223,7 +223,7 @@ def biolookup(curie):
 
 # Endpoint for getting evidence
 @data_display_blueprint.route("/expand/<stmt_hash>", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def get_evidence(stmt_hash):
     try:
         # Todo:
@@ -297,7 +297,7 @@ def get_evidence(stmt_hash):
 
 # Serve the statement display template
 @data_display_blueprint.route("/statement_display", methods=["GET"])
-@jwt_optional
+@jwt_required(optional=True)
 def statement_display():
     user, roles = resolve_auth(dict(request.args))
     email = user.email if user else ""
@@ -400,7 +400,7 @@ def _get_user():
 
 
 @data_display_blueprint.route("/curate/<hash_val>", methods=["POST"])
-@jwt_optional
+@jwt_required(optional=True)
 def submit_curation_endpoint(hash_val: str):
     email = _get_user()
     if not isinstance(email, str):

diff --git a/src/indra_cogex/client/neo4j_client.py b/src/indra_cogex/client/neo4j_client.py
@@ -868,8 +868,11 @@ def neo4j_to_relations(neo4j_path: neo4j.graph.Path) -> List[Relation]:
             rel_type = neo4j_relation.type
             props = dict(neo4j_relation)
             source_ns, source_id = process_identifier(neo4j_relation.start_node["id"])
+            source_name = neo4j_relation.start_node.get("name")
             target_ns, target_id = process_identifier(neo4j_relation.end_node["id"])
-            rel = Relation(source_ns, source_id, target_ns, target_id, rel_type, props)
+            target_name = neo4j_relation.end_node.get("name")
+            rel = Relation(source_ns, source_id, target_ns, target_id, rel_type, props,
+                           source_name=source_name, target_name=target_name)
             relations.append(rel)
         return relations
 

diff --git a/src/indra_cogex/representation.py b/src/indra_cogex/representation.py
@@ -164,6 +164,8 @@ def __init__(
         target_id: str,
         rel_type: str,
         data: Optional[Mapping[str, Any]] = None,
+        source_name: Optional[str] = None,
+        target_name: Optional[str] = None,
     ):
         """Initialize the relation.
 
@@ -181,13 +183,19 @@ def __init__(
             The type of relation.
         data :
             An optional data dictionary associated with the relation.
+        source_name :
+            An optional name for the source node.
+        target_name :
+            An optional name for the target node.
         """
         self.source_ns = source_ns
         self.source_id = source_id
         self.target_ns = target_ns
         self.target_id = target_id
         self.rel_type = rel_type
         self.data = data if data else {}
+        self.source_name = source_name
+        self.target_name = target_name
 
     def to_json(self) -> RelJson:
         """Serialize the relation to JSON format.
@@ -204,6 +212,8 @@ def to_json(self) -> RelJson:
             "target_id": self.target_id,
             "rel_type": self.rel_type,
             "data": self.data,
+            "source_name": self.source_name,
+            "target_name": self.target_name,
         }
 
     def __str__(self):  # noqa:D105

diff --git a/src/indra_cogex/sources/__init__.py b/src/indra_cogex/sources/__init__.py
@@ -27,6 +27,7 @@
 from .sider import SIDERSideEffectProcessor
 from .wikidata import JournalPublisherProcessor
 from .gwas import GWASProcessor
+from .depmap import DepmapProcessor
 
 __all__ = [
     "processor_resolver",
@@ -55,6 +56,7 @@
     "DisgenetProcessor",
     "GWASProcessor",
     "HGNCEnzymeProcessor",
+    "DepmapProcessor",
 ]
 
 processor_resolver: Resolver[Processor] = Resolver.from_subclasses(Processor)