Skip to content

Commit

Permalink
Merge branch 'gyorilab:main' into Protein-Analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
AriaAgarwal authored Jul 22, 2024
2 parents efb9ad4 + aad5a25 commit 4dcc2ee
Show file tree
Hide file tree
Showing 11 changed files with 224 additions and 30 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ non-causal contextual relations including properties, ontology, and data.
| [PubMed](https://ncbi.nlm.nih.gov/) | published_in | The publication represented by the source is published in the journal represented by the target. |
| [CellMarker](http://xteam.xbio.top/) | has_marker | The cell type represented by the source node has the gene marker represented by the target node. |
| [InterPro](https://www.ebi.ac.uk/interpro/) | has_domain | The gene represented by the source node has a protein domain represented by the target node. |
| [DepMap](https://depmap.org/portal/) | codependent_with | The gene represented by the source node is codependent with the gene represented by the target node in cancer cell lines. |

## Installation

Expand Down
63 changes: 59 additions & 4 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
FROM labsyspharm/indra
FROM ubuntu:20.04

ARG INDRA_NEO4J_URL
ARG INDRA_NEO4J_USER
ARG INDRA_NEO4J_PASSWORD
ARG MGI_VERSION
ARG RGD_VERSION

RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea] && \
python -m indra.ontology.bio build

# Copy the names.tsv files for the pre-built pyobo caches to this directory as
# mgi_names.tsv and rgd_names.tsv, respectively. This is necessary to avoid running the
# full cache build process for these resources (which can take hours).
Expand All @@ -17,6 +14,64 @@ RUN mkdir -p /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/ && \
COPY mgi_names.tsv /root/.data/pyobo/raw/mgi/${MGI_VERSION}/cache/names.tsv
COPY rgd_names.tsv /root/.data/pyobo/raw/rgd/${RGD_VERSION}/cache/names.tsv

# Setup copied and slightly modified from indra_docker Dockerfile
ARG INDRA_BUILD_BRANCH

# Set working folder
ENV DIRPATH /sw
WORKDIR $DIRPATH

RUN apt-get update && \
# Install Java
apt-get install -y openjdk-8-jdk && \
# jnius-indra requires cython which requires gcc
apt-get install -y git wget zip unzip bzip2 gcc graphviz graphviz-dev \
pkg-config python3-pip && \
ln -s /usr/bin/python3 /usr/bin/python

# Set default character encoding
# See http://stackoverflow.com/questions/27931668/encoding-problems-when-running-an-app-in-docker-python-java-ruby-with-u/27931669
# See http://stackoverflow.com/questions/39760663/docker-ubuntu-bin-sh-1-locale-gen-not-found
RUN apt-get install -y locales && \
locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8

# Set environment variables
ENV BNGPATH $DIRPATH/BioNetGen-2.4.0
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64

RUN git clone https://github.com/sorgerlab/indra.git && \
cd indra && \
git checkout $INDRA_BUILD_BRANCH && \
git branch && \
mkdir /root/.config && \
mkdir /root/.config/indra && \
echo "[indra]" > /root/.config/indra/config.ini && \
# Install Python dependencies
pip install --upgrade pip && \
# Install cython first for pyjnius
pip install cython && \
pip install -e .[all] && \
pip uninstall -y enum34 && \
# Pre-build the bio ontology
python -m indra.ontology.bio build && \
# Download Adeft models
python -m adeft.download && \
# Download protmapper resources
python -m protmapper.resources && \
# Install BioNetGen
cd $DIRPATH && \
wget "https://github.com/RuleWorld/bionetgen/releases/download/BioNetGen-2.4.0/BioNetGen-2.4.0-Linux.tgz" \
-O bionetgen.tar.gz -nv && \
tar xzf bionetgen.tar.gz && \
# Install things related to API deployment
pip install gunicorn

# CoGEx specific setup
RUN pip3 install git+https://github.com/bgyori/indra_cogex.git#egg=indra_cogex[web,gunicorn,gsea]

# Force rebuild the pickle caches for the GSEA and MSEA apps
RUN python -m indra_cogex.client.enrichment.utils --force

Expand Down
7 changes: 3 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ install_requires =
statsmodels
pandas
python-dateutil
pydantic<2.0.0

include_package_data = True
python_requires = >=3.7
Expand All @@ -70,11 +69,11 @@ web =
flask-restx
flask-wtf
bootstrap-flask>=2.0.0
flask_jwt_extended<4.0.0
flask_jwt_extended
docstring-parser
indralab_auth_tools @ git+https://github.com/indralab/ui_util.git#egg=indralab_auth_tools&subdirectory=indralab_auth_tools
indralab_auth_tools @ git+https://github.com/gyorilab/ui_util.git#egg=indralab_auth_tools&subdirectory=indralab_auth_tools
pusher
markupsafe<2.1.0
markupsafe
gunicorn =
gunicorn
gsea =
Expand Down
34 changes: 17 additions & 17 deletions src/indra_cogex/apps/curator/curator_blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import flask
from flask import Response, abort, redirect, render_template, url_for
from flask_jwt_extended import jwt_optional
from flask_jwt_extended import jwt_required
from flask_wtf import FlaskForm
from indra.statements import Statement
from wtforms import BooleanField, StringField, SubmitField, TextAreaField
Expand Down Expand Up @@ -81,7 +81,7 @@ def gene_ontology():


@curator_blueprint.route("/go/<term>", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def curate_go(term: str):
stmts = indra_subnetwork_go(
go_term=("GO", term),
Expand Down Expand Up @@ -163,7 +163,7 @@ def mesh():

@curator_blueprint.route("/mesh/<term>", methods=["GET"])
@curator_blueprint.route("/mesh/<term>/<subset>", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def curate_mesh(term: str, subset: Optional[str] = None):
"""Curate all statements for papers with a given MeSH annotation."""
if subset is None:
Expand Down Expand Up @@ -311,7 +311,7 @@ def _render_evidence_counts(


@curator_blueprint.route("/ppi", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def ppi():
"""The PPI curator looks for the highest evidence PPIs that don't appear in a database."""
return _render_func(
Expand All @@ -329,7 +329,7 @@ def ppi():


@curator_blueprint.route("/goa", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def goa():
"""The GO Annotation curator looks for the highest evidence gene-GO term relations that don't appear in GOA."""
return _render_func(
Expand All @@ -348,7 +348,7 @@ def goa():


@curator_blueprint.route("/conflicts", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def conflicts():
"""Curate statements with conflicting prior curations."""
return _render_func(
Expand All @@ -363,7 +363,7 @@ def conflicts():


@curator_blueprint.route("/tf", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def tf():
"""Curate transcription factors."""
return _render_func(
Expand All @@ -380,7 +380,7 @@ def tf():


@curator_blueprint.route("/kinase", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def kinase():
"""Curate kinases."""
return _render_func(
Expand All @@ -397,7 +397,7 @@ def kinase():


@curator_blueprint.route("/phosphatase", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def phosphatase():
"""Curate phosphatases."""
return _render_func(
Expand All @@ -414,7 +414,7 @@ def phosphatase():


@curator_blueprint.route("/dub", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def deubiquitinase():
"""Curate deubiquitinases."""
return _render_func(
Expand All @@ -431,7 +431,7 @@ def deubiquitinase():


@curator_blueprint.route("/mirna", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def mirna():
"""Curate miRNAs."""
return _render_func(
Expand All @@ -449,7 +449,7 @@ def mirna():

@curator_blueprint.route("/disprot", methods=["GET"])
@curator_blueprint.route("/disprot/<object_prefix>", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def disprot(object_prefix: Optional[str] = None):
"""Curate intrinsically disordered proteins."""
assert object_prefix in {None, "hgnc", "go", "chebi"}
Expand All @@ -466,14 +466,14 @@ def disprot(object_prefix: Optional[str] = None):


@curator_blueprint.route("/modulator/", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def modulator():
"""Get small molecule modulators for the given protein."""
raise NotImplementedError


@curator_blueprint.route("/entity/<prefix>:<identifier>", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def entity(prefix: str, identifier: str):
"""Get all statements about the given entity."""
if prefix in {"pubmed", "pmc", "doi", "trid"}:
Expand Down Expand Up @@ -548,7 +548,7 @@ def get_term(self) -> Tuple[str, str]:


@curator_blueprint.route("/paper", methods=["GET", "POST"])
@jwt_optional
@jwt_required(optional=True)
def paper():
"""Get all statements for the given paper."""
form = PaperForm()
Expand Down Expand Up @@ -622,7 +622,7 @@ def get_nodes(self) -> List[Tuple[str, str]]:


@curator_blueprint.route("/subnetwork", methods=["GET", "POST"])
@jwt_optional
@jwt_required(optional=True)
def subnetwork():
"""Get all statements induced by the nodes."""
form = NodesForm()
Expand Down Expand Up @@ -659,7 +659,7 @@ def subnetwork():


@curator_blueprint.route("/statement/<int:stmt_hash>", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def curate_statement(stmt_hash: int):
"""Curate all evidences for the statement."""
start_time = time.time()
Expand Down
8 changes: 4 additions & 4 deletions src/indra_cogex/apps/data_display/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import requests
from flask import Blueprint, Response, abort, jsonify, render_template, request
from flask_jwt_extended import jwt_optional
from flask_jwt_extended import jwt_required

from indra.assemblers.english import EnglishAssembler
from indra.sources.indra_db_rest import IndraDBRestAPIError
Expand Down Expand Up @@ -223,7 +223,7 @@ def biolookup(curie):

# Endpoint for getting evidence
@data_display_blueprint.route("/expand/<stmt_hash>", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def get_evidence(stmt_hash):
try:
# Todo:
Expand Down Expand Up @@ -297,7 +297,7 @@ def get_evidence(stmt_hash):

# Serve the statement display template
@data_display_blueprint.route("/statement_display", methods=["GET"])
@jwt_optional
@jwt_required(optional=True)
def statement_display():
user, roles = resolve_auth(dict(request.args))
email = user.email if user else ""
Expand Down Expand Up @@ -400,7 +400,7 @@ def _get_user():


@data_display_blueprint.route("/curate/<hash_val>", methods=["POST"])
@jwt_optional
@jwt_required(optional=True)
def submit_curation_endpoint(hash_val: str):
email = _get_user()
if not isinstance(email, str):
Expand Down
5 changes: 4 additions & 1 deletion src/indra_cogex/client/neo4j_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,8 +868,11 @@ def neo4j_to_relations(neo4j_path: neo4j.graph.Path) -> List[Relation]:
rel_type = neo4j_relation.type
props = dict(neo4j_relation)
source_ns, source_id = process_identifier(neo4j_relation.start_node["id"])
source_name = neo4j_relation.start_node.get("name")
target_ns, target_id = process_identifier(neo4j_relation.end_node["id"])
rel = Relation(source_ns, source_id, target_ns, target_id, rel_type, props)
target_name = neo4j_relation.end_node.get("name")
rel = Relation(source_ns, source_id, target_ns, target_id, rel_type, props,
source_name=source_name, target_name=target_name)
relations.append(rel)
return relations

Expand Down
10 changes: 10 additions & 0 deletions src/indra_cogex/representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def __init__(
target_id: str,
rel_type: str,
data: Optional[Mapping[str, Any]] = None,
source_name: Optional[str] = None,
target_name: Optional[str] = None,
):
"""Initialize the relation.
Expand All @@ -181,13 +183,19 @@ def __init__(
The type of relation.
data :
An optional data dictionary associated with the relation.
source_name :
An optional name for the source node.
target_name :
An optional name for the target node.
"""
self.source_ns = source_ns
self.source_id = source_id
self.target_ns = target_ns
self.target_id = target_id
self.rel_type = rel_type
self.data = data if data else {}
self.source_name = source_name
self.target_name = target_name

def to_json(self) -> RelJson:
"""Serialize the relation to JSON format.
Expand All @@ -204,6 +212,8 @@ def to_json(self) -> RelJson:
"target_id": self.target_id,
"rel_type": self.rel_type,
"data": self.data,
"source_name": self.source_name,
"target_name": self.target_name,
}

def __str__(self): # noqa:D105
Expand Down
2 changes: 2 additions & 0 deletions src/indra_cogex/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .sider import SIDERSideEffectProcessor
from .wikidata import JournalPublisherProcessor
from .gwas import GWASProcessor
from .depmap import DepmapProcessor

__all__ = [
"processor_resolver",
Expand Down Expand Up @@ -55,6 +56,7 @@
"DisgenetProcessor",
"GWASProcessor",
"HGNCEnzymeProcessor",
"DepmapProcessor",
]

processor_resolver: Resolver[Processor] = Resolver.from_subclasses(Processor)
Loading

0 comments on commit 4dcc2ee

Please sign in to comment.