feat(datasets): add datasets endpoint

uc-cdis · Feb 21, 2019 · 9621c0e · 9621c0e
1 parent 9a73b46
commit 9621c0e
Show file tree

Hide file tree

Showing 6 changed files with 163 additions and 0 deletions.
diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml
@@ -19,6 +19,30 @@ tags:
   - name: system
     description: System endpoints
 paths:
+  /datasets:
+    get:
+      tags:
+        - datasets
+      summary: Get counts for nodes for each project
+      parameters:
+        - in: query
+          name: nodes
+          description: comma delimited nodes to get counts for
+          schema:
+            type: string
+      responses:
+        '200':
+          description: node counts for each project
+          content:
+            application/json:
+              schema:
+                type: object
+                example: {project_A: {case: 0, aliquot: 1}, project_B: {case: 2, aliquot: 3}}
+        '401':
+          description: unauthorized request
+        '400':
+          description: invalid request
+
   /graphql:
     post:
       tags:

diff --git a/peregrine/api.py b/peregrine/api.py
@@ -15,6 +15,7 @@
 
 import peregrine
 from peregrine import dictionary
+from peregrine.blueprints import datasets
 from .errors import APIError, setup_default_handlers, UnhealthyCheck
 from .resources import submission
 from .version_data import VERSION, COMMIT, DICTVERSION, DICTCOMMIT
@@ -32,12 +33,14 @@ def app_register_blueprints(app):
     app.url_map.strict_slashes = False
 
     app.register_blueprint(peregrine.blueprints.blueprint, url_prefix=v0+'/submission')
+    app.register_blueprint(datasets.blueprint, url_prefix=v0+'/datasets')
 
 
 def app_register_duplicate_blueprints(app):
     # TODO: (jsm) deprecate this v0 version under root endpoint.  This
     # root endpoint duplicates /v0 to allow gradual client migration
     app.register_blueprint(peregrine.blueprints.blueprint, url_prefix='/submission')
+    # the datasets blueprint is likewise exposed at the root in addition to /v0
+    app.register_blueprint(datasets.blueprint, url_prefix='/datasets')
 
 
 def async_pool_init(app):

diff --git a/peregrine/blueprints/datasets.py b/peregrine/blueprints/datasets.py
@@ -0,0 +1,63 @@
+import flask
+import os
+import re
+
+import peregrine.utils
+from peregrine.resources.submission import (
+    graphql,
+    set_read_access_projects_for_public_endpoint,
+    set_read_access_projects,
+)
+
+from cdiserrors import UserError, AuthZError
+
+blueprint = flask.Blueprint("datasets", "datasets")
+
+
+@blueprint.route("/", methods=["GET"])
+def get_datasets():
+    """
+    Get dataset level summary counts, if a deployment is configured
+    to set PUBLIC_DATASETS to True, this endpoint will be open to
+    anonymous users
+    """
+    nodes = flask.request.args.get("nodes", "")
+    nodes = nodes.split(",")
+    if nodes == []:
+        raise UserError("Need to provide target nodes in query param")
+    if os.environ.get("PUBLIC_DATASETS", False) == "true":
+        set_read_access_projects_for_public_endpoint()
+    else:
+        set_read_access_projects()
+    projects = flask.g.read_access_projects
+    if not projects:
+        raise AuthZError("You are not authorized to access any projects")
+    # construct a query that get counts for all projects
+    # because graphql can't add structure to group by projects,
+    # we labeled the count by project index and later parse it
+    # with regex to add structure to response
+    query = "{"
+    for i, project_id in enumerate(projects):
+        query += (
+            " ".join(
+                map(
+                    lambda x: """i{i}_{node}: _{node}_count(project_id: "{p}")""".format(
+                        i=i, node=x, p=project_id
+                    ),
+                    nodes,
+                )
+            )
+            + " "
+        )
+    query += "}"
+    data, errors = graphql.execute_query(query, variables={})
+    if errors:
+        return flask.jsonify({"data": data, "errors": errors}), 400
+    result = {project_id: {} for project_id in projects}
+
+    for name, value in data.iteritems():
+        match = re.search("^i(\d)_(.*)", name)
+        index = int(match.group(1))
+        node = match.group(2)
+        result[projects[index]][node] = value
+    return flask.jsonify(result)
diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py
@@ -43,6 +43,24 @@ def get_open_project_ids():
             for program in project['programs']
         ]
 
+def set_read_access_projects_for_public_endpoint():
+    """
+    Set the global user project list to include all projects for endpoint
+    that doesn't need authorization
+    """
+
+    with flask.current_app.db.session_scope():
+        projects = (
+            flask.current_app.db
+            .nodes(models.Project)
+            .all()
+        )
+        flask.g.read_access_projects = [
+            program['name'] + '-' + project['code']
+            for project in projects
+            for program in project['programs']
+        ]
+
 
 def set_read_access_projects():
     """

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -238,3 +238,11 @@ def es_teardown():
     es.indices.refresh(index=INDEX)
 
     json_data.close()
+
+
+@pytest.fixture
+def public_dataset_api(request):
+    os.environ["PUBLIC_DATASETS"] = "true"
+    def tearDown():
+        os.environ["PUBLIC_DATASETS"] = "false"
+    request.addfinalizer(tearDown)
diff --git a/tests/graphql/test_datasets.py b/tests/graphql/test_datasets.py
@@ -0,0 +1,47 @@
+from test_graphql import post_example_entities_together
+from datamodelutils import models
+import os
+
+
+def test_authorized_call_with_protected_config(
+    client, submitter, pg_driver_clean, cgci_blgsp
+):
+    post_example_entities_together(client, pg_driver_clean, submitter)
+    #: number of nodes to change project_id on, there should be 5
+    with pg_driver_clean.session_scope() as s:
+        cases = pg_driver_clean.nodes(models.Case).all()
+        case_count = len(cases)
+        for case in cases[0:-3]:
+            case.project_id = "OTHER-OTHER"
+            s.merge(case)
+    r = client.get("/datasets?nodes=case,aliquot", headers=submitter)
+    assert r.json.keys() == ["CGCI-BLGSP"]
+    assert r.json["CGCI-BLGSP"]["case"] == case_count - 2
+
+
+def test_anonymous_call_with_protected_config(client, pg_driver_clean, cgci_blgsp):
+    r = client.get("/datasets?nodes=case,aliquot")
+    assert r.status_code == 401
+
+
+def test_anonymous_call_with_public_config(
+    client, submitter, pg_driver_clean, cgci_blgsp, public_dataset_api
+):
+    post_example_entities_together(client, pg_driver_clean, submitter)
+    with pg_driver_clean.session_scope() as s:
+        project = models.Project("other", code="OTHER")
+        program = pg_driver_clean.nodes(models.Program).props(name="CGCI").first()
+        project.programs = [program]
+        s.add(project)
+        aliquot_count = pg_driver_clean.nodes(models.Aliquot).count()
+        cases = pg_driver_clean.nodes(models.Case).all()
+        case_count = len(cases)
+        for case in cases[0:-3]:
+            case.project_id = "CGCI-OTHER"
+            s.merge(case)
+
+    r = client.get("/datasets?nodes=case,aliquot")
+    assert r.json["CGCI-BLGSP"]["case"] == case_count - 2
+    assert r.json["CGCI-BLGSP"]["aliquot"] == aliquot_count
+    assert r.json["CGCI-OTHER"]["aliquot"] == 0
+    assert r.json["CGCI-OTHER"]["case"] == 2