Merge pull request #323 from superphy/322-vf

Merge: more descriptive VF results
superphy · Jul 9, 2018 · 109ac21 · 109ac21
2 parents 2c03635 + 579c726
commit 109ac21
Show file tree

Hide file tree

Showing 8 changed files with 50 additions and 125 deletions.
diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py
@@ -1,10 +1,11 @@
 import logging
+import re
 import pandas as pd
 import cPickle as pickle
 from modules.loggingFunctions import initialize_logging
 from middleware.display.find_widest import check_alleles
 from middleware.graphers.turtle_utils import actual_filename
-from middleware.models import SubtypingResult, model_to_json, unpickle
+from middleware.models import unpickle
 from middleware.modellers import model_vf
 
 # logging
@@ -66,6 +67,37 @@ def json_return(gene_dict, args_dict):
                         instance_dict['hitorientation'] = item['ORIENTATION']
                         instance_dict['hitstart'] = item['START']
                         instance_dict['hitstop'] = item['STOP']
+                        # For VF: link each hit to its NCBI record.
+                        if 'RAW' in item:
+                            # Search the GI, written as 'gi:<digits>'.
+                            pattern = r'gi:\d*'
+                            a = re.search(pattern, item['RAW'])
+                            # Try searching for other format, 'gi|<digits>'.
+                            if not a:
+                                pattern = r'gi\|\d*'
+                                a = re.search(pattern, item['RAW'])
+                            # No GI at all: try a GenBank id, 'gb|<accession>'.
+                            if not a:
+                                pattern = r'gb\|[\w.]+'
+                                b = re.search(pattern, item['RAW'])
+                            if a:
+                                gi = a.group()
+                                # Calling it 'aro' for now.
+                                # TODO: rename to something generic (have to modify grouch).
+                                instance_dict['aro'] = 'https://www.ncbi.nlm.nih.gov/protein/' + gi
+                                # Find the longname.
+                                longname = item['RAW'].split(gi)[-1][2:]
+                                instance_dict['longname'] = longname
+                            elif b:
+                                s = b.group()
+                                gb = s.split('|')[-1]
+                                instance_dict[
+                                    'aro'] = 'https://www.ncbi.nlm.nih.gov/nuccore/' + gb
+                                # Too many cases to parse.
+                                instance_dict['longname'] = item['RAW']
+                            else:
+                                instance_dict['aro'] = 'n/a'
+                                instance_dict['longname'] = item['RAW']
                         if analysis == 'Antimicrobial Resistance':
                             instance_dict['hitcutoff'] = item['CUT_OFF']
                         else:
@@ -132,20 +164,17 @@ def beautify(gene_dict, args_dict=None):
         return handle_failed(json_r, args_dict)
     else:
         return json_r
-        # Everything worked, cast result into a model.
-        # model = model_vf(json_r)
-        # return model_to_json(model)
 
 def display_subtyping(pickled_result, args_dict=None):
     result = unpickle(pickled_result)
     if isinstance(result, dict):
         # VF.
         list_return = beautify(gene_dict=result, args_dict=args_dict)
         assert isinstance(list_return, list)
-        model = model_vf(list_return)
-        return model_to_json(model)
+        l = model_vf(list_return)
+        return l
     elif isinstance(result, list):
         # Serotyping.
-        return model_to_json(result)
+        return result
     else:
         raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result))
diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py
@@ -4,7 +4,7 @@
 from middleware.graphers.turtle_grapher import generate_graph
 from middleware.blazegraph.upload_graph import queue_upload
 from modules.PanPredic.pan_utils import contig_name_parse
-from middleware.models import SubtypingResult, unpickle
+from middleware.models import unpickle
 # working with Serotype, Antimicrobial Resistance, & Virulence Factor data
 # structures
 

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
@@ -1,12 +1,11 @@
 # We try to keep all model creation in this file so it's easier to reference.
 import pandas as pd
-from middleware.models import SubtypingRow, SubtypingResult
 from middleware.graphers.turtle_utils import actual_filename
 
 
 def model_serotype(pi, pl, output_file):
     """
-    Creates a SubtypingResult model from ECTYper's serotyping output.
+    Creates a list from ECTYper's serotyping output.
     """
     # Read the vanilla output_file from ECTyper.
     df = pd.read_csv(output_file)
@@ -28,17 +27,13 @@ def model_serotype(pi, pl, output_file):
         }
     for index, row in df.iterrows()]
 
-    # Convert the list of rows into a SubtypingResult model.
-    # subtyping_result = SubtypingResult(
-    #     rows = subtyping_list
-    # )
     assert subtyping_list
     assert subtyping_list[0]
     return subtyping_list
 
 def model_vf(lst):
     """
-    Casts the output from display.beautify into a SubtypingResult object.
+    Casts the output from display.beautify into a list.
     """
     # Type check.
     assert isinstance(lst, list)
@@ -54,13 +49,11 @@ def model_vf(lst):
             'hitorientation':item['hitorientation'],
             'hitstart':item['hitstart'],
             'hitstop':item['hitstop'],
-            'probability':'n/a'
+            'probability':'n/a',
+            'longname':item['longname'],
+            'aro': item['aro']
         }
     for item in lst]
-    # Convert the list of rows into a SubtypingResult model.
-    # subtyping_result = SubtypingResult(
-    #     rows = subtyping_list
-    # )
     return subtyping_list
 
 def model_phylotyper(lst):

diff --git a/app/middleware/models.py b/app/middleware/models.py
@@ -12,34 +12,6 @@
 from middleware.graphers.turtle_utils import actual_filename
 from routes.job_utils import fetch_job
 
-# def _convert_model(model):
-#     # Convert the model to a generic JSON structure.
-#     struct = model.to_struct()
-#     # Check that struct isn't empty.
-#     assert struct
-#     if 'rows' in struct:
-#         # This is not strictly json; more like a list than a dict structure.
-#         rows_list = struct['rows']
-#         return rows_list
-#     else:
-#         return struct
-
-def model_to_json(model):
-    """
-    Converts models to json for the front-end.
-    """
-    #TODO: can access the list directly, no longer need this.
-    # Validate the model submitted before processing.
-    assert isinstance(model, list)
-    # model.validate()
-    # Conversion.
-    # print("model_to_json() called with model: {0}".format(str(model)))
-    return model
-    # if isinstance(model, models.Base):
-    #     return _convert_model(model)
-    # else:
-    #     raise Exception('model_to_json() called for a model without a handler.')
-
 def store(pipeline):
     """
     Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return.
@@ -95,39 +67,6 @@ def unpickle(pickled_file):
 def dump(obj, path):
     dill.dump(obj, open(path, 'wb'))
 
-class SubtypingRow(models.Base):
-    def __init__(self, analysis="", contigid="", filename="", hitcutoff="", hitname="", hitorientation="", hitstart="",hitstop=""):
-        self.analysis = analysis
-        self.contigid = contigid
-        self.filename = filename
-        self.hitcutoff = hitcutoff
-        self.hitname = hitname
-        self.hitorientation = hitorientation
-        self.hitstart = hitstart
-        self.hitstop = hitstop
-
-
-class SubtypingResult(models.Base):
-    def __init__(self, rows=None):
-        if not rows:
-            rows = []
-        self.rows = rows
-
-class PhylotyperRow(models.Base):
-    def __init__(self):
-        self.contig = fields.StringField(nullable=True)
-        self.genome = fields.StringField()
-        self.probability = fields.StringField(nullable=True) # actually float
-        self.start = fields.StringField(nullable=True) # actually int
-        self.stop = fields.StringField(nullable=True) # actually int
-        self.subtype = fields.StringField()
-        self.subtype_gene = fields.StringField(nullable=True)
-
-class PhylotyperResult(models.Base):
-    def __init__(self):
-        self.rows = fields.ListField([PhylotyperRow], nullable=True)
-
-
 class Job():
     def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False):
         """
@@ -385,14 +324,12 @@ def to_json(self):
         l = []
         for j in completed_jobs:
             rq_job = j.rq_job
-            model = rq_job.result
+            lr = rq_job.result
             try:
-                # TODO: This is not correct as while the new ECTYper call does return a model, the display_subtyping() call that the return job is associated with will already convert the result to a list and return it.
-                assert isinstance(model, (models.Base,list))
+                assert isinstance(lr, (models.Base,list))
             except:
-                raise Exception("to_json() called for job {0}  with result of type {1} and info {2}".format(j.name, type(model), str(model)))
+                raise Exception("to_json() called for job {0}  with result of type {1} and info {2}".format(j.name, type(lr), str(lr)))
-            list_json = model_to_json(model)
-            l += list_json
+            l += lr
         return jsonify(l)
 
     def _function_signature(self):

diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
@@ -99,7 +99,7 @@ def call_ectyper_serotype(args_dict, pickle=True):
     ])
     if ret_code == 0:
         output_file = os.path.join(output_dir, 'output.csv')
-        # Create a SubtypingResult model from the output.
+        # Create a list from the output.
         subtyping_result = model_serotype(
             pi=pi,
             pl=pl,

diff --git a/app/modules/ectyper/ecoli_serotyping b/app/modules/ectyper/ecoli_serotyping
diff --git a/app/tests/test_models.py b/app/tests/test_models.py
@@ -24,28 +24,6 @@ def test_subtyping_model_direct(l=constants.BEAUTIFY_VF_SEROTYPE):
     # Return for incorporation into later tests.
     return subtyping_list
 
-# def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1):
-#     """
-#     Use our dataset to directly create a phylotyper results model and validate it.
-#     """
-#     phylotyper_list = [
-#         models.PhylotyperRow(
-#             contig=d['contig'],
-#             genome=d['genome'],
-#             probability=str(d['probability']),
-#             start=str(d['start']),
-#             stop=str(d['stop']),
-#             subtype=d['subtype'],
-#             subtype_gene=d['subtype_gene']
-#         )
-#     for d in l]
-#     phylotyper_result = models.PhylotyperResult(
-#         rows = phylotyper_list
-#     )
-#     phylotyper_result.validate()
-#     # Return for incorporation into later tests.
-#     return phylotyper_result
-
 def _create_example_pipeline():
     p = models.Pipeline(
         func=spfy,

diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py
@@ -12,7 +12,7 @@
 from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
-from middleware.display.beautify import beautify, model_to_json
+from middleware.display.beautify import beautify
 from middleware.graphers.datastruct_savvy import datastruct_savvy
 from middleware.graphers.turtle_grapher import turtle_grapher
 from middleware.models import unpickle
@@ -69,18 +69,6 @@ def test_ectyper_vf(return_one=False):
         if return_one:
             return json_return
 
-def _validate_model(model):
-    # Validate (throws error if invalidate).
-    # model.validate()
-    # Check that the return rows is not some random empty list.
-    # assert model.rows
-    # Check the conversion for the front-end.
-    # r = model_to_json(model)
-    # This is not really json; more like a list than a dict structure.
-    assert isinstance(model, list)
-    # Check that this isn't empty.
-    assert model
-
 def test_ectyper_serotype_direct():
     """Check the ECTyper from `master` which only performs serotyping.
     Installed in the conda environment.
@@ -99,7 +87,7 @@ def test_ectyper_serotype_call_nopickle():
         single_dict.update({'i':ecoli_genome})
         # Have the call return the model without pickling.
         serotype_model = call_ectyper_serotype(single_dict, pickle=False)
-        _validate_model(serotype_model)
+        assert isinstance(serotype_model, list)
 
 def test_ectyper_serotype_call_pickle(return_one=False):
     """
@@ -111,7 +99,7 @@ def test_ectyper_serotype_call_pickle(return_one=False):
         # Pickle the model, and return the path to the file.
         pickled_serotype_model = call_ectyper_serotype(single_dict)
         ectyper_serotype_model = unpickle(pickled_serotype_model)
-        _validate_model(ectyper_serotype_model)
+        assert isinstance(ectyper_serotype_model, list)
         if return_one:
             return ectyper_serotype_model