Skip to content

Commit

Permalink
Merge pull request #323 from superphy/322-vf
Browse files Browse the repository at this point in the history
Merge: more descriptive VF results
  • Loading branch information
kevinkle authored Jul 9, 2018
2 parents 2c03635 + 579c726 commit 109ac21
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 125 deletions.
43 changes: 36 additions & 7 deletions app/middleware/display/beautify.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
import re
import pandas as pd
import cPickle as pickle
from modules.loggingFunctions import initialize_logging
from middleware.display.find_widest import check_alleles
from middleware.graphers.turtle_utils import actual_filename
from middleware.models import SubtypingResult, model_to_json, unpickle
from middleware.models import unpickle
from middleware.modellers import model_vf

# logging
Expand Down Expand Up @@ -66,6 +67,37 @@ def json_return(gene_dict, args_dict):
instance_dict['hitorientation'] = item['ORIENTATION']
instance_dict['hitstart'] = item['START']
instance_dict['hitstop'] = item['STOP']
# For VF.
if 'RAW' in item:
    raw = item['RAW']
    # Search the GI, written as 'gi:<digits>'.
    a = re.search(r'gi:\d*', raw)
    # Try searching for the other GI format, 'gi|<digits>'.
    if not a:
        a = re.search(r'gi\|\d*', raw)
    # Try searching for GB. This lookup previously reused the 'gi|' pattern
    # that had just failed, so the GenBank branch below could never run.
    # NOTE(review): the accession is assumed to consist of word characters
    # and dots after 'gb|' -- confirm against the actual VF headers.
    b = None
    if not a:
        b = re.search(r'gb\|[\w.]+', raw)
    if a:
        gi = a.group()
        # Calling it 'aro' for now.
        # TODO: rename to something generic (have to modify grouch).
        instance_dict['aro'] = 'https://www.ncbi.nlm.nih.gov/protein/' + gi
        # Find the longname: the text after the GI, minus its first two
        # characters.
        longname = raw.split(gi)[-1][2:]
        instance_dict['longname'] = longname
    elif b:
        # Keep only the part after the '|' for the link.
        gb = b.group().split('|')[-1]
        instance_dict['aro'] = 'https://www.ncbi.nlm.nih.gov/nuccore/' + gb
        # Too many cases to parse.
        instance_dict['longname'] = raw
    else:
        # No identifier found: no link, and the raw text is the long name.
        instance_dict['aro'] = 'n/a'
        instance_dict['longname'] = raw
if analysis == 'Antimicrobial Resistance':
instance_dict['hitcutoff'] = item['CUT_OFF']
else:
Expand Down Expand Up @@ -132,20 +164,17 @@ def beautify(gene_dict, args_dict=None):
return handle_failed(json_r, args_dict)
else:
return json_r
# Everything worked, cast result into a model.
# model = model_vf(json_r)
# return model_to_json(model)

def display_subtyping(pickled_result, args_dict=None):
    """
    Unpickles a subtyping result and returns it as a list for display.

    A dict result (VF) is run through beautify() and then model_vf().
    A list result (serotyping) is returned unchanged.

    Raises an Exception when the unpickled result is neither a dict nor a
    list.
    """
    result = unpickle(pickled_result)
    if isinstance(result, dict):
        # VF.
        list_return = beautify(gene_dict=result, args_dict=args_dict)
        assert isinstance(list_return, list)
        return model_vf(list_return)
    elif isinstance(result, list):
        # Serotyping.
        return result
    else:
        raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result))
2 changes: 1 addition & 1 deletion app/middleware/graphers/datastruct_savvy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from middleware.graphers.turtle_grapher import generate_graph
from middleware.blazegraph.upload_graph import queue_upload
from modules.PanPredic.pan_utils import contig_name_parse
from middleware.models import SubtypingResult, unpickle
from middleware.models import unpickle
# working with Serotype, Antimicrobial Resistance, & Virulence Factor data
# structures

Expand Down
17 changes: 5 additions & 12 deletions app/middleware/modellers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# We try to keep all model creation in this file so it's easier to reference.
import pandas as pd
from middleware.models import SubtypingRow, SubtypingResult
from middleware.graphers.turtle_utils import actual_filename


def model_serotype(pi, pl, output_file):
"""
Creates a SubtypingResult model from ECTYper's serotyping output.
Creates a list from ECTyper's serotyping output.
"""
# Read the vanilla output_file from ECTyper.
df = pd.read_csv(output_file)
Expand All @@ -28,17 +27,13 @@ def model_serotype(pi, pl, output_file):
}
for index, row in df.iterrows()]

# Convert the list of rows into a SubtypingResult model.
# subtyping_result = SubtypingResult(
# rows = subtyping_list
# )
assert subtyping_list
assert subtyping_list[0]
return subtyping_list

def model_vf(lst):
"""
Casts the output from display.beautify into a SubtypingResult object.
Casts the output from display.beautify into a list.
"""
# Type check.
assert isinstance(lst, list)
Expand All @@ -54,13 +49,11 @@ def model_vf(lst):
'hitorientation':item['hitorientation'],
'hitstart':item['hitstart'],
'hitstop':item['hitstop'],
'probability':'n/a'
'probability':'n/a',
'longname':item['longname'],
'aro': item['aro']
}
for item in lst]
# Convert the list of rows into a SubtypingResult model.
# subtyping_result = SubtypingResult(
# rows = subtyping_list
# )
return subtyping_list

def model_phylotyper(lst):
Expand Down
69 changes: 3 additions & 66 deletions app/middleware/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,34 +12,6 @@
from middleware.graphers.turtle_utils import actual_filename
from routes.job_utils import fetch_job

# def _convert_model(model):
# # Convert the model to a generic JSON structure.
# struct = model.to_struct()
# # Check that struct isn't empty.
# assert struct
# if 'rows' in struct:
# # This is not strictly json; more like a list than a dict structure.
# rows_list = struct['rows']
# return rows_list
# else:
# return struct

def model_to_json(model):
    """
    Hands a list result back to the front-end unchanged.

    The argument has to be a list already; anything else trips the assertion.
    """
    # TODO: callers can access the list directly, so this pass-through is no
    # longer needed.
    assert isinstance(model, list)
    return model

def store(pipeline):
"""
Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return.
Expand Down Expand Up @@ -95,39 +67,6 @@ def unpickle(pickled_file):
def dump(obj, path):
    """
    Serializes obj with dill into the file at path.

    The file is opened in binary write mode inside a context manager, so the
    handle is closed whether the dump succeeds or raises.
    """
    with open(path, 'wb') as fh:
        dill.dump(obj, fh)

class SubtypingRow(models.Base):
    """Row of a subtyping result; stores each constructor argument as an attribute."""

    def __init__(self, analysis="", contigid="", filename="", hitcutoff="", hitname="", hitorientation="", hitstart="", hitstop=""):
        # Assign every argument to the attribute of the same name, in
        # declaration order.
        values = (
            ('analysis', analysis),
            ('contigid', contigid),
            ('filename', filename),
            ('hitcutoff', hitcutoff),
            ('hitname', hitname),
            ('hitorientation', hitorientation),
            ('hitstart', hitstart),
            ('hitstop', hitstop),
        )
        for attr, value in values:
            setattr(self, attr, value)


class SubtypingResult(models.Base):
    """Holds the rows of a subtyping result in the rows attribute."""

    def __init__(self, rows=None):
        # Any falsy argument (None, an empty list, ...) becomes a fresh list.
        self.rows = rows or []

class PhylotyperRow(models.Base):
    """Row of a phylotyper result; every value is held as a string field."""
    # NOTE(review): the field objects are created per instance inside
    # __init__ rather than declared on the class -- confirm this is what
    # models.Base expects.
    def __init__(self):
        # Fields created with nullable=True: contig, probability, start,
        # stop, subtype_gene. genome and subtype are created without it.
        self.contig = fields.StringField(nullable=True)
        self.genome = fields.StringField()
        self.probability = fields.StringField(nullable=True) # actually float
        self.start = fields.StringField(nullable=True) # actually int
        self.stop = fields.StringField(nullable=True) # actually int
        self.subtype = fields.StringField()
        self.subtype_gene = fields.StringField(nullable=True)

class PhylotyperResult(models.Base):
    """Phylotyper result holding a list field of PhylotyperRow entries."""
    # NOTE(review): as in PhylotyperRow, the field is assigned per instance
    # in __init__ instead of on the class -- confirm against models.Base.
    def __init__(self):
        # List field built from [PhylotyperRow] and created with nullable=True.
        self.rows = fields.ListField([PhylotyperRow], nullable=True)


class Job():
def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False):
"""
Expand Down Expand Up @@ -385,14 +324,12 @@ def to_json(self):
l = []
for j in completed_jobs:
rq_job = j.rq_job
model = rq_job.result
lr = rq_job.result
try:
# TODO: This is not correct as while the new ECTYper call does return a model, the display_subtyping() call that the return job is associated with will already convert the result to a list and return it.
assert isinstance(model, (models.Base,list))
assert isinstance(l, (models.Base,list))
except:
raise Exception("to_json() called for job {0} with result of type {1} and info {2}".format(j.name, type(model), str(model)))
list_json = model_to_json(model)
l += list_json
l += lr
return jsonify(l)

def _function_signature(self):
Expand Down
2 changes: 1 addition & 1 deletion app/modules/ectyper/call_ectyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def call_ectyper_serotype(args_dict, pickle=True):
])
if ret_code == 0:
output_file = os.path.join(output_dir, 'output.csv')
# Create a SubtypingResult model from the output.
# Create a list from the output.
subtyping_result = model_serotype(
pi=pi,
pl=pl,
Expand Down
2 changes: 1 addition & 1 deletion app/modules/ectyper/ecoli_serotyping
22 changes: 0 additions & 22 deletions app/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,6 @@ def test_subtyping_model_direct(l=constants.BEAUTIFY_VF_SEROTYPE):
# Return for incorporation into later tests.
return subtyping_list

# def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1):
# """
# Use our dataset to directly create a phylotyper results model and validate it.
# """
# phylotyper_list = [
# models.PhylotyperRow(
# contig=d['contig'],
# genome=d['genome'],
# probability=str(d['probability']),
# start=str(d['start']),
# stop=str(d['stop']),
# subtype=d['subtype'],
# subtype_gene=d['subtype_gene']
# )
# for d in l]
# phylotyper_result = models.PhylotyperResult(
# rows = phylotyper_list
# )
# phylotyper_result.validate()
# # Return for incorporation into later tests.
# return phylotyper_result

def _create_example_pipeline():
p = models.Pipeline(
func=spfy,
Expand Down
18 changes: 3 additions & 15 deletions app/tests/test_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
from modules.amr.amr import amr
from modules.amr.amr_to_dict import amr_to_dict
from middleware.display.beautify import beautify, model_to_json
from middleware.display.beautify import beautify
from middleware.graphers.datastruct_savvy import datastruct_savvy
from middleware.graphers.turtle_grapher import turtle_grapher
from middleware.models import unpickle
Expand Down Expand Up @@ -69,18 +69,6 @@ def test_ectyper_vf(return_one=False):
if return_one:
return json_return

def _validate_model(model):
# Validate (throws error if invalidate).
# model.validate()
# Check that the return rows is not some random empty list.
# assert model.rows
# Check the conversion for the front-end.
# r = model_to_json(model)
# This is not really json; more like a list than a dict structure.
assert isinstance(model, list)
# Check that this isn't empty.
assert model

def test_ectyper_serotype_direct():
"""Check the ECTyper from `master` which only performs serotyping.
Installed in the conda environment.
Expand All @@ -99,7 +87,7 @@ def test_ectyper_serotype_call_nopickle():
single_dict.update({'i':ecoli_genome})
# Have the call return the model without pickling.
serotype_model = call_ectyper_serotype(single_dict, pickle=False)
_validate_model(serotype_model)
assert isinstance(serotype_model, list)

def test_ectyper_serotype_call_pickle(return_one=False):
"""
Expand All @@ -111,7 +99,7 @@ def test_ectyper_serotype_call_pickle(return_one=False):
# Pickle the model, and return the path to the file.
pickled_serotype_model = call_ectyper_serotype(single_dict)
ectyper_serotype_model = unpickle(pickled_serotype_model)
_validate_model(ectyper_serotype_model)
assert isinstance(ectyper_serotype_model, list)
if return_one:
return ectyper_serotype_model

Expand Down

0 comments on commit 109ac21

Please sign in to comment.