Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/qiime/emperor into pep8-util
Browse files Browse the repository at this point in the history
  • Loading branch information
josenavas committed Mar 23, 2015
2 parents 3e7fecc + b74c666 commit d0a2d3a
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 101 deletions.
72 changes: 36 additions & 36 deletions emperor/sort.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,37 @@
#!/usr/bin/env python
# File created on 20 Apr 2013
from __future__ import division

__author__ = "Yoshiki Vazquez Baeza"
__copyright__ = "Copyright 2013, The Emperor Project"
__credits__ = ["Yoshiki Vazquez Baeza"]
__license__ = "BSD"
__version__ = "0.9.51-dev"
__maintainer__ = "Yoshiki Vazquez Baeza"
__email__ = "yoshiki89@gmail.com"
__status__ = "Development"

from numpy import zeros
from re import compile, search


def sort_taxa_table_by_pcoa_coords(coords_header, otu_table, otu_header):
    """Sort and match the samples in the otu table and in the coordinates data

    This function will sort the columns of an otu table as suggested by the
    sample ids in the coords_header. Sample ids that appear in coords_header
    but not in otu_header are skipped.

    Parameters
    ----------
    coords_header: list of str
        sample ids that are present in principal coordinates data
    otu_table: numpy array
        numpy array with the data for an otu table, one column per sample id
        in otu_header
    otu_header: list of str
        sample ids present in the otu table

    Returns
    -------
    sorted_otu_headers: list of str
        sample ids that were present in the coords_header list, the order in
        this table matches the order of the coordinates data
    sorted_otu_table: numpy array
        otu table data with columns belonging to the sample ids in the
        sorted_otu_headers list
    """
    # map every sample id to the column it occupies in the otu table; keep the
    # first occurrence so the result agrees with what list.index would return
    column_of = {}
    for column, sample_id in enumerate(otu_header):
        column_of.setdefault(sample_id, column)

    # only the sample ids that exist in the otu table can be retrieved, keep
    # them in the order dictated by the coordinates data
    sorted_otu_headers = [sample_id for sample_id in coords_header
                          if sample_id in column_of]

    # the size of the otu table can be pre-allocated for better memory usage
    sorted_otu_table = zeros([otu_table.shape[0], len(sorted_otu_headers)])

    # the destination column is the position among the *matched* ids, not the
    # position in coords_header; using the latter overflows the pre-allocated
    # table as soon as a non-matching id precedes a matching one
    for destination, sample_id in enumerate(sorted_otu_headers):
        sorted_otu_table[:, destination] = otu_table[:, column_of[sample_id]]

    return sorted_otu_headers, sorted_otu_table


def sort_comparison_filenames(coord_fps):
"""Pass in a list of file names and sort them using the suffix
Input:
coord_fps: list of filenames with the format something_something_qX.txt
where X is the index of the file.
Output:
Returns a sorted version of the list that was passed in where the strings
are sorted according to the suffix they have, if the string doesn't have
a suffix it will be added to the beginning of the list.
Parameters
----------
coord_fps: list of str
The filenames with the format something_something_qX.txt where X is
the index of the file.
Returns
-------
list of str
A sorted version of the list that was passed in where the strings are
sorted according to the suffix they have, if the string doesn't have a
suffix it will be added to the beginning of the list.
"""

if coord_fps == []:
return []

def _get_suffix(fp):
"""Gets the number in the suffix for a string using a regex"""
# any alphanumeric set of characters proceeded by a 'q', a number, a dot
# & a txt extension at the end of the line. Take for example
# any alphanumeric set of characters followed by '_q', a number,
# a dot & a txt extension at the end of the line. Take for example
# bray_curtis_q1.txt or unifrac_q11.txt
re = compile(r'(\w+)_q([0-9]+).txt$')
tmatch = search(re, fp)
Expand All @@ -84,4 +85,3 @@ def _get_suffix(fp):
# the key function retrieves the suffix number for the function to sort
# according to its floating point representation, i.e. the cast to float
return sorted(coord_fps, key=_get_suffix)

162 changes: 97 additions & 65 deletions tests/test_sort.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,43 @@
#!/usr/bin/env python
# File created on 20 Apr 2013
from __future__ import division

__author__ = "Yoshiki Vazquez Baeza"
__copyright__ = "Copyright 2013, The Emperor Project"
__credits__ = ["Yoshiki Vazquez Baeza"]
__license__ = "BSD"
__version__ = "0.9.51-dev"
__maintainer__ = "Yoshiki Vazquez Baeza"
__email__ = "yoshiki89@gmail.com"
__status__ = "Development"

from __future__ import division
from unittest import TestCase, main

from numpy import array
from numpy.testing import assert_almost_equal

from emperor.sort import (sort_taxa_table_by_pcoa_coords,
sort_comparison_filenames)
sort_comparison_filenames)


class TopLevelTests(TestCase):
def setUp(self):
self.otu_headers = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
'PC.593', 'PC.355', 'PC.607', 'PC.634']

self.otu_table = array([[0.02739726, 0.04697987, 0.02, 0.04697987, 0.01,
0.02027027, 0.01360544, 0.01342282, 0.02666667], [0.00684932,
0.02013423, 0.02, 0.00671141, 0., 0.00675676, 0., 0., 0.], [
0.14383562, 0.27516779, 0.65333333, 0.52348993, 0.38926174,
0.69594595, 0.28571429, 0.0738255, 0.19333333], [0., 0.02013423,
0.03333333, 0.01342282, 0., 0.0472973, 0., 0., 0.], [0.78767123,
0.45637584, 0.22, 0.39597315, 0.41610738, 0.20945946, 0.70068027,
0.89932886, 0.77333333], [0.,0.02013423, 0.01333333, 0.00671141,
0.03355705, 0.00675676, 0., 0., 0.],[0., 0., 0.01333333, 0., 0., 0.,
0., 0., 0.], [0.03424658, 0.16107383, 0.02666667, 0.00671141,
0.14765101, 0.01351351, 0., 0.01342282, 0.00666667]])
'PC.593', 'PC.355', 'PC.607', 'PC.634']

self.otu_table = array(
[[0.02739726, 0.04697987, 0.02, 0.04697987, 0.01, 0.02027027,
0.01360544, 0.01342282, 0.02666667],
[0.00684932, 0.02013423, 0.02, 0.00671141, 0., 0.00675676, 0.,
0., 0.],
[0.14383562, 0.27516779, 0.65333333, 0.52348993, 0.38926174,
0.69594595, 0.28571429, 0.0738255, 0.19333333],
[0., 0.02013423, 0.03333333, 0.01342282, 0., 0.0472973, 0.,
0., 0.],
[0.78767123, 0.45637584, 0.22, 0.39597315, 0.41610738, 0.20945946,
0.70068027, 0.89932886, 0.77333333],
[0., 0.02013423, 0.01333333, 0.00671141, 0.03355705, 0.00675676,
0., 0., 0.],
[0., 0., 0.01333333, 0., 0., 0., 0., 0., 0.],
[0.03424658, 0.16107383, 0.02666667, 0.00671141, 0.14765101,
0.01351351, 0., 0.01342282, 0.00666667]])

self.coords = COORDS
self.coords_header = ['PC.354','PC.356','PC.481','PC.593',
'PC.355','PC.607','PC.634', 'PC.636', 'PC.635']
self.coords_header = ['PC.354', 'PC.356', 'PC.481', 'PC.593', 'PC.355',
'PC.607', 'PC.634', 'PC.636', 'PC.635']

self.coord_fps = ['output_data/emperor/bray_curtis_pc_transformed_q1.txt',
self.coord_fps = [
'output_data/emperor/bray_curtis_pc_transformed_q1.txt',
'output_data/emperor/bray_curtis_pc_transformed_q10.txt',
'output_data/emperor/bray_curtis_pc_transformed_q11.txt',
'output_data/emperor/bray_curtis_pc_transformed_q12.txt',
Expand Down Expand Up @@ -77,15 +74,18 @@ def setUp(self):
'output_data/emperor/bray_curtis_pc_transformed_q12.txt',
'output_data/emperor/qqq2223_curtis_qc_transformed_q13.txt',
'output_data/emperor/bray_curtis_pc_transformed_q14.txt',
'output_data/emperor/bray_curtis_pc_transformed_reference.txtoutput_data/emperor/bray_curtis_pc_transformed_q15.txt',
'output_data/emperor/bray_curtis_pc_transformed_reference.'
'txtoutput_data/emperor/bray_curtis_pc_transformed_q15.txt',
'output_data/emperor/bray_curtis_pc_transformed_q16.txt',
'output_data/emperor/bray_curtis_pc_transformed_q17.txt',
'output_data/emperor/bray_curtis_pc_transformed_q18.txt',
'output_data/emperor/bray_curtis_pc_transformed_q19.txt',
'output_data/emperor/bray_curtis_pc_transformed_q2.txt',
'output_data/emperor/boom.txt',
'output_data/emperor/another_file with some characters and stuff .txt',
'output_data/emperor/some_other_file_that_foo_wants_to_compare.txt',
'output_data/emperor/another_file with some characters '
'and stuff .txt',
'output_data/emperor/some_other_file_that_foo_wants_to_'
'compare.txt',
'output_data/emperor/bray_curtis_pc_transformed_q23.txt',
'output_data/emperor/bray_curtis_pc_transformed_q24.txt',
'output_data/emperor/bray_curtis_pc_transformed_q25.txt',
Expand All @@ -101,28 +101,31 @@ def setUp(self):
'output_data/emperor/bray_curtis_pc_transformed_q8.txt',
'output_data/emperor/bray_curtis_pc_transformed_q9.txt']



def test_sort_taxa_table_by_pcoa_coords(self):
"""Make sure OTU table and coordinates are sorted equally"""

# case with shuffled inputs
o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords(
self.coords_header, self.otu_table, self.otu_headers)

self.assertEquals(o_headers, ['PC.354','PC.356','PC.481','PC.593',
'PC.355','PC.607','PC.634', 'PC.636', 'PC.635'])
self.assertEquals(o_headers, ['PC.354', 'PC.356', 'PC.481', 'PC.593',
'PC.355', 'PC.607', 'PC.634', 'PC.636',
'PC.635'])
assert_almost_equal(o_otu_table, OTU_TABLE_A)

# case with shuffled inputs and fewer samples
o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords(['PC.354',
'PC.356','PC.635'], self.otu_table, self.otu_headers)
self.assertEquals(o_headers, ['PC.354','PC.356','PC.635'])
assert_almost_equal(o_otu_table, array([[ 0.01, 0.02, 0.04697987],[0.,
0.02, 0.02013423], [0.38926174, 0.65333333, 0.27516779],[0.,
0.03333333, 0.02013423],[0.41610738, 0.22, 0.45637584],[0.03355705,
0.01333333, 0.02013423],[0., 0.01333333, 0.],[0.14765101,
0.02666667, 0.16107383]]))
o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords(
['PC.354', 'PC.356', 'PC.635'], self.otu_table, self.otu_headers)
self.assertEquals(o_headers, ['PC.354', 'PC.356', 'PC.635'])
assert_almost_equal(o_otu_table, array(
[[0.01, 0.02, 0.04697987],
[0., 0.02, 0.02013423],
[0.38926174, 0.65333333, 0.27516779],
[0., 0.03333333, 0.02013423],
[0.41610738, 0.22, 0.45637584],
[0.03355705, 0.01333333, 0.02013423],
[0., 0.01333333, 0.],
[0.14765101, 0.02666667, 0.16107383]]))

def test_sort_comparison_filenames_regular(self):
"""Check filenames are sorted correctly"""
Expand Down Expand Up @@ -163,10 +166,13 @@ def test_sort_comparison_filenames_regular(self):
# if files with garbage are passed in, the sorting should be still
# consistent, putting the "garbaged" filenames at the beginning
out_sorted = sort_comparison_filenames(self.coord_fps_garbage)
self.assertEquals(out_sorted, ['output_data/emperor/aaaaaaa.txt',
self.assertEquals(out_sorted, [
'output_data/emperor/aaaaaaa.txt',
'output_data/emperor/boom.txt',
'output_data/emperor/another_file with some characters and stuff .txt',
'output_data/emperor/some_other_file_that_foo_wants_to_compare.txt',
'output_data/emperor/another_file with some characters and '
'stuff .txt',
'output_data/emperor/some_other_file_that_foo_wants_to_'
'compare.txt',
'output_data/emperor/bray_qurtis_pc_transformed_q1.txt',
'output_data/emperor/bray_curtis_pc_transformed_q2.txt',
'output_data/emperor/bray_curtis_pc_transformed_q3.txt',
Expand All @@ -180,7 +186,8 @@ def test_sort_comparison_filenames_regular(self):
'output_data/emperor/bray_curtis_pc_transformed_q12.txt',
'output_data/emperor/qqq2223_curtis_qc_transformed_q13.txt',
'output_data/emperor/bray_curtis_pc_transformed_q14.txt',
'output_data/emperor/bray_curtis_pc_transformed_reference.txtoutput_data/emperor/bray_curtis_pc_transformed_q15.txt',
'output_data/emperor/bray_curtis_pc_transformed_reference.'
'txtoutput_data/emperor/bray_curtis_pc_transformed_q15.txt',
'output_data/emperor/bray_curtis_pc_transformed_q16.txt',
'output_data/emperor/bray_curtis_pc_transformed_q17.txt',
'output_data/emperor/bray_curtis_pc_transformed_q18.txt',
Expand Down Expand Up @@ -211,24 +218,49 @@ def test_sort_comparison_filenames_regular(self):
self.assertEquals(sort_comparison_filenames([]), [])


COORDS = array([[0.280399117569, -0.0060128286014, 0.0234854344148, -0.0468109474823, -0.146624450094, 0.00566979124596, -0.0354299634191, -0.255785794275, -4.84141986706e-09],
[0.228820399536, -0.130142097093, -0.287149447883, 0.0864498846421, 0.0442951919304, 0.20604260722, 0.0310003571386, 0.0719920436501, -4.84141986706e-09],
[0.0422628480532, -0.0139681511889, 0.0635314615517, -0.346120552134, -0.127813807608, 0.0139350721063, 0.0300206887328, 0.140147849223, -4.84141986706e-09],
[0.232872767451, 0.139788385269, 0.322871079774, 0.18334700682, 0.0204661596818, 0.0540589147147, -0.0366250872041, 0.0998235721267, -4.84141986706e-09],
[0.170517581885, -0.194113268955, -0.0308965283066, 0.0198086158783, 0.155100062794, -0.279923941712, 0.0576092515759, 0.0242481862127, -4.84141986706e-09],
[-0.0913299284215, 0.424147148265, -0.135627421345, -0.057519480907, 0.151363490722, -0.0253935675552, 0.0517306152066, -0.038738217609, -4.84141986706e-09],
[-0.349339228244, -0.120787589539, 0.115274502117, 0.0694953933826, -0.0253722182853, 0.067853201946, 0.244447634756, -0.0598827706386, -4.84141986706e-09],
[-0.276542163845, -0.144964375408, 0.0666467344429, -0.0677109454288, 0.176070269506, 0.072969390136, -0.229889463523, -0.0465989416581, -4.84141986706e-09],
[-0.237661393984, 0.0460527772512, -0.138135814766, 0.159061025229, -0.247484698646, -0.115211468101, -0.112864033263, 0.0647940729676, -4.84141986706e-09]])

OTU_TABLE_A = array([[ 0.01, 0.02, 0.04697987, 0.02027027, 0.01360544, 0.01342282, 0.02666667, 0.02739726, 0.04697987],
[ 0., 0.02, 0.00671141, 0.00675676, 0., 0., 0., 0.00684932, 0.02013423],
[ 0.38926174, 0.65333333, 0.52348993, 0.69594595, 0.28571429, 0.0738255, 0.19333333, 0.14383562, 0.27516779],
[ 0., 0.03333333, 0.01342282, 0.0472973, 0., 0., 0., 0., 0.02013423],
[ 0.41610738, 0.22, 0.39597315, 0.20945946, 0.70068027, 0.89932886, 0.77333333, 0.78767123, 0.45637584],
[ 0.03355705, 0.01333333, 0.00671141, 0.00675676, 0., 0., 0., 0., 0.02013423],
[ 0., 0.01333333, 0., 0., 0., 0., 0., 0., 0.],
[ 0.14765101, 0.02666667, 0.00671141, 0.01351351, 0., 0.01342282, 0.00666667, 0.03424658, 0.16107383]])
COORDS = array(
[[0.280399117569, -0.0060128286014, 0.0234854344148, -0.0468109474823,
-0.146624450094, 0.00566979124596, -0.0354299634191, -0.255785794275,
-4.84141986706e-09],
[0.228820399536, -0.130142097093, -0.287149447883, 0.0864498846421,
0.0442951919304, 0.20604260722, 0.0310003571386, 0.0719920436501,
-4.84141986706e-09],
[0.0422628480532, -0.0139681511889, 0.0635314615517, -0.346120552134,
-0.127813807608, 0.0139350721063, 0.0300206887328, 0.140147849223,
-4.84141986706e-09],
[0.232872767451, 0.139788385269, 0.322871079774, 0.18334700682,
0.0204661596818, 0.0540589147147, -0.0366250872041, 0.0998235721267,
-4.84141986706e-09],
[0.170517581885, -0.194113268955, -0.0308965283066, 0.0198086158783,
0.155100062794, -0.279923941712, 0.0576092515759, 0.0242481862127,
-4.84141986706e-09],
[-0.0913299284215, 0.424147148265, -0.135627421345, -0.057519480907,
0.151363490722, -0.0253935675552, 0.0517306152066, -0.038738217609,
-4.84141986706e-09],
[-0.349339228244, -0.120787589539, 0.115274502117, 0.0694953933826,
-0.0253722182853, 0.067853201946, 0.244447634756, -0.0598827706386,
-4.84141986706e-09],
[-0.276542163845, -0.144964375408, 0.0666467344429, -0.0677109454288,
0.176070269506, 0.072969390136, -0.229889463523, -0.0465989416581,
-4.84141986706e-09],
[-0.237661393984, 0.0460527772512, -0.138135814766, 0.159061025229,
-0.247484698646, -0.115211468101, -0.112864033263, 0.0647940729676,
-4.84141986706e-09]])

OTU_TABLE_A = array(
[[0.01, 0.02, 0.04697987, 0.02027027, 0.01360544, 0.01342282, 0.02666667,
0.02739726, 0.04697987],
[0., 0.02, 0.00671141, 0.00675676, 0., 0., 0., 0.00684932, 0.02013423],
[0.38926174, 0.65333333, 0.52348993, 0.69594595, 0.28571429, 0.0738255,
0.19333333, 0.14383562, 0.27516779],
[0., 0.03333333, 0.01342282, 0.0472973, 0., 0., 0., 0., 0.02013423],
[0.41610738, 0.22, 0.39597315, 0.20945946, 0.70068027, 0.89932886,
0.77333333, 0.78767123, 0.45637584],
[0.03355705, 0.01333333, 0.00671141, 0.00675676, 0., 0., 0., 0.,
0.02013423],
[0., 0.01333333, 0., 0., 0., 0., 0., 0., 0.],
[0.14765101, 0.02666667, 0.00671141, 0.01351351, 0., 0.01342282,
0.00666667, 0.03424658, 0.16107383]])


if __name__ == "__main__":
Expand Down

0 comments on commit d0a2d3a

Please sign in to comment.