diff --git a/ChangeLog.md b/ChangeLog.md index ecd816b7..aecfe53e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -18,6 +18,8 @@ Emperor 0.9.1 (changes since Emperor 0.9.0 go here) * Export to SVG your visualization. * Emperor now relies on QIIME 1.7.0. * Added option `--number_of_segments` to control the quality of all spheres +* Add color pickers for connecting bars in coordinate comparison plots. +* Add option to select a master set of coordinates when making a comparison plot. *Bug Fixes* @@ -25,6 +27,9 @@ Emperor 0.9.1 (changes since Emperor 0.9.0 go here) * Category names are sorted alphabetically. * Category names with non-alphanumeric characters are colored correctly now. * Biplots checkbox now accurately reflects status of biplot visiblity rather than opposite. +* Comparison bars checkbox now accurately reflects status of the visiblity rather than opposite. +* Scaling by percent explained now works with vectors and coordinate comparison plots. +* Fixed bug where only the first bars in coordinate comparison plots could be hidden. Emperor 0.9.0 (14 May 2013) =========================== diff --git a/emperor/format.py b/emperor/format.py index f1205716..84301f20 100755 --- a/emperor/format.py +++ b/emperor/format.py @@ -307,13 +307,16 @@ def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data, return ''.join(js_vectors_string) -def format_comparison_bars_to_js(coords_data, coords_headers, clones): +def format_comparison_bars_to_js(coords_data, coords_headers, clones, + is_serial_comparison=True): """Format coordinates data to create a comparison plot Inputs: coords_data: numpy array with the replicated coordinates cooreds_headers: list with the headers for each of replicated coordinates clones: number of replicates in the coords_data and coords_headers + is_serial_comparison: whether the samples will be connected one after the + other (True) or all will originate in the first set of coordinates. 
Outputs: Javascript object that contains the data for the comparison plot @@ -323,11 +326,19 @@ def format_comparison_bars_to_js(coords_data, coords_headers, clones): length. AssertionError if the number of clones doesn't concord with the samples being presented. + + Unless the value of clones is > 0 this function will return an empty + javascript object initialization. """ js_comparison_string = [] js_comparison_string.append('\nvar g_comparisonPositions = new Array();\n') + if is_serial_comparison: + js_comparison_string.append('var g_isSerialComparisonPlot = true;\n') + else: + js_comparison_string.append('var g_isSerialComparisonPlot = false;\n') + if clones: headers_length = len(coords_headers) @@ -367,6 +378,9 @@ def format_emperor_html_footer_string(has_biplots=False, has_ellipses=False, has_biplots: whether the plot has biplots or not has_ellipses: whether the plot has ellipses or not + has_vectors: whether the plot has vectors or not + has_edges: whether the plot has edges between samples (comparison plot) + This function will remove unnecessary GUI elements from index.html to avoid confusions i. e. showing an ellipse opacity slider when there are no @@ -378,6 +392,7 @@ def format_emperor_html_footer_string(has_biplots=False, has_ellipses=False, optional_strings.append(if_(has_biplots, _BIPLOT_SPHERES_COLOR_SELECTOR,'')) optional_strings.append(if_(has_biplots, _BIPLOT_VISIBILITY_SELECTOR, '')) optional_strings.append(if_(has_biplots, _TAXA_LABELS_SELECTOR, '')) + optional_strings.append(if_(has_edges, _EDGES_COLOR_SELECTOR, '')) optional_strings.append(if_(has_ellipses, _ELLIPSE_OPACITY_SLIDER, '')) optional_strings.append(if_(has_vectors, _VECTORS_OPACITY_SLIDER, '')) optional_strings.append(if_(has_edges, _EDGES_VISIBILITY_SELECTOR, '')) @@ -454,10 +469,15 @@ def format_emperor_html_footer_string(has_biplots=False, has_ellipses=False, _EDGES_VISIBILITY_SELECTOR = """
- Edges Visibility + Edges Visibility

""" +_EDGES_COLOR_SELECTOR = """ +
Edge Color Selector A +
Edge Color Selector B +""" + _EMPEROR_FOOTER_HTML_STRING ="""document.getElementById("logo").style.display = 'none'; document.getElementById("logotable").style.display = 'none'; @@ -566,16 +586,16 @@ def format_emperor_html_footer_string(has_biplots=False, has_ellipses=False, - + %s
Axes Labels Color
Axes Color
Background Color
Background Color
-
-
- Use gradient colors -

-
%s%s%s +
+
+
+ Use gradient colors +
%s%s%s
def sort_comparison_filenames(coord_fps):
    """Sort coordinate file paths by their numeric ``_qX.txt`` suffix

    Input:
    coord_fps: list of filenames with the format something_something_qX.txt
    where X is the index of the file.

    Output:
    Returns a new list with the paths ordered by the integer value of X, so
    ``_q2.txt`` is placed before ``_q10.txt``. Paths that do not end in
    ``_qX.txt`` are placed at the beginning of the list, and paths sharing the
    same index keep the relative order they had in the input. The list that
    was passed in is not modified.
    """
    # one or more word characters followed by '_q', the digits of the index
    # and a literal '.txt' at the end of the string, for example
    # bray_curtis_q1.txt or unifrac_q11.txt. The dot is escaped so only a real
    # '.txt' extension is accepted. The pattern is compiled once here instead
    # of once per element being sorted.
    suffix_pattern = compile(r'(\w+)_q([0-9]+)\.txt$')

    def _get_suffix(fp):
        """Return the index in the suffix of fp, or -1 if it has none"""
        match = suffix_pattern.search(fp)

        # names without a suffix sort before every real index (always >= 0)
        if match is None:
            return -1

        return int(match.group(2))

    # sorted() copies the input and is stable, so an empty input yields an
    # empty list and ties are not reordered
    return sorted(coord_fps, key=_get_suffix)
/* This function is called when a new color is selected for the edges

  color: hexadecimal formatted color, either a number or a '0xRRGGBB' string
  (the edge color pickers pass the string form).
  index: which of the two lines that compose every edge is re-colored, 0 for
  the first line stored for each edge and 1 for the second one.

  Every edge stored in g_plotEdges gets the new color on the selected side.
*/
function colorChangedForEdges(color, index){
  // convert the string form once, outside the loop; parseInt with radix 16
  // accepts the leading '0x', numbers are passed through untouched
  var hexColor = (typeof color === 'string') ? parseInt(color, 16) : color;

  for (var sample_id in g_plotEdges){
    // setHex modifies the material color in place, its return value is not
    // needed (assigning it to an undeclared name would create a global)
    g_plotEdges[sample_id][index].material.color.setHex(hexColor);
  }
}
function(color) { + // pass a boolean flag to convert to hex6 string + var c = color.toHexString(true); + $(this).css('backgroundColor', c); + colorChangedForEdges(c.replace('#', '0x'), 0); + } + }); + } + if(document.getElementById('edgecolorselector_b')){ + $('#edgecolorselector_b').css('backgroundColor',"#FF0000"); + $("#edgecolorselector_b").spectrum({ + localStorageKey: 'key', + color: "#FFFFFF", + preferredFormat: "hex6", + showInitial: true, + showInput: true, + change: + function(color) { + // pass a boolean flag to convert to hex6 string + var c = color.toHexString(true); + $(this).css('backgroundColor', c); + colorChangedForEdges(c.replace('#', '0x'), 1); + } + }); + } + $("#sopacityslider").slider({ range: "max", min: 0, @@ -1207,7 +1294,6 @@ function setJqueryUi() { }); document.getElementById('labelopacity').innerHTML = $( "#lopacityslider" ).slider( "value")+"%" - //default color for axes labels is white $('#axeslabelscolor').css('backgroundColor',"#FFFFFF"); $("#axeslabelscolor").spectrum({ @@ -1375,6 +1461,7 @@ function drawTaxa(){ mesh.matrixAutoUpdate = true; // add the element to the scene and to the g_plotTaxa dictionary + g_elementsGroup.add(mesh) g_mainScene.add(mesh); g_plotTaxa[key] = mesh; } @@ -1418,53 +1505,104 @@ function drawVectors(){ } } - /*Draw the lines that connect samples being compared (see g_comparePositiosn) This will draw two lines between each compared sample, one with color red and - the other one with color white, the that must be noted here is that, these two + the other one with color white, what must be noted here is that, these two lines visually compose a single line and are both stored in the g_plotEdges array in arrays of two elements where the first element is the red line and the second element is the white line. + + In the case of a non-serial comparison plot, all edges will originate in the + same point. 
/* Draw the lines that connect the samples being compared (see
  g_comparisonPositions)

  Two lines are drawn between each pair of connected positions: the first one
  goes from the start point to the middle point and is white, the second one
  goes from the middle point to the end point and is red. Visually they
  compose a single edge. Both lines are stored in g_plotEdges as an array of
  two elements, [first line, second line], under the key
  sampleKey + '_' + index, and are added to g_elementsGroup and g_mainScene.

  In a serial comparison plot each position is connected to the next one, in
  a non-serial comparison plot all the edges of a sample originate in its
  first position.
*/
function drawEdges(){
  var start = null, current, middle_point, index = 0, line_a, line_b;

  // decide once how the start point behaves; if the flag was never written
  // to the page fall back to a serial comparison instead of throwing a
  // ReferenceError
  var isSerial = (typeof g_isSerialComparisonPlot === 'undefined') ||
    g_isSerialComparisonPlot == true;

  for (var sampleKey in g_comparisonPositions){
    for (var edgePosition in g_comparisonPositions[sampleKey]){
      current = g_comparisonPositions[sampleKey][edgePosition];

      // if we don't have a start point store it and move along
      if (start == null) {
        start = current;
      }
      // if we already have a start point then draw the edge
      else{
        // the edge is composed by two lines, the first one ends in the
        // middle point and the second one starts in the middle point
        middle_point = [(start[0]+current[0])/2,
          (start[1]+current[1])/2, (start[2]+current[2])/2];

        line_a = makeLine(start, middle_point, 0xFFFFFF, 2);
        line_b = makeLine(middle_point, current, 0xFF0000, 2);

        // transparency is a property of the material, not of the line
        // NOTE(review): assumes makeLine leaves the material in line.material
        // as drawAxisLines relies on -- confirm
        line_a.material.transparent = false;
        line_b.material.transparent = false;

        // index the two lines by the name of the sample plus a suffix; the
        // counter is shared by all the samples so the keys are not repeated
        g_plotEdges[sampleKey+'_'+index.toString()] = [line_a, line_b];

        g_elementsGroup.add(line_a);
        g_elementsGroup.add(line_b);
        g_mainScene.add(line_a);
        g_mainScene.add(line_b);

        // in a serial comparison the end of this edge is the start of the
        // next one, otherwise every edge keeps the same origin
        if (isSerial) {
          start = current;
        }
      }
      index = index+1;
    }

    // the positions of this sample are finished, let a new line start
    start = null;
  }
}
*/ function drawAxisLines() { + var axesColorFromColorPicker; + // removing axes, if they do not exist the scene doesn't complain g_mainScene.remove(g_xAxisLine); g_mainScene.remove(g_yAxisLine); g_mainScene.remove(g_zAxisLine); - + + // value should be retrieved from the picker every time the axes are drawn + axesColorFromColorPicker = $("#axescolor").spectrum("get").toHexString(true); + axesColorFromColorPicker = axesColorFromColorPicker.replace('#','0x') + axesColorFromColorPicker = parseInt(axesColorFromColorPicker, 16) + // one line for each of the axes g_xAxisLine = makeLine([g_xMinimumValue, g_yMinimumValue, g_zMinimumValue], - [g_xMaximumValue, g_yMinimumValue, g_zMinimumValue], 0xFFFFFF, 3); + [g_xMaximumValue, g_yMinimumValue, g_zMinimumValue], axesColorFromColorPicker, 3); g_yAxisLine = makeLine([g_xMinimumValue, g_yMinimumValue, g_zMinimumValue], - [g_xMinimumValue, g_yMaximumValue, g_zMinimumValue], 0xFFFFFF, 3); + [g_xMinimumValue, g_yMaximumValue, g_zMinimumValue], axesColorFromColorPicker, 3); g_zAxisLine = makeLine([g_xMinimumValue, g_yMinimumValue, g_zMinimumValue], - [g_xMinimumValue, g_yMinimumValue, g_zMaximumValue], 0xFFFFFF, 3); + [g_xMinimumValue, g_yMinimumValue, g_zMaximumValue], axesColorFromColorPicker, 3); + + // axes shouldn't be transparent + g_xAxisLine.material.transparent = false; + g_yAxisLine.material.transparent = false; + g_zAxisLine.material.transparent = false; g_mainScene.add(g_xAxisLine) g_mainScene.add(g_yAxisLine) @@ -1599,7 +1749,7 @@ function changePointCount() { document.getElementById('pointCount').innerHTML = g_visiblePoints+'/'+g_plotIds.length+' points' } -/* Validating and modifying the view axes */ +/* Validating and modifying the view axes */ function changeAxesDisplayed() { if (!jQuery.isEmptyObject(g_vectorPositions) || !jQuery.isEmptyObject(g_taxaPositions) || !jQuery.isEmptyObject(g_ellipsesDimensions) || g_number_of_custom_axes!=0) { @@ -2001,7 +2151,7 @@ $(document).ready(function() { var labelCoordinates; 
- // reposistion the labels for the axes in the 3D plot + // reposition the labels for the axes in the 3D plot labelCoordinates = toScreenXY(new THREE.Vector3(g_xMaximumValue, g_yMinimumValue, g_zMinimumValue), g_sceneCamera,$('#main_plot')); $("#pc1_label").css('left', labelCoordinates['x']) $("#pc1_label").css('top', labelCoordinates['y']) diff --git a/scripts/make_emperor.py b/scripts/make_emperor.py index 4ed93edf..0a2172ab 100755 --- a/scripts/make_emperor.py +++ b/scripts/make_emperor.py @@ -23,6 +23,7 @@ from qiime.biplots import make_biplot_scores_output from emperor.biplots import preprocess_otu_table +from emperor.sort import sort_comparison_filenames from emperor.filter import keep_samples_from_pcoa_data from emperor.util import (copy_support_files, preprocess_mapping_file, preprocess_coords_file, fill_mapping_field_from_mapping_file, @@ -62,14 +63,15 @@ " to use for the missing values: ", "%prog -i unweighted_unifrac_pc.txt -m " "Fasting_Map_modified.txt -a DOB -o pcoa_dob_with_missing_custom_axes_value" "s -x 'DOB:20060000'"), - ("PCoA plot with an explicit axis and using --missing_custom_axes_values but " - "setting different values based on another column", "Create a PCoA plot with an " - "axis of the plot representing the 'DOB' of the samples and defining the position " - "over the gradient of those samples missing a numeric value but using as reference " - "another column of the mapping file. 
In this case we are going to plot the samples " - "that are Control on the Treatment column on 20080220 and on 20080240 those that " - "are Fast:", "%prog -i unweighted_unifrac_pc.txt -m Fasting_Map_modified.txt -a DOB " - "-o pcoa_dob_with_missing_custom_axes_with_multiple_values -x " + ("PCoA plot with an explicit axis and using --missing_custom_axes_values " + "but setting different values based on another column", "Create a PCoA plot" + " with an axis of the plot representing the 'DOB' of the samples and " + "defining the position over the gradient of those samples missing a numeric" + " value but using as reference another column of the mapping file. In this " + "case we are going to plot the samples that are Control on the Treatment " + "column on 20080220 and on 20080240 those that are Fast:", "%prog -i " + "unweighted_unifrac_pc.txt -m Fasting_Map_modified.txt -a DOB -o " + "pcoa_dob_with_missing_custom_axes_with_multiple_values -x " "'DOB:Treatment==Control=20080220' -x 'DOB:Treatment==Fast=20080240'"), ("Jackknifed principal coordinates analysis plot", "Create a jackknifed " "PCoA plot (with confidence intervals for each sample) passing as the input" @@ -178,25 +180,30 @@ ' the "--taxa_fp" file to display. Passing "-1" will cause to display all ' 'the taxonomic groups, this option is only used when creating BiPlots. ' '[default=%default]', default=10, type='int'), - make_option('-s', '--master_pcoa', help='Used only when plotting ellipsoids' - ' for jackknifed beta diversity (i.e. using a directory of coord files' - ' instead of a single coord file). The coordinates in this file will be the' - ' center of each ellipisoid. [default: arbitrarily selected file from the ' - 'input directory]', default=None, type='existing_filepath'), + make_option('-s', '--master_pcoa', help='Used only when the input is a ' + 'directory of coordinate files i. e. for jackknifed beta diversity plot or' + ' for a coordinate comparison plot (procrustes analysis). 
The coordinates ' + 'in this file will be the center of each ellipsoid in the case of a ' + 'jackknifed PCoA plot or the center where the connecting arrows originate ' + 'from for a comparison plot. [default: arbitrarily selected file from the ' + 'input directory for a jackknifed plot or None for a comparison plot in ' + 'this case one file will be connected to the next one and so on]', + default=None, type='existing_filepath'), make_option('-t', '--taxa_fp', help='Path to a summarized taxa file (i. ' 'e. the output of summarize_taxa.py). This option is only used when ' 'creating BiPlots. [default=%default]', default=None, type= 'existing_filepath'), make_option('-x', '--missing_custom_axes_values', help='Option to override ' - 'the error shown when the catergory used in \'--custom_axes\' has non-numeric ' - 'values in the mapping file. The basic format is custom_axis:new_value. For ' - 'example, if you want to plot in time 0 all the samples that do not have a numeric ' - 'value in the column Time. you would pass -x "Time:0". Additionally, you can pass ' - 'this format custom_axis:other_column==value_in_other_column=new_value, with this ' - 'format you can specify different values (new_value) to use in the substitution ' - 'based on other column (other_column) value (value_in_other_column); see example ' - 'above. This option could be used in all explicit axes.',action='append', - default=None), + 'the error shown when the catergory used in \'--custom_axes\' has ' + 'non-numeric values in the mapping file. The basic format is ' + 'custom_axis:new_value. For example, if you want to plot in time 0 all the ' + 'samples that do not have a numeric value in the column Time. you would ' + 'pass -x "Time:0". 
Additionally, you can pass this format ' + 'custom_axis:other_column==value_in_other_column=new_value, with this ' + 'format you can specify different values (new_value) to use in the ' + 'substitution based on other column (other_column) value ' + '(value_in_other_column); see example above. This option could be used in ' + 'all explicit axes.',action='append', default=None), make_option('-o','--output_dir',type="new_dirpath", help='path to the ' 'output directory that will contain the PCoA plot. [default: %default]', default='emperor'), @@ -230,7 +237,7 @@ def main(): number_of_axes = opts.number_of_axes compare_plots = opts.compare_plots number_of_segments = opts.number_of_segments - + # verifying that the number of axes requested is greater than 3 if number_of_axes<3: option_parser.error(('You need to plot at least 3 axes.')) @@ -244,6 +251,8 @@ def main(): offending_fields = [] non_numeric_categories = [] + serial_comparison = True + # can't do averaged pcoa plots _and_ custom axes in the same plot if custom_axes!=None and len(custom_axes.split(','))>1 and\ isdir(input_coords): @@ -254,7 +263,7 @@ def main(): # make sure the flag is not misunderstood from the command line interface if isdir(input_coords) == False and compare_plots: option_parser.error('Cannot use the \'--compare_plots\' flag unless the' - ' input input path is a directory.') + ' input path is a directory.') # before creating any output, check correct parsing of the main input files try: @@ -290,6 +299,31 @@ def main(): if master_pcoa in coord_fps: # remove it if duplicated coord_fps.remove(master_pcoa) coord_fps = [master_pcoa] + coord_fps # prepend it to the list + # passing a master file means that the comparison is not serial + elif master_pcoa and compare_plots: + serial_comparison = False + + # guarantee that the master is the first and is not repeated + if master_pcoa in coord_fps: + coord_fps.remove(master_pcoa) + coord_fps = [master_pcoa] + sort_comparison_filenames(coord_fps) + + # 
QIIME generates folders of transformed coordinates for the specific + # purpose of connecting all coordinates to a set of origin coordinates. + # The name of this file is suffixed as _transformed_reference.txt + elif master_pcoa == None and len([f for f in coord_fps if f.endswith( + '_transformed_reference.txt')]): + master_pcoa = [f for f in coord_fps if f.endswith( + '_transformed_reference.txt')][0] + serial_comparison = False + + # Note: the following steps are to guarantee consistency. + # remove the master from the list and re-add it as a first element + # the rest of the files must be sorted alphabetically so the result + # will be: ['unifrac_transformed_reference.txt', + # 'unifrac_transformed_q1.txt', 'unifrac_transformed_q2.txt'] etc + coord_fps.remove(master_pcoa) + coord_fps = [master_pcoa] + sort_comparison_filenames(coord_fps) for fp in coord_fps: try: @@ -344,7 +378,7 @@ def main(): # other exeptions should be catched here; code will be updated then except ValueError: option_parser.error(('The PCoA file \'%s\' does not seem to be a ' - 'coordinates formatted file, verify by manuall inspecting ' + 'coordinates formatted file, verify by manually inspecting ' 'the contents.') % input_coords) # number of samples ids that are shared between coords and mapping files @@ -410,18 +444,16 @@ def main(): header, mapping_data = filter_mapping_file(mapping_data, header, sids_intersection, include_repeat_cols=True) - - - # catch the errors that could ocurr when filling the mapping file values + # catch the errors that could occur when filling the mapping file values if missing_custom_axes_values: try: # the fact that this uses parse_metadata_state_descriptions makes - # the follwoing option '-x Category:7;PH:12' to work as well as the + # the following option '-x Category:7;PH:12' to work as well as the # script-interface-documented '-x Category:7 -x PH:12' option for val in missing_custom_axes_values: if ':' not in val: - option_parser.error("Not valid missing 
value for custom axes: %s" - % val) + option_parser.error("Not valid missing value for custom " + "axes: %s" % val) mapping_data = fill_mapping_field_from_mapping_file(mapping_data, header, ';'.join(missing_custom_axes_values)) @@ -563,14 +595,14 @@ def main(): fp_out.write(format_vectors_to_js(mapping_data, header, coords_data, coords_headers, add_vectors[0], add_vectors[1])) fp_out.write(format_comparison_bars_to_js(coords_data, coords_headers, - clones)) + clones, is_serial_comparison=serial_comparison)) fp_out.write(format_emperor_html_footer_string(taxa_fp != None, isdir(input_coords) and not compare_plots, add_vectors != [None, None], clones>0)) fp_out.close() copy_support_files(output_dir) - # write the bilot coords in the output file if a path is passed + # write the biplot coords in the output file if a path is passed if biplot_fp and taxa_fp: # make sure this file can be created try: diff --git a/tests/test_format.py b/tests/test_format.py index 68fe1e12..7ba7f96f 100755 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -162,14 +162,31 @@ def test_format_comparison_bars_to_js(self): # empty string generation for comparison i. e. no clones out_js_comparison_string = format_comparison_bars_to_js( - self.comparison_coords_data, self.comparison_coords_headers, 0) + self.comparison_coords_data, self.comparison_coords_headers, 0, + True) self.assertEquals(out_js_comparison_string, '\nvar ' - 'g_comparisonPositions = new Array();\n') + 'g_comparisonPositions = new Array();\nvar g_isSerialComparisonPlot' + ' = true;\n') out_js_comparison_string = format_comparison_bars_to_js( - self.comparison_coords_data, self.comparison_coords_headers, 3) + self.comparison_coords_data, self.comparison_coords_headers, 3, + True) self.assertEquals(out_js_comparison_string, COMPARISON_JS_STRING) + # empty string generation for comparison i. e. 
no clones + out_js_comparison_string = format_comparison_bars_to_js( + self.comparison_coords_data, self.comparison_coords_headers, 0, + False) + self.assertEquals(out_js_comparison_string, '\nvar ' + 'g_comparisonPositions = new Array();\nvar g_isSerialComparisonPlot' + ' = false;\n') + + out_js_comparison_string = format_comparison_bars_to_js( + self.comparison_coords_data, self.comparison_coords_headers, 3, + False) + self.assertEquals(out_js_comparison_string, + COMPARISON_JS_STRING_NON_SERIAL) + def test_format_comparison_bars_to_js_exceptions(self): """Check the correct exceptions are raised for incorrect inputs""" @@ -400,6 +417,18 @@ def test_format_emperor_html_footer_string(self): COMPARISON_JS_STRING = """ var g_comparisonPositions = new Array(); +var g_isSerialComparisonPlot = true; +g_comparisonPositions['sampa'] = [[-0.0677, -2.036, 0.2726], [-0.972, 0.551, 1.144], [0.2339, -0.88, -1.753]]; +g_comparisonPositions['sampb'] = [[-1.782, -0.972, 0.1582], [1.438, -2.603, -1.39], [0.436, 2.12, -0.935]]; +g_comparisonPositions['sampc'] = [[-0.659, -0.2566, 0.514], [-0.356, 0.0875, 0.772], [-0.88, 1.069, 1.069]]; +g_comparisonPositions['sampd'] = [[-1.179, -0.968, 2.525], [1.512, -1.239, -0.0365], [0.294, 0.2988, 0.0467]]; +g_comparisonPositions['sampe'] = [[-0.896, -1.765, 0.274], [1.17, 1.31, -1.407], [1.64, 0.2485, -0.354]]; +g_comparisonPositions['sampf'] = [[-0.0923, 1.414, -0.622], [2.618, 0.739, -0.01295], [0.821, -1.13, -1.794]]; +""" + +COMPARISON_JS_STRING_NON_SERIAL = """ +var g_comparisonPositions = new Array(); +var g_isSerialComparisonPlot = false; g_comparisonPositions['sampa'] = [[-0.0677, -2.036, 0.2726], [-0.972, 0.551, 1.144], [0.2339, -0.88, -1.753]]; g_comparisonPositions['sampb'] = [[-1.782, -0.972, 0.1582], [1.438, -2.603, -1.39], [0.436, 2.12, -0.935]]; g_comparisonPositions['sampc'] = [[-0.659, -0.2566, 0.514], [-0.356, 0.0875, 0.772], [-0.88, 1.069, 1.069]]; @@ -519,14 +548,14 @@ def test_format_emperor_html_footer_string(self):
Axes Color
Background Color -
-
- Use gradient colors -


+
+
+ Use gradient colors +

@@ -694,15 +723,15 @@ def test_format_emperor_html_footer_string(self):
Axes Color
Background Color -
-
- Use gradient colors -



+
+ Use gradient colors +
+
@@ -852,15 +881,15 @@ def test_format_emperor_html_footer_string(self):
Axes Color
Background Color -
-
- Use gradient colors -



+
+ Use gradient colors +
+
@@ -1009,14 +1038,14 @@ def test_format_emperor_html_footer_string(self):
Axes Color
Background Color -
-
- Use gradient colors -


+
+
+ Use gradient colors +

@@ -1169,18 +1198,21 @@ def test_format_emperor_html_footer_string(self):
Axes Labels Color
Axes Color
Background Color +
Edge Color Selector A +
Edge Color Selector B + -
-
- Use gradient colors -


+
+
+ Use gradient colors +

- Edges Visibility + Edges Visibility


diff --git a/tests/test_sort.py b/tests/test_sort.py index 54d98947..fad0b198 100755 --- a/tests/test_sort.py +++ b/tests/test_sort.py @@ -14,7 +14,8 @@ from numpy import array from cogent.util.unit_test import TestCase, main -from emperor.sort import sort_taxa_table_by_pcoa_coords +from emperor.sort import (sort_taxa_table_by_pcoa_coords, + sort_comparison_filenames) class TopLevelTests(TestCase): def setUp(self): @@ -37,8 +38,71 @@ def setUp(self): self.coords_header = ['PC.354','PC.356','PC.481','PC.593', 'PC.355','PC.607','PC.634', 'PC.636', 'PC.635'] + self.coord_fps = ['output_data/emperor/bray_curtis_pc_transformed_q1.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q10.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q11.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q12.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q13.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q14.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q15.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q16.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q17.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q18.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q19.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q2.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q20.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q21.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q22.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q23.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q24.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q25.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q26.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q27.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q28.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q29.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q3.txt', + 
'output_data/emperor/bray_curtis_pc_transformed_q4.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q5.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q6.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q7.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q8.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q9.txt'] + + self.coord_fps_garbage = [ + 'output_data/emperor/bray_qurtis_pc_transformed_q1.txt', + 'output_data/emperor/bray_111urtis_q_transformed_q10.txt', + 'output_data/emperor/aaaaaaa.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q12.txt', + 'output_data/emperor/qqq2223_curtis_qc_transformed_q13.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q14.txt', + 'output_data/emperor/bray_curtis_pc_transformed_reference.txtoutput_data/emperor/bray_curtis_pc_transformed_q15.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q16.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q17.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q18.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q19.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q2.txt', + 'output_data/emperor/boom.txt', + 'output_data/emperor/another_file with some characters and stuff .txt', + 'output_data/emperor/some_other_file_that_foo_wants_to_compare.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q23.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q24.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q25.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q26.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q27.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q28.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q29.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q3.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q4.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q5.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q6.txt', + 
'output_data/emperor/bray_curtis_pc_transformed_q7.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q8.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q9.txt'] + + + def test_sort_taxa_table_by_pcoa_coords(self): - """ """ + """Make sure OTU table and coordinates are sorted equally""" # case with shuffled inputs o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords( @@ -58,6 +122,93 @@ def test_sort_taxa_table_by_pcoa_coords(self): 0.01333333, 0.02013423],[0., 0.01333333, 0.],[0.14765101, 0.02666667, 0.16107383]])) + def test_sort_comparison_filenames_regular(self): + """Check filenames are sorted correctly""" + + # check it correctly sorts the files according to the suffix + out_sorted = sort_comparison_filenames(self.coord_fps) + self.assertEquals(out_sorted, [ + 'output_data/emperor/bray_curtis_pc_transformed_q1.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q2.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q3.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q4.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q5.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q6.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q7.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q8.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q9.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q10.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q11.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q12.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q13.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q14.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q15.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q16.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q17.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q18.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q19.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q20.txt', + 
'output_data/emperor/bray_curtis_pc_transformed_q21.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q22.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q23.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q24.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q25.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q26.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q27.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q28.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q29.txt']) + + # if files with garbage are passed in, the sorting should be still + # consistent,putting the "garbaged" filenames at the beginning + out_sorted = sort_comparison_filenames(self.coord_fps_garbage) + self.assertEquals(out_sorted, ['output_data/emperor/aaaaaaa.txt', + 'output_data/emperor/boom.txt', + 'output_data/emperor/another_file with some characters and stuff .txt', + 'output_data/emperor/some_other_file_that_foo_wants_to_compare.txt', + 'output_data/emperor/bray_qurtis_pc_transformed_q1.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q2.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q3.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q4.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q5.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q6.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q7.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q8.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q9.txt', + 'output_data/emperor/bray_111urtis_q_transformed_q10.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q12.txt', + 'output_data/emperor/qqq2223_curtis_qc_transformed_q13.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q14.txt', + 'output_data/emperor/bray_curtis_pc_transformed_reference.txtoutput_data/emperor/bray_curtis_pc_transformed_q15.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q16.txt', + 
'output_data/emperor/bray_curtis_pc_transformed_q17.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q18.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q19.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q23.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q24.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q25.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q26.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q27.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q28.txt', + 'output_data/emperor/bray_curtis_pc_transformed_q29.txt']) + + # tricky case with extensions in things that are not the filename + out_sorted = sort_comparison_filenames([ + 'output_data_q1.txt/emperor/bray_curtis_pc_transformed_q9.txt', + 'output_data/emperorq11.txt/bray_curtis_pc_transformed_q2.txt', + 'output_data_q44.txt/emperor/bray_curtis_pc_transformed_q11.txt', + 'output_dataq-5.txt/emperor/bray_curtis_pc_transformed_q3.txt', + 'output_data_q511.txt/emperor/bray_curtis_pc_transformed_q1.txt']) + self.assertEquals(out_sorted, [ + 'output_data_q511.txt/emperor/bray_curtis_pc_transformed_q1.txt', + 'output_data/emperorq11.txt/bray_curtis_pc_transformed_q2.txt', + 'output_dataq-5.txt/emperor/bray_curtis_pc_transformed_q3.txt', + 'output_data_q1.txt/emperor/bray_curtis_pc_transformed_q9.txt', + 'output_data_q44.txt/emperor/bray_curtis_pc_transformed_q11.txt']) + + # make sure nothing happens when an empty list is passed + self.assertEquals(sort_comparison_filenames([]), []) + + COORDS = array([[0.280399117569, -0.0060128286014, 0.0234854344148, -0.0468109474823, -0.146624450094, 0.00566979124596, -0.0354299634191, -0.255785794275, -4.84141986706e-09], [0.228820399536, -0.130142097093, -0.287149447883, 0.0864498846421, 0.0442951919304, 0.20604260722, 0.0310003571386, 0.0719920436501, -4.84141986706e-09], [0.0422628480532, -0.0139681511889, 0.0635314615517, -0.346120552134, -0.127813807608, 0.0139350721063, 0.0300206887328, 
0.140147849223, -4.84141986706e-09],