-
Notifications
You must be signed in to change notification settings - Fork 80
/
constants.py
93 lines (78 loc) · 3.95 KB
/
constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
from collections import namedtuple
from future.utils import viewkeys, viewvalues
from datetime import datetime
# A restriction bundles a `columns` dict (column name -> Python type, as used
# by the restriction tables in this module) with an `error_msg` string
Restriction = namedtuple(
    'Restriction',
    ('columns', 'error_msg'))
# Restrictions that apply to the sample templates, keyed by restriction name.
# Each value maps the restricted column names to the type used for them.
SAMPLE_TEMPLATE_COLUMNS = {
    # Columns that EBI requires for submission
    'EBI': Restriction(
        columns={
            'collection_timestamp': datetime,
            'physical_specimen_location': str,
            'taxon_id': int,
            'scientific_name': str,
        },
        error_msg="EBI submission disabled"),

    # Columns that the official main QIITA site requires
    'qiita_main': Restriction(
        columns={
            'sample_type': str,
            'description': str,
            'physical_specimen_remaining': bool,
            'dna_extracted': bool,
            'latitude': float,
            'longitude': float,
            'host_subject_id': str,
        },
        error_msg="Processed data approval disabled"),
}
# Restrictions that apply to the prep templates, keyed by restriction name
PREP_TEMPLATE_COLUMNS = {
    # Columns that EBI requires for submission
    'EBI': Restriction(
        columns={
            'primer': str,
            'center_name': str,
            'platform': str,
            'instrument_model': str,
            'library_construction_protocol': str,
            'experiment_design_description': str,
        },
        error_msg="EBI submission disabled"),
}
# Prep template requirements vary with the data type, so each special group
# of data types gets its own restrictions dictionary.

# Data types that are treated as target gene data
TARGET_GENE_DATA_TYPES = [
    '16S',
    '18S',
    'ITS',
]

PREP_TEMPLATE_COLUMNS_TARGET_GENE = {
    # Columns that QIIME needs in order to execute split libraries
    'demultiplex': Restriction(
        columns={
            'barcode': str,
            'primer': str,
        },
        error_msg="Demultiplexing disabled."),

    # Columns that Qiita needs to know how to execute split libraries using
    # QIIME over a study with multiple Illumina lanes
    'demultiplex_multiple': Restriction(
        columns={
            'barcode': str,
            'primer': str,
            'run_prefix': str,
        },
        error_msg="Demultiplexing with multiple input files disabled."),
}
# Every restrictions dictionary gathered in one list, so that code can loop
# over all the restrictions without caring which template they belong to
ALL_RESTRICTIONS = [
    SAMPLE_TEMPLATE_COLUMNS,
    PREP_TEMPLATE_COLUMNS,
    PREP_TEMPLATE_COLUMNS_TARGET_GENE,
]
# Cell values that are considered "NaN" on metadata import, taken from
# http://www.ebi.ac.uk/ena/about/missing-values-reporting
EBI_NULL_VALUES = [
    'Not applicable',
    'Missing: Not collected',
    'Missing: Not provided',
    'Missing: Restricted access',
]
# Cell values that are considered 'True' bool values on metadata import
TRUE_VALUES = [
    'Yes', 'yes', 'YES',
    'Y', 'y',
    'True', 'true', 'TRUE',
    't', 'T',
]
# Cell values that are considered 'False' bool values on metadata import
FALSE_VALUES = [
    'No', 'no', 'NO',
    'N', 'n',
    'False', 'false', 'FALSE',
    'f', 'F',
]
def _col_iterator():
    """Yield the name of every column listed in any restriction

    Walks each restrictions dictionary in ALL_RESTRICTIONS and, for every
    Restriction it holds, yields the keys of its `columns` dict. A column
    name that appears in more than one restriction is yielded once per
    appearance; de-duplication is left to the caller.
    """
    for r_set in ALL_RESTRICTIONS:
        for restriction in viewvalues(r_set):
            for col in viewkeys(restriction.columns):
                yield col


# A set holding all the controlled columns, built once at import time so it
# does not have to be recalculated on every use
CONTROLLED_COLS = set(_col_iterator())