-
Notifications
You must be signed in to change notification settings - Fork 0
/
condor_descript.py
executable file
·349 lines (301 loc) · 13.3 KB
/
condor_descript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#!/usr/bin/python2.7
"""Generate a description file for `condor_submit`"""
# TODO:
# - define an Error class
# -
# - Use a config/template file storing the preferred defaults
# - Use the same short options as `condor_qsub`
#
# NOTE: Possible sources of bugs in /usr/local/bin/condior-submit.sh:
# - variable NBQUEUE defined and not used.
# - niceuser written instead of nice_user
# - "Memory" in (Memory > 1024) is deprecated. Use TARGET.Memory instead
import os, errno # FileNotFoundError
import os.path as op
from sys import stdout, exit, version_info
import argparse
from distutils.spawn import find_executable
from datetime import datetime
import logging, errno
logger = logging.getLogger(__name__)
logging.basicConfig(format='%(levelname)s:%(lineno)s:%(message)s')
# Python 2 has no builtin FileNotFoundError: provide a stand-in so the rest of
# the module can raise it whatever the interpreter version.
if version_info[0] < 3:
    class FileNotFoundError(OSError):
        """Stand-in for the Python 3 builtin of the same name."""
# Table of the Condor parameters known to this script, in the order they are
# written to the description file ("executable" always comes first and the
# "Queue" statement last). main() registers one command-line option per row.
#
# Each row is: (short option, description name, help text)
#
# TODO: add default columns.
# TODO: add equivalent options in capital letters to read from file.
# NOTE(review): the header notes of this file list "niceuser written instead
#               of nice_user" as a possible bug -- confirm the spelling that
#               condor_submit expects before relying on -ni/--niceuser.
ORDERED_PARAMS = (
    ('-u',   'universe',              None),
    ('-o',   'output',                None),  # can contain {time}, {dir}, {base}
    ('-i',   'input',                 None),  # idem
    ('-e',   'error',                 None),  # idem
    ('-l',   'log',                   None),  # idem
    ('-a',   'arguments',             None),
    ('-c',   'request_cpus',          None),
    ('-m',   'request_memory',        None),
    ('-g',   'getenv',                None),
    ('-id',  'initialdir',            None),
    ('-stf', 'should_transfer_files', None),
    ('-ro',  'run_as_owner',          None),
    ('-rq',  'requirements',          None),
    ('-nu',  'notify_user',
     "email address for notification [$USER@biologie.ens.fr]"),
    ('-n',   'notification',          None),
    ('-ni',  'niceuser',              None),
    ('-p',   'priority',              None),
    ('-ra',  'rank',                  None),
    ('-cl',  'concurrency_limits',    None),
    ('-q',   'queue',                 None),
)
# Use the Condor macros $(Cluster) and $(Process) to automatically name output
# files.  $(Cluster) : the submission ID (one per submission file)
#         $(Process) : the process ID (one per block in the file)
# {dir} and {base} are filled in by generate_description().
TEMPLATE = '{dir}/{base}_$(Cluster)-$(Process)'

# Login name used to build the per-user defaults below. It is None when the
# environment defines neither USER nor LOGNAME (cron jobs, containers...): the
# module must still be importable in that case, so no KeyError is raised here.
_USER = os.environ.get('USER') or os.environ.get('LOGNAME')

# Defaults of this script, applied unless --condor-defaults is given.
# Every value is a list: one item means "same value for all jobs".
PREFERED_PARAMS = {
    'output'                : [TEMPLATE + '.stdout'],
    'error'                 : [TEMPLATE + '.stderr'],
    'log'                   : [TEMPLATE + '.log'],
    #'notification'         : ['Always'], # Condor default: Never
    'request_memory'        : ['1G'],     # TODO: $(ncores) * 1024
    'getenv'                : [True],
    'should_transfer_files' : ['NO'],
    'run_as_owner'          : [True],  # Condor default: True (Unix) / False (Windows)
    #'requirements' : ['(TARGET.memory > 1024)'] # that didn't work for me
    #'Initialdir'   : op.abspath(outbase),
    #'universe'     : "vanilla",
    #'environment'  :
    #'priority'     : 0,
    #'log_xml'      :,
    #'input'        : $INPUT,
    #'arguments'    : $ARGS,
    #'request_cpus' : 1,
    #'nice_user'    : False,
    #'rank'         : kflops+1000*Memory,
    #'queue'        : 1
}
# Defaults that embed the user name only make sense when that name is known.
if _USER:
    PREFERED_PARAMS['notify_user'] = [_USER + '@biologie.ens.fr']
    PREFERED_PARAMS['concurrency_limits'] = [_USER + ':34']
# Human-readable listing of this script's defaults (one "name: value" line per
# parameter, in ORDERED_PARAMS order), inserted in the --help epilog.
# ORDERED_PARAMS rows are (short option, name, help) tuples: only the name is
# a key of PREFERED_PARAMS, so it must be unpacked before the lookup.
PREFERED_PARAMS_REPR = "\n".join(' %-21s: %s' % (name, PREFERED_PARAMS[name])
                                 for _, name, _ in ORDERED_PARAMS
                                 if PREFERED_PARAMS.get(name))
# Text passed as `epilog` to the argparse parser built in main(), i.e. printed
# at the end of --help. The single %s placeholder is filled with
# PREFERED_PARAMS_REPR (the listing of this script's own defaults).
EPILOG="""
DETAILS:
Argument formatting:
any argument can contain python formatting elements that will be converted:
{time} : formatted time string. Use --timefmt to change format.
{dir} : dirname of description file. '.' if stdout is used.
{base} : basename of the description file.
'condorjob_{time}' if stdout is used.
Other Condor Arguments:
Any other needed argument for Condor can be specified as the following:
--{argumentname} {argumentvalue} [...].
See `man condor_submit` for available arguments
Default Arguments:
Unless `--condor-defaults` is used, this script has its own defaults:
%s
Examples:
condor_descript.py subset6-7_runcodemlsh.condor.txt $(which run_codeml_separatedir.sh) -a ENSGT00790000122969.subset{6,7}
""" % PREFERED_PARAMS_REPR
def _as_list(value):
    """Return `value` as a NEW list; a scalar (strings included) gives a 1-item list."""
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _format_value(value, fields):
    """Expand {dir}, {base} and {time} in `value`; non-string values are returned as is."""
    if not hasattr(value, 'format'):
        # Not a string (e.g. True for getenv): nothing to format.
        return value
    try:
        return value.format(**fields)
    except (IndexError, KeyError, AttributeError):
        # IndexError: positional field ({} or {0}); KeyError: unknown name;
        # AttributeError: attribute access on a field ({dir.x}).
        logger.error("The only formatting characters allowed are "
                     "{dir}, {base} and {time}. To escape curly "
                     "braces, use {{ and }}.")
        raise ValueError("Error formatting value: %s\n" % (value,))


def generate_description(description, executable, dir=None, base=None,
                         condor_defaults=False, template=None,
                         timefmt='%Y%m%d-%Hh%Mm%S',
                         **user_params):
    """Write a description file for `condor_submit`.

    Parameters:
    - description    : where to write: a file name, or an object with a
                       `write` method (e.g. sys.stdout or an open file).
                       A handle other than sys.stdout is closed on return.
    - executable     : the mandatory program to run; looked up in PATH.
    - dir, base      : values replacing {dir} and {base} in the parameters.
                       Default to the dirname and the extension-less basename
                       of the description file. When there is no usable file
                       name (sys.stdout...), they default to the current
                       directory and 'condorjob_<time>'.
    - condor_defaults: whether to use condor defaults instead of
                       PREFERED_PARAMS.
    - template       : name(s) of the output, error and log files, without
                       extension ('.stdout', '.stderr', '.log' are appended).
    - timefmt        : strftime format of the value replacing {time}
                       [%Y%m%d-%Hh%Mm%S].
    - user_params    : parameters of the condor description file (see
                       ORDERED_PARAMS for the known ones; any other name is
                       written as is). A value is a scalar or a list; None
                       means "not given". A list of one item applies to all
                       jobs; longer lists give one item per job and must all
                       have the same length.

    Raises:
    - ValueError       : bad formatting field in a value, or lists of
                         inconsistent lengths.
    - FileNotFoundError: `executable` was not found.
    """
    generate_time = datetime.now().strftime(timefmt)

    # Where is the description written, and is there a usable path for it?
    is_handle = hasattr(description, 'write')
    if is_handle:
        outdesc = getattr(description, 'name', None)
        # sys.stdout, in-memory and descriptor-backed handles have no path.
        has_path = (description is not stdout
                    and outdesc is not None
                    and not isinstance(outdesc, int))
    else:
        outdesc, has_path = description, True

    if has_path:
        outdir, outfile = op.split(outdesc)
        outdir = outdir or '.'
        outbase = op.splitext(outfile)[0]
    else:
        outdir = op.abspath(op.curdir)
        outbase = "condorjob_%s" % generate_time

    # override automatic outdir and outbase values by user-provided values.
    if dir is not None:
        outdir = dir.rstrip('/')
    if base is not None:
        outbase = base

    # Load prefered parameters if needed
    # TODO: move this part outside, and add the defaults in the
    #       argparse.ArgumentParser
    params = {}
    if not condor_defaults:
        params.update(PREFERED_PARAMS)

    # Uses user-defined template to name the output, error and log files:
    if template:
        templates = _as_list(template)
        params['output'] = [t + '.stdout' for t in templates]
        params['error'] = [t + '.stderr' for t in templates]
        params['log'] = [t + '.log' for t in templates]

    # replace defaults by user-specified values (None means "not given")
    params.update((p, v) for p, v in user_params.items() if v is not None)

    # Normalise every value to a list of formatted items. New lists are built
    # so that the lists owned by PREFERED_PARAMS (or by the caller) are never
    # modified: otherwise a later call would see already-expanded defaults.
    fields = dict(dir=outdir, base=outbase, time=generate_time)
    for p in list(params):
        params[p] = [_format_value(v, fields) for v in _as_list(params[p])]

    # Known parameters first, in ORDERED_PARAMS order, then the unknown ones
    # (sorted, so that the output does not depend on dict ordering).
    ordered_params_list = [p for _, p, _ in ORDERED_PARAMS if params.get(p)]
    known = set(ordered_params_list)
    ordered_params_list.extend(sorted(p for p in params if p not in known))

    # One value: written once at the top. Otherwise: one value per job block.
    single_params = [p for p in ordered_params_list if len(params[p]) == 1]
    perblock_params = [p for p in ordered_params_list if len(params[p]) != 1]

    # Run every check BEFORE opening the output, so that a failure neither
    # truncates an existing description file nor leaves a handle open.
    njobs = 0
    if perblock_params:
        # Determine number of jobs, and check if consistent across arguments
        njobs = len(params[perblock_params[0]])
        if any(len(params[p]) != njobs for p in perblock_params):
            raise ValueError("Not the same number of arguments for each "
                             "argument. Must be 1 or the same anywhere")

    executable_path = find_executable(executable)
    if not executable_path:
        raise FileNotFoundError(errno.ENOENT,
                                "Executable not in PATH. Please specify the "
                                "absolute or relative path",
                                executable)

    lines = ["executable = %s\n" % executable_path]
    lines.extend("%s = %s\n" % (p, params[p][0]) for p in single_params)
    if not perblock_params:  # There is only one block
        lines.append("Queue\n")
    else:
        lines.append("\n")
        for i in range(njobs):
            lines.extend("%s = %s\n" % (p, params[p][i])
                         for p in perblock_params)
            lines.append("Queue\n\n")

    OUT = description if is_handle else open(outdesc, 'w')
    try:
        OUT.writelines(lines)
    finally:
        if OUT is not stdout:
            OUT.close()
def parse_unknown_args(uargs):
    """Group leftover command-line tokens by option, with the rule nargs='+'.

    uargs: a list of strings from the command line (typically the second item
           returned by `ArgumentParser.parse_known_args`). It is not modified.

    Any token starting with '-' is an option; the tokens that follow it, up to
    the next option, are its values. Returns a dict mapping each option name
    (leading dashes removed) to the list of its values; the values of a
    repeated option are accumulated in order.

    Each occurrence of an option needs at least one value: otherwise an error
    is logged and the program exits with status errno.EINVAL. Values found
    before the first option belong to no option: they are ignored, with a
    warning.
    """
    def missing_value(option):
        logger.error("Invalid option. At least one value needed for %s", option)
        exit(errno.EINVAL)

    uargdict = {}
    opt = None      # option currently collecting values
    values = None   # list of values of `opt` (stored in uargdict)
    nvalues = 0     # number of values seen for THIS occurrence of `opt`
    for term in uargs or ():
        if term.startswith('-'):
            if opt is not None and not nvalues:
                missing_value(opt)
            opt = term
            nvalues = 0
            values = uargdict.setdefault(opt.lstrip('-'), [])
        elif opt is None:
            logger.warning("Ignoring value '%s': it does not follow any option",
                           term)
        else:
            values.append(term)
            nvalues += 1
    # The last option of the command line needs a value as well.
    if opt is not None and not nvalues:
        missing_value(opt)
    return uargdict
# TODO: Use csv module
def parse_fromfile(filename):
    """Read condor arguments from a tab-delimited table.

    The first line must contain the names of the arguments, one per column.
    Each following line gives one value per column. Blank lines are skipped.

    Returns a dict mapping each argument name to the list of its values, in
    file order. Every list has one item per data line: an empty or missing
    cell gives '' so that columns stay aligned. Fields beyond the last named
    column are ignored, with a warning when they are not empty.

    If the file is empty, an error is logged and the program exits with
    status errno.EINVAL.
    """
    with open(filename) as IN:
        header = next(IN, None)
        if header is None:
            logger.error("File %s is empty", filename)
            exit(errno.EINVAL)
        argnames = header.rstrip().split('\t')
        ncols = len(argnames)
        args_fromfile = dict((arg, []) for arg in argnames)
        for lineno, line in enumerate(IN, 2):
            # Only remove the end of line: trailing tabs are empty cells.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            args = line.split('\t')
            if any(args[ncols:]):
                logger.warning("File %s, line %d: ignoring fields beyond the "
                               "%d named columns", filename, lineno, ncols)
            # Pad short lines so that every column gets a value for this row.
            args.extend([''] * (ncols - len(args)))
            for argname, arg in zip(argnames, args):
                args_fromfile[argname].append(arg)
    return args_fromfile
def main():
    """Command-line entry point: parse the options and write the description.

    Options come from three sources, merged in one dict of keyword arguments
    for generate_description():
    - the options declared below (script options and one option per row of
      ORDERED_PARAMS);
    - any other "--name value [...]" found on the command line, collected by
      parse_unknown_args(), for condor arguments not defined in this script;
    - the columns of the --fromfile table, for the names that were not given
      on the command line.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    condor_arg_group = parser.add_argument_group('Condor description arguments')
    # TODO: nicer-looking printing help for the condor args
    aa = parser.add_argument
    aac = condor_arg_group.add_argument

    aa('description', nargs='?', type=argparse.FileType('w'), default=stdout,
       help='File name to write description in. Optional [stdout].')
    aa('--dir',
       help="string used to format arguments containing '{dir}'. It uses "
            "`dirname description file` by default")
    aa('--base',
       help="string used to format arguments containing '{base}'. It uses "
            "`basename description file` (without extensions) by default.")
    aa('--template', nargs='+',  # default=TEMPLATE,
       help="Name of output, error, and log files, without the extensions ("
            ".stdout, .stderr, .log respectively)")
    aa('--fromfile',
       help='Take arguments from columns of a tabulated file. The '
            'first line must contain arguments names (condor names or also '
            'options from this script). These values will be overriden by '
            'commandline options, with a warning.')
    # TODO: add a --submit flag to directly submit the job to the cluster
    aa('--condor-defaults', action='store_true',
       help='Whether to use condor default arguments (not this script defaults).')
    aa('--timefmt', default='%Y%m%d-%Hh%Mm%S',
       help='time formatting (time of execution of this script)')

    aac('executable')
    for shortopt, longopt, hlp in ORDERED_PARAMS:
        aac(shortopt, '--' + longopt, nargs='+', help=hlp)

    # uargs contains unknown args. When you need to add arguments for condor
    # not defined in this script.
    args, uargs = parser.parse_known_args()
    dictargs = vars(args)
    dictargs.update(parse_unknown_args(uargs))

    # 'fromfile' is an option of this script, not a condor parameter: always
    # remove it before the dict is handed to generate_description().
    fromfile = dictargs.pop('fromfile', None)
    if fromfile:
        for argname, arg in parse_fromfile(fromfile).items():
            if dictargs.get(argname) is not None:
                logger.warning("Argument '%s' from file will be overriden by commandline",
                               argname)
            else:
                dictargs[argname] = arg

    generate_description(**dictargs)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()