forked from themightyoarfish/deepVO
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
403 lines (344 loc) · 14.5 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
'''
.. module:: utils

Miscellaneous functions for data processing and batching. Among other things, this module defines
:py:class:`OptimizerSpec` for specifying optimizers and :py:class:`DataManager` for partitioning
the data into batches.

.. moduleauthor:: Rasmus Diederichsen, Alexander Mock
'''
import numpy as np
def tensor_from_lstm_tuple(tuples, validate_shape=False):
    '''Create a tensor from a tuple of :py:class:`tf.contrib.rnn.LSTMStateTuple` s.

    .. note::
        We do not check all possible error cases. For instance, the different LSTMStateTuples
        could not only have differing shapes (which we check for to some extent, see the
        ``validate_shape`` parameter), but the state members ``c`` and ``h`` could also differ in
        their data type (Tensor, array), which we *do not* check.

    Parameters
    ----------
    tuples : tuple(LSTMStateTuple)
        Tuple of ``LSTMStateTuple`` s (as many as there are stacked lstm cells) where each
        of the tuples has members ``c`` and ``h`` of shape ``(batch_size, memory_size)``
    validate_shape : bool
        Enforce identical shapes of all cell and memory states. This entails that
        all dimensions must be known. When using variable batch size, set to
        ``False`` and ensure the shapes are identical at runtime.

    Returns
    -------
    tf.Tensor or np.ndarray
        Tensor of shape ``(N_lstm, 2, batch_size, memory_size)``. Along the second axis, index 0
        holds the cell state ``c`` and index 1 the hidden state ``h``. An array is returned
        instead of a tensor unless the ``c`` member of at least one tuple is a ``tf.Tensor``.

    Raises
    ------
    ValueError
        If the shapes (``validate_shape=True``) or the memory sizes (``validate_shape=False``) of
        the states do not agree with those of the first tuple's ``c`` member.
    '''
    # the first cell state serves as the reference shape for all other states
    batch_size = tuples[0].c.shape[0]
    memory_size = tuples[0].c.shape[1]

    # Build one *fresh* ``[c, h]`` pair per lstm cell. (Multiplying a nested list, as in
    # ``[[None, None]] * n``, would make every entry alias the same inner list, so all layers
    # would end up with the states of the last one.)
    stacked = []
    for lstm_state in tuples:
        if validate_shape:
            # all dims must match
            if not ((batch_size, memory_size) == lstm_state.c.shape == lstm_state.h.shape):
                raise ValueError('All states must have the same dimenstion.')
        elif not (memory_size == lstm_state.c.shape[1] == lstm_state.h.shape[1]):
            # only the memory_size must match, batch_size is assumed to match, but cannot be
            # verified
            raise ValueError('All states must have the same memory size.')
        stacked.append([lstm_state.c, lstm_state.h])  # [cell state, hidden state]

    # TensorFlow is only needed to recognise and build tensors, so the import is skipped when
    # every member is a plain numpy array.
    only_arrays = all(isinstance(member, np.ndarray) for pair in stacked for member in pair)
    if not only_arrays:
        import tensorflow as tf
        if any(isinstance(cell_state, tf.Tensor) for cell_state, _ in stacked):
            # convert_to_tensor works when the list elements are tensors
            return tf.convert_to_tensor(stacked)
    # states that are not tensors are returned as an array
    return np.array(stacked)
def toEulerAngles(q):
    '''Convert a quaternion to euler angles.

    The formulas used are the usual ones for a unit quaternion; the input is not normalised here.

    Parameters
    ----------
    q : np.array or list
        Quaternion with its components in the order ``[x, y, z, w]``

    Returns
    -------
    np.ndarray
        Array of 3 elements ``[roll, pitch, yaw]`` (radians, as returned by
        ``np.arctan2``/``np.arcsin``)
    '''
    x, y, z, w = q[0], q[1], q[2], q[3]

    # roll (rotation about x)
    sinr = 2.0 * (w * x + y * z)
    cosr = 1.0 - 2.0 * (x * x + y * y)
    roll = np.arctan2(sinr, cosr)

    # pitch (rotation about y); rounding can push the sine slightly outside [-1, 1], in which
    # case the angle saturates at +-pi/2
    sinp = 2.0 * (w * y - z * x)
    pitch = np.arcsin(np.clip(sinp, -1.0, 1.0))

    # yaw (rotation about z); the numerator pairs w with z (and x with y)
    siny = 2.0 * (w * z + x * y)
    cosy = 1.0 - 2.0 * (y * y + z * z)
    yaw = np.arctan2(siny, cosy)

    return np.array([roll, pitch, yaw])
def posesFromQuaternionToRPY(poses):
    '''Batch-convert a set of poses from quaternions to euler angles.

    Parameters
    ----------
    poses : np.ndarray
        2-d array with one pose per row. Columns 0-2 are copied unchanged, columns 3-6 are
        handed to :py:func:`toEulerAngles` as the quaternion.

    Returns
    -------
    np.ndarray
        Array with one 6-element row per input pose: the three copied values followed by the
        three euler angles.
    '''
    def convert_row(row):
        # float64 buffer, so the output dtype does not depend on the input dtype
        converted = np.zeros(6)
        converted[:3] = row[0:3]
        converted[3:] = toEulerAngles(row[3:7])
        return converted

    return np.array([convert_row(row) for row in poses])
def resize_to_multiple(images, multiples):
    '''Resize a batch of images in the height and width dimensions so their sizes are an integer
    multiple of some value. Each dimension is rounded *up* to the next multiple.

    Parameters
    ----------
    images : tf.Tensor
        Tensor of shape ``(batch, height, width, channels)``. Height and width must be statically
        known.
    multiples : int or tuple
        The value/s that should evenly divide the resized image's dimensions. A single value is
        used for both height and width, a tuple is read as ``(height_multiple, width_multiple)``.

    Returns
    -------
    tf.Operation
        Tensorflow op for resizing images
    '''
    import math
    import tensorflow as tf

    _, h, w, _ = images.get_shape()
    # if only one multiple, assume it's the value to use for all dims
    if not isinstance(multiples, tuple):
        multiples = (multiples, multiples)
    mult_h, mult_w = multiples
    # int() turns the static shape entries into plain Python ints before dividing; the number of
    # multiples needed is rounded up and then scaled back to a pixel size
    new_h = int(math.ceil(int(h) / mult_h)) * mult_h
    new_w = int(math.ceil(int(w) / mult_w)) * mult_w
    return tf.image.resize_images(images, [new_h, new_w])
def image_pairs(image_sequence, sequence_length):
    '''Generate sequences of stacked pairs of images where two consecutive ``c``-channel images
    are merged into one ``2 * c``-channel image (6 channels for RGB input).

    Every yielded window holds ``sequence_length`` pairs ``(img[i], img[i + 1])`` and therefore
    consumes ``sequence_length + 1`` consecutive images. Consecutive windows share one image, so
    no pair is skipped between windows. Images at the end of the sequence which do not suffice
    for a complete window are dropped, so fewer than the total number of images may be yielded.

    .. note::
        This function is deprecated by :py:class:`DataManager`

    Parameters
    ----------
    image_sequence : np.ndarray
        Array of shape ``(num, h, w, c)``
    sequence_length : int
        Number of elements (stacked image pairs) yielded each time

    Returns
    -------
    np.ndarray
        Array of shape ``(sequence_length, h, w, 2 * c)`` (per iteration of the generator). The
        first ``c`` channels of row ``i`` are the earlier image, the last ``c`` channels the
        later one.
    '''
    # unpacking enforces a 4-d input
    num_images, _, _, _ = image_sequence.shape
    # a window starting at ``start`` reads images start .. start + sequence_length, hence the
    # last admissible start is num_images - sequence_length - 1
    for start in range(0, num_images - sequence_length, sequence_length):
        earlier = image_sequence[start:start + sequence_length]
        later = image_sequence[start + 1:start + sequence_length + 1]
        # plain slices avoid a fixed-width index buffer (which would wrap around for long
        # sequences); stacking along the channel axis fills every row of the result
        yield np.concatenate((earlier, later), axis=-1)
def compute_rgb_mean(image_sequence):
    '''Compute the mean over each of the first three channels separately over a set of images.

    Parameters
    ----------
    image_sequence : np.ndarray
        Array of shape ``(N, h, w, c)`` or ``(h, w, c)``; the channels are on the last axis

    Returns
    -------
    np.ndarray
        Array of 3 elements: the means of channel 0, 1 and 2
    '''
    # Each channel is reduced on its own (instead of one ``mean`` call over several axes) so the
    # same code handles both a single image and a batch.
    channel_means = [image_sequence[..., channel].mean() for channel in range(3)]
    return np.array(channel_means)
def convert_large_array(file_in, file_out, dtype, factor=1.0):
    '''Convert data type of an array possibly too large to fit in memory.

    This uses memory-mapped files and will therefore be very slow.

    Parameters
    ----------
    file_in : str
        Name of the input file (``.npy`` format)
    file_out : str
        Name of the output file (``.npy`` format); it is created or overwritten
    dtype : np.dtype
        Destination data type
    factor : float
        Scaling factor to apply to all elements. The scaling is applied to the *source* values
        and only the scaled result is cast to ``dtype``.
    '''
    source = np.lib.format.open_memmap(file_in, mode='r')
    dest = np.lib.format.open_memmap(file_out, mode='w+', dtype=dtype, shape=source.shape)
    if factor == 1.0:
        np.copyto(dest, source, casting='unsafe')
    else:
        # Scale before casting: casting first would throw away the fractional part for integer
        # destinations (e.g. floats in [0, 1] -> uint8 with factor 255), and an in-place float
        # multiplication of an integer array is rejected by numpy's casting rules.
        np.multiply(source, factor, out=dest, casting='unsafe')
    # make sure everything is written to disk, then release both mappings
    dest.flush()
    del dest, source
def subtract_poses(pose_x, pose_y):
    '''Subtract two poses (or arrays of poses) with proper handling of the angles.

    Parameters
    ----------
    pose_x : np.array
        input array of poses or one pose
    pose_y : np.array
        input array of poses or one pose

    Returns
    -------
    np.array
        output array of ``pose_x - pose_y``; the entries 3 to 5 along the last axis are treated
        as angles and wrapped to the range of ``np.arctan2``
    '''
    difference = np.subtract(pose_x, pose_y)
    angles = difference[..., 3:6]
    # atan2(sin(a), cos(a)) maps any angle onto its equivalent in [-pi, pi]
    wrapped = np.arctan2(np.sin(angles), np.cos(angles))
    difference[..., 3:6] = wrapped
    return difference
from skimage.transform import resize
class OptimizerSpec(dict):
    '''Encapsulate all the info needed for creating any kind of optimizer. Learning rate scheduling
    is fixed to exponential decay.

    The keyword arguments passed to the constructor are stored as dictionary entries.

    Attributes
    ----------
    step_counter : Variable
        Counter to be passed to :py:meth:`Optimizer.minimize` so it gets incremented
        during each update
    learning_rate : float or result of tf.train.exponential_decay
        Learning rate of the optimizer (for later retrieval). The plain base rate unless both
        ``steps`` and ``decay`` were given.
    '''

    def __init__(self, **kwargs):
        '''
        Parameters
        ----------
        kind : str
            Name of the optimizer, i.e. the class name in ``tf.train`` without the ``Optimizer``
            suffix
        learning_rate : float
            Base learning rate used
        name : str
            Optional name passed to the optimizer
        momentum : float
            Momentum; required for the ``Momentum`` and ``RMSProp`` optimizers
        use_nesterov : bool
            Nesterov flag for momentum optimizer
        steps : int (optional)
            Exponential decay steps
        decay : float (optional)
            Exponential decay rate

        Raises
        ------
        ValueError
            If ``kind`` or ``learning_rate`` is missing
        '''
        if 'kind' not in kwargs:
            raise ValueError('No optimizer name given')
        if 'learning_rate' not in kwargs:
            raise ValueError('No base learning_rate given')
        self.update(kwargs)

        import tensorflow as tf
        self.step_counter = tf.Variable(0, trainable=False, dtype=tf.int32, name='step_counter')
        rate = kwargs['learning_rate']
        if 'steps' in kwargs and 'decay' in kwargs:
            # use exponential_decay
            self.learning_rate = tf.train.exponential_decay(rate, self.step_counter,
                                                            kwargs['steps'], kwargs['decay'])
        else:
            # plain learning
            self.learning_rate = rate

    def create(self):
        '''Build the Optimizer object from the properties

        Return
        ------
        tf.train.Optimizer
            Ready-made optimizer

        Raises
        ------
        ValueError
            If ``kind`` is ``Momentum`` or ``RMSProp`` and no ``momentum`` was given
        '''
        kind = self['kind']
        learning_rate = self.learning_rate
        name = self.get('name', 'optimizer')
        optimizer_cls = OptimizerSpec.get_optimizer(kind)

        if kind not in ('Momentum', 'RMSProp'):
            return optimizer_cls(learning_rate, name=name)

        # only those two use momentum param
        try:
            momentum = self['momentum']
        except KeyError:
            raise ValueError('Momentum parameter is necessary for MomentumOptimizer')

        # Everything after the learning rate is passed by keyword: the second/third positional
        # parameters of the two constructors are not (momentum, use_nesterov) in both cases
        # (MomentumOptimizer has use_locking in third position, RMSPropOptimizer has decay in
        # second position).
        if kind == 'Momentum':
            use_nesterov = self.get('use_nesterov', False)
            return optimizer_cls(learning_rate, momentum=momentum, use_nesterov=use_nesterov,
                                 name=name)
        return optimizer_cls(learning_rate, momentum=momentum, name=name)

    def __str__(self):
        key_val_str = ', '.join(str(k) + '=' + str(v) for k, v in self.items())
        return f'<Optimizer: {key_val_str}>'

    @staticmethod
    def get_optimizer(name):
        '''Look up an optimizer class in ``tf.train``.

        Parameters
        ----------
        name : str or tf.train.Optimizer
            Class name without the ``Optimizer`` suffix, e.g. ``'Adam'``. An object that already
            is a ``tf.train.Optimizer`` instance is returned unchanged.

        Returns
        -------
        The attribute ``tf.train.<name>Optimizer``, or ``name`` itself (see above)
        '''
        import tensorflow as tf
        if isinstance(name, tf.train.Optimizer):
            # NOTE(review): create() calls the returned object, which presumably only works
            # for classes - confirm whether instances are meant to be supported there
            return name
        return getattr(tf.train, name + 'Optimizer')
def conv_layer(input, channels_out, kernel_width, strides, activation, kernel_initializer,
               bias_initializer, use_bias=True, padding='SAME',
               var_names=(None, None), trainable=True):
    '''Build a 2-d convolution followed by an (optional) bias addition and an activation
    function. The kernel and bias variables are obtained via ``tf.get_variable``.

    Parameters
    ----------
    input : tf.Variable
        Input to the layer; must be 4-dimensional with the channels on the last axis
    channels_out : int
        Number of output feature maps
    kernel_width : int
        Size of the (square) convolution filter
    strides : tuple or int
        Strides. An int ``s`` is expanded to ``(1, s, s, 1)``, anything else is passed to
        ``tf.nn.conv2d`` unchanged.
    activation : function
        Activation function; a falsy value selects ``tf.identity``
    kernel_initializer
        Initializer for the kernel variable
    bias_initializer
        Initializer for the bias variable
    use_bias : bool
        Whether a bias variable is created and added
    padding : str
        'SAME' or 'VALID'
    var_names : tuple
        Names of the weight and bias variables; falsy entries default to ``'kernels'`` and
        ``'bias'``
    trainable : bool
        Passed on to ``tf.get_variable`` for both variables

    Returns
    -------
    tf.Variable
        The variable representing the layer activation
    '''
    import tensorflow as tf

    activation = activation or tf.identity
    kernel_name, bias_name = var_names[0] or 'kernels', var_names[1] or 'bias'

    # only the number of input channels is needed; the unpacking also enforces a 4-d input
    _, _, _, channels_in = input.shape
    strides = (1, strides, strides, 1) if isinstance(strides, int) else strides

    kernel_shape = (kernel_width, kernel_width, channels_in, channels_out)
    kernels = tf.get_variable(name=kernel_name, shape=kernel_shape,
                              initializer=kernel_initializer, trainable=trainable)
    biases = None
    if use_bias:
        biases = tf.get_variable(name=bias_name, shape=(channels_out,),
                                 initializer=bias_initializer, trainable=trainable)

    conv = tf.nn.conv2d(input, kernels, strides, padding=padding)
    pre_activation = conv if biases is None else conv + biases
    return activation(pre_activation)