Skip to content

Commit

Permalink
Allow duplicate indices for advanced indexing
Browse files Browse the repository at this point in the history
Atomic operations in the slicedcopy kernel make duplicate indices safe.
Atomic operations are only used when necessary (i.e. when the destination
indices contain duplicates), since they slow things down a bit.
  • Loading branch information
hunse committed Apr 9, 2018
1 parent 0fb409c commit b863ad9
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 16 deletions.
45 changes: 40 additions & 5 deletions nengo_ocl/clra_nonlinearities.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,40 @@ def plan_slicedcopy(queue, X, Y, Xinds, Yinds, incs, tag=None):
assert Xinds.ctype == Yinds.ctype == 'int'

text = """
% if atomic:
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
// Atomically add `operand` to the float stored at `source`.
// The sum is computed on a private snapshot and then published with an
// integer compare-and-exchange over the same 32 bits; if the stored value
// changed in the meantime the whole read-add-publish sequence is retried.
void safe_add(volatile global float *source, const float operand) {
// source: http://suhorukov.blogspot.ca/
// 2011/12/opencl-11-atomic-operations-on-floating.html
// The unions expose a float's bit pattern as an unsigned int so that float
// values can be handed to the integer atomic_cmpxchg below.
union {
unsigned int intVal;
float floatVal;
} newVal;
union {
unsigned int intVal;
float floatVal;
} prevVal;
do {
// Snapshot the current value and compute the value we want to store.
prevVal.floatVal = *source;
newVal.floatVal = prevVal.floatVal + operand;
// atomic_cmpxchg returns the value it found at `source`; if that is not our
// snapshot, nothing was stored and the loop runs again.
} while (atomic_cmpxchg(
(volatile global unsigned int*)source, prevVal.intVal, newVal.intVal)
!= prevVal.intVal);
}
// Atomically store `operand` into the float at `source`.
// Uses the OpenCL 1.1 core built-in atomic_xchg, which provides a float
// overload, instead of the extension function atom_xchg, whose documented
// overloads are integer-only. This also matches safe_add, which already
// relies on the atomic_* family (atomic_cmpxchg).
inline void safe_xchg(global float *source, const float operand) {
atomic_xchg(source, operand);
}
% else:
// Non-atomic variant: accumulate `operand` into the float at `source`
// with a plain read-modify-write.
inline void safe_add(global float *source, const float operand) {
source[0] = source[0] + operand;
}
// Non-atomic variant: overwrite the float at `source` with `operand`
// using a plain store.
inline void safe_xchg(global float *source, const float operand) {
source[0] = operand;
}
% endif
////////// MAIN FUNCTION //////////
__kernel void slicedcopy(
% if inc is None:
Expand Down Expand Up @@ -336,14 +370,14 @@ def plan_slicedcopy(queue, X, Y, Xinds, Yinds, incs, tag=None):
if (i < Isizes[n]) {
% if inc is True:
b[bind[i]*Ystride0] += a[aind[i]*Xstride0];
safe_add(&b[bind[i]*Ystride0], a[aind[i]*Xstride0]);
% elif inc is False:
b[bind[i]*Ystride0] = a[aind[i]*Xstride0];
safe_xchg(&b[bind[i]*Ystride0], a[aind[i]*Xstride0]);
% else:
if (incdata[n])
b[bind[i]*Ystride0] += a[aind[i]*Xstride0];
safe_add(&b[bind[i]*Ystride0], a[aind[i]*Xstride0]);
else
b[bind[i]*Ystride0] = a[aind[i]*Xstride0];
safe_xchg(&b[bind[i]*Ystride0], a[aind[i]*Xstride0]);
% endif
}
}
Expand All @@ -359,7 +393,8 @@ def plan_slicedcopy(queue, X, Y, Xinds, Yinds, incs, tag=None):
lsize = (lsize0, lsize1)
gsize = (lsize0, round_up(N, lsize1))

textconf = dict(Xtype=X.ctype, Ytype=Y.ctype, N=N, inc=None)
atomic = any(np.unique(i).size < i.size for i in Yinds) # duplicates
textconf = dict(Xtype=X.ctype, Ytype=Y.ctype, N=N, inc=None, atomic=atomic)

full_args = [
to_device(queue, X.stride0s[inds]),
Expand Down
11 changes: 0 additions & 11 deletions nengo_ocl/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,6 @@ class Simulator(object):
# unsupported = [('test_pes*', 'PES rule not implemented')]
# would skip all test whose names start with 'test_pes'.
unsupported = [
# advanced indexing
('nengo/tests/test_connection.py:test_list_indexing*',
"Advanced indexing with repeated indices not implemented"),

# neuron types
('nengo/tests/test_neurons.py:test_izhikevich',
"Izhikevich neurons not implemented"),
Expand Down Expand Up @@ -691,13 +687,6 @@ def plan_Copy(self, ops, legacy=False):
xinds = [inds(op.src, op.src_slice) for op in ops]
yinds = [inds(op.dst, op.dst_slice) for op in ops]

dupl = lambda s: (
s is not None
and not (isinstance(s, np.ndarray) and s.dtype == np.bool)
and len(s) != len(set(s)))
if any(dupl(i) for i in xinds) or any(dupl(i) for i in yinds):
raise NotImplementedError("Duplicates in indices")

X = self.all_data[[self.sidx[op.src] for op in ops]]
Y = self.all_data[[self.sidx[op.dst] for op in ops]]
Xinds = self.RaggedArray(xinds)
Expand Down

0 comments on commit b863ad9

Please sign in to comment.