Skip to content

Commit

Permalink
cleaned up examples and removed tabs
Browse files Browse the repository at this point in the history
  • Loading branch information
gblelloch committed Apr 2, 2024
1 parent 2b5154e commit 2654e1c
Show file tree
Hide file tree
Showing 24 changed files with 251 additions and 236 deletions.
6 changes: 3 additions & 3 deletions examples/2d_linear_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ auto linear_program_2d(const constraints& H_in, constraint c) {
long top = std::min(2*i, n);
// check for a violating constraint from i to top
long loc = parlay::reduce(parlay::delayed_tabulate(top-i, [&] (long j) {
return violate(p, H[i+j]) ? i+j : n;}), parlay::minimum<long>());
return violate(p, H[i+j]) ? i+j : n;}), parlay::minimum<long>());

if (loc == n) i = top; // no violating constraint found, repeat and double again
else { // found a violating constraint at location loc
// select constraints h up to loc that jointly with H[loc] bound the solution
// i.e. H[loc] x c and h x c have opposite signs
coord cr = cross(H[loc],c);
auto Hf = parlay::filter(H.cut(0,loc), [&] (constraint h) {
return cr * cross(h,c) < 0;});
return cr * cross(h,c) < 0;});

// find the tightest such constraint
auto min = [&] (constraint a, constraint b) {
return cr * (project(H[loc], a) - project(H[loc], b)) > 0 ? a : b;};
return cr * (project(H[loc], a) - project(H[loc], b)) > 0 ? a : b;};
constraint cx = parlay::reduce(Hf, parlay::binary_op(min,constraint{0.,0.,0.}));

// update the optimal point and the index i
Expand Down
8 changes: 4 additions & 4 deletions examples/3d_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ struct search {
leaf* TL = static_cast<leaf*>(T);
for (int i = 0; i < TL->size; i++)
if (TL->pts[i].id != p.id &&
distance_squared(TL->pts[i].pnt) < r * r)
in_range.push_back(TL->pts[i].id); }
distance_squared(TL->pts[i].pnt) < r * r)
in_range.push_back(TL->pts[i].id); }

// looks for points within range for p in subtree rooted at T.
// Can return immediately if radius does not intersect the box.
Expand All @@ -85,8 +85,8 @@ struct search {
if (T->is_leaf) add_leaf(T);
else {
interior* TI = static_cast<interior*>(T);
range_search_down(TI->left);
range_search_down(TI->right);
range_search_down(TI->left);
range_search_down(TI->right);
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions examples/BFS.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ auto BFS(vertex start, const graph& G) {

// keep the v that succeed in setting the visited array
frontier = delayed::to_sequence(delayed::map_maybe(out, [&] (vertex v) {
bool expected = false;
bool expected = false;
if ((!visited[v]) && visited[v].compare_exchange_strong(expected, true))
return std::optional<vertex>(v);
return std::optional<vertex>(v);
return std::optional<vertex>();}));
}

Expand Down
14 changes: 7 additions & 7 deletions examples/bigint_add.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,20 @@ bigint add(const Bigint1& a, const Bigint2& b, bool extra_one=false) {
} else { // do in parallel
// check which digits will carry or propagate
auto c = delayed::tabulate(na, [&] (long i) {
double_digit s = a[i] + static_cast<double_digit>(B(i));
s += (i == 0 && extra_one);
return static_cast<carry>(2 * (s == mask) + (s >> digit_len));});
double_digit s = a[i] + static_cast<double_digit>(B(i));
s += (i == 0 && extra_one);
return static_cast<carry>(2 * (s == mask) + (s >> digit_len));});

// use scan to do the propagation
auto f = [] (carry a, carry b) {return (b == propagate) ? a : b;};
auto cc = delayed::scan(c, parlay::binary_op(f, propagate)).first;
auto z = delayed::zip(cc, parlay::iota(na));
result = delayed::to_sequence(delayed::map(z, [&] (auto p) {
auto [ci, i] = p;
return a[i] + B(i) + ci;}));
auto [ci, i] = p;
return a[i] + B(i) + ci;}));
//auto cc = parlay::scan(c, parlay::binary_op(f, propagate)).first;
//result = parlay::tabulate(na, [&] (long i) {
// return a[i] + B(i) + cc[i];});
// return a[i] + B(i) + cc[i];});
}
if ((a_sign == b_sign) && ((result[na-1] >> (digit_len - 1)) != a_sign))
result.push_back(a_sign ? mask : 1u);
Expand All @@ -83,5 +83,5 @@ template <typename Bigint1, typename Bigint2>
bigint subtract(const Bigint1& a, const Bigint2& b) {
// effectively negate b and add
return add(a, delayed::map(b, [] (auto bv) {return ~bv;}),
true);
true);
}
8 changes: 4 additions & 4 deletions examples/boruvka.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ using edge = std::pair<vertex,vertex>;
using w_edge = std::pair<edge,w_type>;

parlay::sequence<w_type> boruvka(const parlay::sequence<w_edge>& E,
const parlay::sequence<vertex> V,
parlay::sequence<std::atomic<w_type>>& W,
parlay::sequence<vertex>& P) {
const parlay::sequence<vertex> V,
parlay::sequence<std::atomic<w_type>>& W,
parlay::sequence<vertex>& P) {
//std::cout << E.size() << ", " << V.size() << std::endl;

if (E.size() == 0) return parlay::sequence<w_type>();
Expand All @@ -36,7 +36,7 @@ parlay::sequence<w_type> boruvka(const parlay::sequence<w_edge>& E,
return (W[u] == e.second || W[v] == e.second);});

auto V_new = star_contract(map(Es, [] (w_edge e) {return e.first;}),
V, P, parlay::random_generator(0));
V, P, parlay::random_generator(0));

// update edges to new endpoints and filter out self edges
auto ES = parlay::delayed::map(E, [&] (w_edge e) {
Expand Down
30 changes: 15 additions & 15 deletions examples/box_kdtree.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ struct tree_node {
bool is_leaf() {return left == nullptr;}

tree_node(tree_node* L, tree_node* R,
int cut_dim, float cut_off, Bounding_Box B)
int cut_dim, float cut_off, Bounding_Box B)
: left(L), right(R), cut_dim(cut_dim),
cut_off(cut_off), box(B) {
n = L->n + R->n;
Expand All @@ -99,9 +99,9 @@ struct tree_node {
if (!is_leaf()) {
if (left == nullptr || right == nullptr) abort();
parlay::par_do_if(n > 1000,
[&] () { node_allocator.retire(left);},
[&] () { node_allocator.retire(right);}
);};
[&] () { node_allocator.retire(left);},
[&] () { node_allocator.retire(right);}
);};
}
};

Expand Down Expand Up @@ -158,8 +158,8 @@ cut_info best_cut(parlay::sequence<event> const &E, range r, range r1, range r2)
using events_pair = std::pair<parlay::sequence<event>, parlay::sequence<event>>;

events_pair split_events(const parlay::sequence<range> &box_ranges,
const parlay::sequence<event> &events,
float cut_off) {
const parlay::sequence<event> &events,
float cut_off) {
index_t n = events.size();
auto lower = parlay::sequence<bool>::uninitialized(n);
auto upper = parlay::sequence<bool>::uninitialized(n);
Expand All @@ -173,9 +173,9 @@ events_pair split_events(const parlay::sequence<range> &box_ranges,

// n is the number of events (i.e. twice the number of triangles)
tree_node* generate_node(Ranges &boxes,
Events events,
Bounding_Box B,
size_t maxDepth) {
Events events,
Bounding_Box B,
size_t maxDepth) {
index_t n = events[0].size();
if (n <= 2 || maxDepth == 0)
return tree_node::node_allocator.allocate(std::move(events), n, B);
Expand Down Expand Up @@ -218,9 +218,9 @@ tree_node* generate_node(Ranges &boxes,
for (int i=0; i < 3; i++) events[i] = parlay::sequence<event>();
tree_node *L, *R;
parlay::par_do([&] () {L = generate_node(boxes, std::move(left_events),
BBL, maxDepth-1);},
[&] () {R = generate_node(boxes, std::move(right_events),
BBR, maxDepth-1);});
BBL, maxDepth-1);},
[&] () {R = generate_node(boxes, std::move(right_events),
BBR, maxDepth-1);});
return tree_node::node_allocator.allocate(L, R, cut_dim, cut_off, B);
}

Expand All @@ -236,9 +236,9 @@ auto kdtree_from_boxes(Boxes& boxes) {
events[d] = parlay::sequence<event>(2*n);
ranges[d] = parlay::sequence<range>(n);
parlay::parallel_for(0, n, [&] (size_t i) {
events[d][2*i] = event(boxes[i][d][0], i, false);
events[d][2*i+1] = event(boxes[i][d][1], i, true);
ranges[d][i] = boxes[i][d];});
events[d][2*i] = event(boxes[i][d][0], i, false);
events[d][2*i+1] = event(boxes[i][d][1], i, true);
ranges[d][i] = boxes[i][d];});
parlay::sort_inplace(events[d], [] (event a, event b) {return a.v < b.v;});
boundingBox[d] = range{events[d][0].v, events[d][2*n-1].v};
}
Expand Down
14 changes: 8 additions & 6 deletions examples/counting_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,23 @@ int main(int argc, char* argv[]) {
auto data = parlay::tabulate(n, [&] (long i) {
auto r = gen[i];
return dis(r);});

parlay::internal::timer t("Time");
parlay::sequence<long> result(n);
for (int i=0; i < 5; i++) {
t.start();
counting_sort(data.begin(), data.end(),
result.begin(),
data.begin(),
num_buckets);
result.begin(),
data.begin(),
num_buckets);
t.next("counting_sort");
}

auto first_ten = result.head(10);
auto last_ten = result.tail(10);
std::cout << "first 10 elements: " << parlay::to_chars(first_ten) << std::endl;
std::cout << "last 10 elements: " << parlay::to_chars(last_ten) << std::endl;
std::cout << "first 10 elements: " << parlay::to_chars(first_ten)
<< std::endl;
std::cout << "last 10 elements: " << parlay::to_chars(last_ten)
<< std::endl;
}
}
81 changes: 43 additions & 38 deletions examples/counting_sort.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <algorithm>
#include <functional>
#include <vector>

#include <parlay/parallel.h>
#include <parlay/sequence.h>
Expand All @@ -13,10 +14,20 @@
// of each key there are. It then uses scan to calculate the offsets
// for each bucket in each partition, and does a final pass placing
// all keys in their correct position.
//
// For input of size n, and for m buckets
// Work = O(n)
// Span = O(m + n / m)
// **************************************************************

using counter_type = unsigned long;

// Issues a cache prefetch hint for the memory at address l.
//
// On GCC and Clang this forwards to __builtin_prefetch; on any other
// compiler the body is empty and the call does nothing.
//
// Declared inline because this definition lives in a header: without
// it, including the header from more than one translation unit would
// produce multiple-definition link errors.
inline void prefetch([[maybe_unused]] void* l) {
#if defined(__GNUC__) || defined(__clang__)
  __builtin_prefetch(l);
#endif
}

// **************************************************************
// Input:
// begin and end iterators for the values to be rearranged
Expand All @@ -30,65 +41,59 @@ using counter_type = unsigned long;
// **************************************************************
template <typename InIt, typename OutIt, typename KeyIt>
parlay::sequence<counter_type>
counting_sort(const InIt& begin, const InIt& end,
counting_sort(const InIt& begin, const InIt& end,
OutIt out, const KeyIt& keys,
long num_buckets) {
long n = end - begin;
counter_type n = end - begin;
if (n == 0) return parlay::sequence<counter_type>(1, 0);
long num_parts = std::min(1000l, n / (num_buckets * 64) + 1);
long part_size = (n - 1)/num_parts + 1;
long num_parts = std::min<long>(1000l, n / (num_buckets * 64) + 1);
counter_type part_n = (n - 1)/num_parts + 1;
long m = num_buckets * num_parts;

// first count buckets within each partition
auto counts = parlay::sequence<counter_type>::uninitialized(num_buckets * num_parts);
auto counts = parlay::sequence<counter_type>::uninitialized(m);
parlay::parallel_for(0, num_parts, [&] (long i) {
long start = i * part_size;
long end = std::min<long>(start + part_size, n);
for (long j = 0; j < num_buckets; j++) counts[i*num_buckets + j] = 0;
for (long j = start; j < end; j++) counts[i*num_buckets + keys[j]]++;
std::vector<counter_type> local_counts(num_buckets);
for (long j = 0; j < num_buckets; j++)
local_counts[j] = 0;
for (long j = i * part_n; j < std::min((i+1) * part_n, n); j++)
local_counts[keys[j]]++;
// transpose so equal buckets are adjacent
for (long j = 0; j < num_buckets; j++)
counts[num_parts * j + i] = local_counts[j];
}, 1);

// transpose the counts if more than one part
parlay::sequence<counter_type> trans_counts;
if (num_parts > 1) {
trans_counts = parlay::sequence<counter_type>::uninitialized(num_buckets * num_parts);
parlay::parallel_for(0, num_buckets, [&] (long i) {
for (size_t j = 0; j < num_parts; j++)
trans_counts[i* num_parts + j] = counts[j * num_buckets + i];}, 1);
} else trans_counts = std::move(counts);

// scan for offsets for all buckets
parlay::scan_inplace(trans_counts);
parlay::scan_inplace(counts);

// go back over partitions to place in final location
parlay::parallel_for(0, num_parts, [&] (long i) {
long start = i * part_size;
long end = std::min<long>(start + part_size, n);
parlay::sequence<counter_type> local_offsets(num_buckets);

// transpose back
std::vector<counter_type> local_offsets(num_buckets);
// transpose back into local offsets
for (long j = 0; j < num_buckets; j++)
local_offsets[j] = trans_counts[num_parts * j + i];

local_offsets[j] = counts[num_parts * j + i];
// copy to output
for (long j = start; j < end; j++) {
for (long j = i * part_n; j < std::min((i+1) * part_n, n); j++) {
counter_type k = local_offsets[keys[j]]++;
// prefetching speeds up the code
#if defined(__GNUC__) || defined(__clang__)
__builtin_prefetch (((char*) &out[k]) + 64);
#endif
prefetch(((char*) &out[k]) + 64);
out[k] = begin[j];
}}, 1);

return parlay::tabulate(num_buckets+1, [&] (long i) {
return (i == num_buckets) ? (counter_type) n : trans_counts[i * num_parts];});
// return offsets for each bucket.
// includes extra element at end containing n
return parlay::tabulate(num_buckets + 1, [&] (long i) {
return (i == num_buckets) ? n : counts[i * num_parts];});
}

// A version that uses ranges as inputs and generates its own output sequence
// A wrapper that uses ranges as inputs and generates its own output
// sequence
template <typename InRange, typename KeysRange>
auto counting_sort(const InRange& in, const KeysRange& keys,
long num_buckets) {
auto out = parlay::sequence<typename InRange::value_type>::uninitialized(in.size());
auto offsets = counting_sort(in.begin(), in.end(), out.begin(), keys.begin(),
num_buckets);
long num_buckets) {
using V = typename InRange::value_type;
auto out = parlay::sequence<V>::uninitialized(in.size());
auto offsets = counting_sort(in.begin(), in.end(),
out.begin(), keys.begin(),
num_buckets);
return std::pair(std::move(out), std::move(offsets));
}
Loading

0 comments on commit 2654e1c

Please sign in to comment.