Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize unionfind #294

Merged
merged 1 commit into from
Oct 30, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 31 additions & 32 deletions common/unionfind.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ either expressed or implied, of the Regents of The University of Michigan.

#pragma once

#include <string.h>
#include <stdint.h>
#include <stdlib.h>

Expand All @@ -35,50 +36,43 @@ typedef struct unionfind unionfind_t;
// Union-find bookkeeping for ids 0..maxid.
// NOTE(review): this span appears to interleave two layouts from a diff view:
// an older one (an array of `struct ufrec` records reached through `data`) and
// a newer one (separate `parent` and `size` arrays). Duplicate member names
// below come from that interleaving — confirm against the actual header.
struct unionfind
{
// largest id passed to unionfind_create; storage there is sized maxid+1
uint32_t maxid;
// (record-array layout) one ufrec per id
struct ufrec *data;
};

struct ufrec
{
// the parent of this node. If a node's parent is its own index,
// then it is a root.
uint32_t parent;
// Parent index of each node. unionfind_create fills every entry with
// 0xffffffff, which unionfind_get_representative treats as "not yet
// initialized" and replaces with the node's own index on first lookup.
uint32_t *parent;

// for the root of a connected component, the number of components
// connected to it. For intermediate values, it's not meaningful.
uint32_t size;
// The size of the tree excluding the root (so a set's element count is
// size[root] + 1). Zero-filled by unionfind_create.
uint32_t *size;
};

// Allocates a unionfind_t plus its backing storage for ids 0..maxid and
// returns it; the result is released with unionfind_destroy.
//
// NOTE(review): this span appears to show both sides of a diff — the
// per-record initialization through uf->data and the split-array
// initialization through uf->parent / uf->size. Confirm which is current.
// NOTE(review): neither the calloc nor the malloc result is checked before use.
// NOTE(review): (maxid+1) is evaluated in uint32_t, so maxid == 0xffffffff
// would wrap to 0 — confirm callers never pass that value.
static inline unionfind_t *unionfind_create(uint32_t maxid)
{
// calloc zero-fills the struct itself
unionfind_t *uf = (unionfind_t*) calloc(1, sizeof(unionfind_t));
uf->maxid = maxid;
// (record-array layout) every id starts as its own root with size 1
uf->data = (struct ufrec*) malloc((maxid+1) * sizeof(struct ufrec));
for (uint32_t i = 0; i <= maxid; i++) {
uf->data[i].size = 1;
uf->data[i].parent = i;
}
// (split-array layout) one allocation holds two uint32_t arrays of
// maxid+1 entries each: parent[] first, then size[]
uf->parent = (uint32_t *) malloc((maxid+1) * sizeof(uint32_t) * 2);
// 0xff in every byte makes each parent entry 0xffffffff
memset(uf->parent, 0xff, (maxid+1) * sizeof(uint32_t));
// size[] is the second half of the same allocation
uf->size = uf->parent + (maxid+1);
memset(uf->size, 0, (maxid+1) * sizeof(uint32_t));
return uf;
}

// Releases the storage owned by uf and then uf itself.
// NOTE(review): this span appears to show both sides of a diff (freeing
// uf->data and freeing uf->parent) — confirm which is current.
static inline void unionfind_destroy(unionfind_t *uf)
{
free(uf->data);
// uf->size points into the uf->parent allocation (see unionfind_create),
// so it is not freed separately
free(uf->parent);
free(uf);
}

/*
static inline uint32_t unionfind_get_representative(unionfind_t *uf, uint32_t id)
{
// base case: a node is its own parent
if (uf->data[id].parent == id)
if (uf->parent[id] == id)
return id;

// otherwise, recurse
uint32_t root = unionfind_get_representative(uf, uf->data[id].parent);
uint32_t root = unionfind_get_representative(uf, uf->parent[id]);

// short circuit the path. [XXX This write prevents tail recursion]
uf->data[id].parent = root;
uf->parent[id] = root;

return root;
}
Expand All @@ -88,17 +82,22 @@ static inline uint32_t unionfind_get_representative(unionfind_t *uf, uint32_t id
// version above.
static inline uint32_t unionfind_get_representative(unionfind_t *uf, uint32_t id)
{
uint32_t root = id;
uint32_t root = uf->parent[id];
// uninitialized node, so set to self
if (root == 0xffffffff) {
uf->parent[id] = id;
return id;
}

// chase down the root
while (uf->data[root].parent != root) {
root = uf->data[root].parent;
while (uf->parent[root] != root) {
root = uf->parent[root];
}

// go back and collapse the tree.
while (uf->data[id].parent != root) {
uint32_t tmp = uf->data[id].parent;
uf->data[id].parent = root;
while (uf->parent[id] != root) {
uint32_t tmp = uf->parent[id];
uf->parent[id] = root;
id = tmp;
}

Expand All @@ -108,7 +107,7 @@ static inline uint32_t unionfind_get_representative(unionfind_t *uf, uint32_t id
// Returns the size recorded for the set that contains id.
// The lookup goes through unionfind_get_representative, which may write to
// the parent array (lazy self-initialization and path collapsing), so this
// is not a read-only query.
// NOTE(review): this span appears to show both sides of a diff — two
// consecutive return statements, one per storage layout. Confirm which is
// current.
static inline uint32_t unionfind_get_set_size(unionfind_t *uf, uint32_t id)
{
uint32_t repid = unionfind_get_representative(uf, id);
// (record-array layout) the root's size field is returned as stored
return uf->data[repid].size;
// (split-array layout) size[] excludes the root itself, hence the + 1
return uf->size[repid] + 1;
}

static inline uint32_t unionfind_connect(unionfind_t *uf, uint32_t aid, uint32_t bid)
Expand All @@ -126,21 +125,21 @@ static inline uint32_t unionfind_connect(unionfind_t *uf, uint32_t aid, uint32_t
// for rank. In my testing, it's often *faster* to use size than
// rank, perhaps because the rank of the tree isn't that critical
// if there are very few nodes in it.
uint32_t asize = uf->data[aroot].size;
uint32_t bsize = uf->data[broot].size;
uint32_t asize = uf->size[aroot] + 1;
uint32_t bsize = uf->size[broot] + 1;

// optimization idea: We could shortcut some or all of the tree
// that is grafted onto the other tree. Pro: those nodes were just
// read and so are probably in cache. Con: it might end up being
// wasted effort -- the tree might be grafted onto another tree in
// a moment!
if (asize > bsize) {
uf->data[broot].parent = aroot;
uf->data[aroot].size += bsize;
uf->parent[broot] = aroot;
uf->size[aroot] += bsize;
return aroot;
} else {
uf->data[aroot].parent = broot;
uf->data[broot].size += asize;
uf->parent[aroot] = broot;
uf->size[broot] += asize;
return broot;
}
}
Loading