Skip to content

Commit

Permalink
Merge branch 'master' of github.com:seung-lab/connected-components-3d
Browse files Browse the repository at this point in the history
  • Loading branch information
william-silversmith committed Jul 23, 2024
2 parents b141119 + 6e6ec00 commit 0cd42e5
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 4 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,5 @@ callgrind*
subvol.bin


cc3d.cpp
cc3d.cpp
cc3d/fastcc3d.cpp
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,22 @@ labels_out = cc3d.connected_components(labels_in, delta=10)
labels_in = np.memmap("labels.bin", order="F", dtype=np.uint32, shape=(5000, 5000, 2000))
labels_out = cc3d.connected_components(labels_in, out_file="out.bin")

# Here's another strategy that you can use for huge files that won't even
# take up any disk space. Provide any iterator to this function that produces
# thick z sections of the input array that are in sequential order.
# The output is a highly compressed CrackleArray that is still random access.
# See: https://github.com/seung-lab/crackle
# You need to pip install connected-components-3d[stack] to get the extra modules.
def sections(labels_in):
"""
A generator that produces thick Z slices
of an image
"""
for z in range(0, labels_in.shape[2], 100):
yield labels_in[:,:,z:z+100]

compressed_labels_out = cc3d.connected_components_stack(sections(labels))

# You can extract the number of labels (which is also the maximum
# label value) like so:
labels_out, N = cc3d.connected_components(labels_in, return_N=True) # free
Expand Down
31 changes: 30 additions & 1 deletion automated_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import sys

import fastremap
import cc3d
import numpy as np

Expand Down Expand Up @@ -1490,15 +1491,43 @@ def test_pytorch_integration_ccl_doesnt_crash():
assert isinstance(out, torch.Tensor)
assert torch.all(out == labels)

@pytest.mark.parametrize("connectivity", [6, 26])
def test_connected_components_stack(connectivity):
stack = [
np.ones([100,100,100], dtype=np.uint32)
for i in range(4)
]

stack += [ np.zeros([100,100,1], dtype=np.uint32) ]

stack += [
np.ones([100,100,100], dtype=np.uint32)
for i in range(2)
]

arr = cc3d.connected_components_stack(stack, connectivity=connectivity)

ans = np.ones([100,100,601], dtype=np.uint32)
ans[:,:,400] = 0
ans[:,:,401:] = 2

assert np.all(np.unique(arr[:]) == [ 0, 1, 2 ])
assert np.all(arr[:] == ans)

image = np.random.randint(0,100, size=[100,100,11], dtype=np.uint8)

ans = cc3d.connected_components(image, connectivity=connectivity)
ans, _ = fastremap.renumber(ans[:])

stack = [
image[:,:,:2],
image[:,:,2:6],
image[:,:,6:9],
image[:,:,9:11],
]

res = cc3d.connected_components_stack(stack, connectivity=connectivity)
res, _ = fastremap.renumber(res[:])


assert np.all(res[:] == ans[:,:,:11])

159 changes: 158 additions & 1 deletion cc3d/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def largest_k(
return cc_out, len(preserve)
return cc_out

def _view_as_unsigned(img:np.ndarray):
def _view_as_unsigned(img:np.ndarray) -> np.ndarray:
if np.issubdtype(img.dtype, np.unsignedinteger) or img.dtype == bool:
return img
elif img.dtype == np.int8:
Expand All @@ -121,3 +121,160 @@ def _view_as_unsigned(img:np.ndarray):
return img.view(np.uint64)

return img


class DisjointSet:
def __init__(self):
self.data = {}
def makeset(self, x):
self.data[x] = x
return x
def find(self, x):
if not x in self.data:
return None
i = self.data[x]
while i != self.data[i]:
self.data[i] = self.data[self.data[i]]
i = self.data[i]
return i
def union(self, x, y):
i = self.find(x)
j = self.find(y)
if i is None:
i = self.makeset(x)
if j is None:
j = self.makeset(y)

if i < j:
self.data[j] = i
else:
self.data[i] = j

def connected_components_stack(
stacked_images:Sequence[np.ndarray],
connectivity:int = 26,
return_N:bool = False,
out_dtype:Optional[Any] = None,
):
"""
This is for performing connected component labeling
on an array larger than RAM.
stacked_images is a sequence of 3D images that are of equal
width and height (x,y) and arbitrary depth (z). For example,
you might define a generator that produces a tenth of your
data at a time. The data must be sequenced in z order from
z = 0 to z = depth - 1.
Each 3D image will have CCL run on it and then compressed
into crackle format (https://github.com/seung-lab/crackle)
which is highly compressed but still usable and randomly
accessible by z-slice.
The bottom previous slice and top current
slice will be analyzed to produce a merged image.
The final output will be a CrackleArray. You
can access parts of the image using standard array
operations, write the array data to disk using arr.binary
or fully decompressing the array using arr.decompress()
to obtain a numpy array (but presumably this will blow
out your RAM since the image is so big).
"""
try:
import crackle
import fastremap
except ImportError:
print("You need to pip install connected-components-3d[stack]")
raise

full_binary = None
bottom_cc_img = None
bottom_cc_labels = None

if connectivity not in (6,26):
raise ValueError(f"Connectivity must be 6 or 26. Got: {connectivity}")

offset = 0

for image in stacked_images:
cc_labels, N = connected_components(
image, connectivity=connectivity,
return_N=True, out_dtype=np.uint64,
)
cc_labels[cc_labels != 0] += offset
offset += N
binary = crackle.compress(cc_labels)

if full_binary is None:
full_binary = binary
bottom_cc_img = image[:,:,-1]
bottom_cc_labels = cc_labels[:,:,-1]
continue

top_cc_labels = cc_labels[:,:,0]

equivalences = DisjointSet()

buniq = fastremap.unique(bottom_cc_labels)
tuniq = fastremap.unique(top_cc_labels)

for u in buniq:
equivalences.makeset(u)
for u in tuniq:
equivalences.makeset(u)

if connectivity == 6:
for y in range(image.shape[1]):
for x in range(image.shape[0]):
if bottom_cc_labels[x,y] == 0 or top_cc_labels[x,y] == 0:
continue
if bottom_cc_img[x,y] == image[x,y,0]:
equivalences.union(bottom_cc_labels[x,y], top_cc_labels[x,y])
else:
for y in range(image.shape[1]):
for x in range(image.shape[0]):
if bottom_cc_labels[x,y] == 0:
continue

for y0 in range(max(y - 1, 0), min(y + 1, image.shape[1] - 1) + 1):
for x0 in range(max(x - 1, 0), min(x + 1, image.shape[0] - 1) + 1):
if top_cc_labels[x0,y0] == 0:
continue

if bottom_cc_img[x,y] == image[x0,y0,0]:
equivalences.union(
bottom_cc_labels[x,y], top_cc_labels[x0,y0]
)

relabel = {}
for u in buniq:
relabel[int(u)] = int(equivalences.find(u))
for u in tuniq:
relabel[int(u)] = int(equivalences.find(u))

full_binary = crackle.zstack([
full_binary,
binary,
])
full_binary = crackle.remap(full_binary, relabel, preserve_missing_labels=True)

bottom_cc_img = image[:,:,-1]
bottom_cc_labels = cc_labels[:,:,-1]
bottom_cc_labels = fastremap.remap(bottom_cc_labels, relabel, preserve_missing_labels=True)

if crackle.contains(full_binary, 0):
start = 0
else:
start = 1

full_binary, mapping = crackle.renumber(full_binary, start=start)
arr = crackle.CrackleArray(full_binary).refit()

if return_N:
return arr, arr.num_labels()
else:
return arr



5 changes: 4 additions & 1 deletion cc3d/fastcc3d.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,10 @@ def connected_components(
raise ValueError(f"periodic_boundary is not yet implemented continuous data.")
if data.size == 0:
out_labels = np.zeros(shape=(0,), dtype=data.dtype)
dtype = data.dtype
if out_dtype is not None:
dtype = out_dtype
out_labels = np.zeros(shape=(0,), dtype=dtype)
if return_N:
return (out_labels, 0)
return out_labels
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def requirements():
setup_requires=['pbr', 'numpy', 'cython'],
install_requires=['numpy'],
python_requires=">=3.8,<4.0",
extras_requires={
"stack": [ "crackle-codec", "fastremap" ],
},
ext_modules=[
setuptools.Extension(
'fastcc3d',
Expand Down

0 comments on commit 0cd42e5

Please sign in to comment.