%%javascript
// Notebook cell magic: disable the output-area auto-scroll so long outputs
// (the 3D plots below) are always shown in full.
IPython.OutputArea.prototype._should_scroll = function(lines) {
return false;
}
A test of different algorithms for cropping 64×64×64 patches from an example event:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection, Line3DCollection
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tables
import time
from sklearn.cluster import DBSCAN
# HDF5 sample file; the code below reads per-event arrays from /data
# (voxel intensities) and /label (segmentation labels).
f=tables.open_file('sample3d.hdf5','r')
def plot(x, y, z, v, **kwargs):
    """Scatter-plot the voxels (x, y, z), colored by their values v.

    Extra keyword arguments (e.g. vmin/vmax) are forwarded to Axes3D.scatter.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(x, y, z, c=v, marker='o', **kwargs)
    plt.show()
def draw_cube(ax, coord, size):
    """Draw the outline of an axis-aligned cube on a 3D axes.

    `coord` is the corner with the smallest coordinates; `size` is the edge
    length. The cube is rendered as a transparent Poly3DCollection with thin
    red edges.
    """
    x0, y0, z0 = coord[0], coord[1], coord[2]
    # Each face as four (dx, dy, dz) unit-corner offsets from (x0, y0, z0).
    face_offsets = [
        [(0, 0, 0), (0, 1, 0), (1, 1, 0), (1, 0, 0)],  # z = z0 face
        [(0, 0, 1), (0, 1, 1), (1, 1, 1), (1, 0, 1)],  # z = z0 + size face
        [(0, 0, 0), (0, 1, 0), (0, 1, 1), (0, 0, 1)],  # x = x0 face
        [(1, 0, 0), (1, 1, 0), (1, 1, 1), (1, 0, 1)],  # x = x0 + size face
        [(0, 0, 0), (1, 0, 0), (1, 0, 1), (0, 0, 1)],  # y = y0 face
        [(0, 1, 0), (1, 1, 0), (1, 1, 1), (0, 1, 1)],  # y = y0 + size face
    ]
    vertices = [
        [[x0 + dx * size, y0 + dy * size, z0 + dz * size] for dx, dy, dz in face]
        for face in face_offsets
    ]
    poly = Poly3DCollection(vertices, linewidths=0.1, edgecolors='r')
    poly.set_alpha(0)
    poly.set_facecolors('cyan')
    ax.add_collection3d(poly)
#data
# Plot the raw event: coordinates of nonzero voxels, colored by intensity.
print("%d events in file." % len(f.root.data))
data=f.root.data[0]
x,y,z=np.where(data>0)
v=data[np.where(data>0)]
plot(x, y, z, v, vmin=0,vmax=40)
# NOTE(review): this writes x, y and z as three ROWS of the CSV (one row per
# axis), not one row per voxel — confirm downstream readers expect that layout.
np.savetxt("event.csv", [x, y, z], delimiter=",")
#label
# Plot the segmentation labels of the same event.
# NOTE: this rebinds x, y, z to the label voxels; `coords` below inherits them.
label=f.root.label[0]
x,y,z=np.where(label>0)
l=label[np.where(label>0)]
plot(x, y, z, l)
# Demo of draw_cube: outline a 300-wide cube inside a 384^3 volume.
fig = plt.figure()
ax = fig.add_subplot(111,projection='3d')
ax.set_xlim(0, 384)
ax.set_ylim(0, 384)
ax.set_zlim(0, 384)
draw_cube(ax, [150, 150, 150], 300)
plt.show()
# Patch-extraction hyperparameters used throughout the notebook.
N = 192 # Size of image
d = 64 # Size of patch
a = 32 # Size of patch's core
# Voxel coordinates as an (n_voxels, 3) array.
coords = np.stack([x, y, z], axis=-1)
def compute_overlap(coords, patch_centers, sizes=d/2):
    """Histogram of patch coverage: {overlap_count: number_of_voxels}.

    For every voxel of `coords`, count how many patches — cubes centered on
    `patch_centers` with half-size `sizes` (scalar, or per-patch (P, 1)
    column) — contain it, and return the distribution of those counts.
    NOTE: the default half-size binds the module-level `d` at definition time.
    """
    coords = np.asarray(coords)
    lower = patch_centers - sizes  # (P, 3) lower corners
    upper = patch_centers + sizes  # (P, 3) upper corners
    # Vectorized (V, P) membership test — replaces the original per-voxel
    # Python loop with a single broadcast comparison.
    inside = np.all((coords[:, None, :] >= lower) & (coords[:, None, :] <= upper), axis=2)
    overlap = inside.sum(axis=1)
    values, counts = np.unique(overlap, return_counts=True)
    return dict(zip(values, counts))
def compute_voxel_overlap(coords, patch_centers, sizes=d/2):
    """Per-voxel overlap: how many patches contain each voxel of `coords`.

    `sizes` is the patch half-size — a scalar, or a per-patch (P, 1) column.
    Returns a list of counts in the same order as `coords`.
    NOTE: the default half-size binds the module-level `d` at definition time.
    """
    coords = np.asarray(coords)
    lower = patch_centers - sizes  # (P, 3) lower corners
    upper = patch_centers + sizes  # (P, 3) upper corners
    # Vectorized (V, P) membership test replaces the original Python loop.
    inside = np.all((coords[:, None, :] >= lower) & (coords[:, None, :] <= upper), axis=2)
    return inside.sum(axis=1).tolist()
def bounding_box(coordinates):
    """Return the (min_corner, max_corner) of a set of (V, 3) coordinates."""
    coordinates = np.asarray(coordinates)
    return coordinates.min(axis=0), coordinates.max(axis=0)
def plot_with_patches(patches, color=v, sizes=d/2):
    """Scatter the event voxels and outline each patch as a cube.

    `patches` holds patch centers; `sizes` is the half-size, either a scalar
    (all patches equal) or an indexable per-patch array. NOTE: the defaults
    bind the module-level `v` and `d` at definition time.
    """
    fig = plt.figure(figsize=(14, 11))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=color, marker='o')
    for i, p in enumerate(patches):
        # isinstance (not `type(...) ==`) so numpy float scalars also count
        # as "one size for every patch".
        if isinstance(sizes, (int, float)):
            draw_cube(ax, p - sizes, sizes * 2)
        else:
            draw_cube(ax, p - sizes[i], sizes[i] * 2)
    plt.show()
def plot_with_overlap(patches, sizes=d/2):
    """Scatter the event voxels colored by their patch-overlap count (0-20)."""
    counts = compute_voxel_overlap(coords, patches, sizes=sizes)
    figure = plt.figure(figsize=(14, 11))
    axes = figure.add_subplot(111, projection='3d')
    colormap = plt.get_cmap('rainbow', 20)
    scatter = axes.scatter(x, y, z, c=counts, marker='o', cmap=colormap, vmin=0, vmax=20)
    figure.colorbar(scatter)
    plt.show()
from matplotlib.ticker import MaxNLocator
def plot_overlap(patches, sizes=d/2):
    """Histogram of the per-voxel overlap counts, with integer x ticks."""
    counts = np.array(compute_voxel_overlap(coords, patches, sizes=sizes))
    axes = plt.figure(figsize=(14, 11)).gca()
    axes.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xlabel("Overlaps")
    plt.ylabel("Voxels")
    plt.xlim(0, 23)
    # One unit-wide bin per integer count, centered on the integers.
    bin_edges = np.arange(counts.min() - 0.5, counts.max() + 1.5, 1)
    plt.hist(counts, bin_edges)
    plt.show()
def plot_core(patch_centers):
    """Color the event voxels by whether they fall inside any patch core.

    A voxel is "in core" when it lies within a / 2 (module-level `a`) of a
    patch center along every axis.
    """
    in_core = [
        np.any(np.all(np.logical_and(patch_centers - a/2 <= voxel,
                                     patch_centers + a/2 >= voxel), axis=1))
        for voxel in coords
    ]
    figure = plt.figure(figsize=(14, 11))
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(x, y, z, c=in_core, marker='o')
    plt.show()
# Notebook cell: display the current coordinate and value arrays as output.
x, y, z, v
Tile the space evenly and keep only cubes that contain at least one non-zero voxel.
Pros: Easy. Deterministic.
Cons:
def naive_patching(N, d, a, coords, include_borders=True):
    """Tile the volume with a regular grid of d-sized cubic patches and keep
    only the patches that contain at least one voxel.

    Parameters
    ----------
    N : int/float or array-like of 3
        Extent of the volume (scalar -> cube, otherwise per-axis extents).
    d : patch edge length.
    a : grid spacing control — int(N/a) patch centers per axis.
    coords : (V, 3) array of voxel coordinates.
    include_borders : also add centers along the volume edges, halfway
        between grid rows.

    Returns
    -------
    (P, 3) array of patch centers, expressed in the original frame.
    """
    # Normalize N to a per-axis float array, then pad the volume up to at
    # least d per axis so a patch always fits; remember the shift so the
    # returned centers can be mapped back to the caller's frame.
    if np.isscalar(N):
        N = np.full(3, N, dtype=float)
    else:
        N = np.asarray(N, dtype=float)
    shift = np.where(N < d, (d - N) / 2, 0)
    coords = coords + shift
    N = np.maximum(N, d)
    # One linspace per axis (previously three copy-pasted lines).
    patch_x, patch_y, patch_z = (
        np.linspace(d / 2, N[axis] - d / 2, int(N[axis] / a)) for axis in range(3)
    )
    patch_centers = np.reshape(
        np.stack(np.meshgrid(patch_x, patch_y, patch_z), axis=-1), (-1, 3)
    )
    # Add border cases: extra centers on the volume edges, offset by d/2.
    if include_borders:
        borders = [
            np.reshape(np.stack(np.meshgrid(patch_x[0], patch_y[0], (patch_z + d/2)[:-1]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid(patch_x[-1], patch_y[-1], (patch_z + d/2)[:-1]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid(patch_x[0], (patch_y + d/2)[:-1], patch_z[0]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid(patch_x[-1], (patch_y + d/2)[:-1], patch_z[-1]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid((patch_x + d/2)[:-1], patch_y[0], patch_z[0]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid((patch_x + d/2)[:-1], patch_y[-1], patch_z[-1]), axis=-1), (-1, 3)),
        ]
        patch_centers = np.concatenate([patch_centers] + borders, axis=0)
    # Keep only patches containing at least one voxel. np.any short-circuits
    # the check; the original materialized every patch's voxel list first.
    half = d / 2
    final_patches = [
        p for p in patch_centers
        if np.any(np.all((coords >= p - half) & (coords <= p + half), axis=1))
    ]
    if not final_patches:
        # Original crashed on broadcasting an empty array; return (0, 3).
        return np.zeros((0, 3))
    return np.array(final_patches) - shift
# Benchmark the naive tiling: patch count, overlap histogram, wall time.
start = time.time()
final_patches = naive_patching(N, d, a, coords)
end = time.time()
print("Found %d patches" % len(final_patches))
print("Overlap: ", compute_overlap(coords, final_patches))
print("Took %f s" % (end - start))
# Plot
plot_with_patches(final_patches)
plot_with_overlap(final_patches)
plot_overlap(final_patches)
plot_core(final_patches)
Start with a flat probability distribution over all voxels.
Pros
Cons
def normalize(array):
    """Scale `array` so its elements sum to 1 (assumes a nonzero sum)."""
    total = np.sum(array)
    return array / total
def proba_patching(d, a, coords):
    """Randomly place d-sized patches, biasing sampling away from voxels
    that are already covered.

    Maintains a probability distribution over voxels. Each iteration draws
    one voxel as a patch center, then multiplicatively damps the probability
    of voxels inside that patch (strongly within Euclidean distance a of the
    center, mildly up to distance d). Stops when every voxel is covered by
    at least 2 patches, when the probability mass collapses to zero, or
    after max_patches iterations.

    Parameters
    ----------
    d : patch edge length (cube of side d around the drawn center).
    a : core radius used for the strong damping.
    coords : (n, 3) voxel coordinates.

    Returns
    -------
    (P, 3) array of patch centers (each a row of `coords`). Non-deterministic:
    uses np.random without a fixed seed.
    """
    n = coords.shape[0]
    proba = np.ones((n)) / n  # start from a flat distribution
    i = 0
    max_patches = 100  # hard cap on the number of patches
    patches = [] # List of center coordinates of patches dxd
    voxel_num_boxes = np.zeros_like(proba)  # per-voxel cover count
    # Loop until every voxel is inside at least 2 boxes (or the cap is hit).
    while np.count_nonzero(voxel_num_boxes < 2) > 0 and i < max_patches:
        # Draw a single voxel index from the current distribution.
        indices = np.random.choice(np.arange(n), p=proba)
        # Voxels inside the d-cube centered on the drawn voxel.
        selection_inside = np.all(np.logical_and(coords >= coords[indices] - d/2, coords <= coords[indices] + d/2), axis=-1)
        indices_inside = np.argwhere(selection_inside)
        voxels_inside = coords[selection_inside]
        distances_to_center = np.sqrt(np.sum(np.power(voxels_inside - coords[indices], 2), axis=-1))
        # Multiplicative damping factors (1 for untouched voxels).
        new_proba = np.ones_like(proba)
        core_indices = distances_to_center <= a
        new_proba[indices_inside[core_indices]] = 0.01  # strong damp in core
        new_proba[indices_inside[np.logical_and(np.logical_not(core_indices), distances_to_center <= d)]] = 0.4
        voxel_num_boxes[indices_inside] += 1 # Update voxel_num_boxes: increment all voxels inside box
        patches.append(coords[indices])
        i += 1
        proba = proba * new_proba
        # All mass damped away -> nothing left to sample from.
        if np.sum(proba) == 0.0:
            break
        proba = normalize(proba)
    return np.array(patches)
# Benchmark the probabilistic patching on the same event.
start = time.time()
patches2 = proba_patching(d, a, coords)
end = time.time()
print("Found %d patches" % len(patches2))
print("Overlap: ", compute_overlap(coords, patches2))
print("Took %f s" % (end - start))
plot_with_patches(patches2)
plot_with_overlap(patches2)
plot_overlap(patches2)
plot_core(patches2)
Define several clusters with DBScan for example. Then find a minimum bounding box for each of the cluster and tile deterministically.
Pros
Cons
# Rebuild the (n_voxels, 3) coordinate array (repeated so the cell is standalone).
coords = np.stack([x, y, z], axis=-1)
def clustering(d, a, coords):
    """Cluster the voxels with DBSCAN, then tile each cluster's bounding box
    with the naive grid (no border patches).

    Returns (labels, patches): the per-voxel DBSCAN cluster ids and the
    concatenated (P, 3) patch centers of all clusters.
    """
    db = DBSCAN(eps=10, min_samples=1).fit_predict(coords)
    # Tile each cluster in its own local frame, then shift back.
    final_patches3 = []
    for i_cluster in np.unique(db):
        voxels = coords[db == i_cluster]
        p1, p2 = bounding_box(voxels)
        final_patches3.append(naive_patching(p2 - p1, d, a, voxels - p1, include_borders=False) + p1)
    # Concatenate the per-cluster arrays directly; wrapping the ragged list
    # in np.array() first relies on deprecated NumPy behavior.
    final_patches3 = np.concatenate(final_patches3, axis=0)
    return db, final_patches3
# Benchmark the clustering-based patching; color the scatter by cluster id.
start = time.time()
db, final_patches3 = clustering(d, a, coords)
end = time.time()
print("Found %d patches" % len(final_patches3))
print("Overlap: ", compute_overlap(coords, final_patches3))
print("Took %f s" % (end - start))
plot_with_patches(final_patches3, color=db)
plot_with_overlap(final_patches3)
plot_overlap(final_patches3)
plot_core(final_patches3)
Let's try to combine the best of both worlds.
Pros
Cons
# Rebuild the (n_voxels, 3) coordinate array (repeated so the cell is standalone).
coords = np.stack([x, y, z], axis=-1)
def hybrid(d, a, coords):
    """Cluster the voxels with DBSCAN, then run the probabilistic patching
    inside each cluster's local frame.

    Returns (labels, patches): the per-voxel DBSCAN cluster ids and the
    concatenated (P, 3) patch centers of all clusters.
    """
    db = DBSCAN(eps=10, min_samples=1).fit_predict(coords)
    # Patch each cluster in its own local frame, then shift back.
    final_patches4 = []
    for i_cluster in np.unique(db):
        voxels = coords[db == i_cluster]
        p1, p2 = bounding_box(voxels)
        final_patches4.append(proba_patching(d, a, voxels - p1) + p1)
    # Concatenate the per-cluster arrays directly; wrapping the ragged list
    # in np.array() first relies on deprecated NumPy behavior.
    final_patches4 = np.concatenate(final_patches4, axis=0)
    return db, final_patches4
# Benchmark the hybrid (clustering + probabilistic) patching.
start = time.time()
db, final_patches4 = hybrid(d, a, coords)
end = time.time()
print("Found %d patches" % len(final_patches4))
print("Overlap: ", compute_overlap(coords, final_patches4))
print("Took %f s" % (end - start))
plot_with_patches(final_patches4, color=db)
plot_with_overlap(final_patches4)
plot_overlap(final_patches4)
plot_core(final_patches4)
Pros
Cons
# Rebuild the (n_voxels, 3) coordinate array (repeated so the cell is standalone).
coords = np.stack([x, y, z], axis=-1)
def octotree(N, d, coords, num_choice=6, final_size=None):
    """Octree-style patching: recursively subdivide the N^3 volume, and at
    each occupied leaf emit a random subset of the eight neighbor centers as
    fixed-size (d) patches.

    Parameters
    ----------
    N : volume edge length.
    d : patch edge length (all emitted patches get half-size d/2).
    coords : (V, 3) voxel coordinates.
    num_choice : how many of the eight leaf-neighbor centers to sample
        (with replacement, then deduplicated — so the kept count varies).
    final_size : stop subdividing once a cube's half-size is <= this.
        Defaults to 0.7 * d. (Previously the default was bound to the
        module-level `d` at definition time; it now tracks the `d` argument.)

    Returns
    -------
    (patches, sizes): (P, 3) patch centers and a (P, 1) column of half-sizes.
    Non-deterministic: uses np.random without a fixed seed.
    """
    if final_size is None:
        final_size = 0.7 * d
    # Child-octant sign pattern. The order matters (leaves sample indices
    # from it), so it reproduces the original hand-written lists.
    signs = [(sx, sy, sz) for sz in (-1, 1) for sy in (1, -1) for sx in (1, -1)]
    # Work queue of (center, half-size, voxels inside that cube).
    heap = [([N/2, N/2, N/2], N/2, coords)]
    patches = []
    sizes = []
    while len(heap) > 0:
        coordinates, size, voxels = heap.pop(0)
        x0, y0, z0 = coordinates
        child_size = size / 2
        if size > final_size:
            # Subdivide: hand each child its voxels and queue non-empty ones.
            for sx, sy, sz in signs:
                if voxels.shape[0] == 0:
                    break  # every remaining child would be empty
                center = np.array([x0 + sx * child_size,
                                   y0 + sy * child_size,
                                   z0 + sz * child_size])
                inside = np.all((voxels >= center - child_size) &
                                (voxels <= center + child_size), axis=1)
                child_voxels = voxels[inside]
                # Boolean-mask indexing instead of np.delete(arr, bool_mask):
                # the latter's handling of boolean indices was deprecated and
                # ambiguous across NumPy versions.
                voxels = voxels[~inside]
                if len(child_voxels) > 0:
                    heap.append(([center[0], center[1], center[2]], child_size, child_voxels))
        else:
            # Leaf: propose the eight neighbor centers and keep a random
            # subset (sample with replacement, then unique — as before).
            candidates = np.array([[x0 + sx * child_size,
                                    y0 + sy * child_size,
                                    z0 + sz * child_size] for sx, sy, sz in signs])
            our_choice = np.unique(np.random.choice(np.arange(8), size=num_choice))
            patches.extend(np.take(candidates, our_choice, axis=0))
            # Every emitted patch uses the fixed half-size d/2.
            sizes.extend([d/2] * our_choice.shape[0])
    return np.array(patches), np.array(sizes)[:, None]
# Benchmark the octree patching; note the per-patch sizes it returns.
start = time.time()
final_patches5, sizes5 = octotree(N, d, coords)
end = time.time()
print("Found %d patches with sizes " % len(final_patches5), np.unique(sizes5*2))
print("Overlap: ", compute_overlap(coords, final_patches5, sizes=sizes5))
print("Took %f s" % (end - start))
plot_with_patches(final_patches5, sizes=sizes5)
plot_with_overlap(final_patches5, sizes=sizes5)
plot_overlap(final_patches5, sizes=sizes5)
plot_core(final_patches5)
Using the first 1000 events from the sample (see the `compare(1000)` call below).
def compare(n):
    """Benchmark all five patching algorithms over the first n events.

    For each event, run every algorithm and record its wall time, the number
    of patches it produced, and the mean/std of the per-voxel overlap counts.

    Returns
    -------
    (times, num_patches, overlap_mean, overlap_std): four dicts, each mapping
    algorithm name ('naive', 'proba', 'cluster', 'hybrid', 'octree') to a
    list of n per-event values.
    """
    keys = ['naive', 'proba', 'cluster', 'hybrid', 'octree']
    times = {k: [] for k in keys}
    num_patches = {k: [] for k in keys}
    overlap_mean = {k: [] for k in keys}
    overlap_std = {k: [] for k in keys}
    for i in range(n):
        if i % 100 == 0:
            print("%d / %d" % (i, n))
        data = f.root.data[i]
        x, y, z = np.where(data > 0)
        coords = np.stack([x, y, z], axis=-1)
        # Each runner returns just the patch-center array for its algorithm;
        # this single loop replaces five copy-pasted measurement stanzas.
        runners = [
            ('naive', lambda: naive_patching(N, d, a, coords)),
            ('proba', lambda: proba_patching(d, a, coords)),
            ('cluster', lambda: clustering(d, a, coords)[1]),
            ('hybrid', lambda: hybrid(d, a, coords)[1]),
            ('octree', lambda: octotree(N, d, coords)[0]),
        ]
        for key, run in runners:
            start = time.time()
            patches = run()
            times[key].append(time.time() - start)
            num_patches[key].append(len(patches))
            overlap = compute_voxel_overlap(coords, patches)
            overlap_mean[key].append(np.mean(overlap))
            overlap_std[key].append(np.std(overlap))
    return times, num_patches, overlap_mean, overlap_std
# Run all five algorithms over the first 1000 events and collect statistics.
times, num_patches, overlap_mean, overlap_std = compare(1000)
# Histogram: per-event execution time of each algorithm.
alpha = 0.6
bins=40
r = (0, 0.06)
fig = plt.figure(figsize=(14, 10))
y3 = plt.hist(times['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
y4 = plt.hist(times['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
y1 = plt.hist(times['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
y2 = plt.hist(times['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
y5 = plt.hist(times['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Execution time per event")
plt.show()
# Histogram: number of patches produced per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 250)
plt.hist(num_patches['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Number of patches per event")
plt.show()
# Histogram: mean per-voxel overlap per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 40)
plt.hist(overlap_mean['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Overlap Mean")
plt.show()
# Histogram: standard deviation of per-voxel overlap per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 25)
plt.hist(overlap_std['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.title("Overlap STD")
plt.legend()
plt.show()
We want to be in the lower right quadrant: low number of patches but high overlap mean.
# 2D histograms (one per algorithm): overlap mean vs number of patches.
plt.hist2d(overlap_mean['naive'], num_patches['naive'])
plt.title("Naive")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['cluster'], num_patches['cluster'])
plt.title("Clustering")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['proba'], num_patches['proba'])
plt.title("Probabilistic")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['hybrid'], num_patches['hybrid'])
plt.title("Hybrid")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['octree'], num_patches['octree'])
plt.title("Octree")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
Let's check that the C++ implementation of Octree and Probabilistic algorithms works fine (and is faster than Python version!).
The probabilistic algorithm currently does not gain much by being implemented in C++. The octree algorithm however is drastically faster.
# Compare the Python implementations with external C++ runs; the C++
# per-event metrics are loaded from CSV files produced elsewhere.
duration_proba = np.genfromtxt("duration_proba.csv", delimiter=",")
duration_octree = np.genfromtxt("duration_octree.csv", delimiter=",")
# Histogram: execution time, Python vs C++.
alpha = 0.6
bins=40
r = (0, 0.02)
fig = plt.figure(figsize=(14, 10))
y2 = plt.hist(times['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
y5 = plt.hist(times['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
y3 = plt.hist(duration_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
y4 = plt.hist(duration_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Execution time per event")
plt.legend()
plt.show()
# Histogram: number of patches per event, Python vs C++.
num_patches_proba = np.genfromtxt("num_patches_proba.csv", delimiter=",")
num_patches_octree = np.genfromtxt("num_patches_octree.csv", delimiter=",")
alpha = 0.6
bins=40
r = (0, 250)
fig = plt.figure(figsize=(14, 10))
plt.hist(num_patches['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Number of patches per event")
plt.legend()
plt.show()
# Histogram: overlap mean per event, Python vs C++.
overlap_mean_proba = np.genfromtxt("overlap_mean_proba.csv", delimiter=",")
overlap_mean_octree = np.genfromtxt("overlap_mean_octree.csv", delimiter=",")
alpha = 0.6
bins=40
r = (0, 40)
fig = plt.figure(figsize=(14, 10))
plt.hist(overlap_mean['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Overlap mean")
plt.legend()
plt.show()
# Histogram: overlap standard deviation per event, Python vs C++.
overlap_std_proba = np.genfromtxt("overlap_std_proba.csv", delimiter=",")
overlap_std_octree = np.genfromtxt("overlap_std_octree.csv", delimiter=",")
alpha = 0.6
bins=40
r = (0, 25)
fig = plt.figure(figsize=(14, 10))
plt.hist(overlap_std['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Overlap STD")
plt.legend()
plt.show()