%%javascript
// Notebook cell magic: disable the output-area auto-scroll so long outputs
// (the 3D plots below) are always shown in full.
IPython.OutputArea.prototype._should_scroll = function(lines) {
return false;
}
A test of different algorithms for cropping 64×64×64 patches from an example event:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection, Line3DCollection
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tables
import time
from sklearn.cluster import DBSCAN
# HDF5 sample file; the code below reads per-event arrays from /data
# (voxel intensities) and /label (segmentation labels).
f=tables.open_file('sample3d.hdf5','r')
def plot(x, y, z, v, **kwargs):
    """Scatter-plot the voxels (x, y, z), colored by their values v.

    Extra keyword arguments (e.g. vmin/vmax) are forwarded to Axes3D.scatter.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(x, y, z, c=v, marker='o', **kwargs)
    plt.show()
def draw_cube(ax, coord, size):
    """Draw the outline of an axis-aligned cube on a 3D axes.

    `coord` is the corner with the smallest coordinates; `size` is the edge
    length. The cube is rendered as a transparent Poly3DCollection with thin
    red edges.
    """
    x0, y0, z0 = coord[0], coord[1], coord[2]
    # Each face as four (dx, dy, dz) unit-corner offsets from (x0, y0, z0).
    face_offsets = [
        [(0, 0, 0), (0, 1, 0), (1, 1, 0), (1, 0, 0)],  # z = z0 face
        [(0, 0, 1), (0, 1, 1), (1, 1, 1), (1, 0, 1)],  # z = z0 + size face
        [(0, 0, 0), (0, 1, 0), (0, 1, 1), (0, 0, 1)],  # x = x0 face
        [(1, 0, 0), (1, 1, 0), (1, 1, 1), (1, 0, 1)],  # x = x0 + size face
        [(0, 0, 0), (1, 0, 0), (1, 0, 1), (0, 0, 1)],  # y = y0 face
        [(0, 1, 0), (1, 1, 0), (1, 1, 1), (0, 1, 1)],  # y = y0 + size face
    ]
    vertices = [
        [[x0 + dx * size, y0 + dy * size, z0 + dz * size] for dx, dy, dz in face]
        for face in face_offsets
    ]
    poly = Poly3DCollection(vertices, linewidths=0.1, edgecolors='r')
    poly.set_alpha(0)
    poly.set_facecolors('cyan')
    ax.add_collection3d(poly)
#data
# Plot the raw event: coordinates of nonzero voxels, colored by intensity.
print("%d events in file." % len(f.root.data))
data=f.root.data[0]
x,y,z=np.where(data>0)
v=data[np.where(data>0)]
plot(x, y, z, v, vmin=0,vmax=40)
# NOTE(review): this writes x, y and z as three ROWS of the CSV (one row per
# axis), not one row per voxel — confirm downstream readers expect that layout.
np.savetxt("event.csv", [x, y, z], delimiter=",")
#label
# Plot the segmentation labels of the same event.
# NOTE: this rebinds x, y, z to the label voxels; `coords` below inherits them.
label=f.root.label[0]
x,y,z=np.where(label>0)
l=label[np.where(label>0)]
plot(x, y, z, l)
# Demo of draw_cube: outline a 300-wide cube inside a 384^3 volume.
fig = plt.figure()
ax = fig.add_subplot(111,projection='3d')
ax.set_xlim(0, 384)
ax.set_ylim(0, 384)
ax.set_zlim(0, 384)
draw_cube(ax, [150, 150, 150], 300)
plt.show()
# Patch-extraction hyperparameters used throughout the notebook.
N = 192 # Size of image
d = 64 # Size of patch
a = 32 # Size of patch's core
# Voxel coordinates as an (n_voxels, 3) array.
coords = np.stack([x, y, z], axis=-1)
def compute_overlap(coords, patch_centers, sizes=d/2):
    """Histogram of patch coverage: {overlap_count: number_of_voxels}.

    For every voxel of `coords`, count how many patches — cubes centered on
    `patch_centers` with half-size `sizes` (scalar, or per-patch (P, 1)
    column) — contain it, and return the distribution of those counts.
    NOTE: the default half-size binds the module-level `d` at definition time.
    """
    coords = np.asarray(coords)
    lower = patch_centers - sizes  # (P, 3) lower corners
    upper = patch_centers + sizes  # (P, 3) upper corners
    # Vectorized (V, P) membership test — replaces the original per-voxel
    # Python loop with a single broadcast comparison.
    inside = np.all((coords[:, None, :] >= lower) & (coords[:, None, :] <= upper), axis=2)
    overlap = inside.sum(axis=1)
    values, counts = np.unique(overlap, return_counts=True)
    return dict(zip(values, counts))
def compute_voxel_overlap(coords, patch_centers, sizes=d/2):
    """Per-voxel overlap: how many patches contain each voxel of `coords`.

    `sizes` is the patch half-size — a scalar, or a per-patch (P, 1) column.
    Returns a list of counts in the same order as `coords`.
    NOTE: the default half-size binds the module-level `d` at definition time.
    """
    coords = np.asarray(coords)
    lower = patch_centers - sizes  # (P, 3) lower corners
    upper = patch_centers + sizes  # (P, 3) upper corners
    # Vectorized (V, P) membership test replaces the original Python loop.
    inside = np.all((coords[:, None, :] >= lower) & (coords[:, None, :] <= upper), axis=2)
    return inside.sum(axis=1).tolist()
def bounding_box(coordinates):
    """Return the (min_corner, max_corner) of a set of (V, 3) coordinates."""
    coordinates = np.asarray(coordinates)
    return coordinates.min(axis=0), coordinates.max(axis=0)
def plot_with_patches(patches, color=v, sizes=d/2):
    """Scatter the event voxels and outline each patch as a cube.

    `patches` holds patch centers; `sizes` is the half-size, either a scalar
    (all patches equal) or an indexable per-patch array. NOTE: the defaults
    bind the module-level `v` and `d` at definition time.
    """
    fig = plt.figure(figsize=(14, 11))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=color, marker='o')
    for i, p in enumerate(patches):
        # isinstance (not `type(...) ==`) so numpy float scalars also count
        # as "one size for every patch".
        if isinstance(sizes, (int, float)):
            draw_cube(ax, p - sizes, sizes * 2)
        else:
            draw_cube(ax, p - sizes[i], sizes[i] * 2)
    plt.show()
def plot_with_overlap(patches, sizes=d/2):
    """Scatter the event voxels colored by their patch-overlap count (0-20)."""
    counts = compute_voxel_overlap(coords, patches, sizes=sizes)
    figure = plt.figure(figsize=(14, 11))
    axes = figure.add_subplot(111, projection='3d')
    colormap = plt.get_cmap('rainbow', 20)
    scatter = axes.scatter(x, y, z, c=counts, marker='o', cmap=colormap, vmin=0, vmax=20)
    figure.colorbar(scatter)
    plt.show()
from matplotlib.ticker import MaxNLocator
def plot_overlap(patches, sizes=d/2):
    """Histogram of the per-voxel overlap counts, with integer x ticks."""
    counts = np.array(compute_voxel_overlap(coords, patches, sizes=sizes))
    axes = plt.figure(figsize=(14, 11)).gca()
    axes.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xlabel("Overlaps")
    plt.ylabel("Voxels")
    plt.xlim(0, 23)
    # One unit-wide bin per integer count, centered on the integers.
    bin_edges = np.arange(counts.min() - 0.5, counts.max() + 1.5, 1)
    plt.hist(counts, bin_edges)
    plt.show()
def plot_core(patch_centers):
    """Color the event voxels by whether they fall inside any patch core.

    A voxel is "in core" when it lies within a / 2 (module-level `a`) of a
    patch center along every axis.
    """
    in_core = [
        np.any(np.all(np.logical_and(patch_centers - a/2 <= voxel,
                                     patch_centers + a/2 >= voxel), axis=1))
        for voxel in coords
    ]
    figure = plt.figure(figsize=(14, 11))
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(x, y, z, c=in_core, marker='o')
    plt.show()
# Notebook cell: display the current coordinate and value arrays as output.
x, y, z, v
Tile the space evenly and keep only cubes that contain at least one non-zero voxel.
Pros: Easy. Deterministic.
Cons:
def naive_patching(N, d, a, coords, include_borders=True):
    """Tile the volume with a regular grid of d-sized cubic patches and keep
    only the patches that contain at least one voxel.

    Parameters
    ----------
    N : int/float or array-like of 3
        Extent of the volume (scalar -> cube, otherwise per-axis extents).
    d : patch edge length.
    a : grid spacing control — int(N/a) patch centers per axis.
    coords : (V, 3) array of voxel coordinates.
    include_borders : also add centers along the volume edges, halfway
        between grid rows.

    Returns
    -------
    (P, 3) array of patch centers, expressed in the original frame.
    """
    # Normalize N to a per-axis float array, then pad the volume up to at
    # least d per axis so a patch always fits; remember the shift so the
    # returned centers can be mapped back to the caller's frame.
    if np.isscalar(N):
        N = np.full(3, N, dtype=float)
    else:
        N = np.asarray(N, dtype=float)
    shift = np.where(N < d, (d - N) / 2, 0)
    coords = coords + shift
    N = np.maximum(N, d)
    # One linspace per axis (previously three copy-pasted lines).
    patch_x, patch_y, patch_z = (
        np.linspace(d / 2, N[axis] - d / 2, int(N[axis] / a)) for axis in range(3)
    )
    patch_centers = np.reshape(
        np.stack(np.meshgrid(patch_x, patch_y, patch_z), axis=-1), (-1, 3)
    )
    # Add border cases: extra centers on the volume edges, offset by d/2.
    if include_borders:
        borders = [
            np.reshape(np.stack(np.meshgrid(patch_x[0], patch_y[0], (patch_z + d/2)[:-1]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid(patch_x[-1], patch_y[-1], (patch_z + d/2)[:-1]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid(patch_x[0], (patch_y + d/2)[:-1], patch_z[0]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid(patch_x[-1], (patch_y + d/2)[:-1], patch_z[-1]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid((patch_x + d/2)[:-1], patch_y[0], patch_z[0]), axis=-1), (-1, 3)),
            np.reshape(np.stack(np.meshgrid((patch_x + d/2)[:-1], patch_y[-1], patch_z[-1]), axis=-1), (-1, 3)),
        ]
        patch_centers = np.concatenate([patch_centers] + borders, axis=0)
    # Keep only patches containing at least one voxel. np.any short-circuits
    # the check; the original materialized every patch's voxel list first.
    half = d / 2
    final_patches = [
        p for p in patch_centers
        if np.any(np.all((coords >= p - half) & (coords <= p + half), axis=1))
    ]
    if not final_patches:
        # Original crashed on broadcasting an empty array; return (0, 3).
        return np.zeros((0, 3))
    return np.array(final_patches) - shift
# Benchmark the naive tiling: patch count, overlap histogram, wall time.
start = time.time()
final_patches = naive_patching(N, d, a, coords)
end = time.time()
print("Found %d patches" % len(final_patches))
print("Overlap: ", compute_overlap(coords, final_patches))
print("Took %f s" % (end - start))
# Plot
plot_with_patches(final_patches)
plot_with_overlap(final_patches)
plot_overlap(final_patches)
plot_core(final_patches)
Start with a flat probability distribution over all voxels.
Pros
Cons
def normalize(array):
    """Scale `array` so its elements sum to 1 (assumes a nonzero sum)."""
    total = np.sum(array)
    return array / total
def proba_patching(d, a, coords):
    """Randomly place d-sized patches, biasing sampling away from voxels
    that are already covered.

    Maintains a probability distribution over voxels. Each iteration draws
    one voxel as a patch center, then multiplicatively damps the probability
    of voxels inside that patch (strongly within Euclidean distance a of the
    center, mildly up to distance d). Stops when every voxel is covered by
    at least 2 patches, when the probability mass collapses to zero, or
    after max_patches iterations.

    Parameters
    ----------
    d : patch edge length (cube of side d around the drawn center).
    a : core radius used for the strong damping.
    coords : (n, 3) voxel coordinates.

    Returns
    -------
    (P, 3) array of patch centers (each a row of `coords`). Non-deterministic:
    uses np.random without a fixed seed.
    """
    n = coords.shape[0]
    proba = np.ones((n)) / n  # start from a flat distribution
    i = 0
    max_patches = 100  # hard cap on the number of patches
    patches = [] # List of center coordinates of patches dxd
    voxel_num_boxes = np.zeros_like(proba)  # per-voxel cover count
    # Loop until every voxel is inside at least 2 boxes (or the cap is hit).
    while np.count_nonzero(voxel_num_boxes < 2) > 0 and i < max_patches:
        # Draw a single voxel index from the current distribution.
        indices = np.random.choice(np.arange(n), p=proba)
        # Voxels inside the d-cube centered on the drawn voxel.
        selection_inside = np.all(np.logical_and(coords >= coords[indices] - d/2, coords <= coords[indices] + d/2), axis=-1)
        indices_inside = np.argwhere(selection_inside)
        voxels_inside = coords[selection_inside]
        distances_to_center = np.sqrt(np.sum(np.power(voxels_inside - coords[indices], 2), axis=-1))
        # Multiplicative damping factors (1 for untouched voxels).
        new_proba = np.ones_like(proba)
        core_indices = distances_to_center <= a
        new_proba[indices_inside[core_indices]] = 0.01  # strong damp in core
        new_proba[indices_inside[np.logical_and(np.logical_not(core_indices), distances_to_center <= d)]] = 0.4
        voxel_num_boxes[indices_inside] += 1 # Update voxel_num_boxes: increment all voxels inside box
        patches.append(coords[indices])
        i += 1
        proba = proba * new_proba
        # All mass damped away -> nothing left to sample from.
        if np.sum(proba) == 0.0:
            break
        proba = normalize(proba)
    return np.array(patches)
# Benchmark the probabilistic patching on the same event.
start = time.time()
patches2 = proba_patching(d, a, coords)
end = time.time()
print("Found %d patches" % len(patches2))
print("Overlap: ", compute_overlap(coords, patches2))
print("Took %f s" % (end - start))
plot_with_patches(patches2)
plot_with_overlap(patches2)
plot_overlap(patches2)
plot_core(patches2)
Define several clusters with DBScan for example. Then find a minimum bounding box for each of the cluster and tile deterministically.
Pros
Cons
# Rebuild the (n_voxels, 3) coordinate array (repeated so the cell is standalone).
coords = np.stack([x, y, z], axis=-1)
def clustering(d, a, coords):
    """Cluster the voxels with DBSCAN, then tile each cluster's bounding box
    with the naive grid (no border patches).

    Returns (labels, patches): the per-voxel DBSCAN cluster ids and the
    concatenated (P, 3) patch centers of all clusters.
    """
    db = DBSCAN(eps=10, min_samples=1).fit_predict(coords)
    # Tile each cluster in its own local frame, then shift back.
    final_patches3 = []
    for i_cluster in np.unique(db):
        voxels = coords[db == i_cluster]
        p1, p2 = bounding_box(voxels)
        final_patches3.append(naive_patching(p2 - p1, d, a, voxels - p1, include_borders=False) + p1)
    # Concatenate the per-cluster arrays directly; wrapping the ragged list
    # in np.array() first relies on deprecated NumPy behavior.
    final_patches3 = np.concatenate(final_patches3, axis=0)
    return db, final_patches3
# Benchmark the clustering-based patching; color the scatter by cluster id.
start = time.time()
db, final_patches3 = clustering(d, a, coords)
end = time.time()
print("Found %d patches" % len(final_patches3))
print("Overlap: ", compute_overlap(coords, final_patches3))
print("Took %f s" % (end - start))
plot_with_patches(final_patches3, color=db)
plot_with_overlap(final_patches3)
plot_overlap(final_patches3)
plot_core(final_patches3)
Let's try to combine the best of both worlds.
Pros
Cons
# Rebuild the (n_voxels, 3) coordinate array (repeated so the cell is standalone).
coords = np.stack([x, y, z], axis=-1)
def hybrid(d, a, coords):
    """Cluster the voxels with DBSCAN, then run the probabilistic patching
    inside each cluster's local frame.

    Returns (labels, patches): the per-voxel DBSCAN cluster ids and the
    concatenated (P, 3) patch centers of all clusters.
    """
    db = DBSCAN(eps=10, min_samples=1).fit_predict(coords)
    # Patch each cluster in its own local frame, then shift back.
    final_patches4 = []
    for i_cluster in np.unique(db):
        voxels = coords[db == i_cluster]
        p1, p2 = bounding_box(voxels)
        final_patches4.append(proba_patching(d, a, voxels - p1) + p1)
    # Concatenate the per-cluster arrays directly; wrapping the ragged list
    # in np.array() first relies on deprecated NumPy behavior.
    final_patches4 = np.concatenate(final_patches4, axis=0)
    return db, final_patches4
# Benchmark the hybrid (clustering + probabilistic) patching.
start = time.time()
db, final_patches4 = hybrid(d, a, coords)
end = time.time()
print("Found %d patches" % len(final_patches4))
print("Overlap: ", compute_overlap(coords, final_patches4))
print("Took %f s" % (end - start))
plot_with_patches(final_patches4, color=db)
plot_with_overlap(final_patches4)
plot_overlap(final_patches4)
plot_core(final_patches4)
Pros
Cons
# Rebuild the (n_voxels, 3) coordinate array (repeated so the cell is standalone).
coords = np.stack([x, y, z], axis=-1)
def octotree(N, d, coords, num_choice=6, final_size=None):
    """Octree-style patching: recursively subdivide the N^3 volume, and at
    each occupied leaf emit a random subset of the eight neighbor centers as
    fixed-size (d) patches.

    Parameters
    ----------
    N : volume edge length.
    d : patch edge length (all emitted patches get half-size d/2).
    coords : (V, 3) voxel coordinates.
    num_choice : how many of the eight leaf-neighbor centers to sample
        (with replacement, then deduplicated — so the kept count varies).
    final_size : stop subdividing once a cube's half-size is <= this.
        Defaults to 0.7 * d. (Previously the default was bound to the
        module-level `d` at definition time; it now tracks the `d` argument.)

    Returns
    -------
    (patches, sizes): (P, 3) patch centers and a (P, 1) column of half-sizes.
    Non-deterministic: uses np.random without a fixed seed.
    """
    if final_size is None:
        final_size = 0.7 * d
    # Child-octant sign pattern. The order matters (leaves sample indices
    # from it), so it reproduces the original hand-written lists.
    signs = [(sx, sy, sz) for sz in (-1, 1) for sy in (1, -1) for sx in (1, -1)]
    # Work queue of (center, half-size, voxels inside that cube).
    heap = [([N/2, N/2, N/2], N/2, coords)]
    patches = []
    sizes = []
    while len(heap) > 0:
        coordinates, size, voxels = heap.pop(0)
        x0, y0, z0 = coordinates
        child_size = size / 2
        if size > final_size:
            # Subdivide: hand each child its voxels and queue non-empty ones.
            for sx, sy, sz in signs:
                if voxels.shape[0] == 0:
                    break  # every remaining child would be empty
                center = np.array([x0 + sx * child_size,
                                   y0 + sy * child_size,
                                   z0 + sz * child_size])
                inside = np.all((voxels >= center - child_size) &
                                (voxels <= center + child_size), axis=1)
                child_voxels = voxels[inside]
                # Boolean-mask indexing instead of np.delete(arr, bool_mask):
                # the latter's handling of boolean indices was deprecated and
                # ambiguous across NumPy versions.
                voxels = voxels[~inside]
                if len(child_voxels) > 0:
                    heap.append(([center[0], center[1], center[2]], child_size, child_voxels))
        else:
            # Leaf: propose the eight neighbor centers and keep a random
            # subset (sample with replacement, then unique — as before).
            candidates = np.array([[x0 + sx * child_size,
                                    y0 + sy * child_size,
                                    z0 + sz * child_size] for sx, sy, sz in signs])
            our_choice = np.unique(np.random.choice(np.arange(8), size=num_choice))
            patches.extend(np.take(candidates, our_choice, axis=0))
            # Every emitted patch uses the fixed half-size d/2.
            sizes.extend([d/2] * our_choice.shape[0])
    return np.array(patches), np.array(sizes)[:, None]
# Benchmark the octree patching; note the per-patch sizes it returns.
start = time.time()
final_patches5, sizes5 = octotree(N, d, coords)
end = time.time()
print("Found %d patches with sizes " % len(final_patches5), np.unique(sizes5*2))
print("Overlap: ", compute_overlap(coords, final_patches5, sizes=sizes5))
print("Took %f s" % (end - start))
plot_with_patches(final_patches5, sizes=sizes5)
plot_with_overlap(final_patches5, sizes=sizes5)
plot_overlap(final_patches5, sizes=sizes5)
plot_core(final_patches5)
Using the first 1000 events from the sample (see the `compare(1000)` call below).
def compare(n):
    """Benchmark all five patching algorithms over the first n events.

    For each event, run every algorithm and record its wall time, the number
    of patches it produced, and the mean/std of the per-voxel overlap counts.

    Returns
    -------
    (times, num_patches, overlap_mean, overlap_std): four dicts, each mapping
    algorithm name ('naive', 'proba', 'cluster', 'hybrid', 'octree') to a
    list of n per-event values.
    """
    keys = ['naive', 'proba', 'cluster', 'hybrid', 'octree']
    times = {k: [] for k in keys}
    num_patches = {k: [] for k in keys}
    overlap_mean = {k: [] for k in keys}
    overlap_std = {k: [] for k in keys}
    for i in range(n):
        if i % 100 == 0:
            print("%d / %d" % (i, n))
        data = f.root.data[i]
        x, y, z = np.where(data > 0)
        coords = np.stack([x, y, z], axis=-1)
        # Each runner returns just the patch-center array for its algorithm;
        # this single loop replaces five copy-pasted measurement stanzas.
        runners = [
            ('naive', lambda: naive_patching(N, d, a, coords)),
            ('proba', lambda: proba_patching(d, a, coords)),
            ('cluster', lambda: clustering(d, a, coords)[1]),
            ('hybrid', lambda: hybrid(d, a, coords)[1]),
            ('octree', lambda: octotree(N, d, coords)[0]),
        ]
        for key, run in runners:
            start = time.time()
            patches = run()
            times[key].append(time.time() - start)
            num_patches[key].append(len(patches))
            overlap = compute_voxel_overlap(coords, patches)
            overlap_mean[key].append(np.mean(overlap))
            overlap_std[key].append(np.std(overlap))
    return times, num_patches, overlap_mean, overlap_std
# Run all five algorithms over the first 1000 events and collect statistics.
times, num_patches, overlap_mean, overlap_std = compare(1000)
# Histogram: per-event execution time of each algorithm.
alpha = 0.6
bins=40
r = (0, 0.06)
fig = plt.figure(figsize=(14, 10))
y3 = plt.hist(times['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
y4 = plt.hist(times['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
y1 = plt.hist(times['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
y2 = plt.hist(times['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
y5 = plt.hist(times['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Execution time per event")
plt.show()
# Histogram: number of patches produced per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 250)
plt.hist(num_patches['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Number of patches per event")
plt.show()
# Histogram: mean per-voxel overlap per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 40)
plt.hist(overlap_mean['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Overlap Mean")
plt.show()
# Histogram: standard deviation of per-voxel overlap per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 25)
plt.hist(overlap_std['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.title("Overlap STD")
plt.legend()
plt.show()
We want to be in the lower right quadrant: low number of patches but high overlap mean.
# 2D histograms (one per algorithm): overlap mean vs number of patches.
plt.hist2d(overlap_mean['naive'], num_patches['naive'])
plt.title("Naive")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['cluster'], num_patches['cluster'])
plt.title("Clustering")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['proba'], num_patches['proba'])
plt.title("Probabilistic")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['hybrid'], num_patches['hybrid'])
plt.title("Hybrid")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
plt.hist2d(overlap_mean['octree'], num_patches['octree'])
plt.title("Octree")
plt.xlabel('Overlap mean')
plt.ylabel('Number of patches')
plt.show()
Let's check that the C++ implementation of Octree and Probabilistic algorithms works fine (and is faster than Python version!).
The probabilistic algorithm currently does not gain much by being implemented in C++. The octree algorithm however is drastically faster.
# Compare the Python implementations with external C++ runs; the C++
# per-event metrics are loaded from CSV files produced elsewhere.
duration_proba = np.genfromtxt("duration_proba.csv", delimiter=",")
duration_octree = np.genfromtxt("duration_octree.csv", delimiter=",")
# Histogram: execution time, Python vs C++.
alpha = 0.6
bins=40
r = (0, 0.02)
fig = plt.figure(figsize=(14, 10))
y2 = plt.hist(times['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
y5 = plt.hist(times['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
y3 = plt.hist(duration_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
y4 = plt.hist(duration_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Execution time per event")
plt.legend()
plt.show()
# Histogram: number of patches per event, Python vs C++.
num_patches_proba = np.genfromtxt("num_patches_proba.csv", delimiter=",")
num_patches_octree = np.genfromtxt("num_patches_octree.csv", delimiter=",")
alpha = 0.6
bins=40
r = (0, 250)
fig = plt.figure(figsize=(14, 10))
plt.hist(num_patches['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Number of patches per event")
plt.legend()
plt.show()
# Histogram: overlap mean per event, Python vs C++.
overlap_mean_proba = np.genfromtxt("overlap_mean_proba.csv", delimiter=",")
overlap_mean_octree = np.genfromtxt("overlap_mean_octree.csv", delimiter=",")
alpha = 0.6
bins=40
r = (0, 40)
fig = plt.figure(figsize=(14, 10))
plt.hist(overlap_mean['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Overlap mean")
plt.legend()
plt.show()
# Histogram: overlap standard deviation per event, Python vs C++.
overlap_std_proba = np.genfromtxt("overlap_std_proba.csv", delimiter=",")
overlap_std_octree = np.genfromtxt("overlap_std_octree.csv", delimiter=",")
alpha = 0.6
bins=40
r = (0, 25)
fig = plt.figure(figsize=(14, 10))
plt.hist(overlap_std['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)
plt.title("Overlap STD")
plt.legend()
plt.show()