In [1]:
%%javascript
// Disable output-area auto-scrolling so long outputs render fully inline.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}
In [2]:
# Silence noisy library warnings in the notebook output.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection, Line3DCollection
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tables
import time
from sklearn.cluster import DBSCAN

# HDF5 sample with /data and /label event volumes, used throughout.
# NOTE(review): the file handle is never closed in this notebook.
f=tables.open_file('sample3d.hdf5','r')
In [3]:
def plot(x, y, z, v, **kwargs):
    """Render a 3D scatter plot of voxels at (x, y, z) colored by ``v``.

    Extra keyword arguments (e.g. vmin/vmax) are forwarded to ``scatter``.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(x, y, z, c=v, marker='o', **kwargs)
    plt.show()

def draw_cube(ax, coord, size):
    """Draw a transparent cube with red wireframe edges on a 3D axis.

    coord: minimum corner (x, y, z) of the cube; size: edge length.
    """
    x0, y0, z0 = coord[0], coord[1], coord[2]
    x1, y1, z1 = x0 + size, y0 + size, z0 + size
    faces = [
        [[x0, y0, z0], [x0, y1, z0], [x1, y1, z0], [x1, y0, z0]],  # z = z0 face
        [[x0, y0, z1], [x0, y1, z1], [x1, y1, z1], [x1, y0, z1]],  # z = z1 face
        [[x0, y0, z0], [x0, y1, z0], [x0, y1, z1], [x0, y0, z1]],  # x = x0 face
        [[x1, y0, z0], [x1, y1, z0], [x1, y1, z1], [x1, y0, z1]],  # x = x1 face
        [[x0, y0, z0], [x1, y0, z0], [x1, y0, z1], [x0, y0, z1]],  # y = y0 face
        [[x0, y1, z0], [x1, y1, z0], [x1, y1, z1], [x0, y1, z1]],  # y = y1 face
    ]
    collection = Poly3DCollection(faces, linewidths=0.1, edgecolors='r')
    # Fully transparent fill so only the wireframe is visible.
    collection.set_alpha(0)
    collection.set_facecolors('cyan')
    ax.add_collection3d(collection)
    
#data
# Events are dense 3D volumes; keep only the non-zero voxels for plotting.
print("%d events in file." % len(f.root.data))
data=f.root.data[0]
x,y,z=np.where(data>0)
v=data[np.where(data>0)]
plot(x, y, z, v, vmin=0,vmax=40)
# NOTE(review): this writes x, y, z as three ROWS (one long line per axis),
# not as one (x, y, z) triple per voxel — confirm downstream readers expect that.
np.savetxt("event.csv", [x, y, z], delimiter=",")

#label
# Same selection on the label volume; l holds the per-voxel label value.
label=f.root.label[0]
x,y,z=np.where(label>0)
l=label[np.where(label>0)]
plot(x, y, z, l)

# Demo of draw_cube: a 300-wide cube inside an empty 384-unit axis box.
fig = plt.figure()
ax = fig.add_subplot(111,projection='3d')
ax.set_xlim(0, 384)
ax.set_ylim(0, 384)
ax.set_zlim(0, 384)
draw_cube(ax, [150, 150, 150], 300)
plt.show()
10000 events in file.
In [4]:
N = 192 # Size of image
d = 64 # Size of patch
a = 32 # Size of patch's core
# (n_voxels, 3) coordinates of non-zero voxels. NOTE(review): x, y, z were
# last assigned from the LABEL volume above, not the raw data — confirm intended.
coords = np.stack([x, y, z], axis=-1)

def compute_overlap(coords, patch_centers, sizes=None):
    """Histogram of patch coverage: maps overlap count -> number of voxels.

    coords:        (V, 3) voxel coordinates.
    patch_centers: (P, 3) patch centers.
    sizes:         patch half-size, scalar or (P, 1) per-patch array. Defaults
                   to d/2, now resolved at call time (the old default
                   ``sizes=d/2`` was evaluated once at definition time).
    """
    if sizes is None:
        sizes = d / 2
    coords = np.asarray(coords)
    patch_centers = np.asarray(patch_centers)
    # Vectorized containment test: (V, P, 3) -> (V, P) -> per-voxel patch
    # counts, replacing the original Python loop over all voxels.
    inside = np.all((patch_centers - sizes <= coords[:, None, :]) &
                    (coords[:, None, :] <= patch_centers + sizes), axis=-1)
    overlap = inside.sum(axis=1)
    return dict(zip(*np.unique(overlap, return_counts=True)))

def compute_voxel_overlap(coords, patch_centers, sizes=None):
    """Return a list giving, for each voxel in ``coords``, the number of
    patch cubes that contain it.

    sizes: patch half-size, scalar or (P, 1) per-patch array; defaults to
    d/2 resolved at call time (the old default was evaluated at def time).
    """
    if sizes is None:
        sizes = d / 2
    coords = np.asarray(coords)
    patch_centers = np.asarray(patch_centers)
    # Same vectorized containment test as compute_overlap (was a Python loop).
    inside = np.all((patch_centers - sizes <= coords[:, None, :]) &
                    (coords[:, None, :] <= patch_centers + sizes), axis=-1)
    return list(inside.sum(axis=1))

def bounding_box(coordinates):
    """Return (min_corner, max_corner) of a point set, per axis."""
    lower = np.min(coordinates, axis=0)
    upper = np.max(coordinates, axis=0)
    return lower, upper

def plot_with_patches(patches, color=None, sizes=d/2):
    """Scatter the event voxels and outline each patch as a cube.

    patches: (n, 3) array of patch centers.
    color:   per-voxel color values; defaults to the module-level ``v``.
             (Previously ``color=v`` bound v once at definition time; the
             None sentinel resolves it at call time instead.)
    sizes:   scalar half-size for all patches, or indexable per-patch sizes.
    """
    if color is None:
        color = v
    fig = plt.figure(figsize=(14, 11))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=color, marker='o')
    for i, p in enumerate(patches):
        # isinstance is the idiomatic scalar check (replaces type() == int/float).
        if isinstance(sizes, (int, float)):
            draw_cube(ax, p - sizes, sizes * 2)
        else:
            draw_cube(ax, p - sizes[i], sizes[i] * 2)
    plt.show()

def plot_with_overlap(patches, sizes=d/2):
    """Scatter the event voxels colored by how many patches cover each one.

    Uses a 20-level rainbow colormap clamped to [0, 20] overlaps.
    """
    per_voxel = compute_voxel_overlap(coords, patches, sizes=sizes)
    figure = plt.figure(figsize=(14, 11))
    axes = figure.add_subplot(111, projection='3d')
    colormap = plt.get_cmap('rainbow', 20)
    scatter = axes.scatter(x, y, z, c=per_voxel, marker='o', cmap=colormap, vmin=0, vmax=20)
    figure.colorbar(scatter)
    plt.show()
    
from matplotlib.ticker import MaxNLocator

def plot_overlap(patches, sizes=d/2):
    """Histogram of the per-voxel patch-overlap counts, with integer ticks."""
    counts = np.array(compute_voxel_overlap(coords, patches, sizes=sizes))
    axes = plt.figure(figsize=(14, 11)).gca()
    axes.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xlabel("Overlaps")
    plt.ylabel("Voxels")
    plt.xlim(0, 23)
    # Half-open bins centered on each integer overlap value.
    bin_edges = np.arange(counts.min() - 0.5, counts.max() + 1.5, 1)
    plt.hist(counts, bin_edges)
    plt.show()
    
def plot_core(patch_centers):
    """Color voxels by whether they fall inside at least one patch core.

    A core is the axis-aligned cube of half-size a/2 around a patch center.
    """
    half = a / 2
    in_core = [
        np.any(np.all((patch_centers - half <= voxel) & (patch_centers + half >= voxel), axis=1))
        for voxel in coords
    ]
    figure = plt.figure(figsize=(14, 11))
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(x, y, z, c=in_core, marker='o')
    plt.show()
In [5]:
x, y, z, v
Out[5]:
(array([ 25,  25,  25, ..., 127, 127, 127]),
 array([17, 17, 18, ...,  6,  6,  7]),
 array([119, 136,  24, ...,  81,  82,  82]),
 array([13.091445 , 10.05871  , 12.7822485, ..., 30.641525 , 28.27896  ,
        10.36825  ], dtype=float32))

1. Naive algorithm

Tile the space evenly and keep only cubes that contain at least one non-zero voxel.

Pros: Easy. Deterministic.

Cons:

  • Slow!
  • Lots of proposals
  • No guarantee of minimum coverage
In [6]:
def naive_patching(N, d, a, coords, include_borders=True):
    """Tile the volume evenly with cubes of side ``d`` spaced by core size ``a``
    and keep only the cubes containing at least one voxel from ``coords``.

    N may be a scalar (cubic volume) or a length-3 array of per-axis extents.
    Returns an (n, 3) array of kept patch centers in the input frame.
    """
    if type(N) == int:
        shift = 0
        # Volume smaller than one patch: pad it to size d and shift voxels
        # so the tiling stays centered; shift is undone on return.
        if N < d:
            shift = (d - N)/2
            N = d
        coords = coords + shift
        patch_x = np.linspace(d/2, N-d/2, int(N/a))
        patch_y = np.linspace(d/2, N-d/2, int(N/a))
        patch_z = np.linspace(d/2, N-d/2, int(N/a))
    else:
        # Per-axis version of the same padding for anisotropic extents.
        shift = np.where(N < d, (d - N)/2, 0)
        coords = coords + shift
        N = np.where(N < d, d, N)
        patch_x = np.linspace(d/2, N[0]-d/2, int(N[0]/a))
        patch_y = np.linspace(d/2, N[1]-d/2, int(N[1]/a))
        patch_z = np.linspace(d/2, N[2]-d/2, int(N[2]/a))

    mesh_x, mesh_y, mesh_z = np.meshgrid(patch_x, patch_y, patch_z)
    patch_centers = np.stack([mesh_x, mesh_y, mesh_z], axis=-1)
    patch_centers = np.reshape(patch_centers, (-1, 3))
    # Add border cases
    # Half-step-shifted extra patches along certain edges for better border
    # coverage. NOTE(review): only 6 of the 12 cube edges get extras —
    # confirm the asymmetry is intended.
    if include_borders:
        border_xy0 = np.reshape(np.stack(np.meshgrid(patch_x[0], patch_y[0], (patch_z+d/2)[:-1]), axis=-1), (-1, 3))
        border_xy1 = np.reshape(np.stack(np.meshgrid(patch_x[-1], patch_y[-1], (patch_z+d/2)[:-1]), axis=-1), (-1, 3))
        border_xz0 = np.reshape(np.stack(np.meshgrid(patch_x[0], (patch_y+d/2)[:-1], patch_z[0]), axis=-1), (-1, 3))
        border_xz1 = np.reshape(np.stack(np.meshgrid(patch_x[-1], (patch_y+d/2)[:-1], patch_z[-1]), axis=-1), (-1, 3))
        border_zy0 = np.reshape(np.stack(np.meshgrid((patch_x+d/2)[:-1], patch_y[0], patch_z[0]), axis=-1), (-1, 3))
        border_zy1 = np.reshape(np.stack(np.meshgrid((patch_x+d/2)[:-1], patch_y[-1], patch_z[-1]), axis=-1) , (-1, 3))
        patch_centers = np.concatenate([patch_centers, border_xy0, border_xy1, border_xz0, border_xz1, border_zy0, border_zy1], axis=0)

    contains = []
    # For each candidate patch, collect the voxels inside its d-cube.
    for p in patch_centers:
        points = coords[np.all(np.logical_and(coords >= p - d/2, coords <= p + d/2), axis=1)]
        contains.append(points)

    # Keep only non-empty patches; undo the padding shift before returning.
    final_patches = []
    for i, p in enumerate(patch_centers):
        if len(contains[i]) > 0:
            final_patches.append(p)
    return np.array(final_patches) - shift

# Time and evaluate the naive algorithm on the current event.
start = time.time()
final_patches = naive_patching(N, d, a, coords)
end = time.time()

print("Found %d patches" % len(final_patches))
print("Overlap: ", compute_overlap(coords, final_patches))
print("Took %f s" % (end - start))
Found 86 patches
Overlap:  {1: 3, 2: 42, 3: 1, 4: 253, 5: 28, 6: 292, 7: 40, 8: 40, 9: 109, 10: 29, 12: 160, 14: 23, 15: 15, 18: 118, 21: 30, 27: 1}
Took 0.021933 s
In [7]:
# Plot
# Patch outlines drawn over the raw event voxels.
plot_with_patches(final_patches)
In [8]:
# Voxels colored by how many patches cover each one.
plot_with_overlap(final_patches)
In [9]:
# Histogram of the per-voxel overlap counts.
plot_overlap(final_patches)
In [10]:
# Voxels flagged by membership in at least one patch core.
plot_core(final_patches)

2. Probability-driven greedy algorithm

Start with a flat probability distribution over all voxels.

  1. Select randomly a voxel according to the probability distribution.
  2. Mark all voxels inside the box centered at that voxel (mark differently the core and the sides)
  3. Update the probability distribution (becomes zero for the core for example)
  4. Start from 1. again until all voxels are marked properly (i.e. any voxel is either in a core or overlapped at least $x$ times)

Pros

  • Fewer patches for the same coverage (about 2 times fewer?)
  • Faster!
  • Guarantees minimum coverage

Cons

  • Parameters need to be tuned: probabilities update, max_patches
  • Randomized algorithm -> performance (time, coverage) is non-deterministic
In [11]:
def normalize(array):
    """Scale ``array`` so its elements sum to 1 (a probability vector)."""
    total = np.sum(array)
    return array / total

def proba_patching(d, a, coords):
    """Greedy probabilistic patching: repeatedly draw a voxel from a
    probability distribution, center a d-cube on it, then down-weight
    already-covered voxels (core more strongly than sides).

    Stops once every voxel is covered at least twice, all probabilities
    vanish, or max_patches is reached. Returns an (n, 3) array of centers.
    NOTE(review): no RNG seed is set, so results are non-deterministic.
    """
    n = coords.shape[0]
    proba = np.ones((n)) / n  # start from a flat distribution over voxels
    i = 0
    max_patches = 100  # hard cap on iterations / emitted patches
    patches = [] # List of center coordinates of patches dxd
    voxel_num_boxes = np.zeros_like(proba)  # per-voxel coverage count
    while np.count_nonzero(voxel_num_boxes < 2) > 0 and  i < max_patches:
        # Despite the plural name, this is a single sampled voxel index.
        indices = np.random.choice(np.arange(n), p=proba)
        selection_inside = np.all(np.logical_and(coords >= coords[indices] - d/2, coords <= coords[indices] + d/2), axis=-1)
        indices_inside = np.argwhere(selection_inside)
        voxels_inside = coords[selection_inside]
        distances_to_center = np.sqrt(np.sum(np.power(voxels_inside - coords[indices], 2), axis=-1))
        new_proba = np.ones_like(proba)
        # Spherical "core" of radius a; remaining cube voxels within radius d
        # are "sides". NOTE(review): thresholds are a and d here, while
        # plot_core uses a/2 half-sizes — confirm the intended geometry.
        core_indices = distances_to_center <= a
        new_proba[indices_inside[core_indices]] = 0.01
        new_proba[indices_inside[np.logical_and(np.logical_not(core_indices), distances_to_center <= d)]] = 0.4

        voxel_num_boxes[indices_inside] += 1 # Update voxel_num_boxes: increment all voxels inside box
        patches.append(coords[indices])
        i += 1
        proba = proba * new_proba  # multiplicative down-weighting
        if np.sum(proba) == 0.0:
            break
        proba = normalize(proba)
    return np.array(patches)

# Time and evaluate the probabilistic algorithm on the current event.
start = time.time()
patches2 = proba_patching(d, a, coords)
end = time.time()

print("Found %d patches" % len(patches2))
print("Overlap: ", compute_overlap(coords, patches2))
print("Took %f s" % (end - start))
Found 27 patches
Overlap:  {2: 189, 3: 351, 4: 325, 5: 317, 7: 2}
Took 0.006260 s
In [12]:
# Same diagnostic plots as section 1, for the probabilistic result.
plot_with_patches(patches2)
In [13]:
plot_with_overlap(patches2)
In [14]:
plot_overlap(patches2)
In [15]:
plot_core(patches2)

3. Clustering first

Define several clusters with DBScan for example. Then find a minimum bounding box for each of the cluster and tile deterministically.

Pros

  • Faster?
  • Not too many boxes either.

Cons

  • Unclear how it scales with the number of voxels (DBSCAN).
  • DBScan becomes the main bottleneck (in time of execution)
  • No guarantee of minimum coverage (although it covers better than naive algorithm)
In [16]:
coords = np.stack([x, y, z], axis=-1)  # rebuild the (n, 3) voxel coordinates for this section

def clustering(d, a, coords):
    """Cluster voxels with DBSCAN, then tile each cluster's bounding box
    with the naive patcher (in cluster-local coordinates).

    Returns (labels, patch_centers): the per-voxel DBSCAN labels and an
    (n, 3) array of patch centers in the global frame.
    """
    labels = DBSCAN(eps=10, min_samples=1).fit_predict(coords)
    # Tile each cluster independently
    per_cluster_patches = []
    for i_cluster in np.unique(labels):
        voxels = coords[labels == i_cluster]
        p1, p2 = bounding_box(voxels)
        # Tile relative to the cluster's own bounding box, then shift back.
        per_cluster_patches.append(naive_patching(p2 - p1, d, a, voxels - p1, include_borders=False) + p1)
    # Fix: concatenate the list directly. The original wrapped the ragged
    # list in np.array() first, which raises a ValueError on modern NumPy
    # (inhomogeneous shapes can no longer form a plain ndarray).
    final_patches3 = np.concatenate(per_cluster_patches, axis=0)
    return labels, final_patches3

# Time and evaluate the clustering-based algorithm on the current event.
start = time.time()
db, final_patches3 = clustering(d, a, coords)
end = time.time()


print("Found %d patches" % len(final_patches3))
print("Overlap: ", compute_overlap(coords, final_patches3))
print("Took %f s" % (end - start))
Found 60 patches
Overlap:  {8: 733, 12: 200, 4: 171, 16: 72, 20: 8}
Took 0.013046 s
In [17]:
# Color voxels by DBSCAN cluster label instead of charge value.
plot_with_patches(final_patches3, color=db)
In [18]:
plot_with_overlap(final_patches3)
In [19]:
plot_overlap(final_patches3)
In [20]:
plot_core(final_patches3)

4. Clustering-first probabilistic greedy algorithm

Let's try to combine the best of both worlds.

Pros

  • We keep the low number of boxes from the greedy algorithm.
  • Faster than naive algorithm
  • Guarantee of minimum coverage

Cons

  • Slower than greedy or clustering-first algorithms alone
  • DBScan is the main bottleneck
In [21]:
coords = np.stack([x, y, z], axis=-1)  # rebuild the (n, 3) voxel coordinates for this section

def hybrid(d, a, coords):
    """Cluster voxels with DBSCAN, then run the probabilistic patcher inside
    each cluster (in cluster-local coordinates) — clustering + greedy combined.

    Returns (labels, patch_centers): per-voxel DBSCAN labels and an (n, 3)
    array of patch centers in the global frame.
    """
    labels = DBSCAN(eps=10, min_samples=1).fit_predict(coords)
    # Patch each cluster independently
    per_cluster_patches = []
    for i_cluster in np.unique(labels):
        voxels = coords[labels == i_cluster]
        p1, p2 = bounding_box(voxels)
        # Patch in the cluster's local frame, then shift back to global coords.
        per_cluster_patches.append(proba_patching(d, a, voxels - p1) + p1)
    # Fix: concatenate the list directly. The original wrapped the ragged
    # list in np.array() first, which raises a ValueError on modern NumPy.
    final_patches4 = np.concatenate(per_cluster_patches, axis=0)
    return labels, final_patches4

# Time and evaluate the hybrid algorithm on the current event.
start = time.time()
db, final_patches4 = hybrid(d, a, coords)
end = time.time()



print("Found %d patches" % len(final_patches4))
print("Overlap: ", compute_overlap(coords, final_patches4))
print("Took %f s" % (end - start))
Found 24 patches
Overlap:  {2: 417, 3: 360, 4: 349, 5: 7, 6: 51}
Took 0.015162 s
In [22]:
# Color voxels by DBSCAN cluster label for the hybrid result.
plot_with_patches(final_patches4, color=db)
In [23]:
plot_with_overlap(final_patches4)
In [24]:
plot_overlap(final_patches4)
In [25]:
plot_core(final_patches4)

5. Octree algorithm

  1. Consider a cube and split it in 8 equal subcubes.
  2. Discard empty subcubes.
  3. If we reached final patch size continue, otherwise go back to 1.
  4. Given the list of patches with final size (that covers all voxels), compute a better coverage: select a random number of sub-cubes among 8 cubes of same size, shifted of a small amount in 8 different diagonal directions.

Pros

  • Fast
  • Pretty uniform in space
  • Reasonable amount of final boxes

Cons

  • Need to tune some parameters such as final patch size and random selection of the final cubes
  • Does not guarantee minimum coverage
In [26]:
coords = np.stack([x, y, z], axis=-1)  # rebuild the (n, 3) voxel coordinates for this section
def octotree(N, d, coords, num_choice=6, final_size=None):
    """Octree-style patching: recursively split the N-cube into octants,
    dropping empty ones; at the final level emit a random subset of the 8
    diagonally-shifted sub-cubes as patches of half-size d/2.

    Parameters:
        N:          edge length of the full (cubic) volume.
        d:          patch edge length; emitted sizes are all d/2 half-sizes.
        coords:     (V, 3) voxel coordinates.
        num_choice: number of random draws among the 8 shifted sub-cubes
                    (duplicates collapse, so the kept count varies).
        final_size: stop subdividing once a cube's size is <= this value.
                    Defaults to 0.7 * d computed from the ACTUAL argument —
                    the original default ``0.7*d`` was evaluated at function
                    definition time against the module-level d.

    Returns (patches, sizes): (n, 3) patch centers and (n, 1) half-sizes.
    NOTE(review): uses np.random without a seed, so output is non-deterministic.
    """
    if final_size is None:
        final_size = 0.7 * d
    heap = [([N/2, N/2, N/2], N/2, coords)]  # FIFO of (center, half_size, voxels)
    patches = []
    sizes = []
    while len(heap) > 0:
        center, size, voxels = heap.pop(0)
        x0, y0, z0 = center
        half = size/2
        # The 8 octant centers (same ordering as the original literal lists).
        offsets = [
            (+half, +half, -half), (-half, +half, -half),
            (+half, -half, -half), (-half, -half, -half),
            (+half, +half, +half), (-half, +half, +half),
            (+half, -half, +half), (-half, -half, +half),
        ]
        children = [[x0 + dx, y0 + dy, z0 + dz] for dx, dy, dz in offsets]
        if size > final_size:
            child_voxel_sets = []
            for cube in children:
                if voxels.shape[0] > 0:
                    cube = np.array(cube)
                    inside = np.all(np.logical_and(voxels >= cube - half, voxels <= cube + half), axis=1)
                    child_voxel_sets.append(voxels[inside])
                    # Remove assigned voxels by boolean masking. The original
                    # passed the boolean array to np.delete, which relied on
                    # deprecated integer-cast semantics on older NumPy.
                    voxels = voxels[~inside]
            # Recurse only into non-empty octants.
            for i, voxel_set in enumerate(child_voxel_sets):
                if len(voxel_set) > 0:
                    heap.append((children[i], half, voxel_set))
        else:
            # Final level: keep a random subset of the 8 shifted sub-cubes.
            chosen = np.unique(np.random.choice(np.arange(8), size=num_choice))
            kept = np.take(children, chosen, axis=0)
            patches.extend(kept)
            sizes.extend([d/2] * chosen.shape[0])
    return np.array(patches), np.array(sizes)[:, None]
        
# Time and evaluate the octree algorithm; it also returns per-patch sizes.
start = time.time()
final_patches5, sizes5 = octotree(N, d, coords)
end = time.time()

print("Found %d patches with sizes " % len(final_patches5), np.unique(sizes5*2))
print("Overlap: ", compute_overlap(coords, final_patches5, sizes=sizes5))
print("Took %f s" % (end - start))
Found 51 patches with sizes  [64.]
Overlap:  {0: 1, 1: 6, 2: 51, 3: 49, 4: 122, 5: 423, 6: 81, 7: 186, 8: 147, 9: 66, 10: 16, 11: 6, 12: 30}
Took 0.008477 s
In [27]:
# Diagnostics for the octree result, passing its per-patch sizes through.
plot_with_patches(final_patches5, sizes=sizes5)
In [28]:
plot_with_overlap(final_patches5, sizes=sizes5)
In [29]:
plot_overlap(final_patches5, sizes=sizes5)
In [30]:
# NOTE(review): plot_core uses the global core size a, not sizes5.
plot_core(final_patches5)

6. Simple comparison study

Using the first 1000 events from the sample.

In [31]:
def compare(n):
    """Run all five patching algorithms on the first ``n`` events and collect
    per-event execution time, patch count, and overlap statistics.

    Returns (times, num_patches, overlap_mean, overlap_std), each a dict
    mapping algorithm name -> list of per-event values.
    """
    # One callable per algorithm, each returning just the patch centers.
    # Insertion order matches the original per-event execution order.
    algorithms = {
        'naive': lambda c: naive_patching(N, d, a, c),
        'proba': lambda c: proba_patching(d, a, c),
        'cluster': lambda c: clustering(d, a, c)[1],  # drop DBSCAN labels
        'hybrid': lambda c: hybrid(d, a, c)[1],       # drop DBSCAN labels
        'octree': lambda c: octotree(N, d, c)[0],     # drop per-patch sizes
    }
    times = {name: [] for name in algorithms}
    num_patches = {name: [] for name in algorithms}
    overlap_mean = {name: [] for name in algorithms}
    overlap_std = {name: [] for name in algorithms}
    for i in range(n):
        if i%100 == 0:
            print("%d / %d" % (i, n))
        data = f.root.data[i]
        x, y, z = np.where(data > 0)
        coords = np.stack([x, y, z], axis=-1)

        # The same measurement stanza, once per algorithm (replaces five
        # copy-pasted blocks from the original).
        for name, run in algorithms.items():
            start = time.time()
            patches = run(coords)
            end = time.time()
            times[name].append(end - start)
            num_patches[name].append(len(patches))
            # NOTE: overlap uses the default patch half-size d/2 for every
            # algorithm, octree included (matching the original behavior).
            overlap = compute_voxel_overlap(coords, patches)
            overlap_mean[name].append(np.mean(overlap))
            overlap_std[name].append(np.std(overlap))

    return times, num_patches, overlap_mean, overlap_std

times, num_patches, overlap_mean, overlap_std = compare(1000)
0 / 1000
100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000

Execution time per event

In [32]:
# Histogram styling shared by the comparison plots below.
alpha = 0.6
bins=40
r = (0, 0.06)
# Overlaid per-algorithm histograms of execution time per event.
fig = plt.figure(figsize=(14, 10))
y3 = plt.hist(times['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
y4 = plt.hist(times['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
y1 = plt.hist(times['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
y2 = plt.hist(times['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
y5 = plt.hist(times['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)

plt.legend()
plt.title("Execution time per event")
plt.show()

Number of patches per event

In [33]:
# Overlaid per-algorithm histograms of patch count per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 250)
plt.hist(num_patches['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Number of patches per event")
plt.show()

Overlap mean

In [34]:
# Overlaid per-algorithm histograms of mean voxel overlap per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 40)
plt.hist(overlap_mean['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.legend()
plt.title("Overlap Mean")
plt.show()

Overlap STD

In [35]:
# Overlaid per-algorithm histograms of overlap standard deviation per event.
fig = plt.figure(figsize=(14, 10))
r = (0, 25)
plt.hist(overlap_std['cluster'], label='Clustering', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['hybrid'], label='Hybrid', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['naive'], label='Naive', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['octree'], label='Octree', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['proba'], label='Probabilistic', alpha=alpha, bins=bins, range=r)
plt.title("Overlap STD")
plt.legend()
plt.show()

Compromise number of patches vs overlap mean

We want to be in the lower right quadrant: low number of patches but high overlap mean.

In [36]:
# One 2D histogram per algorithm: overlap mean (x) vs number of patches (y).
# A loop replaces five copy-pasted blocks; iteration order matches the
# original figure order, so the rendered output is unchanged.
for title, key in [("Naive", 'naive'), ("Clustering", 'cluster'),
                   ("Probabilistic", 'proba'), ("Hybrid", 'hybrid'),
                   ("Octree", 'octree')]:
    plt.hist2d(overlap_mean[key], num_patches[key])
    plt.title(title)
    plt.xlabel('Overlap mean')
    plt.ylabel('Number of patches')
    plt.show()

7. C++ version

Let's check that the C++ implementation of Octree and Probabilistic algorithms works fine (and is faster than Python version!).

Execution time per event

The probabilistic algorithm currently does not gain much by being implemented in C++. The octree algorithm however is drastically faster.

In [54]:
# Per-event durations measured by the C++ implementations (one value per row).
duration_proba = np.genfromtxt("duration_proba.csv", delimiter=",")
duration_octree = np.genfromtxt("duration_octree.csv", delimiter=",")

alpha = 0.6
bins=40
r = (0, 0.02)
fig = plt.figure(figsize=(14, 10))

y2 = plt.hist(times['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
y5 = plt.hist(times['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
y3 = plt.hist(duration_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
y4 = plt.hist(duration_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)

plt.title("Execution time per event")
plt.legend()
plt.show()

Number of patches per event

In [50]:
# Per-event patch counts produced by the C++ implementations.
num_patches_proba = np.genfromtxt("num_patches_proba.csv", delimiter=",")
num_patches_octree = np.genfromtxt("num_patches_octree.csv", delimiter=",")

alpha = 0.6
bins=40
r = (0, 250)
fig = plt.figure(figsize=(14, 10))

plt.hist(num_patches['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(num_patches_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)

plt.title("Number of patches per event")
plt.legend()
plt.show()

Overlap mean

In [51]:
# Per-event mean overlap as computed by the C++ implementations.
overlap_mean_proba = np.genfromtxt("overlap_mean_proba.csv", delimiter=",")
overlap_mean_octree = np.genfromtxt("overlap_mean_octree.csv", delimiter=",")

alpha = 0.6
bins=40
r = (0, 40)
fig = plt.figure(figsize=(14, 10))

plt.hist(overlap_mean['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_mean_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)

plt.title("Overlap mean")
plt.legend()
plt.show()

Overlap STD

In [52]:
# Per-event overlap standard deviation as computed by the C++ implementations.
overlap_std_proba = np.genfromtxt("overlap_std_proba.csv", delimiter=",")
overlap_std_octree = np.genfromtxt("overlap_std_octree.csv", delimiter=",")

alpha = 0.6
bins=40
r = (0, 25)
fig = plt.figure(figsize=(14, 10))

plt.hist(overlap_std['octree'], label='Octree (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std['proba'], label='Probabilistic (Python)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std_proba, label='Probabilistic (C++)', alpha=alpha, bins=bins, range=r)
plt.hist(overlap_std_octree, label='Octree (C++)', alpha=alpha, bins=bins, range=r)

plt.title("Overlap STD")
plt.legend()
plt.show()