import numpy as np
import yaml
import torch
import pandas as pd
from tqdm.notebook import tqdm, trange
# 3D visualization
import plotly
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=False)
# 2D plotting
import matplotlib
from matplotlib import pyplot as plt
import seaborn
# Default figure size / context for all matplotlib plots in this notebook.
seaborn.set(rc={
'figure.figsize':(15, 10),
})
seaborn.set_context('talk') # or paper
import sys, os
# set software directory
# Prepend the local lartpc_mlreco3d checkout (under $HOME) so the
# `mlreco` imports below resolve to it.
software_dir = '%s/lartpc_mlreco3d' % os.environ.get('HOME')
sys.path.insert(0,software_dir)
from mlreco.visualization import scatter_points, plotly_layout3d
from mlreco.visualization.gnn import scatter_clusters, network_topology, network_schematic
from mlreco.utils.ppn import uresnet_ppn_type_point_selector
from mlreco.utils.cluster.dense_cluster import fit_predict, gaussian_kernel
from mlreco.main_funcs import process_config, prepare
from mlreco.utils.gnn.cluster import get_cluster_label
from mlreco.utils.deghosting import adapt_labels_numpy as adapt_labels
from mlreco.visualization.gnn import network_topology
from mlreco.utils.cluster.cluster_graph_constructor import ClusterGraphConstructor
from mlreco.utils.gnn.cluster import form_clusters
from mlreco.utils.gnn.evaluation import primary_assignment
from larcv import larcv
/usr/local/lib/python3.8/dist-packages/MinkowskiEngine/__init__.py:36: UserWarning: The environment variable `OMP_NUM_THREADS` not set. MinkowskiEngine will automatically set `OMP_NUM_THREADS=16`. If you want to set `OMP_NUM_THREADS` manually, please export it on the command line before running a python script. e.g. `export OMP_NUM_THREADS=12; python your_program.py`. It is recommended to set it below 24.
Welcome to JupyROOT 6.22/09
(To save space, I am capturing the output of initializing the config below)
You can set the batch size according to your computing resources.
# Load the full-chain configuration and override the I/O batch size.
# FIX: the original bound `cfg` to an open file that was never closed;
# a context manager releases the handle deterministically.
# NOTE(review): yaml.Loader can construct arbitrary Python objects — fine
# for this trusted local config, never for untrusted input.
batch_size = 8
with open('/sdf/group/neutrino/ldomine/chain/me/v04/workshop.cfg', 'r') as cfg:
    yaml_cfg = yaml.load(cfg, Loader=yaml.Loader)
yaml_cfg['iotool']['batch_size'] = batch_size
yaml_cfg['iotool']['sampler']['batch_size'] = batch_size
# Silence the per-module verbosity of the full chain.
yaml_cfg['model']['modules']['chain']['verbose'] = False
%%capture
# (%%capture suppresses the verbose initialization output of this cell.)
# pre-process configuration (checks + certain non-specified default settings)
process_config(yaml_cfg)
# prepare function configures necessary "handlers"
# `hs` bundles the trainer and the data iterator used below.
hs=prepare(yaml_cfg)
# Call forward to run the net, store the output in "output"
data, output = hs.trainer.forward(hs.data_io_iter)
We will demonstrate on one entry the process of Michel electron selection. You can pick an entry index in [0, batch_size - 1], preferably one that has a Michel electron!
# Entry index within the current batch (0 <= entry < batch_size).
entry = 0
# Semantic class ids produced by the UResNet stage (see text below).
Michel_label = 2
Track_label = 1
# Per-entry voxel arrays; columns 1:4 are x,y,z coordinates and column 4
# is the voxel value (summed later as "sum_pix") — the last column holds
# a label.  Column 0 is presumably the batch id — TODO confirm.
clust_label = data['cluster_label'][entry]
input_data = data['input_data'][entry]
# True semantic class per voxel.
segment_label = data['segment_label'][entry][:, -1]
# Predicted semantic class per voxel = argmax over the class scores.
segment_pred = output['segmentation'][entry].argmax(axis=1)
For each voxel, the UResNet stage of the full chain predicts a semantic class: electromagnetic shower (0), track (1), Michel (2), Delta (3) or low energy deposition (4). Let's visualize the predictions along with the truth labels:
# Overlay true vs predicted semantic labels in an interactive 3D view;
# each call appends one toggleable trace to the figure.
trace = []
trace+= scatter_points(input_data,markersize=1,color=segment_label, cmin=0, cmax=10, colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'True semantic labels'
trace+= scatter_points(input_data,markersize=1,color=segment_pred, cmin=0, cmax=10, colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'Predicted semantic labels'
fig = go.Figure(data=trace,layout=plotly_layout3d())
fig.update_layout(legend=dict(x=1.0, y=0.8))
iplot(fig)
# Select voxel coordinates (columns 1:4 = x,y,z) by semantic class,
# once using the true labels and once using the predictions.
Track_coords = input_data[segment_label == Track_label][:, 1:4]
Michel_coords = input_data[segment_label == Michel_label][:, 1:4]
Track_coords_pred = input_data[segment_pred == Track_label][:, 1:4]
Michel_coords_pred = input_data[segment_pred == Michel_label][:, 1:4]
We can do a sanity check by looking at what we just selected:
# Sanity check: plot the four selections above, one fixed color each.
trace = []
trace+= scatter_points(Track_coords,markersize=1,color=plotly.colors.qualitative.D3[0])
trace[-1].name = 'True tracks'
trace+= scatter_points(Michel_coords,markersize=1,color=plotly.colors.qualitative.D3[1])
trace[-1].name = 'True Michel'
trace+= scatter_points(Track_coords_pred,markersize=1,color=plotly.colors.qualitative.D3[2])
trace[-1].name = 'Predicted tracks'
trace+= scatter_points(Michel_coords_pred,markersize=1,color=plotly.colors.qualitative.D3[3])
trace[-1].name = 'Predicted Michel'
fig = go.Figure(data=trace,layout=plotly_layout3d())
fig.update_layout(legend=dict(x=1.0, y=0.8))
iplot(fig)
We use throughout the notebook a hyperparameter for the selection that we call $\varepsilon$. It is set to the diagonal of a voxel and used to measure touching radius, etc.
# eps = 2 * sqrt(2), used as touching/clustering radius below.
# NOTE(review): the text calls this "the diagonal of a voxel"; a unit
# cube diagonal would be sqrt(3) — confirm the intended definition.
eps = 2.8284271247461903
from sklearn.cluster import DBSCAN
# True Michel cluster id per true-Michel voxel (column -3 of cluster_label).
Michel_true_clusters = clust_label[clust_label[:, -1] == Michel_label][:, -3].astype(np.int64)
# DBSCAN the predicted track / Michel voxels; a label of -1 means noise.
Track_pred_clusters = DBSCAN(eps=eps, min_samples=5).fit(Track_coords_pred).labels_
# Michel clusters use a looser radius (2*eps) — presumably because Michel
# depositions are sparser; TODO confirm.
Michel_pred_clusters = DBSCAN(eps=2*eps, min_samples=5).fit(Michel_coords_pred).labels_
# Distinct non-noise predicted Michel cluster ids.
Michel_pred_clusters_id = np.unique(Michel_pred_clusters[Michel_pred_clusters > -1])
As you might guess, using DBSCAN to form predicted clusters might result in several tracks being clustered together. It is sufficient for the purpose of determining Michel candidates (touching the edge of a Track).
# Visualize the clustering: color encodes the (true or DBSCAN) cluster id.
trace = []
trace+= scatter_points(Michel_coords,markersize=1,color=Michel_true_clusters,cmin=0,cmax=20,colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'True Michel'
trace+= scatter_points(Track_coords_pred,markersize=1,color=Track_pred_clusters,cmin=0,cmax=20,colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'Predicted tracks clusters'
trace+= scatter_points(Michel_coords_pred,markersize=1,color=Michel_pred_clusters,cmin=0,cmax=20,colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'Predicted Michel clusters'
fig = go.Figure(data=trace,layout=plotly_layout3d())
fig.update_layout(legend=dict(x=1.0, y=0.8))
iplot(fig)
Michel candidates will be predicted Michel clusters (as determined with DBSCAN just above) that also touch the edge of a predicted Track cluster. This means 2 criteria:
# Radius (in voxel units) of the ball removed around the Michel/track
# contact point when testing whether the attachment is at a track end.
ablation_radius = 15
from scipy.spatial.distance import cdist
def find_Michel_candidates(Michel_pred_clusters,
                           Track_pred_clusters,
                           Michel_coords_pred,
                           Track_coords_pred):
    """Flag predicted Michel clusters that touch the edge of a predicted track.

    Parameters
    ----------
    Michel_pred_clusters : np.ndarray
        DBSCAN label per predicted-Michel voxel (-1 = noise).
    Track_pred_clusters : np.ndarray
        DBSCAN label per predicted-track voxel (-1 = noise).
    Michel_coords_pred : np.ndarray
        (N, 3) coordinates of the predicted-Michel voxels.
    Track_coords_pred : np.ndarray
        (M, 3) coordinates of the predicted-track voxels.

    Returns
    -------
    (is_edge, is_attached) : tuple of np.ndarray of bool
        One entry per non-noise Michel cluster id (in np.unique order).
        `is_attached`: the cluster comes within `eps` of a clustered track
        voxel.  `is_edge`: after removing a ball of radius `ablation_radius`
        around the closest track point, the remaining track voxels form a
        single DBSCAN cluster — i.e. the contact point is a track end, not
        the middle of the track.

    Notes
    -----
    Relies on the module-level `eps`, `ablation_radius` and the sklearn
    `DBSCAN` import.
    """
    Michel_pred_clusters_id = np.unique(Michel_pred_clusters[Michel_pred_clusters > -1])
    # FIX: with no non-noise track voxels, the original crashed on
    # np.min of an empty distance matrix; nothing can be attached then.
    track_mask = Track_pred_clusters > -1
    if np.count_nonzero(track_mask) == 0:
        n = len(Michel_pred_clusters_id)
        return np.zeros(n, dtype=bool), np.zeros(n, dtype=bool)
    Michel_is_edge, Michel_is_attached = [], []
    for Michel_id in Michel_pred_clusters_id:
        current_index = (Michel_pred_clusters == Michel_id)
        # Distance from every voxel of this Michel cluster to every
        # clustered track voxel.
        distances = cdist(Michel_coords_pred[current_index], Track_coords_pred[track_mask])
        is_attached = np.min(distances) < eps
        is_edge = False
        if is_attached:
            # Closest (Michel voxel, track voxel) pair and its track cluster.
            Michel_min, Track_min = np.unravel_index(np.argmin(distances), distances.shape)
            Track_id = Track_pred_clusters[track_mask][Track_min]
            Track_min_coords = Track_coords_pred[track_mask][Track_min]
            Track_cluster_coords = Track_coords_pred[Track_pred_clusters == Track_id]
            # Ablate a ball around the contact point; if the remainder is a
            # single cluster, the Michel sits at a track end.
            ablated_cluster = Track_cluster_coords[np.linalg.norm(Track_cluster_coords-Track_min_coords, axis=1) > ablation_radius]
            if ablated_cluster.shape[0] > 0:
                new_cluster = DBSCAN(eps=eps, min_samples=5).fit(ablated_cluster).labels_
                is_edge = len(np.unique(new_cluster[new_cluster > -1])) == 1
            else:
                # The whole track fits inside the ablation ball: call it an edge.
                is_edge = True
        Michel_is_edge.append(is_edge)
        Michel_is_attached.append(is_attached)
    # FIX: force boolean dtype — np.array([]) defaults to float64, which
    # breaks the `is_edge & is_attached` bitwise-and downstream when there
    # are no Michel clusters.
    return np.array(Michel_is_edge, dtype=bool), np.array(Michel_is_attached, dtype=bool)
# Run candidate finding on this entry; each output array holds one boolean
# per non-noise predicted Michel cluster id.
is_edge, is_attached = find_Michel_candidates(Michel_pred_clusters,
Track_pred_clusters,
Michel_coords_pred,
Track_coords_pred)
# Display the flags (notebook cell output).
is_edge, is_attached
(array([ True]), array([ True]))
We loop over the true Michel clusters inside the event. We match them with a Michel cluster candidate by maximizing the overlap voxel count.
def compute_metrics(input_data,
                    Michel_true_clusters,
                    Michel_pred_clusters,
                    Michel_coords,
                    Michel_coords_pred,
                    candidates):
    """Match true Michel clusters to candidate clusters and tabulate overlap.

    Each true Michel cluster is matched to the candidate (predicted) Michel
    cluster with which it shares the most closest voxels (within `eps`);
    unmatched true clusters are skipped.

    Parameters
    ----------
    input_data : np.ndarray
        Per-voxel array; columns 1:4 are x,y,z, column 4 the voxel value.
    Michel_true_clusters : np.ndarray
        True cluster id per true-Michel voxel (-1 = unassigned).
    Michel_pred_clusters : np.ndarray
        DBSCAN label per predicted-Michel voxel (-1 = noise).
    Michel_coords, Michel_coords_pred : np.ndarray
        (N, 3) coordinates of true / predicted Michel voxels.
    candidates : np.ndarray of bool
        Mask over predicted-Michel voxels that passed candidate selection.

    Returns
    -------
    pd.DataFrame
        One row per matched true cluster with pixel counts, value sums and
        voxel-level overlap statistics.

    Notes
    -----
    Relies on module-level `segment_label`, `segment_pred`, `Michel_label`
    and `eps`.
    """
    Michel_true_clusters_id = np.unique(Michel_true_clusters[Michel_true_clusters > -1])
    metrics = {
        "michel_true_num_pix": [],
        "michel_pred_num_pix": [],
        "michel_true_sum_pix": [],
        "michel_pred_sum_pix": [],
        "overlap_num" : [],
        "overlap_sum": []
    }
    for Michel_id in Michel_true_clusters_id:
        current_index = Michel_true_clusters == Michel_id
        distances = cdist(Michel_coords_pred[candidates], Michel_coords[current_index])
        # FIX: the original tested shape[0] twice (and only printed);
        # an empty axis on either side makes the argmin below invalid,
        # so skip this true cluster instead.
        if distances.shape[0] == 0 or distances.shape[1] == 0:
            continue
        # For each true voxel, the predicted cluster of its closest candidate voxel.
        closest_clusters = Michel_pred_clusters[candidates][np.argmin(distances, axis=0)]
        # FIXME what if closest voxel is labelled -1 but legit Michel?
        closest_clusters_matching = closest_clusters[(closest_clusters > -1) & (np.min(distances, axis=0) < eps)]
        if len(closest_clusters_matching) == 0:
            continue
        # Index of Michel predicted cluster that overlaps the most
        closest_pred_id = np.bincount(closest_clusters_matching).argmax()
        if closest_pred_id < 0:
            continue
        closest_Michel_pred = Michel_coords_pred[Michel_pred_clusters == closest_pred_id]
        # Compute overlap: count true voxels whose coordinates coincide
        # exactly with a voxel of the matched predicted cluster, and sum
        # their values (column 4).
        overlap_num, overlap_sum = 0., 0.
        for v in input_data[segment_label == Michel_label][current_index]:
            count = int(np.any(np.all(v[1:4] == closest_Michel_pred, axis=1)))
            overlap_num += count
            if count > 0:
                overlap_sum += v[4]
        metrics["overlap_num"].append(overlap_num)
        metrics["overlap_sum"].append(overlap_sum)
        # Other metrics: sizes and value sums of the two matched clusters.
        metrics["michel_pred_num_pix"].append(np.count_nonzero(Michel_pred_clusters == closest_pred_id))
        metrics["michel_true_num_pix"].append(np.count_nonzero(current_index))
        metrics["michel_pred_sum_pix"].append(input_data[segment_pred == Michel_label][Michel_pred_clusters == closest_pred_id, 4].sum())
        metrics["michel_true_sum_pix"].append(input_data[segment_label == Michel_label][current_index, 4].sum())
    return pd.DataFrame(metrics)
# Boolean mask over predicted-Michel voxels belonging to clusters that pass
# both candidate criteria (attached to a track AND at its edge).
candidates = np.isin(Michel_pred_clusters, Michel_pred_clusters_id[is_edge & is_attached])
metrics = compute_metrics(input_data,
Michel_true_clusters,
Michel_pred_clusters,
Michel_coords,
Michel_coords_pred,
candidates)
# Display the per-true-cluster metrics table.
metrics
| michel_true_num_pix | michel_pred_num_pix | michel_true_sum_pix | michel_pred_sum_pix | overlap_num | overlap_sum | |
|---|---|---|---|---|---|---|
| 0 | 85 | 87 | 39.186994 | 39.382535 | 85.0 | 39.186994 |
As you may have noted, there are not too many Michel electrons in one event. There may even be none. To accumulate higher-statistics metrics, we have to loop over many events.
#%%capture
# Accumulate candidate/truth matching metrics over 100 batches.
metrics = None
missed = 0  # entries with true Michel voxels but no predicted Michel voxels
for idx in tqdm(range(100)):
    data, output = hs.trainer.forward(hs.data_io_iter)
    for entry in range(batch_size):
        # Unpack this entry.  Note that segment_label / segment_pred are
        # module-level names also read inside compute_metrics.
        clust_label = data['cluster_label'][entry]
        input_data = data['input_data'][entry]
        segment_label = data['segment_label'][entry][:, -1]
        segment_pred = output['segmentation'][entry].argmax(axis=1)
        Track_coords = input_data[segment_label == Track_label][:, 1:4]
        Michel_coords = input_data[segment_label == Michel_label][:, 1:4]
        if Michel_coords.shape[0] == 0:
            # No true Michel voxels in this entry — nothing to evaluate.
            continue
        Track_coords_pred = input_data[segment_pred == Track_label][:, 1:4]
        Michel_coords_pred = input_data[segment_pred == Michel_label][:, 1:4]
        if Michel_coords_pred.shape[0] == 0:
            # A true Michel exists but nothing was predicted: count it missed.
            missed += 1
            continue
        # Cluster truth and predictions exactly as in the single-entry demo.
        Michel_true_clusters = clust_label[clust_label[:, -1] == Michel_label][:, -3].astype(np.int64)
        Track_pred_clusters = DBSCAN(eps=eps, min_samples=5).fit(Track_coords_pred).labels_
        Michel_pred_clusters = DBSCAN(eps=2*eps, min_samples=5).fit(Michel_coords_pred).labels_
        Michel_pred_clusters_id = np.unique(Michel_pred_clusters[Michel_pred_clusters > -1])
        is_edge, is_attached = find_Michel_candidates(Michel_pred_clusters,
                                                      Track_pred_clusters,
                                                      Michel_coords_pred,
                                                      Track_coords_pred)
        candidates = np.isin(Michel_pred_clusters,
                             Michel_pred_clusters_id[is_edge & is_attached])
        if np.count_nonzero(candidates) == 0:
            # No Michel candidate survived the edge/attachment criteria.
            continue
        result = compute_metrics(input_data,
                                 Michel_true_clusters,
                                 Michel_pred_clusters,
                                 Michel_coords,
                                 Michel_coords_pred,
                                 candidates)
        # Append this entry's rows to the running table.
        metrics = result if metrics is None else pd.concat([metrics, result])
We use voxel count on the x-axis instead of the "reconstructed charge".
# Show the accumulated metrics DataFrame (notebook cell output).
metrics
| michel_true_num_pix | michel_pred_num_pix | michel_true_sum_pix | michel_pred_sum_pix | overlap_num | overlap_sum | |
|---|---|---|---|---|---|---|
| 0 | 52 | 53 | 30.987504 | 31.475994 | 52.0 | 30.987504 |
| 0 | 46 | 51 | 28.517298 | 25.406504 | 45.0 | 21.689628 |
| 0 | 78 | 69 | 39.940113 | 34.143687 | 63.0 | 32.165350 |
| 0 | 37 | 37 | 22.231038 | 17.427203 | 36.0 | 17.340804 |
| 0 | 60 | 61 | 35.605816 | 28.329325 | 57.0 | 27.592317 |
| ... | ... | ... | ... | ... | ... | ... |
| 0 | 139 | 78 | 75.077820 | 46.738332 | 78.0 | 46.738332 |
| 0 | 67 | 57 | 31.526055 | 24.710911 | 57.0 | 24.710911 |
| 0 | 122 | 94 | 62.243649 | 46.339559 | 94.0 | 46.339559 |
| 0 | 65 | 67 | 28.473585 | 32.190850 | 65.0 | 28.473585 |
| 0 | 37 | 37 | 20.567448 | 20.567448 | 37.0 | 20.567448 |
417 rows × 6 columns
# Histogram of voxel counts: matched true Michels vs selected candidates.
r = [0, 200]
b = 20
for column, label in [('michel_true_num_pix', "True Michel (primary)"),
                      ('michel_pred_num_pix', "Candidate Michel (primary)")]:
    plt.hist(metrics[column], range=r, bins=b, histtype='step', label=label)
plt.xlabel('Voxel count')
plt.ylabel('Michel candidates')
plt.legend()
<matplotlib.legend.Legend at 0x7f4d22dd0e80>