import numpy as np
import yaml
import torch
import pandas as pd
from tqdm.notebook import tqdm, trange
# 3D visualization
import plotly
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=False)
# 2D plotting
import matplotlib
from matplotlib import pyplot as plt
import seaborn
# Default figure size / context for all matplotlib plots in this notebook.
seaborn.set(rc={
'figure.figsize':(15, 10),
})
seaborn.set_context('talk') # or paper
import sys, os
# set software directory
# Prepend the local lartpc_mlreco3d checkout (under $HOME) so the
# `mlreco` imports below resolve to it.
software_dir = '%s/lartpc_mlreco3d' % os.environ.get('HOME')
sys.path.insert(0,software_dir)
from mlreco.visualization import scatter_points, plotly_layout3d
from mlreco.visualization.gnn import scatter_clusters, network_topology, network_schematic
from mlreco.utils.ppn import uresnet_ppn_type_point_selector
from mlreco.utils.cluster.dense_cluster import fit_predict, gaussian_kernel
from mlreco.main_funcs import process_config, prepare
from mlreco.utils.gnn.cluster import get_cluster_label
from mlreco.utils.deghosting import adapt_labels_numpy as adapt_labels
from mlreco.visualization.gnn import network_topology
from mlreco.utils.cluster.cluster_graph_constructor import ClusterGraphConstructor
from mlreco.utils.gnn.cluster import form_clusters
from mlreco.utils.gnn.evaluation import primary_assignment
from larcv import larcv
/usr/local/lib/python3.8/dist-packages/MinkowskiEngine/__init__.py:36: UserWarning: The environment variable `OMP_NUM_THREADS` not set. MinkowskiEngine will automatically set `OMP_NUM_THREADS=16`. If you want to set `OMP_NUM_THREADS` manually, please export it on the command line before running a python script. e.g. `export OMP_NUM_THREADS=12; python your_program.py`. It is recommended to set it below 24.
Welcome to JupyROOT 6.22/09
(To save space, I am capturing the output of initializing the config below)
You can set the batch size according to your computing resources.
# Load the full-chain configuration and override the I/O batch size.
# FIX: the original bound `cfg` to an open file that was never closed;
# a context manager releases the handle deterministically.
# NOTE(review): yaml.Loader can construct arbitrary Python objects — fine
# for this trusted local config, never for untrusted input.
batch_size = 8
with open('/sdf/group/neutrino/ldomine/chain/me/v04/workshop.cfg', 'r') as cfg:
    yaml_cfg = yaml.load(cfg, Loader=yaml.Loader)
yaml_cfg['iotool']['batch_size'] = batch_size
yaml_cfg['iotool']['sampler']['batch_size'] = batch_size
# Silence the per-module verbosity of the full chain.
yaml_cfg['model']['modules']['chain']['verbose'] = False
%%capture
# (%%capture suppresses the verbose initialization output of this cell.)
# pre-process configuration (checks + certain non-specified default settings)
process_config(yaml_cfg)
# prepare function configures necessary "handlers"
# `hs` bundles the trainer and the data iterator used below.
hs=prepare(yaml_cfg)
# Call forward to run the net, store the output in "output"
data, output = hs.trainer.forward(hs.data_io_iter)
We will demonstrate on one entry the process of Michel electron selection. You can pick an entry index in [0, batch_size - 1], preferably one that has a Michel electron!
# Entry index within the current batch (0 <= entry < batch_size).
entry = 0
# Semantic class ids produced by the UResNet stage (see text below).
Michel_label = 2
Track_label = 1
# Per-entry voxel arrays; columns 1:4 are x,y,z coordinates and column 4
# is the voxel value (summed later as "sum_pix") — the last column holds
# a label.  Column 0 is presumably the batch id — TODO confirm.
clust_label = data['cluster_label'][entry]
input_data = data['input_data'][entry]
# True semantic class per voxel.
segment_label = data['segment_label'][entry][:, -1]
# Predicted semantic class per voxel = argmax over the class scores.
segment_pred = output['segmentation'][entry].argmax(axis=1)
For each voxel, the UResNet stage of the full chain predicts a semantic class: electromagnetic shower (0), track (1), Michel (2), Delta (3) or low energy deposition (4). Let's visualize the predictions along with the truth labels:
# Overlay true vs predicted semantic labels in an interactive 3D view;
# each call appends one toggleable trace to the figure.
trace = []
trace+= scatter_points(input_data,markersize=1,color=segment_label, cmin=0, cmax=10, colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'True semantic labels'
trace+= scatter_points(input_data,markersize=1,color=segment_pred, cmin=0, cmax=10, colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'Predicted semantic labels'
fig = go.Figure(data=trace,layout=plotly_layout3d())
fig.update_layout(legend=dict(x=1.0, y=0.8))
iplot(fig)
# Select voxel coordinates (columns 1:4 = x,y,z) by semantic class,
# once using the true labels and once using the predictions.
Track_coords = input_data[segment_label == Track_label][:, 1:4]
Michel_coords = input_data[segment_label == Michel_label][:, 1:4]
Track_coords_pred = input_data[segment_pred == Track_label][:, 1:4]
Michel_coords_pred = input_data[segment_pred == Michel_label][:, 1:4]
We can do a sanity check by looking at what we just selected:
# Sanity check: plot the four selections above, one fixed color each.
trace = []
trace+= scatter_points(Track_coords,markersize=1,color=plotly.colors.qualitative.D3[0])
trace[-1].name = 'True tracks'
trace+= scatter_points(Michel_coords,markersize=1,color=plotly.colors.qualitative.D3[1])
trace[-1].name = 'True Michel'
trace+= scatter_points(Track_coords_pred,markersize=1,color=plotly.colors.qualitative.D3[2])
trace[-1].name = 'Predicted tracks'
trace+= scatter_points(Michel_coords_pred,markersize=1,color=plotly.colors.qualitative.D3[3])
trace[-1].name = 'Predicted Michel'
fig = go.Figure(data=trace,layout=plotly_layout3d())
fig.update_layout(legend=dict(x=1.0, y=0.8))
iplot(fig)
We use throughout the notebook a hyperparameter for the selection that we call $\varepsilon$. It is set to the diagonal of a voxel and used to measure touching radius, etc.
# eps = 2 * sqrt(2), used as touching/clustering radius below.
# NOTE(review): the text calls this "the diagonal of a voxel"; a unit
# cube diagonal would be sqrt(3) — confirm the intended definition.
eps = 2.8284271247461903
from sklearn.cluster import DBSCAN
# True Michel cluster id per true-Michel voxel (column -3 of cluster_label).
Michel_true_clusters = clust_label[clust_label[:, -1] == Michel_label][:, -3].astype(np.int64)
# DBSCAN the predicted track / Michel voxels; a label of -1 means noise.
Track_pred_clusters = DBSCAN(eps=eps, min_samples=5).fit(Track_coords_pred).labels_
# Michel clusters use a looser radius (2*eps) — presumably because Michel
# depositions are sparser; TODO confirm.
Michel_pred_clusters = DBSCAN(eps=2*eps, min_samples=5).fit(Michel_coords_pred).labels_
# Distinct non-noise predicted Michel cluster ids.
Michel_pred_clusters_id = np.unique(Michel_pred_clusters[Michel_pred_clusters > -1])
As you might guess, using DBSCAN to form predicted clusters might result in several tracks being clustered together. It is sufficient for the purpose of determining Michel candidates (touching the edge of a Track).
# Visualize the clustering: color encodes the (true or DBSCAN) cluster id.
trace = []
trace+= scatter_points(Michel_coords,markersize=1,color=Michel_true_clusters,cmin=0,cmax=20,colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'True Michel'
trace+= scatter_points(Track_coords_pred,markersize=1,color=Track_pred_clusters,cmin=0,cmax=20,colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'Predicted tracks clusters'
trace+= scatter_points(Michel_coords_pred,markersize=1,color=Michel_pred_clusters,cmin=0,cmax=20,colorscale=plotly.colors.qualitative.D3)
trace[-1].name = 'Predicted Michel clusters'
fig = go.Figure(data=trace,layout=plotly_layout3d())
fig.update_layout(legend=dict(x=1.0, y=0.8))
iplot(fig)
Michel candidates will be predicted Michel clusters (as determined with DBSCAN just above) that also touch the edge of a predicted Track cluster. This means 2 criteria:
# Radius (in voxel units) of the ball removed around the Michel/track
# contact point when testing whether the attachment is at a track end.
ablation_radius = 15
from scipy.spatial.distance import cdist
def find_Michel_candidates(Michel_pred_clusters,
                           Track_pred_clusters,
                           Michel_coords_pred,
                           Track_coords_pred):
    """Flag predicted Michel clusters that touch the edge of a predicted track.

    Parameters
    ----------
    Michel_pred_clusters : np.ndarray
        DBSCAN label per predicted-Michel voxel (-1 = noise).
    Track_pred_clusters : np.ndarray
        DBSCAN label per predicted-track voxel (-1 = noise).
    Michel_coords_pred : np.ndarray
        (N, 3) coordinates of the predicted-Michel voxels.
    Track_coords_pred : np.ndarray
        (M, 3) coordinates of the predicted-track voxels.

    Returns
    -------
    (is_edge, is_attached) : tuple of np.ndarray of bool
        One entry per non-noise Michel cluster id (in np.unique order).
        `is_attached`: the cluster comes within `eps` of a clustered track
        voxel.  `is_edge`: after removing a ball of radius `ablation_radius`
        around the closest track point, the remaining track voxels form a
        single DBSCAN cluster — i.e. the contact point is a track end, not
        the middle of the track.

    Notes
    -----
    Relies on the module-level `eps`, `ablation_radius` and the sklearn
    `DBSCAN` import.
    """
    Michel_pred_clusters_id = np.unique(Michel_pred_clusters[Michel_pred_clusters > -1])
    # FIX: with no non-noise track voxels, the original crashed on
    # np.min of an empty distance matrix; nothing can be attached then.
    track_mask = Track_pred_clusters > -1
    if np.count_nonzero(track_mask) == 0:
        n = len(Michel_pred_clusters_id)
        return np.zeros(n, dtype=bool), np.zeros(n, dtype=bool)
    Michel_is_edge, Michel_is_attached = [], []
    for Michel_id in Michel_pred_clusters_id:
        current_index = (Michel_pred_clusters == Michel_id)
        # Distance from every voxel of this Michel cluster to every
        # clustered track voxel.
        distances = cdist(Michel_coords_pred[current_index], Track_coords_pred[track_mask])
        is_attached = np.min(distances) < eps
        is_edge = False
        if is_attached:
            # Closest (Michel voxel, track voxel) pair and its track cluster.
            Michel_min, Track_min = np.unravel_index(np.argmin(distances), distances.shape)
            Track_id = Track_pred_clusters[track_mask][Track_min]
            Track_min_coords = Track_coords_pred[track_mask][Track_min]
            Track_cluster_coords = Track_coords_pred[Track_pred_clusters == Track_id]
            # Ablate a ball around the contact point; if the remainder is a
            # single cluster, the Michel sits at a track end.
            ablated_cluster = Track_cluster_coords[np.linalg.norm(Track_cluster_coords-Track_min_coords, axis=1) > ablation_radius]
            if ablated_cluster.shape[0] > 0:
                new_cluster = DBSCAN(eps=eps, min_samples=5).fit(ablated_cluster).labels_
                is_edge = len(np.unique(new_cluster[new_cluster > -1])) == 1
            else:
                # The whole track fits inside the ablation ball: call it an edge.
                is_edge = True
        Michel_is_edge.append(is_edge)
        Michel_is_attached.append(is_attached)
    # FIX: force boolean dtype — np.array([]) defaults to float64, which
    # breaks the `is_edge & is_attached` bitwise-and downstream when there
    # are no Michel clusters.
    return np.array(Michel_is_edge, dtype=bool), np.array(Michel_is_attached, dtype=bool)
# Run candidate finding on this entry; each output array holds one boolean
# per non-noise predicted Michel cluster id.
is_edge, is_attached = find_Michel_candidates(Michel_pred_clusters,
Track_pred_clusters,
Michel_coords_pred,
Track_coords_pred)
# Display the flags (notebook cell output).
is_edge, is_attached
(array([ True]), array([ True]))
We loop over the true Michel clusters inside the event. We match them with a Michel cluster candidate by maximizing the overlap voxel count.
def compute_metrics(input_data,
                    Michel_true_clusters,
                    Michel_pred_clusters,
                    Michel_coords,
                    Michel_coords_pred,
                    candidates):
    """Match true Michel clusters to candidate clusters and tabulate overlap.

    Each true Michel cluster is matched to the candidate (predicted) Michel
    cluster with which it shares the most closest voxels (within `eps`);
    unmatched true clusters are skipped.

    Parameters
    ----------
    input_data : np.ndarray
        Per-voxel array; columns 1:4 are x,y,z, column 4 the voxel value.
    Michel_true_clusters : np.ndarray
        True cluster id per true-Michel voxel (-1 = unassigned).
    Michel_pred_clusters : np.ndarray
        DBSCAN label per predicted-Michel voxel (-1 = noise).
    Michel_coords, Michel_coords_pred : np.ndarray
        (N, 3) coordinates of true / predicted Michel voxels.
    candidates : np.ndarray of bool
        Mask over predicted-Michel voxels that passed candidate selection.

    Returns
    -------
    pd.DataFrame
        One row per matched true cluster with pixel counts, value sums and
        voxel-level overlap statistics.

    Notes
    -----
    Relies on module-level `segment_label`, `segment_pred`, `Michel_label`
    and `eps`.
    """
    Michel_true_clusters_id = np.unique(Michel_true_clusters[Michel_true_clusters > -1])
    metrics = {
        "michel_true_num_pix": [],
        "michel_pred_num_pix": [],
        "michel_true_sum_pix": [],
        "michel_pred_sum_pix": [],
        "overlap_num" : [],
        "overlap_sum": []
    }
    for Michel_id in Michel_true_clusters_id:
        current_index = Michel_true_clusters == Michel_id
        distances = cdist(Michel_coords_pred[candidates], Michel_coords[current_index])
        # FIX: the original tested shape[0] twice (and only printed);
        # an empty axis on either side makes the argmin below invalid,
        # so skip this true cluster instead.
        if distances.shape[0] == 0 or distances.shape[1] == 0:
            continue
        # For each true voxel, the predicted cluster of its closest candidate voxel.
        closest_clusters = Michel_pred_clusters[candidates][np.argmin(distances, axis=0)]
        # FIXME what if closest voxel is labelled -1 but legit Michel?
        closest_clusters_matching = closest_clusters[(closest_clusters > -1) & (np.min(distances, axis=0) < eps)]
        if len(closest_clusters_matching) == 0:
            continue
        # Index of Michel predicted cluster that overlaps the most
        closest_pred_id = np.bincount(closest_clusters_matching).argmax()
        if closest_pred_id < 0:
            continue
        closest_Michel_pred = Michel_coords_pred[Michel_pred_clusters == closest_pred_id]
        # Compute overlap: count true voxels whose coordinates coincide
        # exactly with a voxel of the matched predicted cluster, and sum
        # their values (column 4).
        overlap_num, overlap_sum = 0., 0.
        for v in input_data[segment_label == Michel_label][current_index]:
            count = int(np.any(np.all(v[1:4] == closest_Michel_pred, axis=1)))
            overlap_num += count
            if count > 0:
                overlap_sum += v[4]
        metrics["overlap_num"].append(overlap_num)
        metrics["overlap_sum"].append(overlap_sum)
        # Other metrics: sizes and value sums of the two matched clusters.
        metrics["michel_pred_num_pix"].append(np.count_nonzero(Michel_pred_clusters == closest_pred_id))
        metrics["michel_true_num_pix"].append(np.count_nonzero(current_index))
        metrics["michel_pred_sum_pix"].append(input_data[segment_pred == Michel_label][Michel_pred_clusters == closest_pred_id, 4].sum())
        metrics["michel_true_sum_pix"].append(input_data[segment_label == Michel_label][current_index, 4].sum())
    return pd.DataFrame(metrics)
# Boolean mask over predicted-Michel voxels belonging to clusters that pass
# both candidate criteria (attached to a track AND at its edge).
candidates = np.isin(Michel_pred_clusters, Michel_pred_clusters_id[is_edge & is_attached])
metrics = compute_metrics(input_data,
Michel_true_clusters,
Michel_pred_clusters,
Michel_coords,
Michel_coords_pred,
candidates)
# Display the per-true-cluster metrics table.
metrics
| michel_true_num_pix | michel_pred_num_pix | michel_true_sum_pix | michel_pred_sum_pix | overlap_num | overlap_sum | |
|---|---|---|---|---|---|---|
| 0 | 85 | 87 | 39.186994 | 39.382535 | 85.0 | 39.186994 |
As you may have noted, there are not too many Michel electrons in one event. There may even be none. To accumulate higher-statistics metrics, we have to loop over many events.
#%%capture
# Accumulate candidate/truth matching metrics over 100 batches.
metrics = None
missed = 0  # entries with true Michel voxels but no predicted Michel voxels
for idx in tqdm(range(100)):
    data, output = hs.trainer.forward(hs.data_io_iter)
    for entry in range(batch_size):
        # Unpack this entry.  Note that segment_label / segment_pred are
        # module-level names also read inside compute_metrics.
        clust_label = data['cluster_label'][entry]
        input_data = data['input_data'][entry]
        segment_label = data['segment_label'][entry][:, -1]
        segment_pred = output['segmentation'][entry].argmax(axis=1)
        Track_coords = input_data[segment_label == Track_label][:, 1:4]
        Michel_coords = input_data[segment_label == Michel_label][:, 1:4]
        if Michel_coords.shape[0] == 0:
            # No true Michel voxels in this entry — nothing to evaluate.
            continue
        Track_coords_pred = input_data[segment_pred == Track_label][:, 1:4]
        Michel_coords_pred = input_data[segment_pred == Michel_label][:, 1:4]
        if Michel_coords_pred.shape[0] == 0:
            # A true Michel exists but nothing was predicted: count it missed.
            missed += 1
            continue
        # Cluster truth and predictions exactly as in the single-entry demo.
        Michel_true_clusters = clust_label[clust_label[:, -1] == Michel_label][:, -3].astype(np.int64)
        Track_pred_clusters = DBSCAN(eps=eps, min_samples=5).fit(Track_coords_pred).labels_
        Michel_pred_clusters = DBSCAN(eps=2*eps, min_samples=5).fit(Michel_coords_pred).labels_
        Michel_pred_clusters_id = np.unique(Michel_pred_clusters[Michel_pred_clusters > -1])
        is_edge, is_attached = find_Michel_candidates(Michel_pred_clusters,
                                                      Track_pred_clusters,
                                                      Michel_coords_pred,
                                                      Track_coords_pred)
        candidates = np.isin(Michel_pred_clusters,
                             Michel_pred_clusters_id[is_edge & is_attached])
        if np.count_nonzero(candidates) == 0:
            # No Michel candidate survived the edge/attachment criteria.
            continue
        result = compute_metrics(input_data,
                                 Michel_true_clusters,
                                 Michel_pred_clusters,
                                 Michel_coords,
                                 Michel_coords_pred,
                                 candidates)
        # Append this entry's rows to the running table.
        metrics = result if metrics is None else pd.concat([metrics, result])
We use voxel count on the x-axis instead of the "reconstructed charge".
# Show the accumulated metrics DataFrame (notebook cell output).
metrics
| michel_true_num_pix | michel_pred_num_pix | michel_true_sum_pix | michel_pred_sum_pix | overlap_num | overlap_sum | |
|---|---|---|---|---|---|---|
| 0 | 52 | 53 | 30.987504 | 31.475994 | 52.0 | 30.987504 |
| 0 | 46 | 51 | 28.517298 | 25.406504 | 45.0 | 21.689628 |
| 0 | 78 | 69 | 39.940113 | 34.143687 | 63.0 | 32.165350 |
| 0 | 37 | 37 | 22.231038 | 17.427203 | 36.0 | 17.340804 |
| 0 | 60 | 61 | 35.605816 | 28.329325 | 57.0 | 27.592317 |
| ... | ... | ... | ... | ... | ... | ... |
| 0 | 139 | 78 | 75.077820 | 46.738332 | 78.0 | 46.738332 |
| 0 | 67 | 57 | 31.526055 | 24.710911 | 57.0 | 24.710911 |
| 0 | 122 | 94 | 62.243649 | 46.339559 | 94.0 | 46.339559 |
| 0 | 65 | 67 | 28.473585 | 32.190850 | 65.0 | 28.473585 |
| 0 | 37 | 37 | 20.567448 | 20.567448 | 37.0 | 20.567448 |
417 rows × 6 columns
# Histogram of voxel counts: matched true Michels vs selected candidates.
r = [0, 200]
b = 20
for column, label in [('michel_true_num_pix', "True Michel (primary)"),
                      ('michel_pred_num_pix', "Candidate Michel (primary)")]:
    plt.hist(metrics[column], range=r, bins=b, histtype='step', label=label)
plt.xlabel('Voxel count')
plt.ylabel('Michel candidates')
plt.legend()
<matplotlib.legend.Legend at 0x7f4d22dd0e80>