Commit 54a3c7cf authored by Mason Housenga

Merge branch 'rootToNumpy' into 'main'

merge ml start, read TTree and export to numpy into main

See merge request !4
parents 85d27a7f 6117f255
import ROOT
import numpy as np
from json import dumps
FILE_GLOB = "../data/zdcTopoAnalysis_1N.root"
def main():
    """
    Read a ROOT file containing a TTree with RDataFrame and numpy-fy some of its data.

    Based on: https://root.cern/doc/master/tmva101__Training_8py.html
    and https://root.cern/doc/master/df026__AsNumpyArrays_8py.html
    See also (RDataFrame): https://root.cern/doc/master/classROOT_1_1RDataFrame.html
    Also helpful (cppyy, the Python-C++ bindings used by PyROOT): https://cppyy.readthedocs.io/en/latest/stl.html

    The training will be done in Python using PyTorch, TensorFlow, etc.
    Once we have a model, we will want to see what it is doing by making plots.
    This could be done with matplotlib.pyplot, but if we will be showing plots to the ZDC group,
    we will need plots made with ROOT. We can do that with PyROOT, but I think it would be ideal
    to load the (trained) model in C++ and then analyze the data as we have been doing with our
    scripts, since eventually this is what we would like to do with real data.
    Models can be exported in ONNX (Open Neural Network eXchange) format, e.g.,
    https://pytorch.org/docs/stable/onnx.html (see the export_to_onnx_sketch() sketch below).
    The model can then be loaded in Python or C++ with ROOT tools (SOFIE):
    https://indico.cern.ch/event/1176076/contributions/4939648/attachments/2474114/4245117/SOFIE@ICHEP.pdf

    It may or may not be worth extracting only the data we need (the TTree contains much more
    than that) and saving it to a file (.npy, pytables, h5py; see save_and_reload_numpy_sketch()
    below), depending on the performance of reading directly from the TTree.
    """
    dataframe = ROOT.RDataFrame("zdcTree", FILE_GLOB)

    # print all columns and their types
    columns = [str(col) for col in dataframe.GetColumnNames()]
    columns_and_types = {col: dataframe.GetColumnType(col) for col in columns}
    print("all branches and types:")
    print(dumps(columns_and_types, indent=2))

    # zdc_ZdcModuleTruthTotal is an option, but it includes "invisible" and "escaped" energy,
    # which can't be seen in our detectors, so we'll instead sum the "EM" and "non EM" energies per module
    # unlike in C++, we can't pass a callable to Define(), but we can pass a string
    # like this, which I guess will evaluate to ROOT::VecOps::operator+()
    dataframe = dataframe.Define(
        "zdc_ZdcModuleTruthEMNonEM", "zdc_ZdcModuleTruthEM + zdc_ZdcModuleTruthNonEM"
    )

    # now get the zdc_ZdcModuleTruthEMNonEM branch into a numpy array
    numpy_data = dataframe.AsNumpy(columns=["zdc_ZdcModuleTruthEMNonEM"])
    zdc_ZdcModuleTruthEMNonEM_halfNumpy = numpy_data["zdc_ZdcModuleTruthEMNonEM"]
    print("half numpy-fied zdc_ZdcModuleTruthEMNonEM:")
    print(zdc_ZdcModuleTruthEMNonEM_halfNumpy.shape)
    print(zdc_ZdcModuleTruthEMNonEM_halfNumpy)

    print("full numpy-fied zdc_ZdcModuleTruthEMNonEM:")
    zdc_ZdcModuleTruthEMNonEM_fullNumpy = np.stack(zdc_ZdcModuleTruthEMNonEM_halfNumpy)
    print(zdc_ZdcModuleTruthEMNonEM_fullNumpy.shape)
    print(zdc_ZdcModuleTruthEMNonEM_fullNumpy)

    # split the data into the different sides
    # zdc_ZdcModuleTruthEMNonEM is a vector with length 14; the two sides are concatenated
    # the last entry in each side is unused
    # for one side, the order is ["EM", "HAD1", "HAD2", "HAD3", "RPD", "BRAN", (unused)]
    zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC = zdc_ZdcModuleTruthEMNonEM_fullNumpy[:, :6]
    print("side C numpy-fied zdc_ZdcModuleTruthEMNonEM:")
    print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC.shape)
    print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC)

    zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA = zdc_ZdcModuleTruthEMNonEM_fullNumpy[:, 7:13]
    print("side A numpy-fied zdc_ZdcModuleTruthEMNonEM:")
    print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA.shape)
    print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA)
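# Hedged sketch of the ONNX export mentioned in main()'s docstring, assuming the model ends up
# being a PyTorch module. `model`, `example_input`, and "zdc_model.onnx" are placeholders, and
# torch is not otherwise a dependency of this script. Nothing above calls this function.
def export_to_onnx_sketch(model, example_input, path="zdc_model.onnx"):
    """Export a trained torch.nn.Module to ONNX so it could later be loaded from C++ (e.g., with SOFIE)."""
    import torch  # local import so the rest of the script runs without PyTorch installed

    torch.onnx.export(
        model,          # the trained torch.nn.Module
        example_input,  # a representative input tensor, used to trace the model
        path,
        input_names=["zdc_modules"],
        output_names=["prediction"],
    )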
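# Hedged sketch, also not called above: if reading straight from the TTree turns out to be slow,
# the numpy-fied branch could be cached to disk as suggested in main()'s docstring.
# The file name "zdc_truthEMNonEM.npy" is a placeholder, not something this repo defines.
def save_and_reload_numpy_sketch(array, path="zdc_truthEMNonEM.npy"):
    """Save a numpy-fied branch to a .npy file and read it back, skipping the ROOT step next time."""
    np.save(path, array)  # writes a binary .npy file
    return np.load(path)  # later runs could load this instead of re-reading the TTree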
if __name__ == "__main__":
    main()