Skip to content
Snippets Groups Projects
Commit bc42e2cf authored by akshayv4's avatar akshayv4
Browse files

Update 2 files

- /ml/rootToNumpy.py
- /ml/basic_tree_based_models.py
parent 54a3c7cf
Branches initial-model
No related tags found
No related merge requests found
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
import matplotlib.pyplot as plot
####################################################################################
#
# MAKE SURE THAT rootToNumpy.py HAS BEEN RUN SUCCESSFULLY FIRST.
# DO NOT RUN THIS FILE UNTIL BOTH .npy
# FILES HAVE BEEN CREATED BY rootToNumpy.py
#
####################################################################################
def main():
    """Train and evaluate boosted-tree regressors on the ZDC side A / side C arrays.

    Loads the two ``.npy`` files (which must already exist in the working
    directory) and runs ``xg_train_and_test`` on four configurations:

    * "WITH RPD": columns 0-4 are the features and column 5 is the target.
    * "NO RPD": column 4 is removed first, so columns 0-3 are the features
      and column 4 (originally column 5) is the target.

    NOTE(review): column 4 is presumably the RPD-derived feature -- confirm
    against the column layout produced by rootToNumpy.py.
    """
    tree_depth = 4
    training_ratio = 0.05

    # Read each file from disk only once. np.delete returns a new array, so
    # deriving the "NO RPD" variant does not modify the loaded data.
    side_a = np.load("zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA.npy")
    side_c = np.load("zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC.npy")

    # (label, data, number of feature columns); the target is the column
    # immediately after the features.
    configurations = [
        ("Side A (WITH RPD)", side_a, 5),
        ("Side C (WITH RPD)", side_c, 5),
        ("Side A (NO RPD)", np.delete(side_a, 4, 1), 4),
        ("Side C (NO RPD)", np.delete(side_c, 4, 1), 4),
    ]

    # For a quicker check, rf_cross_val / xg_cross_val accept the same
    # (features, target, tree_depth) arguments and can be run on a row
    # subset such as data[:10000].
    for name, data, n_features in configurations:
        xg_train_and_test(
            data[:, :n_features],
            data[:, n_features],
            tree_depth,
            training_ratio,
            name,
        )
def rf_cross_val(X, y, tree_depth):
    """Print the 5-fold cross-validation scores of a random-forest regressor.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Target values passed to ``cross_val_score``.
        tree_depth: ``max_depth`` of every tree in the forest.
    """
    regr = RandomForestRegressor(max_depth=tree_depth, random_state=2)
    # cross_val_score fits its own clone of the estimator on every fold, so
    # fitting `regr` on the full data beforehand would only waste time.
    print(cross_val_score(regr, X, y, cv=5))
def xg_cross_val(X, y, tree_depth):
    """Print the 5-fold cross-validation scores of a histogram gradient-boosting regressor.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Target values passed to ``cross_val_score``.
        tree_depth: ``max_depth`` of each boosted tree.
    """
    regr = HistGradientBoostingRegressor(max_depth=tree_depth)
    # cross_val_score fits its own clone of the estimator on every fold, so
    # fitting `regr` on the full data beforehand would only waste time.
    print(cross_val_score(regr, X, y, cv=5))
def xg_train_and_test(X, y, tree_depth, training_ratio, name):
    """Fit a histogram gradient-boosting regressor and print its evaluation.

    The data is split so that ``training_ratio`` of it is used for training
    and the remainder for testing. The held-out R^2 is printed under the
    label "Test accuracy", followed by 5-fold cross-validation scores
    computed on the full ``X`` / ``y``.

    Args:
        X: Feature matrix.
        y: Target values, one per row of ``X``.
        tree_depth: ``max_depth`` of each boosted tree.
        training_ratio: Value passed as ``train_size`` to ``train_test_split``.
        name: Label printed before the results.
    """
    print(name)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=training_ratio
    )
    model = HistGradientBoostingRegressor(max_depth=tree_depth)
    model.fit(X_train, y_train)

    # Evaluate the model against the held-out test set.
    y_test = np.asarray(y_test, dtype=float)
    y_pred = model.predict(X_test)

    # R^2 = 1 - SSE/SST, computed with vectorized NumPy operations instead of
    # per-element Python loops (the test split is the bulk of the data).
    residual_ss = np.sum((y_test - y_pred) ** 2)
    total_ss = np.sum((y_test - y_test.mean()) ** 2)
    accuracy = 1 - residual_ss / total_ss
    print("Test accuracy:", accuracy)

    # Cross-validation on the full data set; cross_val_score clones the
    # estimator, so the fit above does not influence these scores.
    scores = cross_val_score(model, X, y, cv=5)
    print("Cross-validation scores:", scores)
    print("Mean cross-validation score:", scores.mean())
    print()
# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
\ No newline at end of file
...@@ -2,8 +2,9 @@ import ROOT ...@@ -2,8 +2,9 @@ import ROOT
import numpy as np import numpy as np
from json import dumps from json import dumps
FILE_GLOB = "../data/zdcTopoAnalysis_1N.root" FILE_GLOB = "./data/zdcTopoAnalysis_1N.root"
# To run, call python3 ml/rootToNumpy.py in command prompt from top of ml4zdc repo
def main(): def main():
""" """
...@@ -29,7 +30,7 @@ def main(): ...@@ -29,7 +30,7 @@ def main():
""" """
dataframe = ROOT.RDataFrame("zdcTree", FILE_GLOB) dataframe = ROOT.RDataFrame("zdcTree", FILE_GLOB)
print(dataframe)
# print all columns and their types # print all columns and their types
columns = [str(col) for col in dataframe.GetColumnNames()] columns = [str(col) for col in dataframe.GetColumnNames()]
columns_and_types = {col: dataframe.GetColumnType(col) for col in columns} columns_and_types = {col: dataframe.GetColumnType(col) for col in columns}
...@@ -70,6 +71,10 @@ def main(): ...@@ -70,6 +71,10 @@ def main():
print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA.shape) print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA.shape)
print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA) print(zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA)
np.save("zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC.npy", zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC)
np.save("zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA.npy", zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA)
return
if __name__ == "__main__": if __name__ == "__main__":
main() main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment