Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
ml4zdc
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Mason Housenga
ml4zdc
Commits
bc42e2cf
Commit
bc42e2cf
authored
9 months ago
by
akshayv4
Browse files
Options
Downloads
Patches
Plain Diff
Update 2 files
- /ml/rootToNumpy.py - /ml/basic_tree_based_models.py
parent
54a3c7cf
Branches
initial-model
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
ml/basic_tree_based_models.py
+81
-0
81 additions, 0 deletions
ml/basic_tree_based_models.py
ml/rootToNumpy.py
+7
-2
7 additions, 2 deletions
ml/rootToNumpy.py
with
88 additions
and
2 deletions
ml/basic_tree_based_models.py
0 → 100644
+
81
−
0
View file @
bc42e2cf
from
sklearn.ensemble
import
RandomForestRegressor
,
HistGradientBoostingRegressor
from
sklearn.model_selection
import
train_test_split
,
cross_val_score
import
numpy
as
np
import
matplotlib.pyplot
as
plot
####################################################################################
#
# MAKE SURE THAT rootToNumpy.py RUNS
# DO NOT RUN THIS FILE UNTIL BOTH .npy
# FILES HAVE BEEN CREATED BY rootToNumpy.py
#
####################################################################################
def main():
    """Train and evaluate a gradient-boosting regressor on each ZDC side.

    Loads the side A and side C arrays (the .npy files that the banner
    comment above says rootToNumpy.py must have produced) from the current
    working directory. For each side the model is evaluated twice:

    * "WITH RPD": columns 0-4 are the features and column 5 is the target.
    * "NO RPD":   column 4 is removed first, leaving columns 0-3 as the
      features and the (shifted) column 4 as the target.

    Results are printed by xg_train_and_test; nothing is returned.
    """
    tree_depth = 4
    training_ratio = 0.05

    # Column removed for the "NO RPD" runs. Presumably this is the RPD
    # feature written by rootToNumpy.py -- confirm against that script.
    rpd_column = 4

    side_files = (
        ("Side A", "zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA.npy"),
        ("Side C", "zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC.npy"),
    )

    # Load each file once. np.delete returns a new array and leaves its
    # input untouched, so the no-RPD variants can be derived from the
    # already-loaded data instead of re-reading the same file from disk.
    with_rpd = [(side, np.load(path)) for side, path in side_files]
    no_rpd = [(side, np.delete(data, rpd_column, 1)) for side, data in with_rpd]

    # For a quicker model comparison, rf_cross_val / xg_cross_val can be
    # called here on a subset of rows (e.g. data[:10000]) with the same
    # feature/target column split as below.

    # Evaluation order is kept as: A with, C with, A without, C without.
    for side, data in with_rpd:
        xg_train_and_test(
            data[:, :5], data[:, 5], tree_depth, training_ratio,
            side + " (WITH RPD)",
        )
    for side, data in no_rpd:
        xg_train_and_test(
            data[:, :4], data[:, 4], tree_depth, training_ratio,
            side + " (NO RPD)",
        )
def rf_cross_val(X, y, tree_depth):
    """Print 5-fold cross-validation scores for a random-forest regressor.

    Args:
        X: Feature matrix passed straight to scikit-learn.
        y: Target values, one per row of X.
        tree_depth: Maximum depth of each tree in the forest.

    Returns:
        None. The array of per-fold scores is printed to stdout.
    """
    regr = RandomForestRegressor(max_depth=tree_depth, random_state=2)
    # No up-front regr.fit(X, y): cross_val_score clones the estimator and
    # fits a fresh copy on every fold, so a prior fit on the full data would
    # only cost time and never influence the printed scores.
    print(cross_val_score(regr, X, y, cv=5))
def xg_cross_val(X, y, tree_depth):
    """Print 5-fold cross-validation scores for a gradient-boosting regressor.

    Despite the "xg" prefix, the model is scikit-learn's
    HistGradientBoostingRegressor, not XGBoost.

    Args:
        X: Feature matrix passed straight to scikit-learn.
        y: Target values, one per row of X.
        tree_depth: Maximum depth of each boosted tree.

    Returns:
        None. The array of per-fold scores is printed to stdout.
    """
    regr = HistGradientBoostingRegressor(max_depth=tree_depth)
    # No up-front regr.fit(X, y): cross_val_score clones the estimator and
    # fits a fresh copy on every fold, so a prior fit on the full data would
    # only cost time and never influence the printed scores.
    print(cross_val_score(regr, X, y, cv=5))
def xg_train_and_test(X, y, tree_depth, training_ratio, name):
    """Fit a gradient-boosting regressor and report hold-out and CV scores.

    The data is split randomly (no fixed seed, so results vary between
    runs), a HistGradientBoostingRegressor is trained on the training part,
    and its R^2 on the held-out part is printed. A 5-fold cross-validation
    over the full data set is then run and printed as well.

    Args:
        X: Feature matrix.
        y: Target values, one per row of X.
        tree_depth: Maximum depth of each boosted tree.
        training_ratio: Value forwarded as train_size to train_test_split
            (e.g. 0.05 trains on 5% of the rows and tests on the rest).
        name: Label printed before the results to identify the run.

    Returns:
        None. All results are printed to stdout.
    """
    print(name)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=training_ratio
    )

    model = HistGradientBoostingRegressor(max_depth=tree_depth)
    model.fit(X_train, y_train)

    # Evaluate the model against the held-out test set.
    y_pred = model.predict(X_test)
    y_true = np.asarray(y_test)

    # R^2 = 1 - SSE / SST, computed with vectorised NumPy operations: the
    # test set can be most of the data, so per-element Python loops are slow.
    sse = np.sum((y_true - y_pred) ** 2)
    sst = np.sum((y_true - y_true.mean()) ** 2)
    accuracy = 1 - sse / sst
    # The value is a coefficient of determination, not a classification
    # accuracy; the printed label is kept unchanged so existing output
    # stays comparable.
    print("Test accuracy:", accuracy)

    # Cross-validation on the full data set; cross_val_score refits clones
    # of the model, so the fit above does not leak into these scores.
    scores = cross_val_score(model, X, y, cv=5)
    print("Cross-validation scores:", scores)
    print("Mean cross-validation score:", scores.mean())
    print()
# Run the training/evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
ml/rootToNumpy.py
+
7
−
2
View file @
bc42e2cf
...
@@ -2,8 +2,9 @@ import ROOT
...
@@ -2,8 +2,9 @@ import ROOT
import
numpy
as
np
import
numpy
as
np
from
json
import
dumps
from
json
import
dumps
FILE_GLOB
=
"
.
.
/data/zdcTopoAnalysis_1N.root
"
FILE_GLOB
=
"
./data/zdcTopoAnalysis_1N.root
"
# To run, call python3 ml/rootToNumpy.py in command prompt from top of ml4zdc repo
def
main
():
def
main
():
"""
"""
...
@@ -29,7 +30,7 @@ def main():
...
@@ -29,7 +30,7 @@ def main():
"""
"""
dataframe
=
ROOT
.
RDataFrame
(
"
zdcTree
"
,
FILE_GLOB
)
dataframe
=
ROOT
.
RDataFrame
(
"
zdcTree
"
,
FILE_GLOB
)
print
(
dataframe
)
# print all columns and their types
# print all columns and their types
columns
=
[
str
(
col
)
for
col
in
dataframe
.
GetColumnNames
()]
columns
=
[
str
(
col
)
for
col
in
dataframe
.
GetColumnNames
()]
columns_and_types
=
{
col
:
dataframe
.
GetColumnType
(
col
)
for
col
in
columns
}
columns_and_types
=
{
col
:
dataframe
.
GetColumnType
(
col
)
for
col
in
columns
}
...
@@ -70,6 +71,10 @@ def main():
...
@@ -70,6 +71,10 @@ def main():
print
(
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA
.
shape
)
print
(
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA
.
shape
)
print
(
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA
)
print
(
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA
)
np
.
save
(
"
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC.npy
"
,
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideC
)
np
.
save
(
"
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA.npy
"
,
zdc_ZdcModuleTruthEMNonEM_fullNumpy_sideA
)
return
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
main
()
main
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment