Commit 735ceab5 authored by r-sowers
updated error analyses notebooks

parent 3dbe5ada
%% Cell type:markdown id: tags:
<img src="logo.png" alt="University of Illinois" style="width: 200px;"/>
### Seasonal Error Analysis ###
by: Richard Sowers
* <r-sowers@illinois.edu>
* <https://publish.illinois.edu/r-sowers/>
Copyright 2019 University of Illinois Board of Trustees. All Rights Reserved. Licensed under the MIT license
### Explanation ###
This notebook plots error analyses for the Manhattan traffic data.
%% Cell type:markdown id: tags:
imports
%% Cell type:code id: tags:
``` python
import pandas
import numpy
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.interpolate
import scipy.optimize
```
%% Cell type:code id: tags:
``` python
def saver(fname):
    """Save the current figure as a tightly cropped PNG."""
    plt.savefig(fname + ".png", bbox_inches="tight")

# global matplotlib styling
params = {
    #"font.size": 20,
    "figure.titlesize": "large",
    "lines.linewidth": 3,
    #"legend.fontsize": "small",
    #"xtick.labelsize": "x-small",
    #"ytick.labelsize": "x-small",
    #"axes.labelsize": 'small',
}
plt.rcParams.update(params)
```
%% Cell type:markdown id: tags:
constants
%% Cell type:code id: tags:
``` python
colorsequence = ['b', 'g', 'r', 'c', 'm', 'y', 'k']  # one color per rank
stylesequence = ["-", ":"]                           # one line style per season
```
%% Cell type:code id: tags:
``` python
class processor:
    """Slice a (rank, beta)-indexed error/sparsity DataFrame by rank."""
    def __init__(self, df):
        self.rank_vals = pandas.unique(df.index.get_level_values("rank"))
        self.df = df.dropna(axis="index")
    def by_penalty(self, rank):
        # all rows for one rank, indexed by the penalty beta
        temp = self.df.groupby(by="rank").get_group(rank)
        return temp.reset_index(level="rank", drop=True)
    def sparsity_by_penalty(self, rank):
        return self.by_penalty(rank)["sparsity"]
    def error_by_sparsity(self, rank):
        # error as a function of sparsity, sorted by sparsity
        temp = self.by_penalty(rank)
        temp = temp.set_index(keys="sparsity", drop=True)["error"]
        return temp.sort_index(axis="index")
```
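%% Cell type:markdown id: tags:
A minimal usage sketch of `processor` (illustrative only; the `_toy` frame and the `_toy`/`_p` names are made up, and this cell is not part of the original analysis). It assumes the same layout as the data loaded below: a `(rank, beta)` MultiIndex with `error` and `sparsity` columns.
%% Cell type:code id: tags:
``` python
# toy DataFrame with the expected (rank, beta) index and error/sparsity columns
_toy = pandas.DataFrame(
    {"error": [1.0, 0.9, 2.0, 1.8], "sparsity": [0.5, 0.6, 0.4, 0.55]},
    index=pandas.MultiIndex.from_tuples(
        [(2, 0.0), (2, 1000.0), (4, 0.0), (4, 1000.0)], names=["rank", "beta"]),
)
_p = processor(_toy)
print(_p.rank_vals)             # [2 4]
print(_p.error_by_sparsity(2))  # error indexed by sparsity, sorted by sparsity
```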
%% Cell type:code id: tags:
``` python
class monotone_invert:
    """Fit a monotone piecewise-linear approximation to a Series
    (index = t, values = y) by constrained least squares."""
    def __init__(self, df, sign="increasing"):
        self.df = df
        self.tvals = numpy.array(self.df.index)
        self.yvals = self.df.to_numpy()
        if len(self.df) < 2:
            return
        self.N = len(self.df)
        # lower-triangular matrix of ones: (L @ d)[k] = d[0] + ... + d[k],
        # so constraining d >= 0 (or <= 0) makes L @ d monotone
        self.L = numpy.tril(numpy.ones(shape=(self.N, self.N)), k=0)
        self.ctr = 1
        x0 = [numpy.mean(self.yvals) / self.N] * self.N

        def objective(d):
            error = self.yvals - self.L.dot(d)
            return 0.5 * error.dot(error)

        def jacobian(d):  # not used by COBYLA
            error = self.yvals - self.L.dot(d)
            return -self.L.T.dot(error)

        def hessian(d):  # not used by COBYLA
            return self.L.T.dot(self.L)

        print(self.N)
        pm = 1
        if sign == "decreasing":
            pm = -1
        constraints = {"type": "ineq", "fun": lambda x: pm * x}
        res = scipy.optimize.minimize(objective, x0=x0, method="COBYLA", constraints=constraints)
        print(res)
        d_best = res.x
        self.y_approx_vals = self.L.dot(d_best)
        print("y_approx", self.y_approx_vals)
        self.linapprox = scipy.interpolate.interp1d(self.tvals, self.y_approx_vals, copy=True, bounds_error=True)

    def inc_approx(self, t):
        """Evaluate the monotone approximation at t (NaN outside the fitted range)."""
        if not (min(self.tvals) <= t <= max(self.tvals)):
            return numpy.nan
        return self.linapprox(t).item()
        # unreachable leftover from an inversion routine (yval is undefined here):
        # tval = scipy.optimize.brentq(lambda x: self.linapprox(x) - yval, min(self.tvals), max(self.tvals))
        # return tval
```
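%% Cell type:markdown id: tags:
The class above fits a monotone, piecewise-linear approximation to each error-vs-sparsity curve. With $L$ the lower-triangular matrix of ones, $(Ld)_k = d_0 + \dots + d_k$ is a cumulative sum, so minimizing $\tfrac{1}{2}\lVert y - Ld\rVert^2$ subject to $d \ge 0$ (or $d \le 0$ for `sign="decreasing"`) forces the fitted values to be monotone; `interp1d` then interpolates them linearly between the observed sparsity values. A minimal sketch on toy data (illustrative only; the `toy_*` names are made up and this cell is not part of the original analysis):
%% Cell type:code id: tags:
``` python
# toy check of the cumulative-sum parametrization used in monotone_invert
toy_y = numpy.array([1.0, 1.4, 1.3, 1.8, 2.1, 2.0])   # noisy, roughly increasing
toy_N = len(toy_y)
toy_L = numpy.tril(numpy.ones((toy_N, toy_N)))         # (L @ d)[k] = d[0] + ... + d[k]
toy_obj = lambda d: 0.5 * numpy.sum((toy_y - toy_L.dot(d)) ** 2)
toy_res = scipy.optimize.minimize(
    toy_obj, x0=[numpy.mean(toy_y) / toy_N] * toy_N,
    method="COBYLA", constraints={"type": "ineq", "fun": lambda d: d})
toy_fit = toy_L.dot(toy_res.x)
print(toy_fit)
print(numpy.all(numpy.diff(toy_fit) >= -1e-6))         # nondecreasing up to the solver's constraint tolerance
```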
%% Cell type:code id: tags:
``` python
#fname="LevelCurveData2"
fname="fall_values_COMBINED"
```
%% Cell type:markdown id: tags:
read data
%% Cell type:code id: tags:
``` python
# ' nan' (with a leading space) also appears in the CSV, so both spellings count as missing
data_raw = pandas.read_csv(fname + ".csv", na_values=['nan', ' nan'])
print(data_raw.head())
```
%% Output
rank beta error_year_preAxing error_year_postAxing \
0 40.0 0.0 26.638031 41.724601
1 NaN NaN NaN NaN
2 40.0 1000.0 26.958008 41.843305
3 NaN NaN NaN NaN
4 40.0 2000.0 26.952959 41.781512
error_fall_preAxing error_fall_postAxing sparsity_preAxing \
0 27.410104 42.350878 0.675348
1 NaN NaN NaN
2 27.733881 42.377424 0.694921
3 NaN NaN NaN
4 27.722486 42.350216 0.716475
sparsity_postAxing
0 0.838865
1 NaN
2 0.851688
3 NaN
4 0.865042
%% Cell type:code id: tags:
``` python
data = data_raw.copy()
data.columns = [colname.strip() for colname in data.columns]   # strip padded column names
data = data.dropna(axis='index', subset=['rank', 'beta'])      # drop the blank spacer rows
data["rank"] = data["rank"].astype('int')
data = data.set_index(keys=["rank", "beta"])                   # index by (rank, beta)
print(data.head())
print(data.columns)
```
%% Output
error_year_preAxing error_year_postAxing error_fall_preAxing \
rank beta
40 0.0 26.638031 41.724601 27.410104
1000.0 26.958008 41.843305 27.733881
2000.0 26.952959 41.781512 27.722486
3000.0 26.990673 41.414115 27.758684
4000.0 27.039437 41.120108 27.807777
error_fall_postAxing sparsity_preAxing sparsity_postAxing
rank beta
40 0.0 42.350878 0.675348 0.838865
1000.0 42.377424 0.694921 0.851688
2000.0 42.350216 0.716475 0.865042
3000.0 42.045900 0.730274 0.872522
4000.0 41.763680 0.740969 0.878487
Index(['error_year_preAxing', 'error_year_postAxing', 'error_fall_preAxing',
'error_fall_postAxing', 'sparsity_preAxing', 'sparsity_postAxing'],
dtype='object')
%% Cell type:code id: tags:
``` python
dataDict={}
```
%% Cell type:code id: tags:
``` python
data_year=data[["error_year_preAxing","sparsity_preAxing"]]
data_year=data_year.rename(mapper={"error_year_preAxing":"error","sparsity_preAxing":"sparsity"},axis="columns")
data_year.head()
dataDict["year"]=data_year
```
%% Cell type:code id: tags:
``` python
data_fall=data[["error_fall_preAxing","sparsity_preAxing"]]
data_fall=data_fall.rename(mapper={"error_fall_preAxing":"error","sparsity_preAxing":"sparsity"},axis="columns")
data_fall.head()
dataDict["fall"]=data_fall
```
%% Cell type:code id: tags:
``` python
pDict={season:processor(dataDict[season]) for season in dataDict.keys()}
```
%% Cell type:code id: tags:
``` python
plt.figure()
for n, rank in enumerate(pDict["year"].rank_vals):
    df = pDict["year"].sparsity_by_penalty(rank)
    plt.plot(df.index, df.values, label="N={:}".format(rank), color=colorsequence[n])
plt.legend(bbox_to_anchor=(1.1, 1))
plt.xlabel("beta")
plt.ylabel("sparsity")
plt.title("sparsity as a function of penalty",fontsize="xx-large")
saver("sparsity_by_penalty_seasonal")
plt.show()
plt.close()
```
%% Output
%% Cell type:code id: tags:
``` python
plt.figure()
for n, season in enumerate(dataDict.keys()):
    for nn, rank in enumerate(pDict[season].rank_vals):
        df = pDict[season].error_by_sparsity(rank)
        plt.plot(df.index, df.values, label=season + "; N={:}".format(rank),
                 color=colorsequence[nn], linestyle=stylesequence[n])
plt.legend(bbox_to_anchor=(1.1, 1))
plt.xlabel("sparsity")
plt.ylabel("error")
plt.title("error as a function of sparsity",fontsize="xx-large")
saver("error_by_sparsity_seasonal")
plt.show()
plt.close()
```
%% Output
%% Cell type:code id: tags:
``` python
fDict={}
for season in dataDict.keys():
    fDict[season] = {rank: monotone_invert(pDict[season].error_by_sparsity(rank)) for rank in pDict[season].rank_vals}
```
%% Output
9
fun: 7.132430141822445e-06
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 894
status: 1
success: True
x: array([2.66372674e+01, 3.18616516e-01, 4.40280354e-21, 3.40886982e-02,
4.96443675e-02, 6.78453293e-02, 9.29968697e-02, 1.12435298e-01,
1.58159535e-01])
y_approx [26.63726738 26.9558839 26.9558839 26.9899726 27.03961696 27.10746229
27.20045916 27.31289446 27.471054 ]
8
fun: 0.0017795021108989624
maxcv: 9.899114871240379e-20
message: 'Optimization terminated successfully.'
nfev: 583
status: 1
success: True
x: array([ 2.53616629e+01, 2.57533934e-01, -6.93976136e-20, -9.89911487e-20,
1.07118695e-01, 1.05529569e-01, 1.76549560e-01, 1.23894889e-01])
y_approx [25.36166289 25.61919682 25.61919682 25.61919682 25.72631552 25.83184509
26.00839465 26.13228954]
5
fun: 0.0008590621772018529
maxcv: 3.3881317890170426e-21
message: 'Optimization terminated successfully.'
nfev: 376
status: 1
success: True
x: array([ 2.43728220e+01, -3.38813179e-21, 1.99339048e-01, 1.51237338e-01,
1.79932175e-01])
y_approx [24.37282196 24.37282196 24.57216101 24.72339835 24.90333052]
4
fun: 0.001068647311509867
maxcv: 6.776263578034251e-21
message: 'Optimization terminated successfully.'
nfev: 167
status: 1
success: True
x: array([ 2.35373237e+01, 1.86633477e-01, -6.77626358e-21, 2.65040154e-01])
y_approx [23.53732367 23.72395715 23.72395715 23.9889973 ]
2
fun: 6.7002469141090226e-09
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 79
status: 1
success: True
x: array([22.63634145, 0.14971901])
y_approx [22.63634145 22.78606045]
9
fun: 3.510115132378873e-05
maxcv: 1.4546810998747153e-20
message: 'Optimization terminated successfully.'
nfev: 811
status: 1
success: True
x: array([ 2.74085714e+01, 3.20392504e-01, -1.45468110e-20, 2.92111455e-02,
4.93564043e-02, 7.08905340e-02, 9.42394896e-02, 1.16246374e-01,
1.61321703e-01])
y_approx [27.40857144 27.72896394 27.72896394 27.75817509 27.80753149 27.87842202
27.97266151 28.08890789 28.25022959]
8
fun: 0.0016831224316709828
maxcv: 1.3135861272349847e-20
message: 'Optimization terminated successfully.'
nfev: 656
status: 1
success: True
x: array([ 2.61398064e+01, 2.78284812e-01, -1.24419719e-20, -1.31358613e-20,
1.16613513e-01, 1.00491128e-01, 1.70526604e-01, 1.18523312e-01])
y_approx [26.1398064 26.41809121 26.41809121 26.41809121 26.53470472 26.63519585
26.80572245 26.92424577]
5
fun: 0.0019902916617093335
maxcv: -0.0
message: 'Optimization terminated successfully.'
nfev: 330
status: 1
success: True
x: array([25.02340161, 0. , 0.19259593, 0.16557375, 0.18968663])
y_approx [25.02340161 25.02340161 25.21599754 25.3815713 25.57125793]
4
fun: 0.0013820258944647282
maxcv: 4.68583827096163e-22
message: 'Optimization terminated successfully.'
nfev: 262
status: 1
success: True
x: array([ 2.42317132e+01, 1.80481528e-01, -4.68583827e-22, 2.66855950e-01])
y_approx [24.23171316 24.41219469 24.41219469 24.67905064]
2
fun: 9.01398407261569e-09
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 59
status: 1
success: True
x: array([23.27293882, 0.11570728])
y_approx [23.27293882 23.3886461 ]
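%% Cell type:markdown id: tags:
Each `fDict[season][rank]` now holds a monotone approximation of error as a function of sparsity, and `inc_approx` evaluates it at an arbitrary sparsity level (returning NaN outside the fitted range). A minimal illustrative check (the `_mi` name and the value 0.7 are arbitrary choices, not part of the original analysis):
%% Cell type:code id: tags:
``` python
# illustrative only: evaluate one fitted error-vs-sparsity curve off-grid
_mi = fDict["year"][pDict["year"].rank_vals[0]]
print(_mi.inc_approx(0.7))  # interpolated error at sparsity 0.7, or NaN if 0.7 is outside the fitted range
```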
%% Cell type:code id: tags:
``` python
sparsityvals=numpy.linspace(start=0.68,stop=0.83,num=5)
print(sparsityvals)
```
%% Output
[0.68 0.7175 0.755 0.7925 0.83 ]
%% Cell type:code id: tags:
``` python
plt.figure()
for n, season in enumerate(dataDict.keys()):
    for nn, sparsity in enumerate(sparsityvals):
        errvals = [fDict[season][rank].inc_approx(sparsity) for rank in pDict[season].rank_vals]
        plt.plot(pDict[season].rank_vals, errvals, linewidth=5,
                 label=season + "; sparsity={:.2f}".format(sparsity),
                 color=colorsequence[nn], linestyle=stylesequence[n])
plt.legend(bbox_to_anchor=(1.1, 1))
plt.xlabel("rank")
plt.ylabel("error")
plt.title("error as a function of rank",fontsize="xx-large")
saver("error_by_rank_seasonal")
plt.show()
plt.close()
```
%% Output
%% Cell type:code id: tags:
``` python
```
ErrorAnalysis/compare.png (45.1 KiB)
ErrorAnalysis/error_by_penalty_fall.png (28.4 KiB)
ErrorAnalysis/error_by_rank_seasonal.png (39.7 KiB)
ErrorAnalysis/error_by_sparsity_seasonal.png (26.7 KiB)