Skip to content
Snippets Groups Projects
Commit 36b7f950 authored by r-sowers's avatar r-sowers
Browse files

Combined Intervals files into one json file, Intervals.py. Removed algorithm.py

parent 4ba2f4ff
No related branches found
No related tags found
No related merge requests found
Peter Maneykowski did some early versions of the algorithm
Peter Maneykowski (ISE undergraduate) did some early versions of the algorithm.
Nitin Srivastava (M.S. in ISE) wrote a thesis connected to this work.
\ No newline at end of file
File moved
%% Cell type:markdown id: tags:
# Code for **Task Disambiguation in Hand-Picked Agriculture**
Project Director: Richard Sowers <r-sowers@illinois.edu>
Copyright 2018 University of Illinois Board of Trustees. All Rights Reserved.
Licensed under the MIT license
%% Cell type:markdown id: tags:
imports
%% Cell type:code id: tags:
``` python
import numpy
import pandas
import pickle
import itertools
import datetime
#%matplotlib notebook
%matplotlib inline
import pytz
import matplotlib.pyplot as plotter
#import matplotlib.mlab as mlab
#import statsmodels.api as sm
imagesuffix=".png"
N_finer=10
region=pytz.timezone("America/Los_Angeles")
fname="data.csv"
```
%% Cell type:markdown id: tags:
getData function
data should be in .csv file with columns labelled "IMEI","Latitude","locationTimestamp"
* locationTimestamp should be seconds since epoch
%% Cell type:code id: tags:
``` python
def ts_to_time(ts):
return region.normalize(region.localize(datetime.datetime.fromtimestamp(ts)))
```
%% Cell type:code id: tags:
``` python
#############code to get data
class getData:
def __init__(self,fname):
#sheetname="outdata_with_time"
#raw_data = pandas.read_excel("outdata_with_time.xlsx", sheetname="outdata_with_time", header=0)
self.data=pandas.read_csv(str(fname))
self.data.columns=["IMEI","Latitude","locationTimestamp"]
self.data = self.data.drop_duplicates()
self.data["IMEI"] = self.data["IMEI"].astype("str")
self.data["datetime"]=pandas.to_datetime(self.data["locationTimestamp"].apply(ts_to_time))
#print(data)
self.IMEISet=sorted(list(frozenset(self.data["IMEI"])))
self.data["date"]=pandas.to_datetime(self.data["datetime"].apply(lambda t:t.date()))
self.data.set_index(["IMEI","locationTimestamp","datetime","date"],append=True,drop=True,inplace=True)
self.dateSet=sorted(list(frozenset(self.data.index.get_level_values("date"))))
def get(self,IMEI,DATE):
flags=numpy.logical_and(self.data.index.get_level_values("date")==DATE,
self.data.index.get_level_values("IMEI")==IMEI)
reduced_data=self.data.loc[flags]
#temp=numpy.array(reduced_data).reshape([-1,len(outlist)])
#print("shape of data: ",temp.shape)
return reduced_data
gd=getData(fname)
print(sorted(gd.IMEISet))
print(gd.dateSet)
```
%% Output
['351554053682895', '353918057262822', '353918059182986', '869578020239930']
[Timestamp('2016-02-19 00:00:00'), Timestamp('2016-02-22 00:00:00')]
%% Cell type:code id: tags:
``` python
test_imei=gd.IMEISet[0]
test_date=gd.dateSet[0]
print(test_imei)
print(test_date)
```
%% Output
351554053682895
2016-02-19 00:00:00
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
Box function is reference excursion shape
%% Cell type:code id: tags:
``` python
class Box:
def __init__(self, width=1,height=1,shift=0):
self.width=float(width)
self.height=float(height)
self.shift=float(shift)
if (self.width<0):
raise ValueError('negative width in LeftBox')
def refBox(self,x):
x=float(x)
#width=1,height=1,shift=0
return 1 if 0<=x<=1 else 0
def eval(self, x):
if self.width<=0:
return numpy.inf
return self.height * self.refBox((x-self.shift)/self.width)
def harvestFlag(self,x):
return 0<(x-self.shift)<self.width
def __le__(self,other):
if not isinstance(other, Box):
return NotImplemented
#return ((other.shift<=self.shift) and ((self.shift+self.width)<=(other.shift+other.width)))
return (other.shift<=self.shift<=(other.shift+other.width))
def __ge__(self,other):
return (other<=self)
myBox=Box()
print("B(0)=",myBox.eval(0))
xvals_b=numpy.linspace(-3,3,200)
yvals_b=numpy.array([myBox.eval(xx) for xx in xvals_b])
flags=numpy.array([myBox.harvestFlag(xx) for xx in xvals_b],dtype='bool')
plotter.figure()
plotter.plot(xvals_b,yvals_b)
plotter.plot(xvals_b[flags],yvals_b[flags],'ro',linestyle='--',linewidth=4)
plotter.ylim((-0.5,1.5))
plotter.show()
print(Box()<=Box(2,1,-5))
```
%% Output
B(0)= 1.0
False
%% Cell type:markdown id: tags:
code to implement [https://arxiv.org/pdf/1407.7508v1.pdf](https://arxiv.org/pdf/1407.7508v1.pdf)
%% Cell type:code id: tags:
``` python
class L0_EM:
def __init__(self,data,feature_info,vkap,tau=0.01):
#data=[[time_1,y_1],[time_2,y_2],....]
self.N_data=len(data)
self.times=numpy.array(data.index.get_level_values("locationTimestamp"))
self.y=numpy.matrix(data["Latitude"]).transpose()
N_finer=10
self.times_finer=numpy.linspace(min(self.times),max(self.times),N_finer*len(self.times))
self.feature_info=list(feature_info)
features=[[f.eval(t) for t in self.times] for f in self.feature_info]
#features=[[feature_1(time_1),feature_1(time_2)..],[feature_2(time_1)..]..]
self.N_features=len(features)
self.Feat_e_T=numpy.matrix(features+[numpy.ones(self.N_data)])
self.Feat_e=self.Feat_e_T.transpose()
dt=numpy.diff(self.times)
temp=(dt[1:]+dt[:-1])/2
D=numpy.concatenate(([dt[0]],temp,[dt[-1]]))
self.D=numpy.diag(D)
self.A=self.Feat_e_T.dot(self.D).dot(self.Feat_e)
self.b=self.Feat_e_T.dot(self.D).dot(self.y)
self.Id=numpy.diag([float(vkap)]*self.N_features+[0])
self.alpha_e=None
self.stopFlag=False;
self.tau=float(tau)
self.feature_alpha_e=None
self.feature_count=None
self.feature_times=[]
self.feature_peaks=[]
self.flags=[]
self.dalpha=None
def initialize(self):
#print("rank(A): ",numpy.linalg.matrix_rank(self.A))
#print("shape of A: ",self.A.shape)
#self.alpha_e=numpy.linalg.solve(self.A,self.b)
self.alpha_e=numpy.linalg.pinv(self.A).dot(self.b)
#print("initial alpha: ",self.alpha_e)
return(self.alpha_e)
def iterate(self,alpha_e=None):
#alpha_e is external, self.alpha_e is class variable
alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e
temp=numpy.ravel(alpha_e)**2
temp[self.N_features]=1
S=numpy.diag(temp)
new_alpha_e=numpy.linalg.pinv(S.dot(self.A)+self.Id).dot(S.dot(self.b))
denom=numpy.linalg.norm(numpy.ravel(self.alpha_e),1)
num=numpy.linalg.norm(numpy.ravel(new_alpha_e-self.alpha_e),1)
self.dalpha=num/denom
print("dalpha/alpha=",self.dalpha)
self.stopFlag=(num<self.tau*denom)
self.alpha_e=new_alpha_e
self.feature_alpha_e=None
self.feature_count=None
self.feature_times=[]
self.feature_peaks=[]
self.flags=[]
self.intervals=[]
return(self.alpha_e)
def evaluate(self,alpha_e=None):
alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e
return self.Feat_e.dot(alpha_e)
def evaluate_finer(self,alpha_e=None):
alpha_e=self.alpha_e if alpha_e is None else alpha_e
alpha_e=numpy.ravel(alpha_e)
constant=alpha_e[self.N_features]
temp=numpy.array([constant]*len(self.times_finer))
for n,f in enumerate(self.feature_info):
temp+=numpy.array([alpha_e[n]*f.eval(t) for t in self.times_finer])
return temp
def combine(self,a,b):
return (min(a[0],b[0]),max(a[1],b[1]))
def findfeatures(self,alpha_e=None,delta=0.01,combineFlag=True):
alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e
alpha_e=numpy.ravel(alpha_e)
delta=0 if (delta is False) else float(delta) #feature threshold
self.feature_count=0
self.feature_times=[]
self.feature_peaks=[]
self.flags=[]
self.intervals=[(f.shift,f.shift+f.width) for aa,f in zip(alpha_e,self.feature_info)]
#threshold out the small features
alpha_e=numpy.array([aa if abs(aa)>=delta else 0 for aa in alpha_e])
#combine features
if combineFlag:
for n in range(self.N_features-1,-1,-1):
int_n=self.intervals[n]
for nn in range(n-1,-1,-1):
int_nn=self.intervals[nn]
Flag=(alpha_e[n]!=0) and (alpha_e[nn]!=0)
#Flag = Flag and (numpy.sign(alpha_e[n])==numpy.sign(alpha_e[nn]))
Flag = Flag and (self.feature_info[nn]>=self.feature_info[n])
if (Flag):
alpha_e[nn]+=alpha_e[n]
alpha_e[n]=0
self.intervals[nn]=self.combine(int_n,int_nn)
for aa,f in zip(alpha_e,self.feature_info):
if abs(aa)==0:
continue
tempflags=numpy.array([f.harvestFlag(tt) for tt in self.times],dtype='bool')
self.flags.append(tempflags)
self.feature_times.append(f.shift)
self.feature_peaks.append(f.height*aa+alpha_e[self.N_features])
self.feature_count+=1
self.feature_times=numpy.array(self.feature_times)
self.feature_peaks=numpy.array(self.feature_peaks)
self.intervals=[ival for aa,ival in zip(alpha_e,self.intervals) if abs(aa)!=0]
return alpha_e
```
%% Cell type:markdown id: tags:
plot
%% Cell type:code id: tags:
``` python
def makeplot(thisEM,alpha_e=None,fname=None,startFlag=True):
alpha_e=thisEM.alpha_e if alpha_e is None else alpha_e
image_prefix="./images/fig"
image_suffix=".png"
temp=None
if fname is not None:
plotter.ioff()
temp=image_prefix+str(fname)+image_suffix
#print("temp: ",temp)
yvals_finer=thisEM.evaluate_finer(alpha_e)
T_min=min(thisEM.times)
T_max=max(thisEM.times)
y_min=min(numpy.ravel(thisEM.y))
fig=plotter.figure()
plotter.plot(thisEM.times_finer-T_min,yvals_finer-y_min,'g',linewidth=2)
plotter.plot(thisEM.times-T_min,thisEM.y-y_min,'ro',linestyle='--',linewidth=4)
#plotter.plot(myEM.times-T_min,myEM.evaluate(alpha_e)-y_min,'g',linewidth=2)
if startFlag:
plotter.plot(thisEM.feature_times-T_min,thisEM.feature_peaks-y_min,'bo',ms=10)
dy=numpy.ptp(thisEM.y)
plotter.xlim((0,T_max-T_min))
plotter.ylim((-0.25*dy,1.5*dy))
plotter.xlabel("Timestamp")
plotter.ylabel("Latitude")
for flaglist in thisEM.flags:
pass
#tempt=myEM.times[flaglist]-T_min
#tempy=myEM.y[flaglist]-y_min
#print(len(myEM.times))
#print(len(tempt))
#plotter.plot(tempt,tempy,'ko',linestyle='-',linewidth=4)
if temp is None:
plotter.show(fig)
return None
else:
plotter.savefig(temp)
plotter.close()
return temp
```
%% Cell type:code id: tags:
``` python
def makefname(IMEI,varkappa=None,delta=None,combineFlag=None):
strings=[]
strings.append("Hequals"+str(IMEI))
if (varkappa is not None):
strings.append("vkapequals"+str(varkappa))
if (delta is not None):
strings.append("deltaequals"+str(delta))
if (combineFlag is not None):
strings.append("combineFlagequals"+str(combineFlag))
temp="_".join(strings)
return temp.replace(".","point")
```
%% Cell type:code id: tags:
``` python
DATE=gd.dateSet[0]
HEIGHT=0.0002
WIDTHS=[200,300,400,500]
```
%% Cell type:markdown id: tags:
extract data for harvester and visualize it
%% Cell type:code id: tags:
``` python
IMEI=gd.IMEISet[0] #should be either 0,1,2, or 3
raw_data=gd.get(IMEI,DATE)
data=raw_data#[0:500]
print("size of data: ",len(data))
tvals=numpy.array(data.index.get_level_values("locationTimestamp"))
lats=numpy.array(data["Latitude"])
dlats=numpy.ptp(lats)
plotter.figure()
plotter.plot(tvals-min(tvals),lats-numpy.min(lats),'ro',linestyle='--',linewidth=4)
plotter.xlabel("Timestamp (seconds)")
plotter.ylabel("Latitude")
plotter.ylim(-0.1*dlats,1.1*dlats)
plotter.show()
print("h0_latitude")
#plotter.savefig("IMEI_0_lat"+imagesuffix)
```
%% Output
size of data: 325
h0_latitude
%% Cell type:markdown id: tags:
short example of Box approximation
%% Cell type:code id: tags:
``` python
yvals=lats
tvals_short=numpy.array(tvals[0:50])
tvals_short-=numpy.min(tvals_short)
yvals_short=yvals[0:50]
print("mean of yvals_short: ",numpy.mean(yvals_short))
yvals_short_min=min(yvals_short)
dy=numpy.ptp(yvals_short)
BoxA=Box(height=0.0003,width=200,shift=515)
BoxB=Box(height=-.00026,width=400,shift=2550)
tvals_short_finer=numpy.linspace(0,numpy.max(tvals_short),len(tvals_short)*N_finer)
yvals_short_finer_box=numpy.mean(yvals_short)
yvals_short_finer_box+=numpy.array([BoxA.eval(tt) for tt in tvals_short_finer])
yvals_short_finer_box+=numpy.array([BoxB.eval(tt) for tt in tvals_short_finer])
plotter.figure()
plotter.plot(tvals_short,yvals_short-yvals_short_min,'ro',linestyle='--',linewidth=4)
plotter.plot(tvals_short_finer,yvals_short_finer_box-yvals_short_min,'g',linewidth=2)
plotter.xlabel("Timestamp (seconds)")
plotter.ylabel("Latitude")
plotter.ylim(-0.1*dy,1.1*dy)
plotter.show()
print("h0_reduced_boxexample")
#plotter.savefig("IMEI_0_short_box"+imagesuffix)
```
%% Output
mean of yvals_short: 34.179845825600005
h0_reduced_boxexample
%% Cell type:markdown id: tags:
constants
%% Cell type:code id: tags:
``` python
TVALS=numpy.array(data.index.get_level_values("locationTimestamp"))
SHIFTS=TVALS
N_ITER=30
print("making feature list",flush=True)
FEATURES=[]
for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):
FEATURES.append(Box(height=HEIGHT,width=w,shift=s))
print("there are ",len(FEATURES), "features", flush=True)
```
%% Output
making feature list
there are 1300 features
%% Cell type:markdown id: tags:
run for L2 approximation
%% Cell type:code id: tags:
``` python
EML2=L0_EM(data,FEATURES,0)
alpha_e=EML2.initialize()
alpha_e=EML2.findfeatures(alpha_e=alpha_e,delta=0,combineFlag=False)
print("mean of alpha_e",numpy.mean(alpha_e))
print("stdev of alpha_e",numpy.std(alpha_e))
```
%% Output
mean of alpha_e 0.029380998406231047
stdev of alpha_e 0.9553821210312209
%% Cell type:markdown id: tags:
plot L2 approximation
%% Cell type:code id: tags:
``` python
makeplot(EML2,alpha_e,startFlag=False)#,fname="fourthharvester")
print("h0_L2")
```
%% Output
h0_L2
%% Cell type:markdown id: tags:
histogram of alpha_e's for L^2 approximation
%% Cell type:code id: tags:
``` python
print("max(alpha_e):",numpy.max(alpha_e))
print("min(alpha_e):",numpy.min(alpha_e))
mean_alpha_e=numpy.mean(alpha_e)
std_alpha_e=numpy.std(alpha_e)
print("mean(alpha_e)",mean_alpha_e)
print("std(alpha_e)",std_alpha_e)
plotter.figure()
n, bins, patches = plotter.hist(alpha_e, bins=100, range=(-1,1), facecolor='green')
plotter.xlabel("alpha")
plotter.ylabel("count")
plotter.show()
print("h0_L2_hist")
```
%% Output
max(alpha_e): 34.17966762063247
min(alpha_e): -0.8828588614123873
mean(alpha_e) 0.029380998406231047
std(alpha_e) 0.9553821210312209
h0_L2_hist
%% Cell type:markdown id: tags:
kappa=1E-10 (small)
%% Cell type:code id: tags:
``` python
KAP=1E-10 #for box
myEM=L0_EM(data,FEATURES,KAP)
print("L0_EM created", flush=True)
alpha_e=myEM.initialize()
dalpha=[]
print("about to iterate", flush=True)
for n in range(N_ITER):
print("n=",n,flush=True)
alpha_e=myEM.iterate(alpha_e)
dalpha.append(myEM.dalpha)
myEM.findfeatures()
if (myEM.stopFlag):
break
print("done")
```
%% Output
L0_EM created
about to iterate
n= 0
%% Cell type:markdown id: tags:
Show that alpha does not converge
%% Cell type:code id: tags:
``` python
plotter.figure()
plotter.plot(dalpha)
plotter.xlabel("iteration")
plotter.ylabel("dalpha/alpha")
plotter.show()
print("nonconvergencefor"+makefname(IMEI,KAP))
```
%% Cell type:code id: tags:
``` python
DELTA=False #don't threshold
COMBINEFLAG=False #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
print("max(alpha_e_uncombined):",numpy.max(alpha_e_uncombined))
print("min(alpha_e_uncombined):",numpy.min(alpha_e_uncombined))
mean_alpha_e_uncombined=numpy.mean(alpha_e_uncombined)
std_alpha_e_uncombined=numpy.std(alpha_e_uncombined)
print("mean(alpha_e_uncombined)",mean_alpha_e_uncombined)
print("std(alpha_e_uncombined)",std_alpha_e_uncombined)
plotter.figure()
n, bins, patches = plotter.hist(alpha_e_uncombined, bins=100, range=(-1,1), facecolor='green')
plotter.xlabel("alpha")
plotter.ylabel("count")
plotter.show()
```
%% Cell type:markdown id: tags:
kappa=1E-5 (large)
%% Cell type:code id: tags:
``` python
KAP=1E-5 #for box
myEM=L0_EM(data,FEATURES,KAP)
print("L0_EM created", flush=True)
alpha_e=myEM.initialize()
print("about to iterate", flush=True)
N_iter=20
for n in range(N_iter):
print("n=",n,flush=True)
alpha_e=myEM.iterate(alpha_e)
myEM.findfeatures()
if (myEM.stopFlag):
break
print("done")
```
%% Cell type:code id: tags:
``` python
DELTA=False #don't threshold
COMBINEFLAG=False #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:markdown id: tags:
kappa=1.5E-7 (mid)
%% Cell type:code id: tags:
``` python
KAP=1.5E-7 #for box
myEM=L0_EM(data,FEATURES,KAP)
print("L0_EM created", flush=True)
alpha_e=myEM.initialize()
print("about to iterate", flush=True)
N_iter=20
for n in range(N_iter):
print("n=",n,flush=True)
alpha_e=myEM.iterate(alpha_e)
myEM.findfeatures()
if (myEM.stopFlag):
break
print("done")
```
%% Cell type:code id: tags:
``` python
DELTA=False #don't threshold
COMBINEFLAG=False #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=False #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=True #combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
intervals=myEM.intervals
picklename="IMEI_"+str(IMEI)+"_intervals.p"
pickle.dump( intervals, open( picklename, "wb" ) )
```
%% Cell type:code id: tags:
``` python
print(myEM.intervals)
```
%% Cell type:markdown id: tags:
harvester 1
%% Cell type:code id: tags:
``` python
IMEI=gd.IMEISet(1)
raw_data=gd.get(IMEI,DATE)
data=raw_data#[0:500]
TVALS=numpy.array(data.index.get_level_values("locationTimestamp"))
SHIFTS=TVALS
N_ITER=30
print("making feature list",flush=True)
FEATURES=[]
for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):
FEATURES.append(Box(height=HEIGHT,width=w,shift=s))
print("there are ",len(FEATURES), "features", flush=True)
```
%% Cell type:code id: tags:
``` python
KAP=1.5E-7 #for box
myEM=L0_EM(data,FEATURES,KAP)
print("L0_EM created", flush=True)
alpha_e=myEM.initialize()
print("about to iterate", flush=True)
N_iter=20
for n in range(N_iter):
print("n=",n,flush=True)
alpha_e=myEM.iterate(alpha_e)
myEM.findfeatures()
if (myEM.stopFlag):
break
print("done")
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=False #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=True #combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:markdown id: tags:
harvester 2
%% Cell type:code id: tags:
``` python
IMEI=gd.IMEISet(2)
raw_data=gd.get(IMEI,DATE)
data=raw_data#[0:500]
TVALS=numpy.array([line[gd.data_idx["locationTimestamp"]] for line in data])
SHIFTS=TVALS
N_ITER=30
print("making feature list",flush=True)
FEATURES=[]
for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):
FEATURES.append(Box(height=HEIGHT,width=w,shift=s))
print("there are ",len(FEATURES), "features", flush=True)
```
%% Cell type:code id: tags:
``` python
KAP=1.5E-7 #for box
myEM=L0_EM(data,FEATURES,KAP)
print("L0_EM created", flush=True)
alpha_e=myEM.initialize()
print("about to iterate", flush=True)
N_iter=20
for n in range(N_iter):
print("n=",n,flush=True)
alpha_e=myEM.iterate(alpha_e)
myEM.findfeatures()
if (myEM.stopFlag):
break
print("done")
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=False #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=True #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
print(myEM.intervals)
```
%% Cell type:markdown id: tags:
harvester 3 (CURRENT)
%% Cell type:code id: tags:
``` python
IMEI=gd.IMEISet(3)
raw_data=gd.get(IMEI,DATE)
data=raw_data#[0:500]
TVALS=numpy.array(data["locationTimestamp"])
SHIFTS=TVALS
N_ITER=30
print("making feature list",flush=True)
FEATURES=[]
for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):
FEATURES.append(Box(height=HEIGHT,width=w,shift=s))
print("there are ",len(FEATURES), "features", flush=True)
```
%% Cell type:code id: tags:
``` python
KAP=1.5E-7 #for box
myEM=L0_EM(data,FEATURES,KAP)
print("L0_EM created", flush=True)
alpha_e=myEM.initialize()
print("about to iterate", flush=True)
N_iter=20
for n in range(N_iter):
print("n=",n,flush=True)
alpha_e=myEM.iterate(alpha_e)
myEM.findfeatures()
if (myEM.stopFlag):
break
print("done")
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=False #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
DELTA=0.01 #don't threshold
COMBINEFLAG=True #combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
makeplot(myEM,alpha_e_uncombined)
print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))
```
%% Cell type:code id: tags:
``` python
print(myEM.intervals)
```
......
import numpy
import itertools
import pandas
import sys
IMEI=int(sys.argv[1])
out_fname="intervals_"+str(IMEI)+".txt"
class getData:
def __init__(self,fname):
#sheetname="outdata_with_time"
#raw_data = pandas.read_excel("outdata_with_time.xlsx", sheetname="outdata_with_time", header=0)
raw_data=pandas.read_csv(str(fname)+".csv")
self.data=raw_data.loc[:,["IMEI","Latitude","Longitude","locationTimestamp", "timestamp"]]
self.data["IMEI"] = self.data["IMEI"].astype("str")
self.data = self.data.drop_duplicates()
#print(data)
self.IMEISet=sorted(list(frozenset(self.data["IMEI"])))
self.data["date"]=pandas.to_datetime(self.data.loc[:,"timestamp"]).dt.date
self.dateSet=sorted(list(frozenset(self.data["date"])))
print(self.IMEISet)
print([str(d) for d in self.dateSet])
def get(self,IMEI_index,date_index,outlist=["locationTimestamp","Latitude","Longitude","timestamp"]):
outlist=list(outlist)
self.data_idx={}
for n,d in enumerate(outlist):
self.data_idx[d]=n
date=self.dateSet[date_index]
imei=self.IMEISet[IMEI_index]
flags=(self.data["date"]==date) & (self.data["IMEI"]==imei)
reduced_data=self.data.loc[flags,outlist].as_matrix()
#temp=numpy.array(reduced_data).reshape([-1,len(outlist)])
temp=reduced_data.tolist()
#print("shape of data: ",temp.shape)
return temp
class Box:
def __init__(self, width=1,height=1,shift=0):
self.width=float(width)
self.height=float(height)
self.shift=float(shift)
if (self.width<0):
raise ValueError('negative width in LeftBox')
def refBox(self,x):
x=float(x)
#width=1,height=1,shift=0
return 1 if 0<=x<=1 else 0
def eval(self, x):
if self.width<=0:
return numpy.inf
return self.height * self.refBox((x-self.shift)/self.width)
def harvestFlag(self,x):
return 0<(x-self.shift)<self.width
def __le__(self,other):
if not isinstance(other, Box):
return NotImplemented
#return ((other.shift<=self.shift) and ((self.shift+self.width)<=(other.shift+other.width)))
return (other.shift<=self.shift<=(other.shift+other.width))
def __ge__(self,other):
return (other<=self)
class L0_EM:
def __init__(self,data,feature_info,vkap,tau=0.01):
#data=[[time_1,y_1],[time_2,y_2],....]
data=list(data)
self.N_data=len(data)
self.times=numpy.array([line[0] for line in data])
self.y=numpy.matrix([line[1] for line in data]).transpose()
N_finer=10
self.times_finer=numpy.linspace(min(self.times),max(self.times),N_finer*len(self.times))
self.feature_info=list(feature_info)
features=[[f.eval(t) for t in self.times] for f in self.feature_info]
#features=[[feature_1(time_1),feature_1(time_2)..],[feature_2(time_1)..]..]
self.N_features=len(features)
self.Feat_e_T=numpy.matrix(features+[numpy.ones(self.N_data)])
self.Feat_e=self.Feat_e_T.transpose()
dt=numpy.diff(self.times)
temp=(dt[1:]+dt[:-1])/2
D=numpy.concatenate(([dt[0]],temp,[dt[-1]]))
self.D=numpy.diag(D)
self.A=self.Feat_e_T.dot(self.D).dot(self.Feat_e)
self.b=self.Feat_e_T.dot(self.D).dot(self.y)
self.Id=numpy.diag([float(vkap)]*self.N_features+[0])
self.alpha_e=None
self.stopFlag=False;
self.tau=float(tau)
self.feature_alpha_e=None
self.feature_count=None
self.feature_times=[]
self.feature_peaks=[]
self.flags=[]
self.dalpha=None
def initialize(self):
#print("rank(A): ",numpy.linalg.matrix_rank(self.A))
#print("shape of A: ",self.A.shape)
#self.alpha_e=numpy.linalg.solve(self.A,self.b)
self.alpha_e=numpy.linalg.pinv(self.A).dot(self.b)
#print("initial alpha: ",self.alpha_e)
return(self.alpha_e)
def iterate(self,alpha_e=None):
#alpha_e is external, self.alpha_e is class variable
alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e
temp=numpy.ravel(alpha_e)**2
temp[self.N_features]=1
S=numpy.diag(temp)
new_alpha_e=numpy.linalg.pinv(S.dot(self.A)+self.Id).dot(S.dot(self.b))
denom=numpy.linalg.norm(numpy.ravel(self.alpha_e),1)
num=numpy.linalg.norm(numpy.ravel(new_alpha_e-self.alpha_e),1)
self.dalpha=num/denom
print("dalpha/alpha=",self.dalpha)
self.stopFlag=(num<self.tau*denom)
self.alpha_e=new_alpha_e
self.feature_alpha_e=None
self.feature_count=None
self.feature_times=[]
self.feature_peaks=[]
self.flags=[]
self.intervals=[]
return(self.alpha_e)
def evaluate(self,alpha_e=None):
alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e
return self.Feat_e.dot(alpha_e)
def evaluate_finer(self,alpha_e=None):
alpha_e=self.alpha_e if alpha_e is None else alpha_e
alpha_e=numpy.ravel(alpha_e)
constant=alpha_e[self.N_features]
temp=numpy.array([constant]*len(self.times_finer))
for n,f in enumerate(self.feature_info):
temp+=numpy.array([alpha_e[n]*f.eval(t) for t in self.times_finer])
return temp
def combine(self,a,b):
return (min(a[0],b[0]),max(a[1],b[1]))
def findfeatures(self,alpha_e=None,delta=0.01,combineFlag=True):
alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e
alpha_e=numpy.ravel(alpha_e)
delta=0 if (delta is False) else float(delta) #feature threshold
self.feature_count=0
self.feature_times=[]
self.feature_peaks=[]
self.flags=[]
self.intervals=[(f.shift,f.shift+f.width) for aa,f in zip(alpha_e,self.feature_info)]
#threshold out the small features
alpha_e=numpy.array([aa if abs(aa)>=delta else 0 for aa in alpha_e])
#combine features
if combineFlag:
for n in range(self.N_features-1,-1,-1):
int_n=self.intervals[n]
for nn in range(n-1,-1,-1):
int_nn=self.intervals[nn]
Flag=(alpha_e[n]!=0) and (alpha_e[nn]!=0)
#Flag = Flag and (numpy.sign(alpha_e[n])==numpy.sign(alpha_e[nn]))
Flag = Flag and (self.feature_info[nn]>=self.feature_info[n])
if (Flag):
alpha_e[nn]+=alpha_e[n]
alpha_e[n]=0
self.intervals[nn]=self.combine(int_n,int_nn)
for aa,f in zip(alpha_e,self.feature_info):
if abs(aa)==0:
continue
tempflags=numpy.array([f.harvestFlag(tt) for tt in self.times],dtype='bool')
self.flags.append(tempflags)
self.feature_times.append(f.shift)
self.feature_peaks.append(f.height*aa+alpha_e[self.N_features])
self.feature_count+=1
self.feature_times=numpy.array(self.feature_times)
self.feature_peaks=numpy.array(self.feature_peaks)
self.intervals=[ival for aa,ival in zip(alpha_e,self.intervals) if abs(aa)!=0]
return alpha_e
fname="outdata_with_time"
gd=getData(fname)
DATE=0
HEIGHT=0.0002
WIDTHS=[200,300,400,500]
#IMEI=2
raw_data=gd.get(IMEI,DATE)
data=raw_data#[0:500]
TVALS=numpy.array([line[gd.data_idx["locationTimestamp"]] for line in data])
SHIFTS=TVALS
N_ITER=30
print("making feature list",flush=True)
FEATURES=[]
for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):
FEATURES.append(Box(height=HEIGHT,width=w,shift=s))
print("there are ",len(FEATURES), "features", flush=True)
KAP=1.5E-7 #for box
myEM=L0_EM(data,FEATURES,KAP)
print("L0_EM created", flush=True)
alpha_e=myEM.initialize()
print("about to iterate", flush=True)
N_iter=20
for n in range(N_iter):
print("n=",n,flush=True)
alpha_e=myEM.iterate(alpha_e)
myEM.findfeatures()
if (myEM.stopFlag):
break
print("done")
DELTA=0.01 #don't threshold
COMBINEFLAG=True #don't combine features
alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)
print("there are ",myEM.feature_count,"features", flush=True)
print(myEM.intervals)
with open(out_fname,'w') as f:
f.write(str(myEM.intervals))
Intervals={
'351554053682895':[(1455901052.0, 1455901925.0), (1455902880.0, 1455903380.0), (1455903382.0, 1455903973.0), (1455903987.0, 1455904387.0), (1455904491.0, 1455904991.0), (1455905055.0, 1455905255.0), (1455905777.0, 1455907652.0), (1455907743.0, 1455908925.0), (1455908951.0, 1455909690.0), (1455910298.0, 1455911149.0), (1455911381.0, 1455911881.0), (1455912123.0, 1455912958.0), (1455915030.0, 1455915330.0), (1455915371.0, 1455916250.0), (1455916469.0, 1455916669.0), (1455917074.0, 1455917911.0)],
'353918057262822':[(1455901334.0, 1455901856.0), (1455901905.0, 1455902404.0), (1455903541.0, 1455904542.0), (1455904610.0, 1455905196.0), (1455905223.0, 1455906007.0), (1455906065.0, 1455906595.0), (1455906723.0, 1455908210.0), (1455908285.0, 1455908955.0), (1455909139.0, 1455909639.0), (1455910149.0, 1455911169.0), (1455911300.0, 1455912032.0), (1455912103.0, 1455912967.0), (1455915249.0, 1455917584.0)]
'353918059182986':[(1455901176.0, 1455901897.0), (1455902110.0, 1455902410.0), (1455903567.0, 1455904534.0), (1455904647.0, 1455905147.0), (1455905337.0, 1455905882.0), (1455906117.0, 1455906632.0), (1455906701.0, 1455907567.0), (1455907737.0, 1455908237.0), (1455908397.0, 1455909091.0), (1455909147.0, 1455909578.0), (1455910156.0, 1455911162.0), (1455911298.0, 1455912972.0), (1455915341.0, 1455916086.0), (1455916090.0, 1455917587.0)]
'869578020239930':[(1455901393.0, 1455901818.0), (1455902046.0, 1455902709.0), (1455903709.0, 1455904869.0), (1455904877.0, 1455905077.0), (1455905153.0, 1455905653.0), (1455905657.0, 1455906157.0), (1455906248.0, 1455906793.0), (1455907628.0, 1455908962.0), (1455908988.0, 1455910082.0), (1455910341.0, 1455911281.0), (1455911283.0, 1455911803.0), (1455912036.0, 1455912653.0), (1455912666.0, 1455913478.0), (1455914616.0, 1455915016.0), (1455915124.0, 1455916136.0), (1455916199.0, 1455917576.0)]
}
[(1455901052.0, 1455901925.0), (1455902880.0, 1455903380.0), (1455903382.0, 1455903973.0), (1455903987.0, 1455904387.0), (1455904491.0, 1455904991.0), (1455905055.0, 1455905255.0), (1455905777.0, 1455907652.0), (1455907743.0, 1455908925.0), (1455908951.0, 1455909690.0), (1455910298.0, 1455911149.0), (1455911381.0, 1455911881.0), (1455912123.0, 1455912958.0), (1455915030.0, 1455915330.0), (1455915371.0, 1455916250.0), (1455916469.0, 1455916669.0), (1455917074.0, 1455917911.0)]
\ No newline at end of file
[(1455901334.0, 1455901856.0), (1455901905.0, 1455902404.0), (1455903541.0, 1455904542.0), (1455904610.0, 1455905196.0), (1455905223.0, 1455906007.0), (1455906065.0, 1455906595.0), (1455906723.0, 1455908210.0), (1455908285.0, 1455908955.0), (1455909139.0, 1455909639.0), (1455910149.0, 1455911169.0), (1455911300.0, 1455912032.0), (1455912103.0, 1455912967.0), (1455915249.0, 1455917584.0)]
\ No newline at end of file
[(1455901176.0, 1455901897.0), (1455902110.0, 1455902410.0), (1455903567.0, 1455904534.0), (1455904647.0, 1455905147.0), (1455905337.0, 1455905882.0), (1455906117.0, 1455906632.0), (1455906701.0, 1455907567.0), (1455907737.0, 1455908237.0), (1455908397.0, 1455909091.0), (1455909147.0, 1455909578.0), (1455910156.0, 1455911162.0), (1455911298.0, 1455912972.0), (1455915341.0, 1455916086.0), (1455916090.0, 1455917587.0)]
\ No newline at end of file
[(1455901393.0, 1455901818.0), (1455902046.0, 1455902709.0), (1455903709.0, 1455904869.0), (1455904877.0, 1455905077.0), (1455905153.0, 1455905653.0), (1455905657.0, 1455906157.0), (1455906248.0, 1455906793.0), (1455907628.0, 1455908962.0), (1455908988.0, 1455910082.0), (1455910341.0, 1455911281.0), (1455911283.0, 1455911803.0), (1455912036.0, 1455912653.0), (1455912666.0, 1455913478.0), (1455914616.0, 1455915016.0), (1455915124.0, 1455916136.0), (1455916199.0, 1455917576.0)]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment