Skip to content
Snippets Groups Projects
Commit 36b7f950 authored by r-sowers's avatar r-sowers
Browse files

Combined Intervals files into one json file, Intervals.py. Removed algorithm.py

parent 4ba2f4ff
No related branches found
No related tags found
No related merge requests found
Peter Maneykowski did some early versions of the algorithm
Peter Maneykowski (ISE undergraduate) did some early versions of the algorithm.
Nitin Srivastava (M.S. in ISE) wrote a thesis connected to this work.
\ No newline at end of file
File moved
This diff is collapsed.
import numpy
import itertools
import pandas
import sys
# Command-line argument: the position of the device in the sorted IMEI list
# (it is later passed to getData.get as IMEI_index), not the IMEI number itself.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError on sys.argv[1].
    sys.exit("usage: " + sys.argv[0] + " IMEI_index")
IMEI = int(sys.argv[1])
# Name of the text file that receives the detected intervals for this device.
out_fname = "intervals_" + str(IMEI) + ".txt"
class getData:
    """Load location records from ``<fname>.csv`` and slice them by device and day.

    The CSV must contain the columns ``IMEI``, ``Latitude``, ``Longitude``,
    ``locationTimestamp`` and ``timestamp``.  After construction:

    * ``data``     -- de-duplicated DataFrame of those columns plus a ``date``
      column derived from ``timestamp``;
    * ``IMEISet``  -- sorted list of the distinct IMEI values (as strings);
    * ``dateSet``  -- sorted list of the distinct dates.
    """

    def __init__(self, fname):
        """Read ``str(fname) + ".csv"`` and build the IMEI and date indexes."""
        raw_data = pandas.read_csv(str(fname) + ".csv")
        columns = ["IMEI", "Latitude", "Longitude", "locationTimestamp", "timestamp"]
        # Work on an independent copy so the column assignments below do not
        # write into a slice of raw_data.
        self.data = raw_data.loc[:, columns].copy()
        self.data["IMEI"] = self.data["IMEI"].astype("str")
        self.data = self.data.drop_duplicates()
        self.IMEISet = sorted(frozenset(self.data["IMEI"]))
        self.data["date"] = pandas.to_datetime(self.data.loc[:, "timestamp"]).dt.date
        self.dateSet = sorted(frozenset(self.data["date"]))
        print(self.IMEISet)
        print([str(d) for d in self.dateSet])

    def get(self, IMEI_index, date_index,
            outlist=("locationTimestamp", "Latitude", "Longitude", "timestamp")):
        """Return the rows for one device on one day as a list of lists.

        IMEI_index -- position in ``self.IMEISet``.
        date_index -- position in ``self.dateSet``.
        outlist    -- column names to return, in output order.

        Side effect: ``self.data_idx`` is set to a dict mapping each column
        name in ``outlist`` to its position inside a returned row.
        """
        outlist = list(outlist)
        self.data_idx = {name: position for position, name in enumerate(outlist)}
        date = self.dateSet[date_index]
        imei = self.IMEISet[IMEI_index]
        flags = (self.data["date"] == date) & (self.data["IMEI"] == imei)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
        # equivalent that exists in both old and new pandas releases.
        return self.data.loc[flags, outlist].values.tolist()
class Box:
    """Rectangular feature on the real line.

    ``eval`` is ``height`` on the closed interval ``[shift, shift + width]``
    and 0 elsewhere.

    Ordering: ``a <= b`` is true when the left edge of ``a`` (its ``shift``)
    lies inside the closed interval covered by ``b``; ``a >= b`` is ``b <= a``.
    """

    def __init__(self, width=1, height=1, shift=0):
        """Store the geometry as floats; raise ValueError for a negative width."""
        self.width = float(width)
        self.height = float(height)
        self.shift = float(shift)
        if self.width < 0:
            raise ValueError('negative width in LeftBox')

    def refBox(self, x):
        """Reference box (width 1, height 1, shift 0): 1 on [0, 1], else 0."""
        x = float(x)
        return 1 if 0 <= x <= 1 else 0

    def eval(self, x):
        """Value of the box at ``x``; ``numpy.inf`` when the width is not positive."""
        if self.width <= 0:
            # A zero-width box cannot be rescaled to the reference box.
            return numpy.inf
        return self.height * self.refBox((x - self.shift) / self.width)

    def harvestFlag(self, x):
        """True when ``x`` lies strictly inside ``(shift, shift + width)``."""
        return 0 < (x - self.shift) < self.width

    def __le__(self, other):
        if not isinstance(other, Box):
            return NotImplemented
        return other.shift <= self.shift <= (other.shift + other.width)

    def __ge__(self, other):
        # Without this guard, ``box >= 5`` bounces between int.__le__ and
        # Box.__ge__ until RecursionError; NotImplemented yields a TypeError.
        if not isinstance(other, Box):
            return NotImplemented
        return other <= self
class L0_EM:
    """Iteratively re-weighted, penalised least-squares fit of ``y(t)``.

    The model is ``sum_k alpha_k * feature_k(t) + constant``.  Feature objects
    must provide ``eval(t)``, ``harvestFlag(t)``, ``shift``, ``width``,
    ``height`` and support ``>=`` between each other.

    NOTE(review): the class name suggests an EM-style scheme for an L0
    penalty; that reading is inferred from the name only -- confirm against
    the accompanying write-up.

    Coefficient vectors (``alpha_e``) hold one entry per feature followed by
    the constant term, and are kept as ``numpy.matrix`` columns so existing
    callers see the same types as before.
    """

    def __init__(self, data, feature_info, vkap, tau=0.01):
        """Precompute the weighted normal equations.

        data         -- iterable of rows; row[0] is the time, row[1] the value.
        feature_info -- iterable of feature objects (see class docstring).
        vkap         -- penalty weight placed on every feature coefficient.
        tau          -- relative-change threshold used for ``stopFlag``.

        Raises ValueError when fewer than two data points are supplied,
        because the time-step weights cannot be formed.
        """
        data = list(data)
        self.N_data = len(data)
        if self.N_data < 2:
            raise ValueError("L0_EM needs at least two data points")
        self.times = numpy.array([line[0] for line in data])
        self.y = numpy.matrix([line[1] for line in data]).transpose()
        N_finer = 10
        # Grid with N_finer times as many points, used by evaluate_finer.
        self.times_finer = numpy.linspace(
            min(self.times), max(self.times), N_finer * len(self.times))
        self.feature_info = list(feature_info)
        # features[k][i] = feature_k(time_i)
        features = [[f.eval(t) for t in self.times] for f in self.feature_info]
        self.N_features = len(features)
        # The extra row of ones carries the constant term.
        self.Feat_e_T = numpy.matrix(features + [numpy.ones(self.N_data)])
        self.Feat_e = self.Feat_e_T.transpose()
        # Per-sample weights: interior samples get the mean of the two
        # neighbouring time gaps, the end samples get the adjacent gap.
        dt = numpy.diff(self.times)
        temp = (dt[1:] + dt[:-1]) / 2
        D = numpy.concatenate(([dt[0]], temp, [dt[-1]]))
        self.D = numpy.diag(D)
        self.A = self.Feat_e_T.dot(self.D).dot(self.Feat_e)
        self.b = self.Feat_e_T.dot(self.D).dot(self.y)
        # Penalty matrix: vkap for each feature coefficient, 0 for the constant.
        self.Id = numpy.diag([float(vkap)] * self.N_features + [0])
        self.alpha_e = None
        self.stopFlag = False
        self.tau = float(tau)
        self.feature_alpha_e = None
        self.feature_count = None
        self.feature_times = []
        self.feature_peaks = []
        self.flags = []
        # Previously only created by iterate()/findfeatures(); define it here
        # so reading it on a fresh object does not raise AttributeError.
        self.intervals = []
        self.dalpha = None

    def initialize(self):
        """Set and return the unpenalised solution ``pinv(A) . b``."""
        self.alpha_e = numpy.linalg.pinv(self.A).dot(self.b)
        return self.alpha_e

    def iterate(self, alpha_e=None):
        """Perform one re-weighted update and return the new coefficients.

        alpha_e -- coefficients to start from; defaults to ``self.alpha_e``.

        The change is measured against ``self.alpha_e``, so ``initialize``
        (or a previous ``iterate``) must have run first.  Sets ``dalpha``
        (relative L1 change) and ``stopFlag``, stores the new coefficients
        and clears the results of any earlier ``findfeatures`` call.
        """
        # alpha_e is external, self.alpha_e is the stored state.
        if alpha_e is not None:
            alpha_e = numpy.matrix(alpha_e, dtype='float').reshape([-1, 1])
        else:
            alpha_e = self.alpha_e
        temp = numpy.ravel(alpha_e) ** 2
        temp[self.N_features] = 1  # the constant term is not re-weighted
        S = numpy.diag(temp)
        new_alpha_e = numpy.linalg.pinv(S.dot(self.A) + self.Id).dot(S.dot(self.b))
        denom = numpy.linalg.norm(numpy.ravel(self.alpha_e), 1)
        num = numpy.linalg.norm(numpy.ravel(new_alpha_e - self.alpha_e), 1)
        self.dalpha = num / denom
        print("dalpha/alpha=", self.dalpha)
        self.stopFlag = (num < self.tau * denom)
        self.alpha_e = new_alpha_e
        self.feature_alpha_e = None
        self.feature_count = None
        self.feature_times = []
        self.feature_peaks = []
        self.flags = []
        self.intervals = []
        return self.alpha_e

    def evaluate(self, alpha_e=None):
        """Return the fitted values at the data times as a column matrix."""
        if alpha_e is not None:
            alpha_e = numpy.matrix(alpha_e, dtype='float').reshape([-1, 1])
        else:
            alpha_e = self.alpha_e
        return self.Feat_e.dot(alpha_e)

    def evaluate_finer(self, alpha_e=None):
        """Return the fitted values on ``self.times_finer`` as a 1-D array."""
        alpha_e = self.alpha_e if alpha_e is None else alpha_e
        alpha_e = numpy.ravel(alpha_e)
        constant = alpha_e[self.N_features]
        temp = numpy.array([constant] * len(self.times_finer))
        for n, f in enumerate(self.feature_info):
            temp += numpy.array([alpha_e[n] * f.eval(t) for t in self.times_finer])
        return temp

    def combine(self, a, b):
        """Return the smallest interval ``(lo, hi)`` covering both ``a`` and ``b``."""
        return (min(a[0], b[0]), max(a[1], b[1]))

    def findfeatures(self, alpha_e=None, delta=0.01, combineFlag=True):
        """Threshold the coefficients, optionally merge features, record results.

        alpha_e     -- coefficients to analyse; defaults to ``self.alpha_e``.
        delta       -- coefficients with ``abs(value) < delta`` are zeroed;
                       ``False`` disables thresholding.
        combineFlag -- when true, an active feature ``n`` is folded into the
                       nearest lower-indexed active feature ``nn`` for which
                       ``feature_info[nn] >= feature_info[n]``: the coefficient
                       is added to ``nn`` and the intervals are merged.

        Fills ``feature_count``, ``feature_times``, ``feature_peaks``,
        ``flags`` and ``intervals`` for the surviving features and returns
        the thresholded/merged coefficient array (constant term last).
        """
        if alpha_e is not None:
            alpha_e = numpy.matrix(alpha_e, dtype='float').reshape([-1, 1])
        else:
            alpha_e = self.alpha_e
        alpha_e = numpy.ravel(alpha_e)
        delta = 0 if (delta is False) else float(delta)  # feature threshold
        self.feature_count = 0
        self.feature_times = []
        self.feature_peaks = []
        self.flags = []
        self.intervals = [(f.shift, f.shift + f.width)
                          for aa, f in zip(alpha_e, self.feature_info)]
        # Threshold out the small coefficients (builds a fresh array).
        alpha_e = numpy.array([aa if abs(aa) >= delta else 0 for aa in alpha_e])
        if combineFlag:
            for n in range(self.N_features - 1, -1, -1):
                if alpha_e[n] == 0:
                    continue  # inactive features never merge
                int_n = self.intervals[n]
                for nn in range(n - 1, -1, -1):
                    if alpha_e[nn] == 0:
                        continue
                    if self.feature_info[nn] >= self.feature_info[n]:
                        alpha_e[nn] += alpha_e[n]
                        alpha_e[n] = 0
                        self.intervals[nn] = self.combine(int_n, self.intervals[nn])
                        # Feature n is now inactive, so no later nn can match.
                        break
        for aa, f in zip(alpha_e, self.feature_info):
            if abs(aa) == 0:
                continue
            tempflags = numpy.array([f.harvestFlag(tt) for tt in self.times], dtype='bool')
            self.flags.append(tempflags)
            self.feature_times.append(f.shift)
            self.feature_peaks.append(f.height * aa + alpha_e[self.N_features])
            self.feature_count += 1
        self.feature_times = numpy.array(self.feature_times)
        self.feature_peaks = numpy.array(self.feature_peaks)
        self.intervals = [ival for aa, ival in zip(alpha_e, self.intervals) if abs(aa) != 0]
        return alpha_e
# ---- Driver: fit box features to one device's data and save the intervals ----
fname = "outdata_with_time"  # getData appends ".csv"
gd = getData(fname)
DATE = 0  # index into gd.dateSet
HEIGHT = 0.0002  # height of every candidate box
WIDTHS = [200, 300, 400, 500]  # candidate box widths, in locationTimestamp units
# IMEI (an index into gd.IMEISet) comes from the command line.
raw_data = gd.get(IMEI, DATE)
data = raw_data
TVALS = numpy.array([line[gd.data_idx["locationTimestamp"]] for line in data])
SHIFTS = TVALS  # a candidate box starts at every observed time
# NOTE(review): N_ITER is not used below; the loop is bounded by N_iter.
N_ITER = 30
print("making feature list", flush=True)
# For each shift the widest box comes first, so wider boxes get lower indexes.
FEATURES = [
    Box(height=HEIGHT, width=w, shift=s)
    for s, w in itertools.product(sorted(SHIFTS), sorted(WIDTHS, reverse=True))
]
print("there are ", len(FEATURES), "features", flush=True)
KAP = 1.5E-7  # for box
myEM = L0_EM(data, FEATURES, KAP)
print("L0_EM created", flush=True)
alpha_e = myEM.initialize()
print("about to iterate", flush=True)
N_iter = 20
for n in range(N_iter):
    print("n=", n, flush=True)
    alpha_e = myEM.iterate(alpha_e)
    # findfeatures() used to be called here on every pass; its results were
    # discarded by the next iterate() and recomputed after the loop with the
    # same settings, so the call is made once, below.
    if myEM.stopFlag:
        break
print("done")
DELTA = 0.01  # coefficients with |value| < DELTA are dropped
COMBINEFLAG = True  # overlapping features are merged
# NOTE(review): despite its name this holds the thresholded AND combined
# coefficients, because COMBINEFLAG is True.
alpha_e_uncombined = myEM.findfeatures(alpha_e=alpha_e, delta=DELTA, combineFlag=COMBINEFLAG)
print("there are ", myEM.feature_count, "features", flush=True)
print(myEM.intervals)
with open(out_fname, 'w') as f:
    f.write(str(myEM.intervals))
# Detected intervals per device: IMEI string -> list of (start, end) tuples.
# NOTE(review): the values look like Unix epoch seconds -- confirm.
# The entries were missing their separating commas, which is a SyntaxError.
Intervals={
    '351554053682895':[(1455901052.0, 1455901925.0), (1455902880.0, 1455903380.0), (1455903382.0, 1455903973.0), (1455903987.0, 1455904387.0), (1455904491.0, 1455904991.0), (1455905055.0, 1455905255.0), (1455905777.0, 1455907652.0), (1455907743.0, 1455908925.0), (1455908951.0, 1455909690.0), (1455910298.0, 1455911149.0), (1455911381.0, 1455911881.0), (1455912123.0, 1455912958.0), (1455915030.0, 1455915330.0), (1455915371.0, 1455916250.0), (1455916469.0, 1455916669.0), (1455917074.0, 1455917911.0)],
    '353918057262822':[(1455901334.0, 1455901856.0), (1455901905.0, 1455902404.0), (1455903541.0, 1455904542.0), (1455904610.0, 1455905196.0), (1455905223.0, 1455906007.0), (1455906065.0, 1455906595.0), (1455906723.0, 1455908210.0), (1455908285.0, 1455908955.0), (1455909139.0, 1455909639.0), (1455910149.0, 1455911169.0), (1455911300.0, 1455912032.0), (1455912103.0, 1455912967.0), (1455915249.0, 1455917584.0)],
    '353918059182986':[(1455901176.0, 1455901897.0), (1455902110.0, 1455902410.0), (1455903567.0, 1455904534.0), (1455904647.0, 1455905147.0), (1455905337.0, 1455905882.0), (1455906117.0, 1455906632.0), (1455906701.0, 1455907567.0), (1455907737.0, 1455908237.0), (1455908397.0, 1455909091.0), (1455909147.0, 1455909578.0), (1455910156.0, 1455911162.0), (1455911298.0, 1455912972.0), (1455915341.0, 1455916086.0), (1455916090.0, 1455917587.0)],
    '869578020239930':[(1455901393.0, 1455901818.0), (1455902046.0, 1455902709.0), (1455903709.0, 1455904869.0), (1455904877.0, 1455905077.0), (1455905153.0, 1455905653.0), (1455905657.0, 1455906157.0), (1455906248.0, 1455906793.0), (1455907628.0, 1455908962.0), (1455908988.0, 1455910082.0), (1455910341.0, 1455911281.0), (1455911283.0, 1455911803.0), (1455912036.0, 1455912653.0), (1455912666.0, 1455913478.0), (1455914616.0, 1455915016.0), (1455915124.0, 1455916136.0), (1455916199.0, 1455917576.0)],
}
[(1455901052.0, 1455901925.0), (1455902880.0, 1455903380.0), (1455903382.0, 1455903973.0), (1455903987.0, 1455904387.0), (1455904491.0, 1455904991.0), (1455905055.0, 1455905255.0), (1455905777.0, 1455907652.0), (1455907743.0, 1455908925.0), (1455908951.0, 1455909690.0), (1455910298.0, 1455911149.0), (1455911381.0, 1455911881.0), (1455912123.0, 1455912958.0), (1455915030.0, 1455915330.0), (1455915371.0, 1455916250.0), (1455916469.0, 1455916669.0), (1455917074.0, 1455917911.0)]
\ No newline at end of file
[(1455901334.0, 1455901856.0), (1455901905.0, 1455902404.0), (1455903541.0, 1455904542.0), (1455904610.0, 1455905196.0), (1455905223.0, 1455906007.0), (1455906065.0, 1455906595.0), (1455906723.0, 1455908210.0), (1455908285.0, 1455908955.0), (1455909139.0, 1455909639.0), (1455910149.0, 1455911169.0), (1455911300.0, 1455912032.0), (1455912103.0, 1455912967.0), (1455915249.0, 1455917584.0)]
\ No newline at end of file
[(1455901176.0, 1455901897.0), (1455902110.0, 1455902410.0), (1455903567.0, 1455904534.0), (1455904647.0, 1455905147.0), (1455905337.0, 1455905882.0), (1455906117.0, 1455906632.0), (1455906701.0, 1455907567.0), (1455907737.0, 1455908237.0), (1455908397.0, 1455909091.0), (1455909147.0, 1455909578.0), (1455910156.0, 1455911162.0), (1455911298.0, 1455912972.0), (1455915341.0, 1455916086.0), (1455916090.0, 1455917587.0)]
\ No newline at end of file
[(1455901393.0, 1455901818.0), (1455902046.0, 1455902709.0), (1455903709.0, 1455904869.0), (1455904877.0, 1455905077.0), (1455905153.0, 1455905653.0), (1455905657.0, 1455906157.0), (1455906248.0, 1455906793.0), (1455907628.0, 1455908962.0), (1455908988.0, 1455910082.0), (1455910341.0, 1455911281.0), (1455911283.0, 1455911803.0), (1455912036.0, 1455912653.0), (1455912666.0, 1455913478.0), (1455914616.0, 1455915016.0), (1455915124.0, 1455916136.0), (1455916199.0, 1455917576.0)]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment