{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Code for **Task Disambiguation in Hand-Picked Agriculture**\n", "Project Director: Richard Sowers <r-sowers@illinois.edu>\n", " \n", "Copyright 2018 University of Illinois Board of Trustees. All Rights Reserved.\n", "Licensed under the MIT license" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy\n", "import pandas\n", "import pickle\n", "import itertools\n", "import datetime\n", "#%matplotlib notebook\n", "%matplotlib inline\n", "import pytz\n", "import matplotlib.pyplot as plotter\n", "#import matplotlib.mlab as mlab\n", "#import statsmodels.api as sm\n", "imagesuffix=\".png\"\n", "N_finer=10\n", "region=pytz.timezone(\"America/Los_Angeles\")\n", "fname=\"data.csv\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "getData function\n", "data should be in .csv file with columns labelled \"IMEI\",\"Latitude\",\"locationTimestamp\"\n", "* locationTimestamp should be seconds since epoch" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def ts_to_time(ts):\n", " return region.normalize(region.localize(datetime.datetime.fromtimestamp(ts)))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['351554053682895', '353918057262822', '353918059182986', '869578020239930']\n", "[Timestamp('2016-02-19 00:00:00'), Timestamp('2016-02-22 00:00:00')]\n" ] } ], "source": [ "#############code to get data\n", "class getData:\n", "\tdef __init__(self,fname):\n", "\t\t#sheetname=\"outdata_with_time\"\n", "\t\t#raw_data = pandas.read_excel(\"outdata_with_time.xlsx\", sheetname=\"outdata_with_time\", header=0)\n", "\t\tself.data=pandas.read_csv(str(fname))\n", "\t\tself.data.columns=[\"IMEI\",\"Latitude\",\"locationTimestamp\"]\n", "\t\tself.data = self.data.drop_duplicates()\n", "\t\tself.data[\"IMEI\"] = self.data[\"IMEI\"].astype(\"str\")\n", "\t\tself.data[\"datetime\"]=pandas.to_datetime(self.data[\"locationTimestamp\"].apply(ts_to_time))\n", "\t\t#print(data)\n", "\t\tself.IMEISet=sorted(list(frozenset(self.data[\"IMEI\"])))\n", "\t\tself.data[\"date\"]=pandas.to_datetime(self.data[\"datetime\"].apply(lambda t:t.date()))\n", "\t\tself.data.set_index([\"IMEI\",\"locationTimestamp\",\"datetime\",\"date\"],append=True,drop=True,inplace=True)\n", "\t\tself.dateSet=sorted(list(frozenset(self.data.index.get_level_values(\"date\"))))\n", "\n", "\tdef get(self,IMEI,DATE):\n", "\t\tflags=numpy.logical_and(self.data.index.get_level_values(\"date\")==DATE,\n", " self.data.index.get_level_values(\"IMEI\")==IMEI)\n", "\t\treduced_data=self.data.loc[flags]\n", "\t\t#temp=numpy.array(reduced_data).reshape([-1,len(outlist)])\n", "\t\t#print(\"shape of data: \",temp.shape)\n", "\t\treturn reduced_data\n", " \n", " \n", "gd=getData(fname)\n", "print(sorted(gd.IMEISet))\n", "print(gd.dateSet)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "test_imei=gd.IMEISet[0]\n", "test_date=gd.dateSet[0]\n", "print(test_imei)\n", "print(test_date)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Box function is reference excursion shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "class Box:\n", " def __init__(self, width=1,height=1,shift=0):\n", " self.width=float(width)\n", " self.height=float(height)\n", " self.shift=float(shift)\n", " if (self.width<0):\n", " raise ValueError('negative width in LeftBox')\n", "\n", " def refBox(self,x):\n", " x=float(x)\n", " #width=1,height=1,shift=0\n", " return 1 if 0<=x<=1 else 0\n", "\n", " def eval(self, x):\n", " if self.width<=0:\n", " return numpy.inf\n", " return self.height * self.refBox((x-self.shift)/self.width)\n", " \n", " def harvestFlag(self,x):\n", " return 0<(x-self.shift)<self.width\n", " \n", " def __le__(self,other):\n", " if not isinstance(other, Box):\n", " return NotImplemented\n", " #return ((other.shift<=self.shift) and ((self.shift+self.width)<=(other.shift+other.width)))\n", " return (other.shift<=self.shift<=(other.shift+other.width))\n", " \n", " def __ge__(self,other):\n", " return (other<=self)\n", " \n", " \n", " \n", "myBox=Box()\n", "print(\"B(0)=\",myBox.eval(0))\n", "xvals_b=numpy.linspace(-3,3,200)\n", "yvals_b=numpy.array([myBox.eval(xx) for xx in xvals_b])\n", "flags=numpy.array([myBox.harvestFlag(xx) for xx in xvals_b],dtype='bool')\n", "plotter.figure()\n", "plotter.plot(xvals_b,yvals_b)\n", "plotter.plot(xvals_b[flags],yvals_b[flags],'ro',linestyle='--',linewidth=4)\n", "plotter.ylim((-0.5,1.5))\n", "plotter.show()\n", "print(Box()<=Box(2,1,-5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "code to implement [https://arxiv.org/pdf/1407.7508v1.pdf](https://arxiv.org/pdf/1407.7508v1.pdf)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "class L0_EM:\n", " def __init__(self,data,feature_info,vkap,tau=0.01):\n", " #data=[[time_1,y_1],[time_2,y_2],....]\n", " self.N_data=len(data)\n", " self.times=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n", " self.y=numpy.matrix(data[\"Latitude\"]).transpose()\n", " \n", " N_finer=10\n", " self.times_finer=numpy.linspace(min(self.times),max(self.times),N_finer*len(self.times))\n", " \n", " self.feature_info=list(feature_info)\n", " features=[[f.eval(t) for t in self.times] for f in self.feature_info]\n", " #features=[[feature_1(time_1),feature_1(time_2)..],[feature_2(time_1)..]..]\n", " self.N_features=len(features)\n", " self.Feat_e_T=numpy.matrix(features+[numpy.ones(self.N_data)])\n", " self.Feat_e=self.Feat_e_T.transpose()\n", " \n", " dt=numpy.diff(self.times)\n", " temp=(dt[1:]+dt[:-1])/2\n", " D=numpy.concatenate(([dt[0]],temp,[dt[-1]]))\n", " self.D=numpy.diag(D)\n", "\n", " self.A=self.Feat_e_T.dot(self.D).dot(self.Feat_e)\n", " self.b=self.Feat_e_T.dot(self.D).dot(self.y)\n", " \n", "\n", " self.Id=numpy.diag([float(vkap)]*self.N_features+[0])\n", " self.alpha_e=None\n", " self.stopFlag=False;\n", "\n", " self.tau=float(tau)\n", " self.feature_alpha_e=None\n", " self.feature_count=None\n", " self.feature_times=[]\n", " self.feature_peaks=[]\n", " self.flags=[]\n", " self.dalpha=None\n", "\n", "\n", " def initialize(self):\n", " #print(\"rank(A): \",numpy.linalg.matrix_rank(self.A))\n", " #print(\"shape of A: \",self.A.shape)\n", " #self.alpha_e=numpy.linalg.solve(self.A,self.b)\n", " self.alpha_e=numpy.linalg.pinv(self.A).dot(self.b)\n", " #print(\"initial alpha: \",self.alpha_e)\n", " return(self.alpha_e)\n", "\n", " def iterate(self,alpha_e=None):\n", "\n", " #alpha_e is external, self.alpha_e is class variable\n", " alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e\n", " temp=numpy.ravel(alpha_e)**2\n", " temp[self.N_features]=1\n", " S=numpy.diag(temp)\n", "\n", " new_alpha_e=numpy.linalg.pinv(S.dot(self.A)+self.Id).dot(S.dot(self.b))\n", "\n", " denom=numpy.linalg.norm(numpy.ravel(self.alpha_e),1)\n", " num=numpy.linalg.norm(numpy.ravel(new_alpha_e-self.alpha_e),1)\n", " self.dalpha=num/denom\n", " print(\"dalpha/alpha=\",self.dalpha)\n", " self.stopFlag=(num<self.tau*denom)\n", " self.alpha_e=new_alpha_e\n", "\n", " self.feature_alpha_e=None\n", " self.feature_count=None\n", " self.feature_times=[]\n", " self.feature_peaks=[]\n", " self.flags=[]\n", " self.intervals=[]\n", " return(self.alpha_e)\n", "\n", " def evaluate(self,alpha_e=None):\n", " alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e\n", " return self.Feat_e.dot(alpha_e)\n", " \n", " def evaluate_finer(self,alpha_e=None):\n", " alpha_e=self.alpha_e if alpha_e is None else alpha_e\n", " alpha_e=numpy.ravel(alpha_e)\n", " constant=alpha_e[self.N_features]\n", " temp=numpy.array([constant]*len(self.times_finer))\n", " for n,f in enumerate(self.feature_info):\n", " temp+=numpy.array([alpha_e[n]*f.eval(t) for t in self.times_finer])\n", " return temp\n", " \n", " \n", " def combine(self,a,b):\n", " return (min(a[0],b[0]),max(a[1],b[1]))\n", " \n", " def findfeatures(self,alpha_e=None,delta=0.01,combineFlag=True):\n", " alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e\n", " alpha_e=numpy.ravel(alpha_e)\n", " delta=0 if (delta is False) else float(delta) #feature threshold\n", " self.feature_count=0\n", " self.feature_times=[]\n", " self.feature_peaks=[]\n", " self.flags=[]\n", " self.intervals=[(f.shift,f.shift+f.width) for aa,f in zip(alpha_e,self.feature_info)]\n", " \n", " \n", " #threshold out the small features\n", " alpha_e=numpy.array([aa if abs(aa)>=delta else 0 for aa in alpha_e])\n", " \n", " #combine features\n", " if combineFlag:\n", " for n in range(self.N_features-1,-1,-1):\n", " int_n=self.intervals[n]\n", " for nn in range(n-1,-1,-1):\n", " int_nn=self.intervals[nn]\n", " Flag=(alpha_e[n]!=0) and (alpha_e[nn]!=0)\n", " #Flag = Flag and (numpy.sign(alpha_e[n])==numpy.sign(alpha_e[nn]))\n", " Flag = Flag and (self.feature_info[nn]>=self.feature_info[n])\n", " if (Flag):\n", " alpha_e[nn]+=alpha_e[n]\n", " alpha_e[n]=0\n", " self.intervals[nn]=self.combine(int_n,int_nn)\n", " \n", " for aa,f in zip(alpha_e,self.feature_info):\n", " if abs(aa)==0:\n", " continue\n", " tempflags=numpy.array([f.harvestFlag(tt) for tt in self.times],dtype='bool')\n", " self.flags.append(tempflags)\n", " self.feature_times.append(f.shift)\n", " self.feature_peaks.append(f.height*aa+alpha_e[self.N_features])\n", " self.feature_count+=1\n", " self.feature_times=numpy.array(self.feature_times)\n", " self.feature_peaks=numpy.array(self.feature_peaks)\n", " self.intervals=[ival for aa,ival in zip(alpha_e,self.intervals) if abs(aa)!=0]\n", " return alpha_e" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "plot " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def makeplot(thisEM,alpha_e=None,fname=None,startFlag=True):\n", " alpha_e=thisEM.alpha_e if alpha_e is None else alpha_e\n", " image_prefix=\"./images/fig\"\n", " image_suffix=\".png\"\n", " temp=None\n", " if fname is not None:\n", " plotter.ioff()\n", " temp=image_prefix+str(fname)+image_suffix\n", " #print(\"temp: \",temp)\n", " \n", " yvals_finer=thisEM.evaluate_finer(alpha_e)\n", " T_min=min(thisEM.times)\n", " T_max=max(thisEM.times)\n", " y_min=min(numpy.ravel(thisEM.y))\n", " \n", "\n", " fig=plotter.figure()\n", " plotter.plot(thisEM.times_finer-T_min,yvals_finer-y_min,'g',linewidth=2)\n", " plotter.plot(thisEM.times-T_min,thisEM.y-y_min,'ro',linestyle='--',linewidth=4)\n", " #plotter.plot(myEM.times-T_min,myEM.evaluate(alpha_e)-y_min,'g',linewidth=2)\n", " if startFlag:\n", " plotter.plot(thisEM.feature_times-T_min,thisEM.feature_peaks-y_min,'bo',ms=10)\n", " dy=numpy.ptp(thisEM.y)\n", " plotter.xlim((0,T_max-T_min))\n", " plotter.ylim((-0.25*dy,1.5*dy))\n", " plotter.xlabel(\"Timestamp\")\n", " plotter.ylabel(\"Latitude\")\n", " for flaglist in thisEM.flags:\n", " pass\n", " #tempt=myEM.times[flaglist]-T_min\n", " #tempy=myEM.y[flaglist]-y_min\n", " #print(len(myEM.times))\n", " #print(len(tempt))\n", " #plotter.plot(tempt,tempy,'ko',linestyle='-',linewidth=4)\n", " if temp is None:\n", " plotter.show(fig)\n", " return None\n", " else:\n", " plotter.savefig(temp)\n", " plotter.close()\n", "\n", " return temp\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def makefname(IMEI,varkappa=None,delta=None,combineFlag=None):\n", " strings=[]\n", " strings.append(\"Hequals\"+str(IMEI))\n", " if (varkappa is not None):\n", " strings.append(\"vkapequals\"+str(varkappa))\n", " if (delta is not None):\n", " strings.append(\"deltaequals\"+str(delta))\n", " if (combineFlag is not None):\n", " strings.append(\"combineFlagequals\"+str(combineFlag))\n", " temp=\"_\".join(strings)\n", " return temp.replace(\".\",\"point\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DATE=gd.dateSet[0]\n", "HEIGHT=0.0002\n", "WIDTHS=[200,300,400,500]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "extract data for harvester and visualize it" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "IMEI=gd.IMEISet[0] #should be either 0,1,2, or 3\n", "\n", "raw_data=gd.get(IMEI,DATE)\n", "data=raw_data#[0:500]\n", "print(\"size of data: \",len(data))\n", "tvals=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n", "\n", "lats=numpy.array(data[\"Latitude\"])\n", "dlats=numpy.ptp(lats)\n", "plotter.figure()\n", "plotter.plot(tvals-min(tvals),lats-numpy.min(lats),'ro',linestyle='--',linewidth=4)\n", "plotter.xlabel(\"Timestamp (seconds)\")\n", "plotter.ylabel(\"Latitude\")\n", "plotter.ylim(-0.1*dlats,1.1*dlats)\n", "plotter.show()\n", "print(\"h0_latitude\")\n", "#plotter.savefig(\"IMEI_0_lat\"+imagesuffix)\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "short example of Box approximation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "yvals=lats\n", "\n", "tvals_short=numpy.array(tvals[0:50])\n", "tvals_short-=numpy.min(tvals_short)\n", "yvals_short=yvals[0:50]\n", "print(\"mean of yvals_short: \",numpy.mean(yvals_short))\n", "yvals_short_min=min(yvals_short)\n", "dy=numpy.ptp(yvals_short)\n", "\n", "BoxA=Box(height=0.0003,width=200,shift=515)\n", "BoxB=Box(height=-.00026,width=400,shift=2550)\n", "tvals_short_finer=numpy.linspace(0,numpy.max(tvals_short),len(tvals_short)*N_finer)\n", "yvals_short_finer_box=numpy.mean(yvals_short)\n", "yvals_short_finer_box+=numpy.array([BoxA.eval(tt) for tt in tvals_short_finer])\n", "yvals_short_finer_box+=numpy.array([BoxB.eval(tt) for tt in tvals_short_finer])\n", "plotter.figure()\n", "plotter.plot(tvals_short,yvals_short-yvals_short_min,'ro',linestyle='--',linewidth=4)\n", "plotter.plot(tvals_short_finer,yvals_short_finer_box-yvals_short_min,'g',linewidth=2)\n", "plotter.xlabel(\"Timestamp (seconds)\")\n", "plotter.ylabel(\"Latitude\")\n", "plotter.ylim(-0.1*dy,1.1*dy)\n", "plotter.show()\n", "print(\"h0_reduced_boxexample\")\n", "#plotter.savefig(\"IMEI_0_short_box\"+imagesuffix)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "constants" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "TVALS=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n", "SHIFTS=TVALS\n", "N_ITER=30\n", "\n", "\n", "print(\"making feature list\",flush=True)\n", "FEATURES=[]\n", "for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):\n", " FEATURES.append(Box(height=HEIGHT,width=w,shift=s)) \n", "print(\"there are \",len(FEATURES), \"features\", flush=True)\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "run for L2 approximation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "EML2=L0_EM(data,FEATURES,0)\n", "\n", "alpha_e=EML2.initialize()\n", " \n", "alpha_e=EML2.findfeatures(alpha_e=alpha_e,delta=0,combineFlag=False)\n", "print(\"mean of alpha_e\",numpy.mean(alpha_e))\n", "print(\"stdev of alpha_e\",numpy.std(alpha_e))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "plot L2 approximation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "makeplot(EML2,alpha_e,startFlag=False)#,fname=\"fourthharvester\")\n", "print(\"h0_L2\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "histogram of alpha_e's for L^2 approximation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(\"max(alpha_e):\",numpy.max(alpha_e))\n", "print(\"min(alpha_e):\",numpy.min(alpha_e))\n", "mean_alpha_e=numpy.mean(alpha_e)\n", "std_alpha_e=numpy.std(alpha_e)\n", "print(\"mean(alpha_e)\",mean_alpha_e)\n", "print(\"std(alpha_e)\",std_alpha_e)\n", "plotter.figure()\n", "n, bins, patches = plotter.hist(alpha_e, bins=100, range=(-1,1), facecolor='green')\n", "plotter.xlabel(\"alpha\")\n", "plotter.ylabel(\"count\")\n", "plotter.show()\n", "print(\"h0_L2_hist\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "kappa=1E-10 (small)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "KAP=1E-10 #for box\n", "myEM=L0_EM(data,FEATURES,KAP)\n", "\n", "print(\"L0_EM created\", flush=True)\n", "alpha_e=myEM.initialize()\n", "dalpha=[]\n", "print(\"about to iterate\", flush=True)\n", "for n in range(N_ITER):\n", " print(\"n=\",n,flush=True)\n", " alpha_e=myEM.iterate(alpha_e)\n", " dalpha.append(myEM.dalpha)\n", " myEM.findfeatures()\n", " if (myEM.stopFlag):\n", " break\n", " \n", "print(\"done\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Show that alpha does not converge" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "plotter.figure()\n", "plotter.plot(dalpha)\n", "plotter.xlabel(\"iteration\")\n", "plotter.ylabel(\"dalpha/alpha\")\n", "plotter.show()\n", "print(\"nonconvergencefor\"+makefname(IMEI,KAP))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=False #don't threshold\n", "COMBINEFLAG=False #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(\"max(alpha_e_uncombined):\",numpy.max(alpha_e_uncombined))\n", "print(\"min(alpha_e_uncombined):\",numpy.min(alpha_e_uncombined))\n", "mean_alpha_e_uncombined=numpy.mean(alpha_e_uncombined)\n", "std_alpha_e_uncombined=numpy.std(alpha_e_uncombined)\n", "print(\"mean(alpha_e_uncombined)\",mean_alpha_e_uncombined)\n", "print(\"std(alpha_e_uncombined)\",std_alpha_e_uncombined)\n", "plotter.figure()\n", "n, bins, patches = plotter.hist(alpha_e_uncombined, bins=100, range=(-1,1), facecolor='green')\n", "plotter.xlabel(\"alpha\")\n", "plotter.ylabel(\"count\")\n", "plotter.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "kappa=1E-5 (large)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "KAP=1E-5 #for box\n", "myEM=L0_EM(data,FEATURES,KAP)\n", "print(\"L0_EM created\", flush=True)\n", "alpha_e=myEM.initialize()\n", "print(\"about to iterate\", flush=True)\n", "N_iter=20\n", "for n in range(N_iter):\n", " print(\"n=\",n,flush=True)\n", " alpha_e=myEM.iterate(alpha_e)\n", " myEM.findfeatures()\n", " if (myEM.stopFlag):\n", " break\n", " \n", "print(\"done\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=False #don't threshold\n", "COMBINEFLAG=False #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "kappa=1.5E-7 (mid)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "KAP=1.5E-7 #for box\n", "myEM=L0_EM(data,FEATURES,KAP)\n", "\n", "print(\"L0_EM created\", flush=True)\n", "alpha_e=myEM.initialize()\n", "print(\"about to iterate\", flush=True)\n", "N_iter=20\n", "for n in range(N_iter):\n", " print(\"n=\",n,flush=True)\n", " alpha_e=myEM.iterate(alpha_e)\n", " myEM.findfeatures()\n", " if (myEM.stopFlag):\n", " break\n", " \n", "print(\"done\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=False #don't threshold\n", "COMBINEFLAG=False #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=False #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=True #combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))\n", "intervals=myEM.intervals\n", "picklename=\"IMEI_\"+str(IMEI)+\"_intervals.p\"\n", "pickle.dump( intervals, open( picklename, \"wb\" ) )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(myEM.intervals)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "harvester 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "IMEI=gd.IMEISet(1)\n", "raw_data=gd.get(IMEI,DATE)\n", "data=raw_data#[0:500]\n", "TVALS=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n", "SHIFTS=TVALS\n", "N_ITER=30\n", "\n", "print(\"making feature list\",flush=True)\n", "FEATURES=[]\n", "for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):\n", " FEATURES.append(Box(height=HEIGHT,width=w,shift=s)) \n", "print(\"there are \",len(FEATURES), \"features\", flush=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "KAP=1.5E-7 #for box\n", "myEM=L0_EM(data,FEATURES,KAP)\n", "\n", "print(\"L0_EM created\", flush=True)\n", "alpha_e=myEM.initialize()\n", "print(\"about to iterate\", flush=True)\n", "N_iter=20\n", "for n in range(N_iter):\n", " print(\"n=\",n,flush=True)\n", " alpha_e=myEM.iterate(alpha_e)\n", " myEM.findfeatures()\n", " if (myEM.stopFlag):\n", " break\n", " \n", "print(\"done\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=False #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=True #combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "harvester 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "IMEI=gd.IMEISet(2)\n", "raw_data=gd.get(IMEI,DATE)\n", "data=raw_data#[0:500]\n", "TVALS=numpy.array([line[gd.data_idx[\"locationTimestamp\"]] for line in data])\n", "SHIFTS=TVALS\n", "N_ITER=30\n", "\n", "print(\"making feature list\",flush=True)\n", "FEATURES=[]\n", "for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):\n", " FEATURES.append(Box(height=HEIGHT,width=w,shift=s)) \n", "print(\"there are \",len(FEATURES), \"features\", flush=True)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "KAP=1.5E-7 #for box\n", "myEM=L0_EM(data,FEATURES,KAP)\n", "\n", "print(\"L0_EM created\", flush=True)\n", "alpha_e=myEM.initialize()\n", "print(\"about to iterate\", flush=True)\n", "N_iter=20\n", "for n in range(N_iter):\n", " print(\"n=\",n,flush=True)\n", " alpha_e=myEM.iterate(alpha_e)\n", " myEM.findfeatures()\n", " if (myEM.stopFlag):\n", " break\n", " \n", "print(\"done\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=False #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=True #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(myEM.intervals)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "harvester 3 (CURRENT)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "IMEI=gd.IMEISet(3)\n", "raw_data=gd.get(IMEI,DATE)\n", "data=raw_data#[0:500]\n", "TVALS=numpy.array(data[\"locationTimestamp\"])\n", "SHIFTS=TVALS\n", "N_ITER=30\n", "\n", "print(\"making feature list\",flush=True)\n", "FEATURES=[]\n", "for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):\n", " FEATURES.append(Box(height=HEIGHT,width=w,shift=s)) \n", "print(\"there are \",len(FEATURES), \"features\", flush=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "KAP=1.5E-7 #for box\n", "myEM=L0_EM(data,FEATURES,KAP)\n", "\n", "print(\"L0_EM created\", flush=True)\n", "alpha_e=myEM.initialize()\n", "print(\"about to iterate\", flush=True)\n", "N_iter=20\n", "for n in range(N_iter):\n", " print(\"n=\",n,flush=True)\n", " alpha_e=myEM.iterate(alpha_e)\n", " myEM.findfeatures()\n", " if (myEM.stopFlag):\n", " break\n", " \n", "print(\"done\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=False #don't combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "DELTA=0.01 #don't threshold\n", "COMBINEFLAG=True #combine features\n", "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n", "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n", "makeplot(myEM,alpha_e_uncombined)\n", "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(myEM.intervals)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }