algorithm.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Code for **Task Disambiguation in Hand-Picked Agriculture**\n",
    "Project Director: Richard Sowers <r-sowers@illinois.edu>\n",
    " \n",
    "Copyright 2018 University of Illinois Board of Trustees. All Rights Reserved.\n",
    "Licensed under the MIT license"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy\n",
    "import pandas\n",
    "import pickle\n",
    "import itertools\n",
    "import datetime\n",
    "#%matplotlib notebook\n",
    "%matplotlib inline\n",
    "import pytz\n",
    "import matplotlib.pyplot as plotter\n",
    "#import matplotlib.mlab as mlab\n",
    "#import statsmodels.api as sm\n",
    "imagesuffix=\".png\"\n",
    "N_finer=10\n",
    "region=pytz.timezone(\"America/Los_Angeles\")\n",
    "fname=\"data.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`getData` class: loads the GPS records.\n",
    "The data should be a .csv file with exactly three columns, in the order \"IMEI\", \"Latitude\", \"locationTimestamp\" (the loader assigns these names by position).\n",
    "* locationTimestamp should be seconds since epoch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def ts_to_time(ts):\n",
    "    \"\"\"Convert a POSIX timestamp to a timezone-aware datetime in `region`.\n",
    "\n",
    "    ts -- seconds since the epoch.\n",
    "\n",
    "    Returns a datetime whose tzinfo comes from the notebook-level `region`.\n",
    "    The timestamp is read as UTC and then converted, so the result does not\n",
    "    depend on the timezone of the machine that runs the notebook.\n",
    "    \"\"\"\n",
    "    return datetime.datetime.fromtimestamp(ts, tz=pytz.utc).astimezone(region)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['351554053682895', '353918057262822', '353918059182986', '869578020239930']\n",
      "[Timestamp('2016-02-19 00:00:00'), Timestamp('2016-02-22 00:00:00')]\n"
     ]
    }
   ],
   "source": [
    "#############code to get data\n",
    "class getData:\n",
    "    \"\"\"Load GPS records from a CSV file and index them by device and day.\n",
    "\n",
    "    Attributes set by the constructor:\n",
    "      data    -- DataFrame keeping the \"Latitude\" column; \"IMEI\",\n",
    "                 \"locationTimestamp\", \"datetime\" and \"date\" are appended\n",
    "                 to the original row index as extra index levels\n",
    "      IMEISet -- sorted list of the distinct IMEI strings\n",
    "      dateSet -- sorted list of the distinct dates\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self,fname):\n",
    "        \"\"\"Read `fname` and build the indexed table.\"\"\"\n",
    "        table=pandas.read_csv(str(fname))\n",
    "        # the three CSV columns are named by position\n",
    "        table.columns=[\"IMEI\",\"Latitude\",\"locationTimestamp\"]\n",
    "        table=table.drop_duplicates()\n",
    "        table[\"IMEI\"]=table[\"IMEI\"].astype(\"str\")\n",
    "        local_times=table[\"locationTimestamp\"].apply(ts_to_time)\n",
    "        table[\"datetime\"]=pandas.to_datetime(local_times)\n",
    "        self.IMEISet=sorted(frozenset(table[\"IMEI\"]))\n",
    "        calendar_days=table[\"datetime\"].apply(lambda t:t.date())\n",
    "        table[\"date\"]=pandas.to_datetime(calendar_days)\n",
    "        index_levels=[\"IMEI\",\"locationTimestamp\",\"datetime\",\"date\"]\n",
    "        table.set_index(index_levels,append=True,drop=True,inplace=True)\n",
    "        self.data=table\n",
    "        self.dateSet=sorted(frozenset(table.index.get_level_values(\"date\")))\n",
    "\n",
    "    def get(self,IMEI,DATE):\n",
    "        \"\"\"Return the rows recorded by device `IMEI` on day `DATE`.\"\"\"\n",
    "        index=self.data.index\n",
    "        same_day=index.get_level_values(\"date\")==DATE\n",
    "        same_device=index.get_level_values(\"IMEI\")==IMEI\n",
    "        return self.data.loc[numpy.logical_and(same_day,same_device)]\n",
    "\n",
    "\n",
    "gd=getData(fname)\n",
    "print(sorted(gd.IMEISet))\n",
    "print(gd.dateSet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_imei=gd.IMEISet[0]\n",
    "test_date=gd.dateSet[0]\n",
    "print(test_imei)\n",
    "print(test_date)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `Box` class is the reference excursion shape: a rectangular pulse with a given width, height and shift."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "class Box:\n",
    "    \"\"\"Rectangular pulse used as the reference excursion shape.\n",
    "\n",
    "    eval(x) equals `height` for shift <= x <= shift+width and 0 elsewhere.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, width=1,height=1,shift=0):\n",
    "        \"\"\"Store the pulse geometry as floats; raise ValueError if width < 0.\"\"\"\n",
    "        self.width=float(width)\n",
    "        self.height=float(height)\n",
    "        self.shift=float(shift)\n",
    "        if (self.width<0):\n",
    "            raise ValueError('negative width in Box')\n",
    "\n",
    "    def refBox(self,x):\n",
    "        \"\"\"Unit pulse: 1 on the closed interval [0,1], 0 elsewhere.\"\"\"\n",
    "        x=float(x)\n",
    "        return 1 if 0<=x<=1 else 0\n",
    "\n",
    "    def eval(self, x):\n",
    "        \"\"\"Value of the shifted, scaled pulse at x (inf for a zero-width box).\"\"\"\n",
    "        if self.width<=0:\n",
    "            return numpy.inf\n",
    "        return self.height * self.refBox((x-self.shift)/self.width)\n",
    "\n",
    "    def harvestFlag(self,x):\n",
    "        \"\"\"True when x lies strictly inside the open interval (shift, shift+width).\"\"\"\n",
    "        return 0<(x-self.shift)<self.width\n",
    "\n",
    "    def __le__(self,other):\n",
    "        \"\"\"self <= other when self starts inside [other.shift, other.shift+other.width].\"\"\"\n",
    "        if not isinstance(other, Box):\n",
    "            return NotImplemented\n",
    "        #return ((other.shift<=self.shift) and ((self.shift+self.width)<=(other.shift+other.width)))\n",
    "        return (other.shift<=self.shift<=(other.shift+other.width))\n",
    "\n",
    "    def __ge__(self,other):\n",
    "        \"\"\"self >= other when other starts inside self's interval (mirror of __le__).\"\"\"\n",
    "        # Evaluating other<=self for a non-Box would make Python fall back to\n",
    "        # this same method and recurse without end, so decline explicitly.\n",
    "        if not isinstance(other, Box):\n",
    "            return NotImplemented\n",
    "        return other.__le__(self)\n",
    "    \n",
    "        \n",
    "    \n",
    "myBox=Box()\n",
    "print(\"B(0)=\",myBox.eval(0))\n",
    "xvals_b=numpy.linspace(-3,3,200)\n",
    "yvals_b=numpy.array([myBox.eval(xx) for xx in xvals_b])\n",
    "flags=numpy.array([myBox.harvestFlag(xx) for xx in xvals_b],dtype='bool')\n",
    "plotter.figure()\n",
    "plotter.plot(xvals_b,yvals_b)\n",
    "plotter.plot(xvals_b[flags],yvals_b[flags],'ro',linestyle='--',linewidth=4)\n",
    "plotter.ylim((-0.5,1.5))\n",
    "plotter.show()\n",
    "print(Box()<=Box(2,1,-5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "code to implement [https://arxiv.org/pdf/1407.7508v1.pdf](https://arxiv.org/pdf/1407.7508v1.pdf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "class L0_EM:\n",
    "    \"\"\"Sparse fit of latitude by a dictionary of features plus a constant.\n",
    "\n",
    "    The model is y(t) ~ sum_n alpha_n*feature_n(t) + constant.  `iterate`\n",
    "    rescales the normal equations by the squared current coefficients and adds\n",
    "    a penalty of weight vkap on every feature coefficient (not on the constant).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self,data,feature_info,vkap,tau=0.01):\n",
    "        \"\"\"Build the weighted normal equations A*alpha=b.\n",
    "\n",
    "        data         -- DataFrame with a \"Latitude\" column and an index level\n",
    "                        named \"locationTimestamp\" (at least two rows)\n",
    "        feature_info -- iterable of feature objects providing eval(t),\n",
    "                        harvestFlag(t), shift, width, height and >=\n",
    "        vkap         -- penalty weight placed on every feature coefficient\n",
    "        tau          -- relative-change threshold used by iterate for stopFlag\n",
    "        \"\"\"\n",
    "        #data=[[time_1,y_1],[time_2,y_2],....]\n",
    "        self.N_data=len(data)\n",
    "        self.times=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n",
    "        self.y=numpy.matrix(data[\"Latitude\"]).transpose()\n",
    "\n",
    "        # finer time grid, used only by evaluate_finer\n",
    "        N_finer=10\n",
    "        self.times_finer=numpy.linspace(min(self.times),max(self.times),N_finer*len(self.times))\n",
    "\n",
    "        self.feature_info=list(feature_info)\n",
    "        features=[[f.eval(t) for t in self.times] for f in self.feature_info]\n",
    "        #features=[[feature_1(time_1),feature_1(time_2)..],[feature_2(time_1)..]..]\n",
    "        self.N_features=len(features)\n",
    "        # the extra row of ones carries the constant term\n",
    "        self.Feat_e_T=numpy.matrix(features+[numpy.ones(self.N_data)])\n",
    "        self.Feat_e=self.Feat_e_T.transpose()\n",
    "\n",
    "        # per-sample weights: average of the neighbouring time steps\n",
    "        dt=numpy.diff(self.times)\n",
    "        temp=(dt[1:]+dt[:-1])/2\n",
    "        D=numpy.concatenate(([dt[0]],temp,[dt[-1]]))\n",
    "        self.D=numpy.diag(D)\n",
    "\n",
    "        self.A=self.Feat_e_T.dot(self.D).dot(self.Feat_e)\n",
    "        self.b=self.Feat_e_T.dot(self.D).dot(self.y)\n",
    "\n",
    "        # penalty matrix: vkap on each feature, 0 on the constant\n",
    "        self.Id=numpy.diag([float(vkap)]*self.N_features+[0])\n",
    "        self.alpha_e=None\n",
    "        self.stopFlag=False\n",
    "\n",
    "        self.tau=float(tau)\n",
    "        self.feature_alpha_e=None\n",
    "        self.feature_count=None\n",
    "        self.feature_times=[]\n",
    "        self.feature_peaks=[]\n",
    "        self.flags=[]\n",
    "        # created here so the attribute exists before iterate/findfeatures run\n",
    "        self.intervals=[]\n",
    "        self.dalpha=None\n",
    "\n",
    "\n",
    "    def initialize(self):\n",
    "        \"\"\"Set and return the unpenalized solution pinv(A)*b.\"\"\"\n",
    "        #self.alpha_e=numpy.linalg.solve(self.A,self.b)\n",
    "        self.alpha_e=numpy.linalg.pinv(self.A).dot(self.b)\n",
    "        return(self.alpha_e)\n",
    "\n",
    "    def iterate(self,alpha_e=None):\n",
    "        \"\"\"Perform one update step and return the new coefficients.\n",
    "\n",
    "        alpha_e -- coefficients to start from; defaults to self.alpha_e.\n",
    "\n",
    "        Sets self.dalpha (relative L1 change with respect to the starting\n",
    "        coefficients), self.stopFlag (change below tau) and self.alpha_e, and\n",
    "        clears the results of any earlier findfeatures call.\n",
    "        \"\"\"\n",
    "        #alpha_e is external, self.alpha_e is class variable\n",
    "        alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e\n",
    "        temp=numpy.ravel(alpha_e)**2\n",
    "        temp[self.N_features]=1 # the constant term is never rescaled\n",
    "        S=numpy.diag(temp)\n",
    "\n",
    "        new_alpha_e=numpy.linalg.pinv(S.dot(self.A)+self.Id).dot(S.dot(self.b))\n",
    "\n",
    "        # measure the change against the coefficients this step started from\n",
    "        denom=numpy.linalg.norm(numpy.ravel(alpha_e),1)\n",
    "        num=numpy.linalg.norm(numpy.ravel(new_alpha_e-alpha_e),1)\n",
    "        self.dalpha=num/denom\n",
    "        print(\"dalpha/alpha=\",self.dalpha)\n",
    "        self.stopFlag=(num<self.tau*denom)\n",
    "        self.alpha_e=new_alpha_e\n",
    "\n",
    "        self.feature_alpha_e=None\n",
    "        self.feature_count=None\n",
    "        self.feature_times=[]\n",
    "        self.feature_peaks=[]\n",
    "        self.flags=[]\n",
    "        self.intervals=[]\n",
    "        return(self.alpha_e)\n",
    "\n",
    "    def evaluate(self,alpha_e=None):\n",
    "        \"\"\"Return the fitted values at the data times as a column matrix.\"\"\"\n",
    "        alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e\n",
    "        return self.Feat_e.dot(alpha_e)\n",
    "\n",
    "    def evaluate_finer(self,alpha_e=None):\n",
    "        \"\"\"Return the fitted values on self.times_finer as a 1-D array.\"\"\"\n",
    "        alpha_e=self.alpha_e if alpha_e is None else alpha_e\n",
    "        alpha_e=numpy.ravel(alpha_e)\n",
    "        constant=alpha_e[self.N_features]\n",
    "        temp=numpy.array([constant]*len(self.times_finer))\n",
    "        for n,f in enumerate(self.feature_info):\n",
    "            temp+=numpy.array([alpha_e[n]*f.eval(t) for t in self.times_finer])\n",
    "        return temp\n",
    "\n",
    "\n",
    "    def combine(self,a,b):\n",
    "        \"\"\"Return the smallest interval (lo,hi) covering intervals a and b.\"\"\"\n",
    "        return (min(a[0],b[0]),max(a[1],b[1]))\n",
    "\n",
    "    def findfeatures(self,alpha_e=None,delta=0.01,combineFlag=True):\n",
    "        \"\"\"Threshold the coefficients, optionally merge features, and record them.\n",
    "\n",
    "        alpha_e     -- coefficients to analyse; defaults to self.alpha_e\n",
    "        delta       -- entries with |alpha|<delta are set to 0 (False means 0)\n",
    "        combineFlag -- if True, feature n is folded into an earlier feature nn\n",
    "                       whenever both are nonzero and feature_info[nn]>=feature_info[n]\n",
    "\n",
    "        Fills self.flags, self.feature_times, self.feature_peaks,\n",
    "        self.feature_count and self.intervals for the surviving features and\n",
    "        returns the processed coefficients as a 1-D array.\n",
    "        \"\"\"\n",
    "        alpha_e=numpy.matrix(alpha_e,dtype='float').reshape([-1,1]) if alpha_e is not None else self.alpha_e\n",
    "        alpha_e=numpy.ravel(alpha_e)\n",
    "        delta=0 if (delta is False) else float(delta) #feature threshold\n",
    "        self.feature_count=0\n",
    "        self.feature_times=[]\n",
    "        self.feature_peaks=[]\n",
    "        self.flags=[]\n",
    "        self.intervals=[(f.shift,f.shift+f.width) for aa,f in zip(alpha_e,self.feature_info)]\n",
    "\n",
    "\n",
    "        #threshold out the small features\n",
    "        alpha_e=numpy.array([aa if abs(aa)>=delta else 0 for aa in alpha_e])\n",
    "\n",
    "        #combine features\n",
    "        if combineFlag:\n",
    "            for n in range(self.N_features-1,-1,-1):\n",
    "                int_n=self.intervals[n]\n",
    "                for nn in range(n-1,-1,-1):\n",
    "                    int_nn=self.intervals[nn]\n",
    "                    Flag=(alpha_e[n]!=0) and (alpha_e[nn]!=0)\n",
    "                    #Flag = Flag and (numpy.sign(alpha_e[n])==numpy.sign(alpha_e[nn]))\n",
    "                    Flag = Flag and (self.feature_info[nn]>=self.feature_info[n])\n",
    "                    if (Flag):\n",
    "                        alpha_e[nn]+=alpha_e[n]\n",
    "                        alpha_e[n]=0\n",
    "                        self.intervals[nn]=self.combine(int_n,int_nn)\n",
    "\n",
    "        for aa,f in zip(alpha_e,self.feature_info):\n",
    "            if abs(aa)==0:\n",
    "                continue\n",
    "            tempflags=numpy.array([f.harvestFlag(tt) for tt in self.times],dtype='bool')\n",
    "            self.flags.append(tempflags)\n",
    "            self.feature_times.append(f.shift)\n",
    "            # peak value = scaled feature height on top of the constant term\n",
    "            self.feature_peaks.append(f.height*aa+alpha_e[self.N_features])\n",
    "            self.feature_count+=1\n",
    "        self.feature_times=numpy.array(self.feature_times)\n",
    "        self.feature_peaks=numpy.array(self.feature_peaks)\n",
    "        self.intervals=[ival for aa,ival in zip(alpha_e,self.intervals) if abs(aa)!=0]\n",
    "        return alpha_e"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "plot "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def makeplot(thisEM,alpha_e=None,fname=None,startFlag=True):\n",
    "    \"\"\"Plot the data of an L0_EM object together with its fitted curve.\n",
    "\n",
    "    thisEM    -- L0_EM instance; its times, y, times_finer, feature_times and\n",
    "                 feature_peaks attributes and evaluate_finer method are used\n",
    "    alpha_e   -- coefficients to plot; defaults to thisEM.alpha_e\n",
    "    fname     -- if given, the figure is saved as ./images/fig<fname>.png\n",
    "                 instead of being shown\n",
    "    startFlag -- if True, mark each feature at (feature_times, feature_peaks)\n",
    "\n",
    "    Returns the path of the saved image, or None when the figure was shown.\n",
    "    \"\"\"\n",
    "    alpha_e=thisEM.alpha_e if alpha_e is None else alpha_e\n",
    "    image_prefix=\"./images/fig\"\n",
    "    image_suffix=\".png\"\n",
    "    temp=None\n",
    "    if fname is not None:\n",
    "        plotter.ioff()\n",
    "        temp=image_prefix+str(fname)+image_suffix\n",
    "\n",
    "    yvals_finer=thisEM.evaluate_finer(alpha_e)\n",
    "    T_min=min(thisEM.times)\n",
    "    T_max=max(thisEM.times)\n",
    "    y_min=min(numpy.ravel(thisEM.y))\n",
    "\n",
    "    # both axes are plotted relative to the smallest time / latitude\n",
    "    fig=plotter.figure()\n",
    "    plotter.plot(thisEM.times_finer-T_min,yvals_finer-y_min,'g',linewidth=2)\n",
    "    plotter.plot(thisEM.times-T_min,thisEM.y-y_min,'ro',linestyle='--',linewidth=4)\n",
    "    if startFlag:\n",
    "        # asarray: the attributes are plain lists until findfeatures has run\n",
    "        feature_t=numpy.asarray(thisEM.feature_times)-T_min\n",
    "        feature_y=numpy.asarray(thisEM.feature_peaks)-y_min\n",
    "        plotter.plot(feature_t,feature_y,'bo',ms=10)\n",
    "    dy=numpy.ptp(thisEM.y)\n",
    "    plotter.xlim((0,T_max-T_min))\n",
    "    plotter.ylim((-0.25*dy,1.5*dy))\n",
    "    plotter.xlabel(\"Timestamp\")\n",
    "    plotter.ylabel(\"Latitude\")\n",
    "    if temp is None:\n",
    "        # show() displays the open figures; it does not take a figure argument\n",
    "        plotter.show()\n",
    "        return None\n",
    "    fig.savefig(temp)\n",
    "    plotter.close(fig)\n",
    "    return temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def makefname(IMEI,varkappa=None,delta=None,combineFlag=None):\n",
    "    \"\"\"Build a file-name stem that encodes the parameters of one run.\n",
    "\n",
    "    The IMEI is always included; each of varkappa, delta and combineFlag is\n",
    "    included only when it is not None.  Parts are joined with \"_\" and every\n",
    "    \".\" in the result is spelled out as \"point\".\n",
    "    \"\"\"\n",
    "    optional_parts=[(\"vkapequals\",varkappa),(\"deltaequals\",delta),(\"combineFlagequals\",combineFlag)]\n",
    "    pieces=[\"Hequals\"+str(IMEI)]\n",
    "    pieces.extend(label+str(value) for label,value in optional_parts if value is not None)\n",
    "    return \"_\".join(pieces).replace(\".\",\"point\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DATE=gd.dateSet[0]\n",
    "HEIGHT=0.0002\n",
    "WIDTHS=[200,300,400,500]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "extract data for harvester and visualize it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "IMEI=gd.IMEISet[0] #should be either 0,1,2, or 3\n",
    "\n",
    "raw_data=gd.get(IMEI,DATE)\n",
    "data=raw_data#[0:500]\n",
    "print(\"size of data: \",len(data))\n",
    "tvals=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n",
    "\n",
    "lats=numpy.array(data[\"Latitude\"])\n",
    "dlats=numpy.ptp(lats)\n",
    "plotter.figure()\n",
    "plotter.plot(tvals-min(tvals),lats-numpy.min(lats),'ro',linestyle='--',linewidth=4)\n",
    "plotter.xlabel(\"Timestamp (seconds)\")\n",
    "plotter.ylabel(\"Latitude\")\n",
    "plotter.ylim(-0.1*dlats,1.1*dlats)\n",
    "plotter.show()\n",
    "print(\"h0_latitude\")\n",
    "#plotter.savefig(\"IMEI_0_lat\"+imagesuffix)\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "short example of Box approximation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "yvals=lats\n",
    "\n",
    "tvals_short=numpy.array(tvals[0:50])\n",
    "tvals_short-=numpy.min(tvals_short)\n",
    "yvals_short=yvals[0:50]\n",
    "print(\"mean of yvals_short: \",numpy.mean(yvals_short))\n",
    "yvals_short_min=min(yvals_short)\n",
    "dy=numpy.ptp(yvals_short)\n",
    "\n",
    "BoxA=Box(height=0.0003,width=200,shift=515)\n",
    "BoxB=Box(height=-.00026,width=400,shift=2550)\n",
    "tvals_short_finer=numpy.linspace(0,numpy.max(tvals_short),len(tvals_short)*N_finer)\n",
    "yvals_short_finer_box=numpy.mean(yvals_short)\n",
    "yvals_short_finer_box+=numpy.array([BoxA.eval(tt) for tt in tvals_short_finer])\n",
    "yvals_short_finer_box+=numpy.array([BoxB.eval(tt) for tt in tvals_short_finer])\n",
    "plotter.figure()\n",
    "plotter.plot(tvals_short,yvals_short-yvals_short_min,'ro',linestyle='--',linewidth=4)\n",
    "plotter.plot(tvals_short_finer,yvals_short_finer_box-yvals_short_min,'g',linewidth=2)\n",
    "plotter.xlabel(\"Timestamp (seconds)\")\n",
    "plotter.ylabel(\"Latitude\")\n",
    "plotter.ylim(-0.1*dy,1.1*dy)\n",
    "plotter.show()\n",
    "print(\"h0_reduced_boxexample\")\n",
    "#plotter.savefig(\"IMEI_0_short_box\"+imagesuffix)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "constants"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "TVALS=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n",
    "SHIFTS=TVALS\n",
    "N_ITER=30\n",
    "\n",
    "\n",
    "print(\"making feature list\",flush=True)\n",
    "FEATURES=[]\n",
    "for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):\n",
    "    FEATURES.append(Box(height=HEIGHT,width=w,shift=s))    \n",
    "print(\"there are \",len(FEATURES), \"features\", flush=True)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "run for L2 approximation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "EML2=L0_EM(data,FEATURES,0)\n",
    "\n",
    "alpha_e=EML2.initialize()\n",
    "  \n",
    "alpha_e=EML2.findfeatures(alpha_e=alpha_e,delta=0,combineFlag=False)\n",
    "print(\"mean of alpha_e\",numpy.mean(alpha_e))\n",
    "print(\"stdev of alpha_e\",numpy.std(alpha_e))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "plot L2 approximation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "makeplot(EML2,alpha_e,startFlag=False)#,fname=\"fourthharvester\")\n",
    "print(\"h0_L2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "histogram of alpha_e's for L^2 approximation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(\"max(alpha_e):\",numpy.max(alpha_e))\n",
    "print(\"min(alpha_e):\",numpy.min(alpha_e))\n",
    "mean_alpha_e=numpy.mean(alpha_e)\n",
    "std_alpha_e=numpy.std(alpha_e)\n",
    "print(\"mean(alpha_e)\",mean_alpha_e)\n",
    "print(\"std(alpha_e)\",std_alpha_e)\n",
    "plotter.figure()\n",
    "n, bins, patches = plotter.hist(alpha_e, bins=100, range=(-1,1), facecolor='green')\n",
    "plotter.xlabel(\"alpha\")\n",
    "plotter.ylabel(\"count\")\n",
    "plotter.show()\n",
    "print(\"h0_L2_hist\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "kappa=1E-10 (small)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "KAP=1E-10 #for box\n",
    "myEM=L0_EM(data,FEATURES,KAP)\n",
    "\n",
    "print(\"L0_EM created\", flush=True)\n",
    "alpha_e=myEM.initialize()\n",
    "dalpha=[]\n",
    "print(\"about to iterate\", flush=True)\n",
    "for n in range(N_ITER):\n",
    "    print(\"n=\",n,flush=True)\n",
    "    alpha_e=myEM.iterate(alpha_e)\n",
    "    dalpha.append(myEM.dalpha)\n",
    "    myEM.findfeatures()\n",
    "    if (myEM.stopFlag):\n",
    "        break\n",
    "        \n",
    "print(\"done\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Show that alpha does not converge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "plotter.figure()\n",
    "plotter.plot(dalpha)\n",
    "plotter.xlabel(\"iteration\")\n",
    "plotter.ylabel(\"dalpha/alpha\")\n",
    "plotter.show()\n",
    "print(\"nonconvergencefor\"+makefname(IMEI,KAP))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DELTA=False #don't threshold\n",
    "COMBINEFLAG=False #don't combine features\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(\"max(alpha_e_uncombined):\",numpy.max(alpha_e_uncombined))\n",
    "print(\"min(alpha_e_uncombined):\",numpy.min(alpha_e_uncombined))\n",
    "mean_alpha_e_uncombined=numpy.mean(alpha_e_uncombined)\n",
    "std_alpha_e_uncombined=numpy.std(alpha_e_uncombined)\n",
    "print(\"mean(alpha_e_uncombined)\",mean_alpha_e_uncombined)\n",
    "print(\"std(alpha_e_uncombined)\",std_alpha_e_uncombined)\n",
    "plotter.figure()\n",
    "n, bins, patches = plotter.hist(alpha_e_uncombined, bins=100, range=(-1,1), facecolor='green')\n",
    "plotter.xlabel(\"alpha\")\n",
    "plotter.ylabel(\"count\")\n",
    "plotter.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "kappa=1E-5 (large)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "KAP=1E-5 #for box\n",
    "myEM=L0_EM(data,FEATURES,KAP)\n",
    "print(\"L0_EM created\", flush=True)\n",
    "alpha_e=myEM.initialize()\n",
    "print(\"about to iterate\", flush=True)\n",
    "N_iter=20\n",
    "for n in range(N_iter):\n",
    "    print(\"n=\",n,flush=True)\n",
    "    alpha_e=myEM.iterate(alpha_e)\n",
    "    myEM.findfeatures()\n",
    "    if (myEM.stopFlag):\n",
    "        break\n",
    "        \n",
    "print(\"done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DELTA=False #don't threshold\n",
    "COMBINEFLAG=False #don't combine features\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "kappa=1.5E-7 (mid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "KAP=1.5E-7 #for box\n",
    "myEM=L0_EM(data,FEATURES,KAP)\n",
    "\n",
    "print(\"L0_EM created\", flush=True)\n",
    "alpha_e=myEM.initialize()\n",
    "print(\"about to iterate\", flush=True)\n",
    "N_iter=20\n",
    "for n in range(N_iter):\n",
    "    print(\"n=\",n,flush=True)\n",
    "    alpha_e=myEM.iterate(alpha_e)\n",
    "    myEM.findfeatures()\n",
    "    if (myEM.stopFlag):\n",
    "        break\n",
    "        \n",
    "print(\"done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DELTA=False #don't threshold\n",
    "COMBINEFLAG=False #don't combine features\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "DELTA=0.01 #threshold: zero out coefficients with |alpha|<0.01\n",
    "COMBINEFLAG=False #don't combine features\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DELTA=0.01 #threshold: zero out coefficients with |alpha|<0.01\n",
    "COMBINEFLAG=True #combine features\n",
    "# note: despite its name, alpha_e_uncombined holds the combined coefficients here\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))\n",
    "intervals=myEM.intervals\n",
    "picklename=\"IMEI_\"+str(IMEI)+\"_intervals.p\"\n",
    "# the with-block closes the file even if pickling fails\n",
    "with open( picklename, \"wb\" ) as picklefile:\n",
    "    pickle.dump( intervals, picklefile )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(myEM.intervals)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "harvester 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "IMEI=gd.IMEISet[1] #IMEISet is a list: index it, do not call it\n",
    "raw_data=gd.get(IMEI,DATE)\n",
    "data=raw_data#[0:500]\n",
    "TVALS=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n",
    "SHIFTS=TVALS\n",
    "N_ITER=30\n",
    "\n",
    "# one Box per (start time, width) pair; widths are listed widest first\n",
    "print(\"making feature list\",flush=True)\n",
    "FEATURES=[]\n",
    "for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):\n",
    "    FEATURES.append(Box(height=HEIGHT,width=w,shift=s))    \n",
    "print(\"there are \",len(FEATURES), \"features\", flush=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "KAP=1.5E-7 #for box\n",
    "myEM=L0_EM(data,FEATURES,KAP)\n",
    "\n",
    "print(\"L0_EM created\", flush=True)\n",
    "alpha_e=myEM.initialize()\n",
    "print(\"about to iterate\", flush=True)\n",
    "N_iter=20\n",
    "for n in range(N_iter):\n",
    "    print(\"n=\",n,flush=True)\n",
    "    alpha_e=myEM.iterate(alpha_e)\n",
    "    myEM.findfeatures()\n",
    "    if (myEM.stopFlag):\n",
    "        break\n",
    "        \n",
    "print(\"done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DELTA=0.01 #threshold: zero out coefficients with |alpha|<0.01\n",
    "COMBINEFLAG=False #don't combine features\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DELTA=0.01 #threshold: zero out coefficients with |alpha|<0.01\n",
    "COMBINEFLAG=True #combine features\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "harvester 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "IMEI=gd.IMEISet[2] #IMEISet is a list: index it, do not call it\n",
    "raw_data=gd.get(IMEI,DATE)\n",
    "data=raw_data#[0:500]\n",
    "# timestamps live in the index of the DataFrame returned by gd.get\n",
    "TVALS=numpy.array(data.index.get_level_values(\"locationTimestamp\"))\n",
    "SHIFTS=TVALS\n",
    "N_ITER=30\n",
    "\n",
    "# one Box per (start time, width) pair; widths are listed widest first\n",
    "print(\"making feature list\",flush=True)\n",
    "FEATURES=[]\n",
    "for s,w in itertools.product(sorted(SHIFTS),sorted(WIDTHS,reverse=True)):\n",
    "    FEATURES.append(Box(height=HEIGHT,width=w,shift=s))    \n",
    "print(\"there are \",len(FEATURES), \"features\", flush=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "KAP=1.5E-7 #for box\n",
    "myEM=L0_EM(data,FEATURES,KAP)\n",
    "\n",
    "print(\"L0_EM created\", flush=True)\n",
    "alpha_e=myEM.initialize()\n",
    "print(\"about to iterate\", flush=True)\n",
    "N_iter=20\n",
    "for n in range(N_iter):\n",
    "    print(\"n=\",n,flush=True)\n",
    "    alpha_e=myEM.iterate(alpha_e)\n",
    "    myEM.findfeatures()\n",
    "    if (myEM.stopFlag):\n",
    "        break\n",
    "        \n",
    "print(\"done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DELTA=0.01 #threshold: zero out coefficients with |alpha|<0.01\n",
    "COMBINEFLAG=False #don't combine features\n",
    "alpha_e_uncombined=myEM.findfeatures(alpha_e=alpha_e,delta=DELTA,combineFlag=COMBINEFLAG)\n",
    "print(\"there are \",myEM.feature_count,\"features\", flush=True)\n",
    "makeplot(myEM,alpha_e_uncombined)\n",
    "print(makefname(IMEI,KAP,DELTA,COMBINEFLAG))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,