From b26dfba99dfd1e96602a03ea765e0c6692278175 Mon Sep 17 00:00:00 2001
From: Vaibhav Karve <vaibhavskarve@gmail.com>
Date: Sun, 26 Jul 2020 11:42:00 -0500
Subject: [PATCH] add EntriesD.ipynb and LinkSizes.ipynb

---
 .../ReadData-checkpoint.ipynb                 |  35 ++-
 ReadData/EntriesD.ipynb                       | 186 ++++++++++++++++
 ReadData/LinkSizes.ipynb                      | 204 ++++++++++++++++++
 3 files changed, 418 insertions(+), 7 deletions(-)
 create mode 100644 ReadData/EntriesD.ipynb
 create mode 100644 ReadData/LinkSizes.ipynb

diff --git a/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb b/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb
index d3f1f59..1a31605 100644
--- a/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb
+++ b/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -23,9 +23,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "link_id,begin_node_id,end_node_id,begin_angle,end_angle,street_length,osm_name,osm_class,osm_way_id,startX,startY,endX,endY,osm_changeset,birth_timestamp,death_timestamp\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "## Create a dictionary of links with entries as:\n",
     "##    (begin_node_id, end_node_id): links_id\n",
@@ -41,9 +50,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: '../DataFiles/travel_times_2011.csv'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-4-ddb8c86be344>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../MultiplicativeAlgorithm/D_2011.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mwritefile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m     \u001b[0mwritefile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'L,T,traveltime,trips\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m     \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../DataFiles/travel_times_2011.csv'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mrawfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      9\u001b[0m         \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrawfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     10\u001b[0m         \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'2011-01-01 00:00:00'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d %X'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../DataFiles/travel_times_2011.csv'"
+     ]
+    }
+   ],
    "source": [
     "print('This message is a fail-safe. Comment out this line only if you know what you are doing.'); print(failsafe)\n",
     "\n",
@@ -115,7 +136,7 @@
    "source": [
     "## Read from full_link_ids.txt\n",
     "\n",
-    "with open('../Multiplicative Algorithm/full_link_ids.txt', 'r') as readfile:\n",
+    "with open('../MultiplicativeAlgorithm/full_link_ids.txt', 'r') as readfile:\n",
     "    full_links = [int(line.strip()) for line in readfile]\n",
     "    print(len(full_links))"
    ]
@@ -199,7 +220,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,
diff --git a/ReadData/EntriesD.ipynb b/ReadData/EntriesD.ipynb
new file mode 100644
index 0000000..842e0de
--- /dev/null
+++ b/ReadData/EntriesD.ipynb
@@ -0,0 +1,186 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Entries in $D$"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this notebook, we count how many entries of the matrix $D$ are numerical (i.e. not NaN)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "full_link_ids.txt  links.csv  README.md\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "ls ../DataFiles/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(8760, 2302)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "D = np.loadtxt('../MultiplicativeAlgorithm/D_trips.txt')\n",
+    "D.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "97.25287024584539"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "non_nan = np.count_nonzero(~np.isnan(D.flatten()))\n",
+    "total = D.size\n",
+    "non_nan/total*100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "20165520"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "D2 = D.flatten()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = D2[98]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.isnan(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3548011030.0"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.nansum(D2)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/ReadData/LinkSizes.ipynb b/ReadData/LinkSizes.ipynb
new file mode 100644
index 0000000..2979a55
--- /dev/null
+++ b/ReadData/LinkSizes.ipynb
@@ -0,0 +1,204 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Link sizes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## This notebook computes summary statistics of the lengths of all links in our dataset."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We read *links.csv*.  This file contains data for all links (including links with missing entries)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import csv\n",
+    "import statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{0: 'link_id',\n",
+       " 1: 'begin_node_id',\n",
+       " 2: 'end_node_id',\n",
+       " 3: 'begin_angle',\n",
+       " 4: 'end_angle',\n",
+       " 5: 'street_length',\n",
+       " 6: 'osm_name',\n",
+       " 7: 'osm_class',\n",
+       " 8: 'osm_way_id',\n",
+       " 9: 'startX',\n",
+       " 10: 'startY',\n",
+       " 11: 'endX',\n",
+       " 12: 'endY',\n",
+       " 13: 'osm_changeset',\n",
+       " 14: 'birth_timestamp',\n",
+       " 15: 'death_timestamp'}"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "with open('../DataFiles/links.csv', 'r') as csvfile:\n",
+    "    reader = csv.reader(csvfile)\n",
+    "    header, *data = list(reader)\n",
+    "\n",
+    "dict(enumerate(header))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We are interested in column 5 (zero-indexed), i.e. `street_length`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mean    = 132.98258543251998\n",
+      "Median  = 95.569\n",
+      "Mode    = 79.259\n",
+      "Std.Dev = 107.84111322501701\n",
+      "Minimum = 2.806\n",
+      "Maximum = 3937.115\n",
+      "Total   = 260855\n"
+     ]
+    }
+   ],
+   "source": [
+    "street_lengths = [row[5] for row in data]\n",
+    "street_lengths = list(map(float, street_lengths))  # str -> float\n",
+    "print('Mean    =', statistics.mean(street_lengths))\n",
+    "print('Median  =', statistics.median(street_lengths))\n",
+    "print('Mode    =', statistics.mode(street_lengths))\n",
+    "print('Std.Dev =', statistics.stdev(street_lengths))\n",
+    "print('Minimum =', min(street_lengths))\n",
+    "print('Maximum =', max(street_lengths))\n",
+    "print('Total   =', len(street_lengths))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We now restrict the data to only the 2302 links we have chosen for our analysis (their IDs are listed in *full_link_ids.txt*)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('../DataFiles/full_link_ids.txt', 'r') as txtfile:\n",
+    "    link_ids = [line.strip() for line in txtfile.readlines()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "full_links = [row for row in data if row[0] in link_ids]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mean    = 106.40565682015638\n",
+      "Median  = 79.981\n",
+      "Mode    = 78.523\n",
+      "Std.Dev = 133.08834141783507\n",
+      "Minimum = 40.005\n",
+      "Maximum = 2676.248\n",
+      "Total   = 2302\n"
+     ]
+    }
+   ],
+   "source": [
+    "street_lengths = [row[5] for row in full_links]\n",
+    "street_lengths = list(map(float, street_lengths))  # str -> float\n",
+    "print('Mean    =', statistics.mean(street_lengths))\n",
+    "print('Median  =', statistics.median(street_lengths))\n",
+    "print('Mode    =', statistics.mode(street_lengths))\n",
+    "print('Std.Dev =', statistics.stdev(street_lengths))\n",
+    "print('Minimum =', min(street_lengths))\n",
+    "print('Maximum =', max(street_lengths))\n",
+    "print('Total   =', len(street_lengths))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab