From b26dfba99dfd1e96602a03ea765e0c6692278175 Mon Sep 17 00:00:00 2001 From: Vaibhav Karve <vaibhavskarve@gmail.com> Date: Sun, 26 Jul 2020 11:42:00 -0500 Subject: [PATCH] add EntriesD.ipynb and LinkSizes.ipynb --- .../ReadData-checkpoint.ipynb | 35 ++- ReadData/EntriesD.ipynb | 186 ++++++++++++++++ ReadData/LinkSizes.ipynb | 204 ++++++++++++++++++ 3 files changed, 418 insertions(+), 7 deletions(-) create mode 100644 ReadData/EntriesD.ipynb create mode 100644 ReadData/LinkSizes.ipynb diff --git a/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb b/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb index d3f1f59..1a31605 100644 --- a/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb +++ b/ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -23,9 +23,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "link_id,begin_node_id,end_node_id,begin_angle,end_angle,street_length,osm_name,osm_class,osm_way_id,startX,startY,endX,endY,osm_changeset,birth_timestamp,death_timestamp\n", + "\n" + ] + } + ], "source": [ "## Create a dictionary of links with entries as:\n", "## (begin_node_id, end_node_id): links_id\n", @@ -41,9 +50,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '../DataFiles/travel_times_2011.csv'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-4-ddb8c86be344>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../MultiplicativeAlgorithm/D_2011.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mwritefile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mwritefile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'L,T,traveltime,trips\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../DataFiles/travel_times_2011.csv'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mrawfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrawfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'2011-01-01 00:00:00'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d %X'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../DataFiles/travel_times_2011.csv'" + ] + } + ], "source": [ "print('This message is a fail-safe. Comment out this line only if you know what you are doing.'); print(failsafe)\n", "\n", @@ -115,7 +136,7 @@ "source": [ "## Read from full_link_ids.txt\n", "\n", - "with open('../Multiplicative Algorithm/full_link_ids.txt', 'r') as readfile:\n", + "with open('../MultiplicativeAlgorithm/full_link_ids.txt', 'r') as readfile:\n", " full_links = [int(line.strip()) for line in readfile]\n", " print(len(full_links))" ] @@ -199,7 +220,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.8.3" } }, "nbformat": 4, diff --git a/ReadData/EntriesD.ipynb b/ReadData/EntriesD.ipynb new file mode 100644 index 0000000..842e0de --- /dev/null +++ b/ReadData/EntriesD.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Entries in $D$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we look at how many numerical entries there are in the $D$ matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "full_link_ids.txt links.csv README.md\r\n" + ] + } + ], + "source": [ + "ls ../DataFiles/" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8760, 2302)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "D = np.loadtxt('../MultiplicativeAlgorithm/D_trips.txt')\n", + "D.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "97.25287024584539" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_nan = np.count_nonzero(~np.isnan(D.flatten()))\n", + "total = D.size\n", + "non_nan/total*100" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "20165520" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "D2 = D.flatten()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "x = D2[98]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.isnan(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3548011030.0" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.nansum(D2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/ReadData/LinkSizes.ipynb b/ReadData/LinkSizes.ipynb new file mode 100644 index 0000000..2979a55 --- /dev/null +++ b/ReadData/LinkSizes.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Link sizes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## This notebook computes the average sizes of all links in our dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We read *links.csv*. This file contains data for all links (including links with missing entries)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import csv\n", + "import statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: 'link_id',\n", + " 1: 'begin_node_id',\n", + " 2: 'end_node_id',\n", + " 3: 'begin_angle',\n", + " 4: 'end_angle',\n", + " 5: 'street_length',\n", + " 6: 'osm_name',\n", + " 7: 'osm_class',\n", + " 8: 'osm_way_id',\n", + " 9: 'startX',\n", + " 10: 'startY',\n", + " 11: 'endX',\n", + " 12: 'endY',\n", + " 13: 'osm_changeset',\n", + " 14: 'birth_timestamp',\n", + " 15: 'death_timestamp'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open('../DataFiles/links.csv', 'r') as csvfile:\n", + " reader = csv.reader(csvfile)\n", + " header, *data = list(reader)\n", + "\n", + "dict(enumerate(header))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are interested in entry #5 i.e. `street_length`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean = 132.98258543251998\n", + "Median = 95.569\n", + "Mode = 79.259\n", + "Std.Dev = 107.84111322501701\n", + "Minimum = 2.806\n", + "Maximum = 3937.115\n", + "Total = 260855\n" + ] + } + ], + "source": [ + "street_lengths = [row[5] for row in data]\n", + "street_lengths = list(map(float, street_lengths)) # str -> float\n", + "print('Mean =', statistics.mean(street_lengths))\n", + "print('Median =', statistics.median(street_lengths))\n", + "print('Mode =', statistics.mode(street_lengths))\n", + "print('Std.Dev =', statistics.stdev(street_lengths))\n", + "print('Minimum =', min(street_lengths))\n", + "print('Maximum =', max(street_lengths))\n", + "print('Total =', len(street_lengths))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now read data for only the 2302 links we have chosen for our analysis." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../DataFiles/full_link_ids.txt', 'r') as txtfile:\n", + " link_ids = [line.strip() for line in txtfile.readlines()]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "full_links = [row for row in data if row[0] in link_ids]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean = 106.40565682015638\n", + "Median = 79.981\n", + "Mode = 78.523\n", + "Std.Dev = 133.08834141783507\n", + "Minimum = 40.005\n", + "Maximum = 2676.248\n", + "Total = 2302\n" + ] + } + ], + "source": [ + "street_lengths = [row[5] for row in full_links]\n", + "street_lengths = list(map(float, street_lengths)) # str -> float\n", + "print('Mean =', statistics.mean(street_lengths))\n", + "print('Median =', statistics.median(street_lengths))\n", + "print('Mode =', statistics.mode(street_lengths))\n", + "print('Std.Dev =', statistics.stdev(street_lengths))\n", + "print('Minimum =', min(street_lengths))\n", + "print('Maximum =', max(street_lengths))\n", + "print('Total =', len(street_lengths))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- GitLab