Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
CSNMF
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
TrafficPatterns
CSNMF
Commits
b26dfba9
Commit
b26dfba9
authored
4 years ago
by
Vaibhav Karve
Browse files
Options
Downloads
Patches
Plain Diff
add EntriesD.ipynb and LinkSizes.ipynb
parent
a01a7143
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb
+28
-7
28 additions, 7 deletions
ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb
ReadData/EntriesD.ipynb
+186
-0
186 additions, 0 deletions
ReadData/EntriesD.ipynb
ReadData/LinkSizes.ipynb
+204
-0
204 additions, 0 deletions
ReadData/LinkSizes.ipynb
with
418 additions
and
7 deletions
ReadData/.ipynb_checkpoints/ReadData-checkpoint.ipynb
+
28
−
7
View file @
b26dfba9
...
...
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count":
null
,
"execution_count":
1
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -23,9 +23,18 @@
},
{
"cell_type": "code",
"execution_count":
null
,
"execution_count":
2
,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"link_id,begin_node_id,end_node_id,begin_angle,end_angle,street_length,osm_name,osm_class,osm_way_id,startX,startY,endX,endY,osm_changeset,birth_timestamp,death_timestamp\n",
"\n"
]
}
],
"source": [
"## Create a dictionary of links with entries as:\n",
"## (begin_node_id, end_node_id): links_id\n",
...
...
@@ -41,9 +50,21 @@
},
{
"cell_type": "code",
"execution_count":
null
,
"execution_count":
4
,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: '../DataFiles/travel_times_2011.csv'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-ddb8c86be344>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../MultiplicativeAlgorithm/D_2011.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mwritefile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mwritefile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'L,T,traveltime,trips\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../DataFiles/travel_times_2011.csv'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mrawfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrawfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'2011-01-01 00:00:00'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d %X'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../DataFiles/travel_times_2011.csv'"
]
}
],
"source": [
"print('This message is a fail-safe. Comment out this line only if you know what you are doing.'); print(failsafe)\n",
"\n",
...
...
@@ -115,7 +136,7 @@
"source": [
"## Read from full_link_ids.txt\n",
"\n",
"with open('../Multiplicative
Algorithm/full_link_ids.txt', 'r') as readfile:\n",
"with open('../MultiplicativeAlgorithm/full_link_ids.txt', 'r') as readfile:\n",
" full_links = [int(line.strip()) for line in readfile]\n",
" print(len(full_links))"
]
...
...
@@ -199,7 +220,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.
6.6
"
"version": "3.
8.3
"
}
},
"nbformat": 4,
...
...
%% Cell type:markdown id: tags:
# This notebook reads Taxisim datafiles generated by Dan Work and Brian Donovan, cleans them up, and preps them for NMF.
## Warning: Do not run this notebook unless absolutely necessary. It takes a long time to run!
This notebook has been included here mostly for the sake of completion, and not for actual execution.
%% Cell type:code id: tags:
```python
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
```
%% Cell type:code id: tags:
```python
## Create a dictionary of links with entries as:
## (begin_node_id, end_node_id): links_id
with open('../DataFiles/links.csv') as linkfile:
    print(linkfile.readline())
    link_dict = {}
    for counter, line in enumerate(linkfile):
        link_id, begin_node, end_node = line[:-1].split(',')[0:3]
        link_dict[(begin_node, end_node)] = link_id
link_dict[('0', '0')] = '0'
```
%% Output
link_id,begin_node_id,end_node_id,begin_angle,end_angle,street_length,osm_name,osm_class,osm_way_id,startX,startY,endX,endY,osm_changeset,birth_timestamp,death_timestamp
%% Cell type:code id: tags:
```python
print('This message is a fail-safe. Comment out this line only if you know what you are doing.'); print(failsafe)

## Create a file called `D_2011.csv` that has as a row:
## L, T, traveltime, trips
with open('../MultiplicativeAlgorithm/D_2011.csv', 'w') as writefile:
    writefile.write('L,T,traveltime,trips\n')
    with open('../DataFiles/travel_times_2011.csv') as rawfile:
        print(rawfile.readline())
        start_time = dt.datetime.strptime('2011-01-01 00:00:00', '%Y-%m-%d %X')
        for counter, line in enumerate(rawfile):
            line = line[:-1]
            begin_node_id, end_node_id, datetime, traveltime, trips = line.split(',')
            L = link_dict[(begin_node_id, end_node_id)]
            datetime = dt.datetime.strptime(datetime, '%Y-%m-%d %X')
            T = str(int((datetime - start_time).total_seconds() / 3600))
            writefile.write(','.join([L, T, traveltime, trips]) + '\n')
            if counter % 1000000 == 0:
                print(counter)
            if counter > 20:
                break
```
%% Output
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-4-ddb8c86be344> in <module>
6 with open('../MultiplicativeAlgorithm/D_2011.csv', 'w') as writefile:
7 writefile.write('L,T,traveltime,trips\n')
----> 8 with open('../DataFiles/travel_times_2011.csv') as rawfile:
9 print(rawfile.readline())
10 start_time = dt.datetime.strptime('2011-01-01 00:00:00', '%Y-%m-%d %X')
FileNotFoundError: [Errno 2] No such file or directory: '../DataFiles/travel_times_2011.csv'
%% Cell type:code id: tags:
```python
## Count the number of data-points throughout the year for each link.
# This helps with separating full_links
print('This message is a fail-safe. Comment out this line only if you know what you are doing.'); print(failsafe)

link_data_count = {}
with open('../MultiplicativeAlgorithm/D_2011.csv', 'r') as readfile:
    readfile.readline()
    for counter, line in enumerate(readfile):
        L, T, traveltimes, trips = line[:-1].split(',')
        if L not in link_data_count.keys():
            link_data_count[L] = 0
        else:
            link_data_count[L] += 1
```
%% Cell type:code id: tags:
```
python
## Write to full_link_ids.txt
print
(
'
This message is a fail-safe. Comment out this line only if you know what you are doing.
'
);
print
(
failsafe
)
with
open
(
'
../MultiplicativeAlgorithm/full_link_ids.txt
'
,
'
w
'
)
as
writefile
:
full_links
=
sorted
([
int
(
link
)
for
link
in
link_data_count
if
link_data_count
[
link
]
>=
8760
-
721
])
full_links
=
full_links
[
1
:]
print
(
len
(
full_links
))
full_links
=
map
(
str
,
full_links
)
writefile
.
write
(
'
\n
'
.
join
(
full_links
))
```
%% Cell type:code id: tags:
```
python
## Read from full_link_ids.txt
with
open
(
'
../Multiplicative
Algorithm/full_link_ids.txt
'
,
'
r
'
)
as
readfile
:
with
open
(
'
../MultiplicativeAlgorithm/full_link_ids.txt
'
,
'
r
'
)
as
readfile
:
full_links
=
[
int
(
line
.
strip
())
for
line
in
readfile
]
print
(
len
(
full_links
))
```
%% Cell type:code id: tags:
```python
## Separate traffic data for full_links from the big data file.
print('This message is a fail-safe. Comment out this line only if you know what you are doing.'); print(failsafe)

full_links_data = []
progress = 8761
with open('../MultiplicativeAlgorithm/D_2011.csv', 'r') as readfile:
    header = readfile.readline()
    for counter, line in enumerate(readfile):
        line = line[:-1]
        L, T, traveltimes, trips = line.split(',')
        if L in full_links:
            full_links_data.append(line)
        if int(T) < progress:
            progress = int(T)
            print(progress)
with open('../MultiplicativeAlgorithm/D_2011_full_links.csv', 'w') as writefile:
    writefile.write(header)
    writefile.write('\n'.join(full_links_data))
```
%% Cell type:code id: tags:
```python
## Write data to D_trips.txt and D_traveltimes.txt
print('This message is a fail-safe. Comment out this line only if you know what you are doing.'); print(failsafe)

D_trips = np.zeros((8760, 2302))
D_traveltimes = np.zeros((8760, 2302))
full_links = sorted([int(link) for link in link_data_count if link_data_count[link] >= 8760 - 721])
full_links = full_links[1:]
progress = 8761
with open('../MultiplicativeAlgorithm/D_2011_full_links.csv', 'r') as readfile:
    header = readfile.readline()
    for line in readfile:
        line = line[:-1]
        L, T, traveltimes, trips = line.split(',')
        L, T, traveltimes, trips = full_links.index(int(L)), int(T), float(traveltimes), int(trips)
        D_trips[T, L] += trips
        D_traveltimes[T, L] += traveltimes
        if T < progress:
            progress = T
            print(progress)
D_trips, D_traveltimes = D_trips.astype('float'), D_traveltimes.astype('float')
D_trips[D_trips == 0] = np.nan
D_traveltimes[D_traveltimes == 0] = np.nan
np.savetxt('../MultiplicativeAlgorithm/D_trips.txt', D_trips)
np.savetxt('../MultiplicativeAlgorithm/D_traveltimes.txt', D_traveltimes)
```
...
...
This diff is collapsed.
Click to expand it.
ReadData/EntriesD.ipynb
0 → 100644
+
186
−
0
View file @
b26dfba9
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Entries in $D$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook, we look at how many numerical entries there are in the $D$ matrix."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"full_link_ids.txt links.csv README.md\r\n"
]
}
],
"source": [
"ls ../DataFiles/"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(8760, 2302)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"D = np.loadtxt('../MultiplicativeAlgorithm/D_trips.txt')\n",
"D.shape"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"97.25287024584539"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"non_nan = np.count_nonzero(~np.isnan(D.flatten()))\n",
"total = D.size\n",
"non_nan/total*100"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20165520"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"D2 = D.flatten()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"x = D2[98]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.isnan(x)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3548011030.0"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.nansum(D2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
%% Cell type:markdown id: tags:
# Entries in $D$
%% Cell type:markdown id: tags:
In this notebook, we look at how many numerical entries there are in the $D$ matrix.
%% Cell type:code id: tags:
```python
import numpy as np
```
%% Cell type:code id: tags:
```python
ls ../DataFiles/
```
%% Output
full_link_ids.txt links.csv README.md
%% Cell type:code id: tags:
```python
D = np.loadtxt('../MultiplicativeAlgorithm/D_trips.txt')
D.shape
```
%% Output
(8760, 2302)
%% Cell type:code id: tags:
```python
non_nan = np.count_nonzero(~np.isnan(D.flatten()))
total = D.size
non_nan/total*100
```
%% Output
97.25287024584539
%% Cell type:code id: tags:
```python
total
```
%% Output
20165520
%% Cell type:code id: tags:
```python
D2 = D.flatten()
```
%% Cell type:code id: tags:
```
python
x
=
D2
[
98
]
```
%% Cell type:code id: tags:
```
python
np
.
isnan
(
x
)
```
%% Output
True
%% Cell type:code id: tags:
```python
np.nansum(D2)
```
%% Output
3548011030.0
This diff is collapsed.
Click to expand it.
ReadData/LinkSizes.ipynb
0 → 100644
+
204
−
0
View file @
b26dfba9
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Link sizes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## This notebook computes the average sizes of all links in our dataset."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We read *links.csv*. This file contains data for all links (including links with missing entries)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import statistics"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: 'link_id',\n",
" 1: 'begin_node_id',\n",
" 2: 'end_node_id',\n",
" 3: 'begin_angle',\n",
" 4: 'end_angle',\n",
" 5: 'street_length',\n",
" 6: 'osm_name',\n",
" 7: 'osm_class',\n",
" 8: 'osm_way_id',\n",
" 9: 'startX',\n",
" 10: 'startY',\n",
" 11: 'endX',\n",
" 12: 'endY',\n",
" 13: 'osm_changeset',\n",
" 14: 'birth_timestamp',\n",
" 15: 'death_timestamp'}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('../DataFiles/links.csv', 'r') as csvfile:\n",
" reader = csv.reader(csvfile)\n",
" header, *data = list(reader)\n",
"\n",
"dict(enumerate(header))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We are interested in entry #5 i.e. `street_length`"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean = 132.98258543251998\n",
"Median = 95.569\n",
"Mode = 79.259\n",
"Std.Dev = 107.84111322501701\n",
"Minimum = 2.806\n",
"Maximum = 3937.115\n",
"Total = 260855\n"
]
}
],
"source": [
"street_lengths = [row[5] for row in data]\n",
"street_lengths = list(map(float, street_lengths)) # str -> float\n",
"print('Mean =', statistics.mean(street_lengths))\n",
"print('Median =', statistics.median(street_lengths))\n",
"print('Mode =', statistics.mode(street_lengths))\n",
"print('Std.Dev =', statistics.stdev(street_lengths))\n",
"print('Minimum =', min(street_lengths))\n",
"print('Maximum =', max(street_lengths))\n",
"print('Total =', len(street_lengths))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We now read data for only the 2302 links we have chosen for our analysis."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"with open('../DataFiles/full_link_ids.txt', 'r') as txtfile:\n",
" link_ids = [line.strip() for line in txtfile.readlines()]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"full_links = [row for row in data if row[0] in link_ids]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean = 106.40565682015638\n",
"Median = 79.981\n",
"Mode = 78.523\n",
"Std.Dev = 133.08834141783507\n",
"Minimum = 40.005\n",
"Maximum = 2676.248\n",
"Total = 2302\n"
]
}
],
"source": [
"street_lengths = [row[5] for row in full_links]\n",
"street_lengths = list(map(float, street_lengths)) # str -> float\n",
"print('Mean =', statistics.mean(street_lengths))\n",
"print('Median =', statistics.median(street_lengths))\n",
"print('Mode =', statistics.mode(street_lengths))\n",
"print('Std.Dev =', statistics.stdev(street_lengths))\n",
"print('Minimum =', min(street_lengths))\n",
"print('Maximum =', max(street_lengths))\n",
"print('Total =', len(street_lengths))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
%% Cell type:markdown id: tags:
# Link sizes
%% Cell type:markdown id: tags:
## This notebook computes the average sizes of all links in our dataset.
%% Cell type:markdown id: tags:
We read *links.csv*. This file contains data for all links (including links with missing entries).
%% Cell type:code id: tags:
```python
import csv
import statistics
```
%% Cell type:code id: tags:
```python
with open('../DataFiles/links.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    header, *data = list(reader)

dict(enumerate(header))
```
%% Output
{0: 'link_id',
1: 'begin_node_id',
2: 'end_node_id',
3: 'begin_angle',
4: 'end_angle',
5: 'street_length',
6: 'osm_name',
7: 'osm_class',
8: 'osm_way_id',
9: 'startX',
10: 'startY',
11: 'endX',
12: 'endY',
13: 'osm_changeset',
14: 'birth_timestamp',
15: 'death_timestamp'}
%% Cell type:markdown id: tags:
---
%% Cell type:markdown id: tags:
We are interested in entry #5 i.e. `street_length`
%% Cell type:code id: tags:
```python
street_lengths = [row[5] for row in data]
street_lengths = list(map(float, street_lengths))  # str -> float
print('Mean =', statistics.mean(street_lengths))
print('Median =', statistics.median(street_lengths))
print('Mode =', statistics.mode(street_lengths))
print('Std.Dev =', statistics.stdev(street_lengths))
print('Minimum =', min(street_lengths))
print('Maximum =', max(street_lengths))
print('Total =', len(street_lengths))
```
%% Output
Mean = 132.98258543251998
Median = 95.569
Mode = 79.259
Std.Dev = 107.84111322501701
Minimum = 2.806
Maximum = 3937.115
Total = 260855
%% Cell type:markdown id: tags:
---
%% Cell type:markdown id: tags:
We now read data for only the 2302 links we have chosen for our analysis.
%% Cell type:code id: tags:
```python
with open('../DataFiles/full_link_ids.txt', 'r') as txtfile:
    link_ids = [line.strip() for line in txtfile.readlines()]
```
%% Cell type:code id: tags:
```
python
full_links
=
[
row
for
row
in
data
if
row
[
0
]
in
link_ids
]
```
%% Cell type:code id: tags:
```python
street_lengths = [row[5] for row in full_links]
street_lengths = list(map(float, street_lengths))  # str -> float
print('Mean =', statistics.mean(street_lengths))
print('Median =', statistics.median(street_lengths))
print('Mode =', statistics.mode(street_lengths))
print('Std.Dev =', statistics.stdev(street_lengths))
print('Minimum =', min(street_lengths))
print('Maximum =', max(street_lengths))
print('Total =', len(street_lengths))
```
%% Output
Mean = 106.40565682015638
Median = 79.981
Mode = 78.523
Std.Dev = 133.08834141783507
Minimum = 40.005
Maximum = 2676.248
Total = 2302
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment