populate_db.py 3.69 KiB
import os
import sys
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'flightmate.settings')
import django
django.setup()
from webapp.models import FlightRecords, RecordSet
def handleCSV(filename):
lineRead = 0
frid = 0
csvFile = open(filename)
lineList = []
finalList = []
for line in csvFile:
if not line:
break
lineRead += 1
if lineRead % 3 != 1:
continue
lineList = line.split(',')
if lineRead == 1:
keyList = lineList.copy()
#print(keyList)
else:
for j in range(len(keyList)):
if j == 0:
dict = {keyList[j]: lineList[j]}
else:
dict.update({keyList[j]: lineList[j]})
finalList.append(dict.copy())
if lineRead % 1000 == 0:
for r in finalList:
add_records(frid, r["Year"], r["Month"], r["DayofMonth"], r["Origin"], r["Dest"], r["DepTime"],
r["CRSDepTime"], r["ArrTime"], r["CRSArrTime"], r["DepDelay"], r["ArrDelay"],
r["UniqueCarrier"], r["FlightNum"])
frid += 1
finalList.clear()
print("inserted 1000 records")
csvFile.close()
def convertTime(time):
s = time.split(".")
hour = ""
minute = ""
if len(s[0]) == 3:
hour = s[0][:1]
minute = s[0][1:]
elif len(s[0]) == 4:
hour = s[0][:2]
minute = s[0][2:]
else:
hour = "00"
minute = s[0]
if len(s) == 1:
return hour + ":" + minute
else:
return hour + ":" + minute + "." + s[1]
def add_records(frid, year, month, day, origin, dest, depart_time_actual, depart_time_scheduled,
arrival_time_actual, arrival_time_scheduled, dep_delay, arr_delay, airline, number):
r = FlightRecords.objects.get_or_create(frid=frid)[0]
r.frid = frid
r.origin = origin
r.dest = dest
r.depart_date = "{0:04d}-{1:02d}-{2:02d}".format(int(year), int(month), int(day))
r.depart_time_actual = convertTime(depart_time_actual)
r.depart_time_scheduled = convertTime(depart_time_scheduled)
r.arrival_time_actual = convertTime(arrival_time_actual)
r.arrival_time_scheduled = convertTime(arrival_time_scheduled)
if dep_delay == "":
dep_delay = "0.0"
r.dep_delay = float(dep_delay)
if arr_delay == "":
arr_delay = "0.0"
r.arr_delay = float(arr_delay)
r.airline = airline
if number == "":
number = "0"
r.number = int(number)
r.save()
return r
def add_set():
attr = ["airline", "dest", "origin"]
for a in attr:
r = RecordSet.objects.get_or_create(name=a)[0]
r.name = a
objs = FlightRecords.objects.order_by(a).values(a).distinct()
posList = []
for o in objs:
posList.append(o[a])
r.value = "|".join(posList)
r.save()
#print(FlightRecords.objects.values('airline').distinct())
""""
def populate(filename):
while True:
recordList = handleCSV(filename)
if len(recordList) == 0:
break
frid = 0
numRecords = 0
for r in recordList:
add_records(frid, r["Year"], r["Month"], r["DayofMonth"], r["Origin"], r["Dest"], r["DepTime"],
r["CRSDepTime"], r["ArrTime"], r["CRSArrTime"], r["DepDelay"], r["ArrDelay"],
r["UniqueCarrier"], r["FlightNum"])
frid += 1
# print(frid)
numRecords += 1
if numRecords % 1000 == 0:
print("inserted 1000 records")
#add_set()
"""
if __name__ == '__main__':
print("Starting populating DB...")
handleCSV(sys.argv[1])