Skip to content
Snippets Groups Projects
populate_db.py 3.69 KiB
import os
import sys
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'flightmate.settings')

import django
django.setup()

from webapp.models import FlightRecords, RecordSet

def handleCSV(filename):
    lineRead = 0
    frid = 0
    csvFile = open(filename)
    lineList = []
    finalList = []
    for line in csvFile:
        if not line:
            break
        lineRead += 1
        if lineRead % 3 != 1:
            continue
        lineList = line.split(',')
        if lineRead == 1:
            keyList = lineList.copy()
            #print(keyList)
        else:
             for j in range(len(keyList)):
                 if j == 0:
                     dict = {keyList[j]: lineList[j]}
                 else:
                     dict.update({keyList[j]: lineList[j]})
             finalList.append(dict.copy())
        if lineRead % 1000 == 0:
            for r in finalList:
                add_records(frid, r["Year"], r["Month"], r["DayofMonth"], r["Origin"], r["Dest"], r["DepTime"],
                            r["CRSDepTime"], r["ArrTime"], r["CRSArrTime"], r["DepDelay"], r["ArrDelay"],
                            r["UniqueCarrier"], r["FlightNum"])
                frid += 1
            finalList.clear()
            print("inserted 1000 records")
    csvFile.close()


def convertTime(time):
    s = time.split(".")
    hour = ""
    minute = ""
    if len(s[0]) == 3:
        hour = s[0][:1]
        minute = s[0][1:]
    elif len(s[0]) == 4:
        hour = s[0][:2]
        minute = s[0][2:]
    else:
        hour = "00"
        minute = s[0]
    if len(s) == 1:
        return hour + ":" + minute
    else:
        return hour + ":" + minute + "." + s[1]

def add_records(frid, year, month, day, origin, dest, depart_time_actual, depart_time_scheduled,
                arrival_time_actual, arrival_time_scheduled, dep_delay, arr_delay, airline, number):
    r = FlightRecords.objects.get_or_create(frid=frid)[0]
    r.frid = frid
    r.origin = origin
    r.dest = dest
    r.depart_date = "{0:04d}-{1:02d}-{2:02d}".format(int(year), int(month), int(day))
    r.depart_time_actual = convertTime(depart_time_actual)
    r.depart_time_scheduled = convertTime(depart_time_scheduled)
    r.arrival_time_actual = convertTime(arrival_time_actual)
    r.arrival_time_scheduled = convertTime(arrival_time_scheduled)
    if dep_delay == "":
        dep_delay = "0.0"
    r.dep_delay = float(dep_delay)
    if arr_delay == "":
        arr_delay = "0.0"
    r.arr_delay = float(arr_delay)
    r.airline = airline
    if number == "":
        number = "0"
    r.number = int(number)
    r.save()
    return r

def add_set():
    attr = ["airline", "dest", "origin"]
    for a in attr:
        r = RecordSet.objects.get_or_create(name=a)[0]
        r.name = a
        objs = FlightRecords.objects.order_by(a).values(a).distinct()
        posList = []
        for o in objs:
            posList.append(o[a])
        r.value = "|".join(posList)
        r.save()
    #print(FlightRecords.objects.values('airline').distinct())

""""
def populate(filename):
    while True:
        recordList = handleCSV(filename)
        if len(recordList) == 0:
            break
        frid = 0
        numRecords = 0
        for r in recordList:
            add_records(frid, r["Year"], r["Month"], r["DayofMonth"], r["Origin"], r["Dest"], r["DepTime"],
                    r["CRSDepTime"], r["ArrTime"], r["CRSArrTime"], r["DepDelay"], r["ArrDelay"],
                    r["UniqueCarrier"], r["FlightNum"])
            frid += 1
            # print(frid)
            numRecords += 1
            if numRecords % 1000 == 0:
                print("inserted 1000 records")
    #add_set()
"""


if __name__ == '__main__':
    print("Starting populating DB...")
    handleCSV(sys.argv[1])