Skip to content
Snippets Groups Projects
Commit dd834eea authored by Nischol Antao's avatar Nischol Antao
Browse files

Modified local programs to output csv data. Measured the amount of time each...

Modified local programs to output csv data. Measured the amount of time each query took on a local pc. Started a document for Challenges_Encountered
parent 2f4da7d8
No related branches found
No related tags found
No related merge requests found
Challenges
I] Question 1
a) Had to clean up the data for players whose country of origin was unknown in the database.
This was especially common for years before 1912.
II] Question 2
a) The Salaries.csv file contained incorrect data in the teamID field for the year 2016,
for some of the teams in the database. The rest of the database uses a fixed convention for teamID;
however, this convention was changed for the year 2016 in the Salaries.csv file. When a database
join was performed between the data in the Salaries.csv file and the Teams.csv file, this resulted
in the salary data for 12 teams being omitted from the results. To fix this, the data in the
Salaries.csv file had to be manually cleaned to match the convention used in the rest of the database.
\ No newline at end of file
File added
This diff is collapsed.
This diff is collapsed.
......@@ -8,10 +8,15 @@
# Copyright: (c) antaonn 2018
# Licence: <your licence>
#-------------------------------------------------------------------------------
import time
starttime = time.time()
import os.path
import pandas as pd
import csv
# This is only needed for the Local Folder Structure in the project
my_path = os.path.abspath(os.path.dirname(__file__))
......@@ -47,9 +52,19 @@ merge_master_field_filt = merge_master_field.filter(items=['yearID', 'birthCount
# Distinct yearID values present in the filtered frame
yearlist = merge_master_field_filt['yearID'].unique()

# For every year, print one (year, country, count) line per distinct
# birthCountry value found in that year's rows.
for year in yearlist:
    merge_master_field_filt_year = merge_master_field_filt[merge_master_field_filt.yearID == year]
    yearlycountrylist = merge_master_field_filt_year.birthCountry.unique()
    for country in yearlycountrylist:
        # Rows of this year whose birthCountry equals the current country
        from_country = merge_master_field_filt_year.birthCountry == country
        # count() tallies the non-null birthCountry entries among those rows
        player_count = merge_master_field_filt_year[from_country]['birthCountry'].count()
        print (year, country, player_count)
# Export one CSV row per (year, birth country) pair, holding the number of
# matching rows, under the header Year / Country / Player Count.
# NOTE(review): 'wb' and basestring are Python 2 specific - confirm the
# interpreter version before porting.
with open('Baseball_Global_Representation_By_Year.csv', 'wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["Year", "Country", "Player Count"])
    for year in yearlist:
        merge_master_field_filt_year = merge_master_field_filt[merge_master_field_filt.yearID == year]
        yearlycountrylist = merge_master_field_filt_year.birthCountry.unique()
        for country in yearlycountrylist:
            # Only string country values are exported; anything else
            # (presumably NaN for an unknown birth country) is skipped.
            if not isinstance(country, basestring):
                continue
            # Rows of this year whose birthCountry equals the current country
            from_country = merge_master_field_filt_year.birthCountry == country
            player_count = merge_master_field_filt_year[from_country]['birthCountry'].count()
            writer.writerow([year, country, player_count])

# Report the seconds elapsed since starttime was taken
elapsed = time.time() - starttime
print (elapsed)
......@@ -8,7 +8,8 @@
# Copyright: (c) antaonn 2018
# Licence: <your licence>
#-------------------------------------------------------------------------------
import time
starttime = time.time()
import os.path
import pandas as pd
......@@ -34,5 +35,6 @@ yearly_wins = df_team_filt.groupby(['yearID', 'teamID'], as_index=False).max()
# Merge the yearly wins with the yearly salaries. No join keys are passed,
# so DataFrame.merge joins on the columns the two frames share and keeps
# only rows that match in both (pandas' default inner join).
merge = yearly_salary.merge(yearly_wins)
print merge
# Save the merged table as CSV; the relative path resolves against the
# current working directory.
merge.to_csv("Team_Salary_Versus_Wins.csv")
#print merge
# Report the seconds elapsed since starttime was taken
print (time.time()-starttime)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment