Modified local programs to output csv data. Measured the amount of time each...

Modified local programs to output csv data. Measured the amount of time each query took on a local pc. Started a document for Challenges_Encountered

Modified local programs to output csv data. Measured the amount of time each...
dd834eea · Nischol Antao · 2f4da7d8 · dd834eea · dd834eea · dd834eea
Commit dd834eea authored 6 years ago by Nischol Antao
--- a/docs/Challenges_Encountered.txt
+++ b/docs/Challenges_Encountered.txt
+Challenges 
+
+I] Question 1
+
+a) Had to clean up the data for players whose country of origin was unknown in the database.
+This was especially common for years before 1912.
+
+II] Question 2
+
+a) The Salaries.csv file contained incorrect data in the teamID field for some of the teams
+in the year 2016. The rest of the database uses a fixed convention for teamID; however, the
+Salaries.csv file used a different convention for 2016. As a result, when a database join
+was performed between the Salaries.csv data and the Teams.csv data, the salary data for
+12 teams was omitted from the results. To fix this, the data in the Salaries.csv file had to
+be manually cleaned to match the convention used in the rest of the database.
\ No newline at end of file
--- a/docs/Performance_Tracking.xlsx
+++ b/docs/Performance_Tracking.xlsx
--- a/reports/Baseball_Global_Representation_By_Year.csv
+++ b/reports/Baseball_Global_Representation_By_Year.csv
--- a/reports/Team_Salary_Versus_Wins.csv
+++ b/reports/Team_Salary_Versus_Wins.csv
--- a/src/Pandas_Quest_1_Player_Country_Origins.py
+++ b/src/Pandas_Quest_1_Player_Country_Origins.py
@@ -8,10 +8,15 @@
 # Copyright:   (c) antaonn 2018
 # Licence:     <your licence>
 #-------------------------------------------------------------------------------
+import time
+starttime = time.time()

 import os.path
 import pandas as pd

+import csv
+
+

 # This is only needed for the Local Folder Structure in the project
 my_path = os.path.abspath(os.path.dirname(__file__))
@@ -47,9 +52,19 @@ merge_master_field_filt = merge_master_field.filter(items=['yearID', 'birthCount

 yearlist = merge_master_field_filt.yearID.unique()

-for year in yearlist:
-    merge_master_field_filt_year = merge_master_field_filt[(merge_master_field_filt.yearID == year)]
-    yearlycountrylist = merge_master_field_filt_year.birthCountry.unique()
-    for country in yearlycountrylist:
-        print (year, country, merge_master_field_filt_year[(merge_master_field_filt_year.birthCountry == country)]['birthCountry'].count())

+
+with open('Baseball_Global_Representation_By_Year.csv', 'wb') as csvfile:
+    writer = csv.writer(csvfile, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
+    writer.writerow(["Year", "Country", "Player Count"])
+
+
+    for year in yearlist:
+        merge_master_field_filt_year = merge_master_field_filt[(merge_master_field_filt.yearID == year)]
+        yearlycountrylist = merge_master_field_filt_year.birthCountry.unique()
+        for country in yearlycountrylist:
+            if isinstance(country, basestring):
+            #print (year, country, merge_master_field_filt_year[(merge_master_field_filt_year.birthCountry == country)]['birthCountry'].count())
+                writer.writerow([year, country, merge_master_field_filt_year[(merge_master_field_filt_year.birthCountry == country)]['birthCountry'].count()])
+
+print (time.time()-starttime)
--- a/src/Pandas_Quest_2_Relation_Salary_Wins.py
+++ b/src/Pandas_Quest_2_Relation_Salary_Wins.py
@@ -8,7 +8,8 @@
 # Copyright:   (c) antaonn 2018
 # Licence:     <your licence>
 #-------------------------------------------------------------------------------
-
+import time
+starttime = time.time()
 import os.path
 import pandas as pd

@@ -34,5 +35,6 @@ yearly_wins = df_team_filt.groupby(['yearID', 'teamID'],  as_index=False).max()

 # Merge the Yearly Wins with the Yearly Salaries
 merge =  yearly_salary.merge(yearly_wins)
-
-print merge
+merge.to_csv("Team_Salary_Versus_Wins.csv")
+#print merge
+print (time.time()-starttime)
\ No newline at end of file