From 9e5811acf9f8549cec5bac0380b76405750ef8e2 Mon Sep 17 00:00:00 2001
From: Nischol Antao <antao_nischol@cat.com>
Date: Mon, 23 Apr 2018 22:16:20 -0500
Subject: [PATCH] Fixed some small cosmetic issues I noticed with question 2

---
 notebooks/question2.ipynb | 18 +++++++++---------
 notebooks/question2.md    |  4 ++--
 results/question2.html    | 16 ++++++++--------
 src/question_2_pyspark.py |  4 ++--
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/notebooks/question2.ipynb b/notebooks/question2.ipynb
index c9aec23..211de42 100644
--- a/notebooks/question2.ipynb
+++ b/notebooks/question2.ipynb
@@ -12,7 +12,7 @@
     "\n",
     "This database has 27 tables. However to obtain the answer for our query above, we need to cross reference  data from 2 tables in this database. The Salaries.csv table lists every player that played in major league baseball, along with their team, and their associated salary. This data is only provided for the years 1985 and later. Its schema is listed below. \n",
     "\n",
-    "#### Table 1: Master Table Schema\n",
+    "#### Table 1: Salary Table Schema\n",
     "\n",
     "\n",
     "| Field      | Description                            |\n",
@@ -31,7 +31,7 @@
     "\n",
     "The Teams.csv table lists the Team statistics for every team, that has played the game of baseball from 1870 to 2016, along with the year those statistics were recorded. Its schema is listed below\n",
     "\n",
-    "#### Table 2 Fielding Table schema\n",
+    "#### Table 2 Team Table schema\n",
     "\n",
     "\n",
     "| Field          | Description                            |\n",
@@ -98,7 +98,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 11,
    "metadata": {
     "collapsed": false
    },
@@ -137,7 +137,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 12,
    "metadata": {
     "collapsed": true
    },
@@ -152,7 +152,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 13,
    "metadata": {
     "collapsed": false
    },
@@ -175,9 +175,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 14,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
@@ -211,7 +211,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 15,
    "metadata": {
     "collapsed": false
    },
@@ -784,7 +784,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 17,
    "metadata": {
     "collapsed": false
    },
diff --git a/notebooks/question2.md b/notebooks/question2.md
index 4934d09..75eddd5 100644
--- a/notebooks/question2.md
+++ b/notebooks/question2.md
@@ -7,7 +7,7 @@ In order to determine how the effect Team Salary expenditure has on Major League
 
 This database has 27 tables. However to obtain the answer for our query above, we need to cross reference  data from 2 tables in this database. The Salaries.csv table lists every player that played in major league baseball, along with their team, and their associated salary. This data is only provided for the years 1985 and later. Its schema is listed below. 
 
-#### Table 1: Master Table Schema
+#### Table 1: Salary Table Schema
 
 
 | Field      | Description                            |
@@ -26,7 +26,7 @@ This database has 27 tables. However to obtain the answer for our query above, w
 
 The Teams.csv table lists the Team statistics for every team, that has played the game of baseball from 1870 to 2016, along with the year those statistics were recorded. Its schema is listed below
 
-#### Table 2 Fielding Table schema
+#### Table 2 Team Table schema
 
 
 | Field          | Description                            |
diff --git a/results/question2.html b/results/question2.html
index 4cb6315..b3021ed 100644
--- a/results/question2.html
+++ b/results/question2.html
@@ -11754,7 +11754,7 @@ div#notebook {
 <h2 id="Does-money-buy-Championships?-How-have-the-Highest-Spending-Major-League-Baseball-Teams-performed-over-Time?">Does money buy Championships? How have the Highest Spending Major League Baseball Teams performed over Time?<a class="anchor-link" href="#Does-money-buy-Championships?-How-have-the-Highest-Spending-Major-League-Baseball-Teams-performed-over-Time?">&#182;</a></h2><hr>
 <p>In order to determine how the effect Team Salary expenditure has on Major League Baseball Team Performance, we look at Historical Baseball Data available on the Internet. The specific source of data chosen here is a database of baseball statistics over the years 1870 to 2016. <a href="http://www.seanlahman.com/baseball-database.html">http://www.seanlahman.com/baseball-database.html</a></p>
 <p>This database has 27 tables. However to obtain the answer for our query above, we need to cross reference  data from 2 tables in this database. The Salaries.csv table lists every player that played in major league baseball, along with their team, and their associated salary. This data is only provided for the years 1985 and later. Its schema is listed below.</p>
-<h4 id="Table-1:-Master-Table-Schema">Table 1: Master Table Schema<a class="anchor-link" href="#Table-1:-Master-Table-Schema">&#182;</a></h4><table>
+<h4 id="Table-1:-Salary-Table-Schema">Table 1: Salary Table Schema<a class="anchor-link" href="#Table-1:-Salary-Table-Schema">&#182;</a></h4><table>
 <thead><tr>
 <th>Field</th>
 <th>Description</th>
@@ -11785,7 +11785,7 @@ div#notebook {
 </table>
 <p><em>Note: At the Time of writing, the teamID in the Salaries.csv table for the year 2016 did not follow the convention of teamID's used throughout the rest of the table, and the entire database. Specifically 12 teams had teamIDs that did not match the code that had been used for their teamIDs in previous years. This data was manually cleaned to make sure it did not affect the Results obtained.</em></p>
 <p>The Teams.csv table lists the Team statistics for every team, that has played the game of baseball from 1870 to 2016, along with the year those statistics were recorded. Its schema is listed below</p>
-<h4 id="Table-2-Fielding-Table-schema">Table 2 Fielding Table schema<a class="anchor-link" href="#Table-2-Fielding-Table-schema">&#182;</a></h4><table>
+<h4 id="Table-2-Team-Table-schema">Table 2 Team Table schema<a class="anchor-link" href="#Table-2-Team-Table-schema">&#182;</a></h4><table>
 <thead><tr>
 <th>Field</th>
 <th>Description</th>
@@ -12003,7 +12003,7 @@ div#notebook {
 </div>
 <div class="cell border-box-sizing code_cell rendered">
 <div class="input">
-<div class="prompt input_prompt">In&nbsp;[1]:</div>
+<div class="prompt input_prompt">In&nbsp;[11]:</div>
 <div class="inner_cell">
     <div class="input_area">
 <div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># Import SparkContext. This is the main entry point for Spark functionality</span>
@@ -12040,7 +12040,7 @@ div#notebook {
 </div>
 <div class="cell border-box-sizing code_cell rendered">
 <div class="input">
-<div class="prompt input_prompt">In&nbsp;[2]:</div>
+<div class="prompt input_prompt">In&nbsp;[12]:</div>
 <div class="inner_cell">
     <div class="input_area">
 <div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># The Master will need to change when running on a cluster. </span>
@@ -12057,7 +12057,7 @@ div#notebook {
 </div>
 <div class="cell border-box-sizing code_cell rendered">
 <div class="input">
-<div class="prompt input_prompt">In&nbsp;[3]:</div>
+<div class="prompt input_prompt">In&nbsp;[13]:</div>
 <div class="inner_cell">
     <div class="input_area">
 <div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># We instantiate a SparkContext object with the SparkConfig</span>
@@ -12083,7 +12083,7 @@ div#notebook {
 </div>
 <div class="cell border-box-sizing code_cell rendered">
 <div class="input">
-<div class="prompt input_prompt">In&nbsp;[4]:</div>
+<div class="prompt input_prompt">In&nbsp;[14]:</div>
 <div class="inner_cell">
     <div class="input_area">
 <div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># We create a sql context object, so that we can read in csv files easily, and create a data frame</span>
@@ -12115,7 +12115,7 @@ div#notebook {
 </div>
 <div class="cell border-box-sizing code_cell rendered">
 <div class="input">
-<div class="prompt input_prompt">In&nbsp;[5]:</div>
+<div class="prompt input_prompt">In&nbsp;[15]:</div>
 <div class="inner_cell">
     <div class="input_area">
 <div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># Keep the year, team and salary data from the salary table</span>
@@ -12816,7 +12816,7 @@ only showing top 20 rows
 </div>
 <div class="cell border-box-sizing code_cell rendered">
 <div class="input">
-<div class="prompt input_prompt">In&nbsp;[23]:</div>
+<div class="prompt input_prompt">In&nbsp;[17]:</div>
 <div class="inner_cell">
     <div class="input_area">
 <div class=" highlight hl-ipython3"><pre><span></span><span class="n">sc</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
diff --git a/src/question_2_pyspark.py b/src/question_2_pyspark.py
index 0c51425..e31200b 100644
--- a/src/question_2_pyspark.py
+++ b/src/question_2_pyspark.py
@@ -10,7 +10,7 @@
 # tables in this database. The Salaries.csv table lists every player that played in major league baseball, along with their
 # team, and their associated salary. This data is only provided for the years 1985 and later. Its schema is listed below.
 #
-# #### Table 1: Master Table Schema
+# #### Table 1: Salary Table Schema
 #
 #
 # | Field      | Description                            |
@@ -33,7 +33,7 @@
 # The Teams.csv table lists the Team statistics for every team, that has played the game of baseball from 1870 to 2016,
 # along with the year those statistics were recorded. Its schema is listed below
 #
-# #### Table 2 Fielding Table schema
+# #### Table 2 Team Table schema
 #
 #
 # | Field          | Description                            |
-- 
GitLab