diff --git a/.gitignore b/.gitignore
index 29924d2dca81c2d6698023cf8c853ff98e401531..2324cd311cfa5428b4137487787d349ec81edc93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
-.ipynb_checkpoints/*
-MultiplicativeAlgorithm/.ipynb_checkpoints/*
+*/.ipynb_checkpoints/
+*/__pycache__/
 MultiplicativeAlgorithm/D_trips.txt
 MultiplicativeAlgorithm/D_speeds.txt
 MultiplicativeAlgorithm/*.png
diff --git a/MultiplicativeAlgorithm/Extreme_Events.ipynb b/MultiplicativeAlgorithm/ExtremeEvents.ipynb
similarity index 99%
rename from MultiplicativeAlgorithm/Extreme_Events.ipynb
rename to MultiplicativeAlgorithm/ExtremeEvents.ipynb
index f362fa74ac6c81b30b3e8b71e2397638eb75866a..2320491bb519d053556b72de360906f638351246 100644
--- a/MultiplicativeAlgorithm/Extreme_Events.ipynb
+++ b/MultiplicativeAlgorithm/ExtremeEvents.ipynb
@@ -163,7 +163,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.6"
   }
  },
 "nbformat": 4,
diff --git a/MultiplicativeAlgorithm/Phase2.ipynb b/MultiplicativeAlgorithm/Phase2.ipynb
index c542a3f4b786f3ad9e7a45954eda6bfb4a8977fc..29d6bf1daf7916af95759c6848f231c48a8c6887 100644
--- a/MultiplicativeAlgorithm/Phase2.ipynb
+++ b/MultiplicativeAlgorithm/Phase2.ipynb
@@ -2518,7 +2518,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.6"
   }
  },
 "nbformat": 4,
diff --git a/MultiplicativeAlgorithm/Visualizations.py b/MultiplicativeAlgorithm/Visualizations.py
index cb86e9853444886df5955f9f40f50c38babd3194..f3896b4b2fbf8ef2daca643bdcdd98a10366c096 100644
--- a/MultiplicativeAlgorithm/Visualizations.py
+++ b/MultiplicativeAlgorithm/Visualizations.py
@@ -315,3 +315,14 @@ def spikeyness_vs_error():
     plt.savefig('Spikeyness_vs_Error')
     return None
 
+def spikeyness2(W):
+    def spikeyness_column(column_number):
+        trend = W[:52*24*7, column_number].reshape(52, 24*7)
+        if np.mean(W[:52*24*7, column_number]) == 0:
+            return np.nan
+        else:
+            return np.mean(np.nanstd(trend, axis=0))/np.mean(W[:, column_number])
+
+    spikey_mean = np.mean([spikeyness_column(sig_number) for sig_number in range(W.shape[1])])
+    spikey_std = np.std([spikeyness_column(sig_number) for sig_number in range(W.shape[1])])
+    return spikey_mean, spikey_std
\ No newline at end of file
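A minimal usage sketch for `spikeyness2`: it reshapes the first 52 weeks of each column into a (52, 168) array, takes the mean weekly standard deviation, and normalizes by the column mean. The import path, matrix shape, and random data below are illustrative assumptions, not from the repo:

```python
import numpy as np
from Visualizations import spikeyness2  # assumes MultiplicativeAlgorithm/ is the working directory

# Illustrative input only: one year of hourly values (8760 rows) for 50 signatures.
W = np.random.rand(8760, 50)
spikey_mean, spikey_std = spikeyness2(W)
print(spikey_mean, spikey_std)
```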
diff --git a/MultiplicativeAlgorithm/__init__.py b/MultiplicativeAlgorithm/__init__.py
index e5fdd929a31eb888ffcec7c2c3c79dcf32a5af2a..b3cc5fba4151e5e1d1e34f62ec1859d23bdfce7b 100644
--- a/MultiplicativeAlgorithm/__init__.py
+++ b/MultiplicativeAlgorithm/__init__.py
@@ -22,24 +22,4 @@ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(messag
 fh.setFormatter(formatter)
 logger.addHandler(fh)
-logger.addHandler(sh)
-
-
-
-'''Select path name for files depending on whether you are working on your PC or cluster'''
-# path_for_data is an optional variable that defaults to 'path'. This is included to facilate a seperate path
-# address for large data files
-
-#path = './Data_Files/' # Use this if working on PC
-#path_for_data = '../../' # Use this if working on PC and if data files are in a different folder
-
-#path = './scratch/' # Use this if working on cluster
-#path_for_data = './scratch/' # Use this if working on cluster and if data files are in a different folder
-
-#filenames = config.generate_filenames(path, path_for_data)
-
-#logger.info('Filenames have been loaded')
-#[logger.debug(i) for i in filenames]
-
-#logger.info('Constants have been loaded')
-#[logger.debug(i) for i in dir(globals()['config'])];
\ No newline at end of file
+logger.addHandler(sh)
\ No newline at end of file
diff --git a/MultiplicativeAlgorithm/cSNMF.ipynb b/MultiplicativeAlgorithm/cSNMF.ipynb
index 08950a7fcba3db8375509515440951833d5b8bc2..a21e801378929dd3891b09145d99ac488e19f594 100644
--- a/MultiplicativeAlgorithm/cSNMF.ipynb
+++ b/MultiplicativeAlgorithm/cSNMF.ipynb
@@ -4,12 +4,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Run cSNMF on Trips data using multiplicative update rules\n",
-    "\n",
-    "**Constraint:** L1 norm of columns of W should be 1\n",
-    "\n",
-    "Get a copy of the data matrices in your local machine from the following links:\n",
-    " - https://uofi.box.com/s/yo60oe084d68obohgraek4weuaqtpgsp"
+    "# Run cSNMF on Trips data using multiplicative update rules"
   ]
  },
 {
@@ -18,13 +13,13 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from __init__ import *\n",
     "import numpy as np\n",
-    "import pandas as pd\n",
-    "import config\n",
-    "import cSNMF\n",
     "import matplotlib.pyplot as plt\n",
-    "%matplotlib inline"
+    "%matplotlib inline\n",
+    "\n",
+    "from __init__ import *  # initializes logger\n",
+    "import config  # contains all the global variables\n",
+    "import cSNMF  # contains update rules and factorization functions"
   ]
  },
 {
@@ -33,17 +28,19 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "## Read Full-Link data and prep for running NMF.\n",
-    "D = np.loadtxt('D_trips.txt')\n",
-    "logger.info('Full_link data has been read')\n",
+    "## Read Full-Link data and prep for running cSNMF.\n",
+    "if config.TRIPS:\n",
+    "    D = np.loadtxt('D_trips.txt')\n",
+    "    logger.info('Full_link data for trips has been read')\n",
+    "else:\n",
+    "    D = np.loadtxt('D_traveltimes.txt')\n",
+    "    logger.info('Full_link data for traveltimes has been read')\n",
     "\n",
-    "if config.SEEDED == 1:\n",
-    "    seed_W = 0; seed_H = 1\n",
-    "elif config.SEEDED == 0:\n",
-    "    seed_W = None; seed_H = None\n",
+    "# Seeded W and H will always return the same result. Turn off SEEDED in config.py for different results with each run.\n",
+    "if config.SEEDED:\n",
+    "    seed_W, seed_H = 0, 1\n",
     "else:\n",
-    "    logger.critical('Seed value invalid. Needs to be 0 or 1. Check config.py!')\n",
-    "    quit()"
+    "    seed_W, seed_H = None, None"
   ]
  },
 {
@@ -52,10 +49,11 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# Factorizes D and imposes constraints.\n",
+    "# Returns numpy arrays W and H and a pandas dataframe, results.\n",
     "W, H, results = cSNMF.factorize(D,\n",
-    "                                beta = 5000,\n",
+    "                                beta = config.BETA,\n",
     "                                rank = config.RANK,\n",
-    "                                max_iter = 600,\n",
     "                                seed_W = seed_W,\n",
     "                                seed_H = seed_H,\n",
     "                                debug = True,\n",
@@ -68,6 +66,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# Displays runtime statistics as a dataframe\n",
     "results"
   ]
  },
@@ -77,8 +76,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "#np.savetxt('W_(seed_W = 10,seed_H = 21).txt', W)\n",
-    "#np.savetxt('H_(seed_W = 10,seed_H = 21).txt', H)"
+    "np.savetxt('W_trips.txt', W)\n",
+    "np.savetxt('H_trips.txt', H)"
   ]
  }
 ],
@@ -98,7 +97,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.3"
+   "version": "3.6.6"
   }
  },
 "nbformat": 4,
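Because the notebook calls `factorize` with `debug = True`, `results` comes back with one row per iteration (columns `error`, `sparsity`, `diff_W`, `diff_H`, `W_minmax`, and the `H` quartiles, per `cSNMF.py`). A quick, assumed way to eyeball convergence in the notebook:

```python
# Plot relative error and sparsity per iteration (uses the matplotlib
# inline backend already set up in the notebook).
results[['error', 'sparsity']].plot(subplots=True)
```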
diff --git a/MultiplicativeAlgorithm/cSNMF.py b/MultiplicativeAlgorithm/cSNMF.py
index d56b90a4fc48d69d22f0e7f6a9cf09d0e07f4240..e02494ee72fd6b3c5066bb8faf4d73f4709bc9b2 100644
--- a/MultiplicativeAlgorithm/cSNMF.py
+++ b/MultiplicativeAlgorithm/cSNMF.py
@@ -1,4 +1,4 @@
-''' This contains functions for cSNMF.'''
+''' This contains functions called by cSNMF.ipynb for constrained Sparse Nonnegative Matrix Factorization.'''
 
 import numpy as np
 import pandas as pd
@@ -11,119 +11,100 @@ logger = logging.getLogger(__name__)
 
 ## Useful matrix operations:
-def m(A, B): return np.multiply(A,B)
 def div(A, B): return np.array([[A[i,j]/B[i,j] if A[i,j]>0 else 0 for j in range(A.shape[1])] for i in range(A.shape[0])])
-def d(A): return np.diag(np.diag(A)) # d(A) replaces all off-diagonal entries of A with 0.
-def N(A): return m(A, NONZEROS) # NONZEROS is a global variable.
-
-
+def d(A): return np.diag(np.diag(A))  # d(A) replaces all off-diagonal entries of A with 0.
+def N(A): return A*NONZEROS  # NONZEROS is a global variable.
 
+# Calculates sparsity of H as in Low_Rank_Manhattan_Traffic.pdf
 def sparsity_metric(H):
     def sparsity_column(column):
         n = len(column)
-        if sum(column) == 0:
-            ratio = 1
-            logger.info('Column of zeros in H')
-        else:
-            ratio = np.linalg.norm(column, 1)/np.linalg.norm(column, 2)
+        assert sum(column) != 0, 'Column of zeros in H. Reduce rank'
+        ratio = np.linalg.norm(column, 1)/np.linalg.norm(column, 2)
         return (math.sqrt(n) - ratio)/(math.sqrt(n) - 1)
 
     return np.mean([sparsity_column(column) for column in H.T])
 
 def calculate_error(D, W, H):
-    norm_D = np.linalg.norm(D)
-    return np.linalg.norm(D - N(W@H))/norm_D*100
-
-def spikeyness(W):
-    def spikeyness_column(column_number):
-        trend = W[:52*24*7, column_number].reshape(52,24*7)
-        if np.mean(W[:52*24*7, column_number]) == 0:
-            return np.nan
-        else:
-            return np.mean(np.nanstd(trend, axis=0))/np.mean(W[:, column_number])
-
-    spikey_mean = np.mean([spikeyness_column(sig_number) for sig_number in range(W.shape[1])])
-    spikey_std = np.std([spikeyness(sig_number) for sig_number in range(W.shape[1])])
-    return spikey_mean, spikey_std
-
-def zeros_in_H(H):
-    # Percentage of zero entries in H
-    # Use this on H_axed
-    return sum(sum(H==0))/H.size
+    return np.linalg.norm(D - N(W@H))/NORM_D*100
 
+# Constraint imposed on the columns of W
 def impose_L1_constraint(W, H):
     W2 = W/sum(W)
     H2 = np.array([H[i]*sum(W)[i] for i in range(len(H))])
     return W2, H2
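A quick sanity check of the sparsity metric defined above: by construction it is 1 for a one-hot column and 0 for a constant column. A minimal standalone sketch with illustrative values:

```python
import math
import numpy as np

def sparsity_column(column):
    # (sqrt(n) - ||x||_1/||x||_2) / (sqrt(n) - 1): 1 = maximally sparse, 0 = uniform.
    n = len(column)
    ratio = np.linalg.norm(column, 1) / np.linalg.norm(column, 2)
    return (math.sqrt(n) - ratio) / (math.sqrt(n) - 1)

print(sparsity_column(np.array([0., 0., 5., 0.])))  # 1.0 (one-hot column)
print(sparsity_column(np.array([2., 2., 2., 2.])))  # 0.0 (constant column)
```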
-
-def axe_H(H, tail_cutoff = 0.4):
+# Zeroes the smallest entries in each column of H whose cumulative sum stays below tail_cutoff times the column total.
+def axe_H(H, tail_cutoff = config.AXING_CUTOFF):
     H2 = H.copy()
     for i, column in enumerate(H.T):
-        sorted_column = sorted(column)
+        sorted_column = sorted(column)
         cumulative_sum = it.accumulate(sorted_column)
-        kill_count = sum(list(cumulative_sum) < tail_cutoff*sum(column))
-        H2.T[i] = np.array([entry if entry >= sorted_column[kill_count] else 0 for entry in column])
+        kill_count = sum(list(cumulative_sum) < tail_cutoff*sum(column))
+        H2.T[i] = np.array([entry if entry >= sorted_column[kill_count] else 0 for entry in column])
     return H2
 
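A small worked example of `axe_H` with `tail_cutoff = 0.4` (illustrative numbers; assumes it is run from `MultiplicativeAlgorithm/` so that `cSNMF` imports):

```python
import numpy as np
from cSNMF import axe_H

# One column of H summing to 1.0 (sorted here for readability).
H = np.array([[0.05, 0.10, 0.15, 0.30, 0.40]]).T
# Sorted cumulative sums: 0.05, 0.15, 0.30, 0.60, 1.00 -- the first three
# stay below 0.4 * 1.0, so those three entries get zeroed.
print(axe_H(H, tail_cutoff = 0.4).ravel())  # [0.  0.  0.  0.3 0.4]
```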
+'''Sort columns of W and rows of H according to the following rules --
+    - Give each link a total weight of 1.
+    - Distribute that weight across all of its signatures in proportion to the corresponding entry in H.
+    - For each signature, calculate popularity to be the sum of weights across all links.
+    - Sort columns of W in decreasing order of popularity.
+    - Sort rows of H by the same permutation in order to preserve W@H'''
+# Sorting is done so we have a rough intuition for which signatures get used the most.
 def sort_WH(W, H):
-    H2 = axe_H(H)
-    weights = H2/sum(H2)
+    weights = H/sum(H)
     signature_popularities = sum(weights.T)
-    signature_popularities = sorted(enumerate(signature_popularities), key = lambda x : x[1], reverse = True)
+    signature_popularities = sorted(enumerate(signature_popularities),\
+                                    key = lambda x : x[1], reverse = True)
 
     W2 = W.copy()
-    H3 = H2.copy()
+    H2 = H.copy()
     for i in range(len(signature_popularities)):
         W2[:, i] = W[:, signature_popularities[i][0]]
-        H3[i] = H2[signature_popularities[i][0]]
-    try:
-        assert ( np.linalg.norm(W@H2 - W2@H3) / np.linalg.norm(W@H2) ) < 0.01
-    except AssertionError:
-        logger.critical('Sorting is not working correctly')
-        return W2, H3
-
-    return W2, H3
+        H2[i] = H[signature_popularities[i][0]]
+
+    assert ( np.linalg.norm(W@H - W2@H2) / np.linalg.norm(W@H) ) < 0.01,\
+        'Sorting is not working correctly'
+    return W2, H2
 
 def factorize(data_array,
-              init_W = None,
-              rank = config.RANK,
-              beta = None,
-              threshold = 0.5,
-              max_iter = 600,
-              seed_W = None,
-              seed_H = None,
-              log = logger,
-              debug = False,
-              axing = True,
-              update_W = True):
+              rank = 50,
+              beta = None,
+              threshold = 0.5,  # algorithm stops if change in W and H is less than threshold %
+              max_iter = 600,
+              seed_W = None,
+              seed_H = None,
+              log = logger,
+              debug = False,    # if debug, then return runtime statistics
+              axing = True,     # if axing, then set small entries in H to zero
+              init_W = None):   # optionally specify W as a numpy array and keep it unchanged in every iteration
 
     log.info('Rank= %s, Threshold= %s', rank, threshold)
 
     D = np.nan_to_num(data_array)
     eta = D.max()
     if beta is None:
-        beta = len(D.T)//2
-
-    global NONZEROS
-    NONZEROS = ~np.isnan(data_array)
+        beta = len(D.T)/2
 
-    JNN = np.ones((rank, rank))
+    global NONZEROS, NORM_D
+    NONZEROS = ~ np.isnan(data_array)
+    NORM_D = np.linalg.norm(D)
 
+    JNN = np.ones((rank, rank))
 
     def update_W(W, H):
         Term_1 = D @ H.T
         Term_2 = N(W @ H) @ H.T + eta*W
-        W = div(m(W, Term_1), Term_2)
+        W = div(W * Term_1, Term_2)
         return W
 
-
     def update_H(W, H):
         Term_3 = W.T @ D
         Term_4 = W.T @ N(W @ H) + beta*(JNN @ H)
-        H = div(m(H, Term_3), Term_4)
+        H = div(H * Term_3, Term_4)
         return H
 
@@ -134,32 +115,30 @@ def factorize(data_array,
 
     logger.info('Initializing W and H...')
-
     W_shape = (D.shape[0], rank)
     H_shape = (rank, D.shape[1])
 
     if init_W is None:
+        update_W = True
         np.random.seed(seed_W)
         W = np.random.uniform(low = 0.01, high = 1., size = W_shape)
     else:
+        update_W = False
         W = init_W
 
     np.random.seed(seed_H)
     H = np.random.uniform(low = 0.01, high = 1., size = H_shape)
 
     log.info('W, H chosen')
-    iterations = 0
+
+    iterations = 0
     column_names = ['error', 'sparsity', 'diff_W', 'diff_H', 'W_minmax',
                     'H_0th', 'H_25th', 'H_50th', 'H_75th', 'H_100th']  # For results DataFrame
     results = pd.DataFrame(columns = column_names)
-
-
-    diff_W = 100
-    diff_H = 100
-
-    global norm_D
-    norm_D = np.linalg.norm(N(D))
+    diff_W = 100
+    diff_H = 100
 
+    # Run until the stopping condition is met
     while abs(diff_W) + abs(diff_H) > threshold or iterations < 100:
         if iterations > max_iter:
             break
@@ -194,16 +173,14 @@
         if debug is True:
             H_0th, H_25th, H_50th, H_75th, H_100th = quartiles(H)
             W_minmax = (W.min(), W.max())
-
             column_values = [error, sparsity, diff_W, diff_H, W_minmax,\
                              H_0th, H_25th, H_50th, H_75th, H_100th]
-
             results = results.append(dict(zip(column_names, column_values)), ignore_index = True)
-
-    if axing:
-        W, H = impose_L1_constraint(W, H)
-        W, H = sort_WH(W, H)
+
+    W, H = impose_L1_constraint(W, H)
+    if axing: H = axe_H(H)
+    W, H = sort_WH(W, H)
 
     error = calculate_error(D, W, H)
     sparsity = sparsity_metric(H)
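For reference, the multiplicative updates implemented by `update_W` and `update_H` above, transcribed from the code: `N(·)` zeroes entries where `D` is missing, `J` is the all-ones rank-by-rank matrix (`JNN`), `η` is the maximum entry of `D`, `β` is the sparsity penalty, `∘` is elementwise multiplication, and `⊘` is the zero-safe elementwise division implemented by `div`:

```latex
W \leftarrow W \circ \bigl(D H^{\top}\bigr) \oslash \bigl(N(WH)\,H^{\top} + \eta\,W\bigr),
\qquad
H \leftarrow H \circ \bigl(W^{\top} D\bigr) \oslash \bigl(W^{\top} N(WH) + \beta\,J H\bigr)
```

Since each row of `J H` holds the column sums of `H`, the `β J H` term acts as an L1 penalty on the columns of `H`, which is what drives the sparsity measured by `sparsity_metric`.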
diff --git a/MultiplicativeAlgorithm/config.py b/MultiplicativeAlgorithm/config.py
index f11cb2ed3b36bae3c14ed216fb86a1e8b039778d..66fb064d4800769597b5b99626060d1f8cdd0a83 100644
--- a/MultiplicativeAlgorithm/config.py
+++ b/MultiplicativeAlgorithm/config.py
@@ -1,53 +1,21 @@
-''' * This file defines addresses to all data files, input files and output files.
- * It also lists all the Global variables used in Non-Negative Matrix Factorization.
+''' * This file lists all the Global variables used in Non-Negative Matrix Factorization.
  * In case any of the Factorization paramaters need to be changed, they should be changed in this file instead of locally within each program.'''
 
+# Global variables
+RANK = 50           # Rank for Matrix Factorization in Phase1.py
+TRIPS = True        # False reads the traveltimes dataset, True reads the trips dataset.
+                    # Currently False doesn't work due to bad hyperparameters.
+SEEDED = True       # Determines whether cSNMF.py uses seeded random matrices for W,H or not.
+BETA = 5000         # Higher value penalizes lack of sparsity in H (see Low_Rank_Manhattan_Traffic.pdf).
+                    # If unsure, then set BETA = None.
+AXING_CUTOFF = 0.4  # Set between 0 and 1. Entries at the bottom of each column of H whose
+                    # cumulative sum stays below this fraction of the column total are set to 0.
 
-def generate_filenames(path, path_for_data=None):
-    if path_for_data is None:
-        path_for_data = path
-    filenames = {# Raw data files
-                 'links': path + 'links.csv',
-                 'nodes': path + 'nodes.csv',
-                 'raw_data': path_for_data + 'travel_times_2011.csv',
-                 'data_coo_form': path_for_data + 'data_coo_form.txt',
-                 'data_trips': path_for_data + 'data_trips.csv',
-                 'data_traveltimes': path_for_data + 'data_travel_times.csv',
-                 'data_trips_transpose': path_for_data + 'data_trips_transpose.csv',
-
-                 # Inputs and Outputs of Read_data.py
-                 'full_link_ids': path + 'full_link_ids.txt',
-                 'empty_link_ids': path + 'empty_link_ids.txt',
-                 'full_link_trips': path + 'full_link_trips.json',
-                 'full_link_traveltimes': path + 'full_link_travel_times.json',
-                 'full_link_speeds': path + 'full_link_speeds.json',
-
-                 # Inputs and Outputs of Phase1.py
-                 'random': path + 'Random_numbers.txt',
-                 'W_trips': path + 'W_trips.txt',
-                 'W_speeds': path + 'W_speeds.csv',
-                 'W_trips_seed': path + 'Seeded0,1/W_trips.csv',
-                 'W_speeds_seed': path + 'Seeded0,1/W_speeds.csv',
-                 'H_trips': path + 'H_trips.txt',
-                 'H_speeds': path + 'H_speeds.csv',
-                 'H_trips_seed': path + 'Seeded0,1/H_trips.csv',
-                 'H_speeds_seed': path + 'Seeded0,1/H_speeds.csv',
-                 'H_trips_axed': path + 'H_trips_axed.csv',
-                 'H_speeds_axed': path + 'H_speeds_axed.csv',
-                 'H_trips_axed_seed': path + 'Seeded0,1/H_trips_axed.csv',
-                 'W_speeds_axed_seed': path + 'Seeded0,1/H_speeds_axed.csv'}
-    return filenames
+# Fixed global parameters for the 2011 traffic dataset
+HOURS_IN_YEAR = 8760   # 24*365
+TOTAL_LINKS = 260855   # Total Links aka Road-segments in NYC.
+FULL_LINKS = 2302      # Links with less than 720 hours' worth of missing data in a year.
+EMPTY_LINKS = 234892   # Links with less than 720 hours' worth of data in a year.
+MID_LINKS = 23661      # TOTAL_LINKS - FULL_LINKS - EMPTY_LINKS
 
-
-'''Global variables'''
-
-RANK = 50 # Rank for Matrix Factorization in Phase1.py
-TRIPS = 1 # Boolean variable: 0 reads travel_speeds data, 1 reads number_of_trips data.
-HOURS_IN_YEAR = 8760 # 24*365
-SEEDED = 1 # Boolean variable: 1 or 0 means Phase1.py is seeded or not respectively.
-
-TOTAL_LINKS = 260855 # Total Links aka Road-segments in NYC.
-FULL_LINKS = 2302 # Links with less than 720 hours worth of missing data in a year.
-EMPTY_LINKS = 234892 # Links with less that 720 hours worth of data in a year.
-MID_LINKS = 23661 # TOTAL_LINKS - FULL_LINKS - EMPTY_LINKS
 assert MID_LINKS == TOTAL_LINKS - FULL_LINKS - EMPTY_LINKS
\ No newline at end of file
diff --git a/ReadData/ReadData.ipynb b/ReadData/ReadData.ipynb
index d3f1f59cd62efe9ccc5bed315775c0fce920aebf..f4987f6a58ef7899e216e9153a8b204407676b1f 100644
--- a/ReadData/ReadData.ipynb
+++ b/ReadData/ReadData.ipynb
@@ -115,7 +115,7 @@
   "source": [
    "## Read from full_link_ids.txt\n",
    "\n",
-   "with open('../Multiplicative Algorithm/full_link_ids.txt', 'r') as readfile:\n",
+   "with open('../MultiplicativeAlgorithm/full_link_ids.txt', 'r') as readfile:\n",
    "    full_links = [int(line.strip()) for line in readfile]\n",
    "    print(len(full_links))"
  ]