Commit 17378bf7 authored by vkarve2

Revised sparsity metric to always be in [0, 1].

Added axe_H function.
parent c0d8f887
@@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
import logging
import itertools as it
logger = logging.getLogger(__name__)
@@ -14,17 +15,39 @@ def d(A): return np.diag(np.diag(A))  # d(A) replaces all off-diagonal entries of A with 0.
def N(A): return m(A, NONZEROS) # NONZEROS is a global variable.

def axe_H(H, relative_cutoff=0.5):
    # Make H sparser by zeroing out entries below relative_cutoff times the
    # column peak (half the peak height by default) in each column of H.
    H2 = pd.DataFrame(H)
    def axe_column(column):
        peak = max(column)
        return column.mask(column < peak*relative_cutoff, other=0.0)
    return H2.apply(axe_column, axis=0).values

def sparsity_metric(H):
    def sparsity_column(column):
        # Norm of the column with its single largest entry removed, relative to
        # the norm of the full column; this ratio always lies in [0, 1].
        column2 = list(column)
        column2.remove(max(column2))
        return np.linalg.norm(column2)/np.linalg.norm(column)
    return 1 - np.mean([sparsity_column(column) for column in H.T])
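
As a quick sanity check on the commit message's claim that the revised metric always lies in [0, 1], here is a minimal sketch; the module name nmf_helpers used in the import is hypothetical.

import numpy as np
from nmf_helpers import sparsity_metric  # hypothetical module name

# A one-hot column is maximally sparse: removing its peak leaves the zero
# vector, so the column ratio is 0 and the metric is 1.
one_hot = np.array([[0.0], [1.0], [0.0], [0.0]])
print(sparsity_metric(one_hot))   # 1.0

# A constant column is minimally sparse: the ratio is sqrt((n-1)/n), so the
# metric approaches 0 as the column length n grows.
uniform = np.ones((100, 1))
print(sparsity_metric(uniform))   # ~0.005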

def calculate_error(D, W, H):
    # Relative reconstruction error as a percentage; norm_D is assumed to be a
    # module-level value defined elsewhere in this file.
    return np.linalg.norm(D - N(W@H))/norm_D*100

def impose_L1_constraint(W, H):
    # Rescale so that every column of W sums to 1; the rows of H pick up the
    # same factors, so the product W @ H is unchanged.
    W2 = W/sum(W)
    H2 = np.array([H[i]*sum(W)[i] for i in range(len(H))])
    return W2, H2
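
A minimal sketch checking that this rescaling leaves the factorization intact; again, the module name nmf_helpers is hypothetical.

import numpy as np
from nmf_helpers import impose_L1_constraint  # hypothetical module name

rng = np.random.default_rng(0)
W = rng.random((6, 3))
H = rng.random((3, 8))

W2, H2 = impose_L1_constraint(W, H)
print(np.allclose(W2.sum(axis=0), 1.0))   # True: each column of W2 sums to 1
print(np.allclose(W @ H, W2 @ H2))        # True: the product W @ H is unchanged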

def axe_H(H, tail_cutoff=0.25):
    # Zero out the smallest entries of each column whose cumulative (sorted)
    # sum stays below tail_cutoff times the column total.
    H2 = H.copy()
    for i, column in enumerate(H2.T):
        sorted_column = sorted(column)
        cumulative_sum = np.array(list(it.accumulate(sorted_column)))
        kill_count = int(np.sum(cumulative_sum < tail_cutoff*sum(column)))
        H2.T[i] = np.array([entry if entry >= sorted_column[kill_count] else 0 for entry in column])
    return H2
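
A minimal usage sketch for the tail_cutoff variant (when the module is imported, this later definition shadows the half-peak axe_H above); the module name nmf_helpers is hypothetical.

import numpy as np
from nmf_helpers import axe_H  # hypothetical module name

H = np.array([[5.0, 1.0],
              [3.0, 1.0],
              [1.0, 1.0],
              [1.0, 7.0]])

# Column 0 sums to 10; its smallest entries whose running total stays below
# 0.25 * 10 = 2.5 (the two 1.0 entries) are zeroed. In column 1 the resulting
# threshold equals the remaining entries, so nothing is removed.
print(axe_H(H, tail_cutoff=0.25))
# [[5. 1.]
#  [3. 1.]
#  [0. 1.]
#  [0. 7.]]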

'''TODO: Change this function.
def sort_WH(W0, H0):
    # Sort H and W according to decreasing order of signature popularity
    # which is calculated using H obtained from axe_H().
@@ -49,25 +72,8 @@ def sort_WH(W0, H0):
                          key=lambda pair: pair[0], reverse=True)])
    W = np.array(WT).T
    H = np.array(H)
    return W, H

def sparsity_metric(H):
    def sparsity_column(column):
        column = (column - column.max())/column.max()
        sparse = np.linalg.norm(column)
        return sparse/len(column)
    return sum([sparsity_column(i) for i in H.T])/len(H.T)

    return W, H'''

def calculate_error(D, W, H):
    return np.linalg.norm(D - N(W@H))/norm_D*100

def impose_L1_constraint(W, H):
    W2 = W/sum(W)
    H2 = np.array([H[i]*sum(W)[i] for i in range(len(H))])
    return W2, H2

def factorize(data_array, rank, beta = None, threshold = 0.20, max_iter = 400, seed_W = None, seed_H = None, log=logger):
......