Skip to content
Snippets Groups Projects
Commit 355be2e0 authored by vkarve2's avatar vkarve2
Browse files

Changed Sparsity metric to reflect the one used in [Hoyer2004].

parent e2c5d9cc
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Run cSNMF on Trips data using multiplicative update rules
**Constraint:** L1 norm of columns of W should be 1
Get a copy of the data matrices in your local machine from the following links:
- https://www.dropbox.com/s/8ta7x317r32fc7q/D_trips.txt
- https://www.dropbox.com/s/2tb2sb36fgbbx4q/D_speeds.txt
%% Cell type:code id: tags:
``` python
from __init__ import *
import numpy as np
import pandas as pd
import config
import cSNMF
import matplotlib.pyplot as plt
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
## Read Full-Link data and prep for running NMF.
# Load the trips matrix from disk; rows/columns layout is whatever
# D_trips.txt was written with (see the Dropbox links above).
D = np.loadtxt('D_trips.txt')
logger.info('Full_link data has been read')

# config.SEEDED selects deterministic (1) vs. random (0) initialization
# of the factor matrices; any other value is a configuration error.
if config.SEEDED == 1:
    seed_W, seed_H = 0, 1
elif config.SEEDED == 0:
    seed_W, seed_H = None, None
else:
    logger.critical('Seed value invalid. Needs to be 0 or 1. Check config.py!')
    quit()
```
%% Output
Full_link data has been read
%% Cell type:markdown id: tags:
D = np.random.randint(1,10, (5,10))
D = np.array(D, dtype='f')
D_true = D.copy()
D[1,2] = np.nan
D[3,9] = np.nan
D[2,9] = np.nan
D[4,4] = np.nan
config.RANK = 3
print(D)
%% Cell type:code id: tags:
``` python
# Run constrained sparse NMF on the trips matrix.
# beta scales the sparsity penalty; using the mean of the observed
# entries makes it roughly scale-invariant w.r.t. the data magnitude.
# NOTE: removed a stray '\' line-continuation — it is redundant (and
# error-prone) inside an open parenthesis.
W, H, results = cSNMF.factorize(D,
                                beta=np.nanmean(D),
                                rank=config.RANK,
                                max_iter=600,
                                seed_W=seed_W,
                                seed_H=seed_H,
                                debug=False)
```
%% Output
Rank= 70, Threshold= 0.5
Initializing W and H...
W, H chosen
Iteration= 0, Error= 47.3875924723, Sparsity= 0.0209956976117
Iteration= 1, Error= 47.2446147568, Sparsity= 0.0210184416055
Iteration= 2, Error= 47.2216748296, Sparsity= 0.0210978932543
Iteration= 3, Error= 47.1982590051, Sparsity= 0.0212293973311
Iteration= 4, Error= 47.1729461812, Sparsity= 0.0214246612549
Iteration= 5, Error= 47.1442221156, Sparsity= 0.0216926574152
Iteration= 6, Error= 47.1102399808, Sparsity= 0.0220545556759
Iteration= 7, Error= 47.0686199218, Sparsity= 0.0225232011027
Iteration= 8, Error= 47.0162066055, Sparsity= 0.0231265106375
Iteration= 9, Error= 46.9487699551, Sparsity= 0.023894958103
Iteration= 10, Error= 46.8606488749, Sparsity= 0.0248762620983
Iteration= 11, Error= 46.7443701994, Sparsity= 0.0261236934323
Iteration= 12, Error= 46.5903433122, Sparsity= 0.0277129161949
Iteration= 13, Error= 46.3868567481, Sparsity= 0.0297541000462
Iteration= 14, Error= 46.1207929711, Sparsity= 0.0323501833318
Iteration= 15, Error= 45.7796586046, Sparsity= 0.0356080812979
Iteration= 16, Error= 45.3554227385, Sparsity= 0.0395708316294
Iteration= 17, Error= 44.8497233311, Sparsity= 0.0442252698175
Iteration= 18, Error= 44.2779549955, Sparsity= 0.049488259563
Iteration= 19, Error= 43.6681058612, Sparsity= 0.0551506232187
Iteration= 20, Error= 43.0525317571, Sparsity= 0.061017300531
Iteration= 21, Error= 42.4572183956, Sparsity= 0.0669056875815
Iteration= 22, Error= 41.8958589093, Sparsity= 0.0726649143657
Iteration= 23, Error= 41.3709583203, Sparsity= 0.0782306217144
Iteration= 24, Error= 40.8784360568, Sparsity= 0.0836137528347
Iteration= 25, Error= 40.4118601173, Sparsity= 0.0888385655467
Iteration= 26, Error= 39.9649220045, Sparsity= 0.0939426845595
Iteration= 27, Error= 39.5324396445, Sparsity= 0.0989257016598
Iteration= 28, Error= 39.1105676865, Sparsity= 0.103810291418
Iteration= 29, Error= 38.6967365085, Sparsity= 0.108597758797
Iteration= 30, Error= 38.2895633985, Sparsity= 0.113283173105
Iteration= 31, Error= 37.8887286251, Sparsity= 0.117893507003
Iteration= 32, Error= 37.49471312, Sparsity= 0.122388391117
Iteration= 33, Error= 37.1083960462, Sparsity= 0.126758974651
Iteration= 34, Error= 36.7306536457, Sparsity= 0.131033554649
Iteration= 35, Error= 36.3621032365, Sparsity= 0.13525286359
Iteration= 36, Error= 36.0030214833, Sparsity= 0.139401404389
Iteration= 37, Error= 35.6533778381, Sparsity= 0.143473402523
Iteration= 38, Error= 35.3129172124, Sparsity= 0.147489539909
Iteration= 39, Error= 34.9812555659, Sparsity= 0.151464092665
Iteration= 40, Error= 34.657972941, Sparsity= 0.155382227863
Iteration= 41, Error= 34.3426935619, Sparsity= 0.159269506902
Iteration= 42, Error= 34.0351416922, Sparsity= 0.163182813771
Iteration= 43, Error= 33.7351616395, Sparsity= 0.167040865212
Iteration= 44, Error= 33.442698009, Sparsity= 0.170841968276
Iteration= 45, Error= 33.1577528257, Sparsity= 0.174573473342
Iteration= 46, Error= 32.8803484571, Sparsity= 0.178215779124
Iteration= 47, Error= 32.6105102543, Sparsity= 0.181766207239
Iteration= 48, Error= 32.3482627075, Sparsity= 0.185203182333
Iteration= 49, Error= 32.0936282939, Sparsity= 0.188531157896
Iteration= 50, Error= 31.8466230804, Sparsity= 0.191787062987
Iteration= 51, Error= 31.6072484956, Sparsity= 0.19496635352
Iteration= 52, Error= 31.3754817208, Sparsity= 0.198037906074
Iteration= 53, Error= 31.1512681244, Sparsity= 0.201007398443
Iteration= 54, Error= 30.9345185673, Sparsity= 0.203907903115
Iteration= 55, Error= 30.725112209, Sparsity= 0.206788900121
Iteration= 56, Error= 30.5229026617, Sparsity= 0.209640479481
Iteration= 57, Error= 30.327724159, Sparsity= 0.212449111707
Iteration= 58, Error= 30.139395657, Sparsity= 0.215195002403
Iteration= 59, Error= 29.9577228964, Sparsity= 0.217939090892
Iteration= 60, Error= 29.7824996276, Sparsity= 0.22065204846
Iteration= 61, Error= 29.6135091045, Sparsity= 0.223318738853
Iteration= 62, Error= 29.4505263073, Sparsity= 0.225962982371
Iteration= 63, Error= 29.2933207776, Sparsity= 0.228567624823
Iteration= 64, Error= 29.1416596782, Sparsity= 0.231166177735
Iteration= 65, Error= 28.9953107095, Sparsity= 0.233736083995
Iteration= 66, Error= 28.8540446364, Sparsity= 0.23626654233
Iteration= 67, Error= 28.71763729, Sparsity= 0.238767178459
Iteration= 68, Error= 28.585870981, Sparsity= 0.241229396532
Iteration= 69, Error= 28.4585353336, Sparsity= 0.243641573109
Iteration= 70, Error= 28.3354276028, Sparsity= 0.246005586367
Iteration= 71, Error= 28.2163525914, Sparsity= 0.248336678866
Iteration= 72, Error= 28.1011223648, Sparsity= 0.250613986923
Iteration= 73, Error= 27.9895560308, Sparsity= 0.252850921958
Iteration= 74, Error= 27.8814798245, Sparsity= 0.255081298889
Iteration= 75, Error= 27.7767276005, Sparsity= 0.257321931297
Iteration= 76, Error= 27.6751416498, Sparsity= 0.259567560881
Iteration= 77, Error= 27.5765736184, Sparsity= 0.261801524025
Iteration= 78, Error= 27.4808852735, Sparsity= 0.264003095518
Iteration= 79, Error= 27.3879489221, Sparsity= 0.266187839634
Iteration= 80, Error= 27.2976474004, Sparsity= 0.268352197583
Iteration= 81, Error= 27.2098736662, Sparsity= 0.270486958917
Iteration= 82, Error= 27.1245301044, Sparsity= 0.272601741738
Iteration= 83, Error= 27.0415276877, Sparsity= 0.274672592004
Iteration= 84, Error= 26.9607851142, Sparsity= 0.276716830673
Iteration= 85, Error= 26.8822279845, Sparsity= 0.278729262594
Iteration= 86, Error= 26.8057880284, Sparsity= 0.280711297297
Iteration= 87, Error= 26.7314023534, Sparsity= 0.282666143536
Iteration= 88, Error= 26.6590127059, Sparsity= 0.284611520829
Iteration= 89, Error= 26.588564749, Sparsity= 0.28652937268
Iteration= 90, Error= 26.5200073812, Sparsity= 0.288405613746
Iteration= 91, Error= 26.4532921077, Sparsity= 0.290240221468
Iteration= 92, Error= 26.3883724733, Sparsity= 0.292039222446
Iteration= 93, Error= 26.3252035558, Sparsity= 0.293804280586
Iteration= 94, Error= 26.2637415258, Sparsity= 0.295528491615
Iteration= 95, Error= 26.2039432814, Sparsity= 0.297210459286
Iteration= 96, Error= 26.1457661704, Sparsity= 0.298854834337
Iteration= 97, Error= 26.0891678078, Sparsity= 0.300455050162
Iteration= 98, Error= 26.0341059831, Sparsity= 0.302027664804
Iteration= 99, Error= 25.9805386392, Sparsity= 0.303577531712
Iteration= 100, Error= 25.9284238959, Sparsity= 0.305090796773
Iteration= 101, Error= 25.8777201012, Sparsity= 0.306564857776
Iteration= 102, Error= 25.8283859074, Sparsity= 0.30800387719
Iteration= 103, Error= 25.7803803713, Sparsity= 0.309409790161
Iteration= 104, Error= 25.7336630721, Sparsity= 0.310783868797
Iteration= 105, Error= 25.6881942326, Sparsity= 0.312126213624
Iteration= 106, Error= 25.6439348327, Sparsity= 0.313437691784
Iteration= 107, Error= 25.6008467148, Sparsity= 0.314728666463
Iteration= 108, Error= 25.5588926869, Sparsity= 0.31601095381
Iteration= 109, Error= 25.5180366141, Sparsity= 0.317284601525
Iteration= 110, Error= 25.478243484, Sparsity= 0.318536618266
Iteration= 111, Error= 25.4394794196, Sparsity= 0.319763718807
Iteration= 112, Error= 25.4017116286, Sparsity= 0.320971284835
Iteration= 113, Error= 25.3649082875, Sparsity= 0.322167002501
Iteration= 114, Error= 25.3290383777, Sparsity= 0.323336388988
Iteration= 115, Error= 25.294071504, Sparsity= 0.324487362088
Iteration= 116, Error= 25.2599777477, Sparsity= 0.325618714446
Iteration= 117, Error= 25.2267276154, Sparsity= 0.326727658131
Iteration= 118, Error= 25.1942921094, Sparsity= 0.327813417541
Iteration= 119, Error= 25.162642888, Sparsity= 0.328874152725
Iteration= 120, Error= 25.1317524435, Sparsity= 0.32991258191
Iteration= 121, Error= 25.1015942289, Sparsity= 0.330936260996
Iteration= 122, Error= 25.0721427072, Sparsity= 0.331942893191
Iteration= 123, Error= 25.0433733327, Sparsity= 0.33292852684
Iteration= 124, Error= 25.0152624924, Sparsity= 0.333893585038
Iteration= 125, Error= 24.9877874347, Sparsity= 0.334840435598
Iteration= 126, Error= 24.9609262089, Sparsity= 0.335770247131
Iteration= 127, Error= 24.9346576295, Sparsity= 0.336679312162
Iteration= 128, Error= 24.9089612761, Sparsity= 0.337564880471
Iteration= 129, Error= 24.8838175342, Sparsity= 0.338428750298
Iteration= 130, Error= 24.8592076665, Sparsity= 0.33927443862
Iteration= 131, Error= 24.8351138707, Sparsity= 0.34010407311
Iteration= 132, Error= 24.8115192589, Sparsity= 0.340916300248
Iteration= 133, Error= 24.7884077486, Sparsity= 0.341716823932
Iteration= 134, Error= 24.7657639644, Sparsity= 0.342503450987
Iteration= 135, Error= 24.7435732256, Sparsity= 0.343279315435
Iteration= 136, Error= 24.7218215637, Sparsity= 0.344043705175
Iteration= 137, Error= 24.7004957006, Sparsity= 0.344793207579
Iteration= 138, Error= 24.6795829881, Sparsity= 0.345529148255
Iteration= 139, Error= 24.6590713471, Sparsity= 0.346254048462
Iteration= 140, Error= 24.6389492255, Sparsity= 0.346968253763
Iteration= 141, Error= 24.6192055731, Sparsity= 0.34767221945
Iteration= 142, Error= 24.5998298229, Sparsity= 0.348363174788
Iteration= 143, Error= 24.5808118703, Sparsity= 0.349042136627
Iteration= 144, Error= 24.5621420469, Sparsity= 0.349709616162
Iteration= 145, Error= 24.5438110902, Sparsity= 0.350366774259
Iteration= 146, Error= 24.5258101149, Sparsity= 0.351012443027
Iteration= 147, Error= 24.5081305887, Sparsity= 0.351649868562
Iteration= 148, Error= 24.4907643144, Sparsity= 0.352280315478
Iteration= 149, Error= 24.4737034124, Sparsity= 0.352904148044
Iteration= 150, Error= 24.4569402996, Sparsity= 0.353517568983
Iteration= 151, Error= 24.4404676607, Sparsity= 0.354118642061
Iteration= 152, Error= 24.4242784108, Sparsity= 0.354712480166
Iteration= 153, Error= 24.4083656515, Sparsity= 0.355297261348
Iteration= 154, Error= 24.3927226292, Sparsity= 0.35587266096
Iteration= 155, Error= 24.3773427006, Sparsity= 0.356443633033
Iteration= 156, Error= 24.3622193158, Sparsity= 0.357006013567
Iteration= 157, Error= 24.3473460199, Sparsity= 0.35755983774
Iteration= 158, Error= 24.3327164708, Sparsity= 0.358104294201
Iteration= 159, Error= 24.3183244657, Sparsity= 0.358640157274
Iteration= 160, Error= 24.3041639664, Sparsity= 0.359167300681
Iteration= 161, Error= 24.2902291182, Sparsity= 0.359686049728
Iteration= 162, Error= 24.2765142571, Sparsity= 0.36019759454
Iteration= 163, Error= 24.2630139061, Sparsity= 0.360699751263
Iteration= 164, Error= 24.2497227626, Sparsity= 0.36119372419
Iteration= 165, Error= 24.2366356796, Sparsity= 0.361679610709
Iteration= 166, Error= 24.2237476464, Sparsity= 0.362159353492
Iteration= 167, Error= 24.2110537742, Sparsity= 0.36263281757
Iteration= 168, Error= 24.1985492896, Sparsity= 0.363099862037
Iteration= 169, Error= 24.186229532, Sparsity= 0.3635604401
Iteration= 170, Error= 24.1740899532, Sparsity= 0.364014159475
Iteration= 171, Error= 24.162126115, Sparsity= 0.364464076078
Iteration= 172, Error= 24.1503336875, Sparsity= 0.36490884998
Iteration= 173, Error= 24.1387084549, Sparsity= 0.365347157088
Iteration= 174, Error= 24.1272463325, Sparsity= 0.365777621014
Iteration= 175, Error= 24.1159433993, Sparsity= 0.366205999692
Iteration= 176, Error= 24.1047959376, Sparsity= 0.366626639902
Iteration= 177, Error= 24.093800473, Sparsity= 0.367038611234
Iteration= 178, Error= 24.0829538039, Sparsity= 0.36744353529
Iteration= 179, Error= 24.0722530059, Sparsity= 0.367842428821
Iteration= 180, Error= 24.0616954014, Sparsity= 0.368234091149
Iteration= 181, Error= 24.0512784855, Sparsity= 0.368618313009
Iteration= 182, Error= 24.0409998207, Sparsity= 0.368999901459
Iteration= 183, Error= 24.0308569248, Sparsity= 0.369381080989
Iteration= 184, Error= 24.0208471876, Sparsity= 0.369758420428
Iteration= 185, Error= 24.0109678402, Sparsity= 0.370132658878
Iteration= 186, Error= 24.0012159784, Sparsity= 0.370501338302
Iteration= 187, Error= 23.9915886216, Sparsity= 0.370863394845
Iteration= 188, Error= 23.9820827837, Sparsity= 0.371219519004
Iteration= 189, Error= 23.9726955339, Sparsity= 0.371569642397
Iteration= 190, Error= 23.9634240376, Sparsity= 0.371917500192
Iteration= 191, Error= 23.954265575, Sparsity= 0.372266395991
Iteration= 192, Error= 23.9452175412, Sparsity= 0.372610042315
Iteration= 193, Error= 23.9362774359, Sparsity= 0.372949661534
Iteration= 194, Error= 23.9274428548, Sparsity= 0.373288172734
Iteration= 195, Error= 23.9187114873, Sparsity= 0.373622225626
Iteration= 196, Error= 23.9100811253, Sparsity= 0.373951325723
Error= 32.0857969231, Sparsity= 0.393939111185
%% Cell type:code id: tags:
``` python
results
```
%% Output
Empty DataFrame
Columns: [error, sparsity, diff_W, diff_H, W_minmax, H_0th, H_25th, H_50th, H_75th, H_100th]
Index: []
%% Cell type:code id: tags:
``` python
# Persist the learned factor matrices as plain text so downstream
# notebooks/scripts can reload them with np.loadtxt.
np.savetxt('W_trips.txt', W)
np.savetxt('H_trips.txt', H)
```
%% Cell type:code id: tags:
``` python
# Re-import cSNMF to pick up edits made to the module during this
# session, then recompute the sparsity of H with the updated metric.
# `imp` has been deprecated since Python 3.4 (removed in 3.12);
# importlib.reload is the supported replacement.
import importlib
importlib.reload(cSNMF)
cSNMF.sparsity_metric(H)
```
%% Output
0.82092271433931607
......
......@@ -2,6 +2,7 @@
import numpy as np
import pandas as pd
import math
import logging
import itertools as it
import config
......@@ -19,10 +20,10 @@ def N(A): return m(A, NONZEROS) # NONZEROS is a global variable.
def sparsity_metric(H):
    """Mean Hoyer (2004) sparseness of the columns of H.

    For a vector x of length n, Hoyer's sparseness is
        (sqrt(n) - ||x||_1 / ||x||_2) / (sqrt(n) - 1),
    which is 0 for a constant vector and 1 for a one-hot vector.

    Parameters
    ----------
    H : numpy.ndarray, 2-D
        Factor matrix; the metric is computed per column and averaged.

    Returns
    -------
    float in [0, 1].

    Note: an all-zero column makes ||x||_2 zero and yields nan via
    division by zero — callers are expected to pass strictly
    nonnegative, nonzero columns (as produced by the NMF updates).
    """
    def sparsity_column(column):
        n = len(column)
        # Ratio ||x||_1 / ||x||_2 ranges from 1 (one-hot) to sqrt(n) (constant).
        ratio = np.linalg.norm(column, 1) / np.linalg.norm(column, 2)
        return (math.sqrt(n) - ratio) / (math.sqrt(n) - 1)

    return np.mean([sparsity_column(column) for column in H.T])
def calculate_error(D, W, H):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment