-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfd_sketch.py
87 lines (62 loc) · 2.59 KB
/
fd_sketch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
#!/usr/bin/env python
import numpy as np
import numpy.linalg as ln
import math
import sys
""" This is a simple and deterministic method for matrix sketch.
The original method has been introduced in [Liberty2013]_ .
[Liberty2013] Edo Liberty, "Simple and Deterministic Matrix Sketching", ACM SIGKDD, 2013.
"""
def sketch(mat_a, ell):
"""Compute a sketch matrix of input matrix
Note that \ell must be smaller than m * 2
:param mat_a: original matrix to be sketched (n x m)
:param ell: the number of rows in sketch matrix
:returns: sketch matrix (\ell x m)
"""
# number of columns
m = mat_a.shape[1]
# Input error handling
if math.floor(ell / 2) >= m:
raise ValueError('Error: ell must be smaller than m * 2')
if ell >= mat_a.shape[0]:
raise ValueError('Error: ell must not be greater than n')
# initialize output matrix B
mat_b = np.zeros([ell, m])
# compute zero valued row list
zero_rows = np.nonzero([round(s, 7) == 0.0 for s in np.sum(mat_b, axis = 1)])[0].tolist()
# repeat inserting each row of matrix A
for i in range(0, mat_a.shape[0]):
# insert a row into matrix B
mat_b[zero_rows[0], :] = mat_a[i, :]
# remove zero valued row from the list
zero_rows.remove(zero_rows[0])
# if there is no more zero valued row
if len(zero_rows) == 0:
# compute SVD of matrix B
mat_u, vec_sigma, mat_v = ln.svd(mat_b, full_matrices=False)
# obtain squared singular value for threshold
squared_sv_center = vec_sigma[math.floor(ell / 2)] ** 2
# update sigma to shrink the row norms
sigma_tilda = [(0.0 if d < 0.0 else math.sqrt(d)) for d in (vec_sigma ** 2 - squared_sv_center)]
# update matrix B where at least half rows are all zero
mat_b = np.dot(np.diagflat(sigma_tilda), mat_v)
# update the zero valued row list
zero_rows = np.nonzero([round(s, 7) == 0 for s in np.sum(mat_b, axis = 1)])[0].tolist()
return mat_b
def calculateError(mat_a, mat_b):
"""Compute the degree of error by sketching
:param mat_a: original matrix
:param mat_b: sketch matrix
:returns: reconstruction error
"""
dot_mat_a = np.dot(mat_a.T, mat_a)
dot_mat_b = np.dot(mat_b.T, mat_b)
return ln.norm(dot_mat_a - dot_mat_b, ord = 2)
def squaredFrobeniusNorm(mat_a):
"""Compute the squared Frobenius norm of a matrix
:param mat_a: original matrix
:returns: squared Frobenius norm
"""
return ln.norm(mat_a, ord = 'fro') ** 2