-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathSSLagM_1 song_chromas.py
138 lines (115 loc) · 5.29 KB
/
SSLagM_1 song_chromas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
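SCRIPT NUMBER: 3
This script computes the Mel spectrogram and the lag matrix for one SALAMI
song, given a lag context L.
The method follows the steps of the paper "MUSIC BOUNDARY DETECTION USING NEURAL
NETWORKS ON SPECTROGRAMS AND SELF-SIMILARITY LAG MATRICES".
NOTE(review): the code in this file derives chroma (PCP) features from an STFT
magnitude rather than from a Mel spectrogram; confirm which description is
intended.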
"""
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import time
import skimage.measure
import scipy
from scipy.spatial import distance
import math
import extract_labels_from_txt
start_time = time.time()

# ================================ SIGNAL ================================
# Load one SALAMI song at its native sample rate and bring it to 44.1 kHz.
song = "12"
song_path = "/media/carlos/FILES1/SALAMI/songs/" + song + ".mp3"
window_size = 2048  # STFT window length (samples/frame)
hop_length = 1024  # hop between frames: 50% overlap (samples/frame)
sr_desired = 44100

y, sr = librosa.load(song_path, sr=None)
if sr != sr_desired:
    # The sample rates are passed by keyword: recent librosa releases made
    # orig_sr/target_sr keyword-only, and older releases accept the same
    # keywords, so this form works with both.
    y = librosa.resample(y, orig_sr=sr, target_sr=sr_desired)
    sr = sr_desired

p = 6  # block size used further down for max pooling along the time axis
L_sec = 14  # lag context in seconds
L = round(L_sec * sr / hop_length)  # lag context converted from seconds to frames
"""--------------------------------------------------------------------"""
"""========================ESPECTROGRAMA DE MEL========================"""
"""--------------------------------------------------------------------"""
# Magnitude STFT of the whole signal (rows: frequency bins, columns: frames).
stft = np.abs(librosa.stft(y, n_fft=window_size, hop_length=hop_length))
# A Mel-spectrogram variant (librosa.feature.melspectrogram with n_mels=80,
# fmin=80, fmax=16000, followed by power_to_db) used to be computed here
# instead of the plain STFT.

# Prepend L frames filled with the constant -70 so that the first real frames
# have a full lag context behind them.
# NOTE(review): -70 reads like a dB level, but `stft` is a linear magnitude
# here; confirm the intended padding value.
padding_factor = L  # padding length in frames
pad = np.full((stft.shape[0], padding_factor), -70)
S_padded = np.hstack((pad, stft))
# Padding at the end of the signal as well is currently disabled.

# ------------------------------------------------------------------------
# Max pooling, chroma (PCP) features and frame bagging
# ------------------------------------------------------------------------
# Max pooling by a factor p along time: the number of columns becomes N/p.
x_prime = skimage.measure.block_reduce(S_padded, block_size=(1, p), func=np.max)
PCPs = librosa.feature.chroma_stft(
    S=x_prime,
    sr=sr,
    n_fft=window_size,
    hop_length=hop_length,
)
# NOTE(review): this discards the first chroma bin; confirm it is intended
# for chroma features.
PCPs = PCPs[1:]

# Bagging: stack m time-shifted copies of the features on top of each other.
# np.roll is circular, so the shifted copy wraps the last frame around to the
# first column.
m = 2  # bagging parameter in frames
x = [np.roll(PCPs, shift, axis=1) for shift in range(m)]
x_hat = np.vstack(x)
# Cosine distance calculation: D is an [N/p, L/p] matrix whose entry (i, l)
# holds the cosine distance between column i of x_hat and the column
# (l + 1) positions before it.
lag_bins = padding_factor // p  # lag context expressed in pooled frames
n_frames = x_hat.shape[1]

# Every entry starts at the maximum distance 1; only pairs whose earlier
# column lies at index lag_bins or later are overwritten with a real distance.
distances = np.ones((n_frames, lag_bins))
for i in range(n_frames):
    for lag in range(lag_bins):
        j = i - (lag + 1)
        if j >= lag_bins:
            distances[i, lag] = distance.cosine(x_hat[:, i], x_hat[:, j])

# Threshold epsilon[N/p, L/p]: the kappa-quantile of the distances stored in
# row i together with those stored in row i - lag.
# NOTE(review): column `lag` of `distances` corresponds to a shift of lag + 1
# frames, so the row paired here may be intended to be i - (lag + 1); confirm
# against the reference paper before changing.
kappa = 0.1
epsilon = np.zeros((distances.shape[0], lag_bins))
for i in range(lag_bins, distances.shape[0]):
    for lag in range(lag_bins):
        epsilon[i, lag] = np.quantile(
            np.concatenate((distances[i - lag, :], distances[i, :])), kappa
        )

# Drop the leading rows/columns that correspond to the padding.
distances = distances[lag_bins:, :]
epsilon = epsilon[lag_bins:, :]
x_prime = x_prime[:, lag_bins:]

# Self-Similarity Lag Matrix: sigmoid of 1 - D / epsilon.
# epsilon may contain zeros, so the division can yield inf or NaN; the
# resulting NaNs are zeroed just below, so the floating-point warnings are
# silenced here.
with np.errstate(divide="ignore", invalid="ignore"):
    sslm = scipy.special.expit(1 - distances / epsilon)
sslm = np.transpose(sslm)

# Replace any NaN in the SSLM by 0.
sslm[np.isnan(sslm)] = 0
# Show the SSLM on its own.
plt.figure(1)
plt.title("SSLM")
fig = plt.imshow(sslm, origin='lower', cmap='viridis')
# Axis ticks can be hidden through fig.axes.get_xaxis()/get_yaxis() if needed.
plt.show()

# With the SSLM computed, overlay the annotated section transitions read from
# the song's .txt annotation file.
path = "E:\\UNIVERSIDAD\\MÁSTER INGENIERÍA INDUSTRIAL\\TFM\\Database\\salami-data-public\\annotations\\" + song + "\\parsed\\"
file = "textfile1_functions.txt"
# presumably the helper returns boundary times in seconds (hence the name
# labels_sec); verify against extract_labels_from_txt.
labels_sec = extract_labels_from_txt.ReadDataFromtxt(path, file)
# Convert each label to a column index of the pooled time axis.
labels = [float(seconds) * sr / (p * hop_length) for seconds in labels_sec]

plt.imshow(sslm, origin='lower', cmap='gray')
for boundary in labels:
    # One vertical marker per label, drawn over the top 20% of the axes.
    plt.axvline(boundary, ymin=0.8, color='y', lw=2, linestyle='-')
plt.show()

# Sanity check: the SSLM and the pooled spectrogram must have the same number
# of time columns.
if sslm.shape[1] != x_prime.shape[1]:
    print("ERROR. Time dimension of SSLM and MLS mismatch.")
    print("MLS has", x_prime.shape[1], "lag bins and the SSLM", sslm.shape[1])
else:
    print("SSLM and MLS have the same time dimension (columns).")
"""
#Compare mfccs and chroma Lag matrices
np.save("C:\\Users\\Carlos\\Desktop\\1358.npy", sslm)
im_chroma = np.load("E:\\INVESTIGACIÓN\\Proyectos\\Boundaries Detection\\Inputs\\np SSLM from Chromas\\1000.npy")
im_mfccs = np.load("E:\\INVESTIGACIÓN\\Proyectos\\Boundaries Detection\\Inputs\\np SSLM from MFCCs\\1000.npy")
plt.subplot(211)
plt.imshow(im_chroma, origin = 'lower')
plt.subplot(212)
plt.imshow(im_mfccs, origin = 'lower')
plt.show()
print("Shape of SSLM from MFCCs:", im_mfccs.shape[1])
print("Shape of SSLM from Chromas:", im_chroma.shape[1])
"""