-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathexample.py
93 lines (80 loc) · 3.49 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#-*- coding: utf_8 -*-
import matplotlib
import cPickle
import numpy as np
import os
import sys
import librosa
from scipy.misc import imsave
import auralise
''' 2015-09-28 Keunwoo Choi
- [0] load cnn weights that is learned from keras (http://keras.io) - but you don't need to install it.
- [1] load a song file (STFT).
---[1-0] log_amplitude(abs(STFT))
-- [1-1] feed-forward the spectrogram,
--- [1-1-0]using NOT theano, just scipy and numpy on CPU. Thus it's slow.
-- [1-2] then 'deconve' using switch matrix for every convolutional layer.
-- [1-3] 'separate' or 'auralise' the deconved spectrogram using phase information of original signal.
- [2]listen up!
'''
if __name__ == "__main__":
'''
This is main body of program I used for the paper at ismir.
You need a keras model weights file with music files at the appropriate folder... In other words, it won't be executed at your computer.
Just see the part after
print '--- deconve! ---'
, where the deconvolution functions are used.
'''
# load learned weights
W, layer_names = auralise.load_weights()
num_conv_layer = len(W)
# load files
print '--- prepare files ---'
filenames_src = ['bach.wav', 'dream.wav', 'toy.wav']
path_SRC = 'src_songs/'
path_src = 'src_songs/'
print '--- Please modify above to run on your file ---'
filenames_SRC = []
N_FFT = 512
SAMPLE_RATE = 11025
# get STFT of the files.
for filename in filenames_src:
song_id = filename.split('.')[0]
if os.path.exists(path_SRC + song_id + '.npy'):
pass
else:
src = librosa.load(os.path.join(path_src, filename), sr=SAMPLE_RATE, mono=True, duration=4.)[0]
SRC = librosa.stft(src, n_fft=N_FFT, hop_length=N_FFT/2)
if SRC.shape[1] > 173:
SRC = SRC[:, :173]
elif SRC.shape[1] < 173:
temp = np.zeros((257, 173))
temp[:, :SRC.shape[1]] = SRC
SRC = temp
np.save(path_SRC + song_id + '.npy', SRC)
filenames_SRC.append(song_id + '.npy')
# deconve
depths = [5,4,3,2,1]
for filename in filenames_SRC:
song_id = filename.split('.')[0]
SRC = np.load(path_SRC + filename)
filename_out = '%s_a_original.wav' % (song_id)
if not os.path.exists(path_SRC + song_id):
os.makedirs(path_src + song_id)
if not os.path.exists(path_SRC + song_id + '_img'):
os.makedirs(path_src + song_id + '_img')
librosa.output.write_wav(path_src+ song_id + '/' + filename_out, librosa.istft(SRC, hop_length=N_FFT/2), sr=SAMPLE_RATE, norm=True)
for depth in depths:
print '--- deconve! ---'
deconvedMASKS = auralise.get_deconve_mask(W[:depth], layer_names, SRC, depth) # size can be smaller than SRC due to downsampling
print 'result; %d masks with size of %d, %d' % deconvedMASKS.shape
for deconved_feature_ind, deconvedMASK_here in enumerate(deconvedMASKS):
MASK = np.zeros(SRC.shape)
MASK[0:deconvedMASK_here.shape[0], 0:deconvedMASK_here.shape[1]] = deconvedMASK_here
deconvedSRC = np.multiply(SRC, MASK)
filename_out = '%s_deconved_from_depth_%d_feature_%d.wav' % (song_id, depth, deconved_feature_ind)
librosa.output.write_wav(path_src+ song_id + '/' + filename_out, librosa.istft(deconvedSRC, hop_length=N_FFT/2), SAMPLE_RATE, norm=True)
filename_img_out = 'spectrogram_%s_from_depth_%d_feature_%d.png' % (song_id, depth, deconved_feature_ind)
imsave(path_src+song_id + '_img' + '/' + filename_img_out , np.flipud(np.multiply(np.abs(SRC), MASK)))
filename_img_out = 'filter_for_%s_from_depth_%d_feature_%d.png' % (song_id, depth, deconved_feature_ind)
imsave(path_src+song_id + '_img' + '/' + filename_img_out , np.flipud(MASK))