-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaudio_preprocess.py
63 lines (48 loc) · 1.65 KB
/
audio_preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import librosa
import librosa.display
# import IPython.display
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.font_manager as fm
import glob
from IPython import get_ipython
from PIL import Image
import re
def audio_preprocessing(data,test):
# folder_list = glob.glob("audio_data/*") # μ€λμ€ λ°μ΄ν° νμΌ μμΉ
# print(folder_list)
# print(len(folder_list))
# index=0
# for i in folder_list:
frame_length = 0.025
frame_stride = 0.010
y, sr = librosa.load(data,sr=16000)
win_length = int(np.ceil(frame_length*sr))
window = 'hamming'
nfft = int(round(sr*frame_length))
hop_length = int(round(sr*frame_stride))
plt.figure(figsize=(4,10))
Si = librosa.feature.melspectrogram(y=y,sr=sr,n_mels=40, n_fft=nfft, hop_length=hop_length,win_length=win_length, window=window,
center=True, pad_mode='reflect', fmin=0.0)
DB = librosa.amplitude_to_db(Si, ref=np.max)
librosa.display.specshow(DB, sr=sr, x_axis='linear', y_axis='time',hop_length=hop_length)
plt.axis('off')
# plt.xlabel("Time")
# plt.ylabel("MFCC coefficients")
# plt.colorbar()
# plt.title("MFCCs")
#save image
fig = plt.gcf()
img_path = "./model_image/"+test+'.png'
fig.savefig(img_path,bbox_inches='tight',pad_inches=0)
plt.close()
# resize
img_size = (256, 256)
image = Image.open(f'{img_path}')
image = image.resize(img_size)
image = image.transpose(Image.FLIP_LEFT_RIGHT)
image = image.rotate(-90)
image.save(f'{img_path}')
# index+=1
print("complete!")