-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvis.py
185 lines (139 loc) · 6.1 KB
/
vis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import numpy as np
from PIL import Image
import os
from torchvision.transforms import Resize
try:
import moviepy.editor as mpy
from moviepy.audio.AudioClip import AudioArrayClip
except:
pass
import tempfile
from subprocess import call
from scipy.io import wavfile
def read_gif(im, resize=None, transpose=True):
    """Read all frames of an (animated) image into a single array.

    :param im: path to the image file, or an already opened PIL image
    :param resize: target frame size. ``None`` keeps the native size. A
        ``(width, height)`` pair or a single int (square frames) is passed on
        to the frame's ``resize`` method. Any other non-None value (for
        example ``True``) keeps the historical fixed 32x32 resize.
    :param transpose: if true, the stacked frames are permuted with axes
        [0, 3, 1, 2] (channel-first); otherwise they stay channel-last
    :return: array with the RGB frames stacked along axis 0
    """
    if resize is None:
        target_size = None
    elif isinstance(resize, (tuple, list)) and len(resize) == 2:
        target_size = tuple(resize)
    elif isinstance(resize, (int, np.integer)) and not isinstance(resize, bool):
        target_size = (int(resize), int(resize))
    else:
        # Earlier versions ignored the value of `resize` and always produced
        # 32x32 frames; keep that for flag-like values such as True.
        target_size = (32, 32)

    # Only close the image if this function opened it; a caller-supplied
    # image object stays under the caller's control.
    opened_here = isinstance(im, str)
    if opened_here:
        im = Image.open(im)
    try:
        im.seek(0)
        frames = []
        try:
            while True:
                frame = im.convert('RGB')
                if target_size is not None:
                    frame = frame.resize(target_size)
                frames.append(np.array(frame))
                im.seek(im.tell() + 1)
        except EOFError:
            pass  # seeking past the last frame marks the end of the sequence
    finally:
        if opened_here:
            im.close()

    image = np.stack(frames, axis=0)
    if transpose:
        image = np.transpose(image, [0, 3, 1, 2])
    return image
def npy_to_gif(im_list, filename, fps=5):
    """Write a sequence of frames to an animated GIF via moviepy.

    :param im_list: a list of frames, or an array that is split into frames
        along its first axis
    :param filename: output path; '.gif' is appended when it is missing
    :param fps: frames per second of the generated clip
    :return: None
    """
    if isinstance(im_list, np.ndarray):
        im_list = list(im_list)
    if not filename.endswith('.gif'):
        filename = filename + '.gif'

    # A bare file name has an empty directory part; os.makedirs('') would
    # raise, so only create a directory when there actually is one.
    save_dir = os.path.dirname(filename)
    if save_dir and not os.path.exists(save_dir):
        print('creating directory: ', save_dir)
        os.makedirs(save_dir, exist_ok=True)

    clip = mpy.ImageSequenceClip(im_list, fps=fps)
    clip.write_gif(filename)
def npy_to_mp4(im_list, filename, fps=4, audio=None):
    """Write a sequence of frames (optionally with audio) to an mp4 via moviepy.

    :param im_list: a list of frames, or an array that is split into frames
        along its first axis
    :param filename: output path; '.mp4' is appended when it is missing
    :param fps: video frames per second
    :param audio: an array frames x samples, containing the audio samples per
        each frame; a 1-D array is treated as one stream spread evenly over
        the frames
    :return: None
    """
    if isinstance(im_list, np.ndarray):
        im_list = list(im_list)
    if not filename.endswith('.mp4'):
        filename = filename + '.mp4'

    # makedirs (not mkdir) so that nested output directories are created too.
    save_dir = os.path.dirname(filename)
    if save_dir and not os.path.exists(save_dir):
        print('creating directory: ', save_dir)
        os.makedirs(save_dir, exist_ok=True)

    clip = mpy.ImageSequenceClip(im_list, fps=fps)
    if audio is not None:
        if len(audio.shape) == 2:
            samples_per_frame = audio.shape[1]
        else:
            samples_per_frame = audio.shape[0] / len(im_list)
        # moviepy always expects stereo audio for some reason, repeating second axis to emulate stereo.
        audio = audio.reshape(-1, 1).repeat(2, 1)
        # Audio sample rate = samples per video frame * video frames per second.
        audio_clip = AudioArrayClip(audio, fps=samples_per_frame * fps)
        clip = clip.set_audio(audio_clip)

    clip.write_videofile(filename, temp_audiofile=filename + '.m4a', remove_temp=True, codec="libx264", audio_codec="aac",
                         verbose=False, logger=None)
def npy_to_mp4_w_audio(im_list, filename, fps=4, audio=None):
    r"""Save frames as an mp4, muxing in audio with a direct call to ffmpeg.

    Original author's note: this was necessary to work for a particular set of
    audio and video. The standard npy_to_mp4 works for some other set of audio
    and video, potentially not a subset. I am not an expert in codecs ¯\_(ツ)_/¯

    :param im_list: a list of frames, or an array of frames
    :param filename: output path; '.mp4' is appended when it is missing
    :param fps: video frames per second
    :param audio: an array frames x samples, or a 1-D array of samples spread
        evenly over the frames; when None the video is written without audio
        by npy_to_mp4
    :return: None
    """
    if filename[-4:] != '.mp4':
        filename = filename + '.mp4'

    if audio is None:
        npy_to_mp4(im_list, filename, fps)
    else:
        if len(audio.shape) == 2:
            samples_per_frame = audio.shape[1]
        else:
            samples_per_frame = audio.shape[0] / len(im_list)
        # Only a single channel is written to the wav file; ffmpeg's
        # '-ac 2' below produces the stereo track.
        mono_samples = audio.reshape(-1)

        # Context managers guarantee the scratch files are removed even if
        # writing or ffmpeg fails.
        with tempfile.NamedTemporaryFile('w', suffix='.wav', dir='.') as tmp_audio_file, \
                tempfile.NamedTemporaryFile('w', suffix='.mp4', dir='.') as tmp_video_file:
            wavfile.write(tmp_audio_file.name, int(samples_per_frame * fps), mono_samples)
            npy_to_mp4(im_list, tmp_video_file.name, fps)
            # Pass the arguments as a list instead of splitting a formatted
            # string, so paths containing whitespace stay intact.
            cmd = [
                'ffmpeg',
                '-i', tmp_audio_file.name,
                '-i', tmp_video_file.name,
                '-vcodec', 'h264',
                '-ac', '2',
                '-channel_layout', 'stereo',
                '-pix_fmt', 'yuv420p',
                filename,
                '-y',
                '-loglevel', '16',
            ]
            call(cmd)

    print('saved a video to ' + filename)
# audio = AudioArrayClip(np.ones((20 * 1067, 1)), fps=fps * 1067)
# clip = clip.set_audio(audio.subclip(0, clip.duration))
# clip.write_videofile(filename, temp_audiofile=filename + '.m4a', remove_temp=True, codec="libx264",audio_codec="aac")
# Short aliases for the writer functions.
npy2gif = npy_to_gif
# Note: npy2mp4 is bound to the ffmpeg-based writer, not to npy_to_mp4.
npy2mp4 = npy_to_mp4_w_audio
def ch_first2last(video):
    """Permute a 4-D array from axis order (0, 1, 2, 3) to (0, 2, 3, 1).

    :param video: array exposing ``transpose``; going by the function name,
        axis 1 is the channel axis that ends up last
    :return: the permuted array
    """
    channel_last_order = (0, 2, 3, 1)
    return video.transpose(channel_last_order)
def ch_last2first(video):
    """Permute a 4-D array from axis order (0, 1, 2, 3) to (0, 3, 1, 2).

    :param video: array exposing ``transpose``; going by the function name,
        the last axis is the channel axis that ends up at position 1
    :return: the permuted array
    """
    channel_first_order = (0, 3, 1, 2)
    return video.transpose(channel_first_order)
def resize_video_chfirst(video, size):
    """Resize a channel-first video by round-tripping through channel-last.

    :param video: channel-first video, converted with ch_first2last before
        resizing
    :param size: target size, forwarded unchanged to resize_video
    :return: the result of resize_video converted back with ch_last2first
    """
    channel_last = ch_first2last(video)
    resized = resize_video(channel_last, size)
    return ch_last2first(resized)
def resize_video(video, size):
    """Resize every frame of a video with torchvision's ``Resize``.

    :param video: sequence or array of frames; each frame is handed to
        ``Image.fromarray``, and the first two axes of a frame are compared
        against ``size``
    :param size: target size passed to ``Resize``; ``None`` (or a first entry
        of ``None``) disables resizing
    :return: ``video`` itself when resizing is disabled, otherwise the frames
        stacked along axis 0 (unchanged if they already have the target size)
    """
    resizing_disabled = size is None or size[0] is None
    if resizing_disabled:
        return video

    first_frame_shape = video[0].shape
    already_target_size = (first_frame_shape[0] == size[0]
                           and first_frame_shape[1] == size[1])
    if already_target_size:
        return np.stack(video, axis=0)

    # Notes from the original author on alternatives that were tried:
    #   * moviepy (ImageSequenceClip + clip.resize) is memory inefficient;
    #   * torch F.interpolate is also slow;
    #   * cv2.resize handles up to 512 channels at once and is about as fast
    #     as the per-frame loop below;
    #   * scipy.ndimage.zoom is even slower than looping.
    # The per-frame loop itself is slow for long videos.
    resize_frame = Resize(size)
    resized_frames = []
    for frame in video:
        pil_frame = Image.fromarray(frame)
        resized_frames.append(np.asarray(resize_frame(pil_frame)))
    return np.stack(resized_frames, axis=0)