Skip to content

Commit

Permalink
[torchaudio] Fix torchaudio interface error (wenet-e2e#2352)
Browse files Browse the repository at this point in the history
  • Loading branch information
lsrami authored Mar 20, 2024
1 parent 6f77461 commit 0e2b6e7
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 11 deletions.
6 changes: 2 additions & 4 deletions tools/compute_cmvn_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
import torchaudio.compliance.kaldi as kaldi
from torch.utils.data import Dataset, DataLoader

- torchaudio.set_audio_backend("sox_io")


class CollateFunc(object):
''' Collate function for AudioDataset
Expand All @@ -32,15 +30,15 @@ def __call__(self, batch):
value = item[1].strip().split(",")
assert len(value) == 3 or len(value) == 1
wav_path = value[0]
- sample_rate = torchaudio.backend.sox_io_backend.info(
+ sample_rate = torchaudio.info(
wav_path).sample_rate
resample_rate = sample_rate
# len(value) == 3 means segmented wav.scp,
# len(value) == 1 means original wav.scp
if len(value) == 3:
start_frame = int(float(value[1]) * sample_rate)
end_frame = int(float(value[2]) * sample_rate)
- waveform, sample_rate = torchaudio.backend.sox_io_backend.load(
+ waveform, sample_rate = torchaudio.load(
filepath=wav_path,
num_frames=end_frame - start_frame,
frame_offset=start_frame)
Expand Down
8 changes: 2 additions & 6 deletions tools/compute_fbank_feats.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@

import wenet.dataset.kaldi_io as kaldi_io

- # The "sox" backends are deprecated and will be removed in 0.9.0 release.
- # So here we use sox_io backend
- torchaudio.set_audio_backend("sox_io")


def parse_opts():
parser = argparse.ArgumentParser(description='training your network')
Expand Down Expand Up @@ -104,14 +100,14 @@ def load_wav_segments(wav_scp_file, segments_file):
for item in audio_list:
if len(item) == 2:
key, wav_path = item
- waveform, sample_rate = torchaudio.load_wav(wav_path)
+ waveform, sample_rate = torchaudio.load(wav_path)
else:
assert len(item) == 4
key, wav_path, start, end = item
sample_rate = torchaudio.info(wav_path).sample_rate
frame_offset = int(start * sample_rate)
num_frames = int((end - start) * sample_rate)
- waveform, sample_rate = torchaudio.load_wav(
+ waveform, sample_rate = torchaudio.load(
wav_path, frame_offset, num_frames)

mat = kaldi.fbank(waveform,
Expand Down
1 change: 0 additions & 1 deletion tools/wav2dur.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import torchaudio

- torchaudio.set_audio_backend("sox_io")

scp = sys.argv[1]
dur_scp = sys.argv[2]
Expand Down

0 comments on commit 0e2b6e7

Please sign in to comment.