Skip to content

Commit

Permalink
[scripts] Change make_rttm.py to read/write files with UTF-8 encoding
Browse files: browse the repository at this point in the history
  • Loading branch information
entn-at authored and Bar-BY committed Jan 21, 2020
1 parent 7eb14be commit 5140ffd
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions egs/callhome_diarization/v1/diarization/make_rttm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@

import argparse
import sys

sys.path.append('steps/libs')
import common as common_lib
import codecs


def get_args():
Expand All @@ -63,14 +61,14 @@ def main():

# File containing speaker labels per segment
seg2label = {}
with common_lib.smart_open(args.labels) as labels_file:
with codecs.open(args.labels, 'r', 'utf-8') as labels_file:
for line in labels_file:
seg, label = line.strip().split()
seg2label[seg] = label

# Segments file
reco2segs = {}
with common_lib.smart_open(args.segments) as segments_file:
with codecs.open(args.segments, 'r', 'utf-8') as segments_file:
for line in segments_file:
seg, reco, start, end = line.strip().split()
try:
Expand Down Expand Up @@ -117,7 +115,7 @@ def main():
new_segs += " " + start + "," + end + "," + label
merged_segs.append(reco + new_segs)

with common_lib.smart_open(args.rttm_file, 'w') as rttm_writer:
with codecs.open(args.rttm_file, 'w', 'utf-8') as rttm_writer:
for reco_line in merged_segs:
segs = reco_line.strip().split()
reco = segs[0]
Expand Down

0 comments on commit 5140ffd

Please sign in to comment.