Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci and delta using tf2.3.0 #235

Merged
merged 15 commits into from
Nov 10, 2020
Merged
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ services:
before_install:
- export DELTA_PATH=`pwd`; echo $DELTA_PATH
- export DOCKER_DELTA="/home/gitlab-runner/delta"; echo $DOCKER_DELTA
- export CI_IMAGE=zh794390558/delta:ci-cpu-py3
- export CI_IMAGE=zh794390558/delta:2.3.0-ci-cpu-py3
- docker pull ${CI_IMAGE}
- docker run -it -d --name travis_con --user root -v ${DELTA_PATH}:${DOCKER_DELTA} ${CI_IMAGE} bash
- docker exec travis_con bash -c "gcc -v && g++ -v"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ To verify the installation, run:

```shell
# Activate conda environment
conda activate delta-py3.6-tf2.0.0
conda activate delta-py3.6-tf2.3.0
# Or use the following command if your conda version is < 4.6
# source activate delta-py3.6-tf2.0.0
# source activate delta-py3.6-tf2.3.0

# Add DELTA environment
source env.sh
Expand Down
2 changes: 1 addition & 1 deletion core/ops/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ endif
#$(info $(MAKEFILE_DIR))
#$(info $(MAIN_ROOT))

CXX := g++
CXX := g++-7
NVCC := nvcc
PYTHON_BIN_PATH= python3
CC :=
Expand Down
6 changes: 3 additions & 3 deletions delta/data/feat/speech_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,13 @@ def test_splice(self):
self.assertTupleEqual(out.eval().shape, (1, 3, 2 * ctx))
self.assertAllEqual(out, tf.ones([1, 3, 2 * ctx]))

with self.assertRaises(ValueError):
with self.assertRaises(tf.errors.InvalidArgumentError):
out = tffeat.splice(feat, left_context=-2, right_context=-2).eval()

with self.assertRaises(ValueError):
with self.assertRaises(tf.errors.InvalidArgumentError):
out = tffeat.splice(feat, left_context=2, right_context=-2).eval()

with self.assertRaises(ValueError):
with self.assertRaises(tf.errors.InvalidArgumentError):
out = tffeat.splice(feat, left_context=-2, right_context=2).eval()


Expand Down
4 changes: 2 additions & 2 deletions delta/data/preprocess/base_preparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def prepare_one_raw_data(self, one_path, one_path_after, mode,
if self.multi_output:
for i in range(self.output_num):
label_ds = label[i].batch(self.batch_size)
label_iterator = label_ds.make_initializable_iterator()
label_iterator = tf.data.make_initializable_iterator(label_ds)
label_after_arr = self.run_dataset(label_iterator, batch_num)
label_after_one = [
one_line.decode("utf-8") for one_line in label_after_arr
Expand All @@ -154,7 +154,7 @@ def prepare_one_raw_data(self, one_path, one_path_after, mode,
else:
label = label[0]
label_ds = label.batch(self.batch_size)
label_iterator = label_ds.make_initializable_iterator()
label_iterator = tf.data.make_initializable_iterator(label_ds)
label_after_arr = self.run_dataset(label_iterator, batch_num)
one_label_after = [
one_line.decode("utf-8") for one_line in label_after_arr
Expand Down
2 changes: 1 addition & 1 deletion delta/data/preprocess/text_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def load_textline_dataset(paths, column_num):
"""Load raw data for text task."""
ds = tf.data.TextLineDataset(paths)
ds = ds.map(
lambda x: tf.strings.split(x, sep="\t", result_type="RaggedTensor"))
lambda x: tf.squeeze(tf.strings.split(x, sep="\t", result_type="RaggedTensor"), axis = 0))
ds = ds.filter(lambda line: tf.equal(tf.size(line), column_num))
ds_list = []
for i in range(column_num):
Expand Down
4 changes: 2 additions & 2 deletions delta/data/preprocess/text_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_process_one_label_dataset(self):
true_res = [0, 0, 0, 8]
label_ds = process_one_label_dataset(label_ds, self.config)

iterator = label_ds.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(label_ds)
label_res = iterator.get_next()

with tf.Session() as sess:
Expand All @@ -139,7 +139,7 @@ def test_process_multi_label_dataset(self):
label_ds = tf.data.TextLineDataset(label_filepath)
true_res = [[0, 8, 8], [0, 7, 8]]
label_ds = process_multi_label_dataset(label_ds, self.config)
iterator = label_ds.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(label_ds)
label_res = iterator.get_next()

with tf.Session() as sess:
Expand Down
2 changes: 1 addition & 1 deletion delta/data/preprocess/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_pre_process_text_ds_iter(

text_ds = text_ds.batch(batch_size)

iterator = text_ds.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(text_ds)

return iterator

Expand Down
2 changes: 1 addition & 1 deletion delta/data/task/text_cls_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def dataset(self):

data_set = data_set.prefetch(self.num_prefetch_batch)

iterator = data_set.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(data_set)

# pylint: disable=unused-variable
if self.infer_without_label:
Expand Down
2 changes: 1 addition & 1 deletion delta/data/task/text_match_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def dataset(self):

text_ds_left_right = text_ds_left_right.prefetch(self.num_prefetch_batch)

iterator = text_ds_left_right.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(text_ds_left_right)
# pylint: disable=unused-variable
if self.infer_without_label:
(input_x_left, input_x_right), (input_x_left_len,
Expand Down
2 changes: 1 addition & 1 deletion delta/data/task/text_nlu_joint_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def dataset(self):

data_set = data_set.prefetch(self.num_prefetch_batch)

iterator = data_set.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(data_set)

if self.infer_without_label:
input_x, input_x_len = iterator.get_next()
Expand Down
2 changes: 1 addition & 1 deletion delta/data/task/text_seq2seq_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def dataset(self):

data_set = data_set.prefetch(self.num_prefetch_batch)

iterator = data_set.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(data_set)

# pylint: disable=unused-variable
if self.infer_without_label:
Expand Down
2 changes: 1 addition & 1 deletion delta/data/task/text_seq_label_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def dataset(self):

data_set = data_set.prefetch(self.num_prefetch_batch)

iterator = data_set.make_initializable_iterator()
iterator = tf.data.make_initializable_iterator(data_set)

if self.infer_without_label:
input_x, input_x_len = iterator.get_next()
Expand Down
4 changes: 2 additions & 2 deletions delta/utils/metrics/py_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ def call(self, y_true=None, y_pred=None, arguments=None):
in_f.write(ref_sent)
in_f.write("\n")

files_rouge = FilesRouge(self.hyp_path, self.ref_path)
scores = files_rouge.get_scores(avg=True)
files_rouge = FilesRouge()
scores = files_rouge.get_scores(self.hyp_path, self.ref_path, avg=True)
return self.get_scores_output(scores)

@staticmethod
Expand Down
17 changes: 9 additions & 8 deletions delta/utils/solver/asr_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Lambda
from tensorflow.keras.experimental import export_saved_model
#from tensorflow.keras.experimental import export_saved_model

from delta import utils
from delta.utils.decode import py_ctc
Expand Down Expand Up @@ -265,10 +265,11 @@ def ctc_greedy_decode_lambda_func(args):
inputs=[input_feat, input_length], outputs=greedy_decode)

model_export_path = Path(self._model_path).joinpath("export")
export_saved_model(
model=model_to_export,
saved_model_path=str(model_export_path),
custom_objects=None,
as_text=False,
input_signature=None,
serving_only=False)
model_to_export.save(
filepath=str(model_export_path),
overwrite=True,
include_optimizer=True,
save_format='tf',
signatures=None,
options=None,
)
21 changes: 13 additions & 8 deletions docker/build.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ fi

TAG=${TF_VER}-${TARGET}-${DEVICE}-py3
DOCKER='sudo docker'
PIP_INSTALL="pip --no-cache-dir install -i https://pypi.tuna.tsinghua.edu.cn/simple/"
PIP_INSTALL="pip --no-cache-dir install -i https://mirrors.aliyun.com/pypi/simple"

set -e
set -u
Expand All @@ -48,14 +48,19 @@ on_exit() {
trap on_exit HUP INT PIPE QUIT TERM EXIT


# https://hub.docker.com/r/tensorflow/tensorflow
# Versioned images <= 1.15.0 (1.x) and <= 2.1.0 (2.x) have Python 3
# (3.5 for Ubuntu 16-based images; 3.6 for Ubuntu 18-based images) in images tagged "-py3"
# and Python 2.7 in images without "py" in the tag.
# All newer images are Python 3 only. Tags containing -py3 are deprecated.
if [ ${DEVICE} == 'cpu' ] && [ ${TARGET} == 'deltann' ];then
IMAGE=tensorflow/tensorflow:devel-py3
IMAGE=tensorflow/tensorflow:devel
elif [ ${DEVICE} == 'gpu' ] && [ ${TARGET} == 'deltann' ];then
IMAGE=tensorflow/tensorflow:devel-gpu-py3
IMAGE=tensorflow/tensorflow:devel-gpu
elif [ ${DEVICE} == 'cpu' ] && [ ${TARGET} == 'delta' ] || [ ${TARGET} == 'ci' ];then
IMAGE=tensorflow/tensorflow:${TF_VER}-py3
elif [ ${DEVICE} == 'gpu' ] && [ ${TARGET} == 'delta' ] || [ ${TARGET} == 'ci' ];then
IMAGE=tensorflow/tensorflow:${TF_VER}-gpu-py3
IMAGE=tensorflow/tensorflow:${TF_VER}
elif [ ${DEVICE} == 'gpu' ] && [ ${TARGET} == 'delta' ];then
IMAGE=tensorflow/tensorflow:${TF_VER}-gpu
else
echo "no support target or device"
exit -1
Expand Down Expand Up @@ -134,11 +139,11 @@ if [ $MODE == 'push' ] || [ $MODE == 'build' ];then
$DOCKER pull $IMAGE

# build image
$DOCKER build --no-cache=false -t delta:$TAG -f $DOCKERFILE . || { echo "build ${TARGET} ${DEVICE} error"; exit 1; }
$DOCKER build --no-cache=false -t zh794390558/delta:$TAG -f $DOCKERFILE . || { echo "build ${TARGET} ${DEVICE} error"; exit 1; }

#push image
if [ $MODE == 'push' ];then
$DOCKER tag delta:${TAG} zh794390558/delta:${TAG}
#$DOCKER tag delta:${TAG} zh794390558/delta:${TAG}
$DOCKER push zh794390558/delta:$TAG

if [ $? == 0 ]; then
Expand Down
4 changes: 2 additions & 2 deletions docker/dockerfile.ci.cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tensorflow/tensorflow:2.0.0-py3
FROM tensorflow/tensorflow:2.3.0
COPY sources.list.ubuntu18.04 /etc/apt/sources.list

# install tools
Expand All @@ -7,6 +7,6 @@ RUN /bin/bash /install.sh

COPY requirements.txt /ci/requirements.txt
WORKDIR /ci
RUN sudo pip --no-cache-dir install -i https://pypi.tuna.tsinghua.edu.cn/simple/ --upgrade pip && pip --no-cache-dir install -i https://pypi.tuna.tsinghua.edu.cn/simple/ --user -r requirements.txt
RUN sudo pip --no-cache-dir install -i https://mirrors.aliyun.com/pypi/simple --upgrade pip && pip --no-cache-dir install -i https://mirrors.aliyun.com/pypi/simple --user -r requirements.txt

CMD ["/bin/bash", "-c"]
2 changes: 1 addition & 1 deletion docker/dockerfile.delta.cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tensorflow/tensorflow:2.0.0-py3
FROM tensorflow/tensorflow:2.3.0
COPY sources.list.ubuntu18.04 /etc/apt/sources.list

# install tools
Expand Down
2 changes: 1 addition & 1 deletion docker/dockerfile.delta.gpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tensorflow/tensorflow:2.0.0-gpu-py3
FROM tensorflow/tensorflow:2.3.0-gpu
COPY sources.list.ubuntu18.04 /etc/apt/sources.list

# install tools
Expand Down
2 changes: 1 addition & 1 deletion docker/dockerfile.deltann.cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tensorflow/tensorflow:devel-py3
FROM tensorflow/tensorflow:devel
COPY sources.list.ubuntu18.04 /etc/apt/sources.list

# install tools
Expand Down
2 changes: 1 addition & 1 deletion docker/dockerfile.deltann.gpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tensorflow/tensorflow:devel-gpu-py3
FROM tensorflow/tensorflow:devel-gpu
COPY sources.list.ubuntu18.04 /etc/apt/sources.list

# install tools
Expand Down
2 changes: 2 additions & 0 deletions docker/run.sh → docker/gen_dockerfile.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

rm dockerfile.delta*

bash build.sh ci cpu dockerfile
bash build.sh delta cpu dockerfile
bash build.sh delta gpu dockerfile
Expand Down
1 change: 0 additions & 1 deletion docker/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ apt-get update && apt-get install -y --no-install-recommends \
vim \
zlib1g-dev \
wget \
gfortran \
subversion \
ca-certificates \
unzip \
Expand Down
12 changes: 10 additions & 2 deletions docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
matplotlib
sklearn
pandas
librosa
librosa>=0.7.2
numba==0.48.0 # must stay pinned to this exact version; required by librosa>=0.7.2
absl-py
jieba
wget
kaldiio
soundfile
textgrid
Expand All @@ -28,4 +30,10 @@ pyAudioAnalysis
flake8
sentencepiece
deepdiff
tensorflow_addons
tensorflow-addons==0.11.1
tensorflow-model-optimization


# TODO: rm below
# https://github.com/tensorflow/addons/issues/864
tensorflow-cpu==2.3.0
3 changes: 0 additions & 3 deletions docs/released_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,3 @@ Recently several deep learning based approaches have been successfully used in s
### Textual+numeric

We implement direct-concatenation data fusion in the data processing stage; therefore, this type of multimodal training can be used directly with existing models in DELTA.



2 changes: 0 additions & 2 deletions docs/version.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,3 @@ Make sure all PRs under milestone `v0.3.2` are closed, then close the milestone.
Use the command below to generate the release notes.

`python tools/release_notes.py -c didi delta v0.3.2`


2 changes: 1 addition & 1 deletion egs/mini_an4/asr/v1/conf/asr-ctc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ solver:
metrics:
pos_label: 1 # int, same to sklearn
metrics_used : null
monitor_used : val_token_err
monitor_used : val_loss
cals:
- name: AccuracyCal
arguments: null
Expand Down
7 changes: 4 additions & 3 deletions env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ else
fi

# need version
export TF_VER=2.0.0
export TF_VER=2.3.0
export PY_VER=3.6

# root dir
Expand Down Expand Up @@ -51,6 +51,7 @@ if [ -e $MAIN_ROOT/tools/go.env ];then
source $MAIN_ROOT/tools/go.env
fi

# maybe used by deltann
# tensorflow lib path
TF_LIB_PATH=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
export LD_LIBRARY_PATH=${TF_LIB_PATH}:${LD_LIBRARY_PATH}
#TF_LIB_PATH=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
#export LD_LIBRARY_PATH=${TF_LIB_PATH}:${LD_LIBRARY_PATH}
5 changes: 3 additions & 2 deletions tools/install/prepare_kaldi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ pushd ${MAIN_ROOT}/tools/kaldi/tools
extras/check_dependencies.sh || ${SUDO} apt-get install -y zlib1g-dev wget gfortran subversion

SPH2PIPE_VERSION=v2.5
test -e sph2pipe_${SPH2PIPE_VERSION}.tar.gz && rm sph2pipe_${SPH2PIPE_VERSION}.tar.gz
wget -T 10 -t 3 https://www.openslr.org/resources/3/sph2pipe_${SPH2PIPE_VERSION}.tar.gz || wget -T 10 https://sourceforge.net/projects/kaldi/files/sph2pipe_${SPH2PIPE_VERSION}.tar.gz || exit 1
tar --no-same-owner -xzf sph2pipe_v2.5.tar.gz
cd sph2pipe_v2.5/
gcc -o sph2pipe *.c -lm
cd sph2pipe_${SPH2PIPE_VERSION}; patch -p1 < ${MAIN_ROOT}/tools/install/sph2pipe.patch; gcc -o sph2pipe *.c -lm; cd -
touch sph2pipe_${SPH2PIPE_VERSION}/.patched
popd
23 changes: 23 additions & 0 deletions tools/install/sph2pipe.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
diff --git a/sph2pipe.c b/sph2pipe.c
index d0d502b..928739c 100644
--- a/sph2pipe.c
+++ b/sph2pipe.c
@@ -103,6 +103,7 @@

#define _SPH_CONVERT_MAIN_

+#include <unistd.h>
#include "sph_convert.h"
#include "ulaw.h"

diff --git a/ulaw.h b/ulaw.h
index 990c833..62ff96e 100644
--- a/ulaw.h
+++ b/ulaw.h
@@ -82,3 +82,6 @@ GLOBAL short int alaw2pcm[256]
}
#endif
;
+
+uchar pcm2ulaw( short int sample );
+uchar pcm2alaw( short int pcmval );
Loading