Skip to content
This repository has been archived by the owner on Feb 20, 2023. It is now read-only.

Commit

Permalink
Export the trained model
Browse files Browse the repository at this point in the history
When the model is trained, in order to run an inference service to serve
it, the model should be exported. Two optional parameters are
introduced:
  "-save NAME"
  "-save_version VERSION"
By default, the model is not exported. If "-save NAME" is specified, the
model is saved using given NAME. If "-save_version VERSION" is
specified, together with "-save NAME", the model is saved using given
NAME and VERSION. The "-save_version" is ignored if "-save" is missing.
By default, version "001" is used. Models are exported in directory:
 models/<NAME>-<outcome>/<VERSION>/
and are compressed in file:
 models/<NAME>-<outcome>/<NAME>-<outcome>-<VERSION>.tar.gz
The exported models are tested with kserve, the layout of directories and
archive file is designed in a way kserve tensorflow predictor expects.

fixes #2

Signed-off-by: Tzvetomir Stoyanov (VMware) <[email protected]>
  • Loading branch information
tzstoyanov committed Dec 13, 2021
1 parent cc3762d commit 1a4fefa
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.DS_Store
.ipynb_checkpoints
env/
models/
exports/
__pycache__/
virtualenv-ml-conversational/
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ python ./ml-conversational-analytic-tool/run.py <annotated_filename> <dataset_fi
- `dataset_filename` is the location of the raw data
- `model` is the type of model and can be 'LSTM' or 'CNN'
- `outcome` can be 'Constructive', 'Inclusive' or 'Both'
- (optional) `-save NAME` Save the trained model, an output `NAME` must be specified. The model is saved in `models/name-outcome` directory.
- (optional) `-save_version VERSION` If `-save NAME` is specified, save the model using the given `NAME` and `VERSION`. The parameter is ignored if `-save NAME` is missing. By default, version `001` is used.
- (optional) `-roleRelevant` indicates that the encoding generated should be a stacked matrix representing user roles in
conversation. If it is not set then a single matrix representing each comment/review without the role is generated.
- (optional) `-pad` indicates that the number of comment/review should be padded to be a constant value. This argument
Expand Down
3 changes: 3 additions & 0 deletions ml-conversational-analytic-tool/baseCNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def trainModel(self, obs, res, val_split=0.3, val_set=None, epochs=10, batch_siz
validation_split=val_split, verbose=1)
return train_hist

def saveModel(self, name, version):
self.model.save("{}/{}".format(name, version))

def scoreModel(self, obs, res):
"""
Score model for accuracy, precision and recall
Expand Down
3 changes: 3 additions & 0 deletions ml-conversational-analytic-tool/baseLSTM.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ def trainModel(self, obs, res, val_split=0.3, val_set=None, epochs=10, batch_siz
batch_size=batch_size, verbose=1)
return train_hist

def saveModel(self, name, version):
self.model.save("{}/{}".format(name, version))

def scoreModel(self, obs, res):
"""
Score model for accuracy, precision and recall
Expand Down
38 changes: 32 additions & 6 deletions ml-conversational-analytic-tool/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,31 @@
# SPDX-License-Identifier: Apache-2.0

import argparse
import tarfile
import os

from sklearn.model_selection import train_test_split

from baseCNN import BaseCNN
from baseLSTM import BaseLSTM
from preProcessedDataset import PreProcessedDataset


def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type, padding):
model_directory = 'models'

def save_model(model, name, version):
    """Export a trained model and pack it for serving.

    The model is written to models/<name>/<version>/ and that tree is
    compressed into models/<name>/<name>-<version>.tar.gz — the
    directory/archive layout the kserve tensorflow predictor expects.

    :param model: trained model exposing saveModel(name=..., version=...)
    :param name: export name (callers pass "<NAME>-<outcome>")
    :param version: export version string, e.g. "001"
    """
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)
    model_path = "{}/{}".format(model_directory, name)
    tar_file_name = "{}-{}.tar.gz".format(name, version)
    model.saveModel(name=model_path, version=version)
    # Build the archive in place instead of os.chdir()-ing into the export
    # directory: an exception mid-archive can no longer strand the process
    # in a different working directory, and the hard-coded "../../" return
    # path (wrong if model_directory ever changes depth) goes away.
    # arcname=version keeps only "<version>/..." inside the tarball, the
    # same member layout the chdir-based code produced.
    with tarfile.open("{}/{}".format(model_path, tar_file_name), "w:gz") as tar:
        tar.add("{}/{}".format(model_path, version), arcname=version)
    print("Model saved in {}/{}; {}/{}".format(model_path, version, model_path, tar_file_name))

def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type, padding, save_name, model_ver):
# Setup dataset
data = PreProcessedDataset()
data.setupPreProcess(annotated_filename, dataset_filename)
Expand All @@ -35,6 +52,10 @@ def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type

# Score model
scores = model.scoreModel(test_obs, test_res)

# Save model
if save_name is not None and len(save_name) > 0:
save_model(model=model, name=save_name+"-"+outcome, version=model_ver)

return scores

Expand All @@ -46,6 +67,10 @@ def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type
parser.add_argument('dataset_filename', help='File location of extracted dataset')
parser.add_argument('model', help='Model type to use for training, supported CNN and LSTM')
parser.add_argument('outcome', help='Inclusive, Constructive, or Both')
parser.add_argument('-save', metavar='NAME', help='Save the model using given NAME')
parser.add_argument('-save_version', metavar='VERSION', default='001',
help='Together with -save NAME: save the model using given NAME and VERSION. '\
'If omitted, 001 is used. The parameter is ignored if -save is missing.')
parser.add_argument('-roleRelevant', action='store_true', default=False,
help='Encoding method differentiates b/w conversation roles')
parser.add_argument('-pad', action='store_true', default=False, help='Pad total length of each pull')
Expand All @@ -60,13 +85,14 @@ def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type
encodingType = 'role-agnostic'

if args.outcome != 'Both':
run_res = run(args.annotated_filename, args.dataset_filename, args.outcome, encodingType, args.model, args.pad)
run_res = run(args.annotated_filename, args.dataset_filename, args.outcome, encodingType,
args.model, args.pad, args.save, args.save_version)
print(run_res)
else:
run_res_constructive = run(args.annotated_filename, args.dataset_filename, 'Constructive', encodingType,
args.model, args.pad)
args.model, args.pad, args.save, args.save_version)
print("Constructive: {}".format(run_res_constructive))

run_res_inclusive = run(args.annotated_filename, args.dataset_filename, 'Inclusive', encodingType, args.model,
args.pad)
run_res_inclusive = run(args.annotated_filename, args.dataset_filename, 'Inclusive', encodingType,
args.model, args.pad, args.save, args.save_version)
print("Inclusvie: {}".format(run_res_inclusive))

0 comments on commit 1a4fefa

Please sign in to comment.