Skip to content
This repository has been archived by the owner on Feb 20, 2023. It is now read-only.

Commit

Permalink
Export the trained model
Browse files Browse the repository at this point in the history
When the model is trained, in order to run an inference service to serve
it, the model should be exported. Two optional parameters are
introduced:
  "-save NAME"
  "-save_version VERSION"
By default, the model is not exported. If "-save NAME" is specified, the
model is saved using given NAME. If "-save_version VERSION" is
specified, together with "-save NAME", the model is saved using given
NAME and VERSION. The "-save_version" is ignored if "-save" is missing.
By default, version "001" is used. Models are exported in directory:
 models/<NAME>-<outcome>/<VERSION>/
and are compressed in file:
 models/<NAME>-<outcome>/<NAME>-<outcome>-<VERSION>.tar.gz
The exported models are tested with kserve, the layout of directories and
archive file is designed in a way kserve tensorflow predictor expects.

fixes #2

Signed-off-by: Tzvetomir Stoyanov (VMware) <[email protected]>
  • Loading branch information
tzstoyanov committed Dec 13, 2021
1 parent cc3762d commit 1a4fefa
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.DS_Store
.ipynb_checkpoints
env/
models/
exports/
__pycache__/
virtualenv-ml-conversational/
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ python ./ml-conversational-analytic-tool/run.py <annotated_filename> <dataset_fi
- `dataset_filename` is the location of the raw data
- `model` is the type of model and can be 'LSTM' or 'CNN'
- `outcome` can be 'Constructive', 'Inclusive' or 'Both'
- (optional) `-save NAME` Save the trained model, an output `NAME` must be specified. The model is saved in `models/name-outcome` directory.
- (optional) `-save_version VERSION` If `-save NAME` is specified, save the model using the given `NAME` and `VERSION`. The parameter is ignored if `-save NAME` is missing. By default, version `001` is used.
- (optional) `-roleRelevant` indicates that the encoding generated should be a stacked matrix representing user roles in
conversation. If it is not set then a single matrix representing each comment/review without the role is generated.
- (optional) `-pad` indicates that the number of comment/review should be padded to be a constant value. This argument
Expand Down
3 changes: 3 additions & 0 deletions ml-conversational-analytic-tool/baseCNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def trainModel(self, obs, res, val_split=0.3, val_set=None, epochs=10, batch_siz
validation_split=val_split, verbose=1)
return train_hist

def saveModel(self, name, version):
self.model.save("{}/{}".format(name, version))

def scoreModel(self, obs, res):
"""
Score model for accuracy, precision and recall
Expand Down
3 changes: 3 additions & 0 deletions ml-conversational-analytic-tool/baseLSTM.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ def trainModel(self, obs, res, val_split=0.3, val_set=None, epochs=10, batch_siz
batch_size=batch_size, verbose=1)
return train_hist

def saveModel(self, name, version):
self.model.save("{}/{}".format(name, version))

def scoreModel(self, obs, res):
"""
Score model for accuracy, precision and recall
Expand Down
38 changes: 32 additions & 6 deletions ml-conversational-analytic-tool/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,31 @@
# SPDX-License-Identifier: Apache-2.0

import argparse
import tarfile
import os

from sklearn.model_selection import train_test_split

from baseCNN import BaseCNN
from baseLSTM import BaseLSTM
from preProcessedDataset import PreProcessedDataset


def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type, padding):
model_directory = 'models'

def save_model(model, name, version):
    """Export a trained model and pack it for serving.

    The model is written to models/<name>/<version>/ and that tree is
    compressed into models/<name>/<name>-<version>.tar.gz — the
    directory/archive layout the kserve tensorflow predictor expects.

    :param model: trained model exposing saveModel(name=..., version=...)
    :param name: export name (callers pass "<NAME>-<outcome>")
    :param version: export version string, e.g. "001"
    """
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)
    model_path = "{}/{}".format(model_directory, name)
    tar_file_name = "{}-{}.tar.gz".format(name, version)
    model.saveModel(name=model_path, version=version)
    # Build the archive in place instead of os.chdir()-ing into the export
    # directory: an exception mid-archive can no longer strand the process
    # in a different working directory, and the hard-coded "../../" return
    # path (wrong if model_directory ever changes depth) goes away.
    # arcname=version keeps only "<version>/..." inside the tarball, the
    # same member layout the chdir-based code produced.
    with tarfile.open("{}/{}".format(model_path, tar_file_name), "w:gz") as tar:
        tar.add("{}/{}".format(model_path, version), arcname=version)
    print("Model saved in {}/{}; {}/{}".format(model_path, version, model_path, tar_file_name))

def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type, padding, save_name, model_ver):
# Setup dataset
data = PreProcessedDataset()
data.setupPreProcess(annotated_filename, dataset_filename)
Expand All @@ -35,6 +52,10 @@ def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type

# Score model
scores = model.scoreModel(test_obs, test_res)

# Save model
if save_name is not None and len(save_name) > 0:
save_model(model=model, name=save_name+"-"+outcome, version=model_ver)

return scores

Expand All @@ -46,6 +67,10 @@ def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type
parser.add_argument('dataset_filename', help='File location of extracted dataset')
parser.add_argument('model', help='Model type to use for training, supported CNN and LSTM')
parser.add_argument('outcome', help='Inclusive, Constructive, or Both')
parser.add_argument('-save', metavar='NAME', help='Save the model using given NAME')
parser.add_argument('-save_version', metavar='VERSION', default='001',
help='Together with -save NAME: save the model using given NAME and VERSION. '\
'If omitted, 001 is used. The parameter is ignored if -save is missing.')
parser.add_argument('-roleRelevant', action='store_true', default=False,
help='Encoding method differentiates b/w conversation roles')
parser.add_argument('-pad', action='store_true', default=False, help='Pad total length of each pull')
Expand All @@ -60,13 +85,14 @@ def run(annotated_filename, dataset_filename, outcome, encoding_type, model_type
encodingType = 'role-agnostic'

if args.outcome != 'Both':
run_res = run(args.annotated_filename, args.dataset_filename, args.outcome, encodingType, args.model, args.pad)
run_res = run(args.annotated_filename, args.dataset_filename, args.outcome, encodingType,
args.model, args.pad, args.save, args.save_version)
print(run_res)
else:
run_res_constructive = run(args.annotated_filename, args.dataset_filename, 'Constructive', encodingType,
args.model, args.pad)
args.model, args.pad, args.save, args.save_version)
print("Constructive: {}".format(run_res_constructive))

run_res_inclusive = run(args.annotated_filename, args.dataset_filename, 'Inclusive', encodingType, args.model,
args.pad)
run_res_inclusive = run(args.annotated_filename, args.dataset_filename, 'Inclusive', encodingType,
args.model, args.pad, args.save, args.save_version)
print("Inclusvie: {}".format(run_res_inclusive))

0 comments on commit 1a4fefa

Please sign in to comment.