From 2fe3228752b01b3ac58fdef173ebd53bd964df1b Mon Sep 17 00:00:00 2001 From: Bill Scherer <36514047+billschereriii@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:41:01 -0500 Subject: [PATCH] Expose MINBATCHTIMEOUT parameter in set_model interface (#406) Expose MINBATCHTIMEOUT parameter to set_model() family calls Tweak CI/CD pipeline to free up disk space needed for the Intel compiler [ committed by @billschereriii ] [ reviewed by @ashao ] --- .github/workflows/run_tests.yml | 12 +++ doc/changelog.rst | 5 +- doc/data_structures.rst | 1 + include/c_client.h | 10 +- include/client.h | 8 ++ include/pyclient.h | 8 ++ include/redis.h | 4 + include/rediscluster.h | 4 + include/redisserver.h | 4 + src/c/c_client.cpp | 20 ++-- src/cpp/client.cpp | 50 ++++++++- src/cpp/redis.cpp | 11 +- src/cpp/rediscluster.cpp | 9 +- src/fortran/client.F90 | 48 +++++---- src/fortran/client/model_interfaces.inc | 24 +++-- src/python/module/smartredis/client.py | 20 ++++ src/python/src/pyclient.cpp | 18 ++-- tests/python/test_errors.py | 2 + tests/python/test_model_methods_torch.py | 125 +++++++++++++++++++++++ 19 files changed, 331 insertions(+), 52 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index b4aea72a2..bbcb343c2 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -75,6 +75,18 @@ jobs: with: python-version: ${{ matrix.py_v }} + # Free up some disk space + - name: Free disk space + run: | + sudo rm -rf /usr/share/dotnet && + sudo rm -rf /opt/ghc && + sudo rm -rf "/usr/local/share/boost" + + # sudo rm -rf /usr/share/dotnet && + # sudo rm -rf /opt/ghc && + # sudo rm -rf "/usr/local/share/boost" && + # sudo rm -rf "$AGENT_TOOLSDIRECTORY" + # Install compilers (Intel or GCC) - name: Install GCC if: "!contains( matrix.compiler, 'intel' )" # if using GNU compiler diff --git a/doc/changelog.rst b/doc/changelog.rst index b38b8eb3c..1e12c96ad 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -8,18 +8,21 @@ To be released at some future point in time Description +- Improved support for model execution batching - Added support for model chunking - Updated the third-party RedisAI component - Updated the third-party lcov component -- Add link to contributing guidelines +- Added link to contributing guidelines Detailed Notes +- Exposed access to the RedisAI MINBATCHTIMEOUT parameter, which limits the delay in model execution when trying to accumulate multiple executions in a batch (PR406_) - Models will now be automatically chunked when sent to/received from the backend database. This allows use of models greater than 511MB in size. (PR404_) - Updated from RedisAI v1.2.3 (test target)/v1.2.4 and v1.2.5 (CI/CD pipeline) to v1.2.7 (PR402_) - Updated lcov from version 1.15 to 2.0 (PR396_) - Created CONTRIBUTIONS.md file that points to the contribution guidelines for both SmartSim and SmartRedis (PR395_) +.. _PR406: https://github.com/CrayLabs/SmartRedis/pull/406 .. _PR404: https://github.com/CrayLabs/SmartRedis/pull/404 .. _PR402: https://github.com/CrayLabs/SmartRedis/pull/402 ..
_PR396: https://github.com/CrayLabs/SmartRedis/pull/396 diff --git a/doc/data_structures.rst b/doc/data_structures.rst index 2a1b86467..fb547c4bd 100644 --- a/doc/data_structures.rst +++ b/doc/data_structures.rst @@ -358,6 +358,7 @@ are uniform across all SmartRedis clients, and as an example, the C++ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/c_client.h b/include/c_client.h index 3d0535d43..850b8ea9b 100644 --- a/include/c_client.h +++ b/include/c_client.h @@ -323,7 +323,7 @@ bool _isTensorFlow(const char* backend); /*! * \brief Check the parameters common to all set_model methods * \details Make sure that all pointers are not void and that the size -* of the inputs and outputs is not zero +* of the inputs and outputs is not zero * \param c_client The client object to use for communication * \param name The name to associate with the model * \param backend The name of the backend (TF, TFLITE, TORCH, ONNX) @@ -372,6 +372,7 @@ void _check_params_set_model(void* c_client, * excluding null terminating character * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The length of the tag string, * excluding null terminating character @@ -396,6 +397,7 @@ SRError set_model_from_file(void* c_client, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, @@ -428,6 +430,7 @@ SRError set_model_from_file(void* c_client, * \param num_gpus The number of GPUs to use with the model * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The length of the tag string, * excluding null terminating character @@ -452,6 +455,7 @@ SRError set_model_from_file_multigpu(void* c_client, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, @@ -486,6 +490,7 @@ SRError set_model_from_file_multigpu(void* c_client, * excluding null terminating character * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The length of the tag string, * excluding null terminating character @@ -510,6 +515,7 @@ SRError set_model(void* c_client, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, @@ -542,6 +548,7 @@ SRError set_model(void* c_client, * \param num_gpus The number of GPUs to use with the model * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The
length of the tag string, * excluding null terminating character @@ -566,6 +573,7 @@ SRError set_model_multigpu(void* c_client, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, diff --git a/include/client.h b/include/client.h index 3444c707b..a9bb07614 100644 --- a/include/client.h +++ b/include/client.h @@ -344,6 +344,7 @@ class Client : public SRObject * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only). For other models, provide an @@ -359,6 +360,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -383,6 +385,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -398,6 +401,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -423,6 +427,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only).
For other models, provide an @@ -438,6 +443,7 @@ class Client : public SRObject const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -462,6 +468,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -477,6 +484,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/pyclient.h b/include/pyclient.h index b1f82bcbb..5c407a3a3 100644 --- a/include/pyclient.h +++ b/include/pyclient.h @@ -285,6 +285,7 @@ class PyClient : public PySRObject * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -299,6 +300,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -317,6 +319,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -332,6 +335,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -350,6 +354,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -364,6 +369,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -382,6 +388,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -397,6 +404,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/redis.h b/include/redis.h index 4aa9f246c..b799577c1 100644 --- a/include/redis.h +++ b/include/redis.h @@ -283,6 +283,7 @@ class Redis : public RedisServer * (e.g.
CPU or GPU) * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only) @@ -297,6 +298,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -314,6 +316,7 @@ * \param num_gpus The number of GPUs to use with this model * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only) @@ -328,6 +331,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/rediscluster.h b/include/rediscluster.h index 876c6b733..7371ec343 100644 --- a/include/rediscluster.h +++ b/include/rediscluster.h @@ -302,6 +302,7 @@ class RedisCluster : public RedisServer * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -317,6 +318,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -335,6 +337,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -350,6 +353,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/redisserver.h b/include/redisserver.h index b28d03a25..39594c505 100644 --- a/include/redisserver.h +++ b/include/redisserver.h @@ -285,6 +285,7 @@ class RedisServer { * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -300,6 +301,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -319,6 +321,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag
A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -334,6 +337,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/src/c/c_client.cpp b/src/c/c_client.cpp index 99cd691da..6672184b5 100644 --- a/src/c/c_client.cpp +++ b/src/c/c_client.cpp @@ -392,6 +392,7 @@ extern "C" SRError set_model_from_file( const char* device, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, const char** outputs, const size_t* output_lengths, const size_t n_outputs) @@ -430,8 +431,8 @@ } s->set_model_from_file(name_str, model_file_str, backend_str, device_str, - batch_size, min_batch_size, tag_str, input_vec, - output_vec); + batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } @@ -443,6 +444,7 @@ extern "C" SRError set_model_from_file_multigpu( const char* backend, const size_t backend_length, const int first_gpu, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, const char** outputs, @@ -481,8 +483,8 @@ } s->set_model_from_file_multigpu(name_str, model_file_str, backend_str, first_gpu, - num_gpus, batch_size, min_batch_size, tag_str, - input_vec, output_vec); + num_gpus, batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } @@ -494,6 +496,7 @@ extern "C" SRError set_model( const char* backend, const size_t backend_length, const char* device, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, @@ -534,8 +537,8 @@ } s->set_model(name_str, model_str, backend_str, device_str, - batch_size, min_batch_size, tag_str, input_vec, - output_vec); + batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } @@ -547,6 +550,7 @@ extern "C" SRError set_model_multigpu( const char* backend, const size_t backend_length, const int first_gpu, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, @@ -586,8 +590,8 @@ } s->set_model_multigpu(name_str, model_str, backend_str, first_gpu, num_gpus, - batch_size, min_batch_size, tag_str, input_vec, - output_vec); + batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } diff --git a/src/cpp/client.cpp b/src/cpp/client.cpp index cfb1867a5..7e9e98c7d 100644 --- a/src/cpp/client.cpp +++ b/src/cpp/client.cpp @@ -502,6 +502,7 @@ void Client::set_model_from_file(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -522,7 +523,7 @@ std::string_view
model(tmp.data(), tmp.length()); set_model(name, model, backend, device, batch_size, - min_batch_size, tag, inputs, outputs); + min_batch_size, min_batch_timeout, tag, inputs, outputs); } // Set a model from file in the database for future execution in a multi-GPU system @@ -533,6 +534,7 @@ void Client::set_model_from_file_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -553,8 +555,42 @@ void Client::set_model_from_file_multigpu(const std::string& name, std::string_view model(tmp.data(), tmp.length()); set_model_multigpu(name, model, backend, first_gpu, num_gpus, batch_size, - min_batch_size, tag, inputs, outputs); + min_batch_size, min_batch_timeout, tag, inputs, outputs); } + +// Validate batch settings for the set_model calls +inline void __check_batch_settings( + int batch_size, int min_batch_size, int min_batch_timeout) +{ + // Throw a usage exception if batch_size is zero but one of the other + // parameters is non-zero + if (batch_size == 0 && (min_batch_size > 0 || min_batch_timeout > 0)) { + throw SRRuntimeException( + "batch_size must be non-zero if min_batch_size or " + "min_batch_timeout is used; otherwise batching will " + "not be performed." + ); + } + + // Throw a usage exception if min_batch_timeout is nonzero and + // min_batch_size is zero. (batch_size also has to be non-zero, but + // this was caught in the previous clause.) + if (min_batch_timeout > 0 && min_batch_size == 0) { + throw SRRuntimeException( + "min_batch_size must be non-zero if min_batch_timeout " + "is used; otherwise the min_batch_timeout parameter is ignored." + ); + } + + // Issue a warning if min_batch_size is non-zero but min_batch_timeout is zero + if (min_batch_size > 0 && min_batch_timeout == 0) { + std::cerr << "WARNING: min_batch_timeout was not set when a non-zero " + << "min_batch_size was selected.
" << std::endl + << "Setting a small value (~10ms) for min_batch_timeout " + << "may improve performance" << std::endl; + } +} + // Set a model from a string buffer in the database for future execution void Client::set_model(const std::string& name, const std::string_view& model, @@ -562,6 +598,7 @@ void Client::set_model(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -606,6 +643,8 @@ void Client::set_model(const std::string& name, throw SRRuntimeException(device + " is not a valid device."); } + __check_batch_settings(batch_size, min_batch_size, min_batch_timeout); + // Split model into chunks size_t offset = 0; std::vector model_segments; @@ -621,7 +660,7 @@ void Client::set_model(const std::string& name, std::string key = _build_model_key(name, false); auto response = _redis_server->set_model( key, model_segments, backend, device, - batch_size, min_batch_size, + batch_size, min_batch_size, min_batch_timeout, tag, inputs, outputs); if (response.has_error()) { throw SRInternalException( @@ -636,6 +675,7 @@ void Client::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -677,6 +717,8 @@ void Client::set_model_multigpu(const std::string& name, throw SRParameterException(backend + " is not a valid backend."); } + __check_batch_settings(batch_size, min_batch_size, min_batch_timeout); + // Split model into chunks size_t offset = 0; std::vector model_segments; @@ -692,7 +734,7 @@ void Client::set_model_multigpu(const std::string& name, std::string key = _build_model_key(name, false); _redis_server->set_model_multigpu( key, model_segments, backend, first_gpu, num_gpus, - batch_size, min_batch_size, + batch_size, min_batch_size, min_batch_timeout, tag, inputs, outputs); } diff --git a/src/cpp/redis.cpp b/src/cpp/redis.cpp index 0aa7560c6..a00af813b 100644 --- a/src/cpp/redis.cpp +++ b/src/cpp/redis.cpp @@ -299,6 +299,7 @@ CommandReply Redis::set_model(const std::string& model_name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs @@ -318,6 +319,9 @@ CommandReply Redis::set_model(const std::string& model_name, if (min_batch_size > 0) { cmd << "MINBATCHSIZE" << std::to_string(min_batch_size); } + if (min_batch_timeout > 0) { + cmd << "MINBATCHTIMEOUT" << std::to_string(min_batch_timeout); + } if (inputs.size() > 0) { cmd << "INPUTS" << std::to_string(inputs.size()) << inputs; } @@ -339,6 +343,7 @@ void Redis::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -349,7 +354,8 @@ void Redis::set_model_multigpu(const std::string& name, std::string device = "GPU:" + std::to_string(i); std::string model_key = name + "." 
+ device; result = set_model( - model_key, model, backend, device, batch_size, min_batch_size, tag, inputs, outputs); + model_key, model, backend, device, batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set model for GPU " + std::to_string(i)); } @@ -357,7 +363,8 @@ void Redis::set_model_multigpu(const std::string& name, // Add a version for get_model to find result = set_model( - name, model, backend, "GPU", batch_size, min_batch_size, tag, inputs, outputs); + name, model, backend, "GPU", batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set general model"); } diff --git a/src/cpp/rediscluster.cpp b/src/cpp/rediscluster.cpp index 3c1ae259d..6f847dc6e 100644 --- a/src/cpp/rediscluster.cpp +++ b/src/cpp/rediscluster.cpp @@ -511,6 +511,7 @@ CommandReply RedisCluster::set_model(const std::string& model_name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -531,6 +532,9 @@ CommandReply RedisCluster::set_model(const std::string& model_name, if (min_batch_size > 0) { cmd << "MINBATCHSIZE" << std::to_string(min_batch_size); } + if (min_batch_timeout > 0) { + cmd << "MINBATCHTIMEOUT" << std::to_string(min_batch_timeout); + } if ( inputs.size() > 0) { cmd << "INPUTS" << std::to_string(inputs.size()) << inputs; } @@ -558,6 +562,7 @@ void RedisCluster::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -571,7 +576,7 @@ void RedisCluster::set_model_multigpu(const std::string& name, // Store it CommandReply result = set_model( model_key, model, backend, device, batch_size, min_batch_size, - tag, inputs, outputs); + min_batch_timeout, tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set model for " + device); } @@ -580,7 +585,7 @@ // Add a version for get_model to find CommandReply result = set_model( name, model, backend, "GPU", batch_size, min_batch_size, - tag, inputs, outputs); + min_batch_timeout, tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set general model"); } diff --git a/src/fortran/client.F90 b/src/fortran/client.F90 index 7c79f0148..c3acd35c7 100644 --- a/src/fortran/client.F90 +++ b/src/fortran/client.F90 @@ -746,8 +746,8 @@ function get_model(self, name, model) result(code) end function get_model !> Load the machine learning model from a file and set the configuration -function set_model_from_file(self, name, model_file, backend, device, batch_size, min_batch_size, tag, & - inputs, outputs) result(code) +function set_model_from_file(self, name, model_file, backend, device, batch_size, min_batch_size, & + min_batch_timeout, tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model_file !< The file storing the model character(len=*), intent(in) :: device !< The name of the device (CPU, GPU, GPU:0, GPU:1...)
integer, optional, intent(in) :: batch_size !< The batch size for model execution integer, optional, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, optional, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), optional, intent(in) :: tag !< A tag to attach to the model for !! information purposes character(len=*), dimension(:), optional, intent(in) :: inputs !< One or more names of model input nodes (TF @@ -775,7 +776,7 @@ function set_model_from_file(self, name, model_file, backend, device, batch_size integer(c_size_t), dimension(:), allocatable, target :: input_lengths, output_lengths integer(kind=c_size_t) :: name_length, model_file_length, backend_length, device_length, tag_length, n_inputs, & n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -784,6 +785,8 @@ function set_model_from_file(self, name, model_file, backend, device, batch_size if (present(batch_size)) c_batch_size = batch_size c_min_batch_size = 0 if (present(min_batch_size)) c_min_batch_size = min_batch_size + c_min_batch_timeout = 0 + if (present(min_batch_timeout)) c_min_batch_timeout = min_batch_timeout if (present(tag)) then allocate(character(kind=c_char, len=len_trim(tag)) :: c_tag) c_tag = tag @@ -828,8 +831,8 @@ function set_model_from_file(self, name, model_file, backend, device, batch_size code = set_model_from_file_c(self%client_ptr, c_name, name_length, c_model_file, model_file_length, & c_backend, backend_length, c_device, device_length, c_batch_size, c_min_batch_size, & - c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, & - output_lengths_ptr, n_outputs) + c_min_batch_timeout, c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, & + outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) if (allocated(input_lengths)) deallocate(input_lengths) if (allocated(ptrs_to_inputs)) deallocate(ptrs_to_inputs) @@ -840,7 +843,7 @@ end function set_model_from_file !> Load the machine learning model from a file and set the configuration for use in multi-GPU systems function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu, num_gpus, batch_size, min_batch_size, & - tag, inputs, outputs) result(code) + min_batch_timeout, tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model_file !< The file storing the model @@ -849,6 +852,7 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu integer, intent(in) :: num_gpus !< The number of GPUs to use with the model integer, optional, intent(in) :: batch_size !< The batch size for model execution integer, optional, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, optional, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), optional, intent(in) :: tag !< A tag to attach to the model for !! 
information purposes character(len=*), dimension(:), optional, intent(in) :: inputs !< One or more names of model input nodes (TF @@ -868,7 +872,7 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu integer(c_size_t), dimension(:), allocatable, target :: input_lengths, output_lengths integer(kind=c_size_t) :: name_length, model_file_length, backend_length, tag_length, n_inputs, & n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_first_gpu, c_num_gpus + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout, c_first_gpu, c_num_gpus type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -877,6 +881,8 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu if (present(batch_size)) c_batch_size = batch_size c_min_batch_size = 0 if (present(min_batch_size)) c_min_batch_size = min_batch_size + c_min_batch_timeout = 0 + if (present(min_batch_timeout)) c_min_batch_timeout = min_batch_timeout if (present(tag)) then allocate(character(kind=c_char, len=len_trim(tag)) :: c_tag) c_tag = tag @@ -922,8 +928,8 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu code = set_model_from_file_multigpu_c(self%client_ptr, c_name, name_length, c_model_file, model_file_length, & c_backend, backend_length, c_first_gpu, c_num_gpus, c_batch_size, c_min_batch_size, & - c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, & - output_lengths_ptr, n_outputs) + c_min_batch_timeout, c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, & + outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) if (allocated(input_lengths)) deallocate(input_lengths) @@ -934,8 +940,8 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu end function set_model_from_file_multigpu !> Establish a model to run -function set_model(self, name, model, backend, device, batch_size, min_batch_size, tag, & - inputs, outputs) result(code) +function set_model(self, name, model, backend, device, batch_size, min_batch_size, min_batch_timeout, & + tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model !< The binary representation of the model @@ -943,6 +949,7 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz character(len=*), intent(in) :: device !< The name of the device (CPU, GPU, GPU:0, GPU:1...) 
integer, intent(in) :: batch_size !< The batch size for model execution integer, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), intent(in) :: tag !< A tag to attach to the model for information purposes character(len=*), dimension(:), intent(in) :: inputs !< One or more names of model input nodes (TF models) character(len=*), dimension(:), intent(in) :: outputs !< One or more names of model output nodes (TF models) @@ -960,7 +967,7 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz integer(c_size_t), dimension(:), allocatable, target :: input_lengths, output_lengths integer(kind=c_size_t) :: name_length, model_length, backend_length, device_length, tag_length, n_inputs, & n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -984,12 +991,13 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz output_lengths_ptr, n_outputs) if (code /= SRNoError) return - ! Cast the batch sizes to C integers + ! Cast the batch params to C integers c_batch_size = batch_size c_min_batch_size = min_batch_size + c_min_batch_timeout = min_batch_timeout code = set_model_c(self%client_ptr, c_name, name_length, c_model, model_length, c_backend, backend_length, & - c_device, device_length, batch_size, min_batch_size, c_tag, tag_length, & + c_device, device_length, batch_size, min_batch_size, c_min_batch_timeout, c_tag, tag_length, & inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) @@ -1001,8 +1009,8 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz end function set_model !> Set a model from a byte string to run on a system with multiple GPUs -function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, batch_size, min_batch_size, tag, & - inputs, outputs) result(code) +function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, batch_size, min_batch_size, & + min_batch_timeout, tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model !< The binary representation of the model @@ -1011,6 +1019,7 @@ function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, bat integer, intent(in) :: num_gpus !< The number of GPUs to use with the model integer, intent(in) :: batch_size !< The batch size for model execution integer, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), intent(in) :: tag !< A tag to attach to the model for information purposes character(len=*), dimension(:), intent(in) :: inputs !< One or more names of model input nodes (TF models) character(len=*), dimension(:), intent(in) :: outputs !< One or more names of model output nodes (TF models) @@ -1026,7 +1035,7 @@ function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, bat integer(c_size_t), dimension(:), allocatable, target :: input_lengths, 
output_lengths integer(kind=c_size_t) :: name_length, model_length, backend_length, tag_length, n_inputs, n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_first_gpu, c_num_gpus + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout, c_first_gpu, c_num_gpus type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -1048,14 +1057,15 @@ function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, bat output_lengths_ptr, n_outputs) if (code /= SRNoError) return - ! Cast the batch sizes to C integers + ! Cast the batch params to C integers c_batch_size = batch_size c_min_batch_size = min_batch_size + c_min_batch_timeout = min_batch_timeout c_first_gpu = first_gpu c_num_gpus = num_gpus code = set_model_multigpu_c(self%client_ptr, c_name, name_length, c_model, model_length, c_backend, backend_length, & - c_first_gpu, c_num_gpus, c_batch_size, c_min_batch_size, c_tag, tag_length, & + c_first_gpu, c_num_gpus, c_batch_size, c_min_batch_size, c_min_batch_timeout, c_tag, tag_length, & inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) diff --git a/src/fortran/client/model_interfaces.inc b/src/fortran/client/model_interfaces.inc index d3836bc1c..ef7d46661 100644 --- a/src/fortran/client/model_interfaces.inc +++ b/src/fortran/client/model_interfaces.inc @@ -40,8 +40,9 @@ end interface interface function set_model_from_file_c( c_client, key, key_length, model_file, model_file_length, & - backend, backend_length, device, device_length, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model_from_file") + backend, backend_length, device, device_length, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) & + bind(c, name="set_model_from_file") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_from_file_c @@ -59,6 +60,7 @@ interface !! null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null @@ -77,8 +79,9 @@ end interface interface function set_model_from_file_multigpu_c( c_client, key, key_length, model_file, model_file_length, & - backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model_from_file_multigpu") + backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs) & + bind(c, name="set_model_from_file_multigpu") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_from_file_multigpu_c @@ -96,6 +99,7 @@ interface !! 
null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null @@ -114,8 +118,9 @@ end interface interface function set_model_c( c_client, key, key_length, model, model_length, & - backend, backend_length, device, device_length, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model") + backend, backend_length, device, device_length, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs) & + bind(c, name="set_model") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_c @@ -133,6 +138,7 @@ interface !! null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null @@ -151,8 +157,9 @@ end interface interface function set_model_multigpu_c( c_client, key, key_length, model, model_length, & - backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model_multigpu") + backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs) & + bind(c, name="set_model_multigpu") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_multigpu_c @@ -170,6 +177,7 @@ interface !! null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! 
purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null diff --git a/src/python/module/smartredis/client.py b/src/python/module/smartredis/client.py index 53270f966..30361d3ba 100644 --- a/src/python/module/smartredis/client.py +++ b/src/python/module/smartredis/client.py @@ -609,6 +609,7 @@ def set_model( device: str = "CPU", batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -636,6 +637,8 @@ def set_model( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -649,6 +652,7 @@ def set_model( typecheck(device, "device", str) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, "min_batch_timeout", int) typecheck(tag, "tag", str) device = self.__check_device(device) backend = self.__check_backend(backend) @@ -660,6 +664,7 @@ def set_model( device, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, @@ -675,6 +680,7 @@ def set_model_multigpu( num_gpus: int, batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -703,6 +709,8 @@ def set_model_multigpu( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -717,6 +725,7 @@ def set_model_multigpu( typecheck(num_gpus, "num_gpus", int) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, "min_batch_timeout", int) typecheck(tag, "tag", str) backend = self.__check_backend(backend) inputs, outputs = self.__check_tensor_args(inputs, outputs) @@ -728,6 +737,7 @@ def set_model_multigpu( num_gpus, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, @@ -742,6 +752,7 @@ def set_model_from_file( device: str = "CPU", batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -769,6 +780,8 @@ def set_model_from_file( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -783,6 +796,7 @@ def set_model_from_file( typecheck(device, "device", str) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, 
"min_batch_timeout", int) typecheck(tag, "tag", str) device = self.__check_device(device) backend = self.__check_backend(backend) @@ -795,6 +809,7 @@ def set_model_from_file( device, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, @@ -810,6 +825,7 @@ def set_model_from_file_multigpu( num_gpus: int, batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -838,6 +854,8 @@ def set_model_from_file_multigpu( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -853,6 +871,7 @@ def set_model_from_file_multigpu( typecheck(num_gpus, "num_gpus", int) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, "min_batch_timeout", int) typecheck(tag, "tag", str) backend = self.__check_backend(backend) m_file = self.__check_file(model_file) @@ -865,6 +884,7 @@ def set_model_from_file_multigpu( num_gpus, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, diff --git a/src/python/src/pyclient.cpp b/src/python/src/pyclient.cpp index eb9b497d3..f174fa253 100644 --- a/src/python/src/pyclient.cpp +++ b/src/python/src/pyclient.cpp @@ -326,14 +326,15 @@ void PyClient::set_model(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) { MAKE_CLIENT_API({ _client->set_model(name, model, backend, device, - batch_size, min_batch_size, tag, - inputs, outputs); + batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); }); } @@ -344,14 +345,15 @@ void PyClient::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) { MAKE_CLIENT_API({ _client->set_model_multigpu(name, model, backend, first_gpu, num_gpus, - batch_size, min_batch_size, tag, - inputs, outputs); + batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); }); } @@ -361,14 +363,15 @@ void PyClient::set_model_from_file(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) { MAKE_CLIENT_API({ _client->set_model_from_file(name, model_file, backend, device, - batch_size, min_batch_size, tag, - inputs, outputs); + batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); }); } @@ -379,6 +382,7 @@ void PyClient::set_model_from_file_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -386,7 +390,7 @@ void PyClient::set_model_from_file_multigpu(const std::string& name, MAKE_CLIENT_API({ _client->set_model_from_file_multigpu( name, model_file, backend, first_gpu, num_gpus, batch_size, - min_batch_size, tag, inputs, outputs); + min_batch_size, min_batch_timeout, tag, inputs, outputs); 
}); } diff --git a/tests/python/test_errors.py b/tests/python/test_errors.py index bba7fde07..c330dc996 100644 --- a/tests/python/test_errors.py +++ b/tests/python/test_errors.py @@ -444,6 +444,8 @@ def test_bad_type_set_model_from_file_multigpu(use_cluster, context): c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, batch_size="not_an_integer") with pytest.raises(TypeError): c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, min_batch_size="not_an_integer") + with pytest.raises(TypeError): + c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, min_batch_timeout="not_an_integer") with pytest.raises(TypeError): c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, tag=42) diff --git a/tests/python/test_model_methods_torch.py b/tests/python/test_model_methods_torch.py index b1c7b078b..d98c6bed7 100644 --- a/tests/python/test_model_methods_torch.py +++ b/tests/python/test_model_methods_torch.py @@ -27,8 +27,12 @@ import os import torch +import pytest +from os import environ from smartredis import Client +from smartredis.error import * +test_gpu = environ.get("SMARTREDIS_TEST_DEVICE","cpu").lower() == "gpu" def test_set_model(mock_model, use_cluster, context): model = mock_model.create_torch_cnn() @@ -69,3 +73,124 @@ def test_torch_inference(mock_model, use_cluster, context): c.run_model("torch_cnn", inputs=["torch_cnn_input"], outputs=["torch_cnn_output"]) out_data = c.get_tensor("torch_cnn_output") assert out_data.shape == (1, 1, 1, 1) + +def test_batch_exceptions(mock_model, use_cluster, context): + # get model and set into database + mock_model.create_torch_cnn(filepath="./torch_cnn.pt") + model = mock_model.create_torch_cnn() + c = Client(None, use_cluster, logger_name=context) + batch_size = 1 + min_batch_size = 1 + min_batch_timeout = 1 + with pytest.raises(RedisRuntimeError): + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with 
pytest.raises(RedisRuntimeError): + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + +def test_batch_warning_set_model_from_file(mock_model, use_cluster, context, capfd): + # get model and set into database + mock_model.create_torch_cnn(filepath="./torch_cnn.pt") + c = Client(None, use_cluster, logger_name=context) + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err + +@pytest.mark.skipif( + not test_gpu, + reason="SMARTREDIS_TEST_DEVICE does not specify 'gpu'" +) +def test_batch_warning_set_model_from_file_multigpu(mock_model, use_cluster, context, capfd): + # get model and set into database + mock_model.create_torch_cnn(filepath="./torch_cnn.pt") + c = Client(None, use_cluster, logger_name=context) + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err + +def test_batch_warning_set_model(mock_model, use_cluster, context, capfd): + # get model and set into database + model = mock_model.create_torch_cnn() + c = Client(None, use_cluster, logger_name=context) + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err + +@pytest.mark.skipif( + not test_gpu, + reason="SMARTREDIS_TEST_DEVICE does not specify 'gpu'" +) +def test_batch_warning_set_model_multigpu(mock_model, use_cluster, context, capfd): + # get model and set into database + model = mock_model.create_torch_cnn() + c = Client(None, use_cluster, logger_name=context) + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err
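
Usage sketch (illustrative; the model key and file path below are hypothetical): with this patch applied, a Python client opts into batched execution by passing the new min_batch_timeout keyword to any of the set_model() family calls. The rules enforced by __check_batch_settings() in src/cpp/client.cpp above are noted in the comments.

    from smartredis import Client

    # Assumes a running database reachable via the SSDB environment variable;
    # cluster=False for a single-shard deployment.
    client = Client(None, False)

    # Store a TorchScript model for batched execution on CPU. RedisAI waits up
    # to min_batch_timeout milliseconds to accumulate min_batch_size requests
    # before running the model, and batches at most batch_size requests.
    client.set_model_from_file(
        "batched_cnn",         # hypothetical model key
        "./torch_cnn.pt",      # hypothetical serialized TorchScript file
        "TORCH",
        "CPU",
        batch_size=16,         # must be non-zero if either option below is used
        min_batch_size=4,      # must be non-zero if min_batch_timeout is used
        min_batch_timeout=10,  # wait at most 10 ms for a full minimum batch
    )

    # Invalid combinations (e.g. batch_size=0 with min_batch_size=4, or
    # min_batch_timeout=10 with min_batch_size=0) raise SRRuntimeException in
    # C++ and surface as RedisRuntimeError in Python. A non-zero min_batch_size
    # with min_batch_timeout=0 is accepted but prints the WARNING exercised by
    # the tests above, since a small timeout (~10 ms) may improve performance.

The timeout exists because accumulating a minimum batch can otherwise delay model execution indefinitely; MINBATCHTIMEOUT caps that wait, trading a bounded amount of latency for batching throughput.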