Expose MINBATCHTIMEOUT parameter in set_model interface (#406)
Expose MINBATCHTIMEOUT parameter to set_model() family calls
Tweak CI/CD pipeline to free up disk space needed for the Intel compiler
[ committed by @billschereriii ]
[ reviewed by @ashao  ]
billschereriii authored Oct 4, 2023
1 parent 53def75 commit 2fe3228
Showing 19 changed files with 331 additions and 52 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/run_tests.yml
@@ -75,6 +75,18 @@ jobs:
with:
python-version: ${{ matrix.py_v }}

# Free up some disk space
- name: Free disk space
run: |
sudo rm -rf /usr/share/dotnet &&
sudo rm -rf /opt/ghc &&
sudo rm -rf "/usr/local/share/boost"
# sudo rm -rf /usr/share/dotnet &&
# sudo rm -rf /opt/ghc &&
# sudo rm -rf "/usr/local/share/boost" &&
# sudo rm -rf "$AGENT_TOOLSDIRECTORY"

# Install compilers (Intel or GCC)
- name: Install GCC
if: "!contains( matrix.compiler, 'intel' )" # if using GNU compiler
5 changes: 4 additions & 1 deletion doc/changelog.rst
@@ -8,18 +8,21 @@ To be released at some future point in time

Description

- Improved support for model execution batching
- Added support for model chunking
- Updated the third-party RedisAI component
- Updated the third-party lcov component
- Added link to contributing guidelines

Detailed Notes

- Exposed access to the RedisAI MINBATCHTIMEOUT parameter, which limits the delay in model execution when trying to accumulate multiple executions in a batch (PR406_)
- Models will now be automatically chunked when sent to/received from the backend database. This allows use of models greater than 511MB in size. (PR404_)
- Updated from RedisAI v1.2.3 (test target)/v1.2.4 and v1.2.5 (CI/CD pipeline) to v1.2.7 (PR402_)
- Updated lcov from version 1.15 to 2.0 (PR396_)
- Created a CONTRIBUTIONS.md file that points to the contribution guidelines for both SmartSim and SmartRedis (PR395_)

.. _PR406: https://github.com/CrayLabs/SmartRedis/pull/406
.. _PR404: https://github.com/CrayLabs/SmartRedis/pull/404
.. _PR402: https://github.com/CrayLabs/SmartRedis/pull/402
.. _PR396: https://github.com/CrayLabs/SmartRedis/pull/396
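The note above says MINBATCHTIMEOUT "limits the delay in model execution when trying to accumulate multiple executions in a batch". The interplay between MINBATCHSIZE and MINBATCHTIMEOUT can be illustrated with a small stand-alone sketch; this is plain Python with no SmartRedis or RedisAI dependency, and the dispatch rule is a paraphrase of the documented RedisAI behavior, so treat it as an approximation rather than the engine's actual implementation:

```python
def should_dispatch(queued: int, waited_ms: int,
                    min_batch_size: int, min_batch_timeout: int) -> bool:
    """Approximate RedisAI batching rule: run the model once the queue
    reaches MINBATCHSIZE, or once MINBATCHTIMEOUT milliseconds have
    elapsed with at least one request waiting (a timeout of 0 means
    wait indefinitely for the minimum batch size)."""
    if queued == 0:
        return False
    if queued >= min_batch_size:
        return True
    # MINBATCHTIMEOUT caps how long a partial batch may wait
    return min_batch_timeout > 0 and waited_ms >= min_batch_timeout

# A partial batch of 3 requests with MINBATCHSIZE=8, MINBATCHTIMEOUT=10:
print(should_dispatch(3, 5, 8, 10))    # False: still under the 10 ms timeout
print(should_dispatch(3, 12, 8, 10))   # True: timeout hit, run the partial batch
print(should_dispatch(8, 0, 8, 10))    # True: minimum batch size reached
```

Without the timeout (the pre-406 behavior, equivalent to `min_batch_timeout=0`), the three-request batch in the middle case would wait indefinitely for five more requests.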
1 change: 1 addition & 0 deletions doc/data_structures.rst
@@ -358,6 +358,7 @@ are uniform across all SmartRedis clients, and as an example, the C++
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
10 changes: 9 additions & 1 deletion include/c_client.h
@@ -323,7 +323,7 @@ bool _isTensorFlow(const char* backend);
/*!
* \brief Check parameters for all parameters common to set_model methods
* \details Make sure that all pointers are not void and that the size
*          of the inputs and outputs is not zero
* \param c_client The client object to use for communication
* \param name The name to associate with the model
* \param backend The name of the backend (TF, TFLITE, TORCH, ONNX)
@@ -372,6 +372,7 @@ void _check_params_set_model(void* c_client,
* excluding null terminating character
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param tag_length The length of the tag string,
* excluding null terminating character
@@ -396,6 +397,7 @@ SRError set_model_from_file(void* c_client,
const size_t device_length,
const int batch_size,
const int min_batch_size,
const int min_batch_timeout,
const char* tag,
const size_t tag_length,
const char** inputs,
@@ -428,6 +430,7 @@ SRError set_model_from_file(void* c_client,
* \param num_gpus the number of gpus to use with the model
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param tag_length The length of the tag string,
* excluding null terminating character
@@ -452,6 +455,7 @@ SRError set_model_from_file_multigpu(void* c_client,
const int num_gpus,
const int batch_size,
const int min_batch_size,
const int min_batch_timeout,
const char* tag,
const size_t tag_length,
const char** inputs,
@@ -486,6 +490,7 @@ SRError set_model_from_file_multigpu(void* c_client,
* excluding null terminating character
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param tag_length The length of the tag string,
* excluding null terminating character
@@ -510,6 +515,7 @@ SRError set_model(void* c_client,
const size_t device_length,
const int batch_size,
const int min_batch_size,
const int min_batch_timeout,
const char* tag,
const size_t tag_length,
const char** inputs,
@@ -542,6 +548,7 @@ SRError set_model(void* c_client,
* \param num_gpus The number of GPUs to use with the model
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param tag_length The length of the tag string,
* excluding null terminating character
@@ -566,6 +573,7 @@ SRError set_model_multigpu(void* c_client,
const int num_gpus,
const int batch_size,
const int min_batch_size,
const int min_batch_timeout,
const char* tag,
const size_t tag_length,
const char** inputs,
8 changes: 8 additions & 0 deletions include/client.h
@@ -344,6 +344,7 @@ class Client : public SRObject
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param inputs One or more names of model input nodes
* (TF models only). For other models, provide an
@@ -359,6 +360,7 @@
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -383,6 +385,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -398,6 +401,7 @@
int num_gpus,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -423,6 +427,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param inputs One or more names of model input nodes
* (TF models only). For other models, provide an
@@ -438,6 +443,7 @@
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -462,6 +468,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -477,6 +484,7 @@
int num_gpus,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
8 changes: 8 additions & 0 deletions include/pyclient.h
@@ -285,6 +285,7 @@ class PyClient : public PySRObject
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -299,6 +300,7 @@
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -317,6 +319,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -332,6 +335,7 @@
int num_gpus,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -350,6 +354,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -364,6 +369,7 @@
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -382,6 +388,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -397,6 +404,7 @@
int num_gpus,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
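For Python users, the new keyword lands in the same position as in the C++ signatures above. A minimal sketch of the call shape follows; it uses a hypothetical `StubClient` stand-in (not part of SmartRedis) rather than a live `smartredis.Client`, since the real call requires a running Redis database with the RedisAI module loaded:

```python
class StubClient:
    """Hypothetical stand-in mirroring the set_model_from_file argument
    order after this commit; it only records the arguments it receives."""

    def set_model_from_file(self, name, model_file, backend, device,
                            batch_size=0, min_batch_size=0,
                            min_batch_timeout=0, tag="",
                            inputs=None, outputs=None):
        self.last_call = dict(name=name, model_file=model_file,
                              backend=backend, device=device,
                              batch_size=batch_size,
                              min_batch_size=min_batch_size,
                              min_batch_timeout=min_batch_timeout)
        return self.last_call

client = StubClient()
# Batch up to 32 requests, wait for at least 8, but never longer than 10 ms:
call = client.set_model_from_file(
    "my_model", "model.pt", "TORCH", "GPU",
    batch_size=32, min_batch_size=8, min_batch_timeout=10)
print(call["min_batch_timeout"])  # 10
```

Passing `min_batch_timeout` positionally requires care: because it was inserted between `min_batch_size` and `tag`, existing callers that passed `tag` positionally must be updated.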
4 changes: 4 additions & 0 deletions include/redis.h
@@ -283,6 +283,7 @@ class Redis : public RedisServer
* (e.g. CPU or GPU)
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param inputs One or more names of model input nodes
* (TF models only)
@@ -297,6 +298,7 @@
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -314,6 +316,7 @@
* \param num_gpus The number of GPUs to use with this model
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for information purposes
* \param inputs One or more names of model input nodes
* (TF models only)
@@ -328,6 +331,7 @@
int num_gpus,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
4 changes: 4 additions & 0 deletions include/rediscluster.h
@@ -302,6 +302,7 @@ class RedisCluster : public RedisServer
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -317,6 +318,7 @@
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -335,6 +337,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -350,6 +353,7 @@
int num_gpus,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
4 changes: 4 additions & 0 deletions include/redisserver.h
@@ -285,6 +285,7 @@ class RedisServer {
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -300,6 +301,7 @@
const std::string& device,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
@@ -319,6 +321,7 @@
* \param batch_size The batch size for model execution
* \param min_batch_size The minimum batch size for model
* execution
* \param min_batch_timeout Max time (ms) to wait for min batch size
* \param tag A tag to attach to the model for
* information purposes
* \param inputs One or more names of model input nodes
@@ -334,6 +337,7 @@
int num_gpus,
int batch_size = 0,
int min_batch_size = 0,
int min_batch_timeout = 0,
const std::string& tag = "",
const std::vector<std::string>& inputs
= std::vector<std::string>(),
