From 2fe3228752b01b3ac58fdef173ebd53bd964df1b Mon Sep 17 00:00:00 2001 From: Bill Scherer <36514047+billschereriii@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:41:01 -0500 Subject: [PATCH] Expose MINBATCHTIMEOUT parameter in set_model interface (#406) Expose MINBATCHTIMEOUT parameter to set_model() family calls Tweak CI/CD pipeline to free up disk space needed for the Intel compiler [ committed by @billschereriii ] [ reviewed by @ashao ] --- .github/workflows/run_tests.yml | 12 +++ doc/changelog.rst | 5 +- doc/data_structures.rst | 1 + include/c_client.h | 10 +- include/client.h | 8 ++ include/pyclient.h | 8 ++ include/redis.h | 4 + include/rediscluster.h | 4 + include/redisserver.h | 4 + src/c/c_client.cpp | 20 ++-- src/cpp/client.cpp | 50 ++++++++- src/cpp/redis.cpp | 11 +- src/cpp/rediscluster.cpp | 9 +- src/fortran/client.F90 | 48 +++++---- src/fortran/client/model_interfaces.inc | 24 +++-- src/python/module/smartredis/client.py | 20 ++++ src/python/src/pyclient.cpp | 18 ++-- tests/python/test_errors.py | 2 + tests/python/test_model_methods_torch.py | 125 +++++++++++++++++++++++ 19 files changed, 331 insertions(+), 52 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index b4aea72a2..bbcb343c2 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -75,6 +75,18 @@ jobs: with: python-version: ${{ matrix.py_v }} + # Free up some disk space + - name: Free disk space + run: | + sudo rm -rf /usr/share/dotnet && + sudo rm -rf /opt/ghc && + sudo rm -rf "/usr/local/share/boost" + + # sudo rm -rf /usr/share/dotnet && + # sudo rm -rf /opt/ghc && + # sudo rm -rf "/usr/local/share/boost" && + # sudo rm -rf "$AGENT_TOOLSDIRECTORY" + # Install compilers (Intel or GCC) - name: Install GCC if: "!contains( matrix.compiler, 'intel' )" # if using GNU compiler diff --git a/doc/changelog.rst b/doc/changelog.rst index b38b8eb3c..1e12c96ad 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -8,18 +8,21 @@ To be released at some future point in time Description +- Improved support for model execution batching - Added support for model chunking - Updated the third-party RedisAI component - Updated the third-party lcov component -- Add link to contributing guidelines +- Added link to contributing guidelines Detailed Notes +- Exposed access to the RedisAI MINBATCHTIMEOUT parameter, which limits the delay in model execution when trying to accumulate multiple executions in a batch (PR406_) - Models will now be automatically chunked when sent to/received from the backend database. This allows use of models greater than 511MB in size. (PR404_) - Updated from RedisAI v1.2.3 (test target)/v1.2.4 and v1.2.5 (CI/CD pipeline) to v1.2.7 (PR402_) - Updated lcov from version 1.15 to 2.0 (PR396_) - Created CONTRIBUTIONS.md file that points to the contribution guidelines for both SmartSim and SmartRedis (PR395_) +.. _PR406: https://github.com/CrayLabs/SmartRedis/pull/406 .. _PR404: https://github.com/CrayLabs/SmartRedis/pull/404 .. _PR402: https://github.com/CrayLabs/SmartRedis/pull/402 ..
_PR396: https://github.com/CrayLabs/SmartRedis/pull/396 diff --git a/doc/data_structures.rst b/doc/data_structures.rst index 2a1b86467..fb547c4bd 100644 --- a/doc/data_structures.rst +++ b/doc/data_structures.rst @@ -358,6 +358,7 @@ are uniform across all SmartRedis clients, and as an example, the C++ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/c_client.h b/include/c_client.h index 3d0535d43..850b8ea9b 100644 --- a/include/c_client.h +++ b/include/c_client.h @@ -323,7 +323,7 @@ bool _isTensorFlow(const char* backend); /*! * \brief Check the parameters common to all set_model methods * \details Make sure that all pointers are not void and that the size -* of the inputs and outputs is not zero +* of the inputs and outputs is not zero * \param c_client The client object to use for communication * \param name The name to associate with the model * \param backend The name of the backend (TF, TFLITE, TORCH, ONNX) @@ -372,6 +372,7 @@ void _check_params_set_model(void* c_client, * excluding null terminating character * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The length of the tag string, * excluding null terminating character @@ -396,6 +397,7 @@ SRError set_model_from_file(void* c_client, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, @@ -428,6 +430,7 @@ SRError set_model_from_file(void* c_client, * \param num_gpus The number of GPUs to use with the model * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The length of the tag string, * excluding null terminating character @@ -452,6 +455,7 @@ SRError set_model_from_file_multigpu(void* c_client, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, @@ -486,6 +490,7 @@ SRError set_model_from_file_multigpu(void* c_client, * excluding null terminating character * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The length of the tag string, * excluding null terminating character @@ -510,6 +515,7 @@ SRError set_model(void* c_client, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, @@ -542,6 +548,7 @@ SRError set_model(void* c_client, * \param num_gpus The number of GPUs to use with the model * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution +* \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param tag_length The
length of the tag string, * excluding null terminating character @@ -566,6 +573,7 @@ SRError set_model_multigpu(void* c_client, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, diff --git a/include/client.h b/include/client.h index 3444c707b..a9bb07614 100644 --- a/include/client.h +++ b/include/client.h @@ -344,6 +344,7 @@ class Client : public SRObject * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only). For other models, provide an @@ -359,6 +360,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -383,6 +385,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -398,6 +401,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -423,6 +427,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only).
For other models, provide an @@ -438,6 +443,7 @@ class Client : public SRObject const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -462,6 +468,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -477,6 +484,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/pyclient.h b/include/pyclient.h index b1f82bcbb..5c407a3a3 100644 --- a/include/pyclient.h +++ b/include/pyclient.h @@ -285,6 +285,7 @@ class PyClient : public PySRObject * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -299,6 +300,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -317,6 +319,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -332,6 +335,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -350,6 +354,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -364,6 +369,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -382,6 +388,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -397,6 +404,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/redis.h b/include/redis.h index 4aa9f246c..b799577c1 100644 --- a/include/redis.h +++ b/include/redis.h @@ -283,6 +283,7 @@ class Redis : public RedisServer * (e.g.
CPU or GPU) * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only) @@ -297,6 +298,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -314,6 +316,7 @@ * \param num_gpus The number of GPUs to use with this model * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for information purposes * \param inputs One or more names of model input nodes * (TF models only) @@ -328,6 +331,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/rediscluster.h b/include/rediscluster.h index 876c6b733..7371ec343 100644 --- a/include/rediscluster.h +++ b/include/rediscluster.h @@ -302,6 +302,7 @@ class RedisCluster : public RedisServer * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -317,6 +318,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -335,6 +337,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -350,6 +353,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/include/redisserver.h b/include/redisserver.h index b28d03a25..39594c505 100644 --- a/include/redisserver.h +++ b/include/redisserver.h @@ -285,6 +285,7 @@ class RedisServer { * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -300,6 +301,7 @@ const std::string& device, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), @@ -319,6 +321,7 @@ * \param batch_size The batch size for model execution * \param min_batch_size The minimum batch size for model * execution + * \param min_batch_timeout Max time (ms) to wait for min batch size * \param tag
A tag to attach to the model for * information purposes * \param inputs One or more names of model input nodes @@ -334,6 +337,7 @@ int num_gpus, int batch_size = 0, int min_batch_size = 0, + int min_batch_timeout = 0, const std::string& tag = "", const std::vector<std::string>& inputs = std::vector<std::string>(), diff --git a/src/c/c_client.cpp b/src/c/c_client.cpp index 99cd691da..6672184b5 100644 --- a/src/c/c_client.cpp +++ b/src/c/c_client.cpp @@ -392,6 +392,7 @@ extern "C" SRError set_model_from_file( const char* device, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, const char** outputs, const size_t* output_lengths, const size_t n_outputs) @@ -430,8 +431,8 @@ } s->set_model_from_file(name_str, model_file_str, backend_str, device_str, - batch_size, min_batch_size, tag_str, input_vec, - output_vec); + batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } @@ -443,6 +444,7 @@ extern "C" SRError set_model_from_file_multigpu( const char* backend, const size_t backend_length, const int first_gpu, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, const char** outputs, @@ -481,8 +483,8 @@ } s->set_model_from_file_multigpu(name_str, model_file_str, backend_str, first_gpu, - num_gpus, batch_size, min_batch_size, tag_str, - input_vec, output_vec); + num_gpus, batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } @@ -494,6 +496,7 @@ extern "C" SRError set_model( const char* backend, const size_t backend_length, const char* device, const size_t device_length, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, @@ -534,8 +537,8 @@ } s->set_model(name_str, model_str, backend_str, device_str, - batch_size, min_batch_size, tag_str, input_vec, - output_vec); + batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } @@ -547,6 +550,7 @@ extern "C" SRError set_model_multigpu( const char* backend, const size_t backend_length, const int first_gpu, const int num_gpus, const int batch_size, const int min_batch_size, + const int min_batch_timeout, const char* tag, const size_t tag_length, const char** inputs, const size_t* input_lengths, const size_t n_inputs, @@ -586,8 +590,8 @@ } s->set_model_multigpu(name_str, model_str, backend_str, first_gpu, num_gpus, - batch_size, min_batch_size, tag_str, input_vec, - output_vec); + batch_size, min_batch_size, min_batch_timeout, + tag_str, input_vec, output_vec); }); } diff --git a/src/cpp/client.cpp b/src/cpp/client.cpp index cfb1867a5..7e9e98c7d 100644 --- a/src/cpp/client.cpp +++ b/src/cpp/client.cpp @@ -502,6 +502,7 @@ void Client::set_model_from_file(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -522,7 +523,7 @@ std::string_view
model(tmp.data(), tmp.length()); set_model(name, model, backend, device, batch_size, - min_batch_size, tag, inputs, outputs); + min_batch_size, min_batch_timeout, tag, inputs, outputs); } // Set a model from file in the database for future execution in a multi-GPU system @@ -533,6 +534,7 @@ void Client::set_model_from_file_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -553,8 +555,42 @@ void Client::set_model_from_file_multigpu(const std::string& name, std::string_view model(tmp.data(), tmp.length()); set_model_multigpu(name, model, backend, first_gpu, num_gpus, batch_size, - min_batch_size, tag, inputs, outputs); + min_batch_size, min_batch_timeout, tag, inputs, outputs); } + +// Validate batch settings for the set_model calls +inline void __check_batch_settings( + int batch_size, int min_batch_size, int min_batch_timeout) +{ + // Throw a usage exception if batch_size is zero but one of the other + // parameters is non-zero + if (batch_size == 0 && (min_batch_size > 0 || min_batch_timeout > 0)) { + throw SRRuntimeException( + "batch_size must be non-zero if min_batch_size or " + "min_batch_timeout is used; otherwise batching will " + "not be performed." + ); + } + + // Throw a usage exception if min_batch_timeout is nonzero and + // min_batch_size is zero. (batch_size also has to be non-zero, but + // this was caught in the previous clause.) + if (min_batch_timeout > 0 && min_batch_size == 0) { + throw SRRuntimeException( + "min_batch_size must be non-zero if min_batch_timeout " + "is used; otherwise the min_batch_timeout parameter is ignored." + ); + } + + // Issue a warning if min_batch_size is non-zero but min_batch_timeout is zero + if (min_batch_size > 0 && min_batch_timeout == 0) { + std::cerr << "WARNING: min_batch_timeout was not set when a non-zero " + << "min_batch_size was selected.
" << std::endl + << "Setting a small value (~10ms) for min_batch_timeout " + << "may improve performance" << std::endl; + } +} + // Set a model from a string buffer in the database for future execution void Client::set_model(const std::string& name, const std::string_view& model, @@ -562,6 +598,7 @@ void Client::set_model(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -606,6 +643,8 @@ void Client::set_model(const std::string& name, throw SRRuntimeException(device + " is not a valid device."); } + __check_batch_settings(batch_size, min_batch_size, min_batch_timeout); + // Split model into chunks size_t offset = 0; std::vector model_segments; @@ -621,7 +660,7 @@ void Client::set_model(const std::string& name, std::string key = _build_model_key(name, false); auto response = _redis_server->set_model( key, model_segments, backend, device, - batch_size, min_batch_size, + batch_size, min_batch_size, min_batch_timeout, tag, inputs, outputs); if (response.has_error()) { throw SRInternalException( @@ -636,6 +675,7 @@ void Client::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -677,6 +717,8 @@ void Client::set_model_multigpu(const std::string& name, throw SRParameterException(backend + " is not a valid backend."); } + __check_batch_settings(batch_size, min_batch_size, min_batch_timeout); + // Split model into chunks size_t offset = 0; std::vector model_segments; @@ -692,7 +734,7 @@ void Client::set_model_multigpu(const std::string& name, std::string key = _build_model_key(name, false); _redis_server->set_model_multigpu( key, model_segments, backend, first_gpu, num_gpus, - batch_size, min_batch_size, + batch_size, min_batch_size, min_batch_timeout, tag, inputs, outputs); } diff --git a/src/cpp/redis.cpp b/src/cpp/redis.cpp index 0aa7560c6..a00af813b 100644 --- a/src/cpp/redis.cpp +++ b/src/cpp/redis.cpp @@ -299,6 +299,7 @@ CommandReply Redis::set_model(const std::string& model_name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs @@ -318,6 +319,9 @@ CommandReply Redis::set_model(const std::string& model_name, if (min_batch_size > 0) { cmd << "MINBATCHSIZE" << std::to_string(min_batch_size); } + if (min_batch_timeout > 0) { + cmd << "MINBATCHTIMEOUT" << std::to_string(min_batch_timeout); + } if (inputs.size() > 0) { cmd << "INPUTS" << std::to_string(inputs.size()) << inputs; } @@ -339,6 +343,7 @@ void Redis::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -349,7 +354,8 @@ void Redis::set_model_multigpu(const std::string& name, std::string device = "GPU:" + std::to_string(i); std::string model_key = name + "." 
+ device; result = set_model( - model_key, model, backend, device, batch_size, min_batch_size, tag, inputs, outputs); + model_key, model, backend, device, batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set model for GPU " + std::to_string(i)); } @@ -357,7 +363,8 @@ void Redis::set_model_multigpu(const std::string& name, // Add a version for get_model to find result = set_model( - name, model, backend, "GPU", batch_size, min_batch_size, tag, inputs, outputs); + name, model, backend, "GPU", batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set general model"); } diff --git a/src/cpp/rediscluster.cpp b/src/cpp/rediscluster.cpp index 3c1ae259d..6f847dc6e 100644 --- a/src/cpp/rediscluster.cpp +++ b/src/cpp/rediscluster.cpp @@ -511,6 +511,7 @@ CommandReply RedisCluster::set_model(const std::string& model_name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -531,6 +532,9 @@ CommandReply RedisCluster::set_model(const std::string& model_name, if (min_batch_size > 0) { cmd << "MINBATCHSIZE" << std::to_string(min_batch_size); } + if (min_batch_timeout > 0) { + cmd << "MINBATCHTIMEOUT" << std::to_string(min_batch_timeout); + } if ( inputs.size() > 0) { cmd << "INPUTS" << std::to_string(inputs.size()) << inputs; } @@ -558,6 +562,7 @@ void RedisCluster::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs) @@ -571,7 +576,7 @@ void RedisCluster::set_model_multigpu(const std::string& name, // Store it CommandReply result = set_model( model_key, model, backend, device, batch_size, min_batch_size, - tag, inputs, outputs); + min_batch_timeout, tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set model for " + device); } @@ -580,7 +585,7 @@ // Add a version for get_model to find CommandReply result = set_model( name, model, backend, "GPU", batch_size, min_batch_size, - tag, inputs, outputs); + min_batch_timeout, tag, inputs, outputs); if (result.has_error() > 0) { throw SRRuntimeException("Failed to set general model"); } diff --git a/src/fortran/client.F90 b/src/fortran/client.F90 index 7c79f0148..c3acd35c7 100644 --- a/src/fortran/client.F90 +++ b/src/fortran/client.F90 @@ -746,8 +746,8 @@ function get_model(self, name, model) result(code) end function get_model !> Load the machine learning model from a file and set the configuration -function set_model_from_file(self, name, model_file, backend, device, batch_size, min_batch_size, tag, & - inputs, outputs) result(code) +function set_model_from_file(self, name, model_file, backend, device, batch_size, min_batch_size, & + min_batch_timeout, tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model_file !< The file storing the model character(len=*), intent(in) :: device !< The name of the device (CPU, GPU, GPU:0, GPU:1...)
integer, optional, intent(in) :: batch_size !< The batch size for model execution integer, optional, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, optional, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), optional, intent(in) :: tag !< A tag to attach to the model for !! information purposes character(len=*), dimension(:), optional, intent(in) :: inputs !< One or more names of model input nodes (TF @@ -775,7 +776,7 @@ function set_model_from_file(self, name, model_file, backend, device, batch_size integer(c_size_t), dimension(:), allocatable, target :: input_lengths, output_lengths integer(kind=c_size_t) :: name_length, model_file_length, backend_length, device_length, tag_length, n_inputs, & n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -784,6 +785,8 @@ function set_model_from_file(self, name, model_file, backend, device, batch_size if (present(batch_size)) c_batch_size = batch_size c_min_batch_size = 0 if (present(min_batch_size)) c_min_batch_size = min_batch_size + c_min_batch_timeout = 0 + if (present(min_batch_timeout)) c_min_batch_timeout = min_batch_timeout if (present(tag)) then allocate(character(kind=c_char, len=len_trim(tag)) :: c_tag) c_tag = tag @@ -828,8 +831,8 @@ function set_model_from_file(self, name, model_file, backend, device, batch_size code = set_model_from_file_c(self%client_ptr, c_name, name_length, c_model_file, model_file_length, & c_backend, backend_length, c_device, device_length, c_batch_size, c_min_batch_size, & - c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, & - output_lengths_ptr, n_outputs) + c_min_batch_timeout, c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, & + outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) if (allocated(input_lengths)) deallocate(input_lengths) if (allocated(ptrs_to_inputs)) deallocate(ptrs_to_inputs) @@ -840,7 +843,7 @@ end function set_model_from_file !> Load the machine learning model from a file and set the configuration for use in multi-GPU systems function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu, num_gpus, batch_size, min_batch_size, & - tag, inputs, outputs) result(code) + min_batch_timeout, tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model_file !< The file storing the model @@ -849,6 +852,7 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu integer, intent(in) :: num_gpus !< The number of GPUs to use with the model integer, optional, intent(in) :: batch_size !< The batch size for model execution integer, optional, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, optional, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), optional, intent(in) :: tag !< A tag to attach to the model for !! 
information purposes character(len=*), dimension(:), optional, intent(in) :: inputs !< One or more names of model input nodes (TF @@ -868,7 +872,7 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu integer(c_size_t), dimension(:), allocatable, target :: input_lengths, output_lengths integer(kind=c_size_t) :: name_length, model_file_length, backend_length, tag_length, n_inputs, & n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_first_gpu, c_num_gpus + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout, c_first_gpu, c_num_gpus type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -877,6 +881,8 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu if (present(batch_size)) c_batch_size = batch_size c_min_batch_size = 0 if (present(min_batch_size)) c_min_batch_size = min_batch_size + c_min_batch_timeout = 0 + if (present(min_batch_timeout)) c_min_batch_timeout = min_batch_timeout if (present(tag)) then allocate(character(kind=c_char, len=len_trim(tag)) :: c_tag) c_tag = tag @@ -922,8 +928,8 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu code = set_model_from_file_multigpu_c(self%client_ptr, c_name, name_length, c_model_file, model_file_length, & c_backend, backend_length, c_first_gpu, c_num_gpus, c_batch_size, c_min_batch_size, & - c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, & - output_lengths_ptr, n_outputs) + c_min_batch_timeout, c_tag, tag_length, inputs_ptr, input_lengths_ptr, n_inputs, & + outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) if (allocated(input_lengths)) deallocate(input_lengths) @@ -934,8 +940,8 @@ function set_model_from_file_multigpu(self, name, model_file, backend, first_gpu end function set_model_from_file_multigpu !> Establish a model to run -function set_model(self, name, model, backend, device, batch_size, min_batch_size, tag, & - inputs, outputs) result(code) +function set_model(self, name, model, backend, device, batch_size, min_batch_size, min_batch_timeout, & + tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model !< The binary representation of the model @@ -943,6 +949,7 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz character(len=*), intent(in) :: device !< The name of the device (CPU, GPU, GPU:0, GPU:1...) 
integer, intent(in) :: batch_size !< The batch size for model execution integer, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), intent(in) :: tag !< A tag to attach to the model for information purposes character(len=*), dimension(:), intent(in) :: inputs !< One or more names of model input nodes (TF models) character(len=*), dimension(:), intent(in) :: outputs !< One or more names of model output nodes (TF models) @@ -960,7 +967,7 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz integer(c_size_t), dimension(:), allocatable, target :: input_lengths, output_lengths integer(kind=c_size_t) :: name_length, model_length, backend_length, device_length, tag_length, n_inputs, & n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -984,12 +991,13 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz output_lengths_ptr, n_outputs) if (code /= SRNoError) return - ! Cast the batch sizes to C integers + ! Cast the batch params to C integers c_batch_size = batch_size c_min_batch_size = min_batch_size + c_min_batch_timeout = min_batch_timeout code = set_model_c(self%client_ptr, c_name, name_length, c_model, model_length, c_backend, backend_length, & - c_device, device_length, batch_size, min_batch_size, c_tag, tag_length, & + c_device, device_length, batch_size, min_batch_size, c_min_batch_timeout, c_tag, tag_length, & inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) @@ -1001,8 +1009,8 @@ function set_model(self, name, model, backend, device, batch_size, min_batch_siz end function set_model !> Set a model from a byte string to run on a system with multiple GPUs -function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, batch_size, min_batch_size, tag, & - inputs, outputs) result(code) +function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, batch_size, min_batch_size, & + min_batch_timeout, tag, inputs, outputs) result(code) class(client_type), intent(in) :: self !< An initialized SmartRedis client character(len=*), intent(in) :: name !< The name to use to place the model character(len=*), intent(in) :: model !< The binary representation of the model @@ -1011,6 +1019,7 @@ function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, bat integer, intent(in) :: num_gpus !< The number of GPUs to use with the model integer, intent(in) :: batch_size !< The batch size for model execution integer, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(len=*), intent(in) :: tag !< A tag to attach to the model for information purposes character(len=*), dimension(:), intent(in) :: inputs !< One or more names of model input nodes (TF models) character(len=*), dimension(:), intent(in) :: outputs !< One or more names of model output nodes (TF models) @@ -1026,7 +1035,7 @@ function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, bat integer(c_size_t), dimension(:), allocatable, target :: input_lengths, 
output_lengths integer(kind=c_size_t) :: name_length, model_length, backend_length, tag_length, n_inputs, n_outputs - integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_first_gpu, c_num_gpus + integer(kind=c_int) :: c_batch_size, c_min_batch_size, c_min_batch_timeout, c_first_gpu, c_num_gpus type(c_ptr) :: inputs_ptr, input_lengths_ptr, outputs_ptr, output_lengths_ptr type(c_ptr), dimension(:), allocatable :: ptrs_to_inputs, ptrs_to_outputs @@ -1048,14 +1057,15 @@ function set_model_multigpu(self, name, model, backend, first_gpu, num_gpus, bat output_lengths_ptr, n_outputs) if (code /= SRNoError) return - ! Cast the batch sizes to C integers + ! Cast the batch params to C integers c_batch_size = batch_size c_min_batch_size = min_batch_size + c_min_batch_timeout = min_batch_timeout c_first_gpu = first_gpu c_num_gpus = num_gpus code = set_model_multigpu_c(self%client_ptr, c_name, name_length, c_model, model_length, c_backend, backend_length, & - c_first_gpu, c_num_gpus, c_batch_size, c_min_batch_size, c_tag, tag_length, & + c_first_gpu, c_num_gpus, c_batch_size, c_min_batch_size, c_min_batch_timeout, c_tag, tag_length, & inputs_ptr, input_lengths_ptr, n_inputs, outputs_ptr, output_lengths_ptr, n_outputs) if (allocated(c_inputs)) deallocate(c_inputs) diff --git a/src/fortran/client/model_interfaces.inc b/src/fortran/client/model_interfaces.inc index d3836bc1c..ef7d46661 100644 --- a/src/fortran/client/model_interfaces.inc +++ b/src/fortran/client/model_interfaces.inc @@ -40,8 +40,9 @@ end interface interface function set_model_from_file_c( c_client, key, key_length, model_file, model_file_length, & - backend, backend_length, device, device_length, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model_from_file") + backend, backend_length, device, device_length, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) & + bind(c, name="set_model_from_file") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_from_file_c @@ -59,6 +60,7 @@ interface !! null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null @@ -77,8 +79,9 @@ end interface interface function set_model_from_file_multigpu_c( c_client, key, key_length, model_file, model_file_length, & - backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model_from_file_multigpu") + backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs) & + bind(c, name="set_model_from_file_multigpu") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_from_file_multigpu_c @@ -96,6 +99,7 @@ interface !! 
null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null @@ -114,8 +118,9 @@ end interface interface function set_model_c( c_client, key, key_length, model, model_length, & - backend, backend_length, device, device_length, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model") + backend, backend_length, device, device_length, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs) & + bind(c, name="set_model") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_c @@ -133,6 +138,7 @@ interface !! null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null @@ -151,8 +157,9 @@ end interface interface function set_model_multigpu_c( c_client, key, key_length, model, model_length, & - backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, tag, tag_length, & - inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs ) bind(c, name="set_model_multigpu") + backend, backend_length, first_gpu, num_gpus, batch_size, min_batch_size, min_batch_timeout, & + tag, tag_length, inputs, input_lengths, n_inputs, outputs, output_lengths, n_outputs) & + bind(c, name="set_model_multigpu") use iso_c_binding, only : c_ptr, c_size_t, c_int, c_char import :: enum_kind integer(kind=enum_kind) :: set_model_multigpu_c @@ -170,6 +177,7 @@ interface !! null terminating character integer(kind=c_int), value, intent(in) :: batch_size !< The batch size for model execution integer(kind=c_int), value, intent(in) :: min_batch_size !< The minimum batch size for model execution + integer(kind=c_int), value, intent(in) :: min_batch_timeout !< Max time (ms) to wait for min batch size character(kind=c_char), intent(in) :: tag(*) !< A tag to attach to the model for information !! 
purposes integer(kind=c_size_t), value, intent(in) :: tag_length !< The length of the tag c-string, excluding null diff --git a/src/python/module/smartredis/client.py b/src/python/module/smartredis/client.py index 53270f966..30361d3ba 100644 --- a/src/python/module/smartredis/client.py +++ b/src/python/module/smartredis/client.py @@ -609,6 +609,7 @@ def set_model( device: str = "CPU", batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -636,6 +637,8 @@ def set_model( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -649,6 +652,7 @@ def set_model( typecheck(device, "device", str) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, "min_batch_timeout", int) typecheck(tag, "tag", str) device = self.__check_device(device) backend = self.__check_backend(backend) @@ -660,6 +664,7 @@ def set_model( device, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, @@ -675,6 +680,7 @@ def set_model_multigpu( num_gpus: int, batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -703,6 +709,8 @@ def set_model_multigpu( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -717,6 +725,7 @@ def set_model_multigpu( typecheck(num_gpus, "num_gpus", int) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, "min_batch_timeout", int) typecheck(tag, "tag", str) backend = self.__check_backend(backend) inputs, outputs = self.__check_tensor_args(inputs, outputs) @@ -728,6 +737,7 @@ def set_model_multigpu( num_gpus, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, @@ -742,6 +752,7 @@ def set_model_from_file( device: str = "CPU", batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -769,6 +780,8 @@ def set_model_from_file( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -783,6 +796,7 @@ def set_model_from_file( typecheck(device, "device", str) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, 
"min_batch_timeout", int) typecheck(tag, "tag", str) device = self.__check_device(device) backend = self.__check_backend(backend) @@ -795,6 +809,7 @@ def set_model_from_file( device, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, @@ -810,6 +825,7 @@ def set_model_from_file_multigpu( num_gpus: int, batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.Union[str, t.List[str]]] = None, outputs: t.Optional[t.Union[str, t.List[str]]] = None, @@ -838,6 +854,8 @@ def set_model_from_file_multigpu( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: Max time (ms) to wait for min batch size + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -853,6 +871,7 @@ def set_model_from_file_multigpu( typecheck(num_gpus, "num_gpus", int) typecheck(batch_size, "batch_size", int) typecheck(min_batch_size, "min_batch_size", int) + typecheck(min_batch_timeout, "min_batch_timeout", int) typecheck(tag, "tag", str) backend = self.__check_backend(backend) m_file = self.__check_file(model_file) @@ -865,6 +884,7 @@ def set_model_from_file_multigpu( num_gpus, batch_size, min_batch_size, + min_batch_timeout, tag, inputs, outputs, diff --git a/src/python/src/pyclient.cpp b/src/python/src/pyclient.cpp index eb9b497d3..f174fa253 100644 --- a/src/python/src/pyclient.cpp +++ b/src/python/src/pyclient.cpp @@ -326,14 +326,15 @@ void PyClient::set_model(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) { MAKE_CLIENT_API({ _client->set_model(name, model, backend, device, - batch_size, min_batch_size, tag, - inputs, outputs); + batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); }); } @@ -344,14 +345,15 @@ void PyClient::set_model_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) { MAKE_CLIENT_API({ _client->set_model_multigpu(name, model, backend, first_gpu, num_gpus, - batch_size, min_batch_size, tag, - inputs, outputs); + batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); }); } @@ -361,14 +363,15 @@ void PyClient::set_model_from_file(const std::string& name, const std::string& device, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) { MAKE_CLIENT_API({ _client->set_model_from_file(name, model_file, backend, device, - batch_size, min_batch_size, tag, - inputs, outputs); + batch_size, min_batch_size, min_batch_timeout, + tag, inputs, outputs); }); } @@ -379,6 +382,7 @@ void PyClient::set_model_from_file_multigpu(const std::string& name, int num_gpus, int batch_size, int min_batch_size, + int min_batch_timeout, const std::string& tag, const std::vector& inputs, const std::vector& outputs) @@ -386,7 +390,7 @@ void PyClient::set_model_from_file_multigpu(const std::string& name, MAKE_CLIENT_API({ _client->set_model_from_file_multigpu( name, model_file, backend, first_gpu, num_gpus, batch_size, - min_batch_size, tag, inputs, outputs); + min_batch_size, min_batch_timeout, tag, inputs, outputs); 
}); } diff --git a/tests/python/test_errors.py b/tests/python/test_errors.py index bba7fde07..c330dc996 100644 --- a/tests/python/test_errors.py +++ b/tests/python/test_errors.py @@ -444,6 +444,8 @@ def test_bad_type_set_model_from_file_multigpu(use_cluster, context): c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, batch_size="not_an_integer") with pytest.raises(TypeError): c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, min_batch_size="not_an_integer") + with pytest.raises(TypeError): + c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, min_batch_timeout="not_an_integer") with pytest.raises(TypeError): c.set_model_from_file_multigpu("simple_cnn", modelfile, "TORCH", 0, 1, tag=42) diff --git a/tests/python/test_model_methods_torch.py b/tests/python/test_model_methods_torch.py index b1c7b078b..d98c6bed7 100644 --- a/tests/python/test_model_methods_torch.py +++ b/tests/python/test_model_methods_torch.py @@ -27,8 +27,12 @@ import os import torch +import pytest +from os import environ from smartredis import Client +from smartredis.error import * +test_gpu = environ.get("SMARTREDIS_TEST_DEVICE","cpu").lower() == "gpu" def test_set_model(mock_model, use_cluster, context): model = mock_model.create_torch_cnn() @@ -69,3 +73,124 @@ def test_torch_inference(mock_model, use_cluster, context): c.run_model("torch_cnn", inputs=["torch_cnn_input"], outputs=["torch_cnn_output"]) out_data = c.get_tensor("torch_cnn_output") assert out_data.shape == (1, 1, 1, 1) + +def test_batch_exceptions(mock_model, use_cluster, context): + # get model and set into database + mock_model.create_torch_cnn(filepath="./torch_cnn.pt") + model = mock_model.create_torch_cnn() + c = Client(None, use_cluster, logger_name=context) + batch_size = 1 + min_batch_size = 1 + min_batch_timeout = 1 + with pytest.raises(RedisRuntimeError): + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with 
pytest.raises(RedisRuntimeError): + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=0, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + with pytest.raises(RedisRuntimeError): + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=0, min_batch_size=min_batch_size, min_batch_timeout=0 + ) + with pytest.raises(RedisRuntimeError): + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=batch_size, min_batch_size=0, min_batch_timeout=min_batch_timeout + ) + +def test_batch_warning_set_model_from_file(mock_model, use_cluster, context, capfd): + # get model and set into database + mock_model.create_torch_cnn(filepath="./torch_cnn.pt") + c = Client(None, use_cluster, logger_name=context) + c.set_model_from_file( + "file_cnn", "./torch_cnn.pt", "TORCH", "CPU", + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err + +@pytest.mark.skipif( + not test_gpu, + reason="SMARTREDIS_TEST_DEVICE does not specify 'gpu'" +) +def test_batch_warning_set_model_from_file_multigpu(mock_model, use_cluster, context, capfd): + # get model and set into database + mock_model.create_torch_cnn(filepath="./torch_cnn.pt") + c = Client(None, use_cluster, logger_name=context) + c.set_model_from_file_multigpu( + "file_cnn", "./torch_cnn.pt", "TORCH", 1, 1, + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err + +def test_batch_warning_set_model(mock_model, use_cluster, context, capfd): + # get model and set into database + model = mock_model.create_torch_cnn() + c = Client(None, use_cluster, logger_name=context) + c.set_model( + "file_cnn", model, "TORCH", "CPU", + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err + +@pytest.mark.skipif( + not test_gpu, + reason="SMARTREDIS_TEST_DEVICE does not specify 'gpu'" +) +def test_batch_warning_set_model_multigpu(mock_model, use_cluster, context, capfd): + # get model and set into database + model = mock_model.create_torch_cnn() + c = Client(None, use_cluster, logger_name=context) + c.set_model_multigpu( + "file_cnn", model, "TORCH", 1, 1, + batch_size=1, min_batch_size=1, min_batch_timeout=0 + ) + captured = capfd.readouterr() + assert "WARNING" in captured.err
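
Usage sketch (illustrative; the model key and file path below are hypothetical): with this patch applied, a Python client opts into batched execution by passing the new min_batch_timeout keyword to any of the set_model() family calls. The rules enforced by __check_batch_settings() in src/cpp/client.cpp above are noted in the comments.

    from smartredis import Client

    # Assumes a running database reachable via the SSDB environment variable;
    # cluster=False for a single-shard deployment.
    client = Client(None, False)

    # Store a TorchScript model for batched execution on CPU. RedisAI waits up
    # to min_batch_timeout milliseconds to accumulate min_batch_size requests
    # before running the model, and batches at most batch_size requests.
    client.set_model_from_file(
        "batched_cnn",         # hypothetical model key
        "./torch_cnn.pt",      # hypothetical serialized TorchScript file
        "TORCH",
        "CPU",
        batch_size=16,         # must be non-zero if either option below is used
        min_batch_size=4,      # must be non-zero if min_batch_timeout is used
        min_batch_timeout=10,  # wait at most 10 ms for a full minimum batch
    )

    # Invalid combinations (e.g. batch_size=0 with min_batch_size=4, or
    # min_batch_timeout=10 with min_batch_size=0) raise SRRuntimeException in
    # C++ and surface as RedisRuntimeError in Python. A non-zero min_batch_size
    # with min_batch_timeout=0 is accepted but prints the WARNING exercised by
    # the tests above, since a small timeout (~10 ms) may improve performance.

The timeout exists because accumulating a minimum batch can otherwise delay model execution indefinitely; MINBATCHTIMEOUT caps that wait, trading a bounded amount of latency for batching throughput.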