From 374ca25990f9c6f9f475e09a6d06949b025c507e Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 9 Jul 2024 16:06:44 +0700 Subject: [PATCH] feat: integrate cortex.audio engine --- cortex-cpp/common/base.h | 6 + cortex-cpp/controllers/server.cc | 213 +++++++++++++++++++++++++---- cortex-cpp/controllers/server.h | 14 +- cortex-cpp/cortex-common/EngineI.h | 9 ++ cortex-cpp/utils/cortex_utils.h | 1 + 5 files changed, 215 insertions(+), 28 deletions(-) diff --git a/cortex-cpp/common/base.h b/cortex-cpp/common/base.h index 382f21b80..c515d42f5 100644 --- a/cortex-cpp/common/base.h +++ b/cortex-cpp/common/base.h @@ -26,6 +26,12 @@ class BaseModel { virtual void FineTuning( const HttpRequestPtr& req, std::function&& callback) = 0; + virtual void CreateTranscription( + const HttpRequestPtr& req, + std::function&& callback) = 0; + virtual void CreateTranslation( + const HttpRequestPtr& req, + std::function&& callback) = 0; }; class BaseChatCompletion { diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 14507317d..997190eba 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -17,6 +17,7 @@ constexpr static auto kLlamaEngine = "cortex.llamacpp"; constexpr static auto kPythonRuntimeEngine = "cortex.python"; constexpr static auto kOnnxEngine = "cortex.onnx"; constexpr static auto kTensorrtLlmEngine = "cortex.tensorrt-llm"; +constexpr static auto kAudioEngine = "cortex.audio"; } // namespace server::server(){ @@ -156,23 +157,33 @@ void server::GetModels(const HttpRequestPtr& req, } LOG_TRACE << "Start to get models"; - auto& en = std::get(engines_[cur_engine_type_].engine); - if (en->IsSupported("GetModels")) { - en->GetModels( - req->getJsonObject(), - [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); - resp->setStatusCode(static_cast( - status["status_code"].asInt())); - cb(resp); - }); - } else { - Json::Value res; - res["message"] 
= "Method is not supported yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); - resp->setStatusCode(k500InternalServerError); - callback(resp); - LOG_WARN << "Method is not supported yet"; + std::vector e_types = {cur_engine_type_}; + if (cur_engine_type_ == kLlamaEngine) { + e_types.push_back(kAudioEngine); + } else if (cur_engine_type_ == kAudioEngine) { + e_types.push_back(kLlamaEngine); + } + for (auto const& et : e_types) { + if (IsEngineLoaded(et)) { + auto& en = std::get(engines_[et].engine); + if (en->IsSupported("GetModels")) { + en->GetModels( + req->getJsonObject(), + [cb = std::move(callback)](Json::Value status, Json::Value res) { + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(static_cast( + status["status_code"].asInt())); + cb(resp); + }); + } else { + Json::Value res; + res["message"] = "Method is not supported yet"; + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(k500InternalServerError); + callback(resp); + LOG_WARN << "Method is not supported yet"; + } + } } LOG_TRACE << "Done get models"; @@ -257,21 +268,24 @@ void server::LoadModel(const HttpRequestPtr& req, // We have not loaded engine yet, should load it before using it if (engines_.find(engine_type) == engines_.end()) { - // We only use single engine so unload all engines before load new engine - UnloadEngines(); + // We use single engine for llamacpp, onnx, tensorrt so unload all engines before load new engine + // But it is tricky that we can use llamacpp with audio engine + UnloadEngines(engine_type); auto get_engine_path = [](std::string_view e) { if (e == kLlamaEngine) { return cortex_utils::kLlamaLibPath; - } else if(e == kOnnxEngine) { + } else if (e == kOnnxEngine) { return cortex_utils::kOnnxLibPath; - } else if(e == kTensorrtLlmEngine) { + } else if (e == kTensorrtLlmEngine) { return cortex_utils::kTensorrtLlmPath; + } else if (e == kAudioEngine) { + return cortex_utils::kAudioLibPath; } return 
cortex_utils::kLlamaLibPath; }; try { - if (engine_type == kLlamaEngine) { + if (engine_type == kLlamaEngine || engine_type == kAudioEngine) { cortex::cpuid::CpuInfo cpu_info; LOG_INFO << "CPU instruction set: " << cpu_info.to_string(); } @@ -312,6 +326,96 @@ void server::LoadModel(const HttpRequestPtr& req, LOG_TRACE << "Done load model"; } +void server::CreateTranscription( + const HttpRequestPtr& req, + std::function&& callback) { + if (!IsEngineLoaded(kAudioEngine)) { + Json::Value res; + res["message"] = "Engine is not loaded yet"; + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(k409Conflict); + callback(resp); + LOG_WARN << "Engine is not loaded yet"; + return; + } + + LOG_TRACE << "Start transcript"; + SyncQueue q; + auto& en = std::get(engines_[kAudioEngine].engine); + if (en->IsSupported("CreateTranscription")) { + auto req_body = ToAudioReqBody(req); + if (req_body == nullptr) { + Json::Value json_resp; + json_resp["message"] = + "No model field found or too many files in request body"; + auto resp = cortex_utils::nitroHttpJsonResponse(json_resp); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + en->CreateTranscription(req_body, + [&q](Json::Value status, Json::Value res) { + q.push(std::make_pair(status, res)); + }); + LOG_TRACE << "Wait to transcript"; + ProcessNonStreamRes(std::move(callback), q); + } else { + Json::Value res; + res["message"] = "Method is not supported yet"; + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(k500InternalServerError); + callback(resp); + LOG_WARN << "Method is not supported yet"; + } + LOG_TRACE << "Done transcript"; +} + +void server::CreateTranslation( + const HttpRequestPtr& req, + std::function&& callback) { + auto engine_type = + (*(req->getJsonObject())).get("engine", kAudioEngine).asString(); + if (!IsEngineLoaded(engine_type)) { + Json::Value res; + res["message"] = "Engine is not loaded yet"; + auto resp = 
cortex_utils::nitroHttpJsonResponse(res);
+    resp->setStatusCode(k409Conflict);
+    callback(resp);
+    LOG_WARN << "Engine is not loaded yet";
+    return;
+  }
+
+  LOG_TRACE << "Start translate";
+  SyncQueue q;
+  auto& en = std::get(engines_[engine_type].engine);
+  if (en->IsSupported("CreateTranslation")) {
+    auto req_body = ToAudioReqBody(req);
+    if (req_body == nullptr) {
+      Json::Value json_resp;
+      json_resp["message"] =
+          "No model field found or too many files in request body";
+      auto resp = cortex_utils::nitroHttpJsonResponse(json_resp);
+      resp->setStatusCode(k400BadRequest);
+      callback(resp);
+      return;
+    }
+    en->CreateTranslation(req_body,
+                          [&q](Json::Value status, Json::Value res) {
+                            q.push(std::make_pair(status, res));
+                          });
+    LOG_TRACE << "Wait to translate";
+    ProcessNonStreamRes(std::move(callback), q);
+  } else {
+    Json::Value res;
+    res["message"] = "Method is not supported yet";
+    auto resp = cortex_utils::nitroHttpJsonResponse(res);
+    resp->setStatusCode(k500InternalServerError);
+    callback(resp);
+    LOG_WARN << "Method is not supported yet";
+  }
+  LOG_TRACE << "Done translate";
+}
+
 void server::ProcessStreamRes(std::function cb,
                               std::shared_ptr q) {
   auto err_or_done = std::make_shared(false);
@@ -359,14 +463,69 @@ bool server::IsEngineLoaded(const std::string& e) {
   return engines_.find(e) != engines_.end();
 }
 
-void server::UnloadEngines() {
-  // We unload all engines except python engine
+void server::UnloadEngines(const std::string& e) {
+  // We unload all engines except:
+  // - python engine
+  // - llama engine in case we'd like to load audio engine
+  // - audio engine in case we'd like to load llama engine
   for (auto it = engines_.begin(); it != engines_.end();) {
-    if (it->first != kPythonRuntimeEngine) {
-      it = engines_.erase(it);
-    } else
+    if ((it->first == kPythonRuntimeEngine) ||
+        (e == kLlamaEngine && it->first == kAudioEngine) ||
+        (e == kAudioEngine && it->first == kLlamaEngine)) {
       it++;
+    } else {
+      it = engines_.erase(it);
+    }
+  }
+}
+
+std::shared_ptr server::ToAudioReqBody(const HttpRequestPtr& req) { + auto req_body = std::make_shared(); + MultiPartParser part_parser; + if (part_parser.parse(req) != 0 || part_parser.getFiles().size() != 1) { + LOG_ERROR << "Must have exactly one file"; + return nullptr; } + + auto& file = part_parser.getFiles()[0]; + const auto& form_fields = part_parser.getParameters(); + + if (form_fields.find("model") == form_fields.end()) { + LOG_ERROR << "No model field found in request body"; + return nullptr; + } + + (*req_body)["model"] = form_fields.at("model"); + // Parse all other optional parameters from the request + (*req_body)["language"] = form_fields.find("language") != form_fields.end() + ? form_fields.at("language") + : "en"; + (*req_body)["prompt"] = form_fields.find("prompt") != form_fields.end() + ? form_fields.at("prompt") + : ""; + (*req_body)["response_format"] = + form_fields.find("response_format") != form_fields.end() + ? form_fields.at("response_format") + : "json"; + (*req_body)["temperature"] = + form_fields.find("temperature") != form_fields.end() + ? 
std::stof(form_fields.at("temperature")) + : 0; + + // Save input file to temp location + std::string temp_dir = + std::filesystem::temp_directory_path().string() + "/" + + std::to_string(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + // Create the directory + std::filesystem::create_directory(temp_dir); + // Save the file to the directory, with its original name + std::string temp_file_path = temp_dir + "/" + file.getFileName(); + file.saveAs(temp_file_path); + + (*req_body)["file"] = temp_file_path; + return req_body; } } // namespace inferences diff --git a/cortex-cpp/controllers/server.h b/cortex-cpp/controllers/server.h index 87f544f27..ed0d58f8d 100644 --- a/cortex-cpp/controllers/server.h +++ b/cortex-cpp/controllers/server.h @@ -50,6 +50,8 @@ class server : public drogon::HttpController, METHOD_ADD(server::ModelStatus, "modelstatus", Post); METHOD_ADD(server::GetModels, "models", Get); METHOD_ADD(server::GetEngines, "engines", Get); + METHOD_ADD(server::CreateTranscription, "transcriptions", Post); + METHOD_ADD(server::CreateTranslation, "translations", Post); // cortex.python API METHOD_ADD(server::FineTuning, "finetuning", Post); @@ -58,6 +60,8 @@ class server : public drogon::HttpController, ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post); ADD_METHOD_TO(server::GetModels, "/v1/models", Get); ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Post); + ADD_METHOD_TO(server::CreateTranscription, "/v1/audio/transcriptions", Post); + ADD_METHOD_TO(server::CreateTranslation, "/v1/audio/translations", Post); // ADD_METHOD_TO(server::handlePrelight, "/v1/chat/completions", Options); // NOTE: prelight will be added back when browser support is properly planned @@ -91,6 +95,12 @@ class server : public drogon::HttpController, void FineTuning( const HttpRequestPtr& req, std::function&& callback) override; + void CreateTranscription( + const HttpRequestPtr& req, + std::function&& callback) 
override; + void CreateTranslation( + const HttpRequestPtr& req, + std::function&& callback) override; private: void ProcessStreamRes(std::function cb, @@ -99,7 +109,9 @@ class server : public drogon::HttpController, SyncQueue& q); bool IsEngineLoaded(const std::string& e); - void UnloadEngines(); + void UnloadEngines(const std::string& e); + + std::shared_ptr ToAudioReqBody(const HttpRequestPtr& req); private: struct SyncQueue { diff --git a/cortex-cpp/cortex-common/EngineI.h b/cortex-cpp/cortex-common/EngineI.h index c5dcc8afe..c8e11bc03 100644 --- a/cortex-cpp/cortex-common/EngineI.h +++ b/cortex-cpp/cortex-common/EngineI.h @@ -34,4 +34,13 @@ class EngineI { virtual void GetModels( std::shared_ptr jsonBody, std::function&& callback) = 0; + + // cortex.audio interface + virtual void CreateTranscription( + std::shared_ptr jsonBody, + std::function&& callback) = 0; + + virtual void CreateTranslation( + std::shared_ptr jsonBody, + std::function&& callback) = 0; }; diff --git a/cortex-cpp/utils/cortex_utils.h b/cortex-cpp/utils/cortex_utils.h index c4319ca57..fba761b9e 100644 --- a/cortex-cpp/utils/cortex_utils.h +++ b/cortex-cpp/utils/cortex_utils.h @@ -29,6 +29,7 @@ constexpr static auto kLlamaLibPath = "/engines/cortex.llamacpp"; constexpr static auto kPythonRuntimeLibPath = "/engines/cortex.python"; constexpr static auto kOnnxLibPath = "/engines/cortex.onnx"; constexpr static auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm"; +constexpr static auto kAudioLibPath = "/engines/cortex.audio"; inline std::string models_folder = "./models";