-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(functions): Add support for REST based remote functions #10911
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,11 +16,19 @@ velox_add_library(velox_functions_remote_thrift_client ThriftClient.cpp) | |
velox_link_libraries(velox_functions_remote_thrift_client | ||
PUBLIC remote_function_thrift FBThrift::thriftcpp2) | ||
|
||
velox_add_library(velox_functions_remote_rest_client RestClient.cpp) | ||
velox_link_libraries(velox_functions_remote_rest_client Folly::folly cpr::cpr) | ||
|
||
velox_add_library(velox_functions_remote Remote.cpp) | ||
velox_link_libraries( | ||
velox_functions_remote | ||
PUBLIC velox_expression | ||
velox_memory | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its a bit strange that these Velox exec dependencies came in because of REST. Can you remove them and check what happens ? |
||
velox_exec | ||
velox_vector | ||
velox_presto_serializer | ||
velox_functions_remote_thrift_client | ||
velox_functions_remote_rest_client | ||
velox_functions_remote_get_serde | ||
velox_type_fbhive | ||
Folly::folly) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,20 +16,27 @@ | |
|
||
#include "velox/functions/remote/client/Remote.h" | ||
|
||
#include <fmt/format.h> | ||
#include <folly/io/async/EventBase.h> | ||
#include <sstream> | ||
#include <string> | ||
|
||
#include "velox/common/memory/ByteStream.h" | ||
aditi-pandit marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#include "velox/expression/Expr.h" | ||
#include "velox/expression/VectorFunction.h" | ||
#include "velox/functions/remote/client/RestClient.h" | ||
#include "velox/functions/remote/client/ThriftClient.h" | ||
#include "velox/functions/remote/if/GetSerde.h" | ||
#include "velox/functions/remote/if/gen-cpp2/RemoteFunctionServiceAsyncClient.h" | ||
#include "velox/serializers/PrestoSerializer.h" | ||
#include "velox/type/fbhive/HiveTypeSerializer.h" | ||
#include "velox/vector/VectorStream.h" | ||
|
||
using namespace folly; | ||
namespace facebook::velox::functions { | ||
namespace { | ||
|
||
std::string serializeType(const TypePtr& type) { | ||
// Use hive type serializer. | ||
return type::fbhive::HiveTypeSerializer::serialize(type); | ||
} | ||
|
||
|
@@ -40,10 +47,17 @@ class RemoteFunction : public exec::VectorFunction { | |
const std::vector<exec::VectorFunctionArg>& inputArgs, | ||
const RemoteVectorFunctionMetadata& metadata) | ||
: functionName_(functionName), | ||
location_(metadata.location), | ||
thriftClient_(getThriftClient(location_, &eventBase_)), | ||
metadata_(metadata), | ||
serdeFormat_(metadata.serdeFormat), | ||
serde_(getSerde(serdeFormat_)) { | ||
serde_(getSerde(serdeFormat_)), | ||
location_(metadata.location) { | ||
if (metadata.location.type() == typeid(SocketAddress)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can have a member variable of enum for RemoteType = {REST, HTTP}, and set it in the constructor and use it in the apply instead of doing the type check each time. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the suggestion. This part was added by @wills-feng, and I think using boost::variant ensures type safety while avoiding the need for an additional enum. It keeps the implementation simple and prevents potential desynchronization issues. Let me know your thoughts. |
||
thriftClient_ = | ||
getThriftClient(boost::get<SocketAddress>(location_), &eventBase_); | ||
} else if (metadata.location.type() == typeid(std::string)) { | ||
restClient_ = getRestClient(); | ||
} | ||
|
||
std::vector<TypePtr> types; | ||
types.reserve(inputArgs.size()); | ||
serializedInputTypes_.reserve(inputArgs.size()); | ||
|
@@ -62,7 +76,11 @@ class RemoteFunction : public exec::VectorFunction { | |
exec::EvalCtx& context, | ||
VectorPtr& result) const override { | ||
try { | ||
applyRemote(rows, args, outputType, context, result); | ||
if ((metadata_.location.type() == typeid(SocketAddress))) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There isn't a need to do this typing at this point then. You can write a condition based on whether you have a thriftClient_ or restClient_. |
||
applyRemote(rows, args, outputType, context, result); | ||
} else if (metadata_.location.type() == typeid(std::string)) { | ||
applyRestRemote(rows, args, outputType, context, result); | ||
} | ||
} catch (const VeloxRuntimeError&) { | ||
throw; | ||
} catch (const std::exception&) { | ||
|
@@ -71,6 +89,40 @@ class RemoteFunction : public exec::VectorFunction { | |
} | ||
|
||
private: | ||
void applyRestRemote( | ||
const SelectivityVector& rows, | ||
const std::vector<VectorPtr>& args, | ||
const TypePtr& outputType, | ||
const exec::EvalCtx& context, | ||
VectorPtr& result) const { | ||
try { | ||
serializer::presto::PrestoVectorSerde serde; | ||
auto remoteRowVector = std::make_shared<RowVector>( | ||
context.pool(), | ||
remoteInputType_, | ||
BufferPtr{}, | ||
rows.end(), | ||
std::move(args)); | ||
|
||
std::unique_ptr<IOBuf> requestBody = | ||
std::make_unique<IOBuf>(rowVectorToIOBuf( | ||
remoteRowVector, rows.end(), *context.pool(), &serde)); | ||
|
||
std::unique_ptr<IOBuf> responseBody = | ||
restClient_->invokeFunction(boost::get<std::string>(location_), std::move(requestBody)); | ||
|
||
auto outputRowVector = IOBufToRowVector( | ||
*responseBody, ROW({outputType}), *context.pool(), &serde); | ||
|
||
result = outputRowVector->childAt(0); | ||
} catch (const std::exception& e) { | ||
VELOX_FAIL( | ||
"Error while executing remote function '{}': {}", | ||
functionName_, | ||
e.what()); | ||
} | ||
} | ||
|
||
void applyRemote( | ||
const SelectivityVector& rows, | ||
std::vector<VectorPtr>& args, | ||
|
@@ -109,7 +161,7 @@ class RemoteFunction : public exec::VectorFunction { | |
VELOX_FAIL( | ||
"Error while executing remote function '{}' at '{}': {}", | ||
functionName_, | ||
location_.describe(), | ||
boost::get<SocketAddress>(location_).describe(), | ||
e.what()); | ||
} | ||
|
||
|
@@ -142,14 +194,18 @@ class RemoteFunction : public exec::VectorFunction { | |
} | ||
|
||
const std::string functionName_; | ||
folly::SocketAddress location_; | ||
EventBase eventBase_; | ||
const RemoteVectorFunctionMetadata metadata_; | ||
|
||
folly::EventBase eventBase_; | ||
std::unique_ptr<RemoteFunctionClient> thriftClient_; | ||
remote::PageFormat serdeFormat_; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why did you remove the serdeFormat_ and serde_ member variables ? It was better to construct them once in the constructor and use them in the function apply code, instead of call getSerde each time. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reverted the changes. |
||
std::unique_ptr<VectorSerde> serde_; | ||
|
||
// Structures we construct once to cache: | ||
boost::variant<SocketAddress, std::string> location_; | ||
|
||
// Depending on which active type we have, one of these clients will be used: | ||
std::unique_ptr<RemoteFunctionClient> thriftClient_; | ||
std::unique_ptr<HttpClient> restClient_; | ||
|
||
RowTypePtr remoteInputType_; | ||
std::vector<std::string> serializedInputTypes_; | ||
}; | ||
|
@@ -169,7 +225,7 @@ void registerRemoteFunction( | |
std::vector<exec::FunctionSignaturePtr> signatures, | ||
const RemoteVectorFunctionMetadata& metadata, | ||
bool overwrite) { | ||
exec::registerStatefulVectorFunction( | ||
registerStatefulVectorFunction( | ||
name, | ||
signatures, | ||
std::bind( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "velox/functions/remote/client/RestClient.h" | ||
|
||
#include <cpr/cpr.h> | ||
#include <folly/io/IOBufQueue.h> | ||
|
||
#include "velox/common/base/Exceptions.h" | ||
|
||
using namespace folly; | ||
namespace facebook::velox::functions { | ||
|
||
std::unique_ptr<IOBuf> RestClient::invokeFunction( | ||
const std::string& fullUrl, | ||
std::unique_ptr<IOBuf> requestPayload) { | ||
IOBufQueue inputBufQueue(IOBufQueue::cacheChainLength()); | ||
inputBufQueue.append(std::move(requestPayload)); | ||
|
||
std::string requestBody; | ||
for (auto range : *inputBufQueue.front()) { | ||
requestBody.append( | ||
reinterpret_cast<const char*>(range.data()), range.size()); | ||
} | ||
|
||
cpr::Response response = cpr::Post( | ||
cpr::Url{fullUrl}, | ||
cpr::Header{ | ||
{"Content-Type", "application/X-presto-pages"}, | ||
{"Accept", "application/X-presto-pages"}}, | ||
cpr::Body{requestBody}); | ||
|
||
if (response.error) { | ||
VELOX_FAIL(fmt::format( | ||
"Error communicating with server: {} URL: {}", | ||
response.error.message, | ||
fullUrl)); | ||
} | ||
|
||
auto outputBuf = IOBuf::copyBuffer(response.text); | ||
return outputBuf; | ||
} | ||
|
||
std::unique_ptr<HttpClient> getRestClient() { | ||
return std::make_unique<RestClient>(); | ||
} | ||
|
||
} // namespace facebook::velox::functions |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <folly/io/IOBuf.h> | ||
#include <memory> | ||
#include <string> | ||
|
||
namespace facebook::velox::functions { | ||
|
||
/// @brief Abstract interface for an HTTP client. | ||
/// Provides a method to invoke a function by sending an HTTP request | ||
/// and receiving a response, both in Presto's serialized wire format. | ||
class HttpClient { | ||
aditi-pandit marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I look at this code I feel like this is a RemoteFunctionClientInterface and we can have 2 implementations -- RestRemoteClient and ThriftRemoteClient. Might be good to abstract something like that so that both Thrift and Rest remote functions remain consistent in some way. wdyt ? |
||
public: | ||
virtual ~HttpClient() = default; | ||
|
||
/// @brief Invokes a function over HTTP. | ||
/// @param url The endpoint URL to send the request to. | ||
/// @param requestPayload The request payload in Presto's serialized wire | ||
/// format. | ||
/// @return A unique pointer to the response payload in Presto's serialized | ||
/// wire format. | ||
virtual std::unique_ptr<folly::IOBuf> invokeFunction( | ||
const std::string& url, | ||
std::unique_ptr<folly::IOBuf> requestPayload) = 0; | ||
}; | ||
|
||
/// @brief Concrete implementation of HttpClient using REST. | ||
/// Handles HTTP communication by sending requests and receiving responses | ||
/// using RESTful APIs with payloads in Presto's serialized wire format. | ||
class RestClient : public HttpClient { | ||
public: | ||
/// @brief Invokes a function over HTTP using cpr. | ||
/// Sends an HTTP POST request to the specified URL with the request payload | ||
/// and receives the response payload. Both payloads are in Presto's | ||
/// serialized wire format. | ||
/// @param url The endpoint URL to send the request to. | ||
/// @param requestPayload The request payload in Presto's serialized wire | ||
/// format. | ||
/// @return A unique pointer to the response payload in Presto's serialized | ||
/// wire format. | ||
/// @throws VeloxException if there is an error initializing cpr or during | ||
/// the request. | ||
std::unique_ptr<folly::IOBuf> invokeFunction( | ||
const std::string& url, | ||
std::unique_ptr<folly::IOBuf> requestPayload) override; | ||
}; | ||
|
||
/// @brief Factory function to create an instance of RestClient. | ||
/// @return A unique pointer to an HttpClient implementation. | ||
std::unique_ptr<HttpClient> getRestClient(); | ||
|
||
} // namespace facebook::velox::functions |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#10911 (comment)