Skip to content

Commit

Permalink
feat(functions): Add support for REST based remote functions
Browse files Browse the repository at this point in the history
Co-authored-by: Wills Feng <[email protected]>
  • Loading branch information
Joe-Abraham and wills-feng committed Feb 5, 2025
1 parent ddc20e6 commit 7cdcaae
Show file tree
Hide file tree
Showing 14 changed files with 951 additions and 62 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,9 @@ if(VELOX_ENABLE_REMOTE_FUNCTIONS)
find_package(fizz CONFIG REQUIRED)
find_package(wangle CONFIG REQUIRED)
find_package(FBThrift CONFIG REQUIRED)
set(cpr_SOURCE BUNDLED)
velox_resolve_dependency(cpr)
FetchContent_MakeAvailable(cpr)
endif()

if(VELOX_ENABLE_GCS)
Expand Down
8 changes: 8 additions & 0 deletions velox/functions/remote/client/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,19 @@ velox_add_library(velox_functions_remote_thrift_client ThriftClient.cpp)
velox_link_libraries(velox_functions_remote_thrift_client
PUBLIC remote_function_thrift FBThrift::thriftcpp2)

# REST (HTTP) client used by remote functions. Uses the keyword signature, like
# the sibling targets in this file: CMake does not allow mixing the plain and
# keyword forms of target_link_libraries on the same target.
velox_add_library(velox_functions_remote_rest_client RestClient.cpp)
velox_link_libraries(velox_functions_remote_rest_client
                     PUBLIC Folly::folly cpr::cpr)

velox_add_library(velox_functions_remote Remote.cpp)
velox_link_libraries(
velox_functions_remote
PUBLIC velox_expression
velox_memory
velox_exec
velox_vector
velox_presto_serializer
velox_functions_remote_thrift_client
velox_functions_remote_rest_client
velox_functions_remote_get_serde
velox_type_fbhive
Folly::folly)
Expand Down
111 changes: 100 additions & 11 deletions velox/functions/remote/client/Remote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,71 @@

#include "velox/functions/remote/client/Remote.h"

#include <fmt/format.h>
#include <folly/io/async/EventBase.h>
#include <sstream>
#include <string>

#include "velox/common/memory/ByteStream.h"
#include "velox/expression/Expr.h"
#include "velox/expression/VectorFunction.h"
#include "velox/functions/remote/client/RestClient.h"
#include "velox/functions/remote/client/ThriftClient.h"
#include "velox/functions/remote/if/GetSerde.h"
#include "velox/functions/remote/if/gen-cpp2/RemoteFunctionServiceAsyncClient.h"
#include "velox/serializers/PrestoSerializer.h"
#include "velox/type/fbhive/HiveTypeSerializer.h"
#include "velox/vector/VectorStream.h"

using namespace folly;
namespace facebook::velox::functions {
namespace {

/// Converts `type` to the string representation produced by the Hive type
/// serializer.
std::string serializeType(const TypePtr& type) {
  std::string hiveTypeString =
      type::fbhive::HiveTypeSerializer::serialize(type);
  return hiveTypeString;
}

/// Returns the part of `input` that follows its last '.' character, or a copy
/// of `input` itself when it contains no '.'.
std::string extractFunctionName(const std::string& input) {
  const auto separatorPos = input.rfind('.');
  return separatorPos == std::string::npos ? input
                                           : input.substr(separatorPos + 1);
}

/// Percent-encodes `value` so it can be embedded in a URL.
///
/// ASCII letters, ASCII digits and the characters '-', '_', '.', '~' are
/// copied through unchanged. Every other byte is emitted as '%' followed by
/// its two-digit lowercase hexadecimal value.
///
/// The character classification is done on explicit ASCII ranges rather than
/// with isalnum(), so the result does not depend on the process locale, and
/// the hex digits are produced by table lookup, so no stream or <iomanip>
/// machinery is needed.
///
/// @param value Raw bytes to encode.
/// @return The percent-encoded string.
std::string urlEncode(const std::string& value) {
  static constexpr char kHexDigits[] = "0123456789abcdef";

  const auto isUnreserved = [](unsigned char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
        (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.' ||
        c == '~';
  };

  std::string escaped;
  // Lower bound on the output size; escaped bytes grow it further.
  escaped.reserve(value.size());
  for (const unsigned char c : value) {
    if (isUnreserved(c)) {
      escaped.push_back(static_cast<char>(c));
    } else {
      escaped.push_back('%');
      escaped.push_back(kHexDigits[c >> 4]);
      escaped.push_back(kHexDigits[c & 0x0F]);
    }
  }
  return escaped;
}

class RemoteFunction : public exec::VectorFunction {
public:
RemoteFunction(
const std::string& functionName,
const std::vector<exec::VectorFunctionArg>& inputArgs,
const RemoteVectorFunctionMetadata& metadata)
: functionName_(functionName),
location_(metadata.location),
thriftClient_(getThriftClient(location_, &eventBase_)),
metadata_(metadata),
serdeFormat_(metadata.serdeFormat),
serde_(getSerde(serdeFormat_)) {
serde_(getSerde(serdeFormat_)),
location_(metadata.location) {
if (metadata.location.type() == typeid(SocketAddress)) {
thriftClient_ =
getThriftClient(boost::get<SocketAddress>(location_), &eventBase_);
} else if (metadata.location.type() == typeid(std::string)) {
restClient_ = getRestClient();
}

std::vector<TypePtr> types;
types.reserve(inputArgs.size());
serializedInputTypes_.reserve(inputArgs.size());
Expand All @@ -62,7 +99,11 @@ class RemoteFunction : public exec::VectorFunction {
exec::EvalCtx& context,
VectorPtr& result) const override {
try {
applyRemote(rows, args, outputType, context, result);
if ((metadata_.location.type() == typeid(SocketAddress))) {
applyRemote(rows, args, outputType, context, result);
} else if (metadata_.location.type() == typeid(std::string)) {
applyRestRemote(rows, args, outputType, context, result);
}
} catch (const VeloxRuntimeError&) {
throw;
} catch (const std::exception&) {
Expand All @@ -71,6 +112,50 @@ class RemoteFunction : public exec::VectorFunction {
}

private:
/// Evaluates the function by POSTing the arguments to a REST endpoint and
/// reading the result back from the response.
///
/// The arguments are wrapped in a RowVector of type `remoteInputType_`,
/// serialized with PrestoVectorSerde and sent through `restClient_`. The
/// response is deserialized as a single-column row of `outputType`, and that
/// column becomes `result`.
///
/// The endpoint is
/// `{location}/v1/functions/{schema}/{function}/{functionId}/{version}`.
/// Every path segment is percent-encoded so that characters such as '/', '?'
/// or spaces in any of them cannot alter the structure of the URL.
///
/// @param rows Selected rows; `rows.end()` is used as the number of rows to
/// serialize.
/// @param args Function arguments, used as the children of the request row.
/// @param outputType Type of the single result column.
/// @param context Evaluation context supplying the memory pool.
/// @param result Receives the first child of the deserialized response.
/// @throws VeloxRuntimeError (via VELOX_FAIL) wrapping any std::exception
/// raised while serializing, sending or deserializing.
void applyRestRemote(
    const SelectivityVector& rows,
    const std::vector<VectorPtr>& args,
    const TypePtr& outputType,
    const exec::EvalCtx& context,
    VectorPtr& result) const {
  try {
    serializer::presto::PrestoVectorSerde serde;

    // `args` is const, so the vector of children is copied here. Spelling it
    // as std::move(args) would perform the same copy while looking like a
    // move.
    auto remoteRowVector = std::make_shared<RowVector>(
        context.pool(), remoteInputType_, BufferPtr{}, rows.end(), args);

    std::unique_ptr<IOBuf> requestBody =
        std::make_unique<IOBuf>(rowVectorToIOBuf(
            remoteRowVector, rows.end(), *context.pool(), &serde));

    // Because location_ is a variant, we must get the string:
    const auto& url = boost::get<std::string>(location_);
    const std::string fullUrl = fmt::format(
        "{}/v1/functions/{}/{}/{}/{}",
        url,
        urlEncode(metadata_.schema.value_or("default")),
        urlEncode(extractFunctionName(functionName_)),
        urlEncode(metadata_.functionId.value_or("default_function_id")),
        urlEncode(metadata_.version.value_or("1")));

    std::unique_ptr<IOBuf> responseBody =
        restClient_->invokeFunction(fullUrl, std::move(requestBody));

    auto outputRowVector = IOBufToRowVector(
        *responseBody, ROW({outputType}), *context.pool(), &serde);

    result = outputRowVector->childAt(0);
  } catch (const std::exception& e) {
    VELOX_FAIL(
        "Error while executing remote function '{}': {}",
        functionName_,
        e.what());
  }
}

void applyRemote(
const SelectivityVector& rows,
std::vector<VectorPtr>& args,
Expand Down Expand Up @@ -109,7 +194,7 @@ class RemoteFunction : public exec::VectorFunction {
VELOX_FAIL(
"Error while executing remote function '{}' at '{}': {}",
functionName_,
location_.describe(),
boost::get<SocketAddress>(location_).describe(),
e.what());
}

Expand Down Expand Up @@ -142,14 +227,18 @@ class RemoteFunction : public exec::VectorFunction {
}

const std::string functionName_;
folly::SocketAddress location_;
EventBase eventBase_;
const RemoteVectorFunctionMetadata metadata_;

folly::EventBase eventBase_;
std::unique_ptr<RemoteFunctionClient> thriftClient_;
remote::PageFormat serdeFormat_;
std::unique_ptr<VectorSerde> serde_;

// Structures we construct once to cache:
boost::variant<SocketAddress, std::string> location_;

// Depending on which active type we have, one of these clients will be used:
std::unique_ptr<RemoteFunctionClient> thriftClient_;
std::unique_ptr<HttpClient> restClient_;

RowTypePtr remoteInputType_;
std::vector<std::string> serializedInputTypes_;
};
Expand All @@ -169,7 +258,7 @@ void registerRemoteFunction(
std::vector<exec::FunctionSignaturePtr> signatures,
const RemoteVectorFunctionMetadata& metadata,
bool overwrite) {
exec::registerStatefulVectorFunction(
registerStatefulVectorFunction(
name,
signatures,
std::bind(
Expand Down
25 changes: 21 additions & 4 deletions velox/functions/remote/client/Remote.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,37 @@

#pragma once

#include <boost/variant.hpp>
#include <folly/SocketAddress.h>
#include "velox/expression/VectorFunction.h"
#include "velox/functions/remote/if/gen-cpp2/RemoteFunction_types.h"

namespace facebook::velox::functions {

struct RemoteVectorFunctionMetadata : public exec::VectorFunctionMetadata {
/// Network address of the server to communicate with. Note that this can hold
/// a network location (ip/port pair) or a unix domain socket path (see
/// URL of the HTTP/REST server for remote function.
/// Or Network address of the server to communicate with. Note that this can
/// hold a network location (ip/port pair) or a unix domain socket path (see
/// SocketAddress::makeFromPath()).
folly::SocketAddress location;
boost::variant<folly::SocketAddress, std::string> location;

/// The serialization format to be used
/// The serialization format to be used when sending data to the remote.
remote::PageFormat serdeFormat{remote::PageFormat::PRESTO_PAGE};

/// Optional schema defining the structure of the data or input/output types
/// involved in the remote function. This may include details such as column
/// names and data types.
std::optional<std::string> schema;

/// Optional identifier for the specific remote function to be invoked.
/// This can be useful when the same server hosts multiple functions,
/// and the client needs to specify which function to call.
std::optional<std::string> functionId;

/// Optional version information to be used when calling the remote function.
/// This can help in ensuring compatibility with a particular version of the
/// function if multiple versions are available on the server.
std::optional<std::string> version;
};

/// Registers a new remote function. It will use the metadata defined in
Expand Down
61 changes: 61 additions & 0 deletions velox/functions/remote/client/RestClient.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/functions/remote/client/RestClient.h"

#include <cpr/cpr.h>
#include <folly/io/IOBufQueue.h>

#include "velox/common/base/Exceptions.h"

using namespace folly;
namespace facebook::velox::functions {

std::unique_ptr<IOBuf> RestClient::invokeFunction(
const std::string& fullUrl,
std::unique_ptr<IOBuf> requestPayload) {
IOBufQueue inputBufQueue(IOBufQueue::cacheChainLength());
inputBufQueue.append(std::move(requestPayload));

std::string requestBody;
for (auto range : *inputBufQueue.front()) {
requestBody.append(
reinterpret_cast<const char*>(range.data()), range.size());
}

cpr::Response response = cpr::Post(
cpr::Url{fullUrl},
cpr::Header{
{"Content-Type", "application/X-presto-pages"},
{"Accept", "application/X-presto-pages"}},
cpr::Body{requestBody});

if (response.error) {
VELOX_FAIL(fmt::format(
"Error communicating with server: {} URL: {}",
response.error.message,
fullUrl));
}

auto outputBuf = IOBuf::copyBuffer(response.text);
return outputBuf;
}

/// Creates a new RestClient and returns it through the HttpClient interface.
std::unique_ptr<HttpClient> getRestClient() {
  std::unique_ptr<HttpClient> client = std::make_unique<RestClient>();
  return client;
}

} // namespace facebook::velox::functions
68 changes: 68 additions & 0 deletions velox/functions/remote/client/RestClient.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <folly/io/IOBuf.h>
#include <memory>
#include <string>

namespace facebook::velox::functions {

/// @brief Abstract interface for an HTTP client.
/// Provides a method to invoke a function by sending an HTTP request
/// and receiving a response, both in Presto's serialized wire format.
/// Instances are handed out as std::unique_ptr<HttpClient> (see
/// getRestClient()), so implementations are used and destroyed through this
/// base type.
class HttpClient {
public:
/// Virtual so that an implementation owned through a pointer to HttpClient is
/// destroyed correctly.
virtual ~HttpClient() = default;

/// @brief Invokes a function over HTTP.
/// @param url The endpoint URL to send the request to.
/// @param requestPayload The request payload in Presto's serialized wire
/// format. It is passed by value, so ownership of the buffer moves to the
/// callee.
/// @return A unique pointer to the response payload in Presto's serialized
/// wire format.
virtual std::unique_ptr<folly::IOBuf> invokeFunction(
const std::string& url,
std::unique_ptr<folly::IOBuf> requestPayload) = 0;
};

/// @brief Concrete implementation of HttpClient using REST.
/// Handles HTTP communication by sending requests and receiving responses
/// using RESTful APIs with payloads in Presto's serialized wire format.
/// The class declares no data members; each call to invokeFunction() issues
/// its own request.
class RestClient : public HttpClient {
public:
/// @brief Invokes a function over HTTP using cpr.
/// Sends an HTTP POST request to the specified URL with the request payload
/// and receives the response payload. Both payloads are in Presto's
/// serialized wire format, announced to the server through the Content-Type
/// and Accept headers (application/X-presto-pages).
/// @param url The endpoint URL to send the request to.
/// @param requestPayload The request payload in Presto's serialized wire
/// format. Ownership is taken by the call; the bytes are copied into the
/// request body.
/// @return A unique pointer to the response payload in Presto's serialized
/// wire format. The buffer is a copy of the response body.
/// @throws VeloxException if there is an error initializing cpr or during
/// the request.
std::unique_ptr<folly::IOBuf> invokeFunction(
const std::string& url,
std::unique_ptr<folly::IOBuf> requestPayload) override;
};

/// @brief Factory function to create an instance of RestClient.
/// @return A unique pointer to an HttpClient implementation.
std::unique_ptr<HttpClient> getRestClient();

} // namespace facebook::velox::functions
Loading

0 comments on commit 7cdcaae

Please sign in to comment.