Add executorch parallel #953

Merged: 1 commit, Sep 26, 2024
6 changes: 5 additions & 1 deletion torchao/experimental/CMakeLists.txt
@@ -24,11 +24,15 @@ if(NOT TORCHAO_INCLUDE_DIRS)
set(TORCHAO_INCLUDE_DIRS ${TORCHAO_ROOT}/../..)
endif()

if (NOT TORCHAO_OP_TARGET)
message(FATAL_ERROR "TORCHAO_OP_TARGET is not set. Set it to ATEN or EXECUTORCH.")
endif()

if (NOT TORCHAO_PARALLEL_BACKEND)
if (TORCHAO_OP_TARGET STREQUAL "ATEN")
set(TORCHAO_PARALLEL_BACKEND "ATEN_OPENMP")
elseif(TORCHAO_OP_TARGET STREQUAL "EXECUTORCH")
set(TORCHAO_PARALLEL_BACKEND "PTHREADPOOL")
set(TORCHAO_PARALLEL_BACKEND "EXECUTORCH")
else()
message(FATAL_ERROR "TORCHAO_PARALLEL_BACKEND is not set. Please set it directly or set TORCHAO_OP_TARGET to get a default.")
endif()
8 changes: 8 additions & 0 deletions torchao/experimental/Utils.cmake
@@ -23,6 +23,14 @@ function(target_link_torchao_parallel_backend target_name torchao_parallel_backend)
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_ATEN=1 AT_PARALLEL_OPENMP=1 INTRA_OP_PARALLEL=1)
target_link_libraries(${target_name} PRIVATE ${TORCH_INSTALL_PREFIX}/lib/libomp${CMAKE_SHARED_LIBRARY_SUFFIX})

elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "EXECUTORCH")
message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=TORCHAO_PARALLEL_EXECUTORCH")
message(STATUS "EXECUTORCH_INCLUDE_DIRS: ${EXECUTORCH_INCLUDE_DIRS}")
message(STATUS "EXECUTORCH_LIBRARIES: ${EXECUTORCH_LIBRARIES}")
target_include_directories(${target_name} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_link_libraries(${target_name} PRIVATE "${EXECUTORCH_LIBRARIES}")
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_EXECUTORCH=1)

elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "OPENMP")
message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=OPENMP. You must set the CMake variable OpenMP_ROOT to the OMP library location before compiling. Do not use this option if Torch was built with OPENMP; use ATEN_OPENMP instead.")
find_package(OpenMP REQUIRED)
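For context, a minimal sketch (not part of this diff) of how a consuming CMake target could exercise the new branch; my_kernels and my_kernels.cpp are placeholder names, and EXECUTORCH_INCLUDE_DIRS / EXECUTORCH_LIBRARIES are assumed to already point at an ExecuTorch build:

# Hypothetical consumer target; my_kernels.cpp is a placeholder source file.
add_library(my_kernels STATIC my_kernels.cpp)

# Takes the new EXECUTORCH branch above: adds the ExecuTorch include dirs and
# libraries to the target and defines TORCHAO_PARALLEL_EXECUTORCH=1.
target_link_torchao_parallel_backend(my_kernels "EXECUTORCH")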
4 changes: 2 additions & 2 deletions torchao/experimental/build_torchao_ops.sh
@@ -11,8 +11,8 @@ export CMAKE_OUT=/tmp/cmake-out/torchao
cmake -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} \
-DCMAKE_INSTALL_PREFIX=${CMAKE_OUT} \
-DTORCHAO_OP_TARGET="$1" \
-DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \
-DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \
-DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
-DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
-S . \
-B ${CMAKE_OUT}
cmake --build ${CMAKE_OUT} --target install --config Release
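As a rough usage sketch (not part of the change), the script could then be driven like this for the ExecuTorch target; both paths are hypothetical, and the exact library list depends on how ExecuTorch was built:

# Hypothetical invocation; adjust the two paths to your own ExecuTorch checkout/build.
# Headers are included as <executorch/...>, so point EXECUTORCH_INCLUDE_DIRS at the parent directory.
export EXECUTORCH_INCLUDE_DIRS="/path/to/parent-of-executorch"
export EXECUTORCH_LIBRARIES="/path/to/executorch/cmake-out/lib/libexecutorch.a"
sh build_torchao_ops.sh EXECUTORCH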
5 changes: 5 additions & 0 deletions torchao/experimental/kernels/cpu/aarch64/CMakeLists.txt
@@ -13,3 +13,8 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/valpacking/interleave.cpp
)
endif()

install(
TARGETS torchao_kernels_aarch64
DESTINATION lib
)
5 changes: 5 additions & 0 deletions torchao/experimental/ops/linear/CMakeLists.txt
@@ -10,3 +10,8 @@ include(${TORCHAO_ROOT}/Utils.cmake)

add_library(torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND} STATIC channelwise_8bit_activation_groupwise_lowbit_weight.cpp)
target_link_torchao_parallel_backend(torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND} "${TORCHAO_PARALLEL_BACKEND}")

install(
TARGETS torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND}
DESTINATION lib
)
@@ -93,7 +93,7 @@ LinearTilingParams get_default_linear_tiling_params(

LinearTilingParams tiling_params;
auto num_threads = torchao::get_num_threads();
assert(num_threads >= 1);
TORCHAO_CHECK(num_threads >= 1, "num_threads must be >= 1");

tiling_params.mc_by_mr = 1;
int mc = tiling_params.mc_by_mr * ukernel_config.mr;
@@ -19,7 +19,7 @@ if(TORCHAO_OP_TARGET STREQUAL "ATEN")
target_compile_definitions(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE USE_ATEN=1)
elseif(TORCHAO_OP_TARGET STREQUAL "EXECUTORCH")
message(STATUS "Building with TORCHAO_OP_TARGET=EXECUTORCH")
add_library(linear_a8wxdq_${TORCHAO_OP_TARGET} SHARED
add_library(linear_a8wxdq_${TORCHAO_OP_TARGET} STATIC
linear_a8wxdq_executorch/w2s.cpp
linear_a8wxdq_executorch/w2sz.cpp
linear_a8wxdq_executorch/w3s.cpp
@@ -29,9 +29,9 @@ elseif(TORCHAO_OP_TARGET STREQUAL "EXECUTORCH")
linear_a8wxdq_executorch/w5s.cpp
linear_a8wxdq_executorch/w5sz.cpp
)
target_include_directories(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE ${EXECUTORCH_INCLUDE_DIRS})
target_include_directories(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_compile_definitions(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE USE_EXECUTORCH=1)
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE ${EXECUTORCH_LIBRARIES})
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_LIBRARIES}")
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE torchao_kernels_aarch64)
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND})
else()
28 changes: 28 additions & 0 deletions torchao/experimental/ops/parallel-executorch-impl.h
@@ -0,0 +1,28 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <executorch/extension/threadpool/threadpool.h>

template <typename F>
void torchao::parallel_1d(const int64_t begin, const int64_t end, const F& f) {
torch::executorch::threadpool::get_threadpool()->run(
[&](size_t i) {
int64_t idx = begin + i;
f(idx);
},
end - begin);
}

inline void torchao::set_num_threads(int num_threads) {
torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
num_threads);
}

inline int torchao::get_num_threads() {
return torch::executorch::threadpool::get_threadpool()->get_thread_count();
}
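To make the intended use concrete, here is a minimal sketch (not part of the PR) of calling code once this backend is selected; scale_in_parallel and the thread count of 4 are illustrative only:

// Minimal sketch, assuming the library was built with TORCHAO_PARALLEL_EXECUTORCH=1
// so that parallel.h pulls in the implementation above.
#include <cstdint>
#include <vector>
#include <torchao/experimental/ops/parallel.h>

void scale_in_parallel(std::vector<float>& data, float scale) {
  // Resize the shared ExecuTorch threadpool (illustrative choice of 4 threads).
  torchao::set_num_threads(4);

  // parallel_1d runs f(idx) once for every idx in [0, data.size()),
  // scheduling the calls on the ExecuTorch threadpool.
  torchao::parallel_1d(0, static_cast<int64_t>(data.size()), [&](int64_t idx) {
    data[idx] *= scale;
  });
}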
3 changes: 1 addition & 2 deletions torchao/experimental/ops/parallel.h
@@ -34,8 +34,7 @@ int get_num_threads();
#ifdef TORCHAO_PARALLEL_EXECUTORCH
#pragma message( \
"TORCHAO_PARALLEL_EXECUTORCH is set. Using ExecuTorch parallel backend.")

#error "TORCHAO_PARALLEL_EXECUTORCH is not implemented yet"
#include <torchao/experimental/ops/parallel-executorch-impl.h>

#else
#ifdef TORCHAO_PARALLEL_PTHREADPOOL