diff --git a/BUILD.md b/BUILD.md index 2cff5ddfe4ff2..febf8ddb73bff 100644 --- a/BUILD.md +++ b/BUILD.md @@ -84,6 +84,7 @@ For other system requirements and other dependencies, please see [this section]( |**Build Shared Library**|--build_shared_lib|| |**Build Python wheel**|--build_wheel|| |**Build C# and C packages**|--build_csharp|| +|**Build WindowsML**|--use_winml
--use_dml
--build_shared_lib|WindowsML depends on DirectML and the OnnxRuntime shared library.| |**Build Java package**|--build_java|Creates an onnxruntime4j.jar in the build directory, implies `--build_shared_lib`| diff --git a/cgmanifest.json b/cgmanifest.json index 707d663688d3f..f327e07a08297 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -390,7 +390,7 @@ "type": "git" } }, - { + { "component": { "git": { "commitHash": "e8c599bca6c56c44b6730ad93f6abbc9ecd60fc1", @@ -399,8 +399,8 @@ "type": "git" } }, - { - "component":{ + { + "component":{ "type": "other", "Other": { "Name": "Go", diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index f8002c004e64a..9b80c1c433321 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -90,6 +90,7 @@ option(tensorflow_C_PACKAGE_PATH "Path to tensorflow C package installation dir" option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in language other than cpp" OFF) option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump node input shapes and output data to standard output when executing the model." OFF) option(onnxruntime_USE_DML "Build with DirectML support" OFF) +option(onnxruntime_USE_WINML "Build with WinML support" OFF) option(onnxruntime_USE_ACL "Build with ACL support" OFF) option(onnxruntime_ENABLE_INSTRUMENT "Enable Instrument with Event Tracing for Windows (ETW)" OFF) option(onnxruntime_USE_TELEMETRY "Build with Telemetry" OFF) @@ -210,10 +211,14 @@ if (MSVC) SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gw /GL") SET (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gw /GL") endif() - check_cxx_compiler_flag(-Qspectre HAS_QSPECTRE) - if (HAS_QSPECTRE) - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qspectre") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qspectre") + # The WinML build tool chain builds ARM/ARM64, and the internal tool chain does not have folders for spectre mitigation libs. + # WinML performs spectre mitigation differently. 
+ if (NOT DEFINED onnxruntime_DISABLE_QSPECTRE_CHECK) + check_cxx_compiler_flag(-Qspectre HAS_QSPECTRE) + if (HAS_QSPECTRE) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qspectre") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qspectre") + endif() endif() SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DYNAMICBASE") check_cxx_compiler_flag(-guard:cf HAS_GUARD_CF) @@ -547,9 +552,12 @@ if (WIN32) # set linker flags to minimize the binary size. if (MSVC) - foreach(type EXE SHARED) - set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /OPT:REF,ICF,LBR") - set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /INCREMENTAL:NO") + foreach(type EXE STATIC SHARED) + if (NOT type MATCHES STATIC) + set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /OPT:REF,ICF,LBR") + set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /INCREMENTAL:NO") + #TODO: the "/LTCG" switch should be controlled by onnxruntime_ENABLE_LTO + endif() if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") @@ -791,7 +799,7 @@ foreach(target_name onnxruntime_common onnxruntime_graph onnxruntime_framework o endforeach() foreach(provider_name ${ONNXRUNTIME_PROVIDER_NAMES}) - if(NOT provider_name STREQUAL "cpu") + if(NOT provider_name STREQUAL "cpu" AND NOT provider_name STREQUAL "winml") if (MSVC) target_compile_options(onnxruntime_providers_${provider_name} PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$>:/utf-8>") target_compile_options(onnxruntime_providers_${provider_name} PRIVATE "$<$:SHELL:--compiler-options /sdl>" "$<$>:/sdl>") @@ -817,6 +825,18 @@ endif() +if (onnxruntime_USE_WINML) + # WINML uses and depends on the shared lib. 
Note: You can build WINML without DML and you will get a + # CPU only WINML + if (NOT onnxruntime_BUILD_SHARED_LIB) + message( + FATAL_ERROR + "Option onnxruntime_USE_WINML can only be used when onnxruntime_BUILD_SHARED_LIB is also enabled") + endif() + include(wil.cmake) + include(winml.cmake) +endif() # if(onnxruntime_USE_WINML) + #The following files may use the 'onnxruntime_libs' and 'onnxruntime_EXTERNAL_LIBRARIES' vars if (onnxruntime_BUILD_SHARED_LIB) diff --git a/cmake/external/dml.cmake b/cmake/external/dml.cmake index 99e4fa0404859..421b862a1a908 100644 --- a/cmake/external/dml.cmake +++ b/cmake/external/dml.cmake @@ -19,21 +19,19 @@ if (NOT onnxruntime_USE_CUSTOM_DIRECTML) set(NUGET_CONFIG ${PROJECT_SOURCE_DIR}/../NuGet.config) set(PACKAGES_CONFIG ${PROJECT_SOURCE_DIR}/../packages.config) - set(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages) + get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/../packages ABSOLUTE) + set(DML_PACKAGE_DIR ${PACKAGES_DIR}/DirectML.0.0.1) # Restore nuget packages, which will pull down the DirectML redist package add_custom_command( - OUTPUT restore_packages.stamp + OUTPUT ${DML_PACKAGE_DIR}/bin/x64/DirectML.lib ${DML_PACKAGE_DIR}/bin/x86/DirectML.lib DEPENDS ${PACKAGES_CONFIG} ${NUGET_CONFIG} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/nuget/src/nuget restore ${PACKAGES_CONFIG} -PackagesDirectory ${PACKAGES_DIR} -ConfigFile ${NUGET_CONFIG} - COMMAND ${CMAKE_COMMAND} -E touch restore_packages.stamp VERBATIM) - add_custom_target(RESTORE_PACKAGES ALL DEPENDS restore_packages.stamp) + include_directories(BEFORE "${DML_PACKAGE_DIR}/include") + add_custom_target(RESTORE_PACKAGES ALL DEPENDS ${DML_PACKAGE_DIR}/bin/x64/DirectML.lib ${DML_PACKAGE_DIR}/bin/x86/DirectML.lib) add_dependencies(RESTORE_PACKAGES nuget) - - list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES RESTORE_PACKAGES) else() include_directories(${dml_INCLUDE_DIR}) - link_directories(${dml_LIB_DIR}) endif() diff --git a/cmake/onnx/CMakeLists.txt 
b/cmake/onnx/CMakeLists.txt index ca90a7faf7ab4..79177911da1f2 100644 --- a/cmake/onnx/CMakeLists.txt +++ b/cmake/onnx/CMakeLists.txt @@ -8,7 +8,7 @@ target_include_directories(onnx_proto PUBLIC $) onnxruntime_protobuf_generate(APPEND_PATH IMPORT_DIRS ${ONNXRUNTIME_ROOT}/core/protobuf TARGET onnx_proto) if (WIN32) - target_compile_options(onnx_proto PRIVATE "/wd4146" "/wd4125" "/wd4456" "/wd4267") + target_compile_options(onnx_proto PRIVATE "/wd4146" "/wd4125" "/wd4456" "/wd4267" "/wd4309") else() if(HAS_UNUSED_VARIABLE) target_compile_options(onnx_proto PRIVATE "-Wno-unused-variable") @@ -53,6 +53,7 @@ if (WIN32) /wd4100 # 'param' : unreferenced formal parameter /wd4244 # 'argument' conversion from 'google::protobuf::int64' to 'int', possible loss of data /EHsc # exception handling - C++ may throw, extern "C" will not + /wd4996 # 'argument' Using double parameter version instead of single parameter version of SetTotalBytesLimit(). The second parameter is ignored. ) set(onnx_static_library_flags -IGNORE:4221 # LNK4221: This object file does not define any previously undefined public symbols, so it will not be used by any link operation that consumes this library diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index dfb63dcf32238..6f0e9b0f177d3 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -70,6 +70,7 @@ target_link_libraries(onnxruntime PRIVATE ${PROVIDERS_NUPHAR} ${PROVIDERS_DML} ${PROVIDERS_ACL} + ${onnxruntime_winml} onnxruntime_optimizer onnxruntime_providers onnxruntime_util diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 63a8283429dce..52aa2f3989d9d 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -44,6 +44,22 @@ else() endif() endif() +if(CMAKE_GENERATOR_PLATFORM) + # Multi-platform generator + set(onnxruntime_target_platform ${CMAKE_GENERATOR_PLATFORM}) +else() + set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR}) +endif() 
+if(onnxruntime_target_platform STREQUAL "ARM64") + set(onnxruntime_target_platform "ARM64") +elseif(onnxruntime_target_platform STREQUAL "ARM" OR CMAKE_GENERATOR MATCHES "ARM") + set(onnxruntime_target_platform "ARM") +elseif(onnxruntime_target_platform STREQUAL "x64" OR onnxruntime_target_platform STREQUAL "x86_64" OR onnxruntime_target_platform STREQUAL "AMD64" OR CMAKE_GENERATOR MATCHES "Win64") + set(onnxruntime_target_platform "x64") +elseif(onnxruntime_target_platform STREQUAL "x86" OR onnxruntime_target_platform STREQUAL "i386" OR onnxruntime_target_platform STREQUAL "i686") + set(onnxruntime_target_platform "x86") +endif() + file(GLOB onnxruntime_common_src CONFIGURE_DEPENDS ${onnxruntime_common_src_patterns} ) diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake index e7b4213bc6cbb..38ec173dfc0b1 100644 --- a/cmake/onnxruntime_mlas.cmake +++ b/cmake/onnxruntime_mlas.cmake @@ -19,7 +19,7 @@ set(mlas_common_srcs ) if(MSVC) - if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64") + if(onnxruntime_target_platform STREQUAL "ARM64") set(asm_filename ${ONNXRUNTIME_ROOT}/core/mlas/lib/arm64/SgemmKernelNeon.asm) set(pre_filename ${CMAKE_CURRENT_BINARY_DIR}/SgemmKernelNeon.i) set(obj_filename ${CMAKE_CURRENT_BINARY_DIR}/SgemmKernelNeon.obj) @@ -38,11 +38,11 @@ if(MSVC) armasm64.exe ${ARMASM_FLAGS} ${pre_filename} ${obj_filename} ) set(mlas_platform_srcs ${obj_filename}) - elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM" OR CMAKE_GENERATOR MATCHES "ARM") + elseif(onnxruntime_target_platform STREQUAL "ARM") set(mlas_platform_srcs ${ONNXRUNTIME_ROOT}/core/mlas/lib/arm/sgemmc.cpp ) - elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR CMAKE_GENERATOR MATCHES "Win64") + elseif(onnxruntime_target_platform STREQUAL "x64") enable_language(ASM_MASM) set(mlas_platform_srcs diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 6c42e78b5b94d..16bca1a9266c2 100644 --- a/cmake/onnxruntime_providers.cmake +++ 
b/cmake/onnxruntime_providers.cmake @@ -69,6 +69,10 @@ if(onnxruntime_USE_DML) set(PROVIDERS_DML onnxruntime_providers_dml) list(APPEND ONNXRUNTIME_PROVIDER_NAMES dml) endif() +if(onnxruntime_USE_WINML) + set(PROVIDERS_WINML onnxruntime_providers_winml) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES winml) +endif() if(onnxruntime_USE_ACL) set(PROVIDERS_ACL onnxruntime_providers_acl) list(APPEND ONNXRUNTIME_PROVIDER_NAMES acl) @@ -215,7 +219,7 @@ if (onnxruntime_USE_TENSORRT) if ( CMAKE_COMPILER_IS_GNUCC ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-missing-field-initializers") endif() - set(CXX_VERSION_DEFINED TRUE) + set(CXX_VERSION_DEFINED TRUE) add_subdirectory(${ONNXRUNTIME_ROOT}/../cmake/external/onnx-tensorrt) set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS}) if (WIN32) @@ -301,7 +305,7 @@ if (onnxruntime_USE_OPENVINO) if(WIN32) set(OPENVINO_LIB_DIR $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/lib/intel64/Release) set(OPENVINO_TBB_DIR $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/lib/intel64/Release) - set(OPENVINO_MKL_TINY_DIR $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/bin/intel64/Release) + set(OPENVINO_MKL_TINY_DIR $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/bin/intel64/Release) else() set(OPENVINO_LIB_DIR $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/lib/intel64/) set(OPENVINO_TBB_DIR $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/external/tbb/lib) @@ -325,9 +329,9 @@ if (onnxruntime_USE_OPENVINO) else() target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${OPENVINO_INCLUDE_DIR} ${OPENVINO_EXTENSIONS_DIR} ${OPENVINO_LIB_DIR} ${OPENVINO_TBB_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS}) endif() - - if (WIN32) - string(REPLACE "include" "libs" PYTHON_LIB ${PYTHON_INCLUDE_DIRS}) + + if (WIN32) + string(REPLACE "include" "libs" PYTHON_LIB ${PYTHON_INCLUDE_DIRS}) find_package(InferenceEngine 2.1 REQUIRED) 
set(PYTHON_LIBRARIES ${PYTHON_LIB}) set(OPENVINO_CPU_EXTENSION_DIR ${onnxruntime_BINARY_DIR}/ie_cpu_extension/${CMAKE_BUILD_TYPE}) @@ -428,21 +432,41 @@ if (onnxruntime_USE_DML) onnxruntime_add_include_to_target(onnxruntime_providers_dml onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf) add_dependencies(onnxruntime_providers_dml ${onnxruntime_EXTERNAL_DEPENDENCIES}) target_include_directories(onnxruntime_providers_dml PRIVATE ${ONNXRUNTIME_ROOT} ${ONNXRUNTIME_ROOT}/../cmake/external/wil/include) - - target_link_libraries(onnxruntime_providers_dml ${CMAKE_CURRENT_BINARY_DIR}/packages/DirectML.0.0.1/build/DirectML.targets) - target_link_libraries(onnxruntime_providers_dml d3d12.lib dxgi.lib) + + if (NOT onnxruntime_USE_CUSTOM_DIRECTML) + if(NOT onnxruntime_target_platform STREQUAL "x86" AND NOT onnxruntime_target_platform STREQUAL "x64") + message(FATAL_ERROR "Target platform ${onnxruntime_target_platform} is not supported by DML") + endif() + foreach(file "DirectML.dll" "DirectML.pdb" "DirectML.Debug.dll" "DirectML.Debug.pdb") + add_custom_command(TARGET onnxruntime_providers_dml + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}/${file}" $) + endforeach() + endif() + + function(target_add_dml target) + if (NOT onnxruntime_USE_CUSTOM_DIRECTML) + target_link_libraries(${target} PRIVATE "${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}/DirectML.lib") + add_dependencies(${target} RESTORE_PACKAGES) + endif() + endfunction() + + target_add_dml(onnxruntime_providers_dml) + target_link_libraries(onnxruntime_providers_dml PRIVATE d3d12.lib dxgi.lib delayimp.lib) set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll") + # The DML EP requires C++17 set_target_properties(onnxruntime_providers_dml PROPERTIES CXX_STANDARD 17) set_target_properties(onnxruntime_providers_dml PROPERTIES CXX_STANDARD_REQUIRED 
ON) - + target_compile_definitions(onnxruntime_providers_dml PRIVATE ONNX_NAMESPACE=onnx ONNX_ML LOTUS_LOG_THRESHOLD=2 LOTUS_ENABLE_STDERR_LOGGING PLATFORM_WINDOWS) target_compile_definitions(onnxruntime_providers_dml PRIVATE UNICODE _UNICODE NOMINMAX) if (MSVC) target_compile_definitions(onnxruntime_providers_dml PRIVATE _SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING) target_compile_options(onnxruntime_providers_dml PRIVATE "/W3") endif() - + install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dml DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers) set_target_properties(onnxruntime_providers_dml PROPERTIES LINKER_LANGUAGE CXX) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index f6814a34d3b9d..1592f26be2c37 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -340,6 +340,9 @@ onnxruntime_add_include_to_target(onnxruntime_test_utils onnxruntime_framework G if (onnxruntime_USE_DNNL) target_compile_definitions(onnxruntime_test_utils PUBLIC USE_DNNL=1) endif() +if (onnxruntime_USE_DML) + target_add_dml(onnxruntime_test_utils) +endif() add_dependencies(onnxruntime_test_utils ${onnxruntime_EXTERNAL_DEPENDENCIES}) target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/include" PRIVATE ${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT}) set_target_properties(onnxruntime_test_utils PROPERTIES FOLDER "ONNXRuntimeTest") diff --git a/cmake/precompiled_header.cmake b/cmake/precompiled_header.cmake new file mode 100644 index 0000000000000..dbdeb2bb508aa --- /dev/null +++ b/cmake/precompiled_header.cmake @@ -0,0 +1,29 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +# Configures sources on a target to use a precompiled header. This function takes a target and +# header name as input. 
The function will generate a .cpp file that includes the header and is used +# to generate the precompiled header; this source file is added to the target's sources. +function(target_precompiled_header target_name header_name) + if (MSVC AND CMAKE_VS_PLATFORM_TOOLSET) + # The input precompiled header source (i.e. the '.h' file used for the precompiled header). + set(pch_header_path ${header_name}) + get_filename_component(header_base_name ${header_name} NAME_WE) + + # Generate the source file that builds the precompiled header. The generated file will have + # the same base name as the input header name, but has the .cpp extension. + set(pch_source_path ${CMAKE_CURRENT_BINARY_DIR}/${header_base_name}.cpp) + set(pch_source_content "// THIS FILE IS GENERATED BY CMAKE\n#include \"${pch_header_path}\"") + file(WRITE ${pch_source_path} ${pch_source_content}) + set_source_files_properties(${pch_source_path} PROPERTIES COMPILE_FLAGS "/Yc${pch_header_path}") + + # The target's C++ sources use the precompiled header (/Yu). Source-level properties will + # take precedence over target-level properties, so this will not change the generated source + # file's property to create the precompiled header (/Yc). + target_compile_options(${target_name} PRIVATE $<$:/Yu${header_name}>) + + # Append generated precompiled source to target's sources. + target_sources(${target_name} PRIVATE ${pch_source_path}) + + endif() +endfunction() diff --git a/cmake/wil.cmake b/cmake/wil.cmake new file mode 100644 index 0000000000000..36a8bc9d3cd18 --- /dev/null +++ b/cmake/wil.cmake @@ -0,0 +1,5 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +add_library(wil INTERFACE) +target_include_directories(wil INTERFACE external/wil/include/) \ No newline at end of file diff --git a/cmake/winml.cmake b/cmake/winml.cmake new file mode 100644 index 0000000000000..d3814c59492a1 --- /dev/null +++ b/cmake/winml.cmake @@ -0,0 +1,629 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +include(precompiled_header.cmake) +include(winml_sdk_helpers.cmake) +include(winml_cppwinrt.cmake) + +# get the current nuget sdk kit directory +get_sdk(sdk_folder sdk_version) +set(target_folder ONNXRuntime/winml) +set(winml_adapter_dir ${REPO_ROOT}/winml/adapter) +set(winml_api_root ${REPO_ROOT}/winml/api) +set(winml_dll_dir ${REPO_ROOT}/winml/dll) +set(winml_lib_dir ${REPO_ROOT}/winml/lib) +set(winml_lib_api_dir ${REPO_ROOT}/winml/lib/api) +set(winml_lib_api_image_dir ${REPO_ROOT}/winml/lib/api.image) +set(winml_lib_api_ort_dir ${REPO_ROOT}/winml/lib/api.ort) +set(winml_lib_common_dir ${REPO_ROOT}/winml/lib/common) +set(winml_lib_telemetry_dir ${REPO_ROOT}/winml/lib/telemetry) + +# Version parts for Windows.AI.MachineLearning.dll. +set(WINML_VERSION_MAJOR_PART 0 CACHE STRING "First part of numeric file/product version.") +set(WINML_VERSION_MINOR_PART 0 CACHE STRING "Second part of numeric file/product version.") +set(WINML_VERSION_BUILD_PART 0 CACHE STRING "Third part of numeric file/product version.") +set(WINML_VERSION_PRIVATE_PART 0 CACHE STRING "Fourth part of numeric file/product version.") +set(WINML_VERSION_STRING "Internal Build" CACHE STRING "String representation of file/product version.") + +get_filename_component(exclusions "${winml_api_root}/exclusions.txt" ABSOLUTE) +convert_forward_slashes_to_back(${exclusions} CPPWINRT_COMPONENT_EXCLUSION_LIST) + +# For winrt idl files: +# 1) the file name must match the casing of the file on disk. +# 2) for winrt idls the casing must match the namespaces within exactly (Windows.AI.MachineLearning). 
+# target_cppwinrt will attempt to create a winmd with the name and same casing as the supplied +# idl file. If the name of the winmd file does not match the contained namespaces, cppwinrt.exe +# will generate component template files with fully qualified names, which will not match the existing +# generated component files. +# +# For native idl files there are no casing restrictions. +get_filename_component(winrt_idl "${winml_api_root}/Windows.AI.MachineLearning.idl" ABSOLUTE) +get_filename_component(idl_native "${winml_api_root}/windows.ai.machineLearning.native.idl" ABSOLUTE) +get_filename_component(idl_native_internal "${winml_api_root}/windows.ai.machineLearning.native.internal.idl" ABSOLUTE) + +# generate cppwinrt sdk +add_generate_cppwinrt_sdk_headers_target( + winml_sdk_cppwinrt # the target name + ${sdk_folder} # location of sdk folder + ${sdk_version} # sdk version + ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include # output folder relative to CMAKE_BINARY_DIR where the generated sdk will be placed in the + ${target_folder} # folder where this target will be placed +) + +# generate winml headers from idl +target_cppwinrt(winml_api + ${winrt_idl} # winml winrt idl to compile + ${winml_lib_api_dir} # location for cppwinrt generated component sources + ${sdk_folder} # location of sdk folder + ${sdk_version} # sdk version + ${target_folder} # the folder this target will be placed under +) + +target_midl(winml_api_native + ${idl_native} # winml native idl to compile + ${sdk_folder} # location of sdk folder + ${sdk_version} # sdk version + ${target_folder} # the folder this target will be placed under +) + +target_midl(winml_api_native_internal + ${idl_native_internal} # winml internal native idl to compile + ${sdk_folder} # location of sdk folder + ${sdk_version} # sdk version + ${target_folder}) # the folder this target will be placed under + +########################### +# Add winml_lib_telemetry +########################### + +# Add static library that 
will be archived/linked for both static/dynamic library +add_library(winml_lib_telemetry STATIC + ${winml_lib_telemetry_dir}/inc/TelemetryEvent.h + ${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows/TraceLoggingConfig.h + ${winml_lib_common_dir}/inc/WinMLTelemetryHelper.h + ${winml_lib_telemetry_dir}/Telemetry.cpp + ${winml_lib_telemetry_dir}/TelemetryEvent.cpp + ${winml_lib_telemetry_dir}/WinMLTelemetryHelper.cpp + ${winml_lib_telemetry_dir}/pch.h +) + +# Compiler options +target_compile_features(winml_lib_telemetry PRIVATE cxx_std_17) +target_compile_options(winml_lib_telemetry PRIVATE /GR- /await /wd4238) +if (onnxruntime_USE_TELEMETRY) + set_target_properties(winml_lib_telemetry PROPERTIES COMPILE_FLAGS "/FI${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows/TraceLoggingConfigPrivate.h") +endif() + +# Compiler flags +target_compile_definitions(winml_lib_telemetry PRIVATE PLATFORM_WINDOWS) +target_compile_definitions(winml_lib_telemetry PRIVATE _SCL_SECURE_NO_WARNINGS) # remove warnings about unchecked iterators + +# Specify the usage of a precompiled header +target_precompiled_header(winml_lib_telemetry pch.h) + +# Includes +target_include_directories(winml_lib_telemetry PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include) +target_include_directories(winml_lib_telemetry PRIVATE ${CMAKE_SOURCE_DIR}/common/inc) +target_include_directories(winml_lib_telemetry PRIVATE ${winml_lib_telemetry_dir}) +target_include_directories(winml_lib_telemetry PRIVATE ${winml_lib_common_dir}/inc) +target_include_directories(winml_lib_telemetry PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows) + +# Properties +set_target_properties(winml_lib_telemetry + PROPERTIES + FOLDER + ${target_folder}) + +# Link libraries +target_link_libraries(winml_lib_telemetry PRIVATE wil) + +########################### +# Add winml_lib_ort +########################### + +list(APPEND winml_lib_api_ort_files + ${winml_lib_api_ort_dir}/inc/OnnxruntimeProvider.h + 
${winml_lib_api_ort_dir}/OnnxruntimeCpuSessionBuilder.h + ${winml_lib_api_ort_dir}/OnnxruntimeCpuSessionBuilder.cpp + ${winml_lib_api_ort_dir}/OnnxruntimeDescriptorConverter.h + ${winml_lib_api_ort_dir}/OnnxruntimeDescriptorConverter.cpp + ${winml_lib_api_ort_dir}/OnnxruntimeEngine.h + ${winml_lib_api_ort_dir}/OnnxruntimeEngine.cpp + ${winml_lib_api_ort_dir}/OnnxruntimeEngineBuilder.h + ${winml_lib_api_ort_dir}/OnnxruntimeEngineBuilder.cpp + ${winml_lib_api_ort_dir}/OnnxruntimeEnvironment.h + ${winml_lib_api_ort_dir}/OnnxruntimeEnvironment.cpp + ${winml_lib_api_ort_dir}/OnnxruntimeModel.h + ${winml_lib_api_ort_dir}/OnnxruntimeModel.cpp + ${winml_lib_api_ort_dir}/OnnxruntimeSessionBuilder.h + ${winml_lib_api_ort_dir}/pch.h + ) + +if (onnxruntime_USE_DML) + list(APPEND winml_lib_api_ort_files + ${winml_lib_api_ort_dir}/OnnxruntimeDmlSessionBuilder.h + ${winml_lib_api_ort_dir}/OnnxruntimeDmlSessionBuilder.cpp + ) +endif(onnxruntime_USE_DML) + +# Add static library that will be archived/linked for both static/dynamic library +add_library(winml_lib_ort STATIC ${winml_lib_api_ort_files}) + +# Compiler options +target_compile_features(winml_lib_ort PRIVATE cxx_std_17) +target_compile_options(winml_lib_ort PRIVATE /GR- /await /wd4238) + +# Compiler definitions +target_compile_definitions(winml_lib_ort PRIVATE PLATFORM_WINDOWS) +target_compile_definitions(winml_lib_ort PRIVATE _SCL_SECURE_NO_WARNINGS) # remove warnings about unchecked iterators + +# Specify the usage of a precompiled header +target_precompiled_header(winml_lib_ort pch.h) + +# Includes +target_include_directories(winml_lib_ort PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api) # windows machine learning generated component headers +target_include_directories(winml_lib_ort PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated) # windows machine learning generated component headers +target_include_directories(winml_lib_ort PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include) # sdk cppwinrt 
headers + +target_include_directories(winml_lib_ort PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + +target_include_directories(winml_lib_ort PRIVATE ${REPO_ROOT}/winml) +target_include_directories(winml_lib_ort PRIVATE ${winml_lib_api_dir}) # needed for generated headers +target_include_directories(winml_lib_ort PRIVATE ${winml_lib_api_core_dir}) +target_include_directories(winml_lib_ort PRIVATE ${winml_lib_api_ort_dir}) +target_include_directories(winml_lib_ort PRIVATE ${winml_lib_common_dir}/inc) +target_include_directories(winml_lib_ort PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}) +target_include_directories(winml_lib_ort PRIVATE ${ONNXRUNTIME_ROOT}) + +set_target_properties(winml_lib_ort + PROPERTIES + FOLDER + ${target_folder}) + +# Add deps +add_dependencies(winml_lib_ort winml_sdk_cppwinrt) +add_dependencies(winml_lib_ort winml_api) +add_dependencies(winml_lib_ort winml_api_native) +add_dependencies(winml_lib_ort winml_api_native_internal) + +# Link libraries +target_link_libraries(winml_lib_ort PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/packages/DirectML.0.0.1/build/DirectML.targets) +target_link_libraries(winml_lib_ort PRIVATE wil) + + +########################### +# Add winml_adapter +########################### + +list(APPEND winml_adapter_files + ${winml_adapter_dir}/pch.h + ${winml_adapter_dir}/winml_adapter_apis.h + ${winml_adapter_dir}/winml_adapter_c_api.h + ${winml_adapter_dir}/winml_adapter_c_api.cpp + ${winml_adapter_dir}/winml_adapter_dml.cpp + ${winml_adapter_dir}/winml_adapter_environment.cpp + ${winml_adapter_dir}/winml_adapter_execution_provider.cpp + ${winml_adapter_dir}/winml_adapter_model.cpp + ${winml_adapter_dir}/winml_adapter_model.h + ${winml_adapter_dir}/winml_adapter_session.cpp + ) + +if (onnxruntime_USE_DML) + list(APPEND winml_adapter_files + ${winml_adapter_dir}/abi_custom_registry_impl.cpp + ${winml_adapter_dir}/abi_custom_registry_impl.h + ) +endif(onnxruntime_USE_DML) + +add_library(winml_adapter ${winml_adapter_files}) + +# wil requires C++17 
+set_target_properties(winml_adapter PROPERTIES CXX_STANDARD 17) +set_target_properties(winml_adapter PROPERTIES CXX_STANDARD_REQUIRED ON) + +# Compiler definitions +onnxruntime_add_include_to_target(winml_adapter onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf) +target_include_directories(winml_adapter PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS}) +add_dependencies(winml_adapter ${onnxruntime_EXTERNAL_DEPENDENCIES}) + +# Specify the usage of a precompiled header +target_precompiled_header(winml_adapter pch.h) + +# Includes +target_include_directories(winml_adapter PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) # windows machine learning generated component headers +target_include_directories(winml_adapter PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api) # windows machine learning generated component headers +target_include_directories(winml_adapter PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated) # windows machine learning generated component headers +target_include_directories(winml_adapter PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include) # sdk cppwinrt headers +target_include_directories(winml_adapter PRIVATE ${winml_lib_api_dir}) # needed for generated headers +target_include_directories(winml_adapter PRIVATE ${winml_lib_dir}) +target_include_directories(winml_adapter PRIVATE ${winml_adapter_dir}) +target_include_directories(winml_adapter PRIVATE ${winml_lib_common_dir}/inc) + +set_target_properties(winml_adapter + PROPERTIES + FOLDER + ${target_folder}) + +# Add deps +add_dependencies(winml_adapter winml_sdk_cppwinrt) +add_dependencies(winml_adapter winml_api) +add_dependencies(winml_adapter winml_api_native) +add_dependencies(winml_adapter winml_api_native_internal) + +# Link libraries +target_link_libraries(winml_adapter PRIVATE wil) +if (onnxruntime_USE_DML) + target_add_dml(winml_adapter) +endif(onnxruntime_USE_DML) + +# add it to the onnxruntime shared library +set(onnxruntime_winml winml_adapter) 
+list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES winml_adapter) + +########################### +# Add winml_lib_image +########################### + +# Add static library that will be archived/linked for both static/dynamic library +add_library(winml_lib_image STATIC + ${winml_lib_api_image_dir}/inc/ConverterResourceStore.h + ${winml_lib_api_image_dir}/inc/D3DDeviceCache.h + ${winml_lib_api_image_dir}/inc/DeviceHelpers.h + ${winml_lib_api_image_dir}/inc/ImageConversionHelpers.h + ${winml_lib_api_image_dir}/inc/ImageConversionTypes.h + ${winml_lib_api_image_dir}/inc/ImageConverter.h + ${winml_lib_api_image_dir}/inc/TensorToVideoFrameConverter.h + ${winml_lib_api_image_dir}/inc/VideoFrameToTensorConverter.h + ${winml_lib_api_image_dir}/CpuDetensorizer.h + ${winml_lib_api_image_dir}/CpuTensorizer.h + ${winml_lib_api_image_dir}/pch.h + ${winml_lib_api_image_dir}/ConverterResourceStore.cpp + ${winml_lib_api_image_dir}/D3DDeviceCache.cpp + ${winml_lib_api_image_dir}/DeviceHelpers.cpp + ${winml_lib_api_image_dir}/ImageConversionHelpers.cpp + ${winml_lib_api_image_dir}/ImageConverter.cpp + ${winml_lib_api_image_dir}/TensorToVideoFrameConverter.cpp + ${winml_lib_api_image_dir}/VideoFrameToTensorConverter.cpp +) + +# Compiler options +target_compile_features(winml_lib_image PRIVATE cxx_std_17) +target_compile_options(winml_lib_image PRIVATE /GR- /await /wd4238) + +# Compiler flags +target_compile_definitions(winml_lib_image PRIVATE ONNX_NAMESPACE=onnx) +target_compile_definitions(winml_lib_image PRIVATE ONNX_ML) +target_compile_definitions(winml_lib_image PRIVATE LOTUS_LOG_THRESHOLD=2) +target_compile_definitions(winml_lib_image PRIVATE LOTUS_ENABLE_STDERR_LOGGING) +target_compile_definitions(winml_lib_image PRIVATE PLATFORM_WINDOWS) +target_compile_definitions(winml_lib_image PRIVATE _SCL_SECURE_NO_WARNINGS) # remove warnings about unchecked iterators + +# Specify the usage of a precompiled header +target_precompiled_header(winml_lib_image pch.h) + +# Includes 
+target_include_directories(winml_lib_image PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) # windows machine learning generated component headers +target_include_directories(winml_lib_image PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api) # windows machine learning generated component headers +target_include_directories(winml_lib_image PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated) # windows machine learning generated component headers +target_include_directories(winml_lib_image PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include) # sdk cppwinrt headers +target_include_directories(winml_lib_image PRIVATE ${ONNXRUNTIME_ROOT}/core/providers/dml/DmlExecutionProvider/src/External/D3DX12) # for d3dx12.h +target_include_directories(winml_lib_image PRIVATE ${winml_lib_api_dir}) # needed for generated headers +target_include_directories(winml_lib_image PRIVATE ${winml_lib_api_image_dir}) +target_include_directories(winml_lib_image PRIVATE ${winml_lib_common_dir}/inc) +target_include_directories(winml_lib_image PRIVATE ${ONNXRUNTIME_ROOT}) +target_include_directories(winml_lib_image PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}) # for status.h +target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/gsl/include) +target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/onnx) +target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/protobuf/src) + +# Properties +set_target_properties(winml_lib_image + PROPERTIES + FOLDER + ${target_folder}) + +# Add deps +add_dependencies(winml_lib_image winml_sdk_cppwinrt) +add_dependencies(winml_lib_image winml_api) +add_dependencies(winml_lib_image winml_api_native) +add_dependencies(winml_lib_image winml_api_native_internal) + +# Link libraries +target_link_libraries(winml_lib_image PRIVATE wil winml_lib_common) +if (onnxruntime_USE_DML) + target_add_dml(winml_lib_image) +endif(onnxruntime_USE_DML) + + +########################### +# Add winml_lib_api 
+########################### + +# Add static library that will be archived/linked for both static/dynamic library +add_library(winml_lib_api STATIC + ${winml_lib_api_dir}/impl/FeatureCompatibility.h + ${winml_lib_api_dir}/impl/IMapFeatureValue.h + ${winml_lib_api_dir}/impl/ISequenceFeatureValue.h + ${winml_lib_api_dir}/impl/MapBase.h + ${winml_lib_api_dir}/impl/SequenceBase.h + ${winml_lib_api_dir}/impl/Tensor.h + ${winml_lib_api_dir}/impl/TensorBase.h + ${winml_lib_api_dir}/impl/TensorBuffer.h + ${winml_lib_api_dir}/impl/TensorKindFrom.h + ${winml_lib_api_dir}/impl/TensorMemoryBufferReference.h + ${winml_lib_api_dir}/ImageFeatureDescriptor.cpp + ${winml_lib_api_dir}/ImageFeatureDescriptor.h + ${winml_lib_api_dir}/ImageFeatureValue.cpp + ${winml_lib_api_dir}/ImageFeatureValue.h + ${winml_lib_api_dir}/LearningModel.cpp + ${winml_lib_api_dir}/LearningModel.h + ${winml_lib_api_dir}/LearningModelBinding.cpp + ${winml_lib_api_dir}/LearningModelBinding.h + ${winml_lib_api_dir}/LearningModelDevice.cpp + ${winml_lib_api_dir}/LearningModelDevice.h + ${winml_lib_api_dir}/LearningModelEvaluationResult.cpp + ${winml_lib_api_dir}/LearningModelEvaluationResult.h + ${winml_lib_api_dir}/LearningModelSession.cpp + ${winml_lib_api_dir}/LearningModelSession.h + ${winml_lib_api_dir}/LearningModelSessionOptions.cpp + ${winml_lib_api_dir}/LearningModelSessionOptions.h + ${winml_lib_api_dir}/MapFeatureDescriptor.cpp + ${winml_lib_api_dir}/MapFeatureDescriptor.h + ${winml_lib_api_dir}/SequenceFeatureDescriptor.cpp + ${winml_lib_api_dir}/SequenceFeatureDescriptor.h + ${winml_lib_api_dir}/TensorFeatureDescriptor.cpp + ${winml_lib_api_dir}/TensorFeatureDescriptor.h + ${winml_lib_api_dir}/pch/pch.h +) + +# Compiler options +target_compile_features(winml_lib_api PRIVATE cxx_std_17) +target_compile_options(winml_lib_api PRIVATE /GR- /await /bigobj /wd4238) + +# Compiler flags +target_compile_definitions(winml_lib_api PRIVATE ONNX_NAMESPACE=onnx) +target_compile_definitions(winml_lib_api PRIVATE 
ONNX_ML) +target_compile_definitions(winml_lib_api PRIVATE LOTUS_LOG_THRESHOLD=2) +target_compile_definitions(winml_lib_api PRIVATE LOTUS_ENABLE_STDERR_LOGGING) +target_compile_definitions(winml_lib_api PRIVATE PLATFORM_WINDOWS) +target_compile_definitions(winml_lib_api PRIVATE _SCL_SECURE_NO_WARNINGS) # remove warnings about unchecked iterators + +# Specify the usage of a precompiled header +target_precompiled_header(winml_lib_api pch.h) + +# Includes +target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api) # windows machine learning generated component headers +target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated) # windows machine learning generated component headers +target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include) # sdk cppwinrt headers + +target_include_directories(winml_lib_api PRIVATE ${winml_lib_api_dir}) +target_include_directories(winml_lib_api PRIVATE ${winml_lib_api_dir}/pch) +target_include_directories(winml_lib_api PRIVATE ${winml_adapter_dir}) +target_include_directories(winml_lib_api PRIVATE ${winml_lib_api_image_dir}/inc) +target_include_directories(winml_lib_api PRIVATE ${winml_lib_api_ort_dir}/inc) +target_include_directories(winml_lib_api PRIVATE ${winml_lib_telemetry_dir}/inc) +target_include_directories(winml_lib_api PRIVATE ${winml_lib_common_dir}/inc) + +target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/date/include) +target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/gsl/include) +target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/onnx) + +target_include_directories(winml_lib_api PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}) +target_include_directories(winml_lib_api PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}/core/graph) 
+target_include_directories(winml_lib_api PRIVATE ${ONNXRUNTIME_ROOT}) +target_include_directories(winml_lib_api PRIVATE ${ONNXRUNTIME_ROOT}/core/graph) +target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/eigen) +target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/onnx) +target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/protobuf/src) +target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/gsl/include) + +# Properties +set_target_properties(winml_lib_api + PROPERTIES + FOLDER + ${target_folder}) + +# Add deps +add_dependencies(winml_lib_api onnx) +add_dependencies(winml_lib_api winml_sdk_cppwinrt) +add_dependencies(winml_lib_api winml_api) +add_dependencies(winml_lib_api winml_api_native) +add_dependencies(winml_lib_api winml_api_native_internal) + +# Link libraries +target_link_libraries(winml_lib_api PRIVATE wil winml_lib_telemetry) +if (onnxruntime_USE_DML) + target_add_dml(winml_lib_api) +endif(onnxruntime_USE_DML) + +########################### +# Add winml_lib_common +########################### + +add_library(winml_lib_common STATIC + ${winml_lib_common_dir}/inc/common.h + ${winml_lib_common_dir}/inc/CommonDeviceHelpers.h + ${winml_lib_common_dir}/inc/cppwinrt_onnx.h + ${winml_lib_common_dir}/inc/dx.h + ${winml_lib_common_dir}/inc/errors.h + ${winml_lib_common_dir}/inc/iengine.h + ${winml_lib_common_dir}/inc/NamespaceAliases.h + ${winml_lib_common_dir}/inc/onnx.h + ${winml_lib_common_dir}/inc/PheonixSingleton.h + ${winml_lib_common_dir}/inc/StringHelpers.h + ${winml_lib_common_dir}/inc/WinMLTelemetryHelper.h + ${winml_lib_common_dir}/inc/WinML_Lock.h + ${winml_lib_common_dir}/inc/winrt_headers.h + ${winml_lib_common_dir}/CommonDeviceHelpers.cpp +) + +set_target_properties(winml_lib_common PROPERTIES CXX_STANDARD 17) +set_target_properties(winml_lib_common PROPERTIES CXX_STANDARD_REQUIRED ON) +target_compile_options(winml_lib_common PRIVATE /GR- /await /bigobj 
/wd4238) +target_link_libraries(winml_lib_common PRIVATE wil) +target_include_directories(winml_lib_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api) +target_compile_definitions(winml_lib_common PRIVATE + ONNX_NAMESPACE=onnx + ONNX_ML + LOTUS_LOG_THRESHOLD=2 + LOTUS_ENABLE_STDERR_LOGGING + PLATFORM_WINDOWS + _SCL_SECURE_NO_WARNINGS) +add_dependencies(winml_lib_common winml_sdk_cppwinrt) +add_dependencies(winml_lib_common winml_api) +add_dependencies(winml_lib_common winml_api_native) +add_dependencies(winml_lib_common winml_api_native_internal) + +target_include_directories(winml_lib_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api) # windows machine learning generated component headers +target_include_directories(winml_lib_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated) # windows machine learning generated component headers +target_include_directories(winml_lib_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include) # sdk cppwinrt headers +target_include_directories(winml_lib_common PRIVATE ${winml_lib_api_dir}) +target_include_directories(winml_lib_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(winml_lib_common PRIVATE ${winml_lib_common_dir}/inc) +target_precompiled_header(winml_lib_common inc/pch.h) + +if (onnxruntime_USE_DML) + target_add_dml(winml_lib_common) +endif() + +########################### +# Add winml_dll +########################### + +set_source_files_properties( + ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated/module.g.excl.cpp + PROPERTIES + GENERATED + TRUE) + +# Add library +add_library(winml_dll SHARED + ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated/module.g.excl.cpp + ${winml_dll_dir}/windows.ai.machinelearning.def + ${winml_dll_dir}/winml.rc + ${winml_dll_dir}/pch.h + ${winml_dll_dir}/module.cpp +) + +# Compiler options +target_compile_features(winml_dll PRIVATE cxx_std_17) +target_compile_options(winml_dll PRIVATE /GR- /await /bigobj /wd4238) + +# Compiler 
definitions +target_compile_definitions(winml_dll PRIVATE ONNX_NAMESPACE=onnx) +target_compile_definitions(winml_dll PRIVATE ONNX_ML) +target_compile_definitions(winml_dll PRIVATE LOTUS_LOG_THRESHOLD=2) +target_compile_definitions(winml_dll PRIVATE LOTUS_ENABLE_STDERR_LOGGING) +target_compile_definitions(winml_dll PRIVATE PLATFORM_WINDOWS) +target_compile_definitions(winml_dll PRIVATE VER_MAJOR=${WINML_VERSION_MAJOR_PART}) +target_compile_definitions(winml_dll PRIVATE VER_MINOR=${WINML_VERSION_MINOR_PART}) +target_compile_definitions(winml_dll PRIVATE VER_BUILD=${WINML_VERSION_BUILD_PART}) +target_compile_definitions(winml_dll PRIVATE VER_PRIVATE=${WINML_VERSION_PRIVATE_PART}) +target_compile_definitions(winml_dll PRIVATE VER_STRING=\"${WINML_VERSION_STRING}\") + +# Specify the usage of a precompiled header +target_precompiled_header(winml_dll pch.h) + +# Includes +target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api) # windows machine learning generated component headers +target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated) # windows machine learning generated component headers +target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include) # sdk cppwinrt headers + +target_include_directories(winml_dll PRIVATE ${winml_dll_dir}) +target_include_directories(winml_dll PRIVATE ${winml_lib_api_dir}) +target_include_directories(winml_dll PRIVATE ${winml_lib_api_dir}/impl) +target_include_directories(winml_dll PRIVATE ${winml_lib_api_ort_dir}/inc) +target_include_directories(winml_dll PRIVATE ${winml_adapter_dir}) +target_include_directories(winml_dll PRIVATE ${winml_lib_api_image_dir}/inc) +target_include_directories(winml_dll PRIVATE ${winml_lib_telemetry_dir}/inc) +target_include_directories(winml_dll PRIVATE ${winml_lib_common_dir}/inc) + +target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(winml_dll 
PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/date/include)
+target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/gsl/include)
+target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/onnx)
+
+target_include_directories(winml_dll PRIVATE ${ONNXRUNTIME_INCLUDE_DIR})
+target_include_directories(winml_dll PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}/core/graph)
+target_include_directories(winml_dll PRIVATE ${ONNXRUNTIME_ROOT})
+target_include_directories(winml_dll PRIVATE ${ONNXRUNTIME_ROOT}/core/graph)
+target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/onnx)
+target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/protobuf/src)
+target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/gsl/include)
+target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/eigen)
+
+# Properties
+set_target_properties(winml_dll
+  PROPERTIES
+  OUTPUT_NAME windows.ai.machinelearning)
+
+if (onnxruntime_USE_DML)
+  set(delayload_dml "/DELAYLOAD:directml.dll")
+endif(onnxruntime_USE_DML)
+
+# The default libraries to link with in Windows are kernel32.lib;user32.lib;gdi32.lib;winspool.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;comdlg32.lib;advapi32.lib
+# Remove them and use the onecore umbrella library instead
+foreach(default_lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib)
+  set(removed_libs "${removed_libs} /NODEFAULTLIB:${default_lib}")
+endforeach()
+set(CMAKE_C_STANDARD_LIBRARIES "${removed_libs} onecoreuap.lib")
+set(CMAKE_CXX_STANDARD_LIBRARIES "${removed_libs} onecoreuap.lib")
+set_target_properties(winml_dll
+  PROPERTIES
+  LINK_FLAGS
+  "/DEF:${WINML_DIR}/windows.ai.machinelearning.def ${os_component_link_flags} /DELAYLOAD:d3d12.dll /DELAYLOAD:d3d11.dll /DELAYLOAD:dxgi.dll ${delayload_dml}")
+
+
+set_target_properties(winml_dll
+  PROPERTIES
+  FOLDER
+  ${target_folder})
+
+# Add deps
+add_dependencies(winml_dll winml_sdk_cppwinrt) +add_dependencies(winml_dll winml_api_native) +add_dependencies(winml_dll winml_api_native_internal) + +# Any project that links in debug_alloc.obj needs this lib. +# unresolved external symbol __imp_SymSetOptions +# ... __imp_SymGetLineFromAddr64 +# ... __imp_SymInitialize +# ... __imp_SymFromAddr +if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + set(DBGHELP dbghelp.lib) +endif("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + +# Link libraries +target_link_libraries(winml_dll PRIVATE onnxruntime) +target_link_libraries(winml_dll PRIVATE re2) +target_link_libraries(winml_dll PRIVATE wil) +target_link_libraries(winml_dll PRIVATE winml_lib_api) +target_link_libraries(winml_dll PRIVATE winml_lib_image) +target_link_libraries(winml_dll PRIVATE winml_lib_ort) +target_link_libraries(winml_dll PRIVATE winml_lib_telemetry) +target_link_libraries(winml_dll PRIVATE delayimp.lib) +target_link_libraries(winml_dll PRIVATE ${DBGHELP}) + +# 1 of 3 projects that fail in link with 'failed to do memory mapped file I/O' (Only release) +# when using x86 hosted architecture. When using the LKG compiler this becomes a problem +# because it falls back to incorrectly using the public version of link. +# To avoid the scenario completely, this will tell cl/link to already start with x64 hosted, +# rather than waiting for it to fail and retry and resolve incorrectly. +if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") + set_target_properties(winml_dll PROPERTIES VS_GLOBAL_PreferredToolArchitecture "x64") +endif("${CMAKE_BUILD_TYPE}" STREQUAL "Release") + +option(onnxruntime_BUILD_WINML_TESTS "Build WinML tests" ON) +if (onnxruntime_BUILD_WINML_TESTS) + include(winml_unittests.cmake) +endif() + +# This is needed to suppress warnings that complain that no imports are found for the delayloaded library cublas64*.lib +# When cuda is enabled in the pipeline, it sets CMAKE_SHARED_LINKER_FLAGS which affects all targets including winml_dll. 
+# However, there are no cuda imports in winml_dll, and the linker throws the 4199 warning. +# This is needed to allow winml_dll build with cuda enabled. +set_target_properties(winml_dll + PROPERTIES + LINK_FLAGS + "/ignore:4199") \ No newline at end of file diff --git a/cmake/winml_cppwinrt.cmake b/cmake/winml_cppwinrt.cmake new file mode 100644 index 0000000000000..c047689b32588 --- /dev/null +++ b/cmake/winml_cppwinrt.cmake @@ -0,0 +1,223 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +# This script adds cppwinrt support for VS-generated projects. +# +# target_cppwinrt(foo bar.idl) +# +# Calling target_midl function runs midlrt.exe and produces bar.h +# Calling target_cppwinrt function does two things: +# +# 1) Adds a target "bar.cppwinrt", which performs the midl and cppwinrt +# builds and produces: +# bar.h +# bar.winmd +# bar.tlb +# module.g.cpp +# +# 2) Adds a dependency to the new custom target "bar.cppwinrt" + +function(target_midl + target_name + idl_file + sdk_folder # sdk kit directory + sdk_version # sdk version + folder_name) + if (MSVC) + # get sdk include paths for midl + get_sdk_include_folder(${sdk_folder} ${sdk_version} sdk_include_folder) + set(um_sdk_directory "${sdk_include_folder}/um") + set(shared_sdk_directory "${sdk_include_folder}/shared") + set(winrt_sdk_directory "${sdk_include_folder}/winrt") + + # get sdk metadata path + get_sdk_metadata_folder(${sdk_folder} ${sdk_version} sdk_metadata_directory_forward_slashes) + convert_forward_slashes_to_back(${sdk_metadata_directory_forward_slashes} sdk_metadata_directory) + + # get midl + get_sdk_midl_exe(${sdk_folder} ${sdk_version} midl_exe) + + # Filename variables + get_filename_component(file_name_with_extension ${idl_file} NAME) + string(REGEX REPLACE "\\.[^.]*$" "" file_name ${file_name_with_extension}) + set(header_filename ${file_name}.h) + convert_forward_slashes_to_back(${idl_file} idl_file_forward_slash) + + # using 
add_custom_command trick to prevent rerunning script unless ${file} is changed + add_custom_command( + OUTPUT ${header_filename} + COMMAND ${midl_exe} + /metadata_dir ${sdk_metadata_directory} + /W1 /char signed /nologo /winrt + /no_settings_comment /no_def_idir /target "NT60" + /I ${um_sdk_directory} + /I ${shared_sdk_directory} + /I ${winrt_sdk_directory} + /I ${CMAKE_CURRENT_SOURCE_DIR} + /h ${header_filename} + ${idl_file_forward_slash} + DEPENDS ${idl_file} + ) + + add_custom_target( + ${target_name} + ALL + DEPENDS ${header_filename} + ) + + set_target_properties(${target_name} PROPERTIES FOLDER ${folder_name}) + endif() +endfunction() + +function(target_cppwinrt + target_name # the name of the target to add + file # name of the idl file to compile + out_sources_folder # path where generated sources will be placed + sdk_folder # sdk kit directory + sdk_version # sdk version + folder_name # folder this target will be placed +) + if (MSVC) + # get sdk include paths for midl + get_sdk_include_folder(${sdk_folder} ${sdk_version} sdk_include_folder) + set(um_sdk_directory "${sdk_include_folder}/um") + set(shared_sdk_directory "${sdk_include_folder}/shared") + set(winrt_sdk_directory "${sdk_include_folder}/winrt") + + # get sdk metadata path + get_sdk_metadata_folder(${sdk_folder} ${sdk_version} sdk_metadata_directory_forward_slashes) + convert_forward_slashes_to_back(${sdk_metadata_directory_forward_slashes} sdk_metadata_directory) + + # get midl + get_sdk_midl_exe(${sdk_folder} ${sdk_version} midl_exe) + + # get cppwinrt + get_sdk_cppwinrt_exe(${sdk_folder} ${sdk_version} cppwinrt_exe) + + # Filename variables + convert_forward_slashes_to_back(${file} idl_file_forward_slash) + get_filename_component(file_name_with_extension ${file} NAME) + string(REGEX REPLACE "\\.[^.]*$" "" fileName ${file_name_with_extension}) + set(header_filename ${fileName}.h) + set(winmd_filename ${fileName}.winmd) + set(tlb_filename ${fileName}.tlb) + + # Get directory + 
get_filename_component(idl_source_directory ${file} DIRECTORY) + + set(target_outputs ${CMAKE_CURRENT_BINARY_DIR}/${target_name}) + convert_forward_slashes_to_back(${target_outputs}/comp output_dir_back_slash) + convert_forward_slashes_to_back(${target_outputs}/temp temp_dir_back_slash) + convert_forward_slashes_to_back(${target_outputs}/comp_generated generated_dir_back_slash) + convert_forward_slashes_to_back(${generated_dir_back_slash}/module.g.cpp module_g_cpp_back_slash) + convert_forward_slashes_to_back(${generated_dir_back_slash}/module.g.excl.cpp module_g_ecxl_cpp_back_slash) + + # using add_custom_command trick to prevent rerunning script unless ${file} is changed + add_custom_command( + OUTPUT ${header_filename} ${winmd_filename} + DEPENDS ${file} + COMMAND ${midl_exe} + /metadata_dir ${sdk_metadata_directory} + /W1 /char signed /nomidl /nologo /winrt + /no_settings_comment /no_def_idir /target "NT60" + /I ${um_sdk_directory} + /I ${shared_sdk_directory} + /I ${winrt_sdk_directory} + /I ${idl_source_directory} + /winmd ${winmd_filename} + /h ${header_filename} + /tlb ${tlb_filename} + ${idl_file_forward_slash} + COMMAND + ${cppwinrt_exe} -in ${winmd_filename} -comp ${output_dir_back_slash} -ref ${sdk_metadata_directory} -out ${generated_dir_back_slash} -verbose + COMMAND + # copy the generated component files into a temporary directory where headers exclusions will be applied + xcopy ${output_dir_back_slash} ${temp_dir_back_slash}\\ /Y /D + COMMAND + # for each file in the temp directory, ensure it is not in the exclusions list. + # if it is, then we need to delete it. 
+ cmd /C "@echo off \ + for /f %I in ('dir /b ${temp_dir_back_slash}') \ + do \ + ( \ + for /f %E in (${CPPWINRT_COMPONENT_EXCLUSION_LIST}) \ + do \ + ( \ + if %E == %I \ + ( \ + del ${temp_dir_back_slash}\\%I \ + ) \ + ) \ + )" + COMMAND + # for each file in the temp directory, copy the file back into the source tree + # unless the file already exists + cmd /C "@echo off \ + for /f %I in ('dir /b ${temp_dir_back_slash}') \ + do \ + ( \ + if not exist ${out_sources_folder}\\%I \ + ( \ + copy ${temp_dir_back_slash}\\%I ${out_sources_folder}\\%I \ + ) \ + )" + COMMAND + # open the generated module.g.cpp and strip all the includes (lines) containing excluded headers + # write the new file out to module.g.excl.cpp. + powershell -Command "& { \ + $exclusions = get-content '${CPPWINRT_COMPONENT_EXCLUSION_LIST}'; \ + (get-content '${module_g_cpp_back_slash}') \ + | where { \ + $str = $_; \ + $matches = ($exclusions | where { $str -match $_ }); \ + $matches.Length -eq 0 } \ + | Out-File '${module_g_ecxl_cpp_back_slash}' \ + }" + BYPRODUCTS + ${generated_dir_back_slash}/module.g.excl.cpp + VERBATIM + ) + + add_custom_target( + ${target_name} + ALL + DEPENDS ${header_filename} ${winmd_filename} + ) + + set_target_properties(${target_name} PROPERTIES FOLDER ${folder_name}) + endif() +endfunction() + +function(add_generate_cppwinrt_sdk_headers_target + target_name # the name of the target to add + sdk_folder # sdk kit directory + sdk_version # sdk version + sdk_directory # the name of the folder to output the sdk headers to + folder_name # folder this target will be placed +) + if (MSVC) + # get the current nuget sdk's metadata directory + get_sdk_metadata_folder(${sdk_folder} ${sdk_version} metadata_folder) + + # get cppwinrt + get_sdk_cppwinrt_exe(${sdk_folder} ${sdk_version} cppwinrt_exe) + + # windows.winmd is consumed by cppwinrt to produce the sdk headers + set(windows_winmd "${metadata_folder}/windows.winmd") + + # base.h along with the other winrt sdk headers are 
produced by this command + set(base_h "${sdk_directory}/winrt/base.h") + + # using add_custom_command trick to prevent rerunning script unless ${windows_winmd} is changed + add_custom_command( + OUTPUT ${base_h} + DEPENDS ${windows_winmd} + COMMAND ${cppwinrt_exe} -in \"${metadata_folder}\" -out \"${sdk_directory}\" -verbose + ) + + # add the target + add_custom_target(${target_name} ALL DEPENDS ${base_h}) + + set_target_properties(${target_name} PROPERTIES FOLDER ${folder_name}) + endif() +endfunction() diff --git a/cmake/winml_sdk_helpers.cmake b/cmake/winml_sdk_helpers.cmake new file mode 100644 index 0000000000000..9241fcd060caf --- /dev/null +++ b/cmake/winml_sdk_helpers.cmake @@ -0,0 +1,120 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +cmake_minimum_required(VERSION 3.0) + +# utility +function(convert_forward_slashes_to_back input output) + string(REGEX REPLACE "/" "\\\\" backwards ${input}) + set(${output} ${backwards} PARENT_SCOPE) +endfunction() + +# get window 10 install path from registry +function(get_installed_sdk + sdk_folder # the current sdk folder + output_sdk_version # the current sdk version +) + # return the kit path + get_filename_component(win10_sdk_root "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots;KitsRoot10]" ABSOLUTE CACHE) + set(${sdk_folder} ${win10_sdk_root} PARENT_SCOPE) + + # return the sdk version + if(CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) + set(${output_sdk_version} ${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION} PARENT_SCOPE) + else() + # choose the SDK matching the system version, or fallback to the latest + file(GLOB win10_sdks RELATIVE "${win10_sdk_root}/UnionMetadata" "${win10_sdk_root}/UnionMetadata/*.*.*.*") + list(GET win10_sdks 0 latest_sdk) + foreach(sdk IN LISTS win10_sdks) + string(FIND ${sdk} ${CMAKE_SYSTEM_VERSION} is_system_version) + if(NOT ${is_system_version} EQUAL -1) + set(${output_sdk_version} ${sdk} PARENT_SCOPE) + return() + 
elseif(sdk VERSION_GREATER latest_sdk) + set(latest_sdk ${sdk}) + endif() + endforeach() + set(${output_sdk_version} ${latest_sdk} PARENT_SCOPE) + endif() +endfunction() + +# current sdk binary directory +function(get_sdk_binary_directory + sdk_folder # the kit path + sdk_version # the sdk version + binary_dir # the output folder variable +) + set(${binary_dir} "${sdk_folder}/bin/${sdk_version}" PARENT_SCOPE) +endfunction() + +# current sdk include directory +function(get_sdk_include_folder + sdk_folder # the kit path + sdk_version # the sdk version + include_dir # the output folder variable +) + set(${include_dir} "${sdk_folder}/include/${sdk_version}" PARENT_SCOPE) +endfunction() + +# current sdk metadata directory +function(get_sdk_metadata_folder + sdk_folder # the kit path + sdk_version # the sdk version + metadata_dir # the output folder variable +) + set(${metadata_dir} "${sdk_folder}/UnionMetadata/${sdk_version}" PARENT_SCOPE) +endfunction() + +# current sdk midl exe path +function(get_sdk_midl_exe + sdk_folder # the kit path + sdk_version # the sdk version + midl_exe_path # the output exe path +) + get_sdk_binary_directory(${sdk_folder} ${sdk_version} bin_dir) + set(${midl_exe_path} "${bin_dir}/x64/midlrt.exe" PARENT_SCOPE) +endfunction() + +# current cppwinrt cppwinrt exe path +function(get_installed_sdk_cppwinrt_exe + sdk_folder # the kit path + sdk_version # the sdk version + cppwinrt_exe_path # the output exe path +) + get_sdk_binary_directory(${sdk_folder} ${sdk_version} bin_dir) + set(${cppwinrt_exe_path} "${bin_dir}/x64/cppwinrt.exe" PARENT_SCOPE) +endfunction() + +# current cppwinrt cppwinrt exe path +function(get_sdk_cppwinrt_exe + sdk_folder # the kit path + sdk_version # the sdk version + output_cppwinrt_exe_path # the output exe path +) + if (NOT DEFINED winml_CPPWINRT_EXE_PATH_OVERRIDE) + get_installed_sdk_cppwinrt_exe(${sdk_folder} ${sdk_version} cppwinrt_exe_path) + set(${output_cppwinrt_exe_path} ${cppwinrt_exe_path} PARENT_SCOPE) + else () 
+ set(${output_cppwinrt_exe_path} ${winml_CPPWINRT_EXE_PATH_OVERRIDE} PARENT_SCOPE) + endif() +endfunction() + +function(get_sdk + output_sdk_folder # the path to the current sdk kit folder + output_sdk_version # the current sdk version +) + if ((NOT DEFINED winml_WINDOWS_SDK_DIR_OVERRIDE) AND + (NOT DEFINED winml_WINDOWS_SDK_VERSION_OVERRIDE)) + get_installed_sdk(sdk_folder sdk_version) + set(${output_sdk_folder} ${sdk_folder} PARENT_SCOPE) + set(${output_sdk_version} ${sdk_version} PARENT_SCOPE) + elseif ((DEFINED winml_WINDOWS_SDK_DIR_OVERRIDE) AND + (DEFINED winml_WINDOWS_SDK_VERSION_OVERRIDE)) + set(${output_sdk_folder} ${winml_WINDOWS_SDK_DIR_OVERRIDE} PARENT_SCOPE) + set(${output_sdk_version} ${winml_WINDOWS_SDK_VERSION_OVERRIDE} PARENT_SCOPE) + else() + message( + FATAL_ERROR + "Options winml_WINDOWS_SDK_DIR_OVERRIDE and winml_WINDOWS_SDK_VERSION_OVERRIDE must be defined together, or not at all.") + endif() +endfunction() diff --git a/cmake/winml_unittests.cmake b/cmake/winml_unittests.cmake new file mode 100644 index 0000000000000..8e35f7e75bde8 --- /dev/null +++ b/cmake/winml_unittests.cmake @@ -0,0 +1,132 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+
+set(WINML_TEST_SRC_DIR ${REPO_ROOT}/winml/test)
+set(WINML_TEST_INC_DIR
+  ${REPO_ROOT}/winml/test/common
+  ${REPO_ROOT}/winml/lib/Common/inc
+  ${REPO_ROOT}/onnxruntime
+  ${REPO_ROOT}/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/D3DX12
+  ${REPO_ROOT}/cmake/external/googletest/googletest/include
+  ${REPO_ROOT}/cmake/external/protobuf/src
+  ${REPO_ROOT}/cmake/external/wil/include
+  ${CMAKE_CURRENT_BINARY_DIR}
+  ${CMAKE_CURRENT_BINARY_DIR}/winml_api
+  ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated
+  ${CMAKE_CURRENT_BINARY_DIR}/winml/sdk/cppwinrt/include)
+
+function(set_winml_target_properties target)
+  set_target_properties(${target} PROPERTIES
+    FOLDER "ONNXRuntimeTest/winml"
+    CXX_STANDARD 17
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS NO
+  )
+  target_include_directories(${target} PRIVATE ${WINML_TEST_INC_DIR})
+endfunction()
+
+function(add_winml_test)
+  # Add a test target and make it discoverable by CTest by calling add_test
+  cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS" ${ARGN})
+  if(_UT_LIBS)
+    list(REMOVE_DUPLICATES _UT_LIBS)
+  endif()
+  list(REMOVE_DUPLICATES _UT_SOURCES)
+  if (_UT_DEPENDS)
+    list(REMOVE_DUPLICATES _UT_DEPENDS)
+  endif()
+
+  add_executable(${_UT_TARGET} ${_UT_SOURCES})
+  source_group(TREE ${WINML_TEST_SRC_DIR} FILES ${_UT_SOURCES})
+  set_winml_target_properties(${_UT_TARGET})
+
+  if (_UT_DEPENDS)
+    add_dependencies(${_UT_TARGET} ${_UT_DEPENDS})
+  endif()
+  target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} gtest winml_google_test_lib ${onnxruntime_EXTERNAL_LIBRARIES} winml_lib_common onnxruntime)
+
+  add_test(NAME ${_UT_TARGET}
+    COMMAND ${_UT_TARGET}
+    WORKING_DIRECTORY $<TARGET_FILE_DIR:${_UT_TARGET}>
+  )
+endfunction()
+
+function(get_winml_test_scenario_src
+  winml_test_src_path
+  output_winml_test_scenario_src
+  output_winml_test_scenario_libs
+)
+  if (onnxruntime_USE_DML)
+    file(GLOB winml_test_scenario_src CONFIGURE_DEPENDS "${winml_test_src_path}/scenario/cppwinrt/*.cpp")
+    set(${output_winml_test_scenario_libs}
"onnxruntime_providers_dml" PARENT_SCOPE) + else() + set(winml_test_scenario_src "${winml_test_src_path}/scenario/cppwinrt/scenariotestscppwinrt.cpp") + endif() + set(${output_winml_test_scenario_src} ${winml_test_scenario_src} PARENT_SCOPE) +endfunction() + +function(get_winml_test_api_src + winml_test_src_path + output_winml_test_api_src +) + file(GLOB winml_test_api_src CONFIGURE_DEPENDS "${winml_test_src_path}/api/*.cpp") + set(${output_winml_test_api_src} ${winml_test_api_src} PARENT_SCOPE) +endfunction() + +file(GLOB winml_test_common_src CONFIGURE_DEPENDS "${WINML_TEST_SRC_DIR}/common/*.cpp") +add_library(winml_test_common STATIC ${winml_test_common_src}) +add_dependencies(winml_test_common + onnx + winml_api + winml_dll +) + +add_library(winml_google_test_lib STATIC ${WINML_TEST_SRC_DIR}/common/googletest/main.cpp) +set_winml_target_properties(winml_google_test_lib) + +set_winml_target_properties(winml_test_common) +get_winml_test_api_src(${WINML_TEST_SRC_DIR} winml_test_api_src) +add_winml_test( + TARGET winml_test_api + SOURCES ${winml_test_api_src} + LIBS winml_test_common +) +target_compile_definitions(winml_test_api PRIVATE BUILD_GOOGLE_TEST) +target_precompiled_header(winml_test_api testPch.h) + +get_winml_test_scenario_src(${WINML_TEST_SRC_DIR} winml_test_scenario_src winml_test_scenario_libs) +add_winml_test( + TARGET winml_test_scenario + SOURCES ${winml_test_scenario_src} + LIBS winml_test_common delayimp.lib ${winml_test_scenario_libs} +) +target_precompiled_header(winml_test_scenario testPch.h) +target_compile_definitions(winml_test_scenario PRIVATE BUILD_GOOGLE_TEST) +set_target_properties(winml_test_scenario PROPERTIES LINK_FLAGS + "/DELAYLOAD:d2d1.dll /DELAYLOAD:d3d11.dll /DELAYLOAD:dxgi.dll" +) + +# During build time, copy any modified collaterals. 
+# configure_file(source destination COPYONLY), which configures CMake to copy the file whenever source is modified, +# can't be used here because we don't know the destination during configure time (in multi-configuration generators, +# such as VS, one can switch between Debug/Release builds in the same build tree, and the destination depends on the +# build mode). +function(add_winml_collateral source) + get_filename_component(source_directory ${source} DIRECTORY) + file(GLOB_RECURSE collaterals RELATIVE ${source_directory} ${source}) + foreach(collateral ${collaterals}) + set(collateral_path ${source_directory}/${collateral}) + if(NOT IS_DIRECTORY ${collateral_path}) + add_custom_command(TARGET winml_test_common + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${collateral_path} "$/${collateral}") + endif() + endforeach() +endfunction() + +add_winml_collateral("${WINML_TEST_SRC_DIR}/api/models/*.onnx") +add_winml_collateral("${WINML_TEST_SRC_DIR}/collateral/images/*.png") +add_winml_collateral("${WINML_TEST_SRC_DIR}/collateral/models/*.onnx") +add_winml_collateral("${WINML_TEST_SRC_DIR}/common/testdata/squeezenet/*") +add_winml_collateral("${WINML_TEST_SRC_DIR}/scenario/cppwinrt/*.onnx") +add_winml_collateral("${WINML_TEST_SRC_DIR}/scenario/models/*.onnx") diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index c4cc6a526da36..1a254099e2a43 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -55,6 +55,14 @@ Visible="false" /> + + + + #include #include +#ifdef _WIN32 +#include +#endif namespace onnxruntime { namespace common { @@ -75,6 +78,40 @@ inline const char* StatusCodeToString(StatusCode status) noexcept { } } +#ifdef _WIN32 +inline HRESULT StatusCodeToHRESULT(StatusCode status) noexcept { + switch (status) + { + case StatusCode::OK: + return 
S_OK; + case StatusCode::FAIL: + return E_FAIL; + case StatusCode::INVALID_ARGUMENT: + return E_INVALIDARG; + case StatusCode::NO_SUCHFILE: + return __HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); + case StatusCode::NO_MODEL: + return __HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); + case StatusCode::ENGINE_ERROR: + return E_FAIL; + case StatusCode::RUNTIME_EXCEPTION: + return E_FAIL; + case StatusCode::INVALID_PROTOBUF: + return __HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); + case StatusCode::MODEL_LOADED: + return __HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); + case StatusCode::NOT_IMPLEMENTED: + return E_NOTIMPL; + case StatusCode::INVALID_GRAPH: + return __HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); + case StatusCode::EP_FAIL: + return __HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); + default: + return E_FAIL; + } +} +#endif + class Status { public: Status() noexcept = default; diff --git a/include/onnxruntime/core/platform/windows/TraceLoggingConfig.h b/include/onnxruntime/core/platform/windows/TraceLoggingConfig.h index d9aed52b87c40..77114ecdf8c0f 100644 --- a/include/onnxruntime/core/platform/windows/TraceLoggingConfig.h +++ b/include/onnxruntime/core/platform/windows/TraceLoggingConfig.h @@ -78,4 +78,4 @@ Module Name: // TraceLoggingString(szUser, "UserName", "User's name", MICROSOFT_FIELDTAG_HASH_PII), // ...); #define MICROSOFT_FIELDTAG_DROP_PII 0x04000000 -#define MICROSOFT_FIELDTAG_HASH_PII 0x08000000 \ No newline at end of file +#define MICROSOFT_FIELDTAG_HASH_PII 0x08000000 diff --git a/include/onnxruntime/core/platform/windows/readme.txt b/include/onnxruntime/core/platform/windows/readme.txt new file mode 100644 index 0000000000000..f1a436fc200be --- /dev/null +++ b/include/onnxruntime/core/platform/windows/readme.txt @@ -0,0 +1,2 @@ +copied from minkernel/published/internal/telemetry/open_source/TraceLoggingConfig.h +this is the official open source edition for these configuration settings \ No newline at end of file diff --git 
a/include/onnxruntime/core/providers/winml/winml_provider_factory.h b/include/onnxruntime/core/providers/winml/winml_provider_factory.h new file mode 100644 index 0000000000000..b08b42e310e41 --- /dev/null +++ b/include/onnxruntime/core/providers/winml/winml_provider_factory.h @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "onnxruntime_c_api.h" + +struct WinmlAdapterApi; +typedef struct WinmlAdapterApi WinmlAdapterApi; + +ORT_EXPORT const WinmlAdapterApi* ORT_API_CALL OrtGetWinMLAdapter(_In_ const OrtApi* ort_api) NO_EXCEPTION; \ No newline at end of file diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 76b325a47169f..cca39a086280c 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -156,6 +156,8 @@ ORT_RUNTIME_CLASS(TypeInfo); ORT_RUNTIME_CLASS(TensorTypeAndShapeInfo); ORT_RUNTIME_CLASS(SessionOptions); ORT_RUNTIME_CLASS(CustomOpDomain); +ORT_RUNTIME_CLASS(MapTypeInfo); +ORT_RUNTIME_CLASS(SequenceTypeInfo); // When passing in an allocator to any ORT function, be sure that the allocator object // is not destroyed until the last allocated object using it is freed. @@ -648,6 +650,66 @@ struct OrtApi { ORT_CLASS_RELEASE(TensorTypeAndShapeInfo); ORT_CLASS_RELEASE(SessionOptions); ORT_CLASS_RELEASE(CustomOpDomain); + + // End of Version 1 - DO NOT MODIFY ABOVE (see above text for more information) + + // Version 2 - In development, feel free to add/remove/rearrange here + + /** + * GetDenotationFromTypeInfo + * This api augments OrtTypeInfo to return denotations on the type. + * This is used by WinML to determine if an input/output is intended to be an Image or a Tensor. 
+ */ + OrtStatus*(ORT_API_CALL* GetDenotationFromTypeInfo)(_In_ const OrtTypeInfo*, _Out_ const char** const denotation, _Out_ size_t* len)NO_EXCEPTION; + + // OrtTypeInfo Casting methods + + /** + * CastTypeInfoToMapTypeInfo + * This api augments OrtTypeInfo to return an OrtMapTypeInfo when the type is a map. + * The OrtMapTypeInfo has additional information about the map's key type and value type. + * This is used by WinML to support model reflection APIs. + * + * Don't free the 'out' value + */ + OrtStatus*(ORT_API_CALL* CastTypeInfoToMapTypeInfo)(_In_ const OrtTypeInfo* type_info, _Out_ const OrtMapTypeInfo** out)NO_EXCEPTION; + + /** + * CastTypeInfoToSequenceTypeInfo + * This api augments OrtTypeInfo to return an OrtSequenceTypeInfo when the type is a sequence. + * The OrtSequenceTypeInfo has additional information about the sequence's element type. + * This is used by WinML to support model reflection APIs. + * + * Don't free the 'out' value + */ + OrtStatus*(ORT_API_CALL* CastTypeInfoToSequenceTypeInfo)(_In_ const OrtTypeInfo* type_info, _Out_ const OrtSequenceTypeInfo** out)NO_EXCEPTION; + + // OrtMapTypeInfo Accessors + + /** + * GetMapKeyType + * This api augments get the key type of a map. Key types are restricted to being scalar types and use ONNXTensorElementDataType. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* GetMapKeyType)(_In_ const OrtMapTypeInfo* map_type_info, _Out_ enum ONNXTensorElementDataType* out)NO_EXCEPTION; + + /** + * GetMapValueType + * This api augments get the value type of a map. + */ + OrtStatus*(ORT_API_CALL* GetMapValueType)(_In_ const OrtMapTypeInfo* map_type_info, _Outptr_ OrtTypeInfo** type_info)NO_EXCEPTION; + + // OrtSequenceTypeInfo Accessors + + /** + * GetSequenceElementType + * This api augments get the element type of a sequence. + * This is used by WinML to support model reflection APIs. 
+ */ + OrtStatus*(ORT_API_CALL* GetSequenceElementType)(_In_ const OrtSequenceTypeInfo* sequence_type_info, _Outptr_ OrtTypeInfo** type_info)NO_EXCEPTION; + + ORT_CLASS_RELEASE(MapTypeInfo); + ORT_CLASS_RELEASE(SequenceTypeInfo); }; /* diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index eb9e9394ddd72..a97a5d413f904 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -354,4 +354,4 @@ struct CustomOpBase : OrtCustomOp { } // namespace Ort -#include "onnxruntime_cxx_inline.h" +#include "onnxruntime_cxx_inline.h" \ No newline at end of file diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 99c1656c0a7bc..f6fb350171f01 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -553,4 +553,4 @@ inline OrtValue* CustomOpApi::KernelContext_GetOutput(OrtKernelContext* context, return out; } -} // namespace Ort +} // namespace Ort \ No newline at end of file diff --git a/onnxruntime/core/framework/allocatormgr.cc b/onnxruntime/core/framework/allocatormgr.cc index 6f2ccd3f2b034..3dec82e3a3d2c 100644 --- a/onnxruntime/core/framework/allocatormgr.cc +++ b/onnxruntime/core/framework/allocatormgr.cc @@ -29,9 +29,4 @@ AllocatorPtr CreateAllocator(DeviceAllocatorRegistrationInfo info, OrtDevice::De return AllocatorPtr(std::move(device_allocator)); } -DeviceAllocatorRegistry& DeviceAllocatorRegistry::Instance() { - static DeviceAllocatorRegistry s_instance; - return s_instance; -} - } // namespace onnxruntime diff --git a/onnxruntime/core/framework/allocatormgr.h b/onnxruntime/core/framework/allocatormgr.h index e6824dba8b79f..0ccc30b695cad 100644 --- a/onnxruntime/core/framework/allocatormgr.h +++ b/onnxruntime/core/framework/allocatormgr.h @@ -18,25 +18,4 @@ 
struct DeviceAllocatorRegistrationInfo { AllocatorPtr CreateAllocator(DeviceAllocatorRegistrationInfo info, OrtDevice::DeviceId device_id = 0); -class DeviceAllocatorRegistry { - public: - void RegisterDeviceAllocator(std::string&& name, DeviceAllocatorFactory factory, size_t max_mem, - OrtMemType mem_type = OrtMemTypeDefault) { - DeviceAllocatorRegistrationInfo info({mem_type, factory, max_mem}); - device_allocator_registrations_.emplace(std::move(name), std::move(info)); - } - - const std::map& AllRegistrations() const { - return device_allocator_registrations_; - } - - static DeviceAllocatorRegistry& Instance(); - - private: - DeviceAllocatorRegistry() = default; - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(DeviceAllocatorRegistry); - - std::map device_allocator_registrations_; -}; - } // namespace onnxruntime diff --git a/onnxruntime/core/framework/onnxruntime_map_type_info.cc b/onnxruntime/core/framework/onnxruntime_map_type_info.cc new file mode 100644 index 0000000000000..107cdbbed10c2 --- /dev/null +++ b/onnxruntime/core/framework/onnxruntime_map_type_info.cc @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "core/framework/onnxruntime_map_type_info.h" +#include "core/framework/onnxruntime_typeinfo.h" +#include "core/graph/onnx_protobuf.h" +#include "core/session/ort_apis.h" +#include "core/framework/error_code_helper.h" + +OrtMapTypeInfo::OrtMapTypeInfo(ONNXTensorElementDataType map_key_type, OrtTypeInfo* map_value_type) noexcept : map_key_type_(map_key_type), map_value_type_(map_value_type, &OrtApis::ReleaseTypeInfo) { +} + +static ONNXTensorElementDataType +ToONNXTensorElementDataType(ONNX_NAMESPACE::TensorProto_DataType data_type) { + using TensorType = ONNX_NAMESPACE::TensorProto_DataType; + switch (data_type) { + case TensorType::TensorProto_DataType_BOOL: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL; } + case TensorType::TensorProto_DataType_STRING: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; } // maps to c++ type std::string + case TensorType::TensorProto_DataType_FLOAT16: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; } // maps to c type MLFloat16 (IEEE 754 half-precision) + case TensorType::TensorProto_DataType_FLOAT: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; } // maps to c type float + case TensorType::TensorProto_DataType_DOUBLE: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; } // maps to c type double + case TensorType::TensorProto_DataType_INT8: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; } // maps to c type int8_t + case TensorType::TensorProto_DataType_INT16: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16; } // maps to c type int16_t + case TensorType::TensorProto_DataType_INT32: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; } // maps to c type int32_t + case TensorType::TensorProto_DataType_INT64: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; } // maps to c type int64_t + case TensorType::TensorProto_DataType_UINT8: { return 
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; } // maps to c type uint8_t + case TensorType::TensorProto_DataType_UINT16: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16; } // maps to c type uint16_t + case TensorType::TensorProto_DataType_UINT32: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32; } // maps to c type uint32_t + case TensorType::TensorProto_DataType_UINT64: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64; } // maps to c type uint64_t + case TensorType::TensorProto_DataType_COMPLEX64: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64; } // complex with float32 real and imaginary components + case TensorType::TensorProto_DataType_COMPLEX128: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128; } // complex with float64 real and imaginary components + case TensorType::TensorProto_DataType_BFLOAT16: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16; } // Non-IEEE floating-point format based on IEEE754 single-precision + default: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; } + } +} + +OrtStatus* OrtMapTypeInfo::FromTypeProto(const ONNX_NAMESPACE::TypeProto* type_proto, OrtMapTypeInfo** out) { + auto value_case = type_proto->value_case(); + if (value_case != ONNX_NAMESPACE::TypeProto::kMapType) + { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "type_proto is not of type map!"); + } + + // Get the key type of the map + auto type_proto_map = type_proto->map_type(); + auto map_key_type = ToONNXTensorElementDataType(ONNX_NAMESPACE::TensorProto_DataType(type_proto_map.key_type())); + + // Get the value type of the map + OrtTypeInfo* map_value_type_info = nullptr; + if (auto status = OrtTypeInfo::FromTypeProto(&type_proto_map.value_type(), &map_value_type_info)) + { + return status; + } + + *out = new OrtMapTypeInfo(map_key_type, map_value_type_info); + 
return nullptr; +} + +OrtStatus* OrtMapTypeInfo::Clone(OrtMapTypeInfo** out) { + OrtTypeInfo* map_value_type_copy = nullptr; + if (auto status = map_value_type_->Clone(&map_value_type_copy)) + { + return status; + } + *out = new OrtMapTypeInfo(map_key_type_, map_value_type_copy); + return nullptr; +} + +// OrtMapTypeInfo Accessors +ORT_API_STATUS_IMPL(OrtApis::GetMapKeyType, const OrtMapTypeInfo* map_type_info, enum ONNXTensorElementDataType* out) { + API_IMPL_BEGIN + *out = map_type_info->map_key_type_; + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(OrtApis::GetMapValueType, const OrtMapTypeInfo* map_type_info, OrtTypeInfo** out) { + API_IMPL_BEGIN + return map_type_info->map_value_type_->Clone(out); + API_IMPL_END +} + +ORT_API(void, OrtApis::ReleaseMapTypeInfo, OrtMapTypeInfo* ptr) { + delete ptr; +} \ No newline at end of file diff --git a/onnxruntime/core/framework/onnxruntime_map_type_info.h b/onnxruntime/core/framework/onnxruntime_map_type_info.h new file mode 100644 index 0000000000000..46477d8f04fa7 --- /dev/null +++ b/onnxruntime/core/framework/onnxruntime_map_type_info.h @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#pragma once + +#include "onnxruntime_c_api.h" + +#include + +namespace ONNX_NAMESPACE { +class TypeProto; +} + +struct OrtMapTypeInfo { + public: + ONNXTensorElementDataType map_key_type_ = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; + std::unique_ptr map_value_type_; + + static OrtStatus* FromTypeProto(const ONNX_NAMESPACE::TypeProto*, OrtMapTypeInfo** out); + + OrtStatus* Clone(OrtMapTypeInfo** out); + + private: + OrtMapTypeInfo(ONNXTensorElementDataType map_key_type, OrtTypeInfo* map_value_type)noexcept; + OrtMapTypeInfo(const OrtMapTypeInfo& other) = delete; + OrtMapTypeInfo& operator=(const OrtMapTypeInfo& other) = delete; + +}; diff --git a/onnxruntime/core/framework/onnxruntime_sequence_type_info.cc b/onnxruntime/core/framework/onnxruntime_sequence_type_info.cc new file mode 100644 index 0000000000000..a5ee0c9a63bb1 --- /dev/null +++ b/onnxruntime/core/framework/onnxruntime_sequence_type_info.cc @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "core/framework/onnxruntime_sequence_type_info.h" +#include "core/framework/onnxruntime_typeinfo.h" +#include "core/graph/onnx_protobuf.h" +#include "core/session/ort_apis.h" +#include "core/framework/error_code_helper.h" + +OrtSequenceTypeInfo::OrtSequenceTypeInfo(OrtTypeInfo* sequence_key_type) noexcept : + sequence_key_type_(sequence_key_type, &OrtApis::ReleaseTypeInfo) { +} + +OrtStatus* OrtSequenceTypeInfo::FromTypeProto(const ONNX_NAMESPACE::TypeProto* type_proto, OrtSequenceTypeInfo** out) { + auto value_case = type_proto->value_case(); + if (value_case != ONNX_NAMESPACE::TypeProto::kSequenceType) + { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "type_proto is not of type sequence!"); + } + + auto type_proto_sequence = type_proto->sequence_type(); + OrtTypeInfo* sequence_key_type_info = nullptr; + if (auto status = OrtTypeInfo::FromTypeProto(&type_proto_sequence.elem_type(), &sequence_key_type_info)) + { + return status; + } + + *out = new OrtSequenceTypeInfo(sequence_key_type_info); + return nullptr; +} + +OrtStatus* OrtSequenceTypeInfo::Clone(OrtSequenceTypeInfo** out) { + OrtTypeInfo* sequence_key_type_copy = nullptr; + if (auto status = sequence_key_type_->Clone(&sequence_key_type_copy)) + { + return status; + } + *out = new OrtSequenceTypeInfo(sequence_key_type_copy); + return nullptr; +} + +ORT_API_STATUS_IMPL(OrtApis::GetSequenceElementType, const OrtSequenceTypeInfo* sequence_type_info, OrtTypeInfo** out) { + API_IMPL_BEGIN + return sequence_type_info->sequence_key_type_->Clone(out); + API_IMPL_END +} + +ORT_API(void, OrtApis::ReleaseSequenceTypeInfo, OrtSequenceTypeInfo* ptr) { + delete ptr; +} \ No newline at end of file diff --git a/onnxruntime/core/framework/onnxruntime_sequence_type_info.h b/onnxruntime/core/framework/onnxruntime_sequence_type_info.h new file mode 100644 index 0000000000000..abb3503778b71 --- /dev/null +++ b/onnxruntime/core/framework/onnxruntime_sequence_type_info.h @@ -0,0 +1,25 @@ +// Copyright (c) 
Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#pragma once + +#include "onnxruntime_c_api.h" + +#include + +namespace ONNX_NAMESPACE { +class TypeProto; +} + +struct OrtSequenceTypeInfo { + public: + std::unique_ptr sequence_key_type_; + + OrtStatus* Clone(OrtSequenceTypeInfo** out); + + static OrtStatus* FromTypeProto(const ONNX_NAMESPACE::TypeProto*, OrtSequenceTypeInfo** out); + + private: + OrtSequenceTypeInfo(OrtTypeInfo* sequence_key_type) noexcept; + OrtSequenceTypeInfo(const OrtSequenceTypeInfo& other) = delete; + OrtSequenceTypeInfo& operator=(const OrtSequenceTypeInfo& other) = delete; +}; diff --git a/onnxruntime/core/framework/onnxruntime_typeinfo.cc b/onnxruntime/core/framework/onnxruntime_typeinfo.cc index 080a3518048cb..42e03e802caf1 100644 --- a/onnxruntime/core/framework/onnxruntime_typeinfo.cc +++ b/onnxruntime/core/framework/onnxruntime_typeinfo.cc @@ -10,6 +10,11 @@ #include "core/framework/sparse_tensor.h" #include "core/graph/onnx_protobuf.h" #include "core/session/ort_apis.h" +#include "core/framework/error_code_helper.h" + +#include "core/framework/tensor_type_and_shape.h" +#include "core/framework/onnxruntime_map_type_info.h" +#include "core/framework/onnxruntime_sequence_type_info.h" using onnxruntime::BFloat16; using onnxruntime::DataTypeImpl; @@ -20,11 +25,27 @@ using onnxruntime::TensorShape; namespace on = ONNX_NAMESPACE; +OrtTypeInfo::OrtTypeInfo(ONNXType type1) noexcept : type(type1) { +} + OrtTypeInfo::OrtTypeInfo(ONNXType type1, OrtTensorTypeAndShapeInfo* data1) noexcept : type(type1), data(data1) { } +OrtTypeInfo::OrtTypeInfo(ONNXType type1, OrtMapTypeInfo* map_type_info1) noexcept : type(type1), map_type_info(map_type_info1) { +} + +OrtTypeInfo::OrtTypeInfo(ONNXType type1, OrtSequenceTypeInfo* sequence_type_info1) noexcept : type(type1), sequence_type_info(sequence_type_info1) { +} + OrtTypeInfo::~OrtTypeInfo() { OrtApis::ReleaseTensorTypeAndShapeInfo(data); + + if (map_type_info) { + 
OrtApis::ReleaseMapTypeInfo(map_type_info); + } + if (sequence_type_info) { + OrtApis::ReleaseSequenceTypeInfo(sequence_type_info); + } } ORT_API_STATUS_IMPL(OrtApis::GetOnnxTypeFromTypeInfo, _In_ const struct OrtTypeInfo* input, ONNXType* out) { @@ -37,6 +58,28 @@ ORT_API_STATUS_IMPL(OrtApis::CastTypeInfoToTensorInfo, _In_ const struct OrtType return nullptr; } +ORT_API_STATUS_IMPL(OrtApis::CastTypeInfoToMapTypeInfo, const OrtTypeInfo* type_info, const OrtMapTypeInfo** out) { + API_IMPL_BEGIN + *out = type_info->type == ONNX_TYPE_MAP ? type_info->map_type_info : nullptr; + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(OrtApis::CastTypeInfoToSequenceTypeInfo, const OrtTypeInfo* type_info, const OrtSequenceTypeInfo** out) { + API_IMPL_BEGIN + *out = type_info->type == ONNX_TYPE_SEQUENCE ? type_info->sequence_type_info : nullptr; + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(OrtApis::GetDenotationFromTypeInfo, const OrtTypeInfo* type_info, const char** const out, size_t* len) { + API_IMPL_BEGIN + *out = type_info->denotation.c_str(); + *len = type_info->denotation.size(); + return nullptr; + API_IMPL_END +} + ORT_API(void, OrtApis::ReleaseTypeInfo, _Frees_ptr_opt_ OrtTypeInfo* ptr) { delete ptr; } @@ -49,7 +92,7 @@ OrtStatus* GetTensorShapeAndType(const TensorShape& shape, const std::vectorIsTensorSequenceType()) { - *out = new OrtTypeInfo(ONNX_TYPE_SEQUENCE, nullptr); + *out = new OrtTypeInfo(ONNX_TYPE_SEQUENCE); return nullptr; } @@ -92,16 +135,14 @@ OrtStatus* OrtTypeInfo::FromOrtValue(const OrtValue& value, OrtTypeInfo** out) { // Place Opaque first as tensors will be mostly handled above and maps and sequences are not common switch (type_proto->value_case()) { case on::TypeProto::kOpaqueType: { - *out = new OrtTypeInfo(ONNX_TYPE_OPAQUE, nullptr); + *out = new OrtTypeInfo(ONNX_TYPE_OPAQUE); return nullptr; } case on::TypeProto::kMapType: { - *out = new OrtTypeInfo(ONNX_TYPE_MAP, nullptr); - return nullptr; + return 
OrtTypeInfo::FromTypeProto(type_proto, out); } case on::TypeProto::kSequenceType: { - *out = new OrtTypeInfo(ONNX_TYPE_SEQUENCE, nullptr); - return nullptr; + return OrtTypeInfo::FromTypeProto(type_proto, out); } // Real Tensor support case on::TypeProto::kTensorType: @@ -204,19 +245,39 @@ OrtStatus* OrtTypeInfo::FromTypeProto(const ONNX_NAMESPACE::TypeProto* input, Or st = GetTensorShapeAndType(TensorShape(), nullptr, *input, &info); } if (st != nullptr) return st; - *out = new OrtTypeInfo(ten_type, info); + auto type_info = new OrtTypeInfo(ten_type, info); + type_info->denotation = input->denotation(); + *out = type_info; return nullptr; } break; case on::TypeProto::kSequenceType: { - *out = new OrtTypeInfo(ONNX_TYPE_SEQUENCE, nullptr); + OrtSequenceTypeInfo* sequence_type_info = nullptr; + + if (auto status = OrtSequenceTypeInfo::FromTypeProto(input, &sequence_type_info)) { + return status; + } + + auto type_info = new OrtTypeInfo(ONNX_TYPE_SEQUENCE, sequence_type_info); + type_info->denotation = input->denotation(); + *out = type_info; return nullptr; } break; case on::TypeProto::kMapType: { - *out = new OrtTypeInfo(ONNX_TYPE_MAP, nullptr); + OrtMapTypeInfo* map_type_info = nullptr; + + if (auto status = OrtMapTypeInfo::FromTypeProto(input, &map_type_info)) { + return status; + } + + auto type_info = new OrtTypeInfo(ONNX_TYPE_MAP, map_type_info); + type_info->denotation = input->denotation(); + *out = type_info; return nullptr; } break; case on::TypeProto::kOpaqueType: { - *out = new OrtTypeInfo(ONNX_TYPE_OPAQUE, nullptr); + auto type_info = new OrtTypeInfo(ONNX_TYPE_OPAQUE); + type_info->denotation = input->denotation(); + *out = type_info; return nullptr; } break; case on::TypeProto::VALUE_NOT_SET: @@ -227,3 +288,48 @@ OrtStatus* OrtTypeInfo::FromTypeProto(const ONNX_NAMESPACE::TypeProto* input, Or } return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "not implemented"); } + +OrtStatus* OrtTypeInfo::Clone(OrtTypeInfo** out) { + switch (type) { + case 
ONNX_TYPE_TENSOR: + case ONNX_TYPE_SPARSETENSOR: + { + OrtTensorTypeAndShapeInfo* clone; + if (auto status = data->Clone(&clone)) { + return status; + } + *out = new OrtTypeInfo(type, clone); + (*out)->denotation = denotation; + return nullptr; + } + case ONNX_TYPE_SEQUENCE: + { + OrtSequenceTypeInfo* clone; + if (auto status = sequence_type_info->Clone(&clone)) { + return status; + } + *out = new OrtTypeInfo(type, clone); + (*out)->denotation = denotation; + return nullptr; + } + case ONNX_TYPE_MAP: { + OrtMapTypeInfo* clone; + if (auto status = map_type_info->Clone(&clone)) { + return status; + } + *out = new OrtTypeInfo(type, clone); + (*out)->denotation = denotation; + return nullptr; + } + case ONNX_TYPE_OPAQUE: + { + *out = new OrtTypeInfo(type); + (*out)->denotation = denotation; + return nullptr; + } + default: + // Not implemented + break; + } + return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "not implemented"); +} \ No newline at end of file diff --git a/onnxruntime/core/framework/onnxruntime_typeinfo.h b/onnxruntime/core/framework/onnxruntime_typeinfo.h index d615840dcb501..3c256aa73d17d 100644 --- a/onnxruntime/core/framework/onnxruntime_typeinfo.h +++ b/onnxruntime/core/framework/onnxruntime_typeinfo.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include "core/session/onnxruntime_c_api.h" namespace onnxruntime { @@ -14,6 +15,10 @@ namespace ONNX_NAMESPACE { class TypeProto; } +// These types are only present in the winml adapter c api, so they are forward declared. 
+struct OrtMapTypeInfo; +struct OrtSequenceTypeInfo; + /** * the equivalent of ONNX_NAMESPACE::TypeProto * This class is mainly for the C API @@ -21,19 +26,26 @@ class TypeProto; struct OrtTypeInfo { public: ONNXType type = ONNX_TYPE_UNKNOWN; + std::string denotation; ~OrtTypeInfo(); //owned by this OrtTensorTypeAndShapeInfo* data = nullptr; + OrtMapTypeInfo* map_type_info = nullptr; + OrtSequenceTypeInfo* sequence_type_info = nullptr; OrtTypeInfo(const OrtTypeInfo& other) = delete; OrtTypeInfo& operator=(const OrtTypeInfo& other) = delete; + OrtStatus* Clone(OrtTypeInfo** out); + static OrtStatus* FromOrtValue(const OrtValue& value, OrtTypeInfo** out); static OrtStatus* FromTypeProto(const ONNX_NAMESPACE::TypeProto*, OrtTypeInfo** out); - static const onnxruntime::DataTypeImpl* ElementTypeFromProto(int type); private: + OrtTypeInfo(ONNXType type) noexcept; OrtTypeInfo(ONNXType type, OrtTensorTypeAndShapeInfo* data) noexcept; + OrtTypeInfo(ONNXType type, OrtMapTypeInfo* map_type_info) noexcept; + OrtTypeInfo(ONNXType type, OrtSequenceTypeInfo* sequence_type_info) noexcept; }; diff --git a/onnxruntime/core/framework/path_lib.cc b/onnxruntime/core/framework/path_lib.cc index f2e526424c808..f34deb9025c7a 100644 --- a/onnxruntime/core/framework/path_lib.cc +++ b/onnxruntime/core/framework/path_lib.cc @@ -7,8 +7,11 @@ #include #ifdef _WIN32 +#if defined(USE_PATHCCH_LIB) +#include +#pragma comment(lib, "PathCch.lib") // Desktop apps need to support back to Windows 7, so we can't use PathCch.lib as it was added in Windows 8 -#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) #include #pragma comment(lib, "Shlwapi.lib") #else @@ -24,7 +27,7 @@ namespace onnxruntime { namespace { Status RemoveFileSpec(PWSTR pszPath, size_t cchPath) { assert(pszPath != nullptr && pszPath[0] != L'\0'); -#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && 
!defined(USE_PATHCCH_LIB) (void)cchPath; for (PWSTR t = L"\0"; *t == L'\0'; t = PathRemoveBackslashW(pszPath)) ; diff --git a/onnxruntime/core/framework/tensor_type_and_shape.cc b/onnxruntime/core/framework/tensor_type_and_shape.cc index 088043a159962..64bb11dbcbcfa 100644 --- a/onnxruntime/core/framework/tensor_type_and_shape.cc +++ b/onnxruntime/core/framework/tensor_type_and_shape.cc @@ -192,6 +192,11 @@ OrtStatus* GetTensorShapeAndType(const onnxruntime::TensorShape& shape, const st return GetTensorShapeAndTypeHelper(type, shape, dim_params, out); } +OrtStatus* OrtTensorTypeAndShapeInfo::Clone(OrtTensorTypeAndShapeInfo** out) +{ + return GetTensorShapeAndTypeHelper(type, shape, &dim_params, out); +} + ORT_API_STATUS_IMPL(OrtApis::GetTensorTypeAndShape, _In_ const OrtValue* v, _Out_ OrtTensorTypeAndShapeInfo** out) { API_IMPL_BEGIN onnxruntime::MLDataType type = v->Type(); diff --git a/onnxruntime/core/framework/tensor_type_and_shape.h b/onnxruntime/core/framework/tensor_type_and_shape.h index 28431a9d614cf..f781160cc6505 100644 --- a/onnxruntime/core/framework/tensor_type_and_shape.h +++ b/onnxruntime/core/framework/tensor_type_and_shape.h @@ -13,4 +13,6 @@ struct OrtTensorTypeAndShapeInfo { OrtTensorTypeAndShapeInfo() = default; OrtTensorTypeAndShapeInfo(const OrtTensorTypeAndShapeInfo& other) = delete; OrtTensorTypeAndShapeInfo& operator=(const OrtTensorTypeAndShapeInfo& other) = delete; + + OrtStatus* Clone(OrtTensorTypeAndShapeInfo** out); }; diff --git a/onnxruntime/core/graph/function.cc b/onnxruntime/core/graph/function.cc index 44bcc577f3fd0..d5edeb7cb4b2f 100644 --- a/onnxruntime/core/graph/function.cc +++ b/onnxruntime/core/graph/function.cc @@ -378,7 +378,7 @@ FunctionImpl::FunctionImpl(const onnxruntime::Graph& graph, auto status = function_body_graph.Resolve(); ORT_ENFORCE(status.IsOK(), "Resolve subgraph failed:", status.ErrorMessage()); -} +} // namespace onnxruntime FunctionImpl::~FunctionImpl() = default; diff --git 
a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index d8066bafba1aa..3cc756a12a71f 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -95,6 +95,10 @@ Model::Model(std::unique_ptr model_proto, const IOnnxRuntimeOpSchema " specifies which version of the ONNX OperatorSet is being imported."); } + if (!model_proto->has_ir_version() || model_proto->ir_version() > ONNX_NAMESPACE::Version::IR_VERSION) { + throw std::invalid_argument("Unknown model file format version."); + } + model_proto_ = std::move(model_proto); for (auto& prop : model_proto_->metadata_props()) { model_metadata_[prop.key()] = prop.value(); diff --git a/onnxruntime/core/graph/schema_registry.cc b/onnxruntime/core/graph/schema_registry.cc index f0d4005c1503d..8127af7877c23 100644 --- a/onnxruntime/core/graph/schema_registry.cc +++ b/onnxruntime/core/graph/schema_registry.cc @@ -194,6 +194,17 @@ DomainToVersionMap SchemaRegistryManager::GetLatestOpsetVersions(bool is_onnx_on return domain_version_map; } +static bool IsDomainVersionBeyondSupportedRange( + const std::string& domain, + const int op_set_version) { + // check the ONNX schema registry + auto& onnx_domain_version_map = + ONNX_NAMESPACE::OpSchemaRegistry::DomainToVersionRange::Instance().Map(); + + auto it = onnx_domain_version_map.find(domain); + return it != onnx_domain_version_map.end() && op_set_version > it->second.second; +} + // Return the schema with biggest version, which is not greater than specified // in specified domain. 
The value of earliest_opset_where_unchanged // is also set to the earliest version preceding op_set_version where the operator @@ -238,10 +249,14 @@ void SchemaRegistryManager::GetSchemaAndHistory( checked_registry_indices.push_back(index); } - // if not found in registered custom schema registry, search in ONNX schema registry - *latest_schema = ONNX_NAMESPACE::OpSchemaRegistry::Schema(key, version, domain); - if (*latest_schema != nullptr) { - *earliest_opset_where_unchanged = (*latest_schema)->SinceVersion(); + // Reject versions greater than what is actually supported. + *latest_schema = nullptr; + if (!IsDomainVersionBeyondSupportedRange(domain, version)) { + // if not found in registered custom schema registry, search in ONNX schema registry + *latest_schema = ONNX_NAMESPACE::OpSchemaRegistry::Schema(key, version, domain); + if (*latest_schema != nullptr) { + *earliest_opset_where_unchanged = (*latest_schema)->SinceVersion(); + } } } diff --git a/onnxruntime/core/platform/telemetry.cc b/onnxruntime/core/platform/telemetry.cc index 7c587a1b5d469..e6092693c6661 100644 --- a/onnxruntime/core/platform/telemetry.cc +++ b/onnxruntime/core/platform/telemetry.cc @@ -22,12 +22,22 @@ void Telemetry::DisableTelemetryEvents() const { void Telemetry::LogProcessInfo() const { } +void Telemetry::LogSessionCreationStart() const { +} + +void Telemetry::LogEvaluationStop() const { +} + +void Telemetry::LogEvaluationStart() const { +} + void Telemetry::LogSessionCreation(uint32_t session_id, int64_t ir_version, const std::string& model_producer_name, const std::string& model_producer_version, const std::string& model_domain, const std::unordered_map& domain_to_version_map, const std::string& model_graph_name, const std::unordered_map& model_metadata, - const std::string& loadedFrom, const std::vector& execution_provider_ids) const { + const std::string& loadedFrom, const std::vector& execution_provider_ids, + bool use_fp16) const { ORT_UNUSED_PARAMETER(session_id); 
ORT_UNUSED_PARAMETER(ir_version); ORT_UNUSED_PARAMETER(model_producer_name); @@ -38,6 +48,7 @@ void Telemetry::LogSessionCreation(uint32_t session_id, int64_t ir_version, cons ORT_UNUSED_PARAMETER(model_metadata); ORT_UNUSED_PARAMETER(loadedFrom); ORT_UNUSED_PARAMETER(execution_provider_ids); + ORT_UNUSED_PARAMETER(use_fp16); } void Telemetry::LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file, @@ -55,5 +66,9 @@ void Telemetry::LogRuntimePerf(uint32_t session_id, uint32_t total_runs_since_la ORT_UNUSED_PARAMETER(total_run_duration_since_last); } +void Telemetry::LogExecutionProviderEvent(LUID* adapterLuid) const { + ORT_UNUSED_PARAMETER(adapterLuid); +} + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/telemetry.h b/onnxruntime/core/platform/telemetry.h index a0fc42e045c49..a669c95eebd4a 100644 --- a/onnxruntime/core/platform/telemetry.h +++ b/onnxruntime/core/platform/telemetry.h @@ -10,6 +10,9 @@ #include "core/common/status.h" #include "core/common/common.h" +struct _LUID; +using LUID = _LUID; + namespace onnxruntime { /** @@ -36,18 +39,27 @@ class Telemetry { virtual void LogProcessInfo() const; + virtual void LogSessionCreationStart() const; + + virtual void LogEvaluationStop() const; + + virtual void LogEvaluationStart() const; + virtual void LogSessionCreation(uint32_t session_id, int64_t ir_version, const std::string& model_producer_name, const std::string& model_producer_version, const std::string& model_domain, const std::unordered_map& domain_to_version_map, const std::string& model_graph_name, const std::unordered_map& model_metadata, - const std::string& loadedFrom, const std::vector& execution_provider_ids) const; + const std::string& loadedFrom, const std::vector& execution_provider_ids, + bool use_fp16) const; virtual void LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file, const char* function, uint32_t line) const; virtual void LogRuntimePerf(uint32_t session_id, 
uint32_t total_runs_since_last, int64_t total_run_duration_since_last) const; + virtual void LogExecutionProviderEvent(LUID* adapterLuid) const; + private: ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Telemetry); }; diff --git a/onnxruntime/core/platform/windows/debug_alloc.cc b/onnxruntime/core/platform/windows/debug_alloc.cc index 46461f3d052ae..8c0beb13978d1 100644 --- a/onnxruntime/core/platform/windows/debug_alloc.cc +++ b/onnxruntime/core/platform/windows/debug_alloc.cc @@ -235,12 +235,8 @@ Memory_LeakCheck::~Memory_LeakCheck() { _snprintf_s(buffer, _TRUNCATE, "%d bytes of memory leaked in %d allocations", leaked_bytes, leak_count); string.append(buffer); - // If we're being actively debugged, show a message box to get the dev's attention - if (IsDebuggerPresent()) - MessageBoxA(nullptr, string.c_str(), "Warning", MB_OK | MB_ICONWARNING); - else { - // If we're on the command line (like on a build machine), output to the console and exit(-1) - std::cout << "\n----- MEMORY LEAKS: " << string.c_str() << "\n"; + std::cout << "\n----- MEMORY LEAKS: " << string.c_str() << "\n"; + if (!IsDebuggerPresent()) { exit(-1); } diff --git a/onnxruntime/core/platform/windows/telemetry.cc b/onnxruntime/core/platform/windows/telemetry.cc index c7a8bda42d1c2..23092bbf21880 100644 --- a/onnxruntime/core/platform/windows/telemetry.cc +++ b/onnxruntime/core/platform/windows/telemetry.cc @@ -96,12 +96,37 @@ void WindowsTelemetry::LogProcessInfo() const { process_info_logged = true; } +void WindowsTelemetry::LogSessionCreationStart() const { + TraceLoggingWrite(telemetry_provider_handle, + "SessionCreationStart", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); +} + +void WindowsTelemetry::LogEvaluationStop() const { + TraceLoggingWrite(telemetry_provider_handle, + "EvaluationStop", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + 
TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); +} + +void WindowsTelemetry::LogEvaluationStart() const { + TraceLoggingWrite(telemetry_provider_handle, + "EvaluationStart", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); +} + void WindowsTelemetry::LogSessionCreation(uint32_t session_id, int64_t ir_version, const std::string& model_producer_name, const std::string& model_producer_version, const std::string& model_domain, const std::unordered_map& domain_to_version_map, const std::string& model_graph_name, const std::unordered_map& model_metadata, - const std::string& loadedFrom, const std::vector& execution_provider_ids) const { + const std::string& loaded_from, const std::vector& execution_provider_ids, + bool use_fp16) const { if (global_register_count_ == 0 || enabled_ == false) return; @@ -156,10 +181,11 @@ void WindowsTelemetry::LogSessionCreation(uint32_t session_id, int64_t ir_versio TraceLoggingString(model_producer_name.c_str(), "modelProducerName"), TraceLoggingString(model_producer_version.c_str(), "modelProducerVersion"), TraceLoggingString(model_domain.c_str(), "modelDomain"), + TraceLoggingBool(use_fp16, "usefp16"), TraceLoggingString(domain_to_verison_string.c_str(), "domainToVersionMap"), TraceLoggingString(model_graph_name.c_str(), "modelGraphName"), TraceLoggingString(model_metadata_string.c_str(), "modelMetaData"), - TraceLoggingString(loadedFrom.c_str(), "loadedFrom"), + TraceLoggingString(loaded_from.c_str(), "loadedFrom"), TraceLoggingString(execution_provider_string.c_str(), "executionProviderIds")); } @@ -170,6 +196,7 @@ void WindowsTelemetry::LogRuntimeError(uint32_t session_id, const common::Status TraceLoggingWrite(telemetry_provider_handle, "RuntimeError", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), 
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), // Telemetry info @@ -198,4 +225,17 @@ void WindowsTelemetry::LogRuntimePerf(uint32_t session_id, uint32_t total_runs_s TraceLoggingInt64(total_run_duration_since_last, "totalRunDuration")); } +void WindowsTelemetry::LogExecutionProviderEvent(LUID* adapterLuid) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "ExecutionProviderEvent", + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + // Telemetry info + TraceLoggingUInt32(adapterLuid->LowPart, "adapterLuidLowPart"), + TraceLoggingUInt32(adapterLuid->HighPart, "adapterLuidHighPart")); +} + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/windows/telemetry.h b/onnxruntime/core/platform/windows/telemetry.h index dd5da6205bcae..d34b26860308e 100644 --- a/onnxruntime/core/platform/windows/telemetry.h +++ b/onnxruntime/core/platform/windows/telemetry.h @@ -13,9 +13,7 @@ namespace onnxruntime { * derives and implments a Telemetry provider on Windows */ class WindowsTelemetry : public Telemetry { - public: - // these are allowed to be created, WindowsEnv will create one WindowsTelemetry(); ~WindowsTelemetry(); @@ -25,18 +23,27 @@ class WindowsTelemetry : public Telemetry { void LogProcessInfo() const override; + void LogSessionCreationStart() const override; + + void LogEvaluationStop() const override; + + void LogEvaluationStart() const override; + void LogSessionCreation(uint32_t session_id, int64_t ir_version, const std::string& model_producer_name, const std::string& model_producer_version, const std::string& model_domain, const std::unordered_map& domain_to_version_map, const std::string& model_graph_name, const std::unordered_map& model_metadata, - const std::string& loadedFrom, const std::vector& execution_provider_ids) const override; - + const std::string& loadedFrom, const std::vector& execution_provider_ids, + 
bool use_fp16) const override; + void LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file, const char* function, uint32_t line) const override; void LogRuntimePerf(uint32_t session_id, uint32_t total_runs_since_last, int64_t total_run_duration_since_last) const override; + void LogExecutionProviderEvent(LUID* adapterLuid) const override; + private: static OrtMutex mutex_; static uint32_t global_register_count_; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h index c34dfbc2d93d6..0415976b58263 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h @@ -19,7 +19,7 @@ namespace onnxruntime class Node; } -namespace winrt::Windows::AI::MachineLearning::implementation +namespace Windows::AI::MachineLearning::Adapter { interface __declspec(uuid("5b19a18a-5ed5-4df2-a363-21b89380a698")) IWinmlExecutionProvider : public IUnknown @@ -65,6 +65,8 @@ namespace winrt::Windows::AI::MachineLearning::implementation virtual void Close() = 0; }; + using MLOperatorTensorGetter = std::function(uint32_t index)>; + struct DmlOperatorParams { Microsoft::WRL::ComPtr op; @@ -86,8 +88,6 @@ namespace winrt::Windows::AI::MachineLearning::implementation bool allowHalfPrecisionComputation = false; }; - using MLOperatorTensorGetter = std::function(uint32_t index)>; - using GraphNodeFactory = std::function; } -namespace winrt::Windows::AI::MachineLearning::implementation +namespace Windows::AI::MachineLearning::Adapter { using namespace Microsoft::WRL; @@ -110,4 +110,4 @@ class AbiCustomRegistry : public WRL::Base(_status.Code()))); \ } \ } while (0) - -namespace Dml -{ - HRESULT MapLotusErrorToHRESULT(onnxruntime::common::Status status); -} diff --git 
a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp index 1c7c88c93aeff..84b298ca77dfc 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp @@ -29,7 +29,7 @@ #define ENABLE_GRAPH_COMPILATION -using namespace winrt::Windows::AI::MachineLearning::implementation; +using namespace Windows::AI::MachineLearning::Adapter; namespace Dml { @@ -129,6 +129,12 @@ namespace Dml } } +// ORT release pipelines agent pools do not have 19H1 SDK installed which defines D3D_FEATURE_LEVEL_1_0_CORE. +// Once ORT/WinML github project can be built with VS2019, we can update these pools to use install the 19H1 SDK +// using the command line installer tool with VS2019 +// Task 24384515: Update ORT AIInfra release agent pool to install 19H1 SDK on VM bootstrap +#define D3D_FEATURE_LEVEL_1_0_CORE_PRIVATE ((D3D_FEATURE_LEVEL)0x1000) + ExecutionProviderImpl::ExecutionProviderImpl(IDMLDevice* dmlDevice, ID3D12Device* d3d12Device, ID3D12CommandQueue* queue, bool enableMetacommands) : m_d3d12Device(d3d12Device), m_dmlDevice(dmlDevice), @@ -138,7 +144,7 @@ namespace Dml D3D12_FEATURE_DATA_FEATURE_LEVELS featureLevels = {}; D3D_FEATURE_LEVEL featureLevelsList[] = { - D3D_FEATURE_LEVEL_1_0_CORE, + D3D_FEATURE_LEVEL_1_0_CORE_PRIVATE, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_12_0, @@ -153,7 +159,7 @@ namespace Dml sizeof(featureLevels) )); - m_isMcdmDevice = (featureLevels.MaxSupportedFeatureLevel == D3D_FEATURE_LEVEL_1_0_CORE); + m_isMcdmDevice = (featureLevels.MaxSupportedFeatureLevel == D3D_FEATURE_LEVEL_1_0_CORE_PRIVATE); m_context = std::make_shared(m_d3d12Device.Get(), m_dmlDevice.Get(), queue); @@ -674,7 +680,7 @@ namespace Dml return m_areMetacommandsEnabled; } - std::shared_ptr + std::shared_ptr 
ExecutionProviderImpl::GetInternalRegistrationInfoMap() const { return m_internalRegInfoMap; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h index fd1709d8299b5..58f73f62bad3c 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h @@ -9,13 +9,11 @@ #include #include -namespace WRL -{ - template - using Base = Microsoft::WRL::RuntimeClass< - Microsoft::WRL::RuntimeClassFlags, - TInterfaces... - >; +namespace WRL { +template +using Base = Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + TInterfaces...>; } using namespace Microsoft::WRL; @@ -30,7 +28,7 @@ namespace Dml class ExecutionProvider; class ExecutionProviderImpl : public WRL::Base + Windows::AI::MachineLearning::Adapter::IWinmlExecutionProvider> { public: explicit ExecutionProviderImpl::ExecutionProviderImpl( @@ -158,7 +156,7 @@ namespace Dml std::shared_ptr GetCpuInputAllocator(); std::shared_ptr GetCpuOutputAllocator(); - std::shared_ptr + std::shared_ptr GetInternalRegistrationInfoMap() const; private: @@ -175,7 +173,7 @@ namespace Dml std::shared_ptr m_cpuInputAllocator; std::shared_ptr m_cpuOutputAllocator; std::shared_ptr m_kernelRegistry; - std::shared_ptr m_internalRegInfoMap; + std::shared_ptr m_internalRegInfoMap; mutable uint64_t m_partitionKernelPrefixVal = 0; bool m_closed = false; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/FusedGraphKernel.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/FusedGraphKernel.cpp index 0d5ab86b3582b..9391e191de86d 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/FusedGraphKernel.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/FusedGraphKernel.cpp @@ -6,7 +6,7 @@ #include "MLOperatorAuthorImpl.h" #include "FusedGraphKernel.h" -using namespace 
winrt::Windows::AI::MachineLearning::implementation; +using namespace Windows::AI::MachineLearning::Adapter; namespace Dml { @@ -170,7 +170,7 @@ namespace Dml } else { - std::tie(unpackedTensor, tensorByteSize) = winrt::Windows::AI::MachineLearning::implementation::UnpackTensor(initializer); + std::tie(unpackedTensor, tensorByteSize) = UnpackTensor(initializer); tensorPtr = unpackedTensor.get(); } @@ -726,7 +726,7 @@ namespace Dml ComPtr m_compiledExecutionPlanOperator; std::vector m_inputsUsed; const void* m_executionHandle = nullptr; - ComPtr m_winmlProvider; + ComPtr m_winmlProvider; ComPtr m_provider; EdgeShapes m_outputShapes; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp index 8b6b42c63a70b..622b7b96cb09c 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp @@ -4,7 +4,7 @@ #include "precomp.h" #include "GraphDescBuilder.h" -using namespace winrt::Windows::AI::MachineLearning::implementation; +using namespace Windows::AI::MachineLearning::Adapter; namespace Dml::GraphDescBuilder { diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h index 68fc7cc9f513d..02319daab0ab9 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h @@ -9,14 +9,14 @@ namespace Dml { struct GraphNodeProperties { - std::shared_ptr + std::shared_ptr internalRegInfo; // These are currently passed from the partitioning step since the only DML operators current // supporting graph nodes don't customize the order of edges or shapes, other than coercing // dimension count. 
This will change as the supported set of operators as graph nodes increases. - winrt::Windows::AI::MachineLearning::implementation::EdgeShapes inputShapes; - winrt::Windows::AI::MachineLearning::implementation::EdgeShapes outputShapes; + Windows::AI::MachineLearning::Adapter::EdgeShapes inputShapes; + Windows::AI::MachineLearning::Adapter::EdgeShapes outputShapes; }; namespace GraphDescBuilder diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp index 1efbd3fd6b44b..e6ffb31d0084c 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp @@ -16,7 +16,7 @@ //#define PRINT_PARTITON_INFO -using namespace winrt::Windows::AI::MachineLearning::implementation; +using namespace Windows::AI::MachineLearning::Adapter; namespace Dml { diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h index 48e787736b65a..2c9dc497e1364 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h @@ -43,7 +43,7 @@ namespace Dml std::vector> BuildPartitions( const onnxruntime::GraphViewer& graph, - const winrt::Windows::AI::MachineLearning::implementation::InternalRegistrationInfoMap& internalRegInfoMap, + const Windows::AI::MachineLearning::Adapter::InternalRegistrationInfoMap& internalRegInfoMap, const std::vector& registries, uint32_t supportedDeviceDataTypeMask, // Each bit corresponds to each DML_TENSOR_DATA_TYPE. 
std::unordered_map& graphNodePropertyMap, @@ -53,7 +53,7 @@ namespace Dml std::vector> PartitionGraph( const onnxruntime::GraphViewer& graph, - const winrt::Windows::AI::MachineLearning::implementation::InternalRegistrationInfoMap& internalRegInfoMap, + const Windows::AI::MachineLearning::Adapter::InternalRegistrationInfoMap& internalRegInfoMap, const std::vector& registries, uint32_t supportedDeviceDataTypeMask, // Each bit corresponds to each DML_TENSOR_DATA_TYPE. onnxruntime::KernelRegistry* registryForPartitionKernels, @@ -64,7 +64,7 @@ namespace Dml const onnxruntime::Node& node, const onnxruntime::KernelRegistry& registry, uint32_t supportedDeviceDataTypeMask, // Each bit corresponds to each DML_TENSOR_DATA_TYPE. - const winrt::Windows::AI::MachineLearning::implementation::InternalRegistrationInfoMap& internalRegInfoMap, + const Windows::AI::MachineLearning::Adapter::InternalRegistrationInfoMap& internalRegInfoMap, bool allow64BitInputThroughStrides, _In_opt_ const std::unordered_map* nodeNameToPartitionMap // Only used when allow64BitInputThroughStrides is true ); diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp index b959e0c930755..5df941e9ef887 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp @@ -13,7 +13,8 @@ using namespace Microsoft::WRL; -namespace winrt::Windows::AI::MachineLearning::implementation { +namespace Windows::AI::MachineLearning::Adapter +{ size_t AttributeValue::ElementCount() const { switch (type) { @@ -91,8 +92,8 @@ bool IsAllocationInterface(const ::OrtMemoryInfo& info) { // the ABI. The translation is determined by the provider and based on options with which the // kernels are registered. 
void TranslateAllocationDataToAbi( - winrt::Windows::AI::MachineLearning::implementation::IWinmlExecutionProvider* winmlProvider, - bool isInternalOperator, + IWinmlExecutionProvider* winmlProvider, + bool isInternalOperator, const ::OrtMemoryInfo& allocInfo, IUnknown* allocation, IUnknown** abiAllocation) { @@ -1669,17 +1670,20 @@ EdgeShapes AbiOpKernel::GetInputShapes(onnxruntime::OpKernelContext* context) co void AbiOpKernel::InferAndVerifyOutputSizes( gsl::span requiredConstantCpuInputs, - MLOperatorTensorGetter& constantInputGetter, - const EdgeShapes* inputShapes, - EdgeShapes& outputShapes) const { - winrt::Windows::AI::MachineLearning::implementation::InferAndVerifyOutputSizes( - Node(), - m_defaultAttributes, - m_shapeInferrer.Get(), - requiredConstantCpuInputs, - constantInputGetter, - inputShapes, - outputShapes); + MLOperatorTensorGetter& constantInputGetter, + const EdgeShapes* inputShapes, + EdgeShapes& outputShapes) const +{ + // call the non member function (below) + Windows::AI::MachineLearning::Adapter::InferAndVerifyOutputSizes( + Node(), + m_defaultAttributes, + m_shapeInferrer.Get(), + requiredConstantCpuInputs, + constantInputGetter, + inputShapes, + outputShapes + ); } void InferAndVerifyOutputSizes( diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h index d6fabb2f7287b..0168da24ef4ca 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h @@ -21,7 +21,7 @@ namespace WRL >; } -namespace winrt::Windows::AI::MachineLearning::implementation +namespace Windows::AI::MachineLearning::Adapter { using namespace Microsoft::WRL; @@ -380,7 +380,7 @@ class OpKernelInfoWrapper : public OpNodeInfoWrapper< bool m_allowOutputShapeQuery = false; bool m_internalOperator = false; - ComPtr m_winmlProvider; + ComPtr 
m_winmlProvider; const onnxruntime::OpKernelInfo* m_impl = nullptr; @@ -435,7 +435,7 @@ class DmlGraphOpKernelInfoWrapper : public OpNodeInfoWrapper< // For shape info, in addition to the info const EdgeShapes* m_inferredOutputShapes = nullptr; - ComPtr m_winmlProvider; + ComPtr m_winmlProvider; bool m_internalOperator = false; // The execution object returned through the ABI, which may vary according to kernel @@ -477,7 +477,7 @@ class OpKernelContextWrapper : public WRL::Base, publi std::vector> m_outputTensors; const onnxruntime::IExecutionProvider* m_provider = nullptr; - ComPtr m_winmlProvider; + ComPtr m_winmlProvider; bool m_internalOperator = false; // The execution object returned to the kernel may vary according to kernel execution options @@ -542,7 +542,7 @@ class AbiOpKernel : public onnxruntime::OpKernel mutable std::mutex m_mutex; mutable EdgeShapes m_inferredOutputShapes; - ComPtr m_winmlProvider; + ComPtr m_winmlProvider; bool m_internalOperator = false; std::vector m_requiredConstantCpuInputs; @@ -640,4 +640,4 @@ bool TryGetStaticOutputShapes(const onnxruntime::Node& node, EdgeShapes& outputS bool ContainsEmptyDimensions(const EdgeShapes& shapes); std::tuple, size_t> UnpackTensor(const onnx::TensorProto& initializer); -} // namespace winrt::Windows::AI::MachineLearning::implementation +} // namespace Windows::AI::MachineLearning::Adapter diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp index 7f27291ede562..b64ae7dc751ae 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp @@ -134,7 +134,7 @@ class DmlOperatorPoolingTemplate : public DmlOperatorPooling } }; -void QueryMaxPool(IMLOperatorSupportQueryContextPrivate* context, bool *isSupported) +void CALLBACK 
QueryMaxPool(IMLOperatorSupportQueryContextPrivate* context, bool *isSupported) { *isSupported = false; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSlice.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSlice.cpp index e167a89f0606e..0e9d0feb5a815 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSlice.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSlice.cpp @@ -71,7 +71,7 @@ class DmlOperatorSliceTemplate : public DmlOperatorSlice } }; -void QuerySlice(IMLOperatorSupportQueryContextPrivate* context, bool *isSupported) +void CALLBACK QuerySlice(IMLOperatorSupportQueryContextPrivate* context, bool *isSupported) { *isSupported = (context->GetInputCount() <= 4); } diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.h index 186608d78b586..45923d528dc05 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.h @@ -8,8 +8,8 @@ interface IMLOperatorKernel; class MLOperatorKernelCreationContext; // Forward declares an external creation function. 
-#define DML_OP_EXTERN_CREATION_FUNCTION(operatorName) extern void Create##operatorName(IMLOperatorKernelCreationContext* kernelInfo, IMLOperatorKernel** opKernel) -#define DML_OP_EXTERN_QUERY_FUNCTION(operatorName) extern void Query##operatorName(IMLOperatorSupportQueryContextPrivate* context, bool *isSupported); +#define DML_OP_EXTERN_CREATION_FUNCTION(operatorName) extern void CALLBACK Create##operatorName(IMLOperatorKernelCreationContext* kernelInfo, IMLOperatorKernel** opKernel) +#define DML_OP_EXTERN_QUERY_FUNCTION(operatorName) extern void CALLBACK Query##operatorName(IMLOperatorSupportQueryContextPrivate* context, bool* isSupported); // Declares a callback creation function of the given operator class. // This does not register it, just declares it for usage by registration later. @@ -20,7 +20,7 @@ class MLOperatorKernelCreationContext; // commas in them break the macro, and so they are stuffed into the VA_ARGS. // #define DML_OP_DEFINE_CREATION_FUNCTION(operatorName, ...)\ -extern void Create##operatorName(IMLOperatorKernelCreationContext* kernelInfo, IMLOperatorKernel** opKernel)\ +extern void CALLBACK Create##operatorName(IMLOperatorKernelCreationContext* kernelInfo, IMLOperatorKernel** opKernel)\ {\ using T = __VA_ARGS__; \ THROW_IF_FAILED(MLOperatorKernel::CreateInstance(*kernelInfo, /*out*/ opKernel));\ diff --git a/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.cc b/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.cc index 3d25aa1fe388e..9f63513e870e1 100644 --- a/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.cc +++ b/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.cc @@ -25,48 +25,17 @@ namespace GraphTransformerHelpers { - void RegisterGraphTransformers(onnxruntime::InferenceSession* lotusSession, bool registerLotusTransforms) + void RegisterGraphTransformers(onnxruntime::InferenceSession* lotusSession) { // Register Lotus graph transformers + // 
we were able to combine all of the winml/dml/ort work except for 2 transformers. + // these 2 are tracked by : + // Bug 22973884 : Fix issues with BatchNorm + Add and BatchNorm + Mul handling implicit inputs, and move from Winml to ORT // - // TODO: Work out issues controlling graph optimization passes through ORT's optimization level - // and rule list. In the meantime (and before new transformers are tested in Winml), passes - // are registered explicitly, and the optimization level is set to default above (no optimization). - // - // Issues: - // Why is UnsqueezeElimination not registered by name in ORT? - // Why are level 2 (default) transformers not run before partitioning, which the DML XP requires? - // Why are level2 transformers only enabled on the CPU provider in GenerateTransformers? - // Why does name filtering only apply to rule based graph transformers? - // Why is Matmul+Add not used when contrib ops are disabled? - - if (registerLotusTransforms) - { - lotusSession->RegisterGraphTransformer(std::move(std::make_unique()), onnxruntime::TransformerLevel::Level1); - } - std::unique_ptr rule_transformer = std::make_unique("WinmlRuleTransformer"); - - if (registerLotusTransforms) - { - rule_transformer->Register(std::make_unique()); - rule_transformer->Register(std::make_unique()); - rule_transformer->Register(std::make_unique()); - rule_transformer->Register(std::make_unique()); - rule_transformer->Register(std::make_unique()); - rule_transformer->Register(std::make_unique()); - rule_transformer->Register(std::make_unique()); - } - rule_transformer->Register(std::make_unique()); rule_transformer->Register(std::make_unique()); - lotusSession->RegisterGraphTransformer(std::move(rule_transformer), onnxruntime::TransformerLevel::Level1); - - if (registerLotusTransforms) - { - lotusSession->RegisterGraphTransformer(std::move(std::make_unique()), onnxruntime::TransformerLevel::Level1); - } } } \ No newline at end of file diff --git 
a/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.h b/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.h index 169597c0d1341..bd9b1148cf0b0 100644 --- a/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.h +++ b/onnxruntime/core/providers/dml/GraphTransformers/GraphTransformerHelpers.h @@ -5,5 +5,5 @@ namespace GraphTransformerHelpers { - void RegisterGraphTransformers(onnxruntime::InferenceSession* lotusSession, bool registerLotusTransforms); + void RegisterGraphTransformers(onnxruntime::InferenceSession* lotusSession); } \ No newline at end of file diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h index b15955e0e533d..7fee23b8d0004 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h @@ -734,7 +734,7 @@ class MLOperatorKernel : public Microsoft::WRL::RuntimeClass< using MLOperatorTypeInferenceFunction = void (CALLBACK*)(IMLOperatorTypeInferenceContext*); using MLOperatorShapeInferenceFunction = void (CALLBACK*)(IMLOperatorShapeInferenceContext*); -using MLOperatorKernelCreateFn = void(*)(IMLOperatorKernelCreationContext*, IMLOperatorKernel**); +using MLOperatorKernelCreateFn = void(CALLBACK*)(IMLOperatorKernelCreationContext*, IMLOperatorKernel**); using MLOperatorSupportQueryFunction = void (CALLBACK*)(IMLOperatorSupportQueryContextPrivate*, bool*); class MLOperatorShapeInferrer : public Microsoft::WRL::RuntimeClass< diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h index 03c003d2b6d0d..aa8486117fa97 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h @@ 
-6,15 +6,14 @@ #include "Common.h" #include "Attributes.h" #include "MLOperatorAuthorHelper.h" +#include "core/common/common.h" -namespace OperatorHelper -{ +namespace OperatorHelper { bool ContainsEmptyDimensions(gsl::span dimensions); std::vector BroadcastTensorShape( gsl::span inputShape0, - gsl::span inputShape1 - ); + gsl::span inputShape1); // Find all the occurrences of a value, and return the array indices (in ascending order). // @@ -22,19 +21,16 @@ std::vector BroadcastTensorShape( // value = 1 // output indices = {1,3,4} #pragma optimize("", off) -template -void FindValueIndices(gsl::span values, T value, /*out*/ std::vector& indices) -{ - indices.clear(); - for (size_t i = 0, valuesCount = values.size(); i < valuesCount; ++i) - { - // Work around compiler bug on x86 release by using data() rather than operator [] directly. - // cl.exe 19.20.27412.4 for x86 - if (values.data()[i] == value) - { - indices.push_back(gsl::narrow_cast(i)); - } +template +void FindValueIndices(gsl::span values, T value, /*out*/ std::vector& indices) { + indices.clear(); + for (size_t i = 0, valuesCount = values.size(); i < valuesCount; ++i) { + // Work around compiler bug on x86 release by using data() rather than operator [] directly. + // cl.exe 19.20.27412.4 for x86 + if (values.data()[i] == value) { + indices.push_back(gsl::narrow_cast(i)); } + } } #pragma optimize("", on) @@ -51,248 +47,224 @@ void HandleNegativeAxes(gsl::span onnxAxes, uint32_t dimCount); // e.g. input values = {2,1,3,1,1,5} // ellidable input indices = {1,3,4} // output values = {2,3,5} -template -void RemoveValuesByIndex(gsl::span indices, bool keepOneValue, /*inout*/ std::vector& values) -{ - assert(std::is_sorted(indices.begin(), indices.end())); - - // Keep the last value at least, if all values would otherwise be removed. 
- if (keepOneValue && !indices.empty() && indices.size() == values.size()) +template +void RemoveValuesByIndex(gsl::span indices, bool keepOneValue, /*inout*/ std::vector& values) { + assert(std::is_sorted(indices.begin(), indices.end())); + + // Keep the last value at least, if all values would otherwise be removed. + if (keepOneValue && !indices.empty() && indices.size() == values.size()) { + indices = indices.first(indices.size() - 1); + } + + auto indicesIterator = indices.begin(); + auto indicesEnd = indices.end(); + size_t oldValuesCount = values.size(); + size_t newValuesCount = 0; + size_t nextIndex = (indicesIterator == indicesEnd) ? SIZE_MAX : *(indicesIterator++); + + // For every value, either skip the entry, or copy it to the output. + for (size_t i = 0; i < oldValuesCount; ++i) { + if (i == nextIndex) // Skip and remove entry. { - indices = indices.first(indices.size() - 1); - } - - auto indicesIterator = indices.begin(); - auto indicesEnd = indices.end(); - size_t oldValuesCount = values.size(); - size_t newValuesCount = 0; - size_t nextIndex = (indicesIterator == indicesEnd) ? SIZE_MAX : *(indicesIterator++); - - // For every value, either skip the entry, or copy it to the output. - for (size_t i = 0; i < oldValuesCount; ++i) + nextIndex = (indicesIterator == indicesEnd) ? SIZE_MAX : *(indicesIterator++); + } else // Keep and copy entry. { - if (i == nextIndex) // Skip and remove entry. - { - nextIndex = (indicesIterator == indicesEnd) ? SIZE_MAX : *(indicesIterator++); - } - else // Keep and copy entry. 
- { - values[newValuesCount++] = values[i]; - } - + values[newValuesCount++] = values[i]; } - values.resize(newValuesCount); + } + values.resize(newValuesCount); } int64_t ReadAsInt64(MLOperatorTensorDataType tensorDataType, const void* p); -class EdgeShapes -{ -public: - EdgeShapes() = default; - EdgeShapes(const std::vector& dim){ m_shapes = dim; } - EdgeShapes(const std::initializer_list& dim) { m_shapes.assign(dim.begin(), dim.end()); } - EdgeShapes(const gsl::span dim) { m_shapes.assign(dim.begin(), dim.end()); } - - bool IsTensor() { return true; } - bool IsUnused() { return m_shapes.empty(); } - - std::vector& GetShape() { return m_shapes; } -private: - std::vector m_shapes; -}; +class EdgeShapes { + public: + EdgeShapes() = default; + EdgeShapes(const std::vector& dim) { m_shapes = dim; } + EdgeShapes(const std::initializer_list& dim) { m_shapes.assign(dim.begin(), dim.end()); } + EdgeShapes(const gsl::span dim) { m_shapes.assign(dim.begin(), dim.end()); } -struct KernelArgs -{ - // Initialize arrays up to NcdhwSpatialDimensionCount to avoid vector allocations, - // but it's important to use .spatialDimensionCount when accessing them because - // values beyond that may be bogus. - uint32_t strides[NcdhwSpatialDimensionCount]; - uint32_t dilations[NcdhwSpatialDimensionCount]; - uint32_t windowSize[NcdhwSpatialDimensionCount]; - uint32_t startPadding[NcdhwSpatialDimensionCount]; - uint32_t endPadding[NcdhwSpatialDimensionCount]; - uint32_t outputPadding[NcdhwSpatialDimensionCount]; - - KernelArgs(uint32_t spatialDimensionCount) : - autoPad(false), - autoPadSameUpper(false), - spatialDimensionCount(spatialDimensionCount) - { - ML_CHECK_VALID_ARGUMENT(spatialDimensionCount <= NcdhwSpatialDimensionCount); - } + bool IsTensor() { return true; } + bool IsUnused() { return m_shapes.empty(); } - void FillWithLeadingValues(gsl::span input, gsl::span output, uint32_t fillCount, uint32_t value) - { - // e.g. 
- // input = [5,6,7,8] - // fillcount = 2 - // value = 1 - // output = [1,1,5,6,7,8] - - const size_t inputCount = input.size(); - const size_t outputCount = output.size(); - const size_t clampedFillCount = std::min(size_t(fillCount), outputCount); - const size_t copyCount = std::min(outputCount - fillCount, inputCount); - - std::fill_n(output.data(), fillCount, value); - std::copy_n(input.data(), copyCount, output.data() + fillCount); - } + std::vector& GetShape() { return m_shapes; } - // Create a copy of an existing kernel args with a minimum dimension count, - // filling the leading attribute values with 1's or 0's respectively. - KernelArgs(KernelArgs const& kernelArgs, uint32_t minimumDimensionCount) : - autoPad(kernelArgs.autoPad), - autoPadSameUpper(kernelArgs.autoPadSameUpper), - spatialDimensionCount(std::max(kernelArgs.spatialDimensionCount, minimumDimensionCount)) - { - ML_CHECK_VALID_ARGUMENT(spatialDimensionCount <= NcdhwSpatialDimensionCount); - - uint32_t fillCount = (minimumDimensionCount > kernelArgs.spatialDimensionCount) ? minimumDimensionCount - kernelArgs.spatialDimensionCount : 0; - FillWithLeadingValues(kernelArgs.strides, this->strides, fillCount, 1u); - FillWithLeadingValues(kernelArgs.dilations, this->dilations, fillCount, 1u); - FillWithLeadingValues(kernelArgs.windowSize, this->windowSize, fillCount, 1u); - FillWithLeadingValues(kernelArgs.startPadding, this->startPadding, fillCount, 0u); - FillWithLeadingValues(kernelArgs.endPadding, this->endPadding, fillCount, 0u); - FillWithLeadingValues(kernelArgs.outputPadding, this->outputPadding, fillCount, 0u); - } + private: + std::vector m_shapes; +}; - // This is true if padding must be automatically computed based on input sizes. - // ResolveAutoPadding must happen during Compute rather than initialization. - // This is temporary until kernel initialization routine once Lotus can provide - // sizes at operator initialization. 
- bool autoPad; - bool autoPadSameUpper; - uint32_t spatialDimensionCount; +struct KernelArgs { + // Initialize arrays up to NcdhwSpatialDimensionCount to avoid vector allocations, + // but it's important to use .spatialDimensionCount when accessing them because + // values beyond that may be bogus. + uint32_t strides[NcdhwSpatialDimensionCount]; + uint32_t dilations[NcdhwSpatialDimensionCount]; + uint32_t windowSize[NcdhwSpatialDimensionCount]; + uint32_t startPadding[NcdhwSpatialDimensionCount]; + uint32_t endPadding[NcdhwSpatialDimensionCount]; + uint32_t outputPadding[NcdhwSpatialDimensionCount]; + + KernelArgs(uint32_t spatialDimensionCount) : autoPad(false), + autoPadSameUpper(false), + spatialDimensionCount(spatialDimensionCount) { + ML_CHECK_VALID_ARGUMENT(spatialDimensionCount <= NcdhwSpatialDimensionCount); + } + + void FillWithLeadingValues(gsl::span input, gsl::span output, uint32_t fillCount, uint32_t value) { + // e.g. + // input = [5,6,7,8] + // fillcount = 2 + // value = 1 + // output = [1,1,5,6,7,8] + + const size_t inputCount = input.size(); + const size_t outputCount = output.size(); + const size_t clampedFillCount = std::min(size_t(fillCount), outputCount); + const size_t copyCount = std::min(outputCount - fillCount, inputCount); + + std::fill_n(output.data(), fillCount, value); + std::copy_n(input.data(), copyCount, output.data() + fillCount); + } + + // Create a copy of an existing kernel args with a minimum dimension count, + // filling the leading attribute values with 1's or 0's respectively. + KernelArgs(KernelArgs const& kernelArgs, uint32_t minimumDimensionCount) : autoPad(kernelArgs.autoPad), + autoPadSameUpper(kernelArgs.autoPadSameUpper), + spatialDimensionCount(std::max(kernelArgs.spatialDimensionCount, minimumDimensionCount)) { + ML_CHECK_VALID_ARGUMENT(spatialDimensionCount <= NcdhwSpatialDimensionCount); + + uint32_t fillCount = (minimumDimensionCount > kernelArgs.spatialDimensionCount) ? 
minimumDimensionCount - kernelArgs.spatialDimensionCount : 0; + FillWithLeadingValues(kernelArgs.strides, this->strides, fillCount, 1u); + FillWithLeadingValues(kernelArgs.dilations, this->dilations, fillCount, 1u); + FillWithLeadingValues(kernelArgs.windowSize, this->windowSize, fillCount, 1u); + FillWithLeadingValues(kernelArgs.startPadding, this->startPadding, fillCount, 0u); + FillWithLeadingValues(kernelArgs.endPadding, this->endPadding, fillCount, 0u); + FillWithLeadingValues(kernelArgs.outputPadding, this->outputPadding, fillCount, 0u); + } + + // This is true if padding must be automatically computed based on input sizes. + // ResolveAutoPadding must happen during Compute rather than initialization. + // This is temporary until kernel initialization routine once Lotus can provide + // sizes at operator initialization. + bool autoPad; + bool autoPadSameUpper; + uint32_t spatialDimensionCount; }; std::vector InitializeKernelOutputDimensions( gsl::span inputDimensions, - const KernelArgs& args -); + const KernelArgs& args); std::vector InitializeKernelOutputDimsTranspose( gsl::span inputDimensions, - const KernelArgs& args -); + const KernelArgs& args); KernelArgs InitializeGlobalKernel(gsl::span inputDimensions); KernelArgs InitializeKernel( const MLOperatorAttributes& kernelInfo, uint32_t inputDimensionCount, - gsl::span filterTensorShape -); + gsl::span filterTensorShape); void ResolveAutoPadding( KernelArgs& args, - gsl::span inputDimensions -); + gsl::span inputDimensions); + +class GetOutputShapeAsInputShapeHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + GetOutputShapeAsInputShapeHelper(const Info_t& info, const Shape_t& shape){ + ORT_UNUSED_PARAMETER(info); + ORT_UNUSED_PARAMETER(shape); + }; + + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; +}; -class GetOutputShapeAsInputShapeHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - GetOutputShapeAsInputShapeHelper(const Info_t& info, const Shape_t& shape) {}; +class GetBroadcastedOutputShapeHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + GetBroadcastedOutputShapeHelper(const Info_t& info, const Shape_t& shape){}; - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; }; -class GetBroadcastedOutputShapeHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - GetBroadcastedOutputShapeHelper(const Info_t& info, const Shape_t& shape) {}; +class RandomUniformHelperBase { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. 
+ template + RandomUniformHelperBase(const Info_t& info) { + m_high = info.GetOptionalAttribute(AttrName::High, 1.0f); + m_low = info.GetOptionalAttribute(AttrName::Low, 0.0f); + + if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) { + m_seed = info.GetAttribute(AttrName::Seed); + } else { + m_seed = static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + } + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + protected: + float m_high; + float m_low; + float m_seed; }; -class RandomUniformHelperBase -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - template - RandomUniformHelperBase(const Info_t& info) - { - m_high = info.GetOptionalAttribute(AttrName::High, 1.0f); - m_low = info.GetOptionalAttribute(AttrName::Low, 0.0f); +class RandomUniformHelper : public RandomUniformHelperBase { + public: + template + RandomUniformHelper(const Info_t& info, const Shape_t& shape) : RandomUniformHelperBase(info) { + auto shapeAttribute = info.GetOptionalAttributeVectorInt32(AttrName::Shape); + ML_CHECK_VALID_ARGUMENT(!shapeAttribute.empty(), "Attribute shape is missing."); + m_tensorShape.assign(shapeAttribute.begin(), shapeAttribute.end()); + } - if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) - { - m_seed = info.GetAttribute(AttrName::Seed); - } - else - { - m_seed = static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count()); - } - } -protected: - float m_high; - float m_low; - float m_seed; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + + private: + // Returns an empty vector if the optional attribute is missing. 
+ std::vector m_tensorShape; }; -class RandomUniformHelper : public RandomUniformHelperBase -{ -public: - template - RandomUniformHelper(const Info_t& info, const Shape_t& shape) : RandomUniformHelperBase(info) - { - auto shapeAttribute = info.GetOptionalAttributeVectorInt32(AttrName::Shape); - ML_CHECK_VALID_ARGUMENT(!shapeAttribute.empty(), "Attribute shape is missing."); - m_tensorShape.assign(shapeAttribute.begin(), shapeAttribute.end()); +class RandomNormalHelperBase { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + template + RandomNormalHelperBase(const Info_t& info) { + m_mean = info.GetOptionalAttribute(AttrName::Mean, 0.0f); + m_scale = info.GetOptionalAttribute(AttrName::Scale, 1.0f); + + if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) { + m_seed = info.GetAttribute(AttrName::Seed); + } else { + m_seed = static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count()); } + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -private: - // Returns an empty vector if the optional attribute is missing. - std::vector m_tensorShape; + protected: + float m_mean; + float m_scale; + float m_seed; }; -class RandomNormalHelperBase -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. 
- template - RandomNormalHelperBase(const Info_t& info) - { - m_mean = info.GetOptionalAttribute(AttrName::Mean, 0.0f); - m_scale = info.GetOptionalAttribute(AttrName::Scale, 1.0f); - - if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) - { - m_seed = info.GetAttribute(AttrName::Seed); - } - else - { - m_seed = static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count()); - } - } -protected: - float m_mean; - float m_scale; - float m_seed; -}; +class RandomNormalHelper : public RandomNormalHelperBase { + public: + template + RandomNormalHelper(const Info_t& info, const Shape_t& shape) : RandomNormalHelperBase(info) { + auto shapeAttribute = info.GetOptionalAttributeVectorInt32(AttrName::Shape); + ML_CHECK_VALID_ARGUMENT(!shapeAttribute.empty(), "Attribute shape is missing."); + m_tensorShape.assign(shapeAttribute.begin(), shapeAttribute.end()); + } -class RandomNormalHelper : public RandomNormalHelperBase -{ -public: - template - RandomNormalHelper(const Info_t& info, const Shape_t& shape) : RandomNormalHelperBase(info) - { - auto shapeAttribute = info.GetOptionalAttributeVectorInt32(AttrName::Shape); - ML_CHECK_VALID_ARGUMENT(!shapeAttribute.empty(), "Attribute shape is missing."); - m_tensorShape.assign(shapeAttribute.begin(), shapeAttribute.end()); - } + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -private: - // Returns an empty vector if the optional attribute is missing. - std::vector m_tensorShape; + private: + // Returns an empty vector if the optional attribute is missing. 
+ std::vector m_tensorShape; }; class ConvolutionHelperBase @@ -320,18 +292,17 @@ class ConvolutionHelperBase } } - void ResolvingPadding(gsl::span inputDimensions); + void ResolvingPadding(gsl::span inputDimensions); - const std::vector& GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const - { - return m_outputShapes; - } - - template - void InitializeKernelAndShapes(const Shape_t& shapeInfo) - { - const std::vector inputDimensions = shapeInfo.GetInputTensorShape(0); - const std::vector filterDims = shapeInfo.GetInputTensorShape(1); + const std::vector& GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { + ORT_UNUSED_PARAMETER(shapeInfo); + return m_outputShapes; + } + + template + void InitializeKernelAndShapes(const Shape_t& shapeInfo) { + const std::vector inputDimensions = shapeInfo.GetInputTensorShape(0); + const std::vector filterDims = shapeInfo.GetInputTensorShape(1); ML_CHECK_VALID_ARGUMENT( inputDimensions.size() >= 3 && inputDimensions.size() <= 5, @@ -358,10 +329,10 @@ class ConvolutionHelperBase ); } - const std::vector inputDimensions = shapeInfo.GetInputTensorShape(0); - const std::vector filterDims = shapeInfo.GetInputTensorShape(1); + const std::vector inputDimensions = shapeInfo.GetInputTensorShape(0); + const std::vector filterDims = shapeInfo.GetInputTensorShape(1); - ML_CHECK_VALID_ARGUMENT(inputDimensions.size() > NonspatialDimensionCount, "Input dimensions must be >= 3"); + ML_CHECK_VALID_ARGUMENT(inputDimensions.size() > NonspatialDimensionCount, "Input dimensions must be >= 3"); if (hasDynamicPads) { @@ -396,49 +367,45 @@ class ConvolutionHelperBase assert(m_outputShapes[0].GetShape().size() > C); m_outputShapes[0].GetShape()[C] = filterDims[C] * m_groupCount; - if (!outputShape.empty()) - { - // Start padding, end padding, and output padding are all ignored if output shape is set. 
- std::fill(m_kernel.outputPadding, m_kernel.outputPadding + m_kernel.spatialDimensionCount, 0); - - if (outputShape.size() > 2) - { - ML_CHECK_VALID_ARGUMENT(outputShape[outputShape.size() - 3] == gsl::narrow_cast(m_outputShapes[0].GetShape()[C]), "Output channel must be equivalent to filter channel."); - } - - for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) - { - size_t outputIndex = outputShape.size() - m_kernel.spatialDimensionCount + i; - ML_CHECK_VALID_ARGUMENT(outputShape[outputIndex] >= gsl::narrow_cast(inputDimensions[H + i]), "Output dimension cannot be smaller than input dimension."); - m_outputShapes[0].GetShape()[H + i] = outputShape[outputIndex]; - } - - const int dimOffset = gsl::narrow_cast(inputDimensions.size() - m_kernel.spatialDimensionCount); - - for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) - { - int stride = m_kernel.strides[i]; - int windowSize = m_kernel.windowSize[i]; - - // Compute padding such that in reverse order, the logical input (m_outputShapes below) is fully defined - // for a convolution over the logical output region (inputDimensions below). - // - // The padding required is the first windowSize element (for the first logical output element), - // plus (logicalOutput - 1) steps of stride (the distance between each windowed set of logical - // input elements), minus the actual logical input size. - int paddings = gsl::narrow_cast((inputDimensions[i + dimOffset] - 1) * stride + windowSize - m_outputShapes[0].GetShape()[i + dimOffset]); - paddings = std::max(0, paddings); - - m_kernel.startPadding[i] = m_kernel.autoPadSameUpper ? (paddings + 1) / 2 : paddings / 2; - m_kernel.endPadding[i] = paddings - m_kernel.startPadding[i]; - } - } + if (!outputShape.empty()) { + // Start padding, end padding, and output padding are all ignored if output shape is set. 
+ std::fill(m_kernel.outputPadding, m_kernel.outputPadding + m_kernel.spatialDimensionCount, 0); + + if (outputShape.size() > 2) { + ML_CHECK_VALID_ARGUMENT(outputShape[outputShape.size() - 3] == gsl::narrow_cast(m_outputShapes[0].GetShape()[C]), "Output channel must be equivalent to filter channel."); + } + + for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) { + size_t outputIndex = outputShape.size() - m_kernel.spatialDimensionCount + i; + ML_CHECK_VALID_ARGUMENT(outputShape[outputIndex] >= gsl::narrow_cast(inputDimensions[H + i]), "Output dimension cannot be smaller than input dimension."); + m_outputShapes[0].GetShape()[H + i] = outputShape[outputIndex]; + } + + const int dimOffset = gsl::narrow_cast(inputDimensions.size() - m_kernel.spatialDimensionCount); + + for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) { + int stride = m_kernel.strides[i]; + int windowSize = m_kernel.windowSize[i]; + + // Compute padding such that in reverse order, the logical input (m_outputShapes below) is fully defined + // for a convolution over the logical output region (inputDimensions below). + // + // The padding required is the first windowSize element (for the first logical output element), + // plus (logicalOutput - 1) steps of stride (the distance between each windowed set of logical + // input elements), minus the actual logical input size. + int paddings = gsl::narrow_cast((inputDimensions[i + dimOffset] - 1) * stride + windowSize - m_outputShapes[0].GetShape()[i + dimOffset]); + paddings = std::max(0, paddings); + + m_kernel.startPadding[i] = m_kernel.autoPadSameUpper ? 
(paddings + 1) / 2 : paddings / 2; + m_kernel.endPadding[i] = paddings - m_kernel.startPadding[i]; + } } + } -protected: - uint32_t m_groupCount; - KernelArgs m_kernel; - std::vector m_outputShapes; + protected: + uint32_t m_groupCount; + KernelArgs m_kernel; + std::vector m_outputShapes; }; class ConvHelper : public ConvolutionHelperBase @@ -470,6 +437,7 @@ class GemmHelper template GemmHelper(const Info_t& info, const Shape_t& shape) { + ORT_UNUSED_PARAMETER(shape); m_transA = info.GetOptionalAttribute(AttrName::TransA, 0); m_transB = info.GetOptionalAttribute(AttrName::TransB, 0); m_broadcast = info.GetOptionalAttribute(AttrName::Broadcast, 0); @@ -477,61 +445,57 @@ class GemmHelper m_beta = info.GetOptionalAttribute(AttrName::Beta, 0.0f); } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - enum InputTensors { IN_A, IN_B, IN_C }; + enum InputTensors { IN_A, + IN_B, + IN_C }; -protected: - bool m_transA = false; - bool m_transB = false; - bool m_broadcast = false; - float m_alpha = 0.0f; - float m_beta = 0.0f; + protected: + bool m_transA = false; + bool m_transB = false; + bool m_broadcast = false; + float m_alpha = 0.0f; + float m_beta = 0.0f; }; -class TransposeHelper -{ -public: - void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions - ); +class TransposeHelper { + public: + void Initialize( + const MLOperatorAttributes& operatorAttributes, + gsl::span inputDimensions); - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - TransposeHelper(const Info_t& info, const Shape_t& shape) - { - Initialize(info, shape.GetInputTensorShape(0)); - } + // Info_t is used to obtain attributes which will be used for calculating the output shape later. 
+ // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + TransposeHelper(const Info_t& info, const Shape_t& shape) { + Initialize(info, shape.GetInputTensorShape(0)); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - std::vector m_permutations; + protected: + std::vector m_permutations; }; -class SplitHelper -{ -public: - void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions - ); +class SplitHelper { + public: + void Initialize( + const MLOperatorAttributes& operatorAttributes, + gsl::span inputDimensions); - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - SplitHelper(const Info_t& info, const Shape_t& shape) - { - Initialize(info, shape.GetInputTensorShape(0)); - } + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + SplitHelper(const Info_t& info, const Shape_t& shape) { + Initialize(info, shape.GetInputTensorShape(0)); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int m_axis = 0; - std::vector m_split; + protected: + int m_axis = 0; + std::vector m_split; }; class SliceHelperBase @@ -565,7 +529,7 @@ class SliceHelperBase ends.push_back(gsl::narrow_cast(endsData[i])); } uint32_t inputCount = operatorInfo.GetInputCount(); - if (operatorInfo.GetInputCount() > 3) + if (inputCount > 3) { MLOperatorTensor axesTensor = operatorInfo.GetConstantInputTensor(3); const std::vector& axesTensorDimensions = axesTensor.GetShape(); @@ -577,7 +541,7 @@ class SliceHelperBase } } - if (operatorInfo.GetInputCount() > 4) + if (inputCount > 4) { MLOperatorTensor stepsTensor = operatorInfo.GetConstantInputTensor(4); const std::vector& stepsTensorDimensions = stepsTensor.GetShape(); @@ -620,6 +584,9 @@ class SliceHelperBase ReadIndexTensors(operatorInfo, starts, ends, axes, steps); } } + + const uint32_t dimCount = gsl::narrow_cast(inputDimensions.size()); + HandleNegativeAxes(/*inout*/ axes, dimCount); ML_CHECK_VALID_ARGUMENT(starts.size() == ends.size(), "'starts' must equal 'ends' in size."); ML_CHECK_VALID_ARGUMENT(axes.empty() || starts.size() == axes.size(), "'axes' must equal 'starts' in size, or 'axes' must be empty."); @@ -668,13 +635,13 @@ class SliceHelperBase Initialize(info, shape.GetInputTensorShape(0), opsetVersion); } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - std::vector m_outputDimensions; - std::vector m_offsets; - std::vector m_sizes; - std::vector m_strides; + protected: + std::vector m_outputDimensions; + std::vector m_offsets; + std::vector m_sizes; + std::vector m_strides; }; class SliceHelper : public 
SliceHelperBase @@ -697,569 +664,506 @@ class PaddingHelper public: void Initialize(const MLOperatorAttributes& operatorAttributes); - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - PaddingHelper(const Info_t& info, const Shape_t& shape) - { - Initialize(info); - } + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + PaddingHelper(const Info_t& info, const Shape_t& shape) { + Initialize(info); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - std::vector m_startPadding; - std::vector m_endPadding; + protected: + std::vector m_startPadding; + std::vector m_endPadding; }; -class ReduceHelperBase -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - ReduceHelperBase(const Info_t& info, const Shape_t& shape, bool usingAxes) - { - m_keepDims = info.GetOptionalAttribute(AttrName::KeepDims, 1); - if (usingAxes) - { - m_axes = info.GetOptionalAttributeVectorInt32(AttrName::Axes); - } - else - { - int axis = info.GetOptionalAttribute(AttrName::Axis, 0); - m_axes.push_back(axis); - } - std::vector inputShape = shape.GetInputTensorShape(0); - HandleNegativeAxes(/*inout*/ m_axes, gsl::narrow_cast(inputShape.size())); - AdjustAxesAndOutputShape(inputShape); +class ReduceHelperBase { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + ReduceHelperBase(const Info_t& info, const Shape_t& shape, bool usingAxes) { + m_keepDims = info.GetOptionalAttribute(AttrName::KeepDims, 1); + if (usingAxes) { + m_axes = info.GetOptionalAttributeVectorInt32(AttrName::Axes); + } else { + int axis = info.GetOptionalAttribute(AttrName::Axis, 0); + m_axes.push_back(axis); } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - -private: - void AdjustAxesAndOutputShape(const std::vector& inputShape); - -protected: - std::vector m_axes; - int m_keepDims = 0; + std::vector inputShape = shape.GetInputTensorShape(0); + HandleNegativeAxes(/*inout*/ m_axes, gsl::narrow_cast(inputShape.size())); + AdjustAxesAndOutputShape(inputShape); + } + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + + private: + void AdjustAxesAndOutputShape(const std::vector& inputShape); + + protected: + std::vector m_axes; + int m_keepDims = 0; }; -class ArgMinArgMaxHelper : public ReduceHelperBase -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - ArgMinArgMaxHelper(const Info_t& info, const Shape_t& shape) : ReduceHelperBase(info, shape, false) {} +class ArgMinArgMaxHelper : public ReduceHelperBase { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + ArgMinArgMaxHelper(const Info_t& info, const Shape_t& shape) : ReduceHelperBase(info, shape, false) {} }; -class ReduceHelper : public ReduceHelperBase -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
- template - ReduceHelper(const Info_t& info, const Shape_t& shape) : ReduceHelperBase(info, shape, true) {} +class ReduceHelper : public ReduceHelperBase { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + ReduceHelper(const Info_t& info, const Shape_t& shape) : ReduceHelperBase(info, shape, true) {} }; -class MatMulHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - MatMulHelper(const Info_t& info, const Shape_t& shape) {} +class MatMulHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + MatMulHelper(const Info_t& info, const Shape_t& shape) {} - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; }; -class TopKHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - TopKHelper(const Info_t& info, const Shape_t& shape) - { - m_k = info.GetOptionalAttribute(AttrName::K, -1); - ML_CHECK_VALID_ARGUMENT(m_k >= 0, "Attribute k is missing."); +class TopKHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + TopKHelper(const Info_t& info, const Shape_t& shape) { + m_k = info.GetOptionalAttribute(AttrName::K, -1); + ML_CHECK_VALID_ARGUMENT(m_k >= 0, "Attribute k is missing."); - m_axis = info.GetOptionalAttribute(AttrName::Axis, -1); - auto inputShape = shape.GetInputTensorShape(0); + m_axis = info.GetOptionalAttribute(AttrName::Axis, -1); + auto inputShape = shape.GetInputTensorShape(0); - if (m_axis < 0) - { - m_axis = m_axis + gsl::narrow_cast(inputShape.size()); - } - ML_CHECK_VALID_ARGUMENT(m_axis >= 0 && m_axis < gsl::narrow_cast(inputShape.size())); + if (m_axis < 0) { + m_axis = m_axis + gsl::narrow_cast(inputShape.size()); } + ML_CHECK_VALID_ARGUMENT(m_axis >= 0 && m_axis < gsl::narrow_cast(inputShape.size())); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int32_t m_k; - int32_t m_axis; + protected: + int32_t m_k; + int32_t m_axis; }; -class RecurrentHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - RecurrentHelper(const Info_t& info, const Shape_t& shape) - { - m_hiddenSize = info.GetOptionalAttribute(AttrName::HiddenSize, 1); - } +class RecurrentHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + RecurrentHelper(const Info_t& info, const Shape_t& shape) { + m_hiddenSize = info.GetOptionalAttribute(AttrName::HiddenSize, 1); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int m_hiddenSize = 0; + protected: + int m_hiddenSize = 0; }; -class ConcatHelper -{ -public: - void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions - ); +class ConcatHelper { + public: + void Initialize( + const MLOperatorAttributes& operatorAttributes, + gsl::span inputDimensions); - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - ConcatHelper(const Info_t& info, const Shape_t& shape) - { - Initialize(info, shape.GetInputTensorShape(0)); - } + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + ConcatHelper(const Info_t& info, const Shape_t& shape) { + Initialize(info, shape.GetInputTensorShape(0)); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int m_axis; + protected: + int m_axis; }; -class CropHelper -{ -public: - enum BorderDim { Left, Top, Right, Bottom }; - enum ScaleDim { Height, Width }; - - void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions - ); - - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
- template - CropHelper(const Info_t& info, const Shape_t& shape) - { - Initialize(info, shape.GetInputTensorShape(0)); - } - - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - -protected: - uint32_t m_offsets[NchwDimensionCount]; - uint32_t m_sizes[NchwSpatialDimensionCount]; +class CropHelper { + public: + enum BorderDim { Left, + Top, + Right, + Bottom }; + enum ScaleDim { Height, + Width }; + + void Initialize( + const MLOperatorAttributes& operatorAttributes, + gsl::span inputDimensions); + + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + CropHelper(const Info_t& info, const Shape_t& shape) { + Initialize(info, shape.GetInputTensorShape(0)); + } + + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + + protected: + uint32_t m_offsets[NchwDimensionCount]; + uint32_t m_sizes[NchwSpatialDimensionCount]; }; -class DepthToSpaceHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - DepthToSpaceHelper(const Info_t& info, const Shape_t& shape) - { - m_blockSize = info.GetOptionalAttribute(AttrName::BlockSize, -1); - ML_CHECK_VALID_ARGUMENT(m_blockSize > 0, "Attribute blocksize is missing or equal to zero."); - } +class DepthToSpaceHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + DepthToSpaceHelper(const Info_t& info, const Shape_t& shape) { + m_blockSize = info.GetOptionalAttribute(AttrName::BlockSize, -1); + ML_CHECK_VALID_ARGUMENT(m_blockSize > 0, "Attribute blocksize is missing or equal to zero."); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int32_t m_blockSize; + protected: + int32_t m_blockSize; }; -class SpaceToDepthHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - SpaceToDepthHelper(const Info_t& info, const Shape_t& shape) - { - m_blockSize = info.GetOptionalAttribute(AttrName::BlockSize, -1); - ML_CHECK_VALID_ARGUMENT(m_blockSize > 0, "Attribute blocksize is missing or equal to zero."); - } +class SpaceToDepthHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + SpaceToDepthHelper(const Info_t& info, const Shape_t& shape) { + m_blockSize = info.GetOptionalAttribute(AttrName::BlockSize, -1); + ML_CHECK_VALID_ARGUMENT(m_blockSize > 0, "Attribute blocksize is missing or equal to zero."); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int32_t m_blockSize; + protected: + int32_t m_blockSize; }; -class FlattenHelper -{ -public: - void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions - ); +class FlattenHelper { + public: + void Initialize( + const MLOperatorAttributes& operatorAttributes, + gsl::span inputDimensions); - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - FlattenHelper(const Info_t& info, const Shape_t& shape) - { - Initialize(info, shape.GetInputTensorShape(0)); - } + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + FlattenHelper(const Info_t& info, const Shape_t& shape) { + Initialize(info, shape.GetInputTensorShape(0)); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int m_axis = 1; + protected: + int m_axis = 1; }; -class MultinomialHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
- template - MultinomialHelper(const Info_t& info, const Shape_t& shape) - {} +class MultinomialHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + MultinomialHelper(const Info_t& info, const Shape_t& shape) {} - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; }; -class GatherHelper -{ -public: - void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span dataDimensions - ); +class GatherHelper { + public: + void Initialize( + const MLOperatorAttributes& operatorAttributes, + gsl::span dataDimensions); - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - GatherHelper(const Info_t& info, const Shape_t& shape) - { - Initialize(info, shape.GetInputTensorShape(0)); - } + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + GatherHelper(const Info_t& info, const Shape_t& shape) { + Initialize(info, shape.GetInputTensorShape(0)); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - int m_axis = 0; + protected: + int m_axis = 0; }; -class PoolingHelperBase -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
- template - PoolingHelperBase( - const Info_t& info, - const Shape_t& shape, - bool useGlobalPooling - ) : m_kernel(useGlobalPooling - ? InitializeGlobalKernel(shape.GetInputTensorShape(0)) - : InitializeKernel(info, static_cast(shape.GetInputTensorShape(0).size()), gsl::span()) - ) - { - if (!useGlobalPooling) - { - ResolveAutoPadding(m_kernel, shape.GetInputTensorShape(0)); - } +class PoolingHelperBase { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + PoolingHelperBase( + const Info_t& info, + const Shape_t& shape, + bool useGlobalPooling) : m_kernel(useGlobalPooling + ? InitializeGlobalKernel(shape.GetInputTensorShape(0)) + : InitializeKernel(info, static_cast(shape.GetInputTensorShape(0).size()), gsl::span())) { + if (!useGlobalPooling) { + ResolveAutoPadding(m_kernel, shape.GetInputTensorShape(0)); } + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - KernelArgs m_kernel; + protected: + KernelArgs m_kernel; }; -class GlobalPoolingHelper : public PoolingHelperBase -{ -public: - template - GlobalPoolingHelper(const Info_t& info, const Shape_t& shape) : PoolingHelperBase(info, shape, true) {} +class GlobalPoolingHelper : public PoolingHelperBase { + public: + template + GlobalPoolingHelper(const Info_t& info, const Shape_t& shape) : PoolingHelperBase(info, shape, true) {} }; -class PoolingHelper : public PoolingHelperBase -{ -public: - template - PoolingHelper(const Info_t& info, const Shape_t& shape) : PoolingHelperBase(info, shape, false) {} +class PoolingHelper : public PoolingHelperBase { + public: + template + PoolingHelper(const Info_t& info, const Shape_t& shape) : PoolingHelperBase(info, shape, false) {} }; -class RoiPoolingHelper -{ -public: - enum InputTensors { 
INPUT, ROIS }; - - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - RoiPoolingHelper(const Info_t& info, const Shape_t& shape) - { - std::vector pooledShape = info.GetOptionalAttributeVectorInt32(AttrName::PooledShape); - ML_CHECK_VALID_ARGUMENT(pooledShape.size() == 2, "Pooled shape must be 2."); - m_pooledSizeH = pooledShape[0]; - m_pooledSizeW = pooledShape[1]; - } - - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - -protected: - uint32_t m_pooledSizeW; - uint32_t m_pooledSizeH; +class RoiPoolingHelper { + public: + enum InputTensors { INPUT, + ROIS }; + + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + RoiPoolingHelper(const Info_t& info, const Shape_t& shape) { + std::vector pooledShape = info.GetOptionalAttributeVectorInt32(AttrName::PooledShape); + ML_CHECK_VALID_ARGUMENT(pooledShape.size() == 2, "Pooled shape must be 2."); + m_pooledSizeH = pooledShape[0]; + m_pooledSizeW = pooledShape[1]; + } + + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + + protected: + uint32_t m_pooledSizeW; + uint32_t m_pooledSizeH; }; -class SqueezeHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - SqueezeHelper(const Info_t& info, const Shape_t& shape) - { - m_axes = info.GetOptionalAttributeVectorInt32(AttrName::Axes); - std::sort(m_axes.begin(), m_axes.end()); - } +class SqueezeHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. 
+ // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + SqueezeHelper(const Info_t& info, const Shape_t& shape) { + m_axes = info.GetOptionalAttributeVectorInt32(AttrName::Axes); + std::sort(m_axes.begin(), m_axes.end()); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - std::vector m_axes; + protected: + std::vector m_axes; }; -class UnsqueezeHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - UnsqueezeHelper(const Info_t& info, const Shape_t& shape) - { - m_axes = info.GetOptionalAttributeVectorInt32(AttrName::Axes); - std::sort(m_axes.begin(), m_axes.end()); - } +class UnsqueezeHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + UnsqueezeHelper(const Info_t& info, const Shape_t& shape) { + m_axes = info.GetOptionalAttributeVectorInt32(AttrName::Axes); + std::sort(m_axes.begin(), m_axes.end()); + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - std::vector m_axes; + protected: + std::vector m_axes; }; -template -void CALLBACK ShapeInferenceFunction(IMLOperatorShapeInferenceContext* inference_context) -{ - MLShapeInferenceContext helperContext(inference_context); - T opHelper(helperContext, helperContext); +template +void CALLBACK ShapeInferenceFunction(IMLOperatorShapeInferenceContext* inference_context) { + MLShapeInferenceContext helperContext(inference_context); + T opHelper(helperContext, helperContext); - // EdgeInfo to contain whether tensor, whether unused, and what shape is - std::vector outputShapes = opHelper.GetOutputShapes(helperContext); + // EdgeInfo to contain whether tensor, whether unused, and what shape is + std::vector outputShapes = opHelper.GetOutputShapes(helperContext); - for (uint32_t i = 0; i < outputShapes.size(); ++i) - { - if (outputShapes[i].IsTensor() && !outputShapes[i].IsUnused()) - { - helperContext.SetOutputTensorShape(i, outputShapes[i].GetShape()); - } + for (uint32_t i = 0; i < outputShapes.size(); ++i) { + if (outputShapes[i].IsTensor() && !outputShapes[i].IsUnused()) { + helperContext.SetOutputTensorShape(i, outputShapes[i].GetShape()); } + } } -class ReshapeHelper -{ -public: - template - ReshapeHelper(const Info_t& info, const Shape_t& shape) - { - ML_CHECK_VALID_ARGUMENT(info.GetInputCount() >= 2); - ML_CHECK_VALID_ARGUMENT(info.GetOutputCount() >= 1); - - MLOperatorTensor shapeTensor = info.GetConstantInputTensor(1); +class ReshapeHelper { + public: + template + ReshapeHelper(const Info_t& info, const Shape_t& shape) { + ML_CHECK_VALID_ARGUMENT(info.GetInputCount() >= 2); + 
ML_CHECK_VALID_ARGUMENT(info.GetOutputCount() >= 1); - // The 'shape' tensor is a 1D tensor holding the new shape to reshape to, - // and the first element of its own shape holds how many dimensions there - // will be for the output. - std::vector shapeTensorDimensions = shapeTensor.GetShape(); - ML_CHECK_VALID_ARGUMENT(shapeTensorDimensions.size() == 1, "Reshape's shape tensor must be 1D."); - size_t dimCount = shapeTensorDimensions[0]; - - ML_CHECK_VALID_ARGUMENT(shapeTensor.IsCpuData(), "Reshape's shape tensor must be CPU Tensor."); - const int64_t* shapeData = shapeTensor.GetData(); - - // Shape of shape tensor is how many dims to reshape to. - for (size_t i = 0; i < dimCount; ++i) - { - m_shapeDims.push_back(gsl::narrow_cast(shapeData[i])); - } - } + MLOperatorTensor shapeTensor = info.GetConstantInputTensor(1); - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + // The 'shape' tensor is a 1D tensor holding the new shape to reshape to, + // and the first element of its own shape holds how many dimensions there + // will be for the output. + std::vector shapeTensorDimensions = shapeTensor.GetShape(); + ML_CHECK_VALID_ARGUMENT(shapeTensorDimensions.size() == 1, "Reshape's shape tensor must be 1D."); + size_t dimCount = shapeTensorDimensions[0]; -protected: - std::vector m_shapeDims; -}; + ML_CHECK_VALID_ARGUMENT(shapeTensor.IsCpuData(), "Reshape's shape tensor must be CPU Tensor."); + const int64_t* shapeData = shapeTensor.GetData(); -class ExpandHelper -{ -public: - template - ExpandHelper(const Info_t& info, const Shape_t& shape) - { + // Shape of shape tensor is how many dims to reshape to. 
+ for (size_t i = 0; i < dimCount; ++i) { + m_shapeDims.push_back(gsl::narrow_cast(shapeData[i])); } + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: + protected: + std::vector m_shapeDims; }; -class ConstantOfShapeHelper -{ -public: - template - ConstantOfShapeHelper(const Info_t& info, const Shape_t& shape) - { - } +class ExpandHelper { + public: + template + ExpandHelper(const Info_t& info, const Shape_t& shape) { + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: + protected: }; -class TileHelper -{ -public: - template - TileHelper(const Info_t& info, const Shape_t& shapeInfo) - { - m_inputDimensions = shapeInfo.GetInputTensorShape(0); +class ConstantOfShapeHelper { + public: + template + ConstantOfShapeHelper(const Info_t& info, const Shape_t& shape) { + } - // Read the repeats tensor. 
- const std::vector repeatsTensorDimensions = shapeInfo.GetInputTensorShape(1); - ML_CHECK_VALID_ARGUMENT(repeatsTensorDimensions.size() == 1, "Tile's repeats tensor must be 1D."); - const size_t dimCount = repeatsTensorDimensions[0]; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - MLOperatorTensor repeatsTensor = info.GetConstantInputTensor(1); - const int64_t* repeatsData = repeatsTensor.GetData(); - ML_CHECK_VALID_ARGUMENT(m_inputDimensions.size() == dimCount, "Tile's repeats tensor must be the same dimension count as the input tensor."); - ML_CHECK_VALID_ARGUMENT(repeatsTensor.IsCpuData(), "Tile's repeats tensor must be CPU Tensor."); + protected: +}; - for (size_t i = 0; i < dimCount; ++i) - { - ML_CHECK_VALID_ARGUMENT(repeatsData[i] > 0, "Repeat values should be > 0."); - m_repeatsData.push_back(gsl::narrow_cast(repeatsData[i])); - } +class TileHelper { + public: + template + TileHelper(const Info_t& info, const Shape_t& shapeInfo) { + m_inputDimensions = shapeInfo.GetInputTensorShape(0); + + // Read the repeats tensor. + const std::vector repeatsTensorDimensions = shapeInfo.GetInputTensorShape(1); + ML_CHECK_VALID_ARGUMENT(repeatsTensorDimensions.size() == 1, "Tile's repeats tensor must be 1D."); + const size_t dimCount = repeatsTensorDimensions[0]; + + MLOperatorTensor repeatsTensor = info.GetConstantInputTensor(1); + const int64_t* repeatsData = repeatsTensor.GetData(); + ML_CHECK_VALID_ARGUMENT(m_inputDimensions.size() == dimCount, "Tile's repeats tensor must be the same dimension count as the input tensor."); + ML_CHECK_VALID_ARGUMENT(repeatsTensor.IsCpuData(), "Tile's repeats tensor must be CPU Tensor."); + + for (size_t i = 0; i < dimCount; ++i) { + ML_CHECK_VALID_ARGUMENT(repeatsData[i] > 0, "Repeat values should be > 0."); + m_repeatsData.push_back(gsl::narrow_cast(repeatsData[i])); + } - // Update the computed output shape accordingly, repeat every axis's length by the repeat count. 
- m_outputDimensions.assign(m_inputDimensions.begin(), m_inputDimensions.end()); + // Update the computed output shape accordingly, repeat every axis's length by the repeat count. + m_outputDimensions.assign(m_inputDimensions.begin(), m_inputDimensions.end()); - for (size_t dimIndex = 0; dimIndex < dimCount; ++dimIndex) - { - m_outputDimensions[dimIndex] *= m_repeatsData[dimIndex]; - } + for (size_t dimIndex = 0; dimIndex < dimCount; ++dimIndex) { + m_outputDimensions[dimIndex] *= m_repeatsData[dimIndex]; } + } - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - std::vector m_repeatsData; - std::vector m_inputDimensions; - std::vector m_outputDimensions; + protected: + std::vector m_repeatsData; + std::vector m_inputDimensions; + std::vector m_outputDimensions; }; -class ResizeHelper -{ -public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. - template - ResizeHelper(const Info_t& info, const Shape_t& shape) +class ResizeHelper { + public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. + template + ResizeHelper(const Info_t& info, const Shape_t& shape) { + // Read the scales from the 2nd tensor. + if (info.GetInputCount() > 1) { + MLOperatorTensor scalesTensor = info.GetConstantInputTensor(1); + Initialize(scalesTensor, shape.GetInputTensorShape(0)); + } else // From attribute. { - // Read the scales from the 2nd tensor. - if (info.GetInputCount() > 1) - { - MLOperatorTensor scalesTensor = info.GetConstantInputTensor(1); - Initialize(scalesTensor, shape.GetInputTensorShape(0)); - } - else // From attribute. 
- { - Initialize(info, shape.GetInputTensorShape(0)); - } + Initialize(info, shape.GetInputTensorShape(0)); } + } - void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions - ); + void Initialize( + const MLOperatorAttributes& operatorAttributes, + gsl::span inputDimensions); - void Initialize( - const MLOperatorTensor& scalesTensor, - gsl::span inputDimensions - ); + void Initialize( + const MLOperatorTensor& scalesTensor, + gsl::span inputDimensions); - void InitializeOutputDimensions( - gsl::span scales, - gsl::span inputDimensions - ); + void InitializeOutputDimensions( + gsl::span scales, + gsl::span inputDimensions); - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; -protected: - std::vector m_inputDimensions; - std::vector m_outputDimensions; - std::vector m_scales; // Cached scales to check for updates/invalidate operator. + protected: + std::vector m_inputDimensions; + std::vector m_outputDimensions; + std::vector m_scales; // Cached scales to check for updates/invalidate operator. }; -class OneHotHelper -{ -public: - template - OneHotHelper(const Info_t& info, const Shape_t& shapeInfo) - { - ML_CHECK_VALID_ARGUMENT(info.GetInputCount() == 3); - ML_CHECK_VALID_ARGUMENT(info.GetOutputCount() == 1); - - const std::vector inputDimensions = shapeInfo.GetInputTensorShape(0); - std::vector outputDimensions; - - m_onnxAxis = info.GetOptionalAttribute(AttrName::Axis, -1); - - // Get 'depth' tensor, which is really a scalar for the output size along the given axis. - MLOperatorTensor shapeTensor = info.GetConstantInputTensor(1); - - auto indicesShape = shapeInfo.GetInputTensorShape(0); - m_absoluteAxis = HandleNegativeAxis(m_onnxAxis, gsl::narrow_cast(indicesShape.size() + 1)); - - // The shape tensor ('depth') is a 0D tensor holding the size for the output tensor along the specified axis. 
- // It must be registered as OrtMemType::OrtMemTypeCPUInput for CPU read access. - const uint32_t depthElementCount = ComputeElementCountFromDimensions(shapeTensor.GetShape()); - ML_CHECK_VALID_ARGUMENT(shapeTensor.IsCpuData(), "OneHots's 'depth' tensor must be a CPU Tensor."); - ML_CHECK_VALID_ARGUMENT(depthElementCount == 1, "OneHots's 'depth' tensor must have one element."); - const void* tensorData = shapeTensor.GetByteData(); - const int64_t depth64 = ReadAsInt64(shapeTensor.GetTensorDataType(), tensorData); - ML_CHECK_VALID_ARGUMENT(depth64 > 0, "Negative or zero 'depth' values for OneHot are illegal."); - const uint32_t depth = gsl::narrow_cast(depth64); - m_outputDimensions.assign(indicesShape.begin(), indicesShape.end()); - m_outputDimensions.insert(m_outputDimensions.begin() + m_absoluteAxis, depth); - } - - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - -protected: - int32_t m_onnxAxis = 0; // Original ONNX attribute value, including negative value. - uint32_t m_absoluteAxis = 0; // Absolute index value. - std::vector m_indicesDimensions; - std::vector m_outputDimensions; +class OneHotHelper { + public: + template + OneHotHelper(const Info_t& info, const Shape_t& shapeInfo) { + ML_CHECK_VALID_ARGUMENT(info.GetInputCount() == 3); + ML_CHECK_VALID_ARGUMENT(info.GetOutputCount() == 1); + + const std::vector inputDimensions = shapeInfo.GetInputTensorShape(0); + std::vector outputDimensions; + + m_onnxAxis = info.GetOptionalAttribute(AttrName::Axis, -1); + + // Get 'depth' tensor, which is really a scalar for the output size along the given axis. + MLOperatorTensor shapeTensor = info.GetConstantInputTensor(1); + + auto indicesShape = shapeInfo.GetInputTensorShape(0); + m_absoluteAxis = HandleNegativeAxis(m_onnxAxis, gsl::narrow_cast(indicesShape.size() + 1)); + + // The shape tensor ('depth') is a 0D tensor holding the size for the output tensor along the specified axis. 
+ // It must be registered as OrtMemType::OrtMemTypeCPUInput for CPU read access. + const uint32_t depthElementCount = ComputeElementCountFromDimensions(shapeTensor.GetShape()); + ML_CHECK_VALID_ARGUMENT(shapeTensor.IsCpuData(), "OneHots's 'depth' tensor must be a CPU Tensor."); + ML_CHECK_VALID_ARGUMENT(depthElementCount == 1, "OneHots's 'depth' tensor must have one element."); + const void* tensorData = shapeTensor.GetByteData(); + const int64_t depth64 = ReadAsInt64(shapeTensor.GetTensorDataType(), tensorData); + ML_CHECK_VALID_ARGUMENT(depth64 > 0, "Negative or zero 'depth' values for OneHot are illegal."); + const uint32_t depth = gsl::narrow_cast(depth64); + m_outputDimensions.assign(indicesShape.begin(), indicesShape.end()); + m_outputDimensions.insert(m_outputDimensions.begin() + m_absoluteAxis, depth); + } + + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + + protected: + int32_t m_onnxAxis = 0; // Original ONNX attribute value, including negative value. + uint32_t m_absoluteAxis = 0; // Absolute index value. 
+ std::vector m_indicesDimensions; + std::vector m_outputDimensions; }; using ShapeInferenceHelper_Conv = ConvHelper; @@ -1274,7 +1178,7 @@ using ShapeInferenceHelper_GlobalLpPool = GlobalPoolingHelper; using ShapeInferenceHelper_MaxRoiPool = RoiPoolingHelper; using ShapeInferenceHelper_InstanceNormalization = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_BatchNormalization = GetOutputShapeAsInputShapeHelper; - + using ShapeInferenceHelper_LRN = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_MeanVarianceNormalization = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_LpNormalization = GetOutputShapeAsInputShapeHelper; @@ -1282,7 +1186,7 @@ using ShapeInferenceHelper_RNN = RecurrentHelper; using ShapeInferenceHelper_GRU = RecurrentHelper; using ShapeInferenceHelper_LSTM = RecurrentHelper; using ShapeInferenceHelper_Gather = GatherHelper; - + using ShapeInferenceHelper_Flatten = FlattenHelper; using ShapeInferenceHelper_Split = SplitHelper; using ShapeInferenceHelper_Transpose = TransposeHelper; @@ -1361,11 +1265,11 @@ using ShapeInferenceHelper_ArgMax = ArgMinArgMaxHelper; using ShapeInferenceHelper_ArgMin = ArgMinArgMaxHelper; using ShapeInferenceHelper_Gemm = GemmHelper; using ShapeInferenceHelper_Neg = GetOutputShapeAsInputShapeHelper; - + using ShapeInferenceHelper_Crop = CropHelper; using ShapeInferenceHelper_ImageScaler = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_Upsample = ResizeHelper; - + using ShapeInferenceHelper_Sigmoid = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_HardSigmoid = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_Tanh = GetOutputShapeAsInputShapeHelper; @@ -1384,14 +1288,14 @@ using ShapeInferenceHelper_Softplus = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_ParametricSoftplus = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_Dropout = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_Shrink = 
GetOutputShapeAsInputShapeHelper; - + using ShapeInferenceHelper_Identity = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_MatMul = MatMulHelper; using ShapeInferenceHelper_Cast = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_MemcpyFromHost = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_MemcpyToHost = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_TopK = TopKHelper; - + using ShapeInferenceHelper_RandomUniform = RandomUniformHelper; using ShapeInferenceHelper_RandomUniformLike = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_RandomNormal = RandomNormalHelper; @@ -1408,4 +1312,4 @@ using ShapeInferenceHelper_FusedMatMul = MatMulHelper; using ShapeInferenceHelper_FusedAdd = GetBroadcastedOutputShapeHelper; using ShapeInferenceHelper_FusedSum = GetBroadcastedOutputShapeHelper; -} // namespace OperatorHelper +} // namespace OperatorHelper diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h index 1047a82fae9d2..62e8c30cd19ef 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h @@ -29,7 +29,7 @@ namespace SchemaInferenceOverrider schema->TypeAndShapeInferenceFunction([=](onnx::InferenceContext& ctx) { onnxruntime::OpNodeProtoHelper nodeInfo(&ctx); - if (winrt::Windows::AI::MachineLearning::implementation::InputTensorShapesDefinedOnNode(nodeInfo)) + if (Windows::AI::MachineLearning::Adapter::InputTensorShapesDefinedOnNode(nodeInfo)) { // Check that required constant CPU inputs exist for (uint32_t inputIndex : constantCpuInputsCapture) @@ -41,7 +41,7 @@ namespace SchemaInferenceOverrider } auto abiContext = - wil::MakeOrThrow( + wil::MakeOrThrow( &nodeInfo, &ctx, constantCpuInputsCapture); THROW_IF_FAILED(shapeInferrer->InferOutputShapes(abiContext.Get())); @@ 
-80,7 +80,7 @@ OverrideSchemaInferenceFunction CreateProvider() override; + void SetDefaultRoundingMode(AllocatorRoundingMode rounding_mode); private: ComPtr dml_device_{}; ComPtr cmd_queue_{}; + AllocatorRoundingMode rounding_mode_ = AllocatorRoundingMode::Enabled; }; std::unique_ptr DMLProviderFactory::CreateProvider() { - return Dml::CreateExecutionProvider(dml_device_.Get(), cmd_queue_.Get()); + auto provider = Dml::CreateExecutionProvider(dml_device_.Get(), cmd_queue_.Get()); + Dml::SetDefaultRoundingMode(provider.get(), rounding_mode_); + return provider; +} + +void DMLProviderFactory::SetDefaultRoundingMode(AllocatorRoundingMode rounding_mode) { + rounding_mode_ = rounding_mode; } std::shared_ptr CreateExecutionProviderFactory_DML(IDMLDevice* dml_device, @@ -48,11 +57,20 @@ std::shared_ptr CreateExecutionProviderFactory_DML(ID THROW_HR(E_INVALIDARG); } + ComPtr d3d12_device; + THROW_IF_FAILED(dml_device->GetParentDevice(IID_PPV_ARGS(&d3d12_device))); + const Env& env = Env::Default(); + env.GetTelemetryProvider().LogExecutionProviderEvent(&d3d12_device->GetAdapterLuid()); + return std::make_shared(dml_device, cmd_queue); } -bool IsSoftwareAdapter(IDXGIAdapter1* adapter) -{ +void DmlConfigureProviderFactoryDefaultRoundingMode(IExecutionProviderFactory* factory, AllocatorRoundingMode rounding_mode) { + auto dml_prvider_factory = static_cast(factory); + dml_prvider_factory->SetDefaultRoundingMode(rounding_mode); +} + +bool IsSoftwareAdapter(IDXGIAdapter1* adapter) { DXGI_ADAPTER_DESC1 desc; adapter->GetDesc1(&desc); @@ -81,7 +99,7 @@ std::shared_ptr CreateExecutionProviderFactory_DML(in D3D12_COMMAND_QUEUE_DESC cmd_queue_desc = {}; cmd_queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE; cmd_queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; - + ComPtr cmd_queue; THROW_IF_FAILED(d3d12_device->CreateCommandQueue(&cmd_queue_desc, IID_PPV_ARGS(&cmd_queue))); @@ -90,7 +108,7 @@ std::shared_ptr CreateExecutionProviderFactory_DML(in // In debug builds, enable the DML 
debug layer if the D3D12 debug layer is also enabled #if _DEBUG ComPtr debug_device; - (void)d3d12_device->QueryInterface(IID_PPV_ARGS(&debug_device)); // ignore failure + (void)d3d12_device->QueryInterface(IID_PPV_ARGS(&debug_device)); // ignore failure const bool is_d3d12_debug_layer_enabled = (debug_device != nullptr); if (is_d3d12_debug_layer_enabled) { diff --git a/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py b/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py index b26db65ccc1b6..8985fca17a1f7 100644 --- a/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py +++ b/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py @@ -13,11 +13,14 @@ import onnxruntime from onnx import helper, numpy_helper from onnx import shape_inference +from onnx import IR_VERSION import os from timeit import default_timer as timer def generate_model(rnn_type, input_dim, hidden_dim, bidirectional, layers, model_name, batch_one=True, has_seq_len=False): model = onnx.ModelProto() + model.ir_version = IR_VERSION + opset = model.opset_import.add() opset.domain == 'onnx' opset.version = 7 diff --git a/onnxruntime/core/providers/winml/symbols.txt b/onnxruntime/core/providers/winml/symbols.txt new file mode 100644 index 0000000000000..5fcaf6d21e6e9 --- /dev/null +++ b/onnxruntime/core/providers/winml/symbols.txt @@ -0,0 +1 @@ +OrtGetWinMLAdapter \ No newline at end of file diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 04ce2e7767ac2..65d1eab7d6df6 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -191,6 +191,7 @@ void InferenceSession::ConstructorCommon(const SessionOptions& session_options, StartProfiling(session_options_.profile_file_prefix); } + telemetry_ = {}; // a monotonically increasing session id for use in telemetry session_id_ = global_session_id_.fetch_add(1); } @@ -374,7 +375,7 @@ common::Status 
InferenceSession::Load(std::function l(session_mutex_); + const Env& env = Env::Default(); + env.GetTelemetryProvider().LogSessionCreationStart(); if (!is_model_loaded_) { LOGS(*session_logger_, ERROR) << "Model was not loaded"; return common::Status(common::ONNXRUNTIME, common::FAIL, "Model was not loaded."); @@ -825,10 +870,10 @@ common::Status InferenceSession::Initialize() { is_inited_ = true; // and log telemetry - const Env& env = Env::Default(); + bool model_use_fp16 = ModelUseFP16(model_->ToProto()); env.GetTelemetryProvider().LogSessionCreation(session_id_, model_->IrVersion(), model_->ProducerName(), model_->ProducerVersion(), model_->Domain(), model_->MainGraph().DomainToVersionMap(), model_->MainGraph().Name(), - model_->MetaData(), event_name_, execution_providers_.GetIds()); + model_->MetaData(), telemetry_.event_name_, execution_providers_.GetIds(), model_use_fp16); LOGS(*session_logger_, INFO) << "Session successfully initialized."; } catch (const NotImplementedException& ex) { @@ -1007,6 +1052,7 @@ Status InferenceSession::Run(const RunOptions& run_options, const std::vector exec_providers_to_stop; exec_providers_to_stop.reserve(execution_providers_.NumProviders()); @@ -1017,6 +1063,14 @@ Status InferenceSession::Run(const RunOptions& run_options, const std::vector telemetry_.kDurationBetweenSendingEvaluationStart) { + env.GetTelemetryProvider().LogEvaluationStart(); + // reset counters + telemetry_.time_sent_last_evalutation_start_ = std::chrono::high_resolution_clock::now(); + telemetry_.isEvaluationStart = true; + } + ORT_RETURN_IF_ERROR_SESSIONID_(ValidateInputs(feed_names, feeds)); ORT_RETURN_IF_ERROR_SESSIONID_(ValidateOutputs(output_names, p_fetches)); @@ -1072,20 +1126,24 @@ Status InferenceSession::Run(const RunOptions& run_options, const std::vector kDurationBetweenSending) { + if (TimeDiffMicroSeconds(telemetry_.time_sent_last_) > telemetry_.kDurationBetweenSending) { // send the telemetry - const Env& env = Env::Default(); - 
env.GetTelemetryProvider().LogRuntimePerf(session_id_, total_runs_since_last_, total_run_duration_since_last_); + env.GetTelemetryProvider().LogRuntimePerf(session_id_, telemetry_.total_runs_since_last_, telemetry_.total_run_duration_since_last_); // reset counters - time_sent_last_ = std::chrono::high_resolution_clock::now(); - total_runs_since_last_ = 0; - total_run_duration_since_last_ = 0; + telemetry_.time_sent_last_ = std::chrono::high_resolution_clock::now(); + telemetry_.total_runs_since_last_ = 0; + telemetry_.total_run_duration_since_last_ = 0; } + // check the frequency to send Evalutaion Stop event + if (telemetry_.isEvaluationStart) { + env.GetTelemetryProvider().LogEvaluationStop(); + telemetry_.isEvaluationStart = false; + } // send out profiling events (optional) if (session_profiler_.IsEnabled()) { session_profiler_.EndTimeAndRecordEvent(profiling::SESSION_EVENT, "model_run", tp); diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index cf27c46d552fa..66e90075e5cf0 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -502,15 +502,24 @@ class InferenceSession { #ifdef ENABLE_LANGUAGE_INTEROP_OPS InterOpDomains interop_domains_; #endif - // used to support platform telemetry static std::atomic global_session_id_; // a monotonically increasing session id uint32_t session_id_; // the current session's id - uint32_t total_runs_since_last_; // the total number of Run() calls since the last report - long long total_run_duration_since_last_; // the total duration (us) of Run() calls since the last report - TimePoint time_sent_last_; // the TimePoint of the last report - const long long kDurationBetweenSending = 1000 * 1000 * 60 * 10; // duration in (us). 
send a report every 10 mins - std::string event_name_; // where the model is loaded from: ["model_loading_uri", "model_loading_proto", "model_loading_istream"] + + struct Telemetry { + Telemetry() : time_sent_last_(), time_sent_last_evalutation_start_() {} + uint32_t total_runs_since_last_ = 0; // the total number of Run() calls since the last report + long long total_run_duration_since_last_ = 0; // the total duration (us) of Run() calls since the last report + std::string event_name_; // where the model is loaded from: ["model_loading_uri", "model_loading_proto", "model_loading_istream"] + + TimePoint time_sent_last_; // the TimePoint of the last report + TimePoint time_sent_last_evalutation_start_; + // Event Rate per provider < 20 peak events per second + constexpr static long long kDurationBetweenSending = 1000 * 1000 * 60 * 10; // duration in (us). send a report every 10 mins + constexpr static long long kDurationBetweenSendingEvaluationStart = 1000 * 50; // duration in (us). send a EvaluationStop Event every 50 ms; + + bool isEvaluationStart = false; + } telemetry_; #ifdef ONNXRUNTIME_ENABLE_INSTRUMENT bool session_activity_started_ = false; diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index ca23ed90f76a5..b9e1f58c95988 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -12,7 +12,6 @@ #include #include "core/common/logging/logging.h" -#include "core/common/logging/sinks/clog_sink.h" #include "core/common/status.h" #include "core/graph/graph.h" #include "core/framework/allocator.h" @@ -1394,6 +1393,14 @@ static constexpr OrtApi ort_api_1_to_2 = { // End of Version 1 - DO NOT MODIFY ABOVE (see above text for more information) // Version 2 - In development, feel free to add/remove/rearrange here + &OrtApis::GetDenotationFromTypeInfo, + &OrtApis::CastTypeInfoToMapTypeInfo, + &OrtApis::CastTypeInfoToSequenceTypeInfo, + 
&OrtApis::GetMapKeyType, + &OrtApis::GetMapValueType, + &OrtApis::GetSequenceElementType, + &OrtApis::ReleaseMapTypeInfo, + &OrtApis::ReleaseSequenceTypeInfo }; // Assert to do a limited check to ensure Version 1 of OrtApi never changes (will detect an addition or deletion but not if they cancel out each other) @@ -1421,4 +1428,4 @@ ORT_API(void, OrtApis::ReleaseEnv, _Frees_ptr_opt_ OrtEnv* value) { DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Value, OrtValue) DEFINE_RELEASE_ORT_OBJECT_FUNCTION(RunOptions, OrtRunOptions) -DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Session, ::onnxruntime::InferenceSession) +DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Session, ::onnxruntime::InferenceSession) \ No newline at end of file diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index cdc1ea7b6900f..4e3bf2274aaf4 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -16,6 +16,8 @@ ORT_API(void, ReleaseTypeInfo, OrtTypeInfo*); ORT_API(void, ReleaseTensorTypeAndShapeInfo, OrtTensorTypeAndShapeInfo*); ORT_API(void, ReleaseSessionOptions, OrtSessionOptions*); ORT_API(void, ReleaseCustomOpDomain, OrtCustomOpDomain*); +ORT_API(void, ReleaseMapTypeInfo, OrtMapTypeInfo*); +ORT_API(void, ReleaseSequenceTypeInfo, OrtSequenceTypeInfo*); ORT_API_STATUS_IMPL(CreateStatus, OrtErrorCode code, _In_ const char* msg); OrtErrorCode ORT_API_CALL GetErrorCode(_In_ const OrtStatus* status) NO_EXCEPTION ORT_ALL_ARGS_NONNULL; @@ -144,4 +146,16 @@ ORT_API_STATUS_IMPL(KernelContext_GetOutputCount, _In_ const OrtKernelContext* c ORT_API_STATUS_IMPL(KernelContext_GetInput, _In_ const OrtKernelContext* context, _In_ size_t index, _Out_ const OrtValue** out); ORT_API_STATUS_IMPL(KernelContext_GetOutput, _Inout_ OrtKernelContext* context, _In_ size_t index, _In_ const int64_t* dim_values, size_t dim_count, _Out_ OrtValue** out); +// OrtTypeInfo methods +ORT_API_STATUS_IMPL(GetDenotationFromTypeInfo, _In_ const OrtTypeInfo*, _Out_ const char** const 
denotation, _Out_ size_t* len); +ORT_API_STATUS_IMPL(CastTypeInfoToMapTypeInfo, _In_ const OrtTypeInfo* type_info, _Out_ const OrtMapTypeInfo** out); +ORT_API_STATUS_IMPL(CastTypeInfoToSequenceTypeInfo, _In_ const OrtTypeInfo* type_info, _Out_ const OrtSequenceTypeInfo** out); + +// OrtMapTypeInfo Accessors +ORT_API_STATUS_IMPL(GetMapKeyType, _In_ const OrtMapTypeInfo* map_type_info, _Out_ enum ONNXTensorElementDataType* out); +ORT_API_STATUS_IMPL(GetMapValueType, _In_ const OrtMapTypeInfo* map_type_info, _Outptr_ OrtTypeInfo** type_info); + +// OrtSequenceTypeInfo Accessors +ORT_API_STATUS_IMPL(GetSequenceElementType, _In_ const OrtSequenceTypeInfo* sequence_type_info, _Outptr_ OrtTypeInfo** type_info); + } // namespace OrtApis diff --git a/onnxruntime/core/session/ort_env.cc b/onnxruntime/core/session/ort_env.cc index da804d890c0b2..a74de2242d510 100644 --- a/onnxruntime/core/session/ort_env.cc +++ b/onnxruntime/core/session/ort_env.cc @@ -1,35 +1,44 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+//this file contains implementations of the C API + +#include + #include "ort_env.h" -#include "core/common/logging/logging.h" +#include "core/session/ort_apis.h" +#include "core/session/environment.h" #include "core/common/logging/sinks/clog_sink.h" -using namespace onnxruntime::logging; +#include "core/common/logging/logging.h" +#include "core/session/environment.h" + using namespace onnxruntime; +using namespace onnxruntime::logging; -class LoggingWrapper : public ISink { - public: - LoggingWrapper(OrtLoggingFunction logging_function, void* logger_param) - : logging_function_(logging_function), logger_param_(logger_param) { - } +OrtEnv* OrtEnv::p_instance_ = nullptr; +int OrtEnv::ref_count_ = 0; +onnxruntime::OrtMutex OrtEnv::m_; - void SendImpl(const Timestamp& /*timestamp*/ /*timestamp*/, const std::string& logger_id, - const Capture& message) override { - std::string s = message.Location().ToString(); - logging_function_(logger_param_, static_cast(message.Severity()), message.Category(), - logger_id.c_str(), s.c_str(), message.Message().c_str()); - } +LoggingWrapper::LoggingWrapper(OrtLoggingFunction logging_function, void* logger_param) + : logging_function_(logging_function), logger_param_(logger_param) { +} - private: - OrtLoggingFunction logging_function_; - void* logger_param_; -}; +void LoggingWrapper::SendImpl(const onnxruntime::logging::Timestamp& /*timestamp*/ /*timestamp*/, const std::string& logger_id, + const onnxruntime::logging::Capture& message) { + std::string s = message.Location().ToString(); + logging_function_(logger_param_, static_cast(message.Severity()), message.Category(), + logger_id.c_str(), s.c_str(), message.Message().c_str()); +} + +OrtEnv::OrtEnv(std::unique_ptr value1, std::unique_ptr logging_manager) + : value_(std::move(value1)), logging_manager_(std::move(logging_manager)) { +} -OrtEnv* OrtEnv::GetInstance(const LoggingManagerConstructionInfo& lm_info, Status& status) { - std::lock_guard lock(m_); +OrtEnv* 
OrtEnv::GetInstance(const OrtEnv::LoggingManagerConstructionInfo& lm_info, onnxruntime::common::Status& status) { + std::lock_guard lock(m_); if (!p_instance_) { - std::unique_ptr env; - status = Environment::Create(env); + std::unique_ptr env; + status = onnxruntime::Environment::Create(env); if (!status.IsOK()) { return nullptr; } @@ -58,6 +67,24 @@ OrtEnv* OrtEnv::GetInstance(const LoggingManagerConstructionInfo& lm_info, Statu return p_instance_; } -OrtEnv* OrtEnv::p_instance_ = nullptr; -int OrtEnv::ref_count_ = 0; -OrtMutex OrtEnv::m_; \ No newline at end of file +void OrtEnv::Release(OrtEnv* env_ptr) { + if (!env_ptr) { + return; + } + std::lock_guard lock(m_); + ORT_ENFORCE(env_ptr == p_instance_); // sanity check + --ref_count_; + if (ref_count_ == 0) { + delete p_instance_; + p_instance_ = nullptr; + } +} + +LoggingManager* OrtEnv::GetLoggingManager() const { + return logging_manager_.get(); +} + +void OrtEnv::SetLoggingManager(std::unique_ptr logging_manager) { + std::lock_guard lock(m_); + logging_manager_ = std::move(logging_manager); +} \ No newline at end of file diff --git a/onnxruntime/core/session/ort_env.h b/onnxruntime/core/session/ort_env.h index be4508eebdc6a..c93d2937c7a7b 100644 --- a/onnxruntime/core/session/ort_env.h +++ b/onnxruntime/core/session/ort_env.h @@ -2,13 +2,28 @@ // Licensed under the MIT License. 
#pragma once - -#include "core/common/logging/logging.h" -#include "core/common/logging/sinks/clog_sink.h" -#include "core/common/status.h" -#include "core/platform/ort_mutex.h" -#include "core/session/environment.h" +#include +#include #include "core/session/onnxruntime_c_api.h" +#include "core/common/logging/isink.h" +#include "core/platform/ort_mutex.h" +#include "core/common/status.h" + +namespace onnxruntime { +class Environment; +} + +class LoggingWrapper : public onnxruntime::logging::ISink { + public: + LoggingWrapper(OrtLoggingFunction logging_function, void* logger_param); + + void SendImpl(const onnxruntime::logging::Timestamp& /*timestamp*/ /*timestamp*/, const std::string& logger_id, + const onnxruntime::logging::Capture& message) override; + + private: + OrtLoggingFunction logging_function_; + void* logger_param_; +}; struct OrtEnv { public: @@ -26,24 +41,14 @@ struct OrtEnv { OrtLoggingLevel default_warning_level; const char* logid{}; }; + static OrtEnv* GetInstance(const LoggingManagerConstructionInfo& lm_info, onnxruntime::common::Status& status); - static void Release(OrtEnv* env_ptr) { - if (!env_ptr) { - return; - } - std::lock_guard lock(m_); - ORT_ENFORCE(env_ptr == p_instance_); // sanity check - --ref_count_; - if (ref_count_ == 0) { - delete p_instance_; - p_instance_ = nullptr; - } - } - - onnxruntime::logging::LoggingManager* GetLoggingManager() const { - return logging_manager_.get(); - } + static void Release(OrtEnv* env_ptr); + + onnxruntime::logging::LoggingManager* GetLoggingManager() const; + + void SetLoggingManager(std::unique_ptr logging_manager); private: static OrtEnv* p_instance_; @@ -53,10 +58,7 @@ struct OrtEnv { std::unique_ptr value_; std::unique_ptr logging_manager_; - OrtEnv(std::unique_ptr value1, std::unique_ptr logging_manager) - : value_(std::move(value1)), logging_manager_(std::move(logging_manager)) { - } - + OrtEnv(std::unique_ptr value1, std::unique_ptr logging_manager); ~OrtEnv() = default; 
ORT_DISALLOW_COPY_AND_ASSIGNMENT(OrtEnv); diff --git a/onnxruntime/python/tools/bert/README.md b/onnxruntime/python/tools/bert/README.md index d12ff736deedf..643b41771af3c 100644 --- a/onnxruntime/python/tools/bert/README.md +++ b/onnxruntime/python/tools/bert/README.md @@ -63,4 +63,3 @@ See below for description of all the options: By default, model uses float32 in computation. If this flag is specified, half-precision float will be used. This option is recommended for NVidia GPU with Tensor Core like V100 and T4. For older GPUs, float32 is likely faster. - **verbose**: (*optional*) Print verbose information when this flag is specified. - diff --git a/onnxruntime/test/ir/onnx_model_test.cc b/onnxruntime/test/ir/onnx_model_test.cc index b972a0d677e1e..de3cb975929e5 100644 --- a/onnxruntime/test/ir/onnx_model_test.cc +++ b/onnxruntime/test/ir/onnx_model_test.cc @@ -63,6 +63,15 @@ TEST(ONNXModelsTest, non_existing_model) { ASSERT_EQ(st.Code(), common::NO_SUCHFILE); } +TEST(ONNXModelsTest, future_opset) { + // NOTE: this requires the current directory to be where onnxruntime_ir_UT.exe is located + std::shared_ptr model; + common::Status st = Model::Load(ORT_TSTR("./testdata/add_opset_314159.onnx"), model, nullptr, + DefaultLoggingManager().DefaultLogger()); + ASSERT_FALSE(st.IsOK()); + ASSERT_EQ(st.Code(), common::INVALID_GRAPH); +} + #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS TEST(ONNXModelsTest1, bvlc_alexnet_1) { using ::google::protobuf::io::CodedInputStream; diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index b130526b0e19f..8eaa81fe2d1af 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -606,8 +606,10 @@ int real_main(int argc, char* argv[], Ort::Env& env) { broken_tests.insert({"vgg19", "failed: bad allocation"}); #endif -#ifdef DISABLE_CONTRIB_OPS + // Disable mask_rcnn_keras as this model currently has an invalid contrib op version set to 10 broken_tests.insert({"mask_rcnn_keras", "This model uses contrib 
ops."}); + +#ifdef DISABLE_CONTRIB_OPS broken_tests.insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."}); broken_tests.insert({"keras2coreml_Permute_ImageNet", "This model uses contrib ops."}); broken_tests.insert({"keras2coreml_ReLU_ImageNet", "This model uses contrib ops."}); diff --git a/onnxruntime/test/testdata/add_opset_314159.onnx b/onnxruntime/test/testdata/add_opset_314159.onnx new file mode 100644 index 0000000000000..296b5719cb56f Binary files /dev/null and b/onnxruntime/test/testdata/add_opset_314159.onnx differ diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index cdc709f0344fa..b2ce4914b9f4a 100755 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -146,12 +146,14 @@ def parse_arguments(): parser.add_argument("--use_full_protobuf", action='store_true', help="Use the full protobuf library") parser.add_argument("--disable_contrib_ops", action='store_true', help="Disable contrib ops (reduces binary size)") parser.add_argument("--skip_onnx_tests", action='store_true', help="Explicitly disable all onnx related tests. Note: Use --skip_tests to skip all tests.") + parser.add_argument("--skip_winml_tests", action='store_true', help="Explicitly disable all WinML related tests") parser.add_argument("--enable_msvc_static_runtime", action='store_true', help="Enable static linking of MSVC runtimes.") parser.add_argument("--enable_language_interop_ops", action='store_true', help="Enable operator implemented in language other than cpp") parser.add_argument("--cmake_generator", choices=['Visual Studio 15 2017', 'Visual Studio 16 2019'], default='Visual Studio 15 2017', help="Specify the generator that CMake invokes. This is only supported on Windows") parser.add_argument("--enable_multi_device_test", action='store_true', help="Test with multi-device. 
Mostly used for multi-device GPU") parser.add_argument("--use_dml", action='store_true', help="Build with DirectML.") + parser.add_argument("--use_winml", action='store_true', help="Build with WinML.") parser.add_argument("--use_telemetry", action='store_true', help="Only official builds can set this flag to enable telemetry.") return parser.parse_args() @@ -285,6 +287,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home # for now, disable jemalloc if pybind is also enabled. cmake_args = [cmake_path, cmake_dir, "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), + "-Donnxruntime_BUILD_WINML_TESTS=" + ("OFF" if args.skip_winml_tests else "ON"), "-Donnxruntime_GENERATE_TEST_REPORTS=ON", "-Donnxruntime_DEV_MODE=" + ("OFF" if args.android else "ON"), "-DPYTHON_EXECUTABLE=" + sys.executable, @@ -328,6 +331,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home # enable pyop if it is nightly build "-Donnxruntime_ENABLE_LANGUAGE_INTEROP_OPS=" + ("ON" if args.enable_language_interop_ops or (args.config != 'Debug' and bool(os.getenv('NIGHTLY_BUILD') == '1')) else "OFF"), "-Donnxruntime_USE_DML=" + ("ON" if args.use_dml else "OFF"), + "-Donnxruntime_USE_WINML=" + ("ON" if args.use_winml else "OFF"), "-Donnxruntime_USE_TELEMETRY=" + ("ON" if args.use_telemetry else "OFF"), ] diff --git a/tools/ci_build/gen_def.py b/tools/ci_build/gen_def.py index 6ed654db6a469..f38d599f4ca98 100755 --- a/tools/ci_build/gen_def.py +++ b/tools/ci_build/gen_def.py @@ -56,9 +56,14 @@ def parse_arguments(): with open(args.output_source, 'w') as file: file.write("#include \n") for c in args.config: - file.write("#include \n" % (c,c)) + # WinML adapter should not be exported in platforms other than Windows. 
+ # Exporting OrtGetWinMLAdapter is exported without issues using .def file when compiling for Windows + # so it isn't necessary to include it in generated_source.c + if c != "winml": + file.write("#include \n" % (c,c)) file.write("void* GetFunctionEntryByName(const char* name){\n") for symbol in symbols: - file.write("if(strcmp(name,\"%s\") ==0) return (void*)&%s;\n" % (symbol,symbol)) + if symbol != "OrtGetWinMLAdapter": + file.write("if(strcmp(name,\"%s\") ==0) return (void*)&%s;\n" % (symbol,symbol)) file.write("return NULL;\n"); file.write("}\n"); \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/cpu.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/cpu.yml index 7687c0fbe16b7..9b03eb688e7d7 100644 --- a/tools/ci_build/github/azure-pipelines/nuget/templates/cpu.yml +++ b/tools/ci_build/github/azure-pipelines/nuget/templates/cpu.yml @@ -9,7 +9,7 @@ jobs: AgentPool : 'Win-CPU-2019' ArtifactName: 'drop-nuget' JobName: 'Windows_CI_Dev' - BuildCommand: '--build_dir $(Build.BinariesDirectory) --skip_submodule_sync --use_openmp --build_shared_lib --use_featurizers --enable_onnx_tests --use_telemetry --cmake_generator "Visual Studio 16 2019"' + BuildCommand: '--build_dir $(Build.BinariesDirectory) --skip_submodule_sync --use_openmp --build_shared_lib --use_featurizers --enable_onnx_tests --use_telemetry --use_winml --cmake_generator "Visual Studio 16 2019"' BuildArch: 'x64' EnvSetupScript: 'setup_env.bat' sln_platform: 'x64' @@ -28,7 +28,7 @@ jobs: AgentPool : 'Win-CPU-2019' ArtifactName: 'drop-win-x86-zip' JobName: 'Windows_CI_Dev_x86' - BuildCommand: '--build_dir $(Build.BinariesDirectory) --skip_submodule_sync --use_openmp --build_shared_lib --use_featurizers --enable_onnx_tests --x86 --use_telemetry --cmake_generator "Visual Studio 16 2019"' + BuildCommand: '--build_dir $(Build.BinariesDirectory) --skip_submodule_sync --use_openmp --build_shared_lib --use_featurizers --enable_onnx_tests --x86 --use_telemetry 
--use_winml --cmake_generator "Visual Studio 16 2019"' BuildArch: 'x86' EnvSetupScript: 'setup_env_x86.bat' sln_platform: 'Win32' diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml index c6778bcdf3b89..1b0e037a8f50b 100644 --- a/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml +++ b/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml @@ -7,7 +7,7 @@ jobs: parameters: AgentPool : 'Win-GPU-2019' ArtifactName: 'drop-nuget' - JobName: 'Windows_CI_GPU_Dev' + JobName: 'Windows_CI_GPU_CUDA_Dev' BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --use_featurizers --enable_onnx_tests --use_telemetry --cmake_generator "Visual Studio 16 2019" --msvc_toolset 14.16 --use_cuda --cuda_version=10.0 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0" --cudnn_home="C:\local\cudnn-10.0-windows10-x64-v7.6.5.32\cuda" BuildArch: 'x64' msbuildArchitecture: 'amd64' @@ -25,6 +25,27 @@ jobs: mkdir $(Build.ArtifactStagingDirectory)\testdata copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata +- template: ../../templates/win-ci-2019.yml + parameters: + AgentPool : 'Win-GPU-2019' + ArtifactName: drop-nuget-dml + JobName: 'Windows_CI_GPU_DML_Dev' + BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --use_featurizers --enable_onnx_tests --use_telemetry --use_dml --cmake_generator "Visual Studio 16 2019" + BuildArch: 'x64' + msbuildArchitecture: 'amd64' + EnvSetupScript: 'setup_env.bat' + sln_platform: 'x64' + DoDebugBuild: 'false' + DoNugetPack : 'true' + DoCompliance: 'false' + DoEsrp: ${{ parameters.DoEsrp }} + OrtPackageId: 'Microsoft.ML.OnnxRuntime.DirectML' + NuPackScript: | + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /t:CreatePackage 
/p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML + copy $(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory) + mkdir $(Build.ArtifactStagingDirectory)\testdata + copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata + - job: 'Linux_CI_GPU_Dev' pool: $(AgentPoolLinux) steps: @@ -62,7 +83,8 @@ jobs: - job: NuGet_Packaging pool: 'Win-GPU-2019' dependsOn: - - Windows_CI_GPU_Dev + - Windows_CI_GPU_CUDA_Dev + - Windows_CI_GPU_DML_Dev - Linux_CI_GPU_Dev condition: succeeded() steps: @@ -72,6 +94,13 @@ jobs: artifactName: 'drop-nuget' targetPath: '$(Build.BinariesDirectory)/nuget-artifact' + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact - NuGet' + inputs: + artifactName: 'drop-nuget-dml' + targetPath: '$(Build.BinariesDirectory)/nuget-artifact' + continueOnError: true + - task: DownloadPipelineArtifact@0 displayName: 'Download Pipeline Artifact - Linux' inputs: diff --git a/tools/ci_build/github/azure-pipelines/win-arm-crosscompile-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-arm-crosscompile-ci-pipeline.yml index d5f145fecaae8..278ced307d71e 100644 --- a/tools/ci_build/github/azure-pipelines/win-arm-crosscompile-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-arm-crosscompile-ci-pipeline.yml @@ -1,48 +1,43 @@ jobs: - job: Windows_ARM_CrossCompile_CI_Dev timeoutInMinutes: 120 + pool: Win-CPU-2019 variables: buildDirectory: '$(Build.BinariesDirectory)' + ortPackageId: 'Microsoft.ML.OnnxRuntime' + strategy: + maxParallel: 2 + matrix: + debug: + BuildConfig: 'Debug' + release: + BuildConfig: 'Release' steps: - template: templates/set-test-data-variables-step.yml - - task: UniversalPackages@0 - displayName: 'Download python' + - task: UsePythonVersion@0 inputs: - command: download - vstsFeed: '$(System.TeamProject)' - vstsFeedPackage: 'miniconda3_win64' - vstsPackageVersion: 
'4.5.11' - downloadDirectory: '$(Build.BinariesDirectory)\python' - - task: CmdLine@1 - displayName: 'Run python installer' - inputs: - filename: '$(Build.BinariesDirectory)\python\installer.exe' - arguments: '/S /NoRegistry=1 /AddToPath=0 /RegisterPython=0 /D=$(Build.BinariesDirectory)\packages\python' - timeoutInMinutes: 10 + versionSpec: '3.7' - task: BatchScript@1 displayName: 'setup env' inputs: filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\setup_env.bat' modifyEnvironment: true workingFolder: '$(Build.BinariesDirectory)' - - task: CmdLine@1 - displayName: 'Download cmake' + - task: DotNetCoreCLI@2 + displayName: 'Restore nuget packages' inputs: - filename: '$(Build.BinariesDirectory)\packages\python\python.exe' - arguments: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\download_cmake.py --build_dir $(Build.BinariesDirectory)' - - task: CmdLine@1 - displayName: 'Generate cmake config and build Debug' - inputs: - filename: '$(Build.BinariesDirectory)\packages\python\python.exe' - arguments: '$(Build.SourcesDirectory)\tools\ci_build\build.py --config Debug --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --arm' - workingDirectory: "$(Build.BinariesDirectory)" + command: restore + projects: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' + configuration: '$(BuildConfig)' + arguments: '--configuration $(BuildConfig) -p:Platform="Any CPU" -p:OrtPackageId=$(ortPackageId)' + workingDirectory: '$(Build.SourcesDirectory)\csharp' - task: CmdLine@1 - displayName: 'Generate cmake config and build Release' + displayName: 'Generate cmake config and build' inputs: - filename: '$(Build.BinariesDirectory)\packages\python\python.exe' - arguments: '$(Build.SourcesDirectory)\tools\ci_build\build.py --config Release --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --arm' + filename: python.exe + 
arguments: '$(Build.SourcesDirectory)\tools\ci_build\build.py --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "Visual Studio 16 2019" --arm --build_shared_lib --use_winml' workingDirectory: "$(Build.BinariesDirectory)" - template: templates/component-governance-component-detection-steps.yml parameters : condition : 'ci_only' - - template: templates/clean-agent-build-directory-step.yml \ No newline at end of file + - template: templates/clean-agent-build-directory-step.yml diff --git a/tools/ci_build/github/azure-pipelines/win-arm64-crosscompile-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-arm64-crosscompile-ci-pipeline.yml index 1a87c491f9ac4..a7078a1b7dc1d 100644 --- a/tools/ci_build/github/azure-pipelines/win-arm64-crosscompile-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-arm64-crosscompile-ci-pipeline.yml @@ -1,48 +1,43 @@ jobs: - job: Windows_ARM_CrossCompile_CI_Dev timeoutInMinutes: 120 + pool: Win-CPU-2019 variables: buildDirectory: '$(Build.BinariesDirectory)' + ortPackageId: 'Microsoft.ML.OnnxRuntime' + strategy: + maxParallel: 2 + matrix: + debug: + BuildConfig: 'Debug' + release: + BuildConfig: 'Release' steps: - template: templates/set-test-data-variables-step.yml - - task: UniversalPackages@0 - displayName: 'Download python' + - task: UsePythonVersion@0 inputs: - command: download - vstsFeed: '$(System.TeamProject)' - vstsFeedPackage: 'miniconda3_win64' - vstsPackageVersion: '4.5.11' - downloadDirectory: '$(Build.BinariesDirectory)\python' - - task: CmdLine@1 - displayName: 'Run python installer' - inputs: - filename: '$(Build.BinariesDirectory)\python\installer.exe' - arguments: '/S /NoRegistry=1 /AddToPath=0 /RegisterPython=0 /D=$(Build.BinariesDirectory)\packages\python' - timeoutInMinutes: 10 + versionSpec: '3.7' - task: BatchScript@1 displayName: 'setup env' inputs: filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\setup_env.bat' 
modifyEnvironment: true workingFolder: '$(Build.BinariesDirectory)' - - task: CmdLine@1 - displayName: 'Download cmake' + - task: DotNetCoreCLI@2 + displayName: 'Restore nuget packages' inputs: - filename: '$(Build.BinariesDirectory)\packages\python\python.exe' - arguments: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\download_cmake.py --build_dir $(Build.BinariesDirectory)' - - task: CmdLine@1 - displayName: 'Generate cmake config and build Debug' - inputs: - filename: '$(Build.BinariesDirectory)\packages\python\python.exe' - arguments: '$(Build.SourcesDirectory)\tools\ci_build\build.py --config Debug --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --arm64' - workingDirectory: "$(Build.BinariesDirectory)" + command: restore + projects: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' + configuration: '$(BuildConfig)' + arguments: '--configuration $(BuildConfig) -p:Platform="Any CPU" -p:OrtPackageId=$(ortPackageId)' + workingDirectory: '$(Build.SourcesDirectory)\csharp' - task: CmdLine@1 - displayName: 'Generate cmake config and build Release' + displayName: 'Generate cmake config and build' inputs: - filename: '$(Build.BinariesDirectory)\packages\python\python.exe' - arguments: '$(Build.SourcesDirectory)\tools\ci_build\build.py --config Release --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --arm64' + filename: python.exe + arguments: '$(Build.SourcesDirectory)\tools\ci_build\build.py --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "Visual Studio 16 2019" --arm64 --build_shared_lib --use_winml' workingDirectory: "$(Build.BinariesDirectory)" - template: templates/component-governance-component-detection-steps.yml parameters : condition : 'ci_only' - - template: templates/clean-agent-build-directory-step.yml \ No newline at end of file + - 
template: templates/clean-agent-build-directory-step.yml diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml index b191c5a5ff486..2750f7825c5dd 100644 --- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml @@ -56,7 +56,7 @@ jobs: displayName: 'Generate cmake config' inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_featurizers --use_dnnl --use_openmp --build_shared_lib --enable_onnx_tests --build_java' + arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_featurizers --use_dnnl --use_winml --use_openmp --build_shared_lib --enable_onnx_tests --build_java' workingDirectory: '$(Build.BinariesDirectory)' - task: VSBuild@1 @@ -72,6 +72,13 @@ jobs: workingFolder: '$(Build.BinariesDirectory)\$(BuildConfig)' createLogFile: true + - task: PublishSymbols@2 + displayName: 'Publish Build Symbols' + inputs: + symbolsFolder: $(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig) + searchPattern: '**/*.pdb' + symbolServerType: teamServices + - task: PythonScript@0 displayName: 'Build wheel' inputs: @@ -133,5 +140,4 @@ jobs: parameters : condition : 'succeeded' - - template: templates/clean-agent-build-directory-step.yml - + - template: templates/clean-agent-build-directory-step.yml \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/win-x86-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-x86-ci-pipeline.yml index 3b55497b8fc6f..7155952a3f7c5 100644 --- a/tools/ci_build/github/azure-pipelines/win-x86-ci-pipeline.yml +++ 
b/tools/ci_build/github/azure-pipelines/win-x86-ci-pipeline.yml @@ -42,7 +42,7 @@ jobs: displayName: 'Generate cmake config' inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_featurizers --x86 --use_openmp --build_shared_lib --enable_onnx_tests' + arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --use_winml --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_featurizers --x86 --use_openmp --build_shared_lib --enable_onnx_tests' workingDirectory: '$(Build.BinariesDirectory)' - task: VSBuild@1 @@ -115,7 +115,7 @@ jobs: set /p WHEEL_FILENAME=name, + schema[i]->inputCount, + schema[i]->outputCount, + schema[i]->typeConstraintCount, + schema[i]->attributeCount, + schema[i]->defaultAttributeCount); + } +#endif + + // Delegate to base class + return AbiCustomRegistry::RegisterOperatorSetSchema( + opSetId, + baseline_version, + schema, + schemaCount, + typeInferrer, + shapeInferrer); +} +CATCH_RETURN(); + +HRESULT STDMETHODCALLTYPE AbiCustomRegistryImpl::RegisterOperatorKernel( + const MLOperatorKernelDescription* opKernel, + IMLOperatorKernelFactory* operatorKernelFactory, + _In_opt_ IMLOperatorShapeInferrer* shapeInferrer) const noexcept { + return RegisterOperatorKernel(opKernel, operatorKernelFactory, shapeInferrer, nullptr, false, false, false); +} + +HRESULT STDMETHODCALLTYPE AbiCustomRegistryImpl::RegisterOperatorKernel( + const MLOperatorKernelDescription* opKernel, + IMLOperatorKernelFactory* operatorKernelFactory, + _In_opt_ IMLOperatorShapeInferrer* shapeInferrer, + _In_opt_ IMLOperatorSupportQueryPrivate* supportQuery, + bool isInternalOperator, + bool canAliasFirstInput, + bool supportsGraph, + const uint32_t* requiredInputCountForGraph, + bool 
requiresFloatFormatsForGraph, + _In_reads_(constantCpuInputCount) const uint32_t* requiredConstantCpuInputs, + uint32_t constantCpuInputCount) const noexcept try { +#ifdef LAYERING_DONE + // Log a custom op telemetry if the operator is not a built-in DML operator + if (!isInternalOperator) { + telemetry_helper.LogRegisterOperatorKernel( + opKernel->name, + opKernel->domain, + static_cast(opKernel->executionType)); + } +#endif + + // Delegate to base class + return AbiCustomRegistry::RegisterOperatorKernel( + opKernel, + operatorKernelFactory, + shapeInferrer, + supportQuery, + isInternalOperator, + canAliasFirstInput, + supportsGraph, + requiredInputCountForGraph, + requiresFloatFormatsForGraph, + requiredConstantCpuInputs, + constantCpuInputCount); +} +CATCH_RETURN(); + +} // namespace Windows::AI::MachineLearning::Adapter + +#endif USE_DML diff --git a/winml/adapter/abi_custom_registry_impl.h b/winml/adapter/abi_custom_registry_impl.h new file mode 100644 index 0000000000000..040f8e28c01ae --- /dev/null +++ b/winml/adapter/abi_custom_registry_impl.h @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#ifdef USE_DML +#include "core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.h" + +namespace Windows::AI::MachineLearning::Adapter { + +// An implementation of AbiCustomRegistry that emits telemetry events when operator kernels or schemas are registered. 
+class AbiCustomRegistryImpl : public AbiCustomRegistry { + public: + HRESULT STDMETHODCALLTYPE RegisterOperatorSetSchema( + const MLOperatorSetId* op_set_id, + int baseline_version, + const MLOperatorSchemaDescription* const* schema, + uint32_t schema_count, + _In_opt_ IMLOperatorTypeInferrer* type_inferrer, + _In_opt_ IMLOperatorShapeInferrer* shape_inferrer) const noexcept override; + + HRESULT STDMETHODCALLTYPE RegisterOperatorKernel( + const MLOperatorKernelDescription* operator_kernel, + IMLOperatorKernelFactory* operator_kernel_factory, + _In_opt_ IMLOperatorShapeInferrer* shape_inferrer, + _In_opt_ IMLOperatorSupportQueryPrivate* supportQuery, + bool is_internal_operator, + bool can_alias_first_input, + bool supports_graph, + const uint32_t* required_input_count_for_graph = nullptr, + bool requires_float_formats_for_graph = false, + _In_reads_(constant_cpu_input_count) const uint32_t* required_constant_cpu_inputs = nullptr, + uint32_t constant_cpu_input_count = 0) const noexcept override; + + HRESULT STDMETHODCALLTYPE RegisterOperatorKernel( + const MLOperatorKernelDescription* op_kernel, + IMLOperatorKernelFactory* operator_kernel_factory, + _In_opt_ IMLOperatorShapeInferrer* shape_inferrer) const noexcept override; +}; + +} // namespace Windows::AI::MachineLearning::Adapter +#endif USE_DML diff --git a/winml/adapter/pch.h b/winml/adapter/pch.h new file mode 100644 index 0000000000000..c448ebc5f87a9 --- /dev/null +++ b/winml/adapter/pch.h @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "cppwinrt_onnx.h" +#include "wil/wrl.h" +#include "dx.h" + +#if USE_DML +#include +#endif USE_DML diff --git a/winml/adapter/winml_adapter_apis.h b/winml/adapter/winml_adapter_apis.h new file mode 100644 index 0000000000000..a4477c264a263 --- /dev/null +++ b/winml/adapter/winml_adapter_apis.h @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +#pragma once + +#include "winml_adapter_c_api.h" + +namespace Windows { +namespace AI { +namespace MachineLearning { +namespace Adapter { + +ORT_API(void, ReleaseModel, OrtModel*); +ORT_API(void, ReleaseExecutionProvider, OrtExecutionProvider*); + +ORT_API_STATUS(OverrideSchema); + +// OrtEnv methods +ORT_API_STATUS(EnvConfigureCustomLoggerAndProfiler, _In_ OrtEnv* env, OrtLoggingFunction logging_function, OrtProfilingFunction profiling_function, _In_opt_ void* logger_param, OrtLoggingLevel default_warning_level, _In_ const char* logid, _Outptr_ OrtEnv** out); + +// OrtModel methods +ORT_API_STATUS(CreateModelFromPath, _In_ const char* model_path, _In_ size_t size, _Outptr_ OrtModel** out); +ORT_API_STATUS(CreateModelFromData, _In_ void* data, _In_ size_t size, _Outptr_ OrtModel** out); +ORT_API_STATUS(CloneModel, _In_ const OrtModel* in, _Outptr_ OrtModel** out); +ORT_API_STATUS(ModelGetAuthor, _In_ const OrtModel* model, _Out_ const char** const author, _Out_ size_t* len); +ORT_API_STATUS(ModelGetName, _In_ const OrtModel* model, _Out_ const char** const name, _Out_ size_t* len); +ORT_API_STATUS(ModelGetDomain, _In_ const OrtModel* model, _Out_ const char** const domain, _Out_ size_t* len); +ORT_API_STATUS(ModelGetDescription, _In_ const OrtModel* model, _Out_ const char** const description, _Out_ size_t* len); +ORT_API_STATUS(ModelGetVersion, _In_ const OrtModel* model, _Out_ int64_t* version); +ORT_API_STATUS(ModelGetInputCount, _In_ const OrtModel* model, _Out_ size_t* count); +ORT_API_STATUS(ModelGetOutputCount, _In_ const OrtModel* model, _Out_ size_t* count); +ORT_API_STATUS(ModelGetInputName, _In_ const OrtModel* model, _In_ size_t index, _Out_ const char** input_name, _Out_ size_t* count); +ORT_API_STATUS(ModelGetOutputName, _In_ const OrtModel* model, _In_ size_t index, _Out_ const char** output_name, _Out_ size_t* count); +ORT_API_STATUS(ModelGetInputDescription, _In_ const OrtModel* model, _In_ size_t index, _Out_ 
const char** input_description, _Out_ size_t* count); +ORT_API_STATUS(ModelGetOutputDescription, _In_ const OrtModel* model, _In_ size_t index, _Out_ const char** output_description, _Out_ size_t* count); +ORT_API_STATUS(ModelGetInputTypeInfo, _In_ const OrtModel* model, _In_ size_t index, _Outptr_ OrtTypeInfo** type_info); +ORT_API_STATUS(ModelGetOutputTypeInfo, _In_ const OrtModel* model, _In_ size_t index, _Outptr_ OrtTypeInfo** type_info); +ORT_API_STATUS(ModelGetMetadataCount, _In_ const OrtModel* model, _Out_ size_t* count); +ORT_API_STATUS(ModelGetMetadata, _In_ const OrtModel* model, _Out_ size_t count, _Out_ const char** const key, _Out_ size_t* key_len, _Out_ const char** const value, _Out_ size_t* value_len); +ORT_API_STATUS(ModelEnsureNoFloat16, _In_ const OrtModel* model); + +ORT_API_STATUS(OrtSessionOptionsAppendExecutionProviderEx_DML, _In_ OrtSessionOptions* options, _In_ ID3D12Device* d3d_device, _In_ ID3D12CommandQueue* cmd_queue); + +// OrtSession methods +ORT_API_STATUS(CreateSessionWithoutModel, _In_ OrtEnv* env, _In_ const OrtSessionOptions* options, _Outptr_ OrtSession** session); + +//Do not release provider... 
as there is no release method available +ORT_API_STATUS(SessionGetExecutionProvider, _In_ OrtSession* session, size_t index, _Out_ OrtExecutionProvider** provider); +ORT_API_STATUS(SessionInitialize, _In_ OrtSession* session); +ORT_API_STATUS(SessionLoadAndPurloinModel, _In_ OrtSession* session, _In_ OrtModel* model); + +ORT_API_STATUS(SessionStartProfiling, _In_ OrtEnv* env, _In_ OrtSession* session); +ORT_API_STATUS(SessionEndProfiling, _In_ OrtSession* session); +ORT_API_STATUS(SessionRegisterGraphTransformers, _In_ OrtSession* session); +ORT_API_STATUS(SessionRegisterCustomRegistry, _In_ OrtSession* session, _In_ IMLOperatorRegistry* registry); +ORT_API_STATUS(SessionCopyOneInputAcrossDevices, _In_ OrtSession* session, _In_ const char* const input_name, _In_ OrtValue* orig_value, _Outptr_ OrtValue** new_value); + +// Dml methods (TODO need to figure out how these need to move to session somehow...) +ORT_API_STATUS(DmlExecutionProviderSetDefaultRoundingMode, _In_ OrtExecutionProvider* dml_provider, _In_ bool is_enabled); +ORT_API_STATUS(DmlExecutionProviderFlushContext, _In_ OrtExecutionProvider* dml_provider); +ORT_API_STATUS(DmlExecutionProviderTrimUploadHeap, _In_ OrtExecutionProvider* dml_provider); +ORT_API_STATUS(DmlExecutionProviderReleaseCompletedReferences, _In_ OrtExecutionProvider* dml_provider); +ORT_API_STATUS(DmlCreateGPUAllocationFromD3DResource, _In_ ID3D12Resource* pResource, _Out_ void** dml_resource); +ORT_API_STATUS(DmlGetD3D12ResourceFromAllocation, _In_ OrtExecutionProvider* provider, _In_ void* allocation, _Out_ ID3D12Resource** resource); +ORT_API_STATUS(DmlFreeGPUAllocation, _In_ void* ptr); + +// note: this returns a weak ref + +ORT_API_STATUS(GetProviderMemoryInfo, _In_ OrtExecutionProvider* provider, OrtMemoryInfo** memory_info); +ORT_API_STATUS(GetProviderAllocator, _In_ OrtExecutionProvider* provider, OrtAllocator** allocator); +ORT_API_STATUS(FreeProviderAllocator, _In_ OrtAllocator* allocator); +ORT_API_STATUS(GetValueMemoryInfo, 
const OrtValue* value, OrtMemoryInfo** memory_info); + +// ExecutionProvider Methods +ORT_API_STATUS(ExecutionProviderSync, _In_ OrtExecutionProvider* provider); +ORT_API_STATUS(DmlCopyTensor, _In_ OrtExecutionProvider* provider, _In_ OrtValue* src, _In_ OrtValue* dst); +ORT_API_STATUS(CreateCustomRegistry, _Out_ IMLOperatorRegistry** registry); + +ORT_API_STATUS(ValueGetDeviceId, _In_ OrtValue* ort_value, _Out_ int16_t* device_id); +ORT_API_STATUS(SessionGetInputRequiredDeviceId, _In_ OrtSession* session, _In_ const char* const input_name, _Out_ int16_t* device_id); + +} // namespace Adapter +} // namespace MachineLearning +} // namespace AI +} // namespace Windows \ No newline at end of file diff --git a/winml/adapter/winml_adapter_c_api.cpp b/winml/adapter/winml_adapter_c_api.cpp new file mode 100644 index 0000000000000..6fda737ed45d0 --- /dev/null +++ b/winml/adapter/winml_adapter_c_api.cpp @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "pch.h" + +#include "winml_adapter_c_api.h" +#include "winml_adapter_apis.h" +#include "core/session/ort_apis.h" + +#include +#include + +const OrtApi* GetVersion1Api(); + +namespace winmla = Windows::AI::MachineLearning::Adapter; + +static constexpr WinmlAdapterApi winml_adapter_api_1 = { + // Schema override + &winmla::OverrideSchema, + + // OrtEnv methods + &winmla::EnvConfigureCustomLoggerAndProfiler, + + // OrtModel methods + &winmla::CreateModelFromPath, + &winmla::CreateModelFromData, + &winmla::CloneModel, + &winmla::ModelGetAuthor, + &winmla::ModelGetName, + &winmla::ModelGetDomain, + &winmla::ModelGetDescription, + &winmla::ModelGetVersion, + &winmla::ModelGetInputCount, + &winmla::ModelGetOutputCount, + &winmla::ModelGetInputName, + &winmla::ModelGetOutputName, + &winmla::ModelGetInputDescription, + &winmla::ModelGetOutputDescription, + &winmla::ModelGetInputTypeInfo, + &winmla::ModelGetOutputTypeInfo, + &winmla::ModelGetMetadataCount, + &winmla::ModelGetMetadata, + &winmla::ModelEnsureNoFloat16, + + // OrtSessionOptions methods + &OrtSessionOptionsAppendExecutionProvider_CPU, + &winmla::OrtSessionOptionsAppendExecutionProviderEx_DML, + + // OrtSession methods + &winmla::CreateSessionWithoutModel, + &winmla::SessionGetExecutionProvider, + &winmla::SessionInitialize, + &winmla::SessionRegisterGraphTransformers, + &winmla::SessionRegisterCustomRegistry, + &winmla::SessionLoadAndPurloinModel, + &winmla::SessionStartProfiling, + &winmla::SessionEndProfiling, + &winmla::SessionCopyOneInputAcrossDevices, + + // Dml methods (TODO need to figure out how these need to move to session somehow...) 
+ &winmla::DmlExecutionProviderSetDefaultRoundingMode, + &winmla::DmlExecutionProviderFlushContext, + &winmla::DmlExecutionProviderTrimUploadHeap, + &winmla::DmlExecutionProviderReleaseCompletedReferences, + &winmla::DmlCreateGPUAllocationFromD3DResource, + &winmla::DmlFreeGPUAllocation, + &winmla::DmlGetD3D12ResourceFromAllocation, + &winmla::DmlCopyTensor, + + &winmla::GetProviderMemoryInfo, + &winmla::GetProviderAllocator, + &winmla::FreeProviderAllocator, + &winmla::GetValueMemoryInfo, + + &winmla::ExecutionProviderSync, + + &winmla::CreateCustomRegistry, + + &winmla::ValueGetDeviceId, + &winmla::SessionGetInputRequiredDeviceId, + + // Release + &winmla::ReleaseModel +}; + +const WinmlAdapterApi* ORT_API_CALL OrtGetWinMLAdapter(const OrtApi* ort_api) NO_EXCEPTION { + if (OrtApis::GetApi(1) == ort_api) { + return &winml_adapter_api_1; + } + + return nullptr; +} \ No newline at end of file diff --git a/winml/adapter/winml_adapter_c_api.h b/winml/adapter/winml_adapter_c_api.h new file mode 100644 index 0000000000000..76dec8f49a2f6 --- /dev/null +++ b/winml/adapter/winml_adapter_c_api.h @@ -0,0 +1,420 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/session/onnxruntime_c_api.h" + +/** + * All APIs exported by winml_adapter_c_api.h are part of the private interface dedicated to supporting the WinML API. + * This contract is subject to change based on the needs of the WinML API and is not intended for direct use by callers + * of the onnxruntime c-api and usage of APIs in this header are *not* supported by the onnxruntime product. 
+ */ + +ORT_RUNTIME_CLASS(Model); +ORT_RUNTIME_CLASS(ExecutionProvider); + +struct WinmlAdapterApi; +typedef struct WinmlAdapterApi WinmlAdapterApi; + +struct ID3D12Resource; +struct ID3D12Device; +struct ID3D12CommandQueue; +struct IMLOperatorRegistry; + +// TODO: Must match onnxruntime::profiling::EventRecord +enum OrtProfilerEventCategory { + SESSION_EVENT = 0, + NODE_EVENT, + EVENT_CATEGORY_MAX +}; + +struct OrtProfilerEventRecord { + OrtProfilerEventCategory category_; + const char* category_name_; + int64_t duration_; + int64_t time_span_; + const char* event_name_; + int32_t process_id_; + int32_t thread_id_; + const char* op_name_; + const char* execution_provider_; +}; + +typedef void(ORT_API_CALL* OrtProfilingFunction)(const OrtProfilerEventRecord* event_record); + +struct WinmlAdapterApi { + /** + * OverrideSchema + * This api is used to override schema inference functions for a variety of ops across opsets. + * This exists because certain ops were failing to infer schemas and caused performance + * issues for DML as it was forced to create resources during evaluation. + * This can be removed when schema inference functions have been updated. + */ + OrtStatus*(ORT_API_CALL* OverrideSchema)() NO_EXCEPTION; + + /** + * EnvConfigureCustomLoggerAndProfiler + * This api is used to add a custom logger and profiler to the ors environment. + * This exists because existing methods on the c-abi to create the environment only support a custom logger. + * Since WinML hooks the profiler events, we expose the profiler and an associated profiling function. + */ + OrtStatus*(ORT_API_CALL* EnvConfigureCustomLoggerAndProfiler)(_In_ OrtEnv* env, OrtLoggingFunction logging_function, OrtProfilingFunction profiling_function, _In_opt_ void* logger_param, OrtLoggingLevel default_warning_level, _In_ const char* logid, _Outptr_ OrtEnv** out)NO_EXCEPTION; + + // OrtModel methods + + /** + * CreateModelFromPath + * This api creates an OrtModel based on a specified model path. 
+ * There is no inferencing or evaluation setup performed. Only ONNX load is done to reflect on the model's inputs/outputs and other properties. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* CreateModelFromPath)(_In_ const char* model_path, _In_ size_t size, _Outptr_ OrtModel** out)NO_EXCEPTION; + + /** + * CreateModelFromData + * This api creates an OrtModel from a buffer. + * There is no inferencing or evaluation setup performed. Only ONNX load is done to reflect on the model's inputs/outputs and other properties. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* CreateModelFromData)(_In_ void* data, _In_ size_t size, _Outptr_ OrtModel** out)NO_EXCEPTION; + + /** + * CloneModel + * This api copies the OrtModel along with its internal proto buffer and cached metadata. + * The OrtSession type expects to own the model proto buffer. + * WinML uses this to yield copies of the model proto held by OrtModel to OrtSession. + */ + OrtStatus*(ORT_API_CALL* CloneModel)(_In_ const OrtModel* in, _Outptr_ OrtModel** out)NO_EXCEPTION; + + /** + * ModelGetAuthor + * This api gets the model author from the OrtModel. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetAuthor)(_In_ const OrtModel* model, _Out_ const char** const author, _Out_ size_t* len)NO_EXCEPTION; + + /** + * ModelGetName + * This api gets the model name from the OrtModel. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetName)(_In_ const OrtModel* model, _Out_ const char** const name, _Out_ size_t* len)NO_EXCEPTION; + + /** + * ModelGetDomain + * This api gets the model domain from the OrtModel. + * This is used by WinML to support model reflection APIs. 
+ */ + OrtStatus*(ORT_API_CALL* ModelGetDomain)(_In_ const OrtModel* model, _Out_ const char** const domain, _Out_ size_t* len)NO_EXCEPTION; + + /** + * ModelGetDescription + * This api gets the model description from the OrtModel. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetDescription)(_In_ const OrtModel* model, _Out_ const char** const description, _Out_ size_t* len)NO_EXCEPTION; + + /** + * ModelGetVersion + * This api gets the model version from the OrtModel. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetVersion)(_In_ const OrtModel* model, _Out_ int64_t* version)NO_EXCEPTION; + + /** + * ModelGetInputCount + * This api gets the number of inputs from the OrtModel. It closely matches the API of a similar name similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetInputCount)(_In_ const OrtModel* model, _Out_ size_t* count)NO_EXCEPTION; + + /** + * ModelGetOutputCount + * This api gets the number of outputs from the OrtModel. It closely matches the API of a similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetOutputCount)(_In_ const OrtModel* model, _Out_ size_t* count)NO_EXCEPTION; + + /** + * ModelGetInputName + * This api gets the input name from the OrtModel given an index. It closely matches the API of a similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetInputName)(_In_ const OrtModel* model, _In_ size_t index, _Out_ const char** input_name, _Out_ size_t* count)NO_EXCEPTION; + + /** + * ModelGetOutputName + * This api gets the output name from the OrtModel given an index. 
It closely matches the API of a similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetOutputName)(_In_ const OrtModel* model, _In_ size_t index, _Out_ const char** output_name, _Out_ size_t* count)NO_EXCEPTION; + + /** + * ModelGetInputDescription + * This api gets the input description from the OrtModel given an index. It closely matches the API of a similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetInputDescription)(_In_ const OrtModel* model, _In_ size_t index, _Out_ const char** input_description, _Out_ size_t* count)NO_EXCEPTION; + + /** + * ModelGetOutputDescription + * This api gets the output description from the OrtModel given an index. It closely matches the API of a similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetOutputDescription)(_In_ const OrtModel* model, _In_ size_t index, _Out_ const char** output_description, _Out_ size_t* count)NO_EXCEPTION; + + /** + * ModelGetInputTypeInfo + * This api gets the input OrtTypeInfo from the OrtModel given an index. It closely matches the API of a similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetInputTypeInfo)(_In_ const OrtModel* model, _In_ size_t index, _Outptr_ OrtTypeInfo** type_info)NO_EXCEPTION; + + /** + * ModelGetOutputTypeInfo + * This api gets the output OrtTypeInfo from the OrtModel given an index. It closely matches the API of a similar name for retrieving model metadata from OrtSession. + * This is used by WinML to support model reflection APIs. 
+ */ + OrtStatus*(ORT_API_CALL* ModelGetOutputTypeInfo)(_In_ const OrtModel* model, _In_ size_t index, _Outptr_ OrtTypeInfo** type_info)NO_EXCEPTION; + + /** + * ModelGetMetadataCount + * This api gets the number of metadata entries from the OrtModel. + * This is used by WinML to support model reflection APIs. + */ + OrtStatus*(ORT_API_CALL* ModelGetMetadataCount)(_In_ const OrtModel* model, _Out_ size_t* count)NO_EXCEPTION; + + /** + * ModelGetMetadata + * This api gets the model metadata from the OrtModel. + * This is used by WinML to deduce whether model input and output formats are supported by the WinML tensorization code paths. + */ + OrtStatus*(ORT_API_CALL* ModelGetMetadata)(_In_ const OrtModel* model, _Out_ size_t count, _Out_ const char** const key, _Out_ size_t* key_len, _Out_ const char** const value, _Out_ size_t* value_len)NO_EXCEPTION; + + /** + * ModelEnsureNoFloat16 + * This api checks whether the model requires float 16 support. + * This is used by WinML to fail gracefully when float 16 support is not available on the device. + * + * Can this API be moved into the EP during session initialization. Currently we do an early fp16 check to avoid initialization when it is not supported. + */ + OrtStatus*(ORT_API_CALL* ModelEnsureNoFloat16)(_In_ const OrtModel* model)NO_EXCEPTION; + + // OrtSessionOptions methods + + /** + * OrtSessionOptionsAppendExecutionProvider_CPU + * This api is used to add the cpu EP to OrtSessionOptions so that WinML Gpu session are configures with CPU fallback. + */ + OrtStatus*(ORT_API_CALL* OrtSessionOptionsAppendExecutionProvider_CPU)(_In_ OrtSessionOptions* options, int use_arena)NO_EXCEPTION; + + /** + * OrtSessionOptionsAppendExecutionProvider_DML + * This api is used to add the DML EP to OrtSessionOptions. 
+ */ + OrtStatus*(ORT_API_CALL* OrtSessionOptionsAppendExecutionProvider_DML)(_In_ OrtSessionOptions* options, ID3D12Device* device, ID3D12CommandQueue* queue)NO_EXCEPTION; + + // OrtSession methods + + /** + * CreateSessionWithoutModel + * This api is used to create a Session that is completely uninitialized. While there are other Session creation APIs in the + * c-abi, WinML uses this so that it can perform optimizations prior to loading the model, and initializing. + * Moreover, WinML needs a new api to support the OrtModel type, and prevent the parsing model protobufs again on session creation. + */ + OrtStatus*(ORT_API_CALL* CreateSessionWithoutModel)(_In_ OrtEnv* env, _In_ const OrtSessionOptions* options, _Outptr_ OrtSession** session)NO_EXCEPTION; + + /** + * SessionGetExecutionProvider + * This api is used to get a handle to an OrtExecutionProvider. + * Currently WinML uses this to talk directly to the DML EP and configure settings on it. + */ + OrtStatus*(ORT_API_CALL* SessionGetExecutionProvider)(_In_ OrtSession* session, _In_ size_t index, _Out_ OrtExecutionProvider** provider)NO_EXCEPTION; + + /** + * SessionInitialize + * This api is used to initialize an OrtSession. This is one component of creating a usable OrtSession, and is a part of CreateSession in the c-abi. + * Currently WinML uses this to finalize session creation, after configuring a variety of properties on the OrtSession. + */ + OrtStatus*(ORT_API_CALL* SessionInitialize)(_In_ OrtSession* session)NO_EXCEPTION; + + /** + * SessionRegisterGraphTransformers + * This api is used to enable DML specific graph transformations on an OrtSession. + * + * Ideally these transformations should be configured by the contract between the runtime and the EP and not overridden by WinML. 
+ */ + OrtStatus*(ORT_API_CALL* SessionRegisterGraphTransformers)(_In_ OrtSession* session)NO_EXCEPTION; + + /** + * SessionRegisterCustomRegistry + * This api is used to support custom operators as they were shipped in WinML RS5. + */ + OrtStatus*(ORT_API_CALL* SessionRegisterCustomRegistry)(_In_ OrtSession* session, _In_ IMLOperatorRegistry* registry)NO_EXCEPTION; + + /** + * SessionLoadAndPurloinModel + * This api is used to load an OrtModel into an OrtSession. + * + * Don't free the 'out' value as this API will defunct and release the OrtModel internally. + */ + OrtStatus*(ORT_API_CALL* SessionLoadAndPurloinModel)(_In_ OrtSession* session, _In_ OrtModel* model)NO_EXCEPTION; + + /** + * SessionStartProfiling + * This api is used to start profiling OrtSession. The existing mechanism only allows configuring profiling at session creation. + * + * WinML uses this to toggle profilling on and off based on if a telemetry providers are being listened to. + */ + OrtStatus*(ORT_API_CALL* SessionStartProfiling)(_In_ OrtEnv* env, _In_ OrtSession* session)NO_EXCEPTION; + + /** + * SessionEndProfiling + * This api is used to end profiling OrtSession. The existing mechanism only allows configuring profiling at session creation. + * + * WinML uses this to toggle profilling on and off based on if a telemetry providers are being listened to. + */ + OrtStatus*(ORT_API_CALL* SessionEndProfiling)(_In_ OrtSession* session)NO_EXCEPTION; + + /** + * SessionCopyOneInputAcrossDevices + * This api is used to copy and create an OrtValue input to prepare the input on the correct device. + * + * WinML uses this to copy gpu device OrtValues to the CPU and vice-versa. + */ + OrtStatus*(ORT_API_CALL* SessionCopyOneInputAcrossDevices)(_In_ OrtSession* session, _In_ const char* const input_name, _In_ OrtValue* orig_value, _Outptr_ OrtValue** new_value)NO_EXCEPTION; + + // Dml methods (TODO need to figure out how these need to move to session somehow...) 
+ + /** + * DmlExecutionProviderSetDefaultRoundingMode + * This api is used to configure the DML EP to turn on/off rounding. + * + * WinML uses this to disable rounding during session initialization and then enables it again post initialization. + */ + OrtStatus*(ORT_API_CALL* DmlExecutionProviderSetDefaultRoundingMode)(_In_ OrtExecutionProvider* dml_provider, _In_ bool is_enabled)NO_EXCEPTION; + + /** + * DmlExecutionProviderFlushContext + * This api is used to flush the DML EP. + * + * WinML communicates directly with DML to perform this as an optimization. + */ + OrtStatus*(ORT_API_CALL* DmlExecutionProviderFlushContext)(_In_ OrtExecutionProvider* dml_provider)NO_EXCEPTION; + + /** + * DmlExecutionProviderTrimUploadHeap + * This api is used to trim the upload heap in the DML EP. + * + * WinML communicates directly with DML to perform this as an optimization. + */ + OrtStatus*(ORT_API_CALL* DmlExecutionProviderTrimUploadHeap)(_In_ OrtExecutionProvider* dml_provider)NO_EXCEPTION; + + /** + * DmlExecutionProviderReleaseCompletedReferences + * This api is used to release completed references after first run the DML EP. + * + * WinML communicates directly with DML to perform this as an optimization. + */ + OrtStatus*(ORT_API_CALL* DmlExecutionProviderReleaseCompletedReferences)(_In_ OrtExecutionProvider* dml_provider)NO_EXCEPTION; + + /** + * DmlCreateGPUAllocationFromD3DResource + * This api is used to create a DML EP input based on a user specified d3d12 resource. + * + * WinML uses this as part of its Tensor apis to allow callers to specify their own D3D12 resources as inputs/outputs. + */ + OrtStatus*(ORT_API_CALL* DmlCreateGPUAllocationFromD3DResource)(_In_ ID3D12Resource* pResource, _Out_ void** dml_resource)NO_EXCEPTION; + + /** + * DmlFreeGPUAllocation + * This api is used free the DML EP input created by DmlCreateGPUAllocationFromD3DResource. 
+ * + * WinML uses this as part of its Tensor apis to allow callers to specify their own D3D12 resources as inputs/outputs. + */ + OrtStatus*(ORT_API_CALL* DmlFreeGPUAllocation)(_In_ void* ptr)NO_EXCEPTION; + + /** + * DmlGetD3D12ResourceFromAllocation + * This api is used to get the D3D12 resource when a OrtValue has been allocated by the DML EP and accessed via GetMutableTensorData. + * + * WinML uses this in the image feature path to get the d3d resource and perform and tensorization on inputs directly into the allocated d3d12 resource. + */ + OrtStatus*(ORT_API_CALL* DmlGetD3D12ResourceFromAllocation)(_In_ OrtExecutionProvider* provider, _In_ void* allocation, _Out_ ID3D12Resource** resource)NO_EXCEPTION; + + /** + * DmlCopyTensor + * This api is used copy a tensor allocated by the DML EP Allocator to the CPU. + * + * WinML uses this when graphs are evaluated with DML, and their outputs remain on the GPU but need to be copied back to the CPU. + */ + OrtStatus*(ORT_API_CALL* DmlCopyTensor)(_In_ OrtExecutionProvider* provider, _In_ OrtValue* src, _In_ OrtValue* dst)NO_EXCEPTION; + + /** + * GetProviderMemoryInfo + * This api gets the memory info object associated with an EP. + * + * WinML uses this to manage caller specified D3D12 inputs/outputs. It uses the memory info here to call DmlCreateGPUAllocationFromD3DResource. + */ + OrtStatus*(ORT_API_CALL* GetProviderMemoryInfo)(_In_ OrtExecutionProvider* provider, OrtMemoryInfo** memory_info)NO_EXCEPTION; + + /** + * GetProviderAllocator + * This api gets associated allocator used by a provider. + * + * WinML uses this to create tensors, and needs to hold onto the allocator for the duration of the associated value's lifetime. + */ + OrtStatus*(ORT_API_CALL* GetProviderAllocator)(_In_ OrtExecutionProvider* provider, OrtAllocator** allocator)NO_EXCEPTION; + + /** + * FreeProviderAllocator + * This api frees an allocator. + * + * WinML uses this to free the associated allocator for an ortvalue when creating tensors. 
+ * Internally this derefs a shared_ptr. + */ + OrtStatus*(ORT_API_CALL* FreeProviderAllocator)(_In_ OrtAllocator* allocator)NO_EXCEPTION; + + /** + * GetValueMemoryInfo + * This api gets the memory info of an OrtValue. + * + * WinML uses this to determine if an OrtValue is allocated on the Cpu or elsewhere. + */ + OrtStatus*(ORT_API_CALL* GetValueMemoryInfo)(const OrtValue* value, OrtMemoryInfo** memory_info)NO_EXCEPTION; + + /** + * ExecutionProviderSync + * This api syncs the EP. + * + * WinML uses this to sync EP inputs/outputs directly. + */ + OrtStatus*(ORT_API_CALL* ExecutionProviderSync)(_In_ OrtExecutionProvider* provider)NO_EXCEPTION; + + /** + * CreateCustomRegistry + * This api creates a custom registry that callers can populate with cusom ops. + * + * WinML uses this to support custom ops. + */ + OrtStatus*(ORT_API_CALL* CreateCustomRegistry)(_Out_ IMLOperatorRegistry** registry)NO_EXCEPTION; + + /** + * ValueGetDeviceId + * This api returns the device id of the OrtValue. + * + * WinML uses this to determine if an OrtValue is created on the needed device. + */ + OrtStatus*(ORT_API_CALL* ValueGetDeviceId)(_In_ OrtValue* ort_value, _Out_ int16_t* device_id)NO_EXCEPTION; + + /** + * SessionGetInputRequiredDeviceId + * This api returns the required device id for a model input. + * + * WinML uses this to determine if an OrtValue is created on the needed device. + */ + OrtStatus*(ORT_API_CALL* SessionGetInputRequiredDeviceId)(_In_ OrtSession* session, _In_ const char* const input_name, _Out_ int16_t* device_id)NO_EXCEPTION; + + ORT_CLASS_RELEASE(Model); +}; diff --git a/winml/adapter/winml_adapter_dml.cpp b/winml/adapter/winml_adapter_dml.cpp new file mode 100644 index 0000000000000..235ebbf4d0db2 --- /dev/null +++ b/winml/adapter/winml_adapter_dml.cpp @@ -0,0 +1,160 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once +#include "pch.h" + +#include "winml_adapter_c_api.h" +#include "core/session/ort_apis.h" +#include "winml_adapter_apis.h" +#include "core/framework/error_code_helper.h" + +#ifdef USE_DML +#include "core/session/abi_session_options_impl.h" +#include "core/providers/dml/dml_provider_factory.h" +#include "core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h" +#endif // USE_DML + +namespace winmla = Windows::AI::MachineLearning::Adapter; + +#ifdef USE_DML +Microsoft::WRL::ComPtr CreateDmlDevice(ID3D12Device* d3d12Device) { + // Dynamically load DML to avoid WinML taking a static dependency on DirectML.dll + wil::unique_hmodule dmlDll(LoadLibraryW(L"DirectML.dll")); + THROW_LAST_ERROR_IF(!dmlDll); + + auto dmlCreateDevice1Fn = reinterpret_cast( + GetProcAddress(dmlDll.get(), "DMLCreateDevice1")); + THROW_LAST_ERROR_IF(!dmlCreateDevice1Fn); + + DML_CREATE_DEVICE_FLAGS dmlFlags = DML_CREATE_DEVICE_FLAG_NONE; + + // Enable the DML debug layer in DEBUG builds, if the D3D12 debug layer is also enabled +#if _DEBUG + Microsoft::WRL::ComPtr d3d12DebugDevice; + if (SUCCEEDED(d3d12Device->QueryInterface(IID_PPV_ARGS(&d3d12DebugDevice)))) { + d3d12DebugDevice = nullptr; + dmlFlags |= DML_CREATE_DEVICE_FLAG_DEBUG; + } +#endif // USE_DML + + Microsoft::WRL::ComPtr dmlDevice; + THROW_IF_FAILED(dmlCreateDevice1Fn(d3d12Device, dmlFlags, DML_FEATURE_LEVEL_2_0, IID_PPV_ARGS(&dmlDevice))); + + // Keep DirectML.dll loaded by leaking the handle. This is equivalent behavior to if we delay-loaded the DLL. 
+ dmlDll.release(); + + return dmlDevice; +} + +namespace onnxruntime { +void DmlConfigureProviderFactoryDefaultRoundingMode(onnxruntime::IExecutionProviderFactory* factory, AllocatorRoundingMode rounding_mode); +} + +#endif // USE_DML + +ORT_API_STATUS_IMPL(winmla::OrtSessionOptionsAppendExecutionProviderEx_DML, _In_ OrtSessionOptions* options, + ID3D12Device* d3d_device, ID3D12CommandQueue* queue) { + API_IMPL_BEGIN +#ifdef USE_DML + auto dml_device = CreateDmlDevice(d3d_device); + if (auto status = OrtSessionOptionsAppendExecutionProviderEx_DML(options, dml_device.Get(), queue)) { + return status; + } + auto factory = options->provider_factories.back().get(); + + // OnnxRuntime uses the default rounding mode when calling the session's allocator. + // During initialization, OnnxRuntime allocates weights, which are permanent across session + // lifetime and can be large, so shouldn't be rounded. + // So we create the provider with rounding disabled, and expect the caller to enable it after. + onnxruntime::DmlConfigureProviderFactoryDefaultRoundingMode(factory, AllocatorRoundingMode::Disabled); +#endif // USE_DML + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::DmlExecutionProviderSetDefaultRoundingMode, _In_ OrtExecutionProvider* dml_provider, _In_ bool is_enabled) { + API_IMPL_BEGIN +#ifdef USE_DML + auto dml_provider_internal = reinterpret_cast<::onnxruntime::IExecutionProvider*>(dml_provider); + Dml::SetDefaultRoundingMode(dml_provider_internal, is_enabled ? 
AllocatorRoundingMode::Enabled : AllocatorRoundingMode::Disabled); +#endif + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::DmlExecutionProviderFlushContext, _In_ OrtExecutionProvider* dml_provider) { + API_IMPL_BEGIN +#ifdef USE_DML + auto dml_provider_internal = reinterpret_cast<::onnxruntime::IExecutionProvider*>(dml_provider); + Dml::FlushContext(dml_provider_internal); +#endif // USE_DML + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::DmlExecutionProviderTrimUploadHeap, _In_ OrtExecutionProvider* dml_provider) { + API_IMPL_BEGIN +#ifdef USE_DML + auto dml_provider_internal = reinterpret_cast<::onnxruntime::IExecutionProvider*>(dml_provider); + Dml::TrimUploadHeap(dml_provider_internal); +#endif // USE_DML + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::DmlExecutionProviderReleaseCompletedReferences, _In_ OrtExecutionProvider* dml_provider) { + API_IMPL_BEGIN +#ifdef USE_DML + auto dml_provider_internal = reinterpret_cast<::onnxruntime::IExecutionProvider*>(dml_provider); + Dml::ReleaseCompletedReferences(dml_provider_internal); +#endif // USE_DML + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::DmlCreateGPUAllocationFromD3DResource, _In_ ID3D12Resource* pResource, _Out_ void** dml_resource) { + API_IMPL_BEGIN +#ifdef USE_DML + *dml_resource = Dml::CreateGPUAllocationFromD3DResource(pResource); +#endif // USE_DML USE_DML + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::DmlGetD3D12ResourceFromAllocation, _In_ OrtExecutionProvider* dml_provider, _In_ void* allocation, _Out_ ID3D12Resource** d3d_resource) { + API_IMPL_BEGIN +#ifdef USE_DML + auto dml_provider_internal = reinterpret_cast<::onnxruntime::IExecutionProvider*>(dml_provider); + *d3d_resource = + Dml::GetD3D12ResourceFromAllocation( + dml_provider_internal->GetAllocator(0, ::OrtMemType::OrtMemTypeDefault).get(), + allocation); +#endif // USE_DML USE_DML + return nullptr; + API_IMPL_END +} + 
+ORT_API_STATUS_IMPL(winmla::DmlFreeGPUAllocation, _In_ void* ptr) { + API_IMPL_BEGIN +#ifdef USE_DML + Dml::FreeGPUAllocation(ptr); +#endif // USE_DML USE_DML + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::DmlCopyTensor, _In_ OrtExecutionProvider* dml_provider, _In_ OrtValue* src, _In_ OrtValue* dst) { + API_IMPL_BEGIN +#ifdef USE_DML + auto dml_provider_internal = reinterpret_cast<::onnxruntime::IExecutionProvider*>(dml_provider); + auto status = Dml::CopyTensor(dml_provider_internal, *(src->GetMutable()), *(dst->GetMutable())); + if (!status.IsOK()) { + return onnxruntime::ToOrtStatus(status); + } + return nullptr; +#else + return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "Out of memory"); +#endif // USE_DML USE_DML + API_IMPL_END +} \ No newline at end of file diff --git a/winml/adapter/winml_adapter_environment.cpp b/winml/adapter/winml_adapter_environment.cpp new file mode 100644 index 0000000000000..d74c35aa3344f --- /dev/null +++ b/winml/adapter/winml_adapter_environment.cpp @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once +#include "pch.h" + +#include "winml_adapter_c_api.h" +#include "core/session/ort_apis.h" +#include "winml_adapter_apis.h" +#include "core/framework/error_code_helper.h" +#include "core/session/ort_env.h" + +#ifdef USE_DML +#include "abi_custom_registry_impl.h" +#include "core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h" +#include "core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h" +#endif USE_DML + +namespace winmla = Windows::AI::MachineLearning::Adapter; + +class WinmlAdapterLoggingWrapper : public LoggingWrapper { + public: + WinmlAdapterLoggingWrapper(OrtLoggingFunction logging_function, OrtProfilingFunction profiling_function, void* logger_param) : LoggingWrapper(logging_function, logger_param), + profiling_function_(profiling_function) { + } + + void SendProfileEvent(onnxruntime::profiling::EventRecord& event_record) const override { + if (profiling_function_) { + OrtProfilerEventRecord ort_event_record = {}; + ort_event_record.category_ = static_cast(event_record.cat); + ort_event_record.category_name_ = onnxruntime::profiling::event_categor_names_[event_record.cat]; + ort_event_record.duration_ = event_record.dur; + ort_event_record.event_name_ = event_record.name.c_str(); + ort_event_record.execution_provider_ = (event_record.cat == onnxruntime::profiling::EventCategory::NODE_EVENT) ? event_record.args["provider"].c_str() : nullptr; + ort_event_record.op_name_ = (event_record.cat == onnxruntime::profiling::EventCategory::NODE_EVENT) ? 
event_record.args["op_name"].c_str() : nullptr; + ort_event_record.process_id_ = event_record.pid; + ort_event_record.thread_id_ = event_record.tid; + ort_event_record.time_span_ = event_record.ts; + + profiling_function_(&ort_event_record); + } + } + + private: + OrtProfilingFunction profiling_function_{}; +}; + +ORT_API_STATUS_IMPL(winmla::EnvConfigureCustomLoggerAndProfiler, _In_ OrtEnv* env, OrtLoggingFunction logging_function, OrtProfilingFunction profiling_function, + _In_opt_ void* logger_param, OrtLoggingLevel default_warning_level, + _In_ const char* logid, _Outptr_ OrtEnv** out) { + API_IMPL_BEGIN + std::string name = logid; + std::unique_ptr logger = onnxruntime::make_unique(logging_function, profiling_function, logger_param); + + // Clear the logging manager, since only one default instance of logging manager can exist at a time. + env->SetLoggingManager(nullptr); + + auto winml_logging_manager = std::make_unique(std::move(logger), + static_cast(default_warning_level), + false, + onnxruntime::logging::LoggingManager::InstanceType::Default, + &name); + + // Set a new default logging manager + env->SetLoggingManager(std::move(winml_logging_manager)); + return nullptr; + API_IMPL_END +} + +// Override select shape inference functions which are incomplete in ONNX with versions that are complete, +// and are also used in DML kernel registrations. Doing this avoids kernel and shader creation being +// deferred until first evaluation. It also prevents a situation where inference functions in externally +// registered schema are reachable only after upstream schema have been revised in a later OS release, +// which would be a compatibility risk. +ORT_API_STATUS_IMPL(winmla::OverrideSchema) { + API_IMPL_BEGIN +#ifdef USE_DML + static std::once_flag schema_override_once_flag; + std::call_once(schema_override_once_flag, []() { + SchemaInferenceOverrider::OverrideSchemaInferenceFunctions(); + }); +#endif USE_DML. 
+ return nullptr; + API_IMPL_END +} \ No newline at end of file diff --git a/winml/adapter/winml_adapter_execution_provider.cpp b/winml/adapter/winml_adapter_execution_provider.cpp new file mode 100644 index 0000000000000..e9dbcf2a060b5 --- /dev/null +++ b/winml/adapter/winml_adapter_execution_provider.cpp @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "pch.h" + +#include "winml_adapter_c_api.h" +#include "core/session/ort_apis.h" +#include "winml_adapter_apis.h" +#include "core/framework/error_code_helper.h" +#include "core/framework/execution_provider.h" + +namespace winmla = Windows::AI::MachineLearning::Adapter; + +struct OrtAllocatorWrapper : public OrtAllocator { + public: + OrtAllocatorWrapper(onnxruntime::AllocatorPtr impl) : impl_(impl) { + version = ORT_API_VERSION; + Alloc = AllocImpl; + Free = FreeImpl; + Info = InfoImpl; + } + + static void* ORT_API_CALL AllocImpl(struct OrtAllocator* this_, size_t size) { + return static_cast(this_)->impl_->Alloc(size); + } + static void ORT_API_CALL FreeImpl(struct OrtAllocator* this_, void* p) { + return static_cast(this_)->impl_->Free(p); + } + static const struct OrtMemoryInfo* ORT_API_CALL InfoImpl(const struct OrtAllocator* this_) { + return &(static_cast(this_)->impl_->Info()); + } + + private: + onnxruntime::AllocatorPtr impl_; +}; + +ORT_API_STATUS_IMPL(winmla::ExecutionProviderSync, _In_ OrtExecutionProvider* provider) { + API_IMPL_BEGIN + const auto execution_provider = reinterpret_cast(provider); + execution_provider->Sync(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::GetProviderAllocator, _In_ OrtExecutionProvider* provider, OrtAllocator** allocator) { + API_IMPL_BEGIN + const auto execution_provider = reinterpret_cast(provider); + auto allocator_ptr = execution_provider->GetAllocator(0, ::OrtMemType::OrtMemTypeDefault); + *allocator = new (std::nothrow) 
OrtAllocatorWrapper(allocator_ptr); + if (*allocator == nullptr) { + return OrtApis::CreateStatus(ORT_FAIL, "Out of memory"); + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::GetProviderMemoryInfo, _In_ OrtExecutionProvider* provider, OrtMemoryInfo** memory_info) { + API_IMPL_BEGIN + const auto execution_provider = reinterpret_cast(provider); + + auto allocator = execution_provider->GetAllocator(0, ::OrtMemType::OrtMemTypeDefault); + + const auto& info = allocator->Info(); + *memory_info = new (std::nothrow) OrtMemoryInfo(info.name, info.alloc_type, info.device, info.id, info.mem_type); + if (*memory_info == nullptr) { + return OrtApis::CreateStatus(ORT_FAIL, "Out of memory"); + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::FreeProviderAllocator, _In_ OrtAllocator* allocator) { + API_IMPL_BEGIN + delete static_cast(allocator); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::GetValueMemoryInfo, const OrtValue* value, OrtMemoryInfo** memory_info) { + API_IMPL_BEGIN + const auto& tensor = value->Get(); + auto info = tensor.Location(); + *memory_info = new OrtMemoryInfo(info.name, info.alloc_type, info.device, info.id, info.mem_type); + if (*memory_info == nullptr) { + return OrtApis::CreateStatus(ORT_FAIL, "Out of memory"); + } + return nullptr; + API_IMPL_END +} \ No newline at end of file diff --git a/winml/adapter/winml_adapter_model.cpp b/winml/adapter/winml_adapter_model.cpp new file mode 100644 index 0000000000000..82039872156f8 --- /dev/null +++ b/winml/adapter/winml_adapter_model.cpp @@ -0,0 +1,430 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once +#include "pch.h" + +#include "winml_adapter_model.h" + +#include "winml_adapter_c_api.h" +#include "core/graph/onnx_protobuf.h" +#include "core/session/ort_apis.h" +#include "winml_adapter_apis.h" +#include "core/framework/error_code_helper.h" + +#include +#include +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "core/framework/onnxruntime_typeinfo.h" + +namespace winmla = Windows::AI::MachineLearning::Adapter; + +static std::vector GetInitializers(const onnx::ModelProto& model_proto) { + std::vector initializers; + auto& graph = model_proto.graph(); + auto& graph_initializers = graph.initializer(); + for (auto& initializer : graph_initializers) { + initializers.push_back(initializer.name().c_str()); + } + return initializers; +} + +static std::vector GetInputsWithoutInitializers(const onnx::ModelProto& model_proto) { + auto initializers = GetInitializers(model_proto); + + std::vector inputs_without_initializers; + auto& graph = model_proto.graph(); + auto& inputs = graph.input(); + for (auto& input : inputs) { + if (input.has_name() && input.has_type()) { + auto found_it = std::find_if( + std::begin(initializers), + std::end(initializers), + [&](auto& initializer) { + return std::strcmp(initializer, input.name().c_str()) == 0; + }); + + auto is_initializer = found_it != std::end(initializers); + if (!is_initializer) { + inputs_without_initializers.push_back(&input); + } + } + } + return inputs_without_initializers; +} + +static std::vector GetOutputs(const onnx::ModelProto& model_proto) { + std::vector outputs_with_name; + auto& graph = model_proto.graph(); + auto& outputs = graph.output(); + for (auto& output : outputs) { + if (output.has_name() && output.has_type()) { + outputs_with_name.push_back(&output); + } + } + return outputs_with_name; +} + +class ModelInfo { + public: + ModelInfo(const onnx::ModelProto* model_proto) { + Initialize(model_proto); + } + + public: + // model metadata + std::string author_; + std::string 
name_; + std::string domain_; + std::string description_; + int64_t version_; + std::vector> model_metadata_; + std::vector input_features_; + std::vector output_features_; + bool requires_float16_support_; + + private: + void Initialize(const onnx::ModelProto* model_proto) { + for (auto& prop : model_proto->metadata_props()) { + model_metadata_.push_back(std::make_pair(prop.key(), prop.value())); + } + + input_features_ = GetInputsWithoutInitializers(*model_proto); + output_features_ = ::GetOutputs(*model_proto); + + auto has_producer_name = model_proto->has_producer_name(); + author_ = has_producer_name ? model_proto->producer_name() : ""; + + auto has_domain = model_proto->has_domain(); + domain_ = has_domain ? model_proto->domain() : ""; + + auto has_graph = model_proto->has_graph(); + auto graph_has_name = model_proto->graph().has_name(); + auto is_name_available = has_graph && graph_has_name; + name_ = is_name_available ? model_proto->graph().name() : ""; + + auto has_description = model_proto->has_doc_string(); + description_ = has_description ? model_proto->doc_string() : ""; + + auto has_version = model_proto->has_model_version(); + version_ = has_version ? 
model_proto->model_version() : 0; + } +}; + +OrtModel::OrtModel(std::unique_ptr model_proto) : model_proto_(std::move(model_proto)), + model_info_(std::make_unique(model_proto_.get())) { +} + +// factory methods for creating an ort model from a path +static OrtStatus* CreateModelProto(const char* path, std::unique_ptr& out) { + int file_descriptor; + _set_errno(0); // clear errno + _sopen_s( + &file_descriptor, + path, + O_RDONLY | _O_SEQUENTIAL | _O_BINARY, + _SH_DENYWR, + _S_IREAD | _S_IWRITE); + + errno_t err = 0; + _get_errno(&err); + if (err == ENOENT) { + return OrtApis::CreateStatus(ORT_NO_SUCHFILE, "Model file not found!"); + } + + if (0 > file_descriptor) { + return OrtApis::CreateStatus(ORT_NO_SUCHFILE, "Model file not found!"); + } + + google::protobuf::io::FileInputStream stream(file_descriptor); + stream.SetCloseOnDelete(true); + + auto model_proto = std::unique_ptr(new onnx::ModelProto()); + + auto parse_succeeded = model_proto->ParseFromZeroCopyStream(&stream); + if (!parse_succeeded) { + return OrtApis::CreateStatus(ORT_INVALID_PROTOBUF, "Failed to parse model file!"); + } + + out = std::move(model_proto); + + return S_OK; +} + +OrtStatus* OrtModel::CreateOrtModelFromPath(const char* path, size_t len, OrtModel** model) { + ORT_UNUSED_PARAMETER(len); + + std::unique_ptr model_proto; + + if (auto status = CreateModelProto(path, model_proto)) { + return status; + } + + return OrtModel::CreateOrtModelFromProto(std::move(model_proto), model); +} + +OrtStatus* OrtModel::CreateOrtModelFromData(void* data, size_t len, OrtModel** model) { + auto model_proto = std::unique_ptr(new onnx::ModelProto()); + + auto parse_succeeded = model_proto->ParseFromArray(data, static_cast(len)); + if (!parse_succeeded) { + return OrtApis::CreateStatus(ORT_INVALID_PROTOBUF, "Failed to parse model stream!"); + } + + return OrtModel::CreateOrtModelFromProto(std::move(model_proto), model); +} + +OrtStatus* OrtModel::CreateOrtModelFromProto(std::unique_ptr&& model_proto, 
OrtModel** model) { + *model = new (std::nothrow) OrtModel(std::move(model_proto)); + if (*model == nullptr) { + return OrtApis::CreateStatus(ORT_ENGINE_ERROR, "Engine failed to create a model!"); + } + + return nullptr; +} + +const ModelInfo* OrtModel::UseModelInfo() const { + return model_info_.get(); +} + +const ONNX_NAMESPACE::ModelProto* OrtModel::UseModelProto() const { + return model_proto_.get(); +} + +std::unique_ptr OrtModel::DetachModelProto() { + return std::move(model_proto_); +} + +ORT_API_STATUS_IMPL(winmla::CreateModelFromPath, const char* model_path, size_t size, OrtModel** out) { + API_IMPL_BEGIN + if (auto status = OrtModel::CreateOrtModelFromPath(model_path, size, out)) { + return status; + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::CreateModelFromData, void* data, size_t size, OrtModel** out) { + API_IMPL_BEGIN + if (auto status = OrtModel::CreateOrtModelFromData(data, size, out)) { + return status; + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::CloneModel, const OrtModel* in, OrtModel** out) { + API_IMPL_BEGIN + auto model_proto_copy = std::make_unique(*in->UseModelProto()); + if (auto status = OrtModel::CreateOrtModelFromProto(std::move(model_proto_copy), out)) { + return status; + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetAuthor, const OrtModel* model, const char** const author, size_t* len) { + API_IMPL_BEGIN + *author = model->UseModelInfo()->author_.c_str(); + *len = model->UseModelInfo()->author_.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetName, const OrtModel* model, const char** const name, size_t* len) { + API_IMPL_BEGIN + *name = model->UseModelInfo()->name_.c_str(); + *len = model->UseModelInfo()->name_.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetDomain, const OrtModel* model, const char** const domain, size_t* len) { + API_IMPL_BEGIN + *domain = 
model->UseModelInfo()->domain_.c_str(); + *len = model->UseModelInfo()->domain_.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetDescription, const OrtModel* model, const char** const description, size_t* len) { + API_IMPL_BEGIN + *description = model->UseModelInfo()->description_.c_str(); + *len = model->UseModelInfo()->description_.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetVersion, const OrtModel* model, int64_t* version) { + API_IMPL_BEGIN + *version = model->UseModelInfo()->version_; + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetMetadataCount, const OrtModel* model, size_t* count) { + API_IMPL_BEGIN + *count = model->UseModelInfo()->model_metadata_.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetMetadata, const OrtModel* model, size_t count, const char** const key, + size_t* key_len, const char** const value, size_t* value_len) { + API_IMPL_BEGIN + *key = model->UseModelInfo()->model_metadata_[count].first.c_str(); + *key_len = model->UseModelInfo()->model_metadata_[count].first.size(); + *value = model->UseModelInfo()->model_metadata_[count].second.c_str(); + *value_len = model->UseModelInfo()->model_metadata_[count].second.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetInputCount, const OrtModel* model, size_t* count) { + API_IMPL_BEGIN + *count = model->UseModelInfo()->input_features_.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetOutputCount, const OrtModel* model, size_t* count) { + API_IMPL_BEGIN + *count = model->UseModelInfo()->output_features_.size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetInputName, const OrtModel* model, size_t index, + const char** input_name, size_t* count) { + API_IMPL_BEGIN + *input_name = model->UseModelInfo()->input_features_[index]->name().c_str(); + *count = 
model->UseModelInfo()->input_features_[index]->name().size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetOutputName, const OrtModel* model, size_t index, + const char** output_name, size_t* count) { + API_IMPL_BEGIN + *output_name = model->UseModelInfo()->output_features_[index]->name().c_str(); + *count = model->UseModelInfo()->output_features_[index]->name().size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetInputDescription, const OrtModel* model, size_t index, + const char** input_description, size_t* count) { + API_IMPL_BEGIN + *input_description = model->UseModelInfo()->input_features_[index]->doc_string().c_str(); + *count = model->UseModelInfo()->input_features_[index]->doc_string().size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetOutputDescription, const OrtModel* model, size_t index, + const char** output_description, size_t* count) { + API_IMPL_BEGIN + *output_description = model->UseModelInfo()->output_features_[index]->doc_string().c_str(); + *count = model->UseModelInfo()->output_features_[index]->doc_string().size(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetInputTypeInfo, const OrtModel* model, size_t index, OrtTypeInfo** type_info) { + API_IMPL_BEGIN + if (auto status = OrtTypeInfo::FromTypeProto(&model->UseModelInfo()->input_features_[index]->type(), type_info)) { + return status; + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelGetOutputTypeInfo, const OrtModel* model, size_t index, OrtTypeInfo** type_info) { + API_IMPL_BEGIN + if (auto status = OrtTypeInfo::FromTypeProto(&model->UseModelInfo()->output_features_[index]->type(), type_info)) { + return status; + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::ModelEnsureNoFloat16, const OrtModel* model) { + API_IMPL_BEGIN + auto model_info = model->UseModelInfo(); + auto model_proto = model->UseModelProto(); + auto& graph 
= model_proto->graph(); + + // The model will not contain fp16 operations if: + // 1. The model has no fp16 inputs + // 2. The model has no fp16 initializers + // 3. The model does not create any fp16 intermediary tensors via the Cast (to float16) operator + // 4. The model does not have any fp16 outputs + + // 1. Ensure that The model has no fp16 inputs + for (auto input : model_info->input_features_) { + auto& type = input->type(); + if (type.value_case() == ONNX_NAMESPACE::TypeProto::kTensorType) { + auto& tensor_type = type.tensor_type(); + if (tensor_type.elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16) { + std::stringstream error_message; + error_message << "The model contains a 16-bit input (" + << input->name() + << "), but the current device does not support 16-bit float."; + return OrtApis::CreateStatus(ORT_INVALID_GRAPH, error_message.str().c_str()); + } + } + } + + // 2. Ensure that the model has no fp16 initializers + for (int i = 0; i < graph.node_size(); i++) { + auto node = graph.node(i); + if (node.op_type() == "Cast" && node.domain().empty()) { + for (int attribIndex = 0; attribIndex < node.attribute_size(); attribIndex++) { + auto attribute = node.attribute(attribIndex); + if (attribute.name() == "to") { + if (attribute.i() == onnx::TensorProto::DataType::TensorProto_DataType_FLOAT16) { + std::stringstream error_message; + error_message << "The model contains a 16-bit input (" + << node.name().c_str() + << "), but the current device does not support 16-bit float."; + return OrtApis::CreateStatus(ORT_INVALID_GRAPH, error_message.str().c_str()); + } + } + } + } + } + + // 3. 
Ensure that the model does not create any fp16 intermediary + // tensors via the Cast (to float16) operator + for (int i = 0; i < graph.initializer_size(); i++) { + auto initializer = graph.initializer(i); + if (initializer.data_type() == onnx::TensorProto::DataType::TensorProto_DataType_FLOAT16) { + std::stringstream error_message; + error_message << "The model contains a 16-bit input (" + << initializer.name().c_str() + << "), but the current device does not support 16-bit float."; + return OrtApis::CreateStatus(ORT_INVALID_GRAPH, error_message.str().c_str()); + } + } + + // 4. Ensure that the model does not have any fp16 outputs + for (auto output : model_info->output_features_) { + auto& type = output->type(); + if (type.value_case() == ONNX_NAMESPACE::TypeProto::kTensorType) { + auto& tensor_type = type.tensor_type(); + if (tensor_type.elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16) { + std::stringstream error_message; + error_message << "The model contains a 16-bit input (" + << output->name() + << "), but the current device does not support 16-bit float."; + return OrtApis::CreateStatus(ORT_INVALID_GRAPH, error_message.str().c_str()); + } + } + } + return nullptr; + API_IMPL_END +} + +ORT_API(void, winmla::ReleaseModel, OrtModel* ptr) { + delete ptr; +} \ No newline at end of file diff --git a/winml/adapter/winml_adapter_model.h b/winml/adapter/winml_adapter_model.h new file mode 100644 index 0000000000000..1dabfa23dfa0b --- /dev/null +++ b/winml/adapter/winml_adapter_model.h @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "winml_adapter_c_api.h" +#include +#include "core/graph/onnx_protobuf.h" + +class ModelInfo; + +struct OrtModel { + public: + static OrtStatus* CreateOrtModelFromPath(const char* path, size_t len, OrtModel** model); + static OrtStatus* CreateOrtModelFromData(void* data, size_t len, OrtModel** model); + static OrtStatus* CreateOrtModelFromProto(std::unique_ptr&& model_proto, OrtModel** model); + const ModelInfo* UseModelInfo() const; + + const onnx::ModelProto* UseModelProto() const; + std::unique_ptr DetachModelProto(); + + private: + OrtModel(std::unique_ptr model_proto); + OrtModel(const OrtModel& other) = delete; + OrtModel& operator=(const OrtModel& other) = delete; + + private: + std::unique_ptr model_proto_; + std::unique_ptr model_info_; +}; diff --git a/winml/adapter/winml_adapter_session.cpp b/winml/adapter/winml_adapter_session.cpp new file mode 100644 index 0000000000000..329d713752f70 --- /dev/null +++ b/winml/adapter/winml_adapter_session.cpp @@ -0,0 +1,245 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include "pch.h" + +#include "winml_adapter_c_api.h" +#include "core/session/ort_apis.h" +#include "winml_adapter_apis.h" +#include "core/framework/error_code_helper.h" + +#include "core/session/inference_session.h" +#include "core/session/abi_session_options_impl.h" +#include "core/session/ort_env.h" + +#include "winml_adapter_model.h" +#include "core/framework/utils.h" + +#ifdef USE_DML +#include "core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.h" +#include "abi_custom_registry_impl.h" +#include "core/providers/dml/GraphTransformers/GraphTransformerHelpers.h" +#endif USE_DML + +namespace winmla = Windows::AI::MachineLearning::Adapter; + +// ORT intentionally requires callers derive from their session class to access +// the protected methods used below. 
+class InferenceSessionProtectedLoadAccessor : public onnxruntime::InferenceSession { + public: + onnxruntime::common::Status + Load(std::unique_ptr p_model_proto) { + return onnxruntime::InferenceSession::Load(std::move(p_model_proto)); + } + const onnxruntime::SessionState& GetSessionState() { + return *session_state_; + } +}; + +ORT_API_STATUS_IMPL(winmla::CreateSessionWithoutModel, _In_ OrtEnv* env, _In_ const OrtSessionOptions* options, _Outptr_ OrtSession** session) { + API_IMPL_BEGIN + std::unique_ptr inference_session; + try { + // Create the inference session + inference_session = std::make_unique(options->value, env->GetLoggingManager()); + } catch (const std::exception& e) { + return OrtApis::CreateStatus(ORT_FAIL, e.what()); + } + + // we need to disable mem pattern if DML is one of the providers since DML doesn't have the concept of + // byte addressable memory + std::vector> provider_list; + if (options) { + for (auto& factory : options->provider_factories) { + auto provider = factory->CreateProvider(); + if (provider->Type() == onnxruntime::kDmlExecutionProvider) { + if (options->value.enable_mem_pattern) { + // TODO Instead of returning an error, should we set mem pattern to false here and log a warning saying so? + // Doing so would be inconsistent with the Python API that doesn't go through this code path. 
+ return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Mem pattern should be disabled when using DML execution provider."); + } + if (options->value.execution_mode != ExecutionMode::ORT_SEQUENTIAL) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Sequential execution should be enabled when using DML execution provider."); + } + } + provider_list.push_back(std::move(provider)); + } + } + + Status status; + if (options) { + if (!options->custom_op_domains_.empty()) { + status = inference_session->AddCustomOpDomains(options->custom_op_domains_); + if (!status.IsOK()) + return onnxruntime::ToOrtStatus(status); + } + } + + // register the providers + for (auto& provider : provider_list) { + if (provider) { + inference_session->RegisterExecutionProvider(std::move(provider)); + } + } + + *session = reinterpret_cast(inference_session.release()); + + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::SessionGetExecutionProvider, _In_ OrtSession* session, _In_ size_t index, _Out_ OrtExecutionProvider** ort_provider) { + API_IMPL_BEGIN + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + auto session_protected_load_accessor = + static_cast(inference_session); + const auto& session_state = session_protected_load_accessor->GetSessionState(); + auto& provider_id = session_state.GetExecutionProviders().GetIds().at(index); + const auto& provider = session_state.GetExecutionProviders().Get(provider_id); + + *ort_provider = const_cast(reinterpret_cast(provider)); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::SessionInitialize, _In_ OrtSession* session) { + API_IMPL_BEGIN + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + auto status = inference_session->Initialize(); + if (!status.IsOK()) { + return onnxruntime::ToOrtStatus(status); + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::SessionLoadAndPurloinModel, _In_ OrtSession* session, _In_ 
OrtModel* model) { + API_IMPL_BEGIN + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + auto session_protected_load_accessor = + static_cast(inference_session); + + auto status = session_protected_load_accessor->Load(model->DetachModelProto()); + + ReleaseModel(model); + + if (!status.IsOK()) { + return onnxruntime::ToOrtStatus(status); + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::SessionStartProfiling, _In_ OrtEnv* env, _In_ OrtSession* session) { + API_IMPL_BEGIN + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + inference_session->StartProfiling(&env->GetLoggingManager()->DefaultLogger()); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::SessionEndProfiling, _In_ OrtSession* session) { + API_IMPL_BEGIN + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + inference_session->EndProfiling(); + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::SessionRegisterGraphTransformers, _In_ OrtSession* session) { + API_IMPL_BEGIN +#ifdef USE_DML + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + + // Bug 22973884 : Fix issues with BatchNorm + Add and BatchNorm + Mul handling implicit inputs, and move from Winml to ORT + GraphTransformerHelpers::RegisterGraphTransformers(inference_session); +#endif USE_DML + return nullptr; + API_IMPL_END +} + +inline std::list> +GetLotusCustomRegistries(IMLOperatorRegistry* registry) { + if (registry != nullptr) { +#ifdef USE_DML + // Down-cast to the concrete type. + // The only supported input is the AbiCustomRegistry type. + // Other implementations of IMLOperatorRegistry are forbidden. 
+ auto abi_custom_registry = + static_cast(registry); + + // Get the ORT registry + return abi_custom_registry->GetRegistries(); +#endif // USE_DML + } + return {}; +} + +ORT_API_STATUS_IMPL(winmla::SessionRegisterCustomRegistry, _In_ OrtSession* session, _In_ IMLOperatorRegistry* registry) { + API_IMPL_BEGIN + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + auto custom_registries = GetLotusCustomRegistries(registry); + + // Register + for (auto& custom_registry : custom_registries) { + ORT_THROW_IF_ERROR(inference_session->RegisterCustomRegistry(custom_registry)); + } + + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(winmla::CreateCustomRegistry, _Out_ IMLOperatorRegistry** registry) { + API_IMPL_BEGIN +#ifdef USE_DML + auto impl = wil::MakeOrThrow(); + *registry = impl.Detach(); +#endif // USE_DML + return nullptr; + API_IMPL_END +} + +static OrtDevice GetSessionGetInputDevice(_In_ OrtSession* session, _In_ const char* const input_name) { + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + auto session_protected_load_accessor = + static_cast(inference_session); + const onnxruntime::SessionState& session_state = session_protected_load_accessor->GetSessionState(); + + std::vector node_info_vec; + session_state.GetInputNodeInfo(input_name, node_info_vec); + const auto& node_info = node_info_vec.front(); // all consumers of a feed have the same device so first entry is fine + return *node_info.device; +} + +ORT_API_STATUS_IMPL(winmla::SessionGetInputRequiredDeviceId, _In_ OrtSession* session, _In_ const char* const input_name, _Out_ int16_t* device_id) { + auto device = GetSessionGetInputDevice(session, input_name); + *device_id = device.Id(); + return nullptr; +} + +ORT_API_STATUS_IMPL(winmla::ValueGetDeviceId, _In_ OrtValue* ort_value, _Out_ int16_t* device_id) { + auto device = ort_value->Get().Location().device; + *device_id = device.Id(); + return nullptr; +} + 
+ORT_API_STATUS_IMPL(winmla::SessionCopyOneInputAcrossDevices, _In_ OrtSession* session, _In_ const char* const input_name, + _In_ OrtValue* orig_value, _Outptr_ OrtValue** new_value) { + API_IMPL_BEGIN + auto inference_session = reinterpret_cast<::onnxruntime::InferenceSession*>(session); + auto session_protected_load_accessor = + static_cast(inference_session); + const onnxruntime::SessionState& session_state = session_protected_load_accessor->GetSessionState(); + + auto ort_value = std::make_unique(); + auto status = onnxruntime::utils::CopyOneInputAcrossDevices(session_state, input_name, *orig_value, *ort_value.get()); + if (!status.IsOK()) { + return onnxruntime::ToOrtStatus(status); + } + + *new_value = ort_value.release(); + + return nullptr; + API_IMPL_END +} \ No newline at end of file diff --git a/winml/api/Windows.AI.MachineLearning.idl b/winml/api/Windows.AI.MachineLearning.idl new file mode 100644 index 0000000000000..2debbc75e5c9e --- /dev/null +++ b/winml/api/Windows.AI.MachineLearning.idl @@ -0,0 +1,726 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +//! \file Windows.AI.MachineLearning.idl +import "Windows.Foundation.idl"; +import "windows.graphics.idl"; +import "windows.media.idl"; + +#ifndef WINDOWSAI_RAZZLE_BUILD +// Pull in definition for DualApiPartitionAttribute, because the WinML IDL +// does not build in the OS Repo, and needs to access internal definitions for +// various custom attirbute definitions. +import "dualapipartitionattribute.idl"; +import "windows.graphics.directx.direct3d11.idl"; +import "windows.graphics.imaging.idl"; +import "windows.storage.idl"; +#endif + +#include + +namespace Windows.AI.MachineLearning +{ + [contractversion(3)] + apicontract MachineLearningContract{}; + + //! Forward declarations + runtimeclass LearningModelBinding; + + //! \enum LearningModelFeatureKind + //! \brief Defines the list of input and output feature types for a machine learning model. 
+ //! Each of these maps to a corresponding FeatureDescriptor that you can use to learn more + //! about how to pass the feature into and out of the the model. + [contract(MachineLearningContract, 1)] + enum LearningModelFeatureKind + { + //! The feature is a tensor, use TensorFeatureDescriptor + Tensor = 0, + //! The feature is a sequence, use SequenceFeatureDescriptor + Sequence, + //! The feature is a map, use MapFeatureDescriptor + Map, + //! The feature is an image, use ImageFeatureDescriptor + Image + }; + + //! \brief Describes the common properties that all features have. + [uuid(bc08cf7c-6ed0-4004-97ba-b9a2eecd2b4f)] + [contract(MachineLearningContract, 1)] + interface ILearningModelFeatureDescriptor + { + //! \brief The name you use to bind values to this feature. + //! This property is required and will always be there. All features have a name as + //! primary key for the model. Usually as a single word. You use this name when + //! enumerating the features of the model and then later when binding a value to one + //! those feature using a LearningModelBinding. It will be unique across all features. + String Name{ get; }; + //! \brief A description of what this feature is used for in the model + //! This property is optional. If provided by an author model it will be a description + //! of what the feature is for the model. + String Description{ get; }; + //! \brief The kind of feature - use this to know which derived class to use. + LearningModelFeatureKind Kind{ get; }; + //! \brief If true, you must bind a value to this feature before calling Evalaute(). + Boolean IsRequired{ get; }; + } + + [uuid(2a222e5d-afb1-47ed-bfad-b5b3a459ec04)] + [contract(MachineLearningContract, 1)] + interface ILearningModelOperatorProvider : IInspectable + { + } + + //! \interface LearningModel + //! \brief Represents a trained machine learning model. + //! \details This is the main object you use to interact with Windows Machine Learning. You use + //! 
it to load, bind, and evaluate trained ONNX models. To load the model you use + //! one of the Load constructors. You can then enumerate the InputFeatures and + //! OutputFeatures. To bind and evaluate you create a LearningModelSession. + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ILearningModelStatics", e3b977e8-6952-4e47-8ef4-1f7f07897c6d)] + [interface_name("Windows.AI.MachineLearning.ILearningModel", 5b8e4920-489f-4e86-9128-265a327b78fa)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass LearningModel : Windows.Foundation.IClosable + { + //! Loads an ONNX model from a StorageFile asynchronously. + [remote_async] + static Windows.Foundation.IAsyncOperation LoadFromStorageFileAsync(Windows.Storage.IStorageFile modelFile); + //! Loads an ONNX model from a stream asynchronously. + [remote_async] + static Windows.Foundation.IAsyncOperation LoadFromStreamAsync(Windows.Storage.Streams.IRandomAccessStreamReference modelStream); + //! Loads an ONNX model from a file on disk. + static LearningModel LoadFromFilePath(String filePath); + //! Loads an ONNX model from a stream. + static LearningModel LoadFromStream(Windows.Storage.Streams.IRandomAccessStreamReference modelStream); + + //! Loads an ONNX model from a StorageFile asynchronously. + [remote_async] + [method_name("LoadFromStorageFileWithOperatorProviderAsync")] static Windows.Foundation.IAsyncOperation LoadFromStorageFileAsync(Windows.Storage.IStorageFile modelFile, ILearningModelOperatorProvider operatorProvider); + //! Loads an ONNX model from a stream asynchronously. + [remote_async] + [method_name("LoadFromStreamWithOperatorProviderAsync")] static Windows.Foundation.IAsyncOperation LoadFromStreamAsync(Windows.Storage.Streams.IRandomAccessStreamReference modelStream, ILearningModelOperatorProvider operatorProvider); + //! Loads an ONNX model from a file on disk. 
+ [method_name("LoadFromFilePathWithOperatorProvider")] static LearningModel LoadFromFilePath(String filePath, ILearningModelOperatorProvider operatorProvider); + //! Loads an ONNX model from a stream. + [method_name("LoadFromStreamWithOperatorProvider")] static LearningModel LoadFromStream(Windows.Storage.Streams.IRandomAccessStreamReference modelStream, ILearningModelOperatorProvider operatorProvider); + + //! The name of the model author. + String Author{ get; }; + //! The name of the model. + String Name{ get; }; + //! The namespace of the imported model operator set. All models implicitly import the default ONNX operator set. + String Domain{ get; }; + //! A description of the model. + String Description{ get; }; + //! The ONNX version assumed by the model. + Int64 Version{ get; }; + //! The raw ONNX model provided metadata. + Windows.Foundation.Collections.IMapView Metadata{ get; }; + //! All of the input features. + Windows.Foundation.Collections.IVectorView InputFeatures{ get; }; + //! All of the output features. + Windows.Foundation.Collections.IVectorView OutputFeatures{ get; }; + } + + //! \enum LearningModelDeviceKind + //! \brief Defines the list of devices that can evaluate a machine learning model. + [contract(MachineLearningContract, 1)] + enum LearningModelDeviceKind + { + //! Let the system decide which device to use. + Default = 0, + //! Use the CPU to evaluate the model. + Cpu, + //! Use a GPU or other DirectX device to evaluate the model. + DirectX, + //! Use the system policy defined device for high performance. + DirectXHighPerformance, + //! Use the system policy defined device for minimum power. + DirectXMinPower + }; + + + //! \class LearningModelDevice + //! \brief Create an instance specific to which device you want to evaluate the machine learning model on. + //! 
\namespace Windows.AI.MachineLearning + [contract(MachineLearningContract, 1)] + [constructor_name("Windows.AI.MachineLearning.ILearningModelDeviceFactory", 9cffd74d-b1e5-4f20-80ad-0a56690db06b)] + [static_name("Windows.AI.MachineLearning.ILearningModelDeviceStatics", 49f32107-a8bf-42bb-92c7-10b12dc5d21f)] + [interface_name("Windows.AI.MachineLearning.ILearningModelDevice", f5c2c8fe-3f56-4a8c-ac5f-fdb92d8b8252)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass LearningModelDevice + { + //! Create a LearningModelDevice from the specified IDirect3DDevice. + //! During evaluation, the specified IDirect3DDevice will be used to create resources and queue work during execution. + static LearningModelDevice CreateFromDirect3D11Device(Windows.Graphics.DirectX.Direct3D11.IDirect3DDevice device); + //! Create a LearningModelDevice from the specified list of devices enumerated in LearningModelDeviceKind. + [method_name("Create")] LearningModelDevice(LearningModelDeviceKind deviceKind); + + // BUGBUG: this needs to be Windows.Graphics.DisplayAdapterId which is only there in the RS4 winmd + //! Returns the unique identifier for the chosen adapter for model + Windows.Graphics.DisplayAdapterId AdapterId{ get; }; + //! Returns the chosen IDirect3DDevice for model evaluation. + Windows.Graphics.DirectX.Direct3D11.IDirect3DDevice Direct3D11Device{ get; }; + } + + [contract(MachineLearningContract, 1)] + [interface_name("Windows.AI.MachineLearning.ILearningModelEvaluationResult", b2f9bfcd-960e-49c0-8593-eb190ae3eee2)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass LearningModelEvaluationResult + { + //! The optional user supplied string that was attached to the Evaluate() call to connect the output results. + String CorrelationId{ get; }; + //! If the evaluation failed, returns an error code for what caused the failure. + Int32 ErrorStatus{ get; }; + //! True if the evaluation completed successfully. + //! 
If False, use ErrorStatus to find out what caused the failure. + Boolean Succeeded{ get; }; + //! A set of features representing the output prediction along with probabilities. + Windows.Foundation.Collections.IMapView Outputs{ get; }; + } + + //! \class LearningModelSessionOptions + //! \brief TODO:Docs + [contract(MachineLearningContract, 2)] + [dualapipartition(1)] + runtimeclass LearningModelSessionOptions + { + // default constructor + LearningModelSessionOptions(); + + //! The BatchSizeOverride option will allow the model compiler to use constant batch size performance optimizations when setting up the LearningModelSession. + //! The default value for the BatchSizeOverride will be 1 indicating a static batch size of 1. + //! BatchSizeOverride = 0 indicates that the batch size present in the model should be honored. + //! BatchSizeOverride > 0 indicates the size of batch that will be used to override the model batch size and optimize evaluations. + UInt32 BatchSizeOverride { get; set; }; + + [contract(MachineLearningContract, 3)] + { + //! The CloseModelOnSessionCreation option will allow the LearningModelSession to take ownership of the LearningModel's + //! internal model representation. This will defunct the LearningModel session, but decreases the necessary peak working set. + //! CloseModelOnSessionCreation = True indicates that the model's internal data will be moved into the session during construction. + //! CloseModelOnSessionCreation = False indicates that the model's internal data will be copied into the session during construction. + Boolean CloseModelOnSessionCreation { get; set; }; + } + } + + //! \class LearningModelSession + //! 
\brief TODO:Docs + [contract(MachineLearningContract, 1)] + [constructor_name("Windows.AI.MachineLearning.ILearningModelSessionFactory", 0f6b881d-1c9b-47b6-bfe0-f1cf62a67579)] + [interface_name("Windows.AI.MachineLearning.ILearningModelSession", 8e58f8f6-b787-4c11-90f0-7129aeca74a9)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass LearningModelSession : Windows.Foundation.IClosable + { + //! Create a session, on the system default device, to evaluate the specified model on. + [method_name("CreateFromModel")] LearningModelSession(LearningModel model); + //! Create a session, on the provided device, to evaluate the specified model on. + [method_name("CreateFromModelOnDevice")] LearningModelSession(LearningModel model, LearningModelDevice deviceToRunOn); + + //! Returns the machine learning model attached to the session. + LearningModel Model{ get; }; + //! Returns the evaluation device that the session was created on. + LearningModelDevice Device{ get; }; + //! Returns the list of properties set for model evaluation. + Windows.Foundation.Collections.IPropertySet EvaluationProperties{ get; }; + //! Evaluate the machine learning model using the feature values already bound in 'bindings'. (asynchronous) + [remote_async] + Windows.Foundation.IAsyncOperation EvaluateAsync(LearningModelBinding bindings, String correlationId); + //! Evaluate the machine learning model using the feature values in the map 'features'. (asynchronous) + //! This method is an alternative to the separated bind then eval form that takes a LearningModelBinding. + //! It will take the passed in features, create a LearningModelBinding for you, bind the features, and then evaluate the model. + [remote_async] + Windows.Foundation.IAsyncOperation EvaluateFeaturesAsync(Windows.Foundation.Collections.IMap features, String correlationId); + //! Evaluate the machine learning model using the feature values bound in 'bindings'. 
+ LearningModelEvaluationResult Evaluate(LearningModelBinding bindings, String correlationId); + //! Evaluate the machine learning model using the feature values in the map 'features'. + //! This method is an alternative to the separated bind then eval form that takes a LearningModelBinding. + //! It will take the passed in features, create a LearningModelBinding for you, bind the features, and then evaluate the model. + LearningModelEvaluationResult EvaluateFeatures(Windows.Foundation.Collections.IMap features, String correlationId); + + [contract(MachineLearningContract, 2)] + { + //! Create a session, on the provided device, with the desired model compilation options, to evaluate the specified model on. + [method_name("CreateFromModelOnDeviceWithSessionOptions")] LearningModelSession(LearningModel model, LearningModelDevice deviceToRunOn, LearningModelSessionOptions learningModelSessionOptions); + } + } + + //! \interface ILearningModelFeatureValue + //! \brief The instantiated value for a feature. + [contract(MachineLearningContract, 1)] + [uuid(f51005db-4085-4dfe-9fed-95eb0c0cf75c)] + interface ILearningModelFeatureValue + { + //! The data type of the feature. + LearningModelFeatureKind Kind{ get; }; + }; + + //! \class LearningModelBinding + //! \brief Holder for associations between model inputs/outputs and variable instances. + [contract(MachineLearningContract, 1)] + [constructor_name("Windows.AI.MachineLearning.ILearningModelBindingFactory", c95f7a7a-e788-475e-8917-23aa381faf0b)] + [interface_name("Windows.AI.MachineLearning.ILearningModelBinding", ea312f20-168f-4f8c-94fe-2e7ac31b4aa8)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass LearningModelBinding : Windows.Foundation.Collections.IMapView + { + //! Create a LearningModelBinding from the specified LearningModelSession. + //! During evaluation, the specified adapter will be used to create resources and queue work during execution. 
+ [method_name("CreateFromSession")] LearningModelBinding(LearningModelSession session); + //! Bind a value to the specified feature. + void Bind(String name, IInspectable value); + //! TODO:Docs + [method_name("BindWithProperties")] void Bind(String name, IInspectable value, Windows.Foundation.Collections.IPropertySet props); + //! Remove all bindings. + void Clear(); + } + + //! \enum TensorKind + //! \brief Defines the list of supported tensor data types. + [contract(MachineLearningContract, 1)] + enum TensorKind + { + //! Supported by ONNX, but should never happen and is invalid for Windows ML. + Undefined = 0, + //! The tensor type is 32bit float. + Float, + //! The tensor type is 8bit unsigned int. + UInt8, + //! The tensor type is 8bit signed int. + Int8, + //! The tensor type is 16bit unsigned int. + UInt16, + //! The tensor type is 16bit signed int. + Int16, + //! The tensor type is 32bit signed int. + Int32, + //! The tensor type is 64bit signed int. + Int64, + //! The tensor type is String. + String, + //! The tensor type is Boolean. + Boolean, + //! The tensor type is 16bit float. + Float16, + //! The tensor type is 64bit float. + Double, + //! The tensor type is 32bit unsigned int. + UInt32, + //! The tensor type is 64bit unsigned int. + UInt64, + //! Supported by ONNX, but is not supported by Windows ML. + Complex64, + //! Supported by ONNX, but is not supported by Windows ML. + Complex128 + }; + + //! \class MapFeatureDescriptor + //! \brief TODO:Docs + [contract(MachineLearningContract, 1)] + [interface_name("Windows.AI.MachineLearning.IMapFeatureDescriptor", 530424bd-a257-436d-9e60-c2981f7cc5c4)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass MapFeatureDescriptor : ILearningModelFeatureDescriptor + { + //! Returns the data type of the map's key. + TensorKind KeyKind{ get; }; + //! Returns the properties of the map's value. + ILearningModelFeatureDescriptor ValueDescriptor{ get; }; + } + + //! 
\class SequenceFeatureDescriptor + //! \brief TODO:Docs + [contract(MachineLearningContract, 1)] + [interface_name("Windows.AI.MachineLearning.ISequenceFeatureDescriptor", 84f6945a-562b-4d62-a851-739aced96668)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass SequenceFeatureDescriptor : ILearningModelFeatureDescriptor + { + //! Gets the properties of the specified feature. + ILearningModelFeatureDescriptor ElementDescriptor{ get; }; + } + + //! \class TensorFeatureDescriptor + //! \brief TODO:Docs + [contract(MachineLearningContract, 1)] + [interface_name("Windows.AI.MachineLearning.ITensorFeatureDescriptor", 74455c80-946a-4310-a19c-ee0af028fce4)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorFeatureDescriptor : ILearningModelFeatureDescriptor + { + //! Returns the data type of the tensor. + TensorKind TensorKind{ get; }; + //! Returns the count and size of each dimension. + Windows.Foundation.Collections.IVectorView Shape{ get; }; + } + + //! \class ImageFeatureDescriptor + //! \brief TODO:Docs + [contract(MachineLearningContract, 1)] + [interface_name("Windows.AI.MachineLearning.IImageFeatureDescriptor", 365585a5-171a-4a2a-985f-265159d3895a)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass ImageFeatureDescriptor : ILearningModelFeatureDescriptor + { + //! Specifies the pixel format (channel ordering, bit depth, and data type) of the pixel data. + Windows.Graphics.Imaging.BitmapPixelFormat BitmapPixelFormat{ get; }; + //! Specifies the alpha mode of the pixel data. + Windows.Graphics.Imaging.BitmapAlphaMode BitmapAlphaMode{ get; }; + //! The width of the image. + UInt32 Width{ get; }; + //! The height of the image. + UInt32 Height{ get; }; + } + + //! \interface ITensor + //! \brief TODO:Docs + [contract(MachineLearningContract, 1)] + [uuid(05489593-a305-4a25-ad09-440119b4b7f6)] + interface ITensor : IInspectable requires ILearningModelFeatureValue + { + //! 
Returns the data type of the tensor. + TensorKind TensorKind{ get; }; + //! TODO:Docs + Windows.Foundation.Collections.IVectorView Shape{ get; }; + } + + //! \class TensorFloat + //! \brief A 32bit float tensor object. + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorFloatStatics", dbcd395b-3ba3-452f-b10d-3c135e573fa9)] + [interface_name("Windows.AI.MachineLearning.ITensorFloat", f2282d82-aa02-42c8-a0c8-df1efc9676e1)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorFloat : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + //! Creates a tensor object without allocating a buffer. + static TensorFloat Create(); + //! Creates a tensor object and allocates a buffer of size 'shape'. + static TensorFloat Create(Windows.Foundation.Collections.IIterable shape); + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorFloat CreateFromArray(Windows.Foundation.Collections.IIterable shape, Single[] data); + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorFloat CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + //! Returns a read only view of the data. + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorFloat CreateFromShapeArrayAndDataArray(Int64[] shape, Single[] data); + + //! Creates a tensor object of size 'shape', and uses the data in 'buffer'. 
+ static TensorFloat CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorFloat16BitStatics", a52db6f5-318a-44d4-820b-0cdc7054a84a)] + [interface_name("Windows.AI.MachineLearning.ITensorFloat16Bit", 0ab994fc-5b89-4c3c-b5e4-5282a5316c0a)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorFloat16Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorFloat16Bit Create(); + static TensorFloat16Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorFloat16Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, Single[] data); + static TensorFloat16Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorFloat16Bit CreateFromShapeArrayAndDataArray(Int64[] shape, Single[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. + //! 'buffer' contains a packed array of 16bit floating point values. 
+ static TensorFloat16Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorUInt8BitStatics", 05f67583-bc24-4220-8a41-2dcd8c5ed33c)] + [interface_name("Windows.AI.MachineLearning.ITensorUInt8Bit", 58e1ae27-622b-48e3-be22-d867aed1daac)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorUInt8Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorUInt8Bit Create(); + static TensorUInt8Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorUInt8Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, UInt8[] data); + static TensorUInt8Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorUInt8Bit CreateFromShapeArrayAndDataArray(Int64[] shape, UInt8[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. + //! 'buffer' contains a packed array of 8bit uint8 values. 
+ static TensorUInt8Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorInt8BitStatics", b1a12284-095c-4c76-a661-ac4cee1f3e8b)] + [interface_name("Windows.AI.MachineLearning.ITensorInt8Bit", cddd97c5-ffd8-4fef-aefb-30e1a485b2ee)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorInt8Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorInt8Bit Create(); + static TensorInt8Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorInt8Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, BYTE[] data); + static TensorInt8Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorInt8Bit CreateFromShapeArrayAndDataArray(Int64[] shape, BYTE[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorInt8Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorUInt16BitStatics", 5df745dd-028a-481a-a27c-c7e6435e52dd)] + [interface_name("Windows.AI.MachineLearning.ITensorUInt16Bit", 68140f4b-23c0-42f3-81f6-a891c011bc3f)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorUInt16Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorUInt16Bit Create(); + static TensorUInt16Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorUInt16Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, UInt16[] data); + static TensorUInt16Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorUInt16Bit CreateFromShapeArrayAndDataArray(Int64[] shape, UInt16[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorUInt16Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorInt16BitStatics", 98646293-266e-4b1a-821f-e60d70898b91)] + [interface_name("Windows.AI.MachineLearning.ITensorInt16Bit", 98a32d39-e6d6-44af-8afa-baebc44dc020)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorInt16Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorInt16Bit Create(); + static TensorInt16Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorInt16Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, Int16[] data); + static TensorInt16Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorInt16Bit CreateFromShapeArrayAndDataArray(Int64[] shape, Int16[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorInt16Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorUInt32BitStatics", 417c3837-e773-4378-8e7f-0cc33dbea697)] + [interface_name("Windows.AI.MachineLearning.ITensorUInt32Bit", d8c9c2ff-7511-45a3-bfac-c38f370d2237)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorUInt32Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorUInt32Bit Create(); + static TensorUInt32Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorUInt32Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, UInt32[] data); + static TensorUInt32Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorUInt32Bit CreateFromShapeArrayAndDataArray(Int64[] shape, UInt32[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorUInt32Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorInt32BitStatics", 6539864b-52fa-4e35-907c-834cac417b50)] + [interface_name("Windows.AI.MachineLearning.ITensorInt32Bit", 2c0c28d3-207c-4486-a7d2-884522c5e589)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorInt32Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorInt32Bit Create(); + static TensorInt32Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorInt32Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, Int32[] data); + static TensorInt32Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorInt32Bit CreateFromShapeArrayAndDataArray(Int64[] shape, Int32[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorInt32Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorUInt64BitStatics", 7a7e20eb-242f-47cb-a9c6-f602ecfbfee4)] + [interface_name("Windows.AI.MachineLearning.ITensorUInt64Bit", 2e70ffad-04bf-4825-839a-82baef8c7886)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorUInt64Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorUInt64Bit Create(); + static TensorUInt64Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorUInt64Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, UInt64[] data); + static TensorUInt64Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorUInt64Bit CreateFromShapeArrayAndDataArray(Int64[] shape, UInt64[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorUInt64Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorInt64BitStatics", 9648ad9d-1198-4d74-9517-783ab62b9cc2)] + [interface_name("Windows.AI.MachineLearning.ITensorInt64Bit", 499665ba-1fa2-45ad-af25-a0bd9bda4c87)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorInt64Bit : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorInt64Bit Create(); + static TensorInt64Bit Create(Windows.Foundation.Collections.IIterable shape); + static TensorInt64Bit CreateFromArray(Windows.Foundation.Collections.IIterable shape, Int64[] data); + static TensorInt64Bit CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorInt64Bit CreateFromShapeArrayAndDataArray(Int64[] shape, Int64[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorInt64Bit CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorBooleanStatics", 2796862c-2357-49a7-b476-d0aa3dfe6866)] + [interface_name("Windows.AI.MachineLearning.ITensorBoolean", 50f311ed-29e9-4a5c-a44d-8fc512584eed)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorBoolean : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorBoolean Create(); + static TensorBoolean Create(Windows.Foundation.Collections.IIterable shape); + static TensorBoolean CreateFromArray(Windows.Foundation.Collections.IIterable shape, Boolean[] data); + static TensorBoolean CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorBoolean CreateFromShapeArrayAndDataArray(Int64[] shape, Boolean[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. + //! 'buffer' represents a byte packed array of boolean values. 
+ static TensorBoolean CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorDoubleStatics", a86693c5-9538-44e7-a3ca-5df374a5a70c)] + [interface_name("Windows.AI.MachineLearning.ITensorDouble", 91e41252-7a8f-4f0e-a28f-9637ffc8a3d0)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorDouble : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorDouble Create(); + static TensorDouble Create(Windows.Foundation.Collections.IIterable shape); + static TensorDouble CreateFromArray(Windows.Foundation.Collections.IIterable shape, Double[] data); + static TensorDouble CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. + static TensorDouble CreateFromShapeArrayAndDataArray(Int64[] shape, Double[] data); + + //! Creates a tensor object, creates a tensor of size 'shape', and uses the data in 'buffer'. 
+ static TensorDouble CreateFromBuffer(Int64[] shape, Windows.Storage.Streams.IBuffer buffer); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.ITensorStringStatics", 83623324-cf26-4f17-a2d4-20ef8d097d53)] + [interface_name("Windows.AI.MachineLearning.ITensorString", 582335c8-bdb1-4610-bc75-35e9cbf009b7)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass TensorString : ITensor, [contract(MachineLearningContract, 2)]Windows.Foundation.IMemoryBuffer + { + static TensorString Create(); + static TensorString Create(Windows.Foundation.Collections.IIterable shape); + static TensorString CreateFromArray(Windows.Foundation.Collections.IIterable shape, String[] data); + static TensorString CreateFromIterable(Windows.Foundation.Collections.IIterable shape, Windows.Foundation.Collections.IIterable data); + Windows.Foundation.Collections.IVectorView GetAsVectorView(); + + [contract(MachineLearningContract, 2)] + { + //! Creates a tensor object, allocates a buffer of size 'shape', and copies all of 'data' into it. 
+ static TensorString CreateFromShapeArrayAndDataArray(Int64[] shape, String[] data); + } + } + + [contract(MachineLearningContract, 1)] + [static_name("Windows.AI.MachineLearning.IImageFeatureValueStatics", 1bc317fd-23cb-4610-b085-c8e1c87ebaa0)] + [interface_name("Windows.AI.MachineLearning.IImageFeatureValue", f0414fd9-c9aa-4405-b7fb-94f87c8a3037)] + [threading(both)] + [marshaling_behavior(agile)] + [dualapipartition(1)] + runtimeclass ImageFeatureValue : ILearningModelFeatureValue + { + static ImageFeatureValue CreateFromVideoFrame(Windows.Media.VideoFrame image); + Windows.Media.VideoFrame VideoFrame{ get; }; + } +} \ No newline at end of file diff --git a/winml/api/dualapipartitionattribute.h b/winml/api/dualapipartitionattribute.h new file mode 100644 index 0000000000000..7dec68eb883b0 --- /dev/null +++ b/winml/api/dualapipartitionattribute.h @@ -0,0 +1,103 @@ +/* Header file automatically generated from dualapipartitionattribute.idl */ +/* + * File built with Microsoft(R) MIDLRT Compiler Engine Version 10.00.0228 + */ + +#pragma warning( disable: 4049 ) /* more than 64k source lines */ + +/* verify that the version is high enough to compile this file*/ +#ifndef __REQUIRED_RPCNDR_H_VERSION__ +#define __REQUIRED_RPCNDR_H_VERSION__ 500 +#endif + +/* verify that the version is high enough to compile this file*/ +#ifndef __REQUIRED_RPCSAL_H_VERSION__ +#define __REQUIRED_RPCSAL_H_VERSION__ 100 +#endif + +#include +#include + +#ifndef __RPCNDR_H_VERSION__ +#error this stub requires an updated version of +#endif /* __RPCNDR_H_VERSION__ */ + +#ifndef COM_NO_WINDOWS_H +#include +#include +#endif /*COM_NO_WINDOWS_H*/ +#ifndef __dualapipartitionattribute_h__ +#define __dualapipartitionattribute_h__ +#ifndef __dualapipartitionattribute_p_h__ +#define __dualapipartitionattribute_p_h__ + + +#pragma once + +#pragma push_macro("MIDL_CONST_ID") +#undef MIDL_CONST_ID +#define MIDL_CONST_ID const __declspec(selectany) + + +// API Contract Inclusion Definitions +#if 
!defined(SPECIFIC_API_CONTRACT_DEFINITIONS) +#if !defined(WINDOWS_APPLICATIONMODEL_CALLS_CALLSPHONECONTRACT_VERSION) +#define WINDOWS_APPLICATIONMODEL_CALLS_CALLSPHONECONTRACT_VERSION 0x50000 +#endif // defined(WINDOWS_APPLICATIONMODEL_CALLS_CALLSPHONECONTRACT_VERSION) + +#if !defined(WINDOWS_FOUNDATION_FOUNDATIONCONTRACT_VERSION) +#define WINDOWS_FOUNDATION_FOUNDATIONCONTRACT_VERSION 0x40000 +#endif // defined(WINDOWS_FOUNDATION_FOUNDATIONCONTRACT_VERSION) + +#if !defined(WINDOWS_FOUNDATION_UNIVERSALAPICONTRACT_VERSION) +#define WINDOWS_FOUNDATION_UNIVERSALAPICONTRACT_VERSION 0xa0000 +#endif // defined(WINDOWS_FOUNDATION_UNIVERSALAPICONTRACT_VERSION) + +#if !defined(WINDOWS_NETWORKING_SOCKETS_CONTROLCHANNELTRIGGERCONTRACT_VERSION) +#define WINDOWS_NETWORKING_SOCKETS_CONTROLCHANNELTRIGGERCONTRACT_VERSION 0x30000 +#endif // defined(WINDOWS_NETWORKING_SOCKETS_CONTROLCHANNELTRIGGERCONTRACT_VERSION) + +#if !defined(WINDOWS_PHONE_PHONECONTRACT_VERSION) +#define WINDOWS_PHONE_PHONECONTRACT_VERSION 0x10000 +#endif // defined(WINDOWS_PHONE_PHONECONTRACT_VERSION) + +#if !defined(WINDOWS_PHONE_PHONEINTERNALCONTRACT_VERSION) +#define WINDOWS_PHONE_PHONEINTERNALCONTRACT_VERSION 0x10000 +#endif // defined(WINDOWS_PHONE_PHONEINTERNALCONTRACT_VERSION) + +#if !defined(WINDOWS_UI_WEBUI_CORE_WEBUICOMMANDBARCONTRACT_VERSION) +#define WINDOWS_UI_WEBUI_CORE_WEBUICOMMANDBARCONTRACT_VERSION 0x10000 +#endif // defined(WINDOWS_UI_WEBUI_CORE_WEBUICOMMANDBARCONTRACT_VERSION) + +#endif // defined(SPECIFIC_API_CONTRACT_DEFINITIONS) + + +// Header files for imported files +#include "Windows.Foundation.h" + +#if defined(__cplusplus) && !defined(CINTERFACE) +/* Forward Declarations */ + + +#pragma warning (push) +#pragma warning (disable:4668) +#pragma warning (disable:4001) +#pragma once +#pragma warning (pop) + + +#else // !defined(__cplusplus) +/* Forward Declarations */ + +#pragma warning (push) +#pragma warning (disable:4668) +#pragma warning (disable:4001) +#pragma once +#pragma warning 
(pop) + + +#endif // defined(__cplusplus) +#pragma pop_macro("MIDL_CONST_ID") +#endif // __dualapipartitionattribute_p_h__ + +#endif // __dualapipartitionattribute_h__ diff --git a/winml/api/dualapipartitionattribute.idl b/winml/api/dualapipartitionattribute.idl new file mode 100644 index 0000000000000..c38ce9a117b7d --- /dev/null +++ b/winml/api/dualapipartitionattribute.idl @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +//! \file windows.ai.machinelearning.attributes.idl + +import "Windows.Foundation.idl"; +#include + +// This file redefines the DualApiPartition attribute found in +// internal\sdk\inc\windows.foundation.idl. +// +// The DualApiPartitionAttribure defined in windows.foundation.idl +// is used to annotate Windows SDK APIs with a marker to indicate that +// the API will be usable on desktop as well as the store uwp app container. +// +// All WinML APIs should be usable from the desktop. + +namespace Windows.Foundation.Metadata +{ + [attributeusage(target_runtimeclass)] + [attributename("dualapipartition")] + [contract(Windows.Foundation.FoundationContract, 1)] + [version(NTDDI_WIN8), + version(NTDDI_WIN8, Platform.WindowsPhone)] + attribute DualApiPartitionAttribute + { + unsigned __int32 version; + } +} diff --git a/winml/api/exclusions.txt b/winml/api/exclusions.txt new file mode 100644 index 0000000000000..40ce8287f7ee6 --- /dev/null +++ b/winml/api/exclusions.txt @@ -0,0 +1,26 @@ +TensorBoolean.cpp +TensorBoolean.h +TensorDouble.cpp +TensorDouble.h +TensorFloat.cpp +TensorFloat.h +TensorFloat16Bit.cpp +TensorFloat16Bit.h +TensorInt16Bit.cpp +TensorInt16Bit.h +TensorInt32Bit.cpp +TensorInt32Bit.h +TensorInt64Bit.cpp +TensorInt64Bit.h +TensorInt8Bit.cpp +TensorInt8Bit.h +TensorString.cpp +TensorString.h +TensorUInt16Bit.cpp +TensorUInt16Bit.h +TensorUInt32Bit.cpp +TensorUInt32Bit.h +TensorUInt64Bit.cpp +TensorUInt64Bit.h +TensorUInt8Bit.cpp +TensorUInt8Bit.h diff --git 
a/winml/api/windows.ai.machinelearning.native.idl b/winml/api/windows.ai.machinelearning.native.idl new file mode 100644 index 0000000000000..b3a3a783146e0 --- /dev/null +++ b/winml/api/windows.ai.machinelearning.native.idl @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import "oaidl.idl"; +import "ocidl.idl"; +import "d3d12.idl"; + +cpp_quote( "#include " ) + +#pragma region Desktop Family +cpp_quote( "#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)" ) + +cpp_quote( "struct IMLOperatorRegistry; " ) +cpp_quote( "struct __declspec(uuid(\"1adaa23a-eb67-41f3-aad8-5d984e9bacd4\")) __declspec(novtable) ILearningModelOperatorProviderNative : IUnknown " ) +cpp_quote( "{ " ) +cpp_quote( " STDMETHOD(GetRegistry)(IMLOperatorRegistry** ppOperatorRegistry) PURE; " ) +cpp_quote( "}; " ) + +[uuid(52f547ef-5b03-49b5-82d6-565f1ee0dd49), object, local] +interface ITensorNative : IUnknown +{ + HRESULT GetBuffer([out, size_is(, *capacity)] BYTE **value, [out] UINT32 *capacity); + HRESULT GetD3D12Resource([out] ID3D12Resource ** result); +}; + +[uuid(39d055a4-66f6-4ebc-95d9-7a29ebe7690a), object, local] +interface ITensorStaticsNative : IUnknown +{ + HRESULT CreateFromD3D12Resource(ID3D12Resource *value, [size_is(shapeCount)] __int64 *shape, int shapeCount, [out] IUnknown ** result); +}; + +[uuid(1e9b31a1-662e-4ae0-af67-f63bb337e634), object, local] +interface ILearningModelDeviceFactoryNative : IUnknown +{ + HRESULT CreateFromD3D12CommandQueue(ID3D12CommandQueue * value, [out] IUnknown ** result); +}; + +cpp_quote("#endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */") \ No newline at end of file diff --git a/winml/api/windows.ai.machinelearning.native.internal.idl b/winml/api/windows.ai.machinelearning.native.internal.idl new file mode 100644 index 0000000000000..269041b62bfe3 --- /dev/null +++ b/winml/api/windows.ai.machinelearning.native.internal.idl @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +import "oaidl.idl"; + +cpp_quote( "#include " ) + +#pragma region Desktop Family +cpp_quote( "#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)" ) + +[uuid(fd7ae883-38f0-47e5-879c-5eb23336e507), object, local] +interface IMetacommandsController : IUnknown +{ + HRESULT SetMetacommandsEnabled( + [in] boolean enabled); +}; + +[uuid(b42a6c5d-cf8b-4d1f-9375-0de92c0c9996), object, local] +interface IDeviceFenceValidator : IUnknown +{ + boolean SharedHandleInitialized(); +}; + +[uuid(259caa2f-44fd-4525-8102-e8a10012fb88), object, local] +interface ILearningModelStaticsNative : IUnknown +{ + HRESULT Load( + [in, size_is(cchModelPath)] const wchar_t *modelPath, + [in] UINT32 cchModelPath, + IUnknown **model); +}; + +[uuid(0ccc204e-3424-42c9-a008-9e7f0756d647), object, local] +interface ILearningModelBindingNative : IUnknown +{ + HRESULT Bind( + [in, size_is(cchName)] const wchar_t *name, + [in] UINT32 cchName, + IUnknown* value); +}; + +[uuid(871c531b-0eb1-4fb4-8c61-fbf9e6f8ba6b), object, local] +interface ILearningModelFeatureDescriptorNative : IUnknown +{ + HRESULT GetName([out, size_is(, *cchName)] const wchar_t **name, [out] UINT32 *cchName); + HRESULT GetDescription([out, size_is(, *cchDescription)] const wchar_t **description, [out] UINT32 *cchDescription); +}; + +[uuid(9972a361-b185-40e2-b1bc-23a667d5fb97), object, local] +interface ILearningModelEvaluationResultNative : IUnknown +{ + HRESULT GetOutput( + [in, size_is(cchName)] const wchar_t *name, + [in] UINT32 cchName, + [out] IUnknown ** result); +}; + +cpp_quote("#endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */") \ No newline at end of file diff --git a/winml/dll/Windows.AI.MachineLearning.def b/winml/dll/Windows.AI.MachineLearning.def new file mode 100644 index 0000000000000..ceb9246489cbb --- /dev/null +++ b/winml/dll/Windows.AI.MachineLearning.def @@ -0,0 +1,4 @@ +EXPORTS +DllCanUnloadNow PRIVATE +DllGetActivationFactory PRIVATE 
+MLCreateOperatorRegistry \ No newline at end of file diff --git a/winml/dll/module.cpp b/winml/dll/module.cpp new file mode 100644 index 0000000000000..717fb3fcede76 --- /dev/null +++ b/winml/dll/module.cpp @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" +#include +#include + +#include "LearningModelDevice.h" +#include "OnnxruntimeProvider.h" + +using namespace winrt::Windows::AI::MachineLearning::implementation; + +void __stdcall OnErrorReported(bool alreadyReported, wil::FailureInfo const& failure) WI_NOEXCEPT { + if (!alreadyReported) { + winrt::hstring message(failure.pszMessage ? failure.pszMessage : L""); + telemetry_helper.LogRuntimeError( + failure.hr, + winrt::to_string(message), + failure.pszFile, + failure.pszFunction, + failure.uLineNumber); + } +} + +extern "C" BOOL WINAPI DllMain(_In_ HINSTANCE hInstance, DWORD dwReason, _In_ void* lpvReserved) { + switch (dwReason) { + case DLL_PROCESS_ATTACH: + DisableThreadLibraryCalls(hInstance); + + // Register the TraceLogging provider feeding telemetry. It's OK if this fails; + // trace logging calls just become no-ops. + telemetry_helper.Register(); + wil::SetResultTelemetryFallback(&OnErrorReported); + break; + case DLL_PROCESS_DETACH: + telemetry_helper.LogWinMLShutDown(); + // Unregister Trace Logging Provider feeding telemetry + telemetry_helper.UnRegister(); + +#ifdef NDEBUG + bool dynamicUnload = (lpvReserved == nullptr); + + // + // The OS can reclaim memory more quickly and correctly during process shutdown. + // Continue to do this on debug builds due to leak detection tracing. 
+ // + if (dynamicUnload) +#endif + { + LearningModelDevice::DllUnload(); + } + + break; + } + + return true; +} + +extern "C" HRESULT WINAPI MLCreateOperatorRegistry(_COM_Outptr_ IMLOperatorRegistry** registry) try { + winrt::com_ptr engine_factory; + WINML_THROW_IF_FAILED(CreateOnnxruntimeEngineFactory(engine_factory.put())); + WINML_THROW_IF_FAILED(engine_factory->CreateCustomRegistry(registry)); + return S_OK; +} +CATCH_RETURN(); + +STDAPI DllCanUnloadNow() { + // The windows.ai.machinelearning.dll should not be freed by + // CoFreeUnusedLibraries since there can be outstanding COM object + // references to many objects (AbiCustomRegistry, IMLOperatorKernelContext, + // IMLOperatorTensor, etc) that are not reference counted in this path. + // + // In order to implement DllCanUnloadNow we would need to reference count + // all of the instances of non-WinRT COM objects that have been shared + // across the dll boundary or harden the boundary APIs to make sure to + // additional outstanding references are not cached by callers. + // + // Identifying and curating the complete list of IUnknown based COM objects + // that are shared out as a consequence of the MLCreateOperatorRegistry API + // will be a complex task to complete in RS5. + // + // As a temporary workaround we simply prevent the windows.ai.machinelearning.dll + // from unloading. + // + // There are no known code paths that rely on opportunistic dll unload. + return S_FALSE; +} + +STDAPI DllGetActivationFactory(HSTRING classId, void** factory) { + return WINRT_GetActivationFactory(classId, factory); +} \ No newline at end of file diff --git a/winml/dll/pch.h b/winml/dll/pch.h new file mode 100644 index 0000000000000..f04283c985571 --- /dev/null +++ b/winml/dll/pch.h @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "cppwinrt_onnx.h" +#include "dx.h" + +#include "inc/ILotusValueProviderPrivate.h" +#include "impl/IMapFeatureValue.h" +#include "impl/ISequenceFeatureValue.h" +#include "FeatureValues.h" diff --git a/winml/dll/winml.rc b/winml/dll/winml.rc new file mode 100644 index 0000000000000..50fdaf0f45289 --- /dev/null +++ b/winml/dll/winml.rc @@ -0,0 +1,42 @@ +// This file REQUIRES the following external definitions: +// VER_MAJOR, VER_MINOR, VER_BUILD, VER_PRIVATE, and VER_STRING + +#include + +#if defined(DEBUG) || defined(_DEBUG) +#define VER_DEBUG VS_FF_DEBUG +#else +#define VER_DEBUG 0 +#endif + +// ----------------------------------------------------------------------------- + +VS_VERSION_INFO VERSIONINFO +FILEVERSION VER_MAJOR, VER_MINOR, VER_BUILD, VER_PRIVATE +PRODUCTVERSION VER_MAJOR, VER_MINOR, VER_BUILD, VER_PRIVATE +FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +FILEFLAGS VER_DEBUG +FILEOS VOS__WINDOWS32 +FILETYPE VFT_DLL +FILESUBTYPE VFT2_UNKNOWN +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + BEGIN + VALUE "CompanyName", "Microsoft Corporation" + VALUE "FileDescription", "Windows Machine Learning Runtime" + VALUE "FileVersion", VER_STRING + VALUE "InternalName", "Windows.AI.MachineLearning.Runtime" + VALUE "LegalCopyright", "\251 Microsoft Corporation. All rights reserved." + VALUE "OriginalFilename", "windows.ai.machinelearning.dll" + VALUE "ProductName", "Microsoft\256 Windows\256 Operating System" + VALUE "ProductVersion", VER_STRING + END + END + + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1252 + END +END \ No newline at end of file diff --git a/winml/lib/Api.Image/ConverterResourceStore.cpp b/winml/lib/Api.Image/ConverterResourceStore.cpp new file mode 100644 index 0000000000000..f698a05f29835 --- /dev/null +++ b/winml/lib/Api.Image/ConverterResourceStore.cpp @@ -0,0 +1,104 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" +#include "inc/ConverterResourceStore.h" + +#include +#include + +#include +#include + +using namespace Windows::AI::MachineLearning; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Graphics::DirectX; +using namespace Windows::Graphics::DirectX::Direct3D11; + +ConverterResources::ConverterResources(Pool& pool, ConverterResourceDescription& descriptor) : m_pool(pool), + Descriptor(descriptor), + Tensorizer(std::make_unique()), + Detensorizer(std::make_unique()) { +} + +void ConverterResources::ReturnToCache() { + if (auto pool = m_pool.lock()) { + pool->Store(shared_from_this()); + } +} + +ConverterResourceStore::ConverterResourceStore(size_t nCacheSize) : m_cacheSize(nCacheSize) { +} + +std::shared_ptr ConverterResourceStore::Fetch(ConverterResourceDescription& descriptor) { + std::lock_guard lock(m_mutex); + + auto resource = FetchAndRemoveObject(descriptor); + + if (resource == nullptr) { + // Create the resource + resource = ConverterResources::Create(shared_from_this(), descriptor); + } + + return resource; +} + +std::shared_ptr ConverterResourceStore::FetchAndRemoveObject(ConverterResourceDescription& desc) { + // Iterate through the resources and find all the resources which are completed and unallocate + auto foundIt = + std::find_if(std::begin(m_objects), std::end(m_objects), + [&](const auto& cachedObject) { + return desc == cachedObject.Resource->Descriptor; + }); + + if (foundIt == std::end(m_objects)) { + return nullptr; + } else { + std::shared_ptr object = foundIt->Resource; + // Remove the item from the cache so that it is not reused by another call + m_objects.erase(foundIt); + + return object; + } +} + +void ConverterResourceStore::Store(std::shared_ptr object) { + std::lock_guard lock(m_mutex); + + auto foundIt = std::find_if(std::begin(m_objects), std::end(m_objects), + [&](const auto& 
cachedObject) { + return object == cachedObject.Resource; + }); + + if (foundIt == std::end(m_objects)) { + // If the resource is not already cached + if (m_objects.size() < m_cacheSize) { + // If the cache has free slots, then use one + m_objects.push_back( + PoolObject{ + object, + storeId++}); + } else { + // If the cache has no free slots, then evict the oldest + EvictOldestPoolObject(); + + m_objects.push_back( + PoolObject{ + object, + storeId++}); + } + } +} + +void ConverterResourceStore::EvictOldestPoolObject() { + auto oldestIt = + std::min_element(std::begin(m_objects), std::end(m_objects), + [&](const auto& left, const auto& right) { + return left.StoreId < right.StoreId; + }); + + // Remove the oldest item from the cache + m_objects.erase(oldestIt); +} \ No newline at end of file diff --git a/winml/lib/Api.Image/CpuDetensorizer.h b/winml/lib/Api.Image/CpuDetensorizer.h new file mode 100644 index 0000000000000..4059f85eaf211 --- /dev/null +++ b/winml/lib/Api.Image/CpuDetensorizer.h @@ -0,0 +1,237 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "inc/ImageConversionTypes.h" + +namespace Windows::AI::MachineLearning::Internal { +class CpuDetensorizer { + public: + template + static HRESULT Detensorize( + _In_ ImageTensorChannelType formatFrom, + _In_ ImageTensorChannelType formatTo, + _In_ const T* pCPUTensor, + _In_ uint32_t bufferWidth, + _In_ uint32_t tensorHeight, + _In_ uint32_t tensorWidth, + _Inout_ BYTE* pData) { +#pragma warning(push) +#pragma warning(disable : 26014) // warning about possible out of bounds accesing pData, but input is checked for BGRA8 format, so uiCapacity should be in multiples of 4 + // output is BGRA8: so blue at i, green is at i + 1, red is at i + 2 + + uint32_t bytesPerPixel = formatTo == kImageTensorChannelTypeGRAY8 ? 1 : 4; + + // bufferWidth may have padding because of optimization, but bytesPerRow includes only the real tensor data. 
We need to jump + // over bufferWidth's extra padding + uint32_t bytesPerRow = tensorWidth * bytesPerPixel; + uint32_t end = bufferWidth * tensorHeight; + size_t tensorPlaneSize = tensorWidth * tensorHeight; + + if (formatFrom == formatTo && (formatFrom == kImageTensorChannelTypeBGR8 || formatFrom == kImageTensorChannelTypeRGB8)) { + for (uint32_t i = 0; i < tensorHeight; i++) { + BYTE* pPixel = pData; + + InterleaveRowFloatToByte( + pCPUTensor + i * tensorWidth, + pCPUTensor + tensorPlaneSize + i * tensorWidth, + pCPUTensor + tensorPlaneSize * 2 + i * tensorWidth, + tensorWidth, + pPixel, + bytesPerPixel); + + pData += bufferWidth; + } + } else if ((formatFrom == kImageTensorChannelTypeRGB8 && formatTo == kImageTensorChannelTypeBGR8) || (formatFrom == kImageTensorChannelTypeBGR8 && formatTo == kImageTensorChannelTypeRGB8)) { + for (uint32_t i = 0; i < tensorHeight; i++) { + BYTE* pPixel = pData; + + InterleaveRowFloatToByte( + pCPUTensor + tensorPlaneSize * 2 + i * tensorWidth, + pCPUTensor + tensorPlaneSize + i * tensorWidth, + pCPUTensor + i * tensorWidth, + tensorWidth, + pPixel, + bytesPerPixel); + + pData += bufferWidth; + } + } else if (formatFrom == kImageTensorChannelTypeGRAY8 && (formatTo == kImageTensorChannelTypeBGR8 || formatTo == kImageTensorChannelTypeRGB8)) { + // just replicate the gray data across each channel + for (uint32_t i = 0; i < end; i += bufferWidth) { + for (uint32_t j = i; j < i + bytesPerRow; j += 4) { + BYTE bGray = DetensorizeValue(pCPUTensor); + pData[j] = bGray; + pData[j + 1] = bGray; + pData[j + 2] = bGray; + pData[j + 3] = 255; + pCPUTensor++; + } + } + } else if (formatFrom == kImageTensorChannelTypeGRAY8 && formatTo == kImageTensorChannelTypeGRAY8) { + for (uint32_t i = 0; i < end; i += bufferWidth) { + for (uint32_t j = i; j < i + bytesPerRow; j += 1) { + BYTE bGray = DetensorizeValue(pCPUTensor); + pData[j] = bGray; + pCPUTensor++; + } + } + } else if (formatFrom == kImageTensorChannelTypeBGR8 && formatTo == 
kImageTensorChannelTypeGRAY8) { + for (uint32_t i = 0; i < end; i += bufferWidth) { + for (uint32_t j = i; j < i + bytesPerRow; j += 1) { + BYTE red, green, blue; + + blue = DetensorizeValue(pCPUTensor); + green = DetensorizeValue(pCPUTensor + tensorPlaneSize); + red = DetensorizeValue(pCPUTensor + tensorPlaneSize * 2); + + pData[j] = static_cast(0.2126f * red + 0.7152f * green + 0.0722f * blue); + pCPUTensor++; + } + } + } else if (formatFrom == kImageTensorChannelTypeRGB8 && formatTo == kImageTensorChannelTypeGRAY8) { + for (uint32_t i = 0; i < end; i += bufferWidth) { + for (uint32_t j = i; j < i + bytesPerRow; j += 1) { + BYTE red, green, blue; + + red = DetensorizeValue(pCPUTensor); + green = DetensorizeValue(pCPUTensor + tensorPlaneSize); + blue = DetensorizeValue(pCPUTensor + tensorPlaneSize * 2); + + pData[j] = static_cast(0.2126f * red + 0.7152f * green + 0.0722f * blue); + pCPUTensor++; + } + } + } +#pragma warning(pop) + else { + return E_INVALIDARG; + } + return S_OK; + } + + private: + template + static float ReadTensor(const T* pCPUTensor) { + return *pCPUTensor; + } + + template <> + static float ReadTensor(const DirectX::PackedVector::HALF* pCPUTensor) { + return DirectX::PackedVector::XMConvertHalfToFloat(*pCPUTensor); + } + + template + static BYTE DetensorizeValue(const T* pCPUTensor) { + return static_cast(std::max(0.0f, std::min(255.0f, ReadTensor(pCPUTensor) + 0.5f))); + } + + template + static void InterleaveRowFloatToByte( + const T* xChannel, + const T* yChannel, + const T* zChannel, + uint32_t tensorWidth, + BYTE* pData, + uint32_t bytesPerPixel) { + BYTE* pPixel = pData; + uint32_t tensorWidthRemaining = tensorWidth; + + while (tensorWidthRemaining > 0) { + pPixel[0] = DetensorizeValue(xChannel); + pPixel[1] = DetensorizeValue(yChannel); + pPixel[2] = DetensorizeValue(zChannel); + pPixel[3] = 255; + + pPixel += 4; + xChannel++; + yChannel++; + zChannel++; + tensorWidthRemaining--; + } + } + +#if defined(_M_AMD64) || defined(_M_IX86) + 
template <> + static void InterleaveRowFloatToByte( + const float* xChannel, + const float* yChannel, + const float* zChannel, + uint32_t tensorWidth, + BYTE* pData, + uint32_t bytesPerPixel) { + BYTE* pPixel = pData; + uint32_t tensorWidthRemaining = tensorWidth; + + __m128 maxv = _mm_set1_ps(255.0f); + __m128 zero = _mm_setzero_ps(); + + // Prep an alpha register with 8 bit - 255 alpha values + __m128i alpha = _mm_setzero_si128(); + alpha = _mm_cmpeq_epi32(alpha, alpha); + alpha = _mm_srli_epi16(alpha, 8); + + while (tensorWidthRemaining >= 8) { + // Load, saturate, and convert to ints, 8 - 32 bit floats from X channel + __m128i vXIntsLo = _mm_cvtps_epi32(_mm_min_ps(_mm_loadu_ps(xChannel), maxv)); + __m128i vXIntsHi = _mm_cvtps_epi32(_mm_min_ps(_mm_loadu_ps(xChannel + 4), maxv)); + + // Pack 32 bit ints into 16 bit ints + __m128i vXWords = _mm_packs_epi32(vXIntsLo, vXIntsHi); + + // Load, saturate, and convert to ints, 8 - 32 bit floats from Y channel + __m128i vYIntsLo = _mm_cvtps_epi32(_mm_min_ps(_mm_loadu_ps(yChannel), maxv)); + __m128i vYIntsHi = _mm_cvtps_epi32(_mm_min_ps(_mm_loadu_ps(yChannel + 4), maxv)); + + // Pack 32 bit ints into 16 bit ints + __m128i vYWords = _mm_packs_epi32(vYIntsLo, vYIntsHi); + + // Load, saturate, and convert to ints, 8 - 32 bit floats from Z channel + __m128i vZIntsLo = _mm_cvtps_epi32(_mm_min_ps(_mm_loadu_ps(zChannel), maxv)); + __m128i vZIntsHi = _mm_cvtps_epi32(_mm_min_ps(_mm_loadu_ps(zChannel + 4), maxv)); + + // Pack 32 bit ints into 16 bit ints + __m128i vZWords = _mm_packs_epi32(vZIntsLo, vZIntsHi); + + // Pack 16 bit ints into 8 bit uints + __m128i vXZBytes = _mm_packus_epi16(vXWords, vZWords); + __m128i vYABytes = _mm_packus_epi16(vYWords, alpha); + + // Interleave bytes into XY order + __m128i vXYBytesInterleaved = _mm_unpacklo_epi8(vXZBytes, vYABytes); + // Interleave bytes into ZA order + __m128i vZABytesInterleaved = _mm_unpackhi_epi8(vXZBytes, vYABytes); + + // Interleave 16 bits to get XYZA XYZA ordering + __m128i 
vPixelBytesLo = _mm_unpacklo_epi16(vXYBytesInterleaved, vZABytesInterleaved); + __m128i vPixelBytesHi = _mm_unpackhi_epi16(vXYBytesInterleaved, vZABytesInterleaved); + + // Write out bytes now in proper order + _mm_storeu_si128((__m128i*)pPixel, vPixelBytesLo); + _mm_storeu_si128((__m128i*)(pPixel + 16), vPixelBytesHi); + + xChannel += 8; + yChannel += 8; + zChannel += 8; + pPixel += 8 * bytesPerPixel; + tensorWidthRemaining -= 8; + } + + // Anything remaining deal with it one at a time + while (tensorWidthRemaining > 0) { + pPixel[0] = DetensorizeValue(xChannel); + pPixel[1] = DetensorizeValue(yChannel); + pPixel[2] = DetensorizeValue(zChannel); + pPixel[3] = 255; + + pPixel += bytesPerPixel; + xChannel++; + yChannel++; + zChannel++; + tensorWidthRemaining--; + } + } +#endif +}; +} // namespace Windows::AI::MachineLearning::Internal diff --git a/winml/lib/Api.Image/CpuTensorizer.h b/winml/lib/Api.Image/CpuTensorizer.h new file mode 100644 index 0000000000000..fa60cef9a145b --- /dev/null +++ b/winml/lib/Api.Image/CpuTensorizer.h @@ -0,0 +1,266 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "inc/ImageConversionTypes.h" + +using namespace Windows::AI::MachineLearning::Internal; +using namespace winrt::Windows::Graphics::Imaging; + +namespace Windows::AI::MachineLearning::Internal { +class CpuTensorizer { + public: + template + static HRESULT TensorizeData( + _In_ ImageTensorChannelType formatFrom, + _In_ ImageTensorChannelType formatTo, + _In_ BYTE* pBuffer, + _In_ UINT32 bufferWidth, + _In_ const BitmapBounds& inputBounds, + _Inout_ T* pCPUTensor) { +#pragma warning(push) +#pragma warning(disable : 26014) // warning about possible out of bounds accesing pData, but input is checked for BGRA8 format, so uiCapacity should be in multiples of 4 + // input is BGRA8: so blue at i, green is at i + 1, red is at i + 2 + + uint32_t bytesPerPixel = formatFrom == kImageTensorChannelTypeGRAY8 ? 
1 : 4; + + // bufferWidth may have padding because of optimization, but bytesPerRow includes only the real tensor data. We need to jump + // over bufferWidth's extra padding + uint32_t bytesPerRow = inputBounds.Width * bytesPerPixel; + uint32_t start = (inputBounds.Y * bufferWidth) + (inputBounds.X * bytesPerPixel); + uint32_t end = start + bufferWidth * inputBounds.Height; + uint32_t pixelInd = 0; + + uint32_t xElements = inputBounds.Width - inputBounds.X; + uint32_t yElements = inputBounds.Height - inputBounds.Y; + + if (formatFrom == kImageTensorChannelTypeBGR8 && formatTo == kImageTensorChannelTypeBGR8 || formatFrom == kImageTensorChannelTypeRGB8 && formatTo == kImageTensorChannelTypeRGB8) { + // Convert BGR8 -> BGR8 or RGB8 -> RGB8 + for (uint32_t y = 0; y < yElements; y++) { + DeinterleaveRowByteToFloat( + pBuffer + y * bufferWidth + start, + pCPUTensor + y * inputBounds.Width, + pCPUTensor + (inputBounds.Height * inputBounds.Width) + y * inputBounds.Width, + pCPUTensor + (inputBounds.Height * inputBounds.Width) * 2 + y * inputBounds.Width, + xElements, + bytesPerPixel); + } + } else if (formatFrom == kImageTensorChannelTypeBGR8 && formatTo == kImageTensorChannelTypeRGB8 || formatFrom == kImageTensorChannelTypeRGB8 && formatTo == kImageTensorChannelTypeBGR8) { + // Convert RGB8 -> BGR8 or BGR8 -> RGB8 + for (uint32_t y = 0; y < yElements; y++) { + DeinterleaveRowByteToFloat( + pBuffer + y * bufferWidth + start, + pCPUTensor + (inputBounds.Height * inputBounds.Width) * 2 + y * inputBounds.Width, + pCPUTensor + (inputBounds.Height * inputBounds.Width) + y * inputBounds.Width, + pCPUTensor + y * inputBounds.Width, + xElements, + bytesPerPixel); + } + } else if (formatTo == kImageTensorChannelTypeGRAY8 && (formatFrom == kImageTensorChannelTypeBGR8 || formatFrom == kImageTensorChannelTypeRGB8)) { + // Convert BGR8 -> GRAY8 or RGB8 -> GRAY8 + uint32_t blueIncrement = formatFrom == kImageTensorChannelTypeBGR8 ? 
0 : 2; + uint32_t redIncrement = formatFrom == kImageTensorChannelTypeBGR8 ? 2 : 0; + + for (UINT32 i = start; i < end; i += bufferWidth) { + for (UINT32 j = i; j < i + bytesPerRow; j += bytesPerPixel) { + float red = float(pBuffer[j + redIncrement]); + float green = float(pBuffer[j + 1]); + float blue = float(pBuffer[j + blueIncrement]); + float gray = 0.2126f * red + 0.7152f * green + 0.0722f * blue; + pCPUTensor[pixelInd] = ConvertByteToFloat(static_cast(gray)); + pixelInd++; + } + } + } else if (formatFrom == kImageTensorChannelTypeGRAY8 && (formatTo == kImageTensorChannelTypeBGR8 || formatTo == kImageTensorChannelTypeRGB8)) { + // Convert GRAY8 -> BGR8 or GRAY8 -> RGB8 + for (UINT32 i = start; i < end; i += bufferWidth) { + for (UINT32 j = i; j < i + bytesPerRow; j += bytesPerPixel) { + pCPUTensor[pixelInd] = ConvertByteToFloat(pBuffer[j]); + pCPUTensor[(inputBounds.Height * inputBounds.Width) + pixelInd] = ConvertByteToFloat(pBuffer[j]); + pCPUTensor[(inputBounds.Height * inputBounds.Width * 2) + pixelInd] = ConvertByteToFloat(pBuffer[j]); + pixelInd++; + } + } + } else if (formatFrom == kImageTensorChannelTypeGRAY8 && formatTo == kImageTensorChannelTypeGRAY8) { + // Convert GRAY8 -> GRAY8 + for (UINT32 i = start; i < end; i += bufferWidth) { + for (UINT32 j = i; j < i + bytesPerRow; j += bytesPerPixel) { + pCPUTensor[pixelInd] = ConvertByteToFloat(pBuffer[j]); + pixelInd++; + } + } + } +#pragma warning(pop) + else { + return E_INVALIDARG; + } + return S_OK; + } + + private: + template + static T ConvertByteToFloat(const BYTE& input); + + template <> + static float ConvertByteToFloat(const BYTE& input) { + return static_cast(input); + } + template <> + static DirectX::PackedVector::HALF ConvertByteToFloat(const BYTE& input) { + return DirectX::PackedVector::XMConvertFloatToHalf(input); + } + + template + static void DeinterleaveRowByteToFloat( + _In_ BYTE* pBuffer, + _Inout_ T* xChannel, + _Inout_ T* yChannel, + _Inout_ T* zChannel, + uint32_t pixelElements, 
+ uint32_t bytesPerPixel) { + UINT32 j; + + for (j = 0; j < (pixelElements & 0xFFFFFFFC); j += 4) { + xChannel[j] = ConvertByteToFloat(pBuffer[0]); + yChannel[j] = ConvertByteToFloat(pBuffer[1]); + zChannel[j] = ConvertByteToFloat(pBuffer[2]); + xChannel[j + 1] = ConvertByteToFloat(pBuffer[4]); + yChannel[j + 1] = ConvertByteToFloat(pBuffer[5]); + zChannel[j + 1] = ConvertByteToFloat(pBuffer[6]); + xChannel[j + 2] = ConvertByteToFloat(pBuffer[8]); + yChannel[j + 2] = ConvertByteToFloat(pBuffer[9]); + zChannel[j + 2] = ConvertByteToFloat(pBuffer[10]); + xChannel[j + 3] = ConvertByteToFloat(pBuffer[12]); + yChannel[j + 3] = ConvertByteToFloat(pBuffer[13]); + zChannel[j + 3] = ConvertByteToFloat(pBuffer[14]); + pBuffer += bytesPerPixel * 4; + } + + for (; j < pixelElements; j++) { + xChannel[j] = ConvertByteToFloat(pBuffer[0]); + yChannel[j] = ConvertByteToFloat(pBuffer[1]); + zChannel[j] = ConvertByteToFloat(pBuffer[2]); + pBuffer += bytesPerPixel; + } + } + +#if defined(_M_AMD64) || defined(_M_IX86) + template <> + static void DeinterleaveRowByteToFloat( + _In_ BYTE* pBuffer, + _Inout_ float* xChannel, + _Inout_ float* yChannel, + _Inout_ float* zChannel, + uint32_t pixelElements, + uint32_t bytesPerPixel) { + assert(bytesPerPixel == 4); + + __m128i ZeroVector = _mm_setzero_si128(); + while (pixelElements >= 8) { + // Load 8 Pixels into 2 Registers + // vBytes0 = X0 Y0 Z0 A0 X1 Y1... + // vBytes0 = X4 Y4 Z4 A4 X2 Y2... 
+ __m128i vBytes0 = _mm_loadu_si128((__m128i*)pBuffer); + __m128i vBytes1 = _mm_loadu_si128((__m128i*)(pBuffer + 16)); + + // Shuffle to get + // vi0 = X0 X4 Y0 Y4...A1 A5 (A is Alpha which is ignored) + // vi1 = X2 X6 Y2 Y6...A2 A6 + __m128i vi0 = _mm_unpacklo_epi8(vBytes0, vBytes1); + __m128i vi1 = _mm_unpackhi_epi8(vBytes0, vBytes1); + + // Shuffle again to get + // vi0 = X0 X2 X4 X6...A4 A6 (All even byes) + // vi1 = X1 X3 X5 X7...A3 A7 (All odd bytes) + __m128i vi2 = _mm_unpacklo_epi8(vi0, vi1); + __m128i vi3 = _mm_unpackhi_epi8(vi0, vi1); + + // Shuffle last time to get desired order + // vi0 = X0 X1 X2 X3...Y6 Y7 (All even byes) + // vi1 = Z0 Z1 Z2 Z3...A6 A7 (All odd bytes) + __m128i vi4 = _mm_unpacklo_epi8(vi2, vi3); + __m128i vi5 = _mm_unpackhi_epi8(vi2, vi3); + + // unpack with zeros to get 16 bit ints + // vXWords = X0 X1...X6 X7 + __m128i vXWords = _mm_unpacklo_epi8(vi4, ZeroVector); + + // unpack again with zeros to get 32 bit ints + __m128i vXIntsLo = _mm_unpacklo_epi16(vXWords, ZeroVector); + __m128i vXIntsHi = _mm_unpackhi_epi16(vXWords, ZeroVector); + + // store 256 bits of X channel Floats + _mm_storeu_ps(xChannel, _mm_cvtepi32_ps(vXIntsLo)); + _mm_storeu_ps(xChannel + 4, _mm_cvtepi32_ps(vXIntsHi)); + xChannel += 8; + + // unpack again for Y + __m128i vYWords = _mm_unpackhi_epi8(vi4, ZeroVector); + + __m128i vYIntsLo = _mm_unpacklo_epi16(vYWords, ZeroVector); + __m128i vYIntsHi = _mm_unpackhi_epi16(vYWords, ZeroVector); + + _mm_storeu_ps(yChannel, _mm_cvtepi32_ps(vYIntsLo)); + _mm_storeu_ps(yChannel + 4, _mm_cvtepi32_ps(vYIntsHi)); + yChannel += 8; + + // unpack again for Z + __m128i vZWords = _mm_unpacklo_epi8(vi5, ZeroVector); + + __m128i vZIntsLo = _mm_unpacklo_epi16(vZWords, ZeroVector); + __m128i vZIntsHi = _mm_unpackhi_epi16(vZWords, ZeroVector); + + _mm_storeu_ps(zChannel, _mm_cvtepi32_ps(vZIntsLo)); + _mm_storeu_ps(zChannel + 4, _mm_cvtepi32_ps(vZIntsHi)); + zChannel += 8; + + pBuffer += 32; + pixelElements -= 8; + } + if (pixelElements 
>= 4) { + // load 4 pixels = 16 values + __m128i vBytes = _mm_loadu_si128((__m128i*)pBuffer); + + // unpack to 16 bits + __m128i vWords0 = _mm_unpacklo_epi8(vBytes, ZeroVector); + __m128i vWords1 = _mm_unpackhi_epi8(vBytes, ZeroVector); + + // unpack to 32 bits + __m128i vInts0 = _mm_unpacklo_epi16(vWords0, ZeroVector); + __m128i vInts1 = _mm_unpackhi_epi16(vWords0, ZeroVector); + __m128i vInts2 = _mm_unpacklo_epi16(vWords1, ZeroVector); + __m128i vInts3 = _mm_unpackhi_epi16(vWords1, ZeroVector); + + // Convert to floats + __m128 vFloats0 = _mm_cvtepi32_ps(vInts0); + __m128 vFloats1 = _mm_cvtepi32_ps(vInts1); + __m128 vFloats2 = _mm_cvtepi32_ps(vInts2); + __m128 vFloats3 = _mm_cvtepi32_ps(vInts3); + + // We want have row but need cols so transpose 4x4 matrix + _MM_TRANSPOSE4_PS(vFloats0, vFloats1, vFloats2, vFloats3); + + // Drop alpha channel transposed to vFloats3 write out rest + _mm_storeu_ps(xChannel, vFloats0); + _mm_storeu_ps(yChannel, vFloats1); + _mm_storeu_ps(zChannel, vFloats2); + + xChannel += 4; + yChannel += 4; + zChannel += 4; + pBuffer += 4 * 4; + pixelElements -= 4; + } + + // Any remainder just do one at a time + for (uint32_t j = 0; j < pixelElements; j++) { + xChannel[j] = static_cast(pBuffer[0]); + yChannel[j] = static_cast(pBuffer[1]); + zChannel[j] = static_cast(pBuffer[2]); + pBuffer += bytesPerPixel; + } + } +#endif +}; +} // namespace Windows::AI::MachineLearning::Internal diff --git a/winml/lib/Api.Image/D3DDeviceCache.cpp b/winml/lib/Api.Image/D3DDeviceCache.cpp new file mode 100644 index 0000000000000..94119425f1ae1 --- /dev/null +++ b/winml/lib/Api.Image/D3DDeviceCache.cpp @@ -0,0 +1,675 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" +#include "inc/D3DDeviceCache.h" +#include +#include +#include "inc/DeviceHelpers.h" +#include "CommonDeviceHelpers.h" + +namespace float32 { +#include "shaders\SurfaceToTensor-SurfaceToTensorBGR8.h" +#include "shaders\SurfaceToTensor-SurfaceToTensorRGB8.h" +#include "shaders\SurfaceToTensor-SurfaceToTensorGRAY8.h" +#include "shaders\SurfaceToTensor-SurfaceGRAY8ToTensorBGR8.h" +#include "shaders\SurfaceToTensor-SurfaceGRAY8ToTensorGRAY8.h" +#include "shaders\TensorToSurface-TensorBGR8ToSurface.h" +#include "shaders\TensorToSurface-TensorRGB8ToSurface.h" +#include "shaders\TensorToSurface-TensorGRAY8ToSurface.h" +#include "shaders\TensorToSurface-TensorBGR8ToSurfaceGRAY8.h" +#include "shaders\TensorToSurface-TensorRGB8ToSurfaceGRAY8.h" +#include "shaders\TensorToSurface-TensorGRAY8ToSurfaceGRAY8.h" +} // namespace float32 + +namespace float16 { +#include "shaders\SurfaceToTensor16-SurfaceToTensorBGR8.h" +#include "shaders\SurfaceToTensor16-SurfaceToTensorRGB8.h" +#include "shaders\SurfaceToTensor16-SurfaceToTensorGRAY8.h" +#include "shaders\SurfaceToTensor16-SurfaceGRAY8ToTensorBGR8.h" +#include "shaders\SurfaceToTensor16-SurfaceGRAY8ToTensorGRAY8.h" +#include "shaders\TensorToSurface16-TensorBGR8ToSurface.h" +#include "shaders\TensorToSurface16-TensorRGB8ToSurface.h" +#include "shaders\TensorToSurface16-TensorGRAY8ToSurface.h" +#include "shaders\TensorToSurface16-TensorBGR8ToSurfaceGRAY8.h" +#include "shaders\TensorToSurface16-TensorRGB8ToSurfaceGRAY8.h" +#include "shaders\TensorToSurface16-TensorGRAY8ToSurfaceGRAY8.h" +} // namespace float16 + +using namespace Microsoft::WRL; + +namespace winrt::Windows::AI::MachineLearning::implementation { +D3DDeviceCache::D3DDeviceCache(Windows::AI::MachineLearning::LearningModelDeviceKind const& deviceKind) { + WINML_THROW_IF_FAILED(CoCreateGuid(&fence_guid_)); + + if (deviceKind == LearningModelDeviceKind::Cpu || deviceKind == LearningModelDeviceKind::Default) { + // CPU device don't make any GPU devices + 
device_luid_.HighPart = device_luid_.LowPart = 0; + return; + } + + DXGI_GPU_PREFERENCE preference; + WINML_THROW_IF_FAILED(DeviceHelpers::GetGPUPreference(deviceKind, &preference)); + + CommonDeviceHelpers::AdapterEnumerationSupport support; + WINML_THROW_IF_FAILED(CommonDeviceHelpers::GetAdapterEnumerationSupport(&support)); + + const char errStr[] = "No hardware adapters available"; + if (support.has_dxgi) { + com_ptr spAdapter; + WINML_THROW_IF_FAILED_MSG(DeviceHelpers::GetDXGIHardwareAdapterWithPreference(preference, spAdapter.put()), errStr); + WINML_THROW_IF_FAILED(D3D12CreateDevice(spAdapter.get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device_.put()))); + } +#ifdef ENABLE_DXCORE + if (support.has_dxgi == false) { + com_ptr spAdapter; + WINML_THROW_IF_FAILED_MSG(DeviceHelpers::GetDXCoreHardwareAdapterWithPreference(preference, spAdapter.put()), errStr); + WINML_THROW_IF_FAILED(D3D12CreateDevice(spAdapter.get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device_.put()))); + } +#endif + InitializeCommandQueue(device_.get()); + + device_luid_ = device_->GetAdapterLuid(); +} + +D3DDeviceCache::D3DDeviceCache(Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice const& device) { + WINML_THROW_IF_FAILED(CoCreateGuid(&fence_guid_)); + + // Use the 11 device to initialize 12 + winrt_device_ = device; + + // they told us which device to run on, crack the interop wrapper to get the dxgi device + com_ptr<::Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess> dxgi; + dxgi = device.as<::Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>(); + + com_ptr dxgiDevice; + WINML_THROW_IF_FAILED(dxgi->GetInterface(IID_PPV_ARGS(dxgiDevice.put()))); + + device_11_ = dxgiDevice.as(); + + com_ptr spContext; + device_11_->GetImmediateContext(spContext.put()); + spContext.as(device_context11_); + + com_ptr pDXGIDevice; + WINML_THROW_IF_FAILED(dxgi->GetInterface(IID_PPV_ARGS(pDXGIDevice.put()))); + + com_ptr adapter; + 
WINML_THROW_IF_FAILED(pDXGIDevice->GetAdapter(adapter.put())); + + WINML_THROW_IF_FAILED(D3D12CreateDevice(adapter.get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device_.put()))); + + InitializeCommandQueue(device_.get()); + + device_luid_ = device_->GetAdapterLuid(); +} + +D3DDeviceCache::D3DDeviceCache(ID3D12CommandQueue* queue) { + WINML_THROW_IF_FAILED(CoCreateGuid(&fence_guid_)); + + // Use the command queue to initialize all of the needed D3D11 interop + command_queue_.copy_from(queue); + command_queue_->QueryInterface(IID_PPV_ARGS(sharing_contract_.put())); + + WINML_THROW_IF_FAILED(queue->GetDevice(IID_PPV_ARGS(device_.put()))); + + device_luid_ = device_->GetAdapterLuid(); +} + +D3DDeviceCache::~D3DDeviceCache() { + // If this is a CPU instance device_ will not have been created. + // Ensure the device is still valid before doing work. + if (device_ != nullptr && (device_->GetDeviceRemovedReason() == S_OK)) { + // dx11 stack is optional, and we lazy load it when available + if (device_context11_ != nullptr) { + // Sync 11 to 12 then Sync 12 to the CPU. This ensures that all inflight work is done before we delete the d3d objects. 
+ GPUSyncD3D11ToD3D12(); + } + SyncD3D12ToCPU(); + } +} + +bool D3DDeviceCache::IsFloat16Supported() { + if (device_ != nullptr) { + return CommonDeviceHelpers::IsFloat16Supported(device_.get()); + } + + return true; +} + +ID3D11Device* D3DDeviceCache::GetD3D11Device() { + EnsureD3D11FromD3D12(); + return device_11_.get(); +} + +const GUID& D3DDeviceCache::GetFenceGuid() const { + return fence_guid_; +} + +ID3D11DeviceContext4* D3DDeviceCache::GetD3D11DeviceContext() { + EnsureD3D11FromD3D12(); + return device_context11_.get(); +} + +Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice D3DDeviceCache::GetWinrtDevice() { + EnsureD3D11FromD3D12(); + return winrt_device_; +} + +void D3DDeviceCache::InitializeCommandQueue(ID3D12Device1* device) { + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + WINML_THROW_IF_FAILED(device->CreateCommandQueue(&queueDesc, winrt::guid_of(), command_queue_.put_void())); + + // If possible get the sharing context. If not leave nullptr; + command_queue_->QueryInterface(IID_PPV_ARGS(sharing_contract_.put())); +} + +// this initializes the following variables, making them from the dx12 device +// device_11_ +// device_context11_ +// winrt_device_ +void D3DDeviceCache::EnsureD3D11FromD3D12() { + // do we even have a device? (CPU will use the cache but not have a device) . 
+ if (device_ == nullptr) + return; + + // are we already initialized + if (winrt_device_ != nullptr) + return; + + CWinMLAutoLock lock(&lock_); + + // check with the lock held, are we already initialized + if (winrt_device_ != nullptr) + return; + + com_ptr<::IInspectable> spInspectable; + com_ptr spDXGIDevice; + + // call our SEH version (for delay loading) + WINML_THROW_IF_FAILED(DeviceHelpers::CreateD3D11On12Device(device_.get(), device_11_.put())); + com_ptr spContext; + device_11_->GetImmediateContext(spContext.put()); + spContext.as(device_context11_); + + WINML_THROW_IF_FAILED(device_11_->QueryInterface(IID_PPV_ARGS(spDXGIDevice.put()))); + // Convert to Winrt wrapper. This doesn't actually make a new device. + WINML_THROW_IF_FAILED(CreateDirect3D11DeviceFromDXGIDevice(spDXGIDevice.get(), spInspectable.put())); + WINML_THROW_IF_FAILED(spInspectable->QueryInterface(winrt::guid_of(), reinterpret_cast(winrt::put_abi(winrt_device_)))); +} + +void D3DDeviceCache::EnsureD3D12Fence() { + // are we already initialized? + if (d3d12_fence_ != nullptr) + return; + + CWinMLAutoLock lock(&lock_); + + // with the lock held, are we already initialized? + if (d3d12_fence_ != nullptr) + return; + + WINML_THROW_IF_FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(d3d12_fence_.put()))); +} + +// this initializes the following variables, so that we can share dx12 with dx11 +// d3d11_fence_ +// d3d12_fence_ +void D3DDeviceCache::EnsureSharedFences() { + // are we already initialized? + if (d3d11_fence_ != nullptr) + return; + + CWinMLAutoLock lock(&lock_); + + // with the lock held, are we already initialized? 
+ if (d3d11_fence_ != nullptr) + return; + + EnsureD3D12Fence(); + + // ensure the d11 stack is alive, the 11 stack doesn't exist on WCOSHeadless yet, so be resilient + EnsureD3D11FromD3D12(); + + com_ptr spD3D12DeviceChild; + d3d12_fence_.as(spD3D12DeviceChild); + HANDLE hSharedFence; + WINML_THROW_IF_FAILED(device_->CreateSharedHandle(spD3D12DeviceChild.get(), NULL, GENERIC_ALL, nullptr, &hSharedFence)); + + com_ptr spD3D11Device5; + device_11_.as(spD3D11Device5); + wil::unique_handle safe(hSharedFence); + WINML_THROW_IF_FAILED(spD3D11Device5->OpenSharedFence(safe.get(), IID_PPV_ARGS(d3d11_fence_.put()))); +} + +void D3DDeviceCache::GPUSyncD3D11ToD3D12() { + EnsureSharedFences(); + + UINT64 currentFence = fence_value_++; + WINML_THROW_IF_FAILED(device_context11_->Signal(d3d11_fence_.get(), currentFence)); + + WINML_THROW_IF_FAILED(command_queue_->Wait(d3d12_fence_.get(), currentFence)); + + if (sharing_contract_ != nullptr) { + sharing_contract_->SharedFenceSignal(d3d12_fence_.get(), currentFence); + } +} + +void D3DDeviceCache::GPUSyncD3D12ToD3D11() { + EnsureSharedFences(); + + UINT64 currentFence = fence_value_++; + WINML_THROW_IF_FAILED(command_queue_->Signal(d3d12_fence_.get(), currentFence)); + + WINML_THROW_IF_FAILED(device_context11_->Wait(d3d11_fence_.get(), currentFence)); +} + +void D3DDeviceCache::SyncD3D12ToCPU() { + UINT64 currentFence = QueueFenceToD3D12(); + WaitForFenceValue(currentFence); +} + +UINT64 D3DDeviceCache::QueueFenceToD3D12() { + EnsureD3D12Fence(); + + UINT64 currentFence = fence_value_++; + WINML_THROW_IF_FAILED(command_queue_->Signal(d3d12_fence_.get(), currentFence)); + + return currentFence; +} + +void D3DDeviceCache::WaitForFenceValue(UINT64 fenceValue) { + EnsureD3D12Fence(); + + wil::unique_handle event(CreateEvent(nullptr, FALSE, FALSE, nullptr)); + THROW_LAST_ERROR_IF(!event); + + WINML_THROW_IF_FAILED(d3d12_fence_->SetEventOnCompletion(fenceValue, event.get())); + + DWORD retVal = WaitForSingleObject(event.get(), INFINITE); 
+ if (retVal != WAIT_OBJECT_0) { + WINML_THROW_IF_FAILED(E_UNEXPECTED); + } +} + +ID3D12RootSignature* D3DDeviceCache::GetTensorizeRootSignature() { + if (tensorize_root_signature_ == nullptr) { + com_ptr newRootSignature; + D3D12_FEATURE_DATA_ROOT_SIGNATURE featureData = {}; + + // This is the highest version the sample supports. If CheckFeatureSupport succeeds, the HighestVersion returned will not be greater than this. + featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1; + + if (FAILED(device_->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE, &featureData, sizeof(featureData)))) { + featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_0; + } + + // Compute root signature. + { + CD3DX12_DESCRIPTOR_RANGE1 ranges[2]; + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE); + ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE); + + CD3DX12_ROOT_PARAMETER1 rootParameters[3]; + rootParameters[0].InitAsConstants(4, 0); + rootParameters[1].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_ALL); + rootParameters[2].InitAsDescriptorTable(1, &ranges[1], D3D12_SHADER_VISIBILITY_ALL); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC computeRootSignatureDesc; + computeRootSignatureDesc.Init_1_1(_countof(rootParameters), rootParameters, 0, nullptr); + + com_ptr signature; + com_ptr error; + WINML_THROW_IF_FAILED(D3DX12SerializeVersionedRootSignature(&computeRootSignatureDesc, featureData.HighestVersion, signature.put(), error.put())); + WINML_THROW_IF_FAILED(device_->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(newRootSignature.put()))); + newRootSignature->SetName(L"Tensorize Rootsignature"); + } + + if (InterlockedCompareExchangePointer( + tensorize_root_signature_.put_void(), + newRootSignature.get(), + nullptr) == nullptr) { + // This thread won the race and just cached the PSO + newRootSignature.detach(); 
+ } + } + + return tensorize_root_signature_.get(); +} + +ID3D12RootSignature* D3DDeviceCache::GetDetensorizeRootSignature() { + if (detensorize_root_signature_ == nullptr) { + com_ptr newRootSignature; + D3D12_FEATURE_DATA_ROOT_SIGNATURE featureData = {}; + + // This is the highest version the sample supports. If CheckFeatureSupport succeeds, the HighestVersion returned will not be greater than this. + featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1; + + if (FAILED(device_->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE, &featureData, sizeof(featureData)))) { + featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_0; + } + + // Compute root signature. + { + CD3DX12_DESCRIPTOR_RANGE1 ranges[2]; + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE); + ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE); + + CD3DX12_ROOT_PARAMETER1 rootParameters[3]; + rootParameters[0].InitAsConstants(4, 0); + rootParameters[1].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_ALL); + rootParameters[2].InitAsDescriptorTable(1, &ranges[1], D3D12_SHADER_VISIBILITY_ALL); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_1(_countof(rootParameters), rootParameters, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT); + + com_ptr signature; + com_ptr error; + WINML_THROW_IF_FAILED(D3DX12SerializeVersionedRootSignature(&rootSignatureDesc, featureData.HighestVersion, signature.put(), error.put())); + WINML_THROW_IF_FAILED(device_->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(newRootSignature.put()))); + newRootSignature->SetName(L"Detensorize Rootsignature"); + } + + if (InterlockedCompareExchangePointer( + detensorize_root_signature_.put_void(), + newRootSignature.get(), + nullptr) == nullptr) { + // This thread won the race and just cached the PSO + 
newRootSignature.detach(); + } + } + + return detensorize_root_signature_.get(); +} + +ID3D12PipelineState* D3DDeviceCache::GetCachedPipelineState(PipelineStateCacheType type, PipelineStateCacheFormat formatFrom, PipelineStateCacheFormat formatTo, PipelineStateCacheOperation operation) { + if (cached_pipeline_state[static_cast(type)][static_cast(formatFrom)][static_cast(formatTo)][static_cast(operation)] == nullptr) { + com_ptr newPSO; + if (operation == PipelineStateCacheOperation::kTensorize) { + newPSO.attach(CreateTensorizePipelineState(type, formatFrom, formatTo)); + } else { + newPSO.attach(CreateDetensorizePipelineState(type, formatFrom, formatTo)); + } + + if (InterlockedCompareExchangePointer( + cached_pipeline_state[static_cast(type)][static_cast(formatFrom)][static_cast(formatTo)][static_cast(operation)].put_void(), + newPSO.get(), + nullptr) == nullptr) { + // This thread won the race and just cached the PSO + newPSO.detach(); + } + } + + return cached_pipeline_state[static_cast(type)][static_cast(formatFrom)][static_cast(formatTo)][static_cast(operation)].get(); +} + +ID3D12PipelineState* D3DDeviceCache::CreateTensorizePipelineState(PipelineStateCacheType type, PipelineStateCacheFormat formatFrom, PipelineStateCacheFormat formatTo) { + static_assert(static_cast(PipelineStateCacheFormat::kCount) == 3, "PipelineStateCacheFormat changed, update D3DDeviceCache::CreateTensorizePipelineState()"); + + const BYTE* shaderBytecode = nullptr; + uint64_t shaderBytecodeSize = 0; + + switch (formatFrom) { + case PipelineStateCacheFormat::kBGR8: + case PipelineStateCacheFormat::kRGB8: + if (type == PipelineStateCacheType::kFloat32) { + if (formatTo == PipelineStateCacheFormat::kBGR8) { + shaderBytecode = float32::g_csSurfaceToTensorBGR8; + shaderBytecodeSize = sizeof(float32::g_csSurfaceToTensorBGR8); + } else if (formatTo == PipelineStateCacheFormat::kRGB8) { + shaderBytecode = float32::g_csSurfaceToTensorRGB8; + shaderBytecodeSize = 
sizeof(float32::g_csSurfaceToTensorRGB8); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float32::g_csSurfaceToTensorGRAY8; + shaderBytecodeSize = sizeof(float32::g_csSurfaceToTensorGRAY8); + } else { + assert(false); + } + } else if (type == PipelineStateCacheType::kFloat16) { + if (formatTo == PipelineStateCacheFormat::kBGR8) { + shaderBytecode = float16::g_csSurfaceToTensorBGR8; + shaderBytecodeSize = sizeof(float16::g_csSurfaceToTensorBGR8); + } else if (formatTo == PipelineStateCacheFormat::kRGB8) { + shaderBytecode = float16::g_csSurfaceToTensorRGB8; + shaderBytecodeSize = sizeof(float16::g_csSurfaceToTensorRGB8); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float16::g_csSurfaceToTensorGRAY8; + shaderBytecodeSize = sizeof(float16::g_csSurfaceToTensorGRAY8); + } else { + assert(false); + } + } + break; + case PipelineStateCacheFormat::kGRAY8: + if (type == PipelineStateCacheType::kFloat32) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + // GRAY -> RGB is the same shader as GRAY -> BGR + shaderBytecode = float32::g_csSurfaceGRAY8ToTensorBGR8; + shaderBytecodeSize = sizeof(float32::g_csSurfaceGRAY8ToTensorBGR8); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float32::g_csSurfaceGRAY8ToTensorGRAY8; + shaderBytecodeSize = sizeof(float32::g_csSurfaceGRAY8ToTensorGRAY8); + } else { + assert(false); + } + } else if (type == PipelineStateCacheType::kFloat16) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + // GRAY -> RGB is the same shader as GRAY -> BGR + shaderBytecode = float16::g_csSurfaceGRAY8ToTensorBGR8; + shaderBytecodeSize = sizeof(float16::g_csSurfaceGRAY8ToTensorBGR8); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float16::g_csSurfaceGRAY8ToTensorGRAY8; + shaderBytecodeSize = 
sizeof(float16::g_csSurfaceGRAY8ToTensorGRAY8); + } else { + assert(false); + } + } + break; + default: + assert(false); + break; + } + + D3D12_COMPUTE_PIPELINE_STATE_DESC computePsoDesc = {}; + computePsoDesc.pRootSignature = GetTensorizeRootSignature(); + computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(shaderBytecode, static_cast(shaderBytecodeSize)); + + com_ptr pipelineState; + WINML_THROW_IF_FAILED(device_->CreateComputePipelineState(&computePsoDesc, IID_PPV_ARGS(pipelineState.put()))); + + return pipelineState.detach(); +} + +ID3D12PipelineState* D3DDeviceCache::CreateDetensorizePipelineState(PipelineStateCacheType type, PipelineStateCacheFormat formatFrom, PipelineStateCacheFormat formatTo) { + static_assert(static_cast(PipelineStateCacheFormat::kCount) == 3, "PipelineStateCacheFormat changed, update D3DDeviceCache::CreateDetensorizePipelineState()"); + + const BYTE* shaderBytecode = nullptr; + uint64_t shaderBytecodeSize = 0; + + switch (formatFrom) { + case PipelineStateCacheFormat::kBGR8: + if (type == PipelineStateCacheType::kFloat32) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + shaderBytecode = float32::g_csTensorBGR8ToSurface; + shaderBytecodeSize = sizeof(float32::g_csTensorBGR8ToSurface); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float32::g_csTensorBGR8ToSurfaceGRAY8; + shaderBytecodeSize = sizeof(float32::g_csTensorBGR8ToSurfaceGRAY8); + } else { + assert(false); + } + } else if (type == PipelineStateCacheType::kFloat16) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + shaderBytecode = float16::g_csTensorBGR8ToSurface; + shaderBytecodeSize = sizeof(float16::g_csTensorBGR8ToSurface); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float16::g_csTensorBGR8ToSurfaceGRAY8; + shaderBytecodeSize = sizeof(float16::g_csTensorBGR8ToSurfaceGRAY8); + } else { + assert(false); + } + } 
+ break; + case PipelineStateCacheFormat::kRGB8: + if (type == PipelineStateCacheType::kFloat32) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + shaderBytecode = float32::g_csTensorRGB8ToSurface; + shaderBytecodeSize = sizeof(float32::g_csTensorRGB8ToSurface); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float32::g_csTensorRGB8ToSurfaceGRAY8; + shaderBytecodeSize = sizeof(float32::g_csTensorRGB8ToSurfaceGRAY8); + } else { + assert(false); + } + } else if (type == PipelineStateCacheType::kFloat16) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + shaderBytecode = float16::g_csTensorRGB8ToSurface; + shaderBytecodeSize = sizeof(float16::g_csTensorRGB8ToSurface); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float16::g_csTensorRGB8ToSurfaceGRAY8; + shaderBytecodeSize = sizeof(float16::g_csTensorRGB8ToSurfaceGRAY8); + } else { + assert(false); + } + } + break; + case PipelineStateCacheFormat::kGRAY8: + if (type == PipelineStateCacheType::kFloat32) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + // GRAY -> RGB is the same shader as GRAY -> BGR + shaderBytecode = float32::g_csTensorGRAY8ToSurface; + shaderBytecodeSize = sizeof(float32::g_csTensorGRAY8ToSurface); + } else if (formatTo == PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float32::g_csTensorGRAY8ToSurfaceGRAY8; + shaderBytecodeSize = sizeof(float32::g_csTensorGRAY8ToSurfaceGRAY8); + } else { + assert(false); + } + } else if (type == PipelineStateCacheType::kFloat16) { + if (formatTo == PipelineStateCacheFormat::kBGR8 || formatTo == PipelineStateCacheFormat::kRGB8) { + // GRAY -> RGB is the same shader as GRAY -> BGR + shaderBytecode = float16::g_csTensorGRAY8ToSurface; + shaderBytecodeSize = sizeof(float16::g_csTensorGRAY8ToSurface); + } else if (formatTo == 
PipelineStateCacheFormat::kGRAY8) { + shaderBytecode = float16::g_csTensorGRAY8ToSurfaceGRAY8; + shaderBytecodeSize = sizeof(float16::g_csTensorGRAY8ToSurfaceGRAY8); + } else { + assert(false); + } + } + break; + default: + assert(false); + break; + } + + D3D12_COMPUTE_PIPELINE_STATE_DESC computePsoDesc = {}; + computePsoDesc.pRootSignature = GetDetensorizeRootSignature(); + computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(shaderBytecode, static_cast(shaderBytecodeSize)); + + com_ptr pipelineState; + WINML_THROW_IF_FAILED(device_->CreateComputePipelineState(&computePsoDesc, IID_PPV_ARGS(pipelineState.put()))); + + return pipelineState.detach(); +} + +ID3D12Resource* D3DDeviceCache::GetDetensorizeVertexBuffer(_Out_ UINT* vertexBufferSize) { + if (detensorize_vertex_buffer_ == nullptr) { + com_ptr newResource; + // Create the vertex buffer. + // 2 triangles for full screen + DirectX::XMFLOAT3 triangleVertices[] = + { + {-1.0f, 1.0f, 0.0f}, + {1.0f, 1.0f, 0.0f}, + {-1.0f, -1.0f, 0.0f}, + {1.0f, -1.0f, 0.0f}, + }; + + assert(sc_vertexBufferSize == sizeof(triangleVertices)); + + CD3DX12_HEAP_PROPERTIES heapProp(D3D12_HEAP_TYPE_UPLOAD); + D3D12_RESOURCE_DESC resourceDiscription = CD3DX12_RESOURCE_DESC::Buffer(sc_vertexBufferSize); + WINML_THROW_IF_FAILED(device_->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resourceDiscription, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(newResource.put()))); + + // Copy the triangle data to the vertex buffer. + UINT8* pVertexDataBegin; + CD3DX12_RANGE readRange(0, 0); // We do not intend to read from this resource on the CPU. 
+ WINML_THROW_IF_FAILED(newResource->Map(0, &readRange, reinterpret_cast(&pVertexDataBegin))); + memcpy(pVertexDataBegin, triangleVertices, sizeof(triangleVertices)); + newResource->Unmap(0, nullptr); + + if (InterlockedCompareExchangePointer( + detensorize_vertex_buffer_.put_void(), + newResource.get(), + nullptr) == nullptr) { + // This thread won the race and just cached the PSO + newResource.detach(); + } + } + + *vertexBufferSize = sc_vertexBufferSize; + return detensorize_vertex_buffer_.get(); +} + +HANDLE D3DDeviceCache::GetConverterFenceHandle() { + // Lazily create the fence since we may never need to use it + if (!converter_fence_) { + WINML_THROW_IF_FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_SHARED | D3D12_FENCE_FLAG_SHARED_CROSS_ADAPTER, IID_PPV_ARGS(converter_fence_.put()))); + + HANDLE hSharedFence; + WINML_THROW_IF_FAILED(device_->CreateSharedHandle(converter_fence_.get(), nullptr, GENERIC_ALL, nullptr, &hSharedFence)); + + converter_fence_handle_ = wil::unique_handle(hSharedFence); + } + + return converter_fence_handle_.get(); +} + +void D3DDeviceCache::SyncConverterToD3D11Device(_In_ ID3D11Fence* pD3D11Fence) { + assert(command_queue_ != nullptr); + assert(pD3D11Fence != nullptr); + + ComPtr spD3D11Device; + pD3D11Fence->GetDevice(&spD3D11Device); + + ComPtr spD3D11DeviceContext; + spD3D11Device->GetImmediateContext(&spD3D11DeviceContext); + + ComPtr spD3D11DeviceContext4; + WINML_THROW_IF_FAILED(spD3D11DeviceContext->QueryInterface(IID_PPV_ARGS(&spD3D11DeviceContext4))); + + UINT64 newfenceValue = converter_fence_value_++; + WINML_THROW_IF_FAILED(command_queue_->Signal(converter_fence_.get(), newfenceValue)); + WINML_THROW_IF_FAILED(spD3D11DeviceContext4->Wait(pD3D11Fence, newfenceValue)); +} + +void D3DDeviceCache::SyncD3D11DeviceToConverter(_In_ ID3D11Fence* pD3D11Fence) { + assert(command_queue_ != nullptr); + assert(pD3D11Fence != nullptr); + + ComPtr spD3D11Device; + pD3D11Fence->GetDevice(&spD3D11Device); + + ComPtr 
spD3D11DeviceContext; + spD3D11Device->GetImmediateContext(&spD3D11DeviceContext); + + ComPtr spD3D11DeviceContext4; + WINML_THROW_IF_FAILED(spD3D11DeviceContext->QueryInterface(IID_PPV_ARGS(&spD3D11DeviceContext4))); + + UINT64 newfenceValue = converter_fence_value_++; + WINML_THROW_IF_FAILED(spD3D11DeviceContext4->Signal(pD3D11Fence, newfenceValue)); + WINML_THROW_IF_FAILED(command_queue_->Wait(converter_fence_.get(), newfenceValue)); +} + +bool D3DDeviceCache::SharedHandleInitialized() { + return d3d11_fence_ != nullptr; +} +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api.Image/DeviceHelpers.cpp b/winml/lib/Api.Image/DeviceHelpers.cpp new file mode 100644 index 0000000000000..8c6e81d4ac2aa --- /dev/null +++ b/winml/lib/Api.Image/DeviceHelpers.cpp @@ -0,0 +1,135 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#if USE_DML +#include +#endif USE_DML +#include +#include +#include "inc/DeviceHelpers.h" +#include "CommonDeviceHelpers.h" +#include "LearningModelDevice.h" + +namespace DeviceHelpers { +HRESULT GetDXGIHardwareAdapterWithPreference(DXGI_GPU_PREFERENCE preference, IDXGIAdapter1** ppAdapter) { + winrt::com_ptr spFactory; + RETURN_IF_FAILED(CreateDXGIFactory1(IID_PPV_ARGS(spFactory.put()))); + + winrt::com_ptr spAdapter; + UINT i = 0; + while (spFactory->EnumAdapterByGpuPreference(i, preference, IID_PPV_ARGS(spAdapter.put())) != DXGI_ERROR_NOT_FOUND) { + DXGI_ADAPTER_DESC1 pDesc; + spAdapter->GetDesc1(&pDesc); + + // see here for documentation on filtering WARP adapter: + // https://docs.microsoft.com/en-us/windows/desktop/direct3ddxgi/d3d10-graphics-programming-guide-dxgi#new-info-about-enumerating-adapters-for-windows-8 + auto isBasicRenderDriverVendorId = pDesc.VendorId == 0x1414; + auto isBasicRenderDriverDeviceId = pDesc.DeviceId == 0x8c; + auto isSoftwareAdapter = pDesc.Flags == DXGI_ADAPTER_FLAG_SOFTWARE; + if (!isSoftwareAdapter && 
!(isBasicRenderDriverVendorId && isBasicRenderDriverDeviceId)) { + spAdapter.copy_to(ppAdapter); + return S_OK; + } + + spAdapter = nullptr; + ++i; + } + return HRESULT_FROM_WIN32(ERROR_NOT_FOUND); +} + +#ifdef ENABLE_DXCORE +// Return the first adapter that matches the preference: +// DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE => DXCoreAdapterProperty::IsDetachable +// DXGI_GPU_PREFERENCE_MINIMUM_POWER => DXCoreAdapterProperty::IsIntegrated +HRESULT GetDXCoreHardwareAdapterWithPreference(DXGI_GPU_PREFERENCE preference, IDXCoreAdapter** ppAdapter) { + winrt::com_ptr spFactory; + RETURN_IF_FAILED(DXCoreCreateAdapterFactory(IID_PPV_ARGS(spFactory.put()))); + + winrt::com_ptr spAdapterList; + const GUID gpuFilter[] = {DXCORE_ADAPTER_ATTRIBUTE_D3D12_GRAPHICS}; + RETURN_IF_FAILED(spFactory->CreateAdapterList(1, gpuFilter, IID_PPV_ARGS(spAdapterList.put()))); + + winrt::com_ptr firstHardwareAdapter; + bool firstHardwareAdapterFound = false; + // select first hardware adapter with given preference + for (uint32_t i = 0; i < spAdapterList->GetAdapterCount(); i++) { + winrt::com_ptr spCurrAdapter; + RETURN_IF_FAILED(spAdapterList->GetAdapter(i, IID_PPV_ARGS(spCurrAdapter.put()))); + + bool isHardware; + RETURN_IF_FAILED(spCurrAdapter->GetProperty(DXCoreAdapterProperty::IsHardware, &isHardware)); + + if (isHardware) { + if (preference == DXGI_GPU_PREFERENCE_UNSPECIFIED) { + spCurrAdapter.copy_to(ppAdapter); + return S_OK; + } + + if (!firstHardwareAdapterFound) { + spCurrAdapter.copy_to(firstHardwareAdapter.put()); + firstHardwareAdapterFound = true; + } + + if (preference == DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE) { + bool isDetached; + RETURN_IF_FAILED(spCurrAdapter->GetProperty(DXCoreAdapterProperty::IsDetachable, &isDetached)); + + if (isDetached) { + spCurrAdapter.copy_to(ppAdapter); + return S_OK; + } + } else if (preference == DXGI_GPU_PREFERENCE_MINIMUM_POWER) { + bool isIntegrated; + RETURN_IF_FAILED(spCurrAdapter->GetProperty(DXCoreAdapterProperty::IsIntegrated, 
&isIntegrated)); + + if (isIntegrated) { + spCurrAdapter.copy_to(ppAdapter); + return S_OK; + } + } + } + } + // If a preference match wasn't found, return the first hardware adapter in the list + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_FOUND), !firstHardwareAdapterFound); + firstHardwareAdapter.copy_to(ppAdapter); + return S_OK; +} +#endif + +HRESULT CreateD3D11On12Device(ID3D12Device* device12, ID3D11Device** device11) { + return CommonDeviceHelpers::RunDelayLoadedApi( + D3D11On12CreateDevice, + device12, // pointer to d3d12 device + D3D11_CREATE_DEVICE_BGRA_SUPPORT, // required in order to interop with Direct2D + nullptr, // feature level (defaults to d3d12) + 0, // size of feature levels in bytes + nullptr, // an array of unique command queues for D3D11On12 to use + 0, // size of the command queue array + 0, // D3D12 device node to use + device11, // d3d11 device out param + nullptr, // pointer to d3d11 device context (unused) + nullptr); // pointer to the returned feature level (unused) +} + +HRESULT GetGPUPreference(winrt::Windows::AI::MachineLearning::LearningModelDeviceKind deviceKind, DXGI_GPU_PREFERENCE* preference) noexcept { + switch (deviceKind) { + case winrt::Windows::AI::MachineLearning::LearningModelDeviceKind::DirectX: { + *preference = DXGI_GPU_PREFERENCE_UNSPECIFIED; + return S_OK; + } + case winrt::Windows::AI::MachineLearning::LearningModelDeviceKind::DirectXHighPerformance: { + *preference = DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE; + return S_OK; + } + case winrt::Windows::AI::MachineLearning::LearningModelDeviceKind::DirectXMinPower: { + *preference = DXGI_GPU_PREFERENCE_MINIMUM_POWER; + return S_OK; + } + default: + // this should never be reached + return E_INVALIDARG; + } +} +} // namespace DeviceHelpers diff --git a/winml/lib/Api.Image/ImageConversionHelpers.cpp b/winml/lib/Api.Image/ImageConversionHelpers.cpp new file mode 100644 index 0000000000000..b57662480d141 --- /dev/null +++ b/winml/lib/Api.Image/ImageConversionHelpers.cpp @@ -0,0 
+1,355 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" +#include "inc/ImageConversionHelpers.h" + +using namespace Microsoft::WRL; +using namespace Windows::AI::MachineLearning::Internal; +using namespace Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Graphics::DirectX; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; + +namespace Windows::AI::MachineLearning::Internal::ImageConversionHelpers { + LUID GetLUIDFromDirect3DSurface(const IDirect3DSurface& surface) { + ComPtr spDx11Device; + ComPtr spDxgiInterfaceAccess; + ComPtr spDx11Texture2D; + ComPtr spDXGIDevice; + ComPtr spDXGIAdapter; + DXGI_ADAPTER_DESC adapterDesc = {0}; + + spDxgiInterfaceAccess = surface.as().get(); + WINML_THROW_IF_FAILED(spDxgiInterfaceAccess->GetInterface(IID_PPV_ARGS(&spDx11Texture2D))); + spDx11Texture2D->GetDevice(&spDx11Device); + WINML_THROW_IF_FAILED(spDx11Device->QueryInterface(IID_PPV_ARGS(&spDXGIDevice))); + WINML_THROW_IF_FAILED(spDXGIDevice->GetAdapter(&spDXGIAdapter)); + WINML_THROW_IF_FAILED(spDXGIAdapter->GetDesc(&adapterDesc)); + + return adapterDesc.AdapterLuid; + } + + HRESULT GetVideoFrameInfo( + _In_ const winrt::Windows::Media::IVideoFrame& inputVideoFrame, + _Out_ DWORD& format, + _Out_ int& width, + _Out_ int& height, + _Out_ LUID& luid) { + winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface spInputSurface = inputVideoFrame.Direct3DSurface(); + if (spInputSurface != nullptr) { + Direct3DSurfaceDescription description; + description = spInputSurface.Description(); + format = (DWORD)description.Format; + width = description.Width; + height = description.Height; + luid = GetLUIDFromDirect3DSurface(spInputSurface); + } else { + winrt::Windows::Graphics::Imaging::SoftwareBitmap spInputSoftwareBitmap = inputVideoFrame.SoftwareBitmap(); + if 
(spInputSoftwareBitmap != nullptr) { + format = (DWORD)spInputSoftwareBitmap.BitmapPixelFormat(); + height = spInputSoftwareBitmap.PixelHeight(); + width = spInputSoftwareBitmap.PixelWidth(); + luid.HighPart = luid.LowPart = 0; + } else { + return E_INVALIDARG; + } + } + return S_OK; + } + + void ConvertVideoFrameToVideoFrame( + _In_ const IVideoFrame& inputVideoFrame, + _In_ const BitmapBounds& inputBounds, + _In_ UINT32 outputWidth, + _In_ UINT32 outputHeight, + _Inout_ winrt::Windows::Media::VideoFrame& pOutputVideoFrame) { + BitmapBounds outputBounds = { + 0, + 0, + outputWidth, + outputHeight}; + + winrt::Windows::Graphics::Imaging::SoftwareBitmap spInputSoftwareBitmap = inputVideoFrame.SoftwareBitmap(); + winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface spInputDirect3DSurface = inputVideoFrame.Direct3DSurface(); + + // only one of softwarebitmap or direct3Dsurface should be non-null + if ((spInputSoftwareBitmap == nullptr && spInputDirect3DSurface == nullptr) || (spInputSoftwareBitmap != nullptr && spInputDirect3DSurface != nullptr)) { + WINML_THROW_HR(E_INVALIDARG); + } + + auto pInputVideoFrame2 = inputVideoFrame.as(); + pInputVideoFrame2.CopyToAsync(pOutputVideoFrame, inputBounds, outputBounds).get(); + } + + bool SoftwareBitmapFormatSupported(const SoftwareBitmap& softwareBitmap) { + assert(softwareBitmap != nullptr); + + switch (softwareBitmap.BitmapPixelFormat()) { + case BitmapPixelFormat::Bgra8: + case BitmapPixelFormat::Rgba8: + case BitmapPixelFormat::Gray8: + return true; + } + + return false; + } + + bool DirectXPixelFormatSupported(DirectXPixelFormat format) { + switch (format) { + case DirectXPixelFormat::B8G8R8X8UIntNormalized: + case DirectXPixelFormat::B8G8R8A8UIntNormalized: + case DirectXPixelFormat::R8G8B8A8UIntNormalized: + case DirectXPixelFormat::R8UIntNormalized: + return true; + } + + return false; + } + + bool FormatSupportedForUAV(_In_ ID3D12Device1* device, _In_ DXGI_FORMAT format) { + assert(device != nullptr); + + 
D3D12_FEATURE_DATA_FORMAT_SUPPORT formatSupport = {format}; + HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &formatSupport, sizeof(formatSupport)); + + return SUCCEEDED(hr) && (formatSupport.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW); + } + + // This helper method uses the input parameters do determine if a conversion is necessary + // A conversion is not necessary if + // 1. input bounds cover the entire input bitmap/surface (else we are cropping) + // 2. desired output size is equal to input size (else we are resizing) + // 3. (mapping softwarebitmap to softwarebitmap) OR (mapping from d3dsurface to d3dsurface AND the two surfaces are on the same device) + // 4. the input is already in the desired format (BGRA8/B8G8R8X8UIntNormalized) + bool NeedsVideoFrameConversion( + _In_ const IVideoFrame& inputVideoFrame, + _In_ LUID outputLuid, + _In_ const BitmapBounds& inputBounds, + _In_ UINT32 outputWidth, + _In_ UINT32 outputHeight) { + bool bNeedConversion = false; + HRESULT hr = S_OK; + + DWORD format = 0; + int width = 0, height = 0; + LUID luid; + + if (FAILED((hr = GetVideoFrameInfo(inputVideoFrame, format, width, height, luid)))) { + bNeedConversion = true; + } else if (((int)inputBounds.Width != outputWidth) || + (inputBounds.X != 0) || + ((int)inputBounds.Height != outputHeight) || + (inputBounds.Y != 0) || + (inputVideoFrame == nullptr)) // Check crop + { + bNeedConversion = true; + } else if (luid.HighPart != outputLuid.HighPart || + luid.LowPart != outputLuid.LowPart) { + bNeedConversion = true; + } else if (static_cast(width) != outputWidth || + static_cast(height) != outputHeight) { + bNeedConversion = true; + } else if (outputLuid.HighPart != 0 || + outputLuid.LowPart != 0) { + if (format != (DWORD)DirectXPixelFormat::B8G8R8X8UIntNormalized) { + bNeedConversion = true; + } + } else { + if (format != (DWORD)BitmapPixelFormat::Bgra8) { + bNeedConversion = true; + } + } + + TraceLoggingWrite( + 
winml_trace_logging_provider, + "InputVideoFrame", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingBool(bNeedConversion, "Convert"), + TraceLoggingHexInt32(hr, "HRESULT"), + TraceLoggingInt32(width, "iWidth"), + TraceLoggingInt32(outputWidth, "oWidth"), + TraceLoggingInt32(height, "iHeight"), + TraceLoggingInt32(outputWidth, "oHeight"), + TraceLoggingHexInt64(*((ULONGLONG*)&luid), "iLuid"), + TraceLoggingHexInt64(*((ULONGLONG*)&outputLuid), "oLuid"), + TraceLoggingHexInt32(format, "iFormat"), + TraceLoggingInt32(inputBounds.X, "rX"), + TraceLoggingInt32(inputBounds.Y, "rY"), + TraceLoggingInt32(inputBounds.Width, "rW"), + TraceLoggingInt32(inputBounds.Height, "rH")); + + return bNeedConversion; + } + + ImageTensorChannelType GetChannelTypeFromSoftwareBitmap(const SoftwareBitmap& softwareBitmap) { + assert(softwareBitmap != nullptr); + + switch (softwareBitmap.BitmapPixelFormat()) { + case BitmapPixelFormat::Bgra8: + return kImageTensorChannelTypeBGR8; + case BitmapPixelFormat::Rgba8: + return kImageTensorChannelTypeRGB8; + case BitmapPixelFormat::Gray8: + return kImageTensorChannelTypeGRAY8; + } + + WINML_THROW_HR(E_INVALIDARG); + } + + BitmapPixelFormat GetBitmapPixelFormatFromChannelType(ImageTensorChannelType channelType) { + switch (channelType) { + case kImageTensorChannelTypeBGR8: + return BitmapPixelFormat::Bgra8; + case kImageTensorChannelTypeRGB8: + return BitmapPixelFormat::Rgba8; + case kImageTensorChannelTypeGRAY8: + return BitmapPixelFormat::Gray8; + } + + WINML_THROW_HR(E_INVALIDARG); + } + + ImageTensorChannelType GetChannelTypeFromDirect3DSurface(const IDirect3DSurface& direct3DSurface) { + assert(direct3DSurface != nullptr); + + switch (direct3DSurface.Description().Format) { + case DirectXPixelFormat::B8G8R8A8UIntNormalized: + case DirectXPixelFormat::B8G8R8X8UIntNormalized: + return kImageTensorChannelTypeBGR8; + + case DirectXPixelFormat::R8G8B8A8UIntNormalized: + return kImageTensorChannelTypeRGB8; + + case 
DirectXPixelFormat::R8UIntNormalized: + return kImageTensorChannelTypeGRAY8; + } + + WINML_THROW_HR(E_INVALIDARG); + } + + DirectXPixelFormat GetDirectXPixelFormatFromDXGIFormat(DXGI_FORMAT dxgiFormat) { + switch (dxgiFormat) { + case DXGI_FORMAT_B8G8R8A8_UNORM: + return DirectXPixelFormat::B8G8R8A8UIntNormalized; + case DXGI_FORMAT_B8G8R8X8_UNORM: + return DirectXPixelFormat::B8G8R8X8UIntNormalized; + case DXGI_FORMAT_R8G8B8A8_UNORM: + return DirectXPixelFormat::R8G8B8A8UIntNormalized; + case DXGI_FORMAT_R8_UNORM: + return DirectXPixelFormat::R8UIntNormalized; + } + + WINML_THROW_HR(E_INVALIDARG); + } + + DXGI_FORMAT GetDXGIFormatFromDirectXPixelFormat(DirectXPixelFormat directXPixelFormat) { + switch (directXPixelFormat) { + case DirectXPixelFormat::B8G8R8A8UIntNormalized: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case DirectXPixelFormat::B8G8R8X8UIntNormalized: + return DXGI_FORMAT_B8G8R8X8_UNORM; + case DirectXPixelFormat::R8G8B8A8UIntNormalized: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case DirectXPixelFormat::R8UIntNormalized: + return DXGI_FORMAT_R8_UNORM; + } + + WINML_THROW_HR(E_INVALIDARG); + } + + DirectXPixelFormat GetDirectXPixelFormatFromChannelType(ImageTensorChannelType channelType) { + switch (channelType) { + case kImageTensorChannelTypeBGR8: + return DirectXPixelFormat::B8G8R8A8UIntNormalized; + case kImageTensorChannelTypeRGB8: + return DirectXPixelFormat::R8G8B8A8UIntNormalized; + case kImageTensorChannelTypeGRAY8: + return DirectXPixelFormat::R8UIntNormalized; + } + + WINML_THROW_HR(E_INVALIDARG); + } + + IDirect3DDevice GetDeviceFromDirect3DSurface(const IDirect3DSurface& d3dSurface) { + assert(d3dSurface != nullptr); + + ComPtr spDx11Texture2D; + ComPtr spDxgiInterfaceAccess = d3dSurface.as().get(); + WINML_THROW_IF_FAILED(spDxgiInterfaceAccess->GetInterface(IID_PPV_ARGS(&spDx11Texture2D))); + + ComPtr spDx11Device; + spDx11Texture2D->GetDevice(&spDx11Device); + + ComPtr spDXGIDevice; + 
WINML_THROW_IF_FAILED(spDx11Device->QueryInterface(IID_PPV_ARGS(&spDXGIDevice))); + + ComPtr<::IInspectable> spInspectable; + WINML_THROW_IF_FAILED(CreateDirect3D11DeviceFromDXGIDevice(spDXGIDevice.Get(), &spInspectable)); + + IDirect3DDevice d3dDevice; + WINML_THROW_IF_FAILED(spInspectable->QueryInterface(winrt::guid_of(), reinterpret_cast(winrt::put_abi(d3dDevice)))); + + return d3dDevice; + } + + bool TexturesHaveSameDevice(_In_ ID3D11Texture2D* pTexture1, _In_ ID3D11Texture2D* pTexture2) { + if (pTexture1 && pTexture2) { + ComPtr spDevice1; + pTexture1->GetDevice(&spDevice1); + + ComPtr spDevice2; + pTexture2->GetDevice(&spDevice2); + + return spDevice1.Get() == spDevice2.Get(); + } + + return false; + } + + bool TextureIsOnDevice(_In_ ID3D11Texture2D* pTexture, _In_ ID3D11Device* pDevice) { + if (pTexture && pDevice) { + ComPtr spDevice1; + pTexture->GetDevice(&spDevice1); + + return spDevice1.Get() == pDevice; + } + + return false; + } + + ComPtr GetTextureFromDirect3DSurface(const IDirect3DSurface& d3dSurface) { + auto spDxgiInterfaceAccess = d3dSurface.as(); + ComPtr d3d11Texture; + WINML_THROW_IF_FAILED(spDxgiInterfaceAccess->GetInterface(IID_PPV_ARGS(&d3d11Texture))); + + return d3d11Texture; + } + + bool VideoFramesHaveSameDimensions(const IVideoFrame& videoFrame1, const IVideoFrame& videoFrame2) { + if (videoFrame1 && videoFrame2) { + Direct3DSurfaceDescription desc1 = videoFrame1.Direct3DSurface().Description(); + Direct3DSurfaceDescription desc2 = videoFrame2.Direct3DSurface().Description(); + + return desc1.Width == desc2.Width && desc1.Height == desc2.Height; + } + + return false; + } + + bool VideoFramesHaveSameDevice(const IVideoFrame& videoFrame1, const IVideoFrame& videoFrame2) { + if (videoFrame1 && videoFrame2) { + ComPtr spTexture1 = GetTextureFromDirect3DSurface(videoFrame1.Direct3DSurface()); + ComPtr spTexture2 = GetTextureFromDirect3DSurface(videoFrame2.Direct3DSurface()); + + ComPtr spDevice1, spDevice2; + 
spTexture1->GetDevice(&spDevice1); + spTexture2->GetDevice(&spDevice2); + + return spDevice1.Get() == spDevice2.Get(); + } + + return false; + } +} \ No newline at end of file diff --git a/winml/lib/Api.Image/ImageConverter.cpp b/winml/lib/Api.Image/ImageConverter.cpp new file mode 100644 index 0000000000000..956d9d69d8f8e --- /dev/null +++ b/winml/lib/Api.Image/ImageConverter.cpp @@ -0,0 +1,141 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" +#include "inc/ImageConverter.h" +#include "inc/ImageConversionHelpers.h" +#include "inc/D3DDeviceCache.h" + +using namespace Microsoft::WRL; +using namespace Windows::Graphics::DirectX::Direct3D11; +using namespace Windows::AI::MachineLearning::Internal; +using namespace winrt::Windows::AI::MachineLearning::implementation; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Graphics::DirectX; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; + +void ImageConverter::SyncD3D11ToD3D12(_In_ D3DDeviceCache& device_cache, _In_ ID3D11Texture2D* pD3D11Texture) { + assert(pD3D11Texture != nullptr); + + ComPtr spTextureDevice; + pD3D11Texture->GetDevice(&spTextureDevice); + + if (spTextureDevice.Get() == device_cache.GetD3D11Device()) { + // If the texture is on D3DDeviceCache's device, we sync using D3DDeviceCache's fences + device_cache.GPUSyncD3D11ToD3D12(); + } else { + // Otherwise, sync using our own cached fences + ComPtr spD3D11DeviceFence = FetchOrCreateFenceOnDevice(device_cache, spTextureDevice.Get()); + device_cache.SyncD3D11DeviceToConverter(spD3D11DeviceFence.Get()); + } +} + +void ImageConverter::SyncD3D12ToD3D11(_In_ D3DDeviceCache& device_cache, _In_ ID3D11Texture2D* spTexture) { + assert(spTexture != nullptr); + + ComPtr spTextureDevice; + spTexture->GetDevice(&spTextureDevice); + + if (spTextureDevice.Get() == device_cache.GetD3D11Device()) { + // If the 
texture is on D3DDeviceCache's device, we sync using D3DDeviceCache's fences + device_cache.GPUSyncD3D12ToD3D11(); + } else { + // Otherwise, sync using our own cached fences + ComPtr spD3D11DeviceFence = FetchOrCreateFenceOnDevice(device_cache, spTextureDevice.Get()); + device_cache.SyncConverterToD3D11Device(spD3D11DeviceFence.Get()); + } +} + +ComPtr ImageConverter::FetchOrCreateFenceOnDevice(_In_ D3DDeviceCache& device_cache, _In_ ID3D11Device* pD3D11Device) { + assert(pD3D11Device != nullptr); + + ComPtr fence; + UINT comPtrSize = static_cast(sizeof(fence.GetAddressOf())); + + if (FAILED(pD3D11Device->GetPrivateData(device_cache.GetFenceGuid(), &comPtrSize, fence.GetAddressOf())) || fence.Get() == nullptr) { + // There's no fence on the device, so create a new one + ComPtr spD3D11Device5; + WINML_THROW_IF_FAILED(pD3D11Device->QueryInterface(IID_PPV_ARGS(&spD3D11Device5))); + WINML_THROW_IF_FAILED(spD3D11Device5->OpenSharedFence(device_cache.GetConverterFenceHandle(), IID_PPV_ARGS(&fence))); + + // Store the fence on the device + WINML_THROW_IF_FAILED(spD3D11Device5->SetPrivateDataInterface(device_cache.GetFenceGuid(), fence.Get())); + } + + return fence; +} + +void ImageConverter::ResetCommandList(_In_ D3DDeviceCache& device_cache) { + if (!command_list_) { + assert(command_allocator_ == nullptr); + + WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommandAllocator( + device_cache.GetCommandQueue()->GetDesc().Type, + IID_PPV_ARGS(command_allocator_.ReleaseAndGetAddressOf()))); + + WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommandList( + 0, + device_cache.GetCommandQueue()->GetDesc().Type, + command_allocator_.Get(), + pipeline_state_.Get(), + IID_PPV_ARGS(command_list_.ReleaseAndGetAddressOf()))); + } else { + command_list_->Reset(command_allocator_.Get(), pipeline_state_.Get()); + } +} + +void ImageConverter::ResetAllocator() { + WINML_THROW_IF_FAILED(command_allocator_->Reset()); +} + +ComPtr 
ImageConverter::CreateTextureFromUnsupportedColorFormat( + const IVideoFrame& videoFrame, + const BitmapBounds& inputBounds, + const BitmapBounds& outputBounds, + DirectXPixelFormat newFormat) { + assert(videoFrame != nullptr); + + // Make sure we create the new video frame on the same device. We don't want the VideoFrame pipeline to implicitly share the texture between + // 2 devices since we will need to do it ourselves anyway. + IDirect3DDevice device = ImageConversionHelpers::GetDeviceFromDirect3DSurface(videoFrame.Direct3DSurface()); + + VideoFrame spNewVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(newFormat, outputBounds.Width, outputBounds.Height, device); + videoFrame.as().CopyToAsync(spNewVideoFrame, inputBounds, outputBounds).get(); + + auto spDxgiInterfaceAccess = spNewVideoFrame.Direct3DSurface().as(); + ComPtr d3d11Texture; + WINML_THROW_IF_FAILED(spDxgiInterfaceAccess->GetInterface(IID_PPV_ARGS(&d3d11Texture))); + + return d3d11Texture; +} + +void ImageConverter::CopyTextureIntoTexture(_In_ ID3D11Texture2D* pTextureFrom, _In_ const BitmapBounds& inputBounds, _Inout_ ID3D11Texture2D* pTextureTo) { + assert(pTextureFrom != nullptr); + assert(pTextureTo != nullptr); + + D3D11_TEXTURE2D_DESC textureFromDesc, textureToDesc; + pTextureFrom->GetDesc(&textureFromDesc); + pTextureTo->GetDesc(&textureToDesc); + + assert(inputBounds.Width <= textureFromDesc.Width && inputBounds.Width <= textureToDesc.Width); + assert(inputBounds.Height <= textureFromDesc.Height && inputBounds.Height <= textureToDesc.Height); + + ComPtr spDeviceFrom, spDeviceTo; + pTextureFrom->GetDevice(&spDeviceFrom); + pTextureTo->GetDevice(&spDeviceTo); + + assert(spDeviceFrom.Get() == spDeviceTo.Get()); + + ComPtr spDeviceContext; + spDeviceFrom->GetImmediateContext(&spDeviceContext); + + if (textureFromDesc.Width != textureToDesc.Width || textureFromDesc.Height != textureToDesc.Height) { + // We can't copy the whole resource, so we have to use the slower CopySubresource() 
function + D3D11_BOX cropBox = CD3D11_BOX(inputBounds.X, inputBounds.Y, 0, inputBounds.X + inputBounds.Width, inputBounds.Y + inputBounds.Height, 1); + spDeviceContext->CopySubresourceRegion(pTextureTo, 0, 0, 0, 0, pTextureFrom, 0, &cropBox); + } else { + // Use the faster CopyResource() function since both textures have the same dimensions + spDeviceContext->CopyResource(pTextureTo, pTextureFrom); + } +} \ No newline at end of file diff --git a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp new file mode 100644 index 0000000000000..bf717f648c7f0 --- /dev/null +++ b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp @@ -0,0 +1,611 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#include // winmeta needed for TraceLoggingKeyword +#include +#include +#include + +#include "inc/D3DDeviceCache.h" +#include "inc/TensorToVideoFrameConverter.h" +#include "CpuDetensorizer.h" + +#include "LearningModelDevice.h" + +using namespace Microsoft::WRL; +using namespace Windows::AI::MachineLearning::Internal; +using namespace Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::AI::MachineLearning::implementation; +using namespace winrt::Windows::Graphics::DirectX; + +class GPUTensorToDX12TextureTelemetryEvent { + public: + GPUTensorToDX12TextureTelemetryEvent(const ImageTensorDescription& tensorDesc) { + TraceLoggingWrite( + winml_trace_logging_provider, + "GPUTensorToDX12Texture", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_START), + TraceLoggingHexInt32(tensorDesc.channelType, "Type"), + TraceLoggingInt64(tensorDesc.sizes[2], "Height"), + TraceLoggingInt64(tensorDesc.sizes[3], "Width")); + } + 
~GPUTensorToDX12TextureTelemetryEvent() { + TraceLoggingWrite( + winml_trace_logging_provider, + "GPUTensorToDX12Texture", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_STOP), + TraceLoggingHexInt32(S_OK, "HRESULT")); + } +}; + +class ConvertCPUTensorToVideoFrameWithSoftwareBitmapTelemetryEvent { + public: + ConvertCPUTensorToVideoFrameWithSoftwareBitmapTelemetryEvent(const ImageTensorDescription& tensorDesc) { + TraceLoggingWrite( + winml_trace_logging_provider, + "ConvertCPUTensorToVideoFrameWithSoftwareBitmap", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_START), + TraceLoggingHexInt32(tensorDesc.channelType, "Type"), + TraceLoggingInt64(tensorDesc.sizes[2], "Height"), + TraceLoggingInt64(tensorDesc.sizes[3], "Width")); + } + ~ConvertCPUTensorToVideoFrameWithSoftwareBitmapTelemetryEvent() { + TraceLoggingWrite( + winml_trace_logging_provider, + "ConvertCPUTensorToVideoFrameWithSoftwareBitmap", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_STOP), + TraceLoggingHexInt32(S_OK, "HRESULT")); + } +}; + +void TensorToVideoFrameConverter::DX12TensorToVideoFrame( + _In_ UINT32 batchIdx, + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ ID3D12Resource* pInputTensor, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ VideoFrame& destVideoFrame) { + CWinMLAutoLock lock(&lock_); + + auto spDevice = session.Device().as(); + D3DDeviceCache* pDeviceCache = spDevice->GetD3DDeviceCache(); + + IDirect3DSurface spDestDirect3DSurface = destVideoFrame.Direct3DSurface(); + SoftwareBitmap softwareBitmap = destVideoFrame.SoftwareBitmap(); + + if (softwareBitmap) { + ConvertGPUTensorToSoftwareBitmap(batchIdx, pInputTensor, *pDeviceCache, tensorDesc, softwareBitmap); + } else if (spDestDirect3DSurface) { + bool isUAVSupportedFormat = ImageConversionHelpers::FormatSupportedForUAV( + 
pDeviceCache->GetD3D12Device(), + ImageConversionHelpers::GetDXGIFormatFromDirectXPixelFormat(spDestDirect3DSurface.Description().Format)); + + // UAV support for formats is device dependent + if (!isUAVSupportedFormat) { + ConvertDX12TensorToUnsupportedVideoFrameFormat(batchIdx, pInputTensor, *pDeviceCache, tensorDesc, destVideoFrame); + } else { + ComPtr spVideoFrameTexture = ImageConversionHelpers::GetTextureFromDirect3DSurface(destVideoFrame.Direct3DSurface()); + + D3D11_TEXTURE2D_DESC videoFrameTextureDesc; + spVideoFrameTexture->GetDesc(&videoFrameTextureDesc); + BitmapBounds bounds = {0, 0, videoFrameTextureDesc.Width, videoFrameTextureDesc.Height}; + + if (ImageConversionHelpers::TextureIsOnDevice(spVideoFrameTexture.Get(), pDeviceCache->GetD3D11Device())) { + // The texture is on our device, so we can just create own texture, share it and cache it + if (!output_resource_) { + output_resource_ = CreateShareableD3D12Texture(videoFrameTextureDesc, pDeviceCache->GetD3D12Device()); + D3D11_cached_texture_ = ShareD3D12Texture(output_resource_.Get(), pDeviceCache->GetD3D11Device()); + } else { + D3D12_RESOURCE_DESC cachedTextureDesc = output_resource_->GetDesc(); + + if (cachedTextureDesc.Width != videoFrameTextureDesc.Width || cachedTextureDesc.Height != videoFrameTextureDesc.Height || cachedTextureDesc.Format != videoFrameTextureDesc.Format) { + // The dimensions or format don't match, so we need to re-create our texture + output_resource_ = CreateShareableD3D12Texture(videoFrameTextureDesc, pDeviceCache->GetD3D12Device()); + D3D11_cached_texture_ = ShareD3D12Texture(output_resource_.Get(), pDeviceCache->GetD3D11Device()); + } + } + + // Detensorize + ConvertGPUTensorToDX12Texture(batchIdx, pInputTensor, *pDeviceCache, tensorDesc, output_resource_.Get()); + + // Make sure that detensorization is done + SyncD3D12ToD3D11(*pDeviceCache, D3D11_cached_texture_.Get()); + + // Finally, copy the detensorized texture to the user's device + 
CopyTextureIntoTexture(D3D11_cached_texture_.Get(), bounds, spVideoFrameTexture.Get()); + } else { + // We are not on the same device, so we can't rely on our own cached texture + ComPtr spTextureDevice; + spVideoFrameTexture->GetDevice(&spTextureDevice); + + ComPtr spSharedD3D11Texture; + HANDLE sharedHandle = nullptr; + UINT comPtrSize = static_cast(sizeof(spSharedD3D11Texture.GetAddressOf())); + UINT handleSize = static_cast(sizeof(sharedHandle)); + + if ((FAILED(spVideoFrameTexture->GetPrivateData(_d3d11TextureGUID, &comPtrSize, spSharedD3D11Texture.GetAddressOf())) || !spSharedD3D11Texture.Get()) || (FAILED(spVideoFrameTexture->GetPrivateData(_handleGUID, &handleSize, &sharedHandle)) || sharedHandle != shared_handle_)) { + // Create a new shared texture that we cache on the video frame texture + output_resource_ = CreateShareableD3D12Texture(videoFrameTextureDesc, pDeviceCache->GetD3D12Device()); + spSharedD3D11Texture = ShareD3D12Texture(output_resource_.Get(), spTextureDevice.Get()); + + // Cache the shared texture on the video frame texture in order to tie their lifetime together + WINML_THROW_IF_FAILED(spVideoFrameTexture->SetPrivateDataInterface(_d3d11TextureGUID, spSharedD3D11Texture.Get())); + WINML_THROW_IF_FAILED(spVideoFrameTexture->SetPrivateData(_handleGUID, sizeof(shared_handle_), &shared_handle_)); + } + + // Detensorize + ConvertGPUTensorToDX12Texture(batchIdx, pInputTensor, *pDeviceCache, tensorDesc, output_resource_.Get()); + + // Make sure that detensorization is done + SyncD3D12ToD3D11(*pDeviceCache, spSharedD3D11Texture.Get()); + + // Finally, copy the detensorized texture to the user's device + CopyTextureIntoTexture(spSharedD3D11Texture.Get(), bounds, spVideoFrameTexture.Get()); + } + } + } else { + // Invalid video frame + WINML_THROW_HR(E_INVALIDARG); + } +} + +ComPtr TensorToVideoFrameConverter::CreateShareableD3D12Texture( + const D3D11_TEXTURE2D_DESC& d3d11Desc, + ID3D12Device* d3d12Device) { + D3D12_HEAP_PROPERTIES heapProps {}; + 
heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_RESOURCE_DESC resDesc {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resDesc.Width = d3d11Desc.Width; + resDesc.Height = d3d11Desc.Height; + resDesc.DepthOrArraySize = static_cast(d3d11Desc.ArraySize); + resDesc.MipLevels = static_cast(d3d11Desc.MipLevels); + resDesc.Format = d3d11Desc.Format; + resDesc.SampleDesc = d3d11Desc.SampleDesc; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + resDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; + + ComPtr d3d12Resource; + WINML_THROW_IF_FAILED(d3d12Device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_SHARED, + &resDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&d3d12Resource))); + + return d3d12Resource; +} + +void TensorToVideoFrameConverter::ConvertDX12TensorToUnsupportedVideoFrameFormat( + _In_ UINT32 batchIdx, + _In_ ID3D12Resource* pInputTensor, + _In_ D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ VideoFrame& unsupportedVideoFrame) { + assert(pInputTensor != nullptr); + + // Find the first supported format and convert to it + auto supportedFormatIter = std::find_if( + ImageConversionHelpers::supportedWinMLFormats.begin(), + ImageConversionHelpers::supportedWinMLFormats.end(), + [&device_cache](DXGI_FORMAT format) { return ImageConversionHelpers::FormatSupportedForUAV(device_cache.GetD3D12Device(), format); }); + + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + supportedFormatIter != ImageConversionHelpers::supportedWinMLFormats.end(), + "Detensorization for this format is unsupported on the current device."); + + D3D11_TEXTURE2D_DESC supportedDesc {}; + supportedDesc.Width = unsupportedVideoFrame.Direct3DSurface().Description().Width; + supportedDesc.Height = unsupportedVideoFrame.Direct3DSurface().Description().Height; + supportedDesc.MipLevels = 1; + supportedDesc.ArraySize = 1; + supportedDesc.Format = 
*supportedFormatIter; + supportedDesc.SampleDesc.Count = 1; + supportedDesc.SampleDesc.Quality = 0; + supportedDesc.Usage = D3D11_USAGE_DEFAULT; + + ComPtr unsupportedTexture = ImageConversionHelpers::GetTextureFromDirect3DSurface(unsupportedVideoFrame.Direct3DSurface()); + + ComPtr d3d11Device; + unsupportedTexture->GetDevice(&d3d11Device); + + output_resource_ = CreateShareableD3D12Texture(supportedDesc, device_cache.GetD3D12Device()); + ComPtr spSharedD3D11Texture = ShareD3D12Texture(output_resource_.Get(), d3d11Device.Get()); + + ComPtr dxgiSurface; + WINML_THROW_IF_FAILED(spSharedD3D11Texture->QueryInterface(IID_PPV_ARGS(&dxgiSurface))); + + ComPtr inspectableSurface; + WINML_THROW_IF_FAILED(CreateDirect3D11SurfaceFromDXGISurface(dxgiSurface.Get(), &inspectableSurface)); + + IDirect3DSurface surface; + WINML_THROW_IF_FAILED(inspectableSurface->QueryInterface(winrt::guid_of(), reinterpret_cast(winrt::put_abi(surface)))); + converted_video_frame_ = VideoFrame::CreateWithDirect3D11Surface(surface); + + // Detensorize + ConvertGPUTensorToDX12Texture(batchIdx, pInputTensor, device_cache, tensorDesc, output_resource_.Get()); + + // Wait for the D3D12 work to complete before using the resource + SyncD3D12ToD3D11(device_cache, spSharedD3D11Texture.Get()); + + // Finally, convert and copy the texture to the destination video frame + converted_video_frame_.CopyToAsync(unsupportedVideoFrame).get(); +} + +ComPtr TensorToVideoFrameConverter::ShareD3D12Texture(ID3D12Resource* pResource, ID3D11Device* pDevice) +{ + assert(pResource != nullptr); + assert(pDevice != nullptr); + + ComPtr d3d12Device; + WINML_THROW_IF_FAILED(pResource->GetDevice(IID_PPV_ARGS(&d3d12Device))); + + HANDLE hSharedTexture; + WINML_THROW_IF_FAILED(d3d12Device->CreateSharedHandle(pResource, nullptr, GENERIC_ALL, nullptr, &hSharedTexture)); + + ComPtr device1; + WINML_THROW_IF_FAILED(pDevice->QueryInterface(IID_PPV_ARGS(&device1))); + + wil::unique_handle safeHandle(hSharedTexture); + + ComPtr 
d3d11Texture; + WINML_THROW_IF_FAILED(device1->OpenSharedResource1(safeHandle.get(), IID_PPV_ARGS(&d3d11Texture))); + + shared_handle_ = safeHandle.get(); + + return d3d11Texture; +} + +void TensorToVideoFrameConverter::SoftwareTensorToVideoFrame( + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ BYTE* pCPUTensorToConvert, + _In_ ImageTensorDescription tensorDesc, + _Inout_ winrt::Windows::Media::VideoFrame& pDestVideoFrame) { + CWinMLAutoLock lock(&lock_); + winrt::Windows::Media::IVideoFrame spTensorFrame; + UINT32 outputWidth = 0; + UINT32 outputHeight = 0; + + UINT32 tensorHeight = static_cast(tensorDesc.sizes[2]); + UINT32 tensorWidth = static_cast(tensorDesc.sizes[3]); + // create a bitmap bounds for the whole image/tensor + BitmapBounds inputBounds = + { + 0, + 0, + tensorWidth, + tensorHeight}; + + winrt::Windows::Graphics::Imaging::SoftwareBitmap spOutputSoftwareBitmap = pDestVideoFrame.SoftwareBitmap(); + winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface spOutputSurface = pDestVideoFrame.Direct3DSurface(); + + // only one of softwarebitmap or direct3Dsurface should be non-null + if ((spOutputSoftwareBitmap == nullptr && spOutputSurface == nullptr) || (spOutputSoftwareBitmap != nullptr && spOutputSurface != nullptr)) { + WINML_THROW_HR(E_INVALIDARG); + } + if (spOutputSoftwareBitmap) { + outputWidth = spOutputSoftwareBitmap.PixelWidth(); + outputHeight = spOutputSoftwareBitmap.PixelHeight(); + } else { + Direct3DSurfaceDescription description; + description = spOutputSurface.Description(); + outputWidth = description.Width; + outputHeight = description.Height; + } + + if (ImageConversionHelpers::NeedsVideoFrameConversion(pDestVideoFrame, {}, {0, 0, (UINT32)tensorWidth, (UINT32)tensorHeight}, tensorWidth, tensorHeight)) { + if (converted_video_frame_ == nullptr || + ImageConversionHelpers::NeedsVideoFrameConversion(converted_video_frame_, {}, {0, 0, (UINT32)tensorWidth, (UINT32)tensorHeight}, tensorWidth, 
tensorHeight)) { + converted_video_frame_ = VideoFrame::CreateWithSoftwareBitmap(SoftwareBitmap(BitmapPixelFormat::Bgra8, tensorWidth, tensorHeight)); + } + + spTensorFrame = converted_video_frame_; + } else { + spTensorFrame = pDestVideoFrame; + converted_video_frame_ = nullptr; + } + auto bitmap = spTensorFrame.SoftwareBitmap(); + ConvertCPUTensorToSoftwareBitmap( + pCPUTensorToConvert, + tensorDesc, + bitmap); + + if (converted_video_frame_) { + ImageConversionHelpers::ConvertVideoFrameToVideoFrame( + converted_video_frame_, + inputBounds, + outputWidth, + outputHeight, + pDestVideoFrame); + } +} + +void TensorToVideoFrameConverter::ConvertGPUTensorToDX12Texture( + _In_ UINT32 batchIdx, + _In_ ID3D12Resource* pInputResource, + _In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ ID3D12Resource* pOutputResource) { + assert(pInputResource != nullptr); + assert(pOutputResource != nullptr); + + CWinMLAutoLock lock(&lock_); + D3D12_RESOURCE_DESC inputDesc = pInputResource->GetDesc(); + D3D12_RESOURCE_DESC outputDesc = pOutputResource->GetDesc(); + CD3DX12_VIEWPORT viewport((float)0, (float)0, (float)outputDesc.Width, (float)outputDesc.Height); + CD3DX12_RECT scissorRect(0, 0, (LONG)outputDesc.Width, outputDesc.Height); + ComPtr spDx12Device = device_cache.GetD3D12Device(); + + GPUTensorToDX12TextureTelemetryEvent telemetrylogger(tensorDesc); + + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + outputDesc.Format == DXGI_FORMAT_B8G8R8A8_UNORM || outputDesc.Format == DXGI_FORMAT_R8G8B8A8_UNORM || outputDesc.Format == DXGI_FORMAT_R8_UNORM, + "Format was output image %d. Output image format must be Bgra8, Rgba8 or Gray8.", + outputDesc.Format); + + // Validate input description + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, inputDesc.Height != 0, "Invalid input image height provided. 
Height is set to zero."); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, inputDesc.Width != 0, "Invalid input image height provided. Height is set to zero."); + + // Validate output description + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, outputDesc.Height != 0, "Invalid input image height provided. Height is set to zero."); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, outputDesc.Width != 0, "Invalid input image height provided. Height is set to zero."); + + // Validate Tensor description + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.dataType == kImageTensorDataTypeFloat32 || tensorDesc.dataType == kImageTensorDataTypeFloat16, "Target tensor description must either be kImageTensorDataTypeFloat32, or kImageTensorDataTypeFloat16. %d was supplied.", tensorDesc.dataType); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeRGB8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeRGB8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeBGR8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeBGR8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeGRAY8 || tensorDesc.sizes[1] == 1, "Target tensor description expects kImageTensorChannelTypeGRAY8, but has %lld channels specified instead of 1.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[2] == outputDesc.Height, "Target tensor height (%lld) does not match input height (%d).", tensorDesc.sizes[2], outputDesc.Height); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[3] == (UINT)outputDesc.Width, "Target tensor width (%lld) does not match input width (%d).", tensorDesc.sizes[3], (UINT)outputDesc.Width); + + // Create descriptor heaps + UINT 
srvUavDescriptorSize = spDx12Device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + // Create a UAV resource for the shader + D3D12_RESOURCE_DESC outputResourceDesc = output_resource_->GetDesc(); + outputResourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + if (!UAV_resource_ || outputDesc.Format != UAV_resource_->GetDesc().Format || outputDesc.Width != UAV_resource_->GetDesc().Width || outputDesc.Height != UAV_resource_->GetDesc().Height) { + WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &outputResourceDesc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + nullptr, + IID_PPV_ARGS(&UAV_resource_))); + } + + if (descriptor_heap_ == nullptr) { + // Describe and create a shader resource view (SRV) and unordered access view (UAV) descriptor heap. + D3D12_DESCRIPTOR_HEAP_DESC srvUavHeapDesc = {}; + srvUavHeapDesc.NumDescriptors = DescriptorCount; + srvUavHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvUavHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + WINML_THROW_IF_FAILED(spDx12Device->CreateDescriptorHeap(&srvUavHeapDesc, IID_PPV_ARGS(&descriptor_heap_))); + descriptor_heap_->SetName(L"Detensorize Descriptor Heap"); + } + + // Create SRV and UAV for input and output respectively + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = CreateSRVDescriptor(batchIdx, inputDesc, tensorDesc); + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(descriptor_heap_->GetCPUDescriptorHandleForHeapStart(), SrvBufferIdx, srvUavDescriptorSize); + spDx12Device->CreateShaderResourceView(pInputResource, &srvDesc, srvHandle); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = outputResourceDesc.Format; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle(descriptor_heap_->GetCPUDescriptorHandleForHeapStart(), UavBufferIdx, srvUavDescriptorSize); + 
spDx12Device->CreateUnorderedAccessView(UAV_resource_.Get(), nullptr, &uavDesc, uavHandle); + } + + // + // Pipeline setup for shader operation + // + PipelineStateCacheType type = PipelineStateCacheType::kFloat32; + if (tensorDesc.dataType == kImageTensorDataTypeFloat16) { + type = PipelineStateCacheType::kFloat16; + } + + // Set the origin format + PipelineStateCacheFormat formatFrom = PipelineStateCacheFormat::kBGR8; + if (tensorDesc.channelType == kImageTensorChannelTypeRGB8) { + formatFrom = PipelineStateCacheFormat::kRGB8; + } else if (inputDesc.Format == kImageTensorChannelTypeGRAY8) { + formatFrom = PipelineStateCacheFormat::kGRAY8; + } + + // Set the destination format + PipelineStateCacheFormat formatTo = PipelineStateCacheFormat::kBGR8; + if (outputDesc.Format == DXGI_FORMAT_R8G8B8A8_UNORM) { + formatTo = PipelineStateCacheFormat::kRGB8; + } else if (outputDesc.Format == DXGI_FORMAT_R8_UNORM) { + formatTo = PipelineStateCacheFormat::kGRAY8; + } + + root_signature_ = device_cache.GetDetensorizeRootSignature(); + pipeline_state_ = device_cache.GetCachedPipelineState(type, formatFrom, formatTo, PipelineStateCacheOperation::kDetensorize); + + ResetCommandList(device_cache); + + // Write compute commands into the command list and put it into the queue. 
+ { + command_list_->SetComputeRootSignature(root_signature_.Get()); + + ID3D12DescriptorHeap* ppHeaps[] = {descriptor_heap_.Get()}; + command_list_->SetDescriptorHeaps(_countof(ppHeaps), ppHeaps); + + CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandle(descriptor_heap_->GetGPUDescriptorHandleForHeapStart(), SrvBufferIdx, srvUavDescriptorSize); + CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandle(descriptor_heap_->GetGPUDescriptorHandleForHeapStart(), UavBufferIdx, srvUavDescriptorSize); + { + ConstantBufferCS constantBufferCS = {}; + constantBufferCS.height = static_cast(tensorDesc.sizes[2]); + constantBufferCS.width = static_cast(tensorDesc.sizes[3]); + command_list_->SetComputeRoot32BitConstants(0, 2, &constantBufferCS, 0); + } + command_list_->SetComputeRootDescriptorTable(1, srvHandle); + command_list_->SetComputeRootDescriptorTable(2, uavHandle); + + auto dispatchWidth = static_cast((tensorDesc.sizes[3] - 1) / 16 + 1); + auto dispatchHeight = static_cast((tensorDesc.sizes[2] - 1) / 4 + 1); + + command_list_->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pInputResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); + command_list_->Dispatch(dispatchWidth, dispatchHeight, 1); + command_list_->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pInputResource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); + + // Copy the UAV data to the output resource after detensorization + command_list_->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(UAV_resource_.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE)); + command_list_->CopyResource(pOutputResource, UAV_resource_.Get()); + command_list_->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(UAV_resource_.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); + + WINML_THROW_IF_FAILED(command_list_->Close()); + ID3D12CommandList* pComputeToGPUCLs[] = 
{command_list_.Get()}; + device_cache.GetCommandQueue()->ExecuteCommandLists(ARRAYSIZE(pComputeToGPUCLs), pComputeToGPUCLs); + } +} + +void TensorToVideoFrameConverter::ConvertGPUTensorToSoftwareBitmap( + _In_ UINT32 batchIdx, + _In_ ID3D12Resource* pInputTensor, + _In_ D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ SoftwareBitmap& softwareBitmap) { + assert(pInputTensor != nullptr); + assert(softwareBitmap != nullptr); + + GPUTensorToDX12TextureTelemetryEvent telemetrylogger(tensorDesc); + + uint32_t tensorElementSize = tensorDesc.dataType == kImageTensorDataTypeFloat32 ? 4 : 2; + uint32_t singleVideoFramebufferSize = static_cast(tensorDesc.sizes[1] * tensorDesc.sizes[2] * tensorDesc.sizes[3] * tensorElementSize); + + // TODO: Make an allocator for readback heaps + if (!readback_heap_ || readback_heap_->GetDesc().Width < singleVideoFramebufferSize) { + WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(singleVideoFramebufferSize), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&readback_heap_))); + } + + ResetCommandList(device_cache); + command_list_->CopyBufferRegion(readback_heap_.Get(), 0, pInputTensor, singleVideoFramebufferSize * batchIdx, singleVideoFramebufferSize); + + WINML_THROW_IF_FAILED(command_list_->Close()); + ID3D12CommandList* ppCommandLists[] = {command_list_.Get()}; + device_cache.GetCommandQueue()->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // Sync to make sure the the heap received all the data + device_cache.SyncD3D12ToCPU(); + + void* pCPUTensorBuffer = nullptr; + WINML_THROW_IF_FAILED(readback_heap_->Map(0, &CD3DX12_RANGE(0, singleVideoFramebufferSize), &pCPUTensorBuffer)); + + // We avoid the Video Frame pipeline by manually downloading the GPU data to the CPU and detensorize while we are filling the readback heap + 
ConvertCPUTensorToSoftwareBitmap(pCPUTensorBuffer, tensorDesc, softwareBitmap); + + readback_heap_->Unmap(0, &CD3DX12_RANGE(0, 0)); +} + +D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor( + const UINT32 batchIdx, + const D3D12_RESOURCE_DESC& resourceDesc, + const ImageTensorDescription& desc) { + UINT uiTensorElementSize = + desc.dataType == kImageTensorDataTypeFloat32 ? sizeof(UINT) : sizeof(uint16_t); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + UINT singleImageSize = static_cast(desc.sizes[1] * desc.sizes[2] * desc.sizes[3]); + srvDesc.Buffer.FirstElement = batchIdx * desc.sizes[1] * desc.sizes[2] * desc.sizes[3]; + srvDesc.Buffer.NumElements = singleImageSize; + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + if (desc.dataType == kImageTensorDataTypeFloat32) { + // fp32 uses structured buffers so the format can be set to unknown, + // and the stride needs to be set. + srvDesc.Format = resourceDesc.Format; + srvDesc.Buffer.StructureByteStride = uiTensorElementSize; + } else if (desc.dataType == kImageTensorDataTypeFloat16) { + // fp16 uses unstructured buffers because structured buffers dont support fp16 on + // most hardware. The format can be set to unknown to a specific known format, + // and the stride must be zeroed. 
+ srvDesc.Format = DXGI_FORMAT_R16_FLOAT; + srvDesc.Buffer.StructureByteStride = 0; + } else { + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + false, + "Tensorization conversion is only supported to kImageTensorDataTypeFloat32, or kImageTensorDataTypeFloat16."); + } + + return srvDesc; +} + +void TensorToVideoFrameConverter::ConvertCPUTensorToSoftwareBitmap( + _In_ void* pCPUTensor, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ SoftwareBitmap& softwareBitmap) { + ConvertCPUTensorToVideoFrameWithSoftwareBitmapTelemetryEvent telemetrylogger(tensorDesc); + + auto height = softwareBitmap.PixelHeight(); + auto width = softwareBitmap.PixelWidth(); + auto format = softwareBitmap.BitmapPixelFormat(); + + // Validate input description + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + format == BitmapPixelFormat::Bgra8 || format == BitmapPixelFormat::Rgba8 || format == BitmapPixelFormat::Gray8, + "Format was input image %d. Input image format must Bgra8, Rgba8 or Gray8.", + format); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, height > 0, "Output input image height provided. Height is set to zero."); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, width > 0, "Output input image width provided. Width is set to zero."); + + // Validate Tensor description + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.dataType == kImageTensorDataTypeFloat32 || tensorDesc.dataType == kImageTensorDataTypeFloat16, "Target tensor description must either be kImageTensorDataTypeFloat32, or kImageTensorDataTypeFloat16. 
%d was supplied.", tensorDesc.dataType); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeRGB8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeRGB8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeBGR8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeBGR8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeGRAY8 || tensorDesc.sizes[1] == 1, "Target tensor description expects kImageTensorChannelTypeGRAY8, but has %lld channels specified instead of 1.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + tensorDesc.channelType == kImageTensorChannelTypeGRAY8 || + tensorDesc.channelType == kImageTensorChannelTypeBGR8 || + tensorDesc.channelType == kImageTensorChannelTypeRGB8, + "Target tensor description expects kImageTensorChannelTypeGRAY8, kImageTensorChannelTypeBGR8, or kImageTensorChannelTypeRGB8 but has %d was specified.", + tensorDesc.channelType); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[2] == (UINT)height, "Target tensor height (%lld) does not match input height (%d).", tensorDesc.sizes[2], (UINT)height); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[3] == (UINT)width, "Target tensor width (%lld) does not match input width (%d).", tensorDesc.sizes[3], (UINT)width); + + // get the byte buffer out of a softwarebitmap + BYTE* pData = nullptr; + UINT32 uiCapacity = 0; + + winrt::Windows::Graphics::Imaging::BitmapBuffer spBitmapBuffer(softwareBitmap.LockBuffer(winrt::Windows::Graphics::Imaging::BitmapBufferAccessMode::Write)); + winrt::Windows::Foundation::IMemoryBufferReference reference = spBitmapBuffer.CreateReference(); + auto spByteAccess = reference.as(); + 
WINML_THROW_IF_FAILED(spByteAccess->GetBuffer(&pData, &uiCapacity)); + + uint32_t bufferWidth = uiCapacity / height; + + ImageTensorChannelType targetChannelType = ImageConversionHelpers::GetChannelTypeFromSoftwareBitmap(softwareBitmap); + + if (tensorDesc.dataType == kImageTensorDataTypeFloat32) { + WINML_THROW_IF_FAILED(CpuDetensorizer::Detensorize(tensorDesc.channelType, targetChannelType, static_cast(pCPUTensor), bufferWidth, height, width, pData)); + } else if (tensorDesc.dataType == kImageTensorDataTypeFloat16) { + WINML_THROW_IF_FAILED(CpuDetensorizer::Detensorize(tensorDesc.channelType, targetChannelType, static_cast(pCPUTensor), bufferWidth, height, width, pData)); + } +} \ No newline at end of file diff --git a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp new file mode 100644 index 0000000000000..da1b582b3ae8a --- /dev/null +++ b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp @@ -0,0 +1,557 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" + +#include // winmeta needed for TraceLoggingKeyword +#include +#include +#include + +#include "inc/VideoFrameToTensorConverter.h" +#include "CpuTensorizer.h" +#include "inc/D3DDeviceCache.h" + +#include "LearningModelDevice.h" + +using namespace Microsoft::WRL; +using namespace Windows::AI::MachineLearning::Internal; +using namespace Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::AI::MachineLearning::implementation; +using namespace winrt::Windows::Graphics::DirectX; + +class DX12TextureToGPUTensorTelemetryEvent { + public: + DX12TextureToGPUTensorTelemetryEvent(const ImageTensorDescription& tensorDesc) { + TraceLoggingWrite( + winml_trace_logging_provider, + "DX12TextureToGPUTensor", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_START), + TraceLoggingHexInt32(tensorDesc.channelType, "Type"), + TraceLoggingInt64(tensorDesc.sizes[2], "Height"), + TraceLoggingInt64(tensorDesc.sizes[3], "Width")); + } + ~DX12TextureToGPUTensorTelemetryEvent() { + TraceLoggingWrite( + winml_trace_logging_provider, + "DX12TextureToGPUTensor", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_STOP), + TraceLoggingHexInt32(S_OK, "HRESULT")); + } +}; + +class ConvertVideoFrameWithSoftwareBitmapToCPUTensorTelemetryEvent { + public: + ConvertVideoFrameWithSoftwareBitmapToCPUTensorTelemetryEvent(const ImageTensorDescription& tensorDesc) { + TraceLoggingWrite( + winml_trace_logging_provider, + "ConvertVideoFrameWithSoftwareBitmapToCPUTensor", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_START), + TraceLoggingHexInt32(tensorDesc.channelType, "Type"), + TraceLoggingInt64(tensorDesc.sizes[2], "Height"), + TraceLoggingInt64(tensorDesc.sizes[3], "Width")); + } + 
~ConvertVideoFrameWithSoftwareBitmapToCPUTensorTelemetryEvent() { + TraceLoggingWrite( + winml_trace_logging_provider, + "ConvertVideoFrameWithSoftwareBitmapToCPUTensor", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingOpcode(EVENT_TRACE_TYPE_STOP), + TraceLoggingHexInt32(S_OK, "HRESULT")); + } +}; + +void VideoFrameToTensorConverter::VideoFrameToSoftwareTensor( + _In_ const IVideoFrame& inputVideoFrame, + _In_ const BitmapBounds& inputBounds, + _In_ const ImageTensorDescription& tensorDesc, + _Out_ BYTE* pOutputCPUTensor) { + CWinMLAutoLock lock(&lock_); + + winrt::Windows::Graphics::Imaging::SoftwareBitmap spInputSoftwareBitmap = inputVideoFrame.SoftwareBitmap(); + winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface spInputSurface = inputVideoFrame.Direct3DSurface(); + + // only one of softwarebitmap or direct3Dsurface should be non-null + if ((spInputSoftwareBitmap == nullptr && spInputSurface == nullptr) || (spInputSoftwareBitmap != nullptr && spInputSurface != nullptr)) { + WINML_THROW_IF_FAILED(E_INVALIDARG); + } + + UINT32 tensorHeight = static_cast(tensorDesc.sizes[2]); + UINT32 tensorWidth = static_cast(tensorDesc.sizes[3]); + if (spInputSurface || ImageConversionHelpers::NeedsVideoFrameConversion(inputVideoFrame, {}, inputBounds, tensorWidth, tensorHeight)) { + if (converted_video_frame_ == nullptr || + ImageConversionHelpers::NeedsVideoFrameConversion(converted_video_frame_, {}, {0, 0, (UINT32)tensorWidth, (UINT32)tensorHeight}, tensorWidth, tensorHeight)) { + converted_video_frame_ = VideoFrame::CreateWithSoftwareBitmap(SoftwareBitmap(BitmapPixelFormat::Bgra8, tensorWidth, tensorHeight)); + } + + // Resize the input VideoFrame to converted_video_frame_ + ImageConversionHelpers::ConvertVideoFrameToVideoFrame( + inputVideoFrame, + inputBounds, + tensorWidth, + tensorHeight, + converted_video_frame_); + + ConvertSoftwareBitmapToCPUTensor( + converted_video_frame_.SoftwareBitmap(), + tensorDesc, + {0, 0, (UINT32)tensorWidth, 
(UINT32)tensorHeight}, + pOutputCPUTensor); + } else { + ConvertSoftwareBitmapToCPUTensor( + inputVideoFrame.SoftwareBitmap(), + tensorDesc, + inputBounds, + pOutputCPUTensor); + } +} + +ComPtr VideoFrameToTensorConverter::ShareD3D11Texture(ID3D11Texture2D* pTexture, ID3D12Device* pDevice) +{ + assert(pTexture != nullptr); + assert(pDevice != nullptr); + + ComPtr spDxgiResource; + WINML_THROW_IF_FAILED(pTexture->QueryInterface(IID_PPV_ARGS(&spDxgiResource))); + + HANDLE hSharedTexture; + WINML_THROW_IF_FAILED(spDxgiResource->CreateSharedHandle(nullptr, GENERIC_ALL, nullptr, &hSharedTexture)); + + wil::unique_handle safeHandle(hSharedTexture); + + ComPtr d3d12Resource; + WINML_THROW_IF_FAILED(pDevice->OpenSharedHandle(safeHandle.get(), IID_PPV_ARGS(&d3d12Resource))); + + shared_handle_ = safeHandle.get(); + + return d3d12Resource; +} + +void VideoFrameToTensorConverter::VideoFrameToDX12Tensor( + _In_ const UINT32 batchIdx, + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ const IVideoFrame& inputVideoFrame, + _In_ const BitmapBounds& inputBounds, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ ID3D12Resource* pOutputTensor) { + // Validate Tensor description + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.dataType == kImageTensorDataTypeFloat32 || tensorDesc.dataType == kImageTensorDataTypeFloat16, "Target tensor description must either be kImageTensorDataTypeFloat32, or kImageTensorDataTypeFloat16. 
%d was supplied.", tensorDesc.dataType); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeRGB8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeRGB8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeBGR8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeBGR8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeGRAY8 || tensorDesc.sizes[1] == 1, "Target tensor description expects kImageTensorChannelTypeGRAY8, but has %lld channels specified instead of 1.", tensorDesc.sizes[1]); + + CWinMLAutoLock lock(&lock_); + auto device = session.Device().as(); + D3DDeviceCache* pDeviceCache = device->GetD3DDeviceCache(); + IDirect3DSurface spDirect3DSurface = inputVideoFrame.Direct3DSurface(); + + if (inputVideoFrame.SoftwareBitmap()) { + ConvertSoftwareBitmapToGPUTensor(batchIdx, inputVideoFrame, *pDeviceCache, inputBounds, tensorDesc, pOutputTensor); + } else if (spDirect3DSurface) { + ComPtr spVideoFrameTexture; + BitmapBounds scaledBounds = inputBounds; + + // TODO: Scale during the tensorization phase instead of using the video frame pipeline when the input bounds are not the same size as the tensor + if (!ImageConversionHelpers::DirectXPixelFormatSupported(spDirect3DSurface.Description().Format) || static_cast(inputBounds.Width) != tensorDesc.sizes[3] || static_cast(inputBounds.Height) != tensorDesc.sizes[2]) { + // Force the VideoFrame to not do a conversion if the format is supported since we do it during the tensorization anyway + DirectXPixelFormat newFormat = ImageConversionHelpers::DirectXPixelFormatSupported(spDirect3DSurface.Description().Format) + ? 
spDirect3DSurface.Description().Format + : ImageConversionHelpers::GetDirectXPixelFormatFromChannelType(tensorDesc.channelType); + + // Change the input bounds since the video frame pipeline already cropped the texture + scaledBounds = {0, 0, static_cast(tensorDesc.sizes[3]), static_cast(tensorDesc.sizes[2])}; + + // Use the Video Frame pipeline if we don't have our own converter for this color format + spVideoFrameTexture = CreateTextureFromUnsupportedColorFormat(inputVideoFrame, inputBounds, scaledBounds, newFormat); + } else { + // If the color format is known or the input widths are not smaller than the tensor desc, just use the video frame as is + spVideoFrameTexture = ImageConversionHelpers::GetTextureFromDirect3DSurface(spDirect3DSurface); + } + + D3D11_TEXTURE2D_DESC videoFrameTextureDesc; + spVideoFrameTexture->GetDesc(&videoFrameTextureDesc); + + if (ImageConversionHelpers::TextureIsOnDevice(spVideoFrameTexture.Get(), pDeviceCache->GetD3D11Device())) { + // The texture is on our device, so we can just create own texture, share it and cache it + if (!D3D11_cached_texture_) { + WINML_THROW_IF_FAILED(pDeviceCache->GetD3D11Device()->CreateTexture2D(&videoFrameTextureDesc, nullptr, &D3D11_cached_texture_)); + input_D3D12_resource_ = ShareD3D11Texture(D3D11_cached_texture_.Get(), pDeviceCache->GetD3D12Device()); + } else { + D3D11_TEXTURE2D_DESC cachedTextureDesc; + D3D11_cached_texture_->GetDesc(&cachedTextureDesc); + + if (cachedTextureDesc.Width != scaledBounds.Width || cachedTextureDesc.Height != scaledBounds.Height || cachedTextureDesc.Format != videoFrameTextureDesc.Format) { + // The dimensions or format don't match, so we need to re-create our texture + WINML_THROW_IF_FAILED(pDeviceCache->GetD3D11Device()->CreateTexture2D(&videoFrameTextureDesc, nullptr, &D3D11_cached_texture_)); + input_D3D12_resource_ = ShareD3D11Texture(D3D11_cached_texture_.Get(), pDeviceCache->GetD3D12Device()); + } + } + + CopyTextureIntoTexture(spVideoFrameTexture.Get(), 
scaledBounds, D3D11_cached_texture_.Get()); + } else { + // We are not on the same device, so we can't rely on our cached texture + ComPtr spTextureDevice; + spVideoFrameTexture->GetDevice(&spTextureDevice); + + ComPtr spSharedD3D11Texture; + HANDLE sharedHandle = nullptr; + UINT comPtrSize = static_cast(sizeof(spSharedD3D11Texture.GetAddressOf())); + UINT handleSize = static_cast(sizeof(sharedHandle)); + + if ((FAILED(spVideoFrameTexture->GetPrivateData(d3d11_texture_GUID_, &comPtrSize, spSharedD3D11Texture.GetAddressOf())) || !spSharedD3D11Texture.Get()) || (FAILED(spVideoFrameTexture->GetPrivateData(handle_GUID_, &handleSize, &sharedHandle)) || sharedHandle != shared_handle_)) { + // Create a new shared texture that we cache on the video frame texture + WINML_THROW_IF_FAILED(spTextureDevice->CreateTexture2D(&videoFrameTextureDesc, nullptr, &spSharedD3D11Texture)); + + input_D3D12_resource_ = ShareD3D11Texture(spSharedD3D11Texture.Get(), pDeviceCache->GetD3D12Device()); + + // Cache the shared texture on the video frame texture in order to tie their lifetime together + WINML_THROW_IF_FAILED(spVideoFrameTexture->SetPrivateDataInterface(d3d11_texture_GUID_, spSharedD3D11Texture.Get())); + WINML_THROW_IF_FAILED(spVideoFrameTexture->SetPrivateData(handle_GUID_, sizeof(shared_handle_), &shared_handle_)); + } + + // Copy from the video frame texture to the shared texture + CopyTextureIntoTexture(spVideoFrameTexture.Get(), scaledBounds, spSharedD3D11Texture.Get()); + } + + // Sync to make sure that the D3D11 texture is done copying + SyncD3D11ToD3D12(*pDeviceCache, spVideoFrameTexture.Get()); + + // We cropped the texture, shared it and converted it to a known color format, so it's time to tensorize + // TODO: merge all videoframes to a single DX12Texture Resource before call ConvertDX12TextureToGPUTensor. 
+ ConvertDX12TextureToGPUTensor(batchIdx, input_D3D12_resource_.Get(), *pDeviceCache, tensorDesc, pOutputTensor); + } else { + // Invalid video frame + WINML_THROW_IF_FAILED(E_INVALIDARG); + } +} + +void VideoFrameToTensorConverter::ConvertDX12TextureToGPUTensor( + _In_ UINT32 batchIdx, + _In_ ID3D12Resource* pInputResource, + _In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ ID3D12Resource* pOutputResource) { + assert(pInputResource != nullptr); + assert(pOutputResource != nullptr); + + CWinMLAutoLock lock(&lock_); + D3D12_RESOURCE_DESC inputDesc = pInputResource->GetDesc(); + D3D12_RESOURCE_DESC outputDesc = pOutputResource->GetDesc(); + ComPtr spDx12Device = device_cache.GetD3D12Device(); + + DX12TextureToGPUTensorTelemetryEvent telemetrylogger(tensorDesc); + + // Validate input description + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + inputDesc.Format == DXGI_FORMAT_B8G8R8X8_UNORM || inputDesc.Format == DXGI_FORMAT_B8G8R8A8_UNORM || inputDesc.Format == DXGI_FORMAT_R8G8B8A8_UNORM || inputDesc.Format == DXGI_FORMAT_R8_UNORM, + "Format was input image %d. Input image format must Bgra8, Rgba8 or Gray8.", + inputDesc.Format); + + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, inputDesc.Width != 0, "Invalid input image height provided. Width is set to zero."); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, inputDesc.Height != 0, "Invalid input image height provided. Height is set to zero."); + + // Validate Tensor description + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.dataType == kImageTensorDataTypeFloat32 || tensorDesc.dataType == kImageTensorDataTypeFloat16, "Target tensor description must either be kImageTensorDataTypeFloat32, or kImageTensorDataTypeFloat16. 
%d was supplied.", tensorDesc.dataType); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeRGB8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeRGB8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeBGR8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeBGR8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeGRAY8 || tensorDesc.sizes[1] == 1, "Target tensor description expects kImageTensorChannelTypeGRAY8, but has %lld channels specified instead of 1.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[2] == inputDesc.Height, "Target tensor height (%lld) does not match input height (%d).", tensorDesc.sizes[2], inputDesc.Height); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[3] == (UINT)inputDesc.Width, "Target tensor width (%lld) does not match input width (%d).", tensorDesc.sizes[3], (UINT)inputDesc.Width); + + UINT uiTensorElementSize = tensorDesc.dataType == kImageTensorDataTypeFloat32 ? 
sizeof(FLOAT) : sizeof(uint16_t); + + // Validate Tensor Resource + { + D3D12_HEAP_PROPERTIES outputHeapProperties; + D3D12_HEAP_FLAGS outputHeapFlags; + + WINML_THROW_IF_FAILED(pOutputResource->GetHeapProperties(&outputHeapProperties, &outputHeapFlags)); + + UINT64 ullNumElementsTensor = 1; + for (UINT uiIdx = 0; uiIdx < kImageTensorDimensionCountMax; uiIdx++) { + WINML_THROW_IF_FAILED(ULongLongMult(ullNumElementsTensor, tensorDesc.sizes[uiIdx], &ullNumElementsTensor)); + } + if (ullNumElementsTensor > UINT_MAX) { + WINML_THROW_IF_FAILED(E_INVALIDARG); + } + + UINT64 ullTensorSize = 0; + WINML_THROW_IF_FAILED(ULongLongMult(ullNumElementsTensor, uiTensorElementSize, &ullTensorSize)); + + if (outputDesc.Width < ullTensorSize || + outputDesc.Height != 1 || + outputDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || + !(outputDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) || + outputHeapProperties.Type != D3D12_HEAP_TYPE_DEFAULT) { + WINML_THROW_IF_FAILED(E_INVALIDARG); + } + } + + { + ComPtr spDx12DeviceIn, spDx12DeviceOut; + WINML_THROW_IF_FAILED(pInputResource->GetDevice(IID_PPV_ARGS(&spDx12DeviceIn))); + WINML_THROW_IF_FAILED(pOutputResource->GetDevice(IID_PPV_ARGS(&spDx12DeviceOut))); + + if (spDx12Device != spDx12DeviceIn || spDx12Device != spDx12DeviceOut) { + // Both input and output should have the same device + WINML_THROW_IF_FAILED(E_INVALIDARG); + } + } + + // Create descriptor heaps. + UINT srvUavDescriptorSize = spDx12Device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + if (descriptor_heap_ == nullptr) { + // Describe and create a shader resource view (SRV) and unordered access view (UAV) descriptor heap. 
+ D3D12_DESCRIPTOR_HEAP_DESC srvUavHeapDesc = {}; + srvUavHeapDesc.NumDescriptors = DescriptorCount; + srvUavHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvUavHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + WINML_THROW_IF_FAILED(spDx12Device->CreateDescriptorHeap(&srvUavHeapDesc, IID_PPV_ARGS(&descriptor_heap_))); + descriptor_heap_->SetName(L"Tensorize Descriptor Heap"); + } + + // Create SRV and UAV for input and output respectively + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = inputDesc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(descriptor_heap_->GetCPUDescriptorHandleForHeapStart(), SrvBufferIdx, srvUavDescriptorSize); + spDx12Device->CreateShaderResourceView(pInputResource, &srvDesc, srvHandle); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = CreateUAVDescription(batchIdx, outputDesc, tensorDesc); + CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle(descriptor_heap_->GetCPUDescriptorHandleForHeapStart(), UavBufferIdx, srvUavDescriptorSize); + spDx12Device->CreateUnorderedAccessView(pOutputResource, nullptr, &uavDesc, uavHandle); + } + + // + // Pipeline setup for shader operation + // + PipelineStateCacheType type = PipelineStateCacheType::kFloat32; + if (tensorDesc.dataType == kImageTensorDataTypeFloat16) { + type = PipelineStateCacheType::kFloat16; + } + + // Set the origin format + PipelineStateCacheFormat formatFrom = PipelineStateCacheFormat::kBGR8; + if (inputDesc.Format == DXGI_FORMAT_R8G8B8A8_UNORM) { + formatFrom = PipelineStateCacheFormat::kRGB8; + } else if (inputDesc.Format == DXGI_FORMAT_R8_UNORM) { + formatFrom = PipelineStateCacheFormat::kGRAY8; + } + + // Set the destination format + PipelineStateCacheFormat formatTo = PipelineStateCacheFormat::kBGR8; + if (tensorDesc.channelType == kImageTensorChannelTypeRGB8) { + formatTo = 
PipelineStateCacheFormat::kRGB8; + } else if (tensorDesc.channelType == kImageTensorChannelTypeGRAY8) { + formatTo = PipelineStateCacheFormat::kGRAY8; + } + + root_signature_ = device_cache.GetTensorizeRootSignature(); + pipeline_state_ = device_cache.GetCachedPipelineState(type, formatFrom, formatTo, PipelineStateCacheOperation::kTensorize); + + ResetCommandList(device_cache); + + // Write compute commands into the command list and put it into the queue. + { + command_list_->SetComputeRootSignature(root_signature_.Get()); + + ID3D12DescriptorHeap* ppHeaps[] = {descriptor_heap_.Get()}; + command_list_->SetDescriptorHeaps(_countof(ppHeaps), ppHeaps); + + CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandle(descriptor_heap_->GetGPUDescriptorHandleForHeapStart(), SrvBufferIdx, srvUavDescriptorSize); + CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandle(descriptor_heap_->GetGPUDescriptorHandleForHeapStart(), UavBufferIdx, srvUavDescriptorSize); + { + ConstantBufferCS constantBufferCS = {}; + constantBufferCS.height = inputDesc.Height; + constantBufferCS.width = (UINT)inputDesc.Width; + command_list_->SetComputeRoot32BitConstants(0, 2, &constantBufferCS, 0); + } + command_list_->SetComputeRootDescriptorTable(1, srvHandle); + command_list_->SetComputeRootDescriptorTable(2, uavHandle); + + UINT64 dispatchWidth = (inputDesc.Width - 1) / 16 + 1; + UINT64 dispatchHeight = (inputDesc.Height - 1) / 4 + 1; + command_list_->Dispatch(static_cast(dispatchWidth), static_cast(dispatchHeight), 1); + + WINML_THROW_IF_FAILED(command_list_->Close()); + + ID3D12CommandList* pComputeToGPUCLs[] = {command_list_.Get()}; + + device_cache.GetCommandQueue()->ExecuteCommandLists(ARRAYSIZE(pComputeToGPUCLs), pComputeToGPUCLs); + } +} + +void VideoFrameToTensorConverter::ConvertSoftwareBitmapToGPUTensor( + _In_ UINT32 batchIdx, + _In_ const IVideoFrame& videoFrame, + _In_ D3DDeviceCache& device_cache, + _In_ const BitmapBounds& inputBounds, + _In_ const ImageTensorDescription& tensorDesc, + _Inout_ ID3D12Resource* 
pOutputResource) { + assert(pOutputResource != nullptr); + assert(videoFrame.SoftwareBitmap() != nullptr); + + DX12TextureToGPUTensorTelemetryEvent telemetrylogger(tensorDesc); + + SoftwareBitmap convertedSoftwareBitmap = nullptr; + BitmapBounds scaledBounds = inputBounds; + + // TODO: Scale during the tensorization phase instead of using the video frame pipeline when the input bounds are not the same size as the tensor + if (static_cast(inputBounds.Width) != tensorDesc.sizes[3] || static_cast(inputBounds.Height) != tensorDesc.sizes[2]) { + scaledBounds = {0, 0, static_cast(tensorDesc.sizes[3]), static_cast(tensorDesc.sizes[2])}; + + // Force the VideoFrame to not do a conversion if the format is supported since we do it during the tensorization anyway + BitmapPixelFormat newPixelFormat = ImageConversionHelpers::SoftwareBitmapFormatSupported(videoFrame.SoftwareBitmap()) + ? videoFrame.SoftwareBitmap().BitmapPixelFormat() + : ImageConversionHelpers::GetBitmapPixelFormatFromChannelType(tensorDesc.channelType); + + convertedSoftwareBitmap = SoftwareBitmap(newPixelFormat, static_cast(tensorDesc.sizes[3]), static_cast(tensorDesc.sizes[2])); + VideoFrame convertedVideoFrame = VideoFrame::CreateWithSoftwareBitmap(convertedSoftwareBitmap); + videoFrame.as().CopyToAsync(convertedVideoFrame, inputBounds, scaledBounds).get(); + + convertedSoftwareBitmap = convertedVideoFrame.SoftwareBitmap(); + } else if (!ImageConversionHelpers::SoftwareBitmapFormatSupported(videoFrame.SoftwareBitmap())) { + convertedSoftwareBitmap = SoftwareBitmap::Convert(videoFrame.SoftwareBitmap(), ImageConversionHelpers::GetBitmapPixelFormatFromChannelType(tensorDesc.channelType)); + } else { + // We don't need a conversion + convertedSoftwareBitmap = videoFrame.SoftwareBitmap(); + } + + assert(convertedSoftwareBitmap != nullptr); + + D3D12_RESOURCE_DESC outputDesc = pOutputResource->GetDesc(); + + uint32_t tensorElementSize = tensorDesc.dataType == kImageTensorDataTypeFloat32 ? 
4 : 2; + uint32_t bufferSize = static_cast(tensorDesc.sizes[1] * tensorDesc.sizes[2] * tensorDesc.sizes[3] * tensorElementSize); + + // TODO: Make an allocator for upload heaps + if (!upload_heap_ || upload_heap_->GetDesc().Width < bufferSize) { + WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(bufferSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&upload_heap_))); + } + + void* pCPUTensorBuffer = nullptr; + WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), &pCPUTensorBuffer)); + + // We avoid the Video Frame pipeline by manually sending the CPU data to the GPU, and we tensorize while we are filling the + // upload heap. The image may already have been cropped/scaled by the video frame pipeline, so we send the scaled bounds + // instead of the initial input bounds + ConvertSoftwareBitmapToCPUTensor(convertedSoftwareBitmap, tensorDesc, scaledBounds, pCPUTensorBuffer); + + upload_heap_->Unmap(0, &CD3DX12_RANGE(0, bufferSize)); + + ResetCommandList(device_cache); + command_list_->CopyBufferRegion(pOutputResource, bufferSize * batchIdx, upload_heap_.Get(), 0, bufferSize); + + WINML_THROW_IF_FAILED(command_list_->Close()); + ID3D12CommandList* ppCommandLists[] = {command_list_.Get()}; + device_cache.GetCommandQueue()->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); +} + +D3D12_UNORDERED_ACCESS_VIEW_DESC VideoFrameToTensorConverter::CreateUAVDescription( + const UINT32 batchIdx, + const D3D12_RESOURCE_DESC& resourceDesc, + const ImageTensorDescription& desc) { + UINT uiTensorElementSize = + desc.dataType == kImageTensorDataTypeFloat32 ? 
sizeof(UINT) : sizeof(uint16_t); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + UINT singleImageSize = static_cast(desc.sizes[1] * desc.sizes[2] * desc.sizes[3]); + uavDesc.Buffer.FirstElement = batchIdx * desc.sizes[1] * desc.sizes[2] * desc.sizes[3]; + uavDesc.Buffer.NumElements = singleImageSize; + uavDesc.Buffer.CounterOffsetInBytes = 0; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + if (desc.dataType == kImageTensorDataTypeFloat32) { + // fp32 uses structured buffers so the format can be set to unknown, + // and the stride needs to be set. + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.Buffer.StructureByteStride = uiTensorElementSize; + } else if (desc.dataType == kImageTensorDataTypeFloat16) { + // fp16 uses unstructured buffers because structured buffers dont support fp16 on + // most hardware. The format can be set to unknown to a specific known format, + // and the stride must be zeroed. + uavDesc.Format = DXGI_FORMAT_R16_FLOAT; + uavDesc.Buffer.StructureByteStride = 0; + } else { + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + false, + "Tensorization conversion is only supported to kImageTensorDataTypeFloat32, or kImageTensorDataTypeFloat16."); + } + + return uavDesc; +} + +void VideoFrameToTensorConverter::ConvertSoftwareBitmapToCPUTensor( + _In_ const SoftwareBitmap& softwareBitmap, + _In_ const ImageTensorDescription& tensorDesc, + _In_ const BitmapBounds& inputBounds, + _Inout_ void* pCPUTensor) { + assert(softwareBitmap != nullptr); + + ConvertVideoFrameWithSoftwareBitmapToCPUTensorTelemetryEvent telemetrylogger(tensorDesc); + + auto height = softwareBitmap.PixelHeight(); + auto width = softwareBitmap.PixelWidth(); + auto format = softwareBitmap.BitmapPixelFormat(); + + // Validate input description + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + format == BitmapPixelFormat::Bgra8 || format == BitmapPixelFormat::Rgba8 || format == BitmapPixelFormat::Gray8, + "Format was input 
image %d. Input image format must Bgra8, Rgba8 or Gray8.", + format); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, height > 0, "Invalid input image height provided. Height is set to zero."); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, width > 0, "Invalid input image width provided. Height is set to zero."); + + // Validate Tensor description + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.dataType == kImageTensorDataTypeFloat32 || tensorDesc.dataType == kImageTensorDataTypeFloat16, "Target tensor description must either be kImageTensorDataTypeFloat32, or kImageTensorDataTypeFloat16. %d was supplied.", tensorDesc.dataType); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeRGB8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeRGB8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeBGR8 || tensorDesc.sizes[1] == 3, "Target tensor description expects kImageTensorChannelTypeBGR8, but has %lld channels specified instead of 3.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.channelType != kImageTensorChannelTypeGRAY8 || tensorDesc.sizes[1] == 1, "Target tensor description expects kImageTensorChannelTypeGRAY8, but has %lld channels specified instead of 1.", tensorDesc.sizes[1]); + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + tensorDesc.channelType == kImageTensorChannelTypeGRAY8 || + tensorDesc.channelType == kImageTensorChannelTypeBGR8 || + tensorDesc.channelType == kImageTensorChannelTypeRGB8, + "Target tensor description expects kImageTensorChannelTypeGRAY8, kImageTensorChannelTypeBGR8, or kImageTensorChannelTypeRGB8 but has %d was specified.", + tensorDesc.channelType); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[2] == (UINT)inputBounds.Height, "Target tensor height (%lld) does not match input height (%d).", 
tensorDesc.sizes[2], (UINT)inputBounds.Height); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, tensorDesc.sizes[3] == (UINT)inputBounds.Width, "Target tensor width (%lld) does not match input width (%d).", tensorDesc.sizes[3], (UINT)inputBounds.Width); + + // get the byte buffer out of a softwarebitmap + BYTE* pData = nullptr; + UINT32 bufferSize = 0; + winrt::Windows::Graphics::Imaging::BitmapBuffer spBitmapBuffer(softwareBitmap.LockBuffer(winrt::Windows::Graphics::Imaging::BitmapBufferAccessMode::Read)); + winrt::Windows::Foundation::IMemoryBufferReference reference = spBitmapBuffer.CreateReference(); + auto spByteAccess = reference.as(); + WINML_THROW_IF_FAILED(spByteAccess->GetBuffer(&pData, &bufferSize)); + + UINT32 bufferWidth = bufferSize / height; + + ImageTensorChannelType channelType = ImageConversionHelpers::GetChannelTypeFromSoftwareBitmap(softwareBitmap); + + if (tensorDesc.dataType == kImageTensorDataTypeFloat32) { + WINML_THROW_IF_FAILED(CpuTensorizer::TensorizeData(channelType, tensorDesc.channelType, pData, bufferWidth, inputBounds, reinterpret_cast(pCPUTensor))); + } else if (tensorDesc.dataType == kImageTensorDataTypeFloat16) { + WINML_THROW_IF_FAILED(CpuTensorizer::TensorizeData(channelType, tensorDesc.channelType, pData, bufferWidth, inputBounds, reinterpret_cast(pCPUTensor))); + } +} \ No newline at end of file diff --git a/winml/lib/Api.Image/inc/ConverterResourceStore.h b/winml/lib/Api.Image/inc/ConverterResourceStore.h new file mode 100644 index 0000000000000..ed3cd10907ec1 --- /dev/null +++ b/winml/lib/Api.Image/inc/ConverterResourceStore.h @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include +#include "VideoFrameToTensorConverter.h" +#include "TensorToVideoFrameConverter.h" + +namespace Windows::AI::MachineLearning { + +// Forward Declare +class ConverterResourceStore; + +struct ConverterResourceDescription { + DWORD pixel_format; + int width; + int height; + LUID luid; + + bool operator==(_In_ ConverterResourceDescription& desc) { + // Converter resources DON'T match if + // 1) the resources have different dimensions + // 2) the resources are on different devices + // 3) the resources have different pixel formats + if (desc.width != width || + desc.height != height || + desc.luid.HighPart != luid.HighPart || + desc.luid.LowPart != luid.LowPart || + desc.pixel_format != pixel_format) { + return false; + } + + return true; + } +}; + +class ConverterResources : public std::enable_shared_from_this { + using Pool = std::weak_ptr; + + public: + template + static std::shared_ptr Create(Pool pool, ConverterResourceDescription& descriptor) { + return std::make_shared(pool, descriptor); + } + + ConverterResources(Pool& pool, ConverterResourceDescription& descriptor); + + void ReturnToCache(); + + public: + ConverterResourceDescription Descriptor; + + std::unique_ptr Tensorizer; + std::unique_ptr Detensorizer; + + private: + Pool m_pool; +}; + +// This class retains tensorization and detensorization +// resources in a store, and evicts the oldest resource object +// when the size of the pool is maxed out. Objects in the pool +// can be reused for caching purposes to enhance performance during +// tensorization. +class ConverterResourceStore : public std::enable_shared_from_this { + struct PoolObject { + std::shared_ptr Resource; + uint64_t StoreId; + }; + + public: + template + static std::shared_ptr Create(TArgs&&... 
args) { + return std::make_shared(std::forward(args)...); + } + + ConverterResourceStore(size_t nCacheSize); + + std::shared_ptr Fetch(ConverterResourceDescription& descriptor); + void Store(std::shared_ptr object); + + private: + std::shared_ptr FetchAndRemoveObject(ConverterResourceDescription& desc); + void EvictOldestPoolObject(); + + private: + std::vector m_objects; + size_t m_cacheSize; + std::mutex m_mutex; + uint64_t storeId = 0; +}; + +class PoolObjectWrapper { + public: + template + static std::shared_ptr Create(TArgs&&... args) { + return std::make_shared(std::forward(args)...); + } + + explicit PoolObjectWrapper(std::shared_ptr&& resources) : m_resources(resources) { + } + + ~PoolObjectWrapper() { + if (m_resources) { + m_resources->ReturnToCache(); + } + } + + std::shared_ptr Get() { + return m_resources; + } + + private: + std::shared_ptr m_resources; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Image/inc/D3DDeviceCache.h b/winml/lib/Api.Image/inc/D3DDeviceCache.h new file mode 100644 index 0000000000000..43df3dcadf070 --- /dev/null +++ b/winml/lib/Api.Image/inc/D3DDeviceCache.h @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "pch.h" + +// +// Exception information +// +#ifndef FACILITY_VISUALCPP +#define FACILITY_VISUALCPP ((LONG)0x6d) +#endif + +#define VcppException(sev, err) ((sev) | (FACILITY_VISUALCPP << 16) | err) + +namespace winrt::Windows::AI::MachineLearning::implementation { +enum class PipelineStateCacheType : unsigned char { + kFloat32 = 0, + kFloat16 = 1, + kCount = 2 +}; + +enum class PipelineStateCacheFormat : unsigned char { + kRGB8 = 0, + kBGR8 = 1, + kGRAY8 = 2, + kCount = 3 +}; + +enum class PipelineStateCacheOperation : unsigned char { + kTensorize = 0, + kDetensorize = 1, + kCount = 2 +}; + +class D3DDeviceCache { + public: + ~D3DDeviceCache(); + D3DDeviceCache(Windows::AI::MachineLearning::LearningModelDeviceKind const& device_kind); + D3DDeviceCache(Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice const& device); + D3DDeviceCache(ID3D12CommandQueue* queue); + + ID3D11Device* GetD3D11Device(); + ID3D11DeviceContext4* GetD3D11DeviceContext(); + + ID3D12Device1* GetD3D12Device() { return device_.get(); } + ID3D12CommandQueue* GetCommandQueue() { return command_queue_.get(); } + + Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice GetWinrtDevice(); + + ID3D12RootSignature* GetTensorizeRootSignature(); + ID3D12RootSignature* GetDetensorizeRootSignature(); + ID3D12PipelineState* GetCachedPipelineState(PipelineStateCacheType type, PipelineStateCacheFormat format_from, PipelineStateCacheFormat format_to, PipelineStateCacheOperation operation); + + ID3D12Resource* GetDetensorizeVertexBuffer(_Out_ UINT* vertex_buffer_size); + + HANDLE GetConverterFenceHandle(); + + const GUID& GetFenceGuid() const; + + void GPUSyncD3D11ToD3D12(); + void GPUSyncD3D12ToD3D11(); + void SyncD3D12ToCPU(); + + void SyncConverterToD3D11Device(_In_ ID3D11Fence* d3d11_fence_); + void SyncD3D11DeviceToConverter(_In_ ID3D11Fence* d3d11_fence_); + + UINT64 QueueFenceToD3D12(); + void WaitForFenceValue(UINT64 fence_value); + + const LUID& GetDeviceLuid() { 
return device_luid_; }; + + bool IsFloat16Supported(); + bool SharedHandleInitialized(); + + private: + void EnsureD3D11FromD3D12(); + void EnsureD3D12Fence(); + void EnsureSharedFences(); + void InitializeCommandQueue(ID3D12Device1* device); + + ID3D12PipelineState* CreateTensorizePipelineState(PipelineStateCacheType type, PipelineStateCacheFormat format_from, PipelineStateCacheFormat format_to); + ID3D12PipelineState* CreateDetensorizePipelineState(PipelineStateCacheType type, PipelineStateCacheFormat format_from, PipelineStateCacheFormat format_to); + + com_ptr device_; + com_ptr command_queue_; + com_ptr sharing_contract_; + + com_ptr device_11_; + Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice winrt_device_; + com_ptr device_context11_; + + com_ptr tensorize_root_signature_; + com_ptr detensorize_root_signature_; + + com_ptr cached_pipeline_state[PipelineStateCacheType::kCount][PipelineStateCacheFormat::kCount][PipelineStateCacheFormat::kCount][PipelineStateCacheOperation::kCount]; + + com_ptr detensorize_vertex_buffer_; + + com_ptr d3d11_fence_; + com_ptr d3d12_fence_; + std::atomic fence_value_ = 1; + + GUID fence_guid_; + + com_ptr converter_fence_; + wil::unique_handle converter_fence_handle_; + std::atomic converter_fence_value_ = 1; + + LUID device_luid_; + static const UINT sc_vertexBufferSize = sizeof(DirectX::XMFLOAT3) * 4; + + // added a lock when we added delay loading to the device cache. Since parts of + // initialization happen later, we need make it thread safe. + CWinMLLock lock_; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation \ No newline at end of file diff --git a/winml/lib/Api.Image/inc/DeviceHelpers.h b/winml/lib/Api.Image/inc/DeviceHelpers.h new file mode 100644 index 0000000000000..f1b82217b020a --- /dev/null +++ b/winml/lib/Api.Image/inc/DeviceHelpers.h @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include +#include +#include + +#if __has_include("dxcore.h") +#define ENABLE_DXCORE 1 +#endif +#ifdef ENABLE_DXCORE +#include +#endif + +namespace DeviceHelpers { +HRESULT CreateD3D11On12Device(ID3D12Device* device12, ID3D11Device** device11); +#ifdef ENABLE_DXCORE +HRESULT GetDXCoreHardwareAdapterWithPreference(DXGI_GPU_PREFERENCE preference, _COM_Outptr_ IDXCoreAdapter** ppAdapter); +#endif +HRESULT GetDXGIHardwareAdapterWithPreference(DXGI_GPU_PREFERENCE preference, _COM_Outptr_ IDXGIAdapter1** adapter); +HRESULT GetGPUPreference(winrt::Windows::AI::MachineLearning::LearningModelDeviceKind deviceKind, DXGI_GPU_PREFERENCE* preference) noexcept; +} // namespace DeviceHelpers diff --git a/winml/lib/Api.Image/inc/ImageConversionHelpers.h b/winml/lib/Api.Image/inc/ImageConversionHelpers.h new file mode 100644 index 0000000000000..a4805664bbb81 --- /dev/null +++ b/winml/lib/Api.Image/inc/ImageConversionHelpers.h @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include "ImageConversionTypes.h" + +namespace Windows::AI::MachineLearning::Internal::ImageConversionHelpers { + // This API that takes a video frame and converts it to a video frame of desired format (DXGI_FORMAT_B8G8R8X8_UNORM/BitmapPixelFormat::Bgra8) and size (after any scale/crop operations). + // This should also cover any DX adapter hop (if needed in a multi GPU scenario) and CPU->GPU / GPU->CPU conversion + void ConvertVideoFrameToVideoFrame( + _In_ const winrt::Windows::Media::IVideoFrame& input_video_frame, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _In_ UINT32 output_width, + _In_ UINT32 output_height, + _Inout_ winrt::Windows::Media::VideoFrame& output_video_frame); + + // This helper method uses the input parameters do determine if a conversion is necessary + // A conversion is not necessary if + // 1. 
input bounds cover the entire input bitmap/surface + // 2. desired output size is equal to input size + // 3. (mapping softwarebitmap to softwarebitmap) OR (mapping from d3dsurface to d3dsurface AND the two surfaces are on the same device) + // 4. the input is already in the desired format (BGRA8/B8G8R8X8UIntNormalized) + bool NeedsVideoFrameConversion( + _In_ const winrt::Windows::Media::IVideoFrame& input_video_frame, + _In_ LUID output_luid, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _In_ UINT32 output_width, + _In_ UINT32 output_height); + + bool SoftwareBitmapFormatSupported(const winrt::Windows::Graphics::Imaging::SoftwareBitmap& software_bitmap); + bool DirectXPixelFormatSupported(winrt::Windows::Graphics::DirectX::DirectXPixelFormat format); + bool FormatSupportedForUAV(_In_ ID3D12Device1* device, _In_ DXGI_FORMAT format); + ImageTensorChannelType GetChannelTypeFromSoftwareBitmap(const winrt::Windows::Graphics::Imaging::SoftwareBitmap& software_bitmap); + ImageTensorChannelType GetChannelTypeFromDirect3DSurface(const winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface& direct3D_surface); + winrt::Windows::Graphics::Imaging::BitmapPixelFormat GetBitmapPixelFormatFromChannelType(ImageTensorChannelType channel_type); + winrt::Windows::Graphics::DirectX::DirectXPixelFormat GetDirectXPixelFormatFromDXGIFormat(DXGI_FORMAT dxgi_format); + DXGI_FORMAT GetDXGIFormatFromDirectXPixelFormat(_In_ winrt::Windows::Graphics::DirectX::DirectXPixelFormat directX_pixel_format); + winrt::Windows::Graphics::DirectX::DirectXPixelFormat GetDirectXPixelFormatFromChannelType(_In_ ImageTensorChannelType channel_type); + Microsoft::WRL::ComPtr GetTextureFromDirect3DSurface(const winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface& d3d_surface); + bool TexturesHaveSameDevice(_In_ ID3D11Texture2D* pTexture1, _In_ ID3D11Texture2D* texture2d); + bool TextureIsOnDevice(_In_ ID3D11Texture2D* pTexture, _In_ ID3D11Device* device); + 
bool VideoFramesHaveSameDimensions(const winrt::Windows::Media::IVideoFrame& video_frame_1, const winrt::Windows::Media::IVideoFrame& video_frame_2); + bool VideoFramesHaveSameDevice(const winrt::Windows::Media::IVideoFrame& video_frame_1, const winrt::Windows::Media::IVideoFrame& video_frame_2); + + winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice GetDeviceFromDirect3DSurface( + const winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface& d3dSurface); + + constexpr std::array supportedWinMLFormats = { + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_B8G8R8A8_UNORM, + DXGI_FORMAT_B8G8R8X8_UNORM}; +} // namespace Windows::AI::MachineLearning::Internal::ImageConversionHelpers \ No newline at end of file diff --git a/winml/lib/Api.Image/inc/ImageConversionTypes.h b/winml/lib/Api.Image/inc/ImageConversionTypes.h new file mode 100644 index 0000000000000..667167743b501 --- /dev/null +++ b/winml/lib/Api.Image/inc/ImageConversionTypes.h @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +namespace Windows::AI::MachineLearning::Internal { +const UINT kImageTensorDimensionCountMax = 4; // NCHW format + +enum ImageTensorDataType { + kImageTensorDataTypeFloat32, + kImageTensorDataTypeFloat16, + kImageTensorDataTypeUInt32, + kImageTensorDataTypeUInt16, + kImageTensorDataTypeUInt8, + kImageTensorDataTypeInt32, + kImageTensorDataTypeInt16, + kImageTensorDataTypeInt8, + kImageTensorDataTypeCount +}; + +enum ImageTensorChannelType { + kImageTensorChannelTypeRGB8, + kImageTensorChannelTypeBGR8, + kImageTensorChannelTypeGRAY8, + ImageTensorChannelType_COUNT +}; + +struct ImageTensorDescription { + ImageTensorDataType dataType; + ImageTensorChannelType channelType; + int64_t sizes[kImageTensorDimensionCountMax]; +}; +} // namespace Windows::AI::MachineLearning::Internal \ No newline at end of file diff --git a/winml/lib/Api.Image/inc/ImageConverter.h b/winml/lib/Api.Image/inc/ImageConverter.h new file mode 100644 index 0000000000000..c564c51ce455f --- /dev/null +++ b/winml/lib/Api.Image/inc/ImageConverter.h @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include "WinML_Lock.h" +#include "ImageConversionHelpers.h" + +// Assign a name to the object to aid with debugging. 
+#if defined(_DEBUG) +inline void SetName(ID3D12Object* object, LPCWSTR name) { + object->SetName(name); +} +inline void SetNameIndexed(ID3D12Object* object, LPCWSTR name, UINT index) { + WCHAR full_name[50]; + if (swprintf_s(full_name, L"%s[%u]", name, index) > 0) { + object->SetName(full_name); + } +} +#else +inline void SetName(ID3D12Object*, LPCWSTR) { +} +inline void SetNameIndexed(ID3D12Object*, LPCWSTR, UINT) { +} +#endif + +// Forward declaration +namespace winrt::Windows::AI::MachineLearning::implementation { +class D3DDeviceCache; +} + +namespace Windows::AI::MachineLearning::Internal { +struct ConstantBufferCS { + UINT height; + UINT width; +}; + +class ImageConverter { + public: + ImageConverter() : converted_video_frame_(nullptr) {} + void ResetAllocator(); + + protected: + // Indices of shader resources in the descriptor heap. + enum DescriptorHeapIndex : UINT32 { + SrvBufferIdx = 0, + UavBufferIdx = SrvBufferIdx + 1, + DescriptorCount = UavBufferIdx + 1 + }; + + Microsoft::WRL::ComPtr command_list_; + Microsoft::WRL::ComPtr command_allocator_; + Microsoft::WRL::ComPtr root_signature_; + Microsoft::WRL::ComPtr pipeline_state_; + Microsoft::WRL::ComPtr descriptor_heap_; + Microsoft::WRL::ComPtr D3D11_cached_texture_; + winrt::Windows::Media::VideoFrame converted_video_frame_; + CWinMLLock lock_; + + void SyncD3D11ToD3D12(_In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, _In_ ID3D11Texture2D* D3D11_texture); + void SyncD3D12ToD3D11(_In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, _In_ ID3D11Texture2D* texture); + void ResetCommandList(_In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache); + Microsoft::WRL::ComPtr FetchOrCreateFenceOnDevice(_In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, _In_ ID3D11Device* D3D11_device); + + Microsoft::WRL::ComPtr CreateTextureFromUnsupportedColorFormat( + const 
winrt::Windows::Media::IVideoFrame& video_frame, + const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + const winrt::Windows::Graphics::Imaging::BitmapBounds& output_bounds, + winrt::Windows::Graphics::DirectX::DirectXPixelFormat new_format); + + static void CopyTextureIntoTexture( + _In_ ID3D11Texture2D* texture_from, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _Inout_ ID3D11Texture2D* texture_to); +}; +} // namespace Windows::AI::MachineLearning::Internal \ No newline at end of file diff --git a/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h b/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h new file mode 100644 index 0000000000000..319f9a07406c9 --- /dev/null +++ b/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "ImageConverter.h" +#include "ImageConversionTypes.h" + +namespace Windows::AI::MachineLearning::Internal { +class ITensorToVideoFrameConverter { + public: + virtual void DX12TensorToVideoFrame( + _In_ UINT32 batch_index, + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ ID3D12Resource* input_tensor, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ winrt::Windows::Media::VideoFrame& destination_video_frame) = 0; + + virtual void SoftwareTensorToVideoFrame( + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ BYTE* CPU_tensor_to_convert, + _In_ ImageTensorDescription tensor_description, + _Inout_ winrt::Windows::Media::VideoFrame& destination_video_frame) = 0; +}; + +class TensorToVideoFrameConverter : ITensorToVideoFrameConverter, public ImageConverter { + public: + TensorToVideoFrameConverter() : shared_handle_(nullptr) {} + + // Function takes in a tensor DX12 Resource all compute ops should be completed + // converts it to a VideoFrame backed by either a 
SoftwareBitmap or D3DSurface + void DX12TensorToVideoFrame( + _In_ UINT32 batch_index, + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ ID3D12Resource* input_tensor, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ winrt::Windows::Media::VideoFrame& destination_video_frame); + + // Function takes in a byte pointer to a CPUTensor + // converts it to VideoFrame backed by either a SoftwareBitmap or D3DSurface, + void SoftwareTensorToVideoFrame( + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ BYTE* CPU_tensor_to_convert, + _In_ ImageTensorDescription tensor_description, + _Inout_ winrt::Windows::Media::VideoFrame& destination_video_frame); + + private: + GUID _d3d11TextureGUID = {0x14bf1054, 0x6ce7, 0x4c00, {0xa1, 0x32, 0xb0, 0xf2, 0x11, 0x5D, 0xE0, 0x7f}}; // {14BF1054-6CE7-4C00-A132-B0F2115DE07F} + GUID _handleGUID = {0x700148fc, 0xc0cb, 0x4a7e, {0xa7, 0xc0, 0xe7, 0x43, 0xc1, 0x9, 0x9d, 0x62}}; + ; // {700148FC-C0CB-4A7E-A7C0-E743C1099D62} + Microsoft::WRL::ComPtr readback_heap_; + Microsoft::WRL::ComPtr output_resource_; + Microsoft::WRL::ComPtr UAV_resource_; + HANDLE shared_handle_; + + Microsoft::WRL::ComPtr ShareD3D12Texture(ID3D12Resource* pResource, ID3D11Device* pDevice); + + void ConvertGPUTensorToSoftwareBitmap( + _In_ UINT32 batch_index, + _In_ ID3D12Resource* input_tensor, + _In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ winrt::Windows::Graphics::Imaging::SoftwareBitmap& software_bitmap); + + void ConvertGPUTensorToDX12Texture( + _In_ UINT32 batch_index, + _In_ ID3D12Resource* input_resource, + _In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ ID3D12Resource* output_resource); + + void ConvertDX12TensorToUnsupportedVideoFrameFormat( + _In_ UINT32 batch_index, + 
_In_ ID3D12Resource* input_tensor, + _In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ winrt::Windows::Media::VideoFrame& unsupported_video_frame); + + static D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor( + const UINT32 batch_index, + const D3D12_RESOURCE_DESC& resource_description, + const ImageTensorDescription& description); + + static void ConvertCPUTensorToSoftwareBitmap( + _In_ void* CPU_tensor, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ winrt::Windows::Graphics::Imaging::SoftwareBitmap& software_bitmap); + + static Microsoft::WRL::ComPtr CreateShareableD3D12Texture( + const D3D11_TEXTURE2D_DESC& d3d11Desc, + ID3D12Device* d3d12Device); +}; +} // namespace Windows::AI::MachineLearning::Internal \ No newline at end of file diff --git a/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h b/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h new file mode 100644 index 0000000000000..82b2cfc0b6e80 --- /dev/null +++ b/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h @@ -0,0 +1,94 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "ImageConverter.h" +#include "ImageConversionHelpers.h" +#include "ImageConversionTypes.h" + +namespace Windows::AI::MachineLearning::Internal { +class IVideoFrameToTensorConverter { + public: + virtual void VideoFrameToDX12Tensor( + _In_ const UINT32 batch_index, + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ const winrt::Windows::Media::IVideoFrame& input_video_frame, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ ID3D12Resource* output_tensor) = 0; + + virtual void VideoFrameToSoftwareTensor( + _In_ const winrt::Windows::Media::IVideoFrame& input_video_frame, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _In_ const ImageTensorDescription& tensor_description, + _Out_ BYTE* output_CPU_tensor) = 0; +}; + +class VideoFrameToTensorConverter : IVideoFrameToTensorConverter, public ImageConverter { + public: + VideoFrameToTensorConverter() : shared_handle_(nullptr) {} + + // Function takes in a VideoFrame backed by either a SoftwareBitmap or D3DSurface, + // and converts to a tensor DX12 Resource. + // CommandQueue and commandlist should be a compute resource, + // commandlist will be passed in open, closed and executing when function exits + // User should pass in a BitmapBounds describing the region of interest, in the form of + // {upperleft X, upperleft Y, width, height} to be turned into a tensor. + // If the region of interest is the entire VideoFrame, the input BitmapBounds should describe the entire image. 
+ void VideoFrameToDX12Tensor( + _In_ const UINT32 batch_index, + _In_ winrt::Windows::AI::MachineLearning::LearningModelSession& session, + _In_ const winrt::Windows::Media::IVideoFrame& input_video_frame, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ ID3D12Resource* output_tensor); + + // Function takes in a VideoFrame backed by either a SoftwareBitmap or D3DSurface, + // and converts to a tensor returned in a buffer. + // User should pass in a BitmapBounds describing the region of interest, in the form of + // {upperleft X, upperleft Y, width, height} to be turned into a tensor. + // If the region of interest is the entire VideoFrame, the input BitmapBounds should describe the entire image. + void VideoFrameToSoftwareTensor( + _In_ const winrt::Windows::Media::IVideoFrame& input_video_frame, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _In_ const ImageTensorDescription& tensor_description, + _Out_ BYTE* output_CPU_tensor); + + private: + GUID d3d11_texture_GUID_ = {0x485e4bb3, 0x3fe8, 0x497b, {0x85, 0x9e, 0xc7, 0x5, 0x18, 0xdb, 0x11, 0x2a}}; // {485E4BB3-3FE8-497B-859E-C70518DB112A} + GUID handle_GUID_ = {0xce43264e, 0x41f7, 0x4882, {0x9e, 0x20, 0xfa, 0xa5, 0x1e, 0x37, 0x64, 0xfc}}; + ; // CE43264E-41F7-4882-9E20-FAA51E3764FC + Microsoft::WRL::ComPtr upload_heap_; + Microsoft::WRL::ComPtr input_D3D12_resource_; + HANDLE shared_handle_; + + Microsoft::WRL::ComPtr ShareD3D11Texture(ID3D11Texture2D* pTexture, ID3D12Device* pDevice); + + void ConvertSoftwareBitmapToGPUTensor( + _In_ const UINT32 batch_index, + _In_ const winrt::Windows::Media::IVideoFrame& videoFrame, + _In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ ID3D12Resource* pOutputResource); + + void 
ConvertDX12TextureToGPUTensor( + _In_ const UINT32 batch_index, + _In_ ID3D12Resource* pInputResource, + _In_ winrt::Windows::AI::MachineLearning::implementation::D3DDeviceCache& device_cache, + _In_ const ImageTensorDescription& tensor_description, + _Inout_ ID3D12Resource* output_resource); + + static D3D12_UNORDERED_ACCESS_VIEW_DESC CreateUAVDescription( + const UINT32 batch_index, + const D3D12_RESOURCE_DESC& resource_description, + const ImageTensorDescription& description); + + static void VideoFrameToTensorConverter::ConvertSoftwareBitmapToCPUTensor( + _In_ const winrt::Windows::Graphics::Imaging::SoftwareBitmap& software_bitmap, + _In_ const ImageTensorDescription& tensor_description, + _In_ const winrt::Windows::Graphics::Imaging::BitmapBounds& input_bounds, + _Inout_ void* CPU_tensor); +}; +} // namespace Windows::AI::MachineLearning::Internal diff --git a/winml/lib/Api.Image/pch.h b/winml/lib/Api.Image/pch.h new file mode 100644 index 0000000000000..014684b0c92f7 --- /dev/null +++ b/winml/lib/Api.Image/pch.h @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "winrt_headers.h" +#include "dx.h" + +#include diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceGRAY8ToTensorBGR8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceGRAY8ToTensorBGR8.h new file mode 100644 index 0000000000000..4251cf7a922e3 --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceGRAY8ToTensorBGR8.h @@ -0,0 +1,295 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for output +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV struct r/w u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_structured u0, 4 +dcl_input vThreadID.xyz +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.x, vThreadID.xyzz, t0.xyzw + mul r0.x, r0.x, l(255.000000) + max r0.x, r0.x, l(0.000000) + min r0.x, r0.x, l(255.000000) + imul null, r0.y, cb0[0].x, cb0[0].y + imad r0.z, cb0[0].y, vThreadID.y, vThreadID.x + store_structured u0.x, r0.z, l(0), r0.x + imad r0.w, cb0[0].y, cb0[0].x, r0.z + store_structured u0.x, r0.w, l(0), r0.x + 
ishl r0.y, r0.y, l(1) + iadd r0.y, r0.y, r0.z + store_structured u0.x, r0.y, l(0), r0.x +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csSurfaceGRAY8ToTensorBGR8[] = +{ + 68, 88, 66, 67, 55, 48, + 7, 135, 98, 70, 201, 11, + 195, 119, 84, 23, 189, 27, + 235, 75, 1, 0, 0, 0, + 36, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 136, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 6, 0, 0, 0, 6, 0, + 0, 0, 1, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 162, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 
0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 52, 2, + 0, 0, 80, 0, 5, 0, + 141, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 88, 24, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 85, 85, 0, 0, 158, 0, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 95, 0, 0, 2, + 114, 0, 2, 0, 104, 0, + 0, 2, 1, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 45, 0, 0, 136, 194, 0, + 0, 128, 67, 85, 21, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 70, 10, 2, 0, + 70, 126, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 127, 67, + 52, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 0, 0, 51, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 127, 67, 38, 0, 0, 10, + 0, 208, 0, 0, 34, 0, + 16, 0, 0, 0, 0, 0, + 10, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 35, 0, 0, 8, 66, 0, + 16, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 2, 0, 10, 0, + 2, 0, 168, 0, 0, 9, + 18, 224, 17, 0, 0, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 35, 0, 0, 11, + 130, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 128, 
32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 168, 0, + 0, 9, 18, 224, 17, 0, + 0, 0, 0, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 30, 0, 0, 7, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 168, 0, 0, 9, 18, 224, + 17, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 21, 0, 0, 1, 62, 0, + 0, 1, 83, 84, 65, 84, + 148, 0, 0, 0, 17, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 3, 0, 0, 0, + 5, 0, 0, 0, 2, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceGRAY8ToTensorGRAY8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceGRAY8ToTensorGRAY8.h new file mode 100644 index 0000000000000..4373501c37a45 --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceGRAY8ToTensorGRAY8.h @@ -0,0 +1,253 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for output +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV struct r/w u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// 
Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_structured u0, 4 +dcl_input vThreadID.xyz +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.x, vThreadID.xyzz, t0.xyzw + mul r0.x, r0.x, l(255.000000) + max r0.x, r0.x, l(0.000000) + min r0.x, r0.x, l(255.000000) + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + store_structured u0.x, r0.y, l(0), r0.x +endif +ret +// Approximately 11 instruction slots used +#endif + +const BYTE g_csSurfaceGRAY8ToTensorGRAY8[] = +{ + 68, 88, 66, 67, 78, 147, + 14, 124, 27, 161, 182, 134, + 246, 110, 9, 72, 216, 238, + 85, 150, 1, 0, 0, 0, + 80, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 180, 3, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 6, 0, 0, 0, 6, 0, + 0, 0, 1, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 162, 0, 0, 0, + 
1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 96, 1, + 0, 0, 80, 0, 5, 0, + 88, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 88, 24, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 85, 85, 0, 0, 158, 0, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 95, 0, 0, 2, + 114, 0, 2, 0, 104, 0, + 0, 2, 1, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 45, 0, 0, 136, 194, 0, + 0, 128, 67, 85, 21, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 70, 10, 2, 0, + 70, 126, 16, 0, 0, 
0, + 0, 0, 56, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 127, 67, + 52, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 0, 0, 51, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 127, 67, 35, 0, 0, 8, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 2, 0, + 10, 0, 2, 0, 168, 0, + 0, 9, 18, 224, 17, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 11, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorBGR8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorBGR8.h new file mode 100644 index 0000000000000..dbc308bc1462b --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorBGR8.h @@ -0,0 +1,301 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for output +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV struct r/w u0 1 +// cbCS cbuffer NA NA cb0 1 
+// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_structured u0, 4 +dcl_input vThreadID.xyz +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.xyz, vThreadID.xyzz, t0.xyzw + mul r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + max r0.xyz, r0.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000) + min r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + imul null, r0.w, cb0[0].x, cb0[0].y + imad r1.x, cb0[0].y, vThreadID.y, vThreadID.x + store_structured u0.x, r1.x, l(0), r0.z + imad r0.z, cb0[0].y, cb0[0].x, r1.x + store_structured u0.x, r0.z, l(0), r0.y + ishl r0.y, r0.w, l(1) + iadd r0.y, r0.y, r1.x + store_structured u0.x, r0.y, l(0), r0.x +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csSurfaceToTensorBGR8[] = +{ + 68, 88, 66, 67, 55, 254, + 187, 179, 167, 56, 132, 92, + 72, 96, 108, 51, 136, 147, + 160, 98, 1, 0, 0, 0, + 72, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 172, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 6, 0, 0, 0, 6, 0, + 0, 0, 1, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 
1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 162, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 88, 2, + 0, 0, 80, 0, 5, 0, + 150, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 88, 24, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 85, 85, 0, 0, 158, 0, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 95, 0, 0, 2, + 114, 0, 2, 0, 104, 0, + 0, 2, 2, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 
16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 45, 0, 0, 136, 194, 0, + 0, 128, 67, 85, 21, 0, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 10, 2, 0, + 70, 126, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 10, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 0, 0, + 52, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 51, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 127, 67, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 0, 0, 38, 0, 0, 10, + 0, 208, 0, 0, 130, 0, + 16, 0, 0, 0, 0, 0, + 10, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 35, 0, 0, 8, 18, 0, + 16, 0, 1, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 2, 0, 10, 0, + 2, 0, 168, 0, 0, 9, + 18, 224, 17, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 1, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 42, 0, 16, 0, 0, 0, + 0, 0, 35, 0, 0, 11, + 66, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 1, 0, 0, 0, 168, 0, + 0, 9, 18, 224, 17, 0, + 0, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 34, 0, 16, 0, + 0, 0, 0, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 30, 0, 0, 7, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 168, 0, 0, 9, 18, 224, + 17, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 21, 0, 0, 1, 62, 0, + 0, 1, 83, 84, 65, 84, + 148, 0, 0, 0, 17, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 3, 0, 0, 0, + 5, 0, 0, 0, 2, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorGRAY8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorGRAY8.h new file mode 100644 index 0000000000000..4975cc2d21143 --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorGRAY8.h @@ -0,0 +1,301 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for output +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV struct r/w u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_structured u0, 4 +dcl_input vThreadID.xyz +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.xyz, vThreadID.xyzz, t0.xyzw + mul r0.xyz, r0.xyzx, 
l(255.000000, 255.000000, 255.000000, 0.000000) + max r0.xyz, r0.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000) + min r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + imul null, r0.w, cb0[0].x, cb0[0].y + imad r1.x, cb0[0].y, vThreadID.y, vThreadID.x + store_structured u0.x, r1.x, l(0), r0.x + imad r0.x, cb0[0].y, cb0[0].x, r1.x + store_structured u0.x, r0.x, l(0), r0.y + ishl r0.x, r0.w, l(1) + iadd r0.x, r0.x, r1.x + store_structured u0.x, r0.x, l(0), r0.z +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csSurfaceToTensorGRAY8[] = +{ + 68, 88, 66, 67, 100, 61, + 235, 140, 127, 209, 29, 201, + 8, 16, 208, 132, 168, 163, + 171, 164, 1, 0, 0, 0, + 72, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 172, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 6, 0, 0, 0, 6, 0, + 0, 0, 1, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 162, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 
119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 88, 2, + 0, 0, 80, 0, 5, 0, + 150, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 88, 24, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 85, 85, 0, 0, 158, 0, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 95, 0, 0, 2, + 114, 0, 2, 0, 104, 0, + 0, 2, 2, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 45, 0, 0, 136, 194, 0, + 0, 128, 67, 85, 21, 0, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 10, 2, 0, + 70, 126, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 10, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 0, 0, + 52, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 51, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 127, 67, 0, 
0, 127, 67, + 0, 0, 127, 67, 0, 0, + 0, 0, 38, 0, 0, 10, + 0, 208, 0, 0, 130, 0, + 16, 0, 0, 0, 0, 0, + 10, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 35, 0, 0, 8, 18, 0, + 16, 0, 1, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 2, 0, 10, 0, + 2, 0, 168, 0, 0, 9, + 18, 224, 17, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 1, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 35, 0, 0, 11, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 1, 0, 0, 0, 168, 0, + 0, 9, 18, 224, 17, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 30, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 168, 0, 0, 9, 18, 224, + 17, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 21, 0, 0, 1, 62, 0, + 0, 1, 83, 84, 65, 84, + 148, 0, 0, 0, 17, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 3, 0, 0, 0, + 5, 0, 0, 0, 2, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorRGB8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorRGB8.h new file mode 100644 index 0000000000000..e6752d39d18cd --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor-SurfaceToTensorRGB8.h @@ -0,0 +1,267 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// 
+// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for output +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV struct r/w u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_structured u0, 4 +dcl_input vThreadID.xyz +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.xyz, vThreadID.xyzz, t0.xyzw + mul r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + max r0.xyz, r0.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000) + min r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + dp3 r0.x, r0.xyzx, l(0.212600, 0.715200, 0.072200, 0.000000) + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + store_structured u0.x, r0.y, l(0), r0.x +endif +ret +// Approximately 12 instruction slots used +#endif + +const BYTE g_csSurfaceToTensorRGB8[] = +{ + 68, 88, 66, 67, 84, 48, + 136, 223, 146, 195, 133, 139, + 177, 123, 108, 254, 18, 113, + 175, 71, 1, 0, 0, 0, + 156, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 0, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 
5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 6, 0, 0, 0, 6, 0, + 0, 0, 1, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 162, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 172, 1, + 0, 0, 
80, 0, 5, 0, + 107, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 88, 24, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 85, 85, 0, 0, 158, 0, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 95, 0, 0, 2, + 114, 0, 2, 0, 104, 0, + 0, 2, 1, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 45, 0, 0, 136, 194, 0, + 0, 128, 67, 85, 21, 0, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 10, 2, 0, + 70, 126, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 10, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 0, 0, + 52, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 51, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 127, 67, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 0, 0, 16, 0, 0, 10, + 18, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 208, 179, 89, 62, + 89, 23, 55, 63, 152, 221, + 147, 61, 0, 0, 0, 0, + 35, 0, 0, 8, 34, 0, + 16, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 2, 0, 10, 0, + 2, 0, 168, 0, 0, 9, + 18, 224, 17, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 21, 0, 0, 1, + 62, 0, 0, 1, 83, 84, + 65, 84, 148, 0, 0, 0, + 12, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 4, 0, + 0, 0, 1, 0, 0, 0, + 2, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceGRAY8ToTensorBGR8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceGRAY8ToTensorBGR8.h new file mode 100644 index 0000000000000..3560ba8e5f7da --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceGRAY8ToTensorBGR8.h @@ -0,0 +1,264 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV float buf u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_typed_buffer (float,float,float,float) u0 +dcl_input vThreadID.xyz +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.x, vThreadID.xyzz, t0.xyzw + mul r0.x, r0.x, l(255.000000) + max r0.x, r0.x, l(0.000000) + min r0.x, r0.x, l(255.000000) + imul null, r0.y, cb0[0].x, cb0[0].y + imad r0.z, cb0[0].y, vThreadID.y, vThreadID.x + store_uav_typed u0.xyzw, r0.zzzz, r0.xxxx + imad r0.w, cb0[0].y, cb0[0].x, r0.z + store_uav_typed u0.xyzw, r0.wwww, r0.xxxx + ishl r0.y, r0.y, l(1) + iadd 
r0.y, r0.y, r0.z + store_uav_typed u0.xyzw, r0.yyyy, r0.xxxx +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csSurfaceGRAY8ToTensorBGR8[] = +{ + 68, 88, 66, 67, 225, 103, + 110, 123, 203, 101, 218, 220, + 167, 66, 151, 157, 183, 68, + 72, 238, 1, 0, 0, 0, + 152, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 252, 3, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 1, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 
0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 28, 2, 0, 0, + 80, 0, 5, 0, 135, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 24, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 8, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 114, 0, + 2, 0, 104, 0, 0, 2, + 1, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 136, 194, 0, 0, 128, + 67, 85, 21, 0, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 10, 2, 0, 70, 126, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 127, 67, 52, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 51, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 127, 67, + 38, 0, 0, 10, 0, 208, + 0, 0, 34, 0, 16, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 66, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 166, 10, 16, 0, 0, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 35, 0, + 0, 11, 130, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 246, 15, 16, 0, 0, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 30, 0, 0, 7, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 
0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 17, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 5, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceGRAY8ToTensorGRAY8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceGRAY8ToTensorGRAY8.h new file mode 100644 index 0000000000000..8a5a22b658d6c --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceGRAY8ToTensorGRAY8.h @@ -0,0 +1,226 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV float buf u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_typed_buffer (float,float,float,float) u0 +dcl_input vThreadID.xyz +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy 
+and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.x, vThreadID.xyzz, t0.xyzw + mul r0.x, r0.x, l(255.000000) + max r0.x, r0.x, l(0.000000) + min r0.x, r0.x, l(255.000000) + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + store_uav_typed u0.xyzw, r0.yyyy, r0.xxxx +endif +ret +// Approximately 11 instruction slots used +#endif + +const BYTE g_csSurfaceGRAY8ToTensorGRAY8[] = +{ + 68, 88, 66, 67, 46, 235, + 83, 166, 156, 124, 68, 0, + 26, 122, 55, 182, 183, 235, + 141, 75, 1, 0, 0, 0, + 212, 3, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 56, 3, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 1, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 
41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 88, 1, 0, 0, + 80, 0, 5, 0, 86, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 24, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 8, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 114, 0, + 2, 0, 104, 0, 0, 2, + 1, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 136, 194, 0, 0, 128, + 67, 85, 21, 0, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 10, 2, 0, 70, 126, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 127, 67, 52, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 51, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 127, 67, + 35, 0, 0, 8, 34, 0, + 16, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 2, 0, 10, 0, + 2, 0, 164, 0, 0, 7, + 242, 224, 17, 0, 0, 0, + 0, 0, 86, 5, 16, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 0, 0, 0, 0, + 21, 0, 0, 1, 62, 0, + 0, 1, 83, 84, 65, 84, + 148, 0, 0, 0, 11, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 3, 0, 0, 0, + 1, 0, 0, 0, 2, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorBGR8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorBGR8.h new file mode 100644 index 0000000000000..b9a85c7979c41 --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorBGR8.h @@ -0,0 +1,270 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV float buf u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_typed_buffer (float,float,float,float) u0 +dcl_input vThreadID.xyz +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.xyz, vThreadID.xyzz, t0.xyzw + mul r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + max r0.xyz, r0.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000) + min r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + imul null, r0.w, cb0[0].x, cb0[0].y + imad r1.x, cb0[0].y, vThreadID.y, vThreadID.x + store_uav_typed u0.xyzw, r1.xxxx, r0.zzzz + imad r0.z, cb0[0].y, cb0[0].x, r1.x + 
store_uav_typed u0.xyzw, r0.zzzz, r0.yyyy + ishl r0.y, r0.w, l(1) + iadd r0.y, r0.y, r1.x + store_uav_typed u0.xyzw, r0.yyyy, r0.xxxx +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csSurfaceToTensorBGR8[] = +{ + 68, 88, 66, 67, 211, 132, + 11, 28, 113, 87, 48, 10, + 65, 230, 140, 169, 5, 175, + 215, 48, 1, 0, 0, 0, + 188, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 32, 4, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 1, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 
0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 64, 2, 0, 0, + 80, 0, 5, 0, 144, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 24, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 8, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 114, 0, + 2, 0, 104, 0, 0, 2, + 2, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 136, 194, 0, 0, 128, + 67, 85, 21, 0, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 10, 2, 0, 70, 126, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 127, 67, + 0, 0, 0, 0, 52, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 51, 0, 0, 10, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 0, 0, + 38, 0, 0, 10, 0, 208, + 0, 0, 130, 0, 16, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 18, 0, 16, 0, + 1, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 1, 0, + 0, 0, 166, 10, 16, 0, + 0, 0, 0, 0, 35, 0, + 0, 11, 66, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 166, 10, 16, 0, 0, 0, + 0, 0, 86, 5, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 34, 0, 16, 0, + 0, 0, 0, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 
30, 0, 0, 7, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 17, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 5, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorGRAY8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorGRAY8.h new file mode 100644 index 0000000000000..1a9f0c7c27dc7 --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorGRAY8.h @@ -0,0 +1,270 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV float buf u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d 
(float,float,float,float) t0 +dcl_uav_typed_buffer (float,float,float,float) u0 +dcl_input vThreadID.xyz +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.xyz, vThreadID.xyzz, t0.xyzw + mul r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + max r0.xyz, r0.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000) + min r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + imul null, r0.w, cb0[0].x, cb0[0].y + imad r1.x, cb0[0].y, vThreadID.y, vThreadID.x + store_uav_typed u0.xyzw, r1.xxxx, r0.xxxx + imad r0.x, cb0[0].y, cb0[0].x, r1.x + store_uav_typed u0.xyzw, r0.xxxx, r0.yyyy + ishl r0.x, r0.w, l(1) + iadd r0.x, r0.x, r1.x + store_uav_typed u0.xyzw, r0.xxxx, r0.zzzz +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csSurfaceToTensorGRAY8[] = +{ + 68, 88, 66, 67, 49, 119, + 183, 203, 67, 217, 51, 132, + 38, 109, 234, 182, 6, 203, + 245, 189, 1, 0, 0, 0, + 188, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 32, 4, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 1, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 
255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 64, 2, 0, 0, + 80, 0, 5, 0, 144, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 24, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 8, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 114, 0, + 2, 0, 104, 0, 0, 2, + 2, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 136, 194, 0, 0, 128, + 67, 85, 21, 0, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 10, 2, 0, 70, 126, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 127, 67, + 0, 0, 0, 0, 52, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 51, 0, 0, 10, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 0, 0, + 38, 0, 0, 10, 0, 208, + 0, 0, 130, 0, 16, 0, + 0, 0, 
0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 18, 0, 16, 0, + 1, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 1, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 35, 0, + 0, 11, 18, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 86, 5, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 30, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 166, 10, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 17, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 5, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorRGB8.h b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorRGB8.h new file mode 100644 index 0000000000000..55250830b414d --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensor16-SurfaceToTensorRGB8.h @@ -0,0 +1,239 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: 
+// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float4 2d t0 1 +// output UAV float buf u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_texture2d (float,float,float,float) t0 +dcl_uav_typed_buffer (float,float,float,float) u0 +dcl_input vThreadID.xyz +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + ld_indexable(texture2d)(float,float,float,float) r0.xyz, vThreadID.xyzz, t0.xyzw + mul r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + max r0.xyz, r0.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000) + min r0.xyz, r0.xyzx, l(255.000000, 255.000000, 255.000000, 0.000000) + dp3 r0.x, r0.xyzx, l(0.212600, 0.715200, 0.072200, 0.000000) + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + store_uav_typed u0.xyzw, r0.yyyy, r0.xxxx +endif +ret +// Approximately 12 instruction slots used +#endif + +const BYTE g_csSurfaceToTensorRGB8[] = +{ + 68, 88, 66, 67, 158, 93, + 8, 98, 234, 139, 247, 159, + 209, 201, 233, 140, 98, 232, + 105, 146, 1, 0, 0, 0, + 32, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 132, 3, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, 
+ 1, 0, 0, 0, 13, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 1, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 164, 1, 0, 0, + 80, 0, 5, 0, 105, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 24, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 8, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 114, 0, + 2, 0, 104, 0, 0, 2, + 1, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 136, 194, 0, 0, 128, + 67, 85, 21, 0, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 10, 2, 0, 
70, 126, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 127, 67, + 0, 0, 0, 0, 52, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 51, 0, 0, 10, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 127, 67, + 0, 0, 127, 67, 0, 0, + 127, 67, 0, 0, 0, 0, + 16, 0, 0, 10, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 208, 179, 89, 62, 89, 23, + 55, 63, 152, 221, 147, 61, + 0, 0, 0, 0, 35, 0, + 0, 8, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 164, 0, 0, 7, 242, 224, + 17, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 12, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensorFloat.hlsl b/winml/lib/Api.Image/shaders/SurfaceToTensorFloat.hlsl new file mode 100644 index 0000000000000..be21aaeab6bca --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensorFloat.hlsl @@ -0,0 +1,100 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// This shader converts a DX texture (BGRA/BGRX/RGBA/GRAY) into NCHW FLOAT Tensor with channel order RGB/BGR/GRAY +// + +Texture2D input : register(t0); // SRV + +#ifdef FP16 +RWBuffer output : register(u0); // UAV +#else +RWStructuredBuffer output : register(u0); // UAV +#endif + +cbuffer cbCS : register(b0) +{ + uint height; + uint width; +}; + +[numthreads(16, 4, 1)] +void SurfaceToTensorBGR8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 inputPixel = input.Load(globalThreadId); + inputPixel = clamp(inputPixel * 255, 0, 255); + + // Calculate the size of a single plan of color. + uint planeSize = width * height; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + output[threadOffset] = inputPixel.b; + output[threadOffset + planeSize] = inputPixel.g; + output[threadOffset + planeSize * 2] = inputPixel.r; + } +} + +[numthreads(16, 4, 1)] +void SurfaceToTensorGRAY8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 inputPixel = input.Load(globalThreadId); + inputPixel = clamp(inputPixel * 255, 0, 255); + + // Calculate the size of a single plan of color. 
+ uint planeSize = width * height; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + output[threadOffset] = inputPixel.r; + output[threadOffset + planeSize] = inputPixel.g; + output[threadOffset + planeSize * 2] = inputPixel.b; + } +} + +[numthreads(16, 4, 1)] +void SurfaceToTensorRGB8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 inputPixel = input.Load(globalThreadId); + inputPixel = clamp(inputPixel * 255, 0, 255); + + float grayValue = 0.2126 * inputPixel.r + 0.7152 * inputPixel.g + 0.0722 * inputPixel.b; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + output[threadOffset] = grayValue; + } +} + +[numthreads(16, 4, 1)] +void SurfaceGRAY8ToTensorBGR8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 inputPixel = input.Load(globalThreadId); + float gray = clamp(inputPixel.r * 255, 0, 255); + + // Calculate the size of a single plan of color. + uint planeSize = width * height; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + output[threadOffset] = gray; + output[threadOffset + planeSize] = gray; + output[threadOffset + planeSize * 2] = gray; + } +} + +[numthreads(16, 4, 1)] +void SurfaceGRAY8ToTensorGRAY8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 inputPixel = input.Load(globalThreadId); + float gray = clamp(inputPixel.r * 255, 0, 255); + + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + output[threadOffset] = gray; + } +} \ No newline at end of file diff --git a/winml/lib/Api.Image/shaders/SurfaceToTensorUint8.hlsl b/winml/lib/Api.Image/shaders/SurfaceToTensorUint8.hlsl new file mode 100644 index 0000000000000..a596e584709db --- /dev/null +++ b/winml/lib/Api.Image/shaders/SurfaceToTensorUint8.hlsl @@ -0,0 +1,29 @@ +// +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// +// This shader converts a DX texture (BGRA/BGRX) into NCHW UINT8 Tensor with channel order RGB +// Note that this shader requires that width be a multiple of 4 because UAV loads are limited to UINT and it writes 4 UINT8 values per channel at a time +// + +Texture2D input : register(t0); // SRV +RWBuffer output : register(u0); // UAV +cbuffer cbCS : register(b0) +{ + uint g_height; + uint g_width; +}; + +static uint g_blkwdt = g_width / 4; // Blocks per line +static uint g_blkchn = g_height * g_blkwdt; // Blocks per channel + +[numthreads(3, 1, 1)] +void main(uint gi : SV_GroupIndex, uint3 gid : SV_GroupID) +{ + uint outid = gi * g_blkchn + gid.x; + uint inpid_x = (gid.x % g_blkwdt) * 4; + uint inpid_y = gid.x / g_blkwdt; + output[outid] = clamp(uint4(input.Load(uint3(inpid_x + 0, inpid_y, 0)) * 255), 0, 255)[gi]; + output[outid] |= clamp(uint4(input.Load(uint3(inpid_x + 1, inpid_y, 0)) * 255), 0, 255)[gi] << 8; + output[outid] |= clamp(uint4(input.Load(uint3(inpid_x + 2, inpid_y, 0)) * 255), 0, 255)[gi] << 16; + output[outid] |= clamp(uint4(input.Load(uint3(inpid_x + 3, inpid_y, 0)) * 255), 0, 255)[gi] << 24; +} \ No newline at end of file diff --git a/winml/lib/Api.Image/shaders/TensorFloatToSurface.hlsl b/winml/lib/Api.Image/shaders/TensorFloatToSurface.hlsl new file mode 100644 index 0000000000000..051ba64d6f5e7 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorFloatToSurface.hlsl @@ -0,0 +1,124 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// This shader converts an NCHW FLOAT Tensor (BGR/RGB/GRAY) into a DX texture with channel order BGRA/BGRX/RGBA/GRAY +// + +#ifdef FP16 +Buffer input : register(t0); // SRV +#else +StructuredBuffer input : register(t0); // SRV +#endif + +RWTexture2D output : register(u0); // UAV + +cbuffer cbCS : register(b0) +{ + uint height; + uint width; +}; + +[numthreads(16, 4, 1)] +void TensorBGR8ToSurface(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 pixel; + uint blockSize = height * width; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + pixel.b = input[threadOffset] / 255.0; + pixel.g = input[threadOffset + blockSize] / 255.0; + pixel.r = input[threadOffset + blockSize * 2] / 255.0; + pixel.a = 1.0f; + + output[globalThreadId.xy] = pixel; + } +} + +[numthreads(16, 4, 1)] +void TensorRGB8ToSurface(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 pixel; + uint blockSize = height * width; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + pixel.r = input[threadOffset] / 255.0; + pixel.g = input[threadOffset + blockSize] / 255.0; + pixel.b = input[threadOffset + blockSize * 2] / 255.0; + pixel.a = 1.0f; + + output[globalThreadId.xy] = pixel; + } +} + +[numthreads(16, 4, 1)] +void TensorGRAY8ToSurface(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 pixel; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + pixel.b = input[threadOffset] / 255.0; + pixel.g = pixel.b; + pixel.r = pixel.b; + pixel.a = 1.0; + + output[globalThreadId.xy] = pixel; + } +} + +[numthreads(16, 4, 1)] +void TensorBGR8ToSurfaceGRAY8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 pixel; + uint blockSize = height * width; + uint threadOffset = width * 
globalThreadId.y + globalThreadId.x; + + pixel.b = input[threadOffset] / 255.0; + pixel.g = input[threadOffset + blockSize] / 255.0; + pixel.r = input[threadOffset + blockSize * 2] / 255.0; + + float grayValue = 0.2126 * pixel.r + 0.7152 * pixel.g + 0.0722 * pixel.b; + + output[globalThreadId.xy] = float4(grayValue, 0.0, 0.0, 0.0); + } +} + +[numthreads(16, 4, 1)] +void TensorRGB8ToSurfaceGRAY8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 pixel; + uint blockSize = height * width; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + pixel.r = input[threadOffset] / 255.0; + pixel.g = input[threadOffset + blockSize] / 255.0; + pixel.b = input[threadOffset + blockSize * 2] / 255.0; + + float grayValue = 0.2126 * pixel.r + 0.7152 * pixel.g + 0.0722 * pixel.b; + + output[globalThreadId.xy] = float4(grayValue, 0.0, 0.0, 0.0); + } +} + +[numthreads(16, 4, 1)] +void TensorGRAY8ToSurfaceGRAY8(uint3 globalThreadId : SV_DispatchThreadId) +{ + if (globalThreadId.x < width && globalThreadId.y < height) + { + float4 pixel; + uint threadOffset = width * globalThreadId.y + globalThreadId.x; + + float grayValue = input[threadOffset] / 255.0; + + output[globalThreadId.xy] = float4(grayValue, 0.0, 0.0, 0.0); + } +} \ No newline at end of file diff --git a/winml/lib/Api.Image/shaders/TensorToSurface-TensorBGR8ToSurface.h b/winml/lib/Api.Image/shaders/TensorToSurface-TensorBGR8ToSurface.h new file mode 100644 index 0000000000000..96741a8f0b7fe --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface-TensorBGR8ToSurface.h @@ -0,0 +1,298 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for input +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// 
Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture struct r/o t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_structured t0, 4 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.z, r0.y, l(0), t0.xxxx + mul r1.z, r0.z, l(0.003922) + imad r0.z, cb0[0].x, cb0[0].y, r0.y + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.z, r0.z, l(0), t0.xxxx + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.x, r0.x, l(0), t0.xxxx + mul r1.xy, r0.xzxx, l(0.003922, 0.003922, 0.000000, 0.000000) + mov r1.w, l(1.000000) + store_uav_typed u0.xyzw, vThreadID.xyyy, r1.xyzw +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csTensorBGR8ToSurface[] = +{ + 68, 88, 66, 67, 16, 222, + 20, 91, 4, 241, 52, 186, + 135, 184, 82, 120, 17, 26, + 206, 230, 1, 0, 0, 0, + 56, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 156, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 
0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, + 1, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 156, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 72, 2, + 0, 0, 80, 0, 5, 0, + 146, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 
32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 162, 0, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 156, 24, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 95, 0, 0, 2, + 50, 0, 2, 0, 104, 0, + 0, 2, 2, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 38, 0, 0, 10, 0, 208, + 0, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 167, 0, 0, 139, 2, 35, + 0, 128, 131, 153, 25, 0, + 66, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 6, 112, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 7, + 66, 0, 16, 0, 1, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 129, 128, 128, 59, + 35, 0, 0, 11, 66, 0, + 16, 0, 0, 0, 0, 0, + 10, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 167, 0, 0, 139, + 2, 35, 0, 128, 131, 153, + 25, 0, 66, 0, 16, 0, + 0, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 6, 112, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 30, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 167, 0, 0, 139, 2, 35, + 0, 128, 131, 153, 25, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 6, 112, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 10, + 50, 0, 16, 0, 1, 0, + 0, 0, 134, 0, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 129, 128, 128, 59, + 129, 128, 128, 59, 0, 0, + 0, 0, 0, 0, 0, 0, + 54, 0, 0, 5, 130, 0, + 16, 0, 1, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 128, 63, 164, 0, 0, 6, + 242, 224, 
17, 0, 0, 0, + 0, 0, 70, 5, 2, 0, + 70, 14, 16, 0, 1, 0, + 0, 0, 21, 0, 0, 1, + 62, 0, 0, 1, 83, 84, + 65, 84, 148, 0, 0, 0, + 17, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 2, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface-TensorBGR8ToSurfaceGRAY8.h b/winml/lib/Api.Image/shaders/TensorToSurface-TensorBGR8ToSurfaceGRAY8.h new file mode 100644 index 0000000000000..3bbb054a1d874 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface-TensorBGR8ToSurfaceGRAY8.h @@ -0,0 +1,306 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for input +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture struct r/o t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_structured t0, 4 +dcl_uav_typed_texture2d 
(float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.z, r0.y, l(0), t0.xxxx + imad r0.w, cb0[0].x, cb0[0].y, r0.y + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.w, r0.w, l(0), t0.xxxx + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.x, r0.x, l(0), t0.xxxx + mul r0.y, r0.w, l(0.002805) + mad r0.x, r0.x, l(0.000834), r0.y + mad r0.x, r0.z, l(0.000283), r0.x + mov r0.yzw, l(0,0,0,0) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 18 instruction slots used +#endif + +const BYTE g_csTensorBGR8ToSurfaceGRAY8[] = +{ + 68, 88, 66, 67, 35, 158, + 57, 119, 57, 98, 228, 192, + 147, 231, 36, 220, 94, 230, + 165, 6, 1, 0, 0, 0, + 100, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 200, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, + 1, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 156, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 
0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 116, 2, + 0, 0, 80, 0, 5, 0, + 157, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 162, 0, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 156, 24, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 95, 0, 0, 2, + 50, 0, 2, 0, 104, 0, + 0, 2, 1, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 38, 0, 0, 10, 0, 208, + 0, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 34, 0, 
16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 167, 0, 0, 139, 2, 35, + 0, 128, 131, 153, 25, 0, + 66, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 6, 112, 16, 0, 0, 0, + 0, 0, 35, 0, 0, 11, + 130, 0, 16, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 167, 0, + 0, 139, 2, 35, 0, 128, + 131, 153, 25, 0, 130, 0, + 16, 0, 0, 0, 0, 0, + 58, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 0, 0, 6, 112, + 16, 0, 0, 0, 0, 0, + 41, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 1, 0, 0, 0, 30, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 167, 0, 0, 139, + 2, 35, 0, 128, 131, 153, + 25, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 6, 112, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 34, 0, 16, 0, + 0, 0, 0, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 41, 207, + 55, 59, 50, 0, 0, 9, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 95, 142, 90, 58, + 26, 0, 16, 0, 0, 0, + 0, 0, 50, 0, 0, 9, + 18, 0, 16, 0, 0, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 11, 114, 148, 57, + 10, 0, 16, 0, 0, 0, + 0, 0, 54, 0, 0, 8, + 226, 0, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 164, 0, + 0, 6, 242, 224, 17, 0, + 0, 0, 0, 0, 70, 5, + 2, 0, 70, 14, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 18, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 5, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface-TensorGRAY8ToSurface.h b/winml/lib/Api.Image/shaders/TensorToSurface-TensorGRAY8ToSurface.h new file mode 100644 index 0000000000000..7def54b24a900 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface-TensorGRAY8ToSurface.h @@ -0,0 +1,248 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for input +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture struct r/o t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_structured t0, 4 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imad r0.x, cb0[0].y, vThreadID.y, vThreadID.x + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.x, r0.x, l(0), t0.xxxx + mul r0.xyz, r0.xxxx, l(0.003922, 0.003922, 0.003922, 0.000000) + mov r0.w, l(1.000000) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 10 instruction slots used +#endif + +const BYTE 
g_csTensorGRAY8ToSurface[] = +{ + 68, 88, 66, 67, 70, 127, + 157, 229, 16, 115, 132, 98, + 196, 130, 138, 103, 5, 24, + 67, 111, 1, 0, 0, 0, + 56, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 156, 3, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, + 1, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 156, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 
105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 72, 1, + 0, 0, 80, 0, 5, 0, + 82, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 162, 0, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 156, 24, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 95, 0, 0, 2, + 50, 0, 2, 0, 104, 0, + 0, 2, 1, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 35, 0, 0, 8, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 2, 0, 10, 0, + 2, 0, 167, 0, 0, 139, + 2, 35, 0, 128, 131, 153, + 25, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 6, 112, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 129, 128, + 128, 59, 129, 128, 128, 59, + 129, 128, 128, 59, 0, 0, + 0, 0, 54, 0, 0, 5, + 130, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 128, 63, 164, 0, + 0, 6, 242, 224, 17, 0, + 0, 0, 0, 0, 70, 5, + 2, 0, 70, 14, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 10, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface-TensorGRAY8ToSurfaceGRAY8.h b/winml/lib/Api.Image/shaders/TensorToSurface-TensorGRAY8ToSurfaceGRAY8.h new file mode 100644 index 0000000000000..479708862bdd8 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface-TensorGRAY8ToSurfaceGRAY8.h @@ -0,0 +1,248 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for input +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture struct r/o t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_structured t0, 4 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imad r0.x, cb0[0].y, vThreadID.y, vThreadID.x + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.x, r0.x, l(0), t0.xxxx + mul r0.x, r0.x, l(0.003922) + mov r0.yzw, l(0,0,0,0) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 10 instruction slots used +#endif + +const BYTE g_csTensorGRAY8ToSurfaceGRAY8[] = +{ + 
68, 88, 66, 67, 208, 27, + 239, 96, 118, 131, 82, 177, + 76, 190, 191, 4, 11, 43, + 22, 92, 1, 0, 0, 0, + 56, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 156, 3, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, + 1, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 156, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 
116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 72, 1, + 0, 0, 80, 0, 5, 0, + 82, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 162, 0, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 156, 24, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 95, 0, 0, 2, + 50, 0, 2, 0, 104, 0, + 0, 2, 1, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 35, 0, 0, 8, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 2, 0, 10, 0, + 2, 0, 167, 0, 0, 139, + 2, 35, 0, 128, 131, 153, + 25, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 6, 112, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 129, 128, + 128, 59, 54, 0, 0, 8, + 226, 0, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 164, 0, + 0, 6, 242, 224, 17, 0, + 0, 0, 0, 0, 70, 5, + 2, 0, 70, 14, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 10, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface-TensorRGB8ToSurface.h b/winml/lib/Api.Image/shaders/TensorToSurface-TensorRGB8ToSurface.h new file mode 100644 index 0000000000000..e66942a6ca94e --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface-TensorRGB8ToSurface.h @@ -0,0 +1,298 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for input +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture struct r/o t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_structured t0, 4 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.z, r0.y, l(0), t0.xxxx + mul r1.x, r0.z, l(0.003922) + imad r0.z, cb0[0].x, cb0[0].y, r0.y + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.z, r0.z, l(0), t0.xxxx + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + 
ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.x, r0.x, l(0), t0.xxxx + mul r1.yz, r0.zzxz, l(0.000000, 0.003922, 0.003922, 0.000000) + mov r1.w, l(1.000000) + store_uav_typed u0.xyzw, vThreadID.xyyy, r1.xyzw +endif +ret +// Approximately 17 instruction slots used +#endif + +const BYTE g_csTensorRGB8ToSurface[] = +{ + 68, 88, 66, 67, 171, 63, + 173, 152, 78, 65, 227, 162, + 129, 136, 254, 15, 52, 197, + 13, 81, 1, 0, 0, 0, + 56, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 156, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, + 1, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 156, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 
0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 72, 2, + 0, 0, 80, 0, 5, 0, + 146, 0, 0, 0, 106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 162, 0, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 156, 24, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 95, 0, 0, 2, + 50, 0, 2, 0, 104, 0, + 0, 2, 2, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 38, 0, 0, 10, 0, 208, + 0, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 167, 0, 0, 139, 2, 35, + 0, 128, 131, 153, 25, 0, + 66, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 6, 112, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 7, + 18, 0, 16, 0, 1, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 129, 128, 128, 59, + 35, 0, 0, 11, 66, 0, + 16, 0, 0, 0, 0, 0, + 10, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 128, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 167, 0, 0, 139, + 2, 35, 0, 128, 131, 153, + 25, 0, 66, 0, 16, 0, + 0, 
0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 6, 112, 16, 0, + 0, 0, 0, 0, 41, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 1, 0, + 0, 0, 30, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 167, 0, 0, 139, 2, 35, + 0, 128, 131, 153, 25, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 6, 112, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 10, + 98, 0, 16, 0, 1, 0, + 0, 0, 166, 8, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 0, 0, + 129, 128, 128, 59, 129, 128, + 128, 59, 0, 0, 0, 0, + 54, 0, 0, 5, 130, 0, + 16, 0, 1, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 128, 63, 164, 0, 0, 6, + 242, 224, 17, 0, 0, 0, + 0, 0, 70, 5, 2, 0, + 70, 14, 16, 0, 1, 0, + 0, 0, 21, 0, 0, 1, + 62, 0, 0, 1, 83, 84, + 65, 84, 148, 0, 0, 0, + 17, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 2, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface-TensorRGB8ToSurfaceGRAY8.h b/winml/lib/Api.Image/shaders/TensorToSurface-TensorRGB8ToSurfaceGRAY8.h new file mode 100644 index 0000000000000..e6c703636855e --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface-TensorRGB8ToSurfaceGRAY8.h @@ -0,0 +1,306 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// Resource bind info for input +// { +// +// float $Element; // Offset: 0 Size: 4 +// +// } +// +// +// Resource 
Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture struct r/o t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_structured t0, 4 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.z, r0.y, l(0), t0.xxxx + imad r0.w, cb0[0].x, cb0[0].y, r0.y + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.w, r0.w, l(0), t0.xxxx + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.x, r0.x, l(0), t0.xxxx + mul r0.y, r0.w, l(0.002805) + mad r0.y, r0.z, l(0.000834), r0.y + mad r0.x, r0.x, l(0.000283), r0.y + mov r0.yzw, l(0,0,0,0) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 18 instruction slots used +#endif + +const BYTE g_csTensorRGB8ToSurfaceGRAY8[] = +{ + 68, 88, 66, 67, 203, 21, + 43, 103, 143, 172, 251, 111, + 63, 74, 141, 225, 46, 231, + 143, 14, 1, 0, 0, 0, + 100, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 44, 2, 0, 0, 60, 2, + 0, 0, 76, 2, 0, 0, + 200, 4, 0, 0, 82, 68, + 69, 70, 240, 1, 0, 0, + 2, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 
200, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, + 1, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 224, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 156, 0, 0, 0, + 1, 0, 0, 0, 108, 1, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 48, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 64, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 100, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 64, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 171, 171, 148, 1, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 164, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 36, 69, 108, 101, + 109, 101, 110, 116, 0, 102, + 108, 111, 97, 116, 0, 171, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 157, 1, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 49, + 48, 46, 49, 0, 73, 83, + 71, 78, 8, 0, 0, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 79, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 83, 72, 69, 88, 116, 2, + 0, 0, 80, 0, 5, 0, + 157, 0, 0, 0, 
106, 8, + 0, 1, 89, 0, 0, 4, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 162, 0, 0, 4, 0, 112, + 16, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 156, 24, + 0, 4, 0, 224, 17, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 95, 0, 0, 2, + 50, 0, 2, 0, 104, 0, + 0, 2, 1, 0, 0, 0, + 155, 0, 0, 4, 16, 0, + 0, 0, 4, 0, 0, 0, + 1, 0, 0, 0, 79, 0, + 0, 7, 50, 0, 16, 0, + 0, 0, 0, 0, 70, 0, + 2, 0, 22, 133, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 31, 0, 4, 3, 10, 0, + 16, 0, 0, 0, 0, 0, + 38, 0, 0, 10, 0, 208, + 0, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 167, 0, 0, 139, 2, 35, + 0, 128, 131, 153, 25, 0, + 66, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 6, 112, 16, 0, 0, 0, + 0, 0, 35, 0, 0, 11, + 130, 0, 16, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 167, 0, + 0, 139, 2, 35, 0, 128, + 131, 153, 25, 0, 130, 0, + 16, 0, 0, 0, 0, 0, + 58, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 0, 0, 6, 112, + 16, 0, 0, 0, 0, 0, + 41, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 1, 0, 0, 0, 30, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 167, 0, 0, 139, + 2, 35, 0, 128, 131, 153, + 25, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 0, 0, 6, 112, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 34, 0, 16, 0, + 0, 0, 0, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 41, 207, + 55, 59, 50, 0, 0, 9, + 34, 0, 16, 0, 0, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 95, 142, 90, 58, + 26, 0, 16, 0, 0, 0, + 0, 0, 50, 0, 0, 9, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 
11, 114, 148, 57, + 26, 0, 16, 0, 0, 0, + 0, 0, 54, 0, 0, 8, + 226, 0, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 164, 0, + 0, 6, 242, 224, 17, 0, + 0, 0, 0, 0, 70, 5, + 2, 0, 70, 14, 16, 0, + 0, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 18, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 5, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface16-TensorBGR8ToSurface.h b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorBGR8ToSurface.h new file mode 100644 index 0000000000000..c41c65ca1f6e4 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorBGR8ToSurface.h @@ -0,0 +1,271 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float buf t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer 
CB0[1], immediateIndexed +dcl_resource_buffer (float,float,float,float) t0 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_indexable(buffer)(float,float,float,float) r0.z, r0.yyyy, t0.yzxw + mul r1.z, r0.z, l(0.003922) + imad r0.z, cb0[0].x, cb0[0].y, r0.y + ld_indexable(buffer)(float,float,float,float) r0.z, r0.zzzz, t0.yzxw + mul r1.y, r0.z, l(0.003922) + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + ld_indexable(buffer)(float,float,float,float) r0.x, r0.xxxx, t0.xyzw + mul r1.x, r0.x, l(0.003922) + mov r1.w, l(1.000000) + store_uav_typed u0.xyzw, vThreadID.xyyy, r1.xyzw +endif +ret +// Approximately 18 instruction slots used +#endif + +const BYTE g_csTensorBGR8ToSurface[] = +{ + 68, 88, 66, 67, 92, 56, + 223, 206, 81, 198, 197, 60, + 101, 209, 115, 18, 127, 24, + 223, 226, 1, 0, 0, 0, + 188, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 32, 4, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 1, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 
255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 64, 2, 0, 0, + 80, 0, 5, 0, 144, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 8, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 24, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 50, 0, + 2, 0, 104, 0, 0, 2, + 2, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 38, 0, + 0, 10, 0, 208, 0, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 35, 0, 0, 8, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 2, 0, + 10, 0, 2, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 66, 0, + 16, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 150, 124, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 66, 0, 16, 0, + 1, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 129, 128, + 128, 59, 35, 0, 0, 11, + 66, 0, 16, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 
0, 0, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 66, 0, + 16, 0, 0, 0, 0, 0, + 166, 10, 16, 0, 0, 0, + 0, 0, 150, 124, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 34, 0, 16, 0, + 1, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 129, 128, + 128, 59, 41, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 1, 0, 0, 0, + 30, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 18, 0, + 16, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 70, 126, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 18, 0, 16, 0, + 1, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 129, 128, + 128, 59, 54, 0, 0, 5, + 130, 0, 16, 0, 1, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 128, 63, 164, 0, + 0, 6, 242, 224, 17, 0, + 0, 0, 0, 0, 70, 5, + 2, 0, 70, 14, 16, 0, + 1, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 18, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 5, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface16-TensorBGR8ToSurfaceGRAY8.h b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorBGR8ToSurfaceGRAY8.h new file mode 100644 index 0000000000000..dee534dbd2aa8 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorBGR8ToSurfaceGRAY8.h @@ -0,0 +1,276 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 
+// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float buf t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_buffer (float,float,float,float) t0 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_indexable(buffer)(float,float,float,float) r0.z, r0.yyyy, t0.yzxw + imad r0.w, cb0[0].x, cb0[0].y, r0.y + ld_indexable(buffer)(float,float,float,float) r0.w, r0.wwww, t0.yzwx + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + ld_indexable(buffer)(float,float,float,float) r0.x, r0.xxxx, t0.xyzw + mul r0.y, r0.w, l(0.002805) + mad r0.x, r0.x, l(0.000834), r0.y + mad r0.x, r0.z, l(0.000283), r0.x + mov r0.yzw, l(0,0,0,0) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 18 instruction slots used +#endif + +const BYTE g_csTensorBGR8ToSurfaceGRAY8[] = +{ + 68, 88, 66, 67, 26, 168, + 219, 87, 158, 42, 221, 180, + 11, 12, 65, 116, 205, 193, + 101, 86, 1, 0, 0, 0, + 216, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 60, 4, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 
0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 1, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 92, 2, 0, 0, + 80, 0, 5, 0, 151, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 8, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 24, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 50, 0, + 2, 0, 104, 0, 0, 2, + 1, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 
16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 38, 0, + 0, 10, 0, 208, 0, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 35, 0, 0, 8, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 2, 0, + 10, 0, 2, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 66, 0, + 16, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 150, 124, 16, 0, + 0, 0, 0, 0, 35, 0, + 0, 11, 130, 0, 16, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 45, 0, 0, 137, 66, 0, + 0, 128, 67, 85, 21, 0, + 130, 0, 16, 0, 0, 0, + 0, 0, 246, 15, 16, 0, + 0, 0, 0, 0, 150, 115, + 16, 0, 0, 0, 0, 0, + 41, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 1, 0, 0, 0, 30, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 45, 0, 0, 137, + 66, 0, 0, 128, 67, 85, + 21, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 0, 0, 0, 0, + 70, 126, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 7, + 34, 0, 16, 0, 0, 0, + 0, 0, 58, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 41, 207, 55, 59, + 50, 0, 0, 9, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 95, 142, 90, 58, 26, 0, + 16, 0, 0, 0, 0, 0, + 50, 0, 0, 9, 18, 0, + 16, 0, 0, 0, 0, 0, + 42, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 11, 114, 148, 57, 10, 0, + 16, 0, 0, 0, 0, 0, + 54, 0, 0, 8, 226, 0, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 164, 0, 0, 6, + 242, 224, 17, 0, 0, 0, + 0, 0, 70, 5, 2, 0, + 70, 14, 16, 0, 0, 0, + 0, 0, 21, 0, 0, 1, + 62, 0, 0, 1, 83, 84, + 65, 84, 148, 0, 0, 0, + 18, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 3, 0, + 0, 0, 5, 0, 0, 0, + 2, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 
+ 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface16-TensorGRAY8ToSurface.h b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorGRAY8ToSurface.h new file mode 100644 index 0000000000000..95fcb1588b145 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorGRAY8ToSurface.h @@ -0,0 +1,221 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float buf t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_buffer (float,float,float,float) t0 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imad r0.x, cb0[0].y, vThreadID.y, vThreadID.x + ld_indexable(buffer)(float,float,float,float) r0.x, r0.xxxx, t0.xyzw + mul r0.xyz, r0.xxxx, l(0.003922, 0.003922, 0.003922, 0.000000) + mov r0.w, l(1.000000) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 10 instruction 
slots used +#endif + +const BYTE g_csTensorGRAY8ToSurface[] = +{ + 68, 88, 66, 67, 95, 57, + 17, 120, 240, 31, 90, 53, + 152, 219, 64, 79, 192, 196, + 70, 234, 1, 0, 0, 0, + 188, 3, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 32, 3, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 1, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 64, 1, 0, 0, + 80, 0, 5, 0, 80, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, 
+ 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 8, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 24, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 50, 0, + 2, 0, 104, 0, 0, 2, + 1, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 18, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 2, 0, 10, 0, 2, 0, + 45, 0, 0, 137, 66, 0, + 0, 128, 67, 85, 21, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 70, 126, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 129, 128, 128, 59, 129, 128, + 128, 59, 129, 128, 128, 59, + 0, 0, 0, 0, 54, 0, + 0, 5, 130, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 128, 63, + 164, 0, 0, 6, 242, 224, + 17, 0, 0, 0, 0, 0, + 70, 5, 2, 0, 70, 14, + 16, 0, 0, 0, 0, 0, + 21, 0, 0, 1, 62, 0, + 0, 1, 83, 84, 65, 84, + 148, 0, 0, 0, 10, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 2, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface16-TensorGRAY8ToSurfaceGRAY8.h b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorGRAY8ToSurfaceGRAY8.h new file mode 100644 index 0000000000000..6f30e6cf94ea8 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorGRAY8ToSurfaceGRAY8.h @@ -0,0 +1,221 @@ +#if 0 +// +// 
Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float buf t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_buffer (float,float,float,float) t0 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imad r0.x, cb0[0].y, vThreadID.y, vThreadID.x + ld_indexable(buffer)(float,float,float,float) r0.x, r0.xxxx, t0.xyzw + mul r0.x, r0.x, l(0.003922) + mov r0.yzw, l(0,0,0,0) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 10 instruction slots used +#endif + +const BYTE g_csTensorGRAY8ToSurfaceGRAY8[] = +{ + 68, 88, 66, 67, 79, 211, + 56, 192, 105, 49, 155, 168, + 69, 205, 37, 0, 34, 75, + 7, 230, 1, 0, 0, 0, + 188, 3, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 32, 3, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 1, 0, 0, 0, 255, 255, + 255, 255, 
0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 64, 1, 0, 0, + 80, 0, 5, 0, 80, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 8, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 24, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 50, 0, + 2, 0, 104, 0, 0, 2, + 1, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 35, 0, + 0, 8, 18, 0, 16, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
26, 0, + 2, 0, 10, 0, 2, 0, + 45, 0, 0, 137, 66, 0, + 0, 128, 67, 85, 21, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 70, 126, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 129, 128, 128, 59, 54, 0, + 0, 8, 226, 0, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 164, 0, 0, 6, 242, 224, + 17, 0, 0, 0, 0, 0, + 70, 5, 2, 0, 70, 14, + 16, 0, 0, 0, 0, 0, + 21, 0, 0, 1, 62, 0, + 0, 1, 83, 84, 65, 84, + 148, 0, 0, 0, 10, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 2, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface16-TensorRGB8ToSurface.h b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorRGB8ToSurface.h new file mode 100644 index 0000000000000..16ecfca484a18 --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorRGB8ToSurface.h @@ -0,0 +1,271 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float buf t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// 
Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_buffer (float,float,float,float) t0 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 2 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_indexable(buffer)(float,float,float,float) r0.z, r0.yyyy, t0.yzxw + mul r1.x, r0.z, l(0.003922) + imad r0.z, cb0[0].x, cb0[0].y, r0.y + ld_indexable(buffer)(float,float,float,float) r0.z, r0.zzzz, t0.yzxw + mul r1.y, r0.z, l(0.003922) + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + ld_indexable(buffer)(float,float,float,float) r0.x, r0.xxxx, t0.xyzw + mul r1.z, r0.x, l(0.003922) + mov r1.w, l(1.000000) + store_uav_typed u0.xyzw, vThreadID.xyyy, r1.xyzw +endif +ret +// Approximately 18 instruction slots used +#endif + +const BYTE g_csTensorRGB8ToSurface[] = +{ + 68, 88, 66, 67, 176, 147, + 187, 101, 220, 196, 15, 142, + 62, 141, 147, 163, 136, 77, + 210, 69, 1, 0, 0, 0, + 188, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 0, 200, 1, + 0, 0, 216, 1, 0, 0, + 32, 4, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 1, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 
116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 64, 2, 0, 0, + 80, 0, 5, 0, 144, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 8, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 24, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 50, 0, + 2, 0, 104, 0, 0, 2, + 2, 0, 0, 0, 155, 0, + 0, 4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 38, 0, + 0, 10, 0, 208, 0, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 35, 0, 0, 8, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 2, 0, + 10, 0, 2, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 66, 0, + 16, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 150, 124, 16, 0, + 0, 0, 
0, 0, 56, 0, + 0, 7, 18, 0, 16, 0, + 1, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 129, 128, + 128, 59, 35, 0, 0, 11, + 66, 0, 16, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 66, 0, + 16, 0, 0, 0, 0, 0, + 166, 10, 16, 0, 0, 0, + 0, 0, 150, 124, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 34, 0, 16, 0, + 1, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 129, 128, + 128, 59, 41, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 1, 0, 0, 0, + 30, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 18, 0, + 16, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 70, 126, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 66, 0, 16, 0, + 1, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 129, 128, + 128, 59, 54, 0, 0, 5, + 130, 0, 16, 0, 1, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 128, 63, 164, 0, + 0, 6, 242, 224, 17, 0, + 0, 0, 0, 0, 70, 5, + 2, 0, 70, 14, 16, 0, + 1, 0, 0, 0, 21, 0, + 0, 1, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 18, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 5, 0, + 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/TensorToSurface16-TensorRGB8ToSurfaceGRAY8.h b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorRGB8ToSurfaceGRAY8.h new file mode 100644 index 0000000000000..db359493dfc6b --- /dev/null +++ b/winml/lib/Api.Image/shaders/TensorToSurface16-TensorRGB8ToSurfaceGRAY8.h @@ 
-0,0 +1,276 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer cbCS +// { +// +// uint height; // Offset: 0 Size: 4 +// uint width; // Offset: 4 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim HLSL Bind Count +// ------------------------------ ---------- ------- ----------- -------------- ------ +// input texture float buf t0 1 +// output UAV float4 2d u0 1 +// cbCS cbuffer NA NA cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_0 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[1], immediateIndexed +dcl_resource_buffer (float,float,float,float) t0 +dcl_uav_typed_texture2d (float,float,float,float) u0 +dcl_input vThreadID.xy +dcl_temps 1 +dcl_thread_group 16, 4, 1 +ult r0.xy, vThreadID.xyxx, cb0[0].yxyy +and r0.x, r0.y, r0.x +if_nz r0.x + imul null, r0.x, cb0[0].y, cb0[0].x + imad r0.y, cb0[0].y, vThreadID.y, vThreadID.x + ld_indexable(buffer)(float,float,float,float) r0.z, r0.yyyy, t0.yzxw + imad r0.w, cb0[0].x, cb0[0].y, r0.y + ld_indexable(buffer)(float,float,float,float) r0.w, r0.wwww, t0.yzwx + ishl r0.x, r0.x, l(1) + iadd r0.x, r0.x, r0.y + ld_indexable(buffer)(float,float,float,float) r0.x, r0.xxxx, t0.xyzw + mul r0.y, r0.w, l(0.002805) + mad r0.y, r0.z, l(0.000834), r0.y + mad r0.x, r0.x, l(0.000283), r0.y + mov r0.yzw, l(0,0,0,0) + store_uav_typed u0.xyzw, vThreadID.xyyy, r0.xyzw +endif +ret +// Approximately 18 instruction slots used +#endif + +const BYTE g_csTensorRGB8ToSurfaceGRAY8[] = +{ + 68, 88, 66, 67, 100, 14, + 198, 157, 254, 250, 215, 255, + 83, 243, 25, 204, 181, 131, + 126, 24, 1, 0, 0, 0, + 216, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 184, 1, 0, 
0, 200, 1, + 0, 0, 216, 1, 0, 0, + 60, 4, 0, 0, 82, 68, + 69, 70, 124, 1, 0, 0, + 1, 0, 0, 0, 176, 0, + 0, 0, 3, 0, 0, 0, + 60, 0, 0, 0, 0, 5, + 83, 67, 0, 1, 0, 0, + 82, 1, 0, 0, 82, 68, + 49, 49, 60, 0, 0, 0, + 24, 0, 0, 0, 32, 0, + 0, 0, 40, 0, 0, 0, + 36, 0, 0, 0, 12, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 1, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 0, 0, 162, 0, 0, 0, + 4, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 169, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 105, 110, 112, 117, 116, 0, + 111, 117, 116, 112, 117, 116, + 0, 99, 98, 67, 83, 0, + 171, 171, 169, 0, 0, 0, + 2, 0, 0, 0, 200, 0, + 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 24, 1, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 40, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 76, 1, 0, 0, 4, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 40, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 104, 101, + 105, 103, 104, 116, 0, 100, + 119, 111, 114, 100, 0, 171, + 171, 171, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 1, + 0, 0, 119, 105, 100, 116, + 104, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 49, 48, 46, 49, 0, + 171, 171, 73, 83, 71, 78, + 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 79, 83, 71, 78, 8, 0, + 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 83, 72, + 69, 88, 92, 2, 0, 0, + 80, 0, 5, 0, 151, 0, + 0, 0, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 88, 8, + 0, 4, 0, 112, 16, 0, + 0, 0, 0, 0, 85, 85, + 0, 0, 156, 24, 0, 4, + 0, 224, 17, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 95, 0, 0, 2, 50, 0, + 2, 0, 104, 0, 0, 2, + 1, 0, 0, 0, 155, 0, + 0, 
4, 16, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 79, 0, 0, 7, + 50, 0, 16, 0, 0, 0, + 0, 0, 70, 0, 2, 0, + 22, 133, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 31, 0, + 4, 3, 10, 0, 16, 0, + 0, 0, 0, 0, 38, 0, + 0, 10, 0, 208, 0, 0, + 18, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 35, 0, 0, 8, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 128, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 2, 0, + 10, 0, 2, 0, 45, 0, + 0, 137, 66, 0, 0, 128, + 67, 85, 21, 0, 66, 0, + 16, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 150, 124, 16, 0, + 0, 0, 0, 0, 35, 0, + 0, 11, 130, 0, 16, 0, + 0, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 128, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 45, 0, 0, 137, 66, 0, + 0, 128, 67, 85, 21, 0, + 130, 0, 16, 0, 0, 0, + 0, 0, 246, 15, 16, 0, + 0, 0, 0, 0, 150, 115, + 16, 0, 0, 0, 0, 0, + 41, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 1, 0, 0, 0, 30, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 0, 0, 0, + 0, 0, 45, 0, 0, 137, + 66, 0, 0, 128, 67, 85, + 21, 0, 18, 0, 16, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 0, 0, 0, 0, + 70, 126, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 7, + 34, 0, 16, 0, 0, 0, + 0, 0, 58, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 41, 207, 55, 59, + 50, 0, 0, 9, 34, 0, + 16, 0, 0, 0, 0, 0, + 42, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 95, 142, 90, 58, 26, 0, + 16, 0, 0, 0, 0, 0, + 50, 0, 0, 9, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 11, 114, 148, 57, 26, 0, + 16, 0, 0, 0, 0, 0, + 54, 0, 0, 8, 226, 0, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 164, 0, 0, 6, + 242, 224, 17, 0, 0, 0, + 0, 0, 70, 5, 2, 0, + 70, 14, 16, 0, 0, 0, + 0, 0, 21, 0, 0, 1, + 62, 0, 0, 1, 83, 84, + 65, 84, 148, 0, 0, 0, + 18, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 
0, + 1, 0, 0, 0, 3, 0, + 0, 0, 5, 0, 0, 0, + 2, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0 +}; diff --git a/winml/lib/Api.Image/shaders/gen.bat b/winml/lib/Api.Image/shaders/gen.bat new file mode 100644 index 0000000000000..718d954f82446 --- /dev/null +++ b/winml/lib/Api.Image/shaders/gen.bat @@ -0,0 +1,25 @@ + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor-SurfaceToTensorBGR8.h /E "SurfaceToTensorBGR8" /Vn g_csSurfaceToTensorBGR8 SurfaceToTensorFloat.hlsl + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor-SurfaceToTensorGRAY8.h /E "SurfaceToTensorGRAY8" /Vn g_csSurfaceToTensorGRAY8 SurfaceToTensorFloat.hlsl + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor-SurfaceToTensorRGB8.h /E "SurfaceToTensorRGB8" /Vn g_csSurfaceToTensorRGB8 SurfaceToTensorFloat.hlsl + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor-SurfaceGRAY8ToTensorBGR8.h /E "SurfaceGRAY8ToTensorBGR8" /Vn g_csSurfaceGRAY8ToTensorBGR8 SurfaceToTensorFloat.hlsl + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor-SurfaceGRAY8ToTensorGRAY8.h /E "SurfaceGRAY8ToTensorGRAY8" /Vn g_csSurfaceGRAY8ToTensorGRAY8 SurfaceToTensorFloat.hlsl + + fxc.exe /Tcs_5_0 /Fh TensorToSurface-TensorBGR8ToSurface.h /E "TensorBGR8ToSurface" /Vn g_csTensorBGR8ToSurface TensorFloatToSurface.hlsl + fxc.exe /Tcs_5_0 /Fh TensorToSurface-TensorRGB8ToSurface.h /E "TensorRGB8ToSurface" /Vn g_csTensorRGB8ToSurface TensorFloatToSurface.hlsl + fxc.exe /Tcs_5_0 /Fh TensorToSurface-TensorGRAY8ToSurface.h /E "TensorGRAY8ToSurface" /Vn g_csTensorGRAY8ToSurface TensorFloatToSurface.hlsl + fxc.exe /Tcs_5_0 /Fh TensorToSurface-TensorBGR8ToSurfaceGRAY8.h /E "TensorBGR8ToSurfaceGRAY8" /Vn g_csTensorBGR8ToSurfaceGRAY8 TensorFloatToSurface.hlsl + 
fxc.exe /Tcs_5_0 /Fh TensorToSurface-TensorRGB8ToSurfaceGRAY8.h /E "TensorRGB8ToSurfaceGRAY8" /Vn g_csTensorRGB8ToSurfaceGRAY8 TensorFloatToSurface.hlsl + fxc.exe /Tcs_5_0 /Fh TensorToSurface-TensorGRAY8ToSurfaceGRAY8.h /E "TensorGRAY8ToSurfaceGRAY8" /Vn g_csTensorGRAY8ToSurfaceGRAY8 TensorFloatToSurface.hlsl + + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor16-SurfaceToTensorBGR8.h /E "SurfaceToTensorBGR8" /Vn g_csSurfaceToTensorBGR8 SurfaceToTensorFloat.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor16-SurfaceToTensorGRAY8.h /E "SurfaceToTensorGRAY8" /Vn g_csSurfaceToTensorGRAY8 SurfaceToTensorFloat.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor16-SurfaceToTensorRGB8.h /E "SurfaceToTensorRGB8" /Vn g_csSurfaceToTensorRGB8 SurfaceToTensorFloat.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor16-SurfaceGRAY8ToTensorBGR8.h /E "SurfaceGRAY8ToTensorBGR8" /Vn g_csSurfaceGRAY8ToTensorBGR8 SurfaceToTensorFloat.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh SurfaceToTensor16-SurfaceGRAY8ToTensorGRAY8.h /E "SurfaceGRAY8ToTensorGRAY8" /Vn g_csSurfaceGRAY8ToTensorGRAY8 SurfaceToTensorFloat.hlsl /DFP16 + + fxc.exe /Tcs_5_0 /Fh TensorToSurface16-TensorBGR8ToSurface.h /E "TensorBGR8ToSurface" /Vn g_csTensorBGR8ToSurface TensorFloatToSurface.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh TensorToSurface16-TensorRGB8ToSurface.h /E "TensorRGB8ToSurface" /Vn g_csTensorRGB8ToSurface TensorFloatToSurface.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh TensorToSurface16-TensorGRAY8ToSurface.h /E "TensorGRAY8ToSurface" /Vn g_csTensorGRAY8ToSurface TensorFloatToSurface.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh TensorToSurface16-TensorBGR8ToSurfaceGRAY8.h /E "TensorBGR8ToSurfaceGRAY8" /Vn g_csTensorBGR8ToSurfaceGRAY8 TensorFloatToSurface.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh TensorToSurface16-TensorRGB8ToSurfaceGRAY8.h /E "TensorRGB8ToSurfaceGRAY8" /Vn g_csTensorRGB8ToSurfaceGRAY8 TensorFloatToSurface.hlsl /DFP16 + fxc.exe /Tcs_5_0 /Fh TensorToSurface16-TensorGRAY8ToSurfaceGRAY8.h /E "TensorGRAY8ToSurfaceGRAY8" /Vn 
g_csTensorGRAY8ToSurfaceGRAY8 TensorFloatToSurface.hlsl /DFP16 \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeCpuSessionBuilder.cpp b/winml/lib/Api.Ort/OnnxruntimeCpuSessionBuilder.cpp new file mode 100644 index 0000000000000..32796ed45b4a6 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeCpuSessionBuilder.cpp @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#include "OnnxruntimeCpuSessionBuilder.h" +#include "OnnxruntimeEngine.h" +#include "OnnxruntimeErrors.h" + +using namespace Windows::AI::MachineLearning; + +HRESULT OnnxruntimeCpuSessionBuilder::RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory) { + engine_factory_ = engine_factory; + return S_OK; +} + +HRESULT +OnnxruntimeCpuSessionBuilder::CreateSessionOptions( + OrtSessionOptions** options) { + RETURN_HR_IF_NULL(E_POINTER, options); + + auto ort_api = engine_factory_->UseOrtApi(); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtSessionOptions* ort_options; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateSessionOptions(&ort_options), + ort_api); + + auto session_options = UniqueOrtSessionOptions(ort_options, ort_api->ReleaseSessionOptions); + + // set the graph optimization level to all (used to be called level 3) + RETURN_HR_IF_NOT_OK_MSG(ort_api->SetSessionGraphOptimizationLevel(session_options.get(), GraphOptimizationLevel::ORT_ENABLE_ALL), + ort_api); + + // Onnxruntime will use half the number of concurrent threads supported on the system + // by default. This causes MLAS to not exercise every logical core. + // We force the thread pool size to be maxxed out to ensure that WinML always + // runs the fastest. 
+ RETURN_HR_IF_NOT_OK_MSG(ort_api->SetIntraOpNumThreads(session_options.get(), std::thread::hardware_concurrency()), + ort_api); + +#ifndef _WIN64 + auto use_arena = false; +#else + auto use_arena = true; +#endif + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->OrtSessionOptionsAppendExecutionProvider_CPU(session_options.get(), use_arena), + ort_api); + + // call release() so the underlying OrtSessionOptions object isn't freed + *options = session_options.release(); + + return S_OK; +} + +HRESULT +OnnxruntimeCpuSessionBuilder::CreateSession( + OrtSessionOptions* options, + OrtSession** session) { + RETURN_HR_IF_NULL(E_POINTER, session); + + auto ort_api = engine_factory_->UseOrtApi(); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtEnv* ort_env; + RETURN_IF_FAILED(engine_factory_->GetOrtEnvironment(&ort_env)); + + OrtSession* ort_session_raw; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->CreateSessionWithoutModel(ort_env, options, &ort_session_raw), + engine_factory_->UseOrtApi()); + + auto ort_session = UniqueOrtSession(ort_session_raw, ort_api->ReleaseSession); + + *session = ort_session.release(); + + return S_OK; +} + +HRESULT +OnnxruntimeCpuSessionBuilder::Initialize( + OrtSession* session) { + RETURN_HR_IF_NULL(E_INVALIDARG, session); + + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionInitialize(session), + engine_factory_->UseOrtApi()); + + return S_OK; +} diff --git a/winml/lib/Api.Ort/OnnxruntimeCpuSessionBuilder.h b/winml/lib/Api.Ort/OnnxruntimeCpuSessionBuilder.h new file mode 100644 index 0000000000000..e1ad853429952 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeCpuSessionBuilder.h @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "OnnxruntimeSessionBuilder.h" + +namespace Windows::AI::MachineLearning { + +class OnnxruntimeEngineFactory; + +class OnnxruntimeCpuSessionBuilder : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IOrtSessionBuilder> { + public: + HRESULT RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory); + + HRESULT STDMETHODCALLTYPE CreateSessionOptions( + OrtSessionOptions** options) override; + + HRESULT STDMETHODCALLTYPE CreateSession( + OrtSessionOptions* options, + OrtSession** session) override; + + HRESULT STDMETHODCALLTYPE Initialize( + OrtSession* session) override; + + private: + Microsoft::WRL::ComPtr engine_factory_; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeDescriptorConverter.cpp b/winml/lib/Api.Ort/OnnxruntimeDescriptorConverter.cpp new file mode 100644 index 0000000000000..4a4c81352ec3f --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeDescriptorConverter.cpp @@ -0,0 +1,633 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#include + +#include "OnnxruntimeDescriptorConverter.h" +#include "ImageFeatureDescriptor.h" +#include "MapFeatureDescriptor.h" +#include "SequenceFeatureDescriptor.h" +#include "TensorFeatureDescriptor.h" + +#include "winrt/windows.foundation.collections.h" +#include "winrt/windows.graphics.imaging.h" + +#include "OnnxruntimeEngine.h" + +#include "OnnxruntimeErrors.h" + +using namespace winrt::Windows::AI::MachineLearning; + +// BitmapPixelFormat constants +static const char* c_bitmap_pixel_format_key = "Image.BitmapPixelFormat"; +static const char* c_supported_pixel_formats[] = + { + "Gray8", + "Rgb8", + "Bgr8"}; + +// ColorSpaceGamma constants +// Unlike the other supported value arrays, this is an UNSUPPORTED list. 
+// Unfortunately, the original RS5 implementation blocked unsupported +// color_space_gamma values (Linear), and did not allow the actual supported +// values (SRGB). +static const char* c_color_space_key = "Image.ColorSpaceGamma"; +static const char* c_unsupported_color_spaces[] = + { + "Linear"}; + +// NominalPixelRange constants +static const char* c_nominal_range_key = "Image.NominalPixelRange"; +static const char* c_supported_nominal_ranges[] = + { + "NominalRange_0_255"}; + +namespace Windows::AI::MachineLearning { + +// Forward declare CreateFeatureDescriptor +static winml::ILearningModelFeatureDescriptor +CreateFeatureDescriptor( + OnnxruntimeEngineFactory* engine_factory, + const OnnxruntimeValueInfoWrapper* feature_descriptor, + const std::unordered_map& metadata); + +static TensorKind +TensorKindFromONNXTensorElementDataType(ONNXTensorElementDataType dataType) { + switch (dataType) { + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: { + return TensorKind::Boolean; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: { + return TensorKind::String; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: { + return TensorKind::Float16; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: { + return TensorKind::Float; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: { + return TensorKind::Double; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: { + return TensorKind::Int8; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: { + return TensorKind::Int16; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: { + return TensorKind::Int32; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: { + return TensorKind::Int64; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: { + return TensorKind::UInt8; + } + case 
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: { + return TensorKind::UInt16; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: { + return TensorKind::UInt32; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: { + return TensorKind::UInt64; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: { + return TensorKind::Complex64; + } + case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: { + return TensorKind::Complex128; + } + default: { + return TensorKind::Undefined; + } + } +} + +static std::string +TensorKindToString(TensorKind tensorKind) { + switch (tensorKind) { + case TensorKind::Float: { + return "float"; + } + case TensorKind::UInt8: { + return "uint8"; + } + case TensorKind::Int8: { + return "int8"; + } + case TensorKind::UInt16: { + return "uint16"; + } + case TensorKind::Int16: { + return "int16"; + } + case TensorKind::Int32: { + return "int32"; + } + case TensorKind::Int64: { + return "int64"; + } + case TensorKind::String: { + return "string"; + } + case TensorKind::Boolean: { + return "boolean"; + } + case TensorKind::Float16: { + return "float16"; + } + case TensorKind::Double: { + return "double"; + } + case TensorKind::UInt32: { + return "uint32"; + } + case TensorKind::UInt64: { + return "uint64"; + } + case TensorKind::Complex64: { + return "complex64"; + } + case TensorKind::Complex128: { + return "complex128"; + } + case TensorKind::Undefined: + default: { + return "undefined"; + } + } +} + +static const char* +FetchMetadataValueOrNull( + const std::unordered_map& metadata, + const char* metadata_key) { + auto metadata_pair = metadata.find(metadata_key); + auto metadata_exists = metadata_pair != metadata.end(); + return metadata_exists + ? 
metadata_pair->second.c_str() + : nullptr; +} + +template +static bool +IsValueInRange( + const char* value, + const char* (&range)[TNumSupportedValues]) { + if (value) { + auto range_end = range + TNumSupportedValues; + auto found = std::find_if( + range, + range_end, + [&](auto& supported_value) { + return std::strcmp(supported_value, value) == 0; + }) != range_end; + return found; + } + return false; +} + +enum class RangeType { AllowedList, + BlockedList }; + +template +static bool +CheckImageMetadataIsUnsupported( + const std::unordered_map& metadata, + const char* metadata_key, + const char* (&range)[TNumSupportedValues], + std::ostringstream& log_stream, + RangeType range_type = RangeType::AllowedList) { + // Check is the model has pixel format metadata. + // This is retrieved from the metadata (which is global to the model). + // We only consider formats that are supported in the image converter. + // If not supported you MUST bind as a tensor. + auto value = FetchMetadataValueOrNull(metadata, metadata_key); + auto metadata_exists = value != nullptr; + if (metadata_exists) { + auto found = IsValueInRange(value, range); + + // if list of allowed values + auto is_allowed_list = range_type == RangeType::AllowedList; + auto is_not_in_allowed_list = is_allowed_list && !found; + + // if list of blocked values + auto is_blocked_list = range_type == RangeType::BlockedList; + auto is_in_blocked_list = is_blocked_list && found; + + auto is_unsupported = is_not_in_allowed_list || is_in_blocked_list; + + // log + if (is_unsupported) { + log_stream << "Unsupported " + << metadata_key + << ": " + << value + << " found." 
+ << std::endl; + } + + return is_unsupported; + } + + // No metadata, so it cannot be unsupported + return false; +} + +static std::pair +CreateBitmapPixelFormatAndAlphaModeInfo( + const char* pixel_format) { + if (pixel_format) { + auto comparator = + std::bind(std::strcmp, pixel_format, std::placeholders::_1); + + if (0 == comparator("Gray8")) { + return {wgi::BitmapPixelFormat::Gray8, wgi::BitmapAlphaMode::Premultiplied}; + } else if (0 == comparator("Rgb8")) { + return {wgi::BitmapPixelFormat::Rgba8, wgi::BitmapAlphaMode::Premultiplied}; + } else if (0 == comparator("Bgr8")) { + return {wgi::BitmapPixelFormat::Bgra8, wgi::BitmapAlphaMode::Premultiplied}; + } else if (0 == comparator("Rgba8")) { + return {wgi::BitmapPixelFormat::Rgba8, wgi::BitmapAlphaMode::Straight}; + } else if (0 == comparator("Bgra8")) { + return {wgi::BitmapPixelFormat::Bgra8, wgi::BitmapAlphaMode::Straight}; + } + } + + // default value, non conforming values are overridden to Bgra8, Premultiplied + return {wgi::BitmapPixelFormat::Bgra8, wgi::BitmapAlphaMode::Premultiplied}; +} + +static winmlp::ImageColorSpaceGamma +CreateImageColorSpaceGamma(const char* color_space_gamma) { + using namespace winmlp; + + if (color_space_gamma) { + auto comparator = + std::bind(std::strcmp, color_space_gamma, std::placeholders::_1); + + if (0 == comparator("Linear")) { + return ImageColorSpaceGamma::ImageColorSpaceGamma_Linear; + } else if (0 == comparator("SRGB")) { + return ImageColorSpaceGamma::ImageColorSpaceGamma_SRGB; + } + } + + // default value, non conforming values are overridden to SRGB + return ImageColorSpaceGamma::ImageColorSpaceGamma_SRGB; +} + +static winmlp::ImageNominalPixelRange +CreateImageNominalPixelRange(const char* nominal_range) { + using namespace winmlp; + + if (nominal_range) { + auto comparator = + std::bind(std::strcmp, nominal_range, std::placeholders::_1); + + if (0 == comparator("NominalRange_0_255")) { + return 
ImageNominalPixelRange::ImageNominalPixelRange_NominalRange_0_255; + } else if (0 == comparator("Normalized_0_1")) { + return ImageNominalPixelRange::ImageNominalPixelRange_Normalized_0_1; + } else if (0 == comparator("Normalized_1_1")) { + return ImageNominalPixelRange::ImageNominalPixelRange_Normalized_1_1; + } else if (0 == comparator("NominalRange_16_235")) { + return ImageNominalPixelRange::ImageNominalPixelRange_NominalRange_16_235; + } + } + + // default value, non conforming values are overridden to NominalRange_0_255 + return ImageNominalPixelRange::ImageNominalPixelRange_NominalRange_0_255; +} + +enum class TensorType { Tensor_Data, + Tensor_Image, + Tensor_Data_UnsupportedImageMetadata }; + +static TensorType +GetTensorType( + OnnxruntimeEngineFactory* engine_factory, + OrtTypeInfo* type_info, + const std::unordered_map& metadata) { + const char* denotation; + size_t len; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetDenotationFromTypeInfo(type_info, &denotation, &len), + engine_factory->UseOrtApi()); + + constexpr char c_image[] = "IMAGE"; + auto has_image_denotation = strncmp(denotation, c_image, _countof(c_image)) == 0; + if (!has_image_denotation) { + return TensorType::Tensor_Data; + } + + // Create log_stream to capture any warning messages + // for improperly annotated image tensor + std::ostringstream log_stream; + + // Check if the tensor value_info_proto is of type float. 
+ // IMAGE tensors MUST be of type float + const OrtTensorTypeAndShapeInfo* tensor_info; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->CastTypeInfoToTensorInfo(type_info, &tensor_info), + engine_factory->UseOrtApi()); + + ONNXTensorElementDataType tensor_element_data_type; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetTensorElementType(tensor_info, &tensor_element_data_type), + engine_factory->UseOrtApi()); + + auto tensor_kind = WinML::TensorKindFromONNXTensorElementDataType(tensor_element_data_type); + auto is_float_tensor = tensor_kind == TensorKind::Float; + if (!is_float_tensor) { + log_stream << "Unsupported image with " << TensorKindToString(tensor_kind) + << " found." << std::endl; + } + + // Check if the model has pixel format and color space metadata. + // This is retrieved from the metadata (which is global to the model). + // We only consider formats that are supported in the image converter. + // If not supported you MUST bind as a tensor. + auto has_unsupported_pixel_format = + CheckImageMetadataIsUnsupported(metadata, c_bitmap_pixel_format_key, + c_supported_pixel_formats, log_stream); + auto has_unsupported_nominal_range = + CheckImageMetadataIsUnsupported(metadata, c_nominal_range_key, + c_supported_nominal_ranges, log_stream); + + // Unfortunately, the original RS5 implementation blocked unsupported + // color_space_gamma values (Linear), and did not allow the actual supported + // values (SRGB) like the other image metadata. + // + // So to keep parity with RS5, we continue to check against a list of + // unsupported color spaces. 
+ auto has_unsupported_color_space_gamma = + CheckImageMetadataIsUnsupported(metadata, c_color_space_key, + c_unsupported_color_spaces, log_stream, RangeType::BlockedList); + + bool has_unsupported_image_metadata = + has_unsupported_pixel_format || + has_unsupported_color_space_gamma || + has_unsupported_nominal_range; + + auto is_tensor_improperly_annotated_as_image = + has_image_denotation && + (!is_float_tensor || + has_unsupported_image_metadata); + + if (is_tensor_improperly_annotated_as_image) { + TraceLoggingWrite(winml_trace_logging_provider, + "WinMLInputValidation", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingLevel(WINEVENT_LEVEL_WARNING), + TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(log_stream.str().c_str())); + } + + auto is_valid_image_tensor = + has_image_denotation && is_float_tensor && !has_unsupported_image_metadata; + + return is_valid_image_tensor + ? TensorType::Tensor_Image + : has_unsupported_image_metadata + ? TensorType::Tensor_Data_UnsupportedImageMetadata + : TensorType::Tensor_Data; +} + +static winml::ILearningModelFeatureDescriptor +CreateTensorFeatureDescriptor( + OnnxruntimeEngineFactory* engine_factory, + const OnnxruntimeValueInfoWrapper* feature_descriptor, + const std::unordered_map& metadata, + bool has_unsupported_image_metadata) { + auto type_info = feature_descriptor->type_info_.get(); + + const OrtTensorTypeAndShapeInfo* tensor_info; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->CastTypeInfoToTensorInfo(type_info, &tensor_info), + engine_factory->UseOrtApi()); + size_t num_dims; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetDimensionsCount(tensor_info, &num_dims), + engine_factory->UseOrtApi()); + + auto shape = std::vector(num_dims); + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetDimensions(tensor_info, shape.data(), shape.size()), + engine_factory->UseOrtApi()); + + ONNXTensorElementDataType tensor_element_data_type; + 
THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetTensorElementType(tensor_info, &tensor_element_data_type), + engine_factory->UseOrtApi()); + + auto kind = WinML::TensorKindFromONNXTensorElementDataType(tensor_element_data_type); + + auto descriptor = winrt::make( + feature_descriptor->name_, + feature_descriptor->description_, // description + kind, + shape, + feature_descriptor->name_length_ > 0, // is_required + has_unsupported_image_metadata); + + return descriptor.as(); +} + +static winml::ILearningModelFeatureDescriptor +CreateImageFeatureDescriptor( + OnnxruntimeEngineFactory* engine_factory, + const OnnxruntimeValueInfoWrapper* feature_descriptor, + const std::unordered_map& metadata) { + auto type_info = feature_descriptor->type_info_.get(); + + const OrtTensorTypeAndShapeInfo* tensor_info; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->CastTypeInfoToTensorInfo(type_info, &tensor_info), + engine_factory->UseOrtApi()); + + size_t num_dims; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetDimensionsCount(tensor_info, &num_dims), + engine_factory->UseOrtApi()); + + auto shape = std::vector(num_dims); + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetDimensions(tensor_info, shape.data(), shape.size()), + engine_factory->UseOrtApi()); + + ONNXTensorElementDataType tensor_element_data_type; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetTensorElementType(tensor_info, &tensor_element_data_type), + engine_factory->UseOrtApi()); + auto kind = WinML::TensorKindFromONNXTensorElementDataType(tensor_element_data_type); + + // pixel format and alpha + auto pixel_format_value = FetchMetadataValueOrNull(metadata, c_bitmap_pixel_format_key); + auto format_info = CreateBitmapPixelFormatAndAlphaModeInfo(pixel_format_value); + auto pixel_format = format_info.first; + auto alpha_mode = format_info.second; + + // color space gamma value + auto color_space_gamma_value = FetchMetadataValueOrNull(metadata, c_color_space_key); + auto color_space_gamma = 
CreateImageColorSpaceGamma(color_space_gamma_value); + + // nominal range + auto nominal_range_value = FetchMetadataValueOrNull(metadata, c_nominal_range_key); + auto nominal_range = CreateImageNominalPixelRange(nominal_range_value); + + // The current code assumes that the shape will be in NCHW. + // Should the model metadata be read instead??? + const int c_height_dimension = 2; + const int c_width_dimension = 3; + auto height = static_cast(shape[c_height_dimension]); + auto width = static_cast(shape[c_width_dimension]); + auto descriptor = winrt::make( + feature_descriptor->name_, + feature_descriptor->description_, + kind, + shape, + feature_descriptor->name_length_ > 0, // is_required + pixel_format, + alpha_mode, + width, + height, + nominal_range, + color_space_gamma); + + return descriptor.as(); +} + +static winml::ILearningModelFeatureDescriptor +CreateMapFeatureDescriptor( + OnnxruntimeEngineFactory* engine_factory, + const OnnxruntimeValueInfoWrapper* feature_descriptor, + const std::unordered_map& metadata) { + auto type_info = feature_descriptor->type_info_.get(); + + const OrtMapTypeInfo* map_info; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->CastTypeInfoToMapTypeInfo(type_info, &map_info), + engine_factory->UseOrtApi()); + + ONNXTensorElementDataType map_key_data_type; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetMapKeyType(map_info, &map_key_data_type), + engine_factory->UseOrtApi()); + + auto key_kind = WinML::TensorKindFromONNXTensorElementDataType(map_key_data_type); + + OrtTypeInfo* map_value_type_info; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetMapValueType(map_info, &map_value_type_info), + engine_factory->UseOrtApi()); + + UniqueOrtTypeInfo unique_map_value_type_info(map_value_type_info, engine_factory->UseOrtApi()->ReleaseTypeInfo); + + OnnxruntimeValueInfoWrapper dummy_ort_value_info_wrapper; + dummy_ort_value_info_wrapper.description_ = feature_descriptor->description_; + 
dummy_ort_value_info_wrapper.description_length_ = feature_descriptor->description_length_; + dummy_ort_value_info_wrapper.name_ = feature_descriptor->name_; + dummy_ort_value_info_wrapper.name_length_ = feature_descriptor->name_length_; + dummy_ort_value_info_wrapper.type_info_ = std::move(unique_map_value_type_info); + + auto value_descriptor = + CreateFeatureDescriptor(engine_factory, &dummy_ort_value_info_wrapper, metadata); + + auto descriptor = winrt::make( + feature_descriptor->name_, + feature_descriptor->description_, + feature_descriptor->name_length_ > 0, // is_required + key_kind, + value_descriptor); + return descriptor.as(); +} + +static winml::ILearningModelFeatureDescriptor +CreateSequenceFeatureDescriptor( + OnnxruntimeEngineFactory* engine_factory, + const OnnxruntimeValueInfoWrapper* feature_descriptor, + const std::unordered_map& metadata) { + auto type_info = feature_descriptor->type_info_.get(); + + const OrtSequenceTypeInfo* sequence_info; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->CastTypeInfoToSequenceTypeInfo(type_info, &sequence_info), + engine_factory->UseOrtApi()); + + OrtTypeInfo* sequence_element_type_info; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetSequenceElementType(sequence_info, &sequence_element_type_info), + engine_factory->UseOrtApi()); + + UniqueOrtTypeInfo unique_sequence_element_type_info(sequence_element_type_info, engine_factory->UseOrtApi()->ReleaseTypeInfo); + + OnnxruntimeValueInfoWrapper dummy_ort_value_info_wrapper; + dummy_ort_value_info_wrapper.description_ = feature_descriptor->description_; + dummy_ort_value_info_wrapper.description_length_ = feature_descriptor->description_length_; + dummy_ort_value_info_wrapper.name_ = feature_descriptor->name_; + dummy_ort_value_info_wrapper.name_length_ = feature_descriptor->name_length_; + dummy_ort_value_info_wrapper.type_info_ = std::move(unique_sequence_element_type_info); + + auto element_descriptor = + CreateFeatureDescriptor(engine_factory, 
&dummy_ort_value_info_wrapper, metadata); + + auto descriptor = winrt::make( + feature_descriptor->name_, + feature_descriptor->description_, + feature_descriptor->name_length_ > 0, // is_required + element_descriptor); + + return descriptor.as(); +} + +static winml::ILearningModelFeatureDescriptor +CreateFeatureDescriptor( + OnnxruntimeEngineFactory* engine_factory, + const OnnxruntimeValueInfoWrapper* feature_descriptor, + const std::unordered_map& metadata) { + auto type_info = feature_descriptor->type_info_.get(); + + ONNXType onnx_type; + THROW_IF_NOT_OK_MSG(engine_factory->UseOrtApi()->GetOnnxTypeFromTypeInfo(type_info, &onnx_type), + engine_factory->UseOrtApi()); + + switch (onnx_type) { + case ONNXType::ONNX_TYPE_TENSOR: { + auto tensor_type = GetTensorType(engine_factory, type_info, metadata); + if (tensor_type == TensorType::Tensor_Image) { + return CreateImageFeatureDescriptor( + engine_factory, + feature_descriptor, + metadata); + } else { + auto has_unsupported_image_metadata = + tensor_type == TensorType::Tensor_Data_UnsupportedImageMetadata; + return CreateTensorFeatureDescriptor( + engine_factory, + feature_descriptor, + metadata, + has_unsupported_image_metadata); + } + } + case ONNXType::ONNX_TYPE_MAP: { + return CreateMapFeatureDescriptor( + engine_factory, + feature_descriptor, + metadata); + } + case ONNXType::ONNX_TYPE_SEQUENCE: { + return CreateSequenceFeatureDescriptor( + engine_factory, + feature_descriptor, + metadata); + } + default: + throw winrt::hresult_not_implemented(); + } +} + +OnnxruntimeDescriptorConverter::OnnxruntimeDescriptorConverter( + OnnxruntimeEngineFactory* engine_factory, + const std::unordered_map& metadata) : engine_factory_(engine_factory), metadata_(metadata) {} + +wfc::IVector +OnnxruntimeDescriptorConverter::ConvertToLearningModelDescriptors(const std::vector& descriptors) { + auto features = winrt::single_threaded_vector(); + + for (const auto& descriptor : descriptors) { + auto learning_model_descriptor = 
WinML::CreateFeatureDescriptor(engine_factory_.Get(), &descriptor, metadata_); + features.Append(learning_model_descriptor); + } + + return features; +} +} // namespace Windows::AI::MachineLearning diff --git a/winml/lib/Api.Ort/OnnxruntimeDescriptorConverter.h b/winml/lib/Api.Ort/OnnxruntimeDescriptorConverter.h new file mode 100644 index 0000000000000..1152f7f0ec530 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeDescriptorConverter.h @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#pragma once + +#include "pch.h" + +namespace Windows::AI::MachineLearning { + +struct OnnxruntimeValueInfoWrapper { + OnnxruntimeValueInfoWrapper() : type_info_(UniqueOrtTypeInfo(nullptr, nullptr)) {} + const char* name_ = nullptr; + size_t name_length_ = 0; + const char* description_ = nullptr; + size_t description_length_ = 0; + UniqueOrtTypeInfo type_info_; +}; + +class OnnxruntimeEngineFactory; + +struct OnnxruntimeDescriptorConverter { + OnnxruntimeDescriptorConverter( + OnnxruntimeEngineFactory* engine_factory, + const std::unordered_map& model_metadata); + + wfc::IVector + ConvertToLearningModelDescriptors(const std::vector& descriptors); + + private: + Microsoft::WRL::ComPtr engine_factory_; + const std::unordered_map& metadata_; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeDmlSessionBuilder.cpp b/winml/lib/Api.Ort/OnnxruntimeDmlSessionBuilder.cpp new file mode 100644 index 0000000000000..c393dfd77609b --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeDmlSessionBuilder.cpp @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" + +#ifdef USE_DML + +#include "OnnxruntimeDmlSessionBuilder.h" +#include "OnnxruntimeEngine.h" +#include "OnnxruntimeErrors.h" +#include "LearningModelDevice.h" + +using namespace Windows::AI::MachineLearning; + +HRESULT OnnxruntimeDmlSessionBuilder::RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory, ID3D12Device* device, ID3D12CommandQueue* queue) { + engine_factory_ = engine_factory; + device_.copy_from(device); + queue_.copy_from(queue); + return S_OK; +} + +HRESULT +OnnxruntimeDmlSessionBuilder::CreateSessionOptions( + OrtSessionOptions** options) { + RETURN_HR_IF_NULL(E_POINTER, options); + + auto ort_api = engine_factory_->UseOrtApi(); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtSessionOptions* ort_options; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateSessionOptions(&ort_options), + ort_api); + + auto session_options = UniqueOrtSessionOptions(ort_options, ort_api->ReleaseSessionOptions); + + // set the graph optimization level to all (used to be called level 3) + RETURN_HR_IF_NOT_OK_MSG(ort_api->SetSessionGraphOptimizationLevel(session_options.get(), GraphOptimizationLevel::ORT_ENABLE_ALL), + ort_api); + + // Disable the mem pattern session option for DML. It will cause problems with how memory is allocated. 
+ RETURN_HR_IF_NOT_OK_MSG(ort_api->DisableMemPattern(session_options.get()), + ort_api); + + // Request the dml ep + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->OrtSessionOptionsAppendExecutionProvider_DML(session_options.get(), device_.get(), queue_.get()), + ort_api); + +#ifndef _WIN64 + auto use_arena = false; +#else + auto use_arena = true; +#endif + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->OrtSessionOptionsAppendExecutionProvider_CPU(session_options.get(), use_arena), + ort_api); + + // call release() so the underlying OrtSessionOptions object isn't freed + *options = session_options.release(); + + return S_OK; +} + +HRESULT OnnxruntimeDmlSessionBuilder::CreateSession( + OrtSessionOptions* options, + OrtSession** session) { + RETURN_HR_IF_NULL(E_POINTER, session); + + auto ort_api = engine_factory_->UseOrtApi(); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtEnv* ort_env; + RETURN_IF_FAILED(engine_factory_->GetOrtEnvironment(&ort_env)); + + OrtSession* ort_session_raw; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->CreateSessionWithoutModel(ort_env, options, &ort_session_raw), + engine_factory_->UseOrtApi()); + auto ort_session = UniqueOrtSession(ort_session_raw, ort_api->ReleaseSession); + + *session = ort_session.release(); + + return S_OK; +} + +HRESULT OnnxruntimeDmlSessionBuilder::Initialize( + OrtSession* session) { + RETURN_HR_IF_NULL(E_INVALIDARG, session); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionInitialize(session), + engine_factory_->UseOrtApi()); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session, 0, &ort_provider), + engine_factory_->UseOrtApi()); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlExecutionProviderSetDefaultRoundingMode(ort_provider, true), + engine_factory_->UseOrtApi()); + + // Flush the D3D12 work from the DML execution provider + 
RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlExecutionProviderFlushContext(ort_provider), + engine_factory_->UseOrtApi()); + + return S_OK; +} + +#endif USE_DML \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeDmlSessionBuilder.h b/winml/lib/Api.Ort/OnnxruntimeDmlSessionBuilder.h new file mode 100644 index 0000000000000..8efd98e5b68da --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeDmlSessionBuilder.h @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "OnnxruntimeSessionBuilder.h" + +namespace Windows::AI::MachineLearning { + +class OnnxruntimeEngineFactory; + +class OnnxruntimeDmlSessionBuilder : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IOrtSessionBuilder> { + public: + HRESULT RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory, ID3D12Device* device, ID3D12CommandQueue* queue); + + HRESULT STDMETHODCALLTYPE CreateSessionOptions( + OrtSessionOptions** options) override; + + HRESULT STDMETHODCALLTYPE CreateSession( + OrtSessionOptions* options, + OrtSession** session) override; + + HRESULT STDMETHODCALLTYPE Initialize( + OrtSession* session) override; + + private: + Microsoft::WRL::ComPtr engine_factory_; + winrt::com_ptr device_; + winrt::com_ptr queue_; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeEngine.cpp b/winml/lib/Api.Ort/OnnxruntimeEngine.cpp new file mode 100644 index 0000000000000..da37ef2712f20 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeEngine.cpp @@ -0,0 +1,1282 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "pch.h" + +#include "OnnxruntimeEngine.h" + +#include "PheonixSingleton.h" +#include "OnnxruntimeEnvironment.h" +#include "OnnxruntimeEngineBuilder.h" +#include "OnnxruntimeModel.h" +#include "OnnxruntimeSessionBuilder.h" +#include "OnnxruntimeErrors.h" + +using namespace WinML; + +static const OrtApi* GetVersionedOrtApi() { + static const uint32_t ort_version = 1; + const auto ort_api_base = OrtGetApiBase(); + return ort_api_base->GetApi(ort_version); +} + +static const WinmlAdapterApi* GetVersionedWinmlAdapterApi() { + return OrtGetWinMLAdapter(GetVersionedOrtApi()); +} + +static ONNXTensorElementDataType +ONNXTensorElementDataTypeFromTensorKind(winml::TensorKind kind) { + switch (kind) { + case winml::TensorKind::Boolean: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL; + } + case winml::TensorKind::String: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; + } + case winml::TensorKind::Float16: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; + } + case winml::TensorKind::Float: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + case winml::TensorKind::Double: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; + } + case winml::TensorKind::Int8: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; + } + case winml::TensorKind::Int16: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16; + } + case winml::TensorKind::Int32: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; + } + case winml::TensorKind::Int64: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; + } + case winml::TensorKind::UInt8: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; + } + case winml::TensorKind::UInt16: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16; + } + case winml::TensorKind::UInt32: { + return 
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32; + } + case winml::TensorKind::UInt64: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64; + } + case winml::TensorKind::Complex64: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64; + } + case winml::TensorKind::Complex128: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128; + } + default: { + return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + } + } +} + +OnnxruntimeValue::OnnxruntimeValue() : value_(nullptr, nullptr), allocator_(nullptr, nullptr) {} + +OnnxruntimeValue::~OnnxruntimeValue() { + value_.reset(nullptr); + allocator_.reset(nullptr); +} + +HRESULT OnnxruntimeValue::RuntimeClassInitialize(OnnxruntimeEngine* engine, UniqueOrtValue&& ort_value, UniqueOrtAllocator&& allocator) { + engine_ = engine; + value_ = std::move(ort_value); + allocator_ = std::move(allocator); + + return S_OK; +} + +HRESULT OnnxruntimeValue::IsEmpty(bool* out) { + *out = UseOrtValue() == nullptr; + return S_OK; +} + +HRESULT OnnxruntimeValue::IsCpu(bool* out) { + auto ort_api = engine_->GetEngineFactory()->UseOrtApi(); + auto winml_adapter_api = engine_->GetEngineFactory()->UseWinmlAdapterApi(); + + OrtMemoryInfo* ort_memory_info; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->GetValueMemoryInfo(value_.get(), &ort_memory_info), + ort_api); + auto memory_info = UniqueOrtMemoryInfo(ort_memory_info, ort_api->ReleaseMemoryInfo); + + const char* name; + RETURN_HR_IF_NOT_OK_MSG(ort_api->MemoryInfoGetName(memory_info.get(), &name), + ort_api); + + OrtMemType type; + RETURN_HR_IF_NOT_OK_MSG(ort_api->MemoryInfoGetMemType(memory_info.get(), &type), + ort_api); + + *out = !strcmp(name, "Cpu") || + type == OrtMemType::OrtMemTypeCPUOutput || + type == OrtMemType::OrtMemTypeCPUInput; + return S_OK; +} + +static int64_t ShapeSize(const int64_t* shape, size_t count) { + // for each dim + int64_t size = 1; + for (size_t i = 0; i < 
count; i++) { + // find out it's total size + size *= shape[i]; + // make sure there are no invalid dimensions (-1 or any invalid shape) + THROW_HR_IF(E_INVALIDARG, shape[i] <= 0); + } + return size; +} + +static auto GetStrings(const OrtApi* ort_api, const OrtValue* ort_value, + OrtTensorTypeAndShapeInfo* type_and_shape_info) { + std::vector out; + + size_t size; + THROW_IF_NOT_OK_MSG(ort_api->GetDimensionsCount(type_and_shape_info, &size), + ort_api); + + std::vector shape(size); + + if (size > 0) { + THROW_IF_NOT_OK_MSG(ort_api->GetDimensions(type_and_shape_info, &shape[0], size), + ort_api); + } + auto length = ShapeSize(shape.data(), shape.size()); + + // make a big buffer to hold all the string data + size_t buffer_length; + THROW_IF_NOT_OK_MSG(ort_api->GetStringTensorDataLength(ort_value, &buffer_length), + ort_api); + + std::vector strings; + std::unique_ptr buffer(new uint8_t[buffer_length]); + std::vector offsets(static_cast(length)); + + THROW_IF_NOT_OK_MSG(ort_api->GetStringTensorContent(ort_value, buffer.get(), buffer_length, offsets.data(), offsets.size()), + ort_api); + + // now go build all the strings + for (auto i = 0; i < length; ++i) { + size_t str_len = 0; + // are we on the last one? 
+ if (i == (length - 1)) { + str_len = buffer_length - offsets[i]; + } else { + str_len = offsets[i + 1] - offsets[i]; + } + strings.push_back(std::string_view(reinterpret_cast(buffer.get() + offsets[i]), str_len)); + } + + return std::make_shared>(std::move(strings), std::move(buffer)); +} + +HRESULT OnnxruntimeValue::GetResource(WinML::Resource& out) { + auto ort_api = engine_->GetEngineFactory()->UseOrtApi(); + auto winml_adapter_api = engine_->GetEngineFactory()->UseWinmlAdapterApi(); + + void* mutable_data = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorMutableData(value_.get(), &mutable_data), + ort_api); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(engine_->UseOrtSession(), 0, &ort_provider), + ort_api); + + bool is_cpu = false; + if (SUCCEEDED(IsCpu(&is_cpu)) && !is_cpu) { + void* resource; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlGetD3D12ResourceFromAllocation(ort_provider, mutable_data, + reinterpret_cast(&resource)), + ort_api); + out = WinML::Resource(resource, [](void*) { /*do nothing, as this pointer is actually a com pointer! */ }); + } else { + int is_tensor; + RETURN_HR_IF_NOT_OK_MSG(ort_api->IsTensor(value_.get(), &is_tensor), + ort_api); + if (is_tensor == 0) { + out = WinML::Resource(mutable_data, [](void*) { /*do nothing, as this pointer is actually owned elsewhere in ORT! 
*/ }); + return S_OK; + } + + OrtTensorTypeAndShapeInfo* info = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorTypeAndShape(value_.get(), &info), + ort_api); + auto type_and_shape_info = UniqueOrtTensorTypeAndShapeInfo(info, ort_api->ReleaseTensorTypeAndShapeInfo); + + ONNXTensorElementDataType data_type; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorElementType(type_and_shape_info.get(), &data_type), + ort_api); + + if (data_type == ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) { + auto strings = GetStrings(ort_api, value_.get(), info); + auto string_data = strings->first.data(); + out = WinML::Resource(string_data, [capture_strings = strings](void*) { /*This deleter does nothing but capture the strings, which extends the lifetime of the returned strings.*/ }); + } else { + out = WinML::Resource(mutable_data, [](void*) { /*do nothing, as this pointer is actually owned elsewhere in ORT! */ }); + } + } + return S_OK; +} + +HRESULT OnnxruntimeValue::IsTensor(bool* out) { + auto ort_api = engine_->GetEngineFactory()->UseOrtApi(); + + ONNXType type = ONNXType::ONNX_TYPE_UNKNOWN; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetValueType(value_.get(), &type), + ort_api); + *out = type == ONNXType::ONNX_TYPE_TENSOR; + return S_OK; +} + +HRESULT OnnxruntimeValue::IsOfTensorType(winml::TensorKind kind, bool* out) { + auto ort_api = engine_->GetEngineFactory()->UseOrtApi(); + OrtTensorTypeAndShapeInfo* info = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorTypeAndShape(value_.get(), &info), + ort_api); + auto type_and_shape_info = UniqueOrtTensorTypeAndShapeInfo(info, ort_api->ReleaseTensorTypeAndShapeInfo); + + ONNXTensorElementDataType data_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorElementType(type_and_shape_info.get(), &data_type), + ort_api); + + *out = data_type == ONNXTensorElementDataTypeFromTensorKind(kind); + return S_OK; +} + +HRESULT 
OnnxruntimeValue::GetTensorShape(std::vector& shape_vector) { + auto ort_api = engine_->GetEngineFactory()->UseOrtApi(); + OrtTensorTypeAndShapeInfo* info = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorTypeAndShape(value_.get(), &info), + ort_api); + auto type_and_shape_info = UniqueOrtTensorTypeAndShapeInfo(info, ort_api->ReleaseTensorTypeAndShapeInfo); + + size_t size; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetDimensionsCount(type_and_shape_info.get(), &size), + ort_api); + + std::vector shape(size); + if (size > 0) { + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetDimensions(type_and_shape_info.get(), &shape[0], size), + ort_api); + } + + shape_vector = std::move(shape); + return S_OK; +} + +static bool EnsureMapTypeInfo(OnnxruntimeEngine* engine, OrtTypeInfo* type_info, winml::TensorKind key_kind, winml::TensorKind value_kind) { + auto ort_api = engine->GetEngineFactory()->UseOrtApi(); + + const OrtMapTypeInfo* map_info; + THROW_IF_NOT_OK_MSG(ort_api->CastTypeInfoToMapTypeInfo(type_info, &map_info), + ort_api); + + ONNXTensorElementDataType map_key_type; + THROW_IF_NOT_OK_MSG(ort_api->GetMapKeyType(map_info, &map_key_type), + ort_api); + + if (map_key_type == ONNXTensorElementDataTypeFromTensorKind(key_kind)) { + OrtTypeInfo* value_info; + THROW_IF_NOT_OK_MSG(ort_api->GetMapValueType(map_info, &value_info), + ort_api); + auto map_value_info = UniqueOrtTypeInfo(value_info, ort_api->ReleaseTypeInfo); + + const OrtTensorTypeAndShapeInfo* value_tensor_info = nullptr; + THROW_IF_NOT_OK_MSG(ort_api->CastTypeInfoToTensorInfo(map_value_info.get(), &value_tensor_info), + ort_api); + + if (value_tensor_info) { + ONNXTensorElementDataType map_value_tensor_type; + THROW_IF_NOT_OK_MSG(ort_api->GetTensorElementType(value_tensor_info, &map_value_tensor_type), + ort_api); + + if (map_value_tensor_type == ONNXTensorElementDataTypeFromTensorKind(value_kind)) { + size_t num_dims; + THROW_IF_NOT_OK_MSG(ort_api->GetDimensionsCount(value_tensor_info, &num_dims), + ort_api); + + return 
num_dims == 0; + } + } + } + return false; +} + +HRESULT OnnxruntimeValue::IsOfMapType(winml::TensorKind key_kind, winml::TensorKind value_kind, bool* out) { + auto ort_api = engine_->GetEngineFactory()->UseOrtApi(); + + OrtTypeInfo* info = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTypeInfo(value_.get(), &info), + ort_api); + auto unique_type_info = UniqueOrtTypeInfo(info, ort_api->ReleaseTypeInfo); + + ONNXType type; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetOnnxTypeFromTypeInfo(unique_type_info.get(), &type), + ort_api); + + if (type == ONNXType::ONNX_TYPE_MAP) { + *out = EnsureMapTypeInfo(engine_.Get(), unique_type_info.get(), key_kind, value_kind); + } + + *out = false; + + return S_OK; +} + +HRESULT OnnxruntimeValue::IsOfVectorMapType(winml::TensorKind key_kind, winml::TensorKind value_kind, bool* out) { + auto ort_api = engine_->GetEngineFactory()->UseOrtApi(); + + OrtTypeInfo* info = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTypeInfo(value_.get(), &info), + ort_api); + auto unique_type_info = UniqueOrtTypeInfo(info, ort_api->ReleaseTypeInfo); + + ONNXType type; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetOnnxTypeFromTypeInfo(unique_type_info.get(), &type), + ort_api); + + if (type == ONNXType::ONNX_TYPE_SEQUENCE) { + const OrtSequenceTypeInfo* sequence_info; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CastTypeInfoToSequenceTypeInfo(unique_type_info.get(), &sequence_info), + ort_api); + + OrtTypeInfo* element_info; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetSequenceElementType(sequence_info, &element_info), + ort_api); + auto unique_element_info = UniqueOrtTypeInfo(element_info, ort_api->ReleaseTypeInfo); + + *out = EnsureMapTypeInfo(engine_.Get(), unique_element_info.get(), key_kind, value_kind); + } + return S_OK; +} + +HRESULT OnnxruntimeValue::SetParameter(IUnknown* param) { + param_ = param; + return S_OK; +} + +OrtValue* OnnxruntimeValue::UseOrtValue() { + return value_.get(); +} + +HRESULT OnnxruntimeValue::AssignOrtValue(OrtValue* in) { + value_.reset(in); + return 
S_OK; +} + +OnnxruntimeEngine::OnnxruntimeEngine() : session_(nullptr, nullptr) { +} + +HRESULT OnnxruntimeEngine::RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory, + UniqueOrtSession&& session, + IOrtSessionBuilder* session_builder) { + engine_factory_ = engine_factory; + session_ = std::move(session); + session_builder_ = session_builder; + return S_OK; +} + +HRESULT OnnxruntimeEngine::LoadModel(_In_ IModel* model) { + Microsoft::WRL::ComPtr onnxruntime_model; + RETURN_IF_FAILED(model->QueryInterface(IID_PPV_ARGS(&onnxruntime_model))); + + OrtModel* ort_model; + RETURN_IF_FAILED(onnxruntime_model->DetachOrtModel(&ort_model)); + + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionLoadAndPurloinModel(session_.get(), ort_model), + engine_factory_->UseOrtApi()); + return S_OK; +} + +HRESULT OnnxruntimeEngine::Initialize() { + RETURN_IF_FAILED(session_builder_->Initialize(session_.get())); + return S_OK; +} + +HRESULT OnnxruntimeEngine::RegisterGraphTransformers() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionRegisterGraphTransformers(session_.get()), + engine_factory_->UseOrtApi()); + return S_OK; +} + +HRESULT OnnxruntimeEngine::RegisterCustomRegistry(IMLOperatorRegistry* registry) { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionRegisterCustomRegistry(session_.get(), registry), + engine_factory_->UseOrtApi()); + return S_OK; +} + +HRESULT OnnxruntimeEngine::EndProfiling() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionEndProfiling(session_.get()), + engine_factory_->UseOrtApi()); + return S_OK; +} + +HRESULT OnnxruntimeEngine::StartProfiling() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtEnv* ort_env; + 
engine_factory_->GetOrtEnvironment(&ort_env); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionStartProfiling(ort_env, session_.get()), + engine_factory_->UseOrtApi()); + return S_OK; +} + +HRESULT OnnxruntimeEngine::FlushContext() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session_.get(), 0, &ort_provider), + engine_factory_->UseOrtApi()); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlExecutionProviderFlushContext(ort_provider), + engine_factory_->UseOrtApi()); + return S_OK; +} + +HRESULT OnnxruntimeEngine::TrimUploadHeap() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session_.get(), 0, &ort_provider), + engine_factory_->UseOrtApi()); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlExecutionProviderTrimUploadHeap(ort_provider), + engine_factory_->UseOrtApi()); + + return S_OK; +} + +HRESULT OnnxruntimeEngine::ReleaseCompletedReferences() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session_.get(), 0, &ort_provider), + engine_factory_->UseOrtApi()); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlExecutionProviderReleaseCompletedReferences(ort_provider), + engine_factory_->UseOrtApi()); + + return S_OK; +} + +HRESULT OnnxruntimeEngine::CopyValueAcrossDevices(IValue* src, IValue* dest) { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session_.get(), 0, &ort_provider), + engine_factory_->UseOrtApi()); + + auto src_value = static_cast(src); + auto dest_value = static_cast(dest); + + bool is_empty; + auto has_null_source = 
(SUCCEEDED(src_value->IsEmpty(&is_empty)) && is_empty); + RETURN_HR_IF(E_FAIL, has_null_source); + + auto has_null_dest = (SUCCEEDED(dest_value->IsEmpty(&is_empty)) && is_empty); + RETURN_HR_IF(E_FAIL, has_null_dest); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlCopyTensor(ort_provider, src_value->UseOrtValue(), dest_value->UseOrtValue()), + engine_factory_->UseOrtApi()); + + return S_OK; +} + +HRESULT OnnxruntimeEngine::Sync() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session_.get(), 0, &ort_provider), + engine_factory_->UseOrtApi()); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ExecutionProviderSync(ort_provider), + engine_factory_->UseOrtApi()); + + return S_OK; +} + +OrtSession* OnnxruntimeEngine::UseOrtSession() { + return session_.get(); +} + +const OrtApi* OnnxruntimeEngine::UseOrtApi() { + return engine_factory_->UseOrtApi(); +} + +OnnxruntimeEngineFactory* OnnxruntimeEngine::GetEngineFactory() { + return engine_factory_.Get(); +} + +HRESULT OnnxruntimeEngine::CreateTensorValueFromDefaultAllocator(const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) { + auto ort_api = engine_factory_->UseOrtApi(); + + OrtAllocator* ort_allocator; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetAllocatorWithDefaultOptions(&ort_allocator), ort_api); // This should not be freed as this owned by ort + + OrtValue* ort_value; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateTensorAsOrtValue(ort_allocator, shape, count, ONNXTensorElementDataTypeFromTensorKind(kind), &ort_value), + ort_api); + auto unique_value = UniqueOrtValue(ort_value, ort_api->ReleaseValue); + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, this, std::move(unique_value), UniqueOrtAllocator(nullptr, nullptr))); + return S_OK; +} + +/* +* OnnxruntimeEngine::CreateTensorValue +* +* Used by callers like ImageFeatureValue to allocate a cpu or gpu OrtValue 
with ORT owned memory. +* In the image feature value case, tensorization creates temporary buffers, and will need to copy the value from +* its source location to the ort value. Since a copy is required, there is need to preserve the caller's memory locations. +* We simply allocate memory with ORT and copy the tensorized values into it. +*/ +HRESULT OnnxruntimeEngine::CreateTensorValue(const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) { + auto ort_api = engine_factory_->UseOrtApi(); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session_.get(), 0, &ort_provider), + engine_factory_->UseOrtApi()); + + OrtAllocator* ort_allocator; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->GetProviderAllocator(ort_provider, &ort_allocator), + engine_factory_->UseOrtApi()); + + auto unique_allocator = UniqueOrtAllocator(ort_allocator, winml_adapter_api->FreeProviderAllocator); // the release here should probably not return anything + + OrtValue* ort_value; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateTensorAsOrtValue(unique_allocator.get(), shape, count, ONNXTensorElementDataTypeFromTensorKind(kind), &ort_value), + ort_api); + auto unique_value = UniqueOrtValue(ort_value, ort_api->ReleaseValue); + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, this, std::move(unique_value), std::move(unique_allocator))); + return S_OK; +} + +using DmlAllocatorResource = std::unique_ptr; +class DmlAllocatorWrapper : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IUnknown> { + public: + DmlAllocatorWrapper() : dml_resource_(nullptr, nullptr) {} + + HRESULT RuntimeClassInitialize(DmlAllocatorResource&& dml_resource) { + dml_resource_ = std::move(dml_resource); + return S_OK; + } + + private: + DmlAllocatorResource dml_resource_; +}; + +/* +* OnnxruntimeEngine::CreateTensorValueFromExternalD3DResource +* +* Used 
by callers like TensorBase to allocate a gpu OrtValue based on a called owned ID3D12Resource. +* WinML cannot use ORT allocators here since they will allocate the ID3D12Resource and force a copy from the user provided value. +*/ +HRESULT OnnxruntimeEngine::CreateTensorValueFromExternalD3DResource(ID3D12Resource* d3d_resource, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) { + auto ort_api = engine_factory_->UseOrtApi(); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtExecutionProvider* ort_provider; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetExecutionProvider(session_.get(), 0, &ort_provider), + engine_factory_->UseOrtApi()); + + OrtMemoryInfo* dml_memory = nullptr; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->GetProviderMemoryInfo(ort_provider, &dml_memory), + engine_factory_->UseOrtApi()); + + void* dml_allocator_resource; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->DmlCreateGPUAllocationFromD3DResource(d3d_resource, &dml_allocator_resource), + engine_factory_->UseOrtApi()); + + auto unique_dml_allocator_resource = + DmlAllocatorResource(dml_allocator_resource, + [](void* ptr) { + GetVersionedWinmlAdapterApi()->DmlFreeGPUAllocation(ptr); + }); + + // create the OrtValue as a tensor letting ort know that we own the data buffer + OrtValue* ort_value; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateTensorWithDataAsOrtValue( + dml_memory, + unique_dml_allocator_resource.get(), + static_cast(d3d_resource->GetDesc().Width), + shape, + count, + ONNXTensorElementDataTypeFromTensorKind(kind), + &ort_value), + ort_api); + auto unique_value = UniqueOrtValue(ort_value, ort_api->ReleaseValue); + + Microsoft::WRL::ComPtr out_value; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&out_value, this, std::move(unique_value), UniqueOrtAllocator(nullptr, nullptr))); + + // Cache the allocator on the value so it destructs appropriately when the value is dropped + Microsoft::WRL::ComPtr 
dml_allocator_resource_wrapper; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&dml_allocator_resource_wrapper, std::move(unique_dml_allocator_resource))); + + RETURN_IF_FAILED(out_value->SetParameter(dml_allocator_resource_wrapper.Get())); + + *out = out_value.Detach(); + + return S_OK; +} + +/* +* OnnxruntimeEngine::CreateStringTensorValueFromDataWithCopy +* +* Used by callers like TensorString to allocate a cpu OrtValue and populate the contents with use specified data. +* WinML cannot use CreateTensorWithDataAsOrtValue since externally allocated strings are not supported on the c-abi. +* The c-abi string implementation requires a copy the external buffer into its own internal std::string copy. +* In addition, strings have different APIs on the c-abi like FillStringTensor to populate the buffer, and so strings +* have a different calling pattern than other Tensor types of simple data types. +*/ +HRESULT OnnxruntimeEngine::CreateStringTensorValueFromDataWithCopy(const char* const* data, size_t num_elements, const int64_t* shape, size_t count, _Out_ IValue** out) { + auto ort_api = engine_factory_->UseOrtApi(); + + RETURN_IF_FAILED(CreateTensorValueFromDefaultAllocator(shape, count, winml::TensorKind::String, out)); + + auto ort_value = reinterpret_cast(*out)->UseOrtValue(); + RETURN_HR_IF_NOT_OK_MSG(ort_api->FillStringTensor(ort_value, reinterpret_cast(data), num_elements), + ort_api); + return S_OK; +} + +/* +* OnnxruntimeEngine::CreateTensorValueFromExternalBuffer +* +* Used by callers like TensorBase to allocate a cpu OrtValue that is backed by caller owned memory. 
+*/ +HRESULT OnnxruntimeEngine::CreateTensorValueFromExternalBuffer(void* data, size_t size_in_bytes, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) { + auto ort_api = engine_factory_->UseOrtApi(); + + if (kind == winml::TensorKind::String) { + // String buffers cannot be passed into the ort api directly because ort c-api tensor strings cannot be backed by external memory + return E_NOTIMPL; + } + + // TODO: what is the difference between the device allocator and the arena allocator? + OrtMemoryInfo* cpu_memory; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault, &cpu_memory), + ort_api); + + OrtValue* ort_value; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateTensorWithDataAsOrtValue( + cpu_memory, + data, + size_in_bytes, + shape, + count, + ONNXTensorElementDataTypeFromTensorKind(kind), + &ort_value), + ort_api); + auto unique_value = UniqueOrtValue(ort_value, ort_api->ReleaseValue); + + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, this, std::move(unique_value), UniqueOrtAllocator(nullptr, nullptr))); + return S_OK; +} + +/* +* OnnxruntimeEngine::CreateNullValue +* +* Used by callers like TensorBase and the binding object to allocate a cpu OrtValue that is empty. +* This is used for WinML unbound outputs. 
+*/ +HRESULT OnnxruntimeEngine::CreateNullValue(_Out_ IValue** out) { + auto ort_api = engine_factory_->UseOrtApi(); + auto unique_value = UniqueOrtValue(nullptr, ort_api->ReleaseValue); + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, this, std::move(unique_value), UniqueOrtAllocator(nullptr, nullptr))); + return S_OK; +} + +template +struct AbiTypeInfo { + using CppWinRTType = TAbiType; + using OrtType = TAbiType; + using ResourceType = TAbiType; +}; + +template <> +struct AbiTypeInfo { + using CppWinRTType = winrt::hstring; + using OrtType = const char*; + using ResourceType = std::string_view; +}; + +template +typename auto CppwinrtTypeToOrtType(TCppwinrtType raw) { + return raw; +} + +template <> +typename auto CppwinrtTypeToOrtType(winrt::hstring raw) { + return WinML::Strings::UTF8FromHString(raw); +} + +template +typename auto ResourceTypeToCppwinrtType(typename AbiTypeInfo::ResourceType value) { + return value; +} + +template <> +typename auto ResourceTypeToCppwinrtType(typename AbiTypeInfo::ResourceType value) { + return WinML::Strings::HStringFromUTF8(value.data(), value.size()); +} + +template +auto CastToWinrtMap(IInspectable* map_insp) { + using cppwinrt_key_type = typename AbiTypeInfo::CppWinRTType; + using cppwinrt_value_type = typename AbiTypeInfo::CppWinRTType; + + ::winrt::Windows::Foundation::IInspectable map_inspectable; + ::winrt::Windows::Foundation::Collections::IMap map; + winrt::copy_from_abi(map_inspectable, map_insp); + map_inspectable.as(map); + return map; +} + +template +auto CastToWinrtSequenceOfMaps(IInspectable* sequence_insp) { + using cppwinrt_key_type = typename AbiTypeInfo::CppWinRTType; + using cppwinrt_value_type = typename AbiTypeInfo::CppWinRTType; + + using cppwinrt_element_map_type = ::winrt::Windows::Foundation::Collections::IMap; + using cppwinrt_sequence_type = ::winrt::Windows::Foundation::Collections::IVector; + cppwinrt_sequence_type sequence; + ::winrt::Windows::Foundation::IInspectable 
sequence_inspectable; + winrt::copy_from_abi(sequence_inspectable, sequence_insp); + sequence_inspectable.as(sequence); + return sequence; +} + +template +struct FillMapTensors { + static HRESULT Run(const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value) { + AbiTypeInfo::OrtType* keys_mutable_data; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast(&keys_mutable_data)), + ort_api); + + AbiTypeInfo::OrtType* values_mutable_data; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast(&values_mutable_data)), + ort_api); + + auto map = CastToWinrtMap(map_insp); + size_t index = 0; + for (const auto& pair : map) { + keys_mutable_data[index] = CppwinrtTypeToOrtType(pair.Key()); + values_mutable_data[index] = CppwinrtTypeToOrtType(pair.Value()); + index++; + } + return S_OK; + } +}; + +template +struct FillMapTensors { + static HRESULT Run(const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value) { + AbiTypeInfo::OrtType* values_mutable_data; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast(&values_mutable_data)), + ort_api); + + auto map = CastToWinrtMap(map_insp); + size_t index = 0; + std::vector keys; + for (const auto& pair : map) { + keys.push_back(CppwinrtTypeToOrtType(pair.Key())); + values_mutable_data[index] = CppwinrtTypeToOrtType(pair.Value()); + index++; + } + + std::vector raw_values; + std::transform( + keys.begin(), + keys.end(), + std::back_inserter(raw_values), + [&](auto& str) { return str.c_str(); }); + + RETURN_HR_IF_NOT_OK_MSG(ort_api->FillStringTensor(keys_ort_value, raw_values.data(), raw_values.size()), + ort_api); + + return S_OK; + } +}; + +template +struct FillMapTensors { + static HRESULT Run(const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value) { + AbiTypeInfo::OrtType* keys_mutable_data; + 
RETURN_HR_IF_NOT_OK_MSG(ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast(&keys_mutable_data)), + ort_api); + + auto map = CastToWinrtMap(map_insp); + size_t index = 0; + std::vector values; + for (const auto& pair : map) { + keys_mutable_data[index] = CppwinrtTypeToOrtType(pair.Key()); + values.push_back(CppwinrtTypeToOrtType(pair.Value())); + index++; + } + + std::vector raw_values; + std::transform( + values.begin(), + values.end(), + std::back_inserter(raw_values), + [&](auto& str) { return str.c_str(); }); + + RETURN_HR_IF_NOT_OK_MSG(ort_api->FillStringTensor(keys_ort_value, raw_values.data(), raw_values.size()), + ort_api); + return S_OK; + } +}; + +template <> +struct FillMapTensors { + static HRESULT Run(const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value) { + auto map = CastToWinrtMap(map_insp); + std::vector keys; + std::vector values; + for (const auto& pair : map) { + keys.push_back(CppwinrtTypeToOrtType(pair.Key())); + values.push_back(CppwinrtTypeToOrtType(pair.Value())); + } + + std::vector raw_keys; + std::transform( + keys.begin(), + keys.end(), + std::back_inserter(raw_keys), + [&](auto& str) { return str.c_str(); }); + + std::vector raw_values; + std::transform( + values.begin(), + values.end(), + std::back_inserter(raw_values), + [&](auto& str) { return str.c_str(); }); + + RETURN_HR_IF_NOT_OK_MSG(ort_api->FillStringTensor(keys_ort_value, raw_keys.data(), raw_keys.size()), + ort_api); + RETURN_HR_IF_NOT_OK_MSG(ort_api->FillStringTensor(values_ort_value, raw_values.data(), raw_values.size()), + ort_api); + return S_OK; + } +}; + +template +HRESULT CreateMapValue(OnnxruntimeEngine* engine, IInspectable* map_insp, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue** out) { + auto ort_api = engine->UseOrtApi(); + auto map = CastToWinrtMap(map_insp); + std::vector shape = {static_cast(map.Size())}; + + winrt::com_ptr key_value; + 
RETURN_IF_FAILED(engine->CreateTensorValueFromDefaultAllocator(shape.data(), shape.size(), key_kind, key_value.put())); + auto keys_ort_value = static_cast(key_value.get())->UseOrtValue(); + + winrt::com_ptr value_value; + RETURN_IF_FAILED(engine->CreateTensorValueFromDefaultAllocator(shape.data(), shape.size(), value_kind, value_value.put())); + auto values_ort_value = static_cast(value_value.get())->UseOrtValue(); + + auto hr = FillMapTensors::Run(ort_api, map_insp, keys_ort_value, values_ort_value); + RETURN_IF_FAILED(hr); + + OrtValue* inputs[2] = {keys_ort_value, values_ort_value}; + + OrtValue* map_value; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateValue(inputs, 2, ONNXType::ONNX_TYPE_MAP, &map_value), + ort_api); + auto unique_map_ort_value = UniqueOrtValue(map_value, ort_api->ReleaseValue); + + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, engine, std::move(unique_map_ort_value), UniqueOrtAllocator(nullptr, nullptr))); + return S_OK; +} + +static auto GetMapValueCreator(OnnxruntimeEngine* engine, winml::TensorKind key_kind, winml::TensorKind value_kind) { + using namespace std::placeholders; + if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Int64) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::Int64, winml::TensorKind::Int64, _2); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Float) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::Int64, winml::TensorKind::Float, _2); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Double) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::Int64, winml::TensorKind::Double, _2); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::String) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::Int64, winml::TensorKind::String, _2); + } else if (key_kind == winml::TensorKind::String && value_kind == 
winml::TensorKind::Int64) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::String, winml::TensorKind::Int64, _2); + } else if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Float) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::String, winml::TensorKind::Float, _2); + } else if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Double) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::String, winml::TensorKind::Double, _2); + } else if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::String) { + return std::bind(&CreateMapValue, engine, _1, winml::TensorKind::String, winml::TensorKind::String, _2); + } + + THROW_HR(E_NOTIMPL); +} + +HRESULT OnnxruntimeEngine::CreateMapValue(IInspectable* map, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue** out) { + return GetMapValueCreator(this, key_kind, value_kind)(map, out); +} + +template +HRESULT CreateSequenceOfMapsValue(OnnxruntimeEngine* engine, IInspectable* sequence_insp, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue** out) { + auto ort_api = engine->UseOrtApi(); + auto sequence = CastToWinrtSequenceOfMaps(sequence_insp); + + std::vector> element_values; + for (auto element : sequence) { + winrt::com_ptr element_value; + engine->CreateMapValue(reinterpret_cast(winrt::get_abi(element)), key_kind, value_kind, element_value.put()); + element_values.push_back(element_value); + } + + std::vector element_ort_values; + std::transform(element_values.begin(), + element_values.end(), + std::back_inserter(element_ort_values), + [](auto value) { return static_cast(value.get())->UseOrtValue(); }); + + OrtValue* sequence_value; + RETURN_HR_IF_NOT_OK_MSG( + ort_api->CreateValue(element_ort_values.data(), element_ort_values.size(), + ONNXType::ONNX_TYPE_SEQUENCE, &sequence_value), + ort_api); + auto unique_sequence_ort_value = 
UniqueOrtValue(sequence_value, ort_api->ReleaseValue); + + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, engine, std::move(unique_sequence_ort_value), UniqueOrtAllocator(nullptr, nullptr))); + return S_OK; +} + +static auto GetSequenceOfMapsValueCreator(OnnxruntimeEngine* engine, winml::TensorKind key_kind, winml::TensorKind value_kind) { + using namespace std::placeholders; + if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Float) { + return std::bind(&CreateSequenceOfMapsValue, engine, _1, winml::TensorKind::Int64, winml::TensorKind::Int64, _2); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Float) { + return std::bind(&CreateSequenceOfMapsValue, engine, _1, winml::TensorKind::Int64, winml::TensorKind::Float, _2); + } + + THROW_HR(E_NOTIMPL); +} + +HRESULT OnnxruntimeEngine::CreateSequenceOfMapsValue(IInspectable* sequence, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue** out) { + RETURN_IF_FAILED(GetSequenceOfMapsValueCreator(this, key_kind, value_kind)(sequence, out)); + return S_OK; +} + +template +static HRESULT FillAbiSequence(IInspectable* sequence_insp, std::vector<::winrt::Windows::Foundation::IInspectable>& elements) { + using cppwinrt_key_type = typename AbiTypeInfo::CppWinRTType; + using cppwinrt_value_type = typename AbiTypeInfo::CppWinRTType; + auto sequence = CastToWinrtSequenceOfMaps(sequence_insp); + for (auto element : elements) { + ::winrt::Windows::Foundation::Collections::IMap map_element; + element.as(map_element); + sequence.Append(map_element); + } + return S_OK; +} + +static auto GetAbiSequenceFiller(winml::TensorKind key_kind, winml::TensorKind value_kind) { + using namespace std::placeholders; + if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Float) { + return &FillAbiSequence; + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Float) { + return &FillAbiSequence; + } + 
THROW_HR(E_NOTIMPL); +} + +static winrt::Windows::Foundation::IInspectable CreateMap(winml::TensorKind key_kind, winml::TensorKind value_kind) { + winrt::Windows::Foundation::IInspectable map_insp; + if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Float) { + auto map = winrt::single_threaded_map(); + map.as(map_insp); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Float) { + auto map = winrt::single_threaded_map(); + map.as(map_insp); + } + + return map_insp; +} + +HRESULT OnnxruntimeEngine::FillSequenceOfMapsValue(IInspectable* sequence, winml::TensorKind key_kind, winml::TensorKind value_kind, IValue* sequence_value) { + auto ort_api = engine_factory_->UseOrtApi(); + auto onnxruntime_squence_value = static_cast(sequence_value); + auto ort_sequence_value = onnxruntime_squence_value->UseOrtValue(); + + OrtAllocator* ort_allocator; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetAllocatorWithDefaultOptions(&ort_allocator), ort_api); // This should not be freed as this owned by ort + + size_t num_elements; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetValueCount(ort_sequence_value, &num_elements), ort_api); + + // get the elements + std::vector<::winrt::Windows::Foundation::IInspectable> element_map_inspectables; + for (size_t index = 0; index < num_elements; index++) { + OrtValue* elements_ort_value = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetValue(ort_sequence_value, static_cast(index), ort_allocator, &elements_ort_value), ort_api); + auto unique_element_value = UniqueOrtValue(elements_ort_value, ort_api->ReleaseValue); + + winrt::com_ptr element_value; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(element_value.put(), this, std::move(unique_element_value), UniqueOrtAllocator(nullptr, nullptr))); + + ::winrt::Windows::Foundation::IInspectable map_inspectable = CreateMap(key_kind, value_kind); + RETURN_IF_FAILED(FillFromMapValue(reinterpret_cast(winrt::get_abi(map_inspectable)), key_kind, value_kind, 
element_value.get())); + element_map_inspectables.push_back(map_inspectable); + } + + GetAbiSequenceFiller(key_kind, value_kind)(sequence, element_map_inspectables); + return S_OK; +} + +HRESULT OnnxruntimeEngine::CreateOneInputAcrossDevices(const char* name, IValue* src, IValue** out) { + auto ort_api = engine_factory_->UseOrtApi(); + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + auto src_value = static_cast(src); + + bool is_set; + auto is_empty = SUCCEEDED(src_value->IsEmpty(&is_set)) && is_set; + auto is_tensor = SUCCEEDED(src_value->IsTensor(&is_set)) && is_set; + + if (is_tensor && !is_empty) { + int16_t source_location; + int16_t input_required_location; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ValueGetDeviceId(src_value->UseOrtValue(), &source_location), + ort_api); + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionGetInputRequiredDeviceId(session_.get(), name, &input_required_location), + ort_api); + + if (source_location != input_required_location) { + OrtValue* dest_ort_value = nullptr; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->SessionCopyOneInputAcrossDevices(session_.get(), name, + src_value->UseOrtValue(), &dest_ort_value), + ort_api); + auto unique_dest_ort_value = UniqueOrtValue(dest_ort_value, ort_api->ReleaseValue); + + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, this, std::move(unique_dest_ort_value), UniqueOrtAllocator(nullptr, nullptr))); + return S_OK; + } + } + + *out = src; + (*out)->AddRef(); + return S_OK; +} + +HRESULT OnnxruntimeEngine::Run(const char** input_names, IValue** inputs, size_t num_inputs, const char** output_names, IValue** outputs, size_t num_outputs) { + auto ort_api = engine_factory_->UseOrtApi(); + + OrtRunOptions* run_options; + RETURN_HR_IF_NOT_OK_MSG(ort_api->CreateRunOptions(&run_options), + ort_api); + auto unique_run_options = UniqueOrtRunOptions(run_options, ort_api->ReleaseRunOptions); + + std::vector input_ort_values; + std::transform( + inputs, + inputs + num_inputs, + 
std::back_inserter(input_ort_values), + [&](auto& input) { + auto input_value = static_cast(input); + return input_value->UseOrtValue(); + }); + + std::vector output_ort_values; + std::transform( + outputs, + outputs + num_outputs, + std::back_inserter(output_ort_values), + [&](auto& output) { + auto output_value = static_cast(output); + return output_value->UseOrtValue(); + }); + + RETURN_HR_IF_NOT_OK_MSG(ort_api->Run(session_.get(), + unique_run_options.get(), + input_names, + input_ort_values.data(), + num_inputs, + output_names, + num_outputs, + output_ort_values.data()), + ort_api); + + for (size_t index = 0; index < num_outputs; index++) { + auto output_value = static_cast(outputs[index]); + if (output_value->UseOrtValue() != output_ort_values[index]) { + RETURN_IF_FAILED(output_value->AssignOrtValue(output_ort_values[index])); + } + } + + return S_OK; +} + +template +HRESULT FillAbiMap(IInspectable* map_insp, size_t num_elements, void* keys_data, void* values_data) { + auto map = CastToWinrtMap(map_insp); + + auto keys = reinterpret_cast::ResourceType*>(keys_data); + auto values = reinterpret_cast::ResourceType*>(values_data); + + for (size_t i = 0; i < num_elements; ++i) { + map.Insert( + ResourceTypeToCppwinrtType(keys[i]), + ResourceTypeToCppwinrtType(values[i])); + } + return S_OK; +} + +static auto GetAbiMapFiller(winml::TensorKind key_kind, winml::TensorKind value_kind) { + using namespace std::placeholders; + if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Int64) { + return std::bind(&FillAbiMap, _1, _2, _3, _4); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Float) { + return std::bind(&FillAbiMap, _1, _2, _3, _4); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::Double) { + return std::bind(&FillAbiMap, _1, _2, _3, _4); + } else if (key_kind == winml::TensorKind::Int64 && value_kind == winml::TensorKind::String) { + return std::bind(&FillAbiMap, 
_1, _2, _3, _4); + } else if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Int64) { + return std::bind(&FillAbiMap, _1, _2, _3, _4); + } else if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Float) { + return std::bind(&FillAbiMap, _1, _2, _3, _4); + } else if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::Double) { + return std::bind(&FillAbiMap, _1, _2, _3, _4); + } else if (key_kind == winml::TensorKind::String && value_kind == winml::TensorKind::String) { + return std::bind(&FillAbiMap, _1, _2, _3, _4); + } + + THROW_HR(E_NOTIMPL); +} + +HRESULT OnnxruntimeEngine::FillFromMapValue(IInspectable* map, winml::TensorKind key_kind, winml::TensorKind value_kind, IValue* map_value) { + auto ort_api = engine_factory_->UseOrtApi(); + auto onnxruntime_map_value = static_cast(map_value); + auto ort_map_value = onnxruntime_map_value->UseOrtValue(); + + OrtAllocator* ort_allocator; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetAllocatorWithDefaultOptions(&ort_allocator), + ort_api); // This should not be freed as this owned by ort + + // get the keys + OrtValue* keys_ort_value = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetValue(ort_map_value, 0, ort_allocator, &keys_ort_value), + ort_api); + auto unique_keys_value = UniqueOrtValue(keys_ort_value, ort_api->ReleaseValue); + winrt::com_ptr keys_value; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(keys_value.put(), this, std::move(unique_keys_value), UniqueOrtAllocator(nullptr, nullptr))); + + // get the keys + OrtValue* values_ort_value = nullptr; + RETURN_HR_IF_NOT_OK_MSG(ort_api->GetValue(ort_map_value, 1, ort_allocator, &values_ort_value), + ort_api); + auto unique_values_value = UniqueOrtValue(values_ort_value, ort_api->ReleaseValue); + winrt::com_ptr values_value; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(values_value.put(), this, std::move(unique_values_value), UniqueOrtAllocator(nullptr, nullptr))); + + std::vector 
keys_shape; + keys_value->GetTensorShape(keys_shape); + + WinML::Resource keys_data; + RETURN_IF_FAILED(keys_value->GetResource(keys_data)); + WinML::Resource values_data; + RETURN_IF_FAILED(values_value->GetResource(values_data)); + + auto num_elements = static_cast(ShapeSize(keys_shape.data(), keys_shape.size())); + GetAbiMapFiller(key_kind, value_kind)(map, num_elements, keys_data.get(), values_data.get()); + + return S_OK; +} + +HRESULT OnnxruntimeEngineFactory::RuntimeClassInitialize() { + ort_api_ = GetVersionedOrtApi(); + winml_adapter_api_ = GetVersionedWinmlAdapterApi(); + return S_OK; +} + +HRESULT OnnxruntimeEngineFactory::EnsureEnvironment() { + if (environment_ == nullptr) { + std::lock_guard lock(mutex_); + if (environment_ == nullptr) { + environment_ = PheonixSingleton(ort_api_); + } + } + return S_OK; +} + +STDMETHODIMP OnnxruntimeEngineFactory::CreateModel(_In_ const char* model_path, _In_ size_t len, _Outptr_ IModel** out) { + RETURN_IF_FAILED(EnsureEnvironment()); + + OrtModel* ort_model = nullptr; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api_->CreateModelFromPath(model_path, len, &ort_model), + ort_api_); + + auto model = UniqueOrtModel(ort_model, winml_adapter_api_->ReleaseModel); + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, this, std::move(model))); + return S_OK; +} + +STDMETHODIMP OnnxruntimeEngineFactory::CreateModel(_In_ void* data, _In_ size_t size, _Outptr_ IModel** out) { + RETURN_IF_FAILED(EnsureEnvironment()); + OrtModel* ort_model = nullptr; + if (auto status = winml_adapter_api_->CreateModelFromData(data, size, &ort_model)) { + return E_INVALIDARG; + } + + auto model = UniqueOrtModel(ort_model, winml_adapter_api_->ReleaseModel); + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(out, this, std::move(model))); + return S_OK; +} + +STDMETHODIMP OnnxruntimeEngineFactory::CreateEngineBuilder(_Outptr_ Windows::AI::MachineLearning::IEngineBuilder** out) { + RETURN_IF_FAILED(EnsureEnvironment()); + Microsoft::WRL::ComPtr 
onnxruntime_engine_builder; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&onnxruntime_engine_builder, this)); + RETURN_IF_FAILED(onnxruntime_engine_builder.CopyTo(out)); + return S_OK; +} + +const OrtApi* OnnxruntimeEngineFactory::UseOrtApi() { + return ort_api_; +} + +const WinmlAdapterApi* OnnxruntimeEngineFactory::UseWinmlAdapterApi() { + return winml_adapter_api_; +} + +HRESULT OnnxruntimeEngineFactory::GetOrtEnvironment(OrtEnv** ort_env) { + RETURN_IF_FAILED(EnsureEnvironment()); + RETURN_IF_FAILED(environment_->GetOrtEnvironment(ort_env)); + return S_OK; +} + +HRESULT OnnxruntimeEngineFactory::EnableDebugOutput(bool is_enabled) { + RETURN_IF_FAILED(EnsureEnvironment()); + RETURN_IF_FAILED(environment_->EnableDebugOutput(is_enabled)); + return S_OK; +} + +HRESULT OnnxruntimeEngineFactory::CreateCustomRegistry(IMLOperatorRegistry** registry) { + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api_->CreateCustomRegistry(registry), + ort_api_); + return S_OK; +} + +STDAPI CreateOnnxruntimeEngineFactory(_Out_ Windows::AI::MachineLearning::IEngineFactory** engine_factory) { + Microsoft::WRL::ComPtr onnxruntime_engine_factory; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&onnxruntime_engine_factory)); + RETURN_IF_FAILED(onnxruntime_engine_factory.CopyTo(engine_factory)); + return S_OK; +} \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeEngine.h b/winml/lib/Api.Ort/OnnxruntimeEngine.h new file mode 100644 index 0000000000000..474c3e2d04d81 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeEngine.h @@ -0,0 +1,147 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "iengine.h" + +#include + +namespace Windows::AI::MachineLearning { + +class OnnxruntimeEngineBuilder; +class OnnxruntimeEngineFactory; +class OnnxruntimeEnvironment; +class OnnxruntimeModel; +class OnnxruntimeEngine; + +struct IOrtSessionBuilder; + +class OnnxruntimeValue : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IValue> { + public: + OnnxruntimeValue(); + ~OnnxruntimeValue(); + + HRESULT RuntimeClassInitialize(OnnxruntimeEngine* engine, UniqueOrtValue&& value, UniqueOrtAllocator&& allocator); + + STDMETHOD(IsEmpty) + (bool* out) override; + STDMETHOD(IsCpu) + (bool* out) override; + STDMETHOD(GetResource) + (WinML::Resource& resource) override; + STDMETHOD(IsTensor) + (bool* out) override; + STDMETHOD(IsOfTensorType) + (winml::TensorKind kind, bool* out) override; + STDMETHOD(GetTensorShape) + (std::vector& shape_vector) override; + STDMETHOD(IsOfMapType) + (winml::TensorKind key_kind, winml::TensorKind value_kind, bool* out) override; + STDMETHOD(IsOfVectorMapType) + (winml::TensorKind key_kind, winml::TensorKind value_kind, bool* out) override; + + HRESULT(SetParameter) + (IUnknown* param); + OrtValue* UseOrtValue(); + HRESULT AssignOrtValue(OrtValue* ptr); + + private: + Microsoft::WRL::ComPtr engine_; + Microsoft::WRL::ComPtr param_; + UniqueOrtValue value_; + UniqueOrtAllocator allocator_; +}; + +class OnnxruntimeEngine : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IEngine> { + public: + OnnxruntimeEngine(); + HRESULT RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory, UniqueOrtSession&& session, IOrtSessionBuilder* session_builder); + + STDMETHOD(LoadModel) + (_In_ IModel* model) override; + STDMETHOD(Initialize) + () override; + STDMETHOD(RegisterGraphTransformers) + () override; + STDMETHOD(RegisterCustomRegistry) + (IMLOperatorRegistry* registry) override; + STDMETHOD(EndProfiling) + () override; + STDMETHOD(StartProfiling) + () override; + 
STDMETHOD(FlushContext) + () override; + STDMETHOD(TrimUploadHeap) + () override; + STDMETHOD(ReleaseCompletedReferences) + () override; + STDMETHOD(Sync) + () override; + STDMETHOD(CreateTensorValue) + (const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) override; + STDMETHOD(CreateTensorValueFromExternalD3DResource) + (ID3D12Resource* resource, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) override; + STDMETHOD(CreateTensorValueFromExternalBuffer) + (void* data, size_t size_in_bytes, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) override; + STDMETHOD(CreateStringTensorValueFromDataWithCopy) + (const char* const* data, size_t num_elements, const int64_t* shape, size_t count, _Out_ IValue** out) override; + STDMETHOD(CreateNullValue) + (_Out_ IValue** out) override; + STDMETHOD(CreateMapValue) + (IInspectable* map, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue** out) override; + STDMETHOD(CreateSequenceOfMapsValue) + (IInspectable* map, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue** out) override; + STDMETHOD(CreateOneInputAcrossDevices) + (const char* name, IValue* src, IValue** dest) override; + STDMETHOD(CopyValueAcrossDevices) + (IValue* src, IValue* dest) override; + STDMETHOD(Run) + (const char** input_names, IValue** inputs, size_t num_inputs, const char** output_names, IValue** outputs, size_t num_outputs) override; + STDMETHOD(FillFromMapValue) + (IInspectable* map, winml::TensorKind key_kind, winml::TensorKind value_kind, IValue* value) override; + STDMETHOD(FillSequenceOfMapsValue) + (IInspectable* sequence, winml::TensorKind key_kind, winml::TensorKind value_kind, IValue* value) override; + + OrtSession* UseOrtSession(); + const OrtApi* UseOrtApi(); + OnnxruntimeEngineFactory* GetEngineFactory(); + HRESULT CreateTensorValueFromDefaultAllocator(const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ 
IValue** out); + + private: + Microsoft::WRL::ComPtr engine_factory_; + Microsoft::WRL::ComPtr session_builder_; + UniqueOrtSession session_; +}; + +class OnnxruntimeEngineFactory : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IEngineFactory> { + public: + HRESULT RuntimeClassInitialize(); + STDMETHOD(CreateModel) + (_In_ const char* model_path, _In_ size_t len, _Outptr_ IModel** out) override; + STDMETHOD(CreateModel) + (_In_ void* data, _In_ size_t size, _Outptr_ IModel** out) override; + STDMETHOD(CreateEngineBuilder) + (IEngineBuilder** engine_builder) override; + STDMETHOD(EnableDebugOutput) + (bool is_enabled) override; + STDMETHOD(CreateCustomRegistry) + (_Out_ IMLOperatorRegistry** registry) override; + + const OrtApi* UseOrtApi(); + const WinmlAdapterApi* UseWinmlAdapterApi(); + HRESULT EnsureEnvironment(); + HRESULT GetOrtEnvironment(_Out_ OrtEnv** ort_env); + + private: + const OrtApi* ort_api_ = nullptr; + const WinmlAdapterApi* winml_adapter_api_ = nullptr; + std::shared_ptr environment_; + std::mutex mutex_; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeEngineBuilder.cpp b/winml/lib/Api.Ort/OnnxruntimeEngineBuilder.cpp new file mode 100644 index 0000000000000..9514de94782e1 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeEngineBuilder.cpp @@ -0,0 +1,75 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" + +#include "OnnxruntimeEngine.h" +#include "OnnxruntimeEngineBuilder.h" +#include "OnnxruntimeCpuSessionBuilder.h" + +#ifdef USE_DML +#include "OnnxruntimeDmlSessionBuilder.h" +#endif + +#include "OnnxruntimeErrors.h" +using namespace WinML; + +HRESULT OnnxruntimeEngineBuilder::RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory) { + engine_factory_ = engine_factory; + return S_OK; +} + +STDMETHODIMP OnnxruntimeEngineBuilder::CreateEngine(Windows::AI::MachineLearning::IEngine** out) { + auto ort_api = engine_factory_->UseOrtApi(); + + Microsoft::WRL::ComPtr onnxruntime_session_builder; + + if (device_ == nullptr) { + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&onnxruntime_session_builder, engine_factory_.Get())); + } else { +#ifdef USE_DML + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&onnxruntime_session_builder, engine_factory_.Get(), device_.Get(), queue_.Get())); +#endif + } + + OrtSessionOptions* ort_options; + RETURN_IF_FAILED(onnxruntime_session_builder->CreateSessionOptions(&ort_options)); + auto session_options = UniqueOrtSessionOptions(ort_options, ort_api->ReleaseSessionOptions); + + if (batch_size_override_.has_value()) { + constexpr const char* DATA_BATCH = "DATA_BATCH"; + RETURN_HR_IF_NOT_OK_MSG(ort_api->AddFreeDimensionOverride(session_options.get(), DATA_BATCH, batch_size_override_.value()), + ort_api); + } + + OrtSession* ort_session = nullptr; + onnxruntime_session_builder->CreateSession(session_options.get(), &ort_session); + auto session = UniqueOrtSession(ort_session, ort_api->ReleaseSession); + + Microsoft::WRL::ComPtr onnxruntime_engine; + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&onnxruntime_engine, + engine_factory_.Get(), std::move(session), onnxruntime_session_builder.Get())); + RETURN_IF_FAILED(onnxruntime_engine.CopyTo(out)); + return S_OK; +} + +STDMETHODIMP OnnxruntimeEngineBuilder::GetD3D12Device(ID3D12Device** device) { + *device = device_.Get(); + return S_OK; +} + 
+STDMETHODIMP OnnxruntimeEngineBuilder::SetD3D12Resources(ID3D12Device* device, ID3D12CommandQueue* queue) { + device_ = device; + queue_ = queue; + return S_OK; +} + +STDMETHODIMP OnnxruntimeEngineBuilder::GetID3D12CommandQueue(ID3D12CommandQueue** queue) { + *queue = queue_.Get(); + return S_OK; +} + +STDMETHODIMP OnnxruntimeEngineBuilder::SetBatchSizeOverride(uint32_t batch_size_override) { + batch_size_override_ = batch_size_override; + return S_OK; +} \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeEngineBuilder.h b/winml/lib/Api.Ort/OnnxruntimeEngineBuilder.h new file mode 100644 index 0000000000000..6ac5f478021e0 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeEngineBuilder.h @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "iengine.h" + +namespace Windows::AI::MachineLearning { + +class OnnxruntimeEngineBuilder : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IEngineBuilder> { + public: + HRESULT RuntimeClassInitialize(_In_ OnnxruntimeEngineFactory* engine); + + STDMETHOD(SetD3D12Resources) + (ID3D12Device* device, ID3D12CommandQueue* queue); + + STDMETHOD(GetD3D12Device) + (_Outptr_ ID3D12Device** device); + + STDMETHOD(GetID3D12CommandQueue) + (_Outptr_ ID3D12CommandQueue** queue); + + STDMETHOD(SetBatchSizeOverride) + (uint32_t batch_size_override); + + STDMETHOD(CreateEngine) + (_Outptr_ IEngine** out); + + private: + Microsoft::WRL::ComPtr engine_factory_; + Microsoft::WRL::ComPtr device_ = nullptr; + Microsoft::WRL::ComPtr queue_ = nullptr; + std::optional batch_size_override_; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeEnvironment.cpp b/winml/lib/Api.Ort/OnnxruntimeEnvironment.cpp new file mode 100644 index 0000000000000..f62d077de711e --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeEnvironment.cpp @@ -0,0 +1,148 @@ +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" +#include "OnnxruntimeEnvironment.h" +#include "OnnxruntimeErrors.h" +#include "core/platform/windows/TraceLoggingConfig.h" +#include + +using namespace Windows::AI ::MachineLearning; + +static bool debug_output_ = false; + +static void __stdcall WinmlOrtLoggingCallback(void* param, OrtLoggingLevel severity, const char* category, + const char* logger_id, const char* code_location, const char* message) noexcept { + UNREFERENCED_PARAMETER(param); + UNREFERENCED_PARAMETER(logger_id); + // ORT Fatal and Error Messages are logged as Telemetry, rest are non-telemetry. + switch (severity) { + case OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL: //Telemetry + TraceLoggingWrite( + winml_trace_logging_provider, + "WinMLLogSink", + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingLevel(WINEVENT_LEVEL_CRITICAL), + TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(category), + TraceLoggingUInt32((UINT32)severity), + TraceLoggingString(message), + TraceLoggingString(code_location), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); + break; + case OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR: //Telemetry + TraceLoggingWrite( + winml_trace_logging_provider, + "WinMLLogSink", + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingLevel(WINEVENT_LEVEL_ERROR), + TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(category), + TraceLoggingUInt32((UINT32)severity), + TraceLoggingString(message), + TraceLoggingString(code_location), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); + break; + case OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING: + TraceLoggingWrite( + winml_trace_logging_provider, + "WinMLLogSink", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingLevel(WINEVENT_LEVEL_WARNING), + 
TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(category), + TraceLoggingUInt32((UINT32)severity), + TraceLoggingString(message), + TraceLoggingString(code_location)); + break; + case OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO: + TraceLoggingWrite( + winml_trace_logging_provider, + "WinMLLogSink", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(category), + TraceLoggingUInt32((UINT32)severity), + TraceLoggingString(message), + TraceLoggingString(code_location)); + break; + case OrtLoggingLevel::ORT_LOGGING_LEVEL_VERBOSE: + __fallthrough; //Default is Verbose too. + default: + TraceLoggingWrite( + winml_trace_logging_provider, + "WinMLLogSink", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), + TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(category), + TraceLoggingUInt32((UINT32)severity), + TraceLoggingString(message), + TraceLoggingString(code_location)); + } + + if (debug_output_) { + OutputDebugStringA((std::string(message) + "\r\n").c_str()); + } +} + +static void __stdcall WinmlOrtProfileEventCallback(const OrtProfilerEventRecord* profiler_record) noexcept { + if (profiler_record->category_ == OrtProfilerEventCategory::NODE_EVENT) { + TraceLoggingWrite( + winml_trace_logging_provider, + "OnnxRuntimeProfiling", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_LOTUS_PROFILING), + TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), + TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(profiler_record->category_name_, "Category"), + TraceLoggingInt64(profiler_record->duration_, "Duration (us)"), + TraceLoggingInt64(profiler_record->time_span_, "Time Stamp (us)"), + TraceLoggingString(profiler_record->event_name_, "Event Name"), + TraceLoggingInt32(profiler_record->process_id_, "Process ID"), + TraceLoggingInt32(profiler_record->thread_id_, "Thread ID"), + 
TraceLoggingString(profiler_record->op_name_, "Operator Name"), + TraceLoggingString(profiler_record->execution_provider_, "Execution Provider")); + } else { + TraceLoggingWrite( + winml_trace_logging_provider, + "OnnxRuntimeProfiling", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_LOTUS_PROFILING), + TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), + TraceLoggingOpcode(EVENT_TRACE_TYPE_INFO), + TraceLoggingString(profiler_record->category_name_, "Category"), + TraceLoggingInt64(profiler_record->duration_, "Duration (us)"), + TraceLoggingInt64(profiler_record->time_span_, "Time Stamp (us)"), + TraceLoggingString(profiler_record->event_name_, "Event Name"), + TraceLoggingInt32(profiler_record->process_id_, "Process ID"), + TraceLoggingInt32(profiler_record->thread_id_, "Thread ID")); + } +} + +OnnxruntimeEnvironment::OnnxruntimeEnvironment(const OrtApi* ort_api) : ort_env_(nullptr, nullptr) { + OrtEnv* ort_env = nullptr; + THROW_IF_NOT_OK_MSG(ort_api->CreateEnv(OrtLoggingLevel::ORT_LOGGING_LEVEL_VERBOSE, "Default", &ort_env), + ort_api); + ort_env_ = UniqueOrtEnv(ort_env, ort_api->ReleaseEnv); + + // Configure the environment with the winml logger + auto winml_adapter_api = OrtGetWinMLAdapter(ort_api); + THROW_IF_NOT_OK_MSG(winml_adapter_api->EnvConfigureCustomLoggerAndProfiler(ort_env_.get(), + &WinmlOrtLoggingCallback, &WinmlOrtProfileEventCallback, nullptr, + OrtLoggingLevel::ORT_LOGGING_LEVEL_VERBOSE, "Default", &ort_env), + ort_api); + + THROW_IF_NOT_OK_MSG(winml_adapter_api->OverrideSchema(), ort_api); +} + +HRESULT OnnxruntimeEnvironment::GetOrtEnvironment(_Out_ OrtEnv** ort_env) { + *ort_env = ort_env_.get(); + return S_OK; +} + +HRESULT OnnxruntimeEnvironment::EnableDebugOutput(bool is_enabled) { + debug_output_ = is_enabled; + return S_OK; +} \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeEnvironment.h b/winml/lib/Api.Ort/OnnxruntimeEnvironment.h new file mode 100644 index 0000000000000..2d81579ce2ad5 --- /dev/null +++ 
b/winml/lib/Api.Ort/OnnxruntimeEnvironment.h @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#pragma warning(push) +#pragma warning(disable : 4505) + +namespace Windows::AI ::MachineLearning { + +class OnnxruntimeEnvironment { + public: + OnnxruntimeEnvironment(const OrtApi* ort_api); + + HRESULT GetOrtEnvironment(_Out_ OrtEnv** ert_env); + HRESULT EnableDebugOutput(bool is_enabled); + + private: + UniqueOrtEnv ort_env_; +}; + +} // namespace Windows::AI::MachineLearning + +#pragma warning(pop) \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeErrors.h b/winml/lib/Api.Ort/OnnxruntimeErrors.h new file mode 100644 index 0000000000000..4184613ac806b --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeErrors.h @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include "pch.h" +#include "core/providers/winml/winml_provider_factory.h" + +#ifdef _WIN32 +inline HRESULT OrtErrorCodeToHRESULT(OrtErrorCode status) noexcept { + switch (status) { + case OrtErrorCode::ORT_OK: + return S_OK; + case OrtErrorCode::ORT_FAIL: + return E_FAIL; + case OrtErrorCode::ORT_INVALID_ARGUMENT: + return E_INVALIDARG; + case OrtErrorCode::ORT_NO_SUCHFILE: + return __HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); + case OrtErrorCode::ORT_NO_MODEL: + return __HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); + case OrtErrorCode::ORT_ENGINE_ERROR: + return E_FAIL; + case OrtErrorCode::ORT_RUNTIME_EXCEPTION: + return E_FAIL; + case OrtErrorCode::ORT_INVALID_PROTOBUF: + return __HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); + case OrtErrorCode::ORT_MODEL_LOADED: + return __HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); + case OrtErrorCode::ORT_NOT_IMPLEMENTED: + return E_NOTIMPL; + case OrtErrorCode::ORT_INVALID_GRAPH: + return __HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); + case OrtErrorCode::ORT_EP_FAIL: + return 
__HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); + default: + return E_FAIL; + } +} +#endif + +#define RETURN_HR_IF_NOT_OK_MSG(status, ort_api) \ + do { \ + auto _status = status; \ + if (_status) { \ + auto error_code = ort_api->GetErrorCode(_status); \ + auto error_message = ort_api->GetErrorMessage(_status); \ + HRESULT hresult = OrtErrorCodeToHRESULT(error_code); \ + telemetry_helper.LogRuntimeError(hresult, std::string(error_message), __FILE__, __FUNCTION__, __LINE__); \ + RETURN_HR_MSG(hresult, \ + error_message); \ + } \ + } while (0) + +#define THROW_IF_NOT_OK_MSG(status, ort_api) \ + do { \ + auto _status = status; \ + if (_status) { \ + auto error_code = ort_api->GetErrorCode(_status); \ + auto error_message = ort_api->GetErrorMessage(_status); \ + HRESULT hresult = OrtErrorCodeToHRESULT(error_code); \ + telemetry_helper.LogRuntimeError(hresult, std::string(error_message), __FILE__, __FUNCTION__, __LINE__); \ + winrt::hstring errorMessage(WinML::Strings::HStringFromUTF8(error_message)); \ + throw winrt::hresult_error(hresult, errorMessage); \ + } \ + } while (0) diff --git a/winml/lib/Api.Ort/OnnxruntimeModel.cpp b/winml/lib/Api.Ort/OnnxruntimeModel.cpp new file mode 100644 index 0000000000000..562bf505d86f3 --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeModel.cpp @@ -0,0 +1,221 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" +#include "OnnxruntimeModel.h" +#include "core/platform/windows/TraceLoggingConfig.h" +#include + +#include "OnnxruntimeDescriptorConverter.h" +#include "OnnxruntimeEngine.h" +#include "OnnxruntimeErrors.h" + +using namespace Windows::AI::MachineLearning; + +struct winml_adapter_api_model_feature_helper { + decltype(WinmlAdapterApi::ModelGetInputCount) GetCount; + decltype(WinmlAdapterApi::ModelGetInputName) GetName; + decltype(WinmlAdapterApi::ModelGetInputDescription) GetDescription; + decltype(WinmlAdapterApi::ModelGetInputTypeInfo) GetTypeInfo; +}; + +HRESULT CreateFeatureDescriptors( + OnnxruntimeEngineFactory* engine_factory, + const winml_adapter_api_model_feature_helper* feature_helpers, + OrtModel* ort_model, + std::vector& descriptors) { + const auto ort_api = engine_factory->UseOrtApi(); + size_t count; + RETURN_HR_IF_NOT_OK_MSG(feature_helpers->GetCount(ort_model, &count), + engine_factory->UseOrtApi()); + + for (size_t i = 0; i < count; i++) { + OnnxruntimeValueInfoWrapper descriptor; + RETURN_HR_IF_NOT_OK_MSG(feature_helpers->GetName(ort_model, i, &descriptor.name_, &descriptor.name_length_), + engine_factory->UseOrtApi()); + RETURN_HR_IF_NOT_OK_MSG(feature_helpers->GetDescription(ort_model, i, &descriptor.description_, &descriptor.description_length_), + engine_factory->UseOrtApi()); + + OrtTypeInfo* type_info; + RETURN_HR_IF_NOT_OK_MSG(feature_helpers->GetTypeInfo(ort_model, i, &type_info), + engine_factory->UseOrtApi()); + + descriptor.type_info_ = UniqueOrtTypeInfo(type_info, ort_api->ReleaseTypeInfo); + + descriptors.push_back(std::move(descriptor)); + } + return S_OK; +} + +HRESULT ModelInfo::RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory, OrtModel* ort_model) { + RETURN_HR_IF_NULL(E_INVALIDARG, ort_model); + + const auto winml_adapter_api = engine_factory->UseWinmlAdapterApi(); + + // Get Metadata + size_t count; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ModelGetMetadataCount(ort_model, &count), + 
engine_factory->UseOrtApi()); + + const char* metadata_key; + size_t metadata_key_len; + const char* metadata_value; + size_t metadata_value_len; + for (size_t i = 0; i < count; i++) { + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ModelGetMetadata(ort_model, i, &metadata_key, &metadata_key_len, &metadata_value, &metadata_value_len), + engine_factory->UseOrtApi()); + + model_metadata_.insert_or_assign( + std::string(metadata_key, metadata_key_len), + std::string(metadata_value, metadata_value_len)); + } + + WinML::OnnxruntimeDescriptorConverter converter(engine_factory, model_metadata_); + + static const winml_adapter_api_model_feature_helper input_helpers = { + winml_adapter_api->ModelGetInputCount, + winml_adapter_api->ModelGetInputName, + winml_adapter_api->ModelGetInputDescription, + winml_adapter_api->ModelGetInputTypeInfo}; + + // Create inputs + std::vector inputs; + RETURN_IF_FAILED(CreateFeatureDescriptors(engine_factory, &input_helpers, ort_model, inputs)); + input_features_ = converter.ConvertToLearningModelDescriptors(inputs); + + // Create outputs + static const winml_adapter_api_model_feature_helper output_helpers = { + winml_adapter_api->ModelGetOutputCount, + winml_adapter_api->ModelGetOutputName, + winml_adapter_api->ModelGetOutputDescription, + winml_adapter_api->ModelGetOutputTypeInfo}; + + std::vector outputs; + RETURN_IF_FAILED(CreateFeatureDescriptors(engine_factory, &output_helpers, ort_model, outputs)); + output_features_ = converter.ConvertToLearningModelDescriptors(outputs); + + const char* out; + size_t len; + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ModelGetAuthor(ort_model, &out, &len), + engine_factory->UseOrtApi()); + author_ = std::string(out, len); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ModelGetName(ort_model, &out, &len), + engine_factory->UseOrtApi()); + name_ = std::string(out, len); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ModelGetDomain(ort_model, &out, &len), + engine_factory->UseOrtApi()); + domain_ = 
std::string(out, len); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ModelGetDescription(ort_model, &out, &len), + engine_factory->UseOrtApi()); + description_ = std::string(out, len); + + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->ModelGetVersion(ort_model, &version_), + engine_factory->UseOrtApi()); + + return S_OK; +} + +STDMETHODIMP ModelInfo::GetAuthor(const char** out, size_t* len) { + *out = author_.c_str(); + *len = author_.size(); + return S_OK; +} + +STDMETHODIMP ModelInfo::GetName(const char** out, size_t* len) { + *out = name_.c_str(); + *len = name_.size(); + return S_OK; +} + +STDMETHODIMP ModelInfo::GetDomain(const char** out, size_t* len) { + *out = domain_.c_str(); + *len = domain_.size(); + return S_OK; +} + +STDMETHODIMP ModelInfo::GetDescription(const char** out, size_t* len) { + *out = description_.c_str(); + *len = description_.size(); + return S_OK; +} + +STDMETHODIMP ModelInfo::GetVersion(int64_t* out) { + *out = version_; + return S_OK; +} + +STDMETHODIMP ModelInfo::GetModelMetadata(ABI::Windows::Foundation::Collections::IMapView** metadata) { + std::unordered_map map_copy; + for (auto& pair : model_metadata_) { + auto metadata_key = WinML::Strings::HStringFromUTF8(pair.first); + auto metadata_value = WinML::Strings::HStringFromUTF8(pair.second); + map_copy.emplace(std::move(metadata_key), std::move(metadata_value)); + } + auto map = winrt::single_threaded_map(std::move(map_copy)); + winrt::copy_to_abi(map, *(void**)metadata); + return S_OK; +} + +STDMETHODIMP ModelInfo::GetInputFeatures(ABI::Windows::Foundation::Collections::IVectorView** features) { + *features = nullptr; + winrt::copy_to_abi(input_features_.GetView(), *(void**)features); + return S_OK; +} + +STDMETHODIMP ModelInfo::GetOutputFeatures(ABI::Windows::Foundation::Collections::IVectorView** features) { + *features = nullptr; + winrt::copy_to_abi(output_features_.GetView(), *(void**)features); + return S_OK; +} + +OnnruntimeModel::OnnruntimeModel() : ort_model_(nullptr, 
nullptr) { +} + +HRESULT OnnruntimeModel::RuntimeClassInitialize(OnnxruntimeEngineFactory* engine_factory, UniqueOrtModel&& ort_model) { + RETURN_HR_IF_NULL(E_INVALIDARG, ort_model); + + engine_factory_ = engine_factory; + ort_model_ = std::move(ort_model); + + return S_OK; +} + +STDMETHODIMP OnnruntimeModel::GetModelInfo(IModelInfo** info) { + if (info_ == nullptr) { + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(&info_, engine_factory_.Get(), ort_model_.get())); + } + + info_.CopyTo(info); + + return S_OK; +} + +STDMETHODIMP OnnruntimeModel::ModelEnsureNoFloat16() { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + if (auto status = winml_adapter_api->ModelEnsureNoFloat16(ort_model_.get())) { + return DXGI_ERROR_UNSUPPORTED; + } + return S_OK; +} + +STDMETHODIMP OnnruntimeModel::CloneModel(IModel** copy) { + auto winml_adapter_api = engine_factory_->UseWinmlAdapterApi(); + + OrtModel* ort_model_copy; + RETURN_HR_IF_NOT_OK_MSG(winml_adapter_api->CloneModel(ort_model_.get(), &ort_model_copy), + engine_factory_->UseOrtApi()); + + auto model = UniqueOrtModel(ort_model_copy, winml_adapter_api->ReleaseModel); + RETURN_IF_FAILED(Microsoft::WRL::MakeAndInitialize(copy, engine_factory_.Get(), std::move(model))); + + return S_OK; +} + +STDMETHODIMP OnnruntimeModel::DetachOrtModel(OrtModel** model) { + *model = ort_model_.release(); + return S_OK; +} diff --git a/winml/lib/Api.Ort/OnnxruntimeModel.h b/winml/lib/Api.Ort/OnnxruntimeModel.h new file mode 100644 index 0000000000000..47325780221aa --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeModel.h @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "iengine.h" + +namespace Windows::AI::MachineLearning { + +class OnnxruntimeEngineFactory; + +// The IOrtSessionBuilder offers an abstraction over the creation of +// InferenceSession, that enables the creation of the session based on a device (CPU/DML). 
+MIDL_INTERFACE("92679cbf-7a9d-48bb-b97f-ef9fb447ce8e") +IOnnxruntimeModel : IUnknown { + virtual HRESULT STDMETHODCALLTYPE DetachOrtModel(OrtModel * *model) PURE; +}; + +class ModelInfo : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IModelInfo> { + public: + HRESULT RuntimeClassInitialize(_In_ OnnxruntimeEngineFactory* engine, _In_ OrtModel* ort_model); + + STDMETHOD(GetAuthor) + (const char** out, size_t* len); + STDMETHOD(GetName) + (const char** out, size_t* len); + STDMETHOD(GetDomain) + (const char** out, size_t* len); + STDMETHOD(GetDescription) + (const char** out, size_t* len); + STDMETHOD(GetVersion) + (int64_t* out); + STDMETHOD(GetModelMetadata) + (ABI::Windows::Foundation::Collections::IMapView** metadata); + STDMETHOD(GetInputFeatures) + (ABI::Windows::Foundation::Collections::IVectorView** features); + STDMETHOD(GetOutputFeatures) + (ABI::Windows::Foundation::Collections::IVectorView** features); + + private: + std::string author_; + std::string name_; + std::string domain_; + std::string description_; + int64_t version_; + std::unordered_map model_metadata_; + wfc::IVector input_features_; + wfc::IVector output_features_; +}; + +class OnnruntimeModel : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, + IModel, + IOnnxruntimeModel> { + public: + OnnruntimeModel(); + + HRESULT RuntimeClassInitialize(OnnxruntimeEngineFactory* engine, UniqueOrtModel&& ort_model); + + STDMETHOD(GetModelInfo) + (IModelInfo** info); + STDMETHOD(ModelEnsureNoFloat16) + (); + STDMETHOD(CloneModel) + (IModel** copy); + STDMETHOD(DetachOrtModel) + (OrtModel** model); + + private: + UniqueOrtModel ort_model_; + + Microsoft::WRL::ComPtr engine_factory_; + Microsoft::WRL::ComPtr info_; + + std::optional> metadata_cache_; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Ort/OnnxruntimeSessionBuilder.h b/winml/lib/Api.Ort/OnnxruntimeSessionBuilder.h new file mode 100644 
index 0000000000000..9924e96cc345e --- /dev/null +++ b/winml/lib/Api.Ort/OnnxruntimeSessionBuilder.h @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +namespace Windows::AI::MachineLearning { + +// The IOrtSessionBuilder offers an abstraction over the creation of +// InferenceSession, that enables the creation of the session based on a device (CPU/DML). +MIDL_INTERFACE("2746f03a-7e08-4564-b5d0-c670fef116ee") +IOrtSessionBuilder : IUnknown { + virtual HRESULT STDMETHODCALLTYPE CreateSessionOptions( + OrtSessionOptions * *options) = 0; + + virtual HRESULT STDMETHODCALLTYPE CreateSession( + OrtSessionOptions * options, + OrtSession * *session) = 0; + + virtual HRESULT STDMETHODCALLTYPE Initialize( + OrtSession * session) = 0; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api.Ort/inc/OnnxruntimeProvider.h b/winml/lib/Api.Ort/inc/OnnxruntimeProvider.h new file mode 100644 index 0000000000000..d7e3cbfa2f6fc --- /dev/null +++ b/winml/lib/Api.Ort/inc/OnnxruntimeProvider.h @@ -0,0 +1,8 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "iengine.h" + +STDAPI CreateOnnxruntimeEngineFactory(_Out_ Windows::AI::MachineLearning::IEngineFactory** engine_factory); \ No newline at end of file diff --git a/winml/lib/Api.Ort/pch.h b/winml/lib/Api.Ort/pch.h new file mode 100644 index 0000000000000..d44047376b44d --- /dev/null +++ b/winml/lib/Api.Ort/pch.h @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "winrt_headers.h" + +#include "core/providers/winml/winml_provider_factory.h" +#include "adapter/winml_adapter_c_api.h" + +using UniqueOrtModel = std::unique_ptr; +using UniqueOrtAllocator = std::unique_ptr; +using UniqueOrtSessionOptions = std::unique_ptr; +using UniqueOrtSession = std::unique_ptr; +using UniqueOrtValue = std::unique_ptr; +using UniqueOrtMemoryInfo = std::unique_ptr; +using UniqueOrtTypeInfo = std::unique_ptr; +using UniqueOrtTensorTypeAndShapeInfo = std::unique_ptr; +using UniqueOrtRunOptions = std::unique_ptr; +using UniqueOrtEnv = std::unique_ptr; diff --git a/winml/lib/Api/FeatureValues.h b/winml/lib/Api/FeatureValues.h new file mode 100644 index 0000000000000..b2c793377070b --- /dev/null +++ b/winml/lib/Api/FeatureValues.h @@ -0,0 +1,334 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +/* + Implementation of Feature Values + All data types in onnxruntime\core\framework\data_types.cc should be implemented here +*/ + +#include "TensorBoolean.g.h" +#include "TensorFloat.g.h" +#include "TensorDouble.g.h" +#include "TensorInt8Bit.g.h" +#include "TensorUInt8Bit.g.h" +#include "TensorUInt16Bit.g.h" +#include "TensorInt16Bit.g.h" +#include "TensorUInt32Bit.g.h" +#include "TensorInt32Bit.g.h" +#include "TensorUInt64Bit.g.h" +#include "TensorInt64Bit.g.h" +#include "TensorFloat16Bit.g.h" +#include "TensorString.g.h" + +#include "impl/MapBase.h" +#include "impl/SequenceBase.h" +#include "impl/TensorBase.h" + +#include "ImageFeatureValue.h" + +#define FREE_DIMENSION -1 + +// CREATE_TENSOR is used by data tensor types to implement common functionality +#define CREATE_TENSOR(type, element_type, element_view_type) \ + namespace winrt::Windows::AI::MachineLearning::implementation { \ + struct type : public WinML::TensorBase< \ + element_type, \ + element_view_type, \ + type, \ + I##type, \ + type##T> { \ + using Base = \ + TensorBase< \ + element_type, \ + 
element_view_type, \ + type, \ + I##type, \ + type##T< \ + type, \ + ITensorNative, \ + WinML::ILotusValueProviderPrivate>>; \ + \ + type() = default; \ + \ + type(wfc::IIterable const& shape) : Base(shape){}; \ + \ + type(std::vector const& shape) : Base(shape){}; \ + \ + type(std::vector const& shape, ID3D12Resource* pResource) : Base(shape, pResource){}; \ + }; \ + } \ + namespace winrt::Windows::AI::MachineLearning::factory_implementation { \ + struct type : type##T { \ + STDMETHOD(CreateFromD3D12Resource) \ + (ID3D12Resource * value, __int64* shape, int shapeSize, IUnknown** result) { \ + return implementation::type::CreateFromD3D12Resource(value, shape, shapeSize, result); \ + } \ + }; \ + } + +CREATE_TENSOR(TensorBoolean, bool, bool) +CREATE_TENSOR(TensorFloat, float, float) +CREATE_TENSOR(TensorDouble, double, double) + +// Currently, before the graph computation, we need to convert uint8 coming +// from application end to int8(ORT end) because winrt doesn't expose a signed 8-bit integer type, +// and after graph run, we need to convert it back. 
+CREATE_TENSOR(TensorInt8Bit, int8_t, uint8_t) +CREATE_TENSOR(TensorUInt8Bit, uint8_t, uint8_t) +CREATE_TENSOR(TensorUInt16Bit, uint16_t, uint16_t) +CREATE_TENSOR(TensorInt16Bit, int16_t, int16_t) +CREATE_TENSOR(TensorUInt32Bit, uint32_t, uint32_t) +CREATE_TENSOR(TensorInt32Bit, int32_t, int32_t) +CREATE_TENSOR(TensorUInt64Bit, uint64_t, uint64_t) +CREATE_TENSOR(TensorInt64Bit, int64_t, int64_t) +CREATE_TENSOR(TensorFloat16Bit, WinML::Half, float) + +#pragma warning(push) +#pragma warning(disable : 4702) // Unreachable code (one of TensorBase's constructor unconditionally throws for + // std::string because it's not supported with D3D12 resources) +CREATE_TENSOR(TensorString, std::string, winrt::hstring) +#pragma warning(pop) + +// CREATE_MAP is used by map types to implement common functionality +#define CREATE_MAP(type, key_type, value_type) \ + namespace winrt::Windows::AI::MachineLearning::implementation { \ + struct type : public WinML::MapBase { \ + type(wfc::IMap const& data) : MapBase(data){}; \ + }; \ + } + +CREATE_MAP(MapInt64BitToInt64Bit, int64_t, int64_t) +CREATE_MAP(MapInt64BitToFloat, int64_t, float) +CREATE_MAP(MapInt64BitToDouble, int64_t, double) +CREATE_MAP(MapInt64BitToString, int64_t, hstring) +CREATE_MAP(MapStringToInt64Bit, hstring, int64_t) +CREATE_MAP(MapStringToFloat, hstring, float) +CREATE_MAP(MapStringToDouble, hstring, double) +CREATE_MAP(MapStringToString, hstring, hstring) + +// CREATE_SEQUENCE is used by sequence types to implement common functionality +#define CREATE_SEQUENCE(type, element_type) \ + namespace winrt::Windows::AI::MachineLearning::implementation { \ + struct type : public WinML::SequenceBase { \ + type(wfc::IIterable const& data) : SequenceBase(data){}; \ + }; \ + } + +using AbiMapStringFloat = wfc::IMap; +using AbiMapInt64BitFloat = wfc::IMap; + +CREATE_SEQUENCE(SequenceMapStringFloat, AbiMapStringFloat) +CREATE_SEQUENCE(SequenceMapInt64BitFloat, AbiMapInt64BitFloat) + +namespace Windows::AI::MachineLearning { + 
+template +inline winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue CreateTensorValueFromInspectable( + WinML::BindingType bindingType, + const winrt::Windows::Foundation::IInspectable& inspectable, + const winrt::Windows::AI::MachineLearning::ITensorFeatureDescriptor& descriptor) { + namespace collections = winrt::Windows::Foundation::Collections; + + if (descriptor.TensorKind() == WinML::TensorKindFrom::Type) { + if (auto vector = inspectable.try_as>()) { + return TValueType::CreateFromIterable(descriptor.Shape(), vector); + } + + if (bindingType == Windows::AI::MachineLearning::BindingType::kInput) { + // Feature inputs should be more permissive, and allow for views to be bound since they are read only + if (auto vectorView = inspectable.try_as>()) { + return TValueType::CreateFromIterable(descriptor.Shape(), vectorView); + } + } + } + return nullptr; +} + +template <> +inline winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue CreateTensorValueFromInspectable( + WinML::BindingType bindingType, + const winrt::Windows::Foundation::IInspectable& inspectable, + const winrt::Windows::AI::MachineLearning::ITensorFeatureDescriptor& descriptor) { + namespace abi = winrt::Windows::AI::MachineLearning; + namespace impl = winrt::Windows::AI::MachineLearning::implementation; + namespace collections = winrt::Windows::Foundation::Collections; + + if (descriptor.TensorKind() == abi::TensorKind::Int8) { + if (auto vector = inspectable.try_as>()) { + return impl::TensorInt8Bit::CreateFromIterable(descriptor.Shape(), vector); + } + + if (bindingType == WinML::BindingType::kInput) { + // Feature inputs should be more permissive, and allow for views to be bound since they are read only + if (auto vectorView = inspectable.try_as>()) { + return impl::TensorInt8Bit::CreateFromIterable(descriptor.Shape(), vectorView); + } + } + } + return nullptr; +} + +template <> +inline winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue 
CreateTensorValueFromInspectable( + WinML::BindingType bindingType, + const winrt::Windows::Foundation::IInspectable& inspectable, + const winrt::Windows::AI::MachineLearning::ITensorFeatureDescriptor& descriptor) { + namespace abi = winrt::Windows::AI::MachineLearning; + namespace impl = winrt::Windows::AI::MachineLearning::implementation; + namespace collections = winrt::Windows::Foundation::Collections; + + if (descriptor.TensorKind() == abi::TensorKind::Float16) { + if (auto vector = inspectable.try_as>()) { + return impl::TensorFloat16Bit::CreateFromIterable(descriptor.Shape(), vector); + } + + if (bindingType == WinML::BindingType::kInput) { + // Feature inputs should be more permissive, and allow for views to be bound since they are read only + if (auto vectorView = inspectable.try_as>()) { + return impl::TensorFloat16Bit::CreateFromIterable(descriptor.Shape(), vectorView); + } + } + } + return nullptr; +} + +inline winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue CreateFeatureValueFromInspectable( + Windows::AI::MachineLearning::BindingType bindingType, + const winrt::Windows::Foundation::IInspectable& inspectable, + const winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor& descriptor) { + using namespace winrt::Windows::AI::MachineLearning; + using namespace winrt::Windows::Foundation::Collections; + + // Tensor and ImageFeatureValue types are passed in directly as feature values + if (auto featureValue = inspectable.try_as()) { + return featureValue; + } + + if (auto videoFrames = inspectable.try_as>()) { + return (0 == videoFrames.Size()) ? nullptr : winrt::make(videoFrames); + } + + if (bindingType == Windows::AI::MachineLearning::BindingType::kInput) { + // Allows to bind IVectorView as input. + if (auto videoFrames = inspectable.try_as>()) { + return (0 == videoFrames.Size()) ? 
nullptr : winrt::make(videoFrames); + } + } + + // ImageFeatureValues Types can be implicitly inferred from the VideoFrame object + if (auto videoFrame = inspectable.try_as()) { + return winrt::make(videoFrame); + } + + // MapFeatureValues Types are implicitly inferred from the iinspectable object + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToFloat::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToDouble::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToInt64Bit::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToString::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapInt64BitToFloat::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapInt64BitToDouble::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapInt64BitToInt64Bit::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapInt64BitToString::Create(map); + } + + if (bindingType == Windows::AI::MachineLearning::BindingType::kInput) { + // Feature inputs should be more permissive, and allow for views to be bound since they are read only + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToFloat::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToDouble::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToInt64Bit::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapStringToString::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapInt64BitToFloat::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapInt64BitToDouble::Create(map); + } + if (auto map = inspectable.try_as>()) { + return 
implementation::MapInt64BitToInt64Bit::Create(map); + } + if (auto map = inspectable.try_as>()) { + return implementation::MapInt64BitToString::Create(map); + } + } + + if (descriptor.Kind() == LearningModelFeatureKind::Sequence) { + // SequenceFeatureValues Types are implicitly inferred from the iinspectable object + if (auto sequence = inspectable.try_as>>()) { + return implementation::SequenceMapStringFloat::Create(sequence); + } + if (auto sequence = inspectable.try_as>>()) { + return implementation::SequenceMapInt64BitFloat::Create(sequence); + } + + if (bindingType == Windows::AI::MachineLearning::BindingType::kInput) { + // Feature inputs should be more permissive, and allow for views to be bound since they are read only + if (auto sequence = inspectable.try_as>>()) { + return implementation::SequenceMapStringFloat::Create(sequence); + } + if (auto sequence = inspectable.try_as>>()) { + return implementation::SequenceMapInt64BitFloat::Create(sequence); + } + } + } else if (descriptor.Kind() == LearningModelFeatureKind::Tensor) { + using Value = winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue; + using Inspectable = winrt::Windows::Foundation::IInspectable; + using Descriptor = winrt::Windows::AI::MachineLearning::ITensorFeatureDescriptor; + using TensorCreator = std::function; + + auto tensorDescriptor = descriptor.as(); + std::vector creators = + { + // Vector and VectorViews of float16 and int8 collide with float and uint8 respectively. + // They are omitted because of this ambiguity and are not constructible via raw winrt collections. 
+ [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }, + [&]() { return CreateTensorValueFromInspectable(bindingType, inspectable, tensorDescriptor); }}; + + for (const auto& tensorCreator : creators) { + if (auto createdTensor = tensorCreator()) { + return createdTensor; + } + } + } + + return nullptr; +} + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/ImageFeatureDescriptor.cpp b/winml/lib/Api/ImageFeatureDescriptor.cpp new file mode 100644 index 0000000000000..211a1da3d3383 --- /dev/null +++ b/winml/lib/Api/ImageFeatureDescriptor.cpp @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" + +#include "ImageFeatureDescriptor.h" + +#include + +namespace winrt::Windows::AI::MachineLearning::implementation { +ImageFeatureDescriptor::ImageFeatureDescriptor( + const char* name, + const char* description, + winml::TensorKind tensor_kind, + const std::vector& shape, + bool is_required, + wgi::BitmapPixelFormat pixel_format, + wgi::BitmapAlphaMode alpha_mode, + uint32_t width, + uint32_t height, + ImageNominalPixelRange nominal_pixel_range, + ImageColorSpaceGamma color_space_gamma) : name_(WinML::Strings::HStringFromUTF8(name)), + description_(WinML::Strings::HStringFromUTF8(description)), + tensor_kind_(tensor_kind), + shape_(shape), + is_required_(is_required), + pixel_format_(pixel_format), + alpha_mode_(alpha_mode), + width_(width), + height_(height), + nominal_pixel_range_(nominal_pixel_range), + color_space_gamma_(color_space_gamma) { +} + +wgi::BitmapPixelFormat +ImageFeatureDescriptor::BitmapPixelFormat() try { + return pixel_format_; +} +WINML_CATCH_ALL + +wgi::BitmapAlphaMode +ImageFeatureDescriptor::BitmapAlphaMode() try { + return alpha_mode_; +} +WINML_CATCH_ALL + +uint32_t +ImageFeatureDescriptor::Width() try { + return width_; +} +WINML_CATCH_ALL + +uint32_t +ImageFeatureDescriptor::Height() try { + return height_; +} +WINML_CATCH_ALL + +hstring +ImageFeatureDescriptor::Name() try { + return name_; +} +WINML_CATCH_ALL + +hstring +ImageFeatureDescriptor::Description() try { + return description_; +} +WINML_CATCH_ALL + +winml::LearningModelFeatureKind +ImageFeatureDescriptor::Kind() try { + return LearningModelFeatureKind::Image; +} +WINML_CATCH_ALL + +bool ImageFeatureDescriptor::IsRequired() try { + return is_required_; +} +WINML_CATCH_ALL + +winml::TensorKind +ImageFeatureDescriptor::TensorKind() { + return tensor_kind_; +} + +wfc::IVectorView +ImageFeatureDescriptor::Shape() { + return winrt::single_threaded_vector( + std::vector( + std::begin(shape_), + std::end(shape_))) + .GetView(); +} + +HRESULT 
+ImageFeatureDescriptor::GetName( + const wchar_t** name, + uint32_t* cchName) { + *name = name_.data(); + *cchName = static_cast(name_.size()); + return S_OK; +} + +HRESULT +ImageFeatureDescriptor::GetDescription( + const wchar_t** description, + uint32_t* cchDescription) { + *description = description_.data(); + *cchDescription = static_cast(description_.size()); + return S_OK; +} + +ImageNominalPixelRange +ImageFeatureDescriptor::GetNominalPixelRange() { + return nominal_pixel_range_; +} + +ImageColorSpaceGamma +ImageFeatureDescriptor::GetColorSpaceGamma() { + return color_space_gamma_; +} +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api/ImageFeatureDescriptor.h b/winml/lib/Api/ImageFeatureDescriptor.h new file mode 100644 index 0000000000000..c038987827add --- /dev/null +++ b/winml/lib/Api/ImageFeatureDescriptor.h @@ -0,0 +1,96 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "ImageFeatureDescriptor.g.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +enum class ImageNominalPixelRange { + ImageNominalPixelRange_NominalRange_0_255, + ImageNominalPixelRange_Normalized_0_1, + ImageNominalPixelRange_Normalized_1_1, + ImageNominalPixelRange_NominalRange_16_235, +}; +enum class ImageColorSpaceGamma { + ImageColorSpaceGamma_Linear, + ImageColorSpaceGamma_SRGB, +}; + +struct ImageFeatureDescriptor : ImageFeatureDescriptorT< + ImageFeatureDescriptor, + ILearningModelFeatureDescriptorNative> { + ImageFeatureDescriptor() = delete; + ImageFeatureDescriptor( + const char* name, + const char* description, + winml::TensorKind tensor_kind, + const std::vector& shape, + bool is_required, + wgi::BitmapPixelFormat pixelformat, + wgi::BitmapAlphaMode alphamode, + uint32_t width, + uint32_t height, + ImageNominalPixelRange nominalPixelRange, + ImageColorSpaceGamma colorSpaceGamma); + + wgi::BitmapPixelFormat + BitmapPixelFormat(); + + 
wgi::BitmapAlphaMode + BitmapAlphaMode(); + + uint32_t + Width(); + + uint32_t + Height(); + + hstring + Name(); + + hstring + Description(); + + winml::LearningModelFeatureKind + Kind(); + + bool + IsRequired(); + + winml::TensorKind + TensorKind(); + + wfc::IVectorView + Shape(); + + ImageNominalPixelRange + GetNominalPixelRange(); + + ImageColorSpaceGamma + GetColorSpaceGamma(); + + STDMETHOD(GetName) + ( + const wchar_t** name, + uint32_t* cchName) override; + + STDMETHOD(GetDescription) + ( + const wchar_t** description, + uint32_t* cchDescription) override; + + private: + winrt::hstring name_; + winrt::hstring description_; + winml::TensorKind tensor_kind_; + std::vector shape_; + bool is_required_; + wgi::BitmapPixelFormat pixel_format_; + wgi::BitmapAlphaMode alpha_mode_; + uint32_t width_; + uint32_t height_; + ImageNominalPixelRange nominal_pixel_range_; + ImageColorSpaceGamma color_space_gamma_; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation \ No newline at end of file diff --git a/winml/lib/Api/ImageFeatureValue.cpp b/winml/lib/Api/ImageFeatureValue.cpp new file mode 100644 index 0000000000000..7c99184314679 --- /dev/null +++ b/winml/lib/Api/ImageFeatureValue.cpp @@ -0,0 +1,539 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" +#include "ImageFeatureValue.h" +#include "LearningModelBinding.h" +#include "LearningModelDevice.h" +#include "LearningModelSession.h" +#include +#include +#include "LearningModelBinding.h" +#include "LearningModelSession.h" +#include "LearningModelDevice.h" +#include "ImageConversionTypes.h" +#include "ConverterResourceStore.h" +#include "ImageFeatureDescriptor.h" + +#include "core/session/onnxruntime_c_api.h" + +#include "D3DDeviceCache.h" +#include "TensorFeatureDescriptor.h" + +using namespace WinML; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Graphics::DirectX; +using namespace Windows::AI::MachineLearning::Internal; +using namespace winrt::Windows::Foundation::Collections; + +namespace winrt::Windows::AI::MachineLearning::implementation { + +struct ImageFeatureValue::ImageResourceMetadata { + std::vector Bounds; + ::Windows::AI::MachineLearning::Internal::ImageTensorDescription TensorDescriptor; +}; + +Windows::AI::MachineLearning::ImageFeatureValue ImageFeatureValue::Create( + uint32_t batchSize, + BitmapPixelFormat format, + uint32_t width, + uint32_t height) { + std::vector videoFrames = {}; + for (uint32_t i = 0; i < batchSize; ++i) { + SoftwareBitmap bitmap(format, width, height); + Windows::Media::VideoFrame frame = Windows::Media::VideoFrame::CreateWithSoftwareBitmap(bitmap); + videoFrames.emplace_back(frame); + } + return make(winrt::single_threaded_vector(std::move(videoFrames))); +} + +Windows::AI::MachineLearning::ImageFeatureValue ImageFeatureValue::CreateFromVideoFrame(Windows::Media::VideoFrame const& image) try { + return make(image); +} +WINML_CATCH_ALL + +void ImageFeatureValue::Initialize() { + m_batchSize = m_videoFrames.Size(); + for (auto videoFrame : m_videoFrames) { + // TODO: Check all videoFrames come from either CPU or GPU. 
+ if (auto surface = videoFrame.Direct3DSurface()) { + Direct3DSurfaceDescription description = surface.Description(); + m_widths.emplace_back(description.Width); + m_heights.emplace_back(description.Height); + } else { + ISoftwareBitmap softwarebitmap(videoFrame.SoftwareBitmap()); + m_widths.emplace_back(softwarebitmap.PixelWidth()); + m_heights.emplace_back(softwarebitmap.PixelHeight()); + } + } +} + +ImageFeatureValue::ImageFeatureValue(Windows::Media::VideoFrame const& image) { + std::vector frame = {image}; + m_videoFrames = winrt::single_threaded_vector(std::move(frame)); + Initialize(); +} + +ImageFeatureValue::ImageFeatureValue(IVector const& images) : m_videoFrames(images) { + Initialize(); +} + +ImageFeatureValue::ImageFeatureValue(IVectorView const& images) { + std::vector videoFrames = {}; + for (uint32_t i = 0; i < images.Size(); ++i) { + videoFrames.emplace_back(images.GetAt(i)); + } + m_videoFrames = winrt::single_threaded_vector(std::move(videoFrames)); + Initialize(); +} + +static std::optional GetBitmapPixelFormatFromMetadata(const IPropertySet& properties) { + if (properties != nullptr && properties.HasKey(L"BitmapPixelFormat")) { + if (auto pixelFormatInspectable = properties.Lookup(L"BitmapPixelFormat")) { + auto pixelFormatValue = pixelFormatInspectable.as(); + auto pixelFormat = static_cast(pixelFormatValue.GetInt32()); + WINML_THROW_HR_IF_FALSE_MSG( + WINML_ERR_INVALID_BINDING, + pixelFormat == BitmapPixelFormat::Rgba8 || + pixelFormat == BitmapPixelFormat::Bgra8 || + pixelFormat == BitmapPixelFormat::Gray8, + "BitmapPixelFormat must be either Rgba8, Bgra8, or Gray8"); + + return pixelFormat; + } + } + + return {}; +} + +static std::optional GetBoundsFromMetadata(const IPropertySet& properties) { + if (properties != nullptr && properties.HasKey(L"BitmapBounds")) { + if (auto boundsInspectable = properties.Lookup(L"BitmapBounds")) { + auto boundsPropertyValue = boundsInspectable.as(); + WINML_THROW_HR_IF_FALSE_MSG( + 
WINML_ERR_INVALID_BINDING, + boundsPropertyValue.Type() == Windows::Foundation::PropertyType::UInt32Array, + "BitmapBounds must reference a property value with type UInt32Array with 4 elements."); + + com_array bounds; + boundsPropertyValue.GetUInt32Array(bounds); + WINML_THROW_HR_IF_FALSE_MSG( + WINML_ERR_INVALID_BINDING, + bounds.size() == 4, + "BitmapBounds must reference a property value with type UInt32Array with 4 elements."); + + return Windows::Graphics::Imaging::BitmapBounds{bounds[0], bounds[1], bounds[2], bounds[3]}; + } + } + + return {}; +} + +BitmapBounds ImageFeatureValue::CenterAndCropBounds( + uint32_t idx, + uint32_t desiredWidth, + uint32_t desiredHeight) { + BitmapBounds bounds = {}; + float RequiredAspectRatio = static_cast(desiredWidth) / static_cast(desiredHeight); + + // crop to center while maintaining size + if (RequiredAspectRatio * m_heights[idx] < m_widths[idx]) { + // actual width is too wide. Cut off left and right of image + bounds.Width = std::min((UINT)(RequiredAspectRatio * m_heights[idx] + 0.5f), m_widths[idx]); + bounds.Height = m_heights[idx]; + bounds.X = (m_widths[idx] - bounds.Width) / 2; + bounds.Y = 0; + } else { + // actual height is too long. Cut off top and bottom + bounds.Width = m_widths[idx]; + bounds.Height = std::min((UINT)(m_widths[idx] / RequiredAspectRatio + 0.5f), m_heights[idx]); + bounds.X = 0; + bounds.Y = (m_heights[idx] - bounds.Height) / 2; + } + + // TODO: Do we allow smaller images? + WINML_THROW_HR_IF_FALSE_MSG( + WINML_ERR_INVALID_BINDING, + (bounds.X >= 0 && bounds.X <= m_widths[idx]) && + (bounds.Y >= 0 && bounds.Y <= m_heights[idx]), + "Failed to center crop the provided input image. 
The calculated bounds exceed the dimensions of the image, or do not match the model inputs dimensions."); + + return bounds; +} + +static ImageTensorDataType GetTensorDataTypeFromTensorKind(TensorKind kind) { + switch (kind) { + case TensorKind::Float: + return kImageTensorDataTypeFloat32; + case TensorKind::Float16: + return kImageTensorDataTypeFloat16; + default: + WINML_THROW_HR_IF_FALSE_MSG(WINML_ERR_INVALID_BINDING, false, "Model image inputs must have tensor type of Float or Float16."); + } + + FAIL_FAST_HR(E_INVALIDARG); +} + +static unsigned GetSizeFromTensorDataType(ImageTensorDataType type) { + switch (type) { + case kImageTensorDataTypeFloat32: + return sizeof(float); + case kImageTensorDataTypeFloat16: + return sizeof(uint16_t); + default: + WINML_THROW_HR_IF_FALSE_MSG(WINML_ERR_INVALID_BINDING, false, "Model image inputs must have tensor type of Float or Float16."); + } + + FAIL_FAST_HR(E_INVALIDARG); +} + +static ImageTensorDescription CreateImageTensorDescriptor(TensorKind tensorKind, BitmapPixelFormat pixelFormat, uint32_t batchSize, uint32_t width, uint32_t height) { + ImageTensorDescription tensorDescription = {}; + tensorDescription.dataType = GetTensorDataTypeFromTensorKind(tensorKind); + tensorDescription.sizes[0] = batchSize; + + if (pixelFormat == Windows::Graphics::Imaging::BitmapPixelFormat::Rgba8) { + tensorDescription.channelType = kImageTensorChannelTypeRGB8; + tensorDescription.sizes[1] = 3; + } else if (pixelFormat == Windows::Graphics::Imaging::BitmapPixelFormat::Bgra8) { + tensorDescription.channelType = kImageTensorChannelTypeBGR8; + tensorDescription.sizes[1] = 3; + } else if (pixelFormat == Windows::Graphics::Imaging::BitmapPixelFormat::Gray8) { + tensorDescription.channelType = kImageTensorChannelTypeGRAY8; + tensorDescription.sizes[1] = 1; + } else { + THROW_HR(E_NOTIMPL); + } + tensorDescription.sizes[2] = height; + tensorDescription.sizes[3] = width; + + return tensorDescription; +} + +static void CPUTensorize( + 
Windows::Media::IVideoFrame videoFrame, + BitmapBounds bounds, + ImageTensorDescription tensorDescriptor, + com_ptr spSession, + void* pResource) { + auto spDevice = spSession->Device().as(); + + ConverterResourceDescription descriptor = {}; + descriptor.pixel_format = static_cast(BitmapPixelFormat::Bgra8); + descriptor.width = static_cast(tensorDescriptor.sizes[3]); + descriptor.height = static_cast(tensorDescriptor.sizes[2]); + descriptor.luid = {}; // Converted image on CPU + + auto pooledConverter = PoolObjectWrapper::Create(spDevice->TensorizerStore()->Fetch(descriptor)); + + //apply tensorization + pooledConverter->Get()->Tensorizer->VideoFrameToSoftwareTensor( + videoFrame, + bounds, + tensorDescriptor, + reinterpret_cast(pResource)); + + // Software tensorization doesnt need to hold onto any resources beyond its scope, so we can + // return the converter to the pool on tensorization completion. + // (This happens automatically in the destruction of PoolObjectWrapper) +} + +static void CPUTensorize( + IVector videoFrames, + std::vector bounds, + ImageTensorDescription tensorDescriptor, + com_ptr spSession, + BYTE* resource, + unsigned int singleFrameBufferSize) { + // Tensorize video frames one by one without extra copy. 
+ for (uint32_t batchIdx = 0; batchIdx < videoFrames.Size(); ++batchIdx) { + CPUTensorize(videoFrames.GetAt(batchIdx), bounds[batchIdx], tensorDescriptor, spSession, resource); + resource += singleFrameBufferSize; + } +} + +static void GPUTensorize( + IVector videoFrames, + std::vector bounds, + ImageTensorDescription tensorDescriptor, + com_ptr spSession, + ID3D12Resource* d3dResource, + WinML::BindingContext& context) { + auto spDevice = spSession->Device().as(); + + ConverterResourceDescription descriptor = {}; + descriptor.pixel_format = static_cast(DirectXPixelFormat::B8G8R8X8UIntNormalized); + descriptor.width = static_cast(tensorDescriptor.sizes[3]); + descriptor.height = static_cast(tensorDescriptor.sizes[2]); + descriptor.luid = spDevice->GetD3DDevice()->GetAdapterLuid(); // Converted image on GPU + + // Tensorize video frames one by one without extra copy. + for (uint32_t batchIdx = 0; batchIdx < videoFrames.Size(); ++batchIdx) { + auto pooledConverter = PoolObjectWrapper::Create(spDevice->TensorizerStore()->Fetch(descriptor)); + { + // Apply tensorization + auto session = spSession.as(); + pooledConverter->Get()->Tensorizer->VideoFrameToDX12Tensor( + batchIdx, + session, + videoFrames.GetAt(batchIdx), + bounds[batchIdx], + tensorDescriptor, + d3dResource); + + // Tensorization to a GPU tensor will run asynchronously and associated resources + // need to be kept alive until the gpu resources have been used in the queue. + // + // The PoolObjectWrapper needs to stay alive so that the underlying resources are + // not released to the cache. + // + // This object will be returned to the cache when evaluate has completed. So we cache this + // on the binding context. 
+ context.converter = pooledConverter; + } + } +} + +std::optional ImageFeatureValue::GetInputMetadata(const WinML::BindingContext& context) { + uint32_t descriptorWidth; + uint32_t descriptorHeight; + + TensorKind tensorKind = TensorKind::Undefined; + auto spImageDescriptor = context.descriptor.try_as(); + auto spTensorDescriptor = context.descriptor.try_as(); + + // Set up descriptorWidth and descriptorHeight + if (spImageDescriptor) { + // If model expects free dimensions the descritpr will have MAXUINT32, and we use the supplied image + + // If the width or height in model metadata is -1, which means free dimension. + // The the widths and heights of input data must be the same. Or the + // tensorDescriptor cannot describ the shape of the inputs. + if (spImageDescriptor->Width() == MAXUINT32 && + !(std::adjacent_find(m_widths.begin(), m_widths.end(), std::not_equal_to()) == m_widths.end())) { + THROW_HR(E_INVALIDARG); + } + if (spImageDescriptor->Height() == MAXUINT32 && + !(std::adjacent_find(m_heights.begin(), m_heights.end(), std::not_equal_to()) == m_heights.end())) { + THROW_HR(E_INVALIDARG); + } + descriptorWidth = (spImageDescriptor->Width() == MAXUINT32) ? m_widths[0] : spImageDescriptor->Width(); + descriptorHeight = (spImageDescriptor->Height() == MAXUINT32) ? 
m_heights[0] : spImageDescriptor->Height(); + tensorKind = spImageDescriptor->TensorKind(); + } else if (spTensorDescriptor) { + // If model expects a tensor, use its shape + auto shape = spTensorDescriptor->Shape(); + + if (shape.Size() != 4) { + return {}; + } + bool hasAccecptableChannelSize = (shape.GetAt(1) == 3 || shape.GetAt(1) == 1); + if (!hasAccecptableChannelSize) { + return {}; + } + if (-1 == shape.GetAt(3) && + !(std::adjacent_find(m_widths.begin(), m_widths.end(), std::not_equal_to()) == m_widths.end())) { + THROW_HR(E_INVALIDARG); + } + if (-1 == shape.GetAt(2) && + !(std::adjacent_find(m_heights.begin(), m_heights.end(), std::not_equal_to()) == m_heights.end())) { + THROW_HR(E_INVALIDARG); + } + descriptorWidth = (-1 == shape.GetAt(3)) ? m_widths[0] : static_cast(shape.GetAt(3)); + descriptorHeight = (-1 == shape.GetAt(2)) ? m_heights[0] : static_cast(shape.GetAt(2)); + tensorKind = spTensorDescriptor->TensorKind(); + } else { + return {}; + } + + // Set up BitmapBounds + // For batch of images with different sizes, like { {1, 3, 1080, 1080}, {1, 3, 720, 720} }, + // a vector of bounds is to record the result after cropped. + std::vector bounds = {}; + for (uint32_t i = 0; i < m_batchSize; ++i) { + auto tempBounds = GetBoundsFromMetadata(context.properties); + if (!tempBounds.has_value()) { + // If the user has not specified bounds, we need to infer the bounds + // from the combination of descriptor, and input value or output value + if (context.type == BindingType::kInput) { + // If unspecified output, get the crop with correct aspect ratio + tempBounds = CenterAndCropBounds(i, descriptorWidth, descriptorHeight); + } else { + // If given an unspecified output region, write into the top left portion of the output image. 
+ tempBounds = BitmapBounds{0, 0, m_widths[i], m_heights[i]}; + } + } + bounds.emplace_back(tempBounds.value()); + } + // TODO: Validate Bounds + + // Set up BitmapPixelFormat + + auto pixelFormat = std::optional{}; + pixelFormat = GetBitmapPixelFormatFromMetadata(context.properties); + if (!pixelFormat.has_value() && spImageDescriptor) { + pixelFormat = spImageDescriptor->BitmapPixelFormat(); + } else if (!pixelFormat.has_value() && spTensorDescriptor) { + auto shape = spTensorDescriptor->Shape(); + int channelCount = static_cast(shape.GetAt(1)); + if (channelCount == 1) { + // Assume Gray if no image descriptor is given and channelcount 1 + pixelFormat = BitmapPixelFormat::Gray8; + + } else if (channelCount == 3) { + // Assume Bgra8 if no image descriptor is given + pixelFormat = BitmapPixelFormat::Bgra8; + } else { + THROW_HR(WINML_ERR_SIZE_MISMATCH); + } + } + //NCHW layout + auto imageTensorDescriptor = CreateImageTensorDescriptor(tensorKind, pixelFormat.value(), m_batchSize, descriptorWidth, descriptorHeight); + + return ImageResourceMetadata{bounds, imageTensorDescriptor}; +} + +HRESULT ImageFeatureValue::GetValue(WinML::BindingContext& context, IValue** out) try { + FAIL_FAST_IF(!(std::all_of(m_widths.begin(), m_widths.end(), [](int i) { return i != 0; }))); + FAIL_FAST_IF(!(std::all_of(m_heights.begin(), m_heights.end(), [](int i) { return i != 0; }))); + + // Get image metadata from the binding context + auto metadata = GetInputMetadata(context); + RETURN_HR_IF(E_INVALIDARG, !metadata); + ImageResourceMetadata resourceMetadata = metadata.value(); + + // Get the session + auto spSession = context.session.as(); + auto spDevice = spSession->Device().as(); + auto engine = spSession->GetEngine(); + + // create the OrtValue + winrt::com_ptr value; + RETURN_IF_FAILED(engine->CreateTensorValue( + resourceMetadata.TensorDescriptor.sizes, + sizeof(resourceMetadata.TensorDescriptor.sizes) / sizeof(resourceMetadata.TensorDescriptor.sizes[0]), + 
resourceMetadata.TensorDescriptor.dataType == kImageTensorDataTypeFloat32 ? winml::TensorKind::Float : winml::TensorKind::Float16, + value.put())); + + // Get the tensor raw data + WinML::Resource void_resource; + RETURN_IF_FAILED(value->GetResource(void_resource)); + + if (context.type == BindingType::kInput) { + // Only tensorize inputs + auto bufferSize = std::accumulate(std::begin(resourceMetadata.TensorDescriptor.sizes), std::end(resourceMetadata.TensorDescriptor.sizes), static_cast(1), std::multiplies()); + auto bufferByteSize = GetSizeFromTensorDataType(resourceMetadata.TensorDescriptor.dataType) * bufferSize; + auto singleFrameBufferSize = bufferByteSize / m_batchSize; + if (spDevice->IsCpuDevice()) { + auto resource = reinterpret_cast(void_resource.get()); + CPUTensorize(m_videoFrames, resourceMetadata.Bounds, resourceMetadata.TensorDescriptor, spSession, resource, static_cast(singleFrameBufferSize)); + } else { + auto resource = reinterpret_cast(void_resource.get()); + GPUTensorize(m_videoFrames, resourceMetadata.Bounds, resourceMetadata.TensorDescriptor, spSession, resource, context); + } + } + + *out = value.detach(); + return S_OK; +} +WINML_CATCH_ALL_COM + +HRESULT ImageFeatureValue::IsPlaceholder(bool* pIsPlaceHolder) { + FAIL_FAST_IF_NULL(pIsPlaceHolder); + *pIsPlaceHolder = false; + return S_OK; +} + +HRESULT ImageFeatureValue::UpdateSourceResourceData(BindingContext& context, IValue* value) try { + // Get the device + auto spSession = context.session.as(); + auto spDevice = spSession->Device().as(); + + // Get the output tensor raw data + WinML::Resource void_resource; + RETURN_IF_FAILED(value->GetResource(void_resource)); + + // Get the run context + auto metadata = GetInputMetadata(context); + ImageResourceMetadata resourceMetadata = metadata.value(); + + ConverterResourceDescription descriptor = {}; + descriptor.width = static_cast(resourceMetadata.TensorDescriptor.sizes[3]); + descriptor.height = 
static_cast(resourceMetadata.TensorDescriptor.sizes[2]); + + bool out; + if (SUCCEEDED(value->IsCpu(&out)) && out) { + descriptor.pixel_format = static_cast(BitmapPixelFormat::Bgra8); + descriptor.luid = {}; // Converted image on CPU + + auto pooledConverter = PoolObjectWrapper::Create(spDevice->DetensorizerStore()->Fetch(descriptor)); + + auto bufferSize = std::accumulate(std::begin(resourceMetadata.TensorDescriptor.sizes), std::end(resourceMetadata.TensorDescriptor.sizes), static_cast(1), std::multiplies()); + auto bufferByteSize = GetSizeFromTensorDataType(resourceMetadata.TensorDescriptor.dataType) * bufferSize / m_batchSize; + + BYTE* resource = reinterpret_cast(void_resource.get()); + for (uint32_t batchIdx = 0; batchIdx < m_batchSize; ++batchIdx) { + // Convert Software Tensor to VideoFrame one by one based on the buffer size. + auto videoFrame = m_videoFrames.GetAt(batchIdx); + pooledConverter->Get()->Detensorizer->SoftwareTensorToVideoFrame(context.session, resource, resourceMetadata.TensorDescriptor, videoFrame); + resource += bufferByteSize; + } + } else { + descriptor.pixel_format = static_cast(DirectXPixelFormat::B8G8R8X8UIntNormalized); + descriptor.luid = spDevice->GetD3DDevice()->GetAdapterLuid(); // Converted image on GPU + + auto pooledConverter = PoolObjectWrapper::Create(spDevice->DetensorizerStore()->Fetch(descriptor)); + + auto d3dResource = reinterpret_cast(void_resource.get()); + + for (uint32_t batchIdx = 0; batchIdx < m_batchSize; ++batchIdx) { + auto videoFrame = m_videoFrames.GetAt(batchIdx); + pooledConverter->Get()->Detensorizer->DX12TensorToVideoFrame( + batchIdx, + context.session, + d3dResource, + resourceMetadata.TensorDescriptor, + videoFrame); + + // Reset the Allocator before return to the Cache. Must Sync this background thread to that completion before we do. 
+ spDevice->GetD3DDeviceCache()->SyncD3D12ToCPU(); + pooledConverter->Get()->Detensorizer->ResetAllocator(); + } + } + + // Release any converters back to the pool by nulling out the wrapper. + context.converter = nullptr; + return S_OK; +} +WINML_CATCH_ALL_COM + +HRESULT ImageFeatureValue::AbiRepresentation(winrt::Windows::Foundation::IInspectable& abiRepresentation) { + if (IsBatch()) { + m_videoFrames.as(abiRepresentation); + } else { + winrt::Windows::AI::MachineLearning::ImageFeatureValue to = nullptr; + RETURN_IF_FAILED(this->QueryInterface( + winrt::guid_of(), + reinterpret_cast(winrt::put_abi(to)))); + + to.as(abiRepresentation); + } + return S_OK; +} + +Windows::AI::MachineLearning::LearningModelFeatureKind ImageFeatureValue::Kind() try { + return LearningModelFeatureKind::Image; +} +WINML_CATCH_ALL + +Windows::Media::VideoFrame ImageFeatureValue::VideoFrame() try { + return m_videoFrames.GetAt(0); +} +WINML_CATCH_ALL + +IIterable ImageFeatureValue::VideoFrames() try { + return m_videoFrames.try_as>(); +} +WINML_CATCH_ALL +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api/ImageFeatureValue.h b/winml/lib/Api/ImageFeatureValue.h new file mode 100644 index 0000000000000..4c6292fb677b9 --- /dev/null +++ b/winml/lib/Api/ImageFeatureValue.h @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "ImageFeatureValue.g.h" + +#include "inc/ILotusValueProviderPrivate.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +struct ImageFeatureValue : ImageFeatureValueT { + // Metadata about the resource which helps in finding + // compatible cached resources + struct ImageResourceMetadata; + + ImageFeatureValue() = delete; + ImageFeatureValue(Windows::Media::VideoFrame const& image); + ImageFeatureValue(winrt::Windows::Foundation::Collections::IVector const& images); + ImageFeatureValue(winrt::Windows::Foundation::Collections::IVectorView const& images); + + Windows::Media::VideoFrame VideoFrame(); + winrt::Windows::Foundation::Collections::IIterable VideoFrames(); + Windows::AI::MachineLearning::LearningModelFeatureKind Kind(); + + static Windows::AI::MachineLearning::ImageFeatureValue ImageFeatureValue::Create( + uint32_t batchSize, + Windows::Graphics::Imaging::BitmapPixelFormat format, + uint32_t width, + uint32_t height); + static Windows::AI::MachineLearning::ImageFeatureValue CreateFromVideoFrame(Windows::Media::VideoFrame const& image); + + std::optional GetInputMetadata(const WinML::BindingContext& context); + + // ILotusValueProviderPrivate implementation + STDMETHOD(GetValue) + (WinML::BindingContext& context, WinML::IValue** out); + STDMETHOD(IsPlaceholder) + (bool* pIsPlaceHolder); + STDMETHOD(UpdateSourceResourceData) + (WinML::BindingContext& context, WinML::IValue* value); + STDMETHOD(AbiRepresentation) + (winrt::Windows::Foundation::IInspectable& abiRepresentation); + + std::vector Widths() { return m_widths; } + std::vector Heights() { return m_heights; } + bool IsBatch() { return m_batchSize > 1; } + + private: + winrt::Windows::Foundation::Collections::IVector m_videoFrames; + std::vector m_widths = {}; + std::vector m_heights = {}; + uint32_t m_batchSize = 1; + // Crop the image with desired aspect ratio. 
+ // This function does not crop image to desried width and height, but crops to center for desired ratio + Windows::Graphics::Imaging::BitmapBounds CenterAndCropBounds( + uint32_t idx, + uint32_t desiredWidth, + uint32_t desiredHeight); + void Initialize(); +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { +struct ImageFeatureValue : ImageFeatureValueT { +}; +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/LearningModel.cpp b/winml/lib/Api/LearningModel.cpp new file mode 100644 index 0000000000000..e732d86625832 --- /dev/null +++ b/winml/lib/Api/LearningModel.cpp @@ -0,0 +1,281 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#include "LearningModel.h" + +#include "TelemetryEvent.h" +#include "MapFeatureDescriptor.h" +#include "SequenceFeatureDescriptor.h" +#include "TensorFeatureDescriptor.h" + +#include "OnnxruntimeProvider.h" + +#include + +namespace winrt::Windows::AI::MachineLearning::implementation { +LearningModel::LearningModel( + const hstring& path, + const winml::ILearningModelOperatorProvider op_provider) try : LearningModel(WinML::Strings::UTF8FromHString(path), + op_provider) { +} +WINML_CATCH_ALL + +LearningModel::LearningModel( + const std::string& path, + const winml::ILearningModelOperatorProvider operator_provider) try : operator_provider_(operator_provider) { + _winmlt::TelemetryEvent loadModel_event(_winmlt::EventCategory::kModelLoad); + + WINML_THROW_IF_FAILED(CreateOnnxruntimeEngineFactory(engine_factory_.put())); + WINML_THROW_IF_FAILED(engine_factory_->CreateModel(path.c_str(), path.size(), model_.put())); + WINML_THROW_IF_FAILED(model_->GetModelInfo(model_info_.put())); +} +WINML_CATCH_ALL + +static HRESULT CreateModelFromStream( + WinML::IEngineFactory* engine_factory, + const wss::IRandomAccessStreamReference 
stream, + WinML::IModel** model) { + auto content = stream.OpenReadAsync().get(); + + wss::Buffer buffer(static_cast(content.Size())); + auto result = content.ReadAsync( + buffer, + buffer.Capacity(), + wss::InputStreamOptions::None) + .get(); + + auto bytes = buffer.try_as<::Windows::Storage::Streams::IBufferByteAccess>(); + WINML_THROW_HR_IF_NULL_MSG(E_UNEXPECTED, bytes, "Model stream is invalid."); + + void* data; + WINML_THROW_IF_FAILED_MSG(bytes->Buffer(reinterpret_cast(&data)), "Failed to acquire buffer from model stream."); + + size_t len = static_cast(content.Size()); + WINML_THROW_IF_FAILED(engine_factory->CreateModel(data, len, model)); + + return S_OK; +} + +LearningModel::LearningModel( + const wss::IRandomAccessStreamReference stream, + const winml::ILearningModelOperatorProvider operator_provider) try : operator_provider_(operator_provider) { + _winmlt::TelemetryEvent loadModel_event(_winmlt::EventCategory::kModelLoad); + + WINML_THROW_IF_FAILED(CreateOnnxruntimeEngineFactory(engine_factory_.put())); + WINML_THROW_IF_FAILED(CreateModelFromStream(engine_factory_.get(), stream, model_.put())); + WINML_THROW_IF_FAILED(model_->GetModelInfo(model_info_.put())); +} +WINML_CATCH_ALL + +hstring +LearningModel::Author() try { + const char* out; + size_t len; + WINML_THROW_IF_FAILED(model_info_->GetAuthor(&out, &len)); + return WinML::Strings::HStringFromUTF8(out); +} +WINML_CATCH_ALL + +hstring +LearningModel::Name() try { + const char* out; + size_t len; + WINML_THROW_IF_FAILED(model_info_->GetName(&out, &len)); + return WinML::Strings::HStringFromUTF8(out); +} +WINML_CATCH_ALL + +hstring +LearningModel::Domain() try { + const char* out; + size_t len; + WINML_THROW_IF_FAILED(model_info_->GetDomain(&out, &len)); + return WinML::Strings::HStringFromUTF8(out); +} +WINML_CATCH_ALL + +hstring +LearningModel::Description() try { + const char* out; + size_t len; + WINML_THROW_IF_FAILED(model_info_->GetDescription(&out, &len)); + return 
WinML::Strings::HStringFromUTF8(out); +} +WINML_CATCH_ALL + +int64_t +LearningModel::Version() try { + int64_t version; + WINML_THROW_IF_FAILED(model_info_->GetVersion(&version)); + return version; +} +WINML_CATCH_ALL + +wfc::IMapView +LearningModel::Metadata() try { + ABI::Windows::Foundation::Collections::IMapView* metadata = nullptr; + wfc::IMapView out; + WINML_THROW_IF_FAILED(model_info_->GetModelMetadata(&metadata)); + winrt::attach_abi(out, metadata); + return out; +} +WINML_CATCH_ALL + +IMLOperatorRegistry* +LearningModel::GetOperatorRegistry() { + if (operator_provider_ == nullptr) { + return nullptr; + } + + // Get the native winrt provider interface out of winrt operator provider. + auto operator_provider_native = + operator_provider_.as(); + + IMLOperatorRegistry* registry = nullptr; + // Retrieve the "operator abi" registry. + THROW_IF_FAILED(operator_provider_native->GetRegistry(®istry)); + return registry; +} + +wfc::IVectorView +LearningModel::InputFeatures() try { + ABI::Windows::Foundation::Collections::IVectorView* features = nullptr; + wfc::IVectorView out; + WINML_THROW_IF_FAILED(model_info_->GetInputFeatures(&features)); + winrt::attach_abi(out, features); + return out; +} +WINML_CATCH_ALL + +wfc::IVectorView +LearningModel::OutputFeatures() try { + ABI::Windows::Foundation::Collections::IVectorView* features = nullptr; + wfc::IVectorView out; + WINML_THROW_IF_FAILED(model_info_->GetOutputFeatures(&features)); + winrt::attach_abi(out, features); + return out; +} +WINML_CATCH_ALL + +void LearningModel::Close() try { + // close the model + model_ = nullptr; +} +WINML_CATCH_ALL + +bool LearningModel::IsDisposed() { + return model_ == nullptr; +} + +wf::IAsyncOperation +LearningModel::LoadFromStorageFileAsync( + ws::IStorageFile const modelFile) { + return LoadFromStorageFileAsync(modelFile, nullptr); +} + +wf::IAsyncOperation +LearningModel::LoadFromStorageFileAsync( + ws::IStorageFile const modelFile, + winml::ILearningModelOperatorProvider 
const provider) { + co_await resume_background(); + return make(modelFile, provider); +} + +wf::IAsyncOperation +LearningModel::LoadFromStreamAsync( + wss::IRandomAccessStreamReference const model_stream) { + return LoadFromStreamAsync(model_stream, nullptr); +} + +wf::IAsyncOperation +LearningModel::LoadFromStreamAsync( + wss::IRandomAccessStreamReference const model_stream, + winml::ILearningModelOperatorProvider const provider) { + co_await resume_background(); + return make(model_stream, provider); +} + +winml::LearningModel +LearningModel::LoadFromFilePath( + hstring const& path) try { + return LoadFromFilePath(path, nullptr); +} +WINML_CATCH_ALL + +winml::LearningModel +LearningModel::LoadFromFilePath( + hstring const& path, + winml::ILearningModelOperatorProvider const provider) try { + return make(path, provider); +} +WINML_CATCH_ALL + +winml::LearningModel +LearningModel::LoadFromStream( + wss::IRandomAccessStreamReference const model_stream) try { + return LoadFromStream(model_stream, nullptr); +} +WINML_CATCH_ALL + +winml::LearningModel +LearningModel::LoadFromStream( + wss::IRandomAccessStreamReference const model_stream, + winml::ILearningModelOperatorProvider const provider) try { + return make(model_stream, provider); +} +WINML_CATCH_ALL + +WinML::IModel* +LearningModel::DetachModel() { + com_ptr detached_model; + if (model_ != nullptr) { + detached_model.attach(model_.detach()); + + // Close the model since we now own the model proto + Close(); + } + return detached_model.detach(); +} + +WinML::IModel* +LearningModel::CloneModel() { + if (model_ == nullptr) { + return nullptr; + } + + com_ptr model_copy; + WINML_THROW_IF_FAILED(model_->CloneModel(model_copy.put())); + + return model_copy.detach(); +} + +WinML::IEngineFactory* +LearningModel::GetEngineFactory() { + return engine_factory_.get(); +} + +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { +// copied 
from cppwinrt magic to create abi wrappers. Need to do it this way +// since peeps underneath (like the constructor) will throw +HRESULT +__stdcall LearningModel::Load( + const wchar_t* p_model_path, + uint32_t model_path_size, + IUnknown** pp_model_unk) { + try { + WINML_THROW_HR_IF_NULL_MSG(E_INVALIDARG, p_model_path, "Failed to create LearningModel. Ivalid argument p_model_path."); + WINML_THROW_HR_IF_FALSE_MSG(E_INVALIDARG, model_path_size > 0, "Failed to create LearningModel. Ivalid argument model_path_size."); + WINML_THROW_HR_IF_NULL_MSG(E_INVALIDARG, pp_model_unk, "Failed to create LearningModel. Ivalid argument pp_model_unk."); + + auto path = WinML::Strings::UTF8FromUnicode(p_model_path, model_path_size); + auto model = make(path, nullptr); + *pp_model_unk = model.as().detach(); + return S_OK; + } + WINML_CATCH_ALL_COM +} +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/LearningModel.h b/winml/lib/Api/LearningModel.h new file mode 100644 index 0000000000000..66f940a0bd7ce --- /dev/null +++ b/winml/lib/Api/LearningModel.h @@ -0,0 +1,122 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "LearningModel.g.h" + +namespace Windows::AI::MachineLearning { +struct IEngineFactory; +struct IModel; +struct IModelInfo; +} // namespace Windows::AI::MachineLearning + +namespace winrt::Windows::AI::MachineLearning::implementation { + +struct LearningModel : LearningModelT { + /* LearningModel constructors (MachineLearningContract 1). 
*/ + LearningModel() = default; + + LearningModel( + const hstring& path, + const winml::ILearningModelOperatorProvider operator_provider); + + LearningModel( + const wss::IRandomAccessStreamReference stream, + const winml::ILearningModelOperatorProvider operator_provider); + + LearningModel( + const std::string& path, + const winml::ILearningModelOperatorProvider operator_provider); + + /* LearningModel properties (MachineLearningContract 1). */ + hstring + Author(); + + hstring + Name(); + + hstring + Domain(); + + hstring + Description(); + + int64_t + Version(); + + wfc::IMapView + Metadata(); + + wfc::IVectorView + InputFeatures(); + + wfc::IVectorView + OutputFeatures(); + + /* IClosable methods. */ + void Close(); + + /* LearningModel static methods (MachineLearningContract 1). */ + static wf::IAsyncOperation + LoadFromStorageFileAsync( + Windows::Storage::IStorageFile const model_file); + + static wf::IAsyncOperation + LoadFromStorageFileAsync( + Windows::Storage::IStorageFile const model_file, + winml::ILearningModelOperatorProvider const operator_provider); + + static wf::IAsyncOperation + LoadFromStreamAsync( + wss::IRandomAccessStreamReference const stream); + + static wf::IAsyncOperation + LoadFromStreamAsync( + wss::IRandomAccessStreamReference const stream, + winml::ILearningModelOperatorProvider const operator_provider); + + static winml::LearningModel + LoadFromFilePath( + hstring const& path); + + static winml::LearningModel + LoadFromFilePath( + hstring const& path, + winml::ILearningModelOperatorProvider const operator_provider); + + static winml::LearningModel + LoadFromStream( + wss::IRandomAccessStreamReference const stream); + + static winml::LearningModel + LoadFromStream( + wss::IRandomAccessStreamReference const stream, + winml::ILearningModelOperatorProvider const operator_provider); + + public: + /* Non-ABI methods */ + bool IsDisposed(); + IMLOperatorRegistry* GetOperatorRegistry(); + WinML::IModel* DetachModel(); + WinML::IModel* 
CloneModel(); + WinML::IEngineFactory* GetEngineFactory(); + + private: + com_ptr engine_factory_; + com_ptr model_; + com_ptr model_info_; + + ILearningModelOperatorProvider operator_provider_; +}; + +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { + +struct LearningModel : LearningModelT { + STDMETHOD(Load) + (const wchar_t* p_model_path, UINT32 model_path_size, IUnknown** pp_model_unk); +}; + +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/LearningModelBinding.cpp b/winml/lib/Api/LearningModelBinding.cpp new file mode 100644 index 0000000000000..132875b3cc9ef --- /dev/null +++ b/winml/lib/Api/LearningModelBinding.cpp @@ -0,0 +1,558 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" +#include "ConverterResourceStore.h" +#include "impl/FeatureCompatibility.h" +#include "FeatureValues.h" +#include "LearningModelBinding.h" +#include "LearningModelSession.h" +#include "TelemetryEvent.h" +#include +#include "LearningModel.h" + +using namespace WinML; + +namespace winrt::Windows::AI::MachineLearning::implementation { +LearningModelBinding::LearningModelBinding( + Windows::AI::MachineLearning::LearningModelSession const& session) try : m_session(session) { + session.as()->CheckClosed(); +} +WINML_CATCH_ALL + +static Windows::AI::MachineLearning::ILearningModelFeatureDescriptor FindValidBinding( + winrt::Windows::Foundation::Collections::IIterable descriptors, + const std::wstring& name) { + for (auto descriptor : descriptors) { + auto descriptor_native = descriptor.as(); + + const wchar_t* feature_name; + uint32_t size; + WINML_THROW_IF_FAILED(descriptor_native->GetName(&feature_name, &size)); + + // Case insensetive comparison of onnx name in feature descriptor, and passed in name + if (_wcsicmp(feature_name, name.c_str()) == 0) { + return descriptor; + 
} + } + return nullptr; +} + +using NullableBindingPort = std::optional>; + +static NullableBindingPort FindValidBinding( + winml::LearningModel model, + const std::wstring& name) { + if (auto descriptor = FindValidBinding(model.InputFeatures(), name)) { + return std::make_pair(descriptor, BindingType::kInput); + } else if (auto output_descriptor = FindValidBinding(model.OutputFeatures(), name)) { + return std::make_pair(output_descriptor, BindingType::kOutput); + } + + return {}; +} + +void LearningModelBinding::CacheProvider( + std::string name, + ProviderInfo& providerInfo) { + m_providers[name] = providerInfo; +} + +std::tuple, BindingType> LearningModelBinding::CreateBinding( + const std::string& name, + const Windows::Foundation::IInspectable& inspectable, + Windows::Foundation::Collections::IPropertySet const& properties) { + // Given a known type, validate against the model + auto model = m_session.Model(); + auto bindingPort = FindValidBinding(model, WinML::Strings::WStringFromString(name)); + + WINML_THROW_HR_IF_FALSE_MSG( + WINML_ERR_INVALID_BINDING, + bindingPort.has_value(), + "The model has no variable with name %s.", + name.c_str()); + + // Retrieve the descriptor and binding type + auto descriptor = bindingPort->first; + auto bindingType = bindingPort->second; + + // Create a feature value from the iinspectable input + auto featureValue = WinML::CreateFeatureValueFromInspectable(bindingType, inspectable, descriptor); + WINML_THROW_HR_IF_NULL_MSG( + WINML_ERR_INVALID_BINDING, + featureValue, + "The model variable %s cannot be bound with the provided type.", + name.c_str()); + + // Validate that the feature value is compatible with the descriptor + WinML::VerifyFeatureValueCompatibleWithDescriptor(featureValue, descriptor); + + // Create the Binding Context to pass to the feature value + BindingContext context{ + bindingType, + m_session, + descriptor, + properties, + {} // SubresourceId is set by callee + }; + + // Get the bound tensor + 
winrt::com_ptr value; + + // Get the native interface for the given bind value + auto spLotusValueProvider = featureValue.as(); + + auto spSession = m_session.as(); + + // Check if the feature value is a placeholder + bool isPlaceHolder; + WINML_THROW_IF_FAILED(spLotusValueProvider->IsPlaceholder(&isPlaceHolder)); + + // If binding a tensor for gpu execution, always bind. + // If it is a placeholder, gpu resources will be preallocated during bind. + // This enables the chaining scenario. + auto spDevice = m_session.Device().as(); + auto isGpuSession = !spDevice->IsCpuDevice(); + auto spTensor = featureValue.try_as(); + auto isTensorWithShape = spTensor != nullptr && spTensor.Shape().Size() != 0; + auto shouldAlwaysTensorize = isTensorWithShape && isGpuSession; + + if (!isPlaceHolder || shouldAlwaysTensorize) { + // If not a placeholder, attempt to get the underlying resource + WINML_THROW_IF_FAILED_MSG( + spLotusValueProvider->GetValue(context, value.put()), + "The model variable %s failed tensorization.", + name.c_str()); + } else { + WINML_THROW_HR_IF_TRUE_MSG( + WINML_ERR_INVALID_BINDING, + isPlaceHolder && bindingType == BindingType::kInput, + "The model variable %s is an input, but has no associated resources to bind.", + name.c_str()); + + WINML_THROW_IF_FAILED(spSession->GetEngine()->CreateNullValue(value.put())); + } + + // Hold onto the input output providers so that our memory doesnt get destroyed! 
+ auto providerInfo = ProviderInfo{inspectable, spLotusValueProvider, context}; + CacheProvider(name, providerInfo); + + return std::make_tuple(name, value, bindingType); +} + +void LearningModelBinding::Bind( + hstring const& name, + Windows::Foundation::IInspectable const& value) try { + return Bind(name, value, nullptr /* no properties */); +} +WINML_CATCH_ALL + +void LearningModelBinding::Bind( + hstring const& name, + Windows::Foundation::IInspectable const& value, + Windows::Foundation::Collections::IPropertySet const& properties) try { + _winmlt::TelemetryEvent binding_event(_winmlt::EventCategory::kBinding); + + BindingType binding_type; + std::string binding_name; + winrt::com_ptr binding_value = nullptr; + auto featureName = WinML::Strings::UTF8FromHString(name); + std::tie(binding_name, binding_value, binding_type) = CreateBinding(featureName, value, properties); + switch (binding_type) { + case BindingType::kInput: + WINML_THROW_IF_FAILED(BindInput(binding_name, binding_value)); + break; + case BindingType::kOutput: + WINML_THROW_IF_FAILED(BindOutput(binding_name, binding_value)); + break; + default: + FAIL_FAST(); + } +} +WINML_CATCH_ALL + +void LearningModelBinding::Clear() try { + m_session.as()->CheckClosed(); + inputs_.clear(); + input_names_.clear(); + outputs_.clear(); + output_names_.clear(); + m_providers.clear(); +} +WINML_CATCH_ALL + +Windows::Foundation::Collections::IIterator LearningModelBinding::First() { + std::unordered_map bindingsMap; + + for (auto mergedBindings : m_providers) { + auto name = WinML::Strings::HStringFromUTF8(mergedBindings.first); + bindingsMap[name] = mergedBindings.second.CallerSpecifiedFeatureValue; + } + + return winrt::single_threaded_map(std::move(bindingsMap)).First(); +} + +Windows::Foundation::IInspectable LearningModelBinding::Lookup(hstring const& key) { + auto utf8_name = WinML::Strings::UTF8FromHString(key); + + auto foundIt = m_providers.find(utf8_name); + WINML_THROW_HR_IF_FALSE_MSG( + E_BOUNDS, + 
foundIt != std::end(m_providers), + "The binding collection does not contain a variable with name %s.", + utf8_name.c_str()); + + auto providerInfo = foundIt->second; + return providerInfo.CallerSpecifiedFeatureValue; +} + +uint32_t LearningModelBinding::Size() { + return static_cast(m_providers.size()); +} + +bool LearningModelBinding::HasKey(hstring const& key) { + auto utf8_name = WinML::Strings::UTF8FromHString(key); + return m_providers.find(utf8_name) != m_providers.end(); +} + +void LearningModelBinding::Split( + Windows::Foundation::Collections::IMapView& first, + Windows::Foundation::Collections::IMapView& second) { + // the winrt api guide states: + // If the IMapView instance cannot be split, then both the first and second parameters are null when the method returns. + first = nullptr; + second = nullptr; +} + +ILearningModelFeatureValue LearningModelBinding::CreateUnboundOuputFeatureValue( + const winrt::com_ptr value, + ILearningModelFeatureDescriptor& descriptor) { + bool out; + if (SUCCEEDED(value->IsTensor(&out)) && out) { + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Float, &out)) && out) { + if (descriptor.Kind() == LearningModelFeatureKind::Image) { + using namespace Windows::Graphics::Imaging; + // TODO: this format for unbound output needs more discussion + BitmapPixelFormat format = descriptor.as()->BitmapPixelFormat(); + std::vector shape; + value->GetTensorShape(shape); + uint32_t width = static_cast(shape[3]); + uint32_t height = static_cast(shape[2]); + uint32_t batchSize = static_cast(shape[0]); + return implementation::ImageFeatureValue::Create(batchSize, format, width, height); + } else { + return implementation::TensorFloat::Create(); + } + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Double, &out)) && out) { + return implementation::TensorDouble::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::String, &out)) && out) { + return implementation::TensorString::Create(); + } + if 
(SUCCEEDED(value->IsOfTensorType(TensorKind::UInt8, &out)) && out) { + return implementation::TensorUInt8Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Int8, &out)) && out) { + return implementation::TensorInt8Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::UInt16, &out)) && out) { + return implementation::TensorUInt16Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Int16, &out)) && out) { + return implementation::TensorInt16Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::UInt32, &out)) && out) { + return implementation::TensorUInt32Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Int32, &out)) && out) { + return implementation::TensorInt32Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::UInt64, &out)) && out) { + return implementation::TensorUInt64Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Int64, &out)) && out) { + return implementation::TensorInt64Bit::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Boolean, &out)) && out) { + return implementation::TensorBoolean::Create(); + } + if (SUCCEEDED(value->IsOfTensorType(TensorKind::Float16, &out)) && out) { + return implementation::TensorFloat16Bit::Create(); + } + } + + // Maps + if (SUCCEEDED(value->IsOfMapType(TensorKind::String, TensorKind::String, &out)) && out) { + return implementation::MapStringToString::Create(); + } + if (SUCCEEDED(value->IsOfMapType(TensorKind::String, TensorKind::Int64, &out)) && out) { + return implementation::MapStringToInt64Bit::Create(); + } + if (SUCCEEDED(value->IsOfMapType(TensorKind::String, TensorKind::Float, &out)) && out) { + return implementation::MapStringToFloat::Create(); + } + if (SUCCEEDED(value->IsOfMapType(TensorKind::String, TensorKind::Double, &out)) && out) { + return implementation::MapStringToDouble::Create(); + } + if (SUCCEEDED(value->IsOfMapType(TensorKind::Int64, TensorKind::String, &out)) && out) { 
+ return implementation::MapInt64BitToString::Create(); + } + if (SUCCEEDED(value->IsOfMapType(TensorKind::Int64, TensorKind::Int64, &out)) && out) { + return implementation::MapInt64BitToInt64Bit::Create(); + } + if (SUCCEEDED(value->IsOfMapType(TensorKind::Int64, TensorKind::Float, &out)) && out) { + return implementation::MapInt64BitToFloat::Create(); + } + if (SUCCEEDED(value->IsOfMapType(TensorKind::Int64, TensorKind::Double, &out)) && out) { + return implementation::MapInt64BitToDouble::Create(); + } + // Sequences + if (SUCCEEDED(value->IsOfVectorMapType(TensorKind::String, TensorKind::Float, &out)) && out) { + return implementation::SequenceMapStringFloat::Create(); + } + if (SUCCEEDED(value->IsOfVectorMapType(TensorKind::Int64, TensorKind::Float, &out)) && out) { + return implementation::SequenceMapInt64BitFloat::Create(); + } + + auto utf8_name = WinML::Strings::UTF8FromHString(descriptor.Name()); + WINML_THROW_HR_IF_TRUE_MSG( + E_UNEXPECTED, + true, + "The engine produced an unexpected evaluation output for unbound output variable %s.", + utf8_name.c_str()); + + return nullptr; +} + +Windows::Foundation::IInspectable LearningModelBinding::CreateUnboundOutput( + const std::string& name, + winrt::com_ptr value) { + // Find valid binding port + auto bindingPort = FindValidBinding( + m_session.Model(), + WinML::Strings::WStringFromString(name)); + + WINML_THROW_HR_IF_FALSE_MSG( + E_UNEXPECTED, + bindingPort.has_value(), + "The engine produced an unexpected evaluation output %s, that is not a model variable.", + name.c_str()); + + // Retrieve the descriptor and binding type + auto descriptor = bindingPort->first; + auto bindingType = bindingPort->second; + WINML_THROW_HR_IF_FALSE_MSG( + E_UNEXPECTED, + bindingType == BindingType::kOutput, + "The engine produced an unexpected evaluation output %s, that is not a model variable output.", + name.c_str()); + + // Create a binding context + BindingContext context{ + bindingType, + m_session, + descriptor, + nullptr 
/* no binding properties for unbound outputs */, + {} // SubresourceId is set by callee + }; + + // Create empty feature value + auto featureValue = CreateUnboundOuputFeatureValue(value, descriptor); + + // Update feature value + auto spLotusValueProvider = featureValue.as(); + WINML_THROW_IF_FAILED_MSG( + spLotusValueProvider->UpdateSourceResourceData(context, value.get()), + "Failed to update bound object for model variable output %s", + name.c_str()); + + // Get abi representation + winrt::Windows::Foundation::IInspectable inspectable; + WINML_THROW_IF_FAILED_MSG( + spLotusValueProvider->AbiRepresentation(inspectable), + "Failed to return bound object for model variable output %s", + name.c_str()); + + return inspectable; +} + +std::unordered_map LearningModelBinding::UpdateProviders() { + std::unordered_map outputs; + + auto& output_names = GetOutputNames(); + auto& output_values = GetOutputs(); + WINML_THROW_HR_IF_FALSE_MSG( + E_UNEXPECTED, + output_names.size() == output_values.size(), + "Evaluation produced unexpected output variables."); + + for (unsigned i = 0; i < output_names.size(); i++) { + auto utf8_name = output_names[i]; + auto value = output_values[i]; + + if (m_providers.find(utf8_name) != std::end(m_providers)) { + auto& providerInfo = m_providers[utf8_name]; + auto provider = providerInfo.Provider; + auto context = providerInfo.Context; + WINML_THROW_IF_FAILED_MSG( + provider->UpdateSourceResourceData(context, value.get()), + "Failed to update bound object for model variable output %s", + utf8_name.c_str()); + + outputs[utf8_name] = providerInfo.CallerSpecifiedFeatureValue; + } else { + // unbound outputs + outputs[utf8_name] = CreateUnboundOutput(utf8_name, value); + } + } + + // Clear any converters cached on inputs to return them to the pool + for (auto&& provider : m_providers) { + if (provider.second.Context.converter != nullptr) { + provider.second.Context.converter->Get()->Tensorizer->ResetAllocator(); + provider.second.Context.converter 
= nullptr; + } + } + + return outputs; +} + +STDMETHODIMP LearningModelBinding::Bind( + const wchar_t* name, + UINT32 cchName, + IUnknown* value) { + try { + _winmlt::TelemetryEvent binding_event(_winmlt::EventCategory::kBinding); + BindingType binding_type; + std::string binding_name; + winrt::com_ptr binding_value; + + winrt::Windows::Foundation::IInspectable to; + RETURN_IF_FAILED(value->QueryInterface( + winrt::guid_of(), + reinterpret_cast(winrt::put_abi(to)))); + + auto featureName = WinML::Strings::UTF8FromUnicode(name, cchName); + std::tie(binding_name, binding_value, binding_type) = CreateBinding(featureName, to, nullptr); + switch (binding_type) { + case BindingType::kInput: + WINML_THROW_IF_FAILED(BindInput(binding_name, binding_value)); + break; + case BindingType::kOutput: + WINML_THROW_IF_FAILED(BindOutput(binding_name, binding_value)); + break; + default: + FAIL_FAST(); + } + return S_OK; + } + WINML_CATCH_ALL_COM +} + +static std::pair Contains(const std::vector& names, const std::string& name) { + auto it = std::find(std::begin(names), std::end(names), name); + if (it == std::end(names)) { + return {false, 0}; + } + return {true, it - std::begin(names)}; +} + +// This method releases control of memory of ml_value from caller of BindInput +HRESULT LearningModelBinding::BindInput(const std::string& name, winrt::com_ptr value) { + bool exists; + size_t index; + std::tie(exists, index) = Contains(input_names_, name); + + auto engine = m_session.as()->GetEngine(); + winrt::com_ptr device_value; + WINML_THROW_IF_FAILED(engine->CreateOneInputAcrossDevices(name.c_str(), value.get(), device_value.put())); // an input will always be copied on device mismatch + + if (exists) { + inputs_[index] = device_value; + } else { + input_names_.push_back(name); + inputs_.push_back(device_value); + } + + return S_OK; +} + +HRESULT LearningModelBinding::BindOutput(const std::string& name, winrt::com_ptr value) { + bool exists; + size_t index; + std::tie(exists, index) = 
Contains(output_names_, name); + + if (exists) { + outputs_[index] = value; + return S_OK; + } + + output_names_.push_back(name); + outputs_.push_back(value); + return S_OK; +} + +const std::vector& LearningModelBinding::GetOutputNames() const { + return output_names_; +} + +const std::vector& LearningModelBinding::GetInputNames() const { + return input_names_; +} + +std::vector>& LearningModelBinding::GetOutputs() { + return outputs_; +} + +const std::vector>& LearningModelBinding::GetInputs() const { + return inputs_; +} + +void LearningModelBinding::BindUnboundOutputs() { + auto& bound_output_names = GetOutputNames(); + std::unordered_set bound_output_names_set( + bound_output_names.begin(), + bound_output_names.end()); + + // Get model output feature names + auto model_impl = m_session.Model().as(); + auto output_features = model_impl->OutputFeatures(); + std::vector output_descriptors( + begin(output_features), + end(output_features)); + + // Convert all output features to their feature names + std::vector output_feature_names; + std::transform( + std::begin(output_descriptors), + std::end(output_descriptors), + std::back_inserter(output_feature_names), + [&](auto& descriptor) { + auto descriptor_native = descriptor.as(); + const wchar_t* p_name; + uint32_t size; + WINML_THROW_IF_FAILED(descriptor_native->GetName(&p_name, &size)); + return WinML::Strings::UTF8FromUnicode(p_name, size); + }); + + // Find the set difference to determine if there are any unbound output features + std::vector unbound_output_names; + std::copy_if( + std::begin(output_feature_names), std::end(output_feature_names), + std::inserter(unbound_output_names, std::begin(unbound_output_names)), + [&](const auto& outputFeatureName) { + return bound_output_names_set.find(outputFeatureName) == bound_output_names_set.end(); + }); + + // Add all unbound outputs to binding collection + for (const auto& unbound_output : unbound_output_names) { + auto engine = m_session.as()->GetEngine(); + + 
winrt::com_ptr value; + WINML_THROW_IF_FAILED(engine->CreateNullValue(value.put())); + WINML_THROW_IF_FAILED(BindOutput(unbound_output, value)); + } +} + +} // namespace winrt::Windows::AI::MachineLearning::implementation \ No newline at end of file diff --git a/winml/lib/Api/LearningModelBinding.h b/winml/lib/Api/LearningModelBinding.h new file mode 100644 index 0000000000000..d3c2e06ebc9c6 --- /dev/null +++ b/winml/lib/Api/LearningModelBinding.h @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "LearningModelBinding.g.h" + +#include "inc/ILotusValueProviderPrivate.h" +#include "core/providers/winml/winml_provider_factory.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { + +struct LearningModelBinding : LearningModelBindingT { + struct ProviderInfo { + Windows::Foundation::IInspectable CallerSpecifiedFeatureValue = nullptr; + winrt::com_ptr Provider = nullptr; + WinML::BindingContext Context = {}; + }; + + public: + using KeyValuePair = + Windows::Foundation::Collections::IKeyValuePair; + + LearningModelBinding() = delete; + LearningModelBinding(Windows::AI::MachineLearning::LearningModelSession const& session); + + void Bind(hstring const& name, Windows::Foundation::IInspectable const& value); + void Bind(hstring const& name, Windows::Foundation::IInspectable const& value, Windows::Foundation::Collections::IPropertySet const& properties); + STDMETHOD(Bind)(const wchar_t* name, UINT32 cchName, IUnknown* value); + + void Clear(); + Windows::Foundation::Collections::IIterator First(); + Windows::Foundation::IInspectable Lookup(hstring const& key); + uint32_t Size(); + bool HasKey(hstring const& key); + void Split( + Windows::Foundation::Collections::IMapView& first, + Windows::Foundation::Collections::IMapView& second); + + std::tuple, WinML::BindingType> CreateBinding( + const std::string& name, + const Windows::Foundation::IInspectable& value, + 
Windows::Foundation::Collections::IPropertySet const& properties); + + std::unordered_map UpdateProviders(); + + const Windows::AI::MachineLearning::LearningModelSession& GetSession() { return m_session; } + + const std::vector& GetInputNames() const; + const std::vector& GetOutputNames() const; + + const std::vector>& GetInputs() const; + std::vector>& GetOutputs(); + + HRESULT BindOutput(const std::string& name, winrt::com_ptr value); + void BindUnboundOutputs(); + + private: + void CacheProvider(std::string name, ProviderInfo& spProvider); + Windows::Foundation::IInspectable CreateUnboundOutput(const std::string& name, winrt::com_ptr value); + ILearningModelFeatureValue CreateUnboundOuputFeatureValue( + const winrt::com_ptr value, + ILearningModelFeatureDescriptor& descriptor); + HRESULT BindInput(const std::string& name, winrt::com_ptr value); + + private: + const Windows::AI::MachineLearning::LearningModelSession m_session; + + std::unordered_map m_providers; + + std::vector input_names_; + std::vector> inputs_; + std::vector output_names_; + std::vector> outputs_; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { +struct LearningModelBinding : LearningModelBindingT { +}; +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/LearningModelDevice.cpp b/winml/lib/Api/LearningModelDevice.cpp new file mode 100644 index 0000000000000..ef0bf8e430b65 --- /dev/null +++ b/winml/lib/Api/LearningModelDevice.cpp @@ -0,0 +1,136 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" +#include "LearningModelDevice.h" + +#include +#include +#include "D3DDeviceCache.h" + +#include "ConverterResourceStore.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +/*static*/ void LearningModelDevice::DllUnload() { +} + +Windows::Graphics::DisplayAdapterId LearningModelDevice::AdapterId() try { + Windows::Graphics::DisplayAdapterId id; + id.LowPart = m_deviceCache->GetDeviceLuid().LowPart; + id.HighPart = m_deviceCache->GetDeviceLuid().HighPart; + return id; +} +WINML_CATCH_ALL + +LearningModelDevice::LearningModelDevice(Windows::AI::MachineLearning::LearningModelDeviceKind const& deviceKind) try : m_deviceCache(std::make_unique(deviceKind)) { + m_deviceKind = deviceKind; + m_isCpuDevice = m_deviceKind == LearningModelDeviceKind::Cpu || m_deviceKind == LearningModelDeviceKind::Default; + if (m_isCpuDevice) { + assert(m_deviceCache->GetD3D12Device() == nullptr); + } +} +WINML_CATCH_ALL + +LearningModelDevice::LearningModelDevice(Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice const& device) try : m_deviceCache(std::make_unique(device)) { + m_deviceKind = LearningModelDeviceKind::DirectX; + m_isCpuDevice = false; +} +WINML_CATCH_ALL + +LearningModelDevice::LearningModelDevice(ID3D12CommandQueue* queue) try : m_deviceKind(LearningModelDeviceKind::DirectX), + m_deviceCache(std::make_unique(queue)) { + m_isCpuDevice = false; +} +WINML_CATCH_ALL + +LearningModelDevice::~LearningModelDevice() { + // needed for shared ptr destruction +} + +Windows::AI::MachineLearning::LearningModelDevice LearningModelDevice::CreateFromDirect3D11Device(Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice const& device) try { + return make(device); +} +WINML_CATCH_ALL + +std::shared_ptr<::Windows::AI::MachineLearning::ConverterResourceStore> LearningModelDevice::TensorizerStore() { + if (m_tensorizerStore == nullptr) { + m_tensorizerStore = ::Windows::AI::MachineLearning::ConverterResourceStore::Create(5); + } + return 
m_tensorizerStore; +} + +std::shared_ptr<::Windows::AI::MachineLearning::ConverterResourceStore> LearningModelDevice::DetensorizerStore() { + if (m_detensorizerStore == nullptr) { + m_detensorizerStore = ::Windows::AI::MachineLearning::ConverterResourceStore::Create(5); + } + return m_detensorizerStore; +} + +winml::LearningModelDeviceKind +LearningModelDevice::GetDeviceKind() { + return m_deviceKind; +} + +bool LearningModelDevice::IsCpuDevice() { + return m_isCpuDevice; +} + +const LUID& +LearningModelDevice::GetDeviceLuid() { + return m_deviceCache->GetDeviceLuid(); +} + +D3DDeviceCache* +LearningModelDevice::GetD3DDeviceCache() { + return m_deviceCache.get(); +} + +wgdx::Direct3D11::IDirect3DDevice +LearningModelDevice::Direct3D11Device() try { + return m_deviceCache->GetWinrtDevice(); +} +WINML_CATCH_ALL + +ID3D12Device* +LearningModelDevice::GetD3DDevice() { + return m_deviceCache->GetD3D12Device(); +} + +ID3D12CommandQueue* +LearningModelDevice::GetDeviceQueue() { + return m_deviceCache->GetCommandQueue(); +} + +STDMETHODIMP +LearningModelDevice::SetMetacommandsEnabled(boolean enabled) { + m_areMetacommandsEnabled = enabled; + return S_OK; +} + +bool LearningModelDevice::MetacommandsEnabled() { + return m_areMetacommandsEnabled; +} + +STDMETHODIMP_(boolean) +LearningModelDevice::SharedHandleInitialized() { + return m_deviceCache->SharedHandleInitialized(); +} +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { +// copied from cppwinrt magic to create abi wrappers. Need to do it this way +// since peeps underneath (like the constructor) will throw +HRESULT __stdcall LearningModelDevice::CreateFromD3D12CommandQueue( + ID3D12CommandQueue* queue, + IUnknown** device) noexcept { + try { + WINML_THROW_HR_IF_NULL_MSG(E_INVALIDARG, queue, "Failed to create LearningModelDevice. 
Invalid argument queue."); + WINML_THROW_HR_IF_NULL_MSG(E_INVALIDARG, device, "Failed to create LearningModelDevice. Invalid argument device."); + + auto machineLearningDevice = make(queue); + *device = machineLearningDevice.as().detach(); + return S_OK; + } + WINML_CATCH_ALL_COM +} +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/LearningModelDevice.h b/winml/lib/Api/LearningModelDevice.h new file mode 100644 index 0000000000000..ce20a4b7656de --- /dev/null +++ b/winml/lib/Api/LearningModelDevice.h @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "LearningModelDevice.g.h" + +namespace Windows::AI::MachineLearning { +class ConverterResourceStore; +} + +namespace winrt::Windows::AI::MachineLearning::implementation { +class D3DDeviceCache; + +struct LearningModelDevice : LearningModelDeviceT { + public: + LearningModelDevice() = delete; + + LearningModelDevice( + winml::LearningModelDeviceKind const& deviceKind); + + LearningModelDevice( + wgdx::Direct3D11::IDirect3DDevice const& device); + + LearningModelDevice( + ID3D12CommandQueue* queue); + + ~LearningModelDevice(); + + wg::DisplayAdapterId + AdapterId(); + + static winml::LearningModelDevice CreateFromDirect3D11Device( + wgdx::Direct3D11::IDirect3DDevice const& device); + + // internal: + STDMETHOD(SetMetacommandsEnabled) + ( + boolean enabled) final; + + // internal: + STDMETHOD_(boolean, SharedHandleInitialized) + (); + + // internal: + + winml::LearningModelDeviceKind + GetDeviceKind(); + + bool + MetacommandsEnabled(); + + bool + IsCpuDevice(); + + const LUID& + GetDeviceLuid(); + + D3DDeviceCache* + GetD3DDeviceCache(); + + wgdx::Direct3D11::IDirect3DDevice + Direct3D11Device(); + + ID3D12Device* + GetD3DDevice(); + + ID3D12CommandQueue* + GetDeviceQueue(); + + static void + DllUnload(); + + std::shared_ptr + TensorizerStore(); + + std::shared_ptr + 
DetensorizerStore(); + + private: + // stores the device kind that was originally chosen in the constructor + winml::LearningModelDeviceKind m_deviceKind; + // if the user asked us to run on the cpu, or asked us to choose and we chose cpu + bool m_isCpuDevice; + bool m_areMetacommandsEnabled = true; + std::shared_ptr m_detensorizerStore; + std::shared_ptr m_tensorizerStore; + + std::unique_ptr m_deviceCache; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { +struct LearningModelDevice : LearningModelDeviceT { + HRESULT __stdcall CreateFromD3D12CommandQueue(ID3D12CommandQueue* queue, IUnknown** device) noexcept final; +}; +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/LearningModelEvaluationResult.cpp b/winml/lib/Api/LearningModelEvaluationResult.cpp new file mode 100644 index 0000000000000..776bd79422cb6 --- /dev/null +++ b/winml/lib/Api/LearningModelEvaluationResult.cpp @@ -0,0 +1,83 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" +#include "LearningModelEvaluationResult.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +hstring LearningModelEvaluationResult::CorrelationId() try { + return m_correlationId; +} +WINML_CATCH_ALL + +void LearningModelEvaluationResult::CorrelationId(const hstring& correlationId) { + m_correlationId = correlationId; +} + +int32_t LearningModelEvaluationResult::ErrorStatus() try { + return m_errorStatus; +} +WINML_CATCH_ALL + +void LearningModelEvaluationResult::ErrorStatus(int32_t errorStatus) { + m_errorStatus = errorStatus; +} + +bool LearningModelEvaluationResult::Succeeded() try { + return m_succeeded; +} +WINML_CATCH_ALL + +void LearningModelEvaluationResult::Succeeded(bool succeeded) { + m_succeeded = succeeded; +} + +Windows::Foundation::Collections::IMapView LearningModelEvaluationResult::Outputs() try { + std::unordered_map outputs; + + for (auto& output : m_outputs) { + auto key = WinML::Strings::HStringFromUTF8(output.first); + auto value = output.second; + outputs.emplace(key, value); + } + + return winrt::single_threaded_map(std::move(outputs)).GetView(); +} +WINML_CATCH_ALL + +void LearningModelEvaluationResult::Outputs(Windows::Foundation::Collections::IMapView outputs) { + m_outputs.clear(); + + for (auto pair : outputs) { + auto key = WinML::Strings::UTF8FromHString(pair.Key()); + auto value = pair.Value(); + m_outputs.emplace(key, value); + } +} + +HRESULT LearningModelEvaluationResult::GetOutput( + const wchar_t* name, + UINT32 cchName, + IUnknown** result) { + *result = nullptr; + + auto outputName = WinML::Strings::UTF8FromUnicode(name, cchName); + auto foundIt = m_outputs.find(outputName); + + if (foundIt == std::end(m_outputs)) { + return E_FAIL; + } + + auto inspectable = foundIt->second; + *result = inspectable.as<::IUnknown>().detach(); + + return S_OK; +} + +HRESULT LearningModelEvaluationResult::SetOutputs( + std::unordered_map&& outputs) { + m_outputs = std::move(outputs); + return S_OK; +} + +} 
// namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api/LearningModelEvaluationResult.h b/winml/lib/Api/LearningModelEvaluationResult.h new file mode 100644 index 0000000000000..b2d8cedac10e4 --- /dev/null +++ b/winml/lib/Api/LearningModelEvaluationResult.h @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "LearningModelEvaluationResult.g.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +struct LearningModelEvaluationResult : LearningModelEvaluationResultT< + LearningModelEvaluationResult, + ILearningModelEvaluationResultNative> { + LearningModelEvaluationResult() = default; + + hstring CorrelationId(); + void CorrelationId(const hstring& correlationId); + + int32_t ErrorStatus(); + void ErrorStatus(int32_t errorStatus); + + bool Succeeded(); + void Succeeded(bool succeeded); + + Windows::Foundation::Collections::IMapView Outputs(); + void Outputs(Windows::Foundation::Collections::IMapView outputs); + + // ILearningModelEvaluationResultNative + STDMETHOD(GetOutput) + ( + const wchar_t* name, + UINT32 cchName, + IUnknown** result); + + HRESULT SetOutputs(std::unordered_map&& outputs); + + private: + hstring m_correlationId; + int32_t m_errorStatus = 0; + bool m_succeeded = false; + std::unordered_map m_outputs; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api/LearningModelSession.cpp b/winml/lib/Api/LearningModelSession.cpp new file mode 100644 index 0000000000000..a8f1e8ee7b32d --- /dev/null +++ b/winml/lib/Api/LearningModelSession.cpp @@ -0,0 +1,416 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" + +#include "LearningModelSession.h" + +#include "ImageFeatureDescriptor.h" +#include "LearningModel.h" +#include "LearningModelBinding.h" +#include "LearningModelEvaluationResult.h" +#include "LearningModelDevice.h" +#include "LearningModelSessionOptions.h" +#include "TensorFeatureDescriptor.h" +#include "TelemetryEvent.h" + +#include "D3DDeviceCache.h" + +static const auto c_enable_debug_output = L"EnableDebugOutput"; + +namespace guid_details { +// This GUID is to be used for delimiting ML-related categories of capturable work. +// {D113B493-BBA2-4993-8608-D706A73B91CE} +struct __declspec(uuid("D113B493-BBA2-4993-8608-D706A73B91CE")) __declspec(novtable) WINML_PIX_EVAL_CAPTURABLE_WORK_GUID {}; +} // namespace guid_details +static const GUID WINML_PIX_EVAL_CAPTURABLE_WORK_GUID = __uuidof(guid_details::WINML_PIX_EVAL_CAPTURABLE_WORK_GUID); + +namespace winrt::Windows::AI::MachineLearning::implementation { + +LearningModelSession::LearningModelSession( + winml::LearningModel const& model) try : LearningModelSession(model, + make(LearningModelDeviceKind::Default)) {} +WINML_CATCH_ALL + +LearningModelSession::LearningModelSession( + winml::LearningModel const& model, + winml::LearningModelDevice const& deviceToRunOn) try : LearningModelSession(model, + deviceToRunOn, + nullptr) {} +WINML_CATCH_ALL + +LearningModelSession::LearningModelSession( + winml::LearningModel const& model, + winml::LearningModelDevice const& deviceToRunOn, + winml::LearningModelSessionOptions const& learningModelSessionOptions) try : model_(model), + device_(deviceToRunOn), + session_options_(learningModelSessionOptions), + operator_registry_(nullptr, nullptr) { + Initialize(); +} +WINML_CATCH_ALL + +WinML::IModel* +LearningModelSession::GetOptimizedModel() { + // Get the model proto + + auto should_close_model = + session_options_ != nullptr && + session_options_.CloseModelOnSessionCreation(); + + return GetOptimizedModel(should_close_model); +} + +WinML::IModel* 
+LearningModelSession::GetOptimizedModel(bool should_close_model) { + com_ptr model; + + { + // Lock the model detach/copy since multiple threads can access concurrently + CWinMLAutoLock lock(&session_creation_lock_); + + // Throw if the model has been disposed and is not capable of creating + // new sessions. + auto model_impl = model_.as(); + WINML_THROW_HR_IF_TRUE_MSG(E_INVALIDARG, model_impl->IsDisposed(), + "The model has been disposed."); + + model.attach(should_close_model + ? model_impl->DetachModel() + : model_impl->CloneModel()); + } + + // Ensure that the model is runnable on the device + auto isFloat16Supported = device_.as()->GetD3DDeviceCache()->IsFloat16Supported(); + if (!isFloat16Supported) { + WINML_THROW_IF_FAILED(model->ModelEnsureNoFloat16()); + } + return model.detach(); +} + +void LearningModelSession::Initialize() { + // Begin recording session creation telemetry + _winmlt::TelemetryEvent session_creation_event( + _winmlt::EventCategory::kSessionCreation); + // Get the optimized model proto from the learning model + com_ptr model; + model.attach(GetOptimizedModel()); + + // Create the session builder + auto device_impl = device_.as(); + auto model_impl = model_.as(); + + engine_factory_.copy_from(model_impl->GetEngineFactory()); + + com_ptr engine_builder; + engine_factory_->CreateEngineBuilder(engine_builder.put()); + + if (device_impl->IsCpuDevice() == false) { + engine_builder->SetD3D12Resources(device_impl->GetD3DDevice(), device_impl->GetDeviceQueue()); + } + + // Make onnxruntime apply the batch size override, if any + if (session_options_ && session_options_.BatchSizeOverride() != 0) { + engine_builder->SetBatchSizeOverride(session_options_.BatchSizeOverride()); + } + + com_ptr engine; + WINML_THROW_IF_FAILED(engine_builder->CreateEngine(engine.put())); + + // Register the custom operator registry + operator_registry_ = MLOperatorRegistry(model_impl->GetOperatorRegistry(), [](auto registry) { registry->Release(); }); + 
WINML_THROW_IF_FAILED(engine->RegisterCustomRegistry(operator_registry_.get())); + + // Register transformers - this should probably not be exposed on IEngine, but an internal call as this configuration step is ort specific. + engine->RegisterGraphTransformers(); + + // Load the model into the session + WINML_THROW_IF_FAILED(engine->LoadModel(model.get())); + + // the session owns the model_proto now, it used detach() + model = nullptr; + + // Initialize the session + WINML_THROW_IF_FAILED(engine->Initialize()); + + // Cache the constructed session + engine_ = engine; +} + +wfc::IPropertySet +LearningModelSession::EvaluationProperties() try { + if (evaluation_properties_ == nullptr) { + evaluation_properties_ = wfc::PropertySet(); + } + return evaluation_properties_; +} +WINML_CATCH_ALL + +winml::LearningModel +LearningModelSession::Model() try { + return model_; +} +WINML_CATCH_ALL + +winml::LearningModelDevice +LearningModelSession::Device() try { + return device_; +} +WINML_CATCH_ALL + +auto CreateBinding( + LearningModelSession& session, + wfc::IMap const features) { + auto binding = winrt::make(session); + + for (auto feature : features.GetView()) { + binding.Bind(feature.Key(), feature.Value()); + } + return binding; +} + +winml::LearningModelEvaluationResult +LearningModelSession::EvaluateFeatures( + wfc::IMap const features, + hstring const correlation_id) try { + auto binding = CreateBinding(*this, features); + return Evaluate(binding, correlation_id); +} +WINML_CATCH_ALL + +wf::IAsyncOperation +LearningModelSession::EvaluateFeaturesAsync( + wfc::IMap const features, + hstring const correlation_id) { + auto binding = CreateBinding(*this, features); + return EvaluateAsync(binding, correlation_id); +} + +uint64_t LearningModelSession::Run(winrt::com_ptr binding_impl) { + CheckClosed(); + + auto device = device_.as(); + CWinMLAutoLock lock(!device->IsCpuDevice() ? 
&evaluate_lock_ : nullptr); + + binding_impl->BindUnboundOutputs(); + + auto& input_names = binding_impl->GetInputNames(); + std::vector input_names_raw; + std::transform( + std::begin(input_names), + std::end(input_names), + std::back_inserter(input_names_raw), + [&](auto& name) { return name.c_str(); }); + + auto& inputs = binding_impl->GetInputs(); + std::vector inputs_raw; + std::transform( + std::begin(inputs), + std::end(inputs), + std::back_inserter(inputs_raw), + [&](auto& input) { return input.get(); }); + + auto& output_names = binding_impl->GetOutputNames(); + std::vector output_names_raw; + std::transform( + std::begin(output_names), + std::end(output_names), + std::back_inserter(output_names_raw), + [&](auto& name) { return name.c_str(); }); + + auto outputs = binding_impl->GetOutputs(); + std::vector outputs_raw; + std::transform( + std::begin(outputs), + std::end(outputs), + std::back_inserter(outputs_raw), + [&](auto& input) { return input.get(); }); + + engine_->Run(input_names_raw.data(), + inputs_raw.data(), + input_names_raw.size(), + output_names_raw.data(), + outputs_raw.data(), + output_names_raw.size()); + + if (!device->IsCpuDevice()) { + // Flush the D3D12 work from the DML execution provider and queue a fence before we release the lock. + // This allows us to wait without holding onto the lock in GetResults. + engine_->FlushContext(); + return device->GetD3DDeviceCache()->QueueFenceToD3D12(); + } + + // If it's the cpu then just return zero. fence value will be unused. + return 0; +} + +winml::LearningModelEvaluationResult +LearningModelSession::GetResults( + winrt::com_ptr binding_impl, + hstring const& correlation_id, + uint64_t evaluation_complete_fence) { + // First wait on the fence value for the expected frame. This is passed in so that + // the fence value is added to the queue in a thread safe manner. 
+ auto device = device_.as(); + auto is_gpu_evaluation = !device->IsCpuDevice(); + + if (is_gpu_evaluation) { + device->GetD3DDeviceCache()->WaitForFenceValue(evaluation_complete_fence); + } + + CWinMLAutoLock lock(is_gpu_evaluation ? &evaluate_lock_ : nullptr); + + if (is_gpu_evaluation) { + // For DML we aren't using the Sync function because we want to make fencing the + // completed frame thread safe while not holding the lock while waiting for the gpu. + engine_->ReleaseCompletedReferences(); + } else { + // For CPU call the standard Sync function + engine_->Sync(); + } + + // This isn't the best we are holding the lock while we wait for detensorize on the GPU. + // Update output providers + auto outputs = binding_impl->UpdateProviders(); + + // Once the first evaluation following initialization is complete, and therefore the + // initialization work is also complete, trim the upload heap. This is only done once + // to avoid requiring the extra allocation during each evaluation. + if (is_first_evaluate_) { + if (is_gpu_evaluation) { + engine_->TrimUploadHeap(); + } + is_first_evaluate_ = false; + } + + // Create the return status object + auto result = winrt::make(); + auto result_impl = result.as(); + result_impl->Succeeded(true); + result_impl->ErrorStatus(0); + result_impl->CorrelationId(correlation_id); + result_impl->SetOutputs(std::move(outputs)); + + return result; +} + +wf::IAsyncOperation +LearningModelSession::EvaluateAsync( + winml::LearningModelBinding binding, + hstring const correlation_id) { + _winmlt::TelemetryEvent kEvaluateModel_event(_winmlt::EventCategory::kEvaluation); + auto device = device_.as(); + + // Get the binding collection + auto binding_impl = binding.as(); + + ApplyEvaluationProperties(); + + // If we're running on the CPU, then return now and process the rest in the background. + // If we're running on the GPU, then queue up the work first (fast) and wait for the + // results (slow) in the background. 
+ bool should_queue_work = (!device->IsCpuDevice()); + if (!should_queue_work) { + co_await resume_background(); + } + + com_ptr queue; + queue.copy_from(device->GetDeviceQueue()); + com_ptr capture_interface = queue.try_as(); + + // markers for PIX debugging + if (capture_interface != nullptr) { + capture_interface->BeginCapturableWork(WINML_PIX_EVAL_CAPTURABLE_WORK_GUID); + } + + // call Run synchronously on the calling thread to queue up the work + uint64_t evaluation_complete_fence = Run(binding_impl); + + // markers for PIX debugging + if (capture_interface) { + capture_interface->EndCapturableWork(WINML_PIX_EVAL_CAPTURABLE_WORK_GUID); + } + + // after the work is queued, return to the caller + if (should_queue_work) { + // Queue detensorization + co_await resume_background(); + } + + // Get the Results on a background thread whenever they're ready + return GetResults(binding_impl, correlation_id, evaluation_complete_fence); +} + +winml::LearningModelEvaluationResult +LearningModelSession::Evaluate( + winml::LearningModelBinding binding, + hstring const& correlation_id) try { + ToggleProfiler(); + _winmlt::TelemetryEvent kEvaluateModel_event(_winmlt::EventCategory::kEvaluation); + + ApplyEvaluationProperties(); + + auto device = device_.as(); + + com_ptr queue; + queue.copy_from(device->GetDeviceQueue()); + com_ptr capture_interface = queue.try_as(); + + // markers for PIX debugging + if (capture_interface != nullptr) { + capture_interface->BeginCapturableWork(WINML_PIX_EVAL_CAPTURABLE_WORK_GUID); + } + + // Get the binding collection + auto binding_impl = binding.as(); + uint64_t evaluation_complete_fence = Run(binding_impl); + + // markers for PIX debugging + if (capture_interface) { + capture_interface->EndCapturableWork(WINML_PIX_EVAL_CAPTURABLE_WORK_GUID); + } + + return GetResults(binding_impl, correlation_id, evaluation_complete_fence); +} +WINML_CATCH_ALL + +void LearningModelSession::Close() { + engine_ = nullptr; +} + +void 
LearningModelSession::ApplyEvaluationProperties() try { + if (evaluation_properties_) { + auto is_debug_output_enabled = evaluation_properties_.HasKey(c_enable_debug_output); + if (is_debug_output_enabled) { + engine_factory_->EnableDebugOutput(is_debug_output_enabled); + } + } +} +WINML_CATCH_ALL + +void LearningModelSession::ToggleProfiler() { + CheckClosed(); + auto is_provider_enabled = + TraceLoggingProviderEnabled( + ::winml_trace_logging_provider, + WINEVENT_LEVEL_VERBOSE, + WINML_PROVIDER_KEYWORD_LOTUS_PROFILING); + + if (is_provider_enabled) { + engine_->StartProfiling(); + } else { + engine_->EndProfiling(); + } +} + +WinML::IEngine* +LearningModelSession::GetEngine() { + return engine_.get(); +} + +void LearningModelSession::CheckClosed() { + if (!engine_) { + WINML_THROW_HR(RO_E_CLOSED); + } +} +} // namespace winrt::Windows::AI::MachineLearning::implementation \ No newline at end of file diff --git a/winml/lib/Api/LearningModelSession.h b/winml/lib/Api/LearningModelSession.h new file mode 100644 index 0000000000000..eb55013907152 --- /dev/null +++ b/winml/lib/Api/LearningModelSession.h @@ -0,0 +1,131 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "LearningModelSession.g.h" + +#include "LearningModelBinding.h" +#include "MLOperatorAuthor.h" +#include "WinML_Lock.h" +#include "core/providers/winml/winml_provider_factory.h" +#include "iengine.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { + +struct LearningModelSession : LearningModelSessionT { + /* LearningModelSession constructors (MachineLearningContract 1). */ + LearningModelSession() = delete; + + LearningModelSession( + winml::LearningModel const& model); + + LearningModelSession( + winml::LearningModel const& model, + winml::LearningModelDevice const& deviceToRunOn); + + /* LearningModelSession constructors (MachineLearningContract 2). 
*/ + LearningModelSession( + winml::LearningModel const& model, + winml::LearningModelDevice const& deviceToRunOn, + winml::LearningModelSessionOptions const& sessionOptions); + + /* IClosable methods. */ + void + Close(); + + /* LearningModelSession properties (MachineLearningContract 1). */ + wfc::IPropertySet + EvaluationProperties(); + + winml::LearningModel + Model(); + + winml::LearningModelDevice + Device(); + + /* LearningModelSession methods (MachineLearningContract 1). */ + winml::LearningModelEvaluationResult + Evaluate( + winml::LearningModelBinding binding, + hstring const& correlationId); + + wf::IAsyncOperation + EvaluateAsync( + winml::LearningModelBinding binding, + hstring const correlationId); + + winml::LearningModelEvaluationResult + EvaluateFeatures( + wfc::IMap const features, + hstring const correlationId); + + wf::IAsyncOperation + EvaluateFeaturesAsync( + wfc::IMap const features, + hstring const correlationId); + + public: + /* Non-ABI methods */ + + WinML::IEngine* + GetEngine(); + + void + CheckClosed(); + + private: + void + Initialize(); + + WinML::IModel* + GetOptimizedModel(); + + WinML::IModel* + GetOptimizedModel(bool should_close_model); + + uint64_t + Run( + winrt::com_ptr bindingImpl); + + winml::LearningModelEvaluationResult + GetResults( + winrt::com_ptr bindingImpl, + hstring const& correlationId, + uint64_t fenceValueForDML); + + void + ApplyEvaluationProperties(); + + void + ToggleProfiler(); + + private: + com_ptr engine_factory_; + com_ptr engine_; + + using MLOperatorRegistry = std::unique_ptr; + MLOperatorRegistry operator_registry_; + + winml::LearningModel model_; + winml::LearningModelDevice device_; + winml::LearningModelSessionOptions session_options_; + wfc::IPropertySet evaluation_properties_; + + // Synchronization + CWinMLLock session_creation_lock_; + CWinMLLock evaluate_lock_; + + // is_first_evaluate_ is used as a heuristic to determine + // when the dml upload heap can be trimmed. 
+ bool is_first_evaluate_ = true; +}; + +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { + +struct LearningModelSession : LearningModelSessionT { +}; + +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/LearningModelSessionOptions.cpp b/winml/lib/Api/LearningModelSessionOptions.cpp new file mode 100644 index 0000000000000..c74880fe16279 --- /dev/null +++ b/winml/lib/Api/LearningModelSessionOptions.cpp @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" +#include "LearningModelSessionOptions.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +LearningModelSessionOptions::LearningModelSessionOptions(const LearningModelSessionOptions& options) : batch_size_override_(options.batch_size_override_), + close_model_on_session_creation_(options.close_model_on_session_creation_) {} + +uint32_t LearningModelSessionOptions::BatchSizeOverride() { + return batch_size_override_; +} + +void LearningModelSessionOptions::BatchSizeOverride(uint32_t value) { + batch_size_override_ = value; +} + +bool LearningModelSessionOptions::CloseModelOnSessionCreation() { + return close_model_on_session_creation_; +} + +void LearningModelSessionOptions::CloseModelOnSessionCreation(bool value) { + close_model_on_session_creation_ = value; +} +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api/LearningModelSessionOptions.h b/winml/lib/Api/LearningModelSessionOptions.h new file mode 100644 index 0000000000000..8e88c7264ea73 --- /dev/null +++ b/winml/lib/Api/LearningModelSessionOptions.h @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "LearningModelSessionOptions.g.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { + +struct LearningModelSessionOptions : LearningModelSessionOptionsT { + LearningModelSessionOptions() = default; + + LearningModelSessionOptions(const LearningModelSessionOptions& options); + + uint32_t BatchSizeOverride(); + void BatchSizeOverride(uint32_t value); + + bool CloseModelOnSessionCreation(); + void CloseModelOnSessionCreation(bool value); + + private: + // The batch size override property is used to inform the engine when the developer + // wants to explicitly set the batch size of a model to a fixed batch size. + // + // 0 : don't override the model batch definitions + // 1...n : override the model with the given batch size + // + // This value is an unsigned value, and users are not allowed to override models with a free batch size. + // If the model supports free dimensional batch sizes, the caller should provide 0, to not override. + // + // The default value here is 1 so that models with free dimension batch sizes (which is very common) + // can be optimized to fixed sizes. + uint32_t batch_size_override_ = 1; + + // The close model on session creation property is used to inform the engine when the developer + // no longer needs the learningmodelsession after session creation. + // The engine can use the learning model during session creation to move resources rather than make copies. + // + // True : Move resources in the LearningModel into the LearningModelSession + // False : Copy resources in the LearningModel to the LearningModelSession + // + // The default value here is False so that models are not automatically closed on session creation. 
+ bool close_model_on_session_creation_ = false; +}; + +} // namespace winrt::Windows::AI::MachineLearning::implementation + +namespace winrt::Windows::AI::MachineLearning::factory_implementation { +struct LearningModelSessionOptions : LearningModelSessionOptionsT { +}; +} // namespace winrt::Windows::AI::MachineLearning::factory_implementation diff --git a/winml/lib/Api/MapFeatureDescriptor.cpp b/winml/lib/Api/MapFeatureDescriptor.cpp new file mode 100644 index 0000000000000..9563ec8e3a33a --- /dev/null +++ b/winml/lib/Api/MapFeatureDescriptor.cpp @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#include "MapFeatureDescriptor.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +MapFeatureDescriptor::MapFeatureDescriptor( + const char* name, + const char* description, + bool is_required, + winml::TensorKind key_kind, + winml::ILearningModelFeatureDescriptor value_kind) : name_(WinML::Strings::HStringFromUTF8(name)), + description_(WinML::Strings::HStringFromUTF8(description)), + is_required_(is_required), + key_kind_(key_kind), + value_kind_(value_kind) { +} + +winml::TensorKind +MapFeatureDescriptor::KeyKind() try { + return key_kind_; +} +WINML_CATCH_ALL + +winml::ILearningModelFeatureDescriptor +MapFeatureDescriptor::ValueDescriptor() try { + return value_kind_; +} +WINML_CATCH_ALL + +hstring +MapFeatureDescriptor::Name() try { + return name_; +} +WINML_CATCH_ALL + +hstring +MapFeatureDescriptor::Description() try { + return description_; +} +WINML_CATCH_ALL + +winml::LearningModelFeatureKind +MapFeatureDescriptor::Kind() try { + return LearningModelFeatureKind::Map; +} +WINML_CATCH_ALL + +bool MapFeatureDescriptor::IsRequired() try { + return is_required_; +} +WINML_CATCH_ALL + +HRESULT +MapFeatureDescriptor::GetName( + const wchar_t** name, + uint32_t* cchName) { + *name = name_.data(); + *cchName = static_cast(name_.size()); + return S_OK; +} + +HRESULT 
+MapFeatureDescriptor::GetDescription( + const wchar_t** description, + uint32_t* cchDescription) { + *description = description_.data(); + *cchDescription = static_cast(description_.size()); + return S_OK; +} +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api/MapFeatureDescriptor.h b/winml/lib/Api/MapFeatureDescriptor.h new file mode 100644 index 0000000000000..8faa4292b0ce9 --- /dev/null +++ b/winml/lib/Api/MapFeatureDescriptor.h @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "MapFeatureDescriptor.g.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +struct MapFeatureDescriptor : MapFeatureDescriptorT< + MapFeatureDescriptor, + ILearningModelFeatureDescriptorNative> { + MapFeatureDescriptor() = delete; + + MapFeatureDescriptor( + const char* name, + const char* description, + bool is_required, + winml::TensorKind keyKind, + winml::ILearningModelFeatureDescriptor valueKind); + + // IMapDescriptor + winml::TensorKind + KeyKind(); + + winml::ILearningModelFeatureDescriptor + ValueDescriptor(); + + // IFeatureDescriptor + hstring + Name(); + + hstring + Description(); + + winml::LearningModelFeatureKind + Kind(); + + bool + IsRequired(); + + STDMETHOD(GetName) + ( + const wchar_t** name, + uint32_t* cchName) override; + + STDMETHOD(GetDescription) + ( + const wchar_t** description, + uint32_t* cchDescription) override; + + private: + winrt::hstring name_; + winrt::hstring description_; + bool is_required_; + winml::TensorKind key_kind_; + winml::ILearningModelFeatureDescriptor value_kind_; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation \ No newline at end of file diff --git a/winml/lib/Api/SequenceFeatureDescriptor.cpp b/winml/lib/Api/SequenceFeatureDescriptor.cpp new file mode 100644 index 0000000000000..2c062c78f2046 --- /dev/null +++ b/winml/lib/Api/SequenceFeatureDescriptor.cpp @@ 
-0,0 +1,67 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#include "SequenceFeatureDescriptor.h" + +using namespace Windows::AI::MachineLearning; + +namespace winrt::Windows::AI::MachineLearning::implementation { +SequenceFeatureDescriptor::SequenceFeatureDescriptor( + const char* name, + const char* description, + bool is_required, + winml::ILearningModelFeatureDescriptor descriptor) : name_(WinML::Strings::HStringFromUTF8(name)), + description_(WinML::Strings::HStringFromUTF8(description)), + is_required_(is_required), + element_descriptor_(descriptor) {} + + +winml::ILearningModelFeatureDescriptor +SequenceFeatureDescriptor::ElementDescriptor() try { + return element_descriptor_; +} +WINML_CATCH_ALL + +hstring +SequenceFeatureDescriptor::Name() try { + return name_; +} +WINML_CATCH_ALL + +hstring +SequenceFeatureDescriptor::Description() try { + return description_; +} +WINML_CATCH_ALL + +winml::LearningModelFeatureKind +SequenceFeatureDescriptor::Kind() try { + return LearningModelFeatureKind::Sequence; +} +WINML_CATCH_ALL + +bool SequenceFeatureDescriptor::IsRequired() try { + return is_required_; +} +WINML_CATCH_ALL + +HRESULT +SequenceFeatureDescriptor::GetName( + const wchar_t** name, + uint32_t* cchName) { + *name = name_.data(); + *cchName = static_cast(name_.size()); + return S_OK; +} + +HRESULT +SequenceFeatureDescriptor::GetDescription( + const wchar_t** description, + uint32_t* cchDescription) { + *description = description_.data(); + *cchDescription = static_cast(description_.size()); + return S_OK; +} +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git a/winml/lib/Api/SequenceFeatureDescriptor.h b/winml/lib/Api/SequenceFeatureDescriptor.h new file mode 100644 index 0000000000000..7f60691691c9c --- /dev/null +++ b/winml/lib/Api/SequenceFeatureDescriptor.h @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +#pragma once + +#include "SequenceFeatureDescriptor.g.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +struct SequenceFeatureDescriptor : SequenceFeatureDescriptorT< + SequenceFeatureDescriptor, + ILearningModelFeatureDescriptorNative> { + SequenceFeatureDescriptor() = delete; + SequenceFeatureDescriptor( + const char* name, + const char* description, + bool is_required, + winml::ILearningModelFeatureDescriptor element_descriptor); + + winml::ILearningModelFeatureDescriptor + ElementDescriptor(); + + // IFeatureDescriptor + hstring + Name(); + + hstring + Description(); + + winml::LearningModelFeatureKind + Kind(); + + bool + IsRequired(); + + STDMETHOD(GetName) + ( + const wchar_t** name, + uint32_t* cchName) override; + + STDMETHOD(GetDescription) + ( + const wchar_t** description, + uint32_t* cchDescription) override; + + private: + winrt::hstring name_; + winrt::hstring description_; + bool is_required_; + winml::ILearningModelFeatureDescriptor element_descriptor_; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation \ No newline at end of file diff --git a/winml/lib/Api/TensorFeatureDescriptor.cpp b/winml/lib/Api/TensorFeatureDescriptor.cpp new file mode 100644 index 0000000000000..192fae7287d6d --- /dev/null +++ b/winml/lib/Api/TensorFeatureDescriptor.cpp @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "pch.h" + +#include "LearningModel.h" + +#include "TensorFeatureDescriptor.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +TensorFeatureDescriptor::TensorFeatureDescriptor( + const char* name, + const char* description, + winml::TensorKind tensor_kind, + const std::vector& shape, + bool is_required, + bool has_unsupported_image_metadata) : name_(WinML::Strings::HStringFromUTF8(name)), + description_(WinML::Strings::HStringFromUTF8(description)), + tensor_kind_(tensor_kind), + shape_(shape), + is_required_(is_required), + has_unsupported_image_metadata_(has_unsupported_image_metadata) { +} + +winml::TensorKind +TensorFeatureDescriptor::TensorKind() try { + return tensor_kind_; +} +WINML_CATCH_ALL + +wfc::IVectorView +TensorFeatureDescriptor::Shape() try { + return winrt::single_threaded_vector( + std::vector( + std::begin(shape_), + std::end(shape_))) + .GetView(); +} +WINML_CATCH_ALL + +winrt::hstring +TensorFeatureDescriptor::Name() try { + return name_; +} +WINML_CATCH_ALL + +winrt::hstring +TensorFeatureDescriptor::Description() try { + return description_; +} +WINML_CATCH_ALL + +winml::LearningModelFeatureKind +TensorFeatureDescriptor::Kind() try { + return LearningModelFeatureKind::Tensor; +} +WINML_CATCH_ALL + +bool TensorFeatureDescriptor::IsRequired() try { + return is_required_; +} +WINML_CATCH_ALL + +bool TensorFeatureDescriptor::IsUnsupportedMetaData() try { + return has_unsupported_image_metadata_; +} +WINML_CATCH_ALL + +HRESULT +TensorFeatureDescriptor::GetName( + const wchar_t** name, + uint32_t* cchName) { + *name = name_.data(); + *cchName = static_cast(name_.size()); + return S_OK; +} + +HRESULT +TensorFeatureDescriptor::GetDescription( + const wchar_t** description, + uint32_t* cchDescription) { + *description = description_.data(); + *cchDescription = static_cast(description_.size()); + return S_OK; +} +} // namespace winrt::Windows::AI::MachineLearning::implementation diff --git 
a/winml/lib/Api/TensorFeatureDescriptor.h b/winml/lib/Api/TensorFeatureDescriptor.h new file mode 100644 index 0000000000000..dc233bb3aa4fa --- /dev/null +++ b/winml/lib/Api/TensorFeatureDescriptor.h @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "TensorFeatureDescriptor.g.h" + +namespace winrt::Windows::AI::MachineLearning::implementation { +struct TensorFeatureDescriptor : TensorFeatureDescriptorT< + TensorFeatureDescriptor, + ILearningModelFeatureDescriptorNative> { + TensorFeatureDescriptor() = delete; + TensorFeatureDescriptor( + const char* name, + const char* description, + winml::TensorKind tensor_kind, + const std::vector& shape, + bool is_required, + bool has_unsuppored_image_metadata); + + // ITensorDescriptor + winml::TensorKind + TensorKind(); + + wfc::IVectorView + Shape(); + + // IFeatureDescriptor + winrt::hstring + Name(); + + winrt::hstring + Description(); + + winml::LearningModelFeatureKind + Kind(); + + bool + IsRequired(); + + bool + IsUnsupportedMetaData(); + + STDMETHOD(GetName) + ( + const wchar_t** name, + uint32_t* cchName) override; + + STDMETHOD(GetDescription) + ( + const wchar_t** description, + uint32_t* cchDescription) override; + + private: + winrt::hstring name_; + winrt::hstring description_; + winml::TensorKind tensor_kind_; + std::vector shape_; + bool is_required_; + bool has_unsupported_image_metadata_; +}; +} // namespace winrt::Windows::AI::MachineLearning::implementation \ No newline at end of file diff --git a/winml/lib/Api/impl/FeatureCompatibility.h b/winml/lib/Api/impl/FeatureCompatibility.h new file mode 100644 index 0000000000000..f03acfad4bb4d --- /dev/null +++ b/winml/lib/Api/impl/FeatureCompatibility.h @@ -0,0 +1,401 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "ImageFeatureDescriptor.h" +#include "ImageFeatureValue.h" +#include "IMapFeatureValue.h" +#include "ISequenceFeatureValue.h" +#include "TensorFeatureDescriptor.h" + +namespace Windows::AI::MachineLearning { + +namespace error_strings { +using namespace winrt::Windows::AI::MachineLearning; + +// This must be kept in sync with the TensorKind enum in Windows.AI.MachineLearning.idl +const char* SzTensorKind[] = + { + "Undefined", + "Float", + "UInt8", + "Int8", + "UInt16", + "Int16", + "Int32", + "Int64", + "String", + "Boolean", + "Float16", + "Double", + "UInt32", + "UInt64", + "Complex64", + "Complex128", +}; + +static std::string ToString(ILearningModelFeatureDescriptor descriptor); + +static std::string ToString(const std::vector& shape) { + std::ostringstream stream; + stream << "["; + std::copy(shape.begin(), shape.end(), std::ostream_iterator(stream, ",")); + stream << "]"; + + return stream.str(); +} + +static std::string ToString(winrt::Windows::Foundation::Collections::IVectorView shape) { + auto shapeVec = std::vector(begin(shape), end(shape)); + return ToString(shapeVec); +} + +static std::string ToString( + TensorKind kind, + winrt::Windows::Foundation::Collections::IVectorView shape) { + FAIL_FAST_IF_MSG(kind == TensorKind::Complex128, "Unexpected TensorKind Complex128."); + FAIL_FAST_IF_MSG(kind == TensorKind::Complex64, "Unexpected TensorKind Complex64."); + FAIL_FAST_IF_MSG(kind == TensorKind::Undefined, "Unexpected TensorKind Undefined."); + + std::ostringstream stream; + stream << SzTensorKind[static_cast(kind)] << ToString(shape); + return stream.str(); +} + +static std::string ToString(ITensorFeatureDescriptor descriptor) { + return ToString(descriptor.TensorKind(), descriptor.Shape()); +} + +static std::string ToString(ITensor value) { + return ToString(value.TensorKind(), value.Shape()); +} + +static std::string ToString(IMapFeatureDescriptor descriptor) { + auto keyKind = descriptor.KeyKind(); + 
FAIL_FAST_IF_MSG(keyKind == TensorKind::Complex128, "Unexpected TensorKind Complex128."); + FAIL_FAST_IF_MSG(keyKind == TensorKind::Complex64, "Unexpected TensorKind Complex64."); + FAIL_FAST_IF_MSG(keyKind == TensorKind::Undefined, "Unexpected TensorKind Undefined."); + + auto valueDescriptor = descriptor.ValueDescriptor(); + std::ostringstream stream; + stream << "Map<" << SzTensorKind[static_cast(keyKind)] << "," << ToString(valueDescriptor) << ">"; + return stream.str(); +} + +static std::string ToString(winrt::com_ptr value) { + TensorKind keyKind; + FAIL_FAST_IF_FAILED(value->get_KeyKind(&keyKind)); + FAIL_FAST_IF_MSG(keyKind == TensorKind::Complex128, "Unexpected TensorKind Complex128."); + FAIL_FAST_IF_MSG(keyKind == TensorKind::Complex64, "Unexpected TensorKind Complex64."); + FAIL_FAST_IF_MSG(keyKind == TensorKind::Undefined, "Unexpected TensorKind Undefined."); + + ILearningModelFeatureDescriptor valueDescriptor; + FAIL_FAST_IF_FAILED(value->get_ValueDescriptor(&valueDescriptor)); + std::ostringstream stream; + stream << "Map<" << SzTensorKind[static_cast(keyKind)] << "," << ToString(valueDescriptor) << ">"; + return stream.str(); +} + +static std::string ToString(ISequenceFeatureDescriptor descriptor) { + auto elementDescriptor = descriptor.ElementDescriptor(); + std::ostringstream stream; + stream << "Sequence<" << ToString(elementDescriptor) << ">"; + return stream.str(); +} + +static std::string ToString(winrt::com_ptr value) { + ILearningModelFeatureDescriptor elementDescriptor; + FAIL_FAST_IF_FAILED(value->get_ElementDescriptor(&elementDescriptor)); + + std::ostringstream stream; + stream << "Sequence<" << ToString(elementDescriptor) << ">"; + return stream.str().c_str(); +} + +static std::string ToString(IImageFeatureDescriptor descriptor) { + std::ostringstream stream; + stream << "Image[" << descriptor.Width() << "x" << descriptor.Height() << "]"; + return stream.str(); +} + +static std::string ToString(winrt::com_ptr value) { + 
std::ostringstream stream; + stream << "Image[" << value->Widths()[0] << "x" << value->Heights()[0] << "]"; + return stream.str(); +} + +static std::string ToString(ILearningModelFeatureDescriptor descriptor) { + switch (descriptor.Kind()) { + case LearningModelFeatureKind::Image: + return ToString(descriptor.as()); + break; + case LearningModelFeatureKind::Map: + return ToString(descriptor.as()); + break; + case LearningModelFeatureKind::Sequence: + return ToString(descriptor.as()); + break; + case LearningModelFeatureKind::Tensor: + return ToString(descriptor.as()); + default: + FAIL_FAST_MSG("Unexpected descriptor LearningModelFeatureKind."); + } +} + +static std::string ToString(ILearningModelFeatureValue value) { + switch (value.Kind()) { + case LearningModelFeatureKind::Image: + return ToString(value.as()); + break; + case LearningModelFeatureKind::Map: + return ToString(value.as()); + break; + case LearningModelFeatureKind::Sequence: + return ToString(value.as()); + break; + case LearningModelFeatureKind::Tensor: + return ToString(value.as()); + default: + FAIL_FAST_MSG("Unexpected descriptor LearningModelFeatureKind."); + } +} +} // namespace error_strings + +// This file produces the IsFeatureValueCompatibleWithDescriptor helper method. +// It is used in the Bind() call to determine whether a feature value aggrees +// with the input or output descriptor present on the model. +// +// These checks are accomplished by indexing into the FeatureKindCompatibilityMatrix. +// This matrix is indexed by Kind, and is a group of function pointers which accept +// a feature value and descriptr, and return whether they are compatible. 
+namespace compatibility_details { +using namespace winrt::Windows::AI::MachineLearning; + +using K = LearningModelFeatureKind; + +static void not_compatible_hr(HRESULT hr, ILearningModelFeatureValue value, ILearningModelFeatureDescriptor descriptor) { + auto name = WinML::Strings::UTF8FromHString(descriptor.Name()); + + WINML_THROW_IF_FAILED_MSG( + hr, + "Model variable %s, expects %s, but binding was attempted with an incompatible type %s.", + name.c_str(), + error_strings::ToString(descriptor).c_str(), + error_strings::ToString(value).c_str()); +} + +static void not_compatible(ILearningModelFeatureValue value, ILearningModelFeatureDescriptor descriptor) { + not_compatible_hr(WINML_ERR_INVALID_BINDING, value, descriptor); +} + +static HRESULT verify(ILearningModelFeatureDescriptor first, ILearningModelFeatureDescriptor second) { + RETURN_HR_IF(WINML_ERR_INVALID_BINDING, first.Kind() != second.Kind()); + + if (auto mapFirst = first.try_as()) { + auto mapSecond = second.try_as(); + RETURN_HR_IF_NULL(WINML_ERR_INVALID_BINDING, mapSecond); + RETURN_HR_IF(WINML_ERR_INVALID_BINDING, mapFirst.KeyKind() != mapSecond.KeyKind()); + return verify(mapFirst.ValueDescriptor(), mapSecond.ValueDescriptor()); + } + + if (auto sequenceFirst = first.try_as()) { + auto sequenceSecond = second.try_as(); + RETURN_HR_IF_NULL(WINML_ERR_INVALID_BINDING, sequenceSecond); + return verify(sequenceFirst.ElementDescriptor(), sequenceSecond.ElementDescriptor()); + } + + if (auto tensorFirst = first.try_as()) { + auto tensorSecond = second.try_as(); + RETURN_HR_IF_NULL(WINML_ERR_INVALID_BINDING, tensorSecond); + RETURN_HR_IF(WINML_ERR_INVALID_BINDING, tensorFirst.TensorKind() != tensorSecond.TensorKind()); + + // since we only really support scalars inside maps and sequences, + // make sure that each dimension is either -1 or 1. + // Note that they don't have be the same since they're still compatible. 
+ for (auto&& dim : tensorFirst.Shape()) { + RETURN_HR_IF(WINML_ERR_INVALID_BINDING, (dim != -1 && dim != 1)); + } + for (auto&& dim : tensorSecond.Shape()) { + RETURN_HR_IF(WINML_ERR_INVALID_BINDING, (dim != -1 && dim != 1)); + } + return S_OK; + } + + return WINML_ERR_INVALID_BINDING; +} + +/* + Checks if FeatureValue matches the feature description of a model + TValue: feature value from binding + TFeatureDescriptor: feature description from model + */ +template +void verify(ILearningModelFeatureValue value, ILearningModelFeatureDescriptor descriptor) { + not_compatible(value, descriptor); +} + +template <> +void verify( + ILearningModelFeatureValue value, + ILearningModelFeatureDescriptor descriptor) { + thrower fail = std::bind(not_compatible_hr, std::placeholders::_1, value, descriptor); + enforce check = std::bind(enforce_not_false, std::placeholders::_1, std::placeholders::_2, fail); + + auto tensorValue = value.as(); + auto tensorDescriptor = descriptor.as(); + check(WINML_ERR_INVALID_BINDING, tensorValue.TensorKind() == tensorDescriptor.TensorKind()); + + auto spValueProvider = tensorValue.as(); + + bool isPlaceHolder; + if (SUCCEEDED(spValueProvider->IsPlaceholder(&isPlaceHolder)) && !isPlaceHolder) { + // Placeholders dont have shapes set, so do the shape check for non-Placeholders + auto tensorValueShape = tensorValue.Shape(); + auto tensorDescriptorShape = tensorDescriptor.Shape(); + check(WINML_ERR_SIZE_MISMATCH, tensorValueShape.Size() == tensorDescriptorShape.Size()); + + for (unsigned i = 0; i < tensorValueShape.Size(); i++) { + if (tensorDescriptorShape.GetAt(i) == -1) { + // For free dimensions, the dimension will be set to -1. + // In that case skip validation. 
+ continue; + } + check(WINML_ERR_SIZE_MISMATCH, tensorValueShape.GetAt(i) == tensorDescriptorShape.GetAt(i)); + } + } +} + +template <> +void verify( + ILearningModelFeatureValue value, + ILearningModelFeatureDescriptor descriptor) { + thrower fail = std::bind(not_compatible_hr, std::placeholders::_1, value, descriptor); + enforce check = std::bind(enforce_not_false, std::placeholders::_1, std::placeholders::_2, fail); + enforce_succeeded check_succeeded = std::bind(enforce_not_failed, std::placeholders::_1, fail); + + auto spMapFeatureValue = value.as(); + auto mapDescriptor = descriptor.as(); + + TensorKind valueKeyKind; + check_succeeded(spMapFeatureValue->get_KeyKind(&valueKeyKind)); + check(WINML_ERR_INVALID_BINDING, valueKeyKind == mapDescriptor.KeyKind()); + + ILearningModelFeatureDescriptor valueValueDescriptor; + check_succeeded(spMapFeatureValue->get_ValueDescriptor(&valueValueDescriptor)); + + check_succeeded(verify(valueValueDescriptor, mapDescriptor.ValueDescriptor())); +} + +template <> +void verify( + ILearningModelFeatureValue value, + ILearningModelFeatureDescriptor descriptor) { + thrower fail = std::bind(not_compatible_hr, std::placeholders::_1, value, descriptor); + enforce_succeeded check_succeeded = std::bind(enforce_not_failed, std::placeholders::_1, fail); + + auto spSequenceFeatureValue = value.as(); + auto sequenceDescriptor = descriptor.as(); + + ILearningModelFeatureDescriptor valueElementDescriptor; + check_succeeded(spSequenceFeatureValue->get_ElementDescriptor(&valueElementDescriptor)); + + check_succeeded(verify(valueElementDescriptor, sequenceDescriptor.ElementDescriptor())); +} + +template <> +void verify( + ILearningModelFeatureValue value, + ILearningModelFeatureDescriptor descriptor) { + // No check is needed here. Because: + // For batchSize==1, no matter what shape the input has (smaller or larger), we support to bind it. + // For batchSize > 1, + // 1. 
for non-free dimension, we support to bind a batch of inputs with different shapes + // because we would reshape the inputs to same size as descriptor specified. + // 2. for free dimension, we have check in ImageFeatureValue that all inputs must have the same shape. + // And the check will be triggered at GetOrtValue step before binding. + return; +} + +template <> +void verify( + ILearningModelFeatureValue value, + ILearningModelFeatureDescriptor descriptor) { + thrower fail = std::bind(not_compatible_hr, std::placeholders::_1, value, descriptor); + enforce check = std::bind(enforce_not_false, std::placeholders::_1, std::placeholders::_2, fail); + enforce_succeeded check_succeeded = std::bind(enforce_not_failed, std::placeholders::_1, fail); + + auto tensorValue = value.as(); + auto imageDescriptor = descriptor.as(); + + check(WINML_ERR_INVALID_BINDING, tensorValue.TensorKind() == TensorKind::Float); + + auto spValueProvider = tensorValue.as(); + + bool isPlaceHolder; + if (SUCCEEDED(spValueProvider->IsPlaceholder(&isPlaceHolder)) && !isPlaceHolder) { + auto tensorValueShape = tensorValue.Shape(); + auto imageDescriptorShape = imageDescriptor->Shape(); + + check(WINML_ERR_SIZE_MISMATCH, tensorValueShape.Size() == imageDescriptorShape.Size()); + + for (unsigned i = 0; i < tensorValueShape.Size(); i++) { + // Free dimensions on images are indicated by setting the shape size -1/MAXUINT + // In that case, ignore the tensor size check + if (imageDescriptorShape.GetAt(i) != -1) { + check(WINML_ERR_SIZE_MISMATCH, tensorValueShape.GetAt(i) == imageDescriptorShape.GetAt(i)); + } + } + } +} + +/* + This is the case when a model expects a tensor, but image is passed in for binding. + There are two main scenarios for this: + 1. Image metadata does not exist: We should be tolerant to the models that does not have Image Metadata. + In this case, user can still pass in ImageFeatureValue as long as it meets the requirement for image tensorization + 2. 
Model may have Image metadata that values that we do not support. In this case we should reject binding ImageFeatureValue + https://github.com/onnx/onnx/blob/master/docs/MetadataProps.md + Supported metadata values in RS5 + - Image.BitmapPixelFormat: Gray8, RGB8, BGR8 + - Image.ColorSpaceGamma: SRGB + - Image.NominalPixelRagne: NominalRange_0_255 + */ +template <> +void verify( + ILearningModelFeatureValue value, + ILearningModelFeatureDescriptor descriptor) { + thrower fail = std::bind(not_compatible_hr, std::placeholders::_1, value, descriptor); + enforce check = std::bind(enforce_not_false, std::placeholders::_1, std::placeholders::_2, fail); + + auto imageValue = value.as(); + auto tensorDescriptor = descriptor.as(); + + check(WINML_ERR_INVALID_BINDING, !tensorDescriptor->IsUnsupportedMetaData()); + // NCHW: images must be 4 dimensions + auto tensorDescriptorShape = tensorDescriptor->Shape(); + check(WINML_ERR_SIZE_MISMATCH, 4 == tensorDescriptorShape.Size()); +} + +static void (*FeatureKindCompatibilityMatrix[4][4])(ILearningModelFeatureValue, ILearningModelFeatureDescriptor) = + { + // Tensor, Sequence, Map, Image + /* Tensor */ {verify, not_compatible, not_compatible, verify}, + /* Sequence */ {not_compatible, verify, not_compatible, not_compatible}, + /* Map */ {not_compatible, not_compatible, verify, not_compatible}, + /* Image */ {verify, not_compatible, not_compatible, verify}}; +} // namespace compatibility_details + +inline void VerifyFeatureValueCompatibleWithDescriptor( + winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue value, + winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor descriptor) { + using namespace compatibility_details; + + auto pfnAreKindsCompatible = + FeatureKindCompatibilityMatrix + [static_cast(value.Kind())][static_cast(descriptor.Kind())]; + + pfnAreKindsCompatible(value, descriptor); +} + +} // namespace Windows::AI::MachineLearning diff --git a/winml/lib/Api/impl/IMapFeatureValue.h 
b/winml/lib/Api/impl/IMapFeatureValue.h new file mode 100644 index 0000000000000..630befcc40eec --- /dev/null +++ b/winml/lib/Api/impl/IMapFeatureValue.h @@ -0,0 +1,19 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +namespace Windows::AI::MachineLearning { + +/* [uuid("3e4d4350-0b61-4517-aa6d-79d49bf164b4"), feature, contract, object, exclusiveto] */ +MIDL_INTERFACE("3e4d4350-0b61-4517-aa6d-79d49bf164b4") +IMapFeatureValue : public ::IUnknown { + public: + /* [propget] */ virtual HRESULT STDMETHODCALLTYPE get_KeyKind( + /* [out, retval] */ winrt::Windows::AI::MachineLearning::TensorKind * kind) = 0; + + /* [propget] */ virtual HRESULT STDMETHODCALLTYPE get_ValueDescriptor( + /* [out, retval] */ winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor * result) = 0; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/impl/ISequenceFeatureValue.h b/winml/lib/Api/impl/ISequenceFeatureValue.h new file mode 100644 index 0000000000000..131a3a4814dbd --- /dev/null +++ b/winml/lib/Api/impl/ISequenceFeatureValue.h @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +namespace Windows::AI::MachineLearning { + +/* [uuid("529d0bca-4c6c-48c1-9bd3-e1ea2e816348"), feature, contract, object, exclusiveto] */ +MIDL_INTERFACE("529d0bca-4c6c-48c1-9bd3-e1ea2e816348") +ISequenceFeatureValue : public ::IUnknown { + public: + /* [propget] */ virtual HRESULT STDMETHODCALLTYPE get_ElementDescriptor( + /* [out, retval] */ winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor * result) = 0; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/impl/MapBase.h b/winml/lib/Api/impl/MapBase.h new file mode 100644 index 0000000000000..83d8f112a87cd --- /dev/null +++ b/winml/lib/Api/impl/MapBase.h @@ -0,0 +1,128 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "TensorKindFrom.h" + +#include "MapFeatureDescriptor.h" +#include "TensorFeatureDescriptor.h" + +namespace Windows::AI::MachineLearning { + +// +// MapBase +// +// This is the base class for all data based Map types. 
+// +// Supported derived classes: +// , , , +// , , , +// +template < + typename TDerived, + typename TKey, + typename TValue> +struct MapBase : winrt::implements< + MapBase, + winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue, + WinML::IMapFeatureValue, + WinML::ILotusValueProviderPrivate> { + static_assert( + std::is_same::value || + std::is_same::value, + "Map keys must be int64_t or winrt::hstring!"); + + static_assert( + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + "Map values must be int64_t, double, float, or winrt::hstring!"); + + using ABIMap = ::winrt::Windows::Foundation::Collections::IMap; + using ABIMapView = ::winrt::Windows::Foundation::Collections::IMapView; + + MapBase(ABIMap const& data) : data_(data) {} + + static winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue Create() { + auto abiMap = winrt::single_threaded_map(); + return winrt::make(abiMap); + } + + static winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue Create(const ABIMap& data) { + return winrt::make(data); + } + + static winrt::Windows::AI::MachineLearning::ILearningModelFeatureValue Create(const ABIMapView& data) { + auto abiMap = winrt::single_threaded_map(); + for (const auto& pair : data) { + auto key = pair.Key(); + auto value = pair.Value(); + abiMap.Insert(key, value); + } + + return winrt::make(abiMap); + } + // ILearningModelFeatureValue implementation + winrt::Windows::AI::MachineLearning::LearningModelFeatureKind Kind() { + return winrt::Windows::AI::MachineLearning::LearningModelFeatureKind::Map; + } + + STDMETHOD(get_KeyKind) + (winrt::Windows::AI::MachineLearning::TensorKind* kind) { + FAIL_FAST_IF_NULL(kind); + *kind = TensorKindFrom::Type; + return S_OK; + } + + STDMETHOD(get_ValueDescriptor) + (winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor* result) { + FAIL_FAST_IF_NULL(result); + + *result = 
TensorFeatureDescriptorFrom::CreateAnonymous(std::vector{}); + + return S_OK; + } + + STDMETHOD(GetValue) + (WinML::BindingContext& context, IValue** out) { + auto session = context.session.as(); + auto engine = session->GetEngine(); + + if (context.type == WinML::BindingType::kInput) { + RETURN_IF_FAILED(engine->CreateMapValue(reinterpret_cast<::IInspectable*>(winrt::get_abi(data_)), TensorKindFrom::Type, TensorKindFrom::Type, out)); + } else { + RETURN_IF_FAILED(engine->CreateNullValue(out)); + } + return S_OK; + } + + STDMETHOD(IsPlaceholder) + (bool* pIsPlaceHolder) { + FAIL_FAST_IF_NULL(pIsPlaceHolder); + *pIsPlaceHolder = false; + return S_OK; + } + + STDMETHOD(UpdateSourceResourceData) + (BindingContext& context, IValue* value) { + data_.Clear(); + auto session = context.session.as(); + auto engine = session->GetEngine(); + RETURN_IF_FAILED(engine->FillFromMapValue(reinterpret_cast<::IInspectable*>(winrt::get_abi(data_)), TensorKindFrom::Type, TensorKindFrom::Type, value)); + return S_OK; + } + + STDMETHOD(AbiRepresentation) + ( + winrt::Windows::Foundation::IInspectable& abiRepresentation) { + data_.as(abiRepresentation); + return S_OK; + } + + private: + ABIMap data_; +}; + +} // namespace Windows::AI::MachineLearning diff --git a/winml/lib/Api/impl/SequenceBase.h b/winml/lib/Api/impl/SequenceBase.h new file mode 100644 index 0000000000000..68281491e031b --- /dev/null +++ b/winml/lib/Api/impl/SequenceBase.h @@ -0,0 +1,162 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "MapFeatureDescriptor.h" +#include "SequenceFeatureDescriptor.h" +#include "TensorFeatureDescriptor.h" + +namespace Windows::AI::MachineLearning { + +// SequenceBase +// +// This is the base class for all data based Sequence types. 
+// +// Supported derived classes: +// Map, Map +// +template +struct SequenceBase : public winrt::implements< + SequenceBase, + winml::ILearningModelFeatureValue, + WinML::ISequenceFeatureValue, + WinML::ILotusValueProviderPrivate> { + using ABISequence = wfc::IIterable; + using AbiMapStringToFloat = wfc::IMap; + using AbiMapInt64BitToFloat = wfc::IMap; + + template struct SequenceAbiTypeInfo { + static constexpr winml::TensorKind Key = winml::TensorKind::Undefined; + static constexpr winml::TensorKind Value = winml::TensorKind::Undefined; + }; + template <> struct SequenceAbiTypeInfo { + static constexpr winml::TensorKind Key = winml::TensorKind::String; + static constexpr winml::TensorKind Value = winml::TensorKind::Float; + }; + template <> + struct SequenceAbiTypeInfo { + static constexpr winml::TensorKind Key = winml::TensorKind::Int64; + static constexpr winml::TensorKind Value = winml::TensorKind::Float; + }; + + template + void + GetElementDescriptor(winml::ILearningModelFeatureDescriptor* result) { + static_assert(false, "Only sequences of of map and map are supported.") + } + + template <> + void + GetElementDescriptor>( + winml::ILearningModelFeatureDescriptor* result) { + // zero dimensional tensor has empty shape + auto value_descriptor = + WinML::TensorFeatureDescriptorFrom::CreateAnonymous( + std::vector{}); + *result = + winrt::make( + nullptr /* set to null as values are name-less */, + nullptr /* set to null as values are description-less */, + false /* set to false as values dont have required annotations */, + winml::TensorKind::String /* key kind */, + value_descriptor /* value kind */); + } + + template <> + void + GetElementDescriptor>( + winml::ILearningModelFeatureDescriptor* result) { + // zero dimensional tensor has empty shape + auto value_descriptor = + WinML::TensorFeatureDescriptorFrom::CreateAnonymous( + std::vector{}); + *result = + winrt::make( + nullptr /* set to null as values are name-less */, + nullptr /* set to null as values 
are description-less */, + false /* set to false as values don't have required annotations */, + winml::TensorKind::Int64 /* key kind */, + value_descriptor /* value kind */); + } + + SequenceBase(const ABISequence& data) : data_(data) {} + + static winml::ILearningModelFeatureValue + Create() { + auto sequence = winrt::single_threaded_vector(); + return winrt::make(sequence); + } + + static winml::ILearningModelFeatureValue + Create( + const ABISequence& data) { + return winrt::make(data); + } + + // ILearningModelFeatureValue implementation + winml::LearningModelFeatureKind + Kind() { + return winml::LearningModelFeatureKind::Sequence; + } + + STDMETHOD(get_ElementDescriptor) + ( + winml::ILearningModelFeatureDescriptor* result) { + FAIL_FAST_IF_NULL(result); + + GetElementDescriptor(result); + + return S_OK; + } + + STDMETHOD(GetValue)( + WinML::BindingContext& context, + IValue** out) { + auto session = context.session.as(); + auto engine = session->GetEngine(); + + if (context.type == WinML::BindingType::kInput) { + // In opset 10, all ops that use sequences are seq. + // In opset 11, we will need to support seq> as well. 
+ RETURN_IF_FAILED(engine->CreateSequenceOfMapsValue( + reinterpret_cast<::IInspectable*>(winrt::get_abi(data_)), + SequenceAbiTypeInfo::Key, SequenceAbiTypeInfo::Value, out)); + } else { + RETURN_IF_FAILED(engine->CreateNullValue(out)); + } + return S_OK; + } + + STDMETHOD(IsPlaceholder) + ( + bool* p_is_placeholder) { + FAIL_FAST_IF_NULL(p_is_placeholder); + *p_is_placeholder = false; + return S_OK; + } + + STDMETHOD(UpdateSourceResourceData)( + BindingContext& context, + IValue* out) { + auto writable_vector = data_.as>(); + writable_vector.Clear(); + + auto session = context.session.as(); + auto engine = session->GetEngine(); + RETURN_IF_FAILED(engine->FillSequenceOfMapsValue(reinterpret_cast<::IInspectable*>(winrt::get_abi(data_)), SequenceAbiTypeInfo::Key, SequenceAbiTypeInfo::Value, out)); + + return S_OK; + } + + STDMETHOD(AbiRepresentation)( + wf::IInspectable& abi_representation) { + data_.as(abi_representation); + return S_OK; + } + + private: + ABISequence data_; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/impl/Tensor.h b/winml/lib/Api/impl/Tensor.h new file mode 100644 index 0000000000000..b5f694590efe9 --- /dev/null +++ b/winml/lib/Api/impl/Tensor.h @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "TensorBuffer.h" + +// +// the Tensor class is the actual object for CPU memory buffers. 
+// TensorBase contains one of these to represent the raw memory +// GetCpuResource() returns it +// +namespace Windows::AI::MachineLearning { +template +class Tensor { + private: + using TensorBuffer = TensorBuffer; + using TensorBufferPtr = typename TensorBuffer::TensorBufferPtr; + + TensorBufferPtr m_buffer; + std::vector shape_; + + public: + Tensor() = delete; + + Tensor( + std::vector const& shape, + winrt::Windows::Storage::Streams::IBuffer buffer) : shape_(shape), + m_buffer( + TensorBuffer::Create( + static_cast( + std::accumulate( + std::begin(shape), + std::end(shape), + static_cast(1), + std::multiplies())), + buffer)) { + } + + Tensor( + std::vector const& shape) : shape_(shape), + m_buffer( + TensorBuffer::Create( + static_cast( + std::accumulate( + std::begin(shape), + std::end(shape), + static_cast(1), + std::multiplies())))) { + } + + Tensor( + std::vector const&& shape) : shape_(std::move(shape)), + m_buffer( + TensorBuffer::Create( + static_cast( + std::accumulate( + std::begin(shape), + std::end(shape), + static_cast(1), + std::multiplies())))) { + } + + auto size() const { + return m_buffer->Size(); + } + + auto size_in_bytes() const { + return m_buffer->SizeInBytes(); + } + + auto buffer() { + return m_buffer->Buffer(); + } + + void set(uint32_t size, const T* pData) { + m_buffer->Set(size, pData); + } + + void set(std::vector&& other) { + m_buffer->Set(other); + } + + const std::vector& shape() const { + return shape_; + } + + auto get_tensor_buffer() { + return m_buffer; + } +}; +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/impl/TensorBase.h b/winml/lib/Api/impl/TensorBase.h new file mode 100644 index 0000000000000..940546add4741 --- /dev/null +++ b/winml/lib/Api/impl/TensorBase.h @@ -0,0 +1,884 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#pragma warning(push) +#pragma warning(disable : 6387) + +#include "LearningModelBinding.h" +#include "LearningModelDevice.h" +#include "LearningModelSession.h" +#include "TensorKindFrom.h" +#include "TensorMemoryBufferReference.h" + +#include "core/session/onnxruntime_c_api.h" + +namespace Windows::AI::MachineLearning { + +// TensorBase +// +// This is the base class for all data based Tensor types. It exposes array and IVectorView +// based getter and setters. +// +// Look in FeatureValue.h to see where all of them actually get created with CREATE_TENSOR() +// +// Supported derived classes: +// Float, Int8, UInt8, UInt16, Int16, Int32, Int64, Boolean, Double, UInt32, UInt64 +// +// Unsupported types +// Float16 and String have different access patterns and Int8, Complex64, Complex128 are unsupported +// +template +struct TensorBase : TBase { + template + static void ASSERT_TEMPLATE_PARAMETERS() { + // This adds compile time checks that ensure that the API can only be called when: + // 1) the first template parameter matches the internal type (T), + // since the api attempts copy the tensor memory of type T into a vector of type ElementType. + // 2) the second template parameter matches the return type + static_assert( + std::is_same::value, + "This API can only be called with template parameters that match its internal data type T."); + static_assert( + std::is_same::value, + "This API can only be called with template parameters that match its internal data type T."); + } + + template + static void ASSERT_TEMPLATE_PARAMETERS_EXACT() { + // This adds compile time checks that ensure that the API can only be called when: + // 1) the conditions of ASSERT_TEMPLATE_PARAMETERS() are met. + // 2) the ABI type (ViewT) matches the internal type (t). + ASSERT_TEMPLATE_PARAMETERS(); + + static_assert( + std::is_same::value, + "This API can only be called with matching T and ViewT. 
Explicit specialization is required."); + } + + /// On creation, tensors can either: + /// 1) act as a placeholder without any backing memory (output tensors, chained values). In this case we + /// create the backing memory when the buffer is accessed. The buffer is allocated in one of these scenarios: + /// GPUTensorize during binding (used to create DML resources for chaining) + /// UpdateSourceResourceData after eval (used for output placeholder tensors or unbound outputs) + /// GetBuffer when accessed by users + /// a) TensorBase() + /// 2) allocate backing cpu memory (when a shape is provided) + /// a) TensorBase(std::vector const& shape) + /// b) TensorBase(winrt::Windows::Foundation::Collections::IIterable const& shape) + /// 3) use provided backing gpu memory + /// a) TensorBase(std::vector const& shape, ID3D12Resource* pResource) + TensorBase() : m_resources(std::make_shared>()) { + } + + TensorBase(winrt::Windows::Foundation::Collections::IIterable const& shape) : shape_(begin(shape), end(shape)), + m_resources(std::make_shared>()) { + GetCpuResource() = std::make_shared>(shape_); + } + + TensorBase(std::vector const& shape) : shape_(shape), + m_resources(std::make_shared>()) { + GetCpuResource() = std::make_shared>(shape_); + } + + TensorBase(std::vector const& shape, ID3D12Resource* resource) : shape_(shape), + m_resources(std::make_shared>()) { + // This Api is not supported for TensorString + WINML_THROW_HR_IF_TRUE_MSG( + E_ILLEGAL_METHOD_CALL, + (std::is_same::value), + "TensorString objects cannot be created from a ID3D12Resource!"); + + GetGpuResource().copy_from(resource); + } + + HRESULT CreateGPUMLValue(ID3D12Resource* resource, BindingContext& context, IValue** out) { + THROW_HR_IF_NULL(E_INVALIDARG, resource); + + auto session = context.session.as(); + auto device = session->Device().as(); + WINML_THROW_HR_IF_TRUE_MSG(WINML_ERR_INVALID_BINDING, + device->IsCpuDevice(), + "Cannot create GPU tensor on CPU device"); + + auto engine = 
session->GetEngine(); + RETURN_IF_FAILED(engine->CreateTensorValueFromExternalD3DResource(resource, shape_.data(), shape_.size(), TensorKind(), out)); + return S_OK; + } + + HRESULT CPUTensorize(WinML::BindingContext& context, IValue** out) { + auto session = context.session.as(); + auto engine = session->GetEngine(); + + if (GetCpuResource() != nullptr) { + return CreateTensorValueFromExternalBuffer(engine, out); + } + + // If there is no matching cpu resource, then fallback to a gpu resource + if (GetGpuResource() != nullptr) { + return CreateGPUMLValue(GetGpuResource().get(), context, out); + } + + WINML_THROW_HR(WINML_ERR_INVALID_BINDING); + } + + HRESULT GPUTensorize(WinML::BindingContext& context, IValue** out) { + if (GetGpuResource() != nullptr) { + return CreateGPUMLValue(GetGpuResource().get(), context, out); + } + + // Get engine + auto session = context.session.as(); + auto engine = session->GetEngine(); + + // If there is no matching gpu resource, then fallback to a cpu resource + if (GetCpuResource() != nullptr) { + return CreateTensorValueFromExternalBuffer(engine, out); + } + + if (TensorKind() == winrt::Windows::AI::MachineLearning::TensorKind::String) { + // Lazily allocate the cpu TensorString resource + // TensorStrings are CPU only, and so a gpu resource cannot be allocated for them. 
+ GetCpuResource() = std::make_shared>(shape_); + return CreateTensorValueFromExternalBuffer(engine, out); + } else { + // Try to allocate the backing memory for the caller + auto bufferSize = std::accumulate(std::begin(shape_), std::end(shape_), static_cast(1), std::multiplies()); + auto bufferByteSize = sizeof(T) * bufferSize; + + // DML needs the resources' sizes to be a multiple of 4 bytes + if (bufferByteSize % 4 != 0) { + bufferByteSize += 4 - (bufferByteSize % 4); + } + + D3D12_HEAP_PROPERTIES heapProperties = { + D3D12_HEAP_TYPE_DEFAULT, + D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + D3D12_MEMORY_POOL_UNKNOWN, + 0, + 0}; + D3D12_RESOURCE_DESC resourceDesc = { + D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + static_cast(bufferByteSize), + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; + + auto device = session->Device().as(); + + winrt::com_ptr gpu_resource = nullptr; + device->GetD3DDevice()->CreateCommittedResource( + &heapProperties, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + __uuidof(ID3D12Resource), + gpu_resource.put_void()); + + GetGpuResource() = gpu_resource; + + return CreateGPUMLValue(GetGpuResource().get(), context, out); + } + } + + void EnsureBufferNotInUse() { + auto isBufferInUse = + std::any_of( + m_outstandingReferences.begin(), + m_outstandingReferences.end(), + [](auto weakRef) { return weakRef.get() != nullptr; }); + + WINML_THROW_HR_IF_TRUE_MSG(WINML_ERR_INVALID_BINDING, isBufferInUse, "The tensor has outstanding memory buffer references that must be closed prior to evaluation!"); + } + + // ILotusValueProviderPrivate::GetOrtValue + STDMETHOD(GetValue) + (WinML::BindingContext& context, IValue** out) { + RETURN_HR_IF_NULL_MSG( + WINML_ERR_INVALID_BINDING, + m_resources, + "The tensor has been closed and its resources have been detached!"); + + EnsureBufferNotInUse(); + + auto spSession = context.session.as(); + auto spDevice = 
spSession->Device().as(); + + if (spDevice->IsCpuDevice()) { + RETURN_IF_FAILED(CPUTensorize(context, out)); + } else { + RETURN_IF_FAILED(GPUTensorize(context, out)); + } + + return S_OK; + } + + static int64_t ShapeSize(std::vector shape) { + // for each dim + int64_t size = 1; + for (size_t i = 0; i < shape.size(); i++) { + // find out it's total size + size *= shape[i]; + // make sure there are no invalid dimensions (-1 or any invalid shape) + THROW_HR_IF(E_INVALIDARG, shape[i] <= 0); + } + return size; + } + + template + void SetBufferFromValueResourceBuffer(uint32_t size, void* data) { + // This adds compile time checks that ensure that the API can only be called when + // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. + ASSERT_TEMPLATE_PARAMETERS(); + + GetCpuResource()->set(size, reinterpret_cast(data)); + } + + template <> + void SetBufferFromValueResourceBuffer(uint32_t size, void* data) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + GetCpuResource()->get_tensor_buffer()->Set(size, reinterpret_cast(data)); + } + + template + HRESULT CreateTensorValueFromExternalBuffer(WinML::IEngine* engine, IValue** value) { + // This adds compile time checks that ensure that the API can only be called when + // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. 
+ ASSERT_TEMPLATE_PARAMETERS(); + + RETURN_IF_FAILED_MSG(engine->CreateTensorValueFromExternalBuffer( + GetCpuResource()->buffer().second, GetCpuResource()->size_in_bytes(), GetCpuResource()->shape().data(), + GetCpuResource()->shape().size(), TensorKind(), value), + "Failed to prepare buffer for copy back from device resource."); + return S_OK; + } + + template <> + HRESULT CreateTensorValueFromExternalBuffer(WinML::IEngine* engine, IValue** value) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + std::vector raw_values; + auto string_array = GetCpuResource()->buffer().second; + std::transform( + string_array, + string_array + GetCpuResource()->size_in_bytes(), + std::back_inserter(raw_values), + [&](auto& str) { return str.c_str(); }); + + RETURN_IF_FAILED_MSG(engine->CreateStringTensorValueFromDataWithCopy( + raw_values.data(), raw_values.size(), GetCpuResource()->shape().data(), + GetCpuResource()->shape().size(), value), + "Failed to prepare buffer for copy back from device resource."); + return S_OK; + } + + // ILotusValueProviderPrivate::UpdateSourceResourceData + STDMETHOD(UpdateSourceResourceData) + (BindingContext& context, IValue* value) { + RETURN_HR_IF_NULL_MSG( + E_ILLEGAL_METHOD_CALL, + m_resources, + "The tensor has been closed and its resources have been detached during evaluation!"); + + WinML::Resource updated_resource; + RETURN_IF_FAILED(value->GetResource(updated_resource)); + + // get the shape + RETURN_IF_FAILED_MSG(value->GetTensorShape(shape_), "Failed to get the tensor shape from resource!"); + + // make sure we always have a CPU resource + if (GetCpuResource() == nullptr) { + GetCpuResource() = std::make_shared>(shape_); + } + + bool is_cpu; + if (SUCCEEDED(value->IsCpu(&is_cpu)) && is_cpu) { + // Get the data pointer and size + T* data; + uint32_t size; + std::tie(size, data) = GetCpuResource()->buffer(); + + if (updated_resource.get() != reinterpret_cast(data)) { + // 
Only copy the data if the source and destination are not the same! + // The engine provided buffer will not match the tensor buffer when + // the tensor is created as a placeholder output, or as an unbound output. + auto shape_size = static_cast(ShapeSize(shape_)); + SetBufferFromValueResourceBuffer(shape_size, updated_resource.get()); + } + } else { + // If we got a gpu resource, we should move the data to the cpu so accessors can retrieve the data. + // We don't need to copy the engine provided dx resource into a local copy since we always preallocate gpu + // resources for tensors. Therefore we are certain that the returned dxresource is the same as the one we passed in + // and was updated in place. + auto spSession = context.session.as(); + auto engine = spSession->GetEngine(); + + winrt::com_ptr dest; + RETURN_IF_FAILED_MSG(CreateTensorValueFromExternalBuffer(engine, dest.put()), + "Failed to prepare buffer for copy back from device resource."); + RETURN_IF_FAILED(engine->CopyValueAcrossDevices(value, dest.get())); + } + + return S_OK; + } + + /// + /// Tensor Creation Patterns + /// + + // ITensor::Create + static typename TBase::class_type Create() try { + return winrt::make(); + } + WINML_CATCH_ALL + + // ITensor::Create + static typename TBase::class_type Create( + winrt::Windows::Foundation::Collections::IIterable const& shape) try { + typename TBase::class_type tensorValue = winrt::make(); + auto tensorValueImpl = tensorValue.as(); + tensorValueImpl->shape_ = std::vector(begin(shape), end(shape)); + return tensorValue; + } + WINML_CATCH_ALL + + // ITensor::CreateFromIterable + static typename TBase::class_type CreateFromIterable( + winrt::Windows::Foundation::Collections::IIterable shape, + winrt::Windows::Foundation::Collections::IIterable const& data) try { + std::vector vecShape(begin(shape), end(shape)); + if (HasFreeDimensions(vecShape)) { + // If the tensor is being created with a free dimension, the data needs to + // provide its actual size so 
that the free dimension can be computed. + // In the case of IIterable, there is no Size accessor, and so we require that + // in this case the underlying object also implement IVectorView, so that we may + // efficiently query the size of the data. + if (auto vectorView = data.try_as>()) { + vecShape = GetAdjustedShape(vecShape, vectorView.Size()); + } + } + + typename TBase::class_type tensorValue = winrt::make(vecShape); + auto tensorValueImpl = tensorValue.as(); + tensorValueImpl->SetBufferFromIterable(data); + return tensorValue; + } + WINML_CATCH_ALL + + // ITensor::CreateFromArray + static typename TBase::class_type CreateFromArray( + winrt::Windows::Foundation::Collections::IIterable shape, + winrt::array_view data) try { + std::vector vecShape(begin(shape), end(shape)); + return CreateFromArrayInternal(vecShape, data); + } + WINML_CATCH_ALL + + // ITensor::CreateFromShapeArrayAndDataArray + static typename TBase::class_type CreateFromShapeArrayAndDataArray( + winrt::array_view shape, + winrt::array_view data) try { + std::vector vecShape(shape.begin(), shape.end()); + return CreateFromArrayInternal(vecShape, data); + } + WINML_CATCH_ALL + + static typename TBase::class_type CreateFromArrayInternal( + std::vector shape, + winrt::array_view data) { + if (HasFreeDimensions(shape)) { + shape = GetAdjustedShape(shape, data.size()); + } + + typename TBase::class_type tensorValue = winrt::make(shape); + auto tensorValueImpl = tensorValue.as(); + tensorValueImpl->SetBufferFromArray(data); + return tensorValue; + } + + // ITensor::CreateFromBuffer + static typename TBase::class_type CreateFromBuffer( + winrt::array_view shape, + winrt::Windows::Storage::Streams::IBuffer const& buffer) try { + std::vector vecShape(shape.begin(), shape.end()); + typename TBase::class_type tensorValue = winrt::make(); + auto tensorValueImpl = tensorValue.as(); + tensorValueImpl->shape_ = vecShape; + tensorValueImpl->GetCpuResource() = std::make_shared>(vecShape, buffer); + return 
tensorValue; + } + WINML_CATCH_ALL + + // ITensorNative::CreateFromD3D12Resource + static HRESULT CreateFromD3D12Resource( + ID3D12Resource* value, + __int64* shape, + int shapeCount, + IUnknown** result) { + try { + // make sure they gave us a valid shape + THROW_HR_IF(E_INVALIDARG, shape == nullptr); + THROW_HR_IF(E_INVALIDARG, shapeCount == 0); + + // turn the shape into a vector<> + std::vector shapeVector(shape, shape + shapeCount); + + // for each dim + UINT64 width = ShapeSize(shapeVector) * sizeof(T); + + // make sure they gave us a valid value + THROW_HR_IF(E_INVALIDARG, value == nullptr); + + // make sure it's a d3d12 buffer (!texture) + auto desc = value->GetDesc(); + THROW_HR_IF(E_INVALIDARG, desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); + + // make sure it's big enough + THROW_HR_IF(E_INVALIDARG, desc.Width < width); + + // make the underlying winrt object + typename TBase::class_type tensorValue = winrt::make(shapeVector, value); + + // return it (the caller owns the ref) + *result = tensorValue.as().detach(); + return S_OK; + } + WINML_CATCH_ALL_COM + } + + static std::vector GetAdjustedShape( + std::vector shape, + uint64_t actualSize) { + auto shapeSize = std::accumulate(std::begin(shape), std::end(shape), static_cast(1), + [](const auto& accumulatedValue, const auto& next) { + if (next == -1) { + return accumulatedValue; + } else { + return accumulatedValue * next; + } + }); + + THROW_HR_IF(E_INVALIDARG, actualSize % shapeSize != 0); + + auto foundIt = std::find_if(std::begin(shape), std::end(shape), [](auto dim) { return dim == -1; }); + auto iFreeDimension = std::distance(std::begin(shape), foundIt); + + shape[iFreeDimension] = static_cast(actualSize / shapeSize); + return shape; + } + + static bool HasFreeDimensions(std::vector const& shape) { + // Ensure that all dimension values are either -1, or positive + auto unsupportedIt = + std::find_if(begin(shape), end(shape), + [](const auto& dim) { + return dim < -1; + }); + 
THROW_HR_IF(E_INVALIDARG, unsupportedIt != end(shape)); + + auto nFreeDimensions = std::count(begin(shape), end(shape), -1); + if (nFreeDimensions == 0) { + return false; + } else if (nFreeDimensions == 1) { + return true; + } else { + throw winrt::hresult_invalid_argument(); + } + } + + /// + /// Tensor Data Buffer Accessor APIs + /// + + // IMemoryBuffer::CreateReference + winrt::Windows::Foundation::IMemoryBufferReference CreateReference() try { + // Create a TensorMemoryBufferReference + + // Per IMemoryBuffer.CreateReference (https://docs.microsoft.com/en-us/uwp/api/windows.foundation.imemorybuffer.createreference) + // "This method always successfully returns a new IMemoryBufferReference object even after the IMemoryBuffer + // "has been closed. In that case, the returned IMemoryBufferReference is already closed." + // Creating a TensorMemoryBufferReference with a null pointer is equivalent to creating it as closed. + + auto memoryBufferReference = winrt::make>(shape_, m_resources); + + // Create and cache a weak reference to the TensorMemoryBufferReference + winrt::weak_ref> weak(memoryBufferReference.as>()); + m_outstandingReferences.push_back(weak); + + // Return the strong ref to the caller + return memoryBufferReference; + } + WINML_CATCH_ALL + + // IMemoryBuffer::Close + void Close() try { + // Let go of the lifetime of the resources, this will indicate that the memorybuffer is closed + m_resources = nullptr; + } + WINML_CATCH_ALL + + // ITensorNative::GetBuffer + STDMETHOD(GetBuffer) + (BYTE** value, UINT32* capacity) { + // This Api is not supported for TensorString + RETURN_HR_IF_MSG( + ERROR_INVALID_FUNCTION, + (std::is_same_v), + "TensorString objects cannot return byte buffers!"); + + RETURN_HR_IF_NULL_MSG( + E_ILLEGAL_METHOD_CALL, + m_resources, + "The tensor has been closed and its resources have been detached!"); + + return m_resources->GetBuffer(shape_, value, capacity); + } + + // ITensorNative::GetD3D12Resource + 
STDMETHOD(GetD3D12Resource) + (ID3D12Resource** ppResource) { + try { + // This Api is not supported for TensorString + RETURN_HR_IF(ERROR_INVALID_FUNCTION, (std::is_same::value)); + RETURN_HR_IF_NULL_MSG( + E_ILLEGAL_METHOD_CALL, + m_resources, + "The tensor has been closed and its resources have been detached!"); + + GetGpuResource().copy_to(ppResource); + return S_OK; + } + WINML_CATCH_ALL_COM + } + + // ITensor::GetAsVectorView + template + winrt::Windows::Foundation::Collections::IVectorView GetAsVectorView() try { + // This adds compile time checks that ensure that the API can only be called when: + // 1) the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. + // 2) the signature of the method conforms to the ABI signature and the return value matches the ABI Return Type (ViewT). + ASSERT_TEMPLATE_PARAMETERS_EXACT(); + + // This method returns the raw tensor data as an IVectorView. + // This is a slow API that performs a buffer copy into a caller + // owned IVectorView object. + + // Get the raw buffer pointer from the native tensor implementation. + uint32_t size; + ElementType* pData; + std::tie(size, pData) = GetCpuResource()->buffer(); + + // Copy data that will be passed back to caller. + auto copy = std::vector(pData, pData + size); + + // Create IVectorView from copied data. 
+ return winrt::single_threaded_vector(std::move(copy)).GetView(); + } + WINML_CATCH_ALL + + // Specialized version to convert float16 to float + template <> + winrt::Windows::Foundation::Collections::IVectorView GetAsVectorView() try { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + WinML::Half* pBuffer; + + // Get the data pointer and size + std::tie(size, pBuffer) = GetCpuResource()->buffer(); + + // Copy the HALFs to floats + std::vector floatValue(size); + DirectX::PackedVector::XMConvertHalfToFloatStream( + floatValue.data(), + sizeof(float) /* output stride */, + reinterpret_cast(pBuffer), + sizeof(WinML::Half) /* input stride */, + size); + + // Create IVectorView from copied data. + return winrt::single_threaded_vector(std::move(floatValue)).GetView(); + } + WINML_CATCH_ALL + + // Specialized version to convert string to hstring + template <> + winrt::Windows::Foundation::Collections::IVectorView GetAsVectorView() try { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + std::string* pData; + std::tie(size, pData) = GetCpuResource()->buffer(); + + auto copy = std::vector(size, L""); + std::generate( + copy.begin(), + copy.end(), + [n = 0, &pData]() mutable { + return WinML::Strings::HStringFromUTF8(pData[n++]); + }); + + return winrt::single_threaded_vector(std::move(copy)).GetView(); + } + WINML_CATCH_ALL + + // Specialized version to convert int8_t to uint8_t + template <> + winrt::Windows::Foundation::Collections::IVectorView GetAsVectorView() try { + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + int8_t* pData; + std::tie(size, pData) = GetCpuResource()->buffer(); + + // Copy data that will be passed back to caller. + + gsl::span span(reinterpret_cast(pData), size); + std::vector copy(span.begin(), span.begin() + size); + + // Create IVectorView from copied data. 
+ return winrt::single_threaded_vector(std::move(copy)).GetView(); + } + WINML_CATCH_ALL + + /// + /// Tensor Property Accessors + /// + + // ILearningModelFeatureValue implementation + winrt::Windows::AI::MachineLearning::LearningModelFeatureKind Kind() try { + return winrt::Windows::AI::MachineLearning::LearningModelFeatureKind::Tensor; + } + WINML_CATCH_ALL + + // ITensor::TensorKind + winrt::Windows::AI::MachineLearning::TensorKind TensorKind() try { + return TensorKindFrom::Type; + } + WINML_CATCH_ALL + + // ITensor::Shape + winrt::Windows::Foundation::Collections::IVectorView Shape() try { + std::vector copy(shape_.cbegin(), shape_.cend()); + return winrt::single_threaded_vector(std::move(copy)).GetView(); + } + WINML_CATCH_ALL + + // ILotusValueProviderPrivate::AbiRepresentation + STDMETHOD(AbiRepresentation) + (winrt::Windows::Foundation::IInspectable& abiRepresentation) { + using ABIType = typename TBase::class_type; + ABIType to = nullptr; + RETURN_IF_FAILED(this->QueryInterface( + winrt::guid_of(), + reinterpret_cast(winrt::put_abi(to)))); + + to.as(abiRepresentation); + + return S_OK; + } + + // ILotusValueProviderPrivate::IsPlaceholder + STDMETHOD(IsPlaceholder) + (bool* pIsPlaceHolder) { + RETURN_HR_IF_NULL(E_POINTER, pIsPlaceHolder); + RETURN_HR_IF_NULL_MSG( + E_ILLEGAL_METHOD_CALL, + m_resources, + "The tensor has been closed and its resources have been detached!"); + + *pIsPlaceHolder = GetCpuResource() == nullptr && GetGpuResource() == nullptr; + return S_OK; + } + + private: + /// + /// SetBufferFromArray and parameterized specializations for MLFloat16, int8_t, and std::string + /// + template + void SetBufferFromArray(winrt::array_view data) { + // This adds compile time checks that ensure that the API can only be called when + // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. + ASSERT_TEMPLATE_PARAMETERS_EXACT(); + + // This method accepts data as an array, T[], from the caller. 
+ // This is a non-destructive API, so the caller data is + // left untouched, and the data is copied into internal buffers. + GetCpuResource()->set(data.size(), data.data()); + } + + // Specialized version to convert floats to float16 + template <> + void SetBufferFromArray(winrt::array_view data) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + WinML::Half* pBuffer; + + // Get the data pointer and size + std::tie(size, pBuffer) = GetCpuResource()->buffer(); + + THROW_HR_IF(E_UNEXPECTED, data.size() != size); + DirectX::PackedVector::XMConvertFloatToHalfStream( + reinterpret_cast(pBuffer), + sizeof(WinML::Half) /* output stride */, + data.data(), + sizeof(float) /* input stride */, + data.size()); + } + + // Specialized version to convert uint8_t to int8_t + template <> + void SetBufferFromArray(winrt::array_view data) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + auto size = data.size(); + auto pData = data.data(); + + GetCpuResource()->set(size, reinterpret_cast(const_cast(pData))); + } + + // Specialized version to convert hstring to string + template <> + void SetBufferFromArray(winrt::array_view data) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + std::string* pBuffer; + + // Get the data pointer and size + std::tie(size, pBuffer) = GetCpuResource()->buffer(); + THROW_HR_IF(E_UNEXPECTED, data.size() > size); + + // Convert and copy into the underlying buffer + std::transform( + data.begin(), data.end(), pBuffer, + [](auto& element) mutable { + return WinML::Strings::UTF8FromHString(element); + }); + } + + /// + /// SetBufferFromIterable and parameterized specializations for MLFloat16, int8_t, and std::string + /// + template + void SetBufferFromIterable( + winrt::Windows::Foundation::Collections::IIterable const& 
data) { + // This adds compile time checks that ensure that the API can only be called when + // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. + ASSERT_TEMPLATE_PARAMETERS_EXACT(); + + uint32_t size; + ElementType* pBuffer; + + // Get the data pointer and size + std::tie(size, pBuffer) = GetCpuResource()->buffer(); + + // This method accepts data as an IVectorView. + // This is a non-destructive API, so the caller data is + // left untouched, and the data is copied into internal buffers. + std::copy(begin(data), end(data), pBuffer); + } + + // Specialized version to convert floats to float16 + template <> + void SetBufferFromIterable( + winrt::Windows::Foundation::Collections::IIterable const& data) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + WinML::Half* pBuffer; + + // Get the data pointer and size + std::tie(size, pBuffer) = GetCpuResource()->buffer(); + + // Now that we take in IIterables and not vector views + // how do we validate size??? 
+ // THROW_HR_IF(E_UNEXPECTED, data.Size() != size); + + std::transform( + begin(data), + end(data), + reinterpret_cast(pBuffer), + DirectX::PackedVector::XMConvertFloatToHalf); + } + + // Specialized version to convert uint8_t to int8_t + template <> + void SetBufferFromIterable( + winrt::Windows::Foundation::Collections::IIterable const& data) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + int8_t* pBuffer; + + // Get the data pointer and size + std::tie(size, pBuffer) = GetCpuResource()->buffer(); + + std::transform(begin(data), end(data), pBuffer, [](auto element) { return static_cast(element); }); + } + + // Specialized version to convert hstring to string + template <> + void SetBufferFromIterable( + winrt::Windows::Foundation::Collections::IIterable const& data) { + // Ensure that this call is being called with the correct template parameters + ASSERT_TEMPLATE_PARAMETERS(); + + uint32_t size; + std::string* pBuffer; + + // Get the data pointer and size + std::tie(size, pBuffer) = GetCpuResource()->buffer(); + + // Convert and copy into the underlying buffer + std::transform(begin(data), end(data), pBuffer, [](const auto& element) { + return WinML::Strings::UTF8FromHString(element); + }); + } + + std::shared_ptr>& GetCpuResource() { + WINML_THROW_HR_IF_NULL_MSG( + E_ILLEGAL_METHOD_CALL, + m_resources, + "The tensor has been closed and its resources are detached!"); + + return m_resources->CpuResource; + } + + winrt::com_ptr& GetGpuResource() { + WINML_THROW_HR_IF_NULL_MSG( + E_ILLEGAL_METHOD_CALL, + m_resources, + "The tensor has been closed and its resources are detached!"); + + return m_resources->GpuResource; + } + + private: + std::vector shape_; + std::shared_ptr> m_resources; + std::vector>> m_outstandingReferences; + bool m_isClosed = false; +}; + +} // namespace Windows::AI::MachineLearning + +#pragma warning(pop) diff --git a/winml/lib/Api/impl/TensorBuffer.h 
b/winml/lib/Api/impl/TensorBuffer.h new file mode 100644 index 0000000000000..bd8c101c3fa88 --- /dev/null +++ b/winml/lib/Api/impl/TensorBuffer.h @@ -0,0 +1,148 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "robuffer.h" +#include "winrt/Windows.Storage.Streams.h" + +namespace Windows::AI::MachineLearning { +class VectorBuffer : public winrt::implements< + VectorBuffer, + winrt::Windows::Storage::Streams::IBuffer, + Windows::Storage::Streams::IBufferByteAccess> { + public: + VectorBuffer(size_t size) : m_buffer(size) {} + + ~VectorBuffer() {} + + uint32_t Capacity() const { + return static_cast(m_buffer.size()); + } + + uint32_t Length() const { + throw winrt::hresult_error(E_NOTIMPL); + } + + void Length(uint32_t /*value*/) { + throw winrt::hresult_error(E_NOTIMPL); + } + + STDMETHOD(Buffer) + (uint8_t** value) { + RETURN_HR_IF_NULL(E_POINTER, value); + *value = m_buffer.data(); + return S_OK; + } + + private: + std::vector m_buffer; +}; + +template +class TensorBuffer { + winrt::Windows::Storage::Streams::IBuffer m_buffer; + uint32_t m_size; + + TensorBuffer(uint32_t size) : m_size(size), + m_buffer(winrt::make(size * sizeof(T))) { + auto buffer = Buffer(); + + // The initial release of WinML (RS5) shipped with behavior that would + // zero-initialize uninitialized tensors. After measuring, the performance impact + // of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats). + // To maintain parity with RS5 behavior, we always zero out the memory buffer. 
+ memset(buffer.second, 0, buffer.first); + } + + TensorBuffer( + uint32_t size, + winrt::Windows::Storage::Streams::IBuffer buffer) : m_size(size), + m_buffer(buffer) {} + + public: + typedef std::shared_ptr TensorBufferPtr; + + static auto Create(uint32_t size) { + return std::shared_ptr(new TensorBuffer(size)); + } + + static auto Create( + uint32_t size, + winrt::Windows::Storage::Streams::IBuffer buffer) { + return std::shared_ptr(new TensorBuffer(size, buffer)); + } + + // this is the count of elements + auto Size() { + return m_size; + } + + // this is the size in bytes + auto SizeInBytes() { + return m_size * sizeof(T); + } + + auto Buffer() { + T* pData; + auto bufferByteAccess = m_buffer.as(); + bufferByteAccess->Buffer(reinterpret_cast(&pData)); + + return std::make_pair(m_size, pData); + } + + auto Set(uint32_t size, const T* pData) { + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + size <= m_size, + "Argument size (%u) exceeds the tensor size (%u).", + size, + m_size); + + memcpy(Buffer().second, pData, m_buffer.Capacity()); + } + + auto Set(std::vector&& moveableData) { + Set(moveableData.size(), moveableData.data()); + } +}; + +template <> +class TensorBuffer { + std::vector m_buffer; + + TensorBuffer(uint32_t size) : m_buffer(size) {} + + public: + typedef std::shared_ptr TensorBufferPtr; + + static auto Create(uint32_t size) { + return std::shared_ptr(new TensorBuffer(size)); + } + + auto Size() { + return m_buffer.size(); + } + + // this is the size in bytes + auto SizeInBytes() { + return m_buffer.size(); + } + + auto Buffer() { + return std::make_pair(gsl::narrow_cast(m_buffer.size()), m_buffer.data()); + } + + auto Set(uint32_t size, std::string_view* data) { + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + size <= m_buffer.size(), + "Argument size (%d) exceeds the tensor size (%d).", + static_cast(size), + static_cast(m_buffer.size())); + + // Copy + std::copy(data, data + size, m_buffer.begin()); + } +}; +} // namespace 
Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/impl/TensorKindFrom.h b/winml/lib/Api/impl/TensorKindFrom.h new file mode 100644 index 0000000000000..487b9cb3b2370 --- /dev/null +++ b/winml/lib/Api/impl/TensorKindFrom.h @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +namespace Windows::AI::MachineLearning { + +// We need to define our own type for Half since DirectX::PackedVector::Half resolves to uint16_t per its typedef declaration. +// Templates require an actual type name to resolve correctly. +struct Half { + DirectX::PackedVector::HALF value; +}; + +template +struct TensorKindFrom {}; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int8; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt8; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt16; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int16; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt32; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int32; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt64; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int64; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Boolean; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Double; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Float; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Float16; }; +template <> 
+struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::String; }; + +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int8; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt8; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt16; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int16; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt32; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int32; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::UInt64; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Int64; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Boolean; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Double; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Float; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::String; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::String; }; +template <> +struct TensorKindFrom { static const winml::TensorKind Type = winml::TensorKind::Float16; }; + +template +struct TensorFeatureDescriptorFrom { + static winml::ILearningModelFeatureDescriptor + CreateAnonymous( + std::vector shape) { + return winrt::make( + nullptr /* set to null as values are name-less */, + nullptr /* set to null as values are description-less */, + TensorKindFrom::Type, + shape, + false /* set to false as values dont have required annotations */, + false /* set to false as this is not a tensor 
of unsupported metadata */); + } +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/impl/TensorMemoryBufferReference.h b/winml/lib/Api/impl/TensorMemoryBufferReference.h new file mode 100644 index 0000000000000..3463e66f14294 --- /dev/null +++ b/winml/lib/Api/impl/TensorMemoryBufferReference.h @@ -0,0 +1,159 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "Tensor.h" +#include + +#include + +namespace Windows::AI::MachineLearning { +template +struct TensorResources { + // ITensorNative::GetBuffer + STDMETHOD(GetBuffer) ( + std::vector shape, + BYTE** value, UINT32* capacity) { + RETURN_HR_IF_NULL(E_POINTER, value); + RETURN_HR_IF_NULL(E_POINTER, capacity); + + RETURN_HR_IF_MSG( + ERROR_INVALID_FUNCTION, + (std::is_same_v), + "TensorString objects cannot return byte buffers!"); + + try { + *value = nullptr; + *capacity = 0; + + // Lazily allocate the cpu resource on call to GetBuffer + if (CpuResource == nullptr) { + CpuResource = std::make_shared>(shape); + } + + // Get the data pointer and size + T* data; + uint32_t size; + std::tie(size, data) = CpuResource->buffer(); + + // Set out parameters + *capacity = static_cast(size * sizeof(T)); + *value = (BYTE*)data; + return S_OK; + } + WINML_CATCH_ALL_COM + } + + // These are accessed directly by TensorMemoryBufferReference and TensorBase + std::shared_ptr> CpuResource; + winrt::com_ptr GpuResource; +}; + +// This class holds onto the lifetime of TensorResources so that they can be kept alive by TensorBase AND its active MBRs. +// When the last MBR/Tensor object is destroyed then TensorResources and its associated cpu and gpu resources will be destroyed. +// The source MB (the tensor object) holds weak references to its TensorMemoryBufferReference MBRs to determine whether +// there are external callers of the API that are actively using native interface access. 
+// The template parameter is used to determine the type of the underlying cpu resource (float, int, etc...). +template +class TensorMemoryBufferReference : public winrt::implements< + TensorMemoryBufferReference, + winrt::Windows::Foundation::IMemoryBufferReference, + Windows::Foundation::IMemoryBufferByteAccess> { + using ClosedDelegate = winrt::Windows::Foundation::TypedEventHandler; + + public: + // winrt::Windows::Foundation::IMemoryBufferReference + // + // Parameters: + // + // shape: The shape of the tensor being referenced + // tensorResources: An optional shared_ptr to underlying resources (cpu or gpu). + // This will be null when the source Tensor* object has already been closed. + // When the source IMemoryBuffer is closed, the IMemoryBuffer spec requires the + // successful creation of IMemoryBufferReferences in the closed state. + TensorMemoryBufferReference( + std::vector shape, + std::shared_ptr> tensorResources) : m_tensorResources(tensorResources), + m_handlers(), + m_shape(shape) {} + + uint32_t Capacity() const try { + uint32_t uCapacity = 0; + + // Per IMemoryBuffer.CreateReference (https://docs.microsoft.com/en-us/uwp/api/windows.foundation.imemorybuffer.createreference) + // If the IMemoryBufferReference has been closed (m_tensorResources == nullptr) then + // "IMemoryBufferReference instance's Capacity property will be zero." 
+ if (m_tensorResources) { + BYTE* pCPUTensor; + WINML_THROW_IF_FAILED(m_tensorResources->GetBuffer(m_shape, reinterpret_cast(&pCPUTensor), &uCapacity)); + } + + return uCapacity; + } + WINML_CATCH_ALL + + winrt::event_token Closed(const ClosedDelegate& handler) try { + auto token = m_eventTokenCounter++; + m_handlers[token] = handler; + return winrt::event_token{token}; + } + WINML_CATCH_ALL + + void Closed(winrt::event_token const& cookie) try { + m_handlers.erase(cookie.value); + } + WINML_CATCH_ALL + + // Windows::Foundation::IClosable + void Close() try { + if (m_tensorResources) { + // This event must be fired before m_tensorResources are released + // so that callers can access the data one last time. + FireClosed(); + + // When the object is closed, release the reference to the Tensor + m_tensorResources = nullptr; + } + } + WINML_CATCH_ALL + + STDMETHOD(GetBuffer) + ( + _Outptr_result_buffer_(*capacity) BYTE** value, + _Out_ UINT32* capacity) try { + RETURN_HR_IF_NULL(E_POINTER, value); + RETURN_HR_IF_NULL(E_POINTER, capacity); + + *value = nullptr; + *capacity = 0; + + // Per IMemoryBuffer.CreateReference (https://docs.microsoft.com/en-us/uwp/api/windows.foundation.imemorybuffer.createreference) + // If the IMemoryBufferReference has been closed (m_tensorResources == nullptr) then + // "IMemoryBufferByteAccess::GetBuffer method will always return a null memory pointer and zero capacity." 
+ RETURN_HR_IF_NULL(S_OK, m_tensorResources); + + return m_tensorResources->GetBuffer(m_shape, value, capacity); + } + WINML_CATCH_ALL_COM + + private: + void FireClosed() { + winrt::Windows::Foundation::IMemoryBufferReference memoryBufferReference = nullptr; + WINML_THROW_IF_FAILED(this->QueryInterface( + winrt::guid_of(), + reinterpret_cast(winrt::put_abi(memoryBufferReference)))); + + for (auto handler : m_handlers) { + handler.second(memoryBufferReference, nullptr); + } + } + + private: + std::vector m_shape; + std::shared_ptr> m_tensorResources; + std::unordered_map m_handlers; + int64_t m_eventTokenCounter = 0; +}; + +} // namespace Windows::AI::MachineLearning diff --git a/winml/lib/Api/inc/ILotusValueProviderPrivate.h b/winml/lib/Api/inc/ILotusValueProviderPrivate.h new file mode 100644 index 0000000000000..70ec0b4f0ba3d --- /dev/null +++ b/winml/lib/Api/inc/ILotusValueProviderPrivate.h @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "iengine.h" + +// ILotusValueProviderPrivate exposes a private Lotus interface to the engine so that it can retrieve tensor +// resources stored in winrt structures. 
+ +namespace Windows::AI::MachineLearning { + +class PoolObjectWrapper; + +enum class BindingType { kInput, + kOutput }; + +struct BindingContext { + BindingType type = BindingType::kInput; + winrt::Windows::AI::MachineLearning::LearningModelSession session = nullptr; + winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor descriptor = nullptr; + winrt::Windows::Foundation::Collections::IPropertySet properties = nullptr; + std::shared_ptr converter; +}; + +struct __declspec(uuid("27e2f437-0112-4693-849e-e04323a620fb")) __declspec(novtable) ILotusValueProviderPrivate : IUnknown { + virtual HRESULT __stdcall GetValue(BindingContext& binding_context, WinML::IValue** out) = 0; + virtual HRESULT __stdcall IsPlaceholder(bool* is_placeholder) = 0; + virtual HRESULT __stdcall UpdateSourceResourceData(BindingContext& binding_context, WinML::IValue* value) = 0; + virtual HRESULT __stdcall AbiRepresentation(winrt::Windows::Foundation::IInspectable& abi_representation) = 0; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Api/pch/pch.h b/winml/lib/Api/pch/pch.h new file mode 100644 index 0000000000000..d5d2f48c76787 --- /dev/null +++ b/winml/lib/Api/pch/pch.h @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#pragma warning(push) +#pragma warning(disable : 26495) +#pragma warning(disable : 6011) + +#pragma warning(disable : 26451) +#pragma warning(disable : 6387) + +#include "cppwinrt_onnx.h" +#include "dx.h" + +#pragma warning(pop) \ No newline at end of file diff --git a/winml/lib/Common/CommonDeviceHelpers.cpp b/winml/lib/Common/CommonDeviceHelpers.cpp new file mode 100644 index 0000000000000..a653544495dc8 --- /dev/null +++ b/winml/lib/Common/CommonDeviceHelpers.cpp @@ -0,0 +1,214 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// #include "dx.h" +// #include +#include "inc/pch.h" +#if USE_DML +#include +#endif  // USE_DML +#include "inc/CommonDeviceHelpers.h" +#include +#include +#include "LearningModelDevice.h" + +namespace { +constexpr uint32_t c_intelVendorId = 0x8086; +constexpr uint32_t c_nvidiaVendorId = 0x10DE; +constexpr uint32_t c_amdVendorId = 0x1002; + +bool CheckAdapterFP16Blocked(bool isMcdmAdapter, uint32_t vendorId, uint32_t majorVersion, uint32_t minorVersion) { + switch (vendorId) { + case c_intelVendorId: { + if (isMcdmAdapter) { + return false; + } + + // Check Intel GPU driver version + return (majorVersion < 25) || (majorVersion == 25 && minorVersion < 6574) || (majorVersion == 26 && minorVersion < 6572); + } + } + return false; +} + +void ParseDriverVersion(LARGE_INTEGER& version, uint32_t& majorVersion, uint32_t& minorVersion) { + majorVersion = HIWORD(version.HighPart); + minorVersion = LOWORD(version.LowPart); +} + +HRESULT GetDXGIAdapterMetadata(ID3D12Device& device, uint32_t& vendorId, uint32_t& majorVersion, uint32_t& minorVersion) { + winrt::com_ptr spFactory; + RETURN_IF_FAILED(CreateDXGIFactory1(IID_PPV_ARGS(spFactory.put()))); + + winrt::com_ptr spAdapter; + RETURN_IF_FAILED(spFactory->EnumAdapterByLuid(device.GetAdapterLuid(), IID_PPV_ARGS(spAdapter.put()))); + + DXGI_ADAPTER_DESC adapterDesc = {}; + RETURN_IF_FAILED(spAdapter->GetDesc(&adapterDesc)); + + LARGE_INTEGER driverVersion; + RETURN_IF_FAILED(spAdapter->CheckInterfaceSupport(__uuidof(IDXGIDevice), &driverVersion)); + + vendorId = adapterDesc.VendorId; + ParseDriverVersion(driverVersion, majorVersion, minorVersion); + return S_OK; +} + +#ifdef ENABLE_DXCORE +HRESULT GetDXCoreAdapterMetadata(ID3D12Device& device, bool& isMcdmAdapter, uint32_t& vendorId, uint32_t& majorVersion, uint32_t& minorVersion) { + winrt::com_ptr spFactory; + RETURN_IF_FAILED(DXCoreCreateAdapterFactory(IID_PPV_ARGS(spFactory.put()))); + + winrt::com_ptr spAdapter; + 
RETURN_IF_FAILED(spFactory->GetAdapterByLuid(device.GetAdapterLuid(), IID_PPV_ARGS(spAdapter.put()))); + + if (spAdapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE) && + (!(spAdapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GRAPHICS) || + spAdapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D11_GRAPHICS)))) { + isMcdmAdapter = true; + } else { + isMcdmAdapter = false; + } + + DXCoreHardwareID hardwareId; + RETURN_IF_FAILED(spAdapter->GetProperty(DXCoreAdapterProperty::HardwareID, &hardwareId)); + vendorId = hardwareId.vendorID; + + uint64_t rawDriverVersion; + RETURN_IF_FAILED(spAdapter->GetProperty(DXCoreAdapterProperty::DriverVersion, &rawDriverVersion)); + + LARGE_INTEGER driverVersion; + driverVersion.QuadPart = static_cast(rawDriverVersion); + ParseDriverVersion(driverVersion, majorVersion, minorVersion); + return S_OK; +} +#endif + +HRESULT GetD3D12Device(const winrt::Windows::AI::MachineLearning::LearningModelDevice& device, ID3D12Device** outDevice) { + _LUID id; + id.LowPart = device.AdapterId().LowPart; + id.HighPart = device.AdapterId().HighPart; + CommonDeviceHelpers::AdapterEnumerationSupport support; + RETURN_IF_FAILED(GetAdapterEnumerationSupport(&support)); + + if (support.has_dxgi) { + winrt::com_ptr spFactory; + RETURN_IF_FAILED(CreateDXGIFactory1(IID_PPV_ARGS(spFactory.put()))); + + winrt::com_ptr spAdapter; + RETURN_IF_FAILED(spFactory->EnumAdapterByLuid(id, IID_PPV_ARGS(spAdapter.put()))); + RETURN_IF_FAILED(D3D12CreateDevice(spAdapter.get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(outDevice))); + } +#ifdef ENABLE_DXCORE + if (support.has_dxgi == false) { + winrt::com_ptr spFactory; + RETURN_IF_FAILED(DXCoreCreateAdapterFactory(IID_PPV_ARGS(spFactory.put()))); + + winrt::com_ptr spAdapter; + RETURN_IF_FAILED(spFactory->GetAdapterByLuid(id, IID_PPV_ARGS(spAdapter.put()))); + RETURN_IF_FAILED(D3D12CreateDevice(spAdapter.get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(outDevice))); + } +#endif + return S_OK; +} + 
+HRESULT IsFloat16Blocked(ID3D12Device& device, bool* isBlocked) { + uint32_t vendorId; + uint32_t majorVersion; + uint32_t minorVersion; + bool isMcdmAdapter; + *isBlocked = true; + CommonDeviceHelpers::AdapterEnumerationSupport support; + RETURN_IF_FAILED(CommonDeviceHelpers::GetAdapterEnumerationSupport(&support)); +#ifdef ENABLE_DXCORE + if (support.has_dxcore) { + RETURN_IF_FAILED(GetDXCoreAdapterMetadata(device, isMcdmAdapter, vendorId, majorVersion, minorVersion)); + *isBlocked = CheckAdapterFP16Blocked(isMcdmAdapter, vendorId, majorVersion, minorVersion); + return S_OK; + } +#endif + RETURN_IF_FAILED(GetDXGIAdapterMetadata(device, vendorId, majorVersion, minorVersion)); + isMcdmAdapter = false; + *isBlocked = CheckAdapterFP16Blocked(isMcdmAdapter, vendorId, majorVersion, minorVersion); + return S_OK; +} +} + +namespace CommonDeviceHelpers { +constexpr uint32_t c_intelVendorId = 0x8086; +constexpr uint32_t c_nvidiaVendorId = 0x10DE; +constexpr uint32_t c_amdVendorId = 0x1002; + +bool IsFloat16Supported(const winrt::Windows::AI::MachineLearning::LearningModelDevice& device) { + auto adapterId = device.AdapterId(); + if (!adapterId.HighPart && !adapterId.LowPart) { + // CPU device + return true; + } + winrt::com_ptr d3d12Device; + if (FAILED(GetD3D12Device(device, d3d12Device.put()))) { + return false; + } + return IsFloat16Supported(d3d12Device.get()); +} + +bool IsFloat16Supported(ID3D12Device* device) { +#ifndef USE_DML + throw winrt::hresult_error(ERROR_NOT_SUPPORTED, L"IsFloat16Supported is not implemented for WinML only build."); +#else + bool isBlocked; + if (FAILED(IsFloat16Blocked(*device, &isBlocked)) || isBlocked) { + return false; + } + winrt::com_ptr dmlDevice; + winrt::check_hresult(DMLCreateDevice( + device, + DML_CREATE_DEVICE_FLAG_NONE, + IID_PPV_ARGS(dmlDevice.put()))); + + DML_FEATURE_QUERY_TENSOR_DATA_TYPE_SUPPORT float16Query = {DML_TENSOR_DATA_TYPE_FLOAT16}; + DML_FEATURE_DATA_TENSOR_DATA_TYPE_SUPPORT float16Data = {}; + + 
winrt::check_hresult(dmlDevice->CheckFeatureSupport( + DML_FEATURE_TENSOR_DATA_TYPE_SUPPORT, + sizeof(float16Query), + &float16Query, + sizeof(float16Data), + &float16Data)); + return float16Data.IsSupported; +#endif +} + +HRESULT GetAdapterEnumerationSupport(AdapterEnumerationSupport* support) { + static std::optional s_adapterEnumerationSupport; + if (!s_adapterEnumerationSupport.has_value()) { + // check for support, starting with DXGI + winrt::com_ptr dxgiFactory; +#ifdef ENABLE_DXCORE + winrt::com_ptr dxcoreFactory; + // necessary because DXCoreCreateAdapterFactory is overloaded + HRESULT(WINAPI * pDxCoreTestFunc) + (REFIID, void**) = DXCoreCreateAdapterFactory; +#endif + AdapterEnumerationSupport adapterEnumerationSupport = {}; + + if (SUCCEEDED(RunDelayLoadedApi(CreateDXGIFactory1, IID_PPV_ARGS(dxgiFactory.put())))) { + adapterEnumerationSupport.has_dxgi = true; + } +#ifdef ENABLE_DXCORE + if (SUCCEEDED(RunDelayLoadedApi(pDxCoreTestFunc, IID_PPV_ARGS(dxcoreFactory.put())))) { + adapterEnumerationSupport.has_dxcore = true; + } +#endif + + s_adapterEnumerationSupport = adapterEnumerationSupport; + + if (!(adapterEnumerationSupport.has_dxgi || adapterEnumerationSupport.has_dxcore)) { + return TYPE_E_CANTLOADLIBRARY; + } + } + *support = s_adapterEnumerationSupport.value(); + return S_OK; +} + +} // namespace CommonDeviceHelpers diff --git a/winml/lib/Common/inc/CommonDeviceHelpers.h b/winml/lib/Common/inc/CommonDeviceHelpers.h new file mode 100644 index 0000000000000..288202ae128d2 --- /dev/null +++ b/winml/lib/Common/inc/CommonDeviceHelpers.h @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include +#include +#include + +#if __has_include("dxcore.h") +#define ENABLE_DXCORE 1 +#endif +#ifdef ENABLE_DXCORE +#include +#endif + +// +// Exception information +// +#ifndef FACILITY_VISUALCPP +#define FACILITY_VISUALCPP ((LONG)0x6d) +#endif + +#define VcppException(sev, err) ((sev) | (FACILITY_VISUALCPP << 16) | err) + +namespace CommonDeviceHelpers { +struct AdapterEnumerationSupport { + bool has_dxgi; + bool has_dxcore; +}; + +// uses Structured Exception Handling (SEH) to detect for delay load failures of target API. +// You cannot mix and match SEH with C++ exception and object unwinding +// In this case we will catch it, and report up to the caller via HRESULT so our callers can use +// C++ exceptions +template +HRESULT RunDelayLoadedApi(TFunc& tfunc, TArgs&&... args) { + __try { + return tfunc(std::forward(args)...); + } __except (GetExceptionCode() == VcppException(ERROR_SEVERITY_ERROR, ERROR_MOD_NOT_FOUND) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { + // this could be ok, just let people know that it failed to load + return HRESULT_FROM_WIN32(ERROR_MOD_NOT_FOUND); + } +} + +HRESULT GetAdapterEnumerationSupport(AdapterEnumerationSupport* support); +bool IsFloat16Supported(ID3D12Device* device); +bool IsFloat16Supported(const winrt::Windows::AI::MachineLearning::LearningModelDevice& device); +} // namespace CommonDeviceHelpers diff --git a/winml/lib/Common/inc/NamespaceAliases.h b/winml/lib/Common/inc/NamespaceAliases.h new file mode 100644 index 0000000000000..b147a79c9987c --- /dev/null +++ b/winml/lib/Common/inc/NamespaceAliases.h @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +namespace winrt::Windows::Foundation {} +namespace wf = ::winrt::Windows::Foundation; + +namespace winrt::Windows::Foundation::Collections {} +namespace wfc = ::winrt::Windows::Foundation::Collections; + +namespace winrt::Windows::Graphics {} +namespace wg = winrt::Windows::Graphics; + +namespace winrt::Windows::Graphics::DirectX {} +namespace wgdx = winrt::Windows::Graphics::DirectX; + +namespace winrt::Windows::Graphics::Imaging {} +namespace wgi = ::winrt::Windows::Graphics::Imaging; + +namespace winrt::Windows::Storage {} +namespace ws = ::winrt::Windows::Storage; + +namespace winrt::Windows::Storage::Streams {} +namespace wss = ::winrt::Windows::Storage::Streams; + +namespace winrt::Windows::AI::MachineLearning {} +namespace winml = ::winrt::Windows::AI::MachineLearning; + +namespace winrt::Windows::AI::MachineLearning::implementation {} +namespace winmlp = ::winrt::Windows::AI::MachineLearning::implementation; + +namespace Windows::AI::MachineLearning::Adapter {} +namespace winmla = ::Windows::AI::MachineLearning::Adapter; + +namespace Windows::AI::MachineLearning {} +namespace WinML = ::Windows::AI::MachineLearning; + +namespace Windows::AI::MachineLearning::Telemetry {} +namespace _winmlt = ::Windows::AI::MachineLearning::Telemetry; diff --git a/winml/lib/Common/inc/PheonixSingleton.h b/winml/lib/Common/inc/PheonixSingleton.h new file mode 100644 index 0000000000000..6361385ab6f63 --- /dev/null +++ b/winml/lib/Common/inc/PheonixSingleton.h @@ -0,0 +1,19 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +template +std::shared_ptr PheonixSingleton(TArgs&&... 
args) { + static std::weak_ptr instance_; + static std::mutex lock_; + + std::lock_guard lock(lock_); + if (auto instance = instance_.lock()) { + return instance; + } + + auto instance = std::make_shared(std::forward(args)...); + instance_ = instance; + return instance; +} \ No newline at end of file diff --git a/winml/lib/Common/inc/StringHelpers.h b/winml/lib/Common/inc/StringHelpers.h new file mode 100644 index 0000000000000..e8a4c5514aab3 --- /dev/null +++ b/winml/lib/Common/inc/StringHelpers.h @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +// String Helpers +namespace Windows::AI::MachineLearning::Strings { +struct HStringBuilder { + HStringBuilder(HStringBuilder const&) = delete; + HStringBuilder& operator=(HStringBuilder const&) = delete; + + explicit HStringBuilder(UINT32 size) { + winrt::check_hresult(WindowsPreallocateStringBuffer(size, &data_, &buffer_)); + } + + ~HStringBuilder() noexcept { + if (buffer_ != nullptr) { + WindowsDeleteStringBuffer(buffer_); + } + } + + wchar_t* data() noexcept { + return data_; + } + + winrt::hstring to_hstring() { + winrt::hstring result; + winrt::check_hresult(WindowsPromoteStringBuffer(buffer_, reinterpret_cast(put_abi(result)))); + buffer_ = nullptr; + return result; + } + + private: + wchar_t* data_{nullptr}; + HSTRING_BUFFER buffer_{nullptr}; +}; + +static winrt::hstring HStringFromUTF8(const char* input, size_t input_length) { + if (input_length == 0) { + return {}; + } else if (input_length <= (std::numeric_limits::max)()) { + int output_length = MultiByteToWideChar(CP_UTF8, 0, input, static_cast(input_length), nullptr, 0); + if (output_length > 0) { + HStringBuilder buffer(static_cast(output_length)); + MultiByteToWideChar(CP_UTF8, 0, input, static_cast(input_length), buffer.data(), output_length); + return buffer.to_hstring(); + } else { + winrt::throw_hresult(E_INVALIDARG); + } + } else { + winrt::throw_hresult(E_INVALIDARG); 
+ } +} + +static winrt::hstring HStringFromUTF8(const char* input) { + return input != nullptr + ? HStringFromUTF8(input, strlen(input)) + : L""; +} + +static winrt::hstring HStringFromUTF8(const std::string& input) { + return HStringFromUTF8(input.c_str(), input.size()); +} + +static std::string UTF8FromUnicode(const wchar_t* input, size_t input_length) { + if (input_length == 0) { + return {}; + } else if (input_length <= (std::numeric_limits::max)()) { + int output_length = WideCharToMultiByte(CP_UTF8, 0, input, static_cast(input_length), nullptr, 0, nullptr, nullptr); + if (output_length > 0) { + std::string output(output_length, 0); + WideCharToMultiByte(CP_UTF8, 0, input, static_cast(input_length), &output[0], output_length, nullptr, nullptr); + return output; + } else { + winrt::throw_hresult(E_INVALIDARG); + } + } else { + winrt::throw_hresult(E_INVALIDARG); + } +} + +static std::string UTF8FromHString(const winrt::hstring& input) { + return UTF8FromUnicode(input.data(), input.size()); +} + +static std::wstring WStringFromString(const std::string& string) { + std::wostringstream woss; + woss << string.data(); + return woss.str(); +} + +} // namespace Windows::AI::MachineLearning::Strings diff --git a/winml/lib/Common/inc/WinMLTelemetryHelper.h b/winml/lib/Common/inc/WinMLTelemetryHelper.h new file mode 100644 index 0000000000000..a4bac4c78873e --- /dev/null +++ b/winml/lib/Common/inc/WinMLTelemetryHelper.h @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +// +// WinMLTelemetryHelper provides a centralized location for managing all telemetry +// usage in the WinML COM runtime. This aims to abstract all interaction with the +// TraceLogging APIs. +// +// A global instance of the helper is declared in precomp.h and defined in dll.cpp. 
+// + +// TraceLogging includes +#include +#include + +// Forward references +class WinMLRuntime; +typedef struct WinMLModelDescription { + LPWSTR Author; + LPWSTR Name; + LPWSTR Domain; + LPWSTR Description; + SIZE_T Version; +} WinMLModelDescription; + +template +class Profiler; + +// Schema versions. +#define WINML_TLM_PROCESS_INFO_SCHEMA_VERSION 0 +#define WINML_TLM_CONTEXT_CREATION_VERSION 0 +#define WINML_TLM_MODEL_CREATION_VERSION 0 +#define WINML_TLM_RUNTIME_ERROR_VERSION 0 +#define WINML_TLM_RUNTIME_PERF_VERSION 0 + +#define WinMLTraceLoggingWrite(hProvider, EventName, ...) \ + TraceLoggingWrite(hProvider, \ + EventName, \ + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), \ + __VA_ARGS__) +// +// WinMLRuntime Telemetry Support +// +// {BCAD6AEE-C08D-4F66-828C-4C43461A033D} +#define WINML_PROVIDER_DESC "Microsoft.Windows.AI.MachineLearning" +#define WINML_PROVIDER_GUID (0xbcad6aee, 0xc08d, 0x4f66, 0x82, 0x8c, 0x4c, 0x43, 0x46, 0x1a, 0x3, 0x3d) +#define WINML_PROVIDER_KEYWORD_DEFAULT 0x1 +#define WINML_PROVIDER_KEYWORD_LOTUS_PROFILING 0x2 +#define WINML_PROVIDER_KEYWORD_START_STOP 0x4 +struct MLOperatorKernelDescription; +struct MLOperatorSchemaDescription; + +class WinMLTelemetryHelper { + public: + TraceLoggingHProvider provider_ = nullptr; + // Flag indicating the success of registering our telemetry provider. + bool telemetry_enabled_ = false; + + WinMLTelemetryHelper(); + ~WinMLTelemetryHelper(); + + // + // Register telemetry provider and check success. Will only succeed if + // client has opted in to sending MS telemetry. + // + virtual HRESULT Register() { + HRESULT hr = TraceLoggingRegister(provider_); + if (SUCCEEDED(hr)) { + telemetry_enabled_ = true; + } + return hr; + } + + // + // Un-Register telemetry provider to ignore events from a TraceLogging provider. 
+ // + void UnRegister() { + TraceLoggingUnregister(provider_); + } + + void LogWinMLShutDown(); + void LogRuntimeError(HRESULT hr, std::string message, PCSTR file, PCSTR function, int line); + void LogRuntimeError(HRESULT hr, PCSTR message, PCSTR file, PCSTR function, int line); + void LogRegisterOperatorKernel( + const char* name, + const char* domain, + int execution_type); + void RegisterOperatorSetSchema( + const char* name, + uint32_t input_count, + uint32_t output_count, + uint32_t type_constraint_count, + uint32_t attribute_count, + uint32_t default_attribute_count); + + void EndRuntimeSession() { ++runtime_session_id_; }; + bool IsMeasureSampled(); + + private: + void RestartTimer() { + timer_start_ = GetTickCount64(); + timer_started_ = true; + } + + private: + int runtime_session_id_; + unsigned int log_counter_ = 0; + + bool timer_started_ = false; + ULONGLONG timer_start_ = 0; +}; diff --git a/winml/lib/Common/inc/WinML_Lock.h b/winml/lib/Common/inc/WinML_Lock.h new file mode 100644 index 0000000000000..b43d0bd550347 --- /dev/null +++ b/winml/lib/Common/inc/WinML_Lock.h @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +// +// Simple CRITICAL_SECTION based locks +// +class CWinMLLock { + private: + // make copy constructor and assignment operator inaccessible + + CWinMLLock(const CWinMLLock& critical_section); + CWinMLLock& operator=(const CWinMLLock& critical_section); + + CRITICAL_SECTION critical_section_; + + public: + CWinMLLock() { + InitializeCriticalSection(&critical_section_); + }; + + ~CWinMLLock() { + DeleteCriticalSection(&critical_section_); + }; + + void Lock() { + EnterCriticalSection(&critical_section_); + }; + void Unlock() { + LeaveCriticalSection(&critical_section_); + }; + void LockExclusive() { + EnterCriticalSection(&critical_section_); + }; + void UnlockExclusive() { + LeaveCriticalSection(&critical_section_); + }; + BOOL IsLockHeldByCurrentThread() { + return GetCurrentThreadId() == static_cast(reinterpret_cast(critical_section_.OwningThread)); + }; + BOOL IsLockHeld() { + return critical_section_.OwningThread != 0; + }; + BOOL TryLock() { + return TryEnterCriticalSection(&critical_section_); + }; + // aliased methods to help code compat so that CriticalSections can be passed to ReaderWriter templates + void LockShared() { + EnterCriticalSection(&critical_section_); + }; + void UnlockShared() { + LeaveCriticalSection(&critical_section_); + }; +}; + +// locks a critical section, and unlocks it automatically +// when the lock goes out of scope +class CWinMLAutoLock { + // make copy constructor and assignment operator inaccessible + + CWinMLAutoLock(const CWinMLAutoLock& auto_lock); + CWinMLAutoLock& operator=(const CWinMLAutoLock& auto_lock); + + protected: + CWinMLLock* winml_lock_; + + public: + CWinMLAutoLock(CWinMLLock* lock) { + winml_lock_ = lock; + if (winml_lock_ != nullptr) { + winml_lock_->Lock(); + } + }; + + ~CWinMLAutoLock() { + if (winml_lock_ != nullptr) { + winml_lock_->Unlock(); + } + }; +}; diff --git a/winml/lib/Common/inc/common.h b/winml/lib/Common/inc/common.h new file mode 100644 index 0000000000000..edecd5a006403 --- 
/dev/null +++ b/winml/lib/Common/inc/common.h @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +// STL +#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING +#define _SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// WIL +#include +#include +#include +#include + +// Windows pollutes with preprocessor that redefine OPTIONAL. +// Undefine OPTIONAL to get onnx macros to resolve correctly. +#ifdef OPTIONAL +#undef OPTIONAL +#endif + +#pragma warning(disable : 4100) + +// Telemetry +#include "WinMLTelemetryHelper.h" +// Declare global telemetry helper +extern WinMLTelemetryHelper telemetry_helper; +#ifndef WINML_TELEMETRY_DISABLED +// Declare TraceLogging provider +TRACELOGGING_DECLARE_PROVIDER(winml_trace_logging_provider); +#endif //WINML_TELEMETRY_DISABLED + +// WinML +#include "errors.h" +#include "NamespaceAliases.h" +#include "StringHelpers.h" +#include "WinML_Lock.h" \ No newline at end of file diff --git a/winml/lib/Common/inc/cppwinrt_onnx.h b/winml/lib/Common/inc/cppwinrt_onnx.h new file mode 100644 index 0000000000000..e858b0fb19ca0 --- /dev/null +++ b/winml/lib/Common/inc/cppwinrt_onnx.h @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "winrt_headers.h" +#include "onnx.h" diff --git a/winml/lib/Common/inc/dx.h b/winml/lib/Common/inc/dx.h new file mode 100644 index 0000000000000..704397a4f841e --- /dev/null +++ b/winml/lib/Common/inc/dx.h @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +// DirectX +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/winml/lib/Common/inc/errors.h b/winml/lib/Common/inc/errors.h new file mode 100644 index 0000000000000..2875c7f7d090a --- /dev/null +++ b/winml/lib/Common/inc/errors.h @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/common/status.h" + +#define WINML_THROW_IF_NOT_OK(status) \ + do { \ + auto _status = status; \ + if (!_status.IsOK()) { \ + HRESULT hresult = StatusCodeToHRESULT(static_cast(_status.Code())); \ + telemetry_helper.LogRuntimeError(hresult, _status.ErrorMessage(), __FILE__, __FUNCTION__, __LINE__); \ + winrt::hstring errorMessage(WinML::Strings::HStringFromUTF8(_status.ErrorMessage())); \ + throw winrt::hresult_error(hresult, errorMessage); \ + } \ + } while (0) + +// +// WINML_THROW_IF_*_MSG Variants +// + +#define WINML_THROW_HR_IF_FALSE_MSG(hr, value, message, ...) \ + do { \ + auto _value = value; \ + if (_value == false) { \ + auto _hr = hr; \ + char msg[1024]; \ + sprintf_s(msg, message, __VA_ARGS__); \ + telemetry_helper.LogRuntimeError(_hr, msg, __FILE__, __FUNCTION__, __LINE__); \ + winrt::hstring errorMessage(WinML::Strings::HStringFromUTF8(msg)); \ + throw winrt::hresult_error(_hr, errorMessage); \ + } \ + } while (0) + +#define WINML_THROW_HR_IF_TRUE_MSG(hr, value, message, ...) WINML_THROW_HR_IF_FALSE_MSG(hr, !(value), message, __VA_ARGS__) +#define WINML_THROW_HR_IF_NULL_MSG(hr, value, message, ...) 
WINML_THROW_HR_IF_TRUE_MSG(hr, ((value) == nullptr), message, __VA_ARGS__) + +// +// WINML_THROW_IF_FAILED* Variants +// + +#define WINML_THROW_HR(hr) \ + { \ + auto _result = hr; \ + telemetry_helper.LogRuntimeError(_result, "", __FILE__, __FUNCTION__, __LINE__); \ + throw winrt::hresult_error(_result); \ + } + +#define WINML_THROW_IF_FAILED(hr) \ + do { \ + HRESULT _hr = hr; \ + if (FAILED(_hr)) { \ + telemetry_helper.LogRuntimeError(_hr, "", __FILE__, __FUNCTION__, __LINE__); \ + throw winrt::hresult_error(_hr); \ + } \ + } while (0) + +#define WINML_THROW_IF_FAILED_MSG(hr, message, ...) \ + do { \ + HRESULT _result = hr; \ + if (FAILED(_result)) { \ + WINML_THROW_HR_IF_TRUE_MSG(_result, true, message, __VA_ARGS__); \ + } \ + } while (0) + +using thrower = std::function; +using enforce = std::function; +using enforce_succeeded = std::function; + +inline void enforce_not_false(HRESULT hr, bool value, thrower fnThrower) { + if (value == false) { + fnThrower(hr); + } +} + +inline void enforce_not_failed(HRESULT hr, thrower fnThrower) { + if (FAILED(hr)) { + fnThrower(hr); + } +} + +inline __declspec(noinline) winrt::hresult_error _to_hresult() noexcept { + try { + throw; + } catch (winrt::hresult_error const& e) { + return e; + } catch (wil::ResultException const& e) { + return winrt::hresult_error(e.GetErrorCode(), winrt::to_hstring(e.what())); + } catch (std::bad_alloc const&) { + return winrt::hresult_error(E_OUTOFMEMORY); + } catch (std::out_of_range const& e) { + return winrt::hresult_out_of_bounds(winrt::to_hstring(e.what())); + } catch (std::invalid_argument const& e) { + return winrt::hresult_invalid_argument(winrt::to_hstring(e.what())); + } catch (std::exception const& e) { + return winrt::hresult_error(E_FAIL, winrt::to_hstring(e.what())); + } catch (...) { + return winrt::hresult_error(E_FAIL); + } +} + +#define WINML_CATCH_ALL \ + catch (...) { \ + throw _to_hresult(); \ + } + +#define WINML_CATCH_ALL_COM \ + catch (...) 
{ \ + return _to_hresult().to_abi(); \ + } + +#define WINML_CATCH_ALL_DONOTHING \ + catch (...) { \ + return; \ + } \ No newline at end of file diff --git a/winml/lib/Common/inc/iengine.h b/winml/lib/Common/inc/iengine.h new file mode 100644 index 0000000000000..8ec9d61d09b91 --- /dev/null +++ b/winml/lib/Common/inc/iengine.h @@ -0,0 +1,181 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +namespace Windows::AI::MachineLearning { + +MIDL_INTERFACE("eaae30b5-7381-432d-9730-322136b02371") +IModelInfo : IUnknown { + STDMETHOD(GetAuthor) + (const char** out, size_t* len) PURE; + + STDMETHOD(GetName) + (const char** out, size_t* len) PURE; + + STDMETHOD(GetDomain) + (const char** out, size_t* len) PURE; + + + STDMETHOD(GetDescription) + (const char** out, size_t* len) PURE; + + STDMETHOD(GetVersion) + (int64_t * out) PURE; + + STDMETHOD(GetModelMetadata) + (ABI::Windows::Foundation::Collections::IMapView * *metadata) PURE; + + STDMETHOD(GetInputFeatures) + (ABI::Windows::Foundation::Collections::IVectorView * *features) PURE; + + STDMETHOD(GetOutputFeatures) + (ABI::Windows::Foundation::Collections::IVectorView * *features) PURE; +}; + +MIDL_INTERFACE("1b198b76-5c44-480d-837c-8433ca6eaf99") +IModel : IUnknown { + STDMETHOD(GetModelInfo) + (IModelInfo * *info) PURE; + + STDMETHOD(ModelEnsureNoFloat16) + () PURE; + + STDMETHOD(CloneModel) + (IModel * *copy) PURE; +}; + +using Resource = std::unique_ptr>; +MIDL_INTERFACE("31f39226-cfe8-4758-af38-3d01b2a33ee1") +IValue : IUnknown { + STDMETHOD(IsEmpty) + (bool* out) PURE; + + STDMETHOD(IsCpu) + (bool* out) PURE; + + STDMETHOD(GetResource) + (WinML::Resource & resource) PURE; + + STDMETHOD(IsTensor) + (bool* out) PURE; + + STDMETHOD(IsOfTensorType) + (winml::TensorKind kind, bool* out) PURE; + + STDMETHOD(GetTensorShape) + (std::vector & shape_vector) PURE; + + STDMETHOD(IsOfMapType) + (winml::TensorKind key_kind, winml::TensorKind value_kind, bool* out) 
PURE; + + STDMETHOD(IsOfVectorMapType) + (winml::TensorKind key_kind, winml::TensorKind value_kind, bool* out) PURE; +}; + +MIDL_INTERFACE("30c99886-38d2-41cb-a615-203fe7d7daac") +IEngine : IUnknown { + STDMETHOD(LoadModel) + (_In_ IModel*) PURE; + + STDMETHOD(Initialize) + () PURE; + + STDMETHOD(RegisterGraphTransformers) + () PURE; + + STDMETHOD(RegisterCustomRegistry) + (IMLOperatorRegistry * registry) PURE; + + STDMETHOD(EndProfiling) + () PURE; + + STDMETHOD(StartProfiling) + () PURE; + + STDMETHOD(FlushContext) + () PURE; + + STDMETHOD(TrimUploadHeap) + () PURE; + + STDMETHOD(ReleaseCompletedReferences) + () PURE; + + STDMETHOD(Sync) + () PURE; + + STDMETHOD(CreateTensorValue) + (const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) PURE; + + STDMETHOD(CreateTensorValueFromExternalD3DResource) + (ID3D12Resource * resource, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) PURE; + + STDMETHOD(CreateTensorValueFromExternalBuffer) + (void* data, size_t size_in_bytes, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) PURE; + + STDMETHOD(CreateStringTensorValueFromDataWithCopy) + (const char* const* data, size_t num_elements, const int64_t* shape, size_t count, _Out_ IValue** out) PURE; + + STDMETHOD(CreateNullValue) + (_Out_ IValue * *out) PURE; + + STDMETHOD(CreateMapValue) + (IInspectable * map, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue * *out) PURE; + + STDMETHOD(CreateSequenceOfMapsValue) + (IInspectable * sequence, winml::TensorKind key_kind, winml::TensorKind value_kind, _Out_ IValue * *out) PURE; + + STDMETHOD(CreateOneInputAcrossDevices) + (const char* name, IValue* src, IValue** dest) PURE; + + STDMETHOD(CopyValueAcrossDevices) + (IValue * src, IValue * dest) PURE; + + STDMETHOD(Run) + (const char** input_names, IValue** inputs, size_t num_inputs, const char** output_names, IValue** outputs, size_t num_outputs) PURE; + + 
STDMETHOD(FillFromMapValue) + (IInspectable * map, winml::TensorKind key_kind, winml::TensorKind value_kind, IValue * value) PURE; + + STDMETHOD(FillSequenceOfMapsValue) + (IInspectable * sequence, winml::TensorKind key_kind, winml::TensorKind value_kind, IValue * value) PURE; +}; + +MIDL_INTERFACE("0452ef15-b66b-47ca-9eff-aedac571764e") +IEngineBuilder : IUnknown { + STDMETHOD(SetD3D12Resources) + (ID3D12Device * device, ID3D12CommandQueue * queue) PURE; + + STDMETHOD(GetD3D12Device) + (ID3D12Device * *device) PURE; + + STDMETHOD(GetID3D12CommandQueue) + (ID3D12CommandQueue * *queue) PURE; + + STDMETHOD(SetBatchSizeOverride) + (uint32_t batch_size_override) PURE; + + STDMETHOD(CreateEngine) + (IEngine * *out) PURE; +}; + +MIDL_INTERFACE("5eddd25a-70ad-46ef-a445-78fbaf792c2f") +IEngineFactory : IUnknown { + STDMETHOD(CreateModel) + (_In_ const char* model_path, _In_ size_t len, _Outptr_ IModel** out) PURE; + + STDMETHOD(CreateModel) + (_In_ void* data, _In_ size_t size, _Outptr_ IModel** out) PURE; + + STDMETHOD(CreateEngineBuilder) + (IEngineBuilder * *engine_builder) PURE; + + STDMETHOD(EnableDebugOutput) + (bool is_enabled) PURE; + + STDMETHOD(CreateCustomRegistry) + (_Out_ IMLOperatorRegistry * *registry) PURE; +}; + +} // namespace Windows::AI::MachineLearning \ No newline at end of file diff --git a/winml/lib/Common/inc/onnx.h b/winml/lib/Common/inc/onnx.h new file mode 100644 index 0000000000000..9ef407d4a88e3 --- /dev/null +++ b/winml/lib/Common/inc/onnx.h @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "common.h" + +// Needed to work around the fact that OnnxRuntime defines ERROR +#ifdef ERROR +#undef ERROR +#endif +#include "core/session/inference_session.h" +// Restore ERROR define +#define ERROR 0 + +#ifdef USE_DML +#include +#endif  // USE_DML + +#include "core/framework/customregistry.h" +#include "core/framework/allocatormgr.h" +#include "core/session/environment.h" +#include "core/session/IOBinding.h" +#include "core/common/logging/logging.h" +#include "core/common/logging/sinks/clog_sink.h" \ No newline at end of file diff --git a/winml/lib/Common/inc/pch.h b/winml/lib/Common/inc/pch.h new file mode 100644 index 0000000000000..bb3b4b663e697 --- /dev/null +++ b/winml/lib/Common/inc/pch.h @@ -0,0 +1,3 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#include "winrt_headers.h" \ No newline at end of file diff --git a/winml/lib/Common/inc/winrt_headers.h b/winml/lib/Common/inc/winrt_headers.h new file mode 100644 index 0000000000000..3b02581a2c204 --- /dev/null +++ b/winml/lib/Common/inc/winrt_headers.h @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "common.h" + +#include +#include "winrt/windows.graphics.imaging.h" +#include "winrt/windows.foundation.h" +#include "winrt/windows.foundation.collections.h" +#include "comp_generated/winrt/windows.ai.machinelearning.h" + +// WinML Native Headers +#include "Windows.AI.MachineLearning.Native.h" +#include "Windows.AI.MachineLearning.Native.Internal.h" + +#pragma warning(disable : 4100) + +#include "Errors.h" \ No newline at end of file diff --git a/winml/lib/Telemetry/Telemetry.cpp b/winml/lib/Telemetry/Telemetry.cpp new file mode 100644 index 0000000000000..baece28cedb7c --- /dev/null +++ b/winml/lib/Telemetry/Telemetry.cpp @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +#include "pch.h" + +WinMLTelemetryHelper telemetry_helper; + +TRACELOGGING_DEFINE_PROVIDER( + winml_trace_logging_provider, + WINML_PROVIDER_DESC, + WINML_PROVIDER_GUID, + TraceLoggingOptionMicrosoftTelemetry()); + diff --git a/winml/lib/Telemetry/TelemetryEvent.cpp b/winml/lib/Telemetry/TelemetryEvent.cpp new file mode 100644 index 0000000000000..027ab4cd7bb5a --- /dev/null +++ b/winml/lib/Telemetry/TelemetryEvent.cpp @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "pch.h" + +#include "inc/TelemetryEvent.h" + +using namespace _winmlt; + +static uint64_t s_event_id = 0; + +static const char* +EventCategoryToString( + EventCategory category) { + switch (category) { + case EventCategory::kModelLoad: + return "Model load"; + case EventCategory::kSessionCreation: + return "Session creation"; + case EventCategory::kBinding: + return "Binding"; + case EventCategory::kEvaluation: + return "Evaluation"; + default: + throw std::invalid_argument("category"); + } +} + +TelemetryEvent::TelemetryEvent( + EventCategory category) { + auto is_provider_enabled = + TraceLoggingProviderEnabled( + winml_trace_logging_provider, + WINEVENT_LEVEL_VERBOSE, + WINML_PROVIDER_KEYWORD_START_STOP); + + if (is_provider_enabled) { + category_ = category; + event_id_ = InterlockedIncrement(&s_event_id); + + WinMLTraceLoggingWrite( + winml_trace_logging_provider, + "started event", + TraceLoggingString(EventCategoryToString(category_), "event"), + TraceLoggingInt64(event_id_.value(), "eventId"), + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_START_STOP)); + } +} + +TelemetryEvent::~TelemetryEvent() { + if (event_id_.has_value()) { + WinMLTraceLoggingWrite( + winml_trace_logging_provider, + "stopped event", + TraceLoggingString(EventCategoryToString(category_), "event"), + TraceLoggingInt64(event_id_.value(), "eventId"), + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_START_STOP)); + } +} diff --git 
a/winml/lib/Telemetry/WinMLTelemetryHelper.cpp b/winml/lib/Telemetry/WinMLTelemetryHelper.cpp new file mode 100644 index 0000000000000..a14caf8519e26 --- /dev/null +++ b/winml/lib/Telemetry/WinMLTelemetryHelper.cpp @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// WinMLTelemetryHelper +// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "pch.h" + +WinMLTelemetryHelper::WinMLTelemetryHelper() + : provider_(winml_trace_logging_provider) { +} + +WinMLTelemetryHelper::~WinMLTelemetryHelper() { +} + +void WinMLTelemetryHelper::LogWinMLShutDown() { + WinMLTraceLoggingWrite( + provider_, + "WinMLShutDown", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + TraceLoggingString("windows.ai.machinelearning.dll is unloaded", "message"), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); +} + +void WinMLTelemetryHelper::LogRuntimeError(HRESULT hr, PCSTR message, PCSTR file, PCSTR function, int line) { + if (!telemetry_enabled_) + return; + + WinMLTraceLoggingWrite( + provider_, + "RuntimeError", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + // Telemetry info + TraceLoggingUInt8(WINML_TLM_RUNTIME_ERROR_VERSION, "schemaVersion"), + // Error Info + TraceLoggingHResult(hr, "hResult"), + TraceLoggingString(message, "errormessage"), + TraceLoggingString(file, "file"), + TraceLoggingString(function, "function"), + TraceLoggingInt32(line, "line"), + TraceLoggingInt32(runtime_session_id_, "runtimeSessionId"), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); +} + +void WinMLTelemetryHelper::LogRuntimeError(HRESULT hr, std::string message, PCSTR 
file, PCSTR function, int line) { + LogRuntimeError(hr, message.c_str(), file, function, line); +} + +bool WinMLTelemetryHelper::IsMeasureSampled() { + // If the machine isn't sampled at Measure Level, return false. + return TraceLoggingProviderEnabled(provider_, WINEVENT_LEVEL_LOG_ALWAYS, MICROSOFT_KEYWORD_MEASURES); +} + +void WinMLTelemetryHelper::LogRegisterOperatorKernel( + const char* name, + const char* domain, + int execution_type) { + if (!telemetry_enabled_) + return; + + WinMLTraceLoggingWrite( + provider_, + "RegisterOperatorKernel", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + // Telemetry info + TraceLoggingUInt8(WINML_TLM_RUNTIME_ERROR_VERSION, "schemaVersion"), + //op kernel info + TraceLoggingString(name, "name"), + TraceLoggingString(domain, "domain"), + TraceLoggingInt32(execution_type, "executionType"), + TraceLoggingInt32(runtime_session_id_, "runtimeSessionId"), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); +} + +void WinMLTelemetryHelper::RegisterOperatorSetSchema( + const char* name, + uint32_t input_count, + uint32_t output_count, + uint32_t type_constraint_count, + uint32_t attribute_count, + uint32_t default_attribute_count) { + if (!telemetry_enabled_) + return; + + WinMLTraceLoggingWrite( + provider_, + "RegisterOperatorSetSchema", + TraceLoggingKeyword(WINML_PROVIDER_KEYWORD_DEFAULT), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + // Telemetry info + TraceLoggingUInt8(WINML_TLM_RUNTIME_ERROR_VERSION, "schemaVersion"), + //op kernel info + TraceLoggingString(name, "name"), + TraceLoggingInt32(input_count, "inputCount"), //stats + TraceLoggingInt32(output_count, "outputCount"), + TraceLoggingInt32(type_constraint_count, "typeConstraintCount"), + TraceLoggingInt32(attribute_count, "attributeCount"), + TraceLoggingInt32(default_attribute_count, "defaultAttributeCount"), + TraceLoggingInt32(runtime_session_id_, "runtime_session_id_"), + 
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES)); +} \ No newline at end of file diff --git a/winml/lib/Telemetry/inc/TelemetryEvent.h b/winml/lib/Telemetry/inc/TelemetryEvent.h new file mode 100644 index 0000000000000..8376211818b2f --- /dev/null +++ b/winml/lib/Telemetry/inc/TelemetryEvent.h @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +namespace Windows::AI::MachineLearning::Telemetry { + +enum class EventCategory { + kModelLoad = 0, + kSessionCreation, + kBinding, + kEvaluation, +}; + +class TelemetryEvent { + public: + TelemetryEvent( + EventCategory eventCategory); + + ~TelemetryEvent(); + + private: + EventCategory category_; + std::optional event_id_; +}; + +} // namespace Windows::AI::MachineLearning::Telemetry \ No newline at end of file diff --git a/winml/lib/Telemetry/pch.h b/winml/lib/Telemetry/pch.h new file mode 100644 index 0000000000000..421903725edb8 --- /dev/null +++ b/winml/lib/Telemetry/pch.h @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "common.h" +#include "TraceLoggingConfig.h" diff --git a/winml/test/.gitignore b/winml/test/.gitignore new file mode 100644 index 0000000000000..dbc16e37ec7f3 --- /dev/null +++ b/winml/test/.gitignore @@ -0,0 +1 @@ +!*.onnx diff --git a/winml/test/api/APITest.h b/winml/test/api/APITest.h new file mode 100644 index 0000000000000..dfa64f26830d1 --- /dev/null +++ b/winml/test/api/APITest.h @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once +#include "fileHelpers.h" +namespace APITest { +static void LoadModel(const std::wstring& modelPath, + winrt::Windows::AI::MachineLearning::LearningModel& learningModel) { + std::wstring fullPath = FileHelpers::GetModulePath() + modelPath; + learningModel = winrt::Windows::AI::MachineLearning::LearningModel::LoadFromFilePath(fullPath); +}; + +static uint64_t GetAdapterIdQuadPart(winrt::Windows::AI::MachineLearning::LearningModelDevice& device) { + LARGE_INTEGER id; + id.LowPart = device.AdapterId().LowPart; + id.HighPart = device.AdapterId().HighPart; + return id.QuadPart; +}; + +static _LUID GetAdapterIdAsLUID(winrt::Windows::AI::MachineLearning::LearningModelDevice& device) { + _LUID id; + id.LowPart = device.AdapterId().LowPart; + id.HighPart = device.AdapterId().HighPart; + return id; +} +}; // namespace APITest diff --git a/winml/test/api/LearningModelAPITest.cpp b/winml/test/api/LearningModelAPITest.cpp new file mode 100644 index 0000000000000..d07981fe28de0 --- /dev/null +++ b/winml/test/api/LearningModelAPITest.cpp @@ -0,0 +1,278 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "testPch.h" +#include "LearningModelAPITest.h" +#include "APITest.h" +#include +#include +#include +#include + +using namespace winrt; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Foundation::Collections; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Storage; +using namespace winrt::Windows::Storage::Streams; + +static void LearningModelAPITestSetup() { + init_apartment(); +} + +static void LearningModelAPITestGpuSetup() { + GPUTEST; + init_apartment(); +} + +static void CreateModelFromFilePath() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); +} + +static void CreateModelFromIStorage() { + std::wstring path = FileHelpers::GetModulePath() + L"squeezenet_modifiedforruntimestests.onnx"; + auto storageFile = winrt::Windows::Storage::StorageFile::GetFileFromPathAsync(path).get(); + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(learningModel = LearningModel::LoadFromStorageFileAsync(storageFile).get()); + WINML_EXPECT_TRUE(learningModel != nullptr); + + // check the author so we know the model was populated correctly. + std::wstring author(learningModel.Author()); + WINML_EXPECT_EQUAL(L"onnx-caffe2", author); +} + +static void CreateModelFromIStorageOutsideCwd() { + std::wstring path = FileHelpers::GetModulePath() + L"ModelSubdirectory\\ModelInSubdirectory.onnx"; + auto storageFile = winrt::Windows::Storage::StorageFile::GetFileFromPathAsync(path).get(); + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(learningModel = LearningModel::LoadFromStorageFileAsync(storageFile).get()); + WINML_EXPECT_TRUE(learningModel != nullptr); + + // check the author so we know the model was populated correctly. 
+ std::wstring author(learningModel.Author()); + WINML_EXPECT_EQUAL(L"onnx-caffe2", author); +} + +static void CreateModelFromIStream() { + std::wstring path = FileHelpers::GetModulePath() + L"squeezenet_modifiedforruntimestests.onnx"; + auto storageFile = winrt::Windows::Storage::StorageFile::GetFileFromPathAsync(path).get(); + winrt::Windows::Storage::Streams::IRandomAccessStreamReference streamref; + storageFile.as(streamref); + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(learningModel = LearningModel::LoadFromStreamAsync(streamref).get()); + WINML_EXPECT_TRUE(learningModel != nullptr); + + // check the author so we know the model was populated correctly. + std::wstring author(learningModel.Author()); + WINML_EXPECT_EQUAL(L"onnx-caffe2", author); +} + +static void ModelGetAuthor() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + std::wstring author(learningModel.Author()); + WINML_EXPECT_EQUAL(L"onnx-caffe2", author); +} + +static void ModelGetName() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + std::wstring name(learningModel.Name()); + WINML_EXPECT_EQUAL(L"squeezenet_old", name); +} + +static void ModelGetDomain() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + std::wstring domain(learningModel.Domain()); + WINML_EXPECT_EQUAL(L"test-domain", domain); +} + +static void ModelGetDescription() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + std::wstring description(learningModel.Description()); + WINML_EXPECT_EQUAL(L"test-doc_string", description); +} + +static void ModelGetVersion() { + LearningModel learningModel = nullptr; + 
WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + int64_t version(learningModel.Version()); + (void)(version); +} + +typedef std::vector> Metadata; + +/* +class MetadataTest : public LearningModelAPITest, public testing::WithParamInterface> +{}; + +TEST_P(MetadataTest, GetMetaData) +{ + std::wstring fileName; + std::vector> keyValuePairs; + + tie(fileName, keyValuePairs) = GetParam(); + WINML_EXPECT_NO_THROW(LoadModel(fileName.c_str())); + WINML_EXPECT_TRUE(m_model.Metadata() != nullptr); + WINML_EXPECT_EQUAL(keyValuePairs.size(), m_model.Metadata().Size()); + + auto iter = m_model.Metadata().First(); + for (auto& keyValue : keyValuePairs) + { + WINML_EXPECT_TRUE(iter.HasCurrent()); + WINML_EXPECT_EQUAL(keyValue.first, std::wstring(iter.Current().Key())); + WINML_EXPECT_EQUAL(keyValue.second, std::wstring(iter.Current().Value())); + iter.MoveNext(); + } +} + +INSTANTIATE_TEST_SUITE_P( + ModelMetadata, + MetadataTest, + ::testing::Values( + std::pair(L"squeezenet_modifiedforruntimestests.onnx", Metadata{}), + std::pair(L"modelWithMetaData.onnx", Metadata{{L"thisisalongkey", L"thisisalongvalue"}}), + std::pair(L"modelWith2MetaData.onnx", Metadata{{L"thisisalongkey", L"thisisalongvalue"}, {L"key2", L"val2"}}) +)); +*/ + +static void EnumerateInputs() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + + // purposely don't cache "InputFeatures" in order to exercise calling it multiple times + WINML_EXPECT_TRUE(learningModel.InputFeatures().First().HasCurrent()); + + std::wstring name(learningModel.InputFeatures().First().Current().Name()); + WINML_EXPECT_EQUAL(L"data_0", name); + + // make sure it's either tensor or image + TensorFeatureDescriptor tensorDescriptor = nullptr; + learningModel.InputFeatures().First().Current().try_as(tensorDescriptor); + if (tensorDescriptor == nullptr) { + ImageFeatureDescriptor 
imageDescriptor = nullptr; + WINML_EXPECT_NO_THROW(learningModel.InputFeatures().First().Current().as(imageDescriptor)); + } + + auto modelDataKind = tensorDescriptor.TensorKind(); + WINML_EXPECT_EQUAL(TensorKind::Float, modelDataKind); + + WINML_EXPECT_TRUE(tensorDescriptor.IsRequired()); + + std::vector expectedShapes = {1, 3, 224, 224}; + WINML_EXPECT_EQUAL(expectedShapes.size(), tensorDescriptor.Shape().Size()); + for (uint32_t j = 0; j < tensorDescriptor.Shape().Size(); j++) { + WINML_EXPECT_EQUAL(expectedShapes.at(j), tensorDescriptor.Shape().GetAt(j)); + } + + auto first = learningModel.InputFeatures().First(); + first.MoveNext(); + WINML_EXPECT_FALSE(first.HasCurrent()); +} + +static void EnumerateOutputs() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + + // purposely don't cache "OutputFeatures" in order to exercise calling it multiple times + std::wstring name(learningModel.OutputFeatures().First().Current().Name()); + WINML_EXPECT_EQUAL(L"softmaxout_1", name); + + TensorFeatureDescriptor tensorDescriptor = nullptr; + WINML_EXPECT_NO_THROW(learningModel.OutputFeatures().First().Current().as(tensorDescriptor)); + WINML_EXPECT_TRUE(tensorDescriptor != nullptr); + + auto tensorName = tensorDescriptor.Name(); + WINML_EXPECT_EQUAL(L"softmaxout_1", tensorName); + + auto modelDataKind = tensorDescriptor.TensorKind(); + WINML_EXPECT_EQUAL(TensorKind::Float, modelDataKind); + + WINML_EXPECT_TRUE(tensorDescriptor.IsRequired()); + + std::vector expectedShapes = {1, 1000, 1, 1}; + WINML_EXPECT_EQUAL(expectedShapes.size(), tensorDescriptor.Shape().Size()); + for (uint32_t j = 0; j < tensorDescriptor.Shape().Size(); j++) { + WINML_EXPECT_EQUAL(expectedShapes.at(j), tensorDescriptor.Shape().GetAt(j)); + } + + auto first = learningModel.OutputFeatures().First(); + first.MoveNext(); + WINML_EXPECT_FALSE(first.HasCurrent()); +} + +static void CloseModelCheckMetadata() { + 
LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"squeezenet_modifiedforruntimestests.onnx", learningModel)); + WINML_EXPECT_NO_THROW(learningModel.Close()); + std::wstring author(learningModel.Author()); + WINML_EXPECT_EQUAL(L"onnx-caffe2", author); + std::wstring name(learningModel.Name()); + WINML_EXPECT_EQUAL(L"squeezenet_old", name); + std::wstring domain(learningModel.Domain()); + WINML_EXPECT_EQUAL(L"test-domain", domain); + std::wstring description(learningModel.Description()); + WINML_EXPECT_EQUAL(L"test-doc_string", description); + int64_t version(learningModel.Version()); + WINML_EXPECT_EQUAL(123456, version); +} + +static void CloseModelCheckEval() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + LearningModelSession session = nullptr; + WINML_EXPECT_NO_THROW(session = LearningModelSession(learningModel)); + WINML_EXPECT_NO_THROW(learningModel.Close()); + + std::wstring fullImagePath = FileHelpers::GetModulePath() + L"kitten_224.png"; + StorageFile imagefile = StorageFile::GetFileFromPathAsync(fullImagePath).get(); + IRandomAccessStream stream = imagefile.OpenAsync(FileAccessMode::Read).get(); + SoftwareBitmap softwareBitmap = (BitmapDecoder::CreateAsync(stream).get()).GetSoftwareBitmapAsync().get(); + VideoFrame frame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); + + LearningModelBinding binding = nullptr; + WINML_EXPECT_NO_THROW(binding = LearningModelBinding(session)); + WINML_EXPECT_NO_THROW(binding.Bind(learningModel.InputFeatures().First().Current().Name(), frame)); + + WINML_EXPECT_NO_THROW(session.Evaluate(binding, L"")); +} + +static void CloseModelNoNewSessions() { + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + WINML_EXPECT_NO_THROW(learningModel.Close()); + LearningModelSession session = nullptr; + WINML_EXPECT_THROW_SPECIFIC( + session = 
LearningModelSession(learningModel);, + winrt::hresult_error, + [](const winrt::hresult_error& e) -> bool { + return e.code() == E_INVALIDARG; + }); +} + +const LearningModelApiTestApi& getapi() { + static constexpr LearningModelApiTestApi api = + { + LearningModelAPITestSetup, + LearningModelAPITestGpuSetup, + CreateModelFromFilePath, + CreateModelFromIStorage, + CreateModelFromIStorageOutsideCwd, + CreateModelFromIStream, + ModelGetAuthor, + ModelGetName, + ModelGetDomain, + ModelGetDescription, + ModelGetVersion, + EnumerateInputs, + EnumerateOutputs, + CloseModelCheckMetadata, + CloseModelCheckEval, + CloseModelNoNewSessions + }; + return api; +} \ No newline at end of file diff --git a/winml/test/api/LearningModelAPITest.h b/winml/test/api/LearningModelAPITest.h new file mode 100644 index 0000000000000..46d815fc27579 --- /dev/null +++ b/winml/test/api/LearningModelAPITest.h @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "test.h" +struct LearningModelApiTestApi +{ + SetupTest LearningModelAPITestSetup; + SetupTest LearningModelAPITestGpuSetup; + VoidTest CreateModelFromFilePath; + VoidTest CreateModelFromIStorage; + VoidTest CreateModelFromIStorageOutsideCwd; + VoidTest CreateModelFromIStream; + VoidTest ModelGetAuthor; + VoidTest ModelGetName; + VoidTest ModelGetDomain; + VoidTest ModelGetDescription; + VoidTest ModelGetVersion; + VoidTest EnumerateInputs; + VoidTest EnumerateOutputs; + VoidTest CloseModelCheckMetadata; + VoidTest CloseModelCheckEval; + VoidTest CloseModelNoNewSessions; +}; +const LearningModelApiTestApi& getapi(); + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(LearningModelAPITest, LearningModelAPITestSetup) +WINML_TEST(LearningModelAPITest, CreateModelFromFilePath) +WINML_TEST(LearningModelAPITest, CreateModelFromIStorage) +WINML_TEST(LearningModelAPITest, CreateModelFromIStorageOutsideCwd) +WINML_TEST(LearningModelAPITest, CreateModelFromIStream) +WINML_TEST(LearningModelAPITest, ModelGetAuthor) +WINML_TEST(LearningModelAPITest, ModelGetName) +WINML_TEST(LearningModelAPITest, ModelGetDomain) +WINML_TEST(LearningModelAPITest, ModelGetDescription) +WINML_TEST(LearningModelAPITest, ModelGetVersion) +WINML_TEST(LearningModelAPITest, EnumerateInputs) +WINML_TEST(LearningModelAPITest, EnumerateOutputs) +WINML_TEST(LearningModelAPITest, CloseModelCheckMetadata) +WINML_TEST(LearningModelAPITest, CloseModelNoNewSessions) +WINML_TEST_CLASS_END() + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(LearningModelAPITestGpu, LearningModelAPITestGpuSetup) +WINML_TEST(LearningModelAPITestGpu, CloseModelCheckEval) +WINML_TEST_CLASS_END() \ No newline at end of file diff --git a/winml/test/api/LearningModelBindingAPITest.cpp b/winml/test/api/LearningModelBindingAPITest.cpp new file mode 100644 index 0000000000000..d72c3516abc79 --- /dev/null +++ b/winml/test/api/LearningModelBindingAPITest.cpp @@ -0,0 +1,674 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +#include "testPch.h" + +#include "APITest.h" +#include "LearningModelBindingAPITest.h" +#include "SqueezeNetValidator.h" + +#include +#include +#include "winrt/Windows.Storage.h" +#include +using namespace winrt; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Foundation::Collections; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Storage; + +static void LearningModelBindingAPITestSetup() { + init_apartment(); +} + +static void LearningModelBindingAPITestGpuSetup() { + GPUTEST; + init_apartment(); +} + +static void CpuSqueezeNet() +{ + std::string cpuInstance("CPU"); + WINML_EXPECT_NO_THROW(WinML::Engine::Test::ModelValidator::SqueezeNet(cpuInstance, LearningModelDeviceKind::Cpu, /*dataTolerance*/ 0.00001f, false)); +} + +static void CpuSqueezeNetEmptyOutputs() +{ + std::string cpuInstance("CPU"); + WINML_EXPECT_NO_THROW( + WinML::Engine::Test::ModelValidator::SqueezeNet( + cpuInstance, + LearningModelDeviceKind::Cpu, + /*dataTolerance*/ 0.00001f, + false, + OutputBindingStrategy::Empty); + ); +} + +static void CpuSqueezeNetUnboundOutputs() +{ + std::string cpuInstance("CPU"); + WINML_EXPECT_NO_THROW( + WinML::Engine::Test::ModelValidator::SqueezeNet( + cpuInstance, + LearningModelDeviceKind::Cpu, + /*dataTolerance*/ 0.00001f, + false, + OutputBindingStrategy::Unbound); + ); +} + +static void CpuSqueezeNetBindInputTensorAsInspectable() +{ + std::string cpuInstance("CPU"); + WINML_EXPECT_NO_THROW( + WinML::Engine::Test::ModelValidator::SqueezeNet( + cpuInstance, + LearningModelDeviceKind::Cpu, + /*dataTolerance*/ 0.00001f, + false, + OutputBindingStrategy::Bound /* empty outputs */, + true /* bind inputs as inspectables */); + ); +} + +static void CastMapInt64() +{ + WINML_EXPECT_NO_THROW(LearningModel::LoadFromFilePath(FileHelpers::GetModulePath() + L"castmap-int64.onnx")); + // TODO: Check Descriptor +} + 
+static void DictionaryVectorizerMapInt64() +{ + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"dictvectorizer-int64.onnx", learningModel)); + + auto inputDescriptor = learningModel.InputFeatures().First().Current(); + WINML_EXPECT_TRUE(inputDescriptor.Kind() == LearningModelFeatureKind::Map); + auto mapDescriptor = inputDescriptor.as(); + WINML_EXPECT_TRUE(mapDescriptor.KeyKind() == TensorKind::Int64); + WINML_EXPECT_TRUE(mapDescriptor.ValueDescriptor().Kind() == LearningModelFeatureKind::Tensor); + auto tensorDescriptor = mapDescriptor.ValueDescriptor().as(); + // empty size means tensor of scalar value + WINML_EXPECT_TRUE(tensorDescriptor.Shape().Size() == 0); + WINML_EXPECT_TRUE(tensorDescriptor.TensorKind() == TensorKind::Float); + + LearningModelSession modelSession(learningModel); + LearningModelBinding binding(modelSession); + std::unordered_map map; + map[1] = 1.f; + map[10] = 10.f; + map[3] = 3.f; + + + auto mapInputName = inputDescriptor.Name(); + + // Bind as IMap + auto abiMap = winrt::single_threaded_map(std::move(map)); + binding.Bind(mapInputName, abiMap); + auto mapInputInspectable = abiMap.as(); + auto first = binding.First(); + WINML_EXPECT_TRUE(first.Current().Key() == mapInputName); + WINML_EXPECT_TRUE(first.Current().Value() == mapInputInspectable); + WINML_EXPECT_TRUE(binding.Lookup(mapInputName) == mapInputInspectable); + + // Bind as IMapView + auto mapView = abiMap.GetView(); + binding.Bind(mapInputName, mapView); + mapInputInspectable = mapView.as(); + first = binding.First(); + WINML_EXPECT_TRUE(first.Current().Key() == mapInputName); + WINML_EXPECT_TRUE(first.Current().Value() == mapView); + WINML_EXPECT_TRUE(binding.Lookup(mapInputName) == mapView); + +} + +static void DictionaryVectorizerMapString() +{ + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"dictvectorizer-string.onnx", learningModel)); + + auto inputDescriptor = 
learningModel.InputFeatures().First().Current(); + WINML_EXPECT_TRUE(inputDescriptor.Kind() == LearningModelFeatureKind::Map); + + auto mapDescriptor = inputDescriptor.as(); + WINML_EXPECT_TRUE(mapDescriptor.KeyKind() == TensorKind::String); + WINML_EXPECT_TRUE(mapDescriptor.ValueDescriptor().Kind() == LearningModelFeatureKind::Tensor); + + auto tensorDescriptor = mapDescriptor.ValueDescriptor().as(); + // empty size means tensor of scalar value + WINML_EXPECT_TRUE(tensorDescriptor.Shape().Size() == 0); + WINML_EXPECT_TRUE(tensorDescriptor.TensorKind() == TensorKind::Float); + + LearningModelSession modelSession(learningModel); + LearningModelBinding binding(modelSession); + std::unordered_map map; + map[L"1"] = 1.f; + map[L"10"] = 10.f; + map[L"2"] = 2.f; + + auto mapInputName = inputDescriptor.Name(); + auto abiMap = winrt::single_threaded_map(std::move(map)); + binding.Bind(mapInputName, abiMap); + + auto mapInputInspectable = abiMap.as(); + auto first = binding.First(); + WINML_EXPECT_TRUE(first.Current().Key() == mapInputName); + WINML_EXPECT_TRUE(first.Current().Value() == mapInputInspectable); + WINML_EXPECT_TRUE(binding.Lookup(mapInputName) == mapInputInspectable); + + modelSession.Evaluate(binding, L""); +} + +static void RunZipMapInt64( + winrt::Windows::AI::MachineLearning::LearningModel model, + OutputBindingStrategy bindingStrategy) +{ + auto outputFeatures = model.OutputFeatures(); + auto outputDescriptor = outputFeatures.First().Current(); + WINML_EXPECT_TRUE(outputDescriptor.Kind() == LearningModelFeatureKind::Sequence); + + auto seqDescriptor = outputDescriptor.as(); + auto mapDescriptor = seqDescriptor.ElementDescriptor().as(); + WINML_EXPECT_TRUE(mapDescriptor.KeyKind() == TensorKind::Int64); + + WINML_EXPECT_TRUE(mapDescriptor.ValueDescriptor().Kind() == LearningModelFeatureKind::Tensor); + auto tensorDescriptor = mapDescriptor.ValueDescriptor().as(); + WINML_EXPECT_TRUE(tensorDescriptor.TensorKind() == TensorKind::Float); + + 
LearningModelSession session(model); + LearningModelBinding binding(session); + + std::vector inputs = { 0.5f, 0.25f, 0.125f }; + std::vector shape = { 1, 3 }; + + // Bind inputs + auto inputTensor = + TensorFloat::CreateFromArray( + shape, + winrt::array_view(std::move(inputs))); + binding.Bind(winrt::hstring(L"X"), inputTensor); + + typedef IMap ABIMap; + typedef IVector ABISequeneceOfMap; + + ABISequeneceOfMap abiOutput = nullptr; + // Bind outputs + if (bindingStrategy == OutputBindingStrategy::Bound) + { + abiOutput = winrt::single_threaded_vector(); + binding.Bind(winrt::hstring(L"Y"), abiOutput); + } + + // Evaluate + auto result = session.Evaluate(binding, L"0").Outputs(); + + if (bindingStrategy == OutputBindingStrategy::Bound) + { + // from output binding + const auto &out1 = abiOutput.GetAt(0); + const auto &out2 = result.Lookup(L"Y").as>().GetAt(0); + WINML_LOG_COMMENT((std::ostringstream() << "size: " << out1.Size()).str()); + // check outputs + auto iter1 = out1.First(); + auto iter2 = out2.First(); + for (uint32_t i = 0, size = (uint32_t)inputs.size(); i < size; ++i) + { + WINML_EXPECT_TRUE(iter1.HasCurrent()); + WINML_EXPECT_TRUE(iter2.HasCurrent()); + const auto &pair1 = iter1.Current(); + const auto &pair2 = iter2.Current(); + WINML_LOG_COMMENT((std::ostringstream() << "key: " << pair1.Key() << ", value: " << pair2.Value()).str()); + WINML_EXPECT_TRUE(pair1.Key() == i && pair2.Key() == i); + WINML_EXPECT_TRUE(pair1.Value() == inputs[i] && pair2.Value() == inputs[i]); + iter1.MoveNext(); + iter2.MoveNext(); + } + WINML_EXPECT_TRUE(!iter1.HasCurrent()); + WINML_EXPECT_TRUE(!iter2.HasCurrent()); + } + else + { + abiOutput = result.Lookup(L"Y").as(); + WINML_EXPECT_TRUE(abiOutput.Size() == 1); + ABIMap map = abiOutput.GetAt(0); + WINML_EXPECT_TRUE(map.Size() == 3); + WINML_EXPECT_TRUE(map.Lookup(0) == 0.5); + WINML_EXPECT_TRUE(map.Lookup(1) == .25); + WINML_EXPECT_TRUE(map.Lookup(2) == .125); + } +} + +static void ZipMapInt64() +{ + LearningModel 
learningModel= nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"zipmap-int64.onnx", learningModel)); + RunZipMapInt64(learningModel, OutputBindingStrategy::Bound); +} + +static void ZipMapInt64Unbound() +{ + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"zipmap-int64.onnx", learningModel)); + RunZipMapInt64(learningModel, OutputBindingStrategy::Unbound); +} + +static void ZipMapString() +{ + // output constraint: "seq(map(string, float))" or "seq(map(int64, float))" + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"zipmap-string.onnx", learningModel)); + auto outputs = learningModel.OutputFeatures(); + auto outputDescriptor = outputs.First().Current(); + WINML_EXPECT_TRUE(outputDescriptor.Kind() == LearningModelFeatureKind::Sequence); + auto mapDescriptor = outputDescriptor.as().ElementDescriptor().as(); + WINML_EXPECT_TRUE(mapDescriptor.KeyKind() == TensorKind::String); + WINML_EXPECT_TRUE(mapDescriptor.ValueDescriptor().Kind() == LearningModelFeatureKind::Tensor); + auto tensorDescriptor = mapDescriptor.ValueDescriptor().as(); + WINML_EXPECT_TRUE(tensorDescriptor.TensorKind() == TensorKind::Float); + + LearningModelSession session(learningModel); + LearningModelBinding binding(session); + + std::vector inputs = { 0.5f, 0.25f, 0.125f }; + std::vector shape = { 1, 3 }; + std::vector labels = { L"cat", L"dog", L"lion" }; + std::map mapData = { { L"cat", 0.0f }, { L"dog", 0.0f }, { L"lion", 0.0f } }; + typedef IMap ABIMap; + ABIMap abiMap = winrt::single_threaded_map(std::move(mapData)); + std::vector seqOutput = { abiMap }; + IVector ABIOutput = winrt::single_threaded_vector(std::move(seqOutput)); + + TensorFloat inputTensor = TensorFloat::CreateFromArray(shape, winrt::array_view(std::move(inputs))); + binding.Bind(winrt::hstring(L"X"), inputTensor); + binding.Bind(winrt::hstring(L"Y"), ABIOutput); + auto result = session.Evaluate(binding, L"0").Outputs(); + // from output binding + const 
auto &out1 = ABIOutput.GetAt(0); + const auto &out2 = result.Lookup(L"Y").as>().GetAt(0); + WINML_LOG_COMMENT((std::ostringstream() << "size: " << out1.Size()).str()); + // single key,value pair for each map + auto iter1 = out1.First(); + auto iter2 = out2.First(); + for (uint32_t i = 0, size = (uint32_t)inputs.size(); i < size; ++i) + { + WINML_EXPECT_TRUE(iter2.HasCurrent()); + const auto &pair1 = iter1.Current(); + const auto &pair2 = iter2.Current(); + WINML_LOG_COMMENT((std::ostringstream() << "key: " << pair1.Key().c_str() << ", value " << pair2.Value()).str()); + WINML_EXPECT_TRUE(std::wstring(pair1.Key().c_str()).compare(labels[i]) == 0); + WINML_EXPECT_TRUE(std::wstring(pair2.Key().c_str()).compare(labels[i]) == 0); + WINML_EXPECT_TRUE(pair1.Value() == inputs[i] && pair2.Value() == inputs[i]); + iter1.MoveNext(); + iter2.MoveNext(); + } + WINML_EXPECT_TRUE(!iter1.HasCurrent()); + WINML_EXPECT_TRUE(!iter2.HasCurrent()); +} + +static void GpuSqueezeNet() +{ + std::string gpuInstance("GPU"); + WINML_EXPECT_NO_THROW( + WinML::Engine::Test::ModelValidator::SqueezeNet( + gpuInstance, + LearningModelDeviceKind::DirectX, + /*dataTolerance*/ 0.00001f); + ); +} + +static void GpuSqueezeNetEmptyOutputs() +{ + std::string gpuInstance("GPU"); + WINML_EXPECT_NO_THROW( + WinML::Engine::Test::ModelValidator::SqueezeNet( + gpuInstance, + LearningModelDeviceKind::DirectX, + /*dataTolerance*/ 0.00001f, + false, + OutputBindingStrategy::Empty); + ); +} + +static void GpuSqueezeNetUnboundOutputs() +{ + std::string gpuInstance("GPU"); + WINML_EXPECT_NO_THROW( + WinML::Engine::Test::ModelValidator::SqueezeNet( + gpuInstance, + LearningModelDeviceKind::DirectX, + /*dataTolerance*/ 0.00001f, + false, + OutputBindingStrategy::Unbound); + ); +} + +// Validates that when the input image is the same as the model expects, the binding step is executed correctly. 
+static void ImageBindingDimensions() +{ + LearningModelBinding learningModelBinding = nullptr; + LearningModel learningModel = nullptr; + LearningModelSession learningModelSession = nullptr; + LearningModelDevice leraningModelDevice = nullptr; + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + // load a model with expected input size: 224 x 224 + WINML_EXPECT_NO_THROW(leraningModelDevice = LearningModelDevice(LearningModelDeviceKind::Default)); + WINML_EXPECT_NO_THROW(learningModel = LearningModel::LoadFromFilePath(filePath)); + WINML_EXPECT_TRUE(learningModel != nullptr); + WINML_EXPECT_NO_THROW(learningModelSession = LearningModelSession(learningModel, leraningModelDevice)); + WINML_EXPECT_NO_THROW(learningModelBinding = LearningModelBinding(learningModelSession)); + + // Create input images and execute bind + // Test Case 1: both width and height are larger than model expects + VideoFrame inputImage1(BitmapPixelFormat::Rgba8, 1000, 1000); + ImageFeatureValue inputTensor = ImageFeatureValue::CreateFromVideoFrame(inputImage1); + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"data_0", inputTensor)); + + // Test Case 2: only height is larger, while width is smaller + VideoFrame inputImage2(BitmapPixelFormat::Rgba8, 20, 1000); + inputTensor = ImageFeatureValue::CreateFromVideoFrame(inputImage2); + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"data_0", inputTensor)); + + // Test Case 3: only width is larger, while height is smaller + VideoFrame inputImage3(BitmapPixelFormat::Rgba8, 1000, 20); + inputTensor = ImageFeatureValue::CreateFromVideoFrame(inputImage3); + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"data_0", inputTensor)); + + // Test Case 4: both width and height are smaller than model expects + VideoFrame inputImage4(BitmapPixelFormat::Rgba8, 20, 20); + inputTensor = ImageFeatureValue::CreateFromVideoFrame(inputImage4); + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"data_0", inputTensor)); +} + +static void 
VerifyInvalidBindExceptions() +{ + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"zipmap-int64.onnx", learningModel)); + + LearningModelSession session(learningModel); + LearningModelBinding binding(session); + + std::vector inputs = { 0.5f, 0.25f, 0.125f }; + std::vector shape = { 1, 3 }; + + auto matchException = + [](const winrt::hresult_error& e, HRESULT hr) -> bool + { + return e.code() == hr; + }; + + auto ensureWinmlSizeMismatch = std::bind(matchException, std::placeholders::_1, WINML_ERR_SIZE_MISMATCH); + auto ensureWinmlInvalidBinding = std::bind(matchException, std::placeholders::_1, WINML_ERR_INVALID_BINDING); + + /* + Verify tensor bindings throw correct bind exceptions + */ + + // Bind invalid image as tensorfloat input + auto image = FileHelpers::LoadImageFeatureValue(L"227x227.png"); + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"X", image), winrt::hresult_error, ensureWinmlSizeMismatch); + + // Bind invalid map as tensorfloat input + std::unordered_map map; + auto abiMap = winrt::single_threaded_map(std::move(map)); + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"X", abiMap), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid sequence as tensorfloat input + std::vector sequence; + auto abiSequence = winrt::single_threaded_vector(std::move(sequence)); + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"X", abiSequence), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid tensor size as tensorfloat input + auto tensorBoolean = TensorBoolean::Create(); + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"X", tensorBoolean), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid tensor shape as tensorfloat input + auto tensorInvalidShape = TensorFloat::Create(std::vector { 2, 3, 4 }); + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"X", tensorInvalidShape), winrt::hresult_error, ensureWinmlInvalidBinding); + + /* + Verify sequence bindings throw correct bind exceptions + */ + + // Bind 
invalid image as sequence output + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"Y", image), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid map as sequence output + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"Y", abiMap), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid sequence as sequence output + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"Y", abiSequence), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid tensor as sequence output + WINML_EXPECT_THROW_SPECIFIC(binding.Bind(L"Y", tensorBoolean), winrt::hresult_error, ensureWinmlInvalidBinding); + + /* + Verify image bindings throw correct bind exceptions + */ + + // WINML_EXPECT_NO_THROW(LoadModel(L"fns-candy.onnx")); + + // LearningModelSession imageSession(m_model); + // LearningModelBinding imageBinding(imageSession); + + // auto inputName = m_model.InputFeatures().First().Current().Name(); + + // // Bind invalid map as image input + // WINML_EXPECT_THROW_SPECIFIC(imageBinding.Bind(inputName, abiMap), winrt::hresult_error, ensureWinmlInvalidBinding); + + // // Bind invalid sequence as image input + // WINML_EXPECT_THROW_SPECIFIC(imageBinding.Bind(inputName, abiSequence), winrt::hresult_error, ensureWinmlInvalidBinding); + + // // Bind invalid tensor type as image input + // WINML_EXPECT_THROW_SPECIFIC(imageBinding.Bind(inputName, tensorBoolean), winrt::hresult_error, ensureWinmlInvalidBinding); + + // // Bind invalid tensor size as image input + // auto tensorFloat = TensorFloat::Create(std::vector { 1, 1, 100, 100 }); + // WINML_EXPECT_THROW_SPECIFIC(imageBinding.Bind(inputName, tensorFloat), winrt::hresult_error, ensureWinmlInvalidBinding); + + // // Bind invalid tensor shape as image input + // WINML_EXPECT_THROW_SPECIFIC(imageBinding.Bind(inputName, tensorInvalidShape), winrt::hresult_error, ensureWinmlInvalidBinding); + + /* + Verify map bindings throw correct bind exceptions + */ + 
WINML_EXPECT_NO_THROW(APITest::LoadModel(L"dictvectorizer-int64.onnx", learningModel)); + + LearningModelSession mapSession(learningModel); + LearningModelBinding mapBinding(mapSession); + + auto inputName = learningModel.InputFeatures().First().Current().Name(); + + // Bind invalid image as image input + auto smallImage = FileHelpers::LoadImageFeatureValue(L"100x100.png"); + WINML_EXPECT_THROW_SPECIFIC(mapBinding.Bind(inputName, smallImage), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid map as image input + WINML_EXPECT_THROW_SPECIFIC(mapBinding.Bind(inputName, abiMap), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid sequence as image input + WINML_EXPECT_THROW_SPECIFIC(mapBinding.Bind(inputName, abiSequence), winrt::hresult_error, ensureWinmlInvalidBinding); + + // Bind invalid tensor type as image input + WINML_EXPECT_THROW_SPECIFIC(mapBinding.Bind(inputName, tensorBoolean), winrt::hresult_error, ensureWinmlInvalidBinding); +} + +// Verify that it throws an error when binding an invalid name. 
+static void BindInvalidInputName() +{ + LearningModel learningModel = nullptr; + LearningModelBinding learningModelBinding = nullptr; + LearningModelDevice learningModelDevice = nullptr; + LearningModelSession learningModelSession = nullptr; + std::wstring modelPath = FileHelpers::GetModulePath() + L"Add_ImageNet1920.onnx"; + WINML_EXPECT_NO_THROW(learningModel = LearningModel::LoadFromFilePath(modelPath)); + WINML_EXPECT_TRUE(learningModel != nullptr); + WINML_EXPECT_NO_THROW(learningModelDevice = LearningModelDevice(LearningModelDeviceKind::Default)); + WINML_EXPECT_NO_THROW(learningModelSession = LearningModelSession(learningModel, learningModelDevice)); + WINML_EXPECT_NO_THROW(learningModelBinding = LearningModelBinding(learningModelSession)); + + VideoFrame iuputImage(BitmapPixelFormat::Rgba8, 1920, 1080); + ImageFeatureValue inputTensor = ImageFeatureValue::CreateFromVideoFrame(iuputImage); + + auto first = learningModel.InputFeatures().First(); + std::wstring testInvalidName = L"0"; + + // Verify that testInvalidName is not in model's InputFeatures + while (first.HasCurrent()) + { + WINML_EXPECT_NOT_EQUAL(testInvalidName, first.Current().Name()); + first.MoveNext(); + } + + // Bind inputTensor to a valid input name + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"input_39:0", inputTensor)); + + // Bind inputTensor to an invalid input name + WINML_EXPECT_THROW_SPECIFIC(learningModelBinding.Bind(testInvalidName, inputTensor), + winrt::hresult_error, + [](const winrt::hresult_error& e) -> bool + { + return e.code() == WINML_ERR_INVALID_BINDING; + }); +} + +static void VerifyOutputAfterEvaluateAsyncCalledTwice() +{ + LearningModel learningModel = nullptr; + LearningModelBinding learningModelBinding = nullptr; + LearningModelDevice learningModelDevice = nullptr; + LearningModelSession learningModelSession = nullptr; + std::wstring filePath = FileHelpers::GetModulePath() + L"relu.onnx"; + WINML_EXPECT_NO_THROW(learningModelDevice = 
LearningModelDevice(LearningModelDeviceKind::Default)); + WINML_EXPECT_NO_THROW(learningModel = LearningModel::LoadFromFilePath(filePath)); + WINML_EXPECT_TRUE(learningModel != nullptr); + WINML_EXPECT_NO_THROW(learningModelSession = LearningModelSession(learningModel, learningModelDevice)); + WINML_EXPECT_NO_THROW(learningModelBinding = LearningModelBinding(learningModelSession)); + + auto inputShape = std::vector{ 5 }; + auto inputData1 = std::vector{ -50.f, -25.f, 0.f, 25.f, 50.f }; + auto inputValue1 = + TensorFloat::CreateFromIterable( + inputShape, + single_threaded_vector(std::move(inputData1)).GetView()); + + auto inputData2 = std::vector{ 50.f, 25.f, 0.f, -25.f, -50.f }; + auto inputValue2 = + TensorFloat::CreateFromIterable( + inputShape, + single_threaded_vector(std::move(inputData2)).GetView()); + + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"X", inputValue1)); + + auto outputValue = TensorFloat::Create(); + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"Y", outputValue)); + + WINML_EXPECT_NO_THROW(learningModelSession.Evaluate(learningModelBinding, L"")); + + auto buffer1 = outputValue.GetAsVectorView(); + WINML_EXPECT_TRUE(buffer1 != nullptr); + + // The second evaluation + // If we don't bind output again, the output value will not change + WINML_EXPECT_NO_THROW(learningModelBinding.Bind(L"X", inputValue2)); + WINML_EXPECT_NO_THROW(learningModelSession.Evaluate(learningModelBinding, L"")); + auto buffer2 = outputValue.GetAsVectorView(); + WINML_EXPECT_EQUAL(buffer1.Size(), buffer2.Size()); + bool isSame = true; + for (uint32_t i = 0; i < buffer1.Size(); ++i) + { + if (buffer1.GetAt(i) != buffer2.GetAt(i)) + { + isSame = false; + break; + } + } + WINML_EXPECT_FALSE(isSame); +} + +static VideoFrame CreateVideoFrame(const wchar_t* path) +{ + auto imagefile = StorageFile::GetFileFromPathAsync(path).get(); + auto stream = imagefile.OpenAsync(FileAccessMode::Read).get(); + auto decoder = BitmapDecoder::CreateAsync(stream).get(); + auto 
softwareBitmap = decoder.GetSoftwareBitmapAsync().get(); + return VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); +} + +static void VerifyOutputAfterImageBindCalledTwice() +{ + std::wstring fullModelPath = FileHelpers::GetModulePath() + L"model.onnx"; + std::wstring fullImagePath1 = FileHelpers::GetModulePath() + L"kitten_224.png"; + std::wstring fullImagePath2 = FileHelpers::GetModulePath() + L"fish.png"; + + // winml model creation + LearningModel model = nullptr; + WINML_EXPECT_NO_THROW(model = LearningModel::LoadFromFilePath(fullModelPath)); + LearningModelSession modelSession = nullptr; + WINML_EXPECT_NO_THROW(modelSession = LearningModelSession(model, LearningModelDevice(LearningModelDeviceKind::Default))); + LearningModelBinding modelBinding(modelSession); + + // create the tensor for the actual output + auto output = TensorFloat::Create(); + modelBinding.Bind(L"softmaxout_1", output); + + // Bind image 1 and evaluate + auto frame = CreateVideoFrame(fullImagePath1.c_str()); + auto imageTensor = ImageFeatureValue::CreateFromVideoFrame(frame); + WINML_EXPECT_NO_THROW(modelBinding.Bind(L"data_0", imageTensor)); + WINML_EXPECT_NO_THROW(modelSession.Evaluate(modelBinding, L"")); + + // Store 1st result + auto outputVectorView1 = output.GetAsVectorView(); + + // Bind image 2 and evaluate + // In this scenario, the backing videoframe is updated, and the imagefeaturevalue is rebound. 
+ // The expected result is that the videoframe will be re-tensorized at bind + auto frame2 = CreateVideoFrame(fullImagePath2.c_str()); + frame2.CopyToAsync(frame).get(); + WINML_EXPECT_NO_THROW(modelBinding.Bind(L"data_0", imageTensor)); + WINML_EXPECT_NO_THROW(modelSession.Evaluate(modelBinding, L"")); + + // Store 2nd result + auto outputVectorView2 = output.GetAsVectorView(); + + WINML_EXPECT_EQUAL(outputVectorView1.Size(), outputVectorView2.Size()); + bool isSame = true; + for (uint32_t i = 0; i < outputVectorView1.Size(); ++i) + { + if (outputVectorView1.GetAt(i) != outputVectorView2.GetAt(i)) + { + isSame = false; + break; + } + } + WINML_EXPECT_FALSE(isSame); +} + +const LearningModelBindingAPITestApi& getapi() { + static constexpr LearningModelBindingAPITestApi api = + { + LearningModelBindingAPITestSetup, + LearningModelBindingAPITestGpuSetup, + CpuSqueezeNet, + CpuSqueezeNetEmptyOutputs, + CpuSqueezeNetUnboundOutputs, + CpuSqueezeNetBindInputTensorAsInspectable, + CastMapInt64, + DictionaryVectorizerMapInt64, + DictionaryVectorizerMapString, + ZipMapInt64, + ZipMapInt64Unbound, + ZipMapString, + GpuSqueezeNet, + GpuSqueezeNetEmptyOutputs, + GpuSqueezeNetUnboundOutputs, + ImageBindingDimensions, + VerifyInvalidBindExceptions, + BindInvalidInputName, + VerifyOutputAfterEvaluateAsyncCalledTwice, + VerifyOutputAfterImageBindCalledTwice + }; + return api; +} diff --git a/winml/test/api/LearningModelBindingAPITest.h b/winml/test/api/LearningModelBindingAPITest.h new file mode 100644 index 0000000000000..06128c6f5d474 --- /dev/null +++ b/winml/test/api/LearningModelBindingAPITest.h @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "test.h" + +struct LearningModelBindingAPITestApi { + SetupTest LearningModelBindingAPITestSetup; + SetupTest LearningModelBindingAPITestGpuSetup; + VoidTest CpuSqueezeNet; + VoidTest CpuSqueezeNetEmptyOutputs; + VoidTest CpuSqueezeNetUnboundOutputs; + VoidTest CpuSqueezeNetBindInputTensorAsInspectable; + VoidTest CastMapInt64; + VoidTest DictionaryVectorizerMapInt64; + VoidTest DictionaryVectorizerMapString; + VoidTest ZipMapInt64; + VoidTest ZipMapInt64Unbound; + VoidTest ZipMapString; + VoidTest GpuSqueezeNet; + VoidTest GpuSqueezeNetEmptyOutputs; + VoidTest GpuSqueezeNetUnboundOutputs; + VoidTest ImageBindingDimensions; + VoidTest VerifyInvalidBindExceptions; + VoidTest BindInvalidInputName; + VoidTest VerifyOutputAfterEvaluateAsyncCalledTwice; + VoidTest VerifyOutputAfterImageBindCalledTwice; +}; +const LearningModelBindingAPITestApi& getapi(); + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(LearningModelBindingAPITest, LearningModelBindingAPITestSetup) +WINML_TEST(LearningModelBindingAPITest, CpuSqueezeNet) +WINML_TEST(LearningModelBindingAPITest, CpuSqueezeNetEmptyOutputs) +WINML_TEST(LearningModelBindingAPITest, CpuSqueezeNetUnboundOutputs) +WINML_TEST(LearningModelBindingAPITest, CpuSqueezeNetBindInputTensorAsInspectable) +WINML_TEST(LearningModelBindingAPITest, CastMapInt64) +WINML_TEST(LearningModelBindingAPITest, DictionaryVectorizerMapInt64) +WINML_TEST(LearningModelBindingAPITest, DictionaryVectorizerMapString) +WINML_TEST(LearningModelBindingAPITest, ZipMapInt64) +WINML_TEST(LearningModelBindingAPITest, ZipMapInt64Unbound) +WINML_TEST(LearningModelBindingAPITest, ZipMapString) +WINML_TEST(LearningModelBindingAPITest, VerifyOutputAfterEvaluateAsyncCalledTwice) +WINML_TEST(LearningModelBindingAPITest, VerifyOutputAfterImageBindCalledTwice) +WINML_TEST_CLASS_END() + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(LearningModelBindingAPITestGpu, LearningModelBindingAPITestGpuSetup) +WINML_TEST(LearningModelBindingAPITestGpu, GpuSqueezeNet) 
+WINML_TEST(LearningModelBindingAPITestGpu, GpuSqueezeNetEmptyOutputs) +WINML_TEST(LearningModelBindingAPITestGpu, GpuSqueezeNetUnboundOutputs) +WINML_TEST(LearningModelBindingAPITestGpu, ImageBindingDimensions) +WINML_TEST(LearningModelBindingAPITestGpu, VerifyInvalidBindExceptions) +WINML_TEST(LearningModelBindingAPITestGpu, BindInvalidInputName) +WINML_TEST_CLASS_END() \ No newline at end of file diff --git a/winml/test/api/LearningModelSessionAPITest.cpp b/winml/test/api/LearningModelSessionAPITest.cpp new file mode 100644 index 0000000000000..f60ae6503d068 --- /dev/null +++ b/winml/test/api/LearningModelSessionAPITest.cpp @@ -0,0 +1,423 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "testPch.h" + +#include "APITest.h" +#include "CommonDeviceHelpers.h" +#include "LearningModelSessionAPITest.h" +#include "protobufHelpers.h" +#include "winrt/Windows.Storage.h" + +#include +#include +#include "Psapi.h" + +using namespace winrt; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Foundation::Collections; + +using winrt::Windows::Foundation::IPropertyValue; + +static void LearningModelSessionAPITestSetup() { + init_apartment(); +} + +static void LearningModelSessionAPITestGpuSetup() { + GPUTEST; + init_apartment(); +} + +static void LearningModelSessionAPITestsSkipEdgeCoreSetup() { + LearningModelSessionAPITestGpuSetup(); + SKIP_EDGECORE +} + +static void CreateSessionDeviceDefault() +{ + LearningModel learningModel = nullptr; + LearningModelDevice learningModelDevice = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + + WINML_EXPECT_NO_THROW(learningModelDevice = LearningModelDevice(LearningModelDeviceKind::Default)); + WINML_EXPECT_NO_THROW(LearningModelSession(learningModel, learningModelDevice)); +} + +static void CreateSessionDeviceCpu() +{ + LearningModel learningModel = nullptr; + LearningModelDevice learningModelDevice = 
nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + + WINML_EXPECT_NO_THROW(learningModelDevice = LearningModelDevice(LearningModelDeviceKind::Cpu)); + WINML_EXPECT_NO_THROW(LearningModelSession(learningModel, learningModelDevice)); + // for the CPU device, make sure that we get back NULL and 0 for any device properties + WINML_EXPECT_EQUAL(learningModelDevice.Direct3D11Device(), nullptr); + LARGE_INTEGER id; + id.QuadPart = APITest::GetAdapterIdQuadPart(learningModelDevice); + WINML_EXPECT_EQUAL(id.LowPart, static_cast(0)); + WINML_EXPECT_EQUAL(id.HighPart, 0); +} + +static void CreateSessionWithModelLoadedFromStream() +{ + LearningModel learningModel = nullptr; + LearningModelDevice learningModelDevice = nullptr; + std::wstring path = FileHelpers::GetModulePath() + L"model.onnx"; + auto storageFile = winrt::Windows::Storage::StorageFile::GetFileFromPathAsync(path).get(); + + WINML_EXPECT_NO_THROW(learningModel = LearningModel::LoadFromStream(storageFile)); + + WINML_EXPECT_NO_THROW(learningModelDevice = LearningModelDevice(LearningModelDeviceKind::Default)); + WINML_EXPECT_NO_THROW(LearningModelSession(learningModel, learningModelDevice)); +} + +static void CreateSessionDeviceDirectX() +{ + LearningModel learningModel = nullptr; + LearningModelDevice learningModelDevice = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + + WINML_EXPECT_NO_THROW(learningModelDevice = LearningModelDevice(LearningModelDeviceKind::DirectX)); + WINML_EXPECT_NO_THROW(LearningModelSession(learningModel, learningModelDevice)); +} + +static void CreateSessionDeviceDirectXHighPerformance() +{ + LearningModel learningModel = nullptr; + LearningModelDevice learningModelDevice = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + + WINML_EXPECT_NO_THROW(learningModelDevice = LearningModelDevice(LearningModelDeviceKind::DirectXHighPerformance)); + 
WINML_EXPECT_NO_THROW(LearningModelSession(learningModel, learningModelDevice)); +} + +static void CreateSessionDeviceDirectXMinimumPower() +{ + LearningModel learningModel = nullptr; + LearningModelDevice learningModelDevice = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + + WINML_EXPECT_NO_THROW(learningModelDevice = LearningModelDevice(LearningModelDeviceKind::DirectXMinPower)); + WINML_EXPECT_NO_THROW(LearningModelSession(learningModel, learningModelDevice)); +} + +static void AdapterIdAndDevice() { + LearningModel learningModel = nullptr; + LearningModelDevice learningModelDevice = nullptr; + LearningModelSession learningModelSession = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + + com_ptr factory; + WINML_EXPECT_HRESULT_SUCCEEDED(CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void())); + com_ptr adapter; + + learningModelDevice = LearningModelDevice(LearningModelDeviceKind::DirectX); + WINML_EXPECT_HRESULT_SUCCEEDED(factory->EnumAdapters(0, adapter.put())); + DXGI_ADAPTER_DESC desc; + WINML_EXPECT_HRESULT_SUCCEEDED(adapter->GetDesc(&desc)); + LARGE_INTEGER id; + id.QuadPart = APITest::GetAdapterIdQuadPart(learningModelDevice); + WINML_EXPECT_EQUAL(desc.AdapterLuid.LowPart, id.LowPart); + WINML_EXPECT_EQUAL(desc.AdapterLuid.HighPart, id.HighPart); + WINML_EXPECT_TRUE(learningModelDevice.Direct3D11Device() != nullptr); + + learningModelDevice = LearningModelDevice(LearningModelDeviceKind::DirectXHighPerformance); + adapter = nullptr; + WINML_EXPECT_HRESULT_SUCCEEDED(factory->EnumAdapterByGpuPreference(0, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, __uuidof(IDXGIAdapter), adapter.put_void())); + WINML_EXPECT_HRESULT_SUCCEEDED(adapter->GetDesc(&desc)); + id.QuadPart = APITest::GetAdapterIdQuadPart(learningModelDevice); + WINML_EXPECT_EQUAL(desc.AdapterLuid.LowPart, id.LowPart); + WINML_EXPECT_EQUAL(desc.AdapterLuid.HighPart, id.HighPart); + 
WINML_EXPECT_TRUE(learningModelDevice.Direct3D11Device() != nullptr); + + adapter = nullptr; + learningModelDevice = LearningModelDevice(LearningModelDeviceKind::DirectXMinPower); + WINML_EXPECT_HRESULT_SUCCEEDED(factory->EnumAdapterByGpuPreference(0, DXGI_GPU_PREFERENCE_MINIMUM_POWER, __uuidof(IDXGIAdapter), adapter.put_void())); + WINML_EXPECT_HRESULT_SUCCEEDED(adapter->GetDesc(&desc)); + id.QuadPart = APITest::GetAdapterIdQuadPart(learningModelDevice); + WINML_EXPECT_EQUAL(desc.AdapterLuid.LowPart, id.LowPart); + WINML_EXPECT_EQUAL(desc.AdapterLuid.HighPart, id.HighPart); + WINML_EXPECT_TRUE(learningModelDevice.Direct3D11Device() != nullptr); + + WINML_EXPECT_NO_THROW(learningModelSession = LearningModelSession(learningModel, learningModelDevice)); + WINML_EXPECT_EQUAL(learningModelSession.Device().AdapterId(), learningModelDevice.AdapterId()); +} + +static void EvaluateFeatures() +{ + std::vector shape = { 4 }; + std::vector data = { L"one", L"two", L"three", L"four" }; + + // create from buffer + auto tensor = TensorString::CreateFromArray(shape, data); + WINML_EXPECT_EQUAL(tensor.GetAsVectorView().Size(), data.size()); + WINML_EXPECT_TRUE(std::equal(data.cbegin(), data.cend(), begin(tensor.GetAsVectorView()))); + + // create from vector view + auto dataCopy = data; + tensor = TensorString::CreateFromIterable( + shape, winrt::single_threaded_vector(std::move(dataCopy)).GetView()); + WINML_EXPECT_EQUAL(tensor.GetAsVectorView().Size(), data.size()); + WINML_EXPECT_TRUE(std::equal(data.cbegin(), data.cend(), begin(tensor.GetAsVectorView()))); + + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"id-tensor-string.onnx", learningModel)); + LearningModelSession session(learningModel); + + auto outputTensor = TensorString::Create(); + + std::map featuresstandardmap; + featuresstandardmap[L"X"] = tensor; + featuresstandardmap[L"Y"] = outputTensor; + auto featureswinrtmap = winrt::single_threaded_map(std::move(featuresstandardmap)); + 
session.EvaluateFeatures(featureswinrtmap, L"0"); + + // verify identity model round-trip works + WINML_EXPECT_EQUAL(outputTensor.GetAsVectorView().Size(), data.size()); + WINML_EXPECT_TRUE(std::equal(data.cbegin(), data.cend(), begin(outputTensor.GetAsVectorView()))); +} + +static void EvaluateFeaturesAsync() +{ + std::vector shape = { 4 }; + std::vector data = { L"one", L"two", L"three", L"four" }; + + // create from buffer + auto tensor = TensorString::CreateFromArray(shape, data); + WINML_EXPECT_EQUAL(tensor.GetAsVectorView().Size(), data.size()); + WINML_EXPECT_TRUE(std::equal(data.cbegin(), data.cend(), begin(tensor.GetAsVectorView()))); + + // create from vector view + auto dataCopy = data; + tensor = TensorString::CreateFromIterable( + shape, winrt::single_threaded_vector(std::move(dataCopy)).GetView()); + WINML_EXPECT_EQUAL(tensor.GetAsVectorView().Size(), data.size()); + WINML_EXPECT_TRUE(std::equal(data.cbegin(), data.cend(), begin(tensor.GetAsVectorView()))); + + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"id-tensor-string.onnx", learningModel)); + LearningModelSession session(learningModel); + + auto outputTensor = TensorString::Create(shape); + + std::map featuresstandardmap; + featuresstandardmap[L"X"] = tensor; + featuresstandardmap[L"Y"] = outputTensor; + auto featureswinrtmap = winrt::single_threaded_map(std::move(featuresstandardmap)); + session.EvaluateFeaturesAsync(featureswinrtmap, L"0").get(); + + // verify identity model round-trip works + WINML_EXPECT_EQUAL(outputTensor.GetAsVectorView().Size(), data.size()); + WINML_EXPECT_TRUE(std::equal(data.cbegin(), data.cend(), begin(outputTensor.GetAsVectorView()))); +} + +static void EvaluationProperties() +{ + // load a model + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + // create a session + LearningModelSession learningModelSession = nullptr; + learningModelSession = 
LearningModelSession(learningModel); + // set a property + auto value = winrt::Windows::Foundation::PropertyValue::CreateBoolean(true); + learningModelSession.EvaluationProperties().Insert(L"propName1", value); + // get the property and make sure it's there with the right value + auto value2 = learningModelSession.EvaluationProperties().Lookup(L"propName1"); + WINML_EXPECT_EQUAL(value2.as().GetBoolean(), true); +} + +static LearningModelSession CreateSession(LearningModel model) +{ + LearningModelDevice device(nullptr); + WINML_EXPECT_NO_THROW(device = LearningModelDevice(LearningModelDeviceKind::DirectX)); + + LearningModelSession session(nullptr); + if (CommonDeviceHelpers::IsFloat16Supported(device)) + { + WINML_EXPECT_NO_THROW(session = LearningModelSession(model, device)); + } + else + { + WINML_EXPECT_THROW_SPECIFIC( + session = LearningModelSession(model, device), + winrt::hresult_error, + [](const winrt::hresult_error& e) -> bool + { + return e.code() == DXGI_ERROR_UNSUPPORTED; + }); + } + + return session; +} + +static void CreateSessionWithCastToFloat16InModel() +{ + // load a model + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"fp16-truncate-with-cast.onnx", learningModel)); + + CreateSession(learningModel); +} + +static void DISABLED_CreateSessionWithFloat16InitializersInModel() +{ + // Disabled due to https://microsoft.visualstudio.com/DefaultCollection/OS/_workitems/edit/21624720: + // Model fails to resolve due to ORT using incorrect IR version within partition + + // load a model + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"fp16-initializer.onnx", learningModel)); + + CreateSession(learningModel); +} + +static void EvaluateSessionAndCloseModelHelper( + LearningModelDeviceKind kind, + bool close_model_on_session_creation) +{ + auto shape = std::vector{ 1, 1000 }; + + auto model = ProtobufHelpers::CreateModel(TensorKind::Float, shape, 1000); + + auto device = 
LearningModelDevice(kind); + auto options = LearningModelSessionOptions(); + + // close the model on session creation + options.CloseModelOnSessionCreation(close_model_on_session_creation); + + // ensure you can create a session from the model + LearningModelSession session(nullptr); + + WINML_EXPECT_NO_THROW(session = LearningModelSession(model, device, options)); + + std::vector input(1000); + std::iota(std::begin(input), std::end(input), 0.0f); + auto tensor_input = TensorFloat::CreateFromShapeArrayAndDataArray(shape, input); + auto binding = LearningModelBinding(session); + binding.Bind(L"input", tensor_input); + + LearningModelEvaluationResult result(nullptr); + WINML_EXPECT_NO_THROW(result = session.Evaluate(binding, L"")); + + if (close_model_on_session_creation) + { + // ensure that the model has been closed + WINML_EXPECT_THROW_SPECIFIC( + LearningModelSession(model, device, options), + winrt::hresult_error, + [](const winrt::hresult_error& e) -> bool + { + return e.code() == E_INVALIDARG; + }); + } + else + { + WINML_EXPECT_NO_THROW(LearningModelSession(model, device, options)); + } +} + +static void EvaluateSessionAndCloseModel() +{ + WINML_EXPECT_NO_THROW(::EvaluateSessionAndCloseModelHelper(LearningModelDeviceKind::Cpu, true)); + WINML_EXPECT_NO_THROW(::EvaluateSessionAndCloseModelHelper(LearningModelDeviceKind::Cpu, false)); +} + +static void CloseSession() +{ + LearningModel learningModel = nullptr; + WINML_EXPECT_NO_THROW(APITest::LoadModel(L"model.onnx", learningModel)); + LearningModelSession session = nullptr; + + /* + HANDLE currentProcessHandle = NULL; + try + { + currentProcessHandle = GetCurrentProcess(); + } + catch (...) 
+ { + VERIFY_FAIL(L"Failed to get current process handle."); + } + PROCESS_MEMORY_COUNTERS pmc = { 0 }; + SIZE_T beforeSessionCloseWorkingSetSize = 0; + SIZE_T afterSessionCloseWorkingSetSize = 0; + bool getProcessMemoryInfoSuccess = false; + */ + WINML_EXPECT_NO_THROW(session = LearningModelSession(learningModel)); + + /* + // Get the current process memory info after session creation. + getProcessMemoryInfoSuccess = GetProcessMemoryInfo(currentProcessHandle, &pmc, sizeof(pmc)); + if (!getProcessMemoryInfoSuccess) + { + VERIFY_FAIL(L"Failed to get current process memory info."); + } + beforeSessionCloseWorkingSetSize = pmc.WorkingSetSize; + pmc = { 0 }; + */ + WINML_EXPECT_NO_THROW(session.Close()); + + /* + Bug 23659026: Working set difference tolerance is too tight for LearningModelSessionAPITests::CloseSession + https://microsoft.visualstudio.com/OS/_workitems/edit/23659026 + + // Check that working set size has dropped after session close + getProcessMemoryInfoSuccess = GetProcessMemoryInfo(currentProcessHandle, &pmc, sizeof(pmc)); + if (!getProcessMemoryInfoSuccess) + { + VERIFY_FAIL(L"Failed to get current process memory info."); + } + afterSessionCloseWorkingSetSize = pmc.WorkingSetSize; + pmc = { 0 }; + + // expected working set difference of session close. It is approximately 2x the size of the weights of model.onnx + // there needs to be a tolerance because the working set difference varies from run to run. + + // Bug 23739697: Closing Session API in LearningModelSessionAPITests::CloseSession doesn't always result in ~2x working set memory reduction. 
+ // https://microsoft.visualstudio.com/OS/_workitems/edit/23739697 + float tolerance = 0.4f; + int64_t expectedWorkingSetDifference = 9662464; + VERIFY_IS_LESS_THAN(expectedWorkingSetDifference - (beforeSessionCloseWorkingSetSize - afterSessionCloseWorkingSetSize), expectedWorkingSetDifference * tolerance); + */ + + // verify that model still has metadata info after session close + std::wstring author(learningModel.Author()); + WINML_EXPECT_EQUAL(author, L"onnx-caffe2"); + + // verify that session throws RO_E_CLOSED error + std::vector input(1 * 3 * 224 * 224, 0); + std::vector shape = { 1, 3, 224, 224 }; + auto tensor_input = TensorFloat::CreateFromShapeArrayAndDataArray(shape, input); + WINML_EXPECT_THROW_SPECIFIC(LearningModelBinding binding(session), + winrt::hresult_error, + [](const winrt::hresult_error &e) -> bool + { + return e.code() == RO_E_CLOSED; + }); + } + +const LearningModelSesssionAPITestApi& getapi() { + static constexpr LearningModelSesssionAPITestApi api = + { + LearningModelSessionAPITestSetup, + LearningModelSessionAPITestGpuSetup, + LearningModelSessionAPITestsSkipEdgeCoreSetup, + CreateSessionDeviceDefault, + CreateSessionDeviceCpu, + CreateSessionWithModelLoadedFromStream, + CreateSessionDeviceDirectX, + CreateSessionDeviceDirectXHighPerformance, + CreateSessionDeviceDirectXMinimumPower, + AdapterIdAndDevice, + EvaluateFeatures, + EvaluateFeaturesAsync, + EvaluationProperties, + CreateSessionWithCastToFloat16InModel, + DISABLED_CreateSessionWithFloat16InitializersInModel, + EvaluateSessionAndCloseModel, + CloseSession, + }; + return api; +} diff --git a/winml/test/api/LearningModelSessionAPITest.h b/winml/test/api/LearningModelSessionAPITest.h new file mode 100644 index 0000000000000..618fab0ea7628 --- /dev/null +++ b/winml/test/api/LearningModelSessionAPITest.h @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "test.h" + +struct LearningModelSesssionAPITestApi { + SetupTest LearningModelSessionAPITestSetup; + SetupTest LearningModelSessionAPITestGpuSetup; + SetupTest LearningModelSessionAPITestsSkipEdgeCoreSetup; + VoidTest CreateSessionDeviceDefault; + VoidTest CreateSessionDeviceCpu; + VoidTest CreateSessionWithModelLoadedFromStream; + VoidTest CreateSessionDeviceDirectX; + VoidTest CreateSessionDeviceDirectXHighPerformance; + VoidTest CreateSessionDeviceDirectXMinimumPower; + VoidTest AdapterIdAndDevice; + VoidTest EvaluateFeatures; + VoidTest EvaluateFeaturesAsync; + VoidTest EvaluationProperties; + VoidTest CreateSessionWithCastToFloat16InModel; + VoidTest DISABLED_CreateSessionWithFloat16InitializersInModel; + VoidTest EvaluateSessionAndCloseModel; + VoidTest CloseSession; +}; +const LearningModelSesssionAPITestApi& getapi(); + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(LearningModelSessionAPITest, LearningModelSessionAPITestSetup) +WINML_TEST(LearningModelSessionAPITest, CreateSessionDeviceDefault) +WINML_TEST(LearningModelSessionAPITest,CreateSessionDeviceCpu) +WINML_TEST(LearningModelSessionAPITest,CreateSessionWithModelLoadedFromStream) +WINML_TEST(LearningModelSessionAPITest,EvaluateFeatures) +WINML_TEST(LearningModelSessionAPITest,EvaluateFeaturesAsync) +WINML_TEST(LearningModelSessionAPITest,EvaluationProperties) +WINML_TEST(LearningModelSessionAPITest,EvaluateSessionAndCloseModel) +WINML_TEST_CLASS_END() + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(LearningModelSessionAPITestGpu, LearningModelSessionAPITestGpuSetup) +WINML_TEST(LearningModelSessionAPITestGpu, CreateSessionDeviceDirectX) +WINML_TEST(LearningModelSessionAPITestGpu, CreateSessionDeviceDirectXHighPerformance) +WINML_TEST(LearningModelSessionAPITestGpu, CreateSessionDeviceDirectXMinimumPower) +WINML_TEST(LearningModelSessionAPITestGpu, CreateSessionWithCastToFloat16InModel) +WINML_TEST(LearningModelSessionAPITestGpu, DISABLED_CreateSessionWithFloat16InitializersInModel) +WINML_TEST_CLASS_END() + 
+WINML_TEST_CLASS_BEGIN_WITH_SETUP(LearningModelSessionAPITestsSkipEdgeCore, LearningModelSessionAPITestsSkipEdgeCoreSetup) +WINML_TEST(LearningModelSessionAPITestsSkipEdgeCore, AdapterIdAndDevice) +WINML_TEST_CLASS_END() \ No newline at end of file diff --git a/winml/test/api/models/fp16-initializer.onnx b/winml/test/api/models/fp16-initializer.onnx new file mode 100644 index 0000000000000..36e7e5e8e03ed --- /dev/null +++ b/winml/test/api/models/fp16-initializer.onnx @@ -0,0 +1,12 @@ +sheilk:Š +6 +fp16_initializercast_to_float_output"Cast* +to +! +X +cast_to_float_outputY"Add* +*Bfp16_initializerZ +X +b +Y +B \ No newline at end of file diff --git a/winml/test/api/models/fp16-truncate-with-cast.onnx b/winml/test/api/models/fp16-truncate-with-cast.onnx new file mode 100644 index 0000000000000..74b459db71ebd --- /dev/null +++ b/winml/test/api/models/fp16-truncate-with-cast.onnx @@ -0,0 +1,12 @@ +sheilk:r +, +Xcast_to_float16_output"Cast* +to +  +, +cast_to_float16_outputY"Cast* +to  Z +X +b +Y + B \ No newline at end of file diff --git a/winml/test/api/models/id-tensor-string.onnx b/winml/test/api/models/id-tensor-string.onnx new file mode 100644 index 0000000000000..3959b0d2f9868 --- /dev/null +++ b/winml/test/api/models/id-tensor-string.onnx @@ -0,0 +1,11 @@ +dwayner:> + +XYIdentity"IdentityZ +X + + +b +Y + + +B \ No newline at end of file diff --git a/winml/test/collateral/images/100x100.png b/winml/test/collateral/images/100x100.png new file mode 100644 index 0000000000000..ef222ed264816 Binary files /dev/null and b/winml/test/collateral/images/100x100.png differ diff --git a/winml/test/collateral/images/227x227.png b/winml/test/collateral/images/227x227.png new file mode 100644 index 0000000000000..facc9cbd6e433 Binary files /dev/null and b/winml/test/collateral/images/227x227.png differ diff --git a/winml/test/collateral/images/LICENSE.md b/winml/test/collateral/images/LICENSE.md new file mode 100644 index 0000000000000..dd8020a1e4c9e --- /dev/null +++ 
b/winml/test/collateral/images/LICENSE.md @@ -0,0 +1,6 @@ +# Licenses + +| Image | Source | License | +| ----------- | ---------- | ----------- | +| [fish_720](fish_720.png), [fish](fish.png) | https://commons.wikimedia.org/wiki/File:Tinca_tinca.jpeg | CC Attribution 3.0 Unported | +| [kitten_224](kitten_224.png) | https://clipart.info/42-cat-png-image-download-picture-kitten-8177 | CC BY 4.0 | diff --git a/winml/test/collateral/images/fish.png b/winml/test/collateral/images/fish.png new file mode 100644 index 0000000000000..5c083f805f0f0 Binary files /dev/null and b/winml/test/collateral/images/fish.png differ diff --git a/winml/test/collateral/images/fish_720.png b/winml/test/collateral/images/fish_720.png new file mode 100644 index 0000000000000..3d413cfc2afd5 Binary files /dev/null and b/winml/test/collateral/images/fish_720.png differ diff --git a/winml/test/collateral/images/kitten_224.png b/winml/test/collateral/images/kitten_224.png new file mode 100644 index 0000000000000..e47ca94863757 Binary files /dev/null and b/winml/test/collateral/images/kitten_224.png differ diff --git a/winml/test/collateral/models/Add_ImageNet1920.onnx b/winml/test/collateral/models/Add_ImageNet1920.onnx new file mode 100644 index 0000000000000..694f3e5c65839 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920.onnx @@ -0,0 +1,27 @@ + OnnxMLTools +0.1.0.0000"onnxml:Ä +/ + +input_39:0 + +input_40:0 add_3/add:0Add"Addkeras_Add_ImageNet_smallZ& + +input_39:0 + + + +¸ +€Z& + +input_40:0 + + + +¸ +€b' + add_3/add:0 + + + +¸ +€B \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_LINEAR_0_255.onnx b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_LINEAR_0_255.onnx new file mode 100644 index 0000000000000..3943966a5ba24 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_LINEAR_0_255.onnx @@ -0,0 +1,27 @@ + OnnxMLTools +0.1.0.0000"onnxml:í +% +input_39 
+input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b^ +add_3U +LH + +DATA_BATCH + DATA_CHANNEL +¸ DATA_FEATURE +€ DATA_FEATURE2IMAGEBr +Image.BitmapPixelFormatBgr8r +Image.ColorSpaceGammaLinearr- +Image.NominalPixelRangeNominalRange_0_255 \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_1.onnx b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_1.onnx new file mode 100644 index 0000000000000..88fb241f32f04 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_1.onnx @@ -0,0 +1,27 @@ + OnnxMLTools +0.1.0.0000"onnxml:í +% +input_39 +input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b^ +add_3U +LH + +DATA_BATCH + DATA_CHANNEL +¸ DATA_FEATURE +€ DATA_FEATURE2IMAGEBr +Image.BitmapPixelFormatBgr8r +Image.ColorSpaceGammaSRGBr) +Image.NominalPixelRangeNormalized_0_1 \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_255.onnx b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_255.onnx new file mode 100644 index 0000000000000..11f1bf6178555 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_255.onnx @@ -0,0 +1,26 @@ + OnnxMLTools +0.1.0.0000"onnxml:· +% +input_39 +input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b( +add_3 + + + +¸ +€2IMAGEBr +Image.BitmapPixelFormatBgr8r +Image.ColorSpaceGammaSRGBr- +Image.NominalPixelRangeNominalRange_0_255 \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_16_235.onnx b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_16_235.onnx new file mode 100644 index 0000000000000..6a0b5692ce94d --- /dev/null +++ 
b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_16_235.onnx @@ -0,0 +1,27 @@ + OnnxMLTools +0.1.0.0000"onnxml:í +% +input_39 +input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b^ +add_3U +LH + +DATA_BATCH + DATA_CHANNEL +¸ DATA_FEATURE +€ DATA_FEATURE2IMAGEBr +Image.BitmapPixelFormatBgr8r +Image.ColorSpaceGammaSRGBr. +Image.NominalPixelRangeNominalRange_16_235 \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_1_1.onnx b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_1_1.onnx new file mode 100644 index 0000000000000..c3faef843f933 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgr8_SRGB_1_1.onnx @@ -0,0 +1,27 @@ + OnnxMLTools +0.1.0.0000"onnxml:í +% +input_39 +input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b^ +add_3U +LH + +DATA_BATCH + DATA_CHANNEL +¸ DATA_FEATURE +€ DATA_FEATURE2IMAGEBr +Image.BitmapPixelFormatBgr8r +Image.ColorSpaceGammaSRGBr) +Image.NominalPixelRangeNormalized_1_1 \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgra8_SRGB_0_255.onnx b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgra8_SRGB_0_255.onnx new file mode 100644 index 0000000000000..dfb06e76ce2e7 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataBgra8_SRGB_0_255.onnx @@ -0,0 +1,26 @@ + OnnxMLTools +0.1.0.0000"onnxml:· +% +input_39 +input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b( +add_3 + + + +¸ +€2IMAGEBr +Image.BitmapPixelFormatBgra8r +Image.ColorSpaceGammaSRGBr- +Image.NominalPixelRangeNominalRange_0_255 \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataRgb8_SRGB_0_255.onnx 
b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataRgb8_SRGB_0_255.onnx new file mode 100644 index 0000000000000..7478cb96e57e4 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataRgb8_SRGB_0_255.onnx @@ -0,0 +1,26 @@ + OnnxMLTools +0.1.0.0000"onnxml:· +% +input_39 +input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b( +add_3 + + + +¸ +€2IMAGEBr +Image.BitmapPixelFormatRgb8r +Image.ColorSpaceGammaSRGBr- +Image.NominalPixelRangeNominalRange_0_255 \ No newline at end of file diff --git a/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataRgba8_SRGB_0_255.onnx b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataRgba8_SRGB_0_255.onnx new file mode 100644 index 0000000000000..cd325f227feb1 --- /dev/null +++ b/winml/test/collateral/models/Add_ImageNet1920WithImageMetadataRgba8_SRGB_0_255.onnx @@ -0,0 +1,26 @@ + OnnxMLTools +0.1.0.0000"onnxml:· +% +input_39 +input_40add_3Add"Addkeras_Add_ImageNet_smallZ$ +input_39 + + + +¸ +€Z$ +input_40 + + + +¸ +€b( +add_3 + + + +¸ +€2IMAGEBr +Image.BitmapPixelFormatRgba8r +Image.ColorSpaceGammaSRGBr- +Image.NominalPixelRangeNominalRange_0_255 \ No newline at end of file diff --git a/winml/test/collateral/models/LICENSE.md b/winml/test/collateral/models/LICENSE.md new file mode 100644 index 0000000000000..f6b9ebbecc768 --- /dev/null +++ b/winml/test/collateral/models/LICENSE.md @@ -0,0 +1,65 @@ +# Licenses + +| Model | Source | License | +| ----------- | ---------- | ----------- | +| SqueezeNet | https://github.com/DeepScale/SqueezeNet | BSD 2-Clause | +| Starry Night | https://github.com/pytorch/examples/tree/master/fast_neural_style | BSD 3-Clause | + +SqueezeNet license: + +``` +BSD LICENSE. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, this list of conditions +and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +and the following disclaimer in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` + +Starry Night license: + +``` +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` diff --git a/winml/test/collateral/models/ModelSubdirectory/ModelInSubdirectory.onnx b/winml/test/collateral/models/ModelSubdirectory/ModelInSubdirectory.onnx new file mode 100644 index 0000000000000..64e447620ca5c Binary files /dev/null and b/winml/test/collateral/models/ModelSubdirectory/ModelInSubdirectory.onnx differ diff --git a/winml/test/collateral/models/bad_names.onnx b/winml/test/collateral/models/bad_names.onnx new file mode 100644 index 0000000000000..5a0645c0d1146 --- /dev/null +++ b/winml/test/collateral/models/bad_names.onnx @@ -0,0 +1,13 @@ +kiyoung:^ +* + +input/nameoutput:0Identity"IdentityZ + +input/name + + +b +output:0 + + + \ No newline at end of file diff --git a/winml/test/collateral/models/castmap-int64.onnx b/winml/test/collateral/models/castmap-int64.onnx new file mode 100644 index 0000000000000..e80ecfa15e38b --- /dev/null +++ b/winml/test/collateral/models/castmap-int64.onnx @@ -0,0 +1,12 @@ +dwayner:N +$ +XYCastMap"CastMap: +ai.onnx.mlZ +X* + + +b +Y + + +B \ No newline at end of file diff --git a/winml/test/collateral/models/conv-float.onnx b/winml/test/collateral/models/conv-float.onnx new file mode 100644 index 0000000000000..76206b93c1631 Binary files /dev/null and 
b/winml/test/collateral/models/conv-float.onnx differ diff --git a/winml/test/collateral/models/dictvectorizer-int64.onnx b/winml/test/collateral/models/dictvectorizer-int64.onnx new file mode 100644 index 0000000000000..063580f747776 Binary files /dev/null and b/winml/test/collateral/models/dictvectorizer-int64.onnx differ diff --git a/winml/test/collateral/models/dictvectorizer-string.onnx b/winml/test/collateral/models/dictvectorizer-string.onnx new file mode 100644 index 0000000000000..389ec8dee78be Binary files /dev/null and b/winml/test/collateral/models/dictvectorizer-string.onnx differ diff --git a/winml/test/collateral/models/foo.onnx b/winml/test/collateral/models/foo.onnx new file mode 100644 index 0000000000000..f786e4ca75444 Binary files /dev/null and b/winml/test/collateral/models/foo.onnx differ diff --git a/winml/test/collateral/models/foo_truncated.onnx b/winml/test/collateral/models/foo_truncated.onnx new file mode 100644 index 0000000000000..6eae1c6f9eb72 Binary files /dev/null and b/winml/test/collateral/models/foo_truncated.onnx differ diff --git a/winml/test/collateral/models/free_dimensional_imageDes.onnx b/winml/test/collateral/models/free_dimensional_imageDes.onnx new file mode 100644 index 0000000000000..87687688f9b40 --- /dev/null +++ b/winml/test/collateral/models/free_dimensional_imageDes.onnx @@ -0,0 +1,33 @@ + OnnxMLTools +0.1.0.0000"onnxml:¬ +/ + +input_39:0 + +input_40:0 add_3/add:0Add"Addkeras_Add_ImageNet_smallZs + +input_39:0e +\X + +DATA_BATCH + DATA_CHANNEL +ÿÿÿÿÿÿÿÿÿ DATA_FEATURE +ÿÿÿÿÿÿÿÿÿ DATA_FEATURE2IMAGEZs + +input_40:0e +\X + +DATA_BATCH + DATA_CHANNEL +ÿÿÿÿÿÿÿÿÿ DATA_FEATURE +ÿÿÿÿÿÿÿÿÿ DATA_FEATURE2IMAGEbu + add_3/add:0f +\X + +DATA_BATCH + DATA_CHANNEL +ÿÿÿÿÿÿÿÿÿ DATA_FEATURE +ÿÿÿÿÿÿÿÿÿ DATA_FEATURE2TENSORBr +Image.BitmapPixelFormatBgr8r +Image.ColorSpaceGammaSRGBr- +Image.NominalPixelRangeNominalRange_0_255 \ No newline at end of file diff --git a/winml/test/collateral/models/free_dimensional_image_input.onnx 
b/winml/test/collateral/models/free_dimensional_image_input.onnx new file mode 100644 index 0000000000000..6750f70262789 --- /dev/null +++ b/winml/test/collateral/models/free_dimensional_image_input.onnx @@ -0,0 +1,27 @@ + OnnxMLTools +0.1.0.0000"onnxml:ô +/ + +input_39:0 + +input_40:0 add_3/add:0Add"Addkeras_Add_ImageNet_smallZ6 + +input_39:0( +&" + + + ÿÿÿÿÿÿÿÿÿ + ÿÿÿÿÿÿÿÿÿZ6 + +input_40:0( +&" + + + ÿÿÿÿÿÿÿÿÿ + ÿÿÿÿÿÿÿÿÿb7 + add_3/add:0( +&" + + + ÿÿÿÿÿÿÿÿÿ + ÿÿÿÿÿÿÿÿÿB \ No newline at end of file diff --git a/winml/test/collateral/models/mnist.onnx b/winml/test/collateral/models/mnist.onnx new file mode 100644 index 0000000000000..bb189a52ea6ed Binary files /dev/null and b/winml/test/collateral/models/mnist.onnx differ diff --git a/winml/test/collateral/models/modelWith2MetaData.onnx b/winml/test/collateral/models/modelWith2MetaData.onnx new file mode 100644 index 0000000000000..bd6dfdc0c385d Binary files /dev/null and b/winml/test/collateral/models/modelWith2MetaData.onnx differ diff --git a/winml/test/collateral/models/modelWithMetaData.onnx b/winml/test/collateral/models/modelWithMetaData.onnx new file mode 100644 index 0000000000000..f8d7e1613d671 Binary files /dev/null and b/winml/test/collateral/models/modelWithMetaData.onnx differ diff --git a/winml/test/collateral/models/mul.onnx b/winml/test/collateral/models/mul.onnx new file mode 100644 index 0000000000000..ad295112ebafc Binary files /dev/null and b/winml/test/collateral/models/mul.onnx differ diff --git a/winml/test/collateral/models/relu.onnx b/winml/test/collateral/models/relu.onnx new file mode 100644 index 0000000000000..d9f25e904b886 --- /dev/null +++ b/winml/test/collateral/models/relu.onnx @@ -0,0 +1,11 @@ +justoeck:0 + +XY"ReluZ +X + + +b +Y + + +B \ No newline at end of file diff --git a/winml/test/collateral/models/squeezenet_modifiedforruntimestests.onnx b/winml/test/collateral/models/squeezenet_modifiedforruntimestests.onnx new file mode 100644 index 0000000000000..64e447620ca5c Binary 
files /dev/null and b/winml/test/collateral/models/squeezenet_modifiedforruntimestests.onnx differ diff --git a/winml/test/collateral/models/squeezenet_tensor_input.onnx b/winml/test/collateral/models/squeezenet_tensor_input.onnx new file mode 100644 index 0000000000000..e14d8502a8171 Binary files /dev/null and b/winml/test/collateral/models/squeezenet_tensor_input.onnx differ diff --git a/winml/test/collateral/models/starry-night-fp16.onnx b/winml/test/collateral/models/starry-night-fp16.onnx new file mode 100644 index 0000000000000..9a002019055b5 Binary files /dev/null and b/winml/test/collateral/models/starry-night-fp16.onnx differ diff --git a/winml/test/collateral/models/zipmap-int64.onnx b/winml/test/collateral/models/zipmap-int64.onnx new file mode 100644 index 0000000000000..9884705603685 Binary files /dev/null and b/winml/test/collateral/models/zipmap-int64.onnx differ diff --git a/winml/test/collateral/models/zipmap-string.onnx b/winml/test/collateral/models/zipmap-string.onnx new file mode 100644 index 0000000000000..c29d5a72f4cab Binary files /dev/null and b/winml/test/collateral/models/zipmap-string.onnx differ diff --git a/winml/test/common/SqueezeNetValidator.cpp b/winml/test/common/SqueezeNetValidator.cpp new file mode 100644 index 0000000000000..8beec6f48ed46 --- /dev/null +++ b/winml/test/common/SqueezeNetValidator.cpp @@ -0,0 +1,292 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "SqueezeNetValidator.h" +#include "protobufHelpers.h" +#include "fileHelpers.h" +#include "core/common/common.h" +#include +#include +#include +#include +#include +// using namespace winrt::Windows::Foundation; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Foundation::Collections; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Storage; +using namespace winrt::Windows::Storage::Streams; + +namespace WinML::Engine::Test{ + +#define MAX_PROFILING_LOOP 100 + + +static void BindImage( + LearningModelBinding binding, + const wchar_t* name, + const wchar_t* fullImagePath, + bool bindAsInspectable = false) +{ + auto imagefile = StorageFile::GetFileFromPathAsync(fullImagePath).get(); + auto stream = imagefile.OpenAsync(FileAccessMode::Read).get(); + auto decoder = BitmapDecoder::CreateAsync(stream).get(); + auto softwareBitmap = decoder.GetSoftwareBitmapAsync().get(); + auto frame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); + + if (bindAsInspectable) + { + binding.Bind(name, frame); + } + else + { + auto imagetensor = ImageFeatureValue::CreateFromVideoFrame(frame); + binding.Bind(name, imagetensor); + } +} + +static void BindTensor( + LearningModelBinding binding, + const wchar_t* name, + ITensor inputTensor, + bool bindAsInspectable = false) +{ + if (inputTensor == nullptr) + { + throw winrt::hresult_invalid_argument(L"input tensor provided to squeezenet is null."); + } + + if (bindAsInspectable) + { + binding.Bind(name, inputTensor.as().GetAsVectorView()); + } + else + { + binding.Bind(name, inputTensor); + } +} + +template +ITensor BindOutput( + OutputBindingStrategy strategy, + LearningModelBinding binding, + const wchar_t* name, + const IVectorView shape = nullptr +) +{ + ITensor outputTensor = nullptr; + switch (strategy) + { + case OutputBindingStrategy::Bound: + outputTensor = T::Create(shape); + binding.Bind(name, 
outputTensor); + break; + case OutputBindingStrategy::Empty: + outputTensor = T::Create(); + binding.Bind(name, outputTensor); + break; + case OutputBindingStrategy::Unbound: + __fallthrough; + default: + break; + } + + return outputTensor; +} + +ImageFeatureValue BindImageOutput( + OutputBindingStrategy strategy, + LearningModelBinding binding, + const wchar_t* name +) +{ + ImageFeatureValue outputTensor = nullptr; + switch (strategy) + { + case OutputBindingStrategy::Bound: + { + SoftwareBitmap bitmap(BitmapPixelFormat::Bgra8, 720, 720); + VideoFrame frame = VideoFrame::CreateWithSoftwareBitmap(bitmap); + outputTensor = ImageFeatureValue::CreateFromVideoFrame(frame); + binding.Bind(name, outputTensor); + break; + } + case OutputBindingStrategy::Unbound: + __fallthrough; + } + + return outputTensor; +} + + +void ModelValidator::FnsCandy16( + std::string instance, + LearningModelDeviceKind deviceKind, + OutputBindingStrategy outputBindingStrategy, + bool bindInputsAsIInspectable, + float dataTolerance) +{ + ORT_UNUSED_PARAMETER(dataTolerance); + // file name strings + static wchar_t* modelFileName = L"winmlperf_coreml_FNS-Candy_prerelease_fp16.onnx"; + static wchar_t* inputDataImageFileName = L"fish_720.png"; + static wchar_t* outputDataFileName = L"output.png"; + static wchar_t* inputBindingName = L"inputImage"; + static const wchar_t* outputDataBindingName = L"outputImage"; + + auto modulePath = FileHelpers::GetModulePath(); + auto fullModelPath = modulePath + modelFileName; + auto outputFileName = modulePath + outputDataFileName; + + // WinML model creation + LearningModel model = nullptr; + model = LearningModel::LoadFromFilePath(fullModelPath); + + LearningModelSession modelSession = nullptr; + modelSession = LearningModelSession(model, LearningModelDevice(deviceKind)); + + LearningModelBinding modelBinding(modelSession); + auto fullImagePath = modulePath + inputDataImageFileName; + BindImage(modelBinding, inputBindingName, fullImagePath.c_str(), 
bindInputsAsIInspectable); + + // create the tensor for the actual output + auto output = model.OutputFeatures().First().Current(); + if (output.Kind() != LearningModelFeatureKind::Tensor) + { + throw winrt::hresult_invalid_argument(L"Model output kind is not type Tensor"); + } + + auto shape = winrt::single_threaded_vector(std::vector {1, 1}); + auto outputTensor = BindImageOutput(outputBindingStrategy, modelBinding, outputDataBindingName); + + // Evaluate the model + std::cout << "Calling EvaluateSync on instance" << instance << "\n"; + LearningModelEvaluationResult result = nullptr; + result = modelSession.Evaluate(modelBinding, {}); + + // Get results + if (outputBindingStrategy == OutputBindingStrategy::Unbound) + { + // When output binding strategy is unbound, the output tensor was not set on bind. + // Therefore, we need to retrieve it from the LearnignModelEvaluationResult + // TODO: is this right? outputTensorT is unused... + /*auto outputTensorT = */result.Outputs().Lookup(outputDataBindingName).as(); + } + else + { + if (result.Outputs().Lookup(outputDataBindingName) != outputTensor) + { + throw winrt::hresult_invalid_argument(L"Evaluation Results lookup don't match LearningModelBinding Output Tensor."); + } + + auto softwareBitmap = outputTensor.VideoFrame().SoftwareBitmap(); + + auto folder = StorageFolder::GetFolderFromPathAsync(modulePath.c_str()).get(); + auto imagefile = folder.CreateFileAsync(outputDataFileName, CreationCollisionOption::ReplaceExisting).get(); + auto stream = imagefile.OpenAsync(FileAccessMode::ReadWrite).get(); + auto encoder = BitmapEncoder::CreateAsync(BitmapEncoder::JpegEncoderId(), stream).get(); + encoder.SetSoftwareBitmap(softwareBitmap); + encoder.FlushAsync(); + + } +} + +void ModelValidator::SqueezeNet( + std::string instance, + LearningModelDeviceKind deviceKind, + float dataTolerance, + bool bindAsImage, + OutputBindingStrategy outputBindingStrategy, + bool bindInputsAsIInspectable) +{ + // file name strings + static 
wchar_t* modelFileName = L"model.onnx"; + static wchar_t* inputDataFileName = L"test_data_0_input.pb"; + static wchar_t* outputDataFileName = L"test_data_0_output.pb"; + static wchar_t* inputBindingName = L"data_0"; + static wchar_t* inputDataImageFileName = L"kitten_224.png"; + static const wchar_t* outputDataBindingName = L"softmaxout_1"; + + auto modulePath = FileHelpers::GetModulePath(); + auto fullModelPath = modulePath + modelFileName; + auto outputFileName = modulePath + outputDataFileName; + + // WinML model creation + LearningModel model = nullptr; + model = LearningModel::LoadFromFilePath(fullModelPath); + + LearningModelSession modelSession = nullptr; + modelSession = LearningModelSession(model, LearningModelDevice(deviceKind)); + + LearningModelBinding modelBinding(modelSession); + + if (bindAsImage) + { + std::wstring fullImagePath = modulePath + inputDataImageFileName; + BindImage(modelBinding, inputBindingName, fullImagePath.c_str(), bindInputsAsIInspectable); + } + else + { + auto inputDataPath = modulePath + inputDataFileName; + auto inputTensor = ProtobufHelpers::LoadTensorFromProtobufFile(inputDataPath, false); + BindTensor(modelBinding, inputBindingName, inputTensor, bindInputsAsIInspectable); + } + + // load up the expected output + auto expectedResultsTensor = ProtobufHelpers::LoadTensorFromProtobufFile(outputFileName, false); + if (expectedResultsTensor == nullptr) + { + throw winrt::hresult_invalid_argument(L"Expected Results from protobuf file are null."); + } + + // create the tensor for the actual output + auto output = model.OutputFeatures().First().Current(); + if (output.Kind() != LearningModelFeatureKind::Tensor) + { + throw winrt::hresult_invalid_argument(L"Expected output feature kind of model to be Tensor"); + } + + auto outputTensor = BindOutput( + outputBindingStrategy, modelBinding, outputDataBindingName, expectedResultsTensor.Shape()); + + // Evaluate the model + std::cout << "Calling EvaluateSync on instance" << instance << 
"\n"; + LearningModelEvaluationResult result = nullptr; + result = modelSession.Evaluate(modelBinding, {}); + + // Get results + if (outputBindingStrategy == OutputBindingStrategy::Unbound) + { + // When output binding strategy is unbound, the output tensor was not set on bind. + // Therefore, we need to retrieve it from the LearnignModelEvaluationResult + outputTensor = result.Outputs().Lookup(outputDataBindingName).as(); + } + else + { + if (result.Outputs().Lookup(outputDataBindingName) != outputTensor) + { + throw winrt::hresult_error(E_UNEXPECTED, L"Evaluation Results lookup don't match LearningModelBinding output tensor."); + } + } + + auto outDataExpected = expectedResultsTensor.as().GetAsVectorView(); + auto outDataActual = outputTensor.as().GetAsVectorView(); + + if (outDataActual.Size() != outDataExpected.Size()) + { + throw winrt::hresult_error(E_UNEXPECTED, L"Actual tensor data size doesn't match expected tensor data size."); + } + for (uint32_t i = 0; i < outDataActual.Size(); i++) + { + float delta = std::abs(outDataActual.GetAt(i) - outDataExpected.GetAt(i)); + if (delta > dataTolerance) + { + std::wstringstream ss; + ss << "EXPECTED: " << outDataExpected.GetAt(i) << " , ACTUAL: " << outDataActual.GetAt(i) + << "instance " << instance.c_str() << ", element " << i; + throw winrt::hresult_error(E_UNEXPECTED, ss.str()); + } + } +} +} diff --git a/winml/test/common/SqueezeNetValidator.h b/winml/test/common/SqueezeNetValidator.h new file mode 100644 index 0000000000000..ef8ad2cd00240 --- /dev/null +++ b/winml/test/common/SqueezeNetValidator.h @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "std.h" + +enum OutputBindingStrategy { Bound, Unbound, Empty }; + +namespace WinML::Engine::Test::ModelValidator +{ + void FnsCandy16( + std::string instance, + winrt::Windows::AI::MachineLearning::LearningModelDeviceKind deviceKind, + OutputBindingStrategy outputBindingStrategy, + bool bindInputsAsIInspectable, + float dataTolerance = false); + + void SqueezeNet( + std::string instance, + winrt::Windows::AI::MachineLearning::LearningModelDeviceKind deviceKind, + float dataTolerance, + bool bindAsImage = false, + OutputBindingStrategy outputBindingStrategy = OutputBindingStrategy::Bound, + bool bindInputsAsIInspectable = false + ); +} diff --git a/winml/test/common/dllload.cpp b/winml/test/common/dllload.cpp new file mode 100644 index 0000000000000..01de69da11e30 --- /dev/null +++ b/winml/test/common/dllload.cpp @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "Std.h" +#include "fileHelpers.h" +#include + +extern "C" +{ + HRESULT __stdcall OS_RoGetActivationFactory(HSTRING classId, GUID const& iid, void** factory) noexcept; +} + +#ifdef _M_IX86 +#pragma comment(linker, "/alternatename:_OS_RoGetActivationFactory@12=_RoGetActivationFactory@12") +#else +#pragma comment(linker, "/alternatename:OS_RoGetActivationFactory=RoGetActivationFactory") +#endif + +bool starts_with(std::wstring_view value, std::wstring_view match) noexcept +{ + return 0 == value.compare(0, match.size(), match); +} + +HRESULT __stdcall WINRT_RoGetActivationFactory(HSTRING classId_hstring, GUID const& iid, void** factory) noexcept +{ + *factory = nullptr; + std::wstring_view name{ WindowsGetStringRawBuffer(classId_hstring, nullptr), WindowsGetStringLen(classId_hstring) }; + HMODULE library{ nullptr }; + + std::wstring winmlDllPath = FileHelpers::GetWinMLPath() + L"Windows.AI.MachineLearning.dll"; + + if (starts_with(name, L"Windows.AI.MachineLearning.")) + { + const wchar_t* libPath = 
winmlDllPath.c_str(); + library = LoadLibraryW(libPath); + } + else + { + return OS_RoGetActivationFactory(classId_hstring, iid, factory); + } + + if (!library) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + using DllGetActivationFactory = HRESULT __stdcall(HSTRING classId, void** factory); + auto call = reinterpret_cast(GetProcAddress(library, "DllGetActivationFactory")); + + if (!call) + { + HRESULT const hr = HRESULT_FROM_WIN32(GetLastError()); + WINRT_VERIFY(FreeLibrary(library)); + return hr; + } + + winrt::com_ptr activation_factory; + HRESULT const hr = call(classId_hstring, activation_factory.put_void()); + + if (FAILED(hr)) + { + WINRT_VERIFY(FreeLibrary(library)); + return hr; + } + + if (winrt::guid(iid) != winrt::guid_of()) + { + return activation_factory->QueryInterface(iid, factory); + } + + *factory = activation_factory.detach(); + return S_OK; +} + +int32_t __stdcall WINRT_RoGetActivationFactory(void* classId, winrt::guid const& iid, void** factory) noexcept +{ + return WINRT_RoGetActivationFactory((HSTRING)classId, (GUID)iid, factory); +} diff --git a/winml/test/common/fileHelpers.cpp b/winml/test/common/fileHelpers.cpp new file mode 100644 index 0000000000000..3fa43b44715cc --- /dev/null +++ b/winml/test/common/fileHelpers.cpp @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "std.h" +#include "fileHelpers.h" +#include "winrt/Windows.Media.h" +#include "winrt/Windows.Storage.h" + +EXTERN_C IMAGE_DOS_HEADER __ImageBase; + +using namespace winrt; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Storage; + +namespace FileHelpers +{ + std::wstring GetModulePath() + { + std::wstring val; + wchar_t modulePath[MAX_PATH] = { 0 }; + GetModuleFileNameW((HINSTANCE)&__ImageBase, modulePath, _countof(modulePath)); + wchar_t drive[_MAX_DRIVE]; + wchar_t dir[_MAX_DIR]; + wchar_t filename[_MAX_FNAME]; + wchar_t ext[_MAX_EXT]; + _wsplitpath_s(modulePath, drive, _MAX_DRIVE, dir, _MAX_DIR, filename, _MAX_FNAME, ext, _MAX_EXT); + + val = drive; + val += dir; + + return val; + } + + std::wstring GetWinMLPath() + { + // bool inboxDll = false; + // TODO Add command line parsing + // if (SUCCEEDED(WEX::TestExecution::RuntimeParameters::TryGetValue(L"inbox", inboxDll)) && inboxDll) + // { + // return L""; + // } + return GetModulePath(); + } + + + winrt::Windows::Graphics::Imaging::SoftwareBitmap GetSoftwareBitmapFromFile(const std::wstring& filePath) + { + auto storageFile = StorageFile::GetFileFromPathAsync(filePath).get(); + auto stream = storageFile.OpenAsync(FileAccessMode::Read).get(); + auto decoder = BitmapDecoder::CreateAsync(stream).get(); + IBitmapFrameWithSoftwareBitmap bitmapFrameWithSoftwareBitmap; + decoder.as(bitmapFrameWithSoftwareBitmap); + auto softwareBitmap = bitmapFrameWithSoftwareBitmap.GetSoftwareBitmapAsync( + BitmapPixelFormat::Bgra8, + BitmapAlphaMode::Ignore, + BitmapTransform::BitmapTransform(), + ExifOrientationMode::IgnoreExifOrientation, + ColorManagementMode::DoNotColorManage + ).get(); + return softwareBitmap; + } + + ImageFeatureValue LoadImageFeatureValue(const std::wstring& imagePath) + { + auto softwareBitmap = FileHelpers::GetSoftwareBitmapFromFile(FileHelpers::GetModulePath() + imagePath); + auto videoFrame = 
winrt::Windows::Media::VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); + return ImageFeatureValue::CreateFromVideoFrame(videoFrame); + } +} diff --git a/winml/test/common/fileHelpers.h b/winml/test/common/fileHelpers.h new file mode 100644 index 0000000000000..9426744914cb1 --- /dev/null +++ b/winml/test/common/fileHelpers.h @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include "winrt/Windows.Graphics.Imaging.h" +#include "winrt/Windows.AI.MachineLearning.h" + +namespace FileHelpers +{ + std::wstring GetModulePath(); + std::wstring GetWinMLPath(); + + winrt::Windows::Graphics::Imaging::SoftwareBitmap GetSoftwareBitmapFromFile(const std::wstring& filePath); + winrt::Windows::AI::MachineLearning::ImageFeatureValue LoadImageFeatureValue(const std::wstring& imagePath); +} diff --git a/winml/test/common/googleTestMacros.h b/winml/test/common/googleTestMacros.h new file mode 100644 index 0000000000000..dd2bc26515fd3 --- /dev/null +++ b/winml/test/common/googleTestMacros.h @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include +#include "runtimeParameters.h" + +#define TEST_GROUP_BEGIN(group_name) +#define TEST_GROUP_END() + +#define WINML_TEST(group_name, test_name) \ + TEST_F(group_name, test_name) { \ + getapi().test_name(); \ + } + +#define WINML_TEST_CLASS_BEGIN_NO_SETUP(test_class_name) \ + namespace { \ + class test_class_name : public ::testing::Test { \ + }; + +#define WINML_TEST_CLASS_BEGIN_WITH_SETUP(test_class_name, setup_method) \ + namespace { \ + class test_class_name : public ::testing::Test { \ + protected: \ + void SetUp() override { \ + getapi().setup_method(); \ + } \ + }; + +#define WINML_TEST_CLASS_END() } + +// For old versions of gtest without GTEST_SKIP, stream the message and return success instead +#ifndef GTEST_SKIP +#define GTEST_SKIP_(message) \ + return GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess) +#define GTEST_SKIP GTEST_SKIP_("") +#endif + +#define EXPECT_THROW_SPECIFIC(statement, exception, condition) \ + EXPECT_THROW( \ + try { \ + statement; \ + } catch (const exception& e) { \ + EXPECT_TRUE(condition(e)); \ + throw; \ + } \ + , exception); + +#ifndef INSTANTIATE_TEST_SUITE_P +// Use the old name, removed in newer versions of googletest +#define INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P +#endif + +#define WINML_SKIP_TEST(message) \ + GTEST_SKIP() << message; + +#define WINML_EXPECT_NO_THROW(statement) EXPECT_NO_THROW(statement) +#define WINML_EXPECT_TRUE(statement) EXPECT_TRUE(statement) +#define WINML_EXPECT_FALSE(statement) EXPECT_FALSE(statement) +#define WINML_EXPECT_EQUAL(val1, val2) EXPECT_EQ(val1, val2) +#define WINML_EXPECT_NOT_EQUAL(val1, val2) EXPECT_NE(val1, val2) + +#define WINML_LOG_ERROR(message) \ + ADD_FAILURE() << message +#define WINML_LOG_COMMENT(message)\ + SCOPED_TRACE(message) +#define WINML_EXPECT_HRESULT_SUCCEEDED(hresult_expression) EXPECT_HRESULT_SUCCEEDED(hresult_expression) +#define WINML_EXPECT_HRESULT_FAILED(hresult_expression) EXPECT_HRESULT_FAILED(hresult_expression) +#define 
WINML_EXPECT_THROW_SPECIFIC(statement, exception, condition) EXPECT_THROW_SPECIFIC(statement, exception, condition) + +#ifndef USE_DML +#define GPUTEST \ + WINML_SUPRESS_UNREACHABLE_BELOW(WINML_SKIP_TEST("GPU tests disabled because this is a WinML only build (no DML)")) +#else +#define GPUTEST \ + if (auto noGpuTests = RuntimeParameters::Parameters.find("noGPUtests"); \ + noGpuTests != RuntimeParameters::Parameters.end() && noGpuTests->second != "0") { \ + WINML_SKIP_TEST("GPU tests disabled"); \ + } +#endif + +#define SKIP_EDGECORE \ + if (auto isEdgeCore = RuntimeParameters::Parameters.find("EdgeCore"); \ + isEdgeCore != RuntimeParameters::Parameters.end() && isEdgeCore->second != "0") { \ + WINML_SKIP_TEST("Test can't be run in EdgeCore"); \ + } diff --git a/winml/test/common/googletest/main.cpp b/winml/test/common/googletest/main.cpp new file mode 100644 index 0000000000000..bf08c10ce3d6d --- /dev/null +++ b/winml/test/common/googletest/main.cpp @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include +#include +#include + +#include "runtimeParameters.h" + +namespace RuntimeParameters { +std::unordered_map Parameters; +} + +namespace { +void usage(char** argv, int failedArgument) { + std::cerr << "Unrecognized argument: " << argv[failedArgument] << "\n" + << "Usage:\n\t" + << argv[0] << " [/p:parameterName=parameterValue ...]\n"; +} + +bool parseArgument(const std::string& argument) { + if (argument.rfind("/p:", 0) == 0) { + // Parse argument in the form of /p:parameterName=parameterValue + auto separatorIndex = argument.find('='); + if (separatorIndex == std::string::npos || separatorIndex == 3) { + return false; + } + auto parameterName = argument.substr(3, separatorIndex - 3); + auto parameterValue = argument.substr(separatorIndex + 1); + RuntimeParameters::Parameters[parameterName] = parameterValue; + return true; + } + return false; +} +} // namespace + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + for (int i = 1; i < argc; i++) { + if (!parseArgument(argv[i])) { + usage(argv, i); + return -1; + } + } + return RUN_ALL_TESTS(); +} diff --git a/winml/test/common/protobufHelpers.cpp b/winml/test/common/protobufHelpers.cpp new file mode 100644 index 0000000000000..f109dc22b8713 --- /dev/null +++ b/winml/test/common/protobufHelpers.cpp @@ -0,0 +1,344 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS +#define _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS +#endif + +// LotusRT +#include "core/framework/allocatormgr.h" +#include "core/common/logging/logging.h" +#include "core/common/logging/sinks/clog_sink.h" +#include "protobufHelpers.h" + +#pragma warning(push) +#pragma warning(disable : 4100) +#include "onnx/onnx-ml.pb.h" +#pragma warning(pop) + +#include + +#include "winrt/Windows.Storage.Streams.h" + +using namespace winrt::Windows::Storage::Streams; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Foundation::Collections; + +// Copy and pasted from LOTUS as is. temporary code to load tensors from protobufs +int FdOpen(const std::string& name) { + int fd = -1; +#ifdef _WIN32 + _sopen_s(&fd, name.c_str(), _O_RDONLY | _O_SEQUENTIAL | _O_BINARY, _SH_DENYWR, _S_IREAD | _S_IWRITE); +#else + fd = open(name.c_str(), O_RDONLY); +#endif + return fd; +}; + +// Copy and pasted from LOTUS as is. temporary code to load tensors from protobufs +void FdClose(int fd) { + if (fd >= 0) { +#ifdef _WIN32 + _close(fd); +#else + close(fd); +#endif + } +} + +// Copy and pasted from LOTUS as is. temporary code to load tensors from protobufs +bool LoadTensorFromPb(onnx::TensorProto& tensor, std::wstring filePath) { + // setup a string converter + using convert_type = std::codecvt_utf8; + std::wstring_convert converter; + + // use converter (.to_bytes: wstr->str, .from_bytes: str->wstr) + std::string file = converter.to_bytes(filePath.c_str()); + + std::ifstream stream(file, std::ios::binary | std::ios::ate); + std::streamsize size = stream.tellg(); + stream.seekg(0, std::ios::beg); + + std::vector buffer(static_cast(size)); + if (stream.read(buffer.data(), size)) { + return tensor.ParseFromArray(buffer.data(), static_cast(size)); + } else { + return false; + } +} + +template +std::vector GetTypeSpecificDataFromTensorProto( + onnx::TensorProto /*tensorProto*/) { + static_assert(false, "UNDEFINED! 
TensorProto methods aren't templated, so add a new template specialization."); +} +template <> +std::vector GetTypeSpecificDataFromTensorProto( + onnx::TensorProto tensorProto) { + return std::vector(std::begin(tensorProto.float_data()), std::end(tensorProto.float_data())); +} +template <> +std::vector GetTypeSpecificDataFromTensorProto( + onnx::TensorProto tensorProto) { + return std::vector(std::begin(tensorProto.int32_data()), std::end(tensorProto.int32_data())); +} +template <> +std::vector GetTypeSpecificDataFromTensorProto( + onnx::TensorProto tensorProto) { + return std::vector(std::begin(tensorProto.int64_data()), std::end(tensorProto.int64_data())); +} + +template +std::vector GetTensorDataFromTensorProto( + onnx::TensorProto tensorProto, + uint64_t elementCount) { + if (tensorProto.has_raw_data()) { + std::vector tensorData; + auto& values = tensorProto.raw_data(); + if (elementCount != values.size() / sizeof(DataType)) { + throw winrt::hresult_invalid_argument(L"TensorProto element count should match raw data buffer size in elements."); + } + + tensorData = std::vector(static_cast(elementCount)); + memcpy(tensorData.data(), values.data(), values.size()); + return tensorData; + } else { + return GetTypeSpecificDataFromTensorProto(tensorProto); + } +} + +static std::vector GetTensorStringDataFromTensorProto( + onnx::TensorProto tensorProto, + uint64_t elementCount) { + if(tensorProto.string_data_size() != elementCount) + { + throw winrt::hresult_invalid_argument(L"Number of elements in TensorProto does not match expected element count."); + } + auto& values = tensorProto.string_data(); + auto returnVector = std::vector(static_cast(elementCount)); + std::transform(std::begin(values), std::end(values), std::begin(returnVector), + [](auto& value) { return winrt::to_hstring(value); }); + return returnVector; +} + +ITensor ProtobufHelpers::LoadTensorFromProtobufFile( + const std::wstring& filePath, + bool isFp16) { + // load from the file path into the onnx 
format + onnx::TensorProto tensorProto; + if (LoadTensorFromPb(tensorProto, filePath)) { + std::vector tensorShape = std::vector(tensorProto.dims().begin(), tensorProto.dims().end()); + int64_t initialValue = 1; + int64_t elementCount = std::accumulate(tensorShape.begin(), tensorShape.end(), initialValue, std::multiplies()); + + if (!tensorProto.has_data_type()) { + std::cerr << "WARNING: Loading unknown TensorProto datatype.\n"; + } + if (isFp16) { + return TensorFloat16Bit::CreateFromIterable(tensorShape, GetTensorDataFromTensorProto(tensorProto, elementCount)); + } + switch (tensorProto.data_type()) { + case (onnx::TensorProto::DataType::TensorProto_DataType_FLOAT): + return TensorFloat::CreateFromIterable(tensorShape, GetTensorDataFromTensorProto(tensorProto, elementCount)); + case (onnx::TensorProto::DataType::TensorProto_DataType_INT32): + return TensorInt32Bit::CreateFromIterable(tensorShape, GetTensorDataFromTensorProto(tensorProto, elementCount)); + case (onnx::TensorProto::DataType::TensorProto_DataType_INT64): + return TensorInt64Bit::CreateFromIterable(tensorShape, GetTensorDataFromTensorProto(tensorProto, elementCount)); + case (onnx::TensorProto::DataType::TensorProto_DataType_STRING): + return TensorString::CreateFromIterable(tensorShape, GetTensorStringDataFromTensorProto(tensorProto, elementCount)); + default: + throw winrt::hresult_invalid_argument(L"Tensor type for creating tensor from protobuf file not supported."); + break; + } + } + return nullptr; +} + +TensorFloat16Bit ProtobufHelpers::LoadTensorFloat16FromProtobufFile( + const std::wstring& filePath) { + // load from the file path into the onnx format + onnx::TensorProto tensorProto; + if (LoadTensorFromPb(tensorProto, filePath)) { + if (tensorProto.has_data_type()) { + if(onnx::TensorProto::DataType::TensorProto_DataType_FLOAT16 != tensorProto.data_type()) { + throw winrt::hresult_invalid_argument(L"TensorProto datatype isn't of type Float16."); + } + } else { + std::cerr << "Loading 
unknown TensorProto datatype as TensorFloat16Bit.\n"; + } + + auto shape = winrt::single_threaded_vector(std::vector(tensorProto.dims().begin(), tensorProto.dims().end())); + TensorFloat16Bit singleTensorValue = TensorFloat16Bit::Create(shape.GetView()); + + uint16_t* data; + winrt::com_ptr spTensorValueNative; + singleTensorValue.as(spTensorValueNative); + uint32_t sizeInBytes; + spTensorValueNative->GetBuffer(reinterpret_cast(&data), &sizeInBytes); + + if (!tensorProto.has_raw_data()) + { + throw winrt::hresult_invalid_argument(L"Float16 tensor proto buffers are expected to contain raw data."); + } + + auto& raw_data = tensorProto.raw_data(); + auto buff = raw_data.c_str(); + const size_t type_size = sizeof(uint16_t); + + memcpy((void*)data, (void*)buff, raw_data.size() * sizeof(char)); + + return singleTensorValue; + } + return nullptr; +} + +winrt::Windows::AI::MachineLearning::LearningModel ProtobufHelpers::CreateModel( + winrt::Windows::AI::MachineLearning::TensorKind kind, + const std::vector& shape, + uint32_t num_elements) { + onnx::ModelProto model; + model.set_ir_version(onnx::Version::IR_VERSION); + + // Set opset import + auto opsetimportproto = model.add_opset_import(); + opsetimportproto->set_version(7); + + onnx::GraphProto& graph = *model.mutable_graph(); + + uint32_t begin = 0; + uint32_t end = num_elements - 1; + for (uint32_t i = begin; i <= end; i++) { + onnx::NodeProto& node = *graph.add_node(); + node.set_op_type("Identity"); + if (i == begin && i == end) { + node.add_input("input"); + node.add_output("output"); + } else if (i == begin) { + node.add_input("input"); + node.add_output("output" + std::to_string(i)); + + } else if (i == end) { + node.add_input("output" + std::to_string(i - 1)); + node.add_output("output"); + } else { + node.add_input("output" + std::to_string(i - 1)); + node.add_output("output" + std::to_string(i)); + } + } + + onnx::TensorProto_DataType dataType; + switch (kind) { + case TensorKind::Float: + dataType = 
onnx::TensorProto_DataType_FLOAT; + break; + case TensorKind::UInt8: + dataType = onnx::TensorProto_DataType_UINT8; + break; + case TensorKind::Int8: + dataType = onnx::TensorProto_DataType_INT8; + break; + case TensorKind::UInt16: + dataType = onnx::TensorProto_DataType_UINT16; + break; + case TensorKind::Int16: + dataType = onnx::TensorProto_DataType_INT16; + break; + case TensorKind::Int32: + dataType = onnx::TensorProto_DataType_INT32; + break; + case TensorKind::Int64: + dataType = onnx::TensorProto_DataType_INT64; + break; + case TensorKind::String: + dataType = onnx::TensorProto_DataType_STRING; + break; + case TensorKind::Boolean: + dataType = onnx::TensorProto_DataType_BOOL; + break; + case TensorKind::Float16: + dataType = onnx::TensorProto_DataType_FLOAT16; + break; + case TensorKind::Double: + dataType = onnx::TensorProto_DataType_DOUBLE; + break; + case TensorKind::UInt32: + dataType = onnx::TensorProto_DataType_UINT32; + break; + case TensorKind::UInt64: + dataType = onnx::TensorProto_DataType_UINT64; + break; + default: + return nullptr; + } + + char dim_param = 'a'; + // input + { + onnx::ValueInfoProto& variable = *graph.add_input(); + variable.set_name("input"); + variable.mutable_type()->mutable_tensor_type()->set_elem_type(dataType); + for (auto dim : shape) { + if (dim == -1) { + variable.mutable_type()->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param(&dim_param, 1); + dim_param++; + } else { + variable.mutable_type()->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); + } + } + + if (shape.size() > 0) { + variable.mutable_type()->mutable_tensor_type()->mutable_shape()->mutable_dim(0)->set_denotation("DATA_BATCH"); + } + } + + // output + { + onnx::ValueInfoProto& variable = *graph.add_output(); + variable.set_name("output"); + variable.mutable_type()->mutable_tensor_type()->set_elem_type(dataType); + for (auto dim : shape) { + if (dim == -1) { + 
variable.mutable_type()->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param(&dim_param, 1); + dim_param++; + } else { + variable.mutable_type()->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); + } + } + } + + struct BufferStreamAdapter : public std::streambuf { + RandomAccessStreamReference BufferAsRandomAccessStreamReference() { + auto buffer = m_dataWriter.DetachBuffer(); + m_dataWriter = DataWriter(); + + InMemoryRandomAccessStream stream; + stream.WriteAsync(buffer).get(); + return RandomAccessStreamReference::CreateFromStream(stream); + } + + protected: + virtual int_type overflow(int_type c) { + if (c != EOF) { + // convert lowercase to uppercase + auto temp = static_cast(c); + + m_dataWriter.WriteByte(temp); + } + return c; + } + + private: + DataWriter m_dataWriter; + }; + + auto size = model.ByteSizeLong(); + auto raw_array = std::unique_ptr(new char[size]); + model.SerializeToArray(raw_array.get(), static_cast(size)); + + BufferStreamAdapter buffer; + std::ostream os(&buffer); + + os.write(raw_array.get(), size); + + return LearningModel::LoadFromStream(buffer.BufferAsRandomAccessStreamReference()); +} diff --git a/winml/test/common/protobufHelpers.h b/winml/test/common/protobufHelpers.h new file mode 100644 index 0000000000000..84c1b20883bc9 --- /dev/null +++ b/winml/test/common/protobufHelpers.h @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "std.h" + +namespace ProtobufHelpers +{ + // LoadTensorFromProtobufFile take a path to a FP32 data file and loads it into a 32bit array or + // 16bit array based on isFp16 + winrt::Windows::AI::MachineLearning::ITensor LoadTensorFromProtobufFile(const std::wstring& filePath, bool isFp16); + // LoadTensorFloat16FromProtobufFile takes a path to a FP16 data file and loads it into a 16bit array + winrt::Windows::AI::MachineLearning::TensorFloat16Bit LoadTensorFloat16FromProtobufFile(const std::wstring& filePath); + + winrt::Windows::AI::MachineLearning::LearningModel CreateModel( + winrt::Windows::AI::MachineLearning::TensorKind kind, + const std::vector& shape, + uint32_t num_elements = 1); +} diff --git a/winml/test/common/runtimeParameters.h b/winml/test/common/runtimeParameters.h new file mode 100644 index 0000000000000..e2095959bf058 --- /dev/null +++ b/winml/test/common/runtimeParameters.h @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +namespace RuntimeParameters +{ + // Runtime parameters passed through CLI arguments + extern std::unordered_map Parameters; +} \ No newline at end of file diff --git a/winml/test/common/std.h b/winml/test/common/std.h new file mode 100644 index 0000000000000..a51abb0e91912 --- /dev/null +++ b/winml/test/common/std.h @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +// stl +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test.h" + +// IUnknown must be declared before winrt/base.h is included to light up support for native COM +// interfaces with C++/WinRT types (e.g. winrt::com_ptr). 
+#include +#include +#include "winrt/base.h" +#include "winrt/Windows.Foundation.Collections.h" +#include "comp_generated/winrt/windows.ai.machinelearning.h" + +// WinML +#include "Windows.AI.MachineLearning.Native.h" diff --git a/winml/test/common/taefTestMacros.h b/winml/test/common/taefTestMacros.h new file mode 100644 index 0000000000000..9eab80bcdf293 --- /dev/null +++ b/winml/test/common/taefTestMacros.h @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "WexTestClass.h" + +using namespace WEX::Logging; +using namespace WEX::Common; +using namespace WEX::TestExecution; + +#define WINML_EXPECT_NO_THROW(statement) VERIFY_NO_THROW(statement) + +#define WINML_TEST_CLASS_BEGIN_WITH_SETUP(test_class_name, setup_method) \ + class test_class_name { \ + TEST_CLASS(test_class_name); \ + TEST_CLASS_SETUP(TestClassSetup) { \ + getapi().setup_method(); \ + return true; \ + } + +#define WINML_TEST_CLASS_END() \ + } \ + ; + +#define WINML_TEST(group_name, test_name) \ + TEST_METHOD(test_name) { \ + getapi().test_name(); \ + } + +#define WINML_SKIP_TEST(message) \ + do { \ + Log::Result(TestResults::Skipped, \ + std::wstring_convert>().from_bytes(message).c_str()); \ + return; \ + } while (0) + +#define WINML_EXPECT_NO_THROW(statement) VERIFY_NO_THROW(statement) +#define WINML_EXPECT_TRUE(statement) VERIFY_IS_TRUE(statement) +#define WINML_EXPECT_FALSE(statement) VERIFY_IS_FALSE(statement) +#define WINML_EXPECT_EQUAL(val1, val2) VERIFY_ARE_EQUAL(val1, val2) +#define WINML_EXPECT_NOT_EQUAL(val1, val2) VERIFY_ARE_NOT_EQUAL(val1, val2) +#define WINML_LOG_ERROR(message) \ + VERIFY_FAIL(std::wstring_convert>().from_bytes(message).c_str()) +#define WINML_LOG_COMMENT(message)\ + WEX::Logging::Log::Comment(std::wstring_convert>().from_bytes(message).c_str()) +#define WINML_EXPECT_HRESULT_SUCCEEDED(hresult_expression) VERIFY_SUCCEEDED(hresult_expression) +#define WINML_EXPECT_THROW_SPECIFIC(statement, 
exception, condition) VERIFY_THROWS_SPECIFIC(statement, exception, condition) +#define WINML_EXPECT_HRESULT_FAILED(hresult_expression) VERIFY_FAILED(hresult_expression) + +#ifndef USE_DML +#define GPUTEST \ + WINML_SUPRESS_UNREACHABLE_BELOW(WINML_SKIP_TEST("GPU tests disabled because this is a WinML only build (no DML)")) +#else +#define GPUTEST \ + bool noGPUTests; \ + if (SUCCEEDED(RuntimeParameters::TryGetValue(L"noGPUtests", noGPUTests)) && noGPUTests) { \ + WINML_SKIP_TEST("This test is disabled by the noGPUTests runtime parameter."); \ + return; \ + } +#endif + +#define SKIP_EDGECORE \ + bool edgeCoreRun; \ + if (SUCCEEDED(RuntimeParameters::TryGetValue(L"EdgeCore", edgeCoreRun)) && edgeCoreRun) { \ + WINML_SKIP_TEST("This test is disabled by the EdgeCore runtime parameter."); \ + return; \ + } diff --git a/winml/test/common/test.h b/winml/test/common/test.h new file mode 100644 index 0000000000000..9a97331d06f07 --- /dev/null +++ b/winml/test/common/test.h @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +using VoidTest = void (*)(); +using SetupTest = VoidTest; + +constexpr bool alwaysTrue() { + return true; +} +#define WINML_SUPRESS_UNREACHABLE_BELOW(statement) \ + if (alwaysTrue()) { statement; } + +#ifdef BUILD_GOOGLE_TEST +#include "googleTestMacros.h" +#else +#ifdef BUILD_TAEF_TEST +#include "taefTestMacros.h" +#endif +#endif diff --git a/winml/test/common/testPch.h b/winml/test/common/testPch.h new file mode 100644 index 0000000000000..ec055aee26888 --- /dev/null +++ b/winml/test/common/testPch.h @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS +#define _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS +#endif +#include "std.h" + +#include +#include + +#include "fileHelpers.h" diff --git a/winml/test/common/testdata/squeezenet/model.onnx b/winml/test/common/testdata/squeezenet/model.onnx new file mode 100644 index 0000000000000..b8e1dfce26d99 Binary files /dev/null and b/winml/test/common/testdata/squeezenet/model.onnx differ diff --git a/winml/test/common/testdata/squeezenet/test_data_0_input.pb b/winml/test/common/testdata/squeezenet/test_data_0_input.pb new file mode 100644 index 0000000000000..f521c230e5e76 Binary files /dev/null and b/winml/test/common/testdata/squeezenet/test_data_0_input.pb differ diff --git a/winml/test/common/testdata/squeezenet/test_data_0_output.pb b/winml/test/common/testdata/squeezenet/test_data_0_output.pb new file mode 100644 index 0000000000000..e731eb0234cc7 Binary files /dev/null and b/winml/test/common/testdata/squeezenet/test_data_0_output.pb differ diff --git a/winml/test/scenario/cppwinrt/CustomNullOp.h b/winml/test/scenario/cppwinrt/CustomNullOp.h new file mode 100644 index 0000000000000..94e32b862ff32 --- /dev/null +++ b/winml/test/scenario/cppwinrt/CustomNullOp.h @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// +// Implements a custom operator kernel which counts the number of calls to Compute(), but otherwise is a no-op. 
+// + +#pragma once + +#include "test.h" + +template +struct NullShapeInferrer : winrt::implements, IMLOperatorShapeInferrer> +{ + STDMETHOD(InferOutputShapes)(IMLOperatorShapeInferenceContext* context) noexcept + { + WINML_EXPECT_NO_THROW(OperatorHelper::ShapeInferenceFunction(context)); + return S_OK; + } +}; + +struct NullOperator : winrt::implements +{ + NullOperator(std::atomic* callCount) : m_callCount(callCount) {} + + STDMETHOD(Compute)(IMLOperatorKernelContext* context) + { + winrt::com_ptr outputTensor; + WINML_EXPECT_HRESULT_SUCCEEDED(context->GetOutputTensor(0, outputTensor.put())); + + ++(*m_callCount); + return S_OK; + } + +private: + std::atomic* m_callCount; +}; + +struct NullOperatorFactory : winrt::implements +{ + NullOperatorFactory(std::atomic* callCount) : m_callCount(callCount) {} + + STDMETHOD(CreateKernel)( + IMLOperatorKernelCreationContext* context, + IMLOperatorKernel** kernel) + { + ORT_UNUSED_PARAMETER(context); + auto op = winrt::make(m_callCount); + op.copy_to(kernel); + return S_OK; + } + + static MLOperatorEdgeDescription CreateEdgeDescriptor(MLOperatorEdgeType type, MLOperatorTensorDataType dataType) + { + ORT_UNUSED_PARAMETER(type); + MLOperatorEdgeDescription desc; + desc.edgeType = MLOperatorEdgeType::Tensor; + desc.tensorDataType = dataType; + return desc; + } + + static void RegisterKernel( + const char* name, + const char* domain, + int versionSince, + winrt::com_ptr registry, + winrt::com_ptr shapeInferrer, + std::atomic* callCount) + { + MLOperatorKernelDescription kernelDescription; + kernelDescription.domain = domain; + kernelDescription.name = name; + kernelDescription.minimumOperatorSetVersion = versionSince; + kernelDescription.executionType = MLOperatorExecutionType::D3D12; + + MLOperatorEdgeTypeConstrant typeConstraint; + typeConstraint.typeLabel = "T"; + std::vector allowedEdges + { + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double), + 
CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float), + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16) + }; + typeConstraint.allowedTypes = allowedEdges.data(); + typeConstraint.allowedTypeCount = static_cast(allowedEdges.size()); + + std::vector typeConstraints{ typeConstraint }; + kernelDescription.typeConstraints = typeConstraints.data(); + kernelDescription.typeConstraintCount = static_cast(typeConstraints.size()); + + kernelDescription.defaultAttributes = nullptr; + kernelDescription.defaultAttributeCount = 0; + kernelDescription.options = MLOperatorKernelOptions::None; + kernelDescription.executionOptions = 0; + + auto factory = winrt::make(callCount); + + WINML_EXPECT_HRESULT_SUCCEEDED(registry->RegisterOperatorKernel( + &kernelDescription, + factory.get(), + shapeInferrer.get() + )); + } + +private: + std::atomic* m_callCount; +}; diff --git a/winml/test/scenario/cppwinrt/CustomOperatorProvider.h b/winml/test/scenario/cppwinrt/CustomOperatorProvider.h new file mode 100644 index 0000000000000..a280295be32eb --- /dev/null +++ b/winml/test/scenario/cppwinrt/CustomOperatorProvider.h @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "NoisyReluCpu.h" +#include "ReluCpu.h" + +struct CustomOperatorProvider : + winrt::implements< + CustomOperatorProvider, + winrt::Windows::AI::MachineLearning::ILearningModelOperatorProvider, + ILearningModelOperatorProviderNative> +{ + HMODULE m_library; + winrt::com_ptr m_registry; + + CustomOperatorProvider() + { +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + m_library = LoadLibraryW(L"windows.ai.machinelearning.dll"); +#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PC_APP) + m_library = LoadPackagedLibrary(L"windows.ai.machinelearning.dll", 0 /*Reserved*/); +#endif + using create_registry_delegate = HRESULT WINAPI (_COM_Outptr_ IMLOperatorRegistry** registry); + auto create_registry = reinterpret_cast(GetProcAddress(m_library, "MLCreateOperatorRegistry")); + if (FAILED(create_registry(m_registry.put()))) + { + __fastfail(0); + } + + RegisterSchemas(); + RegisterKernels(); + } + + ~CustomOperatorProvider() + { + FreeLibrary(m_library); + } + + void RegisterSchemas() + { + NoisyReluOperatorFactory::RegisterNoisyReluSchema(m_registry); + } + + void RegisterKernels() + { + // Replace the Relu operator kernel + ReluOperatorFactory::RegisterReluKernel(m_registry); + + // Add a new operator kernel for Relu + NoisyReluOperatorFactory::RegisterNoisyReluKernel(m_registry); + } + + STDMETHOD(GetRegistry)(IMLOperatorRegistry** ppOperatorRegistry) + { + m_registry.copy_to(ppOperatorRegistry); + return S_OK; + } +}; \ No newline at end of file diff --git a/winml/test/scenario/cppwinrt/CustomOps.cpp b/winml/test/scenario/cppwinrt/CustomOps.cpp new file mode 100644 index 0000000000000..2afd05d99b9ab --- /dev/null +++ b/winml/test/scenario/cppwinrt/CustomOps.cpp @@ -0,0 +1,737 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "testPch.h" +#include +#include +#include +#include "filehelpers.h" +#include +#include +#include +#include "winrt/Windows.Storage.h" +#include +#include +#include +#include "CustomOperatorProvider.h" +#include "CustomOps.h" + +// For custom operator and shape inferencing support +#include "core/providers/dml/DmlExecutionProvider/inc/MLOperatorAuthor.h" +#include "core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h" +#include "core/providers/dml/OperatorAuthorHelper/OperatorHelper.h" +#include "core/providers/dml/OperatorAuthorHelper/OperatorRegistration.h" +#include "core/graph/constants.h" +#include "CustomNullOp.h" +#include + +using namespace winrt; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Foundation::Collections; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Storage; +using namespace winrt::Windows::Storage::Streams; + +static void CustomOpsScenarioTestSetup() +{ + init_apartment(); +} + +static void CustomOpsScenarioGpuTestSetup() +{ + init_apartment(); + GPUTEST; +} + +// Tests that the execution provider correctly fuses operators together when custom ops are involved. +static void CustomOperatorFusion() { + constexpr const wchar_t* c_modelFilename = L"squeezenet_tensor_input.onnx"; + + // This particular model has 25 Conv ops and 25 Relu ops, all of which are eligible for fusion so we expect them + // all to be fused (removing them from the graph) and replaced with the appropriate fused op instead. The same + // goes for the single Gemm+Sigmoid in the model too. 
+ constexpr const uint32_t c_expectedConvOps = 0; + constexpr const uint32_t c_expectedReluOps = 0; + constexpr const uint32_t c_expectedFusedConvOps = 25; + constexpr const uint32_t c_expectedGemmOps = 0; + constexpr const uint32_t c_expectedSigmoidOps = 0; + constexpr const uint32_t c_expectedFusedGemmOps = 1; + + // These ops are also part of the model but shouldn't be fused + constexpr const uint32_t c_expectedBatchNormOps = 1; + constexpr const uint32_t c_expectedMaxPoolOps = 3; + constexpr const uint32_t c_expectedConcatOps = 8; + + struct CallbackOperatorProvider : + winrt::implements< + CallbackOperatorProvider, + winrt::Windows::AI::MachineLearning::ILearningModelOperatorProvider, + ILearningModelOperatorProviderNative> + { + struct CallCounts + { + std::atomic conv = 0; + std::atomic relu = 0; + std::atomic fusedConv = 0; + std::atomic gemm = 0; + std::atomic sigmoid = 0; + std::atomic fusedGemm = 0; + std::atomic batchNorm = 0; + std::atomic maxPool = 0; + std::atomic concat = 0; + }; + + const CallCounts& GetCallCounts() + { + return m_callCounts; + } + + CallbackOperatorProvider() + { + using namespace OperatorHelper; + + WINML_EXPECT_HRESULT_SUCCEEDED(MLCreateOperatorRegistry(m_registry.put())); + +#pragma push_macro("REGISTER_KERNEL") +#define REGISTER_KERNEL(_name, _domain, _opSet, _shapeInferrer, _callCount) \ + NullOperatorFactory::RegisterKernel( \ + #_name, \ + (_domain), \ + _opSet::sc_sinceVer_ ## _name, \ + m_registry, \ + winrt::make>(), \ + (_callCount)); + + REGISTER_KERNEL(Conv, onnxruntime::kOnnxDomain, OnnxOperatorSet7, ConvHelper, &m_callCounts.conv); + REGISTER_KERNEL(Relu, onnxruntime::kOnnxDomain, OnnxOperatorSet7, GetOutputShapeAsInputShapeHelper, &m_callCounts.relu); + REGISTER_KERNEL(FusedConv, onnxruntime::kMSDmlDomain, MsftOperatorSet1, ConvHelper, &m_callCounts.fusedConv); + + REGISTER_KERNEL(Gemm, onnxruntime::kOnnxDomain, OnnxOperatorSet7, GemmHelper, &m_callCounts.gemm); + REGISTER_KERNEL(Sigmoid, onnxruntime::kOnnxDomain, 
OnnxOperatorSet7, GetOutputShapeAsInputShapeHelper, &m_callCounts.sigmoid); + REGISTER_KERNEL(FusedGemm, onnxruntime::kMSDmlDomain, MsftOperatorSet1, GemmHelper, &m_callCounts.fusedGemm); + + REGISTER_KERNEL(BatchNormalization, onnxruntime::kOnnxDomain, OnnxOperatorSet7, GetOutputShapeAsInputShapeHelper, &m_callCounts.batchNorm); + REGISTER_KERNEL(MaxPool, onnxruntime::kOnnxDomain, OnnxOperatorSet7, PoolingHelper, &m_callCounts.maxPool); + REGISTER_KERNEL(Concat, onnxruntime::kOnnxDomain, OnnxOperatorSet7, ConcatHelper, &m_callCounts.concat); + +#pragma pop_macro("REGISTER_KERNEL") + } + + STDMETHOD(GetRegistry)(IMLOperatorRegistry** ppOperatorRegistry) + { + if (ppOperatorRegistry == nullptr) + { + return E_POINTER; + } + + m_registry.copy_to(ppOperatorRegistry); + return S_OK; + } + + private: + winrt::com_ptr m_registry; + CallCounts m_callCounts; + }; + + auto customOperatorProvider = winrt::make(); + auto provider = customOperatorProvider.as(); + + LearningModelDevice device = nullptr; + WINML_EXPECT_NO_THROW(device = LearningModelDevice(LearningModelDeviceKind::DirectX)); + std::wstring fullPath = FileHelpers::GetModulePath() + c_modelFilename; + auto model = LearningModel::LoadFromFilePath(fullPath, provider); + + auto featureValue = FileHelpers::LoadImageFeatureValue(L"227x227.png"); + + LearningModelSession session = nullptr; + WINML_EXPECT_NO_THROW(session = LearningModelSession(model, device)); + LearningModelBinding modelBinding(session); + + modelBinding.Bind(L"data", featureValue); + auto result = session.Evaluate(modelBinding, L""); + + const auto& callCounts = customOperatorProvider.as()->GetCallCounts(); + + // Verify that the correct number of each operator was seen (i.e. 
that none were dropped / incorrectly fused) + WINML_EXPECT_EQUAL(c_expectedConvOps, callCounts.conv); + WINML_EXPECT_EQUAL(c_expectedReluOps, callCounts.relu); + WINML_EXPECT_EQUAL(c_expectedFusedConvOps, callCounts.fusedConv); + WINML_EXPECT_EQUAL(c_expectedGemmOps, callCounts.gemm); + WINML_EXPECT_EQUAL(c_expectedSigmoidOps, callCounts.sigmoid); + WINML_EXPECT_EQUAL(c_expectedFusedGemmOps, callCounts.fusedGemm); + WINML_EXPECT_EQUAL(c_expectedBatchNormOps, callCounts.batchNorm); + WINML_EXPECT_EQUAL(c_expectedMaxPoolOps, callCounts.maxPool); + WINML_EXPECT_EQUAL(c_expectedConcatOps, callCounts.concat); +} + +struct LocalCustomOperatorProvider : + winrt::implements< + LocalCustomOperatorProvider, + winrt::Windows::AI::MachineLearning::ILearningModelOperatorProvider, + ILearningModelOperatorProviderNative> +{ + LocalCustomOperatorProvider() + { + WINML_EXPECT_HRESULT_SUCCEEDED(MLCreateOperatorRegistry(m_registry.put())); + } + + STDMETHOD(GetRegistry)(IMLOperatorRegistry** ppOperatorRegistry) + { + if (ppOperatorRegistry == nullptr) + { + return E_POINTER; + } + + m_registry.copy_to(ppOperatorRegistry); + return S_OK; + } + + IMLOperatorRegistry* GetRegistry() + { + return m_registry.get(); + } + +protected: + winrt::com_ptr m_registry; +}; + +// Checks test attributes set on ABI kernels can be queried with correct values +void VerifyTestAttributes(const MLOperatorAttributes& attrs) +{ + std::string strAttr = attrs.GetAttribute("DefaultedNonRequiredString"); + WINML_EXPECT_EQUAL(strAttr, "1"); + + std::vector strArrayAttr = attrs.GetAttributeVector("DefaultedNonRequiredStringArray"); + std::vector expected = std::vector({ "1", "2" }); + for (size_t i = 0; i < expected.size(); ++i) + { + WINML_EXPECT_EQUAL(strArrayAttr[i], expected[i]); + } + + WINML_EXPECT_EQUAL(1, attrs.GetAttribute("DefaultedNonRequiredInt")); + WINML_EXPECT_EQUAL(1.0f, attrs.GetAttribute("DefaultedNonRequiredFloat")); + + WINML_EXPECT_EQUAL(std::vector({ 1, 2 }), 
attrs.GetAttributeVector("DefaultedNonRequiredIntArray")); + WINML_EXPECT_EQUAL(std::vector({ 1.0f, 2.0f }), attrs.GetAttributeVector("DefaultedNonRequiredFloatArray")); +} + +// Foo kernel which is doing Add and optionally truncates its output +template +class FooKernel +{ +public: + FooKernel(const MLOperatorKernelCreationContext& info) + { + if (VerifyAttributes) + { + VerifyTestAttributes(info); + } + + VerifyShapeInfo(info); + } + + void VerifyShapeInfo(const MLOperatorKernelCreationContext& info) + { + if (!Truncate) + { + com_ptr shapeInfo; + WINML_EXPECT_EQUAL(info.GetInterface()->HasTensorShapeDescription(), false); + WINML_EXPECT_HRESULT_FAILED(info.GetInterface()->GetTensorShapeDescription(shapeInfo.put())); + } + else + { + com_ptr shapeInfo; + WINML_EXPECT_EQUAL(info.GetInterface()->HasTensorShapeDescription(), true); + WINML_EXPECT_EQUAL(info.GetInterface()->GetTensorShapeDescription(shapeInfo.put()), S_OK); + } + } + + void Compute(const MLOperatorKernelContext& context) const + { + const auto X = context.GetInputTensor(0); + const auto W = context.GetInputTensor(1); + + auto xData = X.GetData(); + auto wData = W.GetData(); + + auto shape = X.GetShape(); + + // This is used to test shape inference + if (Truncate) + { + shape[0] -= 1; + } + + if (!Truncate) + { + com_ptr tensor; + WINML_EXPECT_HRESULT_FAILED(context.GetInterface()->GetOutputTensor(0, tensor.put())); + } + else + { + MLOperatorTensor tensor = context.GetOutputTensor(0); + } + + auto Y = context.GetOutputTensor(0, shape); + auto yData = Y.GetData(); + + size_t size = 1; + for (size_t i = 0; i < shape.size(); i++) + { + size *= shape[i]; + } + + for (size_t i = 0; i < size; i++) + { + yData[i] = xData[i] + wData[i]; + } + } +}; + +template +void CALLBACK CreateABIFooKernel(IMLOperatorKernelCreationContext* kernelInfo, IMLOperatorKernel** opKernel) +{ + HRESULT hr = MLOperatorKernel>::CreateInstance(*kernelInfo, opKernel); + THROW_IF_FAILED(hr); +} + +void CALLBACK 
CreateTruncatedABIFooKernel(IMLOperatorKernelCreationContext* kernelInfo, IMLOperatorKernel** opKernel) +{ + HRESULT hr = MLOperatorKernel>::CreateInstance(*kernelInfo, opKernel); + THROW_IF_FAILED(hr); +} + +// Test using a foo kernel which is doing Add, but register it as "Mul". +static void CustomKernelWithBuiltInSchema() +{ + // Create the registry + auto operatorProvider = winrt::make(); + IMLOperatorRegistry* registry = operatorProvider.as()->GetRegistry(); + + // Register the kernel + MLOperatorEdgeDescription floatTensorType = + { + MLOperatorEdgeType::Tensor, + static_cast(MLOperatorTensorDataType::Float) + }; + + MLOperatorEdgeTypeConstrant constraint = { "T", &floatTensorType, 1 }; + + MLOperatorKernelDescription kernelDesc = + { + "", + "Mul", + 7, + MLOperatorExecutionType::Cpu, + &constraint, + 1, + nullptr, + 0, + MLOperatorKernelOptions::AllowDynamicInputShapes + }; + + Microsoft::WRL::ComPtr factory = wil::MakeOrThrow(CreateABIFooKernel); + WINML_EXPECT_HRESULT_SUCCEEDED(registry->RegisterOperatorKernel(&kernelDesc, factory.Get(), nullptr)); + + // Prepare inputs + std::vector dimsX = { 3, 2 }; + std::vector valuesX = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f }; + + // Prepare expected inputs and outputs + std::vector expectedDimsY = { 3, 2 }; + + // The expected value should be Add's result. 
+ std::vector expectedValuesY = { 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f }; + + // Create the model and sessions + std::wstring fullPath = FileHelpers::GetModulePath() + L"mul.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(fullPath, operatorProvider); + + LearningModelSession session(model); + LearningModelBinding bindings(session); + + // Bind inputs and outputs + TensorFloat inputTensor = TensorFloat::CreateFromArray(dimsX, winrt::array_view(std::move(valuesX))); + bindings.Bind(winrt::hstring(L"X"), inputTensor); + + auto outputValue = TensorFloat::Create(); + WINML_EXPECT_NO_THROW(bindings.Bind(L"Y", outputValue)); + + // Evaluate the model + hstring correlationId; + WINML_EXPECT_NO_THROW(session.Evaluate(bindings, correlationId)); + + // Check the result shape + WINML_EXPECT_EQUAL(expectedDimsY.size(), outputValue.Shape().Size()); + for (uint32_t j = 0; j < outputValue.Shape().Size(); j++) + { + WINML_EXPECT_EQUAL(expectedDimsY.at(j), outputValue.Shape().GetAt(j)); + } + + // Check the results + auto buffer = outputValue.GetAsVectorView(); + WINML_EXPECT_TRUE(buffer != nullptr); + WINML_EXPECT_TRUE(std::equal(expectedValuesY.cbegin(), expectedValuesY.cend(), begin(buffer))); + + // Release the model before operatorProvider goes out of scope + model = nullptr; +} + +// Similar to MLOperatorShapeInferrer, but using an std::function +class MLOperatorShapeInferrerFromFunc : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags, IMLOperatorShapeInferrer> +{ +public: + MLOperatorShapeInferrerFromFunc(std::function shapeInferenceFn) : + m_func(shapeInferenceFn) + {} + + HRESULT STDMETHODCALLTYPE InferOutputShapes(IMLOperatorShapeInferenceContext* context) noexcept override try + { + m_func(context); + return S_OK; + } + CATCH_RETURN(); + +private: + std::function m_func; +}; + +// Test using a custom kernel and schema, while verifying attribute defaults, type mapping, and inference methods +static void CustomKernelWithCustomSchema() 
+{ + // Test cases + struct + { + // Whether the Foo kernel should truncate its output + bool truncateOutput; + + // Whether a type label is used in the schema, versus a type description + bool useTypeLabel; + + // Whether the schema provides a type inference function, and uses an output type + // of Int32 instead of Float32 + bool useTypeInference; + + // Whether a shape inference method is provided in the schema + bool useShapeInferenceInSchema; + + // Whether a shape inference method is provided in the kernel + bool useShapeInferenceInKernel; + + // Whether attribute defaults are provided in the schema, instead of the kernel + bool attributeDefaultsInSchema; + } testCases[] = + { + {false, true, false, false, false, false}, + {false, false, false, false, false, false}, + {false, true, true, false, false, true}, + {true, false, false, false, true, false}, + {true, true, true, true, true, true}, + }; + + for (size_t caseIndex = 0; caseIndex < std::size(testCases); ++caseIndex) + { + // Create the registry + auto operatorProvider = winrt::make(); + IMLOperatorRegistry* registry = operatorProvider.as()->GetRegistry(); + + // Create input and output parameters + MLOperatorSchemaEdgeDescription inputParam = {}; + inputParam.options = MLOperatorParameterOptions::Single; + + if (!testCases[caseIndex].useTypeLabel) + { + assert(!testCases[caseIndex].useTypeInference); + + MLOperatorEdgeDescription edgeDesc = {}; + edgeDesc.edgeType = MLOperatorEdgeType::Tensor; + edgeDesc.tensorDataType = MLOperatorTensorDataType::Float; + + inputParam.typeFormat = MLOperatorSchemaEdgeTypeFormat::EdgeDescription; + inputParam.edgeDescription = edgeDesc; + } + else + { + inputParam.typeFormat = MLOperatorSchemaEdgeTypeFormat::Label; + inputParam.typeLabel = "T1"; + } + + MLOperatorSchemaEdgeDescription outputParam = inputParam; + + // Type inference should set this to tensor(float) even though T2 is not matched + // on an input label + if (testCases[caseIndex].useTypeInference) + { + if 
(inputParam.typeFormat == MLOperatorSchemaEdgeTypeFormat::Label) + { + outputParam.typeLabel = "T2"; + } + else + { + outputParam.edgeDescription.tensorDataType = MLOperatorTensorDataType::Int32; + } + } + + MLOperatorSchemaEdgeDescription inputs[] = { inputParam, inputParam }; + + MLOperatorEdgeDescription edgeTypes[6] = + { + {MLOperatorEdgeType::Tensor, static_cast(MLOperatorTensorDataType::UInt32)}, + {MLOperatorEdgeType::Tensor, static_cast(MLOperatorTensorDataType::UInt64)}, + {MLOperatorEdgeType::Tensor, static_cast(MLOperatorTensorDataType::Int32)}, + {MLOperatorEdgeType::Tensor, static_cast(MLOperatorTensorDataType::Int64)}, + {MLOperatorEdgeType::Tensor, static_cast(MLOperatorTensorDataType::Float)}, + {MLOperatorEdgeType::Tensor, static_cast(MLOperatorTensorDataType::Double)} + }; + + // Type constraints. Only the first is used unless type inference is provided and + // the kernel emits a different output type as "T2" + MLOperatorEdgeTypeConstrant constraints[] = + { + {"T1", edgeTypes, static_cast(std::size(edgeTypes))}, + {"T2", edgeTypes, static_cast(std::size(edgeTypes))} + }; + + // Test attributes + MLOperatorAttribute attributes[] = + { + {"DefaultedNonRequiredInt", MLOperatorAttributeType::Int, false}, + {"DefaultedNonRequiredFloat", MLOperatorAttributeType::Float, false}, + {"DefaultedNonRequiredString", MLOperatorAttributeType::String, false}, + {"DefaultedNonRequiredIntArray", MLOperatorAttributeType::IntArray, false}, + {"DefaultedNonRequiredFloatArray", MLOperatorAttributeType::FloatArray, false}, + {"DefaultedNonRequiredStringArray", MLOperatorAttributeType::StringArray, false}, + + {"NonDefaultedNonRequiredStringArray", MLOperatorAttributeType::StringArray, false}, + }; + + // Defaults. 
These are queried back during kernel creation, type and shape inference + // and tested against the same values + MLOperatorAttributeNameValue defaultAttributes[] = + { + {"DefaultedNonRequiredInt", MLOperatorAttributeType::Int, 1}, + {"DefaultedNonRequiredFloat", MLOperatorAttributeType::Float, 1}, + {"DefaultedNonRequiredString", MLOperatorAttributeType::String, 1}, + {"DefaultedNonRequiredIntArray", MLOperatorAttributeType::IntArray, 2}, + {"DefaultedNonRequiredFloatArray", MLOperatorAttributeType::FloatArray, 2}, + {"DefaultedNonRequiredStringArray", MLOperatorAttributeType::StringArray, 2}, + }; + + int64_t defaultInts[] = { 1, 2 }; + float defaultFloats[] = { 1.0f, 2.0f }; + const char* defaultStrings[] = { "1", "2" }; + defaultAttributes[0].ints = defaultInts; + defaultAttributes[1].floats = defaultFloats; + defaultAttributes[2].strings = defaultStrings; + defaultAttributes[3].ints = defaultInts; + defaultAttributes[4].floats = defaultFloats; + defaultAttributes[5].strings = defaultStrings; + + // Schema definition + MLOperatorSchemaDescription schemaDesc = {}; + schemaDesc.name = "Foo"; + schemaDesc.operatorSetVersionAtLastChange = 7; + schemaDesc.inputs = inputs; + schemaDesc.inputCount = 2; + schemaDesc.outputs = &outputParam; + schemaDesc.outputCount = 1; + schemaDesc.typeConstraints = constraints; + schemaDesc.typeConstraintCount = testCases[caseIndex].useTypeLabel ? 
2 : 0; + schemaDesc.attributes = attributes; + schemaDesc.attributeCount = static_cast(std::size(attributes)); + + if (testCases[caseIndex].attributeDefaultsInSchema) + { + schemaDesc.defaultAttributes = defaultAttributes; + schemaDesc.defaultAttributeCount = static_cast(std::size(defaultAttributes)); + } + + Microsoft::WRL::ComPtr typeInferrer; + Microsoft::WRL::ComPtr shapeInferrer; + + // Type inference function + if (testCases[caseIndex].useTypeInference) + { + typeInferrer = wil::MakeOrThrow([](IMLOperatorTypeInferenceContext* ctx) -> void + { + VerifyTestAttributes(MLOperatorTypeInferenceContext(ctx)); + + MLOperatorEdgeDescription edgeDesc = {}; + edgeDesc.edgeType = MLOperatorEdgeType::Tensor; + edgeDesc.tensorDataType = MLOperatorTensorDataType::Float; + + MLOperatorTypeInferenceContext(ctx).SetOutputEdgeDescription(0, &edgeDesc); + }); + } + + // Store the shape inference context with a reference following the call to InferOutputShapes. + // This will be called after loading the model as an isolated test for how ABI context objects + // are "closed." + Microsoft::WRL::ComPtr shapeInferenceContext; + + // Shape inference is tested by truncating the output size + bool truncateOutput = testCases[caseIndex].truncateOutput; + if (truncateOutput) + { + shapeInferrer = wil::MakeOrThrow([&shapeInferenceContext](IMLOperatorShapeInferenceContext* ctx) -> void + { + VerifyTestAttributes(MLShapeInferenceContext(ctx)); + MLShapeInferenceContext(ctx).SetOutputTensorShape(0, { 2, 2 }); + shapeInferenceContext = ctx; + }); + } + + // Register the schema + MLOperatorSetId opsetId = { "", 7 }; + MLOperatorSchemaDescription* opSchemaDescs = &schemaDesc; + WINML_EXPECT_EQUAL(S_OK, registry->RegisterOperatorSetSchema( + &opsetId, + 1, + &opSchemaDescs, + 1, + typeInferrer.Get(), + testCases[caseIndex].useShapeInferenceInSchema ? 
shapeInferrer.Get() : nullptr + )); + + { + // Register a future version of the schema in the same domain, while setting its + // input count to zero to ensure it is not being used. + auto futureSchemaDesc = schemaDesc; + futureSchemaDesc.inputCount = 0; + + MLOperatorSetId id = { "", 9 }; + MLOperatorSchemaDescription* schemaDescs = &futureSchemaDesc; + WINML_EXPECT_EQUAL(S_OK, registry->RegisterOperatorSetSchema( + &id, + 7, + &schemaDescs, + 1, + typeInferrer.Get(), + testCases[caseIndex].useShapeInferenceInSchema ? shapeInferrer.Get() : nullptr + )); + } + { + // Register in another (unused) domain to the custom registry + auto otherSchemaDesc = schemaDesc; + otherSchemaDesc.inputCount = 0; + + MLOperatorSetId id = { "otherDomain", 7 }; + MLOperatorSchemaDescription* schemaDescs = &otherSchemaDesc; + WINML_EXPECT_EQUAL(S_OK, registry->RegisterOperatorSetSchema( + &id, + 1, + &schemaDescs, + 1, + typeInferrer.Get(), + testCases[caseIndex].useShapeInferenceInSchema ? shapeInferrer.Get() : nullptr + )); + } + // Register the Foo kernel + MLOperatorEdgeDescription floatTensorEdgeDesc = {}; + floatTensorEdgeDesc.edgeType = MLOperatorEdgeType::Tensor; + floatTensorEdgeDesc.tensorDataType = MLOperatorTensorDataType::Float; + + MLOperatorEdgeTypeConstrant kernelConstraint = { "T", &floatTensorEdgeDesc, 1 }; + + MLOperatorKernelDescription kernelDesc = + { + "", + "Foo", + 7, + MLOperatorExecutionType::Cpu, + &kernelConstraint, + 1 + }; + + if (!testCases[caseIndex].attributeDefaultsInSchema) + { + kernelDesc.defaultAttributes = defaultAttributes; + kernelDesc.defaultAttributeCount = static_cast(std::size(defaultAttributes)); + } + + if (!truncateOutput) + { + kernelDesc.options = MLOperatorKernelOptions::AllowDynamicInputShapes; + Microsoft::WRL::ComPtr factory = wil::MakeOrThrow(CreateABIFooKernel); + + WINML_EXPECT_EQUAL(S_OK, registry->RegisterOperatorKernel(&kernelDesc, factory.Get(), nullptr)); + } + else + { + Microsoft::WRL::ComPtr factory = 
wil::MakeOrThrow(CreateTruncatedABIFooKernel); + WINML_EXPECT_EQUAL(S_OK, registry->RegisterOperatorKernel( + &kernelDesc, + factory.Get(), + testCases[caseIndex].useShapeInferenceInKernel ? shapeInferrer.Get() : nullptr + )); + } + + // Prepare inputs + std::vector dimsX = { 3, 2 }; + std::vector valuesX = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f }; + + // Prepare expected inputs and outputs + std::vector expectedDimsY = { truncateOutput ? 2 : 3, 2 }; + // now the expected value should be Add's result. + std::vector expectedValuesY = { 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f }; + if (truncateOutput) + { + // The leading dimension is truncated, and the second dimension has two elements over that dim + expectedValuesY.resize(expectedValuesY.size() - 2); + } + + // Load the model and sessions + std::wstring fullPath = FileHelpers::GetModulePath() + (truncateOutput ? L"foo_truncated.onnx" : L"foo.onnx"); + LearningModel model = LearningModel::LoadFromFilePath(fullPath, operatorProvider); + LearningModelSession session(model); + + // Bind input and outputs + LearningModelBinding bindings(session); + + TensorFloat inputTensor = TensorFloat::CreateFromArray(dimsX, winrt::array_view(std::move(valuesX))); + bindings.Bind(winrt::hstring(L"X"), inputTensor); + + auto outputValue = TensorFloat::Create(); + WINML_EXPECT_NO_THROW(bindings.Bind(L"Y", outputValue)); + + // Evaluate the model + hstring correlationId; + WINML_EXPECT_NO_THROW(session.Evaluate(bindings, correlationId)); + + // Verify the result shape + WINML_EXPECT_EQUAL(expectedDimsY.size(), outputValue.Shape().Size()); + for (uint32_t j = 0; j < outputValue.Shape().Size(); j++) + { + WINML_EXPECT_EQUAL(expectedDimsY.at(j), outputValue.Shape().GetAt(j)); + } + + // Verify the result values + auto buffer = outputValue.GetAsVectorView(); + WINML_EXPECT_TRUE(buffer != nullptr); + WINML_EXPECT_TRUE(std::equal(expectedValuesY.cbegin(), expectedValuesY.cend(), begin(buffer))); + + // Release the model before operatorProvider 
goes out of scope + model = nullptr; + + if (shapeInferenceContext) + { + // Check that the shape inference context is closed and safely fails + MLOperatorEdgeDescription edgeDesc; + WINML_EXPECT_EQUAL(E_INVALIDARG, shapeInferenceContext->GetInputEdgeDescription(0, &edgeDesc)); + } + } +} + +const CustomOpsTestApi& getapi() { + static constexpr CustomOpsTestApi api = + { + CustomOpsScenarioTestSetup, + CustomOpsScenarioGpuTestSetup, + CustomOperatorFusion, + CustomKernelWithBuiltInSchema, + CustomKernelWithCustomSchema + }; + return api; +} \ No newline at end of file diff --git a/winml/test/scenario/cppwinrt/CustomOps.h b/winml/test/scenario/cppwinrt/CustomOps.h new file mode 100644 index 0000000000000..02447bac9d84f --- /dev/null +++ b/winml/test/scenario/cppwinrt/CustomOps.h @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "test.h" +struct CustomOpsTestApi +{ + SetupTest CustomOpsScenarioTestSetup; + SetupTest CustomOpsScenarioGpuTestSetup; + VoidTest CustomOperatorFusion; + VoidTest CustomKernelWithBuiltInSchema; + VoidTest CustomKernelWithCustomSchema; +}; +const CustomOpsTestApi& getapi(); + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(CustomOpsScenarioTest, CustomOpsScenarioTestSetup) +WINML_TEST(CustomOpsScenarioTest, CustomKernelWithBuiltInSchema) +WINML_TEST(CustomOpsScenarioTest, CustomKernelWithCustomSchema) +WINML_TEST_CLASS_END() + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(CustomOpsScenarioGpuTest, CustomOpsScenarioGpuTestSetup) +WINML_TEST(CustomOpsScenarioGpuTest, CustomOperatorFusion) +WINML_TEST_CLASS_END() \ No newline at end of file diff --git a/winml/test/scenario/cppwinrt/NoisyReluCpu.h b/winml/test/scenario/cppwinrt/NoisyReluCpu.h new file mode 100644 index 0000000000000..771554cdb9b26 --- /dev/null +++ b/winml/test/scenario/cppwinrt/NoisyReluCpu.h @@ -0,0 +1,298 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "core/providers/dml/DmlExecutionProvider/inc/MLOperatorAuthor.h" +#include "core/common//common.h" + +struct NoisyReluShapeInferrer : winrt::implements +{ + STDMETHOD(InferOutputShapes)(IMLOperatorShapeInferenceContext* context) noexcept + { + try + { + uint32_t inputDimsSize; + context->GetInputTensorDimensionCount(0, &inputDimsSize); + + uint32_t *inputDims = new uint32_t[inputDimsSize]; + context->GetInputTensorShape(0, inputDimsSize, inputDims); + + context->SetOutputTensorShape(0, inputDimsSize, inputDims); + return S_OK; + } + catch (...) + { + return winrt::to_hresult(); + } + } +}; + +struct NoisyReluOperator: winrt::implements +{ + float m_mean; + float m_variance; + + NoisyReluOperator(float mean, float variance) : + m_mean(mean), + m_variance(variance) + {} + + // Computes the outputs of the kernel. This may be called multiple times + // simultaneously within the same instance of the class. Implementations + // of this method must be thread-safe. + STDMETHOD(Compute)(IMLOperatorKernelContext* context) + { + try + { + // Get the input tensor + winrt::com_ptr inputTensor; + context->GetInputTensor(0, inputTensor.put()); + + // Get the output tensor + winrt::com_ptr outputTensor; + context->GetOutputTensor(0, outputTensor.put()); + + // Get the input and output shape sizes + uint32_t inputDimsSize = inputTensor->GetDimensionCount(); + uint32_t outputDimsSize = outputTensor->GetDimensionCount(); + if (inputDimsSize != outputDimsSize) + { + return E_UNEXPECTED; + } + + // Get the input shape + std::vector inputDims(inputDimsSize); + outputTensor->GetShape(inputDimsSize, inputDims.data()); + + // Get the output shape + std::vector outputDims(outputDimsSize); + outputTensor->GetShape(outputDimsSize, outputDims.data()); + + // For the number of total elements in the input and output shapes + auto outputDataSize = std::accumulate(outputDims.begin(), outputDims.end(), 1, std::multiplies()); + auto inputDataSize = 
std::accumulate(inputDims.begin(), inputDims.end(), 1, std::multiplies()); + if (outputDataSize != inputDataSize) + { + return E_UNEXPECTED; + } + + // If the tensor types are both float type + if (outputTensor->GetTensorDataType() == MLOperatorTensorDataType::Float && + inputTensor->GetTensorDataType() == MLOperatorTensorDataType::Float) + { + // For cpu data + if (outputTensor->IsCpuData() && inputTensor->IsCpuData()) + { + ComputeInternal(inputTensor.get(), outputTensor.get(), inputDataSize); + } + } + else if (outputTensor->GetTensorDataType() == MLOperatorTensorDataType::Double && + inputTensor->GetTensorDataType() == MLOperatorTensorDataType::Double) + { + // For cpu data + if (outputTensor->IsCpuData() && inputTensor->IsCpuData()) + { + ComputeInternal(inputTensor.get(), outputTensor.get(), inputDataSize); + } + } + + return S_OK; + } + catch (...) + { + return winrt::to_hresult(); + } + } + + template + void ComputeInternal(IMLOperatorTensor* pInputTensor, IMLOperatorTensor* pOutputTensor, uint32_t size) + { + // Create a normal distribution + std::normal_distribution<> dist{ m_mean, m_variance }; + std::random_device rd{}; + std::mt19937 gen{ rd() }; + + auto inputData = static_cast(pInputTensor->GetData()); + auto outputData = static_cast(pOutputTensor->GetData()); + + for (uint32_t i = 0; i < size; i++) + { + outputData[i] = static_cast(std::max(0, static_cast(inputData[i] + dist(gen)))); + } + } +}; + +struct NoisyReluOperatorFactory : winrt::implements +{ + STDMETHOD(CreateKernel)( + IMLOperatorKernelCreationContext* context, + IMLOperatorKernel** kernel) + { + try + { + float mean; + context->GetAttribute("mean", MLOperatorAttributeType::Float, 1, sizeof(float), reinterpret_cast(&mean)); + float variance; + context->GetAttribute("variance", MLOperatorAttributeType::Float, 1, sizeof(float), reinterpret_cast(&variance)); + + auto noisyReluOperator = winrt::make(mean, variance); + noisyReluOperator.copy_to(kernel); + return S_OK; + } + catch (...) 
+ { + return winrt::to_hresult(); + } + } + + static MLOperatorEdgeDescription CreateEdgeDescriptor(MLOperatorEdgeType type, MLOperatorTensorDataType dataType) + { + ORT_UNUSED_PARAMETER(type); + MLOperatorEdgeDescription desc; + desc.edgeType = MLOperatorEdgeType::Tensor; + desc.tensorDataType = dataType; + return desc; + } + + static void RegisterNoisyReluSchema(winrt::com_ptr registry) + { + MLOperatorSetId operatorSetId; + operatorSetId.domain = ""; + operatorSetId.version = 7; + + MLOperatorSchemaDescription noisyReluSchema; + noisyReluSchema.name = "NoisyRelu"; + noisyReluSchema.operatorSetVersionAtLastChange = 1; + + MLOperatorSchemaEdgeDescription noisyReluXInput; + noisyReluXInput.options = MLOperatorParameterOptions::Single; + noisyReluXInput.typeFormat = MLOperatorSchemaEdgeTypeFormat::Label; + noisyReluXInput.typeLabel = "T"; + + std::vector inputs { noisyReluXInput }; + noisyReluSchema.inputs = inputs.data(); + noisyReluSchema.inputCount = static_cast(inputs.size()); + + MLOperatorSchemaEdgeDescription noisyReluXOutput; + noisyReluXOutput.options = MLOperatorParameterOptions::Single; + noisyReluXOutput.typeFormat = MLOperatorSchemaEdgeTypeFormat::Label; + noisyReluXOutput.typeLabel = "T"; + + std::vector outputs{ noisyReluXOutput }; + noisyReluSchema.outputs = outputs.data(); + noisyReluSchema.outputCount = static_cast(outputs.size()); + + MLOperatorEdgeTypeConstrant typeConstraint; + typeConstraint.typeLabel = "T"; + std::vector allowedEdges + { + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double), + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float), + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16) + }; + typeConstraint.allowedTypes = allowedEdges.data(); + typeConstraint.allowedTypeCount = static_cast(allowedEdges.size()); + + std::vector typeConstraints { typeConstraint }; + noisyReluSchema.typeConstraints = typeConstraints.data(); + 
noisyReluSchema.typeConstraintCount = static_cast(typeConstraints.size()); + + + MLOperatorAttribute noisyReluMeanAttribute; + noisyReluMeanAttribute.name = "mean"; + noisyReluMeanAttribute.required = false; + noisyReluMeanAttribute.type = MLOperatorAttributeType::Float; + + MLOperatorAttribute noisyReluVarianceAttribute; + noisyReluVarianceAttribute.name = "variance"; + noisyReluVarianceAttribute.required = false; + noisyReluVarianceAttribute.type = MLOperatorAttributeType::Float; + + std::vector attributes { noisyReluMeanAttribute, noisyReluVarianceAttribute }; + noisyReluSchema.attributes = attributes.data(); + noisyReluSchema.attributeCount = static_cast(attributes.size()); + + MLOperatorAttributeNameValue noisyReluMeanAttributeValue; + noisyReluMeanAttributeValue.name = "mean"; + noisyReluMeanAttributeValue.type = MLOperatorAttributeType::Float; + noisyReluMeanAttributeValue.valueCount = 1; + static float defaultMeans[] = { 0.f }; + noisyReluMeanAttributeValue.floats = defaultMeans; + + MLOperatorAttributeNameValue noisyReluVarianceAttributeValue; + noisyReluVarianceAttributeValue.name = "variance"; + noisyReluVarianceAttributeValue.type = MLOperatorAttributeType::Float; + noisyReluVarianceAttributeValue.valueCount = 1; + static float defaultVariance[] = { 1.f }; + noisyReluVarianceAttributeValue.floats = defaultVariance; + + std::vector attributeDefaultValues { noisyReluMeanAttributeValue, noisyReluVarianceAttributeValue }; + noisyReluSchema.defaultAttributes = attributeDefaultValues.data(); + noisyReluSchema.defaultAttributeCount = static_cast(attributeDefaultValues.size()); + + std::vector schemas { &noisyReluSchema }; + registry->RegisterOperatorSetSchema( + &operatorSetId, + 6 /* baseline version */, + schemas.data(), + static_cast(schemas.size()), + nullptr, + nullptr + ); + } + + static void RegisterNoisyReluKernel(winrt::com_ptr registry) + { + MLOperatorKernelDescription kernelDescription; + kernelDescription.domain = ""; + kernelDescription.name = 
"NoisyRelu"; + kernelDescription.minimumOperatorSetVersion = 1; + kernelDescription.executionType = MLOperatorExecutionType::Cpu; + + MLOperatorEdgeTypeConstrant typeConstraint; + typeConstraint.typeLabel = "T"; + std::vector allowedEdges + { + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double), + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float), + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16) + }; + typeConstraint.allowedTypes = allowedEdges.data(); + typeConstraint.allowedTypeCount = static_cast(allowedEdges.size()); + + std::vector typeConstraints{ typeConstraint }; + kernelDescription.typeConstraints = typeConstraints.data(); + kernelDescription.typeConstraintCount = static_cast(typeConstraints.size()); + + + MLOperatorAttributeNameValue noisyReluMeanAttributeValue; + noisyReluMeanAttributeValue.name = "mean"; + noisyReluMeanAttributeValue.type = MLOperatorAttributeType::Float; + noisyReluMeanAttributeValue.valueCount = 1; + static float defaultMeans[] = { 0.f }; + noisyReluMeanAttributeValue.floats = defaultMeans; + + MLOperatorAttributeNameValue noisyReluVarianceAttributeValue; + noisyReluVarianceAttributeValue.name = "variance"; + noisyReluVarianceAttributeValue.type = MLOperatorAttributeType::Float; + noisyReluVarianceAttributeValue.valueCount = 1; + static float defaultVariance[] = { 1.f }; + noisyReluVarianceAttributeValue.floats = defaultVariance; + + std::vector attributeDefaultValues{ noisyReluMeanAttributeValue, noisyReluVarianceAttributeValue }; + kernelDescription.defaultAttributes = attributeDefaultValues.data(); + kernelDescription.defaultAttributeCount = static_cast(attributeDefaultValues.size()); + kernelDescription.options = MLOperatorKernelOptions::None; + kernelDescription.executionOptions = 0; + + auto factory = winrt::make(); + auto shareInferrer = winrt::make(); + + registry->RegisterOperatorKernel( + &kernelDescription, + factory.get(), 
+ shareInferrer.get() + ); + } +}; diff --git a/winml/test/scenario/cppwinrt/ReluCpu.h b/winml/test/scenario/cppwinrt/ReluCpu.h new file mode 100644 index 0000000000000..44b59a5f1d07d --- /dev/null +++ b/winml/test/scenario/cppwinrt/ReluCpu.h @@ -0,0 +1,163 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/providers/dml/DmlExecutionProvider/inc/MLOperatorAuthor.h" +#include "core/common//common.h" + +struct ReluShapeInferrer : winrt::implements +{ + STDMETHOD(InferOutputShapes)(IMLOperatorShapeInferenceContext* context) noexcept + { + uint32_t inputDimsSize; + context->GetInputTensorDimensionCount(0, &inputDimsSize); + + uint32_t *inputDims = new uint32_t[inputDimsSize]; + context->GetInputTensorShape(0, inputDimsSize, inputDims); + + context->SetOutputTensorShape(0, inputDimsSize, inputDims); + return S_OK; + } +}; + +struct ReluOperator: winrt::implements +{ + ReluOperator() {} + + // Computes the outputs of the kernel. In this case, the output will represent + // the Rectified Linear Unit (Relu) output. + // + // Based on the operators location in the model graph this operator may be called multiple times + // or simultaneously within the same instance of the class during evaluation. Implementations + // of this method must be thread-safe. 
+ STDMETHOD(Compute)(IMLOperatorKernelContext* context) + { + // Get the input tensor + winrt::com_ptr inputTensor; + context->GetInputTensor(0, inputTensor.put()); + + // Get the output tensor + winrt::com_ptr outputTensor; + context->GetOutputTensor(0, outputTensor.put()); + + // Get the input and output shape sizes + uint32_t inputDimsSize = inputTensor->GetDimensionCount(); + uint32_t outputDimsSize = outputTensor->GetDimensionCount(); + if (inputDimsSize != outputDimsSize) + { + return E_UNEXPECTED; + } + + // Get the input shape + std::vector inputDims(inputDimsSize); + outputTensor->GetShape(inputDimsSize, inputDims.data()); + + // Get the output shape + std::vector outputDims(outputDimsSize); + outputTensor->GetShape(outputDimsSize, outputDims.data()); + + // For the number of total elements in the input and output shapes + auto outputDataSize = std::accumulate(outputDims.begin(), outputDims.end(), 1, std::multiplies()); + auto inputDataSize = std::accumulate(inputDims.begin(), inputDims.end(), 1, std::multiplies()); + if (outputDataSize != inputDataSize) + { + return E_UNEXPECTED; + } + + // If the tensor types are both float type + if (outputTensor->GetTensorDataType() == MLOperatorTensorDataType::Float && + inputTensor->GetTensorDataType() == MLOperatorTensorDataType::Float) + { + // For cpu data + if (outputTensor->IsCpuData() && inputTensor->IsCpuData()) + { + ComputeInternal(inputTensor.get(), outputTensor.get(), inputDataSize); + } + } + else if (outputTensor->GetTensorDataType() == MLOperatorTensorDataType::Double && + inputTensor->GetTensorDataType() == MLOperatorTensorDataType::Double) + { + // For cpu data + if (outputTensor->IsCpuData() && inputTensor->IsCpuData()) + { + ComputeInternal(inputTensor.get(), outputTensor.get(), inputDataSize); + } + } + + + return S_OK; + } + + template + void ComputeInternal(IMLOperatorTensor* pInputTensor, IMLOperatorTensor* pOutputTensor, uint32_t size) + { + auto inputData = 
static_cast(pInputTensor->GetData()); + auto outputData = static_cast(pOutputTensor->GetData()); + + for (uint32_t i = 0; i < size; i++) + { + outputData[i] = static_cast(std::max(0, inputData[i])); + } + } +}; + +struct ReluOperatorFactory : winrt::implements +{ + STDMETHOD(CreateKernel)( + IMLOperatorKernelCreationContext* context, + IMLOperatorKernel** kernel) + { + ORT_UNUSED_PARAMETER(context); + auto reluOperator = winrt::make(); + reluOperator.copy_to(kernel); + return S_OK; + } + + static MLOperatorEdgeDescription CreateEdgeDescriptor(MLOperatorEdgeType type, MLOperatorTensorDataType dataType) + { + ORT_UNUSED_PARAMETER(type); + MLOperatorEdgeDescription desc; + desc.edgeType = MLOperatorEdgeType::Tensor; + desc.tensorDataType = dataType; + return desc; + } + + static void RegisterReluKernel(winrt::com_ptr registry) + { + MLOperatorKernelDescription kernelDescription; + kernelDescription.domain = ""; + kernelDescription.name = "Relu"; + kernelDescription.minimumOperatorSetVersion = 1; + kernelDescription.executionType = MLOperatorExecutionType::Cpu; + + MLOperatorEdgeTypeConstrant typeConstraint; + typeConstraint.typeLabel = "T"; + std::vector allowedEdges + { + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double), + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float), + CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16) + }; + typeConstraint.allowedTypes = allowedEdges.data(); + typeConstraint.allowedTypeCount = static_cast(allowedEdges.size()); + + std::vector typeConstraints{ typeConstraint }; + kernelDescription.typeConstraints = typeConstraints.data(); + kernelDescription.typeConstraintCount = static_cast(typeConstraints.size()); + + kernelDescription.defaultAttributes = nullptr; + kernelDescription.defaultAttributeCount = 0; + kernelDescription.options = MLOperatorKernelOptions::None; + kernelDescription.executionOptions = 0; + + auto factory = winrt::make(); 
+ auto shareInferrer = winrt::make(); + + registry->RegisterOperatorKernel( + &kernelDescription, + factory.get(), + shareInferrer.get() + ); + } +}; diff --git a/winml/test/scenario/cppwinrt/noisy_relu.onnx b/winml/test/scenario/cppwinrt/noisy_relu.onnx new file mode 100644 index 0000000000000..d83dddb035760 Binary files /dev/null and b/winml/test/scenario/cppwinrt/noisy_relu.onnx differ diff --git a/winml/test/scenario/cppwinrt/relu.onnx b/winml/test/scenario/cppwinrt/relu.onnx new file mode 100644 index 0000000000000..d9f25e904b886 --- /dev/null +++ b/winml/test/scenario/cppwinrt/relu.onnx @@ -0,0 +1,11 @@ +justoeck:0 + +XY"ReluZ +X + + +b +Y + + +B \ No newline at end of file diff --git a/winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp b/winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp new file mode 100644 index 0000000000000..366915bf8536a --- /dev/null +++ b/winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp @@ -0,0 +1,1456 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "testPch.h" + +#include + +#include "winrt/Windows.Devices.Enumeration.Pnp.h" +#include "winrt/Windows.Graphics.DirectX.Direct3D11.h" +#include "winrt/Windows.Media.Capture.h" +#include "winrt/Windows.Media.h" +#include "winrt/Windows.Security.Cryptography.Core.h" +#include "winrt/Windows.Security.Cryptography.h" +#include "winrt/Windows.Storage.h" +#include "winrt/Windows.Storage.Streams.h" + +// lame, but WinBase.h redefines this, which breaks winrt headers later +#ifdef GetCurrentTime +#undef GetCurrentTime +#endif +#include "CommonDeviceHelpers.h" +#include "CustomOperatorProvider.h" +#include "filehelpers.h" +#include "robuffer.h" +#include "scenariotestscppwinrt.h" +#include "Windows.AI.MachineLearning.Native.h" +#include "Windows.Graphics.DirectX.Direct3D11.interop.h" +#include "windows.ui.xaml.media.dxinterop.h" +#include "winrt/Windows.UI.Xaml.Controls.h" +#include "winrt/Windows.UI.Xaml.Media.Imaging.h" + +#include +#include +#include +#include +#include +#if __has_include("dxcore.h") +#define ENABLE_DXCORE 1 +#endif +#ifdef ENABLE_DXCORE +#include +#endif + +using namespace winrt; +using namespace winrt::Windows::AI::MachineLearning; +using namespace winrt::Windows::Foundation::Collections; +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Graphics::DirectX; +using namespace ::Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Storage; +using namespace winrt::Windows::Storage::Streams; +using namespace winrt::Windows::UI::Xaml::Media::Imaging; + +static void ScenarioCppWinrtTestSetup() { + init_apartment(); +} + +static void ScenarioCppWinrtGpuTestSetup() { + ScenarioCppWinrtTestSetup(); + GPUTEST +}; + +static void ScenarioCppWinrtGpuSkipEdgeCoreTestSetup() { + ScenarioCppWinrtGpuTestSetup(); + SKIP_EDGECORE +}; + +static void Sample1() { + LearningModel model = nullptr; + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + 
WINML_EXPECT_NO_THROW(model = LearningModel::LoadFromFilePath(filePath)); +} + +ILearningModelFeatureValue MakeTensor(const ITensorFeatureDescriptor& descriptor) { + auto dataType = descriptor.TensorKind(); + std::vector shape; + int64_t size = 1; + for (auto&& dim : descriptor.Shape()) { + if (dim == -1) dim = 1; + shape.push_back(dim); + size *= dim; + } + + switch (dataType) { + case TensorKind::Float: { + std::vector buffer; + buffer.resize(static_cast(size)); + auto ftv = TensorFloat::CreateFromIterable(shape, winrt::single_threaded_vector(std::move(buffer))); + return ftv; + } + default: + throw_hresult(E_NOTIMPL); + break; + } +} + +ILearningModelFeatureValue MakeImage(const IImageFeatureDescriptor& /*descriptor*/, winrt::Windows::Foundation::IInspectable data) { + VideoFrame videoFrame = nullptr; + if (data != nullptr) { + SoftwareBitmap sb = nullptr; + data.as(sb); + videoFrame = VideoFrame::CreateWithSoftwareBitmap(sb); + } else { + SoftwareBitmap sb = SoftwareBitmap(BitmapPixelFormat::Bgra8, 28, 28); + videoFrame = VideoFrame::CreateWithSoftwareBitmap(sb); + } + auto imageValue = ImageFeatureValue::CreateFromVideoFrame(videoFrame); + return imageValue; +} + +ILearningModelFeatureValue FeatureValueFromFeatureValueDescriptor(ILearningModelFeatureDescriptor descriptor, winrt::Windows::Foundation::IInspectable data = nullptr) { + auto kind = descriptor.Kind(); + switch (kind) { + case LearningModelFeatureKind::Image: { + ImageFeatureDescriptor imageDescriptor = nullptr; + descriptor.as(imageDescriptor); + return MakeImage(imageDescriptor, data); + } + case LearningModelFeatureKind::Map: + throw_hresult(E_NOTIMPL); + break; + case LearningModelFeatureKind::Sequence: + throw_hresult(E_NOTIMPL); + break; + case LearningModelFeatureKind::Tensor: { + TensorFeatureDescriptor tensorDescriptor = nullptr; + descriptor.as(tensorDescriptor); + return MakeTensor(tensorDescriptor); + } + default: + throw_hresult(E_INVALIDARG); + break; + } +} + +// helper method that 
populates a binding object with default data +static void BindFeatures(LearningModelBinding binding, IVectorView features) { + for (auto&& feature : features) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(feature); + // set an actual buffer here. we're using uninitialized data for simplicity. + binding.Bind(feature.Name(), featureValue); + } +} + +//! Scenario1 : Load , bind, eval a model using all the system defaults (easy path) +static void Scenario1LoadBindEvalDefault() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the default device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::Default)); + // create a binding set + LearningModelBinding binding(session); + // bind the input and the output buffers by name + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(input); + // set an actual buffer here. we're using uninitialized data for simplicity. + binding.Bind(input.Name(), featureValue); + } + // run eval + WINML_EXPECT_NO_THROW(session.Evaluate(binding, L"")); +} + +//! Scenario2: Load a model from stream +// - winRT, and win32 +static void Scenario2LoadModelFromStream() { + // get a stream + std::wstring path = FileHelpers::GetModulePath() + L"model.onnx"; + auto storageFile = StorageFile::GetFileFromPathAsync(path).get(); + + // load the stream + Streams::IRandomAccessStreamReference streamref; + storageFile.as(streamref); + + // load a model + LearningModel model = nullptr; + WINML_EXPECT_NO_THROW(model = LearningModel::LoadFromStreamAsync(streamref).get()); + WINML_EXPECT_TRUE(model != nullptr); +} + +//! 
Scenario3: pass a SoftwareBitmap into a model +static void Scenario3SoftwareBitmapInputBinding() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the default device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::Default)); + // create a binding set + LearningModelBinding binding(session); + // bind the input and the output buffers by name + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + // load the SoftwareBitmap + SoftwareBitmap sb = FileHelpers::GetSoftwareBitmapFromFile(FileHelpers::GetModulePath() + L"fish.png"); + auto videoFrame = VideoFrame::CreateWithSoftwareBitmap(sb); + auto imageValue = ImageFeatureValue::CreateFromVideoFrame(videoFrame); + + WINML_EXPECT_NO_THROW(binding.Bind(input.Name(), imageValue)); + } + // run eval + WINML_EXPECT_NO_THROW(session.Evaluate(binding, L"")); +} + +//! Scenario5: run an async eval +winrt::Windows::Foundation::IAsyncOperation DoEvalAsync() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the default device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::Default)); + // create a binding set + LearningModelBinding binding(session); + // bind the input and the output buffers by name + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(input); + // set an actual buffer here. we're using uninitialized data for simplicity. 
+ binding.Bind(input.Name(), featureValue); + } + // run eval async + return session.EvaluateAsync(binding, L""); +} + +static void Scenario5AsyncEval() { + auto task = DoEvalAsync(); + + while (task.Status() == winrt::Windows::Foundation::AsyncStatus::Started) { + std::cout << "Waiting...\n"; + Sleep(30); + } + std::cout << "Done\n"; + WINML_EXPECT_NO_THROW(task.get()); +} + +//! Scenario6: use BindInputWithProperties - BitmapBounds, BitmapPixelFormat +// apparently this scenario is cut for rs5. - not cut, just rewprked. move props +// to the image value when that is checked in. +static void Scenario6BindWithProperties() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the default device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::Default)); + // create a binding set + LearningModelBinding binding(session); + // bind the input and the output buffers by name + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + SoftwareBitmap sb = SoftwareBitmap(BitmapPixelFormat::Bgra8, 224, 224); + auto videoFrame = VideoFrame::CreateWithSoftwareBitmap(sb); + auto imageValue = ImageFeatureValue::CreateFromVideoFrame(videoFrame); + + PropertySet propertySet; + + // make a BitmapBounds + BitmapBounds bounds; + bounds.X = 0; + bounds.Y = 0; + bounds.Height = 100; + bounds.Width = 100; + + auto bitmapsBoundsProperty = winrt::Windows::Foundation::PropertyValue::CreateUInt32Array({bounds.X, bounds.Y, bounds.Width, bounds.Height}); + // insert it in the property set + propertySet.Insert(L"BitmapBounds", bitmapsBoundsProperty); + + // make a BitmapPixelFormat + BitmapPixelFormat bitmapPixelFormat = BitmapPixelFormat::Bgra8; + // translate it to an int so it can be used as a PropertyValue; + int intFromBitmapPixelFormat = static_cast(bitmapPixelFormat); + auto bitmapPixelFormatProperty = 
winrt::Windows::Foundation::PropertyValue::CreateInt32(intFromBitmapPixelFormat); + // insert it in the property set + propertySet.Insert(L"BitmapPixelFormat", bitmapPixelFormatProperty); + + // bind with properties + WINML_EXPECT_NO_THROW(binding.Bind(input.Name(), imageValue, propertySet)); + } + // run eval + WINML_EXPECT_NO_THROW(session.Evaluate(binding, L"")); +} + +//! Scenario7: run eval without creating a binding object +static void Scenario7EvalWithNoBind() { + auto map = winrt::single_threaded_map(); + + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the default device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::Default)); + // enumerate feature descriptors and create features (but don't bind them) + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(input); + map.Insert(input.Name(), featureValue); + } + // run eval + WINML_EXPECT_NO_THROW(session.EvaluateFeaturesAsync(map, L"").get()); +} + +//! 
Scenario8: choose which device to run the model on - PreferredDeviceType, PreferredDevicePerformance, SetDeviceFromSurface, SetDevice +// create a session on the default device +static void Scenario8SetDeviceSampleDefault() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + LearningModelDevice anyDevice(LearningModelDeviceKind::Default); + LearningModelSession anySession(model, anyDevice); +} + +// create a session on the CPU device +static void Scenario8SetDeviceSampleCPU() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + LearningModelDevice cpuDevice(LearningModelDeviceKind::Cpu); + LearningModelSession cpuSession(model, cpuDevice); +} + +// create a session on the default DML device +static void Scenario8SetDeviceSampleDefaultDirectX() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + LearningModelDevice dmlDeviceDefault(LearningModelDeviceKind::DirectX); + LearningModelSession dmlSessionDefault(model, dmlDeviceDefault); +} + +// create a session on the DML device that provides best power +static void Scenario8SetDeviceSampleMinPower() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + LearningModelDevice dmlDeviceMinPower(LearningModelDeviceKind::DirectXMinPower); + LearningModelSession dmlSessionMinPower(model, dmlDeviceMinPower); +} + +// create a session on the DML device that provides best perf +static void Scenario8SetDeviceSampleMaxPerf() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + 
LearningModelDevice dmlDeviceMaxPerf(LearningModelDeviceKind::DirectXHighPerformance); + LearningModelSession dmlSessionMaxPerf(model, dmlDeviceMaxPerf); +} + +// create a session on the same device my camera is on +static void Scenario8SetDeviceSampleMyCameraDevice() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + auto devices = winrt::Windows::Devices::Enumeration::DeviceInformation::FindAllAsync(winrt::Windows::Devices::Enumeration::DeviceClass::VideoCapture).get(); + hstring deviceId; + if (devices.Size() > 0) { + auto device = devices.GetAt(0); + deviceId = device.Id(); + auto deviceName = device.Name(); + auto enabled = device.IsEnabled(); + std::cout << "Found device " << deviceName.c_str() << ", enabled = " << enabled << "\n"; + winrt::Windows::Media::Capture::MediaCapture captureManager; + winrt::Windows::Media::Capture::MediaCaptureInitializationSettings settings; + settings.VideoDeviceId(deviceId); + captureManager.InitializeAsync(settings).get(); + auto mediaCaptureSettings = captureManager.MediaCaptureSettings(); + auto direct3D11Device = mediaCaptureSettings.Direct3D11Device(); + LearningModelDevice dmlDeviceCamera = LearningModelDevice::CreateFromDirect3D11Device(direct3D11Device); + LearningModelSession dmlSessionCamera(model, dmlDeviceCamera); + } else { + WINML_SKIP_TEST("Test skipped because video capture device is missing"); + } +} + +// create a device from D3D11 Device +static void Scenario8SetDeviceSampleD3D11Device() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + com_ptr pD3D11Device = nullptr; + com_ptr pContext = nullptr; + D3D_FEATURE_LEVEL fl; + HRESULT result = D3D11CreateDevice( + nullptr, D3D_DRIVER_TYPE::D3D_DRIVER_TYPE_HARDWARE, nullptr, 0, nullptr, 0, + D3D11_SDK_VERSION, pD3D11Device.put(), &fl, 
pContext.put()); + if (FAILED(result)) { + WINML_SKIP_TEST("Test skipped because d3d11 device is missing"); + } + + // get dxgiDevice from d3ddevice + com_ptr pDxgiDevice; + pD3D11Device.get()->QueryInterface(pDxgiDevice.put()); + + com_ptr<::IInspectable> pInspectable; + CreateDirect3D11DeviceFromDXGIDevice(pDxgiDevice.get(), pInspectable.put()); + + LearningModelDevice device = LearningModelDevice::CreateFromDirect3D11Device( + pInspectable.as()); + LearningModelSession session(model, device); +} + +// create a session on the a specific dx device that I chose some other way , note we have to use native interop here and pass a cmd queue +static void Scenario8SetDeviceSampleCustomCommandQueue() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + com_ptr pD3D12Device = nullptr; + CommonDeviceHelpers::AdapterEnumerationSupport support; + if (FAILED(CommonDeviceHelpers::GetAdapterEnumerationSupport(&support))) { + WINML_LOG_ERROR("Unable to load DXGI or DXCore"); + return; + } + HRESULT result = S_OK; + if (support.has_dxgi) { + WINML_EXPECT_NO_THROW(result = D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_12_0, __uuidof(ID3D12Device), reinterpret_cast(pD3D12Device.put()))); + } +#ifdef ENABLE_DXCORE + if (support.has_dxgi == false) { + com_ptr spFactory; + DXCoreCreateAdapterFactory(IID_PPV_ARGS(spFactory.put())); + const GUID gpuFilter[] = {DXCORE_ADAPTER_ATTRIBUTE_D3D12_GRAPHICS}; + com_ptr spAdapterList; + spFactory->CreateAdapterList(1, gpuFilter, IID_PPV_ARGS(spAdapterList.put())); + com_ptr spAdapter; + WINML_EXPECT_NO_THROW(spAdapterList->GetAdapter(0, IID_PPV_ARGS(spAdapter.put()))); + ::IUnknown* pAdapter = spAdapter.get(); + WINML_EXPECT_NO_THROW(result = D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_12_0, __uuidof(ID3D12Device), reinterpret_cast(pD3D12Device.put()))); + } +#endif + + if (FAILED(result)) { + 
WINML_SKIP_TEST("Test skipped because d3d12 device is missing"); + return; + } + com_ptr dxQueue = nullptr; + D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {}; + commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue), reinterpret_cast(&dxQueue)); + auto factory = get_activation_factory(); + + com_ptr<::IUnknown> spUnk; + factory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put()); + + auto dmlDeviceCustom = spUnk.as(); + LearningModelSession dmlSessionCustom(model, dmlDeviceCustom); +} + +//pass a Tensor in as an input GPU +static void Scenario9LoadBindEvalInputTensorGPU() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"fns-candy.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + + com_ptr pD3D12Device; + WINML_EXPECT_NO_THROW(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), pD3D12Device.put_void())); + com_ptr dxQueue; + D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {}; + commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue), dxQueue.put_void()); + auto devicefactory = get_activation_factory(); + auto tensorfactory = get_activation_factory(); + + com_ptr<::IUnknown> spUnk; + WINML_EXPECT_NO_THROW(devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put())); + + LearningModelDevice dmlDeviceCustom = nullptr; + WINML_EXPECT_NO_THROW(spUnk.as(dmlDeviceCustom)); + LearningModelSession dmlSessionCustom = nullptr; + WINML_EXPECT_NO_THROW(dmlSessionCustom = LearningModelSession(model, dmlDeviceCustom)); + + LearningModelBinding modelBinding(dmlSessionCustom); + + UINT64 bufferbytesize = 720 * 720 * 3 * sizeof(float); + D3D12_HEAP_PROPERTIES heapProperties = { + D3D12_HEAP_TYPE_DEFAULT, + D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + D3D12_MEMORY_POOL_UNKNOWN, + 0, + 0}; + D3D12_RESOURCE_DESC resourceDesc 
= { + D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + bufferbytesize, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; + + com_ptr pGPUResource = nullptr; + pD3D12Device->CreateCommittedResource( + &heapProperties, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + __uuidof(ID3D12Resource), + pGPUResource.put_void()); + com_ptr<::IUnknown> spUnkTensor; + TensorFloat input1imagetensor(nullptr); + __int64 shape[4] = {1, 3, 720, 720}; + tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), shape, 4, spUnkTensor.put()); + spUnkTensor.try_as(input1imagetensor); + + auto feature = model.InputFeatures().First(); + WINML_EXPECT_NO_THROW(modelBinding.Bind(feature.Current().Name(), input1imagetensor)); + + auto outputtensordescriptor = model.OutputFeatures().First().Current().as(); + auto outputtensorshape = outputtensordescriptor.Shape(); + VideoFrame outputimage( + BitmapPixelFormat::Rgba8, + static_cast(outputtensorshape.GetAt(3)), + static_cast(outputtensorshape.GetAt(2))); + ImageFeatureValue outputTensor = ImageFeatureValue::CreateFromVideoFrame(outputimage); + + WINML_EXPECT_NO_THROW(modelBinding.Bind(model.OutputFeatures().First().Current().Name(), outputTensor)); + + // Testing GetAsD3D12Resource + com_ptr pReturnedResource; + input1imagetensor.as()->GetD3D12Resource(pReturnedResource.put()); + WINML_EXPECT_EQUAL(pReturnedResource.get(), pGPUResource.get()); + + // Evaluate the model + winrt::hstring correlationId; + dmlSessionCustom.EvaluateAsync(modelBinding, correlationId).get(); +} + +static void Scenario13SingleModelOnCPUandGPU() { + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + LearningModelSession cpuSession(model, LearningModelDevice(LearningModelDeviceKind::Cpu)); + LearningModelSession gpuSession(model, 
LearningModelDevice(LearningModelDeviceKind::DirectX)); + + LearningModelBinding cpuBinding(cpuSession); + LearningModelBinding gpuBinding(gpuSession); + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto cpuFeatureValue = FeatureValueFromFeatureValueDescriptor(input); + cpuBinding.Bind(input.Name(), cpuFeatureValue); + + auto gpuFeatureValue = FeatureValueFromFeatureValueDescriptor(input); + gpuBinding.Bind(input.Name(), gpuFeatureValue); + } + + auto cpuTask = cpuSession.EvaluateAsync(cpuBinding, L"cpu"); + auto gpuTask = gpuSession.EvaluateAsync(gpuBinding, L"gpu"); + + WINML_EXPECT_NO_THROW(cpuTask.get()); + WINML_EXPECT_NO_THROW(gpuTask.get()); +} + +// Validates when binding input image with free dimensions, the binding step is executed correctly. +static void Scenario11FreeDimensionsTensor() { + std::wstring filePath = FileHelpers::GetModulePath() + L"free_dimensional_image_input.onnx"; + // load a model with expected input size: -1 x -1 + auto model = LearningModel::LoadFromFilePath(filePath); + auto session = LearningModelSession(model); + auto binding = LearningModelBinding(session); + + VideoFrame inputImage(BitmapPixelFormat::Rgba8, 1000, 1000); + ImageFeatureValue inputimagetensor = ImageFeatureValue::CreateFromVideoFrame(inputImage); + + auto feature = model.InputFeatures().First(); + binding.Bind(feature.Current().Name(), inputimagetensor); + feature.MoveNext(); + binding.Bind(feature.Current().Name(), inputimagetensor); + + session.Evaluate(binding, L""); +} + +static void Scenario11FreeDimensionsImage() { + std::wstring filePath = FileHelpers::GetModulePath() + L"free_dimensional_imageDes.onnx"; + // load a model with expected input size: -1 x -1 + auto model = LearningModel::LoadFromFilePath(filePath); + auto session = LearningModelSession(model); + auto binding = LearningModelBinding(session); + + VideoFrame inputImage(BitmapPixelFormat::Bgra8, 1000, 1000); + ImageFeatureValue inputimagetensor = 
ImageFeatureValue::CreateFromVideoFrame(inputImage); + + auto feature = model.InputFeatures().First(); + ImageFeatureDescriptor imageDescriptor = nullptr; + feature.Current().as(imageDescriptor); + binding.Bind(feature.Current().Name(), inputimagetensor); + + feature.MoveNext(); + feature.Current().as(imageDescriptor); + binding.Bind(feature.Current().Name(), inputimagetensor); + + session.Evaluate(binding, L""); +} + +struct SwapChainEntry { + LearningModelSession session; + LearningModelBinding binding; + winrt::Windows::Foundation::IAsyncOperation activetask; + SwapChainEntry() : session(nullptr), binding(nullptr), activetask(nullptr) {} +}; +void SubmitEval(LearningModel model, SwapChainEntry* sessionBindings, int swapchaindex) { + if (sessionBindings[swapchaindex].activetask != nullptr) { + //make sure the previously submitted work for this swapchain index is complete before reusing resources + sessionBindings[swapchaindex].activetask.get(); + } + // bind the input and the output buffers by name + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(input); + // set an actual buffer here. we're using uninitialized data for simplicity. 
+ sessionBindings[swapchaindex].binding.Bind(input.Name(), featureValue); + } + // submit an eval and wait for it to finish submitting work + sessionBindings[swapchaindex].activetask = sessionBindings[swapchaindex].session.EvaluateAsync(sessionBindings[swapchaindex].binding, L"0"); + // return without waiting for the submit to finish, setup the completion handler +} + +//Scenario14:Load single model, run it mutliple times on a single gpu device using a fast swapchain pattern +static void Scenario14RunModelSwapchain() { + const int swapchainentrycount = 3; + SwapChainEntry sessionBindings[swapchainentrycount]; + + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on gpu1 + LearningModelDevice dmlDevice = LearningModelDevice(LearningModelDeviceKind::DirectX); + // create the swapchain style bindings to cycle through + for (int i = 0; i < swapchainentrycount; i++) { + sessionBindings[i].session = LearningModelSession(model, dmlDevice); + sessionBindings[i].binding = LearningModelBinding(sessionBindings[i].session); + } + + //submit 10 evaluations to 3 swapchain entries + int swapchaindex = 0; + for (int i = 0; i < 10; i++) { + swapchaindex = swapchaindex % swapchainentrycount; + SubmitEval(model, sessionBindings, (swapchaindex)++); + } + + //wait for all work to be completed + for (int i = 0; i < swapchainentrycount; i++) { + if (sessionBindings[i].activetask != nullptr) { + //make sure the previously submitted work for this swapchain index is compolete before resuing resources + sessionBindings[i].activetask.get(); + } + } +} +static void LoadBindEval_CustomOperator_CPU(const wchar_t* fileName) { + auto customOperatorProvider = winrt::make(); + auto provider = customOperatorProvider.as(); + + LearningModel model = LearningModel::LoadFromFilePath(fileName, provider); + LearningModelSession session(model, 
LearningModelDevice(LearningModelDeviceKind::Default)); + LearningModelBinding bindings(session); + + auto inputShape = std::vector{5}; + auto inputData = std::vector{-50.f, -25.f, 0.f, 25.f, 50.f}; + auto inputValue = + TensorFloat::CreateFromIterable( + inputShape, + single_threaded_vector(std::move(inputData)).GetView()); + WINML_EXPECT_NO_THROW(bindings.Bind(L"X", inputValue)); + + auto outputValue = TensorFloat::Create(); + WINML_EXPECT_NO_THROW(bindings.Bind(L"Y", outputValue)); + + hstring correlationId; + WINML_EXPECT_NO_THROW(session.Evaluate(bindings, correlationId)); + + auto buffer = outputValue.GetAsVectorView(); + WINML_EXPECT_TRUE(buffer != nullptr); +} + +//! Scenario17 : Control the dev diagnostics features of WinML Tracing +static void Scenario17DevDiagnostics() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the default device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::Default)); + // create a binding set + LearningModelBinding binding(session); + // bind the input and the output buffers by name + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(input); + // set an actual buffer here. we're using uninitialized data for simplicity. + binding.Bind(input.Name(), featureValue); + } + session.EvaluationProperties().Insert(L"EnableDebugOutput", nullptr); + // run eval + WINML_EXPECT_NO_THROW(session.Evaluate(binding, L"")); +} + +/** + * Custom Operator Tests are labeled as GPU tests because the DML code is interlaced with the custom op code + * even though CPU custom ops shouldn't be dependent on GPU functionality. + * These should be reclassed to ScenarioCppWinrt once the DML code is decoupled from the custom op code. 
+**/ +// create a session that loads a model with a branch new operator, register the custom operator, and load/bind/eval +static void Scenario20aLoadBindEvalCustomOperatorCPU() { + std::wstring filePath = FileHelpers::GetModulePath() + L"noisy_relu.onnx"; + LoadBindEval_CustomOperator_CPU(filePath.c_str()); +} + +// create a session that loads a model with an overridden operator, register the replacement custom operator, and load/bind/eval +static void Scenario20bLoadBindEvalReplacementCustomOperatorCPU() { + std::wstring filePath = FileHelpers::GetModulePath() + L"relu.onnx"; + LoadBindEval_CustomOperator_CPU(filePath.c_str()); +} + +//! Scenario21: Load two models, set them up to run chained after one another on the same gpu hardware device +static void Scenario21RunModel2ChainZ() { + // load a model, TODO: get a model that has an image descriptor + std::wstring filePath = FileHelpers::GetModulePath() + L"fns-candy.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create both session on the default gpu + LearningModelSession session1(model, LearningModelDevice(LearningModelDeviceKind::DirectX)); + LearningModelSession session2(model, LearningModelDevice(LearningModelDeviceKind::DirectX)); + // create both binding sets + LearningModelBinding binding1(session1); + LearningModelBinding binding2(session2); + // get the input descriptor + auto input = model.InputFeatures().GetAt(0); + // load a SoftwareBitmap + auto sb = FileHelpers::GetSoftwareBitmapFromFile(FileHelpers::GetModulePath() + L"fish_720.png"); + auto videoFrame = VideoFrame::CreateWithSoftwareBitmap(sb); + // bind it + binding1.Bind(input.Name(), videoFrame); + // get the output descriptor + auto output = model.OutputFeatures().GetAt(0); + // create an empty output tensor since we don't want the first model to detensorize into an image. 
+ + std::vector shape = {1, 3, 720, 720}; + auto outputValue = TensorFloat::Create(shape); // FeatureValueFromFeatureValueDescriptor(input, nullptr); + // now bind the(empty) output so we have a marker to chain with + binding1.Bind(output.Name(), outputValue); + // and leave the output unbound on the second model, we will fetch it later + // run both models async + WINML_EXPECT_NO_THROW(session1.EvaluateAsync(binding1, L"")); + + // now bind that output to the next models input + binding2.Bind(input.Name(), outputValue); + + //eval the second model + auto session2AsyncOp = session2.EvaluateAsync(binding2, L""); + + // now get the output don't wait, queue up the next model + auto finalOutput = session2AsyncOp.get().Outputs().First().Current().Value(); +} + +bool VerifyHelper(ImageFeatureValue actual, ImageFeatureValue expected) { + auto softwareBitmapActual = actual.VideoFrame().SoftwareBitmap(); + auto softwareBitmapExpected = expected.VideoFrame().SoftwareBitmap(); + WINML_EXPECT_EQUAL(softwareBitmapActual.PixelHeight(), softwareBitmapExpected.PixelHeight()); + WINML_EXPECT_EQUAL(softwareBitmapActual.PixelWidth(), softwareBitmapExpected.PixelWidth()); + WINML_EXPECT_EQUAL(softwareBitmapActual.BitmapPixelFormat(), softwareBitmapExpected.BitmapPixelFormat()); + + // 4 means 4 channels + uint32_t size = 4 * softwareBitmapActual.PixelHeight() * softwareBitmapActual.PixelWidth(); + + winrt::Windows::Storage::Streams::Buffer actualOutputBuffer(size); + winrt::Windows::Storage::Streams::Buffer expectedOutputBuffer(size); + + softwareBitmapActual.CopyToBuffer(actualOutputBuffer); + softwareBitmapExpected.CopyToBuffer(expectedOutputBuffer); + + byte* actualBytes; + actualOutputBuffer.try_as<::Windows::Storage::Streams::IBufferByteAccess>()->Buffer(&actualBytes); + byte* expectedBytes; + expectedOutputBuffer.try_as<::Windows::Storage::Streams::IBufferByteAccess>()->Buffer(&expectedBytes); + + byte* pActualByte = actualBytes; + byte* pExpectedByte = expectedBytes; + + // 
hard code, might need to be modified later. + const float cMaxErrorRate = 0.06f; + byte epsilon = 20; + + UINT errors = 0; + for (uint32_t i = 0; i < size; i++, pActualByte++, pExpectedByte++) { + auto diff = (*pActualByte - *pExpectedByte); + if (diff > epsilon) { + errors++; + } + } + std::cout << "total errors is " << errors << "/" << size << ", errors rate is " << (float)errors / size << "\n"; + + return ((float)errors / size < cMaxErrorRate); +} + +static void Scenario22ImageBindingAsCPUTensor() { + std::wstring modulePath = FileHelpers::GetModulePath(); + std::wstring inputImagePath = modulePath + L"fish_720.png"; + std::wstring bmImagePath = modulePath + L"bm_fish_720.jpg"; + std::wstring modelPath = modulePath + L"fns-candy.onnx"; + + auto device = LearningModelDevice(LearningModelDeviceKind::Default); + auto model = LearningModel::LoadFromFilePath(modelPath); + auto session = LearningModelSession(model, device); + auto binding = LearningModelBinding(session); + + SoftwareBitmap softwareBitmap = FileHelpers::GetSoftwareBitmapFromFile(inputImagePath); + softwareBitmap = SoftwareBitmap::Convert(softwareBitmap, BitmapPixelFormat::Bgra8); + + // Put softwareBitmap into buffer + BYTE* pData = nullptr; + UINT32 size = 0; + winrt::Windows::Graphics::Imaging::BitmapBuffer spBitmapBuffer(softwareBitmap.LockBuffer(winrt::Windows::Graphics::Imaging::BitmapBufferAccessMode::Read)); + winrt::Windows::Foundation::IMemoryBufferReference reference = spBitmapBuffer.CreateReference(); + auto spByteAccess = reference.as<::Windows::Foundation::IMemoryBufferByteAccess>(); + spByteAccess->GetBuffer(&pData, &size); + + std::vector shape = {1, 3, softwareBitmap.PixelHeight(), softwareBitmap.PixelWidth()}; + float* pCPUTensor; + uint32_t uCapacity; + TensorFloat tf = TensorFloat::Create(shape); + com_ptr itn = tf.as(); + itn->GetBuffer(reinterpret_cast(&pCPUTensor), &uCapacity); + + uint32_t height = softwareBitmap.PixelHeight(); + uint32_t width = softwareBitmap.PixelWidth(); + 
for (UINT32 i = 0; i < size; i += 4) { + UINT32 pixelInd = i / 4; + pCPUTensor[pixelInd] = (float)pData[i]; + pCPUTensor[(height * width) + pixelInd] = (float)pData[i + 1]; + pCPUTensor[(height * width * 2) + pixelInd] = (float)pData[i + 2]; + } + + // Bind input + binding.Bind(model.InputFeatures().First().Current().Name(), tf); + + // Bind output + auto outputtensordescriptor = model.OutputFeatures().First().Current().as(); + auto outputtensorshape = outputtensordescriptor.Shape(); + VideoFrame outputimage( + BitmapPixelFormat::Bgra8, + static_cast(outputtensorshape.GetAt(3)), + static_cast(outputtensorshape.GetAt(2))); + ImageFeatureValue outputTensor = ImageFeatureValue::CreateFromVideoFrame(outputimage); + WINML_EXPECT_NO_THROW(binding.Bind(model.OutputFeatures().First().Current().Name(), outputTensor)); + + // Evaluate the model + winrt::hstring correlationId; + WINML_EXPECT_NO_THROW(session.EvaluateAsync(binding, correlationId).get()); + + // Verify the output by comparing with the benchmark image + SoftwareBitmap bm_softwareBitmap = FileHelpers::GetSoftwareBitmapFromFile(bmImagePath); + bm_softwareBitmap = SoftwareBitmap::Convert(bm_softwareBitmap, BitmapPixelFormat::Bgra8); + VideoFrame bm_videoFrame = VideoFrame::CreateWithSoftwareBitmap(bm_softwareBitmap); + ImageFeatureValue bm_imagevalue = ImageFeatureValue::CreateFromVideoFrame(bm_videoFrame); + WINML_EXPECT_TRUE(VerifyHelper(bm_imagevalue, outputTensor)); + + // check the output video frame object by saving output image to disk + std::wstring outputDataImageFileName = L"out_cpu_tensor_fish_720.jpg"; + StorageFolder currentfolder = StorageFolder::GetFolderFromPathAsync(modulePath).get(); + StorageFile outimagefile = currentfolder.CreateFileAsync(outputDataImageFileName, CreationCollisionOption::ReplaceExisting).get(); + IRandomAccessStream writestream = outimagefile.OpenAsync(FileAccessMode::ReadWrite).get(); + BitmapEncoder encoder = BitmapEncoder::CreateAsync(BitmapEncoder::JpegEncoderId(), 
writestream).get(); + // Set the software bitmap + encoder.SetSoftwareBitmap(outputimage.SoftwareBitmap()); + encoder.FlushAsync().get(); +} + +static void Scenario22ImageBindingAsGPUTensor() { + std::wstring modulePath = FileHelpers::GetModulePath(); + std::wstring inputImagePath = modulePath + L"fish_720.png"; + std::wstring bmImagePath = modulePath + L"bm_fish_720.jpg"; + std::wstring modelPath = modulePath + L"fns-candy.onnx"; + std::wstring outputDataImageFileName = L"out_gpu_tensor_fish_720.jpg"; + + SoftwareBitmap softwareBitmap = FileHelpers::GetSoftwareBitmapFromFile(inputImagePath); + softwareBitmap = SoftwareBitmap::Convert(softwareBitmap, BitmapPixelFormat::Bgra8); + + // Put softwareBitmap into cpu buffer + BYTE* pData = nullptr; + UINT32 size = 0; + winrt::Windows::Graphics::Imaging::BitmapBuffer spBitmapBuffer(softwareBitmap.LockBuffer(winrt::Windows::Graphics::Imaging::BitmapBufferAccessMode::Read)); + winrt::Windows::Foundation::IMemoryBufferReference reference = spBitmapBuffer.CreateReference(); + auto spByteAccess = reference.as<::Windows::Foundation::IMemoryBufferByteAccess>(); + spByteAccess->GetBuffer(&pData, &size); + + std::vector shape = {1, 3, softwareBitmap.PixelHeight(), softwareBitmap.PixelWidth()}; + FLOAT* pCPUTensor; + uint32_t uCapacity; + + // CPU tensorization + TensorFloat tf = TensorFloat::Create(shape); + com_ptr itn = tf.as(); + itn->GetBuffer(reinterpret_cast(&pCPUTensor), &uCapacity); + + uint32_t height = softwareBitmap.PixelHeight(); + uint32_t width = softwareBitmap.PixelWidth(); + for (UINT32 i = 0; i < size; i += 4) { + UINT32 pixelInd = i / 4; + pCPUTensor[pixelInd] = (FLOAT)pData[i]; + pCPUTensor[(height * width) + pixelInd] = (FLOAT)pData[i + 1]; + pCPUTensor[(height * width * 2) + pixelInd] = (FLOAT)pData[i + 2]; + } + + // create the d3d device. 
+ com_ptr pD3D12Device = nullptr; + WINML_EXPECT_NO_THROW(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), reinterpret_cast(&pD3D12Device))); + + // create the command queue. + com_ptr dxQueue = nullptr; + D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {}; + commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue), reinterpret_cast(&dxQueue)); + auto devicefactory = get_activation_factory(); + auto tensorfactory = get_activation_factory(); + com_ptr<::IUnknown> spUnk; + devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put()); + + LearningModel model(nullptr); + WINML_EXPECT_NO_THROW(model = LearningModel::LoadFromFilePath(modelPath)); + LearningModelDevice dmlDeviceCustom = nullptr; + WINML_EXPECT_NO_THROW(spUnk.as(dmlDeviceCustom)); + LearningModelSession dmlSessionCustom = nullptr; + WINML_EXPECT_NO_THROW(dmlSessionCustom = LearningModelSession(model, dmlDeviceCustom)); + LearningModelBinding modelBinding = nullptr; + WINML_EXPECT_NO_THROW(modelBinding = LearningModelBinding(dmlSessionCustom)); + + // Create ID3D12GraphicsCommandList and Allocator + D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type; + com_ptr alloctor; + com_ptr cmdList; + + pD3D12Device->CreateCommandAllocator( + queuetype, + winrt::guid_of(), + alloctor.put_void()); + + pD3D12Device->CreateCommandList( + 0, + queuetype, + alloctor.get(), + nullptr, + winrt::guid_of(), + cmdList.put_void()); + + // Create Committed Resource + // 3 is number of channels we use. R G B without alpha. 
+ UINT64 bufferbytesize = 3 * sizeof(float) * softwareBitmap.PixelWidth() * softwareBitmap.PixelHeight(); + D3D12_HEAP_PROPERTIES heapProperties = { + D3D12_HEAP_TYPE_DEFAULT, + D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + D3D12_MEMORY_POOL_UNKNOWN, + 0, + 0}; + D3D12_RESOURCE_DESC resourceDesc = { + D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + bufferbytesize, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; + + com_ptr pGPUResource = nullptr; + com_ptr imageUploadHeap; + pD3D12Device->CreateCommittedResource( + &heapProperties, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + __uuidof(ID3D12Resource), + pGPUResource.put_void()); + + // Create the GPU upload buffer. + CD3DX12_HEAP_PROPERTIES props(D3D12_HEAP_TYPE_UPLOAD); + auto buffer = CD3DX12_RESOURCE_DESC::Buffer(bufferbytesize); + WINML_EXPECT_NO_THROW(pD3D12Device->CreateCommittedResource( + &props, + D3D12_HEAP_FLAG_NONE, + &buffer, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + __uuidof(ID3D12Resource), + imageUploadHeap.put_void())); + + // Copy from Cpu to GPU + D3D12_SUBRESOURCE_DATA CPUData = {}; + CPUData.pData = reinterpret_cast(pCPUTensor); + CPUData.RowPitch = static_cast(bufferbytesize); + CPUData.SlicePitch = static_cast(bufferbytesize); + UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData); + + // Close the command list and execute it to begin the initial GPU setup. 
+ WINML_EXPECT_NO_THROW(cmdList->Close()); + ID3D12CommandList* ppCommandLists[] = {cmdList.get()}; + dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // GPU tensorize + com_ptr<::IUnknown> spUnkTensor; + TensorFloat input1imagetensor(nullptr); + __int64 shapes[4] = {1, 3, softwareBitmap.PixelWidth(), softwareBitmap.PixelHeight()}; + tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), shapes, 4, spUnkTensor.put()); + spUnkTensor.try_as(input1imagetensor); + + auto feature = model.InputFeatures().First(); + WINML_EXPECT_NO_THROW(modelBinding.Bind(feature.Current().Name(), input1imagetensor)); + + auto outputtensordescriptor = model.OutputFeatures().First().Current().as(); + auto outputtensorshape = outputtensordescriptor.Shape(); + VideoFrame outputimage( + BitmapPixelFormat::Rgba8, + static_cast(outputtensorshape.GetAt(3)), + static_cast(outputtensorshape.GetAt(2))); + ImageFeatureValue outputTensor = ImageFeatureValue::CreateFromVideoFrame(outputimage); + + WINML_EXPECT_NO_THROW(modelBinding.Bind(model.OutputFeatures().First().Current().Name(), outputTensor)); + + // Evaluate the model + winrt::hstring correlationId; + dmlSessionCustom.EvaluateAsync(modelBinding, correlationId).get(); + + // Verify the output by comparing with the benchmark image + SoftwareBitmap bm_softwareBitmap = FileHelpers::GetSoftwareBitmapFromFile(bmImagePath); + bm_softwareBitmap = SoftwareBitmap::Convert(bm_softwareBitmap, BitmapPixelFormat::Rgba8); + VideoFrame bm_videoFrame = VideoFrame::CreateWithSoftwareBitmap(bm_softwareBitmap); + ImageFeatureValue bm_imagevalue = ImageFeatureValue::CreateFromVideoFrame(bm_videoFrame); + WINML_EXPECT_TRUE(VerifyHelper(bm_imagevalue, outputTensor)); + + //check the output video frame object + StorageFolder currentfolder = StorageFolder::GetFolderFromPathAsync(modulePath).get(); + StorageFile outimagefile = currentfolder.CreateFileAsync(outputDataImageFileName, CreationCollisionOption::ReplaceExisting).get(); + 
IRandomAccessStream writestream = outimagefile.OpenAsync(FileAccessMode::ReadWrite).get(); + BitmapEncoder encoder = BitmapEncoder::CreateAsync(BitmapEncoder::JpegEncoderId(), writestream).get(); + // Set the software bitmap + encoder.SetSoftwareBitmap(outputimage.SoftwareBitmap()); + encoder.FlushAsync().get(); +} + +static void QuantizedModels() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"onnxzoo_lotus_inception_v1-dq.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the default device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::Default)); + // create a binding set + LearningModelBinding binding(session); + // bind the input and the output buffers by name + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(input); + // set an actual buffer here. we're using uninitialized data for simplicity. 
+ binding.Bind(input.Name(), featureValue); + } + // run eval + WINML_EXPECT_NO_THROW(session.Evaluate(binding, filePath)); +} + +static void MsftQuantizedModels() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"coreml_Resnet50_ImageNet-dq.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::DirectX)); + // create a binding set + LearningModelBinding binding(session); + // bind the input and the output buffers by name + + std::wstring fullImagePath = FileHelpers::GetModulePath() + L"kitten_224.png"; + StorageFile imagefile = StorageFile::GetFileFromPathAsync(fullImagePath).get(); + IRandomAccessStream stream = imagefile.OpenAsync(FileAccessMode::Read).get(); + SoftwareBitmap softwareBitmap = (BitmapDecoder::CreateAsync(stream).get()).GetSoftwareBitmapAsync().get(); + + auto inputs = model.InputFeatures(); + for (auto&& input : inputs) { + auto featureValue = FeatureValueFromFeatureValueDescriptor(input, softwareBitmap); + // set an actual buffer here. we're using uninitialized data for simplicity. 
+ binding.Bind(input.Name(), featureValue); + } + // run eval + WINML_EXPECT_NO_THROW(session.Evaluate(binding, filePath)); +} + +static void SyncVsAsync() { + // create model, device and session + LearningModel model = nullptr; + WINML_EXPECT_NO_THROW(model = LearningModel::LoadFromFilePath(FileHelpers::GetModulePath() + L"fns-candy.onnx")); + + LearningModelSession session = nullptr; + WINML_EXPECT_NO_THROW(session = LearningModelSession(model, LearningModelDevice(LearningModelDeviceKind::DirectX))); + + // create the binding + LearningModelBinding modelBinding(session); + + // bind the input + std::wstring fullImagePath = FileHelpers::GetModulePath() + L"fish_720.png"; + StorageFile imagefile = StorageFile::GetFileFromPathAsync(fullImagePath).get(); + IRandomAccessStream stream = imagefile.OpenAsync(FileAccessMode::Read).get(); + SoftwareBitmap softwareBitmap = (BitmapDecoder::CreateAsync(stream).get()).GetSoftwareBitmapAsync().get(); + VideoFrame frame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); + + auto imagetensor = ImageFeatureValue::CreateFromVideoFrame(frame); + auto inputFeatureDescriptor = model.InputFeatures().First(); + WINML_EXPECT_NO_THROW(modelBinding.Bind(inputFeatureDescriptor.Current().Name(), imagetensor)); + + UINT N = 20; + + auto outputtensordescriptor = model.OutputFeatures().First().Current().as(); + auto outputtensorshape = outputtensordescriptor.Shape(); + VideoFrame outputimage( + BitmapPixelFormat::Rgba8, + static_cast(outputtensorshape.GetAt(3)), + static_cast(outputtensorshape.GetAt(2))); + ImageFeatureValue outputTensor = ImageFeatureValue::CreateFromVideoFrame(outputimage); + WINML_EXPECT_NO_THROW(modelBinding.Bind(model.OutputFeatures().First().Current().Name(), outputTensor)); + + // evaluate N times synchronously and time it + auto startSync = std::chrono::high_resolution_clock::now(); + for (UINT i = 0; i < N; i++) { + session.Evaluate(modelBinding, L""); + } + auto syncTime = 
std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - startSync); + std::cout << "Synchronous time for " << N << " evaluations: " << syncTime.count() << " milliseconds\n"; + + // evaluate N times Asynchronously and time it + std::vector> tasks; + std::vector bindings(N, nullptr); + + for (size_t i = 0; i < bindings.size(); i++) { + bindings[i] = LearningModelBinding(session); + bindings[i].Bind(inputFeatureDescriptor.Current().Name(), imagetensor); + bindings[i].Bind( + model.OutputFeatures().First().Current().Name(), + VideoFrame(BitmapPixelFormat::Rgba8, + static_cast(outputtensorshape.GetAt(3)), + static_cast(outputtensorshape.GetAt(2)))); + } + + auto startAsync = std::chrono::high_resolution_clock::now(); + for (UINT i = 0; i < N; i++) { + tasks.emplace_back(session.EvaluateAsync(bindings[i], L"")); + } + // wait for them all to complete + for (auto&& task : tasks) { + task.get(); + } + auto asyncTime = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - startAsync); + std::cout << "Asynchronous time for " << N << " evaluations: " << asyncTime.count() << " milliseconds\n"; +} + +static void CustomCommandQueueWithFence() { + static const wchar_t* const modelFileName = L"fns-candy.onnx"; + static const wchar_t* const inputDataImageFileName = L"fish_720.png"; + + com_ptr d3d12Device; + WINML_EXPECT_HRESULT_SUCCEEDED(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), d3d12Device.put_void())); + + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + + com_ptr queue; + WINML_EXPECT_HRESULT_SUCCEEDED(d3d12Device->CreateCommandQueue(&queueDesc, __uuidof(ID3D12CommandQueue), queue.put_void())); + + com_ptr fence; + WINML_EXPECT_HRESULT_SUCCEEDED(d3d12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), fence.put_void())); + + auto devicefactory = get_activation_factory(); + + com_ptr<::IUnknown> learningModelDeviceUnknown; + 
WINML_EXPECT_HRESULT_SUCCEEDED(devicefactory->CreateFromD3D12CommandQueue(queue.get(), learningModelDeviceUnknown.put())); + + LearningModelDevice device = nullptr; + WINML_EXPECT_NO_THROW(learningModelDeviceUnknown.as(device)); + + std::wstring modulePath = FileHelpers::GetModulePath(); + + // WinML model creation + std::wstring fullModelPath = modulePath + modelFileName; + LearningModel model(nullptr); + WINML_EXPECT_NO_THROW(model = LearningModel::LoadFromFilePath(fullModelPath)); + LearningModelSession modelSession = nullptr; + WINML_EXPECT_NO_THROW(modelSession = LearningModelSession(model, device)); + LearningModelBinding modelBinding = nullptr; + WINML_EXPECT_NO_THROW(modelBinding = LearningModelBinding(modelSession)); + + std::wstring fullImagePath = modulePath + inputDataImageFileName; + + StorageFile imagefile = StorageFile::GetFileFromPathAsync(fullImagePath).get(); + IRandomAccessStream stream = imagefile.OpenAsync(FileAccessMode::Read).get(); + SoftwareBitmap softwareBitmap = (BitmapDecoder::CreateAsync(stream).get()).GetSoftwareBitmapAsync().get(); + VideoFrame frame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); + ImageFeatureValue input1imagetensor = ImageFeatureValue::CreateFromVideoFrame(frame); + + auto feature = model.InputFeatures().First(); + WINML_EXPECT_NO_THROW(modelBinding.Bind(feature.Current().Name(), input1imagetensor)); + + auto outputtensordescriptor = model.OutputFeatures().First().Current().as(); + auto outputtensorshape = outputtensordescriptor.Shape(); + VideoFrame outputimage( + BitmapPixelFormat::Rgba8, + static_cast(outputtensorshape.GetAt(3)), + static_cast(outputtensorshape.GetAt(2))); + ImageFeatureValue outputTensor = ImageFeatureValue::CreateFromVideoFrame(outputimage); + + WINML_EXPECT_NO_THROW(modelBinding.Bind(model.OutputFeatures().First().Current().Name(), outputTensor)); + + // Block the queue on the fence, evaluate the model, then queue a signal. 
The model evaluation should not complete + // until after the wait is unblocked, and the signal should not complete until model evaluation does. This can + // only be true if WinML executes the workload on the supplied queue (instead of using its own). + + WINML_EXPECT_HRESULT_SUCCEEDED(queue->Wait(fence.get(), 1)); + + WINML_EXPECT_HRESULT_SUCCEEDED(queue->Signal(fence.get(), 2)); + + winrt::hstring correlationId; + winrt::Windows::Foundation::IAsyncOperation asyncOp; + WINML_EXPECT_NO_THROW(asyncOp = modelSession.EvaluateAsync(modelBinding, correlationId)); + + Sleep(1000); // Give the model a chance to run (which it shouldn't if everything is working correctly) + + // Because we haven't unblocked the wait yet, model evaluation must not have completed (nor the fence signal) + WINML_EXPECT_NOT_EQUAL(asyncOp.Status(), winrt::Windows::Foundation::AsyncStatus::Completed); + WINML_EXPECT_EQUAL(fence->GetCompletedValue(), 0); + + // Unblock the queue + WINML_EXPECT_HRESULT_SUCCEEDED(fence->Signal(1)); + + // Wait for model evaluation to complete + asyncOp.get(); + + // The fence must be signaled by now (because model evaluation has completed) + WINML_EXPECT_EQUAL(fence->GetCompletedValue(), 2); +} + +static void ReuseVideoFrame() { + std::wstring modulePath = FileHelpers::GetModulePath(); + std::wstring inputImagePath = modulePath + L"fish_720.png"; + std::wstring bmImagePath = modulePath + L"bm_fish_720.jpg"; + std::wstring modelPath = modulePath + L"fns-candy.onnx"; + + std::vector deviceKinds = {LearningModelDeviceKind::Cpu, LearningModelDeviceKind::DirectX}; + std::vector videoFrameSources; + CommonDeviceHelpers::AdapterEnumerationSupport support; + CommonDeviceHelpers::GetAdapterEnumerationSupport(&support); + if (support.has_dxgi) { + videoFrameSources = {"SoftwareBitmap", "Direct3DSurface"}; + } else { + videoFrameSources = {"SoftwareBitmap"}; + } + + for (auto deviceKind : deviceKinds) { + auto device = LearningModelDevice(deviceKind); + auto model = 
LearningModel::LoadFromFilePath(modelPath); + auto session = LearningModelSession(model, device); + auto binding = LearningModelBinding(session); + for (auto videoFrameSource : videoFrameSources) { + VideoFrame reuseVideoFrame = nullptr; + if (videoFrameSource == "SoftwareBitmap") { + reuseVideoFrame = VideoFrame::CreateWithSoftwareBitmap(SoftwareBitmap(BitmapPixelFormat::Bgra8, 720, 720)); + } else { + reuseVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(DirectXPixelFormat::B8G8R8X8UIntNormalized, 720, 720); + } + for (uint32_t i = 0; i < 3; ++i) { + SoftwareBitmap softwareBitmap = FileHelpers::GetSoftwareBitmapFromFile(inputImagePath); + VideoFrame videoFrame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); + // reuse video frame + videoFrame.CopyToAsync(reuseVideoFrame).get(); + + // bind input + binding.Bind(model.InputFeatures().First().Current().Name(), reuseVideoFrame); + + // bind output + VideoFrame outputimage(BitmapPixelFormat::Bgra8, 720, 720); + ImageFeatureValue outputTensor = ImageFeatureValue::CreateFromVideoFrame(outputimage); + WINML_EXPECT_NO_THROW(binding.Bind(model.OutputFeatures().First().Current().Name(), outputTensor)); + + // evaluate + winrt::hstring correlationId; + WINML_EXPECT_NO_THROW(session.EvaluateAsync(binding, correlationId).get()); + + // verify result + SoftwareBitmap bm_softwareBitmap = FileHelpers::GetSoftwareBitmapFromFile(bmImagePath); + bm_softwareBitmap = SoftwareBitmap::Convert(bm_softwareBitmap, BitmapPixelFormat::Bgra8); + VideoFrame bm_videoFrame = VideoFrame::CreateWithSoftwareBitmap(bm_softwareBitmap); + ImageFeatureValue bm_imagevalue = ImageFeatureValue::CreateFromVideoFrame(bm_videoFrame); + WINML_EXPECT_TRUE(VerifyHelper(bm_imagevalue, outputTensor)); + } + } + } +} +static void EncryptedStream() { + // get a stream + std::wstring path = FileHelpers::GetModulePath() + L"model.onnx"; + auto storageFile = StorageFile::GetFileFromPathAsync(path).get(); + auto fileBuffer = 
winrt::Windows::Storage::FileIO::ReadBufferAsync(storageFile).get(); + + // encrypt + auto algorithmName = winrt::Windows::Security::Cryptography::Core::SymmetricAlgorithmNames::AesCbcPkcs7(); + auto algorithm = winrt::Windows::Security::Cryptography::Core::SymmetricKeyAlgorithmProvider::OpenAlgorithm(algorithmName); + uint32_t keyLength = 32; + auto keyBuffer = winrt::Windows::Security::Cryptography::CryptographicBuffer::GenerateRandom(keyLength); + auto key = algorithm.CreateSymmetricKey(keyBuffer); + auto iv = winrt::Windows::Security::Cryptography::CryptographicBuffer::GenerateRandom(algorithm.BlockLength()); + auto encryptedBuffer = winrt::Windows::Security::Cryptography::Core::CryptographicEngine::Encrypt(key, fileBuffer, iv); + + // verify loading the encrypted stream fails appropriately. + auto encryptedStream = InMemoryRandomAccessStream(); + encryptedStream.WriteAsync(encryptedBuffer).get(); + WINML_EXPECT_THROW_SPECIFIC(LearningModel::LoadFromStream(RandomAccessStreamReference::CreateFromStream(encryptedStream)), + winrt::hresult_error, + [](const winrt::hresult_error& e) -> bool { + return e.code() == E_INVALIDARG; + }); + + // now decrypt + auto decryptedBuffer = winrt::Windows::Security::Cryptography::Core::CryptographicEngine::Decrypt(key, encryptedBuffer, iv); + auto decryptedStream = InMemoryRandomAccessStream(); + decryptedStream.WriteAsync(decryptedBuffer).get(); + + // load! 
+ LearningModel model = nullptr; + WINML_EXPECT_NO_THROW(model = LearningModel::LoadFromStream(RandomAccessStreamReference::CreateFromStream(decryptedStream))); + LearningModelSession session = nullptr; + WINML_EXPECT_NO_THROW(session = LearningModelSession(model)); +} + +static void DeviceLostRecovery() { + // load a model + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a session on the DirectX device + LearningModelSession session(model, LearningModelDevice(LearningModelDeviceKind::DirectX)); + // create a binding set + LearningModelBinding binding(session); + // bind the inputs + BindFeatures(binding, model.InputFeatures()); + + // force device lost here + { + winrt::com_ptr d3d12Device; + D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device5), d3d12Device.put_void()); + d3d12Device->RemoveDevice(); + } + + // evaluate should fail + try { + session.Evaluate(binding, L""); + WINML_LOG_ERROR("Evaluate should fail after removing the device"); + } catch (...) { + } + + // remove all references to the device by reseting the session and binding. + session = nullptr; + binding = nullptr; + + // create new session and binding and try again! 
+ WINML_EXPECT_NO_THROW(session = LearningModelSession(model, LearningModelDevice(LearningModelDeviceKind::DirectX))); + WINML_EXPECT_NO_THROW(binding = LearningModelBinding(session)); + BindFeatures(binding, model.InputFeatures()); + WINML_EXPECT_NO_THROW(session.Evaluate(binding, L"")); +} + +static void D2DInterop() { + // load a model (model.onnx == squeezenet[1,3,224,224]) + std::wstring filePath = FileHelpers::GetModulePath() + L"model.onnx"; + LearningModel model = LearningModel::LoadFromFilePath(filePath); + // create a dx12 device + com_ptr device = nullptr; + WINML_EXPECT_HRESULT_SUCCEEDED(D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device1), device.put_void())); + // now create a command queue from it + com_ptr commandQueue = nullptr; + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + WINML_EXPECT_HRESULT_SUCCEEDED(device->CreateCommandQueue(&queueDesc, winrt::guid_of(), commandQueue.put_void())); + // create a winml learning device based on that dx12 queue + auto factory = get_activation_factory(); + com_ptr<::IUnknown> spUnk; + WINML_EXPECT_HRESULT_SUCCEEDED(factory->CreateFromD3D12CommandQueue(commandQueue.get(), spUnk.put())); + auto learningDevice = spUnk.as(); + // create a winml session from that dx device + LearningModelSession session(model, learningDevice); + // now lets try and do some XAML/d2d on that same device, first prealloc a VideoFrame + VideoFrame frame = VideoFrame::CreateAsDirect3D11SurfaceBacked( + DirectXPixelFormat::B8G8R8A8UIntNormalized, + 224, + 224, + session.Device().Direct3D11Device()); + // create a D2D factory + D2D1_FACTORY_OPTIONS options = {}; + com_ptr d2dFactory; + WINML_EXPECT_HRESULT_SUCCEEDED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(ID2D1Factory), &options, d2dFactory.put_void())); + // grab the dxgi surface back from our video frame + com_ptr dxgiSurface; + com_ptr dxgiInterfaceAccess = frame.Direct3DSurface().as(); + 
WINML_EXPECT_HRESULT_SUCCEEDED(dxgiInterfaceAccess->GetInterface(__uuidof(IDXGISurface), dxgiSurface.put_void())); + // and try and use our surface to create a render targer + com_ptr renderTarget; + D2D1_RENDER_TARGET_PROPERTIES props = D2D1::RenderTargetProperties(); + props.pixelFormat = D2D1::PixelFormat( + DXGI_FORMAT_B8G8R8A8_UNORM, + D2D1_ALPHA_MODE_IGNORE); + WINML_EXPECT_HRESULT_SUCCEEDED(d2dFactory->CreateDxgiSurfaceRenderTarget( + dxgiSurface.get(), + props, + renderTarget.put())); +} + +const ScenarioTestApi& getapi() { + static constexpr ScenarioTestApi api = + { + ScenarioCppWinrtTestSetup, + ScenarioCppWinrtGpuTestSetup, + ScenarioCppWinrtGpuSkipEdgeCoreTestSetup, + Sample1, + Scenario1LoadBindEvalDefault, + Scenario2LoadModelFromStream, + Scenario5AsyncEval, + Scenario7EvalWithNoBind, + Scenario8SetDeviceSampleDefault, + Scenario8SetDeviceSampleCPU, + Scenario17DevDiagnostics, + Scenario22ImageBindingAsCPUTensor, + QuantizedModels, + EncryptedStream, + Scenario3SoftwareBitmapInputBinding, + Scenario6BindWithProperties, + Scenario8SetDeviceSampleDefaultDirectX, + Scenario8SetDeviceSampleMinPower, + Scenario8SetDeviceSampleMaxPerf, + Scenario8SetDeviceSampleMyCameraDevice, + Scenario8SetDeviceSampleCustomCommandQueue, + Scenario9LoadBindEvalInputTensorGPU, + Scenario13SingleModelOnCPUandGPU, + Scenario11FreeDimensionsTensor, + Scenario11FreeDimensionsImage, + Scenario14RunModelSwapchain, + Scenario20aLoadBindEvalCustomOperatorCPU, + Scenario20bLoadBindEvalReplacementCustomOperatorCPU, + Scenario21RunModel2ChainZ, + Scenario22ImageBindingAsGPUTensor, + MsftQuantizedModels, + SyncVsAsync, + CustomCommandQueueWithFence, + ReuseVideoFrame, + DeviceLostRecovery, + Scenario8SetDeviceSampleD3D11Device, + D2DInterop, + }; + return api; +} diff --git a/winml/test/scenario/cppwinrt/scenariotestscppwinrt.h b/winml/test/scenario/cppwinrt/scenariotestscppwinrt.h new file mode 100644 index 0000000000000..2409de5fd60c2 --- /dev/null +++ 
b/winml/test/scenario/cppwinrt/scenariotestscppwinrt.h @@ -0,0 +1,88 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "test.h" +struct ScenarioTestApi +{ + SetupTest ScenarioCppWinrtTestSetup; + SetupTest ScenarioCppWinrtGpuTestSetup; + SetupTest ScenarioCppWinrtGpuSkipEdgeCoreTestSetup; + VoidTest Sample1; + VoidTest Scenario1LoadBindEvalDefault; + VoidTest Scenario2LoadModelFromStream; + VoidTest Scenario5AsyncEval; + VoidTest Scenario7EvalWithNoBind; + VoidTest Scenario8SetDeviceSampleDefault; + VoidTest Scenario8SetDeviceSampleCPU; + VoidTest Scenario17DevDiagnostics; + VoidTest DISABLED_Scenario22ImageBindingAsCPUTensor; + VoidTest QuantizedModels; + VoidTest EncryptedStream; + VoidTest Scenario3SoftwareBitmapInputBinding; + VoidTest Scenario6BindWithProperties; + VoidTest Scenario8SetDeviceSampleDefaultDirectX; + VoidTest Scenario8SetDeviceSampleMinPower; + VoidTest Scenario8SetDeviceSampleMaxPerf; + VoidTest Scenario8SetDeviceSampleMyCameraDevice; + VoidTest Scenario8SetDeviceSampleCustomCommandQueue; + VoidTest DISABLED_Scenario9LoadBindEvalInputTensorGPU; + VoidTest Scenario13SingleModelOnCPUandGPU; + VoidTest Scenario11FreeDimensionsTensor; + VoidTest Scenario11FreeDimensionsImage; + VoidTest Scenario14RunModelSwapchain; + VoidTest Scenario20aLoadBindEvalCustomOperatorCPU; + VoidTest Scenario20bLoadBindEvalReplacementCustomOperatorCPU; + VoidTest DISABLED_Scenario21RunModel2ChainZ; + VoidTest DISABLED_Scenario22ImageBindingAsGPUTensor; + VoidTest MsftQuantizedModels; + VoidTest DISABLED_SyncVsAsync; + VoidTest DISABLED_CustomCommandQueueWithFence; + VoidTest DISABLED_ReuseVideoFrame; + VoidTest DeviceLostRecovery; + VoidTest Scenario8SetDeviceSampleD3D11Device; + VoidTest D2DInterop; +}; +const ScenarioTestApi& getapi(); + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(ScenarioCppWinrtTest, ScenarioCppWinrtTestSetup) +WINML_TEST(ScenarioCppWinrtTest, Sample1) +WINML_TEST(ScenarioCppWinrtTest, 
Scenario1LoadBindEvalDefault) +WINML_TEST(ScenarioCppWinrtTest, Scenario2LoadModelFromStream) +WINML_TEST(ScenarioCppWinrtTest, Scenario5AsyncEval) +WINML_TEST(ScenarioCppWinrtTest, Scenario7EvalWithNoBind) +WINML_TEST(ScenarioCppWinrtTest, Scenario8SetDeviceSampleDefault) +WINML_TEST(ScenarioCppWinrtTest, Scenario8SetDeviceSampleCPU) +WINML_TEST(ScenarioCppWinrtTest, Scenario17DevDiagnostics) +WINML_TEST(ScenarioCppWinrtTest, DISABLED_Scenario22ImageBindingAsCPUTensor) +WINML_TEST(ScenarioCppWinrtTest, QuantizedModels) +WINML_TEST(ScenarioCppWinrtTest, EncryptedStream) +WINML_TEST_CLASS_END() + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(ScenarioCppWinrtGpuTest, ScenarioCppWinrtGpuTestSetup) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario3SoftwareBitmapInputBinding) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario6BindWithProperties) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario8SetDeviceSampleDefaultDirectX) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario8SetDeviceSampleMinPower) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario8SetDeviceSampleMaxPerf) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario8SetDeviceSampleCustomCommandQueue) +WINML_TEST(ScenarioCppWinrtGpuTest, DISABLED_Scenario9LoadBindEvalInputTensorGPU) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario13SingleModelOnCPUandGPU) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario11FreeDimensionsTensor) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario11FreeDimensionsImage) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario14RunModelSwapchain) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario20aLoadBindEvalCustomOperatorCPU) +WINML_TEST(ScenarioCppWinrtGpuTest, Scenario20bLoadBindEvalReplacementCustomOperatorCPU) +WINML_TEST(ScenarioCppWinrtGpuTest, DISABLED_Scenario21RunModel2ChainZ) +WINML_TEST(ScenarioCppWinrtGpuTest, DISABLED_Scenario22ImageBindingAsGPUTensor) +WINML_TEST(ScenarioCppWinrtGpuTest, MsftQuantizedModels) +WINML_TEST(ScenarioCppWinrtGpuTest, DISABLED_SyncVsAsync) +WINML_TEST(ScenarioCppWinrtGpuTest, 
DISABLED_CustomCommandQueueWithFence) +WINML_TEST(ScenarioCppWinrtGpuTest, DISABLED_ReuseVideoFrame) +WINML_TEST(ScenarioCppWinrtGpuTest, DeviceLostRecovery) +WINML_TEST_CLASS_END() + +WINML_TEST_CLASS_BEGIN_WITH_SETUP(ScenarioCppWinrtGpuSkipEdgeCoreTest, ScenarioCppWinrtGpuSkipEdgeCoreTestSetup) +WINML_TEST(ScenarioCppWinrtGpuSkipEdgeCoreTest, Scenario8SetDeviceSampleMyCameraDevice) +WINML_TEST(ScenarioCppWinrtGpuSkipEdgeCoreTest, Scenario8SetDeviceSampleD3D11Device ) +WINML_TEST(ScenarioCppWinrtGpuSkipEdgeCoreTest, D2DInterop) +WINML_TEST_CLASS_END() \ No newline at end of file diff --git a/winml/test/scenario/models/LICENSE.md b/winml/test/scenario/models/LICENSE.md new file mode 100644 index 0000000000000..4b636b6fb8082 --- /dev/null +++ b/winml/test/scenario/models/LICENSE.md @@ -0,0 +1,6 @@ +# Licenses + +| Model | Source | License | +| ----------- | ---------- | ----------- | +| Resnet 50 | https://github.com/keras-team/keras | MIT | +| Inception v1 | https://github.com/onnx/models | MIT | diff --git a/winml/test/scenario/models/coreml_Resnet50_ImageNet-dq.onnx b/winml/test/scenario/models/coreml_Resnet50_ImageNet-dq.onnx new file mode 100644 index 0000000000000..070d5d4f066a5 Binary files /dev/null and b/winml/test/scenario/models/coreml_Resnet50_ImageNet-dq.onnx differ diff --git a/winml/test/scenario/models/onnxzoo_lotus_inception_v1-dq.onnx b/winml/test/scenario/models/onnxzoo_lotus_inception_v1-dq.onnx new file mode 100644 index 0000000000000..bf3db4fe677a1 Binary files /dev/null and b/winml/test/scenario/models/onnxzoo_lotus_inception_v1-dq.onnx differ