From 1d51f4b91c10ba4eb3dd97316118c9d8cc32e4da Mon Sep 17 00:00:00 2001 From: John Falkowski Date: Wed, 25 Sep 2024 17:41:16 +0000 Subject: [PATCH] feature: Add driver-experimental API for retrieval of kernel binary program data Related-To: NEO-11651 Signed-off-by: John Falkowski --- .../driver_experimental/public/zex_module.cpp | 12 ++++ .../driver_experimental/public/zex_module.h | 6 +- .../driver/driver_handle_imp_helper.cpp | 2 +- .../driver/extension_function_address.cpp | 1 + level_zero/core/source/kernel/kernel.h | 1 + level_zero/core/source/kernel/kernel_imp.cpp | 12 ++++ level_zero/core/source/kernel/kernel_imp.h | 1 + .../unit_tests/sources/kernel/test_kernel.cpp | 71 +++++++++++++++++++ .../GET_KERNEL_BINARY.md | 61 ++++++++++++++++ level_zero/include/ze_intel_gpu.h | 31 ++++++++ 10 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 level_zero/doc/experimental_extensions/GET_KERNEL_BINARY.md diff --git a/level_zero/api/driver_experimental/public/zex_module.cpp b/level_zero/api/driver_experimental/public/zex_module.cpp index befc15d261..b6e9e48874 100644 --- a/level_zero/api/driver_experimental/public/zex_module.cpp +++ b/level_zero/api/driver_experimental/public/zex_module.cpp @@ -18,6 +18,12 @@ zexKernelGetBaseAddress( return L0::Kernel::fromHandle(toInternalType(hKernel))->getBaseAddress(baseAddress); } +ze_result_t ZE_APICALL +zeIntelKernelGetBinaryExp( + ze_kernel_handle_t hKernel, size_t *pSize, char *pKernelBinary) { + return L0::Kernel::fromHandle(toInternalType(hKernel))->getKernelProgramBinary(pSize, pKernelBinary); +} + } // namespace L0 extern "C" { @@ -28,4 +34,10 @@ zexKernelGetBaseAddress( uint64_t *baseAddress) { return L0::zexKernelGetBaseAddress(hKernel, baseAddress); } + +ZE_APIEXPORT ze_result_t ZE_APICALL +zeIntelKernelGetBinaryExp( + ze_kernel_handle_t hKernel, size_t *pSize, char *pKernelBinary) { + return L0::zeIntelKernelGetBinaryExp(hKernel, pSize, pKernelBinary); +} } diff --git 
a/level_zero/api/driver_experimental/public/zex_module.h b/level_zero/api/driver_experimental/public/zex_module.h index 3b3e261ee7..ead5765477 100644 --- a/level_zero/api/driver_experimental/public/zex_module.h +++ b/level_zero/api/driver_experimental/public/zex_module.h @@ -18,7 +18,11 @@ zexKernelGetBaseAddress( ze_kernel_handle_t hKernel, uint64_t *baseAddress); -} +ze_result_t ZE_APICALL +zeIntelKernelGetBinaryExp( + ze_kernel_handle_t hKernel, size_t *pSize, char *pKernelBinary); + +} // namespace L0 /////////////////////////////////////////////////////////////////////////////// /// @brief General Register File descriptor. diff --git a/level_zero/core/source/driver/driver_handle_imp_helper.cpp b/level_zero/core/source/driver/driver_handle_imp_helper.cpp index 2daabc4f0a..07e69ad6ba 100644 --- a/level_zero/core/source/driver/driver_handle_imp_helper.cpp +++ b/level_zero/core/source/driver/driver_handle_imp_helper.cpp @@ -38,5 +38,5 @@ const std::vector> DriverHandleImp::extensionsS {ZE_INTEL_COMMAND_LIST_MEMORY_SYNC, ZE_INTEL_COMMAND_LIST_MEMORY_SYNC_EXP_VERSION_CURRENT}, {ZEX_INTEL_EVENT_SYNC_MODE_EXP_NAME, ZEX_INTEL_EVENT_SYNC_MODE_EXP_VERSION_CURRENT}, {ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_NAME, ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_VERSION_CURRENT}, -}; + {ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME, ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_VERSION_CURRENT}}; } // namespace L0 diff --git a/level_zero/core/source/driver/extension_function_address.cpp b/level_zero/core/source/driver/extension_function_address.cpp index d2e8151003..8b423df464 100644 --- a/level_zero/core/source/driver/extension_function_address.cpp +++ b/level_zero/core/source/driver/extension_function_address.cpp @@ -30,6 +30,7 @@ void *ExtensionFunctionAddressHelper::getExtensionFunctionAddress(const std::str RETURN_FUNC_PTR_IF_EXIST(zexDriverGetHostPointerBaseAddress); RETURN_FUNC_PTR_IF_EXIST(zexKernelGetBaseAddress); + RETURN_FUNC_PTR_IF_EXIST(zeIntelKernelGetBinaryExp); 
RETURN_FUNC_PTR_IF_EXIST(zexMemGetIpcHandles); RETURN_FUNC_PTR_IF_EXIST(zexMemOpenIpcHandles); diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h index d29839253c..290239a24c 100644 --- a/level_zero/core/source/kernel/kernel.h +++ b/level_zero/core/source/kernel/kernel.h @@ -123,6 +123,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI { virtual ze_result_t destroy() = 0; virtual ze_result_t getBaseAddress(uint64_t *baseAddress) = 0; + virtual ze_result_t getKernelProgramBinary(size_t *kernelSize, char *pKernelBinary) = 0; virtual ze_result_t setIndirectAccess(ze_kernel_indirect_access_flags_t flags) = 0; virtual ze_result_t getIndirectAccess(ze_kernel_indirect_access_flags_t *flags) = 0; virtual ze_result_t getSourceAttributes(uint32_t *pSize, char **pString) = 0; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index f5ddb5079e..02595ac8b6 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -276,6 +276,18 @@ KernelImp::~KernelImp() { dynamicStateHeapData.reset(); } +ze_result_t KernelImp::getKernelProgramBinary(size_t *kernelSize, char *pKernelBinary) { + size_t kSize = static_cast(this->kernelImmData->getKernelInfo()->heapInfo.kernelHeapSize); + if (nullptr == pKernelBinary) { + *kernelSize = kSize; + return ZE_RESULT_SUCCESS; + } + *kernelSize = std::min(*kernelSize, kSize); + memcpy_s(pKernelBinary, *kernelSize, this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap, *kernelSize); + + return ZE_RESULT_SUCCESS; +} + ze_result_t KernelImp::setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) { if (argIndex >= kernelArgHandlers.size()) { diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 989505633e..0c770830af 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ 
b/level_zero/core/source/kernel/kernel_imp.h @@ -49,6 +49,7 @@ struct KernelImp : Kernel { } ze_result_t getBaseAddress(uint64_t *baseAddress) override; + ze_result_t getKernelProgramBinary(size_t *kernelSize, char *pKernelBinary) override; ze_result_t setIndirectAccess(ze_kernel_indirect_access_flags_t flags) override; ze_result_t getIndirectAccess(ze_kernel_indirect_access_flags_t *flags) override; ze_result_t getSourceAttributes(uint32_t *pSize, char **pString) override; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 048bd0a420..d9f9d875b8 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -3413,6 +3413,77 @@ TEST_F(KernelGlobalWorkOffsetTests, whenSettingGlobalOffsetThenCrossThreadDataIs EXPECT_EQ(*(dst.begin() + desc.payloadMappings.dispatchTraits.globalWorkOffset[2]), globalOffsetz); } +class KernelProgramBinaryTests : public ModuleFixture, public ::testing::Test { + public: + void SetUp() override { + ModuleFixture::setUp(); + + ze_kernel_desc_t kernelDesc = {}; + kernelDesc.pKernelName = kernelName.c_str(); + + ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + kernel = L0::Kernel::fromHandle(kernelHandle); + } + + void TearDown() override { + Kernel::fromHandle(kernelHandle)->destroy(); + ModuleFixture::tearDown(); + } + + ze_kernel_handle_t kernelHandle; + L0::Kernel *kernel = nullptr; +}; + +TEST_F(KernelProgramBinaryTests, givenCallToGetKernelProgramBinaryThenCorrectSizeAndDataReturned) { + + size_t kernelBinarySize = 0; + char *kernelBinaryRetrieved = nullptr; + EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getKernelProgramBinary(&kernelBinarySize, kernelBinaryRetrieved)); + EXPECT_GT(kernelBinarySize, 0u); + kernelBinaryRetrieved = new char[kernelBinarySize]; + EXPECT_EQ(ZE_RESULT_SUCCESS, 
kernel->getKernelProgramBinary(&kernelBinarySize, kernelBinaryRetrieved)); + + auto &kernelImmutableData = this->module->kernelImmDatas.front(); + EXPECT_EQ(kernelBinarySize, kernelImmutableData->getKernelInfo()->heapInfo.kernelHeapSize); + const char *heapPtr = reinterpret_cast(kernelImmutableData->getKernelInfo()->heapInfo.pKernelHeap); + EXPECT_EQ(0, memcmp(kernelBinaryRetrieved, heapPtr, kernelBinarySize)); + delete[] kernelBinaryRetrieved; +} + +TEST_F(KernelProgramBinaryTests, givenCallToGetKernelProgramBinaryWithSmallSizeThenSmallSizeReturned) { + size_t kernelBinarySize = 0; + char *kernelBinaryRetrieved = nullptr; + EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getKernelProgramBinary(&kernelBinarySize, kernelBinaryRetrieved)); + kernelBinaryRetrieved = new char[kernelBinarySize]; + size_t kernelBinarySize2 = kernelBinarySize / 2; + EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getKernelProgramBinary(&kernelBinarySize2, kernelBinaryRetrieved)); + EXPECT_EQ(kernelBinarySize2, (kernelBinarySize / 2)); + + auto &kernelImmutableData = this->module->kernelImmDatas.front(); + EXPECT_EQ(kernelBinarySize, kernelImmutableData->getKernelInfo()->heapInfo.kernelHeapSize); + const char *heapPtr = reinterpret_cast(kernelImmutableData->getKernelInfo()->heapInfo.pKernelHeap); + EXPECT_EQ(0, memcmp(kernelBinaryRetrieved, heapPtr, kernelBinarySize2)); + delete[] kernelBinaryRetrieved; +} + +TEST_F(KernelProgramBinaryTests, givenCallToGetKernelProgramBinaryWithLargeSizeThenCorrectSizeReturned) { + size_t kernelBinarySize = 0; + char *kernelBinaryRetrieved = nullptr; + EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getKernelProgramBinary(&kernelBinarySize, kernelBinaryRetrieved)); + kernelBinaryRetrieved = new char[kernelBinarySize]; + size_t kernelBinarySize2 = kernelBinarySize * 2; + EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getKernelProgramBinary(&kernelBinarySize2, kernelBinaryRetrieved)); + EXPECT_EQ(kernelBinarySize2, kernelBinarySize); + + auto &kernelImmutableData = 
this->module->kernelImmDatas.front(); + EXPECT_EQ(kernelBinarySize2, kernelImmutableData->getKernelInfo()->heapInfo.kernelHeapSize); + const char *heapPtr = reinterpret_cast(kernelImmutableData->getKernelInfo()->heapInfo.pKernelHeap); + EXPECT_EQ(0, memcmp(kernelBinaryRetrieved, heapPtr, kernelBinarySize2)); + delete[] kernelBinaryRetrieved; +} + using KernelWorkDimTests = Test; TEST_F(KernelWorkDimTests, givenGroupCountsWhenPatchingWorkDimThenCrossThreadDataIsPatched) { diff --git a/level_zero/doc/experimental_extensions/GET_KERNEL_BINARY.md b/level_zero/doc/experimental_extensions/GET_KERNEL_BINARY.md new file mode 100644 index 0000000000..e8e5a83684 --- /dev/null +++ b/level_zero/doc/experimental_extensions/GET_KERNEL_BINARY.md @@ -0,0 +1,61 @@ + + +# Get Kernel Program Binary + +* [Overview](#Overview) +* [Definitions](#Definitions) + +# Overview + +A new extension API 'zeIntelKernelGetBinaryExp' is created to retrieve both the size of the kernel binary program data, and the data itself. It can first be called to obtain the size of the kernel binary program data. The application then allocates memory accordingly and in the second call retrieves the kernel binary program data. + +`zeIntelKernelGetBinaryExp` returns Intel Graphics Assembly (GEN ISA) format binary program data for the kernel handle. The binary program data may be directly disassembled with the iga64 offline tool. + +# Definitions + +```cpp +#define ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME "ZE_intel_experimental_kernel_get_program_binary" +``` + +## Interfaces + +```cpp +/// @brief Get Kernel Program Binary +/// +/// @details +/// - A valid kernel handle must be created with zeKernelCreate. +/// - Returns Intel Graphics Assembly (GEN ISA) format binary program data for kernel handle. +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. 
+/// @returns +/// - ::ZE_RESULT_SUCCESS +ze_result_t ZE_APICALL +zeIntelKernelGetBinaryExp( + ze_kernel_handle_t hKernel, ///< [in] Kernel handle + size_t *pSize, ///< [in, out] pointer to variable with size of GEN ISA binary + char *pKernelBinary ///< [in,out] pointer to storage area for GEN ISA binary function +); +``` + +## Programming example + +```cpp +#include <fstream> + +//kernel = valid kernel handle +size_t kBinarySize = 0; +zeIntelKernelGetBinaryExp(kernel, &kBinarySize, nullptr); +char *progArray; +progArray = new char[kBinarySize]; +zeIntelKernelGetBinaryExp(kernel, &kBinarySize, progArray); +const std::string file_path = "program_binary.bin"; +std::ofstream stream(file_path, std::ios::out | std::ios::binary); +stream.write(reinterpret_cast<const char *>(progArray), kBinarySize); +stream.close(); +``` diff --git a/level_zero/include/ze_intel_gpu.h b/level_zero/include/ze_intel_gpu.h index 94fb91e7b5..3a5226fc2d 100644 --- a/level_zero/include/ze_intel_gpu.h +++ b/level_zero/include/ze_intel_gpu.h @@ -235,6 +235,37 @@ zeIntelGetDriverVersionString( size_t *pVersionSize); ///< [in,out] pointer to the size of the driver version string. ///< if size is zero, then the size of the version string is returned. +/// @brief Get Kernel Program Binary +/// +/// @details +/// - A valid kernel handle must be created with zeKernelCreate. +/// - Returns Intel Graphics Assembly (GEN ISA) format binary program data for kernel handle. +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. 
+/// @returns +/// - ::ZE_RESULT_SUCCESS + +#ifndef ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME +/// @brief Get Kernel Program Binary experimental name +#define ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME "ZE_intel_experimental_kernel_get_program_binary" +#endif // ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intel Kernel Get Binary Extension Version(s) +typedef enum _ze_intel_kernel_get_binary_exp_version_t { + ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_VERSION_FORCE_UINT32 = 0x7fffffff + +} ze_intel_kernel_get_binary_exp_version_t; + +ze_result_t ZE_APICALL +zeIntelKernelGetBinaryExp( + ze_kernel_handle_t hKernel, ///< [in] Kernel handle + size_t *pSize, ///< [in, out] pointer to variable with size of GEN ISA binary + char *pKernelBinary ///< [in,out] pointer to storage area for GEN ISA binary function +); + #if defined(__cplusplus) } // extern "C" #endif