From 5e4238ddb1af8ae1741871255532c31893158b28 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Thu, 27 Feb 2025 03:53:44 +0100 Subject: [PATCH] Revert "fix: respect arg size when setting kernel arg in OCL path" This reverts commit c10ff0f3c690fdc8375617c1a883aef22fdd6df6. Signed-off-by: Compute-Runtime-Validation --- opencl/source/kernel/kernel.cpp | 11 +-- .../test/unit_test/api/api_tests_wrapper3.cpp | 1 - .../unit_test/api/cl_set_kernel_arg_tests.inl | 79 ------------------- .../unit_test/kernel/clone_kernel_tests.cpp | 4 +- .../kernel/kernel_immediate_arg_tests.cpp | 56 ++++++++++--- 5 files changed, 53 insertions(+), 98 deletions(-) delete mode 100644 opencl/test/unit_test/api/cl_set_kernel_arg_tests.inl diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 4aaa90124b..973c9f27d0 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1714,16 +1714,17 @@ cl_int Kernel::setArgImmediate(uint32_t argIndex, const auto &argAsVal = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex].as(); for (const auto &element : argAsVal.elements) { DEBUG_BREAK_IF(element.size <= 0); - if (static_cast(element.sourceOffset + element.size) > argSize) { - return CL_INVALID_ARG_SIZE; - } auto pDst = ptrOffset(crossThreadData, element.offset); auto pSrc = ptrOffset(argVal, element.sourceOffset); - auto dstAvailableSpace = crossThreadDataEnd - pDst; + DEBUG_BREAK_IF(!(ptrOffset(pDst, element.size) <= crossThreadDataEnd)); - memcpy_s(pDst, dstAvailableSpace, pSrc, element.size); + if (element.sourceOffset < argSize) { + size_t maxBytesToCopy = argSize - element.sourceOffset; + size_t bytesToCopy = std::min(static_cast(element.size), maxBytesToCopy); + memcpy_s(pDst, element.size, pSrc, bytesToCopy); + } } retVal = CL_SUCCESS; diff --git a/opencl/test/unit_test/api/api_tests_wrapper3.cpp b/opencl/test/unit_test/api/api_tests_wrapper3.cpp index 7a2737ace9..49fcfe90f1 100644 --- a/opencl/test/unit_test/api/api_tests_wrapper3.cpp +++ b/opencl/test/unit_test/api/api_tests_wrapper3.cpp @@ -33,7 +33,6 @@ #include "opencl/test/unit_test/api/cl_set_context_destructor_callback.inl" #include "opencl/test/unit_test/api/cl_set_event_callback_tests.inl" #include "opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl" -#include "opencl/test/unit_test/api/cl_set_kernel_arg_tests.inl" #include "opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl" #include "opencl/test/unit_test/api/cl_set_mem_object_destructor_callback_tests.inl" #include "opencl/test/unit_test/api/cl_set_performance_configuration_tests.inl" diff --git a/opencl/test/unit_test/api/cl_set_kernel_arg_tests.inl b/opencl/test/unit_test/api/cl_set_kernel_arg_tests.inl deleted file mode 100644 index 6947d6483b..0000000000 --- a/opencl/test/unit_test/api/cl_set_kernel_arg_tests.inl +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (C) 2024-2025 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/compiler_interface/compiler_interface.h" -#include "shared/source/compiler_interface/compiler_options.h" -#include "shared/source/device/device.h" -#include "shared/source/helpers/file_io.h" -#include "shared/test/common/helpers/kernel_binary_helper.h" -#include "shared/test/common/helpers/test_files.h" - -#include "opencl/source/context/context.h" - -#include "cl_api_tests.h" - -using namespace NEO; - -using ClSetKernelArgTests = ApiTests; - -namespace ULT { - -TEST_F(ClSetKernelArgTests, WhenSettingKernelArgThenArgSizeIsRespected) { - cl_program pProgram = nullptr; - size_t sourceSize = 0; - std::string testFile; - - KernelBinaryHelper kbHelper("simple_kernels", false); - - testFile.append(clFiles); - testFile.append("simple_kernels.cl"); - - auto pSource = loadDataFromFile( - testFile.c_str(), - sourceSize); - - ASSERT_NE(0u, sourceSize); - ASSERT_NE(nullptr, pSource); - - const char *sources[1] = {pSource.get()}; - pProgram = clCreateProgramWithSource( - pContext, - 1, - sources, - &sourceSize, - &retVal); - - EXPECT_NE(nullptr, pProgram); - ASSERT_EQ(CL_SUCCESS, retVal); - - retVal = clBuildProgram( - pProgram, - 1, - &testedClDevice, - CompilerOptions::argInfo.data(), - nullptr, - nullptr); - - ASSERT_EQ(CL_SUCCESS, retVal); - - cl_kernel kernel = clCreateKernel(pProgram, "simple_kernel_1", &retVal); - ASSERT_EQ(CL_SUCCESS, retVal); - - uint32_t data = 1; - retVal = clSetKernelArg(kernel, 1, 1, &data); - EXPECT_EQ(CL_INVALID_ARG_SIZE, retVal); - - retVal = clSetKernelArg(kernel, 1, sizeof(data), &data); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseKernel(kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseProgram(pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); -} -} // namespace ULT diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 600e1242a7..96e42418e6 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -485,9 +485,9 @@ TEST_F(CloneKernelTest, givenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect } TEST_F(CloneKernelTest, givenArgImmediateWhenCloningKernelThenKernelInfoIsCorrect) { - using TypeParam = unsigned long; - pKernelInfo->addArgImmediate(0, sizeof(TypeParam), 0x20); + pKernelInfo->addArgImmediate(0, sizeof(void *), 0x20); + using TypeParam = unsigned long; auto value = (TypeParam)0xAA55AA55UL; retVal = pSourceMultiDeviceKernel->setArg(0, sizeof(TypeParam), &value); diff --git a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp index e63bcd2dd7..3e87dc2ea7 100644 --- a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp @@ -221,7 +221,8 @@ TYPED_TEST(KernelArgImmediateTest, givenTooLargePatchSizeWhenSettingArgThenDontR const auto memoryBeyondLimitBefore = *reinterpret_cast(memoryBeyondLimitAddress); - auto retVal = pKernel->setArg(0, sizeof(memory), memory); + this->pKernelInfo->argAsVal(0).elements[0].size = sizeof(TypeParam) + 1; + auto retVal = pKernel->setArg(0, sizeof(TypeParam), &memory[0]); const auto memoryBeyondLimitAfter = *reinterpret_cast(memoryBeyondLimitAddress); EXPECT_EQ(memoryBeyondLimitBefore, memoryBeyondLimitAfter); @@ -256,6 +257,43 @@ TYPED_TEST(KernelArgImmediateTest, givenNotTooLargePatchSizeWhenSettingArgThenDo } } +TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndFirstPatchSizeTooLargeWhenSettingArgThenDontReadMemoryBeyondLimit) { + if (sizeof(TypeParam) == 1) + return; // multiple patch chars don't make sense + + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); + TypeParam memory[2]; + std::memset(&memory[0], 0xaa, sizeof(TypeParam)); + std::memset(&memory[1], 0xbb, sizeof(TypeParam)); + + auto &elements = this->pKernelInfo->argAsVal(3).elements; + const auto destinationMemoryAddress1 = pKernel->getCrossThreadData() + + elements[2].offset; + const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + + elements[1].offset; + const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam); + const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; + + const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam)); + const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam) / 2); + + elements[2].sourceOffset = 0; + elements[1].sourceOffset = sizeof(TypeParam) / 2; + elements[2].size = sizeof(TypeParam); + elements[1].size = sizeof(TypeParam) / 2; + auto retVal = pKernel->setArg(3, sizeof(TypeParam), &memory[0]); + + EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, sizeof(TypeParam))); + EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, sizeof(TypeParam) / 2)); + + EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam))); + EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress2, sizeof(TypeParam) / 2)); + + EXPECT_EQ(CL_SUCCESS, retVal); + } +} + TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndSecondPatchSizeTooLargeWhenSettingArgThenDontReadMemoryBeyondLimit) { if (sizeof(TypeParam) == 1) return; // multiple patch chars don't make sense @@ -272,17 +310,17 @@ TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndSecondPatchSizeTooLarg const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + elements[1].offset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam) / 2; - const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam); + const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam) / 2); - const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam)); + const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam) / 2); elements[0].size = 0; elements[2].sourceOffset = 0; elements[1].sourceOffset = sizeof(TypeParam) / 2; elements[2].size = sizeof(TypeParam) / 2; elements[1].size = sizeof(TypeParam); - auto retVal = pKernel->setArg(3, sizeof(memory), memory); + auto retVal = pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, sizeof(TypeParam) / 2)); @@ -295,9 +333,6 @@ TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndSecondPatchSizeTooLarg } TYPED_TEST(KernelArgImmediateTest, givenMultiplePatchesAndOneSourceOffsetBeyondArgumentWhenSettingArgThenDontCopyThisPatch) { - if (sizeof(TypeParam) == 1u) { - GTEST_SKIP(); - } for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); TypeParam memory[2]; @@ -310,23 +345,22 @@ TYPED_TEST(KernelArgImmediateTest, givenMultiplePatchesAndOneSourceOffsetBeyondA const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + elements[2].offset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam); - const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + 1; + const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam)); - const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam) - 1); + const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam)); elements[0].size = 0; elements[1].sourceOffset = 0; elements[1].size = sizeof(TypeParam); elements[2].sourceOffset = sizeof(TypeParam); elements[2].size = 1; - auto retVal = pKernel->setArg(3, sizeof(memory), memory); + auto retVal = pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, memoryBeyondLimitBefore1.size())); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, memoryBeyondLimitBefore2.size())); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam))); - EXPECT_EQ(0, std::memcmp(&memory[1], destinationMemoryAddress2, 1)); EXPECT_EQ(CL_SUCCESS, retVal); }