Summary of this patch:
  * api.cpp: clCloneKernel now keeps the cloned kernel in a local of type
    cl_kernel and passes the address of that local to TRACING_EXIT, replacing
    the previous C-style (cl_kernel *) cast of a MultiDeviceKernel ** local.
    The internal object is obtained via castToObject only for cloneKernel().
  * cl_intel_tracing_tests.inl: adds IntelClCloneKernelTracingTest, which
    enables tracing for CL_FUNCTION_clCloneKernel and checks that the handle
    read from the exit callback's functionReturnValue equals the handle
    returned by clCloneKernel.

diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp
index 4770e89c16..0d44590299 100644
--- a/opencl/source/api/api.cpp
+++ b/opencl/source/api/api.cpp
@@ -5737,30 +5737,31 @@ cl_kernel CL_API_CALL clCloneKernel(cl_kernel sourceKernel,
                                     cl_int *errcodeRet) {
     TRACING_ENTER(ClCloneKernel, &sourceKernel, &errcodeRet);
     MultiDeviceKernel *pSourceMultiDeviceKernel = nullptr;
-    MultiDeviceKernel *pClonedMultiDeviceKernel = nullptr;
+    cl_kernel clonedMultiDeviceKernel = nullptr;
 
     auto retVal = validateObjects(withCastToInternal(sourceKernel, &pSourceMultiDeviceKernel));
     API_ENTER(&retVal);
     DBG_LOG_INPUTS("sourceKernel", sourceKernel);
 
     if (CL_SUCCESS == retVal) {
-        pClonedMultiDeviceKernel = MultiDeviceKernel::create(pSourceMultiDeviceKernel->getProgram(),
-                                                             pSourceMultiDeviceKernel->getKernelInfos(),
-                                                             retVal);
-        UNRECOVERABLE_IF((pClonedMultiDeviceKernel == nullptr) || (retVal != CL_SUCCESS));
+        clonedMultiDeviceKernel = MultiDeviceKernel::create(pSourceMultiDeviceKernel->getProgram(),
+                                                            pSourceMultiDeviceKernel->getKernelInfos(),
+                                                            retVal);
+        UNRECOVERABLE_IF((clonedMultiDeviceKernel == nullptr) || (retVal != CL_SUCCESS));
+        auto pClonedMultiDeviceKernel = castToObject<MultiDeviceKernel>(clonedMultiDeviceKernel);
 
         retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel);
     }
 
     if (errcodeRet) {
         *errcodeRet = retVal;
     }
-    if (pClonedMultiDeviceKernel != nullptr) {
-        gtpinNotifyKernelCreate(pClonedMultiDeviceKernel);
+    if (clonedMultiDeviceKernel != nullptr) {
+        gtpinNotifyKernelCreate(clonedMultiDeviceKernel);
     }
 
-    TRACING_EXIT(ClCloneKernel, (cl_kernel *)&pClonedMultiDeviceKernel);
-    return pClonedMultiDeviceKernel;
+    TRACING_EXIT(ClCloneKernel, &clonedMultiDeviceKernel);
+    return clonedMultiDeviceKernel;
 }
 
 CL_API_ENTRY cl_int CL_API_CALL clEnqueueVerifyMemoryINTEL(cl_command_queue commandQueue,
diff --git a/opencl/test/unit_test/api/cl_intel_tracing_tests.inl b/opencl/test/unit_test/api/cl_intel_tracing_tests.inl
index 03021bd2d5..fceaac197b 100644
--- a/opencl/test/unit_test/api/cl_intel_tracing_tests.inl
+++ b/opencl/test/unit_test/api/cl_intel_tracing_tests.inl
@@ -1063,4 +1063,67 @@ TEST_F(IntelClLinkProgramTracingTest, givenLinkProgramCallTracingWhenInvokingCal
     clReleaseProgram(programReturned);
 }
 
+struct IntelClCloneKernelTracingTest : public IntelTracingTest, PlatformFixture {
+  public:
+    void SetUp() override {
+        PlatformFixture::setUp();
+        IntelTracingTest::setUp();
+
+        status = clCreateTracingHandleINTEL(devices[0], callback, this, &handle);
+        ASSERT_NE(nullptr, handle);
+        ASSERT_EQ(CL_SUCCESS, status);
+
+        status = clSetTracingPointINTEL(handle, CL_FUNCTION_clCloneKernel, CL_TRUE);
+        ASSERT_EQ(CL_SUCCESS, status);
+
+        status = clEnableTracingINTEL(handle);
+        ASSERT_EQ(CL_SUCCESS, status);
+
+        const auto &gfxHelper = pDevice->getGfxCoreHelper();
+        const_cast<KernelDescriptor &>(pKernel->getKernelInfo().kernelDescriptor).kernelAttributes.simdSize = gfxHelper.getMinimalSIMDSize();
+    }
+    void TearDown() override {
+        status = clDisableTracingINTEL(handle);
+        ASSERT_EQ(CL_SUCCESS, status);
+
+        status = clDestroyTracingHandleINTEL(handle);
+        ASSERT_EQ(CL_SUCCESS, status);
+        IntelTracingTest::tearDown();
+        PlatformFixture::tearDown();
+    }
+
+  protected:
+    void call() {
+        cl_kernel sourceKernel = pMultiDeviceKernel;
+        clonedKernel = clCloneKernel(sourceKernel, &retVal);
+        ASSERT_EQ(CL_SUCCESS, retVal);
+    }
+
+    void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override {
+        ASSERT_EQ(CL_FUNCTION_clCloneKernel, fid);
+        if (callbackData->site == CL_CALLBACK_SITE_ENTER) {
+            ++enterCount;
+        } else if (callbackData->site == CL_CALLBACK_SITE_EXIT) {
+            obtainedClonedKernelCallback = *reinterpret_cast<cl_kernel *>(callbackData->functionReturnValue);
+            ++exitCount;
+        }
+    }
+
+    cl_kernel clonedKernel = nullptr;
+    cl_kernel obtainedClonedKernelCallback = nullptr;
+    uint16_t enterCount = 0;
+    uint16_t exitCount = 0;
+};
+
+TEST_F(IntelClCloneKernelTracingTest, givenCloneKernelCallTracingWhenInvokingCallbackThenPointersFromCallAndCallbackPointToTheSameAddress) {
+    call();
+    EXPECT_EQ(1u, enterCount);
+    EXPECT_EQ(1u, exitCount);
+
+    EXPECT_NE(nullptr, clonedKernel);
+    EXPECT_NE(nullptr, obtainedClonedKernelCallback);
+    EXPECT_EQ(clonedKernel, obtainedClonedKernelCallback);
+    clReleaseKernel(clonedKernel);
+}
+
 } // namespace ULT