From 0c8af82d1b8d354db01816f6ccf6e6f7619ac519 Mon Sep 17 00:00:00 2001 From: Damian Tomczak Date: Tue, 8 Oct 2024 02:55:03 +0000 Subject: [PATCH] fix: stateless suffix for stateless kernels Resolves: NEO-12847 Signed-off-by: Damian Tomczak --- .../builtin/builtin_functions_lib_impl.cpp | 32 +++---- .../builtin/builtin_functions_tests.cpp | 18 ++-- manifests/manifest.yml | 2 +- .../built_ins/builtins_dispatch_builder.cpp | 96 +++++++++---------- .../test/unit_test/aub_tests/CMakeLists.txt | 7 ++ .../enqueue_fill_buffer_tests.cpp | 15 +-- .../enqueue_svm_mem_copy_tests.cpp | 2 +- .../enqueue_svm_mem_fill_tests.cpp | 2 +- .../copy_buffer_rect_stateless.builtin_kernel | 8 +- ..._buffer_to_buffer_stateless.builtin_kernel | 18 ++-- ...buffer_to_image3d_stateless.builtin_kernel | 12 +-- ...image3d_to_buffer_stateless.builtin_kernel | 12 +-- .../fill_buffer_stateless.builtin_kernel | 16 ++-- .../test_files/builtin_copyfill_stateless.cl | 42 ++++---- 14 files changed, 146 insertions(+), 136 deletions(-) diff --git a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp index 9eaf3f9575..d4c5ea86e9 100644 --- a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp +++ b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp @@ -35,11 +35,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::copyBufferToBuffer; break; case Builtin::copyBufferBytesStateless: - kernelName = "copyBufferToBufferBytesSingle"; + kernelName = "copyBufferToBufferBytesSingleStateless"; builtin = NEO::EBuiltInOps::copyBufferToBufferStateless; break; case Builtin::copyBufferBytesStatelessHeapless: - kernelName = "copyBufferToBufferBytesSingle"; + kernelName = "copyBufferToBufferBytesSingleStateless"; builtin = NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless; break; case Builtin::copyBufferRectBytes2d: @@ -55,11 +55,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::copyBufferToBuffer; break; case Builtin::copyBufferToBufferMiddleStateless: - kernelName = "CopyBufferToBufferMiddleRegion"; + kernelName = "CopyBufferToBufferMiddleRegionStateless"; builtin = NEO::EBuiltInOps::copyBufferToBufferStateless; break; case Builtin::copyBufferToBufferMiddleStatelessHeapless: - kernelName = "CopyBufferToBufferMiddleRegion"; + kernelName = "CopyBufferToBufferMiddleRegionStateless"; builtin = NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless; break; case Builtin::copyBufferToBufferSide: @@ -67,11 +67,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::copyBufferToBuffer; break; case Builtin::copyBufferToBufferSideStateless: - kernelName = "CopyBufferToBufferSideRegion"; + kernelName = "CopyBufferToBufferSideRegionStateless"; builtin = NEO::EBuiltInOps::copyBufferToBufferStateless; break; case Builtin::copyBufferToBufferSideStatelessHeapless: - kernelName = "CopyBufferToBufferSideRegion"; + kernelName = "CopyBufferToBufferSideRegionStateless"; builtin = NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless; break; case Builtin::fillBufferImmediate: @@ -79,11 +79,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::fillBuffer; break; case Builtin::fillBufferImmediateStateless: - kernelName = "FillBufferImmediate"; + kernelName = "FillBufferImmediateStateless"; builtin = NEO::EBuiltInOps::fillBufferStateless; break; case Builtin::fillBufferImmediateStatelessHeapless: - kernelName = "FillBufferImmediate"; + kernelName = "FillBufferImmediateStateless"; builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless; break; case Builtin::fillBufferImmediateLeftOver: @@ -91,11 +91,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::fillBuffer; break; case Builtin::fillBufferImmediateLeftOverStateless: - kernelName = "FillBufferImmediateLeftOver"; + kernelName = "FillBufferImmediateLeftOverStateless"; builtin = NEO::EBuiltInOps::fillBufferStateless; break; case Builtin::fillBufferImmediateLeftOverStatelessHeapless: - kernelName = "FillBufferImmediateLeftOver"; + kernelName = "FillBufferImmediateLeftOverStateless"; builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless; break; case Builtin::fillBufferSSHOffset: @@ -103,11 +103,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::fillBuffer; break; case Builtin::fillBufferSSHOffsetStateless: - kernelName = "FillBufferSSHOffset"; + kernelName = "FillBufferSSHOffsetStateless"; builtin = NEO::EBuiltInOps::fillBufferStateless; break; case Builtin::fillBufferSSHOffsetStatelessHeapless: - kernelName = "FillBufferSSHOffset"; + kernelName = "FillBufferSSHOffsetStateless"; builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless; break; case Builtin::fillBufferMiddle: @@ -115,11 +115,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::fillBuffer; break; case Builtin::fillBufferMiddleStateless: - kernelName = "FillBufferMiddle"; + kernelName = "FillBufferMiddleStateless"; builtin = NEO::EBuiltInOps::fillBufferStateless; break; case Builtin::fillBufferMiddleStatelessHeapless: - kernelName = "FillBufferMiddle"; + kernelName = "FillBufferMiddleStateless"; builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless; break; case Builtin::fillBufferRightLeftover: @@ -127,11 +127,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtin = NEO::EBuiltInOps::fillBuffer; break; case Builtin::fillBufferRightLeftoverStateless: - kernelName = "FillBufferRightLeftover"; + kernelName = "FillBufferRightLeftoverStateless"; builtin = NEO::EBuiltInOps::fillBufferStateless; break; case Builtin::fillBufferRightLeftoverStatelessHeapless: - kernelName = "FillBufferRightLeftover"; + kernelName = "FillBufferRightLeftoverStateless"; builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless; break; case Builtin::queryKernelTimestamps: diff --git a/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp b/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp index 7afe3fcc6e..655935d199 100644 --- a/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp @@ -157,39 +157,39 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessBuiltinsWhenInitBuiltinKernel lib.initBuiltinKernel(L0::Builtin::copyBufferBytesStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("copyBufferToBufferBytesSingle", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("copyBufferToBufferBytesSingleStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::copyBufferToBufferMiddleStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("CopyBufferToBufferMiddleRegion", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("CopyBufferToBufferMiddleRegionStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::copyBufferToBufferSideStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("CopyBufferToBufferSideRegion", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("CopyBufferToBufferSideRegionStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::fillBufferImmediateStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("FillBufferImmediate", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("FillBufferImmediateStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::fillBufferImmediateLeftOverStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("FillBufferImmediateLeftOver", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("FillBufferImmediateLeftOverStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::fillBufferSSHOffsetStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("FillBufferSSHOffset", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("FillBufferSSHOffsetStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::fillBufferSSHOffsetStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("FillBufferSSHOffset", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("FillBufferSSHOffsetStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::fillBufferMiddleStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("FillBufferMiddle", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("FillBufferMiddleStateless", lib.kernelNamePassed.c_str()); lib.initBuiltinKernel(L0::Builtin::fillBufferRightLeftoverStatelessHeapless); EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed); - EXPECT_STREQ("FillBufferRightLeftover", lib.kernelNamePassed.c_str()); + EXPECT_STREQ("FillBufferRightLeftoverStateless", lib.kernelNamePassed.c_str()); } HWTEST_F(TestBuiltinFunctionsLibImpl, givenCompilerInterfaceWhenCreateDeviceAndImageSupportedThenBuiltinsImageFunctionsAreLoaded) { diff --git a/manifests/manifest.yml b/manifests/manifest.yml index 700bde9749..02f41510de 100644 --- a/manifests/manifest.yml +++ b/manifests/manifest.yml @@ -42,7 +42,7 @@ components: dest_dir: kernels_bin type: git branch: kernels_bin - revision: 3104-2549 + revision: 3104-2557 kmdaf: branch: kmdaf dest_dir: kmdaf diff --git a/opencl/source/built_ins/builtins_dispatch_builder.cpp b/opencl/source/built_ins/builtins_dispatch_builder.cpp index d7226e3275..9f176f988e 100644 --- a/opencl/source/built_ins/builtins_dispatch_builder.cpp +++ b/opencl/source/built_ins/builtins_dispatch_builder.cpp @@ -144,10 +144,10 @@ class BuiltInOp : public BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::copyBufferToBufferStateless, CompilerOptions::greaterThan4gbBuffersRequired, - "CopyBufferToBufferLeftLeftover", kernLeftLeftover, - "CopyBufferToBufferMiddle", kernMiddle, - "CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned, - "CopyBufferToBufferRightLeftover", kernRightLeftover); + "CopyBufferToBufferLeftLeftoverStateless", kernLeftLeftover, + "CopyBufferToBufferMiddleStateless", kernMiddle, + "CopyBufferToBufferMiddleMisalignedStateless", kernMiddleMisaligned, + "CopyBufferToBufferRightLeftoverStateless", kernRightLeftover); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { @@ -162,10 +162,10 @@ class BuiltInOp : public Built : BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::copyBufferToBufferStatelessHeapless, CompilerOptions::greaterThan4gbBuffersRequired, - "CopyBufferToBufferLeftLeftover", kernLeftLeftover, - "CopyBufferToBufferMiddle", kernMiddle, - "CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned, - "CopyBufferToBufferRightLeftover", kernRightLeftover); + "CopyBufferToBufferLeftLeftoverStateless", kernLeftLeftover, + "CopyBufferToBufferMiddleStateless", kernMiddle, + "CopyBufferToBufferMiddleMisalignedStateless", kernMiddleMisaligned, + "CopyBufferToBufferRightLeftoverStateless", kernRightLeftover); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { @@ -408,18 +408,18 @@ class BuiltInOp : public BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::copyBufferRectStateless, CompilerOptions::greaterThan4gbBuffersRequired, - "CopyBufferRectBytes2d", kernelBytes[0], - "CopyBufferRectBytes2d", kernelBytes[1], - "CopyBufferRectBytes3d", kernelBytes[2], - "CopyBufferRectBytes2d", kernelLeft[0], - "CopyBufferRectBytes2d", kernelLeft[1], - "CopyBufferRectBytes3d", kernelLeft[2], - "CopyBufferRectBytesMiddle2d", kernelMiddle[0], - "CopyBufferRectBytesMiddle2d", kernelMiddle[1], - "CopyBufferRectBytesMiddle3d", kernelMiddle[2], - "CopyBufferRectBytes2d", kernelRight[0], - "CopyBufferRectBytes2d", kernelRight[1], - "CopyBufferRectBytes3d", kernelRight[2]); + "CopyBufferRectBytes2dStateless", kernelBytes[0], + "CopyBufferRectBytes2dStateless", kernelBytes[1], + "CopyBufferRectBytes3dStateless", kernelBytes[2], + "CopyBufferRectBytes2dStateless", kernelLeft[0], + "CopyBufferRectBytes2dStateless", kernelLeft[1], + "CopyBufferRectBytes3dStateless", kernelLeft[2], + "CopyBufferRectBytesMiddle2dStateless", kernelMiddle[0], + "CopyBufferRectBytesMiddle2dStateless", kernelMiddle[1], + "CopyBufferRectBytesMiddle3dStateless", kernelMiddle[2], + "CopyBufferRectBytes2dStateless", kernelRight[0], + "CopyBufferRectBytes2dStateless", kernelRight[1], + "CopyBufferRectBytes3dStateless", kernelRight[2]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { return buildDispatchInfosTyped(multiDispatchInfo); @@ -433,18 +433,18 @@ class BuiltInOp : public BuiltInOp : BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::copyBufferRectStatelessHeapless, CompilerOptions::greaterThan4gbBuffersRequired, - "CopyBufferRectBytes2d", kernelBytes[0], - "CopyBufferRectBytes2d", kernelBytes[1], - "CopyBufferRectBytes3d", kernelBytes[2], - "CopyBufferRectBytes2d", kernelLeft[0], - "CopyBufferRectBytes2d", kernelLeft[1], - "CopyBufferRectBytes3d", kernelLeft[2], - "CopyBufferRectBytesMiddle2d", kernelMiddle[0], - "CopyBufferRectBytesMiddle2d", kernelMiddle[1], - "CopyBufferRectBytesMiddle3d", kernelMiddle[2], - "CopyBufferRectBytes2d", kernelRight[0], - "CopyBufferRectBytes2d", kernelRight[1], - "CopyBufferRectBytes3d", kernelRight[2]); + "CopyBufferRectBytes2dStateless", kernelBytes[0], + "CopyBufferRectBytes2dStateless", kernelBytes[1], + "CopyBufferRectBytes3dStateless", kernelBytes[2], + "CopyBufferRectBytes2dStateless", kernelLeft[0], + "CopyBufferRectBytes2dStateless", kernelLeft[1], + "CopyBufferRectBytes3dStateless", kernelLeft[2], + "CopyBufferRectBytesMiddle2dStateless", kernelMiddle[0], + "CopyBufferRectBytesMiddle2dStateless", kernelMiddle[1], + "CopyBufferRectBytesMiddle3dStateless", kernelMiddle[2], + "CopyBufferRectBytes2dStateless", kernelRight[0], + "CopyBufferRectBytes2dStateless", kernelRight[1], + "CopyBufferRectBytes3dStateless", kernelRight[2]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { return buildDispatchInfosTyped(multiDispatchInfo); @@ -550,9 +550,9 @@ class BuiltInOp : public BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::fillBufferStateless, CompilerOptions::greaterThan4gbBuffersRequired, - "FillBufferLeftLeftover", kernLeftLeftover, - "FillBufferMiddle", kernMiddle, - "FillBufferRightLeftover", kernRightLeftover); + "FillBufferLeftLeftoverStateless", kernLeftLeftover, + "FillBufferMiddleStateless", kernMiddle, + "FillBufferRightLeftoverStateless", kernRightLeftover); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override { return buildDispatchInfosTyped(multiDispatchInfos); @@ -565,9 +565,9 @@ class BuiltInOp : public BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::fillBufferStatelessHeapless, CompilerOptions::greaterThan4gbBuffersRequired, - "FillBufferLeftLeftover", kernLeftLeftover, - "FillBufferMiddle", kernMiddle, - "FillBufferRightLeftover", kernRightLeftover); + "FillBufferLeftLeftoverStateless", kernLeftLeftover, + "FillBufferMiddleStateless", kernMiddle, + "FillBufferRightLeftoverStateless", kernRightLeftover); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override { return buildDispatchInfosTyped(multiDispatchInfos); @@ -677,11 +677,11 @@ class BuiltInOp : public BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::copyBufferToImage3dStateless, CompilerOptions::greaterThan4gbBuffersRequired, - "CopyBufferToImage3dBytes", kernelBytes[0], - "CopyBufferToImage3d2Bytes", kernelBytes[1], - "CopyBufferToImage3d4Bytes", kernelBytes[2], - "CopyBufferToImage3d8Bytes", kernelBytes[3], - "CopyBufferToImage3d16Bytes", kernelBytes[4]); + "CopyBufferToImage3dBytesStateless", kernelBytes[0], + "CopyBufferToImage3d2BytesStateless", kernelBytes[1], + "CopyBufferToImage3d4BytesStateless", kernelBytes[2], + "CopyBufferToImage3d8BytesStateless", kernelBytes[3], + "CopyBufferToImage3d16BytesStateless", kernelBytes[4]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { @@ -799,11 +799,11 @@ class BuiltInOp : public BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::copyImage3dToBufferStateless, CompilerOptions::greaterThan4gbBuffersRequired, - "CopyImage3dToBufferBytes", kernelBytes[0], - "CopyImage3dToBuffer2Bytes", kernelBytes[1], - "CopyImage3dToBuffer4Bytes", kernelBytes[2], - "CopyImage3dToBuffer8Bytes", kernelBytes[3], - "CopyImage3dToBuffer16Bytes", kernelBytes[4]); + "CopyImage3dToBufferBytesStateless", kernelBytes[0], + "CopyImage3dToBuffer2BytesStateless", kernelBytes[1], + "CopyImage3dToBuffer4BytesStateless", kernelBytes[2], + "CopyImage3dToBuffer8BytesStateless", kernelBytes[3], + "CopyImage3dToBuffer16BytesStateless", kernelBytes[4]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { diff --git a/opencl/test/unit_test/aub_tests/CMakeLists.txt b/opencl/test/unit_test/aub_tests/CMakeLists.txt index 3a03ce3314..67ced91179 100644 --- a/opencl/test/unit_test/aub_tests/CMakeLists.txt +++ b/opencl/test/unit_test/aub_tests/CMakeLists.txt @@ -30,6 +30,13 @@ if(NOT NEO_SKIP_AUB_TESTS AND DEFINED AUB_STREAM_PROJECT_NAME) set_target_properties(igdrcl_aub_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_property(TARGET igdrcl_aub_tests PROPERTY ENABLE_EXPORTS TRUE) + target_sources(igdrcl_aub_tests PRIVATE + $ + $ + $ + $ + ) + if(MSVC) set_target_properties(igdrcl_aub_tests PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY ${TargetDir} diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp index c63a6f51ab..bbb20f9652 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp @@ -141,7 +141,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenRightLeftoverWhenFillingBufferThenFillB EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); - auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(adjustBuiltInType(EBuiltInOps::fillBuffer), + const EBuiltInOps::Type builtInType = adjustBuiltInType(EBuiltInOps::fillBuffer); + auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); @@ -158,7 +159,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenRightLeftoverWhenFillingBufferThenFillB EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferRightLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ(EBuiltInOps::isHeapless(builtInType) ? "FillBufferRightLeftoverStateless" : "FillBufferRightLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -168,7 +169,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenMiddleWhenFillingBufferThenFillBufferMi EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); - auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(adjustBuiltInType(EBuiltInOps::fillBuffer), + const EBuiltInOps::Type builtInType = adjustBuiltInType(EBuiltInOps::fillBuffer); + auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); @@ -185,7 +187,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenMiddleWhenFillingBufferThenFillBufferMi EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ(EBuiltInOps::isHeapless(builtInType) ? "FillBufferMiddleStateless" : "FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -195,7 +197,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenLeftLeftoverWhenFillingBufferThenFillBu EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); - auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(adjustBuiltInType(EBuiltInOps::fillBuffer), + const EBuiltInOps::Type builtInType = adjustBuiltInType(EBuiltInOps::fillBuffer); + auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); @@ -212,7 +215,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenLeftLeftoverWhenFillingBufferThenFillBu EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferLeftLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ(EBuiltInOps::isHeapless(builtInType) ? "FillBufferLeftLeftoverStateless" : "FillBufferLeftLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp index 20783745d0..8937387a53 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp @@ -152,7 +152,7 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBuffer EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = mdi->begin()->getKernel(); - EXPECT_EQ("CopyBufferToBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ(EBuiltInOps::isHeapless(builtIn) ? "CopyBufferToBufferMiddleStateless" : "CopyBufferToBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); } HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndSrcHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp index 4b07ee2990..77596213b6 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp @@ -229,7 +229,7 @@ HWTEST_P(EnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilder EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = di->getKernel(); - EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ(EBuiltInOps::isHeapless(builtIn) ? "FillBufferMiddleStateless" : "FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); } INSTANTIATE_TEST_SUITE_P(size_t, diff --git a/shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel b/shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel index a6dae83cea..6781326d01 100644 --- a/shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel +++ b/shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel @@ -7,7 +7,7 @@ R"===( -__kernel void CopyBufferRectBytes2d( +__kernel void CopyBufferRectBytes2dStateless( __global const char* src, __global char* dst, ulong4 SrcOrigin, @@ -26,7 +26,7 @@ __kernel void CopyBufferRectBytes2d( } -__kernel void CopyBufferRectBytesMiddle2d( +__kernel void CopyBufferRectBytesMiddle2dStateless( const __global uint* src, __global uint* dst, ulong4 SrcOrigin, @@ -48,7 +48,7 @@ __kernel void CopyBufferRectBytesMiddle2d( vstore4(loaded,x,dst); } -__kernel void CopyBufferRectBytes3d( +__kernel void CopyBufferRectBytes3dStateless( __global const char* src, __global char* dst, ulong4 SrcOrigin, @@ -68,7 +68,7 @@ __kernel void CopyBufferRectBytes3d( } -__kernel void CopyBufferRectBytesMiddle3d( +__kernel void CopyBufferRectBytesMiddle3dStateless( const __global uint* src, __global uint* dst, ulong4 SrcOrigin, diff --git a/shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel b/shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel index a04235e953..6e2da1d648 100644 --- a/shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel +++ b/shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel @@ -1,12 +1,12 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( -__kernel void CopyBufferToBufferBytes( +__kernel void CopyBufferToBufferBytesStateless( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, @@ -18,7 +18,7 @@ __kernel void CopyBufferToBufferBytes( pDst[ 0 ] = pSrc[ 0 ]; } -__kernel void CopyBufferToBufferLeftLeftover( +__kernel void CopyBufferToBufferLeftLeftoverStateless( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, @@ -28,7 +28,7 @@ __kernel void CopyBufferToBufferLeftLeftover( pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } -__kernel void CopyBufferToBufferMiddle( +__kernel void CopyBufferToBufferMiddleStateless( const __global uint* pSrc, __global uint* pDst, ulong srcOffsetInBytes, @@ -41,7 +41,7 @@ __kernel void CopyBufferToBufferMiddle( vstore4(loaded, gid, pDst); } -__kernel void CopyBufferToBufferMiddleMisaligned( +__kernel void CopyBufferToBufferMiddleMisalignedStateless( __global const uint* pSrc, __global uint* pDst, ulong srcOffsetInBytes, @@ -62,7 +62,7 @@ __kernel void CopyBufferToBufferMiddleMisaligned( vstore4(result, gid, pDst); } -__kernel void CopyBufferToBufferRightLeftover( +__kernel void CopyBufferToBufferRightLeftoverStateless( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, @@ -72,12 +72,12 @@ __kernel void CopyBufferToBufferRightLeftover( pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } -__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) { +__kernel void copyBufferToBufferBytesSingleStateless(__global uchar *dst, const __global uchar *src) { size_t gid = get_global_id(0); dst[gid] = (uchar)(src[gid]); } -__kernel void CopyBufferToBufferSideRegion( +__kernel void CopyBufferToBufferSideRegionStateless( __global uchar* pDst, const __global uchar* pSrc, ulong len, @@ -93,7 +93,7 @@ __kernel void CopyBufferToBufferSideRegion( } } -__kernel void CopyBufferToBufferMiddleRegion( +__kernel void CopyBufferToBufferMiddleRegionStateless( __global uint* pDst, const __global uint* pSrc, ulong elems, diff --git a/shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel b/shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel index b468a88503..5d74570567 100644 --- a/shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel +++ b/shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,7 +8,7 @@ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable -__kernel void CopyBufferToImage3dBytes(__global uchar *src, +__kernel void CopyBufferToImage3dBytesStateless(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, @@ -23,7 +23,7 @@ __kernel void CopyBufferToImage3dBytes(__global uchar *src, write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1)); } -__kernel void CopyBufferToImage3d2Bytes(__global uchar *src, +__kernel void CopyBufferToImage3d2BytesStateless(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, @@ -49,7 +49,7 @@ __kernel void CopyBufferToImage3d2Bytes(__global uchar *src, write_imageui(output, dstCoord, c); } -__kernel void CopyBufferToImage3d4Bytes(__global uchar *src, +__kernel void CopyBufferToImage3d4BytesStateless(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, @@ -77,7 +77,7 @@ __kernel void CopyBufferToImage3d4Bytes(__global uchar *src, write_imageui(output, dstCoord, c); } -__kernel void CopyBufferToImage3d8Bytes(__global uchar *src, +__kernel void CopyBufferToImage3d8BytesStateless(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, @@ -112,7 +112,7 @@ __kernel void CopyBufferToImage3d8Bytes(__global uchar *src, write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1)); } -__kernel void CopyBufferToImage3d16Bytes(__global uchar *src, +__kernel void CopyBufferToImage3d16BytesStateless(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, diff --git a/shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel b/shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel index 837a3c4fe3..16df33e5b2 100644 --- a/shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel +++ b/shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel @@ -1,12 +1,12 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( -__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input, +__kernel void CopyImage3dToBufferBytesStateless(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, @@ -22,7 +22,7 @@ __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input, *(dst + DstOffset + x) = convert_uchar_sat(c.x); } -__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input, +__kernel void CopyImage3dToBuffer2BytesStateless(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, @@ -45,7 +45,7 @@ __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input, } } -__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input, +__kernel void CopyImage3dToBuffer4BytesStateless(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, @@ -70,7 +70,7 @@ __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input, } } -__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input, +__kernel void CopyImage3dToBuffer8BytesStateless(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, @@ -100,7 +100,7 @@ __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input, } } -__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input, +__kernel void CopyImage3dToBuffer16BytesStateless(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, diff --git a/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel b/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel index 358de60781..f7d33fac33 100644 --- a/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel +++ b/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,7 +7,7 @@ R"===( // assumption is local work size = pattern size -__kernel void FillBufferBytes( +__kernel void FillBufferBytesStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern ) @@ -17,7 +17,7 @@ __kernel void FillBufferBytes( pDst[dstIndex] = pPattern[srcIndex]; } -__kernel void FillBufferLeftLeftover( +__kernel void FillBufferLeftLeftoverStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, @@ -27,7 +27,7 @@ __kernel void FillBufferLeftLeftover( pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } -__kernel void FillBufferMiddle( +__kernel void FillBufferMiddleStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uint* pPattern, @@ -37,7 +37,7 @@ __kernel void FillBufferMiddle( ((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ]; } -__kernel void FillBufferRightLeftover( +__kernel void FillBufferRightLeftoverStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, @@ -47,7 +47,7 @@ __kernel void FillBufferRightLeftover( pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } -__kernel void FillBufferImmediate( +__kernel void FillBufferImmediateStateless( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) @@ -57,7 +57,7 @@ __kernel void FillBufferImmediate( dstPtr[gid] = value; } -__kernel void FillBufferImmediateLeftOver( +__kernel void FillBufferImmediateLeftOverStateless( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) @@ -66,7 +66,7 @@ __kernel void FillBufferImmediateLeftOver( (ptr + dstSshOffset)[gid] = value; } -__kernel void FillBufferSSHOffset( +__kernel void FillBufferSSHOffsetStateless( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const __global uchar* pPattern, diff --git a/shared/test/common/test_files/builtin_copyfill_stateless.cl b/shared/test/common/test_files/builtin_copyfill_stateless.cl index 8da437a5f1..9e597584ca 100644 --- a/shared/test/common/test_files/builtin_copyfill_stateless.cl +++ b/shared/test/common/test_files/builtin_copyfill_stateless.cl @@ -11,7 +11,7 @@ __kernel void fullCopy(__global const uint* src, __global uint* dst) { vstore4(loaded, gid, dst); } -__kernel void CopyBufferToBufferBytes( +__kernel void CopyBufferToBufferBytesStateless( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, @@ -23,7 +23,7 @@ __kernel void CopyBufferToBufferBytes( pDst[ 0 ] = pSrc[ 0 ]; } -__kernel void CopyBufferToBufferLeftLeftover( +__kernel void CopyBufferToBufferLeftLeftoverStateless( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, @@ -33,7 +33,7 @@ __kernel void CopyBufferToBufferLeftLeftover( pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } -__kernel void CopyBufferToBufferMiddle( +__kernel void CopyBufferToBufferMiddleStateless( const __global uint* pSrc, __global uint* pDst, ulong srcOffsetInBytes, @@ -46,7 +46,7 @@ __kernel void CopyBufferToBufferMiddle( vstore4(loaded, gid, pDst); } -__kernel void CopyBufferToBufferMiddleMisaligned( +__kernel void CopyBufferToBufferMiddleMisalignedStateless( __global const uint* pSrc, __global uint* pDst, ulong srcOffsetInBytes, @@ -67,7 +67,7 @@ __kernel void CopyBufferToBufferMiddleMisaligned( vstore4(result, gid, pDst); } -__kernel void CopyBufferToBufferRightLeftover( +__kernel void CopyBufferToBufferRightLeftoverStateless( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, @@ -77,11 +77,11 @@ __kernel void CopyBufferToBufferRightLeftover( pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } -__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) { +__kernel void copyBufferToBufferBytesSingleStateless(__global uchar *dst, const __global uchar *src) { size_t gid = get_global_id(0); dst[gid] = (uchar)(src[gid]); } -__kernel void CopyBufferToBufferSideRegion( +__kernel void CopyBufferToBufferSideRegionStateless( __global uchar* pDst, const __global uchar* pSrc, ulong len, @@ -97,7 +97,7 @@ __kernel void CopyBufferToBufferSideRegion( } } -__kernel void CopyBufferToBufferMiddleRegion( +__kernel void CopyBufferToBufferMiddleRegionStateless( __global uint* pDst, const __global uint* pSrc, ulong elems, @@ -115,7 +115,7 @@ __kernel void CopyBufferToBufferMiddleRegion( } // assumption is local work size = pattern size -__kernel void FillBufferBytes( +__kernel void FillBufferBytesStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern ) @@ -125,7 +125,7 @@ __kernel void FillBufferBytes( pDst[dstIndex] = pPattern[srcIndex]; } -__kernel void FillBufferLeftLeftover( +__kernel void FillBufferLeftLeftoverStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, @@ -135,7 +135,7 @@ __kernel void FillBufferLeftLeftover( pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } -__kernel void FillBufferMiddle( +__kernel void FillBufferMiddleStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uint* pPattern, @@ -145,7 +145,7 @@ __kernel void FillBufferMiddle( ((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ]; } -__kernel void FillBufferRightLeftover( +__kernel void FillBufferRightLeftoverStateless( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, @@ -155,7 +155,7 @@ __kernel void FillBufferRightLeftover( pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } -__kernel void FillBufferImmediate( +__kernel void FillBufferImmediateStateless( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) @@ -165,7 +165,7 @@ __kernel void FillBufferImmediate( dstPtr[gid] = value; } -__kernel void FillBufferImmediateLeftOver( +__kernel void FillBufferImmediateLeftOverStateless( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) @@ -174,7 +174,7 @@ __kernel void FillBufferImmediateLeftOver( (ptr + dstSshOffset)[gid] = value; } -__kernel void FillBufferSSHOffset( +__kernel void FillBufferSSHOffsetStateless( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const __global uchar* pPattern, @@ -189,7 +189,7 @@ __kernel void FillBufferSSHOffset( } ////////////////////////////////////////////////////////////////////////////// -__kernel void CopyBufferRectBytes2d( +__kernel void CopyBufferRectBytes2dStateless( __global const char* src, __global char* dst, uint4 SrcOrigin, @@ -208,7 +208,7 @@ __kernel void CopyBufferRectBytes2d( } ////////////////////////////////////////////////////////////////////////////// -__kernel void CopyBufferRectBytes3d( +__kernel void CopyBufferRectBytes3dStateless( __global const char* src, __global char* dst, uint4 SrcOrigin, @@ -228,7 +228,7 @@ __kernel void CopyBufferRectBytes3d( } -__kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* dst, uint useOnlyGlobalTimestamps) { +__kernel void QueryKernelTimestampsStateless(__global ulong* srcEvents, __global ulong* dst, uint useOnlyGlobalTimestamps) { uint gid = get_global_id(0); uint currentOffset = gid * 4; dst[currentOffset] = 0; @@ -276,7 +276,7 @@ __kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* d } -__kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets, uint useOnlyGlobalTimestamps) { +__kernel void QueryKernelTimestampsWithOffsetsStateless(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets, uint useOnlyGlobalTimestamps) { uint gid = get_global_id(0); uint currentOffset = offsets[gid] / 8; dst[currentOffset] = 0; @@ -324,7 +324,7 @@ __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __glob } -__kernel void CopyBufferRectBytesMiddle2d( +__kernel void CopyBufferRectBytesMiddle2dStateless( const __global uint* src, __global uint* dst, ulong4 SrcOrigin, @@ -346,7 +346,7 @@ __kernel void CopyBufferRectBytesMiddle2d( vstore4(loaded,x,dst); } -__kernel void CopyBufferRectBytesMiddle3d( +__kernel void CopyBufferRectBytesMiddle3dStateless( const __global uint* src, __global uint* dst, ulong4 SrcOrigin,