From 583a57c159ce12c37ce3a8f6e9c816353b964c2f Mon Sep 17 00:00:00 2001 From: Krystian Chmielewski Date: Fri, 26 Feb 2021 12:04:49 +0100 Subject: [PATCH] Use defaultQueueSurfaceAddress arg instead of patchToken Use KernelDescriptor's defaultQueueSurfaceAddress arg instead of storing SPatchAllocateStatelessDefaultDeviceQueueSurface token in KernelInfo's patchInfo. Related-To: NEO-4729 Signed-off-by: Krystian Chmielewski --- opencl/source/kernel/kernel.cpp | 37 +++++------- opencl/source/kernel/kernel.inl | 21 +++---- opencl/source/program/kernel_info.cpp | 5 -- opencl/source/program/kernel_info.h | 1 - .../program/kernel_info_from_patchtokens.cpp | 7 --- opencl/source/program/patch_info.h | 1 - .../enqueue_execution_model_kernel_tests.cpp | 28 +++++---- .../hardware_commands_helper_tests.cpp | 11 ++-- .../kernel_reflection_surface_tests.cpp | 9 +-- opencl/test/unit_test/kernel/kernel_tests.cpp | 57 +++++++++---------- .../unit_test/kernel/parent_kernel_tests.cpp | 12 ++-- opencl/test/unit_test/mocks/mock_kernel.h | 25 ++++---- .../unit_test/program/kernel_data_OCL2_0.cpp | 12 ++-- 13 files changed, 95 insertions(+), 131 deletions(-) diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 323b12dd01..5e38d2cd82 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -343,13 +343,10 @@ cl_int Kernel::initialize() { Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0); } - if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { - - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), - patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset); - Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0); - } + if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) { + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), + kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful); + Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0); } setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy()); @@ -2444,23 +2441,17 @@ void Kernel::provideInitializationHints() { } void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { - auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); - const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; - if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { - if (kernelDeviceInfos[rootDeviceIndex].crossThreadData) { - auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), - patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); - - patchWithRequiredSize(patchLocation, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize, - static_cast(devQueue->getQueueBuffer()->getGpuAddressToPatch())); - } - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), - patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset); - Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(), - (void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0); - } + const auto &defaultQueueSurfaceAddress = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; + if (isValidOffset(defaultQueueSurfaceAddress.stateless) && kernelDeviceInfos[rootDeviceIndex].crossThreadData) { + auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), defaultQueueSurfaceAddress.stateless); + patchWithRequiredSize(patchLocation, defaultQueueSurfaceAddress.pointerSize, + static_cast(devQueue->getQueueBuffer()->getGpuAddressToPatch())); + } + if (isValidOffset(defaultQueueSurfaceAddress.bindful)) { + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), defaultQueueSurfaceAddress.bindful); + Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(), + (void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0); } } diff --git a/opencl/source/kernel/kernel.inl b/opencl/source/kernel/kernel.inl index a101d0cdc6..6a2998bac2 100644 --- a/opencl/source/kernel/kernel.inl +++ b/opencl/source/kernel/kernel.inl @@ -24,16 +24,6 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - // clang-format off - uint64_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ? - pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset : ReflectionSurfaceHelper::undefinedOffset; - uint64_t deviceQueueOffset = ReflectionSurfaceHelper::undefinedOffset; - - uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ? - pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize : 0; - uint32_t deviceQueueSize = 0; - // clang-format on - uint64_t printfBufferOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t printfBufferPatchSize = 0U; const auto &printfSurface = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress; @@ -51,6 +41,17 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf eventPoolSize = eventPoolSurfaceAddress.pointerSize; } + uint64_t defaultQueueOffset = ReflectionSurfaceHelper::undefinedOffset; + uint32_t defaultQueueSize = 0U; + const auto &defaultQueueSurface = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; + if (isValidOffset(defaultQueueSurface.stateless)) { + defaultQueueOffset = defaultQueueSurface.stateless; + defaultQueueSize = defaultQueueSurface.pointerSize; + } + + uint64_t deviceQueueOffset = ReflectionSurfaceHelper::undefinedOffset; + uint32_t deviceQueueSize = 0; + uint64_t privateSurfaceOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t privateSurfacePatchSize = 0; uint64_t privateSurfaceGpuAddress = 0; diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index 1eca37d42b..dc1538b232 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -314,11 +314,6 @@ void KernelInfo::storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfac patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = pStatelessGlobalMemorySurfaceWithInitializationArg; } -void KernelInfo::storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg) { - usesSsh |= true; - patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = pStatelessDefaultDeviceQueueSurfaceArg; -} - void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) { this->patchInfo.pKernelAttributesInfo = pKernelAttributesInfo; attributes = reinterpret_cast(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo); diff --git a/opencl/source/program/kernel_info.h b/opencl/source/program/kernel_info.h index 032836e549..0f901cb205 100644 --- a/opencl/source/program/kernel_info.h +++ b/opencl/source/program/kernel_info.h @@ -115,7 +115,6 @@ struct KernelInfo { void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg); void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg); void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg); - void storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg); void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo); void storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface); void storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer); diff --git a/opencl/source/program/kernel_info_from_patchtokens.cpp b/opencl/source/program/kernel_info_from_patchtokens.cpp index 18b202959d..7e73f8b30b 100644 --- a/opencl/source/program/kernel_info_from_patchtokens.cpp +++ b/opencl/source/program/kernel_info_from_patchtokens.cpp @@ -184,13 +184,6 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface); storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization); storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization); - if (nullptr != src.tokens.allocateStatelessEventPoolSurface) { - dst.usesSsh = true; - } - if (nullptr != src.tokens.allocateStatelessPrintfSurface) { - dst.usesSsh = true; - } - storeTokenIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface); storeTokenIfNotNull(dst, src.tokens.allocateSyncBuffer); dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr); diff --git a/opencl/source/program/patch_info.h b/opencl/source/program/patch_info.h index f47f617c5b..74c1726b19 100644 --- a/opencl/source/program/patch_info.h +++ b/opencl/source/program/patch_info.h @@ -67,7 +67,6 @@ struct PatchInfo { const SPatchAllocateSyncBuffer *pAllocateSyncBuffer = nullptr; const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; - const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr; const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr; }; diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 89e94b3a75..485d94e6ff 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -454,18 +454,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - const auto &patchInfo = parentKernel->getKernelInfo(rootDeviceIndex).patchInfo; - - if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { - auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), - patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); + const auto &implicitArgs = parentKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs; + const auto &defaultQueueSurfaceAddress = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; + if (isValidOffset(defaultQueueSurfaceAddress.stateless)) { + auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), defaultQueueSurfaceAddress.stateless); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *patchLocation); } - const auto &eventPool = parentKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; - if (isValidOffset(eventPool.stateless)) { - auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), eventPool.stateless); + const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + if (isValidOffset(eventPoolSurfaceAddress.stateless)) { + auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), eventPoolSurfaceAddress.stateless); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *patchLocation); } } @@ -486,18 +485,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq uint32_t blockCount = static_cast(blockManager->getCount()); for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + const auto implicitArgs = blockManager->getBlockKernelInfo(i)->kernelDescriptor.payloadMappings.implicitArgs; const uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::getConstantBufferOffset(reflectionSurface, i); - uint32_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset; - uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize; - if (defaultQueueSize == sizeof(uint64_t)) { - EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset)); + const auto &defaultQueue = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; + if (defaultQueue.pointerSize == sizeof(uint64_t)) { + EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless)); } else { - EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset)); + EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless)); } - const auto &eventPoolSurfaceAddress = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; if (eventPoolSurfaceAddress.pointerSize == sizeof(uint64_t)) { EXPECT_EQ_VAL(pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless)); } else { diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 4647951f31..b91c8dee6c 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -719,12 +719,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh allocateStatelessEventPoolSurface.DataParamSize = 8; populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessEventPoolSurface); - SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; - AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 256; - AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 32; - AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface; + allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 256; + allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 32; + allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessDefaultDeviceQueueSurface); // create program with valid context MockContext context; diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 22a41fe60f..ab7b1710b1 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -1065,13 +1065,14 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK } } - if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { - auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); - if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize == sizeof(uint32_t)) { + const auto &defaultQueueSurfaceAddress = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; + if (isValidOffset(defaultQueueSurfaceAddress.stateless)) { + auto *patchedPointer = ptrOffset(pCurbe, defaultQueueSurfaceAddress.stateless); + if (defaultQueueSurfaceAddress.pointerSize == sizeof(uint32_t)) { uint32_t *patchedValue = static_cast(patchedPointer); uint64_t patchedValue64 = *patchedValue; EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64); - } else if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize == sizeof(uint64_t)) { + } else if (defaultQueueSurfaceAddress.pointerSize == sizeof(uint64_t)) { uint64_t *patchedValue = static_cast(patchedPointer); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue); } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 28d901b809..8537574a87 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -437,7 +437,6 @@ TEST(PatchInfo, WhenPatchInfoIsCreatedThenMembersAreNullptr) { EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization); - EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface); } typedef Test KernelPrivateSurfaceTest; @@ -1324,12 +1323,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup default device queue surface - SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; - AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface = {}; + allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; + allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; + allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessDefaultDeviceQueueSurface); // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); @@ -1351,7 +1349,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(0u, surfaceAddress); @@ -1368,12 +1366,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup default device queue surface - SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; - AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface = {}; + allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; + allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; + allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessDefaultDeviceQueueSurface); // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); @@ -1397,7 +1394,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), surfaceAddress); @@ -1411,21 +1408,20 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK // define kernel info auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup default device queue surface - SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; - AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface = {}; + allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; + allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessDefaultDeviceQueueSurface); // create kernel MockProgram program(toClDeviceVector(*pClDevice)); MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); - // define stateful path + // define stateless path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; @@ -1441,13 +1437,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWith // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = nullptr; + pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; // create kernel MockProgram program(toClDeviceVector(*pClDevice)); MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); - // define stateful path + // define stateless path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; @@ -1467,20 +1463,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; + pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; // setup default device queue surface - SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; - AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; - AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface = {}; + allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; + allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessDefaultDeviceQueueSurface); // create kernel MockProgram program(toClDeviceVector(*pClDevice)); MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); - // define stateful path + // define stateless path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index 49a78d81ec..f8638175c5 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -114,12 +114,12 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA uint32_t crossThreadOffsetBlock = 0; auto infoBlock = new KernelInfo(); - SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueueBlock = new SPatchAllocateStatelessDefaultDeviceQueueSurface; - allocateDeviceQueueBlock->DataParamOffset = crossThreadOffsetBlock; - allocateDeviceQueueBlock->DataParamSize = 8; - allocateDeviceQueueBlock->SurfaceStateHeapOffset = 0; - allocateDeviceQueueBlock->Size = 8; - infoBlock->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = allocateDeviceQueueBlock; + infoBlock->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateDeviceQueueSurface = {}; + allocateDeviceQueueSurface.DataParamOffset = crossThreadOffsetBlock; + allocateDeviceQueueSurface.DataParamSize = 8; + allocateDeviceQueueSurface.Size = 8; + populateKernelDescriptor(infoBlock->kernelDescriptor, allocateDeviceQueueSurface); crossThreadOffsetBlock += 8; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index fb29fb6a06..d570ad2cc6 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -28,6 +28,7 @@ namespace NEO { void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrintfSurface &token); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessEventPoolSurface &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessDefaultDeviceQueueSurface &token); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchString &token); struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation { @@ -444,12 +445,11 @@ class MockParentKernel : public Kernel { info->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; info->kernelDescriptor.kernelAttributes.simdSize = 32; - SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueue = new SPatchAllocateStatelessDefaultDeviceQueueSurface; - allocateDeviceQueue->DataParamOffset = crossThreadOffset; - allocateDeviceQueue->DataParamSize = 8; - allocateDeviceQueue->SurfaceStateHeapOffset = 0; - allocateDeviceQueue->Size = 8; - info->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = allocateDeviceQueue; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateDeviceQueueSurface = {}; + allocateDeviceQueueSurface.DataParamOffset = crossThreadOffset; + allocateDeviceQueueSurface.DataParamSize = 8; + allocateDeviceQueueSurface.Size = 8; + populateKernelDescriptor(info->kernelDescriptor, allocateDeviceQueueSurface); crossThreadOffset += 8; @@ -494,12 +494,11 @@ class MockParentKernel : public Kernel { infoBlock->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; - SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueueBlock = new SPatchAllocateStatelessDefaultDeviceQueueSurface; - allocateDeviceQueueBlock->DataParamOffset = crossThreadOffsetBlock; - allocateDeviceQueueBlock->DataParamSize = 8; - allocateDeviceQueueBlock->SurfaceStateHeapOffset = 0; - allocateDeviceQueueBlock->Size = 8; - infoBlock->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = allocateDeviceQueueBlock; + SPatchAllocateStatelessDefaultDeviceQueueSurface allocateDeviceQueueSurfaceBlock = {}; + allocateDeviceQueueSurfaceBlock.DataParamOffset = crossThreadOffsetBlock; + allocateDeviceQueueSurfaceBlock.DataParamSize = 8; + allocateDeviceQueueSurfaceBlock.Size = 8; + populateKernelDescriptor(infoBlock->kernelDescriptor, allocateDeviceQueueSurfaceBlock); crossThreadOffsetBlock += 8; @@ -599,14 +598,12 @@ class MockParentKernel : public Kernel { continue; } auto &kernelInfo = *pKernelInfo; - delete kernelInfo.patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; delete kernelInfo.patchInfo.threadPayload; delete &kernelInfo; BlockKernelManager *blockManager = program->getBlockKernelManager(); for (uint32_t i = 0; i < blockManager->getCount(); i++) { const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); - delete blockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; delete blockInfo->patchInfo.threadPayload; delete blockInfo->patchInfo.dataParameterStream; delete blockInfo->patchInfo.bindingTableState; diff --git a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp index bc86dfecf4..a3aefe9bb6 100644 --- a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp +++ b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp @@ -42,14 +42,10 @@ TEST_F(KernelDataTest, GIVENpatchTokenAllocateStatelessDefaultDeviceQueueSurface buildAndDecode(); - EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.Token, - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->Token); - EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamOffset, - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); - EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamSize, - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize); - EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset, - pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset); + const auto &defaultQueueSurfaceAddress = pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; + EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamOffset, defaultQueueSurfaceAddress.stateless); + EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamSize, defaultQueueSurfaceAddress.pointerSize); + EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset, defaultQueueSurfaceAddress.bindful); } TEST_F(KernelDataTest, GIVENpatchTokenStatelessDeviceQueueKernelArgumentWHENdecodeTokensTHENapropriateKernelArgInfoFilled) {