diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 57671a143e..4209154461 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -180,6 +180,7 @@ cl_int Kernel::initialize() { auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &kernelDeviceInfo = kernelDeviceInfos[rootDeviceIndex]; auto &kernelInfo = *kernelInfos[rootDeviceIndex]; + auto &kernelDescriptor = kernelInfo.kernelDescriptor; auto maxSimdSize = kernelInfo.getMaxSimdSize(); const auto &workloadInfo = kernelInfo.workloadInfo; const auto &heapInfo = kernelInfo.heapInfo; @@ -335,12 +336,10 @@ cl_int Kernel::initialize() { patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), *patch); } - if (patchInfo.pAllocateStatelessEventPoolSurface) { - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), - patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset); - Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0); - } + if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) { + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), + kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful); + Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0); } if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { @@ -2461,24 +2460,20 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { } void Kernel::patchEventPool(DeviceQueue *devQueue) { - auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); - const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; - if (patchInfo.pAllocateStatelessEventPoolSurface) { - if (kernelDeviceInfos[rootDeviceIndex].crossThreadData) { - auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), - patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); + const auto &eventPoolSurfaceAddress = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; - patchWithRequiredSize(patchLocation, patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize, - static_cast(devQueue->getEventPoolBuffer()->getGpuAddressToPatch())); - } + if (isValidOffset(eventPoolSurfaceAddress.stateless) && kernelDeviceInfos[rootDeviceIndex].crossThreadData) { + auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), eventPoolSurfaceAddress.stateless); + patchWithRequiredSize(patchLocation, eventPoolSurfaceAddress.pointerSize, + static_cast(devQueue->getEventPoolBuffer()->getGpuAddressToPatch())); + } - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), - patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset); - Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(), - (void *)devQueue->getEventPoolBuffer()->getGpuAddress(), 0, devQueue->getEventPoolBuffer(), 0, 0); - } + if (isValidOffset(eventPoolSurfaceAddress.bindful)) { + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), eventPoolSurfaceAddress.bindful); + auto eventPoolBuffer = devQueue->getEventPoolBuffer(); + Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, eventPoolBuffer->getUnderlyingBufferSize(), + (void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0); } } diff --git a/opencl/source/kernel/kernel.inl b/opencl/source/kernel/kernel.inl index 84e4a69f87..dc622ccf99 100644 --- a/opencl/source/kernel/kernel.inl +++ b/opencl/source/kernel/kernel.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -27,14 +27,10 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf // clang-format off uint64_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ? pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset : ReflectionSurfaceHelper::undefinedOffset; - uint64_t eventPoolOffset = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface ? - pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset : ReflectionSurfaceHelper::undefinedOffset; uint64_t deviceQueueOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ? pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize : 0; - uint32_t eventPoolSize = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface ? - pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize : 0; uint32_t deviceQueueSize = 0; uint64_t printfBufferOffset = pBlockInfo->patchInfo.pAllocateStatelessPrintfSurface ? @@ -44,6 +40,14 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf uint64_t printfGpuAddress = 0; // clang-format on + uint64_t eventPoolOffset = ReflectionSurfaceHelper::undefinedOffset; + uint32_t eventPoolSize = 0U; + const auto &eventPoolSurfaceAddress = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + if (isValidOffset(eventPoolSurfaceAddress.stateless)) { + eventPoolOffset = eventPoolSurfaceAddress.stateless; + eventPoolSize = eventPoolSurfaceAddress.pointerSize; + } + uint64_t privateSurfaceOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t privateSurfacePatchSize = 0; uint64_t privateSurfaceGpuAddress = 0; diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index 77ff5798f3..fc73054365 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -320,11 +320,6 @@ void KernelInfo::storePatchToken(const SPatchAllocateStatelessPrintfSurface *pSt patchInfo.pAllocateStatelessPrintfSurface = pStatelessPrintfSurfaceArg; } -void KernelInfo::storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg) { - usesSsh |= true; - patchInfo.pAllocateStatelessEventPoolSurface = pStatelessEventPoolSurfaceArg; -} - void KernelInfo::storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg) { usesSsh |= true; patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = pStatelessDefaultDeviceQueueSurfaceArg; diff --git a/opencl/source/program/kernel_info.h b/opencl/source/program/kernel_info.h index 5f71fa02b7..d310625fab 100644 --- a/opencl/source/program/kernel_info.h +++ b/opencl/source/program/kernel_info.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -116,7 +116,6 @@ struct KernelInfo { void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg); void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg); void storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg); - void storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg); void storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg); void storePatchToken(const SPatchString *pStringArg); void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo); diff --git a/opencl/source/program/kernel_info_from_patchtokens.cpp b/opencl/source/program/kernel_info_from_patchtokens.cpp index e162b03d5a..eb294f6079 100644 --- a/opencl/source/program/kernel_info_from_patchtokens.cpp +++ b/opencl/source/program/kernel_info_from_patchtokens.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -185,7 +185,9 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization); storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization); storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrintfSurface); - storeTokenIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface); + if (nullptr != src.tokens.allocateStatelessEventPoolSurface) { + dst.usesSsh = true; + } storeTokenIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface); storeTokenIfNotNull(dst, src.tokens.allocateSyncBuffer); diff --git a/opencl/source/program/patch_info.h b/opencl/source/program/patch_info.h index d3e2452570..e2ccc982d8 100644 --- a/opencl/source/program/patch_info.h +++ b/opencl/source/program/patch_info.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -68,7 +68,6 @@ struct PatchInfo { const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; const SPatchAllocateStatelessPrintfSurface *pAllocateStatelessPrintfSurface = nullptr; - const SPatchAllocateStatelessEventPoolSurface *pAllocateStatelessEventPoolSurface = nullptr; const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr; const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr; ::std::unordered_map stringDataMap; diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 5cffc94b6d..89e94b3a75 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -463,10 +463,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *patchLocation); } - if (patchInfo.pAllocateStatelessEventPoolSurface) { - auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), - patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); - + const auto &eventPool = parentKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + if (isValidOffset(eventPool.stateless)) { + auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), eventPool.stateless); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *patchLocation); } } @@ -488,25 +487,21 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + const uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::getConstantBufferOffset(reflectionSurface, i); uint32_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset; - uint32_t eventPoolOffset = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset; - uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize; - uint32_t eventPoolSize = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize; - - uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::getConstantBufferOffset(reflectionSurface, i); - if (defaultQueueSize == sizeof(uint64_t)) { EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset)); } else { EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset)); } - if (eventPoolSize == sizeof(uint64_t)) { - EXPECT_EQ_VAL(pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + eventPoolOffset)); + const auto &eventPoolSurfaceAddress = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + if (eventPoolSurfaceAddress.pointerSize == sizeof(uint64_t)) { + EXPECT_EQ_VAL(pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless)); } else { - EXPECT_EQ((uint32_t)pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + eventPoolOffset)); + EXPECT_EQ((uint32_t)pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless)); } } } diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index dffcc400d4..4647951f31 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -713,12 +713,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh AllocateStatelessPrivateMemorySurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface; - SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; - AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 192; - AllocateStatelessEventPoolSurface.DataParamOffset = 24; - AllocateStatelessEventPoolSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; + SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; + allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 192; + allocateStatelessEventPoolSurface.DataParamOffset = 24; + allocateStatelessEventPoolSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessEventPoolSurface); SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 256; diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 25805a6773..2ecec238bf 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -1052,13 +1052,14 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK void *pCurbe = ptrOffset(reflectionSurfaceMemory, (size_t)(addressData[i].m_ConstantBufferOffset)); - if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) { - auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); - if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { + const auto &eventPoolSurfaceAddress = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + if (isValidOffset(eventPoolSurfaceAddress.stateless)) { + auto *patchedPointer = ptrOffset(pCurbe, eventPoolSurfaceAddress.stateless); + if (eventPoolSurfaceAddress.pointerSize == sizeof(uint32_t)) { uint32_t *patchedValue = static_cast(patchedPointer); uint64_t patchedValue64 = *patchedValue; EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), patchedValue64); - } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { + } else if (eventPoolSurfaceAddress.pointerSize == sizeof(uint64_t)) { uint64_t *patchedValue = static_cast(patchedPointer); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), *patchedValue); } @@ -1066,11 +1067,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); - if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { + if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize == sizeof(uint32_t)) { uint32_t *patchedValue = static_cast(patchedPointer); uint64_t patchedValue64 = *patchedValue; EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64); - } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { + } else if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize == sizeof(uint64_t)) { uint64_t *patchedValue = static_cast(patchedPointer); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue); } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index a480c6fc43..c9f792a6ef 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -438,7 +438,6 @@ TEST(PatchInfo, WhenPatchInfoIsCreatedThenMembersAreNullptr) { EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrintfSurface); - EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessEventPoolSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface); } @@ -1153,12 +1152,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup event pool surface - SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; - AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessEventPoolSurface.DataParamOffset = 0; - AllocateStatelessEventPoolSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; + SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; + allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; + allocateStatelessEventPoolSurface.DataParamOffset = 0; + allocateStatelessEventPoolSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessEventPoolSurface); // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); @@ -1180,7 +1178,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(0u, surfaceAddress); @@ -1197,12 +1195,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup event pool surface - SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; - AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessEventPoolSurface.DataParamOffset = 0; - AllocateStatelessEventPoolSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; + SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; + allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; + allocateStatelessEventPoolSurface.DataParamOffset = 0; + allocateStatelessEventPoolSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessEventPoolSurface); // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); @@ -1224,7 +1221,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), surfaceAddress); @@ -1239,7 +1236,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = nullptr; // create kernel MockProgram program(toClDeviceVector(*pClDevice)); @@ -1261,18 +1257,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent } HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched) { - // define kernel info auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup event pool surface - SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; - AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessEventPoolSurface.DataParamOffset = 0; - AllocateStatelessEventPoolSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; + SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; + allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; + allocateStatelessEventPoolSurface.DataParamOffset = 0; + allocateStatelessEventPoolSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessEventPoolSurface); // create kernel MockProgram program(toClDeviceVector(*pClDevice)); @@ -1292,18 +1287,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen } HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched) { - // define kernel info auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup event pool surface - SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; - AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; - AllocateStatelessEventPoolSurface.DataParamOffset = 0; - AllocateStatelessEventPoolSurface.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; + SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; + allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; + allocateStatelessEventPoolSurface.DataParamOffset = 0; + allocateStatelessEventPoolSurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessEventPoolSurface); // create kernel MockProgram program(toClDeviceVector(*pClDevice)); diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index a351cfbf61..49a78d81ec 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -123,12 +123,12 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA crossThreadOffsetBlock += 8; - SPatchAllocateStatelessEventPoolSurface *eventPoolBlock = new SPatchAllocateStatelessEventPoolSurface; - eventPoolBlock->DataParamOffset = crossThreadOffsetBlock; - eventPoolBlock->DataParamSize = 8; - eventPoolBlock->EventPoolSurfaceIndex = 0; - eventPoolBlock->Size = 8; - infoBlock->patchInfo.pAllocateStatelessEventPoolSurface = eventPoolBlock; + SPatchAllocateStatelessEventPoolSurface allocateEventPoolSurface = {}; + allocateEventPoolSurface.DataParamOffset = crossThreadOffsetBlock; + allocateEventPoolSurface.DataParamSize = 8; + allocateEventPoolSurface.EventPoolSurfaceIndex = 0; + allocateEventPoolSurface.Size = 8; + populateKernelDescriptor(infoBlock->kernelDescriptor, allocateEventPoolSurface); crossThreadOffsetBlock += 8; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 9e185af7af..597f0cfc58 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -26,6 +26,7 @@ namespace NEO { void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessEventPoolSurface &token); struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation { MockKernelObjForAuxTranslation(Type type) : KernelObjForAuxTranslation(type, nullptr) { @@ -435,6 +436,7 @@ class MockParentKernel : public Kernel { info->patchInfo.threadPayload = threadPayload; + info->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; info->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; info->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -448,12 +450,12 @@ class MockParentKernel : public Kernel { crossThreadOffset += 8; - SPatchAllocateStatelessEventPoolSurface *eventPool = new SPatchAllocateStatelessEventPoolSurface; - eventPool->DataParamOffset = crossThreadOffset; - eventPool->DataParamSize = 8; - eventPool->EventPoolSurfaceIndex = 0; - eventPool->Size = 8; - info->patchInfo.pAllocateStatelessEventPoolSurface = eventPool; + SPatchAllocateStatelessEventPoolSurface allocateEventPoolSurface = {}; + allocateEventPoolSurface.DataParamOffset = crossThreadOffset; + allocateEventPoolSurface.DataParamSize = 8; + allocateEventPoolSurface.EventPoolSurfaceIndex = 0; + allocateEventPoolSurface.Size = 8; + populateKernelDescriptor(info->kernelDescriptor, allocateEventPoolSurface); crossThreadOffset += 8; if (addPrintfForParent) { @@ -496,12 +498,12 @@ class MockParentKernel : public Kernel { crossThreadOffsetBlock += 8; - SPatchAllocateStatelessEventPoolSurface *eventPoolBlock = new SPatchAllocateStatelessEventPoolSurface; - eventPoolBlock->DataParamOffset = crossThreadOffsetBlock; - eventPoolBlock->DataParamSize = 8; - eventPoolBlock->EventPoolSurfaceIndex = 0; - eventPoolBlock->Size = 8; - infoBlock->patchInfo.pAllocateStatelessEventPoolSurface = eventPoolBlock; + SPatchAllocateStatelessEventPoolSurface allocateEventPoolSurfaceBlock = {}; + allocateEventPoolSurfaceBlock.DataParamOffset = crossThreadOffsetBlock; + allocateEventPoolSurfaceBlock.DataParamSize = 8; + allocateEventPoolSurfaceBlock.EventPoolSurfaceIndex = 0; + allocateEventPoolSurfaceBlock.Size = 8; + populateKernelDescriptor(infoBlock->kernelDescriptor, allocateEventPoolSurfaceBlock); crossThreadOffsetBlock += 8; if (addPrintfForBlock) { @@ -594,7 +596,6 @@ class MockParentKernel : public Kernel { } auto &kernelInfo = *pKernelInfo; delete kernelInfo.patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; - delete kernelInfo.patchInfo.pAllocateStatelessEventPoolSurface; delete kernelInfo.patchInfo.pAllocateStatelessPrintfSurface; delete kernelInfo.patchInfo.threadPayload; delete &kernelInfo; @@ -603,7 +604,6 @@ class MockParentKernel : public Kernel { for (uint32_t i = 0; i < blockManager->getCount(); i++) { const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); delete blockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; - delete blockInfo->patchInfo.pAllocateStatelessEventPoolSurface; delete blockInfo->patchInfo.pAllocateStatelessPrintfSurface; delete blockInfo->patchInfo.threadPayload; delete blockInfo->patchInfo.dataParameterStream; diff --git a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp index e0a80fed92..bc86dfecf4 100644 --- a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp +++ b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,14 +22,10 @@ TEST_F(KernelDataTest, GIVENpatchTokenAllocateStatelessEventPoolSurfaceWHENdecod buildAndDecode(); - EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.Token, - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->Token); - EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamOffset, - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); - EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamSize, - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize); - EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.SurfaceStateHeapOffset, - pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset); + const auto &eventPoolArg = pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamOffset, eventPoolArg.stateless); + EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamSize, eventPoolArg.pointerSize); + EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.SurfaceStateHeapOffset, eventPoolArg.bindful); } TEST_F(KernelDataTest, GIVENpatchTokenAllocateStatelessDefaultDeviceQueueSurfaceWHENdecodeTokensTHENtokenLocatedInPatchInfo) {