From 6f58ec9d2c45843bbac0efa7ebebaa2252a82925 Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Wed, 4 Oct 2023 15:55:40 +0000 Subject: [PATCH] fix: set walker params prior to adjusting interface descriptor Signed-off-by: Mrozek, Michal --- .../hardware_interface_bdw_and_later.inl | 10 +++---- .../hardware_interface_xehp_and_later.inl | 7 ++--- .../xe_hpc_core/enqueue_tests_xe_hpc_core.cpp | 27 +++++++++++++++++++ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl index 9d55cb60c5..91971f3059 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl @@ -77,6 +77,11 @@ inline void HardwareInterface::programWalker( auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); + GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo().kernelDescriptor, + globalOffsets, startWorkGroups, + numWorkGroups, walkerArgs.localWorkSizes, simd, dim, + false, false, 0u); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, @@ -95,11 +100,6 @@ inline void HardwareInterface::programWalker( kernelUsesLocalIds, commandQueue.getDevice()); - GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo().kernelDescriptor, - globalOffsets, startWorkGroups, - numWorkGroups, walkerArgs.localWorkSizes, simd, dim, - false, false, 0u); - EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), false, kernel.getKernelInfo().kernelDescriptor}; EncodeDispatchKernel::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs); *walkerCmdBuf = walkerCmd; diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 3315b70b3a..164ace4151 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -96,6 +96,10 @@ inline void HardwareInterface::programWalker( EncodeMemoryPrefetch::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.kernelHeapSize, 0, rootDeviceEnvironment); } + GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, + numWorkGroups, walkerArgs.localWorkSizes, simd, dim, + localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, @@ -114,9 +118,6 @@ inline void HardwareInterface::programWalker( localIdsGenerationByRuntime, commandQueue.getDevice()); - GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, - numWorkGroups, walkerArgs.localWorkSizes, simd, dim, - localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder); bool kernelSystemAllocation = false; if (kernel.isBuiltIn) { kernelSystemAllocation = kernel.getDestinationAllocationInSystemMemory(); diff --git a/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp index e2db11771f..7878256eb4 100644 --- a/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp @@ -103,6 +103,33 @@ XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenPrefetchEnabledWhenEstimati using ProgramWalkerTestsXeHpcCore = EnqueueFixtureXeHpcCore; +XE_HPC_CORETEST_F(ProgramWalkerTestsXeHpcCore, givenProperThreadGroupSizesWhenWalkerIsProgrammedThenThreadGroupDispatchSizeIsProperlyProgrammed) { + using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; + + auto commandQueue = createCommandQueue(); + auto &commandStream = commandQueue->getCS(1024); + + auto &heap = commandQueue->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 1); + size_t workSize[] = {1, 1, 1}; + Vec3 wgInfo = {1024, 1, 1}; + + HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(workSize, wgInfo, PreemptionMode::Disabled); + + { + HardwareInterface::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue, + heap, heap, heap, dispatchInfo, walkerArgs); + HardwareParse hwParse; + hwParse.parseCommands(commandStream, 0); + auto itorWalker = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); + EXPECT_NE(hwParse.cmdList.end(), itorWalker); + auto walkerCmd = genCmdCast(*itorWalker); + EXPECT_NE(nullptr, walkerCmd); + auto &idd = walkerCmd->getInterfaceDescriptor(); + + EXPECT_EQ(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, idd.getThreadGroupDispatchSize()); + } +} + XE_HPC_CORETEST_F(ProgramWalkerTestsXeHpcCore, givenDebugVariableSetWhenProgrammingWalkerThenSetL3Prefetch) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;