diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index f7d43933f4..25e073d8d1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -305,7 +305,7 @@ struct CommandListCoreFamily : public CommandListImp { ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]); ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions); - void programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t additionalSizeParam); + void programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t localRegionSize); void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation); void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition, bool copyOperation); void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 37ea875b23..f514633c93 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2821,14 +2821,14 @@ ze_result_t CommandListCoreFamily::programSyncBuffer(Kernel &kern } template -void CommandListCoreFamily::programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t additionalSizeParam) { +void CommandListCoreFamily::programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t localRegionSize) { auto neoDevice = device->getNEODevice(); neoDevice->allocateSyncBufferHandler(); const size_t requestedNumberOfWorkgroups = threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY * threadGroupDimensions.groupCountZ; - size_t size = alignUp((requestedNumberOfWorkgroups / additionalSizeParam) * (additionalSizeParam + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize); + size_t size = alignUp((requestedNumberOfWorkgroups / localRegionSize) * (localRegionSize + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize); auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(size); @@ -4279,11 +4279,11 @@ template void CommandListCoreFamily::setAdditionalKernelLaunchParams(CmdListKernelLaunchParams &launchParams, Kernel &kernel) const { auto &kernelDescriptor = kernel.getImmutableData()->getDescriptor(); - if (launchParams.additionalSizeParam == NEO::additionalKernelLaunchSizeParamNotSet) { - launchParams.additionalSizeParam = kernelDescriptor.kernelAttributes.additionalSize; + if (launchParams.localRegionSize == NEO::localRegionSizeParamNotSet) { + launchParams.localRegionSize = kernelDescriptor.kernelAttributes.localRegionSize; } if (launchParams.requiredDispatchWalkOrder == NEO::RequiredDispatchWalkOrder::none) { - launchParams.requiredDispatchWalkOrder = kernelDescriptor.kernelAttributes.walkOrder; + launchParams.requiredDispatchWalkOrder = kernelDescriptor.kernelAttributes.dispatchWalkOrder; } if (launchParams.requiredPartitionDim == NEO::RequiredPartitionDim::none) { launchParams.requiredPartitionDim = kernelDescriptor.kernelAttributes.partitionDim; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 41979f7398..c1f3b7de88 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -207,7 +207,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K commandListPreemptionMode, // preemptionMode launchParams.requiredPartitionDim, // requiredPartitionDim launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder - launchParams.additionalSizeParam, // additionalSizeParam + launchParams.localRegionSize, // localRegionSize 0, // partitionCount launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace maxWgCountPerTile, // maxWgCountPerTile diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index c92b5efa08..142bd5a64a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -256,7 +256,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } if (kernel->usesRegionGroupBarrier()) { - programRegionGroupBarrier(*kernel, threadGroupDimensions, launchParams.additionalSizeParam); + programRegionGroupBarrier(*kernel, threadGroupDimensions, launchParams.localRegionSize); } bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs()); @@ -331,7 +331,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K kernelPreemptionMode, // preemptionMode launchParams.requiredPartitionDim, // requiredPartitionDim launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder - launchParams.additionalSizeParam, // additionalSizeParam + launchParams.localRegionSize, // localRegionSize this->partitionCount, // partitionCount launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace maxWgCountPerTile, // maxWgCountPerTile diff --git a/level_zero/core/source/cmdlist/cmdlist_launch_params.h b/level_zero/core/source/cmdlist/cmdlist_launch_params.h index 007cc81fdc..dbb0b2fbae 100644 --- a/level_zero/core/source/cmdlist/cmdlist_launch_params.h +++ b/level_zero/core/source/cmdlist/cmdlist_launch_params.h @@ -54,7 +54,7 @@ struct CmdListKernelLaunchParams { uint32_t externalPerThreadScratchSize[2] = {0U, 0U}; NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none; NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; - uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet; + uint32_t localRegionSize = NEO::localRegionSizeParamNotSet; uint32_t numKernelsInSplitLaunch = 0; uint32_t numKernelsExecutedInSplitLaunch = 0; uint32_t reserveExtraPayloadSpace = 0; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 94cb7a56e2..b9564edf4e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -65,13 +65,13 @@ HWTEST2_F(MultiTileImmediateCommandListTest, givenMultipleTilesWhenAllocatingBar size_t requestedNumberOfWorkgroups = threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY * threadGroupDimensions.groupCountZ; - size_t additionalSizeParam = 4; + size_t localRegionSize = 4; - whiteBoxCmdList->programRegionGroupBarrier(mockKernel, threadGroupDimensions, additionalSizeParam); + whiteBoxCmdList->programRegionGroupBarrier(mockKernel, threadGroupDimensions, localRegionSize); auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(1); - size_t expectedOffset = alignUp((requestedNumberOfWorkgroups / additionalSizeParam) * (additionalSizeParam + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize); + size_t expectedOffset = alignUp((requestedNumberOfWorkgroups / localRegionSize) * (localRegionSize + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize); EXPECT_EQ(patchData.second, expectedOffset); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 279dc728f9..605bd01237 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -196,44 +196,44 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA const uint32_t threadGroupDimensions[3] = {1, 1, 1}; NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - 0, // eventAddress - 0, // postSyncImmValue - 0, // inOrderCounterValue - device->getNEODevice(), // device - nullptr, // inOrderExecInfo - kernel.get(), // dispatchInterface - nullptr, // surfaceStateHeap - nullptr, // dynamicStateHeap - threadGroupDimensions, // threadGroupDimensions - nullptr, // outWalkerPtr - nullptr, // cpuWalkerBuffer - nullptr, // cpuPayloadBuffer - nullptr, // outImplicitArgsPtr - nullptr, // additionalCommands - PreemptionMode::MidBatch, // preemptionMode - NEO::RequiredPartitionDim::none, // requiredPartitionDim - NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder - NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam - 0, // partitionCount - 0, // reserveExtraPayloadSpace - 1, // maxWgCountPerTile - NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy - false, // isIndirect - false, // isPredicate - false, // isTimestampEvent - false, // requiresUncachedMocs - false, // isInternal - false, // isCooperative - false, // isHostScopeSignalEvent - false, // isKernelUsingSystemAllocation - false, // isKernelDispatchedFromImmediateCmdList - false, // isRcs - commandList->getDcFlushRequired(true), // dcFlushEnable - false, // isHeaplessModeEnabled - false, // isHeaplessStateInitEnabled - false, // interruptEvent - false, // immediateScratchAddressPatching - false, // makeCommandView + 0, // eventAddress + 0, // postSyncImmValue + 0, // inOrderCounterValue + device->getNEODevice(), // device + nullptr, // inOrderExecInfo + kernel.get(), // dispatchInterface + nullptr, // surfaceStateHeap + nullptr, // dynamicStateHeap + threadGroupDimensions, // threadGroupDimensions + nullptr, // outWalkerPtr + nullptr, // cpuWalkerBuffer + nullptr, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr + nullptr, // additionalCommands + PreemptionMode::MidBatch, // preemptionMode + NEO::RequiredPartitionDim::none, // requiredPartitionDim + NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder + NEO::localRegionSizeParamNotSet, // localRegionSize + 0, // partitionCount + 0, // reserveExtraPayloadSpace + 1, // maxWgCountPerTile + NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy + false, // isIndirect + false, // isPredicate + false, // isTimestampEvent + false, // requiresUncachedMocs + false, // isInternal + false, // isCooperative + false, // isHostScopeSignalEvent + false, // isKernelUsingSystemAllocation + false, // isKernelDispatchedFromImmediateCmdList + false, // isRcs + commandList->getDcFlushRequired(true), // dcFlushEnable + false, // isHeaplessModeEnabled + false, // isHeaplessStateInitEnabled + false, // interruptEvent + false, // immediateScratchAddressPatching + false, // makeCommandView }; NEO::EncodeDispatchKernel::template encode(commandContainer, dispatchKernelArgs); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 928e6285b8..12c2a5e44a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -541,7 +541,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA std::unique_ptr cmdList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, result)); CmdListKernelLaunchParams launchParams = {}; - launchParams.additionalSizeParam = 4; + launchParams.localRegionSize = 4; EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false)); auto patchPtr = *reinterpret_cast(ptrOffset(kernel.crossThreadData.get(), regionGroupBarrier.stateless)); @@ -572,7 +572,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA size_t requestedNumberOfWorkgroups = groupCount.groupCountX * groupCount.groupCountY * groupCount.groupCountZ; - auto offset = alignUp((requestedNumberOfWorkgroups / launchParams.additionalSizeParam) * (launchParams.additionalSizeParam + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize); + auto offset = alignUp((requestedNumberOfWorkgroups / launchParams.localRegionSize) * (launchParams.localRegionSize + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize); EXPECT_EQ(patchPtr2, patchPtr + offset); } @@ -778,44 +778,44 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA const uint32_t threadGroupDimensions[3] = {1, 1, 1}; NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - 0, // eventAddress - 0, // postSyncImmValue - 0, // inOrderCounterValue - device->getNEODevice(), // device - nullptr, // inOrderExecInfo - kernel.get(), // dispatchInterface - nullptr, // surfaceStateHeap - nullptr, // dynamicStateHeap - threadGroupDimensions, // threadGroupDimensions - nullptr, // outWalkerPtr - nullptr, // cpuWalkerBuffer - nullptr, // cpuPayloadBuffer - nullptr, // outImplicitArgsPtr - nullptr, // additionalCommands - PreemptionMode::MidBatch, // preemptionMode - NEO::RequiredPartitionDim::none, // requiredPartitionDim - NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder - NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam - 0, // partitionCount - 0, // reserveExtraPayloadSpace - 1, // maxWgCountPerTile - NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy - false, // isIndirect - false, // isPredicate - false, // isTimestampEvent - false, // requiresUncachedMocs - false, // isInternal - false, // isCooperative - false, // isHostScopeSignalEvent - false, // isKernelUsingSystemAllocation - false, // isKernelDispatchedFromImmediateCmdList - false, // isRcs - commandList->getDcFlushRequired(true), // dcFlushEnable - false, // isHeaplessModeEnabled - false, // isHeaplessStateInitEnabled - false, // interruptEvent - false, // immediateScratchAddressPatching - false, // makeCommandView + 0, // eventAddress + 0, // postSyncImmValue + 0, // inOrderCounterValue + device->getNEODevice(), // device + nullptr, // inOrderExecInfo + kernel.get(), // dispatchInterface + nullptr, // surfaceStateHeap + nullptr, // dynamicStateHeap + threadGroupDimensions, // threadGroupDimensions + nullptr, // outWalkerPtr + nullptr, // cpuWalkerBuffer + nullptr, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr + nullptr, // additionalCommands + PreemptionMode::MidBatch, // preemptionMode + NEO::RequiredPartitionDim::none, // requiredPartitionDim + NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder + NEO::localRegionSizeParamNotSet, // localRegionSize + 0, // partitionCount + 0, // reserveExtraPayloadSpace + 1, // maxWgCountPerTile + NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy + false, // isIndirect + false, // isPredicate + false, // isTimestampEvent + false, // requiresUncachedMocs + false, // isInternal + false, // isCooperative + false, // isHostScopeSignalEvent + false, // isKernelUsingSystemAllocation + false, // isKernelDispatchedFromImmediateCmdList + false, // isRcs + commandList->getDcFlushRequired(true), // dcFlushEnable + false, // isHeaplessModeEnabled + false, // isHeaplessStateInitEnabled + false, // interruptEvent + false, // immediateScratchAddressPatching + false, // makeCommandView }; EXPECT_THROW(NEO::EncodeDispatchKernel::template encode(commandContainer, dispatchKernelArgs), std::exception); } diff --git a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl index 4928b13249..1c615a4ab8 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl @@ -87,7 +87,7 @@ inline void HardwareInterface::programWalker( kernel.getKernelInfo().kernelDescriptor, // kernelDescriptor kernel.getExecutionType(), // kernelExecutionType RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder - 0, // additionalSizeParam + 0, // localRegionSize 0, // maxFrontEndThreads false}; // requiredSystemFence EncodeDispatchKernel::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs); diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 8adea3a630..9d1eefcdea 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -146,12 +146,12 @@ inline void HardwareInterface::programWalker( auto maxFrontEndThreads = device.getDeviceInfo().maxFrontEndThreads; EncodeWalkerArgs encodeWalkerArgs{ - kernelInfo.kernelDescriptor, // kernelDescriptor - kernel.getExecutionType(), // kernelExecutionType - kernelAttributes.walkOrder, // requiredDispatchWalkOrder - kernelAttributes.additionalSize, // additionalSizeParam - maxFrontEndThreads, // maxFrontEndThreads - requiredSystemFence}; // requiredSystemFence + kernelInfo.kernelDescriptor, // kernelDescriptor + kernel.getExecutionType(), // kernelExecutionType + kernelAttributes.dispatchWalkOrder, // requiredDispatchWalkOrder + kernelAttributes.localRegionSize, // localRegionSize + maxFrontEndThreads, // maxFrontEndThreads + requiredSystemFence}; // requiredSystemFence EncodeDispatchKernel::template encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs); EncodeDispatchKernel::template encodeWalkerPostSyncFields(walkerCmd, encodeWalkerArgs); @@ -192,7 +192,7 @@ inline void HardwareInterface::programWalker( queueCsr.getDcFlushSupport(), // dcFlush kernel.isSingleSubdevicePreferred(), // forceExecutionOnSingleTile false, // blockDispatchToCommandBuffer - requiredWalkOrder != 0}; // isRequiredWorkGroupOrder + requiredWalkOrder != 0}; // isRequiredDispatchWorkGroupOrder ImplicitScalingDispatch::template dispatchCommands(commandStream, walkerCmd, diff --git a/opencl/test/unit_test/xe2_hpg_core/dispatch_walker_tests_xe2_hpg_core.cpp b/opencl/test/unit_test/xe2_hpg_core/dispatch_walker_tests_xe2_hpg_core.cpp index 72f259ff79..c890dce04a 100644 --- a/opencl/test/unit_test/xe2_hpg_core/dispatch_walker_tests_xe2_hpg_core.cpp +++ b/opencl/test/unit_test/xe2_hpg_core/dispatch_walker_tests_xe2_hpg_core.cpp @@ -30,7 +30,7 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel kernelDescriptor, // kernelDescriptor KernelExecutionType::concurrent, // kernelExecutionType NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder - 0, // additionalSizeParam + 0, // localRegionSize 0, // maxFrontEndThreads true}; // requiredSystemFence { diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 702359ea47..51eb330c52 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -63,7 +63,7 @@ struct EncodeDispatchKernelArgs { PreemptionMode preemptionMode = PreemptionMode::Initial; NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none; NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; - uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet; + uint32_t localRegionSize = NEO::localRegionSizeParamNotSet; uint32_t partitionCount = 0u; uint32_t reserveExtraPayloadSpace = 0; uint32_t maxWgCountPerTile = 0; @@ -109,7 +109,7 @@ struct EncodeWalkerArgs { const KernelDescriptor &kernelDescriptor; KernelExecutionType kernelExecutionType = KernelExecutionType::defaultType; NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; - uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet; + uint32_t localRegionSize = NEO::localRegionSizeParamNotSet; uint32_t maxFrontEndThreads = 0; bool requiredSystemFence = false; }; @@ -188,7 +188,7 @@ struct EncodeDispatchKernel { static void setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); template - static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder); + static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder); template static void setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 47cc6d5ab3..477ce4b8cc 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -283,7 +283,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis kernelDescriptor, // kernelDescriptor KernelExecutionType::defaultType, // kernelExecutionType args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder - args.additionalSizeParam, // additionalSizeParam + args.localRegionSize, // localRegionSize args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads args.requiresSystemMemoryFence()}; // requiredSystemFence EncodeDispatchKernel::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs); @@ -684,7 +684,7 @@ void EncodeDispatchKernel::encodeEuSchedulingPolicy(InterfaceDescriptorT template template -void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {} +void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {} template template diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index 00a2a81223..0d361da417 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -30,7 +30,7 @@ template uint64_t NEO::EncodeDispatchKernel::getScratchAddressForImmedia template void NEO::EncodeDispatchKernel::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired); template void NEO::EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(Family::DefaultWalkerType &walkerCmd); template void NEO::EncodeDispatchKernel::setWalkerRegionSettings(Family::DefaultWalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, - uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder); + uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder); template void NEO::EncodeDispatchKernel::overrideDefaultValues(Family::DefaultWalkerType &walkerCmd, Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor); template void NEO::EncodeDispatchKernel::encodeWalkerPostSyncFields(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); template void NEO::EncodeDispatchKernel::encodeComputeDispatchAllWalker(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); diff --git a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl index f8a9391e8a..b9fc5e50b2 100644 --- a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl +++ b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl @@ -90,6 +90,6 @@ void EncodeDispatchKernel::adjustTimestampPacket(WalkerType &walkerCmd, template template -void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {} +void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {} } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 170483a8b0..04a424e6b9 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -402,7 +402,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis kernelDescriptor, // kernelDescriptor kernelExecutionType, // kernelExecutionType args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder - args.additionalSizeParam, // additionalSizeParam + args.localRegionSize, // localRegionSize args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads args.requiresSystemMemoryFence()}; // requiresMemoryFence EncodeDispatchKernel::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs); @@ -412,7 +412,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis EncodeDispatchKernel::overrideDefaultValues(walkerCmd, idd); uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2]; - bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none; + bool isRequiredDispatchWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none; if (args.partitionCount > 1 && !args.isInternal) { const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); @@ -429,7 +429,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.dcFlushEnable, // dcFlush EncodeDispatchKernel::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile args.makeCommandView, // blockDispatchToCommandBuffer - isRequiredWorkGroupOrder}; // isRequiredWorkGroupOrder + isRequiredDispatchWorkGroupOrder}; // isRequiredDispatchWorkGroupOrder ImplicitScalingDispatch::dispatchCommands(*listCmdBufferStream, walkerCmd, @@ -438,7 +438,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.partitionCount = implicitScalingArgs.partitionCount; } else { args.partitionCount = 1; - EncodeDispatchKernel::setWalkerRegionSettings(walkerCmd, *args.device, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder); + EncodeDispatchKernel::setWalkerRegionSettings(walkerCmd, *args.device, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredDispatchWorkGroupOrder); if (!args.makeCommandView) { auto buffer = listCmdBufferStream->getSpaceForCmd(); diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h index 0923f595be..f546a1de22 100644 --- a/shared/source/command_container/implicit_scaling.h +++ b/shared/source/command_container/implicit_scaling.h @@ -53,7 +53,7 @@ struct ImplicitScalingDispatchCommandArgs { bool dcFlush = false; bool forceExecutionOnSingleTile = false; bool blockDispatchToCommandBuffer = false; - bool isRequiredWorkGroupOrder = false; + bool isRequiredDispatchWorkGroupOrder = false; }; template diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index a036532ec6..9f596f17be 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -51,7 +51,7 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(ImplicitScalingD args.workgroupSize = dispatchCommandArgs.workgroupSize; args.maxWgCountPerTile = dispatchCommandArgs.maxWgCountPerTile; - args.isRequiredWorkGroupOrder = dispatchCommandArgs.isRequiredWorkGroupOrder; + args.isRequiredDispatchWorkGroupOrder = dispatchCommandArgs.isRequiredDispatchWorkGroupOrder; return args; } diff --git a/shared/source/command_container/walker_partition_interface.h b/shared/source/command_container/walker_partition_interface.h index 5e6984b71f..b5c9de723f 100644 --- a/shared/source/command_container/walker_partition_interface.h +++ b/shared/source/command_container/walker_partition_interface.h @@ -35,7 +35,7 @@ struct WalkerPartitionArgs { bool dcFlushEnable = false; bool forceExecutionOnSingleTile = false; bool blockDispatchToCommandBuffer = false; - bool isRequiredWorkGroupOrder = false; + bool isRequiredDispatchWorkGroupOrder = false; }; inline constexpr uint32_t wparidCCSOffset = 0x221C; diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index 195df384ed..3a9e21b8cd 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -531,7 +531,7 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm args.partitionCount, args.workgroupSize, args.maxWgCountPerTile, - args.isRequiredWorkGroupOrder); + args.isRequiredDispatchWorkGroupOrder); appendWalkerFields(*inputWalker, args.tileCount, workgroupCount); } diff --git a/shared/source/device_binary_format/zebin/zeinfo.h b/shared/source/device_binary_format/zebin/zeinfo.h index 1ec0fa717a..aa6be4aaa0 100644 --- a/shared/source/device_binary_format/zebin/zeinfo.h +++ b/shared/source/device_binary_format/zebin/zeinfo.h @@ -350,7 +350,7 @@ using IndirectStatelessCountT = int32_t; using HasSampleT = bool; using PrivateSizeT = int32_t; using SpillSizeT = int32_t; -using AdditionalSizeT = int32_t; +using LocalRegionSizeT = int32_t; using WalkOrderT = int32_t; using PartitionDimT = int32_t; @@ -384,8 +384,8 @@ inline constexpr IndirectStatelessCountT indirectStatelessCount = 0; inline constexpr HasSampleT hasSample = false; inline constexpr PrivateSizeT privateSize = 0; inline constexpr SpillSizeT spillSize = 0; -inline constexpr AdditionalSizeT additionalSize = -1; -inline constexpr WalkOrderT walkOrder = -1; +inline constexpr LocalRegionSizeT localRegionSize = -1; +inline constexpr WalkOrderT dispatchWalkOrder = -1; inline constexpr PartitionDimT partitionDim = -1; } // namespace Defaults @@ -422,8 +422,8 @@ struct ExecutionEnvBaseT { HasSampleT hasSample = Defaults::hasSample; PrivateSizeT privateSize = Defaults::privateSize; SpillSizeT spillSize = Defaults::spillSize; - AdditionalSizeT additionalSize = Defaults::additionalSize; - WalkOrderT walkOrder = Defaults::walkOrder; + LocalRegionSizeT localRegionSize = Defaults::localRegionSize; + WalkOrderT dispatchWalkOrder = Defaults::dispatchWalkOrder; PartitionDimT partitionDim = Defaults::partitionDim; }; diff --git a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp index ea3549515c..23770f2078 100644 --- a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp +++ b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp @@ -718,11 +718,11 @@ void populateKernelExecutionEnvironment(KernelDescriptor &dst, const KernelExecu dst.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0); dst.kernelAttributes.numThreadsRequired = static_cast(execEnv.euThreadCount); - if (execEnv.additionalSize != Types::Kernel::ExecutionEnv::Defaults::additionalSize) { - dst.kernelAttributes.additionalSize = static_cast(execEnv.additionalSize); + if (execEnv.localRegionSize != Types::Kernel::ExecutionEnv::Defaults::localRegionSize) { + dst.kernelAttributes.localRegionSize = static_cast(execEnv.localRegionSize); } - if (execEnv.walkOrder != Types::Kernel::ExecutionEnv::Defaults::walkOrder) { - dst.kernelAttributes.walkOrder = EncodeParamsApiMappings::walkOrder[execEnv.walkOrder]; + if (execEnv.dispatchWalkOrder != Types::Kernel::ExecutionEnv::Defaults::dispatchWalkOrder) { + dst.kernelAttributes.dispatchWalkOrder = EncodeParamsApiMappings::dispatchWalkOrder[execEnv.dispatchWalkOrder]; } if (execEnv.partitionDim != Types::Kernel::ExecutionEnv::Defaults::partitionDim) { dst.kernelAttributes.partitionDim = EncodeParamsApiMappings::partitionDim[execEnv.partitionDim]; diff --git a/shared/source/helpers/definitions/command_encoder_args.h b/shared/source/helpers/definitions/command_encoder_args.h index 59e04367d8..a3e0ccb721 100644 --- a/shared/source/helpers/definitions/command_encoder_args.h +++ b/shared/source/helpers/definitions/command_encoder_args.h @@ -43,11 +43,11 @@ enum class RequiredDispatchWalkOrder : uint32_t { additional }; -static constexpr uint32_t additionalKernelLaunchSizeParamNotSet = 0; +static constexpr uint32_t localRegionSizeParamNotSet = 0; namespace EncodeParamsApiMappings { static constexpr std::array partitionDim = {{RequiredPartitionDim::x, NEO::RequiredPartitionDim::y, NEO::RequiredPartitionDim::z}}; -static constexpr std::array walkOrder = {{NEO::RequiredDispatchWalkOrder::x, NEO::RequiredDispatchWalkOrder::y, NEO::RequiredDispatchWalkOrder::additional}}; +static constexpr std::array dispatchWalkOrder = {{NEO::RequiredDispatchWalkOrder::x, NEO::RequiredDispatchWalkOrder::y, NEO::RequiredDispatchWalkOrder::additional}}; } // namespace EncodeParamsApiMappings } // namespace NEO diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 48241dfb24..db11263ec8 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -57,8 +57,8 @@ struct KernelDescriptor { uint32_t numThreadsRequired = 0u; uint32_t spillFillScratchMemorySize = 0u; uint32_t privateScratchMemorySize = 0u; - uint32_t additionalSize = NEO::additionalKernelLaunchSizeParamNotSet; - NEO::RequiredDispatchWalkOrder walkOrder = NEO::RequiredDispatchWalkOrder::none; + uint32_t localRegionSize = NEO::localRegionSizeParamNotSet; + NEO::RequiredDispatchWalkOrder dispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; NEO::RequiredPartitionDim partitionDim = NEO::RequiredPartitionDim::none; ThreadArbitrationPolicy threadArbitrationPolicy = NotPresent; uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U}; diff --git a/shared/test/unit_test/fixtures/command_container_fixture.cpp b/shared/test/unit_test/fixtures/command_container_fixture.cpp index 6782d659d1..4e7618e7f5 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.cpp +++ b/shared/test/unit_test/fixtures/command_container_fixture.cpp @@ -39,44 +39,44 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel bool requiresUncachedMocs) { EncodeDispatchKernelArgs args{ - 0, // eventAddress - 0, // postSyncImmValue - 0, // inOrderCounterValue - device, // device - nullptr, // inOrderExecInfo - dispatchInterface, // dispatchInterface - nullptr, // surfaceStateHeap - nullptr, // dynamicStateHeap - threadGroupDimensions, // threadGroupDimensions - nullptr, // outWalkerPtr - nullptr, // cpuWalkerBuffer - nullptr, // cpuPayloadBuffer - nullptr, // outImplicitArgsPtr - nullptr, // additionalCommands - PreemptionMode::Disabled, // preemptionMode - NEO::RequiredPartitionDim::none, // requiredPartitionDim - NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder - NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam - 1, // partitionCount - 0, // reserveExtraPayloadSpace - 1, // maxWgCountPerTile - NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy - false, // isIndirect - false, // isPredicate - false, // isTimestampEvent - requiresUncachedMocs, // requiresUncachedMocs - false, // isInternal - false, // isCooperative - false, // isHostScopeSignalEvent - false, // isKernelUsingSystemAllocation - false, // isKernelDispatchedFromImmediateCmdList - false, // isRcs - false, // dcFlushEnable - false, // isHeaplessModeEnabled - false, // isHeaplessStateInitEnabled - false, // interruptEvent - false, // immediateScratchAddressPatching - false, // makeCommandView + 0, // eventAddress + 0, // postSyncImmValue + 0, // inOrderCounterValue + device, // device + nullptr, // inOrderExecInfo + dispatchInterface, // dispatchInterface + nullptr, // surfaceStateHeap + nullptr, // dynamicStateHeap + threadGroupDimensions, // threadGroupDimensions + nullptr, // outWalkerPtr + nullptr, // cpuWalkerBuffer + nullptr, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr + nullptr, // additionalCommands + PreemptionMode::Disabled, // preemptionMode + NEO::RequiredPartitionDim::none, // requiredPartitionDim + NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder + NEO::localRegionSizeParamNotSet, // localRegionSize + 1, // partitionCount + 0, // reserveExtraPayloadSpace + 1, // maxWgCountPerTile + NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy + false, // isIndirect + false, // isPredicate + false, // isTimestampEvent + requiresUncachedMocs, // requiresUncachedMocs + false, // isInternal + false, // isCooperative + false, // isHostScopeSignalEvent + false, // isKernelUsingSystemAllocation + false, // isKernelDispatchedFromImmediateCmdList + false, // isRcs + false, // dcFlushEnable + false, // isHeaplessModeEnabled + false, // isHeaplessStateInitEnabled + false, // interruptEvent + false, // immediateScratchAddressPatching + false, // makeCommandView }; return args; @@ -87,7 +87,7 @@ EncodeWalkerArgs CommandEncodeStatesFixture::createDefaultEncodeWalkerArgs(const kernelDescriptor, // kernelDescriptor NEO::KernelExecutionType::defaultType, // kernelExecutionType NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder - 0, // additionalSizeParam + 0, // localRegionSize 0, // maxFrontEndThreads false}; // requiredSystemFence diff --git a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp index 09e82115e7..68529af4f6 100644 --- a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp +++ b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp @@ -54,7 +54,7 @@ ImplicitScalingDispatchCommandArgs ImplicitScalingFixture::createDispatchCommand dcFlushFlag, // dcFlush forceExecutionOnSingleTileFlag, // forceExecutionOnSingleTile false, // blockDispatchToCommandBuffer - false}; // isRequiredWorkGroupOrder + false}; // isRequiredDispatchWorkGroupOrder return args; }