refactor: pass increment address and value to dispatch params

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-02-10 11:41:53 +00:00
committed by Compute-Runtime-Automation
parent 09298d3e6c
commit 18d7a22861
9 changed files with 166 additions and 227 deletions

View File

@@ -190,45 +190,47 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType);
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
0, // eventAddress
static_cast<uint64_t>(Event::STATE_SIGNALED), // postSyncImmValue
0, // inOrderCounterValue
neoDevice, // device
nullptr, // inOrderExecInfo
kernel, // dispatchInterface
ssh, // surfaceStateHeap
dsh, // dynamicStateHeap
reinterpret_cast<const void *>(&threadGroupDimensions), // threadGroupDimensions
nullptr, // outWalkerPtr
nullptr, // cpuWalkerBuffer
nullptr, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
&additionalCommands, // additionalCommands
nullptr, // extendedArgs
commandListPreemptionMode, // preemptionMode
launchParams.requiredPartitionDim, // requiredPartitionDim
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
launchParams.localRegionSize, // localRegionSize
0, // partitionCount
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
maxWgCountPerTile, // maxWgCountPerTile
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
launchParams.isIndirect, // isIndirect
launchParams.isPredicate, // isPredicate
false, // isTimestampEvent
uncachedMocsKernel, // requiresUncachedMocs
internalUsage, // isInternal
launchParams.isCooperative, // isCooperative
false, // isHostScopeSignalEvent
false, // isKernelUsingSystemAllocation
isImmediateType(), // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs
this->dcFlushSupport, // dcFlushEnable
this->heaplessModeEnabled, // isHeaplessModeEnabled
this->heaplessStateInitEnabled, // isHeaplessStateInitEnabled
false, // interruptEvent
!this->scratchAddressPatchingEnabled, // immediateScratchAddressPatching
false, // makeCommandView
.eventAddress = 0,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = 0,
.inOrderIncrementGpuAddress = 0,
.inOrderIncrementValue = 0,
.device = neoDevice,
.inOrderExecInfo = nullptr,
.dispatchInterface = kernel,
.surfaceStateHeap = ssh,
.dynamicStateHeap = dsh,
.threadGroupDimensions = reinterpret_cast<const void *>(&threadGroupDimensions),
.outWalkerPtr = nullptr,
.cpuWalkerBuffer = nullptr,
.cpuPayloadBuffer = nullptr,
.outImplicitArgsPtr = nullptr,
.additionalCommands = &additionalCommands,
.extendedArgs = nullptr,
.preemptionMode = commandListPreemptionMode,
.requiredPartitionDim = launchParams.requiredPartitionDim,
.requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder,
.localRegionSize = launchParams.localRegionSize,
.partitionCount = 0,
.reserveExtraPayloadSpace = launchParams.reserveExtraPayloadSpace,
.maxWgCountPerTile = maxWgCountPerTile,
.defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent,
.isIndirect = launchParams.isIndirect,
.isPredicate = launchParams.isPredicate,
.isTimestampEvent = false,
.requiresUncachedMocs = uncachedMocsKernel,
.isInternal = internalUsage,
.isCooperative = launchParams.isCooperative,
.isHostScopeSignalEvent = false,
.isKernelUsingSystemAllocation = false,
.isKernelDispatchedFromImmediateCmdList = isImmediateType(),
.isRcs = engineGroupType == NEO::EngineGroupType::renderCompute,
.dcFlushEnable = this->dcFlushSupport,
.isHeaplessModeEnabled = this->heaplessModeEnabled,
.isHeaplessStateInitEnabled = this->heaplessStateInitEnabled,
.interruptEvent = false,
.immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled,
.makeCommandView = false,
};
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);

View File

@@ -303,6 +303,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
bool inOrderNonWalkerSignalling = false;
uint64_t inOrderCounterValue = 0;
uint64_t inOrderIncrementValue = 0;
uint64_t inOrderIncrementGpuAddress = 0;
NEO::InOrderExecInfo *inOrderExecInfo = nullptr;
if (!launchParams.makeKernelCommandView) {
@@ -327,8 +329,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
} else {
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
inOrderExecInfo = this->inOrderExecInfo.get();
if (eventForInOrderExec && eventForInOrderExec->isCounterBased() && !isTimestampEvent) {
eventAddress = 0;
if (eventForInOrderExec && eventForInOrderExec->isCounterBased()) {
if (eventForInOrderExec->getInOrderIncrementValue() > 0) {
inOrderIncrementGpuAddress = eventForInOrderExec->getInOrderExecInfo()->getBaseDeviceAddress();
inOrderIncrementValue = eventForInOrderExec->getInOrderIncrementValue();
}
if (!isTimestampEvent) {
eventAddress = 0;
}
}
}
}
@@ -338,46 +346,49 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType);
NEO::EncodeKernelArgsExt dispatchKernelArgsExt = {};
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, // eventAddress
static_cast<uint64_t>(Event::STATE_SIGNALED), // postSyncImmValue
inOrderCounterValue, // inOrderCounterValue
neoDevice, // device
inOrderExecInfo, // inOrderExecInfo
kernel, // dispatchInterface
ssh, // surfaceStateHeap
dsh, // dynamicStateHeap
reinterpret_cast<const void *>(&threadGroupDimensions), // threadGroupDimensions
nullptr, // outWalkerPtr
launchParams.cmdWalkerBuffer, // cpuWalkerBuffer
launchParams.hostPayloadBuffer, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
&additionalCommands, // additionalCommands
&dispatchKernelArgsExt, // extendedArgs
kernelPreemptionMode, // preemptionMode
launchParams.requiredPartitionDim, // requiredPartitionDim
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
launchParams.localRegionSize, // localRegionSize
this->partitionCount, // partitionCount
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
maxWgCountPerTile, // maxWgCountPerTile
this->defaultPipelinedThreadArbitrationPolicy, // defaultPipelinedThreadArbitrationPolicy
launchParams.isIndirect, // isIndirect
launchParams.isPredicate, // isPredicate
isTimestampEvent, // isTimestampEvent
uncachedMocsKernel, // requiresUncachedMocs
internalUsage, // isInternal
launchParams.isCooperative, // isCooperative
isHostSignalScopeEvent, // isHostScopeSignalEvent
isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation
isImmediateType(), // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs
this->dcFlushSupport, // dcFlushEnable
this->heaplessModeEnabled, // isHeaplessModeEnabled
this->heaplessStateInitEnabled, // isHeaplessStateInitEnabled
interruptEvent, // interruptEvent
!this->scratchAddressPatchingEnabled, // immediateScratchAddressPatching
launchParams.makeKernelCommandView // makeCommandView
.eventAddress = eventAddress,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = inOrderCounterValue,
.inOrderIncrementGpuAddress = inOrderIncrementGpuAddress,
.inOrderIncrementValue = inOrderIncrementValue,
.device = neoDevice,
.inOrderExecInfo = inOrderExecInfo,
.dispatchInterface = kernel,
.surfaceStateHeap = ssh,
.dynamicStateHeap = dsh,
.threadGroupDimensions = reinterpret_cast<const void *>(&threadGroupDimensions),
.outWalkerPtr = nullptr,
.cpuWalkerBuffer = launchParams.cmdWalkerBuffer,
.cpuPayloadBuffer = launchParams.hostPayloadBuffer,
.outImplicitArgsPtr = nullptr,
.additionalCommands = &additionalCommands,
.extendedArgs = &dispatchKernelArgsExt,
.preemptionMode = kernelPreemptionMode,
.requiredPartitionDim = launchParams.requiredPartitionDim,
.requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder,
.localRegionSize = launchParams.localRegionSize,
.partitionCount = this->partitionCount,
.reserveExtraPayloadSpace = launchParams.reserveExtraPayloadSpace,
.maxWgCountPerTile = maxWgCountPerTile,
.defaultPipelinedThreadArbitrationPolicy = this->defaultPipelinedThreadArbitrationPolicy,
.isIndirect = launchParams.isIndirect,
.isPredicate = launchParams.isPredicate,
.isTimestampEvent = isTimestampEvent,
.requiresUncachedMocs = uncachedMocsKernel,
.isInternal = internalUsage,
.isCooperative = launchParams.isCooperative,
.isHostScopeSignalEvent = isHostSignalScopeEvent,
.isKernelUsingSystemAllocation = isKernelUsingSystemAllocation,
.isKernelDispatchedFromImmediateCmdList = isImmediateType(),
.isRcs = engineGroupType == NEO::EngineGroupType::renderCompute,
.dcFlushEnable = this->dcFlushSupport,
.isHeaplessModeEnabled = this->heaplessModeEnabled,
.isHeaplessStateInitEnabled = this->heaplessStateInitEnabled,
.interruptEvent = interruptEvent,
.immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled,
.makeCommandView = launchParams.makeKernelCommandView,
};
setAdditionalDispatchKernelArgsFromLaunchParams(dispatchKernelArgs, launchParams);

View File

@@ -17,6 +17,7 @@
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/sources/helper/ze_object_utils.h"
#include "level_zero/driver_experimental/zex_api.h"
namespace L0 {
namespace ult {
@@ -84,6 +85,19 @@ struct InOrderCmdListFixture : public ::Test<ModuleFixture> {
::Test<ModuleFixture>::TearDown();
}
void createExternalSyncStorageEvent(uint64_t counterValue, uint64_t incrementValue, uint64_t *deviceAddress, ze_event_handle_t &outEvent) {
zex_counter_based_event_external_storage_properties_t externalStorageAllocProperties = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_EXTERNAL_STORAGE_ALLOC_PROPERTIES}; // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901
externalStorageAllocProperties.completionValue = counterValue;
externalStorageAllocProperties.deviceAddress = deviceAddress;
externalStorageAllocProperties.incrementValue = incrementValue;
zex_counter_based_event_desc_t counterBasedDesc = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC}; // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901
counterBasedDesc.flags = ZEX_COUNTER_BASED_EVENT_FLAG_IMMEDIATE | ZEX_COUNTER_BASED_EVENT_FLAG_NON_IMMEDIATE;
counterBasedDesc.pNext = &externalStorageAllocProperties;
EXPECT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate2(context, device, &counterBasedDesc, &outEvent));
}
DestroyableZeUniquePtr<FixtureMockEvent> createStandaloneCbEvent(const ze_base_desc_t *pNext) {
constexpr uint32_t counterBasedFlags = (ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE | ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE);

View File

@@ -22,6 +22,7 @@
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/event/event.h"
@@ -195,47 +196,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
0, // eventAddress
0, // postSyncImmValue
0, // inOrderCounterValue
device->getNEODevice(), // device
nullptr, // inOrderExecInfo
kernel.get(), // dispatchInterface
nullptr, // surfaceStateHeap
nullptr, // dynamicStateHeap
threadGroupDimensions, // threadGroupDimensions
nullptr, // outWalkerPtr
nullptr, // cpuWalkerBuffer
nullptr, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
nullptr, // additionalCommands
nullptr, // extendedArgs
PreemptionMode::MidBatch, // preemptionMode
NEO::RequiredPartitionDim::none, // requiredPartitionDim
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
NEO::localRegionSizeParamNotSet, // localRegionSize
0, // partitionCount
0, // reserveExtraPayloadSpace
1, // maxWgCountPerTile
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
false, // isIndirect
false, // isPredicate
false, // isTimestampEvent
false, // requiresUncachedMocs
false, // isInternal
false, // isCooperative
false, // isHostScopeSignalEvent
false, // isKernelUsingSystemAllocation
false, // isKernelDispatchedFromImmediateCmdList
false, // isRcs
commandList->getDcFlushRequired(true), // dcFlushEnable
false, // isHeaplessModeEnabled
false, // isHeaplessStateInitEnabled
false, // interruptEvent
false, // immediateScratchAddressPatching
false, // makeCommandView
};
auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false);
dispatchKernelArgs.dcFlushEnable = commandList->getDcFlushRequired(true);
NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs);
auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();

View File

@@ -19,6 +19,7 @@
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_sync_buffer_handler.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
@@ -912,47 +913,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
0, // eventAddress
0, // postSyncImmValue
0, // inOrderCounterValue
device->getNEODevice(), // device
nullptr, // inOrderExecInfo
kernel.get(), // dispatchInterface
nullptr, // surfaceStateHeap
nullptr, // dynamicStateHeap
threadGroupDimensions, // threadGroupDimensions
nullptr, // outWalkerPtr
nullptr, // cpuWalkerBuffer
nullptr, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
nullptr, // additionalCommands
nullptr, // extendedArgs
PreemptionMode::MidBatch, // preemptionMode
NEO::RequiredPartitionDim::none, // requiredPartitionDim
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
NEO::localRegionSizeParamNotSet, // localRegionSize
0, // partitionCount
0, // reserveExtraPayloadSpace
1, // maxWgCountPerTile
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
false, // isIndirect
false, // isPredicate
false, // isTimestampEvent
false, // requiresUncachedMocs
false, // isInternal
false, // isCooperative
false, // isHostScopeSignalEvent
false, // isKernelUsingSystemAllocation
false, // isKernelDispatchedFromImmediateCmdList
false, // isRcs
commandList->getDcFlushRequired(true), // dcFlushEnable
false, // isHeaplessModeEnabled
false, // isHeaplessStateInitEnabled
false, // interruptEvent
false, // immediateScratchAddressPatching
false, // makeCommandView
};
auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false);
dispatchKernelArgs.dcFlushEnable = commandList->getDcFlushRequired(true);
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs), std::exception);
}

View File

@@ -5332,17 +5332,8 @@ HWTEST2_F(InOrderCmdListTests, givenExternalSyncStorageWhenCallingAppendThenDont
auto devAddress = reinterpret_cast<uint64_t *>(allocDeviceMem(sizeof(uint64_t) * 2));
zex_counter_based_event_external_storage_properties_t externalStorageAllocProperties = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_EXTERNAL_STORAGE_ALLOC_PROPERTIES}; // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901
externalStorageAllocProperties.completionValue = counterValue;
externalStorageAllocProperties.deviceAddress = devAddress;
externalStorageAllocProperties.incrementValue = incValue;
zex_counter_based_event_desc_t counterBasedDesc = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC}; // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901
counterBasedDesc.flags = ZEX_COUNTER_BASED_EVENT_FLAG_IMMEDIATE | ZEX_COUNTER_BASED_EVENT_FLAG_NON_IMMEDIATE;
counterBasedDesc.pNext = &externalStorageAllocProperties;
ze_event_handle_t handle = nullptr;
EXPECT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate2(context, device, &counterBasedDesc, &handle));
createExternalSyncStorageEvent(counterValue, incValue, devAddress, handle);
auto eventObj = Event::fromHandle(handle);
ASSERT_NE(nullptr, eventObj->getInOrderExecInfo());