mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
refactor: rename parameters to more meaningful names
- rename additional size to local region size
- rename walk order to dispatch walk order, to distinguish it from the local ID walk order

Related-To: NEO-13350
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b56d4324c5
commit
6b7235cd6c
@@ -305,7 +305,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
|||||||
|
|
||||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
||||||
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions);
|
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions);
|
||||||
void programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t additionalSizeParam);
|
void programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t localRegionSize);
|
||||||
void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation);
|
void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation);
|
||||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition, bool copyOperation);
|
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition, bool copyOperation);
|
||||||
void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);
|
void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);
|
||||||
|
|||||||
@@ -2821,14 +2821,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kern
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t additionalSizeParam) {
|
void CommandListCoreFamily<gfxCoreFamily>::programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t localRegionSize) {
|
||||||
auto neoDevice = device->getNEODevice();
|
auto neoDevice = device->getNEODevice();
|
||||||
|
|
||||||
neoDevice->allocateSyncBufferHandler();
|
neoDevice->allocateSyncBufferHandler();
|
||||||
|
|
||||||
const size_t requestedNumberOfWorkgroups = threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY * threadGroupDimensions.groupCountZ;
|
const size_t requestedNumberOfWorkgroups = threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY * threadGroupDimensions.groupCountZ;
|
||||||
|
|
||||||
size_t size = alignUp((requestedNumberOfWorkgroups / additionalSizeParam) * (additionalSizeParam + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize);
|
size_t size = alignUp((requestedNumberOfWorkgroups / localRegionSize) * (localRegionSize + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize);
|
||||||
|
|
||||||
auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(size);
|
auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(size);
|
||||||
|
|
||||||
@@ -4279,11 +4279,11 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
|||||||
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalKernelLaunchParams(CmdListKernelLaunchParams &launchParams, Kernel &kernel) const {
|
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalKernelLaunchParams(CmdListKernelLaunchParams &launchParams, Kernel &kernel) const {
|
||||||
auto &kernelDescriptor = kernel.getImmutableData()->getDescriptor();
|
auto &kernelDescriptor = kernel.getImmutableData()->getDescriptor();
|
||||||
|
|
||||||
if (launchParams.additionalSizeParam == NEO::additionalKernelLaunchSizeParamNotSet) {
|
if (launchParams.localRegionSize == NEO::localRegionSizeParamNotSet) {
|
||||||
launchParams.additionalSizeParam = kernelDescriptor.kernelAttributes.additionalSize;
|
launchParams.localRegionSize = kernelDescriptor.kernelAttributes.localRegionSize;
|
||||||
}
|
}
|
||||||
if (launchParams.requiredDispatchWalkOrder == NEO::RequiredDispatchWalkOrder::none) {
|
if (launchParams.requiredDispatchWalkOrder == NEO::RequiredDispatchWalkOrder::none) {
|
||||||
launchParams.requiredDispatchWalkOrder = kernelDescriptor.kernelAttributes.walkOrder;
|
launchParams.requiredDispatchWalkOrder = kernelDescriptor.kernelAttributes.dispatchWalkOrder;
|
||||||
}
|
}
|
||||||
if (launchParams.requiredPartitionDim == NEO::RequiredPartitionDim::none) {
|
if (launchParams.requiredPartitionDim == NEO::RequiredPartitionDim::none) {
|
||||||
launchParams.requiredPartitionDim = kernelDescriptor.kernelAttributes.partitionDim;
|
launchParams.requiredPartitionDim = kernelDescriptor.kernelAttributes.partitionDim;
|
||||||
|
|||||||
@@ -207,7 +207,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||||||
commandListPreemptionMode, // preemptionMode
|
commandListPreemptionMode, // preemptionMode
|
||||||
launchParams.requiredPartitionDim, // requiredPartitionDim
|
launchParams.requiredPartitionDim, // requiredPartitionDim
|
||||||
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
||||||
launchParams.additionalSizeParam, // additionalSizeParam
|
launchParams.localRegionSize, // localRegionSize
|
||||||
0, // partitionCount
|
0, // partitionCount
|
||||||
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
|
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
|
||||||
maxWgCountPerTile, // maxWgCountPerTile
|
maxWgCountPerTile, // maxWgCountPerTile
|
||||||
|
|||||||
@@ -256,7 +256,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (kernel->usesRegionGroupBarrier()) {
|
if (kernel->usesRegionGroupBarrier()) {
|
||||||
programRegionGroupBarrier(*kernel, threadGroupDimensions, launchParams.additionalSizeParam);
|
programRegionGroupBarrier(*kernel, threadGroupDimensions, launchParams.localRegionSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs());
|
bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs());
|
||||||
@@ -331,7 +331,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||||||
kernelPreemptionMode, // preemptionMode
|
kernelPreemptionMode, // preemptionMode
|
||||||
launchParams.requiredPartitionDim, // requiredPartitionDim
|
launchParams.requiredPartitionDim, // requiredPartitionDim
|
||||||
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
||||||
launchParams.additionalSizeParam, // additionalSizeParam
|
launchParams.localRegionSize, // localRegionSize
|
||||||
this->partitionCount, // partitionCount
|
this->partitionCount, // partitionCount
|
||||||
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
|
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
|
||||||
maxWgCountPerTile, // maxWgCountPerTile
|
maxWgCountPerTile, // maxWgCountPerTile
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ struct CmdListKernelLaunchParams {
|
|||||||
uint32_t externalPerThreadScratchSize[2] = {0U, 0U};
|
uint32_t externalPerThreadScratchSize[2] = {0U, 0U};
|
||||||
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
|
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
|
||||||
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
||||||
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
|
uint32_t localRegionSize = NEO::localRegionSizeParamNotSet;
|
||||||
uint32_t numKernelsInSplitLaunch = 0;
|
uint32_t numKernelsInSplitLaunch = 0;
|
||||||
uint32_t numKernelsExecutedInSplitLaunch = 0;
|
uint32_t numKernelsExecutedInSplitLaunch = 0;
|
||||||
uint32_t reserveExtraPayloadSpace = 0;
|
uint32_t reserveExtraPayloadSpace = 0;
|
||||||
|
|||||||
@@ -65,13 +65,13 @@ HWTEST2_F(MultiTileImmediateCommandListTest, givenMultipleTilesWhenAllocatingBar
|
|||||||
|
|
||||||
size_t requestedNumberOfWorkgroups = threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY * threadGroupDimensions.groupCountZ;
|
size_t requestedNumberOfWorkgroups = threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY * threadGroupDimensions.groupCountZ;
|
||||||
|
|
||||||
size_t additionalSizeParam = 4;
|
size_t localRegionSize = 4;
|
||||||
|
|
||||||
whiteBoxCmdList->programRegionGroupBarrier(mockKernel, threadGroupDimensions, additionalSizeParam);
|
whiteBoxCmdList->programRegionGroupBarrier(mockKernel, threadGroupDimensions, localRegionSize);
|
||||||
|
|
||||||
auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(1);
|
auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(1);
|
||||||
|
|
||||||
size_t expectedOffset = alignUp((requestedNumberOfWorkgroups / additionalSizeParam) * (additionalSizeParam + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize);
|
size_t expectedOffset = alignUp((requestedNumberOfWorkgroups / localRegionSize) * (localRegionSize + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize);
|
||||||
|
|
||||||
EXPECT_EQ(patchData.second, expectedOffset);
|
EXPECT_EQ(patchData.second, expectedOffset);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -196,44 +196,44 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
|
|||||||
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
|
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
|
||||||
|
|
||||||
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
||||||
0, // eventAddress
|
0, // eventAddress
|
||||||
0, // postSyncImmValue
|
0, // postSyncImmValue
|
||||||
0, // inOrderCounterValue
|
0, // inOrderCounterValue
|
||||||
device->getNEODevice(), // device
|
device->getNEODevice(), // device
|
||||||
nullptr, // inOrderExecInfo
|
nullptr, // inOrderExecInfo
|
||||||
kernel.get(), // dispatchInterface
|
kernel.get(), // dispatchInterface
|
||||||
nullptr, // surfaceStateHeap
|
nullptr, // surfaceStateHeap
|
||||||
nullptr, // dynamicStateHeap
|
nullptr, // dynamicStateHeap
|
||||||
threadGroupDimensions, // threadGroupDimensions
|
threadGroupDimensions, // threadGroupDimensions
|
||||||
nullptr, // outWalkerPtr
|
nullptr, // outWalkerPtr
|
||||||
nullptr, // cpuWalkerBuffer
|
nullptr, // cpuWalkerBuffer
|
||||||
nullptr, // cpuPayloadBuffer
|
nullptr, // cpuPayloadBuffer
|
||||||
nullptr, // outImplicitArgsPtr
|
nullptr, // outImplicitArgsPtr
|
||||||
nullptr, // additionalCommands
|
nullptr, // additionalCommands
|
||||||
PreemptionMode::MidBatch, // preemptionMode
|
PreemptionMode::MidBatch, // preemptionMode
|
||||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||||
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
||||||
NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam
|
NEO::localRegionSizeParamNotSet, // localRegionSize
|
||||||
0, // partitionCount
|
0, // partitionCount
|
||||||
0, // reserveExtraPayloadSpace
|
0, // reserveExtraPayloadSpace
|
||||||
1, // maxWgCountPerTile
|
1, // maxWgCountPerTile
|
||||||
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
|
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
|
||||||
false, // isIndirect
|
false, // isIndirect
|
||||||
false, // isPredicate
|
false, // isPredicate
|
||||||
false, // isTimestampEvent
|
false, // isTimestampEvent
|
||||||
false, // requiresUncachedMocs
|
false, // requiresUncachedMocs
|
||||||
false, // isInternal
|
false, // isInternal
|
||||||
false, // isCooperative
|
false, // isCooperative
|
||||||
false, // isHostScopeSignalEvent
|
false, // isHostScopeSignalEvent
|
||||||
false, // isKernelUsingSystemAllocation
|
false, // isKernelUsingSystemAllocation
|
||||||
false, // isKernelDispatchedFromImmediateCmdList
|
false, // isKernelDispatchedFromImmediateCmdList
|
||||||
false, // isRcs
|
false, // isRcs
|
||||||
commandList->getDcFlushRequired(true), // dcFlushEnable
|
commandList->getDcFlushRequired(true), // dcFlushEnable
|
||||||
false, // isHeaplessModeEnabled
|
false, // isHeaplessModeEnabled
|
||||||
false, // isHeaplessStateInitEnabled
|
false, // isHeaplessStateInitEnabled
|
||||||
false, // interruptEvent
|
false, // interruptEvent
|
||||||
false, // immediateScratchAddressPatching
|
false, // immediateScratchAddressPatching
|
||||||
false, // makeCommandView
|
false, // makeCommandView
|
||||||
};
|
};
|
||||||
NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs);
|
NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs);
|
||||||
|
|
||||||
|
|||||||
@@ -541,7 +541,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
|||||||
std::unique_ptr<L0::CommandList> cmdList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, result));
|
std::unique_ptr<L0::CommandList> cmdList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, result));
|
||||||
|
|
||||||
CmdListKernelLaunchParams launchParams = {};
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
launchParams.additionalSizeParam = 4;
|
launchParams.localRegionSize = 4;
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false));
|
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false));
|
||||||
|
|
||||||
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.crossThreadData.get(), regionGroupBarrier.stateless));
|
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.crossThreadData.get(), regionGroupBarrier.stateless));
|
||||||
@@ -572,7 +572,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
|||||||
|
|
||||||
size_t requestedNumberOfWorkgroups = groupCount.groupCountX * groupCount.groupCountY * groupCount.groupCountZ;
|
size_t requestedNumberOfWorkgroups = groupCount.groupCountX * groupCount.groupCountY * groupCount.groupCountZ;
|
||||||
|
|
||||||
auto offset = alignUp((requestedNumberOfWorkgroups / launchParams.additionalSizeParam) * (launchParams.additionalSizeParam + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize);
|
auto offset = alignUp((requestedNumberOfWorkgroups / launchParams.localRegionSize) * (launchParams.localRegionSize + 1) * 2 * sizeof(uint32_t), MemoryConstants::cacheLineSize);
|
||||||
|
|
||||||
EXPECT_EQ(patchPtr2, patchPtr + offset);
|
EXPECT_EQ(patchPtr2, patchPtr + offset);
|
||||||
}
|
}
|
||||||
@@ -778,44 +778,44 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
|
|||||||
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
|
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
|
||||||
|
|
||||||
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
||||||
0, // eventAddress
|
0, // eventAddress
|
||||||
0, // postSyncImmValue
|
0, // postSyncImmValue
|
||||||
0, // inOrderCounterValue
|
0, // inOrderCounterValue
|
||||||
device->getNEODevice(), // device
|
device->getNEODevice(), // device
|
||||||
nullptr, // inOrderExecInfo
|
nullptr, // inOrderExecInfo
|
||||||
kernel.get(), // dispatchInterface
|
kernel.get(), // dispatchInterface
|
||||||
nullptr, // surfaceStateHeap
|
nullptr, // surfaceStateHeap
|
||||||
nullptr, // dynamicStateHeap
|
nullptr, // dynamicStateHeap
|
||||||
threadGroupDimensions, // threadGroupDimensions
|
threadGroupDimensions, // threadGroupDimensions
|
||||||
nullptr, // outWalkerPtr
|
nullptr, // outWalkerPtr
|
||||||
nullptr, // cpuWalkerBuffer
|
nullptr, // cpuWalkerBuffer
|
||||||
nullptr, // cpuPayloadBuffer
|
nullptr, // cpuPayloadBuffer
|
||||||
nullptr, // outImplicitArgsPtr
|
nullptr, // outImplicitArgsPtr
|
||||||
nullptr, // additionalCommands
|
nullptr, // additionalCommands
|
||||||
PreemptionMode::MidBatch, // preemptionMode
|
PreemptionMode::MidBatch, // preemptionMode
|
||||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||||
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
||||||
NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam
|
NEO::localRegionSizeParamNotSet, // localRegionSize
|
||||||
0, // partitionCount
|
0, // partitionCount
|
||||||
0, // reserveExtraPayloadSpace
|
0, // reserveExtraPayloadSpace
|
||||||
1, // maxWgCountPerTile
|
1, // maxWgCountPerTile
|
||||||
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
|
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
|
||||||
false, // isIndirect
|
false, // isIndirect
|
||||||
false, // isPredicate
|
false, // isPredicate
|
||||||
false, // isTimestampEvent
|
false, // isTimestampEvent
|
||||||
false, // requiresUncachedMocs
|
false, // requiresUncachedMocs
|
||||||
false, // isInternal
|
false, // isInternal
|
||||||
false, // isCooperative
|
false, // isCooperative
|
||||||
false, // isHostScopeSignalEvent
|
false, // isHostScopeSignalEvent
|
||||||
false, // isKernelUsingSystemAllocation
|
false, // isKernelUsingSystemAllocation
|
||||||
false, // isKernelDispatchedFromImmediateCmdList
|
false, // isKernelDispatchedFromImmediateCmdList
|
||||||
false, // isRcs
|
false, // isRcs
|
||||||
commandList->getDcFlushRequired(true), // dcFlushEnable
|
commandList->getDcFlushRequired(true), // dcFlushEnable
|
||||||
false, // isHeaplessModeEnabled
|
false, // isHeaplessModeEnabled
|
||||||
false, // isHeaplessStateInitEnabled
|
false, // isHeaplessStateInitEnabled
|
||||||
false, // interruptEvent
|
false, // interruptEvent
|
||||||
false, // immediateScratchAddressPatching
|
false, // immediateScratchAddressPatching
|
||||||
false, // makeCommandView
|
false, // makeCommandView
|
||||||
};
|
};
|
||||||
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs), std::exception);
|
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs), std::exception);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -87,7 +87,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
kernel.getKernelInfo().kernelDescriptor, // kernelDescriptor
|
kernel.getKernelInfo().kernelDescriptor, // kernelDescriptor
|
||||||
kernel.getExecutionType(), // kernelExecutionType
|
kernel.getExecutionType(), // kernelExecutionType
|
||||||
RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
||||||
0, // additionalSizeParam
|
0, // localRegionSize
|
||||||
0, // maxFrontEndThreads
|
0, // maxFrontEndThreads
|
||||||
false}; // requiredSystemFence
|
false}; // requiredSystemFence
|
||||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
|
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
|
||||||
|
|||||||
@@ -146,12 +146,12 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
auto maxFrontEndThreads = device.getDeviceInfo().maxFrontEndThreads;
|
auto maxFrontEndThreads = device.getDeviceInfo().maxFrontEndThreads;
|
||||||
|
|
||||||
EncodeWalkerArgs encodeWalkerArgs{
|
EncodeWalkerArgs encodeWalkerArgs{
|
||||||
kernelInfo.kernelDescriptor, // kernelDescriptor
|
kernelInfo.kernelDescriptor, // kernelDescriptor
|
||||||
kernel.getExecutionType(), // kernelExecutionType
|
kernel.getExecutionType(), // kernelExecutionType
|
||||||
kernelAttributes.walkOrder, // requiredDispatchWalkOrder
|
kernelAttributes.dispatchWalkOrder, // requiredDispatchWalkOrder
|
||||||
kernelAttributes.additionalSize, // additionalSizeParam
|
kernelAttributes.localRegionSize, // localRegionSize
|
||||||
maxFrontEndThreads, // maxFrontEndThreads
|
maxFrontEndThreads, // maxFrontEndThreads
|
||||||
requiredSystemFence}; // requiredSystemFence
|
requiredSystemFence}; // requiredSystemFence
|
||||||
|
|
||||||
EncodeDispatchKernel<GfxFamily>::template encodeAdditionalWalkerFields<WalkerType>(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
|
EncodeDispatchKernel<GfxFamily>::template encodeAdditionalWalkerFields<WalkerType>(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
|
||||||
EncodeDispatchKernel<GfxFamily>::template encodeWalkerPostSyncFields<WalkerType>(walkerCmd, encodeWalkerArgs);
|
EncodeDispatchKernel<GfxFamily>::template encodeWalkerPostSyncFields<WalkerType>(walkerCmd, encodeWalkerArgs);
|
||||||
@@ -192,7 +192,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
queueCsr.getDcFlushSupport(), // dcFlush
|
queueCsr.getDcFlushSupport(), // dcFlush
|
||||||
kernel.isSingleSubdevicePreferred(), // forceExecutionOnSingleTile
|
kernel.isSingleSubdevicePreferred(), // forceExecutionOnSingleTile
|
||||||
false, // blockDispatchToCommandBuffer
|
false, // blockDispatchToCommandBuffer
|
||||||
requiredWalkOrder != 0}; // isRequiredWorkGroupOrder
|
requiredWalkOrder != 0}; // isRequiredDispatchWorkGroupOrder
|
||||||
|
|
||||||
ImplicitScalingDispatch<GfxFamily>::template dispatchCommands<WalkerType>(commandStream,
|
ImplicitScalingDispatch<GfxFamily>::template dispatchCommands<WalkerType>(commandStream,
|
||||||
walkerCmd,
|
walkerCmd,
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
|
|||||||
kernelDescriptor, // kernelDescriptor
|
kernelDescriptor, // kernelDescriptor
|
||||||
KernelExecutionType::concurrent, // kernelExecutionType
|
KernelExecutionType::concurrent, // kernelExecutionType
|
||||||
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
||||||
0, // additionalSizeParam
|
0, // localRegionSize
|
||||||
0, // maxFrontEndThreads
|
0, // maxFrontEndThreads
|
||||||
true}; // requiredSystemFence
|
true}; // requiredSystemFence
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ struct EncodeDispatchKernelArgs {
|
|||||||
PreemptionMode preemptionMode = PreemptionMode::Initial;
|
PreemptionMode preemptionMode = PreemptionMode::Initial;
|
||||||
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
|
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
|
||||||
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
||||||
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
|
uint32_t localRegionSize = NEO::localRegionSizeParamNotSet;
|
||||||
uint32_t partitionCount = 0u;
|
uint32_t partitionCount = 0u;
|
||||||
uint32_t reserveExtraPayloadSpace = 0;
|
uint32_t reserveExtraPayloadSpace = 0;
|
||||||
uint32_t maxWgCountPerTile = 0;
|
uint32_t maxWgCountPerTile = 0;
|
||||||
@@ -109,7 +109,7 @@ struct EncodeWalkerArgs {
|
|||||||
const KernelDescriptor &kernelDescriptor;
|
const KernelDescriptor &kernelDescriptor;
|
||||||
KernelExecutionType kernelExecutionType = KernelExecutionType::defaultType;
|
KernelExecutionType kernelExecutionType = KernelExecutionType::defaultType;
|
||||||
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
||||||
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
|
uint32_t localRegionSize = NEO::localRegionSizeParamNotSet;
|
||||||
uint32_t maxFrontEndThreads = 0;
|
uint32_t maxFrontEndThreads = 0;
|
||||||
bool requiredSystemFence = false;
|
bool requiredSystemFence = false;
|
||||||
};
|
};
|
||||||
@@ -188,7 +188,7 @@ struct EncodeDispatchKernel {
|
|||||||
static void setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
static void setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
||||||
|
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
|
static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder);
|
||||||
|
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
static void setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
static void setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
||||||
|
|||||||
@@ -283,7 +283,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
kernelDescriptor, // kernelDescriptor
|
kernelDescriptor, // kernelDescriptor
|
||||||
KernelExecutionType::defaultType, // kernelExecutionType
|
KernelExecutionType::defaultType, // kernelExecutionType
|
||||||
args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
||||||
args.additionalSizeParam, // additionalSizeParam
|
args.localRegionSize, // localRegionSize
|
||||||
args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads
|
args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads
|
||||||
args.requiresSystemMemoryFence()}; // requiredSystemFence
|
args.requiresSystemMemoryFence()}; // requiredSystemFence
|
||||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
|
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
|
||||||
@@ -684,7 +684,7 @@ void EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(InterfaceDescriptorT
|
|||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {}
|
void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {}
|
||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ template uint64_t NEO::EncodeDispatchKernel<Family>::getScratchAddressForImmedia
|
|||||||
template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<false>(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
|
template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<false>(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd);
|
template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount,
|
template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount,
|
||||||
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
|
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType &walkerCmd, Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor);
|
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType &walkerCmd, Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
template void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
template void NEO::EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||||
|
|||||||
@@ -90,6 +90,6 @@ void EncodeDispatchKernel<Family>::adjustTimestampPacket(WalkerType &walkerCmd,
|
|||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {}
|
void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -402,7 +402,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
kernelDescriptor, // kernelDescriptor
|
kernelDescriptor, // kernelDescriptor
|
||||||
kernelExecutionType, // kernelExecutionType
|
kernelExecutionType, // kernelExecutionType
|
||||||
args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
|
||||||
args.additionalSizeParam, // additionalSizeParam
|
args.localRegionSize, // localRegionSize
|
||||||
args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads
|
args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads
|
||||||
args.requiresSystemMemoryFence()}; // requiresMemoryFence
|
args.requiresSystemMemoryFence()}; // requiresMemoryFence
|
||||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||||
@@ -412,7 +412,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
EncodeDispatchKernel<Family>::overrideDefaultValues(walkerCmd, idd);
|
EncodeDispatchKernel<Family>::overrideDefaultValues(walkerCmd, idd);
|
||||||
|
|
||||||
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
|
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
|
||||||
bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none;
|
bool isRequiredDispatchWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none;
|
||||||
if (args.partitionCount > 1 && !args.isInternal) {
|
if (args.partitionCount > 1 && !args.isInternal) {
|
||||||
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
|
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
|
||||||
|
|
||||||
@@ -429,7 +429,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
args.dcFlushEnable, // dcFlush
|
args.dcFlushEnable, // dcFlush
|
||||||
EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
|
EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
|
||||||
args.makeCommandView, // blockDispatchToCommandBuffer
|
args.makeCommandView, // blockDispatchToCommandBuffer
|
||||||
isRequiredWorkGroupOrder}; // isRequiredWorkGroupOrder
|
isRequiredDispatchWorkGroupOrder}; // isRequiredDispatchWorkGroupOrder
|
||||||
|
|
||||||
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
|
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
|
||||||
walkerCmd,
|
walkerCmd,
|
||||||
@@ -438,7 +438,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
args.partitionCount = implicitScalingArgs.partitionCount;
|
args.partitionCount = implicitScalingArgs.partitionCount;
|
||||||
} else {
|
} else {
|
||||||
args.partitionCount = 1;
|
args.partitionCount = 1;
|
||||||
EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, *args.device, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder);
|
EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, *args.device, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredDispatchWorkGroupOrder);
|
||||||
|
|
||||||
if (!args.makeCommandView) {
|
if (!args.makeCommandView) {
|
||||||
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
|
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ struct ImplicitScalingDispatchCommandArgs {
|
|||||||
bool dcFlush = false;
|
bool dcFlush = false;
|
||||||
bool forceExecutionOnSingleTile = false;
|
bool forceExecutionOnSingleTile = false;
|
||||||
bool blockDispatchToCommandBuffer = false;
|
bool blockDispatchToCommandBuffer = false;
|
||||||
bool isRequiredWorkGroupOrder = false;
|
bool isRequiredDispatchWorkGroupOrder = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(ImplicitScalingD
|
|||||||
|
|
||||||
args.workgroupSize = dispatchCommandArgs.workgroupSize;
|
args.workgroupSize = dispatchCommandArgs.workgroupSize;
|
||||||
args.maxWgCountPerTile = dispatchCommandArgs.maxWgCountPerTile;
|
args.maxWgCountPerTile = dispatchCommandArgs.maxWgCountPerTile;
|
||||||
args.isRequiredWorkGroupOrder = dispatchCommandArgs.isRequiredWorkGroupOrder;
|
args.isRequiredDispatchWorkGroupOrder = dispatchCommandArgs.isRequiredDispatchWorkGroupOrder;
|
||||||
|
|
||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ struct WalkerPartitionArgs {
|
|||||||
bool dcFlushEnable = false;
|
bool dcFlushEnable = false;
|
||||||
bool forceExecutionOnSingleTile = false;
|
bool forceExecutionOnSingleTile = false;
|
||||||
bool blockDispatchToCommandBuffer = false;
|
bool blockDispatchToCommandBuffer = false;
|
||||||
bool isRequiredWorkGroupOrder = false;
|
bool isRequiredDispatchWorkGroupOrder = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline constexpr uint32_t wparidCCSOffset = 0x221C;
|
inline constexpr uint32_t wparidCCSOffset = 0x221C;
|
||||||
|
|||||||
@@ -531,7 +531,7 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
|
|||||||
args.partitionCount,
|
args.partitionCount,
|
||||||
args.workgroupSize,
|
args.workgroupSize,
|
||||||
args.maxWgCountPerTile,
|
args.maxWgCountPerTile,
|
||||||
args.isRequiredWorkGroupOrder);
|
args.isRequiredDispatchWorkGroupOrder);
|
||||||
|
|
||||||
appendWalkerFields<GfxFamily, WalkerType>(*inputWalker, args.tileCount, workgroupCount);
|
appendWalkerFields<GfxFamily, WalkerType>(*inputWalker, args.tileCount, workgroupCount);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -350,7 +350,7 @@ using IndirectStatelessCountT = int32_t;
|
|||||||
using HasSampleT = bool;
|
using HasSampleT = bool;
|
||||||
using PrivateSizeT = int32_t;
|
using PrivateSizeT = int32_t;
|
||||||
using SpillSizeT = int32_t;
|
using SpillSizeT = int32_t;
|
||||||
using AdditionalSizeT = int32_t;
|
using LocalRegionSizeT = int32_t;
|
||||||
using WalkOrderT = int32_t;
|
using WalkOrderT = int32_t;
|
||||||
using PartitionDimT = int32_t;
|
using PartitionDimT = int32_t;
|
||||||
|
|
||||||
@@ -384,8 +384,8 @@ inline constexpr IndirectStatelessCountT indirectStatelessCount = 0;
|
|||||||
inline constexpr HasSampleT hasSample = false;
|
inline constexpr HasSampleT hasSample = false;
|
||||||
inline constexpr PrivateSizeT privateSize = 0;
|
inline constexpr PrivateSizeT privateSize = 0;
|
||||||
inline constexpr SpillSizeT spillSize = 0;
|
inline constexpr SpillSizeT spillSize = 0;
|
||||||
inline constexpr AdditionalSizeT additionalSize = -1;
|
inline constexpr LocalRegionSizeT localRegionSize = -1;
|
||||||
inline constexpr WalkOrderT walkOrder = -1;
|
inline constexpr WalkOrderT dispatchWalkOrder = -1;
|
||||||
inline constexpr PartitionDimT partitionDim = -1;
|
inline constexpr PartitionDimT partitionDim = -1;
|
||||||
} // namespace Defaults
|
} // namespace Defaults
|
||||||
|
|
||||||
@@ -422,8 +422,8 @@ struct ExecutionEnvBaseT {
|
|||||||
HasSampleT hasSample = Defaults::hasSample;
|
HasSampleT hasSample = Defaults::hasSample;
|
||||||
PrivateSizeT privateSize = Defaults::privateSize;
|
PrivateSizeT privateSize = Defaults::privateSize;
|
||||||
SpillSizeT spillSize = Defaults::spillSize;
|
SpillSizeT spillSize = Defaults::spillSize;
|
||||||
AdditionalSizeT additionalSize = Defaults::additionalSize;
|
LocalRegionSizeT localRegionSize = Defaults::localRegionSize;
|
||||||
WalkOrderT walkOrder = Defaults::walkOrder;
|
WalkOrderT dispatchWalkOrder = Defaults::dispatchWalkOrder;
|
||||||
PartitionDimT partitionDim = Defaults::partitionDim;
|
PartitionDimT partitionDim = Defaults::partitionDim;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -718,11 +718,11 @@ void populateKernelExecutionEnvironment(KernelDescriptor &dst, const KernelExecu
|
|||||||
dst.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
|
dst.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
|
||||||
dst.kernelAttributes.numThreadsRequired = static_cast<uint32_t>(execEnv.euThreadCount);
|
dst.kernelAttributes.numThreadsRequired = static_cast<uint32_t>(execEnv.euThreadCount);
|
||||||
|
|
||||||
if (execEnv.additionalSize != Types::Kernel::ExecutionEnv::Defaults::additionalSize) {
|
if (execEnv.localRegionSize != Types::Kernel::ExecutionEnv::Defaults::localRegionSize) {
|
||||||
dst.kernelAttributes.additionalSize = static_cast<uint32_t>(execEnv.additionalSize);
|
dst.kernelAttributes.localRegionSize = static_cast<uint32_t>(execEnv.localRegionSize);
|
||||||
}
|
}
|
||||||
if (execEnv.walkOrder != Types::Kernel::ExecutionEnv::Defaults::walkOrder) {
|
if (execEnv.dispatchWalkOrder != Types::Kernel::ExecutionEnv::Defaults::dispatchWalkOrder) {
|
||||||
dst.kernelAttributes.walkOrder = EncodeParamsApiMappings::walkOrder[execEnv.walkOrder];
|
dst.kernelAttributes.dispatchWalkOrder = EncodeParamsApiMappings::dispatchWalkOrder[execEnv.dispatchWalkOrder];
|
||||||
}
|
}
|
||||||
if (execEnv.partitionDim != Types::Kernel::ExecutionEnv::Defaults::partitionDim) {
|
if (execEnv.partitionDim != Types::Kernel::ExecutionEnv::Defaults::partitionDim) {
|
||||||
dst.kernelAttributes.partitionDim = EncodeParamsApiMappings::partitionDim[execEnv.partitionDim];
|
dst.kernelAttributes.partitionDim = EncodeParamsApiMappings::partitionDim[execEnv.partitionDim];
|
||||||
|
|||||||
@@ -43,11 +43,11 @@ enum class RequiredDispatchWalkOrder : uint32_t {
|
|||||||
additional
|
additional
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint32_t additionalKernelLaunchSizeParamNotSet = 0;
|
static constexpr uint32_t localRegionSizeParamNotSet = 0;
|
||||||
|
|
||||||
namespace EncodeParamsApiMappings {
|
namespace EncodeParamsApiMappings {
|
||||||
static constexpr std::array<NEO::RequiredPartitionDim, 3> partitionDim = {{RequiredPartitionDim::x, NEO::RequiredPartitionDim::y, NEO::RequiredPartitionDim::z}};
|
static constexpr std::array<NEO::RequiredPartitionDim, 3> partitionDim = {{RequiredPartitionDim::x, NEO::RequiredPartitionDim::y, NEO::RequiredPartitionDim::z}};
|
||||||
static constexpr std::array<NEO::RequiredDispatchWalkOrder, 3> walkOrder = {{NEO::RequiredDispatchWalkOrder::x, NEO::RequiredDispatchWalkOrder::y, NEO::RequiredDispatchWalkOrder::additional}};
|
static constexpr std::array<NEO::RequiredDispatchWalkOrder, 3> dispatchWalkOrder = {{NEO::RequiredDispatchWalkOrder::x, NEO::RequiredDispatchWalkOrder::y, NEO::RequiredDispatchWalkOrder::additional}};
|
||||||
} // namespace EncodeParamsApiMappings
|
} // namespace EncodeParamsApiMappings
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -57,8 +57,8 @@ struct KernelDescriptor {
|
|||||||
uint32_t numThreadsRequired = 0u;
|
uint32_t numThreadsRequired = 0u;
|
||||||
uint32_t spillFillScratchMemorySize = 0u;
|
uint32_t spillFillScratchMemorySize = 0u;
|
||||||
uint32_t privateScratchMemorySize = 0u;
|
uint32_t privateScratchMemorySize = 0u;
|
||||||
uint32_t additionalSize = NEO::additionalKernelLaunchSizeParamNotSet;
|
uint32_t localRegionSize = NEO::localRegionSizeParamNotSet;
|
||||||
NEO::RequiredDispatchWalkOrder walkOrder = NEO::RequiredDispatchWalkOrder::none;
|
NEO::RequiredDispatchWalkOrder dispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
||||||
NEO::RequiredPartitionDim partitionDim = NEO::RequiredPartitionDim::none;
|
NEO::RequiredPartitionDim partitionDim = NEO::RequiredPartitionDim::none;
|
||||||
ThreadArbitrationPolicy threadArbitrationPolicy = NotPresent;
|
ThreadArbitrationPolicy threadArbitrationPolicy = NotPresent;
|
||||||
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
|
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
|
||||||
|
|||||||
@@ -39,44 +39,44 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
|
|||||||
bool requiresUncachedMocs) {
|
bool requiresUncachedMocs) {
|
||||||
|
|
||||||
EncodeDispatchKernelArgs args{
|
EncodeDispatchKernelArgs args{
|
||||||
0, // eventAddress
|
0, // eventAddress
|
||||||
0, // postSyncImmValue
|
0, // postSyncImmValue
|
||||||
0, // inOrderCounterValue
|
0, // inOrderCounterValue
|
||||||
device, // device
|
device, // device
|
||||||
nullptr, // inOrderExecInfo
|
nullptr, // inOrderExecInfo
|
||||||
dispatchInterface, // dispatchInterface
|
dispatchInterface, // dispatchInterface
|
||||||
nullptr, // surfaceStateHeap
|
nullptr, // surfaceStateHeap
|
||||||
nullptr, // dynamicStateHeap
|
nullptr, // dynamicStateHeap
|
||||||
threadGroupDimensions, // threadGroupDimensions
|
threadGroupDimensions, // threadGroupDimensions
|
||||||
nullptr, // outWalkerPtr
|
nullptr, // outWalkerPtr
|
||||||
nullptr, // cpuWalkerBuffer
|
nullptr, // cpuWalkerBuffer
|
||||||
nullptr, // cpuPayloadBuffer
|
nullptr, // cpuPayloadBuffer
|
||||||
nullptr, // outImplicitArgsPtr
|
nullptr, // outImplicitArgsPtr
|
||||||
nullptr, // additionalCommands
|
nullptr, // additionalCommands
|
||||||
PreemptionMode::Disabled, // preemptionMode
|
PreemptionMode::Disabled, // preemptionMode
|
||||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||||
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
||||||
NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam
|
NEO::localRegionSizeParamNotSet, // localRegionSize
|
||||||
1, // partitionCount
|
1, // partitionCount
|
||||||
0, // reserveExtraPayloadSpace
|
0, // reserveExtraPayloadSpace
|
||||||
1, // maxWgCountPerTile
|
1, // maxWgCountPerTile
|
||||||
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
|
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
|
||||||
false, // isIndirect
|
false, // isIndirect
|
||||||
false, // isPredicate
|
false, // isPredicate
|
||||||
false, // isTimestampEvent
|
false, // isTimestampEvent
|
||||||
requiresUncachedMocs, // requiresUncachedMocs
|
requiresUncachedMocs, // requiresUncachedMocs
|
||||||
false, // isInternal
|
false, // isInternal
|
||||||
false, // isCooperative
|
false, // isCooperative
|
||||||
false, // isHostScopeSignalEvent
|
false, // isHostScopeSignalEvent
|
||||||
false, // isKernelUsingSystemAllocation
|
false, // isKernelUsingSystemAllocation
|
||||||
false, // isKernelDispatchedFromImmediateCmdList
|
false, // isKernelDispatchedFromImmediateCmdList
|
||||||
false, // isRcs
|
false, // isRcs
|
||||||
false, // dcFlushEnable
|
false, // dcFlushEnable
|
||||||
false, // isHeaplessModeEnabled
|
false, // isHeaplessModeEnabled
|
||||||
false, // isHeaplessStateInitEnabled
|
false, // isHeaplessStateInitEnabled
|
||||||
false, // interruptEvent
|
false, // interruptEvent
|
||||||
false, // immediateScratchAddressPatching
|
false, // immediateScratchAddressPatching
|
||||||
false, // makeCommandView
|
false, // makeCommandView
|
||||||
};
|
};
|
||||||
|
|
||||||
return args;
|
return args;
|
||||||
@@ -87,7 +87,7 @@ EncodeWalkerArgs CommandEncodeStatesFixture::createDefaultEncodeWalkerArgs(const
|
|||||||
kernelDescriptor, // kernelDescriptor
|
kernelDescriptor, // kernelDescriptor
|
||||||
NEO::KernelExecutionType::defaultType, // kernelExecutionType
|
NEO::KernelExecutionType::defaultType, // kernelExecutionType
|
||||||
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
|
||||||
0, // additionalSizeParam
|
0, // localRegionSize
|
||||||
0, // maxFrontEndThreads
|
0, // maxFrontEndThreads
|
||||||
false}; // requiredSystemFence
|
false}; // requiredSystemFence
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ ImplicitScalingDispatchCommandArgs ImplicitScalingFixture::createDispatchCommand
|
|||||||
dcFlushFlag, // dcFlush
|
dcFlushFlag, // dcFlush
|
||||||
forceExecutionOnSingleTileFlag, // forceExecutionOnSingleTile
|
forceExecutionOnSingleTileFlag, // forceExecutionOnSingleTile
|
||||||
false, // blockDispatchToCommandBuffer
|
false, // blockDispatchToCommandBuffer
|
||||||
false}; // isRequiredWorkGroupOrder
|
false}; // isRequiredDispatchWorkGroupOrder
|
||||||
|
|
||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user