fix: pass WalkerType to ImplicitScaling helpers

Related-To: NEO-7621

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-12-21 14:33:38 +00:00
committed by Compute-Runtime-Automation
parent 0f5389b452
commit 4ff760baf2
9 changed files with 334 additions and 240 deletions

View File

@@ -463,7 +463,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalker
taskStream->getSpace(totalBytesProgrammed); taskStream->getSpace(totalBytesProgrammed);
flushTaskStream(*taskStream); flushTaskStream(*taskStream);
const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType, DefaultWalkerType>(testArgs);
const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
uint32_t expectedValue = 0x4; uint32_t expectedValue = 0x4;
@@ -509,7 +509,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalke
taskStream->getSpace(totalBytesProgrammed); taskStream->getSpace(totalBytesProgrammed);
flushTaskStream(*taskStream); flushTaskStream(*taskStream);
const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType, DefaultWalkerType>(testArgs);
const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
uint32_t expectedValue = 0x0; uint32_t expectedValue = 0x0;

View File

@@ -1009,7 +1009,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandB
taskStream->getSpace(totalBytesProgrammed); taskStream->getSpace(totalBytesProgrammed);
flushStream(); flushStream();
auto expectedGpuAddress = taskStream->getGraphicsAllocation()->getGpuAddress() + auto expectedGpuAddress = taskStream->getGraphicsAllocation()->getGpuAddress() +
WalkerPartition::computeControlSectionOffset<FamilyType>(testArgs); WalkerPartition::computeControlSectionOffset<FamilyType, DefaultWalkerType>(testArgs);
// 16 partitions updated atomic to value 16 // 16 partitions updated atomic to value 16
// 17th partition updated it to 17 and was predicated out of the batch buffer // 17th partition updated it to 17 and was predicated out of the batch buffer

View File

@@ -1080,6 +1080,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) { HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u); debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true); VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
UltClDeviceFactory deviceFactory{1, 2}; UltClDeviceFactory deviceFactory{1, 2};
@@ -1091,7 +1093,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1; size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() + HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment()); EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1111,13 +1113,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.staticPartitioning = false; testArgs.staticPartitioning = false;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
csr.staticWorkPartitioningEnabled = false; csr.staticWorkPartitioningEnabled = false;
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true; csr.staticWorkPartitioningEnabled = true;
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
@@ -1125,19 +1127,21 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.synchronizeBeforeExecution = true; testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = false; testArgs.staticPartitioning = false;
csr.staticWorkPartitioningEnabled = false; csr.staticWorkPartitioningEnabled = false;
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.synchronizeBeforeExecution = true; testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true; csr.staticWorkPartitioningEnabled = true;
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) { HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u); debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false); VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false);
UltClDeviceFactory deviceFactory{1, 2}; UltClDeviceFactory deviceFactory{1, 2};
@@ -1149,7 +1153,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1; size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() + HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment()); EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1169,13 +1173,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.staticPartitioning = false; testArgs.staticPartitioning = false;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
csr.staticWorkPartitioningEnabled = false; csr.staticWorkPartitioningEnabled = false;
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true; csr.staticWorkPartitioningEnabled = true;
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
@@ -1183,14 +1187,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.synchronizeBeforeExecution = true; testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = false; testArgs.staticPartitioning = false;
csr.staticWorkPartitioningEnabled = false; csr.staticWorkPartitioningEnabled = false;
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.synchronizeBeforeExecution = true; testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true; csr.staticWorkPartitioningEnabled = true;
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo); returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize); EXPECT_EQ(returnedSize, partitionSize + baseSize);
} }
@@ -1231,6 +1235,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenPipeContro
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) { HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u); debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true); VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
@@ -1239,7 +1245,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1; size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() + HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment()); EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1252,7 +1258,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count()); testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
DispatchInfo dispatchInfo{}; DispatchInfo dispatchInfo{};
dispatchInfo.setNumberOfWorkgroups({32, 1, 1}); dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
@@ -1262,6 +1268,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) { HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u); debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false); VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false);
@@ -1270,7 +1278,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1; size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() + HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment()); EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1283,7 +1291,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count()); testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
DispatchInfo dispatchInfo{}; DispatchInfo dispatchInfo{};
dispatchInfo.setNumberOfWorkgroups({32, 1, 1}); dispatchInfo.setNumberOfWorkgroups({32, 1, 1});

View File

@@ -84,7 +84,7 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool apiSelfCleanup,
false, false,
false); false);
return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily>(args)); return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily, WalkerType>(args));
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@@ -27,8 +27,6 @@ struct PipeControlArgs;
namespace WalkerPartition { namespace WalkerPartition {
template <typename GfxFamily>
using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
template <typename GfxFamily> template <typename GfxFamily>
using POSTSYNC_DATA = typename GfxFamily::POSTSYNC_DATA; using POSTSYNC_DATA = typename GfxFamily::POSTSYNC_DATA;
template <typename GfxFamily> template <typename GfxFamily>
@@ -455,13 +453,13 @@ void programTilesSynchronizationWithPostSyncs(void *&currentBatchBufferPointer,
} }
} }
template <typename GfxFamily> template <typename GfxFamily, typename WalkerType>
uint64_t computeWalkerSectionSize() { uint64_t computeWalkerSectionSize() {
return sizeof(BATCH_BUFFER_START<GfxFamily>) + return sizeof(BATCH_BUFFER_START<GfxFamily>) +
sizeof(COMPUTE_WALKER<GfxFamily>); sizeof(WalkerType);
} }
template <typename GfxFamily> template <typename GfxFamily, typename WalkerType>
uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) { uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) {
uint64_t size = 0u; uint64_t size = 0u;
@@ -472,7 +470,7 @@ uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) {
size += sizeof(MI_SET_PREDICATE<GfxFamily>) * 2 + size += sizeof(MI_SET_PREDICATE<GfxFamily>) * 2 +
sizeof(BATCH_BUFFER_START<GfxFamily>) * 2; sizeof(BATCH_BUFFER_START<GfxFamily>) * 2;
size += (args.semaphoreProgrammingRequired ? NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait() * args.partitionCount : 0u); size += (args.semaphoreProgrammingRequired ? NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait() * args.partitionCount : 0u);
size += computeWalkerSectionSize<GfxFamily>(); size += computeWalkerSectionSize<GfxFamily, WalkerType>();
size += args.emitPipeControlStall ? NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false) : 0u; size += args.emitPipeControlStall ? NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false) : 0u;
if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) { if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) {
size += computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>(); size += computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>();
@@ -484,10 +482,10 @@ uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) {
return size; return size;
} }
template <typename GfxFamily> template <typename GfxFamily, typename WalkerType>
uint64_t computeWalkerSectionStart(WalkerPartitionArgs &args) { uint64_t computeWalkerSectionStart(WalkerPartitionArgs &args) {
return computeControlSectionOffset<GfxFamily>(args) - return computeControlSectionOffset<GfxFamily, WalkerType>(args) -
computeWalkerSectionSize<GfxFamily>(); computeWalkerSectionSize<GfxFamily, WalkerType>();
} }
template <typename GfxFamily, typename WalkerType> template <typename GfxFamily, typename WalkerType>
@@ -569,7 +567,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
totalBytesProgrammed = 0u; totalBytesProgrammed = 0u;
void *currentBatchBufferPointer = cpuPointer; void *currentBatchBufferPointer = cpuPointer;
auto controlSectionOffset = computeControlSectionOffset<GfxFamily>(args); auto controlSectionOffset = computeControlSectionOffset<GfxFamily, WalkerType>(args);
if (args.synchronizeBeforeExecution) { if (args.synchronizeBeforeExecution) {
auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, inTileCount); auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, inTileCount);
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount); programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount);
@@ -592,7 +590,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer,
totalBytesProgrammed, totalBytesProgrammed,
gpuAddressOfAllocation + gpuAddressOfAllocation +
computeWalkerSectionStart<GfxFamily>(args), computeWalkerSectionStart<GfxFamily, WalkerType>(args),
true, true,
args.secondaryBatchBuffer); args.secondaryBatchBuffer);
@@ -671,7 +669,7 @@ bool isStartAndControlSectionRequired(WalkerPartitionArgs &args) {
return args.synchronizeBeforeExecution || args.crossTileAtomicSynchronization || args.emitSelfCleanup; return args.synchronizeBeforeExecution || args.crossTileAtomicSynchronization || args.emitSelfCleanup;
} }
template <typename GfxFamily> template <typename GfxFamily, typename WalkerType>
uint64_t computeStaticPartitioningControlSectionOffset(WalkerPartitionArgs &args) { uint64_t computeStaticPartitioningControlSectionOffset(WalkerPartitionArgs &args) {
const auto beforeExecutionSyncAtomicSize = args.synchronizeBeforeExecution const auto beforeExecutionSyncAtomicSize = args.synchronizeBeforeExecution
? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>() ? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>()
@@ -697,7 +695,7 @@ uint64_t computeStaticPartitioningControlSectionOffset(WalkerPartitionArgs &args
return beforeExecutionSyncAtomicSize + return beforeExecutionSyncAtomicSize +
wparidRegisterSize + wparidRegisterSize +
pipeControlSize + pipeControlSize +
sizeof(COMPUTE_WALKER<GfxFamily>) + sizeof(WalkerType) +
selfCleanupSectionSize + selfCleanupSectionSize +
afterExecutionSyncAtomicSize + afterExecutionSyncAtomicSize +
afterExecutionSyncPostSyncSize + afterExecutionSyncPostSyncSize +
@@ -716,7 +714,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
void *currentBatchBufferPointer = cpuPointer; void *currentBatchBufferPointer = cpuPointer;
// Get address of the control section // Get address of the control section
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<GfxFamily>(args); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<GfxFamily, WalkerType>(args);
const auto afterControlSectionOffset = controlSectionOffset + sizeof(StaticPartitioningControlSection); const auto afterControlSectionOffset = controlSectionOffset + sizeof(StaticPartitioningControlSection);
// Synchronize tiles before walker // Synchronize tiles before walker
@@ -781,15 +779,15 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
} }
} }
template <typename GfxFamily> template <typename GfxFamily, typename WalkerType>
uint64_t estimateSpaceRequiredInCommandBuffer(WalkerPartitionArgs &args) { uint64_t estimateSpaceRequiredInCommandBuffer(WalkerPartitionArgs &args) {
uint64_t size = {}; uint64_t size = {};
if (args.staticPartitioning) { if (args.staticPartitioning) {
size += computeStaticPartitioningControlSectionOffset<GfxFamily>(args); size += computeStaticPartitioningControlSectionOffset<GfxFamily, WalkerType>(args);
size += isStartAndControlSectionRequired<GfxFamily>(args) ? sizeof(StaticPartitioningControlSection) : 0u; size += isStartAndControlSectionRequired<GfxFamily>(args) ? sizeof(StaticPartitioningControlSection) : 0u;
size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize<GfxFamily>(staticPartitioningFieldsForCleanupCount, args) : 0u; size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize<GfxFamily>(staticPartitioningFieldsForCleanupCount, args) : 0u;
} else { } else {
size += computeControlSectionOffset<GfxFamily>(args); size += computeControlSectionOffset<GfxFamily, WalkerType>(args);
size += sizeof(BatchBufferControlData); size += sizeof(BatchBufferControlData);
size += args.emitBatchBufferEnd ? sizeof(BATCH_BUFFER_END<GfxFamily>) : 0u; size += args.emitBatchBufferEnd ? sizeof(BATCH_BUFFER_END<GfxFamily>) : 0u;
size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize<GfxFamily>(dynamicPartitioningFieldsForCleanupCount, args) : 0u; size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize<GfxFamily>(dynamicPartitioningFieldsForCleanupCount, args) : 0u;

View File

@@ -1247,7 +1247,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
args.emitSelfCleanup = true; args.emitSelfCleanup = true;
args.dcFlushEnable = dispatchArgs.dcFlushEnable; args.dcFlushEnable = dispatchArgs.dcFlushEnable;
auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset<FamilyType>(args); auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset<FamilyType, DefaultWalkerType>(args);
uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() + uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() +
cleanupSectionOffset; cleanupSectionOffset;
constexpr uint32_t expectedData = 0ull; constexpr uint32_t expectedData = 0ull;

View File

@@ -22,9 +22,11 @@ struct WalkerPartitionTests : public ::testing::Test {
template <typename GfxFamily> template <typename GfxFamily>
auto createWalker(uint64_t postSyncAddress) { auto createWalker(uint64_t postSyncAddress) {
WalkerPartition::COMPUTE_WALKER<GfxFamily> walker; using WalkerType = typename GfxFamily::DefaultWalkerType;
WalkerType walker;
walker = GfxFamily::cmdInitGpgpuWalker; walker = GfxFamily::cmdInitGpgpuWalker;
walker.setPartitionType(COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
auto &postSync = walker.getPostSync(); auto &postSync = walker.getPostSync();
postSync.setOperation(POSTSYNC_DATA<GfxFamily>::OPERATION::OPERATION_WRITE_TIMESTAMP); postSync.setOperation(POSTSYNC_DATA<GfxFamily>::OPERATION::OPERATION_WRITE_TIMESTAMP);
postSync.setDestinationAddress(postSyncAddress); postSync.setDestinationAddress(postSyncAddress);

View File

@@ -13,6 +13,8 @@
#include "shared/test/unit_test/encoders/walker_partition_fixture_xehp_and_later.h" #include "shared/test/unit_test/encoders/walker_partition_fixture_xehp_and_later.h"
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *mockExecutionEnvironment.rootDeviceEnvironments[0]); testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *mockExecutionEnvironment.rootDeviceEnvironments[0]);
@@ -20,9 +22,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000; uint64_t postSyncAddress = 0x8000456000;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
auto &postSync = walker.getPostSync(); auto &postSync = walker.getPostSync();
postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP); postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
postSync.setDestinationAddress(postSyncAddress); postSync.setDestinationAddress(postSyncAddress);
@@ -34,13 +36,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs)); auto offset = computeControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedCommandUsedSize, offset);
auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
@@ -135,9 +138,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst
EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset); EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset);
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, batchBufferStart); ASSERT_NE(nullptr, batchBufferStart);
@@ -156,6 +159,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
@@ -169,7 +174,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer, WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
nullptr, nullptr,
@@ -191,9 +196,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
{ {
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@@ -239,6 +244,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndPreWalkerSyncWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndPreWalkerSyncWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
@@ -252,7 +259,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer, WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
@@ -294,9 +301,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
{ {
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@@ -342,6 +349,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndSynchronizationWithPostSyncsWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndSynchronizationWithPostSyncsWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.semaphoreProgrammingRequired = true; testArgs.semaphoreProgrammingRequired = true;
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
@@ -356,14 +365,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer, WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType, WalkerType>(cmdBuffer,
nullptr, nullptr,
cmdBufferGpuAddress, cmdBufferGpuAddress,
&walker, &walker,
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
EXPECT_EQ(controlSectionOffset, totalBytesProgrammed); EXPECT_EQ(controlSectionOffset, totalBytesProgrammed);
auto parsedOffset = 0u; auto parsedOffset = 0u;
@@ -377,9 +386,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
{ {
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@@ -429,6 +438,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
@@ -443,7 +454,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
@@ -454,7 +465,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
auto parsedOffset = 0u; auto parsedOffset = 0u;
@@ -468,9 +479,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
{ {
auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@@ -581,6 +592,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupAndCrossTileSyncDisabledWithFlagWhenConstructCommandBufferIsCalledThenStillProgramTheSync) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupAndCrossTileSyncDisabledWithFlagWhenConstructCommandBufferIsCalledThenStillProgramTheSync) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
@@ -596,7 +609,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
@@ -607,7 +620,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
auto parsedOffset = 0u; auto parsedOffset = 0u;
@@ -621,9 +634,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
{ {
auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@@ -734,6 +747,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupAndAtomicsForSelfCleanupWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupAndAtomicsForSelfCleanupWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
@@ -749,7 +764,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
@@ -760,7 +775,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
auto parsedOffset = 0u; auto parsedOffset = 0u;
@@ -774,9 +789,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
{ {
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@@ -893,6 +908,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSlefCleanupAndCrossTileSyncDisabledWithFlagWhenUsingAtomicForSelfCleanupAndConstructCommandBufferIsCalledThenStillProgramTheSync) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSlefCleanupAndCrossTileSyncDisabledWithFlagWhenUsingAtomicForSelfCleanupAndConstructCommandBufferIsCalledThenStillProgramTheSync) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
@@ -909,7 +926,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
@@ -920,7 +937,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
auto parsedOffset = 0u; auto parsedOffset = 0u;
@@ -934,9 +951,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
{ {
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@@ -1053,6 +1070,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.semaphoreProgrammingRequired = true; testArgs.semaphoreProgrammingRequired = true;
@@ -1064,9 +1083,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000; uint64_t postSyncAddress = 0x8000456000;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
auto &postSync = walker.getPostSync(); auto &postSync = walker.getPostSync();
postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP); postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
postSync.setDestinationAddress(postSyncAddress); postSync.setDestinationAddress(postSyncAddress);
@@ -1078,13 +1097,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * testArgs.partitionCount; sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * testArgs.partitionCount;
auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs)); auto offset = computeControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedCommandUsedSize, offset);
auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
@@ -1170,9 +1190,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti
EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset); EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset);
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, batchBufferStart); ASSERT_NE(nullptr, batchBufferStart);
@@ -1191,6 +1211,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenWparidRegisterProgrammingDisabledThenExpectNoMiLoadRegisterMemCommand) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenWparidRegisterProgrammingDisabledThenExpectNoMiLoadRegisterMemCommand) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.initializeWparidRegister = false; testArgs.initializeWparidRegister = false;
@@ -1206,10 +1228,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
testArgs.workPartitionAllocationGpuVa = 0x8000444000; testArgs.workPartitionAllocationGpuVa = 0x8000444000;
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); uint64_t expectedControlSectionOffset = sizeof(WalkerType);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset); EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset);
WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer, WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
nullptr, nullptr,
@@ -1218,19 +1240,21 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
auto parsedOffset = 0u; auto parsedOffset = 0u;
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
EXPECT_EQ(parsedOffset, totalBytesProgrammed); EXPECT_EQ(parsedOffset, totalBytesProgrammed);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.emitSelfCleanup = false; testArgs.emitSelfCleanup = false;
@@ -1246,10 +1270,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
auto walker = createWalker<FamilyType>(postSyncAddress); auto walker = createWalker<FamilyType>(postSyncAddress);
uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>) + uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
uint32_t totalBytesProgrammed{}; uint32_t totalBytesProgrammed{};
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs); const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset); EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset);
WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer, WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
nullptr, nullptr,
@@ -1258,7 +1282,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
auto parsedOffset = 0u; auto parsedOffset = 0u;
@@ -1272,16 +1296,18 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
} }
{ {
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
} }
EXPECT_EQ(parsedOffset, totalBytesProgrammed); EXPECT_EQ(parsedOffset, totalBytesProgrammed);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferredAndWalkerWithNonUniformStartWhenDynamicPartitionSelectedThenExpectReconfigureWparidToStatic) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferredAndWalkerWithNonUniformStartWhenDynamicPartitionSelectedThenExpectReconfigureWparidToStatic) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdStartingX(1u); walker.setThreadGroupIdStartingX(1u);
@@ -1305,14 +1331,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>); sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>);
EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs)); auto offset = computeControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedCommandUsedSize, offset);
auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
auto totalProgrammedSize = expectedCommandUsedSize + sizeof(BatchBufferControlData); auto totalProgrammedSize = expectedCommandUsedSize + sizeof(BatchBufferControlData);
testArgs.tileCount = 2; testArgs.tileCount = 2;
@@ -1404,9 +1431,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr
EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + totalProgrammedSize); EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + totalProgrammedSize);
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, computeWalker); ASSERT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, batchBufferStart); ASSERT_NE(nullptr, batchBufferStart);

View File

@@ -27,13 +27,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramRegisterCommandWh
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledWithoutBatchBufferEndThenBatchBufferEndIsNotProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledWithoutBatchBufferEndThenBatchBufferEndIsNotProgrammed) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(cmdBuffer, WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
nullptr, nullptr,
@@ -42,54 +44,60 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst
totalBytesProgrammed, totalBytesProgrammed,
testArgs, testArgs,
*defaultHwInfo); *defaultHwInfo);
auto totalProgrammedSize = computeControlSectionOffset<FamilyType>(testArgs) + auto totalProgrammedSize = computeControlSectionOffset<FamilyType, WalkerType>(testArgs) +
sizeof(BatchBufferControlData); sizeof(BatchBufferControlData);
EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenItIsCalledThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenItIsCalledThenProperSizeIsReturned) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::BatchBufferControlData) + sizeof(WalkerPartition::BatchBufferControlData) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
EXPECT_EQ(expectedUsedSize, auto size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize, size);
testArgs.emitBatchBufferEnd = true; testArgs.emitBatchBufferEnd = true;
EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END<FamilyType>), size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END<FamilyType>), size);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenPartitionCountIs4ThenSizeIsProperlyEstimated) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenPartitionCountIs4ThenSizeIsProperlyEstimated) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 4u; testArgs.partitionCount = 4u;
auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::BatchBufferControlData) + sizeof(WalkerPartition::BatchBufferControlData) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
EXPECT_EQ(expectedUsedSize, auto size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize, size);
testArgs.emitBatchBufferEnd = true; testArgs.emitBatchBufferEnd = true;
EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END<FamilyType>), size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END<FamilyType>), size);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
@@ -97,7 +105,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationAndSynchronize
sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::BatchBufferControlData) + sizeof(WalkerPartition::BatchBufferControlData) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
@@ -105,18 +113,20 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationAndSynchronize
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
EXPECT_EQ(expectedUsedSize, auto size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize, size);
testArgs.synchronizeBeforeExecution = true; testArgs.synchronizeBeforeExecution = true;
EXPECT_EQ(expectedUsedSize + expectedDelta, size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize + expectedDelta, size);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationWhenItIsCalledThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationWhenItIsCalledThenProperSizeIsReturned) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>) + const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
@@ -125,19 +135,22 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstima
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
EXPECT_EQ(expectedUsedSize,
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); auto size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedUsedSize, size);
testArgs.emitBatchBufferEnd = true; testArgs.emitBatchBufferEnd = true;
EXPECT_EQ(expectedUsedSize, size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize, size);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>) + const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
@@ -146,16 +159,20 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstima
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
EXPECT_EQ(expectedUsedSize,
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); auto size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedUsedSize, size);
testArgs.synchronizeBeforeExecution = true; testArgs.synchronizeBeforeExecution = true;
const auto preExecutionSynchronizationSize = sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); const auto preExecutionSynchronizationSize = sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
EXPECT_EQ(expectedUsedSize + preExecutionSynchronizationSize,
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedUsedSize + preExecutionSynchronizationSize, size);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSectionsWhenItIsCalledThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSectionsWhenItIsCalledThenProperSizeIsReturned) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
@@ -166,7 +183,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSec
sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>) + sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>) +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::BatchBufferControlData) + sizeof(WalkerPartition::BatchBufferControlData) +
@@ -175,11 +192,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSec
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * 2 +
sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>) * 3; sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>) * 3;
EXPECT_EQ(expectedUsedSize, auto size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize, size);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSectionsWhenAtomicsUsedForSelfCleanupThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSectionsWhenAtomicsUsedForSelfCleanupThenProperSizeIsReturned) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
@@ -191,7 +210,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSec
sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::BatchBufferControlData) + sizeof(WalkerPartition::BatchBufferControlData) +
@@ -200,8 +219,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSec
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * 2 +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 3; sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 3;
EXPECT_EQ(expectedUsedSize, auto size = estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); EXPECT_EQ(expectedUsedSize, size);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWparidPredicationMaskWhenItIsCalledWithWrongInputThenFalseIsReturnedAndNothingIsProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWparidPredicationMaskWhenItIsCalledWithWrongInputThenFalseIsReturnedAndNothingIsProgrammed) {
@@ -396,55 +415,59 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramBatchBufferStartC
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhenItIsCalledThenWalkerIsProperlyProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhenItIsCalledThenWalkerIsProperlyProgrammed) {
auto expectedUsedSize = sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
auto expectedUsedSize = sizeof(WalkerType);
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(7u); walker.setThreadGroupIdXDimension(7u);
walker.setThreadGroupIdYDimension(10u); walker.setThreadGroupIdYDimension(10u);
walker.setThreadGroupIdZDimension(11u); walker.setThreadGroupIdZDimension(11u);
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
void *walkerCommandAddress = cmdBufferAddress; void *walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false); programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false);
auto walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand); ASSERT_NE(nullptr, walkerCommand);
EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); EXPECT_EQ(expectedUsedSize, totalBytesProgrammed);
EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable()); EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable());
EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType());
EXPECT_EQ(4u, walkerCommand->getPartitionSize()); EXPECT_EQ(4u, walkerCommand->getPartitionSize());
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y);
walkerCommandAddress = cmdBufferAddress; walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false); programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false);
walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand); ASSERT_NE(nullptr, walkerCommand);
EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y, walkerCommand->getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walkerCommand->getPartitionType());
EXPECT_EQ(5u, walkerCommand->getPartitionSize()); EXPECT_EQ(5u, walkerCommand->getPartitionSize());
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z);
walkerCommandAddress = cmdBufferAddress; walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false); programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false);
walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand); ASSERT_NE(nullptr, walkerCommand);
EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z, walkerCommand->getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walkerCommand->getPartitionType());
EXPECT_EQ(6u, walkerCommand->getPartitionSize()); EXPECT_EQ(6u, walkerCommand->getPartitionSize());
// if we program with partition Count == 1 then do not trigger partition stuff // if we program with partition Count == 1 then do not trigger partition stuff
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
walkerCommandAddress = cmdBufferAddress; walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u, false); programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u, false);
walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand); ASSERT_NE(nullptr, walkerCommand);
EXPECT_EQ(0u, walkerCommand->getPartitionSize()); EXPECT_EQ(0u, walkerCommand->getPartitionSize());
EXPECT_FALSE(walkerCommand->getWorkloadPartitionEnable()); EXPECT_FALSE(walkerCommand->getWorkloadPartitionEnable());
EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walkerCommand->getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walkerCommand->getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWhenComputePartitionCountIsCalledThenDefaultSizeAndTypeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWhenComputePartitionCountIsCalledThenDefaultSizeAndTypeIsReturned) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(16u); walker.setThreadGroupIdXDimension(16u);
@@ -452,11 +475,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWhenComputePartiti
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithNonUniformStartWhenComputePartitionCountIsCalledThenPartitionsAreDisabled) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithNonUniformStartWhenComputePartitionCountIsCalledThenPartitionsAreDisabled) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdStartingX(1u); walker.setThreadGroupIdStartingX(1u);
@@ -464,7 +489,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithNonUniformStar
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(1u, partitionCount); EXPECT_EQ(1u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType());
walker.setThreadGroupIdStartingX(0u); walker.setThreadGroupIdStartingX(0u);
walker.setThreadGroupIdStartingY(1u); walker.setThreadGroupIdStartingY(1u);
@@ -472,7 +497,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithNonUniformStar
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(1u, partitionCount); EXPECT_EQ(1u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType());
walker.setThreadGroupIdStartingY(0u); walker.setThreadGroupIdStartingY(0u);
walker.setThreadGroupIdStartingZ(1u); walker.setThreadGroupIdStartingZ(1u);
@@ -480,11 +505,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithNonUniformStar
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(1u, partitionCount); EXPECT_EQ(1u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenHighestDimensionIsPartitioned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenHighestDimensionIsPartitioned) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(64u); walker.setThreadGroupIdXDimension(64u);
walker.setThreadGroupIdYDimension(64u); walker.setThreadGroupIdYDimension(64u);
@@ -494,25 +521,27 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkg
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
walker.setThreadGroupIdYDimension(65u); walker.setThreadGroupIdYDimension(65u);
walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
walker.setThreadGroupIdZDimension(66u); walker.setThreadGroupIdZDimension(66u);
walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDisabledMinimalPartitionSizeWhenComputePartitionSizeThenProperValueIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDisabledMinimalPartitionSizeWhenComputePartitionSizeThenProperValueIsReturned) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(64u); walker.setThreadGroupIdXDimension(64u);
walker.setThreadGroupIdYDimension(64u); walker.setThreadGroupIdYDimension(64u);
@@ -525,25 +554,27 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDisabledMinimalPartition
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(16u, partitionCount); EXPECT_EQ(16u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
walker.setThreadGroupIdYDimension(65u); walker.setThreadGroupIdYDimension(65u);
walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(16u, partitionCount); EXPECT_EQ(16u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
walker.setThreadGroupIdZDimension(66u); walker.setThreadGroupIdZDimension(66u);
walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(16u, partitionCount); EXPECT_EQ(16u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenPartitionCountIsClampedToHighestDimension) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenPartitionCountIsClampedToHighestDimension) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(15u); walker.setThreadGroupIdXDimension(15u);
walker.setThreadGroupIdYDimension(7u); walker.setThreadGroupIdYDimension(7u);
@@ -553,28 +584,30 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkg
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
walker.setThreadGroupIdYDimension(1u); walker.setThreadGroupIdYDimension(1u);
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeHintWhenPartitionCountIsObtainedThenSuggestedTypeIsUsedForPartition) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeHintWhenPartitionCountIsObtainedThenSuggestedTypeIsUsedForPartition) {
using WalkerType = typename FamilyType::DefaultWalkerType;
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(8u); walker.setThreadGroupIdXDimension(8u);
walker.setThreadGroupIdYDimension(4u); walker.setThreadGroupIdYDimension(4u);
@@ -585,31 +618,33 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeH
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X)); debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X));
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y)); debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y));
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z)); debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z));
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenInvalidPartitionTypeIsRequestedWhenPartitionCountIsObtainedThenFail) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenInvalidPartitionTypeIsRequestedWhenPartitionCountIsObtainedThenFail) {
using WalkerType = typename FamilyType::DefaultWalkerType;
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(8u); walker.setThreadGroupIdXDimension(8u);
walker.setThreadGroupIdYDimension(4u); walker.setThreadGroupIdYDimension(4u);
@@ -621,7 +656,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenInvalidPartitionTypeIsRe
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithSmallXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsAdujsted) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithSmallXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsAdujsted) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(32u); walker.setThreadGroupIdXDimension(32u);
walker.setThreadGroupIdYDimension(1024u); walker.setThreadGroupIdYDimension(1024u);
@@ -631,11 +668,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithSmallXDimensio
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithBigXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsNotAdjusted) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithBigXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsNotAdjusted) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(16384u); walker.setThreadGroupIdXDimension(16384u);
walker.setThreadGroupIdYDimension(1u); walker.setThreadGroupIdYDimension(1u);
@@ -645,11 +684,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithBigXDimensionS
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(16u, partitionCount); EXPECT_EQ(16u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenCustomMinimalPartitionSizeWhenComputePartitionCountThenProperValueIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenCustomMinimalPartitionSizeWhenComputePartitionCountThenProperValueIsReturned) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(16384u); walker.setThreadGroupIdXDimension(16384u);
walker.setThreadGroupIdYDimension(1u); walker.setThreadGroupIdYDimension(1u);
@@ -662,11 +703,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenCustomMinimalPartitionSi
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeProgrammedWhenPartitionCountIsObtainedAndItEqualsOneThenPartitionMechanismIsDisabled) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeProgrammedWhenPartitionCountIsObtainedAndItEqualsOneThenPartitionMechanismIsDisabled) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setThreadGroupIdYDimension(1u); walker.setThreadGroupIdYDimension(1u);
@@ -676,11 +719,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeP
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(1u, partitionCount); EXPECT_EQ(1u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenXDimensionIsNotLargetAnd2DImagesAreUsedWhenPartitionTypeIsObtainedThenSelectXDimension) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenXDimensionIsNotLargetAnd2DImagesAreUsedWhenPartitionTypeIsObtainedThenSelectXDimension) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(8u); walker.setThreadGroupIdXDimension(8u);
walker.setThreadGroupIdYDimension(64u); walker.setThreadGroupIdYDimension(64u);
@@ -688,14 +732,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenXDimensionIsNotLargetAnd
bool staticPartitioning = false; bool staticPartitioning = false;
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, false, &staticPartitioning);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::x, 4u, false, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::x, 4u, false, &staticPartitioning);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndNonPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndNonPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setThreadGroupIdYDimension(1u); walker.setThreadGroupIdYDimension(1u);
@@ -705,11 +750,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndNon
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::x, 4u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::x, 4u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setThreadGroupIdYDimension(2u); walker.setThreadGroupIdYDimension(2u);
@@ -719,11 +765,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPar
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndBigPartitionCountProgrammedInWalkerWhenPartitionCountIsObtainedThenNumberOfPartitionsIsEqualToNumberOfTiles) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndBigPartitionCountProgrammedInWalkerWhenPartitionCountIsObtainedThenNumberOfPartitionsIsEqualToNumberOfTiles) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setThreadGroupIdYDimension(16384u); walker.setThreadGroupIdYDimension(16384u);
@@ -733,11 +780,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndBig
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndAndNonUniformStartProgrammedInWalkerWhenPartitionCountIsObtainedThenDoNotAllowStaticPartitioningAndSetPartitionCountToOne) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndAndNonUniformStartProgrammedInWalkerWhenPartitionCountIsObtainedThenDoNotAllowStaticPartitioningAndSetPartitionCountToOne) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setThreadGroupIdYDimension(16384u); walker.setThreadGroupIdYDimension(16384u);
@@ -750,12 +798,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndAnd
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning);
EXPECT_FALSE(staticPartitioning); EXPECT_FALSE(staticPartitioning);
EXPECT_EQ(1u, partitionCount); EXPECT_EQ(1u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionTypeHintIsUsedWhenPartitionCountIsObtainedThenUseRequestedPartitionType) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionTypeHintIsUsedWhenPartitionCountIsObtainedThenUseRequestedPartitionType) {
using WalkerType = typename FamilyType::DefaultWalkerType;
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setThreadGroupIdYDimension(16384u); walker.setThreadGroupIdYDimension(16384u);
@@ -765,19 +814,20 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPar
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z)); debugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z));
staticPartitioning = false; staticPartitioning = false;
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 4u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(4u, partitionCount); EXPECT_EQ(4u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
} }
// Checks which partition dimension computePartitionCountAndSetPartitionType selects for a
// walker with X = 1 and Y = 16384 thread groups. The Z dimension and the declaration of
// staticPartitioning are set in lines that fall between the diff hunks and are not shown.
// NOTE(review): this text is a side-by-side diff; where a line holds two statements, the
// first is the pre-change form and the second the post-change form (WalkerType alias).
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsNotDivisibleByTwoButIsAboveThreasholThenItIsSelected) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsNotDivisibleByTwoButIsAboveThreasholThenItIsSelected) {
using WalkerType = typename FamilyType::DefaultWalkerType;
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(1u); walker.setThreadGroupIdXDimension(1u);
walker.setThreadGroupIdYDimension(16384u); walker.setThreadGroupIdYDimension(16384u);
// Diff hunk boundary: the remaining setup of this test is elided here.
@@ -787,19 +837,20 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZD
// First selection: static partitioning reported, partition count 2, split along Z.
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
// With WalkerPartitionPreferHighestDimension set to 0, the same call is expected to pick Y instead.
debugManager.flags.WalkerPartitionPreferHighestDimension.set(0); debugManager.flags.WalkerPartitionPreferHighestDimension.set(0);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
} }
// Checks which partition dimension computePartitionCountAndSetPartitionType selects for a
// walker with X = 16384 and Y = 2 thread groups. The Z dimension and the declaration of
// staticPartitioning are set in lines that fall between the diff hunks and are not shown.
// NOTE(review): this text is a side-by-side diff; where a line holds two statements, the
// first is the pre-change form and the second the post-change form (WalkerType alias).
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenYDimensionIsDivisibleByTwoThenItIsSelected) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenYDimensionIsDivisibleByTwoThenItIsSelected) {
using WalkerType = typename FamilyType::DefaultWalkerType;
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(16384u); walker.setThreadGroupIdXDimension(16384u);
walker.setThreadGroupIdYDimension(2u); walker.setThreadGroupIdYDimension(2u);
// Diff hunk boundary: the remaining setup of this test is elided here.
@@ -809,19 +860,20 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenYD
// First selection: static partitioning reported, partition count 2, split along Y.
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
// With WalkerPartitionPreferHighestDimension set to 0, the same call is expected to pick X instead.
debugManager.flags.WalkerPartitionPreferHighestDimension.set(0); debugManager.flags.WalkerPartitionPreferHighestDimension.set(0);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
} }
// Checks which partition dimension computePartitionCountAndSetPartitionType selects for a
// walker with X = 512 and Y = 512 thread groups. The Z dimension and the declaration of
// staticPartitioning are set in lines that fall between the diff hunks and are not shown.
// NOTE(review): this text is a side-by-side diff; where a line holds two statements, the
// first is the pre-change form and the second the post-change form (WalkerType alias).
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsDivisibleByTwoThenItIsSelected) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsDivisibleByTwoThenItIsSelected) {
using WalkerType = typename FamilyType::DefaultWalkerType;
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(512u); walker.setThreadGroupIdXDimension(512u);
walker.setThreadGroupIdYDimension(512u); walker.setThreadGroupIdYDimension(512u);
// Diff hunk boundary: the remaining setup of this test is elided here.
@@ -831,17 +883,18 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZD
// First selection: static partitioning reported, partition count 2, split along Z.
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning); auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
// Even with WalkerPartitionPreferHighestDimension set to 0, Z is still the expected choice here.
debugManager.flags.WalkerPartitionPreferHighestDimension.set(0); debugManager.flags.WalkerPartitionPreferHighestDimension.set(0);
partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning); partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, NEO::RequiredPartitionDim::none, 2u, true, &staticPartitioning);
EXPECT_TRUE(staticPartitioning); EXPECT_TRUE(staticPartitioning);
EXPECT_EQ(2u, partitionCount); EXPECT_EQ(2u, partitionCount);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
} }
// Command-layout test for the self-cleanup path: with crossTileAtomicSynchronization disabled
// and emitSelfCleanup enabled, it checks the control-section offset and then decodes commands
// from the programmed buffer. Several stretches of the body lie between diff hunks and are
// not shown, so only the visible checks are described below.
// NOTE(review): this text is a side-by-side diff; where a line holds two statements, the
// first is the pre-change form and the second the post-change form (WalkerType alias).
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
@@ -852,9 +905,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe
testArgs.emitSelfCleanup = true; testArgs.emitSelfCleanup = true;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000; uint64_t postSyncAddress = 0x8000456000;
// Walker partitioned along X, with a timestamp-writing post-sync aimed at postSyncAddress.
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
auto &postSync = walker.getPostSync(); auto &postSync = walker.getPostSync();
postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP); postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
postSync.setDestinationAddress(postSyncAddress); postSync.setDestinationAddress(postSyncAddress);
// The lines after this hunk header are the tail of the expectedCommandUsedSize sum; its
// leading terms are outside the shown hunk.
@@ -866,14 +919,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>); sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
// The control-section offset reported by the helper must equal the summed command size.
EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs)); auto offset = computeControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedCommandUsedSize, offset);
// cleanupSectionOffset sits sizeof(BatchBufferControlData) past the command size.
auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
@@ -975,9 +1029,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe
// batchBufferStartFinal is expected to target the cleanup section's GPU address.
EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset); EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset);
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
// The next command must decode as the walker, followed by another BATCH_BUFFER_START.
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
EXPECT_NE(nullptr, computeWalker); EXPECT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, batchBufferStart); ASSERT_NE(nullptr, batchBufferStart);
@@ -1048,6 +1102,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe
} }
// Command-layout test for self-cleanup with atomics (per the test name): with
// crossTileAtomicSynchronization disabled, it checks the control-section offset and then
// decodes commands from the programmed buffer. The remaining testArgs setup and several
// stretches of the body lie between diff hunks and are not shown.
// NOTE(review): this text is a side-by-side diff; where a line holds two statements, the
// first is the pre-change form and the second the post-change form (WalkerType alias).
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUsedForCleanupWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUsedForCleanupWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) {
using WalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
@@ -1059,9 +1114,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000; uint64_t postSyncAddress = 0x8000456000;
// Walker partitioned along X, with a timestamp-writing post-sync aimed at postSyncAddress.
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
auto &postSync = walker.getPostSync(); auto &postSync = walker.getPostSync();
postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP); postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
postSync.setDestinationAddress(postSyncAddress); postSync.setDestinationAddress(postSyncAddress);
// The lines after this hunk header are the tail of the expectedCommandUsedSize sum; its
// leading terms are outside the shown hunk. The final visible term is MI_ATOMIC.
@@ -1073,14 +1128,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + sizeof(WalkerType) +
sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>); sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
// The control-section offset reported by the helper must equal the summed command size.
EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs)); auto offset = computeControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedCommandUsedSize, offset);
// cleanupSectionOffset sits sizeof(BatchBufferControlData) past the command size.
auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
@@ -1183,9 +1239,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse
// batchBufferStartFinal is expected to target the cleanup section's GPU address.
EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset); EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset);
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
// The next command must decode as the walker, followed by another BATCH_BUFFER_START.
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
EXPECT_NE(nullptr, computeWalker); EXPECT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, batchBufferStart); ASSERT_NE(nullptr, batchBufferStart);
@@ -1262,6 +1318,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) {
using WalkerType = typename FamilyType::DefaultWalkerType;
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
@@ -1272,9 +1329,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenP
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000; uint64_t postSyncAddress = 0x8000456000;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
auto &postSync = walker.getPostSync(); auto &postSync = walker.getPostSync();
postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP); postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
postSync.setDestinationAddress(postSyncAddress); postSync.setDestinationAddress(postSyncAddress);
@@ -1285,12 +1342,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenP
sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); sizeof(WalkerType);
EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs)); auto offset = computeControlSectionOffset<FamilyType, WalkerType>(testArgs);
EXPECT_EQ(expectedCommandUsedSize, offset);
auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
@@ -1358,9 +1416,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenP
EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset); EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset);
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); auto computeWalker = genCmdCast<WalkerType *>(ptrOffset(cmdBuffer, parsedOffset));
EXPECT_NE(nullptr, computeWalker); EXPECT_NE(nullptr, computeWalker);
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); parsedOffset += sizeof(WalkerType);
batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, batchBufferStart); ASSERT_NE(nullptr, batchBufferStart);
@@ -1690,30 +1748,31 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenBarrierProgrammingWhenEm
} }
// Verifies the partition size programPartitionedWalker writes for a 32x1x1 walker split
// along X when called with 2u (presumably the partition count; confirm against the helper's
// signature): 16 when forceExecutionOnSingleTile is false, 32 when it is true.
// NOTE(review): this text is a side-by-side diff; where a line holds two statements, the
// first is the pre-change form and the second the post-change form (WalkerType alias).
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTileWhenProgramComputeWalkerThenWalkerIsProperlyProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTileWhenProgramComputeWalkerThenWalkerIsProperlyProgrammed) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; using WalkerType = typename FamilyType::DefaultWalkerType;
WalkerType walker;
walker = FamilyType::cmdInitGpgpuWalker; walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(32u); walker.setThreadGroupIdXDimension(32u);
walker.setThreadGroupIdYDimension(1u); walker.setThreadGroupIdYDimension(1u);
walker.setThreadGroupIdZDimension(1u); walker.setThreadGroupIdZDimension(1u);
bool forceExecutionOnSingleTile = false; bool forceExecutionOnSingleTile = false;
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
// Capture the current buffer position before programming so the written walker can be
// decoded afterwards (presumably programPartitionedWalker advances cmdBufferAddress).
void *walkerCommandAddress = cmdBufferAddress; void *walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, forceExecutionOnSingleTile); programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, forceExecutionOnSingleTile);
auto walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand); ASSERT_NE(nullptr, walkerCommand);
EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable()); EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable());
EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType());
EXPECT_EQ(16u, walkerCommand->getPartitionSize()); EXPECT_EQ(16u, walkerCommand->getPartitionSize());
// Same walker with single-tile execution forced: workload partitioning stays enabled and
// the type stays X, but the expected partition size now equals the full X dimension (32).
forceExecutionOnSingleTile = true; forceExecutionOnSingleTile = true;
walkerCommandAddress = cmdBufferAddress; walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, forceExecutionOnSingleTile); programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, forceExecutionOnSingleTile);
walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand); ASSERT_NE(nullptr, walkerCommand);
EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable()); EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable());
EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType()); EXPECT_EQ(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType());
EXPECT_EQ(32u, walkerCommand->getPartitionSize()); EXPECT_EQ(32u, walkerCommand->getPartitionSize());
} }