fix: pass WalkerType to ImplicitScaling helpers

Related-To: NEO-7621

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-12-21 14:33:38 +00:00
committed by Compute-Runtime-Automation
parent 0f5389b452
commit 4ff760baf2
9 changed files with 334 additions and 240 deletions

View File

@@ -463,7 +463,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalker
taskStream->getSpace(totalBytesProgrammed);
flushTaskStream(*taskStream);
-const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs);
+const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType, DefaultWalkerType>(testArgs);
const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
uint32_t expectedValue = 0x4;
@@ -509,7 +509,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalke
taskStream->getSpace(totalBytesProgrammed);
flushTaskStream(*taskStream);
-const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs);
+const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType, DefaultWalkerType>(testArgs);
const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
uint32_t expectedValue = 0x0;

View File

@@ -1009,7 +1009,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandB
taskStream->getSpace(totalBytesProgrammed);
flushStream();
auto expectedGpuAddress = taskStream->getGraphicsAllocation()->getGpuAddress() +
-WalkerPartition::computeControlSectionOffset<FamilyType>(testArgs);
+WalkerPartition::computeControlSectionOffset<FamilyType, DefaultWalkerType>(testArgs);
// 16 partitions updated atomic to value 16
// 17th partition updated it to 17 and was predicated out of the batch buffer

View File

@@ -1080,6 +1080,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
}
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
+using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
UltClDeviceFactory deviceFactory{1, 2};
@@ -1091,7 +1093,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
-auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
+auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1111,13 +1113,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.staticPartitioning = false;
testArgs.synchronizeBeforeExecution = false;
csr.staticWorkPartitioningEnabled = false;
-auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true;
-partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
@@ -1125,19 +1127,21 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = false;
csr.staticWorkPartitioningEnabled = false;
-partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true;
-partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
+using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false);
UltClDeviceFactory deviceFactory{1, 2};
@@ -1149,7 +1153,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
-auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
+auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1169,13 +1173,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.staticPartitioning = false;
testArgs.synchronizeBeforeExecution = false;
csr.staticWorkPartitioningEnabled = false;
-auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true;
-partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
@@ -1183,14 +1187,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = false;
csr.staticWorkPartitioningEnabled = false;
-partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
testArgs.synchronizeBeforeExecution = true;
testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true;
-partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
EXPECT_EQ(returnedSize, partitionSize + baseSize);
}
@@ -1231,6 +1235,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenPipeContro
}
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
+using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
@@ -1239,7 +1245,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
-auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
+auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1252,7 +1258,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
-auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
DispatchInfo dispatchInfo{};
dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
@@ -1262,6 +1268,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
}
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
+using WalkerType = typename FamilyType::DefaultWalkerType;
debugManager.flags.EnableWalkerPartition.set(1u);
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false);
@@ -1270,7 +1278,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getRootDeviceEnvironment()) ? 2 : 1;
-auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
+auto baseSize = sizeof(WalkerType) +
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.kernelHeapSize, device->getRootDeviceEnvironment());
@@ -1283,7 +1291,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
-auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
+auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType, WalkerType>(testArgs);
DispatchInfo dispatchInfo{};
dispatchInfo.setNumberOfWorkgroups({32, 1, 1});