Rename functions and variables in Implicit Scaling

Related-To: NEO-6244

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-09-16 12:11:22 +00:00
committed by Compute-Runtime-Automation
parent eace896ec8
commit eda3531729
12 changed files with 160 additions and 157 deletions

View File

@@ -1088,7 +1088,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPart
WalkerPartition::WalkerPartitionArgs testArgs = {}; WalkerPartition::WalkerPartitionArgs testArgs = {};
testArgs.initializeWparidRegister = true; testArgs.initializeWparidRegister = true;
testArgs.crossTileAtomicSynchronization = true; testArgs.crossTileAtomicSynchronization = true;
testArgs.usePipeControlStall = true; testArgs.emitPipeControlStall = true;
testArgs.partitionCount = 2u; testArgs.partitionCount = 2u;
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count()); testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
@@ -1171,7 +1171,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenQueueIsMul
WalkerPartition::WalkerPartitionArgs testArgs = {}; WalkerPartition::WalkerPartitionArgs testArgs = {};
testArgs.initializeWparidRegister = true; testArgs.initializeWparidRegister = true;
testArgs.usePipeControlStall = true; testArgs.emitPipeControlStall = true;
testArgs.crossTileAtomicSynchronization = true; testArgs.crossTileAtomicSynchronization = true;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count()); testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
@@ -1386,7 +1386,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenDispatchPr
EXPECT_EQ(0u, cmdStream.getUsed()); EXPECT_EQ(0u, cmdStream.getUsed());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenOpenClWhenEnqueuePartitionWalkerThenExpectNoNativeCrossTileSyncCleanup) { HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenOpenClWhenEnqueuePartitionWalkerThenExpectNoSelfCleanupSection) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
if (!OSInterface::osEnableLocalMemory) { if (!OSInterface::osEnableLocalMemory) {

View File

@@ -11,9 +11,9 @@ void WalkerPartitionTests::SetUp() {
cmdBufferAddress = cmdBuffer; cmdBufferAddress = cmdBuffer;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
testArgs.nativeCrossTileAtomicSync = false; testArgs.emitSelfCleanup = false;
testArgs.initializeWparidRegister = true; testArgs.initializeWparidRegister = true;
testArgs.usePipeControlStall = true; testArgs.emitPipeControlStall = true;
testArgs.crossTileAtomicSynchronization = true; testArgs.crossTileAtomicSynchronization = true;
} }

View File

@@ -416,10 +416,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
EXPECT_EQ(parsedOffset, totalBytesProgrammed); EXPECT_EQ(parsedOffset, totalBytesProgrammed);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
@@ -564,11 +564,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
EXPECT_EQ(parsedOffset, totalBytesProgrammed); EXPECT_EQ(parsedOffset, totalBytesProgrammed);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndSyncDisabledWithFlagWhenConstructCommandBufferIsCalledThenStillProgramTheSync) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupAndCrossTileSyncDisabledWithFlagWhenConstructCommandBufferIsCalledThenStillProgramTheSync) {
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t cmdBufferGpuAddress = 0x8000123000;
@@ -712,11 +712,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
EXPECT_EQ(parsedOffset, totalBytesProgrammed); EXPECT_EQ(parsedOffset, totalBytesProgrammed);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndAtomicsForNativeWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupAndAtomicsForSelfCleanupWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.useAtomicsForNativeCleanup = true; testArgs.useAtomicsForSelfCleanup = true;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t cmdBufferGpuAddress = 0x8000123000;
@@ -866,12 +866,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
EXPECT_EQ(parsedOffset, totalBytesProgrammed); EXPECT_EQ(parsedOffset, totalBytesProgrammed);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndSyncDisabledWithFlagWhenUsingAtomicForNativeAndConstructCommandBufferIsCalledThenStillProgramTheSync) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithSelfCleanupAndCrossTileSyncDisabledWithFlagWhenUsingAtomicForSelfCleanupAndConstructCommandBufferIsCalledThenStillProgramTheSync) {
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
testArgs.useAtomicsForNativeCleanup = true; testArgs.useAtomicsForSelfCleanup = true;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t cmdBufferGpuAddress = 0x8000123000;
@@ -1159,9 +1159,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.initializeWparidRegister = false; testArgs.initializeWparidRegister = false;
testArgs.nativeCrossTileAtomicSync = false; testArgs.emitSelfCleanup = false;
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.useAtomicsForNativeCleanup = false; testArgs.useAtomicsForSelfCleanup = false;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
@@ -1218,10 +1218,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) {
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount; testArgs.partitionCount = testArgs.tileCount;
testArgs.nativeCrossTileAtomicSync = false; testArgs.emitSelfCleanup = false;
testArgs.usePipeControlStall = false; testArgs.emitPipeControlStall = false;
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.useAtomicsForNativeCleanup = false; testArgs.useAtomicsForSelfCleanup = false;
testArgs.staticPartitioning = true; testArgs.staticPartitioning = true;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;

View File

@@ -148,11 +148,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstima
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs));
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationNativeSectionsWhenItIsCalledThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSectionsWhenItIsCalledThenProperSizeIsReturned) {
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
@@ -172,12 +172,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationNativeSections
estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs));
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationNativeSectionsWhenAtomicsUsedForNativeThenProperSizeIsReturned) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationSelfCleanupSectionsWhenAtomicsUsedForSelfCleanupThenProperSizeIsReturned) {
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.emitBatchBufferEnd = false; testArgs.emitBatchBufferEnd = false;
testArgs.synchronizeBeforeExecution = false; testArgs.synchronizeBeforeExecution = false;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
testArgs.useAtomicsForNativeCleanup = true; testArgs.useAtomicsForSelfCleanup = true;
auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
@@ -828,11 +828,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZD
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenNativeCrossTileSyncWhenDebugForceDisableCrossTileSyncThenNativeOverridesDebugAndAddsOwnCleanupSection) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) {
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000; uint64_t postSyncAddress = 0x8000456000;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
@@ -1028,12 +1028,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenNativeCrossTileSyncWhenD
EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 2 * testArgs.tileCount); EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 2 * testArgs.tileCount);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenNativeCrossTileSyncAndAtomicsUsedForNativeWhenDebugForceDisableCrossTileSyncThenNativeOverridesDebugAndAddsOwnCleanupSection) { HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUsedForCleanupWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) {
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
testArgs.nativeCrossTileAtomicSync = true; testArgs.emitSelfCleanup = true;
testArgs.useAtomicsForNativeCleanup = true; testArgs.useAtomicsForSelfCleanup = true;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000; uint64_t postSyncAddress = 0x8000456000;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
@@ -1240,9 +1240,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenP
testArgs.crossTileAtomicSynchronization = false; testArgs.crossTileAtomicSynchronization = false;
testArgs.partitionCount = 16u; testArgs.partitionCount = 16u;
testArgs.tileCount = 4u; testArgs.tileCount = 4u;
testArgs.nativeCrossTileAtomicSync = false; testArgs.emitSelfCleanup = false;
testArgs.useAtomicsForNativeCleanup = false; testArgs.useAtomicsForSelfCleanup = false;
testArgs.usePipeControlStall = false; testArgs.emitPipeControlStall = false;
checkForProperCmdBufferAddressOffset = false; checkForProperCmdBufferAddressOffset = false;
uint64_t gpuVirtualAddress = 0x8000123000; uint64_t gpuVirtualAddress = 0x8000123000;

View File

@@ -313,7 +313,7 @@ OverrideUseKmdWaitFunction = -1
EnableCacheFlushAfterWalkerForAllQueues = -1 EnableCacheFlushAfterWalkerForAllQueues = -1
Force32BitDriverSupport = -1 Force32BitDriverSupport = -1
OverrideCmdQueueSynchronousMode = -1 OverrideCmdQueueSynchronousMode = -1
UseAtomicsForNativeSectionCleanup = -1 UseAtomicsForSelfCleanupSection = -1
HBMSizePerTileInGigabytes = 0 HBMSizePerTileInGigabytes = 0
OverrideSystolicPipelineSelect = -1 OverrideSystolicPipelineSelect = -1
OverrideSystolicInComputeWalker = -1 OverrideSystolicInComputeWalker = -1
@@ -324,7 +324,7 @@ DoNotFreeResources = 0
OverrideGmmResourceUsageField = -1 OverrideGmmResourceUsageField = -1
LogAllocationType = 0 LogAllocationType = 0
ProgramAdditionalPipeControlBeforeStateComputeModeCommand = 0 ProgramAdditionalPipeControlBeforeStateComputeModeCommand = 0
ProgramNativeCleanup = -1 ProgramWalkerPartitionSelfCleanup = -1
WparidRegisterProgramming = -1 WparidRegisterProgramming = -1
UsePipeControlAfterPartitionedWalker = -1 UsePipeControlAfterPartitionedWalker = -1
OverrideBufferSuitableForRenderCompression = -1 OverrideBufferSuitableForRenderCompression = -1

View File

@@ -28,46 +28,49 @@ bool ImplicitScalingHelper::isImplicitScalingEnabled(const DeviceBitfield &devic
bool ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired() { bool ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired() {
auto synchronizeBeforeExecution = false; auto synchronizeBeforeExecution = false;
if (DebugManager.flags.SynchronizeWalkerInWparidMode.get() != -1) { int overrideSynchronizeBeforeExecution = DebugManager.flags.SynchronizeWalkerInWparidMode.get();
synchronizeBeforeExecution = static_cast<bool>(DebugManager.flags.SynchronizeWalkerInWparidMode.get()); if (overrideSynchronizeBeforeExecution != -1) {
synchronizeBeforeExecution = !!overrideSynchronizeBeforeExecution;
} }
return synchronizeBeforeExecution; return synchronizeBeforeExecution;
} }
bool ImplicitScalingHelper::isSemaphoreProgrammingRequired() { bool ImplicitScalingHelper::isSemaphoreProgrammingRequired() {
auto semaphoreProgrammingRequired = ImplicitScaling::semaphoreProgrammingRequired; auto semaphoreProgrammingRequired = ImplicitScaling::semaphoreProgrammingRequired;
if (NEO::DebugManager.flags.SynchronizeWithSemaphores.get() == 1) { int overrideSemaphoreProgrammingRequired = NEO::DebugManager.flags.SynchronizeWithSemaphores.get();
semaphoreProgrammingRequired = true; if (overrideSemaphoreProgrammingRequired != -1) {
semaphoreProgrammingRequired = !!overrideSemaphoreProgrammingRequired;
} }
return semaphoreProgrammingRequired; return semaphoreProgrammingRequired;
} }
bool ImplicitScalingHelper::isCrossTileAtomicRequired() { bool ImplicitScalingHelper::isCrossTileAtomicRequired() {
auto crossTileAtomicSynchronization = ImplicitScaling::crossTileAtomicSynchronization; auto crossTileAtomicSynchronization = ImplicitScaling::crossTileAtomicSynchronization;
if (NEO::DebugManager.flags.UseCrossAtomicSynchronization.get() == 0) { int overrideCrossTileAtomicSynchronization = NEO::DebugManager.flags.UseCrossAtomicSynchronization.get();
crossTileAtomicSynchronization = false; if (overrideCrossTileAtomicSynchronization != -1) {
crossTileAtomicSynchronization = !!overrideCrossTileAtomicSynchronization;
} }
return crossTileAtomicSynchronization; return crossTileAtomicSynchronization;
} }
bool ImplicitScalingHelper::useAtomicsForNativeCleanup() { bool ImplicitScalingHelper::isAtomicsUsedForSelfCleanup() {
bool useAtomics = false; bool useAtomics = false;
int overrideUseAtomics = DebugManager.flags.UseAtomicsForNativeSectionCleanup.get(); int overrideUseAtomics = DebugManager.flags.UseAtomicsForSelfCleanupSection.get();
if (overrideUseAtomics != -1) { if (overrideUseAtomics != -1) {
useAtomics = !!(overrideUseAtomics); useAtomics = !!(overrideUseAtomics);
} }
return useAtomics; return useAtomics;
} }
bool ImplicitScalingHelper::programNativeCleanup(bool defaultNativeCleanup) { bool ImplicitScalingHelper::isSelfCleanupRequired(bool defaultSelfCleanup) {
int overrideProgramNativeCleanup = DebugManager.flags.ProgramNativeCleanup.get(); int overrideProgramSelfCleanup = DebugManager.flags.ProgramWalkerPartitionSelfCleanup.get();
if (overrideProgramNativeCleanup != -1) { if (overrideProgramSelfCleanup != -1) {
defaultNativeCleanup = !!(overrideProgramNativeCleanup); defaultSelfCleanup = !!(overrideProgramSelfCleanup);
} }
return defaultNativeCleanup; return defaultSelfCleanup;
} }
bool ImplicitScalingHelper::initWparidRegister() { bool ImplicitScalingHelper::isWparidRegisterInitializationRequired() {
bool initWparidRegister = true; bool initWparidRegister = true;
int overrideInitWparidRegister = DebugManager.flags.WparidRegisterProgramming.get(); int overrideInitWparidRegister = DebugManager.flags.WparidRegisterProgramming.get();
if (overrideInitWparidRegister != -1) { if (overrideInitWparidRegister != -1) {
@@ -76,13 +79,13 @@ bool ImplicitScalingHelper::initWparidRegister() {
return initWparidRegister; return initWparidRegister;
} }
bool ImplicitScalingHelper::usePipeControl() { bool ImplicitScalingHelper::isPipeControlStallRequired() {
bool usePipeControl = true; bool emitPipeControl = true;
int overrideUsePipeControl = DebugManager.flags.UsePipeControlAfterPartitionedWalker.get(); int overrideUsePipeControl = DebugManager.flags.UsePipeControlAfterPartitionedWalker.get();
if (overrideUsePipeControl != -1) { if (overrideUsePipeControl != -1) {
usePipeControl = !!(overrideUsePipeControl); emitPipeControl = !!(overrideUsePipeControl);
} }
return usePipeControl; return emitPipeControl;
} }
} // namespace NEO } // namespace NEO

View File

@@ -27,17 +27,17 @@ struct ImplicitScalingHelper {
static bool isSemaphoreProgrammingRequired(); static bool isSemaphoreProgrammingRequired();
static bool isCrossTileAtomicRequired(); static bool isCrossTileAtomicRequired();
static bool isSynchronizeBeforeExecutionRequired(); static bool isSynchronizeBeforeExecutionRequired();
static bool useAtomicsForNativeCleanup(); static bool isAtomicsUsedForSelfCleanup();
static bool programNativeCleanup(bool defaultNativeCleanup); static bool isSelfCleanupRequired(bool defaultSelfCleanup);
static bool initWparidRegister(); static bool isWparidRegisterInitializationRequired();
static bool usePipeControl(); static bool isPipeControlStallRequired();
}; };
template <typename GfxFamily> template <typename GfxFamily>
struct ImplicitScalingDispatch { struct ImplicitScalingDispatch {
using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; using WALKER_TYPE = typename GfxFamily::WALKER_TYPE;
static size_t getSize(bool nativeCrossTileAtomicSync, static size_t getSize(bool emitSelfCleanup,
bool preferStaticPartitioning, bool preferStaticPartitioning,
const DeviceBitfield &devices, const DeviceBitfield &devices,
const Vec3<size_t> &groupStart, const Vec3<size_t> &groupStart,
@@ -47,7 +47,7 @@ struct ImplicitScalingDispatch {
const DeviceBitfield &devices, const DeviceBitfield &devices,
uint32_t &partitionCount, uint32_t &partitionCount,
bool useSecondaryBatchBuffer, bool useSecondaryBatchBuffer,
bool nativeCrossTileAtomicSync, bool emitSelfCleanup,
bool usesImages, bool usesImages,
uint64_t workPartitionAllocationGpuVa); uint64_t workPartitionAllocationGpuVa);
}; };

View File

@@ -12,7 +12,7 @@
namespace NEO { namespace NEO {
template <typename GfxFamily> template <typename GfxFamily>
size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool nativeCrossTileAtomicSync, size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool emitSelfCleanup,
bool preferStaticPartitioning, bool preferStaticPartitioning,
const DeviceBitfield &devices, const DeviceBitfield &devices,
const Vec3<size_t> &groupStart, const Vec3<size_t> &groupStart,
@@ -34,12 +34,12 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool nativeCrossTileAtomicSyn
args.partitionCount = partitionCount; args.partitionCount = partitionCount;
args.tileCount = tileCount; args.tileCount = tileCount;
args.synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired(); args.synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();
args.useAtomicsForNativeCleanup = ImplicitScalingHelper::useAtomicsForNativeCleanup(); args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup();
args.nativeCrossTileAtomicSync = ImplicitScalingHelper::programNativeCleanup(nativeCrossTileAtomicSync); args.emitSelfCleanup = ImplicitScalingHelper::isSelfCleanupRequired(emitSelfCleanup);
args.initializeWparidRegister = ImplicitScalingHelper::initWparidRegister(); args.initializeWparidRegister = ImplicitScalingHelper::isWparidRegisterInitializationRequired();
args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired(); args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired();
args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired(); args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired();
args.usePipeControlStall = ImplicitScalingHelper::usePipeControl(); args.emitPipeControlStall = ImplicitScalingHelper::isPipeControlStallRequired();
args.emitBatchBufferEnd = false; args.emitBatchBufferEnd = false;
args.staticPartitioning = staticPartitioning; args.staticPartitioning = staticPartitioning;
@@ -52,7 +52,7 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
const DeviceBitfield &devices, const DeviceBitfield &devices,
uint32_t &partitionCount, uint32_t &partitionCount,
bool useSecondaryBatchBuffer, bool useSecondaryBatchBuffer,
bool nativeCrossTileAtomicSync, bool emitSelfCleanup,
bool usesImages, bool usesImages,
uint64_t workPartitionAllocationGpuVa) { uint64_t workPartitionAllocationGpuVa) {
uint32_t totalProgrammedSize = 0u; uint32_t totalProgrammedSize = 0u;
@@ -67,12 +67,12 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
args.partitionCount = partitionCount; args.partitionCount = partitionCount;
args.tileCount = tileCount; args.tileCount = tileCount;
args.synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired(); args.synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();
args.useAtomicsForNativeCleanup = ImplicitScalingHelper::useAtomicsForNativeCleanup(); args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup();
args.nativeCrossTileAtomicSync = ImplicitScalingHelper::programNativeCleanup(nativeCrossTileAtomicSync); args.emitSelfCleanup = ImplicitScalingHelper::isSelfCleanupRequired(emitSelfCleanup);
args.initializeWparidRegister = ImplicitScalingHelper::initWparidRegister(); args.initializeWparidRegister = ImplicitScalingHelper::isWparidRegisterInitializationRequired();
args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired(); args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired();
args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired(); args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired();
args.usePipeControlStall = ImplicitScalingHelper::usePipeControl(); args.emitPipeControlStall = ImplicitScalingHelper::isPipeControlStallRequired();
args.emitBatchBufferEnd = false; args.emitBatchBufferEnd = false;
args.secondaryBatchBuffer = useSecondaryBatchBuffer; args.secondaryBatchBuffer = useSecondaryBatchBuffer;
args.staticPartitioning = staticPartitioning; args.staticPartitioning = staticPartitioning;

View File

@@ -28,10 +28,10 @@ struct WalkerPartitionArgs {
bool crossTileAtomicSynchronization = false; bool crossTileAtomicSynchronization = false;
bool semaphoreProgrammingRequired = false; bool semaphoreProgrammingRequired = false;
bool staticPartitioning = false; bool staticPartitioning = false;
bool nativeCrossTileAtomicSync = false; bool emitSelfCleanup = false;
bool useAtomicsForNativeCleanup = false; bool useAtomicsForSelfCleanup = false;
bool initializeWparidRegister = false; bool initializeWparidRegister = false;
bool usePipeControlStall = false; bool emitPipeControlStall = false;
}; };
template <typename GfxFamily> template <typename GfxFamily>
@@ -350,8 +350,8 @@ void programStoreMemImmediateDword(void *&inputAddress, uint32_t &totalBytesProg
} }
template <typename GfxFamily> template <typename GfxFamily>
uint64_t computeNativeCrossTileSyncControlSectionSize(bool useAtomicsForNativeCleanup) { uint64_t computeSelfCleanupSectionSize(bool useAtomicsForSelfCleanup) {
if (useAtomicsForNativeCleanup) { if (useAtomicsForSelfCleanup) {
return sizeof(MI_ATOMIC<GfxFamily>); return sizeof(MI_ATOMIC<GfxFamily>);
} else { } else {
return sizeof(MI_STORE_DATA_IMM<GfxFamily>); return sizeof(MI_STORE_DATA_IMM<GfxFamily>);
@@ -359,11 +359,11 @@ uint64_t computeNativeCrossTileSyncControlSectionSize(bool useAtomicsForNativeCl
} }
template <typename GfxFamily> template <typename GfxFamily>
void programNativeCrossTileSyncControl(void *&inputAddress, void programSelfCleanupSection(void *&inputAddress,
uint32_t &totalBytesProgrammed, uint32_t &totalBytesProgrammed,
uint64_t address, uint64_t address,
bool useAtomicsForNativeCleanup) { bool useAtomicsForSelfCleanup) {
if (useAtomicsForNativeCleanup) { if (useAtomicsForSelfCleanup) {
programMiAtomic<GfxFamily>(inputAddress, programMiAtomic<GfxFamily>(inputAddress,
totalBytesProgrammed, totalBytesProgrammed,
address, address,
@@ -393,28 +393,28 @@ void programTilesSynchronizationWithAtomics(void *&currentBatchBufferPointer,
} }
template <typename GfxFamily> template <typename GfxFamily>
uint64_t computeNativeCrossTileSyncCleanupSectionSize(size_t fieldsForCleanupCount, bool useAtomicsForNativeCleanup) { uint64_t computeSelfCleanupEndSectionSize(size_t fieldsForCleanupCount, bool useAtomicsForSelfCleanup) {
return fieldsForCleanupCount * computeNativeCrossTileSyncControlSectionSize<GfxFamily>(useAtomicsForNativeCleanup) + return fieldsForCleanupCount * computeSelfCleanupSectionSize<GfxFamily>(useAtomicsForSelfCleanup) +
2 * computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>(); 2 * computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>();
} }
template <typename GfxFamily> template <typename GfxFamily>
void programNativeCrossTileSyncCleanup(void *&inputAddress, void programSelfCleanupEndSection(void *&inputAddress,
uint32_t &totalBytesProgrammed, uint32_t &totalBytesProgrammed,
uint64_t finalSyncTileCountAddress, uint64_t finalSyncTileCountAddress,
uint64_t baseAddressForCleanup, uint64_t baseAddressForCleanup,
size_t fieldsForCleanupCount, size_t fieldsForCleanupCount,
uint32_t tileCount, uint32_t tileCount,
bool useAtomicsForNativeCleanup) { bool useAtomicsForSelfCleanup) {
// Synchronize tiles, so the fields are not cleared while still in use // Synchronize tiles, so the fields are not cleared while still in use
programTilesSynchronizationWithAtomics<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, tileCount); programTilesSynchronizationWithAtomics<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, tileCount);
for (auto fieldIndex = 0u; fieldIndex < fieldsForCleanupCount; fieldIndex++) { for (auto fieldIndex = 0u; fieldIndex < fieldsForCleanupCount; fieldIndex++) {
const uint64_t addressForCleanup = baseAddressForCleanup + fieldIndex * sizeof(uint32_t); const uint64_t addressForCleanup = baseAddressForCleanup + fieldIndex * sizeof(uint32_t);
programNativeCrossTileSyncControl<GfxFamily>(inputAddress, programSelfCleanupSection<GfxFamily>(inputAddress,
totalBytesProgrammed, totalBytesProgrammed,
addressForCleanup, addressForCleanup,
useAtomicsForNativeCleanup); useAtomicsForSelfCleanup);
} }
//this synchronization point ensures that all tiles finished zeroing and will fairly access control section atomic variables //this synchronization point ensures that all tiles finished zeroing and will fairly access control section atomic variables
@@ -450,12 +450,12 @@ uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) {
sizeof(BATCH_BUFFER_START<GfxFamily>) * 2; sizeof(BATCH_BUFFER_START<GfxFamily>) * 2;
size += (args.semaphoreProgrammingRequired ? sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) * args.partitionCount : 0u); size += (args.semaphoreProgrammingRequired ? sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) * args.partitionCount : 0u);
size += computeWalkerSectionSize<GfxFamily>(); size += computeWalkerSectionSize<GfxFamily>();
size += args.usePipeControlStall ? sizeof(PIPE_CONTROL<GfxFamily>) : 0u; size += args.emitPipeControlStall ? sizeof(PIPE_CONTROL<GfxFamily>) : 0u;
if (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync) { if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) {
size += computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>(); size += computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>();
} }
if (args.nativeCrossTileAtomicSync) { if (args.emitSelfCleanup) {
size += computeNativeCrossTileSyncControlSectionSize<GfxFamily>(args.useAtomicsForNativeCleanup); size += computeSelfCleanupSectionSize<GfxFamily>(args.useAtomicsForSelfCleanup);
} }
return size; return size;
} }
@@ -566,12 +566,12 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
//disable predication to not noop subsequent commands. //disable predication to not noop subsequent commands.
programWparidPredication<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, false); programWparidPredication<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, false);
if (args.nativeCrossTileAtomicSync) { if (args.emitSelfCleanup) {
const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount); const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount);
programNativeCrossTileSyncControl<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForNativeCleanup); programSelfCleanupSection<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup);
} }
if (args.usePipeControlStall) { if (args.emitPipeControlStall) {
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true); programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true);
} }
@@ -582,7 +582,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
} }
} }
if (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync) { if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) {
auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, tileCount); auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, tileCount);
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount); programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount);
} }
@@ -608,15 +608,15 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
totalBytesProgrammed += sizeof(BatchBufferControlData); totalBytesProgrammed += sizeof(BatchBufferControlData);
currentBatchBufferPointer = ptrOffset(currentBatchBufferPointer, sizeof(BatchBufferControlData)); currentBatchBufferPointer = ptrOffset(currentBatchBufferPointer, sizeof(BatchBufferControlData));
if (args.nativeCrossTileAtomicSync) { if (args.emitSelfCleanup) {
const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount); const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount);
programNativeCrossTileSyncCleanup<GfxFamily>(currentBatchBufferPointer, programSelfCleanupEndSection<GfxFamily>(currentBatchBufferPointer,
totalBytesProgrammed, totalBytesProgrammed,
finalSyncTileCountAddress, finalSyncTileCountAddress,
gpuAddressOfAllocation + controlSectionOffset, gpuAddressOfAllocation + controlSectionOffset,
dynamicPartitioningFieldsForCleanupCount, dynamicPartitioningFieldsForCleanupCount,
args.tileCount, args.tileCount,
args.useAtomicsForNativeCleanup); args.useAtomicsForSelfCleanup);
} }
if (args.emitBatchBufferEnd) { if (args.emitBatchBufferEnd) {
@@ -637,26 +637,26 @@ uint64_t computeStaticPartitioningControlSectionOffset(WalkerPartitionArgs &args
const auto beforeExecutionSyncAtomicSize = args.synchronizeBeforeExecution const auto beforeExecutionSyncAtomicSize = args.synchronizeBeforeExecution
? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>() ? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>()
: 0u; : 0u;
const auto afterExecutionSyncAtomicSize = (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync) const auto afterExecutionSyncAtomicSize = (args.crossTileAtomicSynchronization || args.emitSelfCleanup)
? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>() ? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>()
: 0u; : 0u;
const auto afterExecutionSyncPostSyncSize = args.semaphoreProgrammingRequired const auto afterExecutionSyncPostSyncSize = args.semaphoreProgrammingRequired
? sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) * args.partitionCount ? sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) * args.partitionCount
: 0u; : 0u;
const auto nativeCrossTileSyncSize = args.nativeCrossTileAtomicSync const auto selfCleanupSectionSize = args.emitSelfCleanup
? computeNativeCrossTileSyncControlSectionSize<GfxFamily>(args.useAtomicsForNativeCleanup) ? computeSelfCleanupSectionSize<GfxFamily>(args.useAtomicsForSelfCleanup)
: 0u; : 0u;
const auto wparidRegisterSize = args.initializeWparidRegister const auto wparidRegisterSize = args.initializeWparidRegister
? sizeof(LOAD_REGISTER_MEM<GfxFamily>) ? sizeof(LOAD_REGISTER_MEM<GfxFamily>)
: 0u; : 0u;
const auto pipeControlSize = args.usePipeControlStall const auto pipeControlSize = args.emitPipeControlStall
? sizeof(PIPE_CONTROL<GfxFamily>) ? sizeof(PIPE_CONTROL<GfxFamily>)
: 0u; : 0u;
return beforeExecutionSyncAtomicSize + return beforeExecutionSyncAtomicSize +
wparidRegisterSize + wparidRegisterSize +
pipeControlSize + pipeControlSize +
sizeof(COMPUTE_WALKER<GfxFamily>) + sizeof(COMPUTE_WALKER<GfxFamily>) +
nativeCrossTileSyncSize + selfCleanupSectionSize +
afterExecutionSyncAtomicSize + afterExecutionSyncAtomicSize +
afterExecutionSyncPostSyncSize + afterExecutionSyncPostSyncSize +
sizeof(BATCH_BUFFER_START<GfxFamily>); sizeof(BATCH_BUFFER_START<GfxFamily>);
@@ -688,12 +688,12 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount); programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
// Prepare for cleanup section // Prepare for cleanup section
if (args.nativeCrossTileAtomicSync) { if (args.emitSelfCleanup) {
const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
programNativeCrossTileSyncControl<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForNativeCleanup); programSelfCleanupSection<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup);
} }
if (args.usePipeControlStall) { if (args.emitPipeControlStall) {
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true); // flush L3 cache programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true); // flush L3 cache
} }
@@ -702,7 +702,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
programTilesSynchronizationWithPostSyncs<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount); programTilesSynchronizationWithPostSyncs<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
} }
if (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync) { if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) {
const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount); programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount);
} }
@@ -719,15 +719,15 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset); DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset);
// Cleanup section // Cleanup section
if (args.nativeCrossTileAtomicSync) { if (args.emitSelfCleanup) {
const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
programNativeCrossTileSyncCleanup<GfxFamily>(currentBatchBufferPointer, programSelfCleanupEndSection<GfxFamily>(currentBatchBufferPointer,
totalBytesProgrammed, totalBytesProgrammed,
finalSyncTileCountAddress, finalSyncTileCountAddress,
gpuAddressOfAllocation + controlSectionOffset, gpuAddressOfAllocation + controlSectionOffset,
staticPartitioningFieldsForCleanupCount, staticPartitioningFieldsForCleanupCount,
args.tileCount, args.tileCount,
args.useAtomicsForNativeCleanup); args.useAtomicsForSelfCleanup);
} }
} }
@@ -738,12 +738,12 @@ uint64_t estimateSpaceRequiredInCommandBuffer(WalkerPartitionArgs &args) {
if (args.staticPartitioning) { if (args.staticPartitioning) {
size += computeStaticPartitioningControlSectionOffset<GfxFamily>(args); size += computeStaticPartitioningControlSectionOffset<GfxFamily>(args);
size += sizeof(StaticPartitioningControlSection); size += sizeof(StaticPartitioningControlSection);
size += args.nativeCrossTileAtomicSync ? computeNativeCrossTileSyncCleanupSectionSize<GfxFamily>(staticPartitioningFieldsForCleanupCount, args.useAtomicsForNativeCleanup) : 0u; size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize<GfxFamily>(staticPartitioningFieldsForCleanupCount, args.useAtomicsForSelfCleanup) : 0u;
} else { } else {
size += computeControlSectionOffset<GfxFamily>(args); size += computeControlSectionOffset<GfxFamily>(args);
size += sizeof(BatchBufferControlData); size += sizeof(BatchBufferControlData);
size += args.emitBatchBufferEnd ? sizeof(BATCH_BUFFER_END<GfxFamily>) : 0u; size += args.emitBatchBufferEnd ? sizeof(BATCH_BUFFER_END<GfxFamily>) : 0u;
size += args.nativeCrossTileAtomicSync ? computeNativeCrossTileSyncCleanupSectionSize<GfxFamily>(dynamicPartitioningFieldsForCleanupCount, args.useAtomicsForNativeCleanup) : 0u; size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize<GfxFamily>(dynamicPartitioningFieldsForCleanupCount, args.useAtomicsForSelfCleanup) : 0u;
} }
return size; return size;
} }

View File

@@ -239,8 +239,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disa
DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWalkerInWparidMode, -1, "-1: default, 0: do not synchronize 1: synchronize all tiles prior to doing work distrubution") DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWalkerInWparidMode, -1, "-1: default, 0: do not synchronize 1: synchronize all tiles prior to doing work distrubution")
DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWithSemaphores, -1, "-1: default (disabled), 1: Emit Semaphores waiting after Walker completion in WPARID mode 0: do not emit semaphores after Walker") DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWithSemaphores, -1, "-1: default (disabled), 1: Emit Semaphores waiting after Walker completion in WPARID mode 0: do not emit semaphores after Walker")
DECLARE_DEBUG_VARIABLE(int32_t, UseCrossAtomicSynchronization, -1, "-1: default (enabled), 1: Cross Tile Atomic Synchronization present 0: Cross tile atomic synchronization disabled") DECLARE_DEBUG_VARIABLE(int32_t, UseCrossAtomicSynchronization, -1, "-1: default (enabled), 1: Cross Tile Atomic Synchronization present 0: Cross tile atomic synchronization disabled")
DECLARE_DEBUG_VARIABLE(int32_t, UseAtomicsForNativeSectionCleanup, -1, "-1: default (disabled), 0: use store data op, 1: use atomic op") DECLARE_DEBUG_VARIABLE(int32_t, UseAtomicsForSelfCleanupSection, -1, "-1: default (disabled), 0: use store data op, 1: use atomic op")
DECLARE_DEBUG_VARIABLE(int32_t, ProgramNativeCleanup, -1, "-1: default (API dependent), 0: Do not program native cleanup, 1: program native cleanup") DECLARE_DEBUG_VARIABLE(int32_t, ProgramWalkerPartitionSelfCleanup, -1, "-1: default (API dependent), 0: Do not program self cleanup, 1: program self cleanup")
DECLARE_DEBUG_VARIABLE(int32_t, WparidRegisterProgramming, -1, "-1: default (enabled), 0: do not program wparid register, 1: programing wparid register") DECLARE_DEBUG_VARIABLE(int32_t, WparidRegisterProgramming, -1, "-1: default (enabled), 0: do not program wparid register, 1: programing wparid register")
DECLARE_DEBUG_VARIABLE(int32_t, UsePipeControlAfterPartitionedWalker, -1, "-1: default (enabled), 0: do not add PipeControl, 1: add PipeControl") DECLARE_DEBUG_VARIABLE(int32_t, UsePipeControlAfterPartitionedWalker, -1, "-1: default (enabled), 0: do not add PipeControl, 1: add PipeControl")

View File

@@ -975,7 +975,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
EXPECT_EQ(expectedPartitionSize, partitionWalkerCmd->getPartitionSize()); EXPECT_EQ(expectedPartitionSize, partitionWalkerCmd->getPartitionSize());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImplicitScalingWhenEncodingDispatchKernelThenExpectNativeCrossTileCleanupSection) { HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImplicitScalingWhenEncodingDispatchKernelThenExpectSelfCleanupSection) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
@@ -1028,9 +1028,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
WalkerPartition::WalkerPartitionArgs args = {}; WalkerPartition::WalkerPartitionArgs args = {};
args.initializeWparidRegister = true; args.initializeWparidRegister = true;
args.usePipeControlStall = true; args.emitPipeControlStall = true;
args.partitionCount = partitionCount; args.partitionCount = partitionCount;
args.nativeCrossTileAtomicSync = true; args.emitSelfCleanup = true;
auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset<FamilyType>(args); auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset<FamilyType>(args);
uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() + uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() +

View File

@@ -46,64 +46,64 @@ TEST_F(ImplicitScalingTests, givenMultiTileApiEnabledWhenOsSupportOffAndForcedOn
EXPECT_FALSE(ImplicitScalingHelper::isImplicitScalingEnabled(twoTile, true)); EXPECT_FALSE(ImplicitScalingHelper::isImplicitScalingEnabled(twoTile, true));
} }
TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingAtomicsForNativeCleanupThenExpectFalse) { TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingAtomicsForSelfCleanupThenExpectFalse) {
EXPECT_FALSE(ImplicitScalingHelper::useAtomicsForNativeCleanup()); EXPECT_FALSE(ImplicitScalingHelper::isAtomicsUsedForSelfCleanup());
} }
TEST_F(ImplicitScalingTests, givenForceNotUseAtomicsWhenCheckingAtomicsForNativeCleanupThenExpectFalse) { TEST_F(ImplicitScalingTests, givenForceNotUseAtomicsWhenCheckingAtomicsForSelfCleanupThenExpectFalse) {
DebugManager.flags.UseAtomicsForNativeSectionCleanup.set(0); DebugManager.flags.UseAtomicsForSelfCleanupSection.set(0);
EXPECT_FALSE(ImplicitScalingHelper::useAtomicsForNativeCleanup()); EXPECT_FALSE(ImplicitScalingHelper::isAtomicsUsedForSelfCleanup());
} }
TEST_F(ImplicitScalingTests, givenForceUseAtomicsWhenCheckingAtomicsForNativeCleanupThenExpectTrue) { TEST_F(ImplicitScalingTests, givenForceUseAtomicsWhenCheckingAtomicsForSelfCleanupThenExpectTrue) {
DebugManager.flags.UseAtomicsForNativeSectionCleanup.set(1); DebugManager.flags.UseAtomicsForSelfCleanupSection.set(1);
EXPECT_TRUE(ImplicitScalingHelper::useAtomicsForNativeCleanup()); EXPECT_TRUE(ImplicitScalingHelper::isAtomicsUsedForSelfCleanup());
} }
TEST_F(ImplicitScalingTests, givenDefaultSettingsIsFalseWhenCheckingProgramNativeCleanupThenExpectFalse) { TEST_F(ImplicitScalingTests, givenDefaultSettingsIsFalseWhenCheckingProgramSelfCleanupThenExpectFalse) {
EXPECT_FALSE(ImplicitScalingHelper::programNativeCleanup(false)); EXPECT_FALSE(ImplicitScalingHelper::isSelfCleanupRequired(false));
} }
TEST_F(ImplicitScalingTests, givenDefaultSettingsIsTrueWhenCheckingProgramNativeCleanupThenExpectTrue) { TEST_F(ImplicitScalingTests, givenDefaultSettingsIsTrueWhenCheckingProgramSelfCleanupThenExpectTrue) {
EXPECT_TRUE(ImplicitScalingHelper::programNativeCleanup(true)); EXPECT_TRUE(ImplicitScalingHelper::isSelfCleanupRequired(true));
} }
TEST_F(ImplicitScalingTests, givenForceNotProgramNativeCleanupWhenDefaultNativeCleanupIsTrueThenExpectFalse) { TEST_F(ImplicitScalingTests, givenForceNotProgramSelfCleanupWhenDefaultSelfCleanupIsTrueThenExpectFalse) {
DebugManager.flags.ProgramNativeCleanup.set(0); DebugManager.flags.ProgramWalkerPartitionSelfCleanup.set(0);
EXPECT_FALSE(ImplicitScalingHelper::programNativeCleanup(true)); EXPECT_FALSE(ImplicitScalingHelper::isSelfCleanupRequired(true));
} }
TEST_F(ImplicitScalingTests, givenForceProgramNativeCleanupWhenDefaultNativeCleanupIsFalseThenExpectTrue) { TEST_F(ImplicitScalingTests, givenForceProgramSelfCleanupWhenDefaultSelfCleanupIsFalseThenExpectTrue) {
DebugManager.flags.ProgramNativeCleanup.set(1); DebugManager.flags.ProgramWalkerPartitionSelfCleanup.set(1);
EXPECT_TRUE(ImplicitScalingHelper::programNativeCleanup(false)); EXPECT_TRUE(ImplicitScalingHelper::isSelfCleanupRequired(false));
} }
TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingToProgramWparidRegisterThenExpectTrue) { TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingToProgramWparidRegisterThenExpectTrue) {
EXPECT_TRUE(ImplicitScalingHelper::initWparidRegister()); EXPECT_TRUE(ImplicitScalingHelper::isWparidRegisterInitializationRequired());
} }
TEST_F(ImplicitScalingTests, givenForceNotProgramWparidRegisterWhenCheckingRegisterProgramThenExpectFalse) { TEST_F(ImplicitScalingTests, givenForceNotProgramWparidRegisterWhenCheckingRegisterProgramThenExpectFalse) {
DebugManager.flags.WparidRegisterProgramming.set(0); DebugManager.flags.WparidRegisterProgramming.set(0);
EXPECT_FALSE(ImplicitScalingHelper::initWparidRegister()); EXPECT_FALSE(ImplicitScalingHelper::isWparidRegisterInitializationRequired());
} }
TEST_F(ImplicitScalingTests, givenForceProgramWparidRegisterWhenCheckingRegisterProgramThenExpectTrue) { TEST_F(ImplicitScalingTests, givenForceProgramWparidRegisterWhenCheckingRegisterProgramThenExpectTrue) {
DebugManager.flags.WparidRegisterProgramming.set(1); DebugManager.flags.WparidRegisterProgramming.set(1);
EXPECT_TRUE(ImplicitScalingHelper::initWparidRegister()); EXPECT_TRUE(ImplicitScalingHelper::isWparidRegisterInitializationRequired());
} }
TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingToUsePipeControlThenExpectTrue) { TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingToUsePipeControlThenExpectTrue) {
EXPECT_TRUE(ImplicitScalingHelper::usePipeControl()); EXPECT_TRUE(ImplicitScalingHelper::isPipeControlStallRequired());
} }
TEST_F(ImplicitScalingTests, givenForceNotUsePipeControlWhenCheckingPipeControlUseThenExpectFalse) { TEST_F(ImplicitScalingTests, givenForceNotUsePipeControlWhenCheckingPipeControlUseThenExpectFalse) {
DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(0); DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(0);
EXPECT_FALSE(ImplicitScalingHelper::usePipeControl()); EXPECT_FALSE(ImplicitScalingHelper::isPipeControlStallRequired());
} }
TEST_F(ImplicitScalingTests, givenForceUsePipeControlWhenCheckingPipeControlUseThenExpectTrue) { TEST_F(ImplicitScalingTests, givenForceUsePipeControlWhenCheckingPipeControlUseThenExpectTrue) {
DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(1); DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(1);
EXPECT_TRUE(ImplicitScalingHelper::usePipeControl()); EXPECT_TRUE(ImplicitScalingHelper::isPipeControlStallRequired());
} }
TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingSemaphoreUseThenExpectFalse) { TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingSemaphoreUseThenExpectFalse) {