From b65d8909e4a970eaae6e6dfbb5436dfbee09410a Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Mon, 13 Sep 2021 17:39:55 +0000 Subject: [PATCH] Refactor and modularize walker partition code Related-To: NEO-6244 Signed-off-by: Zbigniew Zdanowicz --- .../source/helpers/implicit_scaling_l0.cpp | 4 +- .../source/helpers/implicit_scaling_ocl.cpp | 4 +- .../unit_test/command_queue/CMakeLists.txt | 6 +- .../dispatch_walker_tests_xehp_and_later.cpp | 43 +- ...alker_partition_fixture_xehp_and_later.cpp | 25 + .../walker_partition_fixture_xehp_and_later.h | 38 + .../walker_partition_tests_xehp_and_later.cpp | 2486 ----------------- ...alker_partition_tests_xehp_and_later_1.cpp | 1278 +++++++++ ...alker_partition_tests_xehp_and_later_2.cpp | 1352 +++++++++ .../implicit_scaling_ocl_tests.cpp | 8 + .../test/unit_test/test_files/igdrcl.config | 9 +- .../command_container/implicit_scaling.cpp | 45 +- .../command_container/implicit_scaling.h | 12 +- .../implicit_scaling_xehp_and_later.inl | 58 +- .../walker_partition_xehp_and_later.h | 301 +- .../debug_settings/debug_variables_base.inl | 21 +- ..._encode_dispatch_kernel_xehp_and_later.cpp | 8 +- .../encoders/test_implicit_scaling.cpp | 78 +- .../test_implicit_scaling_xehp_and_later.cpp | 108 + 19 files changed, 3184 insertions(+), 2700 deletions(-) create mode 100644 opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.cpp create mode 100644 opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h delete mode 100644 opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later.cpp create mode 100644 opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_1.cpp create mode 100644 opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_2.cpp diff --git a/level_zero/core/source/helpers/implicit_scaling_l0.cpp b/level_zero/core/source/helpers/implicit_scaling_l0.cpp index f056110145..5bfb2e15b8 100644 --- a/level_zero/core/source/helpers/implicit_scaling_l0.cpp +++ b/level_zero/core/source/helpers/implicit_scaling_l0.cpp @@ -10,5 +10,7 @@ namespace NEO { namespace ImplicitScaling { bool apiSupport = false; -} +bool semaphoreProgrammingRequired = false; +bool crossTileAtomicSynchronization = true; +} // namespace ImplicitScaling } // namespace NEO diff --git a/opencl/source/helpers/implicit_scaling_ocl.cpp b/opencl/source/helpers/implicit_scaling_ocl.cpp index d6b23ababf..c2571e8302 100644 --- a/opencl/source/helpers/implicit_scaling_ocl.cpp +++ b/opencl/source/helpers/implicit_scaling_ocl.cpp @@ -10,5 +10,7 @@ namespace NEO { namespace ImplicitScaling { bool apiSupport = true; -} +bool semaphoreProgrammingRequired = false; +bool crossTileAtomicSynchronization = true; +} // namespace ImplicitScaling } // namespace NEO diff --git a/opencl/test/unit_test/command_queue/CMakeLists.txt b/opencl/test/unit_test/command_queue/CMakeLists.txt index 3422b4256b..5f0ef7d979 100644 --- a/opencl/test/unit_test/command_queue/CMakeLists.txt +++ b/opencl/test/unit_test/command_queue/CMakeLists.txt @@ -88,7 +88,6 @@ set(IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/ooq_task_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/read_write_buffer_cpu_copy.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sync_buffer_handler_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/work_group_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zero_size_enqueue_tests.cpp ) @@ -98,7 +97,10 @@ if(TESTS_XEHP_AND_LATER) 
      ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests_xehp_and_later.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_xehp_and_later.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barier_tests_xehp_and_later.cpp
-     ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later.cpp
+     ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_fixture_xehp_and_later.cpp
+     ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_fixture_xehp_and_later.h
+     ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later_1.cpp
+     ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later_2.cpp
  )
endif()

diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp
index d769e78369..ce949a7b64 100644
--- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp
+++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp
@@ -1072,8 +1072,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPart
     MockClDevice *device = deviceFactory.rootDevices[0];
     MockContext context{device};
 
-    auto synchronizeBeforeExecution = false;
-    auto staticPartitioning = false;
     auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, device, nullptr);
     auto &csr = cmdQ->getUltCommandStreamReceiver();
 
@@ -1087,35 +1085,39 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPart
     DispatchInfo dispatchInfo{};
     dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
 
-    synchronizeBeforeExecution = false;
+    WalkerPartition::WalkerPartitionArgs testArgs = {};
+    testArgs.initializeWparidRegister = true;
+    testArgs.crossTileAtomicSynchronization = true;
+    testArgs.usePipeControlStall = true;
+    testArgs.partitionCount = 2u;
+    testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
+
     DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
-    staticPartitioning = false;
+    testArgs.staticPartitioning = false;
+    testArgs.synchronizeBeforeExecution = false;
     csr.staticWorkPartitioningEnabled = false;
-    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
+    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
     auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
     EXPECT_EQ(returnedSize, partitionSize + baseSize);
 
-    synchronizeBeforeExecution = false;
-    DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
-    staticPartitioning = true;
+    testArgs.staticPartitioning = true;
     csr.staticWorkPartitioningEnabled = true;
-    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
+    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
     returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
     EXPECT_EQ(returnedSize, partitionSize + baseSize);
 
-    synchronizeBeforeExecution = true;
     DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
-    staticPartitioning = false;
+    testArgs.synchronizeBeforeExecution = true;
+    testArgs.staticPartitioning = false;
     csr.staticWorkPartitioningEnabled = false;
-    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
+    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
     returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
     EXPECT_EQ(returnedSize, partitionSize + baseSize);
 
-    synchronizeBeforeExecution = true;
-    DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
-    staticPartitioning = true;
+    testArgs.synchronizeBeforeExecution = true;
+    testArgs.staticPartitioning = true;
     csr.staticWorkPartitioningEnabled = true;
-    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
+    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
     returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
     EXPECT_EQ(returnedSize, partitionSize + baseSize);
 }
@@ -1167,7 +1169,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenQueueIsMul
                     HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
                     EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);
 
-    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, false, false, false, false);
+    WalkerPartition::WalkerPartitionArgs testArgs = {};
+    testArgs.initializeWparidRegister = true;
+    testArgs.usePipeControlStall = true;
+    testArgs.crossTileAtomicSynchronization = true;
+    testArgs.partitionCount = 16u;
+    testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
+
+    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
 
     DispatchInfo dispatchInfo{};
     dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
diff --git a/opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.cpp
new file mode 100644
index 0000000000..51383d8ec8
--- /dev/null
+++ b/opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h"
+
+void WalkerPartitionTests::SetUp() {
+    cmdBufferAddress = cmdBuffer;
+
+    testArgs.synchronizeBeforeExecution = false;
+    testArgs.nativeCrossTileAtomicSync = false;
+    testArgs.initializeWparidRegister = true;
+    testArgs.usePipeControlStall = true;
+    testArgs.crossTileAtomicSynchronization = true;
+}
+
+void WalkerPartitionTests::TearDown() {
+    auto initialCommandBufferPointer = cmdBuffer;
+    if (checkForProperCmdBufferAddressOffset) {
+        EXPECT_EQ(ptrDiff(cmdBufferAddress, initialCommandBufferPointer), totalBytesProgrammed);
+    }
+}
diff --git a/opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h b/opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h
new file mode 100644
index 0000000000..fcac741f5a
--- /dev/null
+++ b/opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "shared/source/command_container/walker_partition_xehp_and_later.h"
+#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
+#include "shared/test/common/helpers/debug_manager_state_restore.h"
+#include "shared/test/common/helpers/unit_test_helper.h"
+
+#include "test.h"
+
+using namespace WalkerPartition;
+
+struct WalkerPartitionTests : public ::testing::Test {
+    void SetUp() override;
+
+    void TearDown() override;
+
+    template <typename GfxFamily>
+    auto createWalker(uint64_t postSyncAddress) {
+        WalkerPartition::COMPUTE_WALKER<GfxFamily> walker;
+        walker = GfxFamily::cmdInitGpgpuWalker;
+        walker.setPartitionType(COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE::PARTITION_TYPE_X);
+        auto &postSync = walker.getPostSync();
+        postSync.setOperation(POSTSYNC_DATA<GfxFamily>::OPERATION::OPERATION_WRITE_TIMESTAMP);
+        postSync.setDestinationAddress(postSyncAddress);
+        return walker;
+    }
+
+    char cmdBuffer[4096u];
+    WalkerPartition::WalkerPartitionArgs testArgs = {};
+    void *cmdBufferAddress = nullptr;
+    uint32_t totalBytesProgrammed = 0u;
+    bool checkForProperCmdBufferAddressOffset = true;
+};
diff --git a/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later.cpp
deleted file mode 100644
index e91ca9fa99..0000000000
--- a/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later.cpp
+++ /dev/null
@@ -1,2486 +0,0 @@
-/*
- * Copyright (C) 2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "shared/source/command_container/walker_partition_xehp_and_later.h"
-#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
-#include "shared/test/common/helpers/debug_manager_state_restore.h"
-#include "shared/test/common/helpers/unit_test_helper.h"
-
-#include "test.h"
-
-using namespace WalkerPartition;
-
-struct WalkerPartitionTests : public ::testing::Test {
-    void SetUp() override {
-        cmdBufferAddress = cmdBuffer;
-    }
-
-    void TearDown() override {
-        auto initialCommandBufferPointer = cmdBuffer;
-        if (checkForProperCmdBufferAddressOffset) {
-            EXPECT_EQ(ptrDiff(cmdBufferAddress, initialCommandBufferPointer), totalBytesProgrammed);
-        }
-    }
-
-    template <typename GfxFamily>
-    auto createWalker(uint64_t postSyncAddress) {
-        WalkerPartition::COMPUTE_WALKER<GfxFamily> walker;
-        walker = GfxFamily::cmdInitGpgpuWalker;
-        walker.setPartitionType(COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE::PARTITION_TYPE_X);
-        auto &postSync = walker.getPostSync();
-        postSync.setOperation(POSTSYNC_DATA<GfxFamily>::OPERATION::OPERATION_WRITE_TIMESTAMP);
-        postSync.setDestinationAddress(postSyncAddress);
-        return walker;
-    }
-
-    char cmdBuffer[4096u];
-    uint32_t totalBytesProgrammed = 0u;
-    void *cmdBufferAddress = nullptr;
-    bool checkForProperCmdBufferAddressOffset = true;
-    bool synchronizeBeforeExecution = false;
-    bool nativeCrossTileSync = false;
-};
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
-    auto partitionCount = 16u;
-    checkForProperCmdBufferAddressOffset = false;
-    uint64_t gpuVirtualAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
-    walker = FamilyType::cmdInitGpgpuWalker;
-    walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X);
-    auto &postSync = walker.getPostSync();
-    postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
-    postSync.setDestinationAddress(postSyncAddress);
-    uint32_t totalBytesProgrammed;
-
-    auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
-                                   sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
-                                   sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
-                                   sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
-                                   sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
-                                   sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
-                                   sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) +
-                                   sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-
-    auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
-                                 sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-
-    EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, false, false));
-
-    auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
-
-    auto totalProgrammedSize = optionalBatchBufferEndOffset + sizeof(WalkerPartition::BATCH_BUFFER_END<FamilyType>);
-
-    WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
-                                                                              gpuVirtualAddress,
-                                                                              &walker,
-                                                                              totalBytesProgrammed,
-                                                                              partitionCount,
-                                                                              4u,
-                                                                              true,
-                                                                              synchronizeBeforeExecution,
-                                                                              false,
-                                                                              false,
-                                                                              false);
-
-    EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed);
-    auto wparidMaskProgrammingLocation = cmdBufferAddress;
-
-    auto expectedMask = 0xFFF0u;
-    auto expectedRegister = 0x21FCu;
-
-    auto loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation);
-    ASSERT_NE(nullptr, loadRegisterImmediate);
-    EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset());
-    EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword());
-    auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>);
-
-    auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, miAtomic);
-    auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize;
-    auto miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
-    EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress);
-    EXPECT_TRUE(miAtomic->getReturnDataControl());
-    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-
-    auto loadRegisterReg = genCmdCast<WalkerPartition::LOAD_REGISTER_REG<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, loadRegisterReg);
-    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination());
-    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource());
-    EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress());
-    EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress());
-    parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>);
-
-    auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, miSetPredicate);
-    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE);
-    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
-
-    auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, batchBufferStart);
-    EXPECT_TRUE(batchBufferStart->getPredicationEnable());
-    //address routes to WALKER section which is before control section
-    auto address = batchBufferStart->getBatchBufferStartAddress();
-    EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands);
-    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-
-    miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, miSetPredicate);
-    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER);
-    EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE);
-
-    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
-
-    auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
-    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
-
-    parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
-
-    miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, miAtomic);
-    auto miAtomicTileAddress = gpuVirtualAddress + expectedCommandUsedSize + sizeof(uint32_t);
-    auto miAtomicTileProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
-    EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress);
-    EXPECT_FALSE(miAtomic->getReturnDataControl());
-    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-
-    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-
-    auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, miSemaphoreWait);
-    EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress);
-    EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
-    EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 4u);
-
-    parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-
-    //final batch buffer start that routes at the end of the batch buffer
-    auto batchBufferStartFinal = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    EXPECT_NE(nullptr, batchBufferStartFinal);
-    EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset);
-    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-
-    auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, computeWalker);
-    parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-
-    batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    ASSERT_NE(nullptr, batchBufferStart);
-    EXPECT_FALSE(batchBufferStart->getPredicationEnable());
-    EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress());
-    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-
-    auto controlSection = reinterpret_cast<BatchBufferControlData *>(ptrOffset(cmdBuffer, expectedCommandUsedSize));
-    EXPECT_EQ(0u, controlSection->partitionCount);
-    EXPECT_EQ(0u, controlSection->tileCount);
-    parsedOffset += sizeof(BatchBufferControlData);
-
-    auto batchBufferEnd = genCmdCast<WalkerPartition::BATCH_BUFFER_END<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-    EXPECT_NE(nullptr, batchBufferEnd);
-    EXPECT_EQ(parsedOffset, optionalBatchBufferEndOffset);
-}
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
-    const auto tileCount = 4u;
-    const auto partitionCount = tileCount;
-    checkForProperCmdBufferAddressOffset = false;
-    uint64_t cmdBufferGpuAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    uint64_t workPartitionAllocationAddress = 0x8000444000;
-    auto walker = createWalker<FamilyType>(postSyncAddress);
-
-    uint32_t totalBytesProgrammed{};
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false);
-    const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
-    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
-                                                                             cmdBufferGpuAddress,
-                                                                             &walker,
-                                                                             totalBytesProgrammed,
-                                                                             partitionCount,
-                                                                             tileCount,
-                                                                             synchronizeBeforeExecution,
-                                                                             false,
-                                                                             nativeCrossTileSync,
-                                                                             workPartitionAllocationAddress,
-                                                                             false);
-    EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed);
-
-    auto parsedOffset = 0u;
-    {
-        auto loadRegisterMem = genCmdCast<WalkerPartition::LOAD_REGISTER_MEM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, loadRegisterMem);
-        parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>);
-        const auto expectedRegister = 0x221Cu;
-        EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable());
-        EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress());
-        EXPECT_EQ(workPartitionAllocationAddress, loadRegisterMem->getMemoryAddress());
-    }
-    {
-        auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, computeWalker);
-        parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-    }
-    {
-        auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, pipeControl);
-        parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
-        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
-        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, batchBufferStart);
-        parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-        EXPECT_FALSE(batchBufferStart->getPredicationEnable());
-        const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
-        EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
-    }
-    {
-        auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
-        parsedOffset += sizeof(StaticPartitioningControlSection);
-        StaticPartitioningControlSection expectedControlSection = {};
-        EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
-    }
-    EXPECT_EQ(parsedOffset, totalBytesProgrammed);
-}
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndPreWalkerSyncWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
-    const auto tileCount = 4u;
-    const auto partitionCount = tileCount;
-    checkForProperCmdBufferAddressOffset = false;
-    synchronizeBeforeExecution = true;
-    uint64_t cmdBufferGpuAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    uint64_t workPartitionAllocationAddress = 0x8000444000;
-    auto walker = createWalker<FamilyType>(postSyncAddress);
-
-    uint32_t totalBytesProgrammed{};
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false);
-    const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
-    const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
-    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
-                                                                             cmdBufferGpuAddress,
-                                                                             &walker,
-                                                                             totalBytesProgrammed,
-                                                                             partitionCount,
-                                                                             tileCount,
-                                                                             synchronizeBeforeExecution,
-                                                                             false,
-                                                                             nativeCrossTileSync,
-                                                                             workPartitionAllocationAddress,
-                                                                             false);
-    EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed);
-
-    auto parsedOffset = 0u;
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(preWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(preWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto loadRegisterMem = genCmdCast<WalkerPartition::LOAD_REGISTER_MEM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, loadRegisterMem);
-        parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>);
-        const auto expectedRegister = 0x221Cu;
-        EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable());
-        EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress());
-        EXPECT_EQ(workPartitionAllocationAddress, loadRegisterMem->getMemoryAddress());
-    }
-    {
-        auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, computeWalker);
-        parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-    }
-    {
-        auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, pipeControl);
-        parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
-        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
-        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, batchBufferStart);
-        parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-        EXPECT_FALSE(batchBufferStart->getPredicationEnable());
-        const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
-        EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
-    }
-    {
-        auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
-        parsedOffset += sizeof(StaticPartitioningControlSection);
-        StaticPartitioningControlSection expectedControlSection = {};
-        EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
-    }
-    EXPECT_EQ(parsedOffset, totalBytesProgrammed);
-}
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndSynchronizationWithPostSyncsWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
-    DebugManagerStateRestore restore{};
-    DebugManager.flags.ExperimentalSynchronizeWithSemaphores.set(1);
-    DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.set(0);
-
-    const auto tileCount = 4u;
-    const auto partitionCount = tileCount;
-    checkForProperCmdBufferAddressOffset = false;
-    uint64_t cmdBufferGpuAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    uint64_t workPartitionAllocationAddress = 0x8000444000;
-    auto walker = createWalker<FamilyType>(postSyncAddress);
-
-    uint32_t totalBytesProgrammed{};
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false);
-    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
-                                                                             cmdBufferGpuAddress,
-                                                                             &walker,
-                                                                             totalBytesProgrammed,
-                                                                             partitionCount,
-                                                                             tileCount,
-                                                                             synchronizeBeforeExecution,
-                                                                             false,
-                                                                             nativeCrossTileSync,
-                                                                             workPartitionAllocationAddress, false);
-    EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed);
-
-    auto parsedOffset = 0u;
-    {
-        auto loadRegisterMem = genCmdCast<WalkerPartition::LOAD_REGISTER_MEM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, loadRegisterMem);
-        parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>);
-        const auto expectedRegister = 0x221Cu;
-        EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable());
-        EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress());
-        EXPECT_EQ(workPartitionAllocationAddress, loadRegisterMem->getMemoryAddress());
-    }
-    {
-        auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, computeWalker);
-        parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-    }
-    {
-        auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, pipeControl);
-        parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
-        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
-        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu;
-        EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu + 16llu;
-        EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu + 32llu;
-        EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu + 48llu;
-        EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, batchBufferStart);
-        parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-        EXPECT_FALSE(batchBufferStart->getPredicationEnable());
-        const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
-        EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
-    }
-    {
-        auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
-        parsedOffset += sizeof(StaticPartitioningControlSection);
-        StaticPartitioningControlSection expectedControlSection = {};
-        EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
-    }
-    EXPECT_EQ(parsedOffset, totalBytesProgrammed);
-}
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
-    const auto tileCount = 4u;
-    const auto partitionCount = tileCount;
-    nativeCrossTileSync = true;
-    checkForProperCmdBufferAddressOffset = false;
-    uint64_t cmdBufferGpuAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    uint64_t workPartitionAllocationAddress = 0x8000444000;
-    auto walker = createWalker<FamilyType>(postSyncAddress);
-
-    uint32_t totalBytesProgrammed{};
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false);
-    const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
-    const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
-    const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
-    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
-                                                                             cmdBufferGpuAddress,
-                                                                             &walker,
-                                                                             totalBytesProgrammed,
-                                                                             partitionCount,
-                                                                             tileCount,
-                                                                             synchronizeBeforeExecution,
-                                                                             false,
-                                                                             nativeCrossTileSync,
-                                                                             workPartitionAllocationAddress,
-                                                                             false);
-    const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false,
-                                                                                                           partitionCount,
-                                                                                                           synchronizeBeforeExecution,
-                                                                                                           nativeCrossTileSync,
-                                                                                                           true,
-                                                                                                           false);
-    EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
-
-    auto parsedOffset = 0u;
-    {
-        auto loadRegisterMem = genCmdCast<WalkerPartition::LOAD_REGISTER_MEM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, loadRegisterMem);
-        parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>);
-        const auto expectedRegister = 0x221Cu;
-        EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable());
-        EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress());
-        EXPECT_EQ(workPartitionAllocationAddress, loadRegisterMem->getMemoryAddress());
-    }
-    {
-        auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, computeWalker);
-        parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-    }
-    {
-        auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, storeDataImm);
-        parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, storeDataImm->getAddress());
-        EXPECT_FALSE(storeDataImm->getStoreQword());
-        EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength());
-        EXPECT_EQ(0u, storeDataImm->getDataDword0());
-    }
-    {
-        auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, pipeControl);
-        parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
-        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
-        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, batchBufferStart);
-        parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-        EXPECT_FALSE(batchBufferStart->getPredicationEnable());
-        const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
-        EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
-    }
-    {
-        auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
-        parsedOffset += sizeof(StaticPartitioningControlSection);
-        StaticPartitioningControlSection expectedControlSection = {};
-        EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, storeDataImm);
-        parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
-        EXPECT_EQ(preWalkerSyncAddress, storeDataImm->getAddress());
-        EXPECT_FALSE(storeDataImm->getStoreQword());
-        EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength());
-        EXPECT_EQ(0u, storeDataImm->getDataDword0());
-    }
-    {
-        auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, storeDataImm);
-        parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, storeDataImm->getAddress());
-        EXPECT_FALSE(storeDataImm->getStoreQword());
-        EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength());
-        EXPECT_EQ(0u, storeDataImm->getDataDword0());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(2 * tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    EXPECT_EQ(parsedOffset, totalBytesProgrammed);
-}
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndSyncDisabledWithFlagWhenConstructCommandBufferIsCalledThenStillProgramTheSync) {
-    DebugManagerStateRestore restore{};
-    DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.set(0);
-
-    const auto tileCount = 4u;
-    const auto partitionCount = tileCount;
-    nativeCrossTileSync = true;
-    checkForProperCmdBufferAddressOffset = false;
-    uint64_t cmdBufferGpuAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    uint64_t workPartitionAllocationAddress = 0x8000444000;
-    auto walker = createWalker<FamilyType>(postSyncAddress);
-
-    uint32_t totalBytesProgrammed{};
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false);
-    const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
-    const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
-    const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
-    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
-                                                                             cmdBufferGpuAddress,
-                                                                             &walker,
-                                                                             totalBytesProgrammed,
-                                                                             partitionCount,
-                                                                             tileCount,
-                                                                             synchronizeBeforeExecution,
-                                                                             false,
-                                                                             nativeCrossTileSync,
-                                                                             workPartitionAllocationAddress,
-                                                                             false);
-    const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false,
-                                                                                                           partitionCount,
-                                                                                                           synchronizeBeforeExecution,
-                                                                                                           nativeCrossTileSync,
-                                                                                                           true,
-                                                                                                           false);
-    EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
-
-    auto parsedOffset = 0u;
-    {
-        auto loadRegisterMem = genCmdCast<WalkerPartition::LOAD_REGISTER_MEM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, loadRegisterMem);
-        parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>);
-        const auto expectedRegister = 0x221Cu;
-        EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable());
-        EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress());
-        EXPECT_EQ(workPartitionAllocationAddress, loadRegisterMem->getMemoryAddress());
-    }
-    {
-        auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, computeWalker);
-        parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-    }
-    {
-        auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, storeDataImm);
-        parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, storeDataImm->getAddress());
-        EXPECT_FALSE(storeDataImm->getStoreQword());
-        EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength());
-        EXPECT_EQ(0u, storeDataImm->getDataDword0());
-    }
-    {
-        auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, pipeControl);
-        parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
-        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
-        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, batchBufferStart);
-        parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-        EXPECT_FALSE(batchBufferStart->getPredicationEnable());
-        const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
-        EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
-    }
-    {
-        auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
-        parsedOffset += sizeof(StaticPartitioningControlSection);
-        StaticPartitioningControlSection expectedControlSection = {};
-        EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, storeDataImm);
-        parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
-        EXPECT_EQ(preWalkerSyncAddress, storeDataImm->getAddress());
-        EXPECT_FALSE(storeDataImm->getStoreQword());
-        EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength());
-        EXPECT_EQ(0u, storeDataImm->getDataDword0());
-    }
-    {
-        auto storeDataImm = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, storeDataImm);
-        parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, storeDataImm->getAddress());
-        EXPECT_FALSE(storeDataImm->getStoreQword());
-        EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength());
-        EXPECT_EQ(0u, storeDataImm->getDataDword0());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(2 * tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    EXPECT_EQ(parsedOffset, totalBytesProgrammed);
-}
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndAtomicsForNativeWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
-    const auto tileCount = 4u;
-    const auto partitionCount = tileCount;
-    bool useAtomicsForNative = true;
-    nativeCrossTileSync = true;
-    checkForProperCmdBufferAddressOffset = false;
-    uint64_t cmdBufferGpuAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    uint64_t workPartitionAllocationAddress = 0x8000444000;
-    auto walker = createWalker<FamilyType>(postSyncAddress);
-
-    uint32_t totalBytesProgrammed{};
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, useAtomicsForNative);
-    const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
-    const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
-    const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
-    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
-                                                                             cmdBufferGpuAddress,
-                                                                             &walker,
-                                                                             totalBytesProgrammed,
-                                                                             partitionCount,
-                                                                             tileCount,
-                                                                             synchronizeBeforeExecution,
-                                                                             false,
-                                                                             nativeCrossTileSync,
-                                                                             workPartitionAllocationAddress,
-                                                                             useAtomicsForNative);
-    const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false,
-                                                                                                           partitionCount,
-                                                                                                           synchronizeBeforeExecution,
-                                                                                                           nativeCrossTileSync,
-                                                                                                           true,
-                                                                                                           useAtomicsForNative);
-    EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed);
-
-    auto parsedOffset = 0u;
-    {
-        auto loadRegisterMem = genCmdCast<WalkerPartition::LOAD_REGISTER_MEM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, loadRegisterMem);
-        parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>);
-        const auto expectedRegister = 0x221Cu;
-        EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable());
-        EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress());
-        EXPECT_EQ(workPartitionAllocationAddress, loadRegisterMem->getMemoryAddress());
-    }
-    {
-        auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, computeWalker);
-        parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, pipeControl);
-        parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
-        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
-        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, batchBufferStart);
-        parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
-        EXPECT_FALSE(batchBufferStart->getPredicationEnable());
-        const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
-        EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
-    }
-    {
-        auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
-        parsedOffset += sizeof(StaticPartitioningControlSection);
-        StaticPartitioningControlSection expectedControlSection = {};
-        EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(preWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miAtomic);
-        parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
-        EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize());
-        EXPECT_FALSE(miAtomic->getReturnDataControl());
-        EXPECT_FALSE(miAtomic->getCsStall());
-    }
-    {
-        auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
-        ASSERT_NE(nullptr, miSemaphoreWait);
-        parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
-        EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress());
-        EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
-        EXPECT_EQ(2 * tileCount, miSemaphoreWait->getSemaphoreDataDword());
-    }
-    EXPECT_EQ(parsedOffset, totalBytesProgrammed);
-}
-
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndSyncDisabledWithFlagWhenUsingAtomicForNativeAndConstructCommandBufferIsCalledThenStillProgramTheSync) {
-    DebugManagerStateRestore restore{};
-    DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.set(0);
-
-    const auto tileCount = 4u;
-    const auto partitionCount = tileCount;
-    nativeCrossTileSync = true;
-    bool useAtomicsForNative = true;
-    checkForProperCmdBufferAddressOffset = false;
-    uint64_t cmdBufferGpuAddress = 0x8000123000;
-    uint64_t postSyncAddress = 0x8000456000;
-    uint64_t workPartitionAllocationAddress = 0x8000444000;
-    auto walker = createWalker<FamilyType>(postSyncAddress);
-
-    uint32_t totalBytesProgrammed{};
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, useAtomicsForNative);
-    const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
-    const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
-    const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
offsetof(StaticPartitioningControlSection, finalSyncTileCounter); - WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, - cmdBufferGpuAddress, - &walker, - totalBytesProgrammed, - partitionCount, - tileCount, - synchronizeBeforeExecution, - false, - nativeCrossTileSync, - workPartitionAllocationAddress, - useAtomicsForNative); - const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(false, - partitionCount, - synchronizeBeforeExecution, - nativeCrossTileSync, - true, - useAtomicsForNative); - EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); - - auto parsedOffset = 0u; - { - auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, loadRegisterMem); - parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); - const auto expectedRegister = 0x221Cu; - EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); - EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); - EXPECT_EQ(workPartitionAllocationAddress, loadRegisterMem->getMemoryAddress()); - } - { - auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, computeWalker); - parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); - } - { - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); - EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_FALSE(miAtomic->getCsStall()); - } - { - auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, pipeControl); - parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); - } - { - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_FALSE(miAtomic->getCsStall()); - } - { - auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); - EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword()); - } - { - auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStart); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - EXPECT_FALSE(batchBufferStart->getPredicationEnable()); - const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); - EXPECT_EQ(afterControlSectionAddress, 
batchBufferStart->getBatchBufferStartAddress()); - } - { - auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); - parsedOffset += sizeof(StaticPartitioningControlSection); - StaticPartitioningControlSection expectedControlSection = {}; - EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); - } - { - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_FALSE(miAtomic->getCsStall()); - } - { - auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); - EXPECT_EQ(tileCount, miSemaphoreWait->getSemaphoreDataDword()); - } - { - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - EXPECT_EQ(preWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); - EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_FALSE(miAtomic->getCsStall()); - } - { - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); - EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_FALSE(miAtomic->getCsStall()); - } - { - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_FALSE(miAtomic->getCsStall()); - } - { - auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); - EXPECT_EQ(2 * tileCount, miSemaphoreWait->getSemaphoreDataDword()); - } - EXPECT_EQ(parsedOffset, totalBytesProgrammed); -} - 
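// The control-section offsets asserted throughout these static-partitioning
// tests come from a plain aggregate appended after the command stream. Below is
// a minimal sketch of the layout the offsetof() expressions above assume; the
// authoritative definition lives in
// shared/source/command_container/walker_partition_xehp_and_later.h, and the
// uint32_t field widths here are an assumption, not a copy of that header:
struct StaticPartitioningControlSectionSketch {
    uint32_t synchronizeBeforeWalkerCounter = 0; // bumped once per tile before the walker runs
    uint32_t synchronizeAfterWalkerCounter = 0;  // bumped once per tile after the walker completes
    uint32_t finalSyncTileCounter = 0;           // bumped twice per tile during the cleanup pass
};
// The three sync addresses are then derived from where the section lands:
//   preWalkerSyncAddress  = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
//   postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
//   finalSyncAddress      = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);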
-HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { - DebugManagerStateRestore restorer; - DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.set(0); - DebugManager.flags.ExperimentalSynchronizeWithSemaphores.set(1); - - auto partitionCount = 16u; - checkForProperCmdBufferAddressOffset = false; - uint64_t gpuVirtualAddress = 0x8000123000; - uint64_t postSyncAddress = 0x8000456000; - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); - auto &postSync = walker.getPostSync(); - postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP); - postSync.setDestinationAddress(postSyncAddress); - uint32_t totalBytesProgrammed; - - auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) + - sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + - sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + - sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + - sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + - sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * partitionCount; - - auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) + - sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); - - EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(partitionCount, synchronizeBeforeExecution, false, false)); - - auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); - - auto totalProgrammedSize = optionalBatchBufferEndOffset + sizeof(WalkerPartition::BATCH_BUFFER_END<FamilyType>); - - WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(cmdBuffer, - gpuVirtualAddress, &walker, totalBytesProgrammed, partitionCount, - 4u, true, synchronizeBeforeExecution, false, false, false); - - EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); - auto wparidMaskProgrammingLocation = cmdBufferAddress; - - auto expectedMask = 0xFFF0u; - auto expectedRegister = 0x21FCu; - - auto loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); - ASSERT_NE(nullptr, loadRegisterImmediate); - EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset()); - EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); - auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>); - - auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize; - auto miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress); - EXPECT_TRUE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>); - - auto loadRegisterReg = genCmdCast<WalkerPartition::LOAD_REGISTER_REG<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, loadRegisterReg); - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination()); - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource()); - EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress()); - EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress()); - parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>); - - auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSetPredicate); - 
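// The predication scheme verified below, in brief: the mask written to register
// 0x21FC clears log2(partitionCount) low bits, so the masked WPARID compare is
// zero exactly for work-partition IDs in [0, partitionCount). A sketch of that
// mask math, inferred from the expected values in these tests:
//   partitionCount 16 -> 0xFFF0, 8 -> 0xFFF8, 4 -> 0xFFFC, 2 -> 0xFFFE
// With PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE set, the predicated
// BATCH_BUFFER_START that follows is executed only where that compare yields
// zero; the second MI_SET_PREDICATE further down disables predication again.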
EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); - parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); - - auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStart); - EXPECT_TRUE(batchBufferStart->getPredicationEnable()); - //address routes to WALKER section which is before control section - auto address = batchBufferStart->getBatchBufferStartAddress(); - EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSetPredicate); - EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER); - EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); - - parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); - - auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); - - parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); - - auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - for (uint32_t partitionId = 0u; partitionId < partitionCount; partitionId++) { - ASSERT_NE(nullptr, miSemaphoreWait); - EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), postSyncAddress + 8llu + partitionId * 16llu); - EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); - EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 1u); - - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - } - - //final batch buffer start that routes at the end of the batch buffer - auto batchBufferStartFinal = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - EXPECT_NE(nullptr, batchBufferStartFinal); - EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, computeWalker); - parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); - - batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStart); - EXPECT_FALSE(batchBufferStart->getPredicationEnable()); - EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress()); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, expectedCommandUsedSize)); - EXPECT_EQ(0u, controlSection->partitionCount); - EXPECT_EQ(0u, controlSection->tileCount); - parsedOffset += sizeof(BatchBufferControlData); - - auto batchBufferEnd = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - EXPECT_NE(nullptr, batchBufferEnd); - EXPECT_EQ(parsedOffset, optionalBatchBufferEndOffset); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramRegisterCommandWhenItIsCalledThenLoadRegisterImmIsSetUnderPointer) { - uint32_t registerOffset = 120u; - uint32_t registerValue = 542u; - auto expectedUsedSize = 
sizeof(WalkerPartition::LOAD_REGISTER_IMM); - void *loadRegisterImmediateAddress = cmdBufferAddress; - WalkerPartition::programRegisterWithValue(cmdBufferAddress, registerOffset, totalBytesProgrammed, registerValue); - auto loadRegisterImmediate = genCmdCast *>(loadRegisterImmediateAddress); - - ASSERT_NE(nullptr, loadRegisterImmediate); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - EXPECT_EQ(registerOffset, loadRegisterImmediate->getRegisterOffset()); - EXPECT_EQ(registerValue, loadRegisterImmediate->getDataDword()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledWithoutBatchBufferEndThenBatchBufferEndIsNotProgrammed) { - auto partitionCount = 16u; - checkForProperCmdBufferAddressOffset = false; - uint64_t gpuVirtualAddress = 0x8000123000; - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); - - WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, - gpuVirtualAddress, - &walker, - totalBytesProgrammed, - partitionCount, - 4u, false, - synchronizeBeforeExecution, - false, - false, - false); - auto totalProgrammedSize = computeControlSectionOffset(partitionCount, synchronizeBeforeExecution, false, false) + - sizeof(BatchBufferControlData); - EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenItIsCalledThenProperSizeIsReturned) { - auto partitionCount = 16u; - auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::LOAD_REGISTER_REG) + - sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::BatchBufferControlData) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - auto requiresBatchBufferEnd = false; - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, false, false)); - - requiresBatchBufferEnd = true; - EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END), - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, false, false)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenPartitionCountIs4ThenSizeIsProperlyEstimated) { - auto partitionCount = 4u; - auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::LOAD_REGISTER_REG) + - sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::BatchBufferControlData) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - auto requiresBatchBufferEnd = false; - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, false, false)); - - requiresBatchBufferEnd = true; - EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END), - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, false, false)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, 
givenEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) { - auto partitionCount = 16u; - auto requiresBatchBufferEnd = false; - auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::LOAD_REGISTER_REG) + - sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::BatchBufferControlData) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - auto expectedDelta = sizeof(WalkerPartition::MI_ATOMIC) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - synchronizeBeforeExecution = false; - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, false, false)); - - synchronizeBeforeExecution = true; - EXPECT_EQ(expectedUsedSize + expectedDelta, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, false, false)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationWhenItIsCalledThenProperSizeIsReturned) { - const auto partitionCount = 16u; - const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM) + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::MI_ATOMIC) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + - sizeof(WalkerPartition::BATCH_BUFFER_START) + - sizeof(WalkerPartition::StaticPartitioningControlSection); - - auto requiresBatchBufferEnd = false; - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, true, false)); - - requiresBatchBufferEnd = true; - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, true, false)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) { - const auto partitionCount = 16u; - const auto requiresBatchBufferEnd = false; - const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM) + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::MI_ATOMIC) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + - sizeof(WalkerPartition::BATCH_BUFFER_START) + - sizeof(WalkerPartition::StaticPartitioningControlSection); - - synchronizeBeforeExecution = false; - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, true, false)); - - synchronizeBeforeExecution = true; - const auto preExecutionSynchronizationSize = sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - EXPECT_EQ(expectedUsedSize + preExecutionSynchronizationSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, false, true, false)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationNativeSectionsWhenItIsCalledThenProperSizeIsReturned) { - auto partitionCount = 16u; - auto requiresBatchBufferEnd = false; - synchronizeBeforeExecution = false; - nativeCrossTileSync = true; - - auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + - 
sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::LOAD_REGISTER_REG) + - sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::MI_STORE_DATA_IMM) + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::BatchBufferControlData) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) * 2 + - sizeof(WalkerPartition::MI_STORE_DATA_IMM) * 3; - - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false, false)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationNativeSectionsWhenAtomicsUsedForNativeThenProperSizeIsReturned) { - auto partitionCount = 16u; - auto requiresBatchBufferEnd = false; - synchronizeBeforeExecution = false; - nativeCrossTileSync = true; - - auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::LOAD_REGISTER_REG) + - sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::MI_ATOMIC) + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::BatchBufferControlData) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) * 2 + - sizeof(WalkerPartition::MI_ATOMIC) * 3; - - EXPECT_EQ(expectedUsedSize, - estimateSpaceRequiredInCommandBuffer(requiresBatchBufferEnd, partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false, true)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWparidPredicationMaskWhenItIsCalledWithWrongInputThenFalseIsReturnedAndNothingIsProgrammed) { - EXPECT_FALSE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 3)); - EXPECT_FALSE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 5)); - EXPECT_FALSE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 17)); - EXPECT_FALSE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 32)); - EXPECT_FALSE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 15)); - EXPECT_FALSE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 11)); - EXPECT_FALSE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 9)); - EXPECT_EQ(0u, totalBytesProgrammed); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWparidPredicationMaskWhenItIsCalledWithPartitionCountThenProperMaskIsSet) { - auto wparidMaskProgrammingLocation = cmdBufferAddress; - EXPECT_TRUE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 16)); - auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - - auto expectedMask = 0xFFF0u; - auto expectedRegister = 0x21FCu; - - auto loadRegisterImmediate = genCmdCast *>(wparidMaskProgrammingLocation); - ASSERT_NE(nullptr, loadRegisterImmediate); - EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset()); - EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); - - EXPECT_TRUE(programWparidMask(cmdBufferAddress, totalBytesProgrammed, 8)); - wparidMaskProgrammingLocation = ptrOffset(wparidMaskProgrammingLocation, sizeof(LOAD_REGISTER_IMM)); - loadRegisterImmediate = genCmdCast 
<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); - expectedMask = 0xFFF8u; - EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); - - EXPECT_TRUE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 4)); - wparidMaskProgrammingLocation = ptrOffset(wparidMaskProgrammingLocation, sizeof(LOAD_REGISTER_IMM<FamilyType>)); - loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); - expectedMask = 0xFFFCu; - EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); - - EXPECT_TRUE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 2)); - wparidMaskProgrammingLocation = ptrOffset(wparidMaskProgrammingLocation, sizeof(LOAD_REGISTER_IMM<FamilyType>)); - loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); - expectedMask = 0xFFFEu; - EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPredicationOnWhenItIsProgrammedThenCommandBufferContainsCorrectCommand) { - auto expectedUsedSize = sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>); - - void *miSetPredicateAddress = cmdBufferAddress; - programWparidPredication<FamilyType>(cmdBufferAddress, totalBytesProgrammed, true); - auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(miSetPredicateAddress); - - ASSERT_NE(nullptr, miSetPredicate); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPredicationOffWhenItIsProgrammedThenCommandBufferContainsCorrectCommand) { - auto expectedUsedSize = sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>); - - void *miSetPredicateAddress = cmdBufferAddress; - programWparidPredication<FamilyType>(cmdBufferAddress, totalBytesProgrammed, false); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(miSetPredicateAddress); - ASSERT_NE(nullptr, miSetPredicate); - EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER); - EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWaitForSemaphoreWhenItIsProgrammedThenAllFieldsAreSetCorrectly) { - auto expectedUsedSize = sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); - uint64_t gpuAddress = 0x6432100llu; - uint32_t compareData = 1u; - - void *semaphoreWaitAddress = cmdBufferAddress; - programWaitForSemaphore<FamilyType>(cmdBufferAddress, - totalBytesProgrammed, - gpuAddress, - compareData, - MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); - auto semaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(semaphoreWaitAddress); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - - ASSERT_NE(nullptr, semaphoreWait); - EXPECT_EQ(compareData, semaphoreWait->getSemaphoreDataDword()); - EXPECT_EQ(gpuAddress, semaphoreWait->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreWait->getCompareOperation()); - EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreWait->getWaitMode()); - EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::MEMORY_TYPE::MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS, semaphoreWait->getMemoryType()); - EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, semaphoreWait->getRegisterPollMode()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, 
givenMiAtomicWhenItIsProgrammedThenAllFieldsAreSetCorrectly) { - auto expectedUsedSize = sizeof(WalkerPartition::MI_ATOMIC); - uint64_t gpuAddress = 0xFFFFFFDFEEDBAC10llu; - - void *miAtomicAddress = cmdBufferAddress; - programMiAtomic(cmdBufferAddress, - totalBytesProgrammed, gpuAddress, true, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); - - auto miAtomic = genCmdCast *>(miAtomicAddress); - ASSERT_NE(nullptr, miAtomic); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - EXPECT_EQ(0u, miAtomic->getDataSize()); - EXPECT_TRUE(miAtomic->getCsStall()); - EXPECT_EQ(MI_ATOMIC::MEMORY_TYPE::MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS, miAtomic->getMemoryType()); - EXPECT_TRUE(miAtomic->getReturnDataControl()); - EXPECT_FALSE(miAtomic->getWorkloadPartitionIdOffsetEnable()); - auto memoryAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - - //bits 48-63 are zeroed - EXPECT_EQ((gpuAddress & 0xFFFFFFFFFFFF), memoryAddress); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenMiLoadRegisterRegWhenItIsProgrammedThenCommandIsProperlySet) { - auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_REG); - void *loadRegisterRegAddress = cmdBufferAddress; - WalkerPartition::programMiLoadRegisterReg(cmdBufferAddress, totalBytesProgrammed, generalPurposeRegister1, wparidCCSOffset); - auto loadRegisterReg = genCmdCast *>(loadRegisterRegAddress); - ASSERT_NE(nullptr, loadRegisterReg); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination()); - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource()); - EXPECT_EQ(generalPurposeRegister1, loadRegisterReg->getSourceRegisterAddress()); - EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedThenItIsProperlySet) { - auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL); - void *pipeControlCAddress = cmdBufferAddress; - WalkerPartition::programPipeControlCommand(cmdBufferAddress, totalBytesProgrammed, true); - auto pipeControl = genCmdCast *>(pipeControlCAddress); - ASSERT_NE(nullptr, pipeControl); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedWithDcFlushFalseThenExpectDcFlushFlagFalse) { - auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL); - void *pipeControlCAddress = cmdBufferAddress; - WalkerPartition::programPipeControlCommand(cmdBufferAddress, totalBytesProgrammed, false); - auto pipeControl = genCmdCast *>(pipeControlCAddress); - ASSERT_NE(nullptr, pipeControl); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_FALSE(pipeControl->getDcFlushEnable()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedWithDebugDoNotFlushThenItIsProperlySetWithoutDcFlush) { - DebugManagerStateRestore restore; - DebugManager.flags.DoNotFlushCaches.set(true); - auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL); - void *pipeControlCAddress = cmdBufferAddress; - WalkerPartition::programPipeControlCommand(cmdBufferAddress, totalBytesProgrammed, true); 
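// DoNotFlushCaches globally overrides the dcFlush request, so even though the
// call above asked for a flush, the parsed PIPE_CONTROL below is expected to
// have DC flush disabled.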
- auto pipeControl = genCmdCast *>(pipeControlCAddress); - ASSERT_NE(nullptr, pipeControl); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_FALSE(pipeControl->getDcFlushEnable()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramBatchBufferStartCommandWhenItIsCalledThenCommandIsProgrammedCorrectly) { - auto expectedUsedSize = sizeof(WalkerPartition::BATCH_BUFFER_START); - uint64_t gpuAddress = 0xFFFFFFDFEEDBAC10llu; - - void *batchBufferStartAddress = cmdBufferAddress; - WalkerPartition::programMiBatchBufferStart(cmdBufferAddress, totalBytesProgrammed, gpuAddress, true, false); - auto batchBufferStart = genCmdCast *>(batchBufferStartAddress); - ASSERT_NE(nullptr, batchBufferStart); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - - //bits 48-63 are zeroed - EXPECT_EQ((gpuAddress & 0xFFFFFFFFFFFF), batchBufferStart->getBatchBufferStartAddress()); - - EXPECT_TRUE(batchBufferStart->getPredicationEnable()); - EXPECT_FALSE(batchBufferStart->getEnableCommandCache()); - EXPECT_EQ(BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, batchBufferStart->getSecondLevelBatchBuffer()); - EXPECT_EQ(BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR::ADDRESS_SPACE_INDICATOR_PPGTT, batchBufferStart->getAddressSpaceIndicator()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhenItIsCalledThenWalkerIsProperlyProgrammed) { - auto expectedUsedSize = sizeof(WalkerPartition::COMPUTE_WALKER); - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(7u); - walker.setThreadGroupIdYDimension(10u); - walker.setThreadGroupIdZDimension(11u); - - walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); - void *walkerCommandAddress = cmdBufferAddress; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, 2u); - auto walkerCommand = genCmdCast *>(walkerCommandAddress); - - ASSERT_NE(nullptr, walkerCommand); - EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); - EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable()); - EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType()); - EXPECT_EQ(4u, walkerCommand->getPartitionSize()); - - walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y); - walkerCommandAddress = cmdBufferAddress; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, 2u); - walkerCommand = genCmdCast *>(walkerCommandAddress); - - ASSERT_NE(nullptr, walkerCommand); - EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walkerCommand->getPartitionType()); - EXPECT_EQ(5u, walkerCommand->getPartitionSize()); - - walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z); - walkerCommandAddress = cmdBufferAddress; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, 2u); - walkerCommand = genCmdCast *>(walkerCommandAddress); - - ASSERT_NE(nullptr, walkerCommand); - EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walkerCommand->getPartitionType()); - EXPECT_EQ(6u, walkerCommand->getPartitionSize()); - - //if we program with partition Count == 1 then do not trigger partition stuff - walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); - walkerCommandAddress = cmdBufferAddress; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, 1u); - walkerCommand = 
genCmdCast *>(walkerCommandAddress); - - ASSERT_NE(nullptr, walkerCommand); - EXPECT_EQ(0u, walkerCommand->getPartitionSize()); - EXPECT_FALSE(walkerCommand->getWorkloadPartitionEnable()); - EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walkerCommand->getPartitionType()); -} -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWhenComputePartitionCountIsCalledThenDefaultSizeAndTypeIsReturned) { - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(16u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType(&walker, 2u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithNonUniformStartWhenComputePartitionCountIsCalledThenPartitionsAreDisabled) { - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdStartingX(1u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(1u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); - - walker.setThreadGroupIdStartingX(0u); - walker.setThreadGroupIdStartingY(1u); - - partitionCount = computePartitionCountAndSetPartitionType(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(1u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); - - walker.setThreadGroupIdStartingY(0u); - walker.setThreadGroupIdStartingZ(1u); - - partitionCount = computePartitionCountAndSetPartitionType(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(1u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenHighestDimensionIsPartitioned) { - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(64u); - walker.setThreadGroupIdYDimension(64u); - walker.setThreadGroupIdZDimension(64u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); - - walker.setThreadGroupIdYDimension(65u); - walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); - partitionCount = computePartitionCountAndSetPartitionType(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); - - walker.setThreadGroupIdZDimension(66u); - walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); - partitionCount = computePartitionCountAndSetPartitionType(&walker, 4u, 
false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDisabledMinimalPartitionSizeWhenComputePartitionSizeThenProperValueIsReturned) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(64u); - walker.setThreadGroupIdYDimension(64u); - walker.setThreadGroupIdZDimension(64u); - - DebugManagerStateRestore restorer; - DebugManager.flags.SetMinimalPartitionSize.set(0); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(16u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); - - walker.setThreadGroupIdYDimension(65u); - walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(16u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); - - walker.setThreadGroupIdZDimension(66u); - walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(16u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenPartitionCountIsClampedToHighestDimension) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(15u); - walker.setThreadGroupIdYDimension(7u); - walker.setThreadGroupIdZDimension(4u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); - walker.setThreadGroupIdXDimension(1u); - walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); - - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); - - walker.setThreadGroupIdYDimension(1u); - walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); - - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeHintWhenPartitionCountIsObtainedThenSuggestedTypeIsUsedForPartition) { - DebugManagerStateRestore restore{}; - - 
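// ExperimentalSetWalkerPartitionType semantics exercised below: -1 keeps the
// heuristic's choice, while values 1/2/3 (cast from the PARTITION_TYPE enum)
// force partitioning along X/Y/Z. The partition count is still clamped to the
// forced dimension, which is why requesting Z on a dimension only 2 workgroups
// deep yields 2 partitions instead of 4.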
WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(8u); - walker.setThreadGroupIdYDimension(4u); - walker.setThreadGroupIdZDimension(2u); - - DebugManager.flags.ExperimentalSetWalkerPartitionType.set(-1); - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); - - DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X)); - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); - - DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y)); - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); - - DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z)); - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenInvalidPartitionTypeIsRequestedWhenPartitionCountIsObtainedThenFail) { - DebugManagerStateRestore restore{}; - - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(8u); - walker.setThreadGroupIdYDimension(4u); - walker.setThreadGroupIdZDimension(2u); - - DebugManager.flags.ExperimentalSetWalkerPartitionType.set(0); - bool staticPartitioning = false; - EXPECT_ANY_THROW(computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning)); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithSmallXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsAdjusted) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(32u); - walker.setThreadGroupIdYDimension(1024u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithBigXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsNotAdjusted) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(16384u); - walker.setThreadGroupIdYDimension(1u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = false; - 
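// Worked numbers for the two size tests around this point, assuming the default
// minimal partition size of 512 workgroups (a value inferred from the
// expectations in the neighboring tests, not quoted from the header): a
// 16384-deep X dimension admits 16384 / 512 = 32 partitions, so the count
// saturates at the 16-partition cap rather than being clamped to the two tiles;
// the next test lowers the threshold to 4096 and accordingly gets
// 16384 / 4096 = 4 partitions.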
auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(16u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenCustomMinimalPartitionSizeWhenComputePartitionCountThenProperValueIsReturned) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(16384u); - walker.setThreadGroupIdYDimension(1u); - walker.setThreadGroupIdZDimension(1u); - - DebugManagerStateRestore restorer; - DebugManager.flags.SetMinimalPartitionSize.set(4096); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeProgrammedWhenPartitionCountIsObtainedAndItEqualsOneThenPartitionMechanismIsDisabled) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(1u); - walker.setThreadGroupIdYDimension(1u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(1u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenXDimensionIsNotLargestAnd2DImagesAreUsedWhenPartitionTypeIsObtainedThenSelectXDimension) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(8u); - walker.setThreadGroupIdYDimension(64u); - walker.setThreadGroupIdZDimension(16u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); - - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, true, &staticPartitioning); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndNonPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(1u); - walker.setThreadGroupIdYDimension(1u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(1u); - 
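// Y is the only dimension here with more than one workgroup, so the static
// path keeps one partition per tile and selects PARTITION_TYPE_Y below.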
walker.setThreadGroupIdYDimension(2u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndBigPartitionCountProgrammedInWalkerWhenPartitionCountIsObtainedThenNumberOfPartitionsIsEqualToNumberOfTiles) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(1u); - walker.setThreadGroupIdYDimension(16384u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndNonUniformStartProgrammedInWalkerWhenPartitionCountIsObtainedThenDoNotAllowStaticPartitioningAndSetPartitionCountToOne) { - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(1u); - walker.setThreadGroupIdYDimension(16384u); - walker.setThreadGroupIdZDimension(1u); - walker.setThreadGroupIdStartingX(0); - walker.setThreadGroupIdStartingY(0); - walker.setThreadGroupIdStartingZ(1); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); - EXPECT_FALSE(staticPartitioning); - EXPECT_EQ(1u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionTypeHintIsUsedWhenPartitionCountIsObtainedThenUseRequestedPartitionType) { - DebugManagerStateRestore restore{}; - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(1u); - walker.setThreadGroupIdYDimension(16384u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = false; - auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); - - DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z)); - staticPartitioning = false; - partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(4u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsNotDivisibleByTwoButIsAboveThresholdThenItIsSelected) { - DebugManagerStateRestore restore{}; - WalkerPartition::COMPUTE_WALKER<FamilyType> walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(1u); - walker.setThreadGroupIdYDimension(16384u); - 
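// WalkerPartitionPreferHighestDimension is on by default, so the heuristic
// picks the highest partitionable dimension, Z, below; clearing the flag makes
// it fall back to the larger Y dimension instead.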
walker.setThreadGroupIdZDimension(2u); - - bool staticPartitioning = true; - auto partitionCount = computePartitionCountAndSetPartitionType(&walker, 2u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); - - DebugManager.flags.WalkerPartitionPreferHighestDimension.set(0); - - partitionCount = computePartitionCountAndSetPartitionType(&walker, 2u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenYDimensionIsDivisibleByTwoThenItIsSelected) { - DebugManagerStateRestore restore{}; - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(16384u); - walker.setThreadGroupIdYDimension(2u); - walker.setThreadGroupIdZDimension(1u); - - bool staticPartitioning = true; - auto partitionCount = computePartitionCountAndSetPartitionType(&walker, 2u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); - - DebugManager.flags.WalkerPartitionPreferHighestDimension.set(0); - - partitionCount = computePartitionCountAndSetPartitionType(&walker, 2u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsDivisibleByTwoThenItIsSelected) { - DebugManagerStateRestore restore{}; - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setThreadGroupIdXDimension(512u); - walker.setThreadGroupIdYDimension(512u); - walker.setThreadGroupIdZDimension(513u); - - bool staticPartitioning = true; - auto partitionCount = computePartitionCountAndSetPartitionType(&walker, 2u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); - - DebugManager.flags.WalkerPartitionPreferHighestDimension.set(0); - - partitionCount = computePartitionCountAndSetPartitionType(&walker, 2u, true, false, &staticPartitioning); - EXPECT_TRUE(staticPartitioning); - EXPECT_EQ(2u, partitionCount); - EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenNativeCrossTileSyncWhenDebugForceDisableCrossTileSyncThenNativeOverridesDebugAndAddsOwnCleanupSection) { - DebugManagerStateRestore restorer; - DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.set(0); - - auto partitionCount = 16u; - checkForProperCmdBufferAddressOffset = false; - nativeCrossTileSync = true; - uint64_t gpuVirtualAddress = 0x8000123000; - uint64_t postSyncAddress = 0x8000456000; - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); - auto &postSync = walker.getPostSync(); - 
postSync.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP); - postSync.setDestinationAddress(postSyncAddress); - uint32_t totalBytesProgrammed = 0u; - - auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::LOAD_REGISTER_REG) + - sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + - sizeof(WalkerPartition::MI_STORE_DATA_IMM); - - auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START) + - sizeof(WalkerPartition::COMPUTE_WALKER); - - EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, false)); - - auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); - - auto totalProgrammedSize = cleanupSectionOffset + 3 * sizeof(WalkerPartition::MI_STORE_DATA_IMM) + - 2 * sizeof(WalkerPartition::MI_ATOMIC) + - 2 * sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - uint32_t tileCount = 4u; - WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, - gpuVirtualAddress, &walker, totalBytesProgrammed, partitionCount, - tileCount, false, synchronizeBeforeExecution, false, nativeCrossTileSync, false); - - EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); - auto wparidMaskProgrammingLocation = cmdBufferAddress; - - auto expectedMask = 0xFFF0u; - auto expectedRegister = 0x21FCu; - - auto loadRegisterImmediate = genCmdCast *>(wparidMaskProgrammingLocation); - ASSERT_NE(nullptr, loadRegisterImmediate); - EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset()); - EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); - auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM); - - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize; - auto miAtomicProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress); - EXPECT_TRUE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - auto loadRegisterReg = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, loadRegisterReg); - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination()); - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource()); - EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress()); - EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress()); - parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG); - - auto miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSetPredicate); - EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); - parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); - - auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStart); - EXPECT_TRUE(batchBufferStart->getPredicationEnable()); - //address routes to WALKER section which is before control section - auto address = batchBufferStart->getBatchBufferStartAddress(); - 
EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSetPredicate); - EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER); - EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); - parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); - - uint64_t expectedCleanupGpuVa = gpuVirtualAddress + expectedCommandUsedSize + offsetof(BatchBufferControlData, finalSyncTileCount); - constexpr uint32_t expectedData = 0u; - auto finalSyncTileCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, finalSyncTileCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, finalSyncTileCountFieldStore->getAddress()); - EXPECT_EQ(expectedData, finalSyncTileCountFieldStore->getDataDword0()); - parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); - - auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, pipeControl); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); - parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); - - miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicTileAddress = gpuVirtualAddress + expectedCommandUsedSize + sizeof(uint32_t); - auto miAtomicTileProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress); - EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); - EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), tileCount); - - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - //final batch buffer start that routes at the end of the batch buffer - auto batchBufferStartFinal = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStartFinal); - EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - EXPECT_NE(nullptr, computeWalker); - parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); - - batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStart); - EXPECT_FALSE(batchBufferStart->getPredicationEnable()); - EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress()); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, expectedCommandUsedSize)); - EXPECT_EQ(0u, controlSection->partitionCount); - EXPECT_EQ(0u, controlSection->tileCount); - EXPECT_EQ(0u, controlSection->inTileCount); - EXPECT_EQ(0u, 
controlSection->finalSyncTileCount); - - parsedOffset += sizeof(BatchBufferControlData); - EXPECT_EQ(parsedOffset, cleanupSectionOffset); - - miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - miAtomicTileAddress = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData) + - 3 * sizeof(uint32_t); - miAtomicTileProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress); - EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); - EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), tileCount); - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - expectedCleanupGpuVa = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData); - auto partitionCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, partitionCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, partitionCountFieldStore->getAddress()); - EXPECT_EQ(expectedData, partitionCountFieldStore->getDataDword0()); - parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); - - expectedCleanupGpuVa += sizeof(BatchBufferControlData::partitionCount); - auto tileCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, tileCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, tileCountFieldStore->getAddress()); - EXPECT_EQ(expectedData, tileCountFieldStore->getDataDword0()); - parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); - - expectedCleanupGpuVa += sizeof(BatchBufferControlData::tileCount); - auto inTileCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, inTileCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, inTileCountFieldStore->getAddress()); - EXPECT_EQ(expectedData, inTileCountFieldStore->getDataDword0()); - parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); - - miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - miAtomicTileProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress); - EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); - EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 2 * tileCount); -} - -HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenNativeCrossTileSyncAndAtomicsUsedForNativeWhenDebugForceDisableCrossTileSyncThenNativeOverridesDebugAndAddsOwnCleanupSection) { - DebugManagerStateRestore restorer; - DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.set(0); - - 
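The four zero-checks just parsed pin down the dynamic-partitioning control section layout. Below is a sketch of the structure as implied by the offsets used in these tests; the real definition lives in walker_partition_xehp_and_later.h, and the field order is inferred from offsetof(BatchBufferControlData, finalSyncTileCount) landing at 3 * sizeof(uint32_t) and the tile counter sitting at base + sizeof(uint32_t):

#include <cstdint>

struct BatchBufferControlDataSketch {
    uint32_t partitionCount = 0u;     // handed out to tiles via atomic increment
    uint32_t tileCount = 0u;          // post-walker cross-tile sync counter
    uint32_t inTileCount = 0u;        // secondary sync counter
    uint32_t finalSyncTileCount = 0u; // cleanup-section counter
};
static_assert(sizeof(BatchBufferControlDataSketch) == 4 * sizeof(uint32_t), "flat dword layout");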
auto partitionCount = 16u; - checkForProperCmdBufferAddressOffset = false; - nativeCrossTileSync = true; - bool useAtomicsForNative = true; - uint64_t gpuVirtualAddress = 0x8000123000; - uint64_t postSyncAddress = 0x8000456000; - WalkerPartition::COMPUTE_WALKER walker; - walker = FamilyType::cmdInitGpgpuWalker; - walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); - auto &postSync = walker.getPostSync(); - postSync.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP); - postSync.setDestinationAddress(postSyncAddress); - uint32_t totalBytesProgrammed = 0u; - - auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + - sizeof(WalkerPartition::MI_ATOMIC) * 2 + - sizeof(WalkerPartition::LOAD_REGISTER_REG) + - sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + - sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + - sizeof(WalkerPartition::PIPE_CONTROL) + - sizeof(WalkerPartition::COMPUTE_WALKER) + - sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + - sizeof(WalkerPartition::MI_ATOMIC); - - auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START) + - sizeof(WalkerPartition::COMPUTE_WALKER); - - EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset(partitionCount, synchronizeBeforeExecution, nativeCrossTileSync, useAtomicsForNative)); - - auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); - - auto totalProgrammedSize = cleanupSectionOffset + 3 * sizeof(WalkerPartition::MI_ATOMIC) + - 2 * sizeof(WalkerPartition::MI_ATOMIC) + - 2 * sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - uint32_t tileCount = 4u; - WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, - gpuVirtualAddress, &walker, totalBytesProgrammed, partitionCount, - tileCount, false, synchronizeBeforeExecution, false, nativeCrossTileSync, useAtomicsForNative); - - EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); - auto wparidMaskProgrammingLocation = cmdBufferAddress; - - auto expectedMask = 0xFFF0u; - auto expectedRegister = 0x21FCu; - - auto loadRegisterImmediate = genCmdCast *>(wparidMaskProgrammingLocation); - ASSERT_NE(nullptr, loadRegisterImmediate); - EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset()); - EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); - auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM); - - auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize; - auto miAtomicProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress); - EXPECT_TRUE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - auto loadRegisterReg = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, loadRegisterReg); - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination()); - EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource()); - EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress()); - EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress()); - parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG); - - auto miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSetPredicate); - EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), 
MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); - parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); - - auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStart); - EXPECT_TRUE(batchBufferStart->getPredicationEnable()); - //address routes to WALKER section which is before control section - auto address = batchBufferStart->getBatchBufferStartAddress(); - EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSetPredicate); - EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER); - EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); - parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); - - uint64_t expectedCleanupGpuVa = gpuVirtualAddress + expectedCommandUsedSize + offsetof(BatchBufferControlData, finalSyncTileCount); - auto finalSyncTileCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, finalSyncTileCountFieldStore); - miAtomicProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*finalSyncTileCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress); - EXPECT_FALSE(finalSyncTileCountFieldStore->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, finalSyncTileCountFieldStore->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, pipeControl); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); - parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); - - miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicTileAddress = gpuVirtualAddress + expectedCommandUsedSize + sizeof(uint32_t); - auto miAtomicTileProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress); - EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); - EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), tileCount); - - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - //final batch buffer start that routes at the end of the batch buffer - auto batchBufferStartFinal = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStartFinal); - EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - EXPECT_NE(nullptr, computeWalker); - parsedOffset += 
sizeof(WalkerPartition::COMPUTE_WALKER); - - batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, batchBufferStart); - EXPECT_FALSE(batchBufferStart->getPredicationEnable()); - EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress()); - parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); - - auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, expectedCommandUsedSize)); - EXPECT_EQ(0u, controlSection->partitionCount); - EXPECT_EQ(0u, controlSection->tileCount); - EXPECT_EQ(0u, controlSection->inTileCount); - EXPECT_EQ(0u, controlSection->finalSyncTileCount); - - parsedOffset += sizeof(BatchBufferControlData); - EXPECT_EQ(parsedOffset, cleanupSectionOffset); - - miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - miAtomicTileAddress = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData) + - 3 * sizeof(uint32_t); - miAtomicTileProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress); - EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); - EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), tileCount); - parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); - - expectedCleanupGpuVa = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData); - auto partitionCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, partitionCountFieldStore); - miAtomicProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*partitionCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress); - EXPECT_FALSE(partitionCountFieldStore->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, partitionCountFieldStore->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - expectedCleanupGpuVa += sizeof(BatchBufferControlData::partitionCount); - auto tileCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, tileCountFieldStore); - miAtomicProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*tileCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress); - EXPECT_FALSE(tileCountFieldStore->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, tileCountFieldStore->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - expectedCleanupGpuVa += sizeof(BatchBufferControlData::tileCount); - auto inTileCountFieldStore = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, inTileCountFieldStore); - miAtomicProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*inTileCountFieldStore); - EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress); - EXPECT_FALSE(inTileCountFieldStore->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, inTileCountFieldStore->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - miAtomic = genCmdCast 
*>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - miAtomicTileProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); - - miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphoreWait); - EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress); - EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); - EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 2 * tileCount); -} diff --git a/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_1.cpp b/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_1.cpp new file mode 100644 index 0000000000..ff801b33b6 --- /dev/null +++ b/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_1.cpp @@ -0,0 +1,1278 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h" + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { + testArgs.partitionCount = 16u; + checkForProperCmdBufferAddressOffset = false; + uint64_t gpuVirtualAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + WalkerPartition::COMPUTE_WALKER walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); + auto &postSync = walker.getPostSync(); + postSync.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP); + postSync.setDestinationAddress(postSyncAddress); + uint32_t totalBytesProgrammed; + + auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + + sizeof(WalkerPartition::MI_ATOMIC) * 2 + + sizeof(WalkerPartition::LOAD_REGISTER_REG) + + sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + + sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START) + + sizeof(WalkerPartition::COMPUTE_WALKER); + + EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset(testArgs)); + + auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); + + auto totalProgrammedSize = optionalBatchBufferEndOffset + sizeof(WalkerPartition::BATCH_BUFFER_END); + + testArgs.tileCount = 4u; + testArgs.emitBatchBufferEnd = true; + WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + gpuVirtualAddress, + &walker, + totalBytesProgrammed, + testArgs); + + EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); + auto wparidMaskProgrammingLocation = cmdBufferAddress; + + auto expectedMask = 0xFFF0u; + auto expectedRegister = 0x21FCu; + + auto loadRegisterImmediate = genCmdCast *>(wparidMaskProgrammingLocation); + ASSERT_NE(nullptr, loadRegisterImmediate); + EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset()); + EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); + auto parsedOffset = 
sizeof(WalkerPartition::LOAD_REGISTER_IMM); + + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize; + auto miAtomicProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); + EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress); + EXPECT_TRUE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + + auto loadRegisterReg = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterReg); + EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination()); + EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource()); + EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress()); + EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress()); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG); + + auto miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSetPredicate); + EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); + parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); + + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + EXPECT_TRUE(batchBufferStart->getPredicationEnable()); + //address routes to WALKER section which is before control section + auto address = batchBufferStart->getBatchBufferStartAddress(); + EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + + miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSetPredicate); + EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER); + EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); + + parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); + + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + + miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + auto miAtomicTileAddress = gpuVirtualAddress + expectedCommandUsedSize + sizeof(uint32_t); + auto miAtomicTileProgrammedAddress = UnitTestHelper::getAtomicMemoryAddress(*miAtomic); + EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress); + EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 4u); + + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + + //final batch 
buffer start that routes at the end of the batch buffer + auto batchBufferStartFinal = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + EXPECT_NE(nullptr, batchBufferStartFinal); + EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + + batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress()); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, expectedCommandUsedSize)); + EXPECT_EQ(0u, controlSection->partitionCount); + EXPECT_EQ(0u, controlSection->tileCount); + parsedOffset += sizeof(BatchBufferControlData); + + auto batchBufferEnd = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + EXPECT_NE(nullptr, batchBufferEnd); + EXPECT_EQ(parsedOffset, optionalBatchBufferEndOffset); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + 
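The MI_ATOMIC plus MI_SEMAPHORE_WAIT pair parsed here forms the post-walker cross-tile barrier. A host-side model of its semantics, not of the GPU command emission itself:

#include <atomic>
#include <cstdint>

// Each tile increments the shared counter (MI_ATOMIC, 4B increment, no CS
// stall), then spins until it reaches tileCount (MI_SEMAPHORE_WAIT, SAD >= SDD).
void crossTileBarrier(std::atomic<uint32_t> &counter, uint32_t tileCount) {
    counter.fetch_add(1u);
    while (counter.load() < tileCount) {
    }
}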
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndPreWalkerSyncWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + checkForProperCmdBufferAddressOffset = false; + testArgs.synchronizeBeforeExecution = true; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); + const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(preWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + 
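The preWalkerSyncAddress and postWalkerSyncAddress used in these tests are derived with offsetof from the static-partitioning control section. An inferred sketch follows; the counter names come straight from the offsetof expressions, but their relative order is an assumption:

#include <cstdint>

struct StaticPartitioningControlSectionSketch {
    uint32_t synchronizeBeforeWalkerCounter = 0u; // optional pre-walker barrier
    uint32_t synchronizeAfterWalkerCounter = 0u;  // post-walker barrier
    uint32_t finalSyncTileCounter = 0u;           // native-cleanup barrier, bumped twice per tile
};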
EXPECT_EQ(preWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAndSynchronizationWithPostSyncsWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { + testArgs.semaphoreProgrammingRequired = true; + testArgs.crossTileAtomicSynchronization = false; + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 
0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu; + EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu + 16llu; + EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu + 32llu; + EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + const auto expectedSemaphoreAddress = walker.getPostSync().getDestinationAddress() + 8llu + 48llu; + EXPECT_EQ(expectedSemaphoreAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + 
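The expectedSemaphoreAddress values in these four semaphore blocks follow a single formula: each tile's post-sync timestamp occupies a 16-byte slot, and the wait polls a dword at offset 8 inside the slot. A sketch of the arithmetic; reading offset 8 as the field written on walker completion is an assumption:

#include <cstdint>

uint64_t postSyncSemaphoreAddress(uint64_t postSyncBaseGpuVa, uint32_t tileIndex) {
    constexpr uint64_t slotSize = 16u;  // one post-sync write per tile
    constexpr uint64_t pollOffset = 8u; // dword compared with NOT_EQUAL against 1
    return postSyncBaseGpuVa + pollOffset + tileIndex * slotSize;
}
// With postSyncAddress == 0x8000456000 this yields offsets +8, +24, +40 and +56,
// matching the four MI_SEMAPHORE_WAIT addresses parsed in this test.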
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + testArgs.nativeCrossTileAtomicSync = true; + testArgs.staticPartitioning = true; + + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); + const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); + const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); + EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto storeDataImm = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, storeDataImm); + parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); + EXPECT_EQ(finalSyncAddress, storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength()); + 
EXPECT_EQ(0u, storeDataImm->getDataDword0()); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto storeDataImm = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, storeDataImm); + parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); + EXPECT_EQ(preWalkerSyncAddress, storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + 
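The cleanup section parsed here is a two-phase barrier on finalSyncTileCounter: wait for tileCount before clearing the pre/post-walker counters, then wait for 2 * tileCount so no tile re-enters the section while another tile is still clearing it. A host-side model of those semantics:

#include <atomic>
#include <cstdint>

void finalSyncTwoPhase(std::atomic<uint32_t> &finalSyncTileCounter, uint32_t tileCount) {
    finalSyncTileCounter.fetch_add(1u);
    while (finalSyncTileCounter.load() < tileCount) {
    }
    // ...the MI_STORE_DATA_IMM writes zero the pre/post-walker counters here...
    finalSyncTileCounter.fetch_add(1u);
    while (finalSyncTileCounter.load() < 2u * tileCount) {
    }
}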
EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength()); + EXPECT_EQ(0u, storeDataImm->getDataDword0()); + } + { + auto storeDataImm = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, storeDataImm); + parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); + EXPECT_EQ(postWalkerSyncAddress, storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength()); + EXPECT_EQ(0u, storeDataImm->getDataDword0()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(2 * testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndSyncDisabledWithFlagWhenConstructCommandBufferIsCalledThenStillProgramTheSync) { + testArgs.crossTileAtomicSynchronization = false; + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + testArgs.nativeCrossTileAtomicSync = true; + testArgs.staticPartitioning = true; + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); + const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); + const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); + EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + 
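The LOAD_REGISTER_MEM checked at the top of every static-partitioning buffer is what lets one command buffer run a different partition on each tile: the tile-local WPARID register (0x221C, MMIO-remapped) is loaded from the per-tile work-partition allocation. A sketch with illustrative types and constants only:

#include <cstdint>

struct LoadRegisterMemSketch {
    uint32_t registerAddress; // destination MMIO register
    uint64_t memoryAddress;   // source GPU virtual address
    bool mmioRemapEnable;
};

LoadRegisterMemSketch makeWparidLoad(uint64_t workPartitionAllocationGpuVa) {
    constexpr uint32_t wparidRegister = 0x221Cu; // expectedRegister in these tests
    return {wparidRegister, workPartitionAllocationGpuVa, true};
}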
EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto storeDataImm = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, storeDataImm); + parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); + EXPECT_EQ(finalSyncAddress, storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength()); + EXPECT_EQ(0u, storeDataImm->getDataDword0()); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, 
parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto storeDataImm = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, storeDataImm); + parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); + EXPECT_EQ(preWalkerSyncAddress, storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength()); + EXPECT_EQ(0u, storeDataImm->getDataDword0()); + } + { + auto storeDataImm = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, storeDataImm); + parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM); + EXPECT_EQ(postWalkerSyncAddress, storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + EXPECT_EQ(WalkerPartition::MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, storeDataImm->getDwordLength()); + EXPECT_EQ(0u, storeDataImm->getDataDword0()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(2 * testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWithNativeCrossTileSyncAndAtomicsForNativeWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + testArgs.useAtomicsForNativeCleanup = true; + testArgs.nativeCrossTileAtomicSync = true; + testArgs.staticPartitioning = true; + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); + const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); + const auto finalSyncAddress = cmdBufferGpuAddress + 
controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); + EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + 
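Note the write flavor in this variant: with useAtomicsForNativeCleanup set, the control-section counters are cleared with MI_ATOMIC using ATOMIC_4B_MOVE (an atomic dword store) instead of MI_STORE_DATA_IMM, which is why the parsers in this test cast those cleanup writes to MI_ATOMIC. A hypothetical selector just to state the rule:

enum class CleanupWrite { storeDataImm,
                          atomic4bMove };

CleanupWrite selectCleanupWrite(bool useAtomicsForNativeCleanup) {
    return useAtomicsForNativeCleanup ? CleanupWrite::atomic4bMove
                                      : CleanupWrite::storeDataImm;
}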
StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(preWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(2 * testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, 
givenStaticWalkerPartitionWithNativeCrossTileSyncAndSyncDisabledWithFlagWhenUsingAtomicForNativeAndConstructCommandBufferIsCalledThenStillProgramTheSync) { + testArgs.crossTileAtomicSynchronization = false; + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + testArgs.nativeCrossTileAtomicSync = true; + testArgs.useAtomicsForNativeCleanup = true; + testArgs.staticPartitioning = true; + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); + const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); + const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); + EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + 
EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(postWalkerSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(preWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(postWalkerSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + 
ASSERT_NE(nullptr, miAtomic); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + EXPECT_EQ(finalSyncAddress, UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomic->getDataSize()); + EXPECT_FALSE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getCsStall()); + } + { + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSemaphoreWait); + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(finalSyncAddress, miSemaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphoreWait->getCompareOperation()); + EXPECT_EQ(2 * testArgs.tileCount, miSemaphoreWait->getSemaphoreDataDword()); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { + testArgs.crossTileAtomicSynchronization = false; + testArgs.semaphoreProgrammingRequired = true; + testArgs.tileCount = 4u; + testArgs.partitionCount = 16u; + testArgs.emitBatchBufferEnd = true; + + checkForProperCmdBufferAddressOffset = false; + uint64_t gpuVirtualAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + WalkerPartition::COMPUTE_WALKER walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); + auto &postSync = walker.getPostSync(); + postSync.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP); + postSync.setDestinationAddress(postSyncAddress); + uint32_t totalBytesProgrammed; + + auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + + sizeof(WalkerPartition::MI_ATOMIC) + + sizeof(WalkerPartition::LOAD_REGISTER_REG) + + sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + + sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) * testArgs.partitionCount; + + auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START) + + sizeof(WalkerPartition::COMPUTE_WALKER); + + EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset(testArgs)); + + auto optionalBatchBufferEndOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData); + + auto totalProgrammedSize = optionalBatchBufferEndOffset + sizeof(WalkerPartition::BATCH_BUFFER_END); + + WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + gpuVirtualAddress, + &walker, + totalBytesProgrammed, + testArgs); + + EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); + auto wparidMaskProgrammingLocation = cmdBufferAddress; + + auto expectedMask = 0xFFF0u; + auto expectedRegister = 0x21FCu; + + auto loadRegisterImmediate = genCmdCast *>(wparidMaskProgrammingLocation); + ASSERT_NE(nullptr, loadRegisterImmediate); + EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset()); + EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); + auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM); + + auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miAtomic); + auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize; + auto miAtomicProgrammedAddress 
= UnitTestHelper::getAtomicMemoryAddress(*miAtomic); + EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress); + EXPECT_TRUE(miAtomic->getReturnDataControl()); + EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + parsedOffset += sizeof(WalkerPartition::MI_ATOMIC); + + auto loadRegisterReg = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterReg); + EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination()); + EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource()); + EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress()); + EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress()); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG); + + auto miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSetPredicate); + EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); + parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); + + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + EXPECT_TRUE(batchBufferStart->getPredicationEnable()); + //address routes to WALKER section which is before control section + auto address = batchBufferStart->getBatchBufferStartAddress(); + EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + + miSetPredicate = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, miSetPredicate); + EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER); + EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); + + parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE); + + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + + auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + for (uint32_t partitionId = 0u; partitionId < testArgs.partitionCount; partitionId++) { + ASSERT_NE(nullptr, miSemaphoreWait); + EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), postSyncAddress + 8llu + partitionId * 16llu); + EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 1u); + + parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + } + + //final batch buffer start that routes at the end of the batch buffer + auto batchBufferStartFinal = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + EXPECT_NE(nullptr, batchBufferStartFinal); + EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + optionalBatchBufferEndOffset); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + + batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + 
ASSERT_NE(nullptr, batchBufferStart); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress()); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, expectedCommandUsedSize)); + EXPECT_EQ(0u, controlSection->partitionCount); + EXPECT_EQ(0u, controlSection->tileCount); + parsedOffset += sizeof(BatchBufferControlData); + + auto batchBufferEnd = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + EXPECT_NE(nullptr, batchBufferEnd); + EXPECT_EQ(parsedOffset, optionalBatchBufferEndOffset); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenWparidRegisterProgrammingDisabledThenExpectNoMiLoadRegisterMemCommand) { + testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + testArgs.initializeWparidRegister = false; + testArgs.nativeCrossTileAtomicSync = false; + testArgs.crossTileAtomicSynchronization = false; + testArgs.useAtomicsForNativeCleanup = false; + testArgs.staticPartitioning = true; + + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::BATCH_BUFFER_START); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); + EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, pipeControl); + parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) { + 
testArgs.tileCount = 4u; + testArgs.partitionCount = testArgs.tileCount; + testArgs.nativeCrossTileAtomicSync = false; + testArgs.usePipeControlStall = false; + testArgs.crossTileAtomicSynchronization = false; + testArgs.useAtomicsForNativeCleanup = false; + testArgs.staticPartitioning = true; + + checkForProperCmdBufferAddressOffset = false; + uint64_t cmdBufferGpuAddress = 0x8000123000; + uint64_t postSyncAddress = 0x8000456000; + testArgs.workPartitionAllocationGpuVa = 0x8000444000; + auto walker = createWalker(postSyncAddress); + + uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::LOAD_REGISTER_MEM) + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::BATCH_BUFFER_START); + + uint32_t totalBytesProgrammed{}; + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); + EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + cmdBufferGpuAddress, + &walker, + totalBytesProgrammed, + testArgs); + const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); + EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); + + auto parsedOffset = 0u; + { + auto loadRegisterMem = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, loadRegisterMem); + parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_MEM); + const auto expectedRegister = 0x221Cu; + EXPECT_TRUE(loadRegisterMem->getMmioRemapEnable()); + EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress()); + EXPECT_EQ(testArgs.workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress()); + } + { + auto computeWalker = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, computeWalker); + parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER); + } + { + auto batchBufferStart = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); + ASSERT_NE(nullptr, batchBufferStart); + parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START); + EXPECT_FALSE(batchBufferStart->getPredicationEnable()); + const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection); + EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress()); + } + { + auto controlSection = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); + parsedOffset += sizeof(StaticPartitioningControlSection); + StaticPartitioningControlSection expectedControlSection = {}; + EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection))); + } + EXPECT_EQ(parsedOffset, totalBytesProgrammed); +} diff --git a/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_2.cpp b/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_2.cpp new file mode 100644 index 0000000000..fcf51b0052 --- /dev/null +++ b/opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_2.cpp @@ -0,0 +1,1352 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h" + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramRegisterCommandWhenItIsCalledThenLoadRegisterImmIsSetUnderPointer) { + uint32_t registerOffset = 120u; + uint32_t registerValue = 542u; + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM); + void *loadRegisterImmediateAddress = 
cmdBufferAddress; + WalkerPartition::programRegisterWithValue(cmdBufferAddress, registerOffset, totalBytesProgrammed, registerValue); + auto loadRegisterImmediate = genCmdCast *>(loadRegisterImmediateAddress); + + ASSERT_NE(nullptr, loadRegisterImmediate); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + EXPECT_EQ(registerOffset, loadRegisterImmediate->getRegisterOffset()); + EXPECT_EQ(registerValue, loadRegisterImmediate->getDataDword()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledWithoutBatchBufferEndThenBatchBufferEndIsNotProgrammed) { + testArgs.partitionCount = 16u; + testArgs.tileCount = 4u; + checkForProperCmdBufferAddressOffset = false; + uint64_t gpuVirtualAddress = 0x8000123000; + WalkerPartition::COMPUTE_WALKER walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); + + WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + gpuVirtualAddress, + &walker, + totalBytesProgrammed, + testArgs); + auto totalProgrammedSize = computeControlSectionOffset(testArgs) + + sizeof(BatchBufferControlData); + EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenItIsCalledThenProperSizeIsReturned) { + testArgs.partitionCount = 16u; + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + + sizeof(WalkerPartition::MI_ATOMIC) * 2 + + sizeof(WalkerPartition::LOAD_REGISTER_REG) + + sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + + sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::BatchBufferControlData) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + + testArgs.emitBatchBufferEnd = false; + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); + + testArgs.emitBatchBufferEnd = true; + EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END), + estimateSpaceRequiredInCommandBuffer(testArgs)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationWhenPartitionCountIs4ThenSizeIsProperlyEstimated) { + testArgs.partitionCount = 4u; + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + + sizeof(WalkerPartition::MI_ATOMIC) * 2 + + sizeof(WalkerPartition::LOAD_REGISTER_REG) + + sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + + sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::BatchBufferControlData) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + + testArgs.emitBatchBufferEnd = false; + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); + + testArgs.emitBatchBufferEnd = true; + EXPECT_EQ(expectedUsedSize + sizeof(WalkerPartition::BATCH_BUFFER_END), + estimateSpaceRequiredInCommandBuffer(testArgs)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) { + testArgs.partitionCount = 16u; + testArgs.emitBatchBufferEnd = false; + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + + sizeof(WalkerPartition::MI_ATOMIC) * 2 + + sizeof(WalkerPartition::LOAD_REGISTER_REG) + + sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + + sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + + sizeof(WalkerPartition::COMPUTE_WALKER) + + 
sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::BatchBufferControlData) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + auto expectedDelta = sizeof(WalkerPartition::MI_ATOMIC) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + + testArgs.synchronizeBeforeExecution = false; + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); + + testArgs.synchronizeBeforeExecution = true; + EXPECT_EQ(expectedUsedSize + expectedDelta, + estimateSpaceRequiredInCommandBuffer(testArgs)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationWhenItIsCalledThenProperSizeIsReturned) { + testArgs.partitionCount = 16u; + const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM) + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::MI_ATOMIC) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + + sizeof(WalkerPartition::BATCH_BUFFER_START) + + sizeof(WalkerPartition::StaticPartitioningControlSection); + + testArgs.emitBatchBufferEnd = false; + testArgs.staticPartitioning = true; + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); + + testArgs.emitBatchBufferEnd = true; + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningEstimationAndSynchronizeBeforeExecutionWhenItIsCalledThenProperSizeIsReturned) { + testArgs.partitionCount = 16u; + testArgs.emitBatchBufferEnd = false; + const auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_MEM) + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::MI_ATOMIC) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + + sizeof(WalkerPartition::BATCH_BUFFER_START) + + sizeof(WalkerPartition::StaticPartitioningControlSection); + + testArgs.staticPartitioning = true; + testArgs.synchronizeBeforeExecution = false; + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); + + testArgs.synchronizeBeforeExecution = true; + const auto preExecutionSynchronizationSize = sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + EXPECT_EQ(expectedUsedSize + preExecutionSynchronizationSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationNativeSectionsWhenItIsCalledThenProperSizeIsReturned) { + testArgs.partitionCount = 16u; + testArgs.emitBatchBufferEnd = false; + testArgs.synchronizeBeforeExecution = false; + testArgs.nativeCrossTileAtomicSync = true; + + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + + sizeof(WalkerPartition::MI_ATOMIC) * 2 + + sizeof(WalkerPartition::LOAD_REGISTER_REG) + + sizeof(WalkerPartition::MI_SET_PREDICATE) * 2 + + sizeof(WalkerPartition::BATCH_BUFFER_START) * 3 + + sizeof(WalkerPartition::COMPUTE_WALKER) + + sizeof(WalkerPartition::MI_STORE_DATA_IMM) + + sizeof(WalkerPartition::PIPE_CONTROL) + + sizeof(WalkerPartition::BatchBufferControlData) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + + sizeof(WalkerPartition::MI_ATOMIC) * 2 + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) * 2 + + sizeof(WalkerPartition::MI_STORE_DATA_IMM) * 3; + + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer(testArgs)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenEstimationNativeSectionsWhenAtomicsUsedForNativeThenProperSizeIsReturned) { + testArgs.partitionCount = 
16u; + testArgs.emitBatchBufferEnd = false; + testArgs.synchronizeBeforeExecution = false; + testArgs.nativeCrossTileAtomicSync = true; + testArgs.useAtomicsForNativeCleanup = true; + + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) + + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 + + sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) + + sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 + + sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 + + sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) + + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + + sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) + + sizeof(WalkerPartition::BatchBufferControlData) + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 + + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) * 2 + + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 3; + + EXPECT_EQ(expectedUsedSize, + estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWparidPredicationMaskWhenItIsCalledWithWrongInputThenFalseIsReturnedAndNothingIsProgrammed) { + EXPECT_FALSE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 3)); + EXPECT_FALSE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 5)); + EXPECT_FALSE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 17)); + EXPECT_FALSE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 32)); + EXPECT_FALSE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 15)); + EXPECT_FALSE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 11)); + EXPECT_FALSE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 9)); + EXPECT_EQ(0u, totalBytesProgrammed); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWparidPredicationMaskWhenItIsCalledWithPartitionCountThenProperMaskIsSet) { + auto wparidMaskProgrammingLocation = cmdBufferAddress; + EXPECT_TRUE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 16)); + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + + auto expectedMask = 0xFFF0u; + auto expectedRegister = 0x21FCu; + + auto loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); + ASSERT_NE(nullptr, loadRegisterImmediate); + EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset()); + EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); + + EXPECT_TRUE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 8)); + wparidMaskProgrammingLocation = ptrOffset(wparidMaskProgrammingLocation, sizeof(LOAD_REGISTER_IMM<FamilyType>)); + loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); + expectedMask = 0xFFF8u; + EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); + + EXPECT_TRUE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 4)); + wparidMaskProgrammingLocation = ptrOffset(wparidMaskProgrammingLocation, sizeof(LOAD_REGISTER_IMM<FamilyType>)); + loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); + expectedMask = 0xFFFCu; + EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); + + EXPECT_TRUE(programWparidMask<FamilyType>(cmdBufferAddress, totalBytesProgrammed, 2)); + wparidMaskProgrammingLocation = ptrOffset(wparidMaskProgrammingLocation, sizeof(LOAD_REGISTER_IMM<FamilyType>)); + loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation); + expectedMask = 0xFFFEu; + EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, 
givenProgramPredicationOnWhenItIsProgrammedThenCommandBufferContainsCorrectCommand) { + auto expectedUsedSize = sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>); + + void *miSetPredicateAddress = cmdBufferAddress; + programWparidPredication<FamilyType>(cmdBufferAddress, totalBytesProgrammed, true); + auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(miSetPredicateAddress); + + ASSERT_NE(nullptr, miSetPredicate); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPredicationOffWhenItIsProgrammedThenCommandBufferContainsCorrectCommand) { + auto expectedUsedSize = sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>); + + void *miSetPredicateAddress = cmdBufferAddress; + programWparidPredication<FamilyType>(cmdBufferAddress, totalBytesProgrammed, false); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(miSetPredicateAddress); + ASSERT_NE(nullptr, miSetPredicate); + EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER); + EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramWaitForSemaphoreWhenItIsProgrammedThenAllFieldsAreSetCorrectly) { + auto expectedUsedSize = sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>); + uint64_t gpuAddress = 0x6432100llu; + uint32_t compareData = 1u; + + void *semaphoreWaitAddress = cmdBufferAddress; + programWaitForSemaphore<FamilyType>(cmdBufferAddress, + totalBytesProgrammed, + gpuAddress, + compareData, + MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + auto semaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(semaphoreWaitAddress); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + + ASSERT_NE(nullptr, semaphoreWait); + EXPECT_EQ(compareData, semaphoreWait->getSemaphoreDataDword()); + EXPECT_EQ(gpuAddress, semaphoreWait->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreWait->getCompareOperation()); + EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreWait->getWaitMode()); + EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::MEMORY_TYPE::MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS, semaphoreWait->getMemoryType()); + EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, semaphoreWait->getRegisterPollMode()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenMiAtomicWhenItIsProgrammedThenAllFieldsAreSetCorrectly) { + auto expectedUsedSize = sizeof(WalkerPartition::MI_ATOMIC<FamilyType>); + uint64_t gpuAddress = 0xFFFFFFDFEEDBAC10llu; + + void *miAtomicAddress = cmdBufferAddress; + programMiAtomic<FamilyType>(cmdBufferAddress, + totalBytesProgrammed, gpuAddress, true, MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); + + auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(miAtomicAddress); + ASSERT_NE(nullptr, miAtomic); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); + EXPECT_EQ(0u, miAtomic->getDataSize()); + EXPECT_TRUE(miAtomic->getCsStall()); + EXPECT_EQ(MI_ATOMIC<FamilyType>::MEMORY_TYPE::MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS, miAtomic->getMemoryType()); + EXPECT_TRUE(miAtomic->getReturnDataControl()); + EXPECT_FALSE(miAtomic->getWorkloadPartitionIdOffsetEnable()); + auto memoryAddress = 
UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic); + + //bits 48-63 are zeroed + EXPECT_EQ((gpuAddress & 0xFFFFFFFFFFFF), memoryAddress); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenMiLoadRegisterRegWhenItIsProgrammedThenCommandIsProperlySet) { + auto expectedUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>); + void *loadRegisterRegAddress = cmdBufferAddress; + WalkerPartition::programMiLoadRegisterReg<FamilyType>(cmdBufferAddress, totalBytesProgrammed, generalPurposeRegister1, wparidCCSOffset); + auto loadRegisterReg = genCmdCast<WalkerPartition::LOAD_REGISTER_REG<FamilyType> *>(loadRegisterRegAddress); + ASSERT_NE(nullptr, loadRegisterReg); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + + EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination()); + EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource()); + EXPECT_EQ(generalPurposeRegister1, loadRegisterReg->getSourceRegisterAddress()); + EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedThenItIsProperlySet) { + auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>); + void *pipeControlAddress = cmdBufferAddress; + WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, true); + auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(pipeControlAddress); + ASSERT_NE(nullptr, pipeControl); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedWithDcFlushFalseThenExpectDcFlushFlagFalse) { + auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>); + void *pipeControlAddress = cmdBufferAddress; + WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, false); + auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(pipeControlAddress); + ASSERT_NE(nullptr, pipeControl); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_FALSE(pipeControl->getDcFlushEnable()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedWithDebugDoNotFlushThenItIsProperlySetWithoutDcFlush) { + DebugManagerStateRestore restore; + DebugManager.flags.DoNotFlushCaches.set(true); + auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>); + void *pipeControlAddress = cmdBufferAddress; + WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, true); + auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(pipeControlAddress); + ASSERT_NE(nullptr, pipeControl); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + EXPECT_FALSE(pipeControl->getDcFlushEnable()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramBatchBufferStartCommandWhenItIsCalledThenCommandIsProgrammedCorrectly) { + auto expectedUsedSize = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); + uint64_t gpuAddress = 0xFFFFFFDFEEDBAC10llu; + + void *batchBufferStartAddress = cmdBufferAddress; + WalkerPartition::programMiBatchBufferStart<FamilyType>(cmdBufferAddress, totalBytesProgrammed, gpuAddress, true, false); + auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(batchBufferStartAddress); + ASSERT_NE(nullptr, batchBufferStart); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + + //bits 48-63 are zeroed + EXPECT_EQ((gpuAddress & 
0xFFFFFFFFFFFF), batchBufferStart->getBatchBufferStartAddress()); + + EXPECT_TRUE(batchBufferStart->getPredicationEnable()); + EXPECT_FALSE(batchBufferStart->getEnableCommandCache()); + EXPECT_EQ(BATCH_BUFFER_START<FamilyType>::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, batchBufferStart->getSecondLevelBatchBuffer()); + EXPECT_EQ(BATCH_BUFFER_START<FamilyType>::ADDRESS_SPACE_INDICATOR::ADDRESS_SPACE_INDICATOR_PPGTT, batchBufferStart->getAddressSpaceIndicator()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhenItIsCalledThenWalkerIsProperlyProgrammed) { + auto expectedUsedSize = sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>); + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(7u); + walker.setThreadGroupIdYDimension(10u); + walker.setThreadGroupIdZDimension(11u); + + walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X); + void *walkerCommandAddress = cmdBufferAddress; + programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u); + auto walkerCommand = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); + + ASSERT_NE(nullptr, walkerCommand); + EXPECT_EQ(expectedUsedSize, totalBytesProgrammed); + EXPECT_TRUE(walkerCommand->getWorkloadPartitionEnable()); + EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X, walkerCommand->getPartitionType()); + EXPECT_EQ(4u, walkerCommand->getPartitionSize()); + + walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y); + walkerCommandAddress = cmdBufferAddress; + programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u); + walkerCommand = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); + + ASSERT_NE(nullptr, walkerCommand); + EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y, walkerCommand->getPartitionType()); + EXPECT_EQ(5u, walkerCommand->getPartitionSize()); + + walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z); + walkerCommandAddress = cmdBufferAddress; + programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u); + walkerCommand = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); + + ASSERT_NE(nullptr, walkerCommand); + EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z, walkerCommand->getPartitionType()); + EXPECT_EQ(6u, walkerCommand->getPartitionSize()); + + //programming with partition count == 1 must not enable any partitioning fields + walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + walkerCommandAddress = cmdBufferAddress; + programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u); + walkerCommand = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress); + + ASSERT_NE(nullptr, walkerCommand); + EXPECT_EQ(0u, walkerCommand->getPartitionSize()); + EXPECT_FALSE(walkerCommand->getWorkloadPartitionEnable()); + EXPECT_EQ(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walkerCommand->getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWhenComputePartitionCountIsCalledThenDefaultSizeAndTypeIsReturned) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(16u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(2u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); +} + 
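+//The tests that follow document the selection heuristic of computePartitionCountAndSetPartitionType: the dimension with the highest workgroup count is chosen for partitioning (X wins ties), any non-zero thread group starting offset disables partitioning, and the resulting partition count is bounded by the preferred partition count, the minimal partition size (SetMinimalPartitionSize) and the ExperimentalSetWalkerPartitionType / WalkerPartitionPreferHighestDimension debug overrides. + 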
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithNonUniformStartWhenComputePartitionCountIsCalledThenPartitionsAreDisabled) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdStartingX(1u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(1u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); + + walker.setThreadGroupIdStartingX(0u); + walker.setThreadGroupIdStartingY(1u); + + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(1u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); + + walker.setThreadGroupIdStartingY(0u); + walker.setThreadGroupIdStartingZ(1u); + + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(1u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenHighestDimensionIsPartitioned) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(64u); + walker.setThreadGroupIdYDimension(64u); + walker.setThreadGroupIdZDimension(64u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); + + walker.setThreadGroupIdYDimension(65u); + walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); + + walker.setThreadGroupIdZDimension(66u); + walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDisabledMinimalPartitionSizeWhenComputePartitionSizeThenProperValueIsReturned) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(64u); + walker.setThreadGroupIdYDimension(64u); + walker.setThreadGroupIdZDimension(64u); + + DebugManagerStateRestore restorer; + DebugManager.flags.SetMinimalPartitionSize.set(0); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(16u, partitionCount); + 
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); + + walker.setThreadGroupIdYDimension(65u); + walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(16u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); + + walker.setThreadGroupIdZDimension(66u); + walker.setPartitionType(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(16u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkgroupCountsWhenPartitionCountIsObtainedThenPartitionCountIsClampedToHighestDimension) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(15u); + walker.setThreadGroupIdYDimension(7u); + walker.setThreadGroupIdZDimension(4u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); + walker.setThreadGroupIdXDimension(1u); + walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); + + walker.setThreadGroupIdYDimension(1u); + walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeHintWhenPartitionCountIsObtainedThenSuggestedTypeIsUsedForPartition) { + DebugManagerStateRestore restore{}; + + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(8u); + walker.setThreadGroupIdYDimension(4u); + walker.setThreadGroupIdZDimension(2u); + + DebugManager.flags.ExperimentalSetWalkerPartitionType.set(-1); + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); + + DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X)); + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, 
partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); + + DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y)); + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); + + DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z)); + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(2u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenInvalidPartitionTypeIsRequestedWhenPartitionCountIsObtainedThenFail) { + DebugManagerStateRestore restore{}; + + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(8u); + walker.setThreadGroupIdYDimension(4u); + walker.setThreadGroupIdZDimension(2u); + + DebugManager.flags.ExperimentalSetWalkerPartitionType.set(0); + bool staticPartitioning = false; + EXPECT_ANY_THROW(computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithSmallXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsAdjusted) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(32u); + walker.setThreadGroupIdYDimension(1024u); + walker.setThreadGroupIdZDimension(1u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(2u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithBigXDimensionSizeWhenPartitionCountIsObtainedThenPartitionCountIsNotAdjusted) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(16384u); + walker.setThreadGroupIdYDimension(1u); + walker.setThreadGroupIdZDimension(1u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(16u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenCustomMinimalPartitionSizeWhenComputePartitionCountThenProperValueIsReturned) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(16384u); + walker.setThreadGroupIdYDimension(1u); + walker.setThreadGroupIdZDimension(1u); + + DebugManagerStateRestore restorer; + DebugManager.flags.SetMinimalPartitionSize.set(4096); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(4u, 
partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithPartitionTypeProgrammedWhenPartitionCountIsObtainedAndItEqualsOneThenPartitionMechanismIsDisabled) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(1u); + walker.setThreadGroupIdYDimension(1u); + walker.setThreadGroupIdZDimension(1u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_FALSE(staticPartitioning); + EXPECT_EQ(1u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenXDimensionIsNotLargestAnd2DImagesAreUsedWhenPartitionTypeIsObtainedThenSelectXDimension) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(8u); + walker.setThreadGroupIdYDimension(64u); + walker.setThreadGroupIdZDimension(16u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, false, &staticPartitioning); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); + + partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, false, true, &staticPartitioning); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndNonPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(1u); + walker.setThreadGroupIdYDimension(1u); + walker.setThreadGroupIdZDimension(1u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); + EXPECT_TRUE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionableWalkerWhenPartitionCountIsObtainedThenAllowPartitioning) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(1u); + walker.setThreadGroupIdYDimension(2u); + walker.setThreadGroupIdZDimension(1u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning); + EXPECT_TRUE(staticPartitioning); + EXPECT_EQ(4u, partitionCount); + EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType()); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndBigPartitionCountProgrammedInWalkerWhenPartitionCountIsObtainedThenNumberOfPartitionsIsEqualToNumberOfTiles) { + WalkerPartition::COMPUTE_WALKER<FamilyType> walker; + walker = FamilyType::cmdInitGpgpuWalker; + walker.setThreadGroupIdXDimension(1u); + walker.setThreadGroupIdYDimension(16384u); + walker.setThreadGroupIdZDimension(1u); + + bool staticPartitioning = false; + auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, 
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndNonUniformStartProgrammedInWalkerWhenPartitionCountIsObtainedThenDoNotAllowStaticPartitioningAndSetPartitionCountToOne) {
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(1u);
+    walker.setThreadGroupIdYDimension(16384u);
+    walker.setThreadGroupIdZDimension(1u);
+    walker.setThreadGroupIdStartingX(0);
+    walker.setThreadGroupIdStartingY(0);
+    walker.setThreadGroupIdStartingZ(1);
+
+    bool staticPartitioning = false;
+    auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning);
+    EXPECT_FALSE(staticPartitioning);
+    EXPECT_EQ(1u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, walker.getPartitionType());
+}
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningAndPartitionTypeHintIsUsedWhenPartitionCountIsObtainedThenUseRequestedPartitionType) {
+    DebugManagerStateRestore restore{};
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(1u);
+    walker.setThreadGroupIdYDimension(16384u);
+    walker.setThreadGroupIdZDimension(1u);
+
+    bool staticPartitioning = false;
+    auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(4u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
+
+    DebugManager.flags.ExperimentalSetWalkerPartitionType.set(static_cast<int32_t>(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z));
+    staticPartitioning = false;
+    partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 4u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(4u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
+}
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsDivisibleByTwoThenItIsSelected) {
+    DebugManagerStateRestore restore{};
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(1u);
+    walker.setThreadGroupIdYDimension(16384u);
+    walker.setThreadGroupIdZDimension(2u);
+
+    bool staticPartitioning = true;
+    auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(2u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
+
+    DebugManager.flags.WalkerPartitionPreferHighestDimension.set(0);
+
+    partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(2u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
+}
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenYDimensionIsDivisibleByTwoThenItIsSelected) {
+    DebugManagerStateRestore restore{};
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(16384u);
+    walker.setThreadGroupIdYDimension(2u);
+    walker.setThreadGroupIdZDimension(1u);
+
+    bool staticPartitioning = true;
+    auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(2u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, walker.getPartitionType());
+
+    DebugManager.flags.WalkerPartitionPreferHighestDimension.set(0);
+
+    partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(2u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker.getPartitionType());
+}
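// Aside: a sketch of the dimension-selection rule the neighboring static
// partitioning tests pin down. The 512 threshold and exact ordering are
// inferred from the test data (the 2/16384 and 512/513 dimension mixes),
// not copied from the driver.
#include <cstdint>

enum class SketchDim { X, Y, Z };

SketchDim sketchSelectStaticDim(uint32_t x, uint32_t y, uint32_t z, uint32_t tileCount, bool preferHighestDimension) {
    constexpr uint32_t threshold = 512u; // assumed cut-off for "large enough to split unevenly"
    if (preferHighestDimension) { // default; WalkerPartitionPreferHighestDimension=0 disables it
        if ((z % tileCount == 0u) || (z > threshold)) return SketchDim::Z;
        if ((y % tileCount == 0u) || (y > threshold)) return SketchDim::Y;
        if ((x % tileCount == 0u) || (x > threshold)) return SketchDim::X;
    }
    // fallback: the largest dimension wins
    if (z >= y && z >= x) return SketchDim::Z;
    return (y >= x) ? SketchDim::Y : SketchDim::X;
}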
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZDimensionIsNotDivisibleByTwoButIsAboveThresholdThenItIsSelected) {
+    DebugManagerStateRestore restore{};
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(512u);
+    walker.setThreadGroupIdYDimension(512u);
+    walker.setThreadGroupIdZDimension(513u);
+
+    bool staticPartitioning = true;
+    auto partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(2u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
+
+    DebugManager.flags.WalkerPartitionPreferHighestDimension.set(0);
+
+    partitionCount = computePartitionCountAndSetPartitionType<FamilyType>(&walker, 2u, true, false, &staticPartitioning);
+    EXPECT_TRUE(staticPartitioning);
+    EXPECT_EQ(2u, partitionCount);
+    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
+}
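// Aside: the dynamic-partitioning tests that follow parse the constructed
// buffer command by command. The layout they walk, in order (optional pieces
// in brackets, gated by the WalkerPartitionArgs flags):
//
//   [tile sync atomics]           - only when synchronizeBeforeExecution
//   LOAD_REGISTER_IMM             - WPARID predication mask
//   MI_ATOMIC                     - claim next partition id (returns data)
//   LOAD_REGISTER_REG             - move the claimed id into the WPARID register
//   MI_SET_PREDICATE              - noop commands for out-of-range ids
//   BATCH_BUFFER_START            - predicated jump into the walker section
//   MI_SET_PREDICATE              - predication off
//   [MI_STORE_DATA_IMM/MI_ATOMIC] - native cleanup control write
//   [PIPE_CONTROL]                - post-walker stall
//   [semaphores / tile atomics]   - cross-tile synchronization
//   BATCH_BUFFER_START            - jump over the control section
//   BATCH_BUFFER_START + WALKER   - walker section (looped back into)
//   BatchBufferControlData        - four zeroed dwords
//   [cleanup section]             - native cross-tile sync only
//
// A CPU mirror of the control dwords the offsets above address (field order is
// taken from the offsetof() and +sizeof(uint32_t) arithmetic in the tests):
#include <cstdint>

struct SketchBatchBufferControlData {
    uint32_t partitionCount = 0u;     // bumped by the claim atomic
    uint32_t tileCount = 0u;          // after-walker sync counter
    uint32_t inTileCount = 0u;        // before-walker sync counter
    uint32_t finalSyncTileCount = 0u; // cleanup-section barrier counter
};
static_assert(sizeof(SketchBatchBufferControlData) == 4 * sizeof(uint32_t), "tests address fields as consecutive dwords");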
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenNativeCrossTileSyncWhenDebugForceDisableCrossTileSyncThenNativeOverridesDebugAndAddsOwnCleanupSection) {
+    testArgs.crossTileAtomicSynchronization = false;
+    testArgs.partitionCount = 16u;
+    checkForProperCmdBufferAddressOffset = false;
+    testArgs.nativeCrossTileAtomicSync = true;
+    uint64_t gpuVirtualAddress = 0x8000123000;
+    uint64_t postSyncAddress = 0x8000456000;
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X);
+    auto &postSync = walker.getPostSync();
+    postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
+    postSync.setDestinationAddress(postSyncAddress);
+    uint32_t totalBytesProgrammed = 0u;
+
+    auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
+                                   sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
+                                   sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
+                                   sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
+                                   sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
+
+    auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
+                                 sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
+
+    EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs));
+
+    auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
+
+    auto totalProgrammedSize = cleanupSectionOffset + 3 * sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>) +
+                               2 * sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
+                               2 * sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
+
+    testArgs.tileCount = 4u;
+    WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
+                                                                              gpuVirtualAddress,
+                                                                              &walker,
+                                                                              totalBytesProgrammed,
+                                                                              testArgs);
+
+    EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed);
+    auto wparidMaskProgrammingLocation = cmdBufferAddress;
+
+    auto expectedMask = 0xFFF0u;
+    auto expectedRegister = 0x21FCu;
+
+    auto loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation);
+    ASSERT_NE(nullptr, loadRegisterImmediate);
+    EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset());
+    EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword());
+    auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>);
+
+    auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize;
+    auto miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress);
+    EXPECT_TRUE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    auto loadRegisterReg = genCmdCast<WalkerPartition::LOAD_REGISTER_REG<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, loadRegisterReg);
+    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination());
+    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource());
+    EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress());
+    EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress());
+    parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>);
+
+    auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSetPredicate);
+    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE);
+    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
+
+    auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStart);
+    EXPECT_TRUE(batchBufferStart->getPredicationEnable());
+    //address routes to WALKER section which is before control section
+    auto address = batchBufferStart->getBatchBufferStartAddress();
+    EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands);
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSetPredicate);
+    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER);
+    EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE);
+    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
+
+    uint64_t expectedCleanupGpuVa = gpuVirtualAddress + expectedCommandUsedSize + offsetof(BatchBufferControlData, finalSyncTileCount);
+    constexpr uint32_t expectedData = 0u;
+    auto finalSyncTileCountFieldStore = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, finalSyncTileCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, finalSyncTileCountFieldStore->getAddress());
+    EXPECT_EQ(expectedData, finalSyncTileCountFieldStore->getDataDword0());
+    parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
+
+    auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, pipeControl);
+    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
+    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
+    parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
+
+    miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    auto miAtomicTileAddress = gpuVirtualAddress + expectedCommandUsedSize + sizeof(uint32_t);
+    auto miAtomicTileProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress);
+    EXPECT_FALSE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSemaphoreWait);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress);
+    EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), testArgs.tileCount);
+
+    parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
+
+    //final batch buffer start that routes at the end of the batch buffer
+    auto batchBufferStartFinal = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStartFinal);
+    EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset);
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    EXPECT_NE(nullptr, computeWalker);
+    parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
+
+    batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStart);
+    EXPECT_FALSE(batchBufferStart->getPredicationEnable());
+    EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress());
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    auto controlSection = reinterpret_cast<BatchBufferControlData *>(ptrOffset(cmdBuffer, expectedCommandUsedSize));
+    EXPECT_EQ(0u, controlSection->partitionCount);
+    EXPECT_EQ(0u, controlSection->tileCount);
+    EXPECT_EQ(0u, controlSection->inTileCount);
+    EXPECT_EQ(0u, controlSection->finalSyncTileCount);
+
+    parsedOffset += sizeof(BatchBufferControlData);
+    EXPECT_EQ(parsedOffset, cleanupSectionOffset);
+
+    miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    miAtomicTileAddress = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData) +
+                          3 * sizeof(uint32_t);
+    miAtomicTileProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress);
+    EXPECT_FALSE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSemaphoreWait);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress);
+    EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), testArgs.tileCount);
+    parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
+
+    expectedCleanupGpuVa = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData);
+    auto partitionCountFieldStore = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, partitionCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, partitionCountFieldStore->getAddress());
+    EXPECT_EQ(expectedData, partitionCountFieldStore->getDataDword0());
+    parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
+
+    expectedCleanupGpuVa += sizeof(BatchBufferControlData::partitionCount);
+    auto tileCountFieldStore = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, tileCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, tileCountFieldStore->getAddress());
+    EXPECT_EQ(expectedData, tileCountFieldStore->getDataDword0());
+    parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
+
+    expectedCleanupGpuVa += sizeof(BatchBufferControlData::tileCount);
+    auto inTileCountFieldStore = genCmdCast<WalkerPartition::MI_STORE_DATA_IMM<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, inTileCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, inTileCountFieldStore->getAddress());
+    EXPECT_EQ(expectedData, inTileCountFieldStore->getDataDword0());
+    parsedOffset += sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>);
+
+    miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    miAtomicTileProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress);
+    EXPECT_FALSE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSemaphoreWait);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress);
+    EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 2 * testArgs.tileCount);
+}
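// Aside: the cleanup section just parsed implements a two-phase barrier around
// the zeroing stores. A CPU analogue of what the parsed commands do per tile;
// note that fields excludes finalSyncTileCount itself, which is why the
// dynamicPartitioningFieldsForCleanupCount constant subtracts one:
#include <atomic>
#include <cstddef>
#include <cstdint>

void sketchCleanupSection(std::atomic<uint32_t> &finalSyncTileCount, uint32_t *fields, size_t fieldCount, uint32_t tileCount) {
    finalSyncTileCount.fetch_add(1u);                    // MI_ATOMIC increment
    while (finalSyncTileCount.load() < tileCount) {      // MI_SEMAPHORE_WAIT >= tileCount:
    }                                                    // nobody still reads the control dwords
    for (size_t i = 0u; i < fieldCount; ++i) {
        fields[i] = 0u;                                  // MI_STORE_DATA_IMM (or MI_ATOMIC move)
    }
    finalSyncTileCount.fetch_add(1u);                    // second MI_ATOMIC increment
    while (finalSyncTileCount.load() < 2u * tileCount) { // MI_SEMAPHORE_WAIT >= 2 * tileCount:
    }                                                    // nobody re-enters before zeroing finishes
}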
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenNativeCrossTileSyncAndAtomicsUsedForNativeWhenDebugForceDisableCrossTileSyncThenNativeOverridesDebugAndAddsOwnCleanupSection) {
+    testArgs.crossTileAtomicSynchronization = false;
+    testArgs.partitionCount = 16u;
+    checkForProperCmdBufferAddressOffset = false;
+    testArgs.nativeCrossTileAtomicSync = true;
+    testArgs.useAtomicsForNativeCleanup = true;
+    uint64_t gpuVirtualAddress = 0x8000123000;
+    uint64_t postSyncAddress = 0x8000456000;
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X);
+    auto &postSync = walker.getPostSync();
+    postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
+    postSync.setDestinationAddress(postSyncAddress);
+    uint32_t totalBytesProgrammed = 0u;
+
+    auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
+                                   sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
+                                   sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
+                                   sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
+                                   sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
+                                 sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
+
+    EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs));
+
+    auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
+
+    auto totalProgrammedSize = cleanupSectionOffset + 3 * sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
+                               2 * sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
+                               2 * sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
+
+    testArgs.tileCount = 4u;
+    WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
+                                                                              gpuVirtualAddress,
+                                                                              &walker,
+                                                                              totalBytesProgrammed,
+                                                                              testArgs);
+
+    EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed);
+    auto wparidMaskProgrammingLocation = cmdBufferAddress;
+
+    auto expectedMask = 0xFFF0u;
+    auto expectedRegister = 0x21FCu;
+
+    auto loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation);
+    ASSERT_NE(nullptr, loadRegisterImmediate);
+    EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset());
+    EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword());
+    auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>);
+
+    auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize;
+    auto miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress);
+    EXPECT_TRUE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    auto loadRegisterReg = genCmdCast<WalkerPartition::LOAD_REGISTER_REG<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, loadRegisterReg);
+    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination());
+    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource());
+    EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress());
+    EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress());
+    parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>);
+
+    auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSetPredicate);
+    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE);
+    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
+
+    auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStart);
+    EXPECT_TRUE(batchBufferStart->getPredicationEnable());
+    //address routes to WALKER section which is before control section
+    auto address = batchBufferStart->getBatchBufferStartAddress();
+    EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands);
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSetPredicate);
+    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER);
+    EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE);
+    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
+
+    uint64_t expectedCleanupGpuVa = gpuVirtualAddress + expectedCommandUsedSize + offsetof(BatchBufferControlData, finalSyncTileCount);
+    auto finalSyncTileCountFieldStore = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, finalSyncTileCountFieldStore);
+    miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*finalSyncTileCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress);
+    EXPECT_FALSE(finalSyncTileCountFieldStore->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_MOVE, finalSyncTileCountFieldStore->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, pipeControl);
+    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
+    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
+    parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
+
+    miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    auto miAtomicTileAddress = gpuVirtualAddress + expectedCommandUsedSize + sizeof(uint32_t);
+    auto miAtomicTileProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress);
+    EXPECT_FALSE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSemaphoreWait);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress);
+    EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), testArgs.tileCount);
+
+    parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
+
+    //final batch buffer start that routes at the end of the batch buffer
+    auto batchBufferStartFinal = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStartFinal);
+    EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset);
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    EXPECT_NE(nullptr, computeWalker);
+    parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
+
+    batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStart);
+    EXPECT_FALSE(batchBufferStart->getPredicationEnable());
+    EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress());
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    auto controlSection = reinterpret_cast<BatchBufferControlData *>(ptrOffset(cmdBuffer, expectedCommandUsedSize));
+    EXPECT_EQ(0u, controlSection->partitionCount);
+    EXPECT_EQ(0u, controlSection->tileCount);
+    EXPECT_EQ(0u, controlSection->inTileCount);
+    EXPECT_EQ(0u, controlSection->finalSyncTileCount);
+
+    parsedOffset += sizeof(BatchBufferControlData);
+    EXPECT_EQ(parsedOffset, cleanupSectionOffset);
+
+    miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    miAtomicTileAddress = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData) +
+                          3 * sizeof(uint32_t);
+    miAtomicTileProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress);
+    EXPECT_FALSE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSemaphoreWait);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress);
+    EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), testArgs.tileCount);
+    parsedOffset += sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>);
+
+    expectedCleanupGpuVa = gpuVirtualAddress + cleanupSectionOffset - sizeof(BatchBufferControlData);
+    auto partitionCountFieldStore = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, partitionCountFieldStore);
+    miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*partitionCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress);
+    EXPECT_FALSE(partitionCountFieldStore->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_MOVE, partitionCountFieldStore->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    expectedCleanupGpuVa += sizeof(BatchBufferControlData::partitionCount);
+    auto tileCountFieldStore = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, tileCountFieldStore);
+    miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*tileCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress);
+    EXPECT_FALSE(tileCountFieldStore->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_MOVE, tileCountFieldStore->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    expectedCleanupGpuVa += sizeof(BatchBufferControlData::tileCount);
+    auto inTileCountFieldStore = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, inTileCountFieldStore);
+    miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*inTileCountFieldStore);
+    EXPECT_EQ(expectedCleanupGpuVa, miAtomicProgrammedAddress);
+    EXPECT_FALSE(inTileCountFieldStore->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_MOVE, inTileCountFieldStore->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    miAtomicTileProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicTileAddress, miAtomicTileProgrammedAddress);
+    EXPECT_FALSE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSemaphoreWait);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreGraphicsAddress(), miAtomicTileAddress);
+    EXPECT_EQ(miSemaphoreWait->getCompareOperation(), MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    EXPECT_EQ(miSemaphoreWait->getSemaphoreDataDword(), 2 * testArgs.tileCount);
+}
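// Aside: the only difference between the two cleanup flavors verified above is
// the command used to zero each control dword; addresses and ordering are
// identical. This is essentially programNativeCrossTileSyncControl from the
// patch restated in isolation (assumes the walker partition header is included):
template <typename GfxFamily>
void sketchZeroControlField(void *&cmdStream, uint32_t &bytesProgrammed, uint64_t fieldGpuVa, bool useAtomicsForNativeCleanup) {
    if (useAtomicsForNativeCleanup) {
        // MI_ATOMIC 4B move: the tests check the opcode and target address, no inline data
        WalkerPartition::programMiAtomic<GfxFamily>(cmdStream, bytesProgrammed, fieldGpuVa, false,
                                                    WalkerPartition::MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_MOVE);
    } else {
        // MI_STORE_DATA_IMM: carries the zero dword inline, checked via getDataDword0()
        WalkerPartition::programStoreMemImmediateDword<GfxFamily>(cmdStream, bytesProgrammed, fieldGpuVa, 0u);
    }
}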
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenPipeControlProgrammingDisabledThenExpectNoPipeControlCommand) {
+    testArgs.crossTileAtomicSynchronization = false;
+    testArgs.partitionCount = 16u;
+    testArgs.tileCount = 4u;
+    testArgs.nativeCrossTileAtomicSync = false;
+    testArgs.useAtomicsForNativeCleanup = false;
+    testArgs.usePipeControlStall = false;
+
+    checkForProperCmdBufferAddressOffset = false;
+    uint64_t gpuVirtualAddress = 0x8000123000;
+    uint64_t postSyncAddress = 0x8000456000;
+    WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
+    walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X);
+    auto &postSync = walker.getPostSync();
+    postSync.setOperation(POSTSYNC_DATA<FamilyType>::OPERATION::OPERATION_WRITE_TIMESTAMP);
+    postSync.setDestinationAddress(postSyncAddress);
+    uint32_t totalBytesProgrammed = 0u;
+
+    auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
+                                   sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>) +
+                                   sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>) * 2 +
+                                   sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) * 3 +
+                                   sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
+
+    auto walkerSectionCommands = sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>) +
+                                 sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
+
+    EXPECT_EQ(expectedCommandUsedSize, computeControlSectionOffset<FamilyType>(testArgs));
+
+    auto cleanupSectionOffset = expectedCommandUsedSize + sizeof(BatchBufferControlData);
+
+    auto totalProgrammedSize = cleanupSectionOffset;
+
+    WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(cmdBuffer,
+                                                                              gpuVirtualAddress,
+                                                                              &walker,
+                                                                              totalBytesProgrammed,
+                                                                              testArgs);
+
+    EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed);
+    auto wparidMaskProgrammingLocation = cmdBufferAddress;
+
+    auto expectedMask = 0xFFF0u;
+    auto expectedRegister = 0x21FCu;
+
+    auto loadRegisterImmediate = genCmdCast<WalkerPartition::LOAD_REGISTER_IMM<FamilyType> *>(wparidMaskProgrammingLocation);
+    ASSERT_NE(nullptr, loadRegisterImmediate);
+    EXPECT_EQ(expectedRegister, loadRegisterImmediate->getRegisterOffset());
+    EXPECT_EQ(expectedMask, loadRegisterImmediate->getDataDword());
+    auto parsedOffset = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>);
+
+    auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miAtomic);
+    auto miAtomicAddress = gpuVirtualAddress + expectedCommandUsedSize;
+    auto miAtomicProgrammedAddress = UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
+    EXPECT_EQ(miAtomicAddress, miAtomicProgrammedAddress);
+    EXPECT_TRUE(miAtomic->getReturnDataControl());
+    EXPECT_EQ(MI_ATOMIC<FamilyType>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
+    parsedOffset += sizeof(WalkerPartition::MI_ATOMIC<FamilyType>);
+
+    auto loadRegisterReg = genCmdCast<WalkerPartition::LOAD_REGISTER_REG<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, loadRegisterReg);
+    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableDestination());
+    EXPECT_TRUE(loadRegisterReg->getMmioRemapEnableSource());
+    EXPECT_EQ(wparidCCSOffset, loadRegisterReg->getDestinationRegisterAddress());
+    EXPECT_EQ(generalPurposeRegister4, loadRegisterReg->getSourceRegisterAddress());
+    parsedOffset += sizeof(WalkerPartition::LOAD_REGISTER_REG<FamilyType>);
+
+    auto miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSetPredicate);
+    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE);
+    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
+
+    auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStart);
+    EXPECT_TRUE(batchBufferStart->getPredicationEnable());
+    //address routes to WALKER section which is before control section
+    auto address = batchBufferStart->getBatchBufferStartAddress();
+    EXPECT_EQ(address, gpuVirtualAddress + expectedCommandUsedSize - walkerSectionCommands);
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    miSetPredicate = genCmdCast<WalkerPartition::MI_SET_PREDICATE<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, miSetPredicate);
+    EXPECT_EQ(miSetPredicate->getPredicateEnableWparid(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_NEVER);
+    EXPECT_EQ(miSetPredicate->getPredicateEnable(), MI_SET_PREDICATE<FamilyType>::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE);
+    parsedOffset += sizeof(WalkerPartition::MI_SET_PREDICATE<FamilyType>);
+
+    //final batch buffer start that routes at the end of the batch buffer
+    auto batchBufferStartFinal = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStartFinal);
+    EXPECT_EQ(batchBufferStartFinal->getBatchBufferStartAddress(), gpuVirtualAddress + cleanupSectionOffset);
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    auto computeWalker = genCmdCast<WalkerPartition::COMPUTE_WALKER<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    EXPECT_NE(nullptr, computeWalker);
+    parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
+
+    batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
+    ASSERT_NE(nullptr, batchBufferStart);
+    EXPECT_FALSE(batchBufferStart->getPredicationEnable());
+    EXPECT_EQ(gpuVirtualAddress, batchBufferStart->getBatchBufferStartAddress());
+    parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
+
+    auto controlSection = reinterpret_cast<BatchBufferControlData *>(ptrOffset(cmdBuffer, expectedCommandUsedSize));
+    EXPECT_EQ(0u, controlSection->partitionCount);
+    EXPECT_EQ(0u, controlSection->tileCount);
+    EXPECT_EQ(0u, controlSection->inTileCount);
+    EXPECT_EQ(0u, controlSection->finalSyncTileCount);
+
+    parsedOffset += sizeof(BatchBufferControlData);
+    EXPECT_EQ(parsedOffset, cleanupSectionOffset);
+}
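// Aside: the three dynamic-partitioning tests differ only in which optional
// commands exist, and the expectedCommandUsedSize expressions shrink
// accordingly. A standalone restatement of that arithmetic; the per-command
// sizes are parameters here, whereas the tests take them from sizeof on the
// GfxFamily command structs:
#include <cstdint>

struct SketchCmdSizes {
    uint64_t lri, lrr, atomic, setPredicate, bbStart, pipeControl, walker, semaphoreWait, storeDataImm;
};

uint64_t sketchDynamicControlSectionOffset(const SketchCmdSizes &s, bool usePipeControlStall, bool crossTileOrNativeSync, bool nativeCleanupControl) {
    uint64_t size = s.lri + s.atomic + s.lrr + 2u * s.setPredicate + 2u * s.bbStart; // WPARID setup + jumps
    size += s.bbStart + s.walker;                                                    // walker section
    size += usePipeControlStall ? s.pipeControl : 0u;                                // post-walker stall
    size += crossTileOrNativeSync ? (s.atomic + s.semaphoreWait) : 0u;               // tile sync pair
    size += nativeCleanupControl ? s.storeDataImm : 0u;                              // cleanup control write
    return size;
}
// With all three toggles off this collapses to the LRI + MI_ATOMIC + LRR +
// 2x MI_SET_PREDICATE + 3x BATCH_BUFFER_START + COMPUTE_WALKER sum the
// pipe-control-disabled test above expects.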
diff --git a/opencl/test/unit_test/command_stream/implicit_scaling_ocl_tests.cpp b/opencl/test/unit_test/command_stream/implicit_scaling_ocl_tests.cpp
index e18ef56c89..6a6f609188 100644
--- a/opencl/test/unit_test/command_stream/implicit_scaling_ocl_tests.cpp
+++ b/opencl/test/unit_test/command_stream/implicit_scaling_ocl_tests.cpp
@@ -14,3 +14,11 @@ using namespace NEO;
 TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSupportEnabled) {
     EXPECT_TRUE(ImplicitScaling::apiSupport);
 }
+
+TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSemaphoreProgrammingRequiredIsFalse) {
+    EXPECT_FALSE(ImplicitScaling::semaphoreProgrammingRequired);
+}
+
+TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenCrossTileAtomicSynchronizationIsEnabled) {
+    EXPECT_TRUE(ImplicitScaling::crossTileAtomicSynchronization);
+}
diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config
index 31da28b0f4..430e55cd0b 100644
--- a/opencl/test/unit_test/test_files/igdrcl.config
+++ b/opencl/test/unit_test/test_files/igdrcl.config
@@ -266,8 +266,8 @@ ForceWorkgroupSize1x1x1 = -1
 ForceThreadGroupDispatchSize = -1
 ForceStatelessL1CachingPolicy = -1
 ForceMemoryBankIndexOverride = -1
-ExperimentalSynchronizeWithSemaphores = -1
-ExperimentalForceCrossAtomicSynchronization = -1
+SynchronizeWithSemaphores = -1
+UseCrossAtomicSynchronization = -1
 EnableStatelessCompression = -1
 EnableMultiTileCompression = -1
 EnablePrivateScratchSlot1 = -1
@@ -313,7 +313,7 @@ OverrideUseKmdWaitFunction = -1
 EnableCacheFlushAfterWalkerForAllQueues = -1
 Force32BitDriverSupport = -1
 OverrideCmdQueueSynchronousMode = -1
-ExperimentalUseAtomicsForNativeSectionCleanup = -1
+UseAtomicsForNativeSectionCleanup = -1
 HBMSizePerTileInGigabytes = 0
 OverrideSystolicPipelineSelect = -1
 OverrideSystolicInComputeWalker = -1
@@ -324,6 +324,9 @@ DoNotFreeResources = 0
 OverrideGmmResourceUsageField = -1
 LogAllocationType = 0
 ProgramAdditionalPipeControlBeforeStateComputeModeCommand = 0
+ProgramNativeCleanup = -1
+WparidRegisterProgramming = -1
+UsePipeControlAfterPartitionedWalker = -1
 OverrideBufferSuitableForRenderCompression = -1
 AllowMixingRegularAndCooperativeKernels = 0
 AllowPatchingVfeStateInCommandLists = 0
\ No newline at end of file
diff --git a/shared/source/command_container/implicit_scaling.cpp b/shared/source/command_container/implicit_scaling.cpp
index 67c2d8bc0d..aa0c065177 100644
--- a/shared/source/command_container/implicit_scaling.cpp
+++ b/shared/source/command_container/implicit_scaling.cpp
@@ -34,12 +34,55 @@ bool ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired() {
     return synchronizeBeforeExecution;
 }
 
+bool ImplicitScalingHelper::isSemaphoreProgrammingRequired() {
+    auto semaphoreProgrammingRequired = ImplicitScaling::semaphoreProgrammingRequired;
+    if (NEO::DebugManager.flags.SynchronizeWithSemaphores.get() == 1) {
+        semaphoreProgrammingRequired = true;
+    }
+    return semaphoreProgrammingRequired;
+}
+
+bool ImplicitScalingHelper::isCrossTileAtomicRequired() {
+    auto crossTileAtomicSynchronization = ImplicitScaling::crossTileAtomicSynchronization;
+    if (NEO::DebugManager.flags.UseCrossAtomicSynchronization.get() == 0) {
+        crossTileAtomicSynchronization = false;
+    }
+    return crossTileAtomicSynchronization;
+}
+
 bool ImplicitScalingHelper::useAtomicsForNativeCleanup() {
     bool useAtomics = false;
-    int overrideUseAtomics = DebugManager.flags.ExperimentalUseAtomicsForNativeSectionCleanup.get();
+    int overrideUseAtomics = DebugManager.flags.UseAtomicsForNativeSectionCleanup.get();
     if (overrideUseAtomics != -1) {
         useAtomics = !!(overrideUseAtomics);
     }
 
     return useAtomics;
 }
+
+bool ImplicitScalingHelper::programNativeCleanup(bool defaultNativeCleanup) {
+    int overrideProgramNativeCleanup = DebugManager.flags.ProgramNativeCleanup.get();
+    if (overrideProgramNativeCleanup != -1) {
+        defaultNativeCleanup = !!(overrideProgramNativeCleanup);
+    }
+    return defaultNativeCleanup;
+}
+
+bool ImplicitScalingHelper::initWparidRegister() {
+    bool initWparidRegister = true;
+    int overrideInitWparidRegister = DebugManager.flags.WparidRegisterProgramming.get();
+    if (overrideInitWparidRegister != -1) {
+        initWparidRegister = !!(overrideInitWparidRegister);
+    }
+    return initWparidRegister;
+}
+
+bool ImplicitScalingHelper::usePipeControl() {
+    bool usePipeControl = true;
+    int overrideUsePipeControl = DebugManager.flags.UsePipeControlAfterPartitionedWalker.get();
+    if (overrideUsePipeControl != -1) {
+        usePipeControl = !!(overrideUsePipeControl);
+    }
+    return usePipeControl;
+}
+
 } // namespace NEO
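// Aside: the new ImplicitScalingHelper queries above share one tri-state
// override pattern: -1 keeps the compiled-in default, any other value forces
// the behavior. In isolation (the flag value and default are placeholders,
// not a real debug variable):
#include <cstdint>

bool sketchApplyTriStateOverride(int32_t flagValue, bool compiledDefault) {
    if (flagValue != -1) {
        return flagValue != 0; // 0 forces off, non-zero forces on
    }
    return compiledDefault;
}
// Note the asymmetric pair: isSemaphoreProgrammingRequired() only honors a
// forced-on (== 1) and isCrossTileAtomicRequired() only a forced-off (== 0),
// so each can merely push its per-API default in one direction.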
diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h
index d6077b4af4..20ce9e9a01 100644
--- a/shared/source/command_container/implicit_scaling.h
+++ b/shared/source/command_container/implicit_scaling.h
@@ -15,12 +15,22 @@ class LinearStream;
 
 namespace ImplicitScaling {
 extern bool apiSupport;
-}
+extern bool semaphoreProgrammingRequired;
+extern bool crossTileAtomicSynchronization;
+
+constexpr uint32_t partitionAddressOffsetDwords = 2u;
+constexpr uint32_t partitionAddressOffset = sizeof(uint32_t) * partitionAddressOffsetDwords;
+} // namespace ImplicitScaling
 
 struct ImplicitScalingHelper {
     static bool isImplicitScalingEnabled(const DeviceBitfield &devices, bool preCondition);
+    static bool isSemaphoreProgrammingRequired();
+    static bool isCrossTileAtomicRequired();
     static bool isSynchronizeBeforeExecutionRequired();
     static bool useAtomicsForNativeCleanup();
+    static bool programNativeCleanup(bool defaultNativeCleanup);
+    static bool initWparidRegister();
+    static bool usePipeControl();
 };
 
 template <typename GfxFamily>
diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl
index 6feca1c3f7..8942741573 100644
--- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl
+++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl
@@ -20,6 +20,7 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool nativeCrossTileAtomicSync,
     typename GfxFamily::COMPUTE_WALKER::PARTITION_TYPE partitionType{};
     bool staticPartitioning = false;
     const uint32_t tileCount = static_cast<uint32_t>(devices.count());
+
     const uint32_t partitionCount = WalkerPartition::computePartitionCountAndPartitionType<GfxFamily>(tileCount,
                                                                                                       preferStaticPartitioning,
                                                                                                       groupStart,
@@ -28,15 +29,21 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool nativeCrossTileAtomicSync,
                                                                                                       &partitionType,
                                                                                                       &staticPartitioning);
     UNRECOVERABLE_IF(staticPartitioning && (tileCount != partitionCount));
+    WalkerPartition::WalkerPartitionArgs args = {};
-    auto synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();
-    const bool useAtomicsForNativeCleanup = ImplicitScalingHelper::useAtomicsForNativeCleanup();
-    return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily>(false,
-                                                                                                16u,
-                                                                                                synchronizeBeforeExecution,
-                                                                                                nativeCrossTileAtomicSync,
-                                                                                                staticPartitioning,
-                                                                                                useAtomicsForNativeCleanup));
+    args.partitionCount = partitionCount;
+    args.tileCount = tileCount;
+    args.synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();
+    args.useAtomicsForNativeCleanup = ImplicitScalingHelper::useAtomicsForNativeCleanup();
+    args.nativeCrossTileAtomicSync = ImplicitScalingHelper::programNativeCleanup(nativeCrossTileAtomicSync);
+    args.initializeWparidRegister = ImplicitScalingHelper::initWparidRegister();
+    args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired();
+    args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired();
+    args.usePipeControlStall = ImplicitScalingHelper::usePipeControl();
+    args.emitBatchBufferEnd = false;
+    args.staticPartitioning = staticPartitioning;
+
+    return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily>(args));
 }
 
 template <typename GfxFamily>
@@ -54,36 +61,43 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandStream,
     bool staticPartitioning = false;
     partitionCount = WalkerPartition::computePartitionCountAndSetPartitionType<GfxFamily>(&walkerCmd, tileCount, preferStaticPartitioning, usesImages, &staticPartitioning);
-    const bool synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();
-    const bool useAtomicsForNativeCleanup = ImplicitScalingHelper::useAtomicsForNativeCleanup();
+
+    WalkerPartition::WalkerPartitionArgs args = {};
+    args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa;
+    args.partitionCount = partitionCount;
+    args.tileCount = tileCount;
+    args.synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();
+    args.useAtomicsForNativeCleanup = ImplicitScalingHelper::useAtomicsForNativeCleanup();
+    args.nativeCrossTileAtomicSync = ImplicitScalingHelper::programNativeCleanup(nativeCrossTileAtomicSync);
+    args.initializeWparidRegister = ImplicitScalingHelper::initWparidRegister();
+    args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired();
+    args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired();
+    args.usePipeControlStall = ImplicitScalingHelper::usePipeControl();
+    args.emitBatchBufferEnd = false;
+    args.secondaryBatchBuffer = useSecondaryBatchBuffer;
+    args.staticPartitioning = staticPartitioning;
+
     if (staticPartitioning) {
         UNRECOVERABLE_IF(tileCount != partitionCount);
         WalkerPartition::constructStaticallyPartitionedCommandBuffer<GfxFamily>(commandStream.getSpace(0u),
                                                                                 commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(),
                                                                                 &walkerCmd,
                                                                                 totalProgrammedSize,
-                                                                                partitionCount,
-                                                                                tileCount,
-                                                                                synchronizeBeforeExecution,
-                                                                                useSecondaryBatchBuffer,
-                                                                                nativeCrossTileAtomicSync,
-                                                                                workPartitionAllocationGpuVa,
-                                                                                useAtomicsForNativeCleanup);
+                                                                                args);
     } else {
         if (DebugManager.flags.ExperimentalSetWalkerPartitionCount.get()) {
             partitionCount = DebugManager.flags.ExperimentalSetWalkerPartitionCount.get();
             if (partitionCount == 1u) {
                 walkerCmd.setPartitionType(GfxFamily::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
             }
+            args.partitionCount = partitionCount;
         }
 
         WalkerPartition::constructDynamicallyPartitionedCommandBuffer<GfxFamily>(commandStream.getSpace(0u),
                                                                                  commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(),
-                                                                                 &walkerCmd, totalProgrammedSize,
-                                                                                 partitionCount, tileCount,
-                                                                                 false, synchronizeBeforeExecution, useSecondaryBatchBuffer,
-                                                                                 nativeCrossTileAtomicSync,
-                                                                                 useAtomicsForNativeCleanup);
+                                                                                 &walkerCmd,
+                                                                                 totalProgrammedSize,
+                                                                                 args);
     }
     commandStream.getSpace(totalProgrammedSize);
 }
diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h
index 206e1d3c16..0e2911e0cd 100644
--- a/shared/source/command_container/walker_partition_xehp_and_later.h
+++ b/shared/source/command_container/walker_partition_xehp_and_later.h
@@ -18,6 +18,22 @@
 
 namespace WalkerPartition {
 
+struct WalkerPartitionArgs {
+    uint64_t workPartitionAllocationGpuVa = 0;
+    uint32_t partitionCount = 0;
+    uint32_t tileCount = 0;
+    bool emitBatchBufferEnd = false;
+    bool secondaryBatchBuffer = false;
+    bool synchronizeBeforeExecution = false;
+    bool crossTileAtomicSynchronization = false;
+    bool semaphoreProgrammingRequired = false;
+    bool staticPartitioning = false;
+    bool nativeCrossTileAtomicSync = false;
+    bool useAtomicsForNativeCleanup = false;
+    bool initializeWparidRegister = false;
+    bool usePipeControlStall = false;
+};
+
 template <typename GfxFamily>
 using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
 template <typename GfxFamily>
@@ -73,22 +89,6 @@ Command *putCommand(void *&inputAddress, uint32_t &totalBytesProgrammed) {
     return commandToReturn;
 }
 
-bool inline isSemaphoreProgrammingRequired() {
-    auto semaphoreProgrammingRequired = false;
-    if (NEO::DebugManager.flags.ExperimentalSynchronizeWithSemaphores.get() == 1) {
-        semaphoreProgrammingRequired = true;
-    }
-    return semaphoreProgrammingRequired;
-}
-
-bool inline isCrossTileAtomicRequired() {
-    auto crossTileAtomicSynchronization = true;
-    if (NEO::DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.get() == 0) {
-        crossTileAtomicSynchronization = false;
-    }
-    return crossTileAtomicSynchronization;
-}
-
 template <typename GfxFamily>
 uint32_t computePartitionCountAndPartitionType(uint32_t preferredMinimalPartitionCount,
                                                bool preferStaticPartitioning,
@@ -349,25 +349,55 @@ void programStoreMemImmediateDword(void *&inputAddress, uint32_t &totalBytesProgrammed,
     *storeDataImmediate = cmd;
 }
 
+template <typename GfxFamily>
+uint64_t computeNativeCrossTileSyncControlSectionSize(bool useAtomicsForNativeCleanup) {
+    if (useAtomicsForNativeCleanup) {
+        return sizeof(MI_ATOMIC<GfxFamily>);
+    } else {
+        return sizeof(MI_STORE_DATA_IMM<GfxFamily>);
+    }
+}
+
 template <typename GfxFamily>
 void programNativeCrossTileSyncControl(void *&inputAddress,
                                        uint32_t &totalBytesProgrammed,
-                                       uint64_t finalSyncTileCountField,
+                                       uint64_t address,
                                        bool useAtomicsForNativeCleanup) {
     if (useAtomicsForNativeCleanup) {
         programMiAtomic<GfxFamily>(inputAddress,
                                    totalBytesProgrammed,
-                                   finalSyncTileCountField,
+                                   address,
                                    false,
                                    MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_MOVE);
     } else {
         programStoreMemImmediateDword<GfxFamily>(inputAddress,
                                                  totalBytesProgrammed,
-                                                 finalSyncTileCountField,
+                                                 address,
                                                  0u);
     }
 }
 
+template <typename GfxFamily>
+uint64_t computeTilesSynchronizationWithAtomicsSectionSize() {
+    return sizeof(MI_ATOMIC<GfxFamily>) +
+           sizeof(MI_SEMAPHORE_WAIT<GfxFamily>);
+}
+
+template <typename GfxFamily>
+void programTilesSynchronizationWithAtomics(void *&currentBatchBufferPointer,
+                                            uint32_t &totalBytesProgrammed,
+                                            uint64_t atomicAddress,
+                                            uint32_t tileCount) {
+    programMiAtomic<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, false, MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT);
+    programWaitForSemaphore<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, tileCount, MI_SEMAPHORE_WAIT<GfxFamily>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+}
+
+template <typename GfxFamily>
+uint64_t computeNativeCrossTileSyncCleanupSectionSize(size_t fieldsForCleanupCount, bool useAtomicsForNativeCleanup) {
+    return fieldsForCleanupCount * computeNativeCrossTileSyncControlSectionSize<GfxFamily>(useAtomicsForNativeCleanup) +
+           2 * computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>();
+}
+
 template <typename GfxFamily>
 void programNativeCrossTileSyncCleanup(void *&inputAddress,
@@ -377,28 +407,18 @@ void programNativeCrossTileSyncCleanup(void *&inputAddress,
                                        uint32_t tileCount,
                                        bool useAtomicsForNativeCleanup) {
     // Synchronize tiles, so the fields are not cleared while still in use
-    programMiAtomic<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, false, MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT);
-    programWaitForSemaphore<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, tileCount, MI_SEMAPHORE_WAIT<GfxFamily>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    programTilesSynchronizationWithAtomics<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, tileCount);
 
     for (auto fieldIndex = 0u; fieldIndex < fieldsForCleanupCount; fieldIndex++) {
         const uint64_t addressForCleanup = baseAddressForCleanup + fieldIndex * sizeof(uint32_t);
-        if (useAtomicsForNativeCleanup) {
-            programMiAtomic<GfxFamily>(inputAddress,
-                                       totalBytesProgrammed,
-                                       addressForCleanup,
-                                       false,
-                                       MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_MOVE);
-        } else {
-            programStoreMemImmediateDword<GfxFamily>(inputAddress,
+        programNativeCrossTileSyncControl<GfxFamily>(inputAddress,
                                                      totalBytesProgrammed,
                                                      addressForCleanup,
-                                                     0u);
-        }
+                                                     useAtomicsForNativeCleanup);
     }
 
     //this synchronization point ensures that all tiles finished zeroing and will fairly access control section atomic variables
-    programMiAtomic<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, false, MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT);
-    programWaitForSemaphore<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, 2 * tileCount, MI_SEMAPHORE_WAIT<GfxFamily>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+    programTilesSynchronizationWithAtomics<GfxFamily>(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, 2 * tileCount);
 }
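// Aside: programTilesSynchronizationWithAtomics() is the arrive-and-wait
// barrier every sync point in this header is now built from: one MI_ATOMIC
// increment on a shared dword, one MI_SEMAPHORE_WAIT until the dword reaches
// the expected count. A CPU analogue for reference:
#include <atomic>
#include <cstdint>

void sketchTileBarrier(std::atomic<uint32_t> &counter, uint32_t expectedCount) {
    counter.fetch_add(1u);                   // MI_ATOMIC, ATOMIC_4B_INCREMENT
    while (counter.load() < expectedCount) { // MI_SEMAPHORE_WAIT,
    }                                        // COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD
}
// computeNativeCrossTileSyncCleanupSectionSize() counts exactly two such
// barriers plus one control write per field, matching the cleanup layout the
// unit tests walk through command by command.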
programTilesSynchronizationWithPostSyncs(void *¤tBatchBufferPointer, } } -template -void programTilesSynchronizationWithAtomics(void *¤tBatchBufferPointer, - uint32_t &totalBytesProgrammed, - uint64_t atomicAddress, - uint32_t tileCount) { - programMiAtomic(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); - programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, tileCount, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); -} - template uint64_t computeWalkerSectionSize() { return sizeof(BATCH_BUFFER_START) + @@ -428,46 +439,30 @@ uint64_t computeWalkerSectionSize() { } template -uint64_t computeNativeCrossTileSyncControlSectionSize(bool useAtomicsForNativeCleanup) { - if (useAtomicsForNativeCleanup) { - return sizeof(MI_ATOMIC); - } else { - return sizeof(MI_STORE_DATA_IMM); +uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) { + uint64_t size = 0u; + + size += args.synchronizeBeforeExecution ? computeTilesSynchronizationWithAtomicsSectionSize() : 0; + size += sizeof(LOAD_REGISTER_IMM); //predication mask + size += sizeof(MI_ATOMIC); //current id for partition + size += sizeof(LOAD_REGISTER_REG); //id into register + size += sizeof(MI_SET_PREDICATE) * 2 + + sizeof(BATCH_BUFFER_START) * 2; + size += (args.semaphoreProgrammingRequired ? sizeof(MI_SEMAPHORE_WAIT) * args.partitionCount : 0u); + size += computeWalkerSectionSize(); + size += args.usePipeControlStall ? sizeof(PIPE_CONTROL) : 0u; + if (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync) { + size += computeTilesSynchronizationWithAtomicsSectionSize(); } -} - -template -uint64_t computeNativeCrossTileSyncCleanupSectionSize(size_t fieldsForCleanupCount, bool useAtomicsForNativeCleanup) { - return fieldsForCleanupCount * computeNativeCrossTileSyncControlSectionSize(useAtomicsForNativeCleanup) + - 2 * sizeof(MI_ATOMIC) + - 2 * sizeof(MI_SEMAPHORE_WAIT); -} - -template -uint64_t computeControlSectionOffset(uint32_t partitionCount, bool synchronizeBeforeExecution, bool nativeCrossTileAtomicSync, bool useAtomicsForNativeCleanup) { - auto synchronizationCount = (synchronizeBeforeExecution) ? 2u : 1u; - if (!isCrossTileAtomicRequired() && !nativeCrossTileAtomicSync) { - synchronizationCount--; + if (args.nativeCrossTileAtomicSync) { + size += computeNativeCrossTileSyncControlSectionSize(args.useAtomicsForNativeCleanup); } - - return sizeof(LOAD_REGISTER_IMM) + - sizeof(MI_ATOMIC) * (1u + synchronizationCount) + - sizeof(LOAD_REGISTER_REG) + - sizeof(MI_SET_PREDICATE) * 2 + - sizeof(BATCH_BUFFER_START) * 2 + - sizeof(PIPE_CONTROL) + - sizeof(MI_SEMAPHORE_WAIT) * synchronizationCount + - (isSemaphoreProgrammingRequired() ? sizeof(MI_SEMAPHORE_WAIT) * partitionCount : 0u) + - computeWalkerSectionSize() + - (nativeCrossTileAtomicSync ? 
 
 template <typename GfxFamily>
-uint64_t computeWalkerSectionStart(uint32_t partitionCount,
-                                   bool synchronizeBeforeExecution,
-                                   bool nativeCrossTileAtomicSync,
-                                   bool useAtomicsForNativeCleanup) {
-    return computeControlSectionOffset<GfxFamily>(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync, useAtomicsForNativeCleanup) -
+uint64_t computeWalkerSectionStart(WalkerPartitionArgs &args) {
+    return computeControlSectionOffset<GfxFamily>(args) -
            computeWalkerSectionSize<GfxFamily>();
 }
 
@@ -537,26 +532,17 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
                                                   uint64_t gpuAddressOfAllocation,
                                                   COMPUTE_WALKER<GfxFamily> *inputWalker,
                                                   uint32_t &totalBytesProgrammed,
-                                                  uint32_t partitionCount,
-                                                  uint32_t tileCount,
-                                                  bool emitBatchBufferEnd,
-                                                  bool synchronizeBeforeExecution,
-                                                  bool secondaryBatchBuffer,
-                                                  bool nativeCrossTileAtomicSync,
-                                                  bool useAtomicsForNativeCleanup) {
+                                                  WalkerPartitionArgs &args) {
     totalBytesProgrammed = 0u;
     void *currentBatchBufferPointer = cpuPointer;
-    auto controlSectionOffset = computeControlSectionOffset<GfxFamily>(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync, useAtomicsForNativeCleanup);
-    if (synchronizeBeforeExecution) {
+    auto controlSectionOffset = computeControlSectionOffset<GfxFamily>(args);
+    if (args.synchronizeBeforeExecution) {
         auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, inTileCount);
-        programMiAtomic<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, false, MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT);
-
-        //if all tiles hit the atomic, it means we may go further
-        programWaitForSemaphore<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, tileCount, MI_SEMAPHORE_WAIT<GfxFamily>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+        programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount);
     }
 
-    programWparidMask<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, partitionCount);
+    programWparidMask<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args.partitionCount);
 
     programMiAtomic<GfxFamily>(currentBatchBufferPointer,
                                totalBytesProgrammed,
@@ -573,36 +559,32 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
     programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer,
                                          totalBytesProgrammed,
                                          gpuAddressOfAllocation +
-                                             computeWalkerSectionStart<GfxFamily>(partitionCount,
-                                                                                  synchronizeBeforeExecution,
-                                                                                  nativeCrossTileAtomicSync,
-                                                                                  useAtomicsForNativeCleanup),
+                                             computeWalkerSectionStart<GfxFamily>(args),
                                          true,
-                                         secondaryBatchBuffer);
+                                         args.secondaryBatchBuffer);
 
     //disable predication to not noop subsequent commands.
     programWparidPredication<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, false);
 
-    if (nativeCrossTileAtomicSync) {
+    if (args.nativeCrossTileAtomicSync) {
         const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount);
-        programNativeCrossTileSyncControl<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, useAtomicsForNativeCleanup);
+        programNativeCrossTileSyncControl<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForNativeCleanup);
     }
 
-    programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true);
+    if (args.usePipeControlStall) {
+        programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true);
+    }
 
-    if (isSemaphoreProgrammingRequired()) {
+    if (args.semaphoreProgrammingRequired) {
         auto postSyncAddress = inputWalker->getPostSync().getDestinationAddress() + 8llu;
-        for (uint32_t partitionId = 0u; partitionId < partitionCount; partitionId++) {
+        for (uint32_t partitionId = 0u; partitionId < args.partitionCount; partitionId++) {
             programWaitForSemaphore<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, postSyncAddress + partitionId * 16llu, 1u, MI_SEMAPHORE_WAIT<GfxFamily>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
         }
     }
 
-    if (isCrossTileAtomicRequired() || nativeCrossTileAtomicSync) {
+    if (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync) {
         auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, tileCount);
-        programMiAtomic<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, false, MI_ATOMIC<GfxFamily>::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT);
-
-        //if all tiles hit the atomic, it means we may go further
-        programWaitForSemaphore<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, tileCount, MI_SEMAPHORE_WAIT<GfxFamily>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
+        programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount);
     }
 
     //this bb start goes to the end of partitioned command buffer
@@ -611,12 +593,12 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
                                          totalBytesProgrammed,
                                          gpuAddressOfAllocation + controlSectionOffset + sizeof(BatchBufferControlData),
                                          false,
-                                         secondaryBatchBuffer);
+                                         args.secondaryBatchBuffer);
 
     //Walker section
-    programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, partitionCount);
+    programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
 
-    programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation, false, secondaryBatchBuffer);
+    programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation, false, args.secondaryBatchBuffer);
 
     auto controlSection = reinterpret_cast<BatchBufferControlData *>(ptrOffset(cpuPointer, static_cast<size_t>(controlSectionOffset)));
     controlSection->partitionCount = 0u;
@@ -626,18 +608,18 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
     totalBytesProgrammed += sizeof(BatchBufferControlData);
     currentBatchBufferPointer = ptrOffset(currentBatchBufferPointer, sizeof(BatchBufferControlData));
 
-    if (nativeCrossTileAtomicSync) {
+    if (args.nativeCrossTileAtomicSync) {
         const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount);
         programNativeCrossTileSyncCleanup<GfxFamily>(currentBatchBufferPointer,
                                                      totalBytesProgrammed,
                                                      finalSyncTileCountAddress,
                                                      gpuAddressOfAllocation + controlSectionOffset,
                                                      dynamicPartitioningFieldsForCleanupCount,
-                                                     tileCount,
-                                                     useAtomicsForNativeCleanup);
+                                                     args.tileCount,
+                                                     args.useAtomicsForNativeCleanup);
     }
 
-    if (emitBatchBufferEnd) {
+    if (args.emitBatchBufferEnd) {
         auto batchBufferEnd = putCommand<BATCH_BUFFER_END<GfxFamily>>(currentBatchBufferPointer, totalBytesProgrammed);
         *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd;
     }
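// Aside: the same parameter object now carries the construct call, shrinking
// its signature from eleven arguments to five. A hypothetical host-side
// invocation (buffer ownership and walker setup elided; the sketch function
// name is illustrative):
template <typename GfxFamily>
uint32_t sketchBuildDynamicPartitionedSection(void *cpuBase,
                                              uint64_t gpuBase,
                                              WalkerPartition::COMPUTE_WALKER<GfxFamily> *walker,
                                              WalkerPartition::WalkerPartitionArgs &args) {
    uint32_t totalBytesProgrammed = 0u;
    WalkerPartition::constructDynamicallyPartitionedCommandBuffer<GfxFamily>(cpuBase, gpuBase, walker, totalBytesProgrammed, args);
    return totalBytesProgrammed; // control section offset + control data + optional cleanup section
}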
         programNativeCrossTileSyncCleanup<GfxFamily>(currentBatchBufferPointer,
                                                      totalBytesProgrammed,
                                                      finalSyncTileCountAddress,
                                                      gpuAddressOfAllocation + controlSectionOffset,
                                                      dynamicPartitioningFieldsForCleanupCount,
-                                                     tileCount,
-                                                     useAtomicsForNativeCleanup);
+                                                     args.tileCount,
+                                                     args.useAtomicsForNativeCleanup);
     }
 
-    if (emitBatchBufferEnd) {
+    if (args.emitBatchBufferEnd) {
         auto batchBufferEnd = putCommand<BATCH_BUFFER_END<GfxFamily>>(currentBatchBufferPointer, totalBytesProgrammed);
         *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd;
     }
@@ -651,14 +633,28 @@ struct StaticPartitioningControlSection {
 static constexpr inline size_t staticPartitioningFieldsForCleanupCount = sizeof(StaticPartitioningControlSection) / sizeof(uint32_t) - 1;
 
 template <typename GfxFamily>
-uint64_t computeStaticPartitioningControlSectionOffset(uint32_t partitionCount, bool synchronizeBeforeExecution, bool nativeCrossTileAtomicSync, bool useAtomicsForNativeCleanup) {
-    const auto beforeExecutionSyncAtomicSize = synchronizeBeforeExecution ? (sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) + sizeof(MI_ATOMIC<GfxFamily>)) : 0u;
-    const auto afterExecutionSyncAtomicSize = (isCrossTileAtomicRequired() || nativeCrossTileAtomicSync) ? (sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) + sizeof(MI_ATOMIC<GfxFamily>)) : 0u;
-    const auto afterExecutionSyncPostSyncSize = isSemaphoreProgrammingRequired() ? sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) * partitionCount : 0u;
-    const auto nativeCrossTileSyncSize = nativeCrossTileAtomicSync ? computeNativeCrossTileSyncControlSectionSize<GfxFamily>(useAtomicsForNativeCleanup) : 0u;
+uint64_t computeStaticPartitioningControlSectionOffset(WalkerPartitionArgs &args) {
+    const auto beforeExecutionSyncAtomicSize = args.synchronizeBeforeExecution
+                                                   ? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>()
+                                                   : 0u;
+    const auto afterExecutionSyncAtomicSize = (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync)
+                                                  ? computeTilesSynchronizationWithAtomicsSectionSize<GfxFamily>()
+                                                  : 0u;
+    const auto afterExecutionSyncPostSyncSize = args.semaphoreProgrammingRequired
+                                                    ? sizeof(MI_SEMAPHORE_WAIT<GfxFamily>) * args.partitionCount
+                                                    : 0u;
+    const auto nativeCrossTileSyncSize = args.nativeCrossTileAtomicSync
+                                             ? computeNativeCrossTileSyncControlSectionSize<GfxFamily>(args.useAtomicsForNativeCleanup)
+                                             : 0u;
+    const auto wparidRegisterSize = args.initializeWparidRegister
+                                        ? sizeof(LOAD_REGISTER_MEM<GfxFamily>)
+                                        : 0u;
+    const auto pipeControlSize = args.usePipeControlStall
+                                     ?
sizeof(PIPE_CONTROL<GfxFamily>)
+                                     : 0u;
     return beforeExecutionSyncAtomicSize +
-           sizeof(LOAD_REGISTER_MEM<GfxFamily>) +
-           sizeof(PIPE_CONTROL<GfxFamily>) +
+           wparidRegisterSize +
+           pipeControlSize +
            sizeof(COMPUTE_WALKER<GfxFamily>) +
            nativeCrossTileSyncSize +
            afterExecutionSyncAtomicSize +
@@ -671,49 +667,48 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
                                          uint64_t gpuAddressOfAllocation,
                                          COMPUTE_WALKER<GfxFamily> *inputWalker,
                                          uint32_t &totalBytesProgrammed,
-                                         uint32_t partitionCount,
-                                         uint32_t tileCount,
-                                         bool synchronizeBeforeExecution,
-                                         bool secondaryBatchBuffer,
-                                         bool nativeCrossTileAtomicSync,
-                                         uint64_t workPartitionAllocationGpuVa,
-                                         bool useAtomicsForNativeCleanup) {
+                                         WalkerPartitionArgs &args) {
     totalBytesProgrammed = 0u;
     void *currentBatchBufferPointer = cpuPointer;
 
     // Get address of the control section
-    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<GfxFamily>(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync, useAtomicsForNativeCleanup);
+    const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<GfxFamily>(args);
     const auto afterControlSectionOffset = controlSectionOffset + sizeof(StaticPartitioningControlSection);
 
     // Synchronize tiles before walker
-    if (synchronizeBeforeExecution) {
+    if (args.synchronizeBeforeExecution) {
         const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
-        programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, tileCount);
+        programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount);
     }
 
     // Load partition ID to wparid register and execute walker
-    programMiLoadRegisterMem<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, workPartitionAllocationGpuVa, wparidCCSOffset);
-    programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, partitionCount);
+    if (args.initializeWparidRegister) {
+        programMiLoadRegisterMem<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset);
+    }
+    programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
 
     // Prepare for cleanup section
-    if (nativeCrossTileAtomicSync) {
+    if (args.nativeCrossTileAtomicSync) {
         const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
-        programNativeCrossTileSyncControl<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, useAtomicsForNativeCleanup);
+        programNativeCrossTileSyncControl<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForNativeCleanup);
     }
 
-    programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true); // flush L3 cache
+    if (args.usePipeControlStall) {
+        programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true); // flush L3 cache
+    }
 
     // Synchronize tiles after walker
-    if (isSemaphoreProgrammingRequired()) {
-        programTilesSynchronizationWithPostSyncs<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, partitionCount);
+    if (args.semaphoreProgrammingRequired) {
+        programTilesSynchronizationWithPostSyncs<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
     }
-    if (isCrossTileAtomicRequired() || nativeCrossTileAtomicSync) {
+
+    if (args.crossTileAtomicSynchronization || args.nativeCrossTileAtomicSync) {
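+        // each tile bumps the shared counter with a 4B-increment atomic, then a semaphore wait holds it until the counter reaches args.tileCount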
         const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
-        programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, tileCount);
+        programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount);
     }
 
     // Jump over the control section
-    programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + afterControlSectionOffset, false, secondaryBatchBuffer);
+    programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + afterControlSectionOffset, false, args.secondaryBatchBuffer);
 
     // Control section
     DEBUG_BREAK_IF(totalBytesProgrammed != controlSectionOffset);
@@ -724,35 +719,31 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
     DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset);
 
     // Cleanup section
-    if (nativeCrossTileAtomicSync) {
+    if (args.nativeCrossTileAtomicSync) {
         const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
         programNativeCrossTileSyncCleanup<GfxFamily>(currentBatchBufferPointer,
                                                      totalBytesProgrammed,
                                                      finalSyncTileCountAddress,
                                                      gpuAddressOfAllocation + controlSectionOffset,
                                                      staticPartitioningFieldsForCleanupCount,
-                                                     tileCount,
-                                                     useAtomicsForNativeCleanup);
+                                                     args.tileCount,
+                                                     args.useAtomicsForNativeCleanup);
     }
 }
 
 template <typename GfxFamily>
-uint64_t estimateSpaceRequiredInCommandBuffer(bool requiresBatchBufferEnd,
-                                              uint32_t partitionCount,
-                                              bool synchronizeBeforeExecution,
-                                              bool nativeCrossTileAtomicSync,
-                                              bool staticPartitioning,
-                                              bool useAtomicsForNativeCleanup) {
+uint64_t estimateSpaceRequiredInCommandBuffer(WalkerPartitionArgs &args) {
+
     uint64_t size = {};
-    if (staticPartitioning) {
-        size += computeStaticPartitioningControlSectionOffset<GfxFamily>(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync, useAtomicsForNativeCleanup);
+    if (args.staticPartitioning) {
+        size += computeStaticPartitioningControlSectionOffset<GfxFamily>(args);
         size += sizeof(StaticPartitioningControlSection);
-        size += nativeCrossTileAtomicSync ? computeNativeCrossTileSyncCleanupSectionSize<GfxFamily>(staticPartitioningFieldsForCleanupCount, useAtomicsForNativeCleanup) : 0u;
+        size += args.nativeCrossTileAtomicSync ? computeNativeCrossTileSyncCleanupSectionSize<GfxFamily>(staticPartitioningFieldsForCleanupCount, args.useAtomicsForNativeCleanup) : 0u;
     } else {
-        size += computeControlSectionOffset<GfxFamily>(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync, useAtomicsForNativeCleanup);
+        size += computeControlSectionOffset<GfxFamily>(args);
         size += sizeof(BatchBufferControlData);
-        size += requiresBatchBufferEnd ? sizeof(BATCH_BUFFER_END<GfxFamily>) : 0u;
-        size += nativeCrossTileAtomicSync ? computeNativeCrossTileSyncCleanupSectionSize<GfxFamily>(dynamicPartitioningFieldsForCleanupCount, useAtomicsForNativeCleanup) : 0u;
+        size += args.emitBatchBufferEnd ? sizeof(BATCH_BUFFER_END<GfxFamily>) : 0u;
+        size += args.nativeCrossTileAtomicSync ?
computeNativeCrossTileSyncCleanupSectionSize<GfxFamily>(dynamicPartitioningFieldsForCleanupCount, args.useAtomicsForNativeCleanup) : 0u;
     }
     return size;
 }
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index c81b6a20d2..39f12e609c 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -123,15 +123,11 @@ DECLARE_DEBUG_VARIABLE(int32_t, CFENumberOfWalkers, -1, "Set Number of Walkers i
 DECLARE_DEBUG_VARIABLE(int32_t, CFEMaximumNumberOfThreads, -1, "Set Maximum Number of Threads in CFE_STATE on XEHP, -1 - do not set")
 DECLARE_DEBUG_VARIABLE(int32_t, CFEOverDispatchControl, -1, "Set Over Dispatch Control in CFE_STATE on XEHP, -1 - do not set")
 DECLARE_DEBUG_VARIABLE(int32_t, CFELargeGRFThreadAdjustDisable, -1, "Set Large GRF thread adjust Disable field in CFE_STATE, -1 - do not set")
-DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWalkerInWparidMode, -1, "-1: default, 0: do not synchronize 1: synchronize all tiles prior to doing work distrubution")
-DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideNumComputeUnitsForScratch, -1, "Override number of compute units used for scratch size calculation")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceWorkgroupSize1x1x1, -1, "-1: default, 0: disable, 1: enable, force workgroup size 1x1x1 in builtins")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceThreadGroupDispatchSize, -1, "Set ThreadGroupDispatchSize in INTERFACE_DESCRIPTOR_DATA, -1 - default, 0 - TG size 8, 1 - TG size 4, 2 - TG size 2, 3 - Reserved")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceStatelessL1CachingPolicy, -1, "-1: default, >=0 : program value for stateless L1 caching")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceMemoryBankIndexOverride, -1, "-1: default, 0: disable, 1:enable, Force index=1 of memory bank for XEHP")
-DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSynchronizeWithSemaphores, -1, "Experimental implementation: 1: Emit Semaphores waiting after Walker completion in WPARID mode 0: do not emit semaphores after Walker")
-DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalForceCrossAtomicSynchronization, -1, "Experimental implementation: 1: Cross Tile Atomic Synchronization present 0: Cross tile atomic synchronization disabled")
 DECLARE_DEBUG_VARIABLE(int32_t, EnablePrivateScratchSlot1, -1, "-1: default, 0: disable, 1: enable Allows using private scratch space")
 DECLARE_DEBUG_VARIABLE(int32_t, DisablePipeControlPrecedingPostSyncCommand, -1, "-1 default - disabled adding PIPE_CONTROL, 0 - disabled adding PIPE_CONTROL, 1 - enabled adding PIPE_CONTROL")
 DECLARE_DEBUG_VARIABLE(int32_t, UseCachingPolicyForIndirectObjectHeap, -1, "Use selected caching policy for IOH, -1 - default, 0 - Uncached, 1 - L3 Caching, 2 - L1 Caching")
@@ -142,13 +138,11 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceMultiGpuPartialWrites, -1, "-1: default - 0
 DECLARE_DEBUG_VARIABLE(int32_t, ForceMultiGpuAtomicsInComputeMode, -1, "-1: default - 0 for multiOsContext capable, 0: program value 0 in MultiGpuAtomics bit in STATE_COMPUTE_MODE, 1: program value 1 in MultiGpuAtomics bit in STATE_COMPUTE_MODE")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceMultiGpuAtomics, -1, "-1: default - 0 for multiOsContext capable, 0: program value 0 in MultiGpuAtomics controls 1: program value 1 in MultiGpuAtomics controls")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceBufferCompressionFormat, -1, "-1: default, >0: Format value")
-DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionCount, 0, "Experimental implementation: Set number of COMPUTE_WALKERs for a given Partition Type, 0 - do not set the feature.")
 DECLARE_DEBUG_VARIABLE(int32_t, EnableHwGenerationLocalIds, -1, "-1: default, 0: disable, 1: enable : Enables generation of local ids on HW")
 DECLARE_DEBUG_VARIABLE(int32_t, WalkerPartitionPreferHighestDimension, -1, "-1: default, 0: prefer biggest dimension, 1: prefer Z over Y over X if they divide partition count evenly")
 DECLARE_DEBUG_VARIABLE(int32_t, SetMinimalPartitionSize, -1, "-1 default value set to 512 workgroups, 0 - disabled, >0 - minimal partition size in workgroups (should be power of 2)")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideBlitterTargetMemory, -1, "-1:default 0: overwrites to System 1: overwrites to Local")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideBlitterMocs, -1, "-1: default, >=0 SetGivenMocsInBlitterTransfers")
-DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionType, -1, "Experimental implementation: Set COMPUTE_WALKER Partition Type. Valid values for types from 1 to 3")
 DECLARE_DEBUG_VARIABLE(int32_t, OverridePostSyncMocs, -1, "-1: default, >=0 Override post sync mocs with value")
 DECLARE_DEBUG_VARIABLE(int32_t, EnableImmediateVmBindExt, -1, "Use immediate bind extension to a new residency model on Linux (requires kernel support), -1: default (enabled whith direct submission), 0: disabled, 1: enabled")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceExecutionTile, -1, "-1: default, 0+: given tile is choosen as submission, must be used with EnableWalkerPartition = 0.")
@@ -237,11 +231,21 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideComputeSupport, -1, "Ove
 DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driver default, 0: additional cache flush is present 1: disable dispatching cache flush commands")
 DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled")
 DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableMonitorFence, -1, "Disable dispatching monitor fence commands")
-DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU")
 DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmissionController, -1, "Enable direct submission terminating after given timeout, -1: default, 0: disabled, 1: enabled")
 DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerTimeout, -1, "Set direct submission controller timeout, -1: default 5 ms, >=0: timeout in ms")
 
+/* IMPLICIT SCALING */
+DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.")
+DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWalkerInWparidMode, -1, "-1: default, 0: do not synchronize, 1: synchronize all tiles prior to doing work distribution")
+DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWithSemaphores, -1, "-1: default (disabled), 1: Emit Semaphores waiting after Walker completion in WPARID mode, 0: do not emit semaphores after Walker")
+DECLARE_DEBUG_VARIABLE(int32_t, UseCrossAtomicSynchronization, -1, "-1: default (enabled), 1: Cross Tile Atomic Synchronization present, 0: Cross tile atomic synchronization disabled")
+DECLARE_DEBUG_VARIABLE(int32_t, UseAtomicsForNativeSectionCleanup, -1, "-1: default (disabled), 0: use store data op, 1: use atomic op")
+DECLARE_DEBUG_VARIABLE(int32_t, ProgramNativeCleanup, -1, "-1: default (API dependent), 0: Do not program native
cleanup, 1: program native cleanup") +DECLARE_DEBUG_VARIABLE(int32_t, WparidRegisterProgramming, -1, "-1: default (enabled), 0: do not program wparid register, 1: programing wparid register") +DECLARE_DEBUG_VARIABLE(int32_t, UsePipeControlAfterPartitionedWalker, -1, "-1: default (enabled), 0: do not add PipeControl, 1: add PipeControl") + /*FEATURE FLAGS*/ +DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU") DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension") DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension") DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter") @@ -305,8 +309,9 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicPipelineSelect, -1, "set SYSTOLI DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicInComputeWalker, -1, "set SYSTOLIC MODE ENABLE in COMPUTE_WALKER cmd, -1:default, 0:disable, 1:enable") /*EXPERIMENTAL TOGGLES*/ +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionCount, 0, "Experimental implementation: Set number of COMPUTE_WALKERs for a given Partition Type, 0 - do not set the feature.") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionType, -1, "Experimental implementation: Set COMPUTE_WALKER Partition Type. Valid values for types from 1 to 3") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, "Align local memory allocations to a given value. Works only with allocations at least as big as the value. 0: no effect, 2097152: 2 megabytes, 1073741824: 1 gigabyte") -DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalUseAtomicsForNativeSectionCleanup, -1, "-1: default (disabled), 0: use store data op, 1: use atomic op") /*DRIVER TOGGLES*/ DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version") diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index a124e9cae4..cfaf616331 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1026,7 +1026,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp uint32_t expectedPartitionSize = (dims[0] + partitionCount - 1u) / partitionCount; EXPECT_EQ(expectedPartitionSize, partitionWalkerCmd->getPartitionSize()); - auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset(partitionCount, false, true, false); + WalkerPartition::WalkerPartitionArgs args = {}; + args.initializeWparidRegister = true; + args.usePipeControlStall = true; + args.partitionCount = partitionCount; + args.nativeCrossTileAtomicSync = true; + + auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset(args); uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() + cleanupSectionOffset; constexpr uint32_t expectedData = 0ull; diff --git a/shared/test/unit_test/encoders/test_implicit_scaling.cpp b/shared/test/unit_test/encoders/test_implicit_scaling.cpp index 69d5dcf016..d54ffbf5e6 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling.cpp @@ -51,11 +51,85 @@ TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingAtomicsForNativeCle } TEST_F(ImplicitScalingTests, 
givenForceNotUseAtomicsWhenCheckingAtomicsForNativeCleanupThenExpectFalse) { - DebugManager.flags.ExperimentalUseAtomicsForNativeSectionCleanup.set(0); + DebugManager.flags.UseAtomicsForNativeSectionCleanup.set(0); EXPECT_FALSE(ImplicitScalingHelper::useAtomicsForNativeCleanup()); } TEST_F(ImplicitScalingTests, givenForceUseAtomicsWhenCheckingAtomicsForNativeCleanupThenExpectTrue) { - DebugManager.flags.ExperimentalUseAtomicsForNativeSectionCleanup.set(1); + DebugManager.flags.UseAtomicsForNativeSectionCleanup.set(1); EXPECT_TRUE(ImplicitScalingHelper::useAtomicsForNativeCleanup()); } + +TEST_F(ImplicitScalingTests, givenDefaultSettingsIsFalseWhenCheckingProgramNativeCleanupThenExpectFalse) { + EXPECT_FALSE(ImplicitScalingHelper::programNativeCleanup(false)); +} + +TEST_F(ImplicitScalingTests, givenDefaultSettingsIsTrueWhenCheckingProgramNativeCleanupThenExpectTrue) { + EXPECT_TRUE(ImplicitScalingHelper::programNativeCleanup(true)); +} + +TEST_F(ImplicitScalingTests, givenForceNotProgramNativeCleanupWhenDefaultNativeCleanupIsTrueThenExpectFalse) { + DebugManager.flags.ProgramNativeCleanup.set(0); + EXPECT_FALSE(ImplicitScalingHelper::programNativeCleanup(true)); +} + +TEST_F(ImplicitScalingTests, givenForceProgramNativeCleanupWhenDefaultNativeCleanupIsFalseThenExpectTrue) { + DebugManager.flags.ProgramNativeCleanup.set(1); + EXPECT_TRUE(ImplicitScalingHelper::programNativeCleanup(false)); +} + +TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingToProgramWparidRegisterThenExpectTrue) { + EXPECT_TRUE(ImplicitScalingHelper::initWparidRegister()); +} + +TEST_F(ImplicitScalingTests, givenForceNotProgramWparidRegisterWhenCheckingRegisterProgramThenExpectFalse) { + DebugManager.flags.WparidRegisterProgramming.set(0); + EXPECT_FALSE(ImplicitScalingHelper::initWparidRegister()); +} + +TEST_F(ImplicitScalingTests, givenForceProgramWparidRegisterWhenCheckingRegisterProgramThenExpectTrue) { + DebugManager.flags.WparidRegisterProgramming.set(1); + EXPECT_TRUE(ImplicitScalingHelper::initWparidRegister()); +} + +TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingToUsePipeControlThenExpectTrue) { + EXPECT_TRUE(ImplicitScalingHelper::usePipeControl()); +} + +TEST_F(ImplicitScalingTests, givenForceNotUsePipeControlWhenCheckingPipeControlUseThenExpectFalse) { + DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(0); + EXPECT_FALSE(ImplicitScalingHelper::usePipeControl()); +} + +TEST_F(ImplicitScalingTests, givenForceUsePipeControlWhenCheckingPipeControlUseThenExpectTrue) { + DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(1); + EXPECT_TRUE(ImplicitScalingHelper::usePipeControl()); +} + +TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingSemaphoreUseThenExpectFalse) { + EXPECT_FALSE(ImplicitScalingHelper::isSemaphoreProgrammingRequired()); +} + +TEST_F(ImplicitScalingTests, givenForceSemaphoreNotUseWhenCheckingSemaphoreUseThenExpectFalse) { + DebugManager.flags.SynchronizeWithSemaphores.set(0); + EXPECT_FALSE(ImplicitScalingHelper::isSemaphoreProgrammingRequired()); +} + +TEST_F(ImplicitScalingTests, givenForceSemaphoreUseWhenCheckingSemaphoreUseThenExpectTrue) { + DebugManager.flags.SynchronizeWithSemaphores.set(1); + EXPECT_TRUE(ImplicitScalingHelper::isSemaphoreProgrammingRequired()); +} + +TEST_F(ImplicitScalingTests, givenDefaultSettingsWhenCheckingCrossTileAtomicSyncThenExpectTrue) { + EXPECT_TRUE(ImplicitScalingHelper::isCrossTileAtomicRequired()); +} + +TEST_F(ImplicitScalingTests, givenForceDisableWhenCheckingCrossTileAtomicSyncThenExpectFalse) { + 
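   // -1 keeps the default (enabled); 0 forces the cross-tile atomic synchronization off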
+    DebugManager.flags.UseCrossAtomicSynchronization.set(0);
+    EXPECT_FALSE(ImplicitScalingHelper::isCrossTileAtomicRequired());
+}
+
+TEST_F(ImplicitScalingTests, givenForceEnableWhenCheckingCrossTileAtomicSyncThenExpectTrue) {
+    DebugManager.flags.UseCrossAtomicSynchronization.set(1);
+    EXPECT_TRUE(ImplicitScalingHelper::isCrossTileAtomicRequired());
+}
diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp
index 6211f6b209..98de45c13a 100644
--- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp
+++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp
@@ -232,3 +232,111 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
     auto itorLrm = find<MI_LOAD_REGISTER_MEM *>(loadRegisterMemList.begin(), loadRegisterMemList.end());
     ASSERT_NE(itorLrm, loadRegisterMemList.end());
 }
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPreferredWhenForceDisabledWparidRegisterThenExpectNoCommandFound) {
+    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
+    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
+    using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
+    using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
+
+    DebugManager.flags.WparidRegisterProgramming.set(0);
+
+    uint64_t workPartitionAllocationAddress = 0x987654;
+    uint64_t postSyncAddress = (1ull << 48) | (1ull << 24);
+
+    WALKER_TYPE walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(1);
+    auto &postSync = walker.getPostSync();
+    postSync.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP);
+    postSync.setDestinationAddress(postSyncAddress);
+
+    size_t expectedSize = 0;
+    size_t totalBytesProgrammed = 0;
+
+    expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
+
+    uint32_t partitionCount = 0;
+    ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress);
+    totalBytesProgrammed = commandStream.getUsed();
+    EXPECT_EQ(expectedSize, totalBytesProgrammed);
+    EXPECT_EQ(twoTile.count(), partitionCount);
+
+    HardwareParse hwParser;
+    hwParser.parseCommands<FamilyType>(commandStream, 0);
+
+    GenCmdList loadRegisterMemList = hwParser.getCommandsList<MI_LOAD_REGISTER_MEM>();
+    auto itorLrm = find<MI_LOAD_REGISTER_MEM *>(loadRegisterMemList.begin(), loadRegisterMemList.end());
+    EXPECT_EQ(itorLrm, loadRegisterMemList.end());
+}
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPreferredWhenForceDisabledPipeControlThenExpectNoCommandFound) {
+    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
+    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
+    using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
+    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
+
+    DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(0);
+
+    uint64_t workPartitionAllocationAddress = 0x987654;
+    uint64_t postSyncAddress = (1ull << 48) | (1ull << 24);
+
+    WALKER_TYPE walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(1);
+    auto &postSync = walker.getPostSync();
+    postSync.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP);
+    postSync.setDestinationAddress(postSyncAddress);
+
+    size_t expectedSize = 0;
+    size_t totalBytesProgrammed = 0;
+
+    expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
+
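+    // the estimate and the dispatch consume the same toggles, so the EXPECT_EQ below checks they agree byte for byte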
+    uint32_t partitionCount = 0;
+    ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress);
+    totalBytesProgrammed = commandStream.getUsed();
+    EXPECT_EQ(expectedSize, totalBytesProgrammed);
+    EXPECT_EQ(twoTile.count(), partitionCount);
+
+    HardwareParse hwParser;
+    hwParser.parseCommands<FamilyType>(commandStream, 0);
+
+    GenCmdList pipeControlList = hwParser.getCommandsList<PIPE_CONTROL>();
+    auto itorPipeControl = find<PIPE_CONTROL *>(pipeControlList.begin(), pipeControlList.end());
+    EXPECT_EQ(itorPipeControl, pipeControlList.end());
+}
+
+HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPreferredWhenForceDisabledPipeControlThenExpectNoCommandFound) {
+    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
+    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
+    using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
+    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
+
+    DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(0);
+
+    uint64_t workPartitionAllocationAddress = 0x0;
+    uint64_t postSyncAddress = (1ull << 48) | (1ull << 24);
+
+    WALKER_TYPE walker = FamilyType::cmdInitGpgpuWalker;
+    walker.setThreadGroupIdXDimension(32);
+    auto &postSync = walker.getPostSync();
+    postSync.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP);
+    postSync.setDestinationAddress(postSyncAddress);
+
+    size_t expectedSize = 0;
+    size_t totalBytesProgrammed = 0;
+
+    expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
+
+    uint32_t partitionCount = 0;
+    ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress);
+    totalBytesProgrammed = commandStream.getUsed();
+    EXPECT_EQ(expectedSize, totalBytesProgrammed);
+    EXPECT_EQ(twoTile.count(), partitionCount);
+
+    HardwareParse hwParser;
+    hwParser.parseCommands<FamilyType>(commandStream, 0);
+
+    GenCmdList pipeControlList = hwParser.getCommandsList<PIPE_CONTROL>();
+    auto itorPipeControl = find<PIPE_CONTROL *>(pipeControlList.begin(), pipeControlList.end());
+    EXPECT_EQ(itorPipeControl, pipeControlList.end());
+}
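
To make the new argument-struct flow concrete, the sketch below shows how a caller might fold the new ImplicitScalingHelper toggles into a single WalkerPartition::WalkerPartitionArgs before sizing and constructing a statically partitioned buffer. This is illustrative only: buildPartitionArgs and the exact toggle-to-field mapping are assumptions made for this example, while the struct fields, the ImplicitScalingHelper queries, and estimateSpaceRequiredInCommandBuffer are the ones introduced by this patch.

    #include "shared/source/command_container/implicit_scaling.h"
    #include "shared/source/command_container/walker_partition_xehp_and_later.h"

    // Hypothetical helper, not part of this patch: collects the debug-variable
    // backed toggles into WalkerPartitionArgs so sizing and construction see
    // identical settings.
    template <typename GfxFamily>
    WalkerPartition::WalkerPartitionArgs buildPartitionArgs(uint32_t partitionCount,
                                                            uint32_t tileCount,
                                                            uint64_t workPartitionAllocationGpuVa,
                                                            bool nativeCleanupDefault) {
        WalkerPartition::WalkerPartitionArgs args = {};
        args.partitionCount = partitionCount;
        args.tileCount = tileCount;
        args.staticPartitioning = true;
        args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa;

        // Each query below is overridable through one of the IMPLICIT SCALING
        // debug variables added above (named in the trailing comments).
        args.initializeWparidRegister = NEO::ImplicitScalingHelper::initWparidRegister();                     // WparidRegisterProgramming
        args.usePipeControlStall = NEO::ImplicitScalingHelper::usePipeControl();                              // UsePipeControlAfterPartitionedWalker
        args.semaphoreProgrammingRequired = NEO::ImplicitScalingHelper::isSemaphoreProgrammingRequired();     // SynchronizeWithSemaphores
        args.crossTileAtomicSynchronization = NEO::ImplicitScalingHelper::isCrossTileAtomicRequired();        // UseCrossAtomicSynchronization
        args.useAtomicsForNativeCleanup = NEO::ImplicitScalingHelper::useAtomicsForNativeCleanup();           // UseAtomicsForNativeSectionCleanup
        // assumed mapping: ProgramNativeCleanup decides whether the native
        // cross-tile cleanup section is emitted
        args.nativeCrossTileAtomicSync = NEO::ImplicitScalingHelper::programNativeCleanup(nativeCleanupDefault);
        return args;
    }

Because estimateSpaceRequiredInCommandBuffer<GfxFamily>(args) and constructStaticallyPartitionedCommandBuffer<GfxFamily>(..., args) now consume the same struct, the size estimate and the emitted command stream can no longer silently disagree about which optional sections are present, which the old six-bool parameter lists made easy to get wrong.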