mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 14:33:04 +08:00
Refactor and modularize walker partition code
Related-To: NEO-6244
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e82c2e4653
commit
b65d8909e4
@@ -88,7 +88,6 @@ set(IGDRCL_SRCS_tests_command_queue
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ooq_task_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/read_write_buffer_cpu_copy.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sync_buffer_handler_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/work_group_size_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/zero_size_enqueue_tests.cpp
|
||||
)
|
||||
@@ -98,7 +97,10 @@ if(TESTS_XEHP_AND_LATER)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barier_tests_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_fixture_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_fixture_xehp_and_later.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later_1.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_and_later_2.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -1072,8 +1072,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPart
|
||||
MockClDevice *device = deviceFactory.rootDevices[0];
|
||||
MockContext context{device};
|
||||
|
||||
auto synchronizeBeforeExecution = false;
|
||||
auto staticPartitioning = false;
|
||||
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, device, nullptr);
|
||||
auto &csr = cmdQ->getUltCommandStreamReceiver();
|
||||
|
||||
@@ -1087,35 +1085,39 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPart
|
||||
DispatchInfo dispatchInfo{};
|
||||
dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
|
||||
|
||||
synchronizeBeforeExecution = false;
|
||||
WalkerPartition::WalkerPartitionArgs testArgs = {};
|
||||
testArgs.initializeWparidRegister = true;
|
||||
testArgs.crossTileAtomicSynchronization = true;
|
||||
testArgs.usePipeControlStall = true;
|
||||
testArgs.partitionCount = 2u;
|
||||
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
|
||||
|
||||
DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
|
||||
staticPartitioning = false;
|
||||
testArgs.staticPartitioning = false;
|
||||
testArgs.synchronizeBeforeExecution = false;
|
||||
csr.staticWorkPartitioningEnabled = false;
|
||||
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
|
||||
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
|
||||
auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
|
||||
EXPECT_EQ(returnedSize, partitionSize + baseSize);
|
||||
|
||||
synchronizeBeforeExecution = false;
|
||||
DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
|
||||
staticPartitioning = true;
|
||||
testArgs.staticPartitioning = true;
|
||||
csr.staticWorkPartitioningEnabled = true;
|
||||
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
|
||||
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
|
||||
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
|
||||
EXPECT_EQ(returnedSize, partitionSize + baseSize);
|
||||
|
||||
synchronizeBeforeExecution = true;
|
||||
DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
|
||||
staticPartitioning = false;
|
||||
testArgs.synchronizeBeforeExecution = true;
|
||||
testArgs.staticPartitioning = false;
|
||||
csr.staticWorkPartitioningEnabled = false;
|
||||
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
|
||||
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
|
||||
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
|
||||
EXPECT_EQ(returnedSize, partitionSize + baseSize);
|
||||
|
||||
synchronizeBeforeExecution = true;
|
||||
DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
|
||||
staticPartitioning = true;
|
||||
testArgs.synchronizeBeforeExecution = true;
|
||||
testArgs.staticPartitioning = true;
|
||||
csr.staticWorkPartitioningEnabled = true;
|
||||
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, synchronizeBeforeExecution, false, staticPartitioning, false);
|
||||
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
|
||||
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
|
||||
EXPECT_EQ(returnedSize, partitionSize + baseSize);
|
||||
}
|
||||
@@ -1167,7 +1169,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenQueueIsMul
|
||||
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
|
||||
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);
|
||||
|
||||
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(false, 16u, false, false, false, false);
|
||||
WalkerPartition::WalkerPartitionArgs testArgs = {};
|
||||
testArgs.initializeWparidRegister = true;
|
||||
testArgs.usePipeControlStall = true;
|
||||
testArgs.crossTileAtomicSynchronization = true;
|
||||
testArgs.partitionCount = 16u;
|
||||
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
|
||||
|
||||
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
|
||||
|
||||
DispatchInfo dispatchInfo{};
|
||||
dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/test/unit_test/command_queue/walker_partition_fixture_xehp_and_later.h"
|
||||
|
||||
// Per-test setup: seeds testArgs with the fixture's baseline flag values and
// makes cmdBufferAddress refer to the beginning of cmdBuffer.
void WalkerPartitionTests::SetUp() {
    // Flags the fixture enables by default.
    testArgs.crossTileAtomicSynchronization = true;
    testArgs.usePipeControlStall = true;
    testArgs.initializeWparidRegister = true;

    // Flags the fixture leaves disabled by default.
    testArgs.nativeCrossTileAtomicSync = false;
    testArgs.synchronizeBeforeExecution = false;

    cmdBufferAddress = cmdBuffer;
}
|
||||
|
||||
void WalkerPartitionTests::TearDown() {
|
||||
auto initialCommandBufferPointer = cmdBuffer;
|
||||
if (checkForProperCmdBufferAddressOffset) {
|
||||
EXPECT_EQ(ptrDiff(cmdBufferAddress, initialCommandBufferPointer), totalBytesProgrammed);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
using namespace WalkerPartition;
|
||||
|
||||
struct WalkerPartitionTests : public ::testing::Test {
|
||||
void SetUp() override;
|
||||
|
||||
void TearDown() override;
|
||||
|
||||
template <typename GfxFamily>
|
||||
auto createWalker(uint64_t postSyncAddress) {
|
||||
WalkerPartition::COMPUTE_WALKER<GfxFamily> walker;
|
||||
walker = GfxFamily::cmdInitGpgpuWalker;
|
||||
walker.setPartitionType(COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE::PARTITION_TYPE_X);
|
||||
auto &postSync = walker.getPostSync();
|
||||
postSync.setOperation(POSTSYNC_DATA<GfxFamily>::OPERATION::OPERATION_WRITE_TIMESTAMP);
|
||||
postSync.setDestinationAddress(postSyncAddress);
|
||||
return walker;
|
||||
}
|
||||
|
||||
char cmdBuffer[4096u];
|
||||
WalkerPartition::WalkerPartitionArgs testArgs = {};
|
||||
void *cmdBufferAddress = nullptr;
|
||||
uint32_t totalBytesProgrammed = 0u;
|
||||
bool checkForProperCmdBufferAddressOffset = true;
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -14,3 +14,11 @@ using namespace NEO;
|
||||
// Expects ImplicitScaling::apiSupport to evaluate to true.
TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSupportEnabled) {
|
||||
EXPECT_TRUE(ImplicitScaling::apiSupport);
|
||||
}
|
||||
|
||||
// Expects ImplicitScaling::semaphoreProgrammingRequired to evaluate to false.
TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSemaphoreProgrammingRequiredIsFalse) {
|
||||
EXPECT_FALSE(ImplicitScaling::semaphoreProgrammingRequired);
|
||||
}
|
||||
|
||||
// Expects ImplicitScaling::crossTileAtomicSynchronization to evaluate to true.
TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenCrossTileAtomicSynchronization) {
|
||||
EXPECT_TRUE(ImplicitScaling::crossTileAtomicSynchronization);
|
||||
}
|
||||
|
||||
@@ -266,8 +266,8 @@ ForceWorkgroupSize1x1x1 = -1
|
||||
ForceThreadGroupDispatchSize = -1
|
||||
ForceStatelessL1CachingPolicy = -1
|
||||
ForceMemoryBankIndexOverride = -1
|
||||
ExperimentalSynchronizeWithSemaphores = -1
|
||||
ExperimentalForceCrossAtomicSynchronization = -1
|
||||
SynchronizeWithSemaphores = -1
|
||||
UseCrossAtomicSynchronization = -1
|
||||
EnableStatelessCompression = -1
|
||||
EnableMultiTileCompression = -1
|
||||
EnablePrivateScratchSlot1 = -1
|
||||
@@ -313,7 +313,7 @@ OverrideUseKmdWaitFunction = -1
|
||||
EnableCacheFlushAfterWalkerForAllQueues = -1
|
||||
Force32BitDriverSupport = -1
|
||||
OverrideCmdQueueSynchronousMode = -1
|
||||
ExperimentalUseAtomicsForNativeSectionCleanup = -1
|
||||
UseAtomicsForNativeSectionCleanup = -1
|
||||
HBMSizePerTileInGigabytes = 0
|
||||
OverrideSystolicPipelineSelect = -1
|
||||
OverrideSystolicInComputeWalker = -1
|
||||
@@ -324,6 +324,9 @@ DoNotFreeResources = 0
|
||||
OverrideGmmResourceUsageField = -1
|
||||
LogAllocationType = 0
|
||||
ProgramAdditionalPipeControlBeforeStateComputeModeCommand = 0
|
||||
ProgramNativeCleanup = -1
|
||||
WparidRegisterProgramming = -1
|
||||
UsePipeControlAfterPartitionedWalker = -1
|
||||
OverrideBufferSuitableForRenderCompression = -1
|
||||
AllowMixingRegularAndCooperativeKernels = 0
|
||||
AllowPatchingVfeStateInCommandLists = 0
|
||||
Reference in New Issue
Block a user