test: refactor aub tests to add fixture into separate file
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
9ba671fde3
commit
98fd7c9432
|
@ -9,6 +9,7 @@
|
||||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||||
#include "shared/source/helpers/array_count.h"
|
#include "shared/source/helpers/array_count.h"
|
||||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||||
|
#include "shared/source/helpers/compiler_product_helper.h"
|
||||||
#include "shared/source/helpers/file_io.h"
|
#include "shared/source/helpers/file_io.h"
|
||||||
#include "shared/source/helpers/register_offsets.h"
|
#include "shared/source/helpers/register_offsets.h"
|
||||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||||
|
@ -36,7 +37,9 @@ struct DebuggerAubFixture : AUBFixtureL0 {
|
||||||
AUBFixtureL0::setUp(NEO::defaultHwInfo.get(), true);
|
AUBFixtureL0::setUp(NEO::defaultHwInfo.get(), true);
|
||||||
}
|
}
|
||||||
void tearDown() {
|
void tearDown() {
|
||||||
module->destroy();
|
if (module != nullptr) {
|
||||||
|
module->destroy();
|
||||||
|
}
|
||||||
AUBFixtureL0::tearDown();
|
AUBFixtureL0::tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,6 +66,9 @@ using DebuggerSingleAddressSpaceAub = Test<DebuggerSingleAddressSpaceAubFixture>
|
||||||
using PlatformsSupportingSingleAddressSpace = MatchAny;
|
using PlatformsSupportingSingleAddressSpace = MatchAny;
|
||||||
|
|
||||||
HWTEST2_F(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExecutedThenSbaAddressesAreTracked, PlatformsSupportingSingleAddressSpace) {
|
HWTEST2_F(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExecutedThenSbaAddressesAreTracked, PlatformsSupportingSingleAddressSpace) {
|
||||||
|
if (neoDevice->getCompilerProductHelper().isHeaplessModeEnabled()) {
|
||||||
|
GTEST_SKIP();
|
||||||
|
}
|
||||||
constexpr size_t bufferSize = MemoryConstants::pageSize;
|
constexpr size_t bufferSize = MemoryConstants::pageSize;
|
||||||
const uint32_t groupSize[] = {32, 1, 1};
|
const uint32_t groupSize[] = {32, 1, 1};
|
||||||
const uint32_t groupCount[] = {bufferSize / 32, 1, 1};
|
const uint32_t groupCount[] = {bufferSize / 32, 1, 1};
|
||||||
|
|
|
@ -5,262 +5,29 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
|
|
||||||
#include "shared/source/helpers/array_count.h"
|
#include "shared/source/helpers/array_count.h"
|
||||||
#include "shared/source/helpers/basic_math.h"
|
#include "shared/source/helpers/basic_math.h"
|
||||||
#include "shared/source/helpers/timestamp_packet.h"
|
|
||||||
#include "shared/source/utilities/io_functions.h"
|
#include "shared/source/utilities/io_functions.h"
|
||||||
#include "shared/source/utilities/tag_allocator.h"
|
#include "shared/source/utilities/tag_allocator.h"
|
||||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
|
||||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
|
||||||
#include "shared/test/common/helpers/dispatch_flags_helper.h"
|
|
||||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||||
#include "shared/test/common/test_macros/hw_test.h"
|
|
||||||
|
|
||||||
#include "opencl/source/command_queue/command_queue.h"
|
#include "opencl/source/command_queue/command_queue.h"
|
||||||
#include "opencl/source/event/event.h"
|
#include "opencl/source/event/event.h"
|
||||||
#include "opencl/source/mem_obj/buffer.h"
|
#include "opencl/test/unit_test/aub_tests/fixtures/aub_walker_partition_fixture.h"
|
||||||
#include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h"
|
|
||||||
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
|
|
||||||
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
|
|
||||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
|
||||||
#include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h"
|
|
||||||
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"
|
|
||||||
|
|
||||||
using namespace NEO;
|
using namespace NEO;
|
||||||
using namespace WalkerPartition;
|
using namespace WalkerPartition;
|
||||||
|
|
||||||
static int32_t testPartitionCount[] = {1, 2, 4, 8, 16};
|
int32_t testPartitionCount[] = {1, 2, 4, 8, 16};
|
||||||
static int32_t testPartitionType[] = {1, 2, 3};
|
int32_t testPartitionType[] = {1, 2, 3};
|
||||||
static uint32_t testWorkingDimensions[] = {3};
|
uint32_t testWorkingDimensions[] = {3};
|
||||||
|
|
||||||
extern bool generateRandomInput;
|
DispatchParameters dispatchParametersForTests[] = {
|
||||||
|
|
||||||
struct DispatchParameters {
|
|
||||||
size_t globalWorkSize[3];
|
|
||||||
size_t localWorkSize[3];
|
|
||||||
} dispatchParametersForTests[] = {
|
|
||||||
{{12, 25, 21}, {3, 5, 7}},
|
{{12, 25, 21}, {3, 5, 7}},
|
||||||
{{8, 16, 20}, {8, 4, 2}},
|
{{8, 16, 20}, {8, 4, 2}},
|
||||||
{{7, 13, 17}, {1, 1, 1}},
|
{{7, 13, 17}, {1, 1, 1}},
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AubWalkerPartitionFixture : public KernelAUBFixture<SimpleKernelFixture> {
|
|
||||||
void setUp() {
|
|
||||||
debugRestorer = std::make_unique<DebugManagerStateRestore>();
|
|
||||||
debugManager.flags.EnableTimestampPacket.set(1);
|
|
||||||
kernelIds |= (1 << 5);
|
|
||||||
KernelAUBFixture<SimpleKernelFixture>::setUp();
|
|
||||||
|
|
||||||
size_t userMemorySize = 16 * MemoryConstants::kiloByte;
|
|
||||||
if (generateRandomInput) {
|
|
||||||
userMemorySize = 16000 * MemoryConstants::kiloByte;
|
|
||||||
}
|
|
||||||
|
|
||||||
sizeUserMemory = userMemorySize;
|
|
||||||
auto destMemory = alignedMalloc(sizeUserMemory, 4096);
|
|
||||||
ASSERT_NE(nullptr, destMemory);
|
|
||||||
memset(destMemory, 0x0, sizeUserMemory);
|
|
||||||
|
|
||||||
dstBuffer.reset(Buffer::create(context, CL_MEM_COPY_HOST_PTR, sizeUserMemory, destMemory, retVal));
|
|
||||||
ASSERT_NE(nullptr, dstBuffer);
|
|
||||||
alignedFree(destMemory);
|
|
||||||
|
|
||||||
kernels[5]->setArg(0, dstBuffer.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
void tearDown() {
|
|
||||||
pCmdQ->flush();
|
|
||||||
|
|
||||||
KernelAUBFixture<SimpleKernelFixture>::tearDown();
|
|
||||||
}
|
|
||||||
template <typename FamilyType>
|
|
||||||
void validatePartitionProgramming(uint64_t postSyncAddress, int32_t partitionCount) {
|
|
||||||
using WalkerVariant = typename FamilyType::WalkerVariant;
|
|
||||||
uint32_t totalWorkgroupCount = 1u;
|
|
||||||
uint32_t totalWorkItemsInWorkgroup = 1u;
|
|
||||||
uint32_t totalWorkItemsCount = 1;
|
|
||||||
|
|
||||||
for (auto dimension = 0u; dimension < workingDimensions; dimension++) {
|
|
||||||
totalWorkgroupCount *= static_cast<uint32_t>(dispatchParamters.globalWorkSize[dimension] / dispatchParamters.localWorkSize[dimension]);
|
|
||||||
totalWorkItemsInWorkgroup *= static_cast<uint32_t>(dispatchParamters.localWorkSize[dimension]);
|
|
||||||
totalWorkItemsCount *= static_cast<uint32_t>(dispatchParamters.globalWorkSize[dimension]);
|
|
||||||
}
|
|
||||||
|
|
||||||
const uint32_t workgroupCount = static_cast<uint32_t>(dispatchParamters.globalWorkSize[partitionType - 1] / dispatchParamters.localWorkSize[partitionType - 1]);
|
|
||||||
auto partitionSize = Math::divideAndRoundUp(workgroupCount, partitionCount);
|
|
||||||
|
|
||||||
if (static_cast<uint32_t>(partitionType) > workingDimensions) {
|
|
||||||
partitionSize = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
hwParser.parseCommands<FamilyType>(pCmdQ->getCS(0), 0);
|
|
||||||
hwParser.findHardwareCommands<FamilyType>();
|
|
||||||
|
|
||||||
WalkerVariant walkerVariant = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(*hwParser.itorWalker);
|
|
||||||
|
|
||||||
std::visit([&](auto &&walkerCmd) {
|
|
||||||
using WalkerType = std::decay_t<decltype(*walkerCmd)>;
|
|
||||||
using PostSyncType = decltype(FamilyType::template getPostSyncType<WalkerType>());
|
|
||||||
|
|
||||||
EXPECT_EQ(0u, walkerCmd->getPartitionId());
|
|
||||||
|
|
||||||
if (partitionCount > 1) {
|
|
||||||
EXPECT_TRUE(walkerCmd->getWorkloadPartitionEnable());
|
|
||||||
EXPECT_EQ(partitionSize, walkerCmd->getPartitionSize());
|
|
||||||
EXPECT_EQ(partitionType, walkerCmd->getPartitionType());
|
|
||||||
} else {
|
|
||||||
EXPECT_FALSE(walkerCmd->getWorkloadPartitionEnable());
|
|
||||||
EXPECT_EQ(0u, walkerCmd->getPartitionSize());
|
|
||||||
EXPECT_EQ(0u, walkerCmd->getPartitionType());
|
|
||||||
}
|
|
||||||
|
|
||||||
EXPECT_EQ(PostSyncType::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
|
|
||||||
EXPECT_EQ(postSyncAddress, walkerCmd->getPostSync().getDestinationAddress());
|
|
||||||
|
|
||||||
int notExpectedValue[] = {1, 1, 1, 1};
|
|
||||||
|
|
||||||
for (auto partitionId = 0; partitionId < debugManager.flags.ExperimentalSetWalkerPartitionCount.get(); partitionId++) {
|
|
||||||
expectNotEqualMemory<FamilyType>(reinterpret_cast<void *>(postSyncAddress), &notExpectedValue, sizeof(notExpectedValue));
|
|
||||||
postSyncAddress += 16; // next post sync needs to be right after the previous one
|
|
||||||
}
|
|
||||||
},
|
|
||||||
walkerVariant);
|
|
||||||
|
|
||||||
auto dstGpuAddress = addrToPtr(ptrOffset(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), dstBuffer->getOffset()));
|
|
||||||
expectMemory<FamilyType>(dstGpuAddress, &totalWorkItemsCount, sizeof(uint32_t));
|
|
||||||
auto groupSpecificWorkCounts = ptrOffset(dstGpuAddress, 4);
|
|
||||||
StackVec<uint32_t, 8> workgroupCounts;
|
|
||||||
workgroupCounts.resize(totalWorkgroupCount);
|
|
||||||
|
|
||||||
for (uint32_t workgroupId = 0u; workgroupId < totalWorkgroupCount; workgroupId++) {
|
|
||||||
workgroupCounts[workgroupId] = totalWorkItemsInWorkgroup;
|
|
||||||
}
|
|
||||||
|
|
||||||
expectMemory<FamilyType>(groupSpecificWorkCounts, workgroupCounts.begin(), workgroupCounts.size() * sizeof(uint32_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename FamilyType>
|
|
||||||
typename FamilyType::PIPE_CONTROL *retrieveSyncPipeControl(void *startAddress,
|
|
||||||
const RootDeviceEnvironment &rootDeviceEnvironment) {
|
|
||||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
|
||||||
|
|
||||||
uint8_t buffer[256];
|
|
||||||
LinearStream stream(buffer, 256);
|
|
||||||
MemorySynchronizationCommands<FamilyType>::addBarrierWa(stream, 0ull, rootDeviceEnvironment);
|
|
||||||
void *syncPipeControlAddress = reinterpret_cast<void *>(reinterpret_cast<size_t>(startAddress) + stream.getUsed());
|
|
||||||
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(syncPipeControlAddress);
|
|
||||||
return pipeControl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unique_ptr<DebugManagerStateRestore> debugRestorer;
|
|
||||||
std::unique_ptr<Buffer> dstBuffer;
|
|
||||||
size_t sizeUserMemory = 0;
|
|
||||||
|
|
||||||
cl_uint workingDimensions = 1;
|
|
||||||
int32_t partitionCount;
|
|
||||||
int32_t partitionType;
|
|
||||||
|
|
||||||
HardwareParse hwParser;
|
|
||||||
DispatchParameters dispatchParamters;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AubWalkerPartitionTest : public AubWalkerPartitionFixture,
|
|
||||||
public ::testing::TestWithParam<std::tuple<int32_t, int32_t, DispatchParameters, uint32_t>> {
|
|
||||||
void SetUp() override {
|
|
||||||
AubWalkerPartitionFixture::setUp();
|
|
||||||
std::tie(partitionCount, partitionType, dispatchParamters, workingDimensions) = GetParam();
|
|
||||||
|
|
||||||
if (generateRandomInput) {
|
|
||||||
workingDimensions = (rand() % 3 + 1);
|
|
||||||
partitionType = (rand() % 3 + 1);
|
|
||||||
partitionCount = rand() % 16 + 1;
|
|
||||||
|
|
||||||
// now generate dimensions that makes sense
|
|
||||||
auto goodWorkingSizeGenerated = false;
|
|
||||||
while (!goodWorkingSizeGenerated) {
|
|
||||||
dispatchParamters.localWorkSize[0] = rand() % 128 + 1;
|
|
||||||
dispatchParamters.localWorkSize[1] = rand() % 128 + 1;
|
|
||||||
dispatchParamters.localWorkSize[2] = rand() % 128 + 1;
|
|
||||||
auto totalWorkItemsInWorkgroup = 1;
|
|
||||||
for (auto dimension = 0u; dimension < workingDimensions; dimension++) {
|
|
||||||
totalWorkItemsInWorkgroup *= static_cast<uint32_t>(dispatchParamters.localWorkSize[dimension]);
|
|
||||||
}
|
|
||||||
if (totalWorkItemsInWorkgroup <= 1024) {
|
|
||||||
dispatchParamters.globalWorkSize[0] = dispatchParamters.localWorkSize[0] * (rand() % 32 + 1);
|
|
||||||
dispatchParamters.globalWorkSize[1] = dispatchParamters.localWorkSize[1] * (rand() % 32 + 1);
|
|
||||||
dispatchParamters.globalWorkSize[2] = dispatchParamters.localWorkSize[2] * (rand() % 32 + 1);
|
|
||||||
|
|
||||||
printf("\n generated following dispatch paramters work dim %u gws %zu %zu %zu lws %zu %zu %zu, partition type %d partitionCount %d",
|
|
||||||
workingDimensions,
|
|
||||||
dispatchParamters.globalWorkSize[0],
|
|
||||||
dispatchParamters.globalWorkSize[1],
|
|
||||||
dispatchParamters.globalWorkSize[2],
|
|
||||||
dispatchParamters.localWorkSize[0],
|
|
||||||
dispatchParamters.localWorkSize[1],
|
|
||||||
dispatchParamters.localWorkSize[2],
|
|
||||||
partitionType,
|
|
||||||
partitionCount);
|
|
||||||
IoFunctions::fflushPtr(stdout);
|
|
||||||
goodWorkingSizeGenerated = true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
debugManager.flags.ExperimentalSetWalkerPartitionCount.set(partitionCount);
|
|
||||||
debugManager.flags.ExperimentalSetWalkerPartitionType.set(partitionType);
|
|
||||||
debugManager.flags.EnableWalkerPartition.set(1u);
|
|
||||||
}
|
|
||||||
void TearDown() override {
|
|
||||||
AubWalkerPartitionFixture::tearDown();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AubWalkerPartitionZeroFixture : public AubWalkerPartitionFixture {
|
|
||||||
void setUp() {
|
|
||||||
AubWalkerPartitionFixture::setUp();
|
|
||||||
|
|
||||||
partitionCount = 0;
|
|
||||||
partitionType = 0;
|
|
||||||
|
|
||||||
workingDimensions = 1;
|
|
||||||
|
|
||||||
debugManager.flags.ExperimentalSetWalkerPartitionCount.set(0);
|
|
||||||
debugManager.flags.ExperimentalSetWalkerPartitionType.set(0);
|
|
||||||
|
|
||||||
commandBufferProperties = std::make_unique<AllocationProperties>(device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::commandBuffer, false, device->getDeviceBitfield());
|
|
||||||
auto memoryManager = this->device->getMemoryManager();
|
|
||||||
streamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties);
|
|
||||||
helperSurface = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties);
|
|
||||||
memset(helperSurface->getUnderlyingBuffer(), 0, MemoryConstants::pageSize);
|
|
||||||
taskStream = std::make_unique<LinearStream>(streamAllocation);
|
|
||||||
}
|
|
||||||
void tearDown() {
|
|
||||||
auto memoryManager = this->device->getMemoryManager();
|
|
||||||
memoryManager->freeGraphicsMemory(streamAllocation);
|
|
||||||
memoryManager->freeGraphicsMemory(helperSurface);
|
|
||||||
AubWalkerPartitionFixture::tearDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
void flushStream() {
|
|
||||||
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
|
||||||
dispatchFlags.guardCommandBufferWithPipeControl = true;
|
|
||||||
|
|
||||||
csr->makeResident(*helperSurface);
|
|
||||||
csr->flushTask(*taskStream, 0,
|
|
||||||
&csr->getIndirectHeap(IndirectHeap::Type::dynamicState, 0u),
|
|
||||||
&csr->getIndirectHeap(IndirectHeap::Type::indirectObject, 0u),
|
|
||||||
&csr->getIndirectHeap(IndirectHeap::Type::surfaceState, 0u),
|
|
||||||
0u, dispatchFlags, device->getDevice());
|
|
||||||
|
|
||||||
csr->flushBatchedSubmissions();
|
|
||||||
}
|
|
||||||
std::unique_ptr<LinearStream> taskStream;
|
|
||||||
GraphicsAllocation *streamAllocation = nullptr;
|
|
||||||
GraphicsAllocation *helperSurface = nullptr;
|
|
||||||
std::unique_ptr<AllocationProperties> commandBufferProperties;
|
|
||||||
};
|
|
||||||
|
|
||||||
using AubWalkerPartitionZeroTest = Test<AubWalkerPartitionZeroFixture>;
|
using AubWalkerPartitionZeroTest = Test<AubWalkerPartitionZeroFixture>;
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, whenPartitionCountSetToZeroThenProvideEqualSingleWalker) {
|
HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, whenPartitionCountSetToZeroThenProvideEqualSingleWalker) {
|
||||||
|
|
|
@ -8,6 +8,8 @@ target_sources(igdrcl_aub_tests PRIVATE
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h
|
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/aub_walker_partition_fixture.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/aub_walker_partition_fixture.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h
|
${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/image_aub_fixture.h
|
${CMAKE_CURRENT_SOURCE_DIR}/image_aub_fixture.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/multicontext_ocl_aub_fixture.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/multicontext_ocl_aub_fixture.cpp
|
||||||
|
|
|
@ -0,0 +1,143 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2022-2024 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "opencl/test/unit_test/aub_tests/fixtures/aub_walker_partition_fixture.h"
|
||||||
|
|
||||||
|
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
|
||||||
|
#include "shared/source/helpers/array_count.h"
|
||||||
|
#include "shared/source/helpers/basic_math.h"
|
||||||
|
#include "shared/source/helpers/timestamp_packet.h"
|
||||||
|
#include "shared/source/utilities/io_functions.h"
|
||||||
|
#include "shared/source/utilities/tag_allocator.h"
|
||||||
|
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||||
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
|
#include "shared/test/common/helpers/dispatch_flags_helper.h"
|
||||||
|
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||||
|
#include "shared/test/common/test_macros/hw_test.h"
|
||||||
|
|
||||||
|
#include "opencl/source/command_queue/command_queue.h"
|
||||||
|
#include "opencl/source/event/event.h"
|
||||||
|
|
||||||
|
using namespace NEO;
|
||||||
|
using namespace WalkerPartition;
|
||||||
|
|
||||||
|
void AubWalkerPartitionFixture::setUp() {
|
||||||
|
debugRestorer = std::make_unique<DebugManagerStateRestore>();
|
||||||
|
debugManager.flags.EnableTimestampPacket.set(1);
|
||||||
|
kernelIds |= (1 << 5);
|
||||||
|
KernelAUBFixture<SimpleKernelFixture>::setUp();
|
||||||
|
|
||||||
|
size_t userMemorySize = 16 * MemoryConstants::kiloByte;
|
||||||
|
if (generateRandomInput) {
|
||||||
|
userMemorySize = 16000 * MemoryConstants::kiloByte;
|
||||||
|
}
|
||||||
|
|
||||||
|
sizeUserMemory = userMemorySize;
|
||||||
|
auto destMemory = alignedMalloc(sizeUserMemory, 4096);
|
||||||
|
ASSERT_NE(nullptr, destMemory);
|
||||||
|
memset(destMemory, 0x0, sizeUserMemory);
|
||||||
|
|
||||||
|
dstBuffer.reset(Buffer::create(context, CL_MEM_COPY_HOST_PTR, sizeUserMemory, destMemory, retVal));
|
||||||
|
ASSERT_NE(nullptr, dstBuffer);
|
||||||
|
alignedFree(destMemory);
|
||||||
|
|
||||||
|
kernels[5]->setArg(0, dstBuffer.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
void AubWalkerPartitionFixture::tearDown() {
|
||||||
|
pCmdQ->flush();
|
||||||
|
|
||||||
|
KernelAUBFixture<SimpleKernelFixture>::tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
void AubWalkerPartitionTest::SetUp() {
|
||||||
|
AubWalkerPartitionFixture::setUp();
|
||||||
|
std::tie(partitionCount, partitionType, dispatchParamters, workingDimensions) = GetParam();
|
||||||
|
|
||||||
|
if (generateRandomInput) {
|
||||||
|
workingDimensions = (rand() % 3 + 1);
|
||||||
|
partitionType = (rand() % 3 + 1);
|
||||||
|
partitionCount = rand() % 16 + 1;
|
||||||
|
|
||||||
|
// now generate dimensions that makes sense
|
||||||
|
auto goodWorkingSizeGenerated = false;
|
||||||
|
while (!goodWorkingSizeGenerated) {
|
||||||
|
dispatchParamters.localWorkSize[0] = rand() % 128 + 1;
|
||||||
|
dispatchParamters.localWorkSize[1] = rand() % 128 + 1;
|
||||||
|
dispatchParamters.localWorkSize[2] = rand() % 128 + 1;
|
||||||
|
auto totalWorkItemsInWorkgroup = 1;
|
||||||
|
for (auto dimension = 0u; dimension < workingDimensions; dimension++) {
|
||||||
|
totalWorkItemsInWorkgroup *= static_cast<uint32_t>(dispatchParamters.localWorkSize[dimension]);
|
||||||
|
}
|
||||||
|
if (totalWorkItemsInWorkgroup <= 1024) {
|
||||||
|
dispatchParamters.globalWorkSize[0] = dispatchParamters.localWorkSize[0] * (rand() % 32 + 1);
|
||||||
|
dispatchParamters.globalWorkSize[1] = dispatchParamters.localWorkSize[1] * (rand() % 32 + 1);
|
||||||
|
dispatchParamters.globalWorkSize[2] = dispatchParamters.localWorkSize[2] * (rand() % 32 + 1);
|
||||||
|
|
||||||
|
printf("\n generated following dispatch paramters work dim %u gws %zu %zu %zu lws %zu %zu %zu, partition type %d partitionCount %d",
|
||||||
|
workingDimensions,
|
||||||
|
dispatchParamters.globalWorkSize[0],
|
||||||
|
dispatchParamters.globalWorkSize[1],
|
||||||
|
dispatchParamters.globalWorkSize[2],
|
||||||
|
dispatchParamters.localWorkSize[0],
|
||||||
|
dispatchParamters.localWorkSize[1],
|
||||||
|
dispatchParamters.localWorkSize[2],
|
||||||
|
partitionType,
|
||||||
|
partitionCount);
|
||||||
|
IoFunctions::fflushPtr(stdout);
|
||||||
|
goodWorkingSizeGenerated = true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
debugManager.flags.ExperimentalSetWalkerPartitionCount.set(partitionCount);
|
||||||
|
debugManager.flags.ExperimentalSetWalkerPartitionType.set(partitionType);
|
||||||
|
debugManager.flags.EnableWalkerPartition.set(1u);
|
||||||
|
}
|
||||||
|
|
||||||
|
void AubWalkerPartitionTest::TearDown() {
|
||||||
|
AubWalkerPartitionFixture::tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
void AubWalkerPartitionZeroFixture::setUp() {
|
||||||
|
AubWalkerPartitionFixture::setUp();
|
||||||
|
|
||||||
|
partitionCount = 0;
|
||||||
|
partitionType = 0;
|
||||||
|
|
||||||
|
workingDimensions = 1;
|
||||||
|
|
||||||
|
debugManager.flags.ExperimentalSetWalkerPartitionCount.set(0);
|
||||||
|
debugManager.flags.ExperimentalSetWalkerPartitionType.set(0);
|
||||||
|
|
||||||
|
commandBufferProperties = std::make_unique<AllocationProperties>(device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::commandBuffer, false, device->getDeviceBitfield());
|
||||||
|
auto memoryManager = this->device->getMemoryManager();
|
||||||
|
streamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties);
|
||||||
|
helperSurface = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties);
|
||||||
|
memset(helperSurface->getUnderlyingBuffer(), 0, MemoryConstants::pageSize);
|
||||||
|
taskStream = std::make_unique<LinearStream>(streamAllocation);
|
||||||
|
}
|
||||||
|
void AubWalkerPartitionZeroFixture::tearDown() {
|
||||||
|
auto memoryManager = this->device->getMemoryManager();
|
||||||
|
memoryManager->freeGraphicsMemory(streamAllocation);
|
||||||
|
memoryManager->freeGraphicsMemory(helperSurface);
|
||||||
|
AubWalkerPartitionFixture::tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
void AubWalkerPartitionZeroFixture::flushStream() {
|
||||||
|
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||||
|
dispatchFlags.guardCommandBufferWithPipeControl = true;
|
||||||
|
|
||||||
|
csr->makeResident(*helperSurface);
|
||||||
|
csr->flushTask(*taskStream, 0,
|
||||||
|
&csr->getIndirectHeap(IndirectHeap::Type::dynamicState, 0u),
|
||||||
|
&csr->getIndirectHeap(IndirectHeap::Type::indirectObject, 0u),
|
||||||
|
&csr->getIndirectHeap(IndirectHeap::Type::surfaceState, 0u),
|
||||||
|
0u, dispatchFlags, device->getDevice());
|
||||||
|
|
||||||
|
csr->flushBatchedSubmissions();
|
||||||
|
}
|
|
@ -0,0 +1,141 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2022-2024 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
|
||||||
|
#include "shared/source/helpers/timestamp_packet.h"
|
||||||
|
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||||
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
|
#include "shared/test/common/helpers/dispatch_flags_helper.h"
|
||||||
|
#include "shared/test/common/test_macros/hw_test.h"
|
||||||
|
|
||||||
|
#include "opencl/source/mem_obj/buffer.h"
|
||||||
|
#include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h"
|
||||||
|
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
|
||||||
|
#include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h"
|
||||||
|
|
||||||
|
extern bool generateRandomInput;
|
||||||
|
|
||||||
|
struct DispatchParameters {
|
||||||
|
size_t globalWorkSize[3];
|
||||||
|
size_t localWorkSize[3];
|
||||||
|
};
|
||||||
|
|
||||||
|
extern DispatchParameters dispatchParametersForTests[];
|
||||||
|
|
||||||
|
struct AubWalkerPartitionFixture : public KernelAUBFixture<SimpleKernelFixture> {
|
||||||
|
void setUp();
|
||||||
|
|
||||||
|
void tearDown();
|
||||||
|
|
||||||
|
template <typename FamilyType>
|
||||||
|
void validatePartitionProgramming(uint64_t postSyncAddress, int32_t partitionCount) {
|
||||||
|
using WalkerVariant = typename FamilyType::WalkerVariant;
|
||||||
|
uint32_t totalWorkgroupCount = 1u;
|
||||||
|
uint32_t totalWorkItemsInWorkgroup = 1u;
|
||||||
|
uint32_t totalWorkItemsCount = 1;
|
||||||
|
|
||||||
|
for (auto dimension = 0u; dimension < workingDimensions; dimension++) {
|
||||||
|
totalWorkgroupCount *= static_cast<uint32_t>(dispatchParamters.globalWorkSize[dimension] / dispatchParamters.localWorkSize[dimension]);
|
||||||
|
totalWorkItemsInWorkgroup *= static_cast<uint32_t>(dispatchParamters.localWorkSize[dimension]);
|
||||||
|
totalWorkItemsCount *= static_cast<uint32_t>(dispatchParamters.globalWorkSize[dimension]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint32_t workgroupCount = static_cast<uint32_t>(dispatchParamters.globalWorkSize[partitionType - 1] / dispatchParamters.localWorkSize[partitionType - 1]);
|
||||||
|
auto partitionSize = Math::divideAndRoundUp(workgroupCount, partitionCount);
|
||||||
|
|
||||||
|
if (static_cast<uint32_t>(partitionType) > workingDimensions) {
|
||||||
|
partitionSize = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
hwParser.parseCommands<FamilyType>(pCmdQ->getCS(0), 0);
|
||||||
|
hwParser.findHardwareCommands<FamilyType>();
|
||||||
|
|
||||||
|
WalkerVariant walkerVariant = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(*hwParser.itorWalker);
|
||||||
|
|
||||||
|
std::visit([&](auto &&walkerCmd) {
|
||||||
|
using WalkerType = std::decay_t<decltype(*walkerCmd)>;
|
||||||
|
using PostSyncType = decltype(FamilyType::template getPostSyncType<WalkerType>());
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, walkerCmd->getPartitionId());
|
||||||
|
|
||||||
|
if (partitionCount > 1) {
|
||||||
|
EXPECT_TRUE(walkerCmd->getWorkloadPartitionEnable());
|
||||||
|
EXPECT_EQ(partitionSize, walkerCmd->getPartitionSize());
|
||||||
|
EXPECT_EQ(partitionType, walkerCmd->getPartitionType());
|
||||||
|
} else {
|
||||||
|
EXPECT_FALSE(walkerCmd->getWorkloadPartitionEnable());
|
||||||
|
EXPECT_EQ(0u, walkerCmd->getPartitionSize());
|
||||||
|
EXPECT_EQ(0u, walkerCmd->getPartitionType());
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(PostSyncType::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
|
||||||
|
EXPECT_EQ(postSyncAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||||
|
|
||||||
|
int notExpectedValue[] = {1, 1, 1, 1};
|
||||||
|
|
||||||
|
for (auto partitionId = 0; partitionId < debugManager.flags.ExperimentalSetWalkerPartitionCount.get(); partitionId++) {
|
||||||
|
expectNotEqualMemory<FamilyType>(reinterpret_cast<void *>(postSyncAddress), &notExpectedValue, sizeof(notExpectedValue));
|
||||||
|
postSyncAddress += 16; // next post sync needs to be right after the previous one
|
||||||
|
}
|
||||||
|
},
|
||||||
|
walkerVariant);
|
||||||
|
|
||||||
|
auto dstGpuAddress = addrToPtr(ptrOffset(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), dstBuffer->getOffset()));
|
||||||
|
expectMemory<FamilyType>(dstGpuAddress, &totalWorkItemsCount, sizeof(uint32_t));
|
||||||
|
auto groupSpecificWorkCounts = ptrOffset(dstGpuAddress, 4);
|
||||||
|
StackVec<uint32_t, 8> workgroupCounts;
|
||||||
|
workgroupCounts.resize(totalWorkgroupCount);
|
||||||
|
|
||||||
|
for (uint32_t workgroupId = 0u; workgroupId < totalWorkgroupCount; workgroupId++) {
|
||||||
|
workgroupCounts[workgroupId] = totalWorkItemsInWorkgroup;
|
||||||
|
}
|
||||||
|
|
||||||
|
expectMemory<FamilyType>(groupSpecificWorkCounts, workgroupCounts.begin(), workgroupCounts.size() * sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename FamilyType>
|
||||||
|
typename FamilyType::PIPE_CONTROL *retrieveSyncPipeControl(void *startAddress,
|
||||||
|
const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||||
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
|
|
||||||
|
uint8_t buffer[256];
|
||||||
|
LinearStream stream(buffer, 256);
|
||||||
|
MemorySynchronizationCommands<FamilyType>::addBarrierWa(stream, 0ull, rootDeviceEnvironment);
|
||||||
|
void *syncPipeControlAddress = reinterpret_cast<void *>(reinterpret_cast<size_t>(startAddress) + stream.getUsed());
|
||||||
|
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(syncPipeControlAddress);
|
||||||
|
return pipeControl;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<DebugManagerStateRestore> debugRestorer;
|
||||||
|
std::unique_ptr<Buffer> dstBuffer;
|
||||||
|
size_t sizeUserMemory = 0;
|
||||||
|
|
||||||
|
cl_uint workingDimensions = 1;
|
||||||
|
int32_t partitionCount;
|
||||||
|
int32_t partitionType;
|
||||||
|
|
||||||
|
HardwareParse hwParser;
|
||||||
|
DispatchParameters dispatchParamters;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AubWalkerPartitionTest : public AubWalkerPartitionFixture,
|
||||||
|
public ::testing::TestWithParam<std::tuple<int32_t, int32_t, DispatchParameters, uint32_t>> {
|
||||||
|
void SetUp();
|
||||||
|
void TearDown();
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AubWalkerPartitionZeroFixture : public AubWalkerPartitionFixture {
|
||||||
|
void setUp();
|
||||||
|
void tearDown();
|
||||||
|
|
||||||
|
void flushStream();
|
||||||
|
|
||||||
|
std::unique_ptr<LinearStream> taskStream;
|
||||||
|
GraphicsAllocation *streamAllocation = nullptr;
|
||||||
|
GraphicsAllocation *helperSurface = nullptr;
|
||||||
|
std::unique_ptr<AllocationProperties> commandBufferProperties;
|
||||||
|
};
|
|
@ -27,6 +27,10 @@ void MulticontextAubFixture::setUp(uint32_t numberOfTiles, EnabledCommandStreame
|
||||||
|
|
||||||
HardwareInfo localHwInfo = *defaultHwInfo;
|
HardwareInfo localHwInfo = *defaultHwInfo;
|
||||||
|
|
||||||
|
if (debugManager.flags.BlitterEnableMaskOverride.get() > 0) {
|
||||||
|
localHwInfo.featureTable.ftrBcsInfo = debugManager.flags.BlitterEnableMaskOverride.get();
|
||||||
|
}
|
||||||
|
|
||||||
if (numberOfEnabledTiles > 1 && localHwInfo.gtSystemInfo.MultiTileArchInfo.IsValid == 0) {
|
if (numberOfEnabledTiles > 1 && localHwInfo.gtSystemInfo.MultiTileArchInfo.IsValid == 0) {
|
||||||
skipped = true;
|
skipped = true;
|
||||||
GTEST_SKIP();
|
GTEST_SKIP();
|
||||||
|
|
Loading…
Reference in New Issue