Mirror of https://github.com/intel/compute-runtime.git
Add command queue aub tests

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
Committed by: Compute-Runtime-Automation
parent 010186d0da
commit 43e147d84f
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2018-2021 Intel Corporation
+# Copyright (C) 2018-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -27,6 +27,20 @@ target_sources(igdrcl_aub_tests PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_copy_read_buffer_aub_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_copy_read_buffer_aub_tests.h
     ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image_aub_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/single_tile_products_excludes.cpp
 )
+
+if(TESTS_XEHP_AND_LATER)
+  target_sources(igdrcl_aub_tests PRIVATE
+                 ${CMAKE_CURRENT_SOURCE_DIR}/aub_enqueue_resource_barrier_xehp_and_later.cpp
+                 ${CMAKE_CURRENT_SOURCE_DIR}/aub_inline_data_local_id_tests_xehp_and_later.cpp
+                 ${CMAKE_CURRENT_SOURCE_DIR}/aub_multicontext_tests_xehp_and_later.cpp
+                 ${CMAKE_CURRENT_SOURCE_DIR}/aub_one_va_multi_physical_tests_xehp_and_later.cpp
+                 ${CMAKE_CURRENT_SOURCE_DIR}/aub_postsync_write_tests_xehp_and_later.cpp
+                 ${CMAKE_CURRENT_SOURCE_DIR}/aub_scratch_space_tests_xehp_and_later.cpp
+                 ${CMAKE_CURRENT_SOURCE_DIR}/compression_aub_tests_xehp_and_later.cpp
+                 ${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_buffers_aub_tests_xehp_and_later.cpp
+  )
+endif()
+
 add_subdirectories()
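New file added by this commit (the diff file headers are missing from this extract; judging by the CMake additions above and the test content, this is most likely aub_enqueue_resource_barrier_xehp_and_later.cpp):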
@@ -0,0 +1,112 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/source/command_queue/resource_barrier.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
#include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/helpers/cmd_buffer_validator.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"

#include "test_traits_common.h"

using namespace NEO;

using ResourceBarrierAubTest = Test<KernelAUBFixture<SimpleKernelFixture>>;

struct L3ControlSupportedMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        if constexpr (HwMapper<productFamily>::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) {
            return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::l3ControlSupported;
        }
        return false;
    }
};

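// Note: HWTEST2_F takes a matcher as its last argument and instantiates the test only
// for product families whose isMatched() returns true - here, parts that support the
// XE_HP_CORE command set and report l3ControlSupported in their TestTraits.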
HWTEST2_F(ResourceBarrierAubTest, givenAllocationsWhenEnqueueResourceBarrierCalledThenL3FlushCommandWasSubmitted, L3ControlSupportedMatcher) {
    using L3_CONTROL = typename FamilyType::L3_CONTROL;

    constexpr size_t bufferSize = MemoryConstants::pageSize;
    char bufferAMemory[bufferSize];
    char bufferBMemory[bufferSize];

    memset(bufferAMemory, 1, bufferSize);
    memset(bufferBMemory, 129, bufferSize);

    auto retVal = CL_INVALID_VALUE;
    auto srcBuffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                                            bufferSize, bufferAMemory, retVal));

    ASSERT_NE(nullptr, srcBuffer);
    auto dstBuffer1 = std::unique_ptr<Buffer>(Buffer::create(context,
                                                             CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                             bufferSize, bufferBMemory, retVal));
    ASSERT_NE(nullptr, dstBuffer1);

    auto dstBuffer2 = std::unique_ptr<Buffer>(Buffer::create(context,
                                                             CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                             bufferSize, bufferBMemory, retVal));
    ASSERT_NE(nullptr, dstBuffer2);

    retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer1.get(),
                                      0, 0,
                                      bufferSize, 0,
                                      nullptr, nullptr);

    retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer2.get(),
                                      0, 0,
                                      bufferSize, 0,
                                      nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);

    cl_resource_barrier_descriptor_intel descriptor{};
    cl_resource_barrier_descriptor_intel descriptor2{};

    descriptor.mem_object = dstBuffer1.get();
    descriptor2.mem_object = dstBuffer2.get();

    const cl_resource_barrier_descriptor_intel descriptors[] = {descriptor, descriptor2};

    BarrierCommand bCmd(pCmdQ, descriptors, 2);

    auto sizeUsed = pCmdQ->getCS(0).getUsed();

    retVal = pCmdQ->enqueueResourceBarrier(&bCmd, 0, nullptr, nullptr);

    LinearStream &l3FlushCmdStream = pCmdQ->getCS(0);

    std::string err;
    auto cmdBuffOk = expectCmdBuff<FamilyType>(l3FlushCmdStream, sizeUsed,
                                               std::vector<MatchCmd *>{
                                                   new MatchAnyCmd(AnyNumber),
                                                   new MatchHwCmd<FamilyType, L3_CONTROL>(1),
                                                   new MatchAnyCmd(AnyNumber),
                                               },
                                               &err);
    EXPECT_TRUE(cmdBuffOk) << err;

    retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer2.get(),
                                      0, 0,
                                      bufferSize, 0,
                                      nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);

    pCmdQ->flush();

    expectMemory<FamilyType>(reinterpret_cast<void *>(dstBuffer1->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()),
                             bufferAMemory, bufferSize);
    expectMemory<FamilyType>(reinterpret_cast<void *>(dstBuffer2->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()),
                             bufferAMemory, bufferSize);
}
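New file (most likely aub_inline_data_local_id_tests_xehp_and_later.cpp, going by the CMake list above):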
@@ -0,0 +1,475 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/array_count.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h"
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"

using namespace NEO;

struct AubDispatchThreadDataFixture : public KernelAUBFixture<SimpleKernelFixture> {
    struct TestVariables {
        Buffer *destBuffer = nullptr;
        void *destMemory = nullptr;
        size_t sizeUserMemory = 0;
        size_t sizeWrittenMemory = 0;
        size_t sizeRemainderMemory = 0;
        void *expectedMemory = nullptr;
        void *expectedRemainderMemory = nullptr;
        char *remainderDestMemory = nullptr;
        unsigned int scalarArg = 0;
        size_t typeSize = 0;
        size_t gwsSize = 0;
        size_t lwsSize = 0;
    };
    void SetUp() override {
        KernelAUBFixture<SimpleKernelFixture>::SetUp();
        variablesCount = arrayCount(variables);

        BufferDefaults::context = context;
        for (size_t i = 0; i < variablesCount; i++) {
            if (variables[i].sizeUserMemory) {
                variables[i].destBuffer = Buffer::create(
                    context,
                    CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL,
                    variables[i].sizeUserMemory,
                    nullptr,
                    retVal);
                ASSERT_NE(nullptr, variables[i].destBuffer);
                variables[i].destMemory = reinterpret_cast<void *>(variables[i].destBuffer->getCpuAddressForMapping());
            }
        }
    }

    void TearDown() override {
        pCmdQ->flush();

        for (size_t i = 0; i < variablesCount; i++) {
            if (variables[i].destBuffer) {
                delete variables[i].destBuffer;
                variables[i].destBuffer = nullptr;
            }
            if (variables[i].expectedMemory) {
                alignedFree(variables[i].expectedMemory);
                variables[i].expectedMemory = nullptr;
            }
            if (variables[i].expectedRemainderMemory) {
                alignedFree(variables[i].expectedRemainderMemory);
                variables[i].expectedRemainderMemory = nullptr;
            }
        }
        BufferDefaults::context = nullptr;
        KernelAUBFixture<SimpleKernelFixture>::TearDown();
    }

    std::unique_ptr<DebugManagerStateRestore> debugRestorer;
    TestVariables variables[5] = {};
    size_t variablesCount;

    HardwareParse hwParser;
};

struct InlineDataFixture : AubDispatchThreadDataFixture {
    void SetUp() override {
        debugRestorer = std::make_unique<DebugManagerStateRestore>();
        DebugManager.flags.EnablePassInlineData.set(true);

        initializeKernel3Variables();
        initializeKernel4Variables();

        AubDispatchThreadDataFixture::SetUp();

        setUpKernel3();
    }

    void initializeKernel4Variables() {
        kernelIds |= (1 << 4);
        variables[4].gwsSize = 1;
        variables[4].lwsSize = 1;
    }

    void initializeKernel3Variables() {
        kernelIds |= (1 << 3);
        variables[3].sizeUserMemory = 4096;
        variables[3].typeSize = sizeof(unsigned int);
        variables[3].gwsSize = 128;
        variables[3].lwsSize = 32;
    }

    void setUpKernel3() {
        memset(variables[3].destMemory, 0xFE, variables[3].sizeUserMemory);

        kernels[3]->setArg(0, variables[3].destBuffer);

        variables[3].sizeWrittenMemory = variables[3].gwsSize * variables[3].typeSize;
        variables[3].expectedMemory = alignedMalloc(variables[3].sizeWrittenMemory, 4096);
        memset(variables[3].expectedMemory, 0, variables[3].sizeWrittenMemory);
        variables[3].sizeRemainderMemory = variables[3].sizeUserMemory - variables[3].sizeWrittenMemory;
        variables[3].expectedRemainderMemory = alignedMalloc(variables[3].sizeRemainderMemory, 4096);
        memcpy_s(variables[3].expectedRemainderMemory,
                 variables[3].sizeRemainderMemory,
                 variables[3].destMemory,
                 variables[3].sizeRemainderMemory);

        variables[3].remainderDestMemory = static_cast<char *>(variables[3].destMemory) + variables[3].sizeWrittenMemory;
    }
};

using XeHPAndLaterAubInlineDataTest = Test<InlineDataFixture>;

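// Note: with EnablePassInlineData set, the leading sizeof(INLINE_DATA) bytes of the
// kernel's cross-thread data are carried inside the COMPUTE_WALKER command itself;
// any remainder still lands in the indirect object heap. The two tests below cover
// the fits-in-inline and overflows-inline cases respectively.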
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubInlineDataTest, givenCrossThreadFitIntoSingleGrfWhenInlineDataAllowedThenCopyAllCrossThreadIntoInline) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INLINE_DATA = typename FamilyType::INLINE_DATA;

    if (!HardwareCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*kernels[4])) {
        return;
    }

    cl_uint workDim = 1;
    size_t globalWorkOffset[3] = {0, 0, 0};
    size_t globalWorkSize[3] = {variables[4].gwsSize, 1, 1};
    size_t localWorkSize[3] = {variables[4].lwsSize, 1, 1};
    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    auto retVal = pCmdQ->enqueueKernel(
        kernels[4].get(),
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);
    ASSERT_EQ(CL_SUCCESS, retVal);

    pCmdQ->flush();

    hwParser.parseCommands<FamilyType>(pCmdQ->getCS(0), 0);
    hwParser.findHardwareCommands<FamilyType>();
    EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto walker = genCmdCast<WALKER_TYPE *>(*hwParser.itorWalker);
    EXPECT_EQ(1u, walker->getEmitInlineParameter());

    auto localId = kernels[4]->getKernelInfo().kernelDescriptor.kernelAttributes.localId;
    uint32_t expectedEmitLocal = 0;
    if (localId[0]) {
        expectedEmitLocal |= (1 << 0);
    }
    if (localId[1]) {
        expectedEmitLocal |= (1 << 1);
    }
    if (localId[2]) {
        expectedEmitLocal |= (1 << 2);
    }

    EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId());
    EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), kernels[4]->getCrossThreadData(), sizeof(INLINE_DATA)));
    // This kernel does nothing; only such a trivial kernel has cross-thread data small
    // enough to fit into a single GRF, so there is no expectMemory check here - the
    // test only verifies the inline data copy performed by COMPUTE_WALKER.
}

HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubInlineDataTest, givenCrossThreadSizeMoreThanSingleGrfWhenInlineDataAllowedThenCopyGrfCrossThreadToInline) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INLINE_DATA = typename FamilyType::INLINE_DATA;

    if (!HardwareCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*kernels[3])) {
        return;
    }

    cl_uint workDim = 1;
    size_t globalWorkOffset[3] = {0, 0, 0};
    size_t globalWorkSize[3] = {variables[3].gwsSize, 1, 1};
    size_t localWorkSize[3] = {variables[3].lwsSize, 1, 1};
    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    IndirectHeap &ih = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 2048);

    auto retVal = pCmdQ->enqueueKernel(
        kernels[3].get(),
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);
    ASSERT_EQ(CL_SUCCESS, retVal);

    pCmdQ->flush();

    hwParser.parseCommands<FamilyType>(pCmdQ->getCS(0), 0);
    hwParser.findHardwareCommands<FamilyType>();
    EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto walker = genCmdCast<WALKER_TYPE *>(*hwParser.itorWalker);
    EXPECT_EQ(1u, walker->getEmitInlineParameter());

    auto localId = kernels[3]->getKernelInfo().kernelDescriptor.kernelAttributes.localId;
    uint32_t expectedEmitLocal = 0;
    if (localId[0]) {
        expectedEmitLocal |= (1 << 0);
    }
    if (localId[1]) {
        expectedEmitLocal |= (1 << 1);
    }
    if (localId[2]) {
        expectedEmitLocal |= (1 << 2);
    }
    EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId());
    char *crossThreadData = kernels[3]->getCrossThreadData();
    size_t crossThreadDataSize = kernels[3]->getCrossThreadDataSize();
    auto inlineSize = sizeof(INLINE_DATA);
    EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadData, inlineSize));

    crossThreadDataSize -= inlineSize;
    crossThreadData += inlineSize;

    void *payloadData = ih.getCpuBase();
    EXPECT_EQ(0, memcmp(payloadData, crossThreadData, crossThreadDataSize));

    expectMemory<FamilyType>(variables[3].destMemory, variables[3].expectedMemory, variables[3].sizeWrittenMemory);
    expectMemory<FamilyType>(variables[3].remainderDestMemory, variables[3].expectedRemainderMemory, variables[3].sizeRemainderMemory);
}

struct HwLocalIdsFixture : AubDispatchThreadDataFixture {
    void SetUp() override {
        debugRestorer = std::make_unique<DebugManagerStateRestore>();
        DebugManager.flags.EnableHwGenerationLocalIds.set(1);

        initializeKernel2Variables();

        AubDispatchThreadDataFixture::SetUp();

        if (kernels[2]->getKernelInfo().kernelDescriptor.kernelAttributes.flags.passInlineData) {
            DebugManager.flags.EnablePassInlineData.set(true);
        }

        setUpKernel2();
    }

    void initializeKernel2Variables() {
        kernelIds |= (1 << 2);
        variables[2].sizeUserMemory = 4096;
        variables[2].scalarArg = 0xAA;
        variables[2].typeSize = sizeof(unsigned int);
        variables[2].gwsSize = 256;
        variables[2].lwsSize = 32;
    }

    void setUpKernel2() {
        memset(variables[2].destMemory, 0xFE, variables[2].sizeUserMemory);

        kernels[2]->setArg(0, sizeof(variables[2].scalarArg), &variables[2].scalarArg);
        kernels[2]->setArg(1, variables[2].destBuffer);

        variables[2].sizeWrittenMemory = variables[2].gwsSize * variables[2].typeSize;
        variables[2].expectedMemory = alignedMalloc(variables[2].sizeWrittenMemory, 4096);
        unsigned int *expectedData = static_cast<unsigned int *>(variables[2].expectedMemory);
        for (size_t i = 0; i < variables[2].gwsSize; i++) {
            *(expectedData + i) = variables[2].scalarArg;
        }
        variables[2].sizeRemainderMemory = variables[2].sizeUserMemory - variables[2].sizeWrittenMemory;
        variables[2].expectedRemainderMemory = alignedMalloc(variables[2].sizeRemainderMemory, 4096);
        memcpy_s(variables[2].expectedRemainderMemory,
                 variables[2].sizeRemainderMemory,
                 variables[2].destMemory,
                 variables[2].sizeRemainderMemory);

        variables[2].remainderDestMemory = static_cast<char *>(variables[2].destMemory) + variables[2].sizeWrittenMemory;
    }
};

using XeHPAndLaterAubHwLocalIdsTest = Test<HwLocalIdsFixture>;

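// Note: with EnableHwGenerationLocalIds the fixed-function hardware generates the
// local IDs (the walker's GenerateLocalId bit), so the kernel is entered past its
// per-thread-data-load prologue - hence the skipPerThreadDataLoad offset applied to
// the kernel start pointer in the test below.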
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsTest, WhenEnqueueDimensionsArePow2ThenSetEmitLocalIdsAndGenerateLocalIdsFields) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    cl_uint workDim = 1;
    size_t globalWorkOffset[3] = {0, 0, 0};
    size_t globalWorkSize[3] = {variables[2].gwsSize, 1, 1};
    size_t localWorkSize[3] = {variables[2].lwsSize, 1, 1};
    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    auto retVal = pCmdQ->enqueueKernel(
        kernels[2].get(),
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);
    ASSERT_EQ(CL_SUCCESS, retVal);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(pCmdQ->getCS(0), 0);
    hwParser.findHardwareCommands<FamilyType>();
    EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto walker = genCmdCast<WALKER_TYPE *>(*hwParser.itorWalker);

    auto localId = kernels[2]->getKernelInfo().kernelDescriptor.kernelAttributes.localId;
    uint32_t expectedEmitLocal = 0;
    if (localId[0]) {
        expectedEmitLocal |= (1 << 0);
    }
    if (localId[1]) {
        expectedEmitLocal |= (1 << 1);
    }
    if (localId[2]) {
        expectedEmitLocal |= (1 << 2);
    }
    EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId());
    EXPECT_EQ(1u, walker->getGenerateLocalId());

    auto kernelAllocationGpuAddr = kernels[2]->getKernelInfo().kernelAllocation->getGpuAddressToPatch();
    auto skipOffset = kernels[2]->getKernelInfo().kernelDescriptor.entryPoints.skipPerThreadDataLoad;
    uint64_t kernelStartPointer = kernelAllocationGpuAddr + skipOffset;

    INTERFACE_DESCRIPTOR_DATA &idd = walker->getInterfaceDescriptor();
    EXPECT_EQ(static_cast<uint32_t>(kernelStartPointer), idd.getKernelStartPointer());

    pCmdQ->flush();

    expectMemory<FamilyType>(variables[2].destMemory, variables[2].expectedMemory, variables[2].sizeWrittenMemory);
    expectMemory<FamilyType>(variables[2].remainderDestMemory, variables[2].expectedRemainderMemory, variables[2].sizeRemainderMemory);
}

HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsTest, givenNonPowOf2LocalWorkSizeButCompatibleWorkOrderWhenLocalIdsAreUsedThenDataVerifiesCorrectly) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    cl_uint workDim = 1;
    size_t globalWorkSize[3] = {200, 1, 1};
    size_t localWorkSize[3] = {200, 1, 1};

    auto retVal = pCmdQ->enqueueKernel(
        kernels[2].get(),
        workDim,
        nullptr,
        globalWorkSize,
        localWorkSize,
        0,
        nullptr,
        nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(pCmdQ->getCS(0), 0);
    hwParser.findHardwareCommands<FamilyType>();
    EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto walker = genCmdCast<WALKER_TYPE *>(*hwParser.itorWalker);

    auto localId = kernels[2]->getKernelInfo().kernelDescriptor.kernelAttributes.localId;
    uint32_t expectedEmitLocal = 0;
    if (localId[0]) {
        expectedEmitLocal |= (1 << 0);
    }
    if (localId[1]) {
        expectedEmitLocal |= (1 << 1);
    }
    if (localId[2]) {
        expectedEmitLocal |= (1 << 2);
    }
    EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId());
    EXPECT_EQ(1u, walker->getGenerateLocalId());
    EXPECT_EQ(4u, walker->getWalkOrder());

    pCmdQ->flush();

    expectMemory<FamilyType>(variables[2].destMemory, variables[2].expectedMemory, globalWorkSize[0] * variables[2].typeSize);
}

struct HwLocalIdsWithSubGroups : AubDispatchThreadDataFixture {
    void SetUp() override {
        debugRestorer = std::make_unique<DebugManagerStateRestore>();
        DebugManager.flags.EnableHwGenerationLocalIds.set(1);

        kernelIds |= (1 << 9);
        variables[0].sizeUserMemory = 16 * KB;
        AubDispatchThreadDataFixture::SetUp();

        memset(variables[0].destMemory, 0, variables[0].sizeUserMemory);
        variables[0].expectedMemory = alignedMalloc(variables[0].sizeUserMemory, 4096);
        kernels[9]->setArg(0, variables[0].destBuffer);
    }
};

using XeHPAndLaterAubHwLocalIdsWithSubgroupsTest = Test<HwLocalIdsWithSubGroups>;
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsWithSubgroupsTest, givenKernelUsingSubgroupsWhenLocalIdsAreGeneratedByHwThenValuesAreCorrect) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    cl_uint workDim = 1;
    size_t globalWorkSize[3] = {200, 1, 1};
    size_t localWorkSize[3] = {200, 1, 1};

    auto retVal = pCmdQ->enqueueKernel(
        kernels[9].get(),
        workDim,
        nullptr,
        globalWorkSize,
        localWorkSize,
        0,
        nullptr,
        nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(pCmdQ->getCS(0), 0);
    hwParser.findHardwareCommands<FamilyType>();
    EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto walker = genCmdCast<WALKER_TYPE *>(*hwParser.itorWalker);

    auto localId = kernels[9]->getKernelInfo().kernelDescriptor.kernelAttributes.localId;
    uint32_t expectedEmitLocal = 0;
    if (localId[0]) {
        expectedEmitLocal |= (1 << 0);
    }
    if (localId[1]) {
        expectedEmitLocal |= (1 << 1);
    }
    if (localId[2]) {
        expectedEmitLocal |= (1 << 2);
    }
    EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId());
    EXPECT_EQ(1u, walker->getGenerateLocalId());
    EXPECT_EQ(4u, walker->getWalkOrder());

    pCmdQ->finish();

    // We expect a sequence of local ids from 0..199.
    auto expectedMemory = reinterpret_cast<uint32_t *>(variables[0].expectedMemory);
    auto currentWorkItem = 0u;

    while (currentWorkItem < localWorkSize[0]) {
        expectedMemory[0] = currentWorkItem++;
        expectedMemory++;
    }

    expectMemory<FamilyType>(variables[0].destMemory, variables[0].expectedMemory, ptrDiff(expectedMemory, variables[0].expectedMemory));
}
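New file (most likely aub_multicontext_tests_xehp_and_later.cpp, going by the CMake list above):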
@@ -0,0 +1,620 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h"
#include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"

using namespace NEO;

template <uint32_t numberOfTiles, MulticontextAubFixture::EnabledCommandStreamers enabledCommandStreamers>
struct MultitileMulticontextTests : public MulticontextAubFixture, public ::testing::Test {
    void SetUp() override {
        MulticontextAubFixture::SetUp(numberOfTiles, enabledCommandStreamers, false);
    }
    void TearDown() override {
        MulticontextAubFixture::TearDown();
    }

    template <typename FamilyType>
    void runAubTest() {
        cl_int retVal = CL_SUCCESS;
        const uint32_t bufferSize = 64 * KB;
        uint8_t writePattern[bufferSize];
        uint8_t initPattern[bufferSize];
        std::fill(writePattern, writePattern + sizeof(writePattern), 1);
        std::fill(initPattern, initPattern + sizeof(initPattern), 0);

        std::vector<std::vector<std::unique_ptr<Buffer>>> regularBuffers;
        std::vector<std::vector<std::unique_ptr<Buffer>>> tileOnlyBuffers;

        cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;

        regularBuffers.resize(tileDevices.size());
        tileOnlyBuffers.resize(tileDevices.size());
        for (uint32_t tile = 0; tile < tileDevices.size(); tile++) {
            for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) {
                DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);
                auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0, 0,
                                                                                         &context->getDevice(0)->getDevice());
                auto regularBuffer = Buffer::create(
                    context.get(), memoryProperties, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0, bufferSize, initPattern, retVal);
                auto tileOnlyProperties = ClMemoryPropertiesHelper::createMemoryProperties(
                    flags, 0, 0, context->getDevice(0)->getDevice().getNearestGenericSubDevice(tile));
                auto tileOnlyBuffer = Buffer::create(context.get(), tileOnlyProperties, flags, 0, bufferSize, initPattern, retVal);
                DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);
                regularBuffer->forceDisallowCPUCopy = true;
                tileOnlyBuffer->forceDisallowCPUCopy = true;
                regularBuffers[tile].push_back(std::unique_ptr<Buffer>(regularBuffer));
                tileOnlyBuffers[tile].push_back(std::unique_ptr<Buffer>(tileOnlyBuffer));

                commandQueues[tile][tileEngine]->enqueueWriteBuffer(regularBuffer, CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr);
                commandQueues[tile][tileEngine]->enqueueWriteBuffer(tileOnlyBuffer, CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr);

                commandQueues[tile][tileEngine]->flush();
            }
        }

        for (uint32_t tile = 0; tile < tileDevices.size(); tile++) {
            for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) {
                getSimulatedCsr<FamilyType>(tile, tileEngine)->pollForCompletion();

                auto regularBufferGpuAddress = static_cast<uintptr_t>(regularBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress());
                auto tileOnlyBufferGpuAddress = static_cast<uintptr_t>(tileOnlyBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress());
                expectMemory<FamilyType>(reinterpret_cast<void *>(regularBufferGpuAddress), writePattern, bufferSize, tile, tileEngine);
                expectMemory<FamilyType>(reinterpret_cast<void *>(tileOnlyBufferGpuAddress), writePattern, bufferSize, tile, tileEngine);
            }
        }
    }

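    // Image variant: writes a 5x5 RGBA float image from every enabled queue on every
    // tile, reads it back, and compares against srcMemory row by row, honoring the
    // host-ptr row and slice pitch of each image.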
    template <typename FamilyType>
    void runAubWriteImageTest() {
        if (!tileDevices[0]->getSharedDeviceInfo().imageSupport) {
            GTEST_SKIP();
        }

        cl_int retVal = CL_SUCCESS;
        auto testWidth = 5u;
        auto testHeight = 5u;
        auto testDepth = 1u;
        auto numPixels = testWidth * testHeight * testDepth;

        cl_image_format imageFormat;
        imageFormat.image_channel_data_type = CL_FLOAT;
        imageFormat.image_channel_order = CL_RGBA;

        cl_mem_flags flags = 0;
        auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);

        cl_image_desc imageDesc;
        imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
        imageDesc.image_width = testWidth;
        imageDesc.image_height = testHeight;
        imageDesc.image_depth = testDepth;
        imageDesc.image_array_size = 1;
        imageDesc.image_row_pitch = 0;
        imageDesc.image_slice_pitch = 0;
        imageDesc.num_mip_levels = 0;
        imageDesc.num_samples = 0;
        imageDesc.mem_object = NULL;

        auto perChannelDataSize = 4u;
        auto numChannels = 4u;
        auto elementSize = perChannelDataSize * numChannels;
        auto srcMemory = (uint8_t *)alignedMalloc(elementSize * numPixels, MemoryConstants::pageSize);
        for (size_t i = 0; i < numPixels * elementSize; ++i) {
            auto origValue = static_cast<uint8_t>(i);
            memcpy(srcMemory + i, &origValue, sizeof(origValue));
        }

        size_t origin[3] = {0, 0, 0};
        const size_t region[3] = {testWidth, testHeight, testDepth};
        size_t inputRowPitch = testWidth * elementSize;
        size_t inputSlicePitch = inputRowPitch * testHeight;

        std::vector<std::vector<std::unique_ptr<Image>>> images;
        images.resize(tileDevices.size());

        for (uint32_t tile = 0; tile < tileDevices.size(); tile++) {
            for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) {
                Image *dstImage = Image::create(
                    context.get(),
                    ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
                    flags,
                    0,
                    surfaceFormat,
                    &imageDesc,
                    nullptr,
                    retVal);
                ASSERT_NE(nullptr, dstImage);
                memset(dstImage->getCpuAddress(), 0xFF, dstImage->getSize());

                retVal = commandQueues[tile][tileEngine]->enqueueWriteImage(
                    dstImage,
                    CL_FALSE,
                    origin,
                    region,
                    inputRowPitch,
                    inputSlicePitch,
                    srcMemory,
                    nullptr,
                    0,
                    nullptr,
                    nullptr);
                EXPECT_EQ(CL_SUCCESS, retVal);

                images[tile].push_back(std::unique_ptr<Image>(dstImage));
            }
        }

        for (uint32_t tile = 0; tile < tileDevices.size(); tile++) {
            for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) {
                commandQueues[tile][tileEngine]->flush();
            }
        }

        std::unique_ptr<uint8_t[]> dstMemory;

        for (uint32_t tile = 0; tile < tileDevices.size(); tile++) {
            for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) {

                dstMemory.reset(new uint8_t[images[tile][tileEngine]->getSize()]);
                memset(dstMemory.get(), 0xFF, images[tile][tileEngine]->getSize());

                commandQueues[tile][tileEngine]->enqueueReadImage(
                    images[tile][tileEngine].get(), CL_FALSE, origin, region, 0, 0, dstMemory.get(), nullptr, 0, nullptr, nullptr);

                commandQueues[tile][tileEngine]->flush();

                auto rowPitch = images[tile][tileEngine]->getHostPtrRowPitch();
                auto slicePitch = images[tile][tileEngine]->getHostPtrSlicePitch();

                auto pSrcMemory = srcMemory;
                auto pDstMemory = dstMemory.get();
                for (size_t z = 0; z < testDepth; ++z) {
                    for (size_t y = 0; y < testHeight; ++y) {
                        expectMemory<FamilyType>(pDstMemory, pSrcMemory, testWidth * elementSize, tile, tileEngine);
                        pSrcMemory = ptrOffset(pSrcMemory, testWidth * elementSize);
                        pDstMemory = ptrOffset(pDstMemory, rowPitch);
                    }
                    pDstMemory = ptrOffset(pDstMemory, slicePitch - (rowPitch * (testHeight > 0 ? testHeight : 1)));
                }
            }
        }

        alignedFree(srcMemory);
    }
};

// 4 Tiles
using FourTilesAllContextsTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

using FourTilesDualContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesDualContextTest, HEAVY_givenFourTilesAndDualContextWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

using FourTilesSingleContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Single>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

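// Walker partition tests: with EnableWalkerPartition a single COMPUTE_WALKER is split
// across the four tiles. Judging by the expectMemory checks below, kernel 5 atomically
// accumulates the dispatch size at offset 0 of the buffer (with per-workgroup counters
// after it) and kernel 8 writes a per-workgroup value, so lost partitions show up as
// data mismatches.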
struct EnqueueWithWalkerPartitionFourTilesTests : public FourTilesSingleContextTest, SimpleKernelFixture {
    void SetUp() override {
        DebugManager.flags.EnableWalkerPartition.set(1u);
        kernelIds |= (1 << 5);
        kernelIds |= (1 << 8);

        FourTilesSingleContextTest::SetUp();
        SimpleKernelFixture::SetUp(rootDevice, context.get());

        rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver;
        EXPECT_EQ(4u, rootCsr->getOsContext().getNumSupportedDevices());
        engineControlForFusedQueue = {rootCsr, &rootCsr->getOsContext()};

        bufferSize = 16 * MemoryConstants::kiloByte;

        auto destMemory = std::make_unique<uint8_t[]>(bufferSize);
        memset(destMemory.get(), 0x0, bufferSize);

        cl_int retVal = CL_SUCCESS;
        buffer.reset(Buffer::create(multiTileDefaultContext.get(), CL_MEM_COPY_HOST_PTR, bufferSize, destMemory.get(), retVal));

        clBuffer = buffer.get();
    }

    void TearDown() override {
        SimpleKernelFixture::TearDown();
        FourTilesSingleContextTest::TearDown();
    }

    void *getGpuAddress(Buffer &buffer) {
        return reinterpret_cast<void *>(buffer.getGraphicsAllocation(this->rootDeviceIndex)->getGpuAddress());
    }

    uint32_t bufferSize = 0;
    std::unique_ptr<Buffer> buffer;
    cl_mem clBuffer;
    EngineControl engineControlForFusedQueue = {};
    CommandStreamReceiver *rootCsr = nullptr;
};

struct DynamicWalkerPartitionFourTilesTests : EnqueueWithWalkerPartitionFourTilesTests {
    void SetUp() override {
        DebugManager.flags.EnableStaticPartitioning.set(0);
        EnqueueWithWalkerPartitionFourTilesTests::SetUp();
    }
    DebugManagerStateRestore restore{};
};

HWCMDTEST_F(IGFX_XE_HP_CORE, DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithAtomicThenOutputDataIsValid) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto mockCommandQueue = new MockCommandQueueHw<FamilyType>(multiTileDefaultContext.get(), rootDevice, nullptr);

    commandQueues[0][0].reset(mockCommandQueue);

    constexpr size_t globalWorkOffset[] = {0, 0, 0};
    constexpr size_t gwsSize[] = {512, 1, 1};
    constexpr size_t lwsSize[] = {32, 1, 1};
    constexpr cl_uint workingDimensions = 1;
    cl_int retVal = CL_SUCCESS;

    kernels[5]->setArg(0, sizeof(cl_mem), &clBuffer);
    retVal = mockCommandQueue->enqueueKernel(kernels[5].get(), workingDimensions, globalWorkOffset, gwsSize, lwsSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    mockCommandQueue->flush();

    HardwareParse hwParser;
    auto &cmdStream = mockCommandQueue->getCS(0);
    hwParser.parseCommands<FamilyType>(cmdStream, 0);

    bool lastSemaphoreFound = false;
    uint64_t tileAtomicGpuAddress = 0;
    for (auto it = hwParser.cmdList.rbegin(); it != hwParser.cmdList.rend(); it++) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
        if (semaphoreCmd) {
            if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*semaphoreCmd)) {
                continue;
            }
            EXPECT_EQ(4u, semaphoreCmd->getSemaphoreDataDword());
            tileAtomicGpuAddress = semaphoreCmd->getSemaphoreGraphicsAddress();
            lastSemaphoreFound = true;
            break;
        }
    }

    if (ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired()) {
        EXPECT_TRUE(lastSemaphoreFound);
        EXPECT_NE(0u, tileAtomicGpuAddress);
    } else {
        EXPECT_FALSE(lastSemaphoreFound);
        EXPECT_EQ(0u, tileAtomicGpuAddress);
    }

    expectMemory<FamilyType>(getGpuAddress(*buffer), &gwsSize[workingDimensions - 1], sizeof(uint32_t), 0, 0);
    uint32_t expectedAtomicValue = 4;
    if (ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired()) {
        expectMemory<FamilyType>(reinterpret_cast<void *>(tileAtomicGpuAddress), &expectedAtomicValue, sizeof(uint32_t), 0, 0);
    }

    constexpr uint32_t workgroupCount = static_cast<uint32_t>(gwsSize[workingDimensions - 1] / lwsSize[workingDimensions - 1]);
    auto groupSpecificWorkCounts = ptrOffset(getGpuAddress(*buffer), 4);
    std::array<uint32_t, workgroupCount> workgroupCounts;
    std::fill(workgroupCounts.begin(), workgroupCounts.end(), static_cast<uint32_t>(lwsSize[workingDimensions - 1]));

    expectMemory<FamilyType>(groupSpecificWorkCounts, &workgroupCounts[0], workgroupCounts.size() * sizeof(uint32_t), 0, 0);
}

HWCMDTEST_F(IGFX_XE_HP_CORE, DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithoutAtomicThenOutputDataIsValid) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto mockCommandQueue = new MockCommandQueueHw<FamilyType>(multiTileDefaultContext.get(), rootDevice, nullptr);

    commandQueues[0][0].reset(mockCommandQueue);

    constexpr size_t globalWorkOffset[3] = {0, 0, 0};
    constexpr size_t gwsSize[3] = {1024, 1, 1};
    constexpr size_t lwsSize[3] = {32, 1, 1};
    constexpr cl_uint workingDimensions = 1;
    cl_uint kernelIncrementCounter = 1024;
    cl_int retVal = CL_SUCCESS;

    kernels[8]->setArg(0, sizeof(cl_mem), &clBuffer);
    kernels[8]->setArg(1, kernelIncrementCounter);
    retVal = mockCommandQueue->enqueueKernel(kernels[8].get(), workingDimensions, globalWorkOffset, gwsSize, lwsSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    mockCommandQueue->flush();

    constexpr uint32_t workgroupCount = static_cast<uint32_t>(gwsSize[workingDimensions - 1] / lwsSize[workingDimensions - 1]);
    std::array<uint32_t, workgroupCount> workgroupCounts;
    std::fill(workgroupCounts.begin(), workgroupCounts.end(), kernelIncrementCounter);

    expectMemory<FamilyType>(getGpuAddress(*buffer), &workgroupCounts[0], workgroupCounts.size() * sizeof(uint32_t), 0, 0);
}

struct StaticWalkerPartitionFourTilesTests : EnqueueWithWalkerPartitionFourTilesTests {
    void SetUp() override {
        DebugManager.flags.EnableStaticPartitioning.set(1);
        DebugManager.flags.EnableBlitterOperationsSupport.set(1);
        EnqueueWithWalkerPartitionFourTilesTests::SetUp();
    }

    std::unique_ptr<LinearStream> createTaskStream() {
        const AllocationProperties commandStreamAllocationProperties{rootDevice->getRootDeviceIndex(),
                                                                     true,
                                                                     MemoryConstants::pageSize,
                                                                     GraphicsAllocation::AllocationType::COMMAND_BUFFER,
                                                                     true,
                                                                     false,
                                                                     rootDevice->getDeviceBitfield()};
        GraphicsAllocation *streamAllocation = rootDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
        return std::make_unique<LinearStream>(streamAllocation);
    }

    void destroyTaskStream(LinearStream &stream) {
        rootDevice->getMemoryManager()->freeGraphicsMemory(stream.getGraphicsAllocation());
    }

    void flushTaskStream(LinearStream &stream) {
        DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
        dispatchFlags.guardCommandBufferWithPipeControl = true;

        rootCsr->flushTask(stream, 0,
                           rootCsr->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u),
                           rootCsr->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u),
                           rootCsr->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
                           0u, dispatchFlags, rootDevice->getDevice());

        rootCsr->flushBatchedSubmissions();
    }

    template <typename FamilyType>
    void expectMemoryOnRootCsr(void *gfxAddress, const void *srcAddress, size_t length) {
        auto csr = static_cast<AUBCommandStreamReceiverHw<FamilyType> *>(rootCsr);
        csr->expectMemoryEqual(gfxAddress, srcAddress, length);
    }

    DebugManagerStateRestore restore{};
};

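// Note: static partitioning assigns each tile a fixed share of the dispatch up front,
// read from the work partition allocation (see workPartitionAllocationGpuVa and the
// WPARID register initialization below), instead of letting tiles claim partitions
// dynamically. The cross-tile sync counters checked by the next two tests live in a
// control section appended to the command buffer.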
HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenFourTilesWhenStaticWalkerPartitionIsEnabledForKernelThenOutputDataIsValid) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto mockCommandQueue = new MockCommandQueueHw<FamilyType>(multiTileDefaultContext.get(), rootDevice, nullptr);

    commandQueues[0][0].reset(mockCommandQueue);

    constexpr size_t globalWorkOffset[3] = {0, 0, 0};
    constexpr size_t gwsSize[3] = {1024, 1, 1};
    constexpr size_t lwsSize[3] = {32, 1, 1};
    constexpr cl_uint workingDimensions = 1;
    cl_uint kernelIncrementCounter = 1024;
    cl_int retVal = CL_SUCCESS;

    kernels[8]->setArg(0, sizeof(cl_mem), &clBuffer);
    kernels[8]->setArg(1, kernelIncrementCounter);
    retVal = mockCommandQueue->enqueueKernel(kernels[8].get(), workingDimensions, globalWorkOffset, gwsSize, lwsSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    mockCommandQueue->flush();

    constexpr uint32_t workgroupCount = static_cast<uint32_t>(gwsSize[workingDimensions - 1] / lwsSize[workingDimensions - 1]);
    std::array<uint32_t, workgroupCount> workgroupCounts;
    std::fill(workgroupCounts.begin(), workgroupCounts.end(), kernelIncrementCounter);

    expectMemoryOnRootCsr<FamilyType>(getGpuAddress(*buffer), &workgroupCounts[0], workgroupCounts.size() * sizeof(uint32_t));
}

HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalkerSyncWhenStaticWalkerPartitionIsEnabledThenAtomicsAreIncrementedCorrectly) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;

    auto taskStream = createTaskStream();
    auto taskStreamCpu = taskStream->getSpace(0);
    auto taskStreamGpu = taskStream->getGraphicsAllocation()->getGpuAddress();

    uint32_t totalBytesProgrammed = 0u;
    WALKER_TYPE walkerCmd = FamilyType::cmdInitGpgpuWalker;
    walkerCmd.setPartitionType(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_X);
    walkerCmd.getInterfaceDescriptor().setNumberOfThreadsInGpgpuThreadGroup(1u);

    WalkerPartition::WalkerPartitionArgs testArgs = {};
    testArgs.initializeWparidRegister = true;
    testArgs.crossTileAtomicSynchronization = true;
    testArgs.emitPipeControlStall = true;
    testArgs.tileCount = static_cast<uint32_t>(rootDevice->getDeviceBitfield().count());
    testArgs.partitionCount = testArgs.tileCount;
    testArgs.synchronizeBeforeExecution = true;
    testArgs.secondaryBatchBuffer = false;
    testArgs.emitSelfCleanup = false;
    testArgs.staticPartitioning = true;
    testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress();
    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(
        taskStreamCpu,
        taskStreamGpu,
        &walkerCmd,
        totalBytesProgrammed,
        testArgs,
        *defaultHwInfo);
    taskStream->getSpace(totalBytesProgrammed);
    flushTaskStream(*taskStream);

    const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs);
    const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
    const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
    uint32_t expectedValue = 0x4;
    expectMemoryOnRootCsr<FamilyType>(reinterpret_cast<void *>(preWalkerSyncAddress), &expectedValue, sizeof(expectedValue));
    expectMemoryOnRootCsr<FamilyType>(reinterpret_cast<void *>(postWalkerSyncAddress), &expectedValue, sizeof(expectedValue));

    destroyTaskStream(*taskStream);
}

HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalkerSyncThenAtomicsAreIncrementedCorrectly) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;

    auto taskStream = createTaskStream();
    auto taskStreamCpu = taskStream->getSpace(0);
    auto taskStreamGpu = taskStream->getGraphicsAllocation()->getGpuAddress();

    uint32_t totalBytesProgrammed = 0u;
    WALKER_TYPE walkerCmd = FamilyType::cmdInitGpgpuWalker;
    walkerCmd.setPartitionType(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_X);
    walkerCmd.getInterfaceDescriptor().setNumberOfThreadsInGpgpuThreadGroup(1u);

    WalkerPartition::WalkerPartitionArgs testArgs = {};
    testArgs.initializeWparidRegister = true;
    testArgs.crossTileAtomicSynchronization = true;
    testArgs.emitPipeControlStall = true;
    testArgs.tileCount = static_cast<uint32_t>(rootDevice->getDeviceBitfield().count());
    testArgs.partitionCount = testArgs.tileCount;
    testArgs.synchronizeBeforeExecution = false;
    testArgs.secondaryBatchBuffer = false;
    testArgs.emitSelfCleanup = false;
    testArgs.staticPartitioning = true;
    testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress();

    WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(
        taskStreamCpu,
        taskStreamGpu,
        &walkerCmd,
        totalBytesProgrammed,
        testArgs,
        *defaultHwInfo);
    taskStream->getSpace(totalBytesProgrammed);
    flushTaskStream(*taskStream);

    const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs);
    const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
    const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
    uint32_t expectedValue = 0x0;
    expectMemoryOnRootCsr<FamilyType>(reinterpret_cast<void *>(preWalkerSyncAddress), &expectedValue, sizeof(expectedValue));
    expectedValue = 0x4;
    expectMemoryOnRootCsr<FamilyType>(reinterpret_cast<void *>(postWalkerSyncAddress), &expectedValue, sizeof(expectedValue));

    destroyTaskStream(*taskStream);
}

// 2 Tiles
using TwoTilesAllContextsTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesAllContextsTest, HEAVY_givenTwoTilesAndAllContextsWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

using TwoTilesDualContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

using TwoTilesSingleContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Single>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

// 1 Tile

using SingleTileAllContextsTest = MultitileMulticontextTests<1, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileAllContextsTest, GENERATEONLY_givenSingleTileAndAllContextsWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

using SingleTileDualContextTest = MultitileMulticontextTests<1, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, givenSingleTileAndDualContextWhenSubmittingThenDataIsValid) {
    runAubTest<FamilyType>();
}

HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, givenSingleAllocationWhenUpdatedFromDifferentContextThenDataIsValid) {
    cl_int retVal = CL_SUCCESS;
    const uint32_t bufferSize = 256;
    const uint32_t halfBufferSize = bufferSize / 2;
    uint8_t writePattern1[halfBufferSize];
    uint8_t writePattern2[halfBufferSize];
    uint8_t initPattern[bufferSize];
    std::fill(initPattern, initPattern + sizeof(initPattern), 0);
    std::fill(writePattern1, writePattern1 + sizeof(writePattern1), 1);
    std::fill(writePattern2, writePattern2 + sizeof(writePattern2), 2);

    std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, initPattern, retVal));
    buffer->forceDisallowCPUCopy = true;

    auto simulatedCsr0 = getSimulatedCsr<FamilyType>(0, 0);
    simulatedCsr0->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    auto simulatedCsr1 = getSimulatedCsr<FamilyType>(0, 1);
    simulatedCsr1->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    commandQueues[0][0]->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, halfBufferSize, writePattern1, nullptr, 0, nullptr, nullptr);
    commandQueues[0][1]->enqueueWriteBuffer(buffer.get(), CL_FALSE, halfBufferSize, halfBufferSize, writePattern2, nullptr, 0, nullptr, nullptr);

    commandQueues[0][1]->finish(); // finish the second enqueue first to make sure the residency flow is correct
    commandQueues[0][0]->finish();

    auto gpuPtr = reinterpret_cast<void *>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress());
    expectMemory<FamilyType>(gpuPtr, writePattern1, halfBufferSize, 0, 0);
    expectMemory<FamilyType>(ptrOffset(gpuPtr, halfBufferSize), writePattern2, halfBufferSize, 0, 1);
}

// 1 Tile
using SingleTileDualContextTest = MultitileMulticontextTests<1, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, givenSingleTileAndDualContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}

using SingleTileAllContextsTest = MultitileMulticontextTests<1, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileAllContextsTest, HEAVY_givenSingleTileAndAllContextsWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}

// 2 Tiles
using TwoTilesSingleContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Single>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}

using TwoTilesDualContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}

using TwoTilesAllContextsTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesAllContextsTest, GENERATEONLY_givenTwoTilesAndAllContextsWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}

// 4 Tiles
using FourTilesSingleContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Single>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}

using FourTilesDualContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesDualContextTest, GENERATEONLY_givenFourTilesAndDualContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}

using FourTilesAllContextsTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest<FamilyType>();
}
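New file (most likely aub_one_va_multi_physical_tests_xehp_and_later.cpp, going by the CMake list above):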
@@ -0,0 +1,133 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/tests_configuration.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"

#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h"

using namespace NEO;

struct OneVAFourPhysicalStoragesTest : public MulticontextAubFixture, public ::testing::Test {
    static const uint32_t numTiles = 4;
    void SetUp() override {
        MulticontextAubFixture::SetUp(numTiles, MulticontextAubFixture::EnabledCommandStreamers::Single, false);
    }
    void TearDown() override {
        MulticontextAubFixture::TearDown();
    }
};

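// Technique under test: one GPU virtual address is backed by a separate physical page
// per tile. writeMemory2 with a single-bit memoryBanks mask ((1u << tile)) seeds one
// tile's bank at a time, so each tile's queue should read back its own pattern.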
HWCMDTEST_F(IGFX_XE_HP_CORE, OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueReadBufferThenReadFromCorrectBank) {
|
||||
if (is32bit) {
|
||||
return;
|
||||
}
|
||||
cl_int retVal = CL_OUT_OF_HOST_MEMORY;
|
||||
const uint32_t bufferSize = MemoryConstants::pageSize64k;
|
||||
uint8_t *memoryToWrite[numTiles];
|
||||
uint8_t *memoryToRead[numTiles];
|
||||
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, bufferSize, nullptr, retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
buffer->forceDisallowCPUCopy = true;
|
||||
auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
|
||||
EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool());
|
||||
auto gpuAddress = allocation->getGpuAddress();
|
||||
allocation->storageInfo.cloningOfPageTables = false;
|
||||
allocation->storageInfo.memoryBanks = 0;
|
||||
allocation->setAubWritable(false, static_cast<uint32_t>(maxNBitValue(numTiles)));
|
||||
|
||||
for (uint32_t tile = 0; tile < numTiles; tile++) {
|
||||
memoryToWrite[tile] = reinterpret_cast<uint8_t *>(alignedMalloc(bufferSize, MemoryConstants::pageSize64k));
|
||||
std::fill(memoryToWrite[tile], ptrOffset(memoryToWrite[tile], bufferSize), tile + 1);
|
||||
|
||||
auto hardwareContext = getSimulatedCsr<FamilyType>(tile, 0)->hardwareContextController->hardwareContexts[0].get();
|
||||
hardwareContext->writeMemory2({gpuAddress, memoryToWrite[tile], bufferSize, (1u << tile), AubMemDump::DataTypeHintValues::TraceNotype, MemoryConstants::pageSize64k});
|
||||
}
|
||||
|
||||
for (uint32_t tile = 0; tile < numTiles; tile++) {
|
||||
memoryToRead[tile] = reinterpret_cast<uint8_t *>(alignedMalloc(bufferSize, MemoryConstants::pageSize64k));
|
||||
|
||||
commandQueues[tile][0]->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, bufferSize, memoryToRead[tile], nullptr, 0, nullptr, nullptr);
|
||||
|
||||
commandQueues[tile][0]->flush();
|
||||
}
|
||||
|
||||
for (uint32_t tile = 0; tile < numTiles; tile++) {
|
||||
expectMemory<FamilyType>(memoryToRead[tile], memoryToWrite[tile], bufferSize, tile, 0);
|
||||
alignedFree(memoryToWrite[tile]);
|
||||
alignedFree(memoryToRead[tile]);
|
||||
}
|
||||
}

HWCMDTEST_F(IGFX_XE_HP_CORE, OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank) {
    if (is32bit) {
        return;
    }
    cl_int retVal = CL_OUT_OF_HOST_MEMORY;
    const uint32_t bufferSize = MemoryConstants::pageSize64k;
    uint8_t *memoryToWrite[numTiles];

    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, bufferSize, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    buffer->forceDisallowCPUCopy = true;
    auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool());
    auto gpuAddress = allocation->getGpuAddress();
    allocation->storageInfo.cloningOfPageTables = false;
    allocation->storageInfo.memoryBanks = 0;

    for (uint32_t tile = 0; tile < numTiles; tile++) {
        memoryToWrite[tile] = reinterpret_cast<uint8_t *>(alignedMalloc(bufferSize, MemoryConstants::pageSize64k));
        std::fill(memoryToWrite[tile], ptrOffset(memoryToWrite[tile], bufferSize), tile + 1);
        allocation->setAubWritable(true, 0xffffffff);

        commandQueues[tile][0]->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, memoryToWrite[tile], nullptr, 0, nullptr, nullptr);
    }

    for (uint32_t tile = 0; tile < numTiles; tile++) {
        expectMemory<FamilyType>(reinterpret_cast<void *>(gpuAddress), memoryToWrite[tile], bufferSize, tile, 0);
        alignedFree(memoryToWrite[tile]);
    }
}
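
// A "coloured" buffer is striped across tiles: consecutive 64KB chunks land in consecutive
// memory banks, while cloned page tables keep the whole range addressable from every tile.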
HWCMDTEST_F(IGFX_XE_HP_CORE, OneVAFourPhysicalStoragesTest, givenColouredBufferWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank) {
    if (is32bit) {
        return;
    }

    cl_int retVal = CL_OUT_OF_HOST_MEMORY;
    const uint32_t bufferSize = numTiles * MemoryConstants::pageSize64k;
    const auto allTilesValue = maxNBitValue(numTiles);
    uint8_t *memoryToWrite = reinterpret_cast<uint8_t *>(alignedMalloc(bufferSize, MemoryConstants::pageSize64k));

    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, bufferSize, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    buffer->forceDisallowCPUCopy = true;
    auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool());
    EXPECT_EQ(allTilesValue, allocation->storageInfo.memoryBanks.to_ullong());
    EXPECT_EQ(allTilesValue, allocation->storageInfo.pageTablesVisibility.to_ullong());
    EXPECT_TRUE(allocation->storageInfo.cloningOfPageTables);

    for (uint32_t tile = 0; tile < numTiles; tile++) {
        std::fill(ptrOffset(memoryToWrite, tile * MemoryConstants::pageSize64k), ptrOffset(memoryToWrite, (tile + 1) * MemoryConstants::pageSize64k), tile + 1);
    }

    commandQueues[0][0]->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, memoryToWrite, nullptr, 0, nullptr, nullptr);

    auto gpuAddress = allocation->getGpuAddress();
    for (uint32_t tile = 0; tile < numTiles; tile++) {
        for (uint32_t offset = 0; offset < bufferSize; offset += MemoryConstants::pageSize64k) {
            expectMemory<FamilyType>(reinterpret_cast<void *>(gpuAddress + offset), ptrOffset(memoryToWrite, offset), MemoryConstants::pageSize64k, tile, 0);
        }
    }

    alignedFree(memoryToWrite);
}
@@ -0,0 +1,152 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"

using namespace NEO;

struct PostSyncWriteXeHPTests : public HelloWorldFixture<AUBHelloWorldFixtureFactory>, public ::testing::Test {
    void SetUp() override {
        DebugManager.flags.EnableTimestampPacket.set(true);

        HelloWorldFixture<AUBHelloWorldFixtureFactory>::SetUp();
        EXPECT_TRUE(pCommandStreamReceiver->peekTimestampPacketWriteEnabled());
    }

    void TearDown() override {
        HelloWorldFixture<AUBHelloWorldFixtureFactory>::TearDown();
    }

    DebugManagerStateRestore restore;
    cl_int retVal = CL_SUCCESS;
};
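
// A timestamp packet carries four values (context start/end, global start/end). The packets
// appear to be initialized to 1, so comparing against {1, 1, 1, 1} with expectMemoryNotEqual
// verifies that the postsync write actually landed.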

HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTimestampWriteEnabledWhenEnqueueingThenWritePostsyncOperation) {
    MockCommandQueueHw<FamilyType> cmdQ(pContext, pClDevice, nullptr);

    const uint32_t bufferSize = 4;

    std::unique_ptr<Buffer> buffer(Buffer::create(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal));
    auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    memset(graphicsAllocation->getUnderlyingBuffer(), 0, graphicsAllocation->getUnderlyingBufferSize());
    buffer->forceDisallowCPUCopy = true;

    uint8_t writeData[bufferSize] = {1, 2, 3, 4};
    cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr);
    expectMemory<FamilyType>(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), writeData, bufferSize);

    typename FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 1, 1};
    auto tagGpuAddress = reinterpret_cast<void *>(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress());
    expectMemoryNotEqual<FamilyType>(tagGpuAddress, expectedTimestampValues, 4 * sizeof(typename FamilyType::TimestampPacketType));
}
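
// UseImmDataWriteModeOnPostSyncOperation switches the walker postsync from a timestamp write to
// an immediate-data write, so the packet ends up holding known immediate values instead of
// free-running timestamps; the expected layout differs with the packet element size.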
HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenDebugVariableEnabledWhenEnqueueingThenWritePostsyncOperationInImmWriteMode) {
    DebugManager.flags.UseImmDataWriteModeOnPostSyncOperation.set(true);
    MockCommandQueueHw<FamilyType> cmdQ(pContext, pClDevice, nullptr);

    const uint32_t bufferSize = 4;

    std::unique_ptr<Buffer> buffer(Buffer::create(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal));
    auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    memset(graphicsAllocation->getUnderlyingBuffer(), 0, graphicsAllocation->getUnderlyingBufferSize());
    buffer->forceDisallowCPUCopy = true;

    uint8_t writeData[bufferSize] = {1, 2, 3, 4};
    cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr);
    expectMemory<FamilyType>(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), writeData, bufferSize);

    auto tagGpuAddress = reinterpret_cast<void *>(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress());

    constexpr auto timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType);
    if constexpr (timestampPacketTypeSize == 4u) {
        typename FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 2, 2};
        expectMemory<FamilyType>(tagGpuAddress, expectedTimestampValues, 4 * timestampPacketTypeSize);
    } else {
        typename FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 0x2'0000'0002u, 1};
        expectMemory<FamilyType>(tagGpuAddress, expectedTimestampValues, 4 * timestampPacketTypeSize);
    }
}

HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTwoBatchedEnqueuesWhenDependencyIsResolvedThenDecrementCounterOnGpu) {
    MockContext context(pCmdQ->getDevice().getSpecializedDevice<ClDevice>());
    pCommandStreamReceiver->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    const size_t bufferSize = 1024;
    auto retVal = CL_SUCCESS;
    uint8_t initialMemory[bufferSize] = {};
    uint8_t writePattern1[bufferSize];
    uint8_t writePattern2[bufferSize];
    std::fill(writePattern1, writePattern1 + sizeof(writePattern1), 1);
    std::fill(writePattern2, writePattern2 + sizeof(writePattern2), 2);

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(&context, CL_MEM_COPY_HOST_PTR, bufferSize, initialMemory, retVal));
    // make sure that GPU copy is used
    buffer->forceDisallowCPUCopy = true;
    cl_event outEvent1, outEvent2;

    pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, bufferSize, writePattern1, nullptr, 0, nullptr, &outEvent1);
    auto node1 = castToObject<Event>(outEvent1)->getTimestampPacketNodes()->peekNodes().at(0);
    node1->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->setAubWritable(true, 0xffffffff); // allow to write again after Buffer::create

    pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writePattern2, nullptr, 0, nullptr, &outEvent2);
    auto node2 = castToObject<Event>(outEvent2)->getTimestampPacketNodes()->peekNodes().at(0);

    expectMemory<FamilyType>(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), writePattern2, bufferSize);

    typename FamilyType::TimestampPacketType expectedEndTimestamp = 1;
    auto endTimestampAddress1 = TimestampPacketHelper::getContextEndGpuAddress(*node1);
    auto endTimestampAddress2 = TimestampPacketHelper::getGlobalEndGpuAddress(*node1);
    auto endTimestampAddress3 = TimestampPacketHelper::getContextEndGpuAddress(*node2);
    auto endTimestampAddress4 = TimestampPacketHelper::getGlobalEndGpuAddress(*node2);
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress1), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress2), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress3), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress4), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));

    clReleaseEvent(outEvent1);
    clReleaseEvent(outEvent2);
}
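
// An enqueue whose dispatch is split into multiple walkers allocates one timestamp node per
// walker; every end timestamp in both nodes must have been overwritten by a postsync write.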
HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenMultipleWalkersWhenEnqueueingThenWriteAllTimestamps) {
    MockContext context(pCmdQ->getDevice().getSpecializedDevice<ClDevice>());
    const size_t bufferSize = 70;
    const size_t writeSize = bufferSize - 2;
    uint8_t writeData[writeSize] = {};
    cl_int retVal = CL_SUCCESS;
    cl_event outEvent;

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal));
    buffer->forceDisallowCPUCopy = true;

    pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 1, writeSize, writeData, nullptr, 0, nullptr, &outEvent);

    auto &timestampNodes = castToObject<Event>(outEvent)->getTimestampPacketNodes()->peekNodes();

    EXPECT_EQ(2u, timestampNodes.size());

    typename FamilyType::TimestampPacketType expectedEndTimestamp = 1;
    auto endTimestampAddress1 = TimestampPacketHelper::getContextEndGpuAddress(*timestampNodes[0]);
    auto endTimestampAddress2 = TimestampPacketHelper::getGlobalEndGpuAddress(*timestampNodes[0]);
    auto endTimestampAddress3 = TimestampPacketHelper::getContextEndGpuAddress(*timestampNodes[1]);
    auto endTimestampAddress4 = TimestampPacketHelper::getGlobalEndGpuAddress(*timestampNodes[1]);
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress1), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress2), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress3), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));
    expectMemoryNotEqual<FamilyType>(reinterpret_cast<void *>(endTimestampAddress4), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType));

    clReleaseEvent(outEvent);
}
@@ -0,0 +1,327 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/array_count.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h"
#include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h"
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"

using namespace NEO;
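
// Kernel 6 of SimpleKernelFixture keeps its private memory in scratch space; the fixture
// precomputes the expected per-element sums on the host and compares them with the kernel
// output once the AUB run completes.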
struct Gen12AubScratchSpaceForPrivateFixture : public KernelAUBFixture<SimpleKernelFixture> {
    void SetUp() override {
        debugRestorer = std::make_unique<DebugManagerStateRestore>();

        kernelIdx = 6;
        kernelIds |= (1 << kernelIdx);
        KernelAUBFixture<SimpleKernelFixture>::SetUp();

        arraySize = 32;
        vectorSize = 2;
        typeSize = sizeof(uint32_t);

        gwsSize = arraySize;
        lwsSize = 32;
        maxIterations1 = static_cast<uint32_t>(arraySize);
        maxIterations2 = static_cast<uint32_t>(arraySize);
        scalar = 0x4;

        expectedMemorySize = arraySize * vectorSize * typeSize;

        srcBuffer = alignedMalloc(expectedMemorySize, 0x1000);
        ASSERT_NE(nullptr, srcBuffer);
        auto srcBufferUint = static_cast<uint32_t *>(srcBuffer);
        uint32_t valOdd = 0x1;
        uint32_t valEven = 0x3;
        for (uint32_t i = 0; i < arraySize * vectorSize; ++i) {
            if (i % 2) {
                srcBufferUint[i] = valOdd;
            } else {
                srcBufferUint[i] = valEven;
            }
        }
        uint32_t sumOdd = 0;
        uint32_t sumEven = 0;
        for (uint32_t i = 0; i < arraySize; ++i) {
            sumOdd += ((i + scalar) + valOdd);
            sumEven += (i + valEven);
        }

        dstBuffer = alignedMalloc(expectedMemorySize, 0x1000);
        ASSERT_NE(nullptr, dstBuffer);
        memset(dstBuffer, 0, expectedMemorySize);

        expectedMemory = alignedMalloc(expectedMemorySize, 0x1000);
        ASSERT_NE(nullptr, expectedMemory);
        auto expectedMemoryUint = static_cast<uint32_t *>(expectedMemory);
        for (uint32_t i = 0; i < arraySize * vectorSize; ++i) {
            if (i % 2) {
                expectedMemoryUint[i] = sumOdd;
            } else {
                expectedMemoryUint[i] = sumEven;
            }
        }

        kernels[kernelIdx]->setArgSvm(0, expectedMemorySize, dstBuffer, nullptr, 0u);
        dstAllocation = createHostPtrAllocationFromSvmPtr(dstBuffer, expectedMemorySize);

        kernels[kernelIdx]->setArgSvm(1, expectedMemorySize, srcBuffer, nullptr, 0u);
        srcAllocation = createHostPtrAllocationFromSvmPtr(srcBuffer, expectedMemorySize);

        kernels[kernelIdx]->setArg(2, sizeof(uint32_t), &scalar);
        kernels[kernelIdx]->setArg(3, sizeof(uint32_t), &maxIterations1);
        kernels[kernelIdx]->setArg(4, sizeof(uint32_t), &maxIterations2);
    }

    void TearDown() override {
        pCmdQ->flush();

        if (expectedMemory) {
            alignedFree(expectedMemory);
            expectedMemory = nullptr;
        }
        if (srcBuffer) {
            alignedFree(srcBuffer);
            srcBuffer = nullptr;
        }
        if (dstBuffer) {
            alignedFree(dstBuffer);
            dstBuffer = nullptr;
        }

        KernelAUBFixture<SimpleKernelFixture>::TearDown();
    }

    std::unique_ptr<DebugManagerStateRestore> debugRestorer;

    size_t arraySize;
    size_t vectorSize;
    size_t typeSize;
    size_t gwsSize;
    size_t lwsSize;
    uint32_t kernelIdx;

    void *expectedMemory = nullptr;
    size_t expectedMemorySize = 0;

    void *srcBuffer = nullptr;
    void *dstBuffer = nullptr;
    GraphicsAllocation *srcAllocation;
    GraphicsAllocation *dstAllocation;

    uint32_t scalar;
    uint32_t maxIterations1;
    uint32_t maxIterations2;
};

using Gen12AubScratchSpaceForPrivateTest = Test<Gen12AubScratchSpaceForPrivateFixture>;

HWCMDTEST_F(IGFX_XE_HP_CORE, Gen12AubScratchSpaceForPrivateTest, WhenKernelUsesScratchSpaceForPrivateThenExpectCorrectResults) {
    cl_uint workDim = 1;
    size_t globalWorkOffset[3] = {0, 0, 0};
    size_t globalWorkSize[3] = {gwsSize, 1, 1};
    size_t localWorkSize[3] = {lwsSize, 1, 1};
    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    auto retVal = pCmdQ->enqueueKernel(
        kernels[kernelIdx].get(),
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);
    ASSERT_EQ(CL_SUCCESS, retVal);

    pCmdQ->flush();

    expectMemory<FamilyType>(dstBuffer, expectedMemory, expectedMemorySize);
}
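
// The spill/fill variant builds "simple_spill_fill_kernel" with the default 128-GRF budget so
// that register spills force scratch accesses; the fixture asserts a non-zero
// perThreadScratchSize to confirm the binary really spills.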
class DefaultGrfKernelFixture : public ProgramFixture {
  public:
    using ProgramFixture::SetUp;

  protected:
    void SetUp(ClDevice *device, Context *context) {
        ProgramFixture::SetUp();

        std::string programName("simple_spill_fill_kernel");
        CreateProgramFromBinary(
            context,
            context->getDevices(),
            programName);
        ASSERT_NE(nullptr, pProgram);

        retVal = pProgram->build(
            pProgram->getDevices(),
            nullptr,
            false);
        ASSERT_EQ(CL_SUCCESS, retVal);

        kernel.reset(Kernel::create<MockKernel>(
            pProgram,
            pProgram->getKernelInfoForKernel("spill_test"),
            *device,
            &retVal));
    }

    void TearDown() override {
        if (kernel) {
            kernel.reset(nullptr);
        }

        ProgramFixture::TearDown();
    }

    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<Kernel> kernel;
};

struct Gen12AubScratchSpaceForSpillFillFixture : public KernelAUBFixture<DefaultGrfKernelFixture> {
    void SetUp() override {
        debugRestorer = std::make_unique<DebugManagerStateRestore>();

        KernelAUBFixture<DefaultGrfKernelFixture>::SetUp();

        arraySize = 32;
        typeSize = sizeof(cl_int);

        gwsSize = arraySize;
        lwsSize = 32;

        expectedMemorySize = (arraySize * 2 + 1) * typeSize - 4;
        inMemorySize = expectedMemorySize;
        outMemorySize = expectedMemorySize;
        offsetMemorySize = 128 * arraySize;

        srcBuffer = alignedMalloc(inMemorySize, 0x1000);
        ASSERT_NE(nullptr, srcBuffer);
        memset(srcBuffer, 0, inMemorySize);

        outBuffer = alignedMalloc(outMemorySize, 0x1000);
        ASSERT_NE(nullptr, outBuffer);
        memset(outBuffer, 0, outMemorySize);

        expectedMemory = alignedMalloc(expectedMemorySize, 0x1000);
        ASSERT_NE(nullptr, expectedMemory);
        memset(expectedMemory, 0, expectedMemorySize);

        offsetBuffer = alignedMalloc(offsetMemorySize, 0x1000);
        ASSERT_NE(nullptr, offsetBuffer);
        memset(offsetBuffer, 0, offsetMemorySize);

        auto srcBufferInt = static_cast<cl_int *>(srcBuffer);
        auto expectedMemoryInt = static_cast<cl_int *>(expectedMemory);
        const int expectedVal1 = 16256;
        const int expectedVal2 = 512;

        for (uint32_t i = 0; i < arraySize; ++i) {
            srcBufferInt[i] = 2;
            expectedMemoryInt[i * 2] = expectedVal1;
            expectedMemoryInt[i * 2 + 1] = expectedVal2;
        }

        auto &kernelInfo = kernel->getKernelInfo();
        EXPECT_NE(0u, kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
        EXPECT_EQ(128u, kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired);

        kernel->setArgSvm(0, inMemorySize, srcBuffer, nullptr, 0u);
        inAllocation = createHostPtrAllocationFromSvmPtr(srcBuffer, inMemorySize);

        kernel->setArgSvm(1, outMemorySize, outBuffer, nullptr, 0u);
        outAllocation = createHostPtrAllocationFromSvmPtr(outBuffer, outMemorySize);

        kernel->setArgSvm(2, offsetMemorySize, offsetBuffer, nullptr, 0u);
        offsetAllocation = createHostPtrAllocationFromSvmPtr(offsetBuffer, offsetMemorySize);
    }

    void TearDown() override {
        pCmdQ->flush();

        if (expectedMemory) {
            alignedFree(expectedMemory);
            expectedMemory = nullptr;
        }
        if (srcBuffer) {
            alignedFree(srcBuffer);
            srcBuffer = nullptr;
        }
        if (outBuffer) {
            alignedFree(outBuffer);
            outBuffer = nullptr;
        }
        if (offsetBuffer) {
            alignedFree(offsetBuffer);
            offsetBuffer = nullptr;
        }

        KernelAUBFixture<DefaultGrfKernelFixture>::TearDown();
    }

    std::unique_ptr<DebugManagerStateRestore> debugRestorer;

    size_t arraySize;
    size_t vectorSize;
    size_t typeSize;
    size_t gwsSize;
    size_t lwsSize;

    void *expectedMemory = nullptr;
    size_t expectedMemorySize = 0;
    size_t inMemorySize = 0;
    size_t outMemorySize = 0;
    size_t offsetMemorySize = 0;

    void *srcBuffer = nullptr;
    void *outBuffer = nullptr;
    void *offsetBuffer = nullptr;
    GraphicsAllocation *inAllocation;
    GraphicsAllocation *outAllocation;
    GraphicsAllocation *offsetAllocation;
};

using Gen12AubScratchSpaceForSpillFillTest = Test<Gen12AubScratchSpaceForSpillFillFixture>;

HWCMDTEST_F(IGFX_XE_HP_CORE, Gen12AubScratchSpaceForSpillFillTest, givenSurfaceStateScratchSpaceEnabledWhenKernelUsesScratchForSpillFillThenExpectCorrectResults) {
    cl_uint workDim = 1;
    size_t globalWorkOffset[3] = {0, 0, 0};
    size_t globalWorkSize[3] = {gwsSize, 1, 1};
    size_t localWorkSize[3] = {lwsSize, 1, 1};
    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    auto retVal = pCmdQ->enqueueKernel(
        kernel.get(),
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);
    ASSERT_EQ(CL_SUCCESS, retVal);

    pCmdQ->finish();

    expectMemory<FamilyType>(outBuffer, expectedMemory, expectedMemorySize);
}
@@ -0,0 +1,325 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"

#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/platform/platform.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"

#include "test_traits_common.h"

using namespace NEO;
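
// The fixture is parameterized twice: the template flag selects local vs system memory, and the
// gtest parameter (consumed via NodeOrdinal) selects the target engine (RCS or CCS). Unsupported
// engines, 32-bit builds, and platforms without flat CCS or local memory are skipped.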
template <bool useLocalMemory = true>
struct CompressionXeHPAndLater : public AUBFixture,
                                 public ::testing::Test,
                                 public ::testing::WithParamInterface<uint32_t /*EngineType*/> {
    void SetUp() override {
        REQUIRE_64BIT_OR_SKIP();

        debugRestorer = std::make_unique<DebugManagerStateRestore>();
        DebugManager.flags.RenderCompressedBuffersEnabled.set(true);
        DebugManager.flags.RenderCompressedImagesEnabled.set(true);
        DebugManager.flags.EnableLocalMemory.set(useLocalMemory);
        DebugManager.flags.NodeOrdinal.set(GetParam());

        auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);

        auto expectedEngine = static_cast<aub_stream::EngineType>(GetParam());
        bool engineSupported = false;
        for (auto &engine : hwHelper.getGpgpuEngineInstances(*defaultHwInfo)) {
            if (engine.first == expectedEngine) {
                engineSupported = true;
                break;
            }
        }

        if (!engineSupported) {
            GTEST_SKIP();
        }

        AUBFixture::SetUp(defaultHwInfo.get());
        auto &ftrTable = device->getHardwareInfo().featureTable;
        if ((!ftrTable.flags.ftrFlatPhysCCS) ||
            (!ftrTable.flags.ftrLocalMemory && useLocalMemory)) {
            GTEST_SKIP();
        }
        context->contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    }
    void TearDown() override {
        AUBFixture::TearDown();
    }
    std::unique_ptr<DebugManagerStateRestore> debugRestorer;

    cl_int retVal = CL_SUCCESS;

    template <typename FamilyType>
    void givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect();
    template <typename FamilyType>
    void givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect();
    template <typename FamilyType>
    void givenCompressedImageWhenReadingThenResultsAreCorrect();
};
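
// Compression is verified indirectly: the AUB dump of a compressed allocation must NOT match the
// raw write pattern (data is stored compressed), while reads and copies into uncompressed
// destinations must reproduce the pattern exactly.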
template <bool testLocalMemory>
template <typename FamilyType>
void CompressionXeHPAndLater<testLocalMemory>::givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect() {
    const size_t bufferSize = 2048;
    uint8_t writePattern[bufferSize];
    std::fill(writePattern, writePattern + sizeof(writePattern), 1);

    device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto compressedBuffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal));
    auto compressedAllocation = compressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
    memset(compressedAllocation->getUnderlyingBuffer(), 0, bufferSize);
    EXPECT_NE(nullptr, compressedAllocation->getDefaultGmm()->gmmResourceInfo->peekHandle());
    EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled);
    if (testLocalMemory) {
        EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation->getMemoryPool());
    } else {
        EXPECT_EQ(MemoryPool::System4KBPages, compressedAllocation->getMemoryPool());
    }

    auto notCompressedBuffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal));
    auto nonCompressedAllocation = notCompressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
    nonCompressedAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
    if (nonCompressedAllocation->getDefaultGmm()) {
        nonCompressedAllocation->getDefaultGmm()->isCompressionEnabled = false;
    }
    memset(nonCompressedAllocation->getUnderlyingBuffer(), 0, bufferSize);

    pCmdQ->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueCopyBuffer(compressedBuffer.get(), notCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr);
    pCmdQ->finish();

    expectNotEqualMemory<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation),
                                     writePattern, bufferSize);

    expectMemory<FamilyType>(AUBFixture::getGpuPointer(nonCompressedAllocation),
                             writePattern, bufferSize);
}

template <bool testLocalMemory>
template <typename FamilyType>
void CompressionXeHPAndLater<testLocalMemory>::givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect() {
    const size_t imageWidth = 16;
    const size_t imageHeight = 16;

    const size_t bufferSize = 64 * KB;
    uint8_t writePattern[bufferSize];
    std::fill(writePattern, writePattern + sizeof(writePattern), 1);

    device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto compressedBuffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_COPY_HOST_PTR | CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, writePattern, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    // now create image2DFromBuffer
    cl_image_desc imageDescriptor = {};
    imageDescriptor.mem_object = compressedBuffer.get();
    imageDescriptor.image_height = imageHeight;
    imageDescriptor.image_width = imageWidth;
    imageDescriptor.image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_image_format imageFormat = {};
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT32;
    imageFormat.image_channel_order = CL_RGBA;

    auto clCompressedImage = clCreateImage(context, CL_MEM_READ_WRITE, &imageFormat, &imageDescriptor, nullptr, &retVal);
    auto compressedImage = castToObject<Image>(clCompressedImage);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const size_t perChannelDataSize = sizeof(cl_uint);
    const size_t numChannels = 4;
    const auto imageSize = imageWidth * imageHeight * perChannelDataSize * numChannels;
    cl_uint destMemory[imageSize / sizeof(cl_uint)] = {0};
    const size_t origin[] = {0, 0, 0};
    const size_t region[] = {imageWidth, imageHeight, 1};

    retVal = pCmdQ->enqueueReadImage(
        compressedImage,
        CL_FALSE,
        origin,
        region,
        0,
        0,
        destMemory,
        nullptr,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = pCmdQ->flush();
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectMemory<FamilyType>(destMemory, writePattern, imageSize);

    // make sure our objects are in fact compressed
    auto graphicsAllocation = compressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
    EXPECT_NE(nullptr, graphicsAllocation->getDefaultGmm());
    EXPECT_TRUE(graphicsAllocation->getDefaultGmm()->isCompressionEnabled);
    EXPECT_TRUE(compressedImage->getGraphicsAllocation(device->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled);

    expectNotEqualMemory<FamilyType>(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), writePattern, bufferSize);

    clReleaseMemObject(clCompressedImage);
}
template <bool testLocalMemory>
template <typename FamilyType>
void CompressionXeHPAndLater<testLocalMemory>::givenCompressedImageWhenReadingThenResultsAreCorrect() {
    const size_t imageWidth = 8;
    const size_t imageHeight = 4;
    const size_t perChannelDataSize = sizeof(cl_float);
    const size_t numChannels = 4;
    const auto imageSize = imageWidth * imageHeight * perChannelDataSize * numChannels;
    const auto rowSize = imageSize / imageHeight;
    cl_float srcMemory[imageSize / sizeof(cl_float)] = {0};

    const cl_float row[rowSize] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
                                   1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
                                   1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
                                   1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
    cl_float *pixel = srcMemory;
    for (uint32_t height = 0; height < imageHeight; height++) {
        memcpy(pixel, row, rowSize);
        pixel += imageWidth;
    }

    cl_float destMemory[imageSize / sizeof(cl_float)] = {0};

    cl_image_format imageFormat;
    cl_image_desc imageDesc;
    imageFormat.image_channel_data_type = CL_FLOAT;
    imageFormat.image_channel_order = CL_RGBA;

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = imageWidth;
    imageDesc.image_height = imageHeight;
    imageDesc.image_depth = 1;
    imageDesc.image_array_size = 1;
    imageDesc.image_row_pitch = 0;
    imageDesc.image_slice_pitch = 0;
    imageDesc.num_mip_levels = 0;
    imageDesc.num_samples = 0;
    imageDesc.mem_object = NULL;

    auto allocation = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, imageSize}, destMemory);
    csr->makeResidentHostPtrAllocation(allocation);
    csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), TEMPORARY_ALLOCATION);

    cl_mem_flags flags = CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    auto retVal = CL_INVALID_VALUE;
    std::unique_ptr<Image> srcImage(Image::create(
        context,
        ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
        flags,
        0,
        surfaceFormat,
        &imageDesc,
        srcMemory,
        retVal));
    ASSERT_NE(nullptr, srcImage);

    cl_bool blockingRead = CL_FALSE;
    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;
    const size_t origin[] = {0, 0, 0};
    const size_t region[] = {imageWidth, imageHeight, 1};

    retVal = pCmdQ->enqueueReadImage(
        srcImage.get(),
        blockingRead,
        origin,
        region,
        0,
        0,
        destMemory,
        nullptr,
        numEventsInWaitList,
        eventWaitList,
        event);
    EXPECT_EQ(CL_SUCCESS, retVal);

    allocation = csr->getTemporaryAllocations().peekHead();
    while (allocation && allocation->getUnderlyingBuffer() != destMemory) {
        allocation = allocation->next;
    }
    auto pDestGpuAddress = reinterpret_cast<void *>(allocation->getGpuAddress());

    pCmdQ->flush();
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectMemory<FamilyType>(pDestGpuAddress, srcMemory, imageSize);
    expectNotEqualMemory<FamilyType>(AUBFixture::getGpuPointer(srcImage->getGraphicsAllocation(rootDeviceIndex)), srcMemory, imageSize);
}
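
// These matchers gate instantiation at compile time: only Xe HP+ products whose TestTraits
// report AUB support for compression in the given memory pool get the test bodies.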
struct CompressionLocalAubsSupportedMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        if constexpr (HwMapper<productFamily>::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) {
            return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::localMemCompressionAubsSupported;
        }
        return false;
    }
};

struct CompressionSystemAubsSupportedMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        if constexpr (HwMapper<productFamily>::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) {
            return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::systemMemCompressionAubsSupported;
        }
        return false;
    }
};

using CompressionLocalXeHPAndLater = CompressionXeHPAndLater<true>;
HWTEST2_P(CompressionLocalXeHPAndLater, givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect, CompressionLocalAubsSupportedMatcher) {
    givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect<FamilyType>();
}
HWTEST2_P(CompressionLocalXeHPAndLater, givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect, CompressionLocalAubsSupportedMatcher) {
    givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect<FamilyType>();
}
HWTEST2_P(CompressionLocalXeHPAndLater, givenCompressedImageWhenReadingThenResultsAreCorrect, CompressionLocalAubsSupportedMatcher) {
    givenCompressedImageWhenReadingThenResultsAreCorrect<FamilyType>();
}

INSTANTIATE_TEST_CASE_P(,
                        CompressionLocalXeHPAndLater,
                        ::testing::Values(aub_stream::ENGINE_RCS,
                                          aub_stream::ENGINE_CCS));

using CompressionSystemXeHPAndLater = CompressionXeHPAndLater<false>;
HWTEST2_P(CompressionSystemXeHPAndLater, GENERATEONLY_givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect, CompressionSystemAubsSupportedMatcher) {
    givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect<FamilyType>();
}
HWTEST2_P(CompressionSystemXeHPAndLater, GENERATEONLY_givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect, CompressionSystemAubsSupportedMatcher) {
    givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect<FamilyType>();
}
HWTEST2_P(CompressionSystemXeHPAndLater, givenCompressedImageWhenReadingThenResultsAreCorrect, CompressionSystemAubsSupportedMatcher) {
    givenCompressedImageWhenReadingThenResultsAreCorrect<FamilyType>();
}

INSTANTIATE_TEST_CASE_P(,
                        CompressionSystemXeHPAndLater,
                        ::testing::Values(aub_stream::ENGINE_RCS,
                                          aub_stream::ENGINE_CCS));
@@ -0,0 +1,78 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/constants.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
#include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h"

#include <array>

struct MultiTileBuffersXeHPAndLater : public MulticontextAubFixture, public ::testing::Test {
    static constexpr uint32_t numTiles = 2;

    void SetUp() override {
        MulticontextAubFixture::SetUp(numTiles, EnabledCommandStreamers::Single, false);
    }
    void TearDown() override {
        MulticontextAubFixture::TearDown();
    }
};
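
// Overriding memoryProperties.pDevice after createMemoryProperties pins each buffer to a
// specific sub-device, so the two buffers land in different tiles' local memory before the
// cross-tile copy is validated.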
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileBuffersXeHPAndLater, givenTwoBuffersAllocatedOnDifferentTilesWhenCopiedThenDataValidates) {
    if constexpr (is64bit) {
        constexpr size_t bufferSize = 64 * 1024u;

        char bufferTile0Memory[bufferSize] = {};
        char bufferTile1Memory[bufferSize] = {};

        for (auto index = 0u; index < bufferSize; index++) {
            bufferTile0Memory[index] = index % 255;
            bufferTile1Memory[index] = index % 255;
        }

        auto retVal = CL_INVALID_VALUE;

        cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;
        MemoryProperties memoryProperties =
            ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice());
        memoryProperties.pDevice = &context->getDevice(1)->getDevice();
        auto srcBuffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), memoryProperties, flags, 0, bufferSize, bufferTile0Memory, retVal));
        ASSERT_NE(nullptr, srcBuffer);

        flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
        memoryProperties.pDevice = &context->getDevice(2)->getDevice();
        auto dstBuffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), memoryProperties, flags, 0, bufferSize, bufferTile1Memory, retVal));
        ASSERT_NE(nullptr, dstBuffer);

        auto cmdQ = commandQueues[0][0].get();

        expectMemory<FamilyType>(AUBFixture::getGpuPointer(srcBuffer->getGraphicsAllocation(rootDeviceIndex)), bufferTile0Memory, bufferSize, 0, 0);
        expectMemory<FamilyType>(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation(rootDeviceIndex)), bufferTile1Memory, bufferSize, 0, 0);

        cl_uint numEventsInWaitList = 0;
        cl_event *eventWaitList = nullptr;
        cl_event *event = nullptr;

        retVal = cmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(),
                                         0, 0,
                                         bufferSize, numEventsInWaitList,
                                         eventWaitList, event);

        EXPECT_EQ(CL_SUCCESS, retVal);

        cmdQ->flush();

        expectMemory<FamilyType>(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation(rootDeviceIndex)), bufferTile0Memory, bufferSize, 0, 0);
    }
}
@@ -0,0 +1,32 @@
/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/test/common/test_macros/test.h"
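
// These multi-tile tests are excluded for XE_HPG, presumably because XE_HPG products expose a
// single tile.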
HWTEST_EXCLUDE_PRODUCT(FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(FourTilesDualContextTest, HEAVY_givenFourTilesAndDualContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithAtomicThenOutputDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithoutAtomicThenOutputDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(TwoTilesAllContextsTest, HEAVY_givenTwoTilesAndAllContextsWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(TwoTilesAllContextsTest, GENERATEONLY_givenTwoTilesAndAllContextsWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(FourTilesDualContextTest, GENERATEONLY_givenFourTilesAndDualContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueReadBufferThenReadFromCorrectBank, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(OneVAFourPhysicalStoragesTest, givenColouredBufferWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(MultiTileBuffersXeHPAndLater, givenTwoBuffersAllocatedOnDifferentTilesWhenCopiedThenDataValidates, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(StaticWalkerPartitionFourTilesTests, givenFourTilesWhenStaticWalkerPartitionIsEnabledForKernelThenOutputDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(StaticWalkerPartitionFourTilesTests, givenPreWalkerSyncWhenStaticWalkerPartitionIsThenAtomicsAreIncrementedCorrectly, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(StaticWalkerPartitionFourTilesTests, whenNoPreWalkerSyncThenAtomicsAreIncrementedCorrectly, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(SingleTileAllContextsTest, HEAVY_givenSingleTileAndAllContextsWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(SingleTileAllContextsTest, GENERATEONLY_givenSingleTileAndAllContextsWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE);