// compute-runtime/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp

/*
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/test/common/fixtures/memory_management_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/mocks/ult_cl_device_factory.h"
#include "CL/cl.h"
#include "gtest/gtest.h"
#include <memory>
using namespace NEO;
// Fixture for single-device buffer kernel-argument tests; kernel, kernel info
// and device members come from KernelArgBufferFixture.
struct KernelArgBufferTest : public Test<KernelArgBufferFixture> {
// Pairs an allocation type with a compression flag; used to parameterize the
// aux-translation tests below.
struct AllocationTypeHelper {
AllocationType allocationType;
bool compressed;
};
};
// Setting a valid buffer as kernel arg 0 must patch the stateless slot in the
// cross-thread data with the buffer's CPU address.
TEST_F(KernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
    // unique_ptr instead of raw new/delete: buffer is released even if an
    // assertion fails before the end of the test body.
    auto buffer = std::make_unique<MockBuffer>();
    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Locate arg 0's stateless slot inside the cross-thread data and verify it
    // now holds the buffer's CPU address.
    auto pKernelArg = reinterpret_cast<cl_mem **>(this->pKernel->getCrossThreadData() +
                                                  this->pKernelInfo->argAsPtr(0).stateless);
    EXPECT_EQ(buffer->getCpuAddress(), *pKernelArg);
}
// Fixture for multi-device kernel-argument tests.
// The factory creates 3 root devices with 2 subdevices each; the context spans
// root device 1 plus the two subdevices of root device 2 (indices 4 and 5), so
// kernels must patch cross-thread data per root device.
struct MultiDeviceKernelArgBufferTest : public ::testing::Test {
void SetUp() override {
ClDeviceVector devicesForContext;
devicesForContext.push_back(deviceFactory.rootDevices[1]);
devicesForContext.push_back(deviceFactory.subDevices[4]);
devicesForContext.push_back(deviceFactory.subDevices[5]);
pContext = std::make_unique<MockContext>(devicesForContext);
// Root device 0 is not part of the context, so its kernel-info slot stays null;
// slots 1 and 2 get the two locally-owned mock kernel infos.
kernelInfos.resize(3);
kernelInfos[0] = nullptr;
pKernelInfosStorage[0] = std::make_unique<MockKernelInfo>();
pKernelInfosStorage[1] = std::make_unique<MockKernelInfo>();
kernelInfos[1] = pKernelInfosStorage[0].get();
kernelInfos[2] = pKernelInfosStorage[1].get();
auto &gfxCoreHelper = pContext->getDevice(0)->getGfxCoreHelper();
for (auto i = 0u; i < 2; i++) {
pKernelInfosStorage[i]->heapInfo.pSsh = pSshLocal[i];
pKernelInfosStorage[i]->heapInfo.surfaceStateHeapSize = sizeof(pSshLocal[i]);
pKernelInfosStorage[i]->kernelDescriptor.kernelAttributes.simdSize = gfxCoreHelper.getMinimalSIMDSize();
// Seed the arg slot (at offset i * sizeof(void *)) with the address of the
// backing storage itself, giving each kernel a distinct known initial pattern.
auto crossThreadDataPointer = &pCrossThreadData[i];
memcpy_s(ptrOffset(&pCrossThreadData[i], i * sizeof(void *)), sizeof(void *), &crossThreadDataPointer, sizeof(void *));
pKernelInfosStorage[i]->crossThreadData = pCrossThreadData[i];
// Arg 0 lives at a different stateless offset per kernel info (0 vs 8 bytes).
pKernelInfosStorage[i]->addArgBuffer(0, static_cast<NEO::CrossThreadDataOffset>(i * sizeof(void *)), sizeof(void *));
pKernelInfosStorage[i]->setCrossThreadDataSize(static_cast<uint16_t>((i + 1) * sizeof(void *)));
}
auto retVal = CL_INVALID_PROGRAM;
pBuffer = std::unique_ptr<Buffer>(Buffer::create(pContext.get(), 0u, MemoryConstants::pageSize, nullptr, retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, pBuffer);
pProgram = std::make_unique<MockProgram>(pContext.get(), false, pContext->getDevices());
}
void TearDown() override {
// The cross-thread data arrays are owned by this fixture, not the kernel infos;
// detach them so the kernel-info destructors do not free stack storage.
for (auto i = 0u; i < 2; i++) {
pKernelInfosStorage[i]->crossThreadData = nullptr;
}
}
UltClDeviceFactory deviceFactory{3, 2};
std::unique_ptr<MockContext> pContext;
std::unique_ptr<MockKernelInfo> pKernelInfosStorage[2];
char pCrossThreadData[2][64]{};
char pSshLocal[2][64]{};
KernelInfoContainer kernelInfos;
std::unique_ptr<Buffer> pBuffer;
std::unique_ptr<MockProgram> pProgram;
};
// Setting one buffer arg on a multi-device kernel must patch every
// per-root-device kernel with that device's own GPU address.
TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
int32_t retVal = CL_INVALID_VALUE;
auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
cl_mem val = pBuffer.get();
auto pVal = &val;
retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
EXPECT_EQ(CL_SUCCESS, retVal);
// Each root device has a separate graphics allocation; the stateless slot in
// that device's cross-thread data must hold the allocation's patchable address.
for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
auto pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
auto pKernelArg = reinterpret_cast<size_t *>(pKernel->getCrossThreadData() +
kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg);
}
}
// Passing raw memory that is not a real Buffer object must be rejected with
// CL_INVALID_MEM_OBJECT.
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
    // unique_ptr<char[]> instead of raw new[]/delete[]: storage is released
    // even if an assertion fails.
    auto storage = std::make_unique<char[]>(sizeof(Buffer));
    auto val = reinterpret_cast<cl_mem *>(storage.get());
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}
// A null cl_mem is a legal argument value; the patched stateless slot must end
// up holding a null pointer.
TEST_F(KernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
    auto val = static_cast<cl_mem *>(nullptr);
    auto pVal = &val;
    // Check the return code too (the original ignored it): a null value is
    // accepted, consistent with the other setArg tests in this file.
    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto pKernelArg = reinterpret_cast<cl_mem **>(this->pKernel->getCrossThreadData() +
                                                  this->pKernelInfo->argAsPtr(0).stateless);
    EXPECT_EQ(nullptr, *pKernelArg);
}
// A null arg on a multi-device kernel must patch a null pointer into every
// per-root-device kernel's cross-thread data.
TEST_F(MultiDeviceKernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
int32_t retVal = CL_INVALID_VALUE;
auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
// val is std::nullptr_t here; setArg receives a pointer to a null value.
auto val = nullptr;
auto pVal = &val;
pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
auto pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData() +
kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
EXPECT_EQ(nullptr, *pKernelArg);
}
}
// With a 4-byte pointer size (32-bit addressing), patching a null arg must
// write only the low 4 bytes of the slot; the upper 4 bytes stay untouched.
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4BytesAreBeingPatched) {
auto val = (cl_mem *)nullptr;
auto pVal = &val;
auto &argAsPtr = pKernelInfo->argAsPtr(0);
argAsPtr.pointerSize = 4;
// View the same slot as both 64-bit and 32-bit, and pre-poison all 8 bytes.
auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;
*pKernelArg64bit = 0xffffffffffffffff;
this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
uint64_t expValue = 0u;
// Low 32 bits zeroed by the patch; full 64-bit value is non-zero because the
// poisoned upper half survived.
EXPECT_EQ(0u, *pKernelArg32bit);
EXPECT_NE(expValue, *pKernelArg64bit);
}
// Same as the test above, but the argument value itself is std::nullptr_t
// rather than a null cl_mem*: still only the low 4 bytes may be patched.
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAreBeingPatched) {
auto pVal = nullptr;
auto &argAsPtr = pKernelInfo->argAsPtr(0);
argAsPtr.pointerSize = 4;
// Pre-poison the full 8-byte slot, then patch with a 4-byte pointer size.
auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;
*pKernelArg64bit = 0xffffffffffffffff;
this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
uint64_t expValue = 0u;
// Low half zeroed, upper half still poisoned => 64-bit view is non-zero.
EXPECT_EQ(0u, *pKernelArg32bit);
EXPECT_NE(expValue, *pKernelArg64bit);
}
// A device-memory buffer never counts as direct stateless access to host
// memory, whether or not the arg is purely stateful.
TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::buffer);
    auto clMemObj = static_cast<cl_mem>(&buffer);

    for (const bool statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        const auto retVal = pKernel->setArg(0, sizeof(cl_mem *), &clMemObj);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
// A shared buffer is reported as direct stateless access exactly when the arg
// is NOT purely stateful (stateless path actually reachable).
TEST_F(KernelArgBufferTest, givenSharedBufferWhenHasDirectStatelessAccessToSharedBufferIsCalledThenReturnCorrectValue) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::sharedBuffer);
    auto clMemObj = static_cast<cl_mem>(&buffer);

    for (const bool statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        const auto retVal = pKernel->setArg(0, sizeof(cl_mem *), &clMemObj);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(!statefulAccess, pKernel->hasDirectStatelessAccessToSharedBuffer());
    }
}
// A host-memory buffer is reported as direct stateless host access exactly
// when the arg is NOT purely stateful.
TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::bufferHostMemory);
    auto clMemObj = static_cast<cl_mem>(&buffer);

    for (const bool statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        const auto retVal = pKernel->setArg(0, sizeof(cl_mem *), &clMemObj);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(!statefulAccess, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
// An SVM allocation of device-buffer type never counts as direct stateless
// host access, regardless of stateful-ness of the arg.
TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
// SVM args require SVM capabilities; skip on devices without them.
if (devInfo.svmCapabilities == 0) {
GTEST_SKIP();
}
char data[128];
void *ptr = &data;
MockGraphicsAllocation gfxAllocation(ptr, 128);
gfxAllocation.setAllocationType(AllocationType::buffer);
for (auto pureStatefulBufferAccess : {false, true}) {
pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
}
}
// A host-memory SVM allocation counts as direct stateless host access exactly
// when the arg is not purely stateful.
TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
// SVM args require SVM capabilities; skip on devices without them.
if (devInfo.svmCapabilities == 0) {
GTEST_SKIP();
}
char data[128];
void *ptr = &data;
MockGraphicsAllocation gfxAllocation(ptr, 128);
gfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
for (auto pureStatefulBufferAccess : {false, true}) {
pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
}
}
// hasDirectStatelessAccessToHostMemory must return false for a kernel with no
// args, and for args of every type when no real host-memory object is bound.
TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
KernelInfo kernelInfo;
MockKernel emptyKernel(pProgram, kernelInfo, *pClDevice);
EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory());
// Force arg 0 through each object-type tag without an actual object attached.
pKernel->kernelArguments.at(0).type = Kernel::NONE_OBJ;
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
pKernel->kernelArguments.at(0).type = Kernel::BUFFER_OBJ;
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
pKernel->kernelArguments.at(0).type = Kernel::SVM_ALLOC_OBJ;
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
}
// Indirect stateless host access is reported only when BOTH the kernel
// descriptor flags indirect stateless access AND a host-memory allocation is
// registered via unified-memory exec info.
TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
KernelInfo kernelInfo;
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
EXPECT_FALSE(kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
// No indirect-stateless flag => always false.
MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice);
EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory());
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
// Flag set but no host allocations registered => still false.
MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice);
EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory());
const auto allocationTypes = {AllocationType::buffer,
AllocationType::bufferHostMemory};
MockKernel kernelWithIndirectUnifiedMemoryAllocation(pProgram, kernelInfo, *pClDevice);
MockGraphicsAllocation gfxAllocation;
// Only the host-memory allocation type makes the query return true.
for (const auto type : allocationTypes) {
gfxAllocation.setAllocationType(type);
kernelWithIndirectUnifiedMemoryAllocation.setUnifiedMemoryExecInfo(&gfxAllocation);
if (type == AllocationType::bufferHostMemory) {
EXPECT_TRUE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
} else {
EXPECT_FALSE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
}
kernelWithIndirectUnifiedMemoryAllocation.clearUnifiedMemoryExecInfo();
}
}
// With indirect host allocations allowed, indirect stateless host access is
// reported only once a host unified-memory allocation exists in the SVM
// manager; device unified-memory allocations do not trigger it.
TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
KernelInfo kernelInfo;
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
MockKernel mockKernel(pProgram, kernelInfo, *pClDevice);
// Indirect host allocations are disallowed by default => false.
EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed);
EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());
auto svmAllocationsManager = mockKernel.getContext().getSVMAllocsManager();
// Remaining checks need an SVM allocations manager; bail out if unavailable.
if (svmAllocationsManager == nullptr) {
return;
}
mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true;
// Allowed, but no host allocations exist yet => still false.
EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());
auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
deviceProperties.device = &pClDevice->getDevice();
auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
// A device unified-memory allocation does not count as host memory.
EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());
auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties);
// A host unified-memory allocation flips the query to true.
EXPECT_TRUE(mockKernel.hasIndirectStatelessAccessToHostMemory());
svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
}
// The aux-translation getter must reflect exactly what the setter stored,
// for both states.
TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslationRequiredReturnsCorrectValue) {
    const bool states[] = {false, true};
    for (const bool required : states) {
        pKernel->setAuxTranslationRequired(required);
        EXPECT_EQ(required, pKernel->isAuxTranslationRequired());
    }
}
// With stateless compression enabled, a direct stateless shared-buffer arg
// must make updateAuxTranslationRequired() latch the aux-translation flag.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToSharedBufferWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
MockBuffer buffer;
buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::sharedBuffer);
auto val = (cl_mem)&buffer;
auto pVal = &val;
auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(pKernel->hasDirectStatelessAccessToSharedBuffer());
// setArg alone does not flip the flag; the explicit update call does.
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
pKernel->updateAuxTranslationRequired();
EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
// With stateless compression enabled, a direct stateless host-memory arg must
// make updateAuxTranslationRequired() latch the aux-translation flag.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
MockBuffer buffer;
buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::bufferHostMemory);
auto val = (cl_mem)&buffer;
auto pVal = &val;
auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
// setArg alone does not flip the flag; the explicit update call does.
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
pKernel->updateAuxTranslationRequired();
EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
// A default (device-memory) buffer arg gives no direct stateless host access,
// so updateAuxTranslationRequired() must leave the flag false.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
MockBuffer buffer;
auto val = (cl_mem)&buffer;
auto pVal = &val;
auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
pKernel->updateAuxTranslationRequired();
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
// A host-memory SVM allocation arg gives direct stateless host access, so
// updateAuxTranslationRequired() must latch the aux-translation flag.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
// SVM args require SVM capabilities; skip on devices without them.
if (devInfo.svmCapabilities == 0) {
GTEST_SKIP();
}
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
char data[128];
void *ptr = &data;
MockGraphicsAllocation gfxAllocation(ptr, 128);
gfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
// setArgSvmAlloc alone does not flip the flag; the explicit update call does.
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
pKernel->updateAuxTranslationRequired();
EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
// A default-typed SVM allocation arg gives no direct stateless host access,
// so updateAuxTranslationRequired() must leave the flag false.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
// SVM args require SVM capabilities; skip on devices without them.
if (devInfo.svmCapabilities == 0) {
GTEST_SKIP();
}
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
char data[128];
void *ptr = &data;
MockGraphicsAllocation gfxAllocation(ptr, 128);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
pKernel->updateAuxTranslationRequired();
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
// Without the indirect-stateless-access attribute, a registered host-memory
// allocation must NOT cause aux translation to be required.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
MockGraphicsAllocation gfxAllocation;
gfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
pKernel->updateAuxTranslationRequired();
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
// With the indirect-stateless-access attribute set, aux translation becomes
// required only for host-memory allocations registered via exec info.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrueForHostMemoryAllocation) {
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
const auto allocationTypes = {AllocationType::buffer,
AllocationType::bufferHostMemory};
MockGraphicsAllocation gfxAllocation;
for (const auto type : allocationTypes) {
gfxAllocation.setAllocationType(type);
pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
if (type == AllocationType::bufferHostMemory) {
EXPECT_TRUE(pKernel->hasIndirectStatelessAccessToHostMemory());
} else {
EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
}
// The flag only flips on the explicit update call, and only for host memory.
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
pKernel->updateAuxTranslationRequired();
if (type == AllocationType::bufferHostMemory) {
EXPECT_TRUE(pKernel->isAuxTranslationRequired());
} else {
EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
// Reset exec info and the latched flag before the next iteration.
pKernel->clearUnifiedMemoryExecInfo();
pKernel->setAuxTranslationRequired(false);
}
}
// fillWithKernelObjsForAuxTranslation must pick up a unified-memory exec-info
// allocation as a GFX_ALLOC object exactly when its Gmm reports compression;
// allocation type alone does not matter.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) {
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
// (type, compressed) pairs; only compressed entries should be collected.
constexpr std::array<AllocationTypeHelper, 4> allocationTypes = {{{AllocationType::buffer, false},
{AllocationType::buffer, true},
{AllocationType::bufferHostMemory, false},
{AllocationType::svmGpu, true}}};
GmmRequirements gmmRequirements{};
gmmRequirements.allowLargePages = true;
gmmRequirements.preferCompressed = false;
auto gmm = std::make_unique<Gmm>(pDevice->getRootDeviceEnvironment().getGmmHelper(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, StorageInfo{}, gmmRequirements);
MockGraphicsAllocation gfxAllocation;
gfxAllocation.setDefaultGmm(gmm.get());
for (const auto type : allocationTypes) {
gfxAllocation.setAllocationType(type.allocationType);
pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
gmm->isCompressionEnabled = type.compressed;
auto kernelObjsForAuxTranslation = pKernel->fillWithKernelObjsForAuxTranslation();
if (type.compressed) {
// Exactly one entry: our allocation, tagged as a raw GFX allocation.
EXPECT_EQ(1u, kernelObjsForAuxTranslation->size());
auto kernelObj = *kernelObjsForAuxTranslation->find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &gfxAllocation});
EXPECT_NE(nullptr, kernelObj.object);
EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
kernelObjsForAuxTranslation->erase(kernelObj);
} else {
EXPECT_EQ(0u, kernelObjsForAuxTranslation->size());
}
// Reset per-iteration state.
pKernel->clearUnifiedMemoryExecInfo();
pKernel->setAuxTranslationRequired(false);
}
}
// fillWithKernelObjsForAuxTranslation must also scan the context's SVM
// allocations manager and collect compressed SVM allocations as GFX_ALLOC
// objects; uncompressed ones are ignored regardless of allocation type.
TEST_F(KernelArgBufferTest, givenSVMAllocsManagerWithCompressedSVMAllocationsWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) {
// The test requires an SVM allocations manager; bail out if unavailable.
if (pContext->getSVMAllocsManager() == nullptr) {
return;
}
DebugManagerStateRestore debugRestorer;
debugManager.flags.EnableStatelessCompression.set(1);
// (type, compressed) pairs; only compressed entries should be collected.
constexpr std::array<AllocationTypeHelper, 4> allocationTypes = {{{AllocationType::buffer, false},
{AllocationType::buffer, true},
{AllocationType::bufferHostMemory, false},
{AllocationType::svmGpu, true}}};
GmmRequirements gmmRequirements{};
gmmRequirements.allowLargePages = true;
gmmRequirements.preferCompressed = false;
auto gmm = std::make_unique<Gmm>(pDevice->getRootDeviceEnvironment().getGmmHelper(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, StorageInfo{}, gmmRequirements);
MockGraphicsAllocation gfxAllocation;
gfxAllocation.setDefaultGmm(gmm.get());
SvmAllocationData allocData(0);
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.device = &pClDevice->getDevice();
for (const auto type : allocationTypes) {
gfxAllocation.setAllocationType(type.allocationType);
gmm->isCompressionEnabled = type.compressed;
// Register the allocation with the SVM manager for this iteration only.
pContext->getSVMAllocsManager()->insertSVMAlloc(allocData);
auto kernelObjsForAuxTranslation = pKernel->fillWithKernelObjsForAuxTranslation();
if (type.compressed) {
EXPECT_EQ(1u, kernelObjsForAuxTranslation->size());
auto kernelObj = *kernelObjsForAuxTranslation->find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &gfxAllocation});
EXPECT_NE(nullptr, kernelObj.object);
EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
kernelObjsForAuxTranslation->erase(kernelObj);
} else {
EXPECT_EQ(0u, kernelObjsForAuxTranslation->size());
}
pContext->getSVMAllocsManager()->removeSVMAlloc(allocData);
}
}
// Fixture variant for bindless-mode tests: forces UseBindlessMode and rewires
// arg 0 to have only a bindless offset (stateless/bindful left undefined).
// NOTE(review): pBuffer stays a raw owning pointer because tests assign it
// directly to cl_mem; ownership is released in tearDown.
class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
public:
void setUp() {
// Must be set before the base fixture creates the kernel.
debugManager.flags.UseBindlessMode.set(1);
KernelArgBufferFixture::setUp();
pBuffer = new MockBuffer();
ASSERT_NE(nullptr, pBuffer);
pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
pKernelInfo->argAsPtr(0).stateless = undefined<CrossThreadDataOffset>;
pKernelInfo->argAsPtr(0).bindful = undefined<SurfaceStateHeapOffset>;
}
void tearDown() {
delete pBuffer;
KernelArgBufferFixture::tearDown();
}
DebugManagerStateRestore restorer;
MockBuffer *pBuffer;
// Cross-thread-data offset used for the bindless slot in the tests below.
const CrossThreadDataOffset bindlessOffset = 0x10;
};
// Modern alias declaration in place of typedef; same name, same type.
using KernelArgBufferTestBindless = Test<KernelArgBufferFixtureBindless>;
// In bindless mode without a bindless heaps helper, setArg must not touch the
// bindless offset slot in cross-thread data.
// Removed the unused DataPortBindlessSurfaceExtendedMessageDescriptor alias.
HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenSettingKernelArgThenOffsetInCrossThreadDataIsNotPatched) {
    // Pre-poison the slot so any unwanted patch is detectable.
    auto patchLocation = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), bindlessOffset));
    *patchLocation = 0xdead;

    cl_mem memObj = pBuffer;
    retVal = pKernel->setArg(0, sizeof(memObj), &memObj);
    EXPECT_EQ(0xdeadu, *patchLocation);
}
// With a bindless heaps helper present, patchBindlessSurfaceState must write a
// surface-state offset into the bindless slot (overwriting the poison value).
// Removed the unused DataPortBindlessSurfaceExtendedMessageDescriptor alias.
HWTEST_F(KernelArgBufferTestBindless, givenBindlessBuffersWhenPatchBindlessOffsetCalledThenBindlessOffsetToSurfaceStateWrittenInCrossThreadData) {
    pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pClDevice->getMemoryManager(),
                                                                                                                             pClDevice->getNumGenericSubDevices() > 1,
                                                                                                                             pClDevice->getRootDeviceIndex(),
                                                                                                                             pClDevice->getDeviceBitfield());
    // Pre-poison the slot so the patch is observable.
    auto patchLocation = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), bindlessOffset));
    *patchLocation = 0xdead;

    pKernel->patchBindlessSurfaceState(pBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()), bindlessOffset);
    EXPECT_NE(0xdeadu, *patchLocation);
}
// A host-memory buffer arg must mark the kernel as using system memory.
TEST_F(KernelArgBufferTest, givenBufferAsHostMemoryWhenSettingKernelArgThenKernelUsesSystemMemory) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::bufferHostMemory);
    auto clMemObj = static_cast<cl_mem>(&buffer);

    ASSERT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory());

    const auto retVal = pKernel->setArg(0, sizeof(cl_mem *), &clMemObj);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->isAnyKernelArgumentUsingSystemMemory());
}
// A device-memory buffer arg must not mark the kernel as using system memory.
TEST_F(KernelArgBufferTest, givenBufferAsDeviceMemoryWhenSettingKernelArgThenKernelNotUsesSystemMemory) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::buffer);
    auto clMemObj = static_cast<cl_mem>(&buffer);

    ASSERT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory());

    const auto retVal = pKernel->setArg(0, sizeof(cl_mem *), &clMemObj);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory());
}
// Once the system-memory flag is set it is sticky: a later device-memory arg
// must not clear it.
TEST_F(KernelArgBufferTest, givenBufferAsDeviceMemoryAndKernelIsAlreadySetToUseSystemWhenSettingKernelArgThenKernelUsesSystemMemory) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::buffer);
    auto clMemObj = static_cast<cl_mem>(&buffer);

    ASSERT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory());
    pKernel->anyKernelArgumentUsingSystemMemory = true;

    const auto retVal = pKernel->setArg(0, sizeof(cl_mem *), &clMemObj);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->isAnyKernelArgumentUsingSystemMemory());
}