Fix mutex order for event task and move args to gpu

This commit fixes a problem where shared USM memory was not transferred to the GPU
when a submission to the GPU is triggered by a user event. It also fixes
a deadlock caused by inconsistent lock ordering of the mutexes in the CSR
and in the direct submission controller.

Related-To: NEO-6762

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka 2022-05-19 10:06:08 +00:00 committed by Compute-Runtime-Automation
parent d308df254c
commit 6ab6e1abff
18 changed files with 427 additions and 139 deletions

View File

@ -88,7 +88,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
}
if (!deferCmdQBcsInitialization) {
this->initializeBcsEngine(internalUsage);
this->constructBcsEngine(internalUsage);
}
}
@ -274,7 +274,7 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec
return *selectedCsr;
}
void CommandQueue::initializeBcsEngine(bool internalUsage) {
void CommandQueue::constructBcsEngine(bool internalUsage) {
if (bcsAllowed && !bcsInitialized) {
auto &hwInfo = device->getHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
@ -293,6 +293,10 @@ void CommandQueue::initializeBcsEngine(bool internalUsage) {
}
}
void CommandQueue::initializeBcsEngine(bool internalUsage) {
constructBcsEngine(internalUsage);
}
// Returns the Device reported by the object the queue's `device` member points to.
Device &CommandQueue::getDevice() const noexcept {
    auto &queueDevice = *device;
    return queueDevice.getDevice();
}

View File

@ -228,7 +228,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType);
CommandStreamReceiver *getBcsForAuxTranslation();
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args);
void initializeBcsEngine(bool internalUsage);
void constructBcsEngine(bool internalUsage);
MOCKABLE_VIRTUAL void initializeBcsEngine(bool internalUsage);
Device &getDevice() const noexcept;
ClDevice &getClDevice() const { return *device; }
Context &getContext() const { return *context; }

View File

@ -570,6 +570,7 @@ void Event::transitionExecutionStatus(int32_t newExecutionStatus) const {
void Event::submitCommand(bool abortTasks) {
std::unique_ptr<Command> cmdToProcess(cmdToSubmit.exchange(nullptr));
if (cmdToProcess.get() != nullptr) {
getCommandQueue()->initializeBcsEngine(getCommandQueue()->isSpecial());
auto lockCSR = getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
if (this->isProfilingEnabled()) {

View File

@ -1280,8 +1280,14 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {
if (kernelArguments[argIndex].object) {
if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
bool needsMigration = false;
auto pageFaultManager = executionEnvironment.memoryManager->getPageFaultManager();
if (pageFaultManager &&
this->isUnifiedMemorySyncRequired) {
needsMigration = true;
}
auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
dst.push_back(new GeneralSurface(pSVMAlloc));
dst.push_back(new GeneralSurface(pSVMAlloc, needsMigration));
} else if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
auto clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(kernelArguments[argIndex].object));
auto memObj = castToObject<MemObj>(clMem);

View File

@ -1830,6 +1830,50 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblocke
EXPECT_EQ(numGrfRequired, csr->savedDispatchFlags.numGrfRequired);
}
class MockCommandQueueInitializeBcs : public MockCommandQueue {
public:
MockCommandQueueInitializeBcs() : MockCommandQueue(nullptr, nullptr, 0, false) {}
MockCommandQueueInitializeBcs(Context &context) : MockCommandQueueInitializeBcs(&context, context.getDevice(0), nullptr, false) {}
MockCommandQueueInitializeBcs(Context *context, ClDevice *device, const cl_queue_properties *props, bool internalUsage)
: MockCommandQueue(context, device, props, internalUsage) {
}
void initializeBcsEngine(bool internalUsage) override {
if (initializeBcsEngineCalledTimes == 0) {
auto th = std::thread([&]() {
isCsrLocked = reinterpret_cast<MockCommandStreamReceiver *>(&this->getGpgpuCommandStreamReceiver())->isOwnershipMutexLocked();
});
th.join();
}
initializeBcsEngineCalledTimes++;
MockCommandQueue::initializeBcsEngine(internalUsage);
}
int initializeBcsEngineCalledTimes = 0;
bool isCsrLocked = false;
};
// Submits a blocked kernel command through an event and checks that, when the queue's
// initializeBcsEngine() hook ran, the GPGPU CSR ownership mutex was not locked
// (as recorded by MockCommandQueueInitializeBcs::isCsrLocked).
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenInitializeBcsCalledThenCrsIsNotLocked) {
MockContext mockContext;
// Install a MockCommandStreamReceiver on the device; the mock queue casts the GPGPU CSR to this type.
auto csr = new MockCommandStreamReceiver(*pDevice->executionEnvironment, 0, pDevice->getDeviceBitfield());
pDevice->resetCommandStreamReceiver(csr);
uint32_t numGrfRequired = 666u;
auto pCmdQ = std::make_unique<MockCommandQueueInitializeBcs>(&mockContext, pClDevice, nullptr, false);
auto mockProgram = std::make_unique<MockProgram>(&mockContext, false, toClDeviceVector(*pClDevice));
auto pKernel = MockKernel::create(*pDevice, mockProgram.get(), numGrfRequired);
auto kernelInfos = MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(), rootDeviceIndex);
// NOTE(review): multiDeviceKernel is not referenced again below; presumably it is kept for the
// effects of its constructor/destructor on pKernel - confirm before removing.
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(pKernel), kernelInfos);
auto event = std::make_unique<MockEvent<Event>>(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0);
// The command stream is handed to KernelOperation as a raw pointer.
// NOTE(review): presumably KernelOperation takes ownership of cmdStream - confirm.
auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}));
auto blockedCommandsData = std::make_unique<KernelOperation>(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
std::vector<Surface *> surfaces;
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1));
// Submitting the stored command is the action under test.
event->submitCommand(false);
EXPECT_FALSE(pCmdQ->isCsrLocked);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsTrueWhenCallingAddPipeControlThenDcFlushIsEnabled) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
std::unique_ptr<uint8_t> buffer(new uint8_t[128]);

View File

@ -145,7 +145,7 @@ TEST(Event, givenBcsCsrSetInEventWhenPeekingBcsTaskCountThenReturnCorrectTaskCou
new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockAlignedMallocManagerDevice>(&hwInfo)}};
MockContext context{device.get()};
MockCommandQueue queue{context};
queue.initializeBcsEngine(false);
queue.constructBcsEngine(false);
queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19);
Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0};

View File

@ -1328,6 +1328,57 @@ HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndNotRequiredMemSyncWhenM
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}
// Test-only subclass that re-exposes GeneralSurface::needsMigration as a public member
// so tests can read the flag.
class MockGeneralSurface : public GeneralSurface {
public:
using GeneralSurface::needsMigration;
};
// With a page fault manager installed but unified memory sync NOT required by the kernel,
// the surface produced by getResidency() for an SVM argument must not request migration.
HWTEST_F(KernelResidencyTest, givenSvmArgWhenKernelDoesNotRequireUnifiedMemorySyncThenSurfaceDoesNotNeedMigration) {
    auto mockPageFaultManager = new MockPageFaultManager();
    // The memory manager's pageFaultManager smart pointer takes over the mock.
    static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pClDevice, nullptr, true);

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields());
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()));
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {});

    auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
    mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)};
    mockKernel.mockKernel->setUnifiedMemorySyncRequirement(false);

    std::vector<NEO::Surface *> residencySurfaces;
    mockKernel.mockKernel->getResidency(residencySurfaces);

    // Guard the index below; EXPECT (not ASSERT) keeps the cleanup at the end running on failure.
    EXPECT_FALSE(residencySurfaces.empty());
    if (!residencySurfaces.empty()) {
        EXPECT_FALSE(static_cast<MockGeneralSurface *>(residencySurfaces[0])->needsMigration);
    }

    // getResidency() hands out heap-allocated surfaces; the caller deletes them.
    for (auto surface : residencySurfaces) {
        delete surface;
    }
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}
// With a page fault manager installed and unified memory sync required by the kernel,
// the surface produced by getResidency() for an SVM argument must request migration.
HWTEST_F(KernelResidencyTest, givenSvmArgWhenKernelRequireUnifiedMemorySyncThenSurfaceNeedMigration) {
    auto mockPageFaultManager = new MockPageFaultManager();
    // The memory manager's pageFaultManager smart pointer takes over the mock.
    static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pClDevice, nullptr, true);

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields());
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()));
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {});

    auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
    mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)};
    mockKernel.mockKernel->setUnifiedMemorySyncRequirement(true);

    std::vector<NEO::Surface *> residencySurfaces;
    mockKernel.mockKernel->getResidency(residencySurfaces);

    // Guard the index below; EXPECT (not ASSERT) keeps the cleanup at the end running on failure.
    EXPECT_FALSE(residencySurfaces.empty());
    if (!residencySurfaces.empty()) {
        EXPECT_TRUE(static_cast<MockGeneralSurface *>(residencySurfaces[0])->needsMigration);
    }

    // getResidency() hands out heap-allocated surfaces; the caller deletes them.
    for (auto surface : residencySurfaces) {
        delete surface;
    }
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}
HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryRequiredMemSyncWhenMakeResidentIsCalledThenAllocationIsDecommited) {
auto mockPageFaultManager = new MockPageFaultManager();
static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
@ -1836,6 +1887,60 @@ HWTEST_F(KernelResidencyTest, givenSimpleKernelTunningAndNoAtomicsWhenPerformTun
EXPECT_NE(mockKernel.mockKernel->isSingleSubdevicePreferred(), mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics);
}
// Sets up a kernel with one SVM argument backed by shared unified memory while the mock page
// fault manager is only held locally (it is never installed in the memory manager), then
// expects that no transfer to GPU was recorded by the mock.
// NOTE(review): no kernel operation (e.g. getResidency/makeResident) is invoked between the
// setup and the expectation, so the counter is checked right after setup - confirm whether a
// call was intended here.
HWTEST_F(KernelResidencyTest, givenSimpleKernelWhenExecEnvDoesNotHavePageFaultManagerThenPageFaultDoesNotMoveAllocation) {
// Owned by this test only; not handed to the memory manager.
auto mockPageFaultManager = std::make_unique<MockPageFaultManager>();
MockKernelWithInternals mockKernel(*this->pClDevice);
auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields());
auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()));
auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
// The allocation is registered in the mock under its GPU address.
mockPageFaultManager->insertAllocation(reinterpret_cast<void *>(unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {});
// Describe a single SVM_ALLOC_OBJ kernel argument pointing at the default graphics allocation.
Kernel::SimpleKernelArgInfo kernelArgInfo;
kernelArgInfo.object = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation();
kernelArgInfo.type = Kernel::kernelArgType::SVM_ALLOC_OBJ;
std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
kernelArguments.resize(1);
kernelArguments[0] = kernelArgInfo;
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
mockKernel.mockKernel->setKernelArguments(kernelArguments);
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0);
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
// Clear any page fault manager held by the memory manager before the test ends.
static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset();
}
// Installs a mock page fault manager in the memory manager, sets up a kernel with one SVM
// argument backed by shared unified memory, clears isUnifiedMemorySyncRequired and expects
// that no transfer to GPU was recorded by the mock.
// NOTE(review): no kernel operation (e.g. getResidency/makeResident) is invoked between the
// setup and the expectation, so the counter is checked right after setup - confirm whether a
// call was intended here.
HWTEST_F(KernelResidencyTest, givenSimpleKernelWhenIsUnifiedMemorySyncRequiredIsFalseThenPageFaultDoesNotMoveAllocation) {
auto mockPageFaultManager = new MockPageFaultManager();
// The memory manager's pageFaultManager smart pointer takes over the mock.
static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
MockKernelWithInternals mockKernel(*this->pClDevice);
auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields());
auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()));
auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
// The allocation is registered in the mock under its GPU address.
mockPageFaultManager->insertAllocation(reinterpret_cast<void *>(unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {});
// Describe a single SVM_ALLOC_OBJ kernel argument pointing at the default graphics allocation.
Kernel::SimpleKernelArgInfo kernelArgInfo;
kernelArgInfo.object = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation();
kernelArgInfo.type = Kernel::kernelArgType::SVM_ALLOC_OBJ;
std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
kernelArguments.resize(1);
kernelArguments[0] = kernelArgInfo;
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
mockKernel.mockKernel->setKernelArguments(kernelArguments);
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->isUnifiedMemorySyncRequired = false;
EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0);
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
// Drop the mock from the memory manager before the test ends.
static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset();
}
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
auto pKernelInfo = std::make_unique<MockKernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

View File

@ -10,8 +10,8 @@ set(IGDRCL_SRCS_tests_memory_manager
${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_allocate_in_preferred_pool_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/migraton_controller_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_surface_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_token_tests.cpp
)

View File

@ -0,0 +1,82 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/preemption.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/memory_manager/mem_obj_surface.h"
#include "opencl/source/platform/platform.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "gtest/gtest.h"
#include <type_traits>
using namespace NEO;
namespace createSurface {
// Builds a heap-allocated MemObjSurface for `buffer`; the caller deletes the result.
// `data` and `gfxAllocation` are accepted but not used by this factory.
Surface *create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) {
    Surface *memObjSurface = new MemObjSurface(buffer);
    return memObjSurface;
}
} // namespace createSurface
// Fixture providing the inputs handed to createSurface::create() by the tests below.
class SurfaceTest : public ::testing::Test {
public:
// Raw host storage passed as the `data` argument.
char data[10];
// Buffer passed as the `buffer` argument.
MockBuffer buffer;
// Graphics allocation constructed with a null pointer and zero size.
MockGraphicsAllocation gfxAllocation{nullptr, 0};
};
// Creates a MemObjSurface, duplicates it, makes the original resident on a mock CSR and
// expects exactly one allocation to be recorded as made resident.
HWTEST_F(SurfaceTest, GivenSurfaceWhenInterfaceIsUsedThenSurfaceBehavesCorrectly) {
    // Initialized because the CSR below receives this variable at construction.
    int32_t execStamp = 0;
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    executionEnvironment->initializeMemoryManager();
    DeviceBitfield deviceBitfield(1);
    auto csr = std::make_unique<MockCsr<FamilyType>>(execStamp, *executionEnvironment, 0, deviceBitfield);
    auto hwInfo = *defaultHwInfo;
    auto engine = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0];
    auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(engine, PreemptionHelper::getDefaultPreemptionMode(hwInfo)));
    csr->setupContext(*osContext);

    // Smart pointers release both surfaces even when an ASSERT returns early.
    std::unique_ptr<Surface> surface(createSurface::create(this->data,
                                                           &this->buffer,
                                                           &this->gfxAllocation));
    ASSERT_NE(nullptr, surface.get());

    std::unique_ptr<Surface> duplicatedSurface(surface->duplicate());
    ASSERT_NE(nullptr, duplicatedSurface.get());

    surface->makeResident(*csr);
    EXPECT_EQ(1u, csr->madeResidentGfxAllocations.size());
}
// Fixture variant whose buffer allocation (for mockRootDeviceIndex) is flagged coherent.
class CoherentMemObjSurface : public SurfaceTest {
  public:
    CoherentMemObjSurface() {
        auto bufferAllocation = this->buffer.getGraphicsAllocation(mockRootDeviceIndex);
        bufferAllocation->setCoherent(true);
    }
};
// A surface created from a buffer whose allocation is coherent must report IsCoherent.
TEST_F(CoherentMemObjSurface, GivenCoherentMemObjWhenCreatingSurfaceFromMemObjThenSurfaceIsCoherent) {
    std::unique_ptr<Surface> coherentSurface(createSurface::create(this->data,
                                                                   &this->buffer,
                                                                   &this->gfxAllocation));
    EXPECT_TRUE(coherentSurface->IsCoherent);
}

View File

@ -1,131 +0,0 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/preemption.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/memory_manager/mem_obj_surface.h"
#include "opencl/source/platform/platform.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "gtest/gtest.h"
#include <type_traits>
using namespace NEO;
typedef ::testing::Types<NullSurface, HostPtrSurface, MemObjSurface, GeneralSurface> SurfaceTypes;
namespace createSurface {
template <typename surfType>
Surface *create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation);
template <>
Surface *create<NullSurface>(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) {
return new NullSurface;
}
template <>
Surface *create<HostPtrSurface>(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) {
return new HostPtrSurface(data, 10, gfxAllocation);
}
template <>
Surface *create<MemObjSurface>(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) {
return new MemObjSurface(buffer);
}
template <>
Surface *create<GeneralSurface>(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) {
return new GeneralSurface(gfxAllocation);
}
} // namespace createSurface
template <typename T>
class SurfaceTest : public ::testing::Test {
public:
char data[10];
MockBuffer buffer;
MockGraphicsAllocation gfxAllocation{nullptr, 0};
};
TYPED_TEST_CASE(SurfaceTest, SurfaceTypes);
HWTEST_TYPED_TEST(SurfaceTest, GivenSurfaceWhenInterfaceIsUsedThenSurfaceBehavesCorrectly) {
int32_t execStamp;
ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
executionEnvironment->initializeMemoryManager();
DeviceBitfield deviceBitfield(1);
auto csr = std::make_unique<MockCsr<FamilyType>>(execStamp, *executionEnvironment, 0, deviceBitfield);
auto hwInfo = *defaultHwInfo;
auto engine = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0];
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(engine, PreemptionHelper::getDefaultPreemptionMode(hwInfo)));
csr->setupContext(*osContext);
Surface *surface = createSurface::create<TypeParam>(this->data,
&this->buffer,
&this->gfxAllocation);
ASSERT_NE(nullptr, surface); // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks)
Surface *duplicatedSurface = surface->duplicate();
ASSERT_NE(nullptr, duplicatedSurface); // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks)
surface->makeResident(*csr);
if (std::is_same<TypeParam, HostPtrSurface>::value ||
std::is_same<TypeParam, MemObjSurface>::value ||
std::is_same<TypeParam, GeneralSurface>::value) {
EXPECT_EQ(1u, csr->madeResidentGfxAllocations.size());
}
delete duplicatedSurface;
delete surface;
}
class CoherentMemObjSurface : public SurfaceTest<MemObjSurface> {
public:
CoherentMemObjSurface() {
this->buffer.getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
}
};
TEST_F(CoherentMemObjSurface, GivenCoherentMemObjWhenCreatingSurfaceFromMemObjThenSurfaceIsCoherent) {
Surface *surface = createSurface::create<MemObjSurface>(this->data,
&this->buffer,
&this->gfxAllocation);
EXPECT_TRUE(surface->IsCoherent);
delete surface;
}
TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithoutSpecifyingPtrCopyAllowanceThenPtrCopyIsNotAllowed) {
char memory[2] = {};
HostPtrSurface surface(memory, sizeof(memory));
EXPECT_FALSE(surface.peekIsPtrCopyAllowed());
}
TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyAllowedThenQueryReturnsTrue) {
char memory[2] = {};
HostPtrSurface surface(memory, sizeof(memory), true);
EXPECT_TRUE(surface.peekIsPtrCopyAllowed());
}
TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyNotAllowedThenQueryReturnsFalse) {
char memory[2] = {};
HostPtrSurface surface(memory, sizeof(memory), false);
EXPECT_FALSE(surface.peekIsPtrCopyAllowed());
}

View File

@ -258,7 +258,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
MockCommandQueueHw(Context *context,
ClDevice *device,
cl_queue_properties *properties) : BaseClass(context, device, properties, false) {
this->initializeBcsEngine(false);
this->constructBcsEngine(false);
}
void clearBcsEngines() {

View File

@ -106,6 +106,7 @@ class MockKernel : public Kernel {
using Kernel::hasDirectStatelessAccessToHostMemory;
using Kernel::hasDirectStatelessAccessToSharedBuffer;
using Kernel::hasIndirectStatelessAccessToHostMemory;
using Kernel::isUnifiedMemorySyncRequired;
using Kernel::kernelArgHandlers;
using Kernel::kernelArgRequiresCacheFlush;
using Kernel::kernelArguments;
@ -250,6 +251,7 @@ class MockKernel : public Kernel {
uint32_t makeResidentCalls = 0;
uint32_t getResidencyCalls = 0;
uint32_t setArgSvmAllocCalls = 0;
uint32_t moveArgsToGpuDomainCalls = 0;
bool canKernelTransformImages = true;
bool isPatchedOverride = true;

View File

@ -9,6 +9,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
namespace NEO {
@ -99,9 +100,15 @@ class GeneralSurface : public Surface {
// Wraps gfxAlloc; the base Surface is initialized with the allocation's coherency flag.
// gfxAlloc is dereferenced here, so it must not be null.
GeneralSurface(GraphicsAllocation *gfxAlloc) : Surface(gfxAlloc->isCoherent()), gfxAllocation(gfxAlloc) {
}
// Delegates to the single-argument constructor, then stores needsMigration; when the flag is
// set, makeResident() asks the page fault manager to move the allocation to the GPU domain.
GeneralSurface(GraphicsAllocation *gfxAlloc, bool needsMigration) : GeneralSurface(gfxAlloc) {
this->needsMigration = needsMigration;
}
~GeneralSurface() override = default;
// Makes the wrapped allocation resident on csr. When needsMigration is set, the allocation is
// first moved to the GPU domain through the page fault manager of csr's memory manager.
void makeResident(CommandStreamReceiver &csr) override {
    if (needsMigration) {
        // The flag can be set through the public constructor regardless of whether a page
        // fault manager exists, so do not dereference a missing manager.
        auto pageFaultManager = csr.getMemoryManager()->getPageFaultManager();
        if (pageFaultManager) {
            pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(gfxAllocation->getGpuAddress()));
        }
    }
    csr.makeResident(*gfxAllocation);
}
// Returns a heap-allocated copy referring to the same allocation; the caller deletes it.
// The migration flag is carried over so the copy behaves like the original in makeResident().
Surface *duplicate() override { return new GeneralSurface(gfxAllocation, needsMigration); }
@ -111,6 +118,7 @@ class GeneralSurface : public Surface {
}
protected:
bool needsMigration = false;
GraphicsAllocation *gfxAllocation;
};
} // namespace NEO

View File

@ -104,6 +104,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::mediaVfeStateDirty;
using BaseClass::CommandStreamReceiver::newResources;
using BaseClass::CommandStreamReceiver::osContext;
using BaseClass::CommandStreamReceiver::ownershipMutex;
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
using BaseClass::CommandStreamReceiver::postSyncWriteOffset;
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
@ -124,6 +125,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::userPauseConfirmation;
using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList;
using BaseClass::CommandStreamReceiver::workPartitionAllocation;
;
UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment,
uint32_t rootDeviceIndex,

View File

@ -39,6 +39,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
using CommandStreamReceiver::latestSentTaskCount;
using CommandStreamReceiver::newResources;
using CommandStreamReceiver::osContext;
using CommandStreamReceiver::ownershipMutex;
using CommandStreamReceiver::postSyncWriteOffset;
using CommandStreamReceiver::preemptionAllocation;
using CommandStreamReceiver::tagAddress;
@ -159,6 +160,13 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
return status;
}
// No-op override: this mock performs no post-init flags setup.
void postInitFlagsSetup() override {}
bool isOwnershipMutexLocked() {
bool isLocked = !this->ownershipMutex.try_lock();
if (!isLocked) {
this->ownershipMutex.unlock();
}
return isLocked;
}
static constexpr size_t tagSize = 256;
static volatile uint32_t mockTagAddress[tagSize];

View File

@ -24,6 +24,7 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/physical_address_allocator_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/special_heap_pool_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/storage_info_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_cache_tests.cpp
)

View File

@ -0,0 +1,150 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/preemption.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h"
#include "gtest/gtest.h"
#include <type_traits>
using namespace NEO;
// Surface implementations exercised by the typed SurfaceTest below.
using SurfaceTypes = ::testing::Types<NullSurface, HostPtrSurface, GeneralSurface>;
namespace createSurface {
// Factory used by the typed test: one explicit specialization per surface type.
// Every specialization returns a heap-allocated surface that the caller deletes.
template <typename surfType>
Surface *create(char *data, GraphicsAllocation *gfxAllocation);

// NullSurface needs neither argument.
template <>
Surface *create<NullSurface>(char *data, GraphicsAllocation *gfxAllocation) {
    Surface *nullSurface = new NullSurface;
    return nullSurface;
}

// HostPtrSurface covers 10 bytes starting at data, together with gfxAllocation.
template <>
Surface *create<HostPtrSurface>(char *data, GraphicsAllocation *gfxAllocation) {
    Surface *hostPtrSurface = new HostPtrSurface(data, 10, gfxAllocation);
    return hostPtrSurface;
}

// GeneralSurface wraps gfxAllocation only.
template <>
Surface *create<GeneralSurface>(char *data, GraphicsAllocation *gfxAllocation) {
    Surface *generalSurface = new GeneralSurface(gfxAllocation);
    return generalSurface;
}
} // namespace createSurface
// Typed fixture providing the inputs handed to createSurface::create<T>() by the test below.
template <typename T>
class SurfaceTest : public ::testing::Test {
public:
// Raw host storage passed as the `data` argument (the HostPtrSurface factory uses 10 bytes).
char data[10];
// Default-constructed mock allocation passed as the `gfxAllocation` argument.
MockGraphicsAllocation gfxAllocation;
};
TYPED_TEST_CASE(SurfaceTest, SurfaceTypes);
// For each surface type: create it, duplicate it, make the original resident on a mock CSR and,
// for HostPtrSurface and GeneralSurface, expect exactly one allocation recorded as made resident.
HWTEST_TYPED_TEST(SurfaceTest, GivenSurfaceWhenInterfaceIsUsedThenSurfaceBehavesCorrectly) {
    // Initialized because the CSR below receives this variable at construction.
    int32_t execStamp = 0;
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto memoryManager = std::make_unique<MockMemoryManager>();
    executionEnvironment->memoryManager.reset(memoryManager.release());
    DeviceBitfield deviceBitfield(1);
    auto csr = std::make_unique<MockCsr<FamilyType>>(execStamp, *executionEnvironment, 0, deviceBitfield);
    auto hwInfo = *defaultHwInfo;
    auto engine = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0];
    auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(engine, PreemptionHelper::getDefaultPreemptionMode(hwInfo)));
    csr->setupContext(*osContext);

    // Smart pointers release both surfaces even when an ASSERT returns early.
    std::unique_ptr<Surface> surface(createSurface::create<TypeParam>(this->data,
                                                                      &this->gfxAllocation));
    ASSERT_NE(nullptr, surface.get());

    std::unique_ptr<Surface> duplicatedSurface(surface->duplicate());
    ASSERT_NE(nullptr, duplicatedSurface.get());

    surface->makeResident(*csr);

    // NullSurface is excluded from the residency expectation.
    if (std::is_same<TypeParam, HostPtrSurface>::value ||
        std::is_same<TypeParam, GeneralSurface>::value) {
        EXPECT_EQ(1u, csr->madeResidentGfxAllocations.size());
    }
}
// A HostPtrSurface built from pointer and size only must report pointer copy as not allowed.
TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithoutSpecifyingPtrCopyAllowanceThenPtrCopyIsNotAllowed) {
    char hostMemory[2] = {};
    HostPtrSurface hostPtrSurface(hostMemory, sizeof(hostMemory));

    const bool ptrCopyAllowed = hostPtrSurface.peekIsPtrCopyAllowed();
    EXPECT_FALSE(ptrCopyAllowed);
}
// A HostPtrSurface built with the third argument set to true must report pointer copy as allowed.
TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyAllowedThenQueryReturnsTrue) {
    char hostMemory[2] = {};
    HostPtrSurface hostPtrSurface(hostMemory, sizeof(hostMemory), true);

    const bool ptrCopyAllowed = hostPtrSurface.peekIsPtrCopyAllowed();
    EXPECT_TRUE(ptrCopyAllowed);
}
// A HostPtrSurface built with the third argument set to false must report pointer copy as not allowed.
TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyNotAllowedThenQueryReturnsFalse) {
    char hostMemory[2] = {};
    HostPtrSurface hostPtrSurface(hostMemory, sizeof(hostMemory), false);

    const bool ptrCopyAllowed = hostPtrSurface.peekIsPtrCopyAllowed();
    EXPECT_FALSE(ptrCopyAllowed);
}
using GeneralSurfaceTest = ::testing::Test;
// A GeneralSurface created with needsMigration == true must call
// moveAllocationToGpuDomain() on the page fault manager exactly once in makeResident().
HWTEST_F(GeneralSurfaceTest, givenGeneralSurfaceWhenMigrationNeededThenMoveToGpuDomainCalled) {
    // Initialized because the CSR below receives this variable at construction.
    int32_t execStamp = 0;
    MockGraphicsAllocation allocation;

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto memoryManager = std::make_unique<MockMemoryManager>();
    executionEnvironment->memoryManager.reset(memoryManager.release());

    // Keep a raw pointer to the mock; the memory manager's smart pointer takes over the object.
    auto pageFaultManager = std::make_unique<MockPageFaultManager>();
    auto pageFaultManagerPtr = pageFaultManager.get();
    static_cast<MockMemoryManager *>(executionEnvironment->memoryManager.get())->pageFaultManager.reset(pageFaultManager.release());

    DeviceBitfield deviceBitfield(1);
    auto csr = std::make_unique<MockCsr<FamilyType>>(execStamp, *executionEnvironment, 0, deviceBitfield);
    auto hwInfo = *defaultHwInfo;
    auto engine = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0];
    auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(engine, PreemptionHelper::getDefaultPreemptionMode(hwInfo)));
    csr->setupContext(*osContext);

    auto surface = std::make_unique<GeneralSurface>(&allocation, true);
    surface->makeResident(*csr);

    EXPECT_EQ(1, pageFaultManagerPtr->moveAllocationToGpuDomainCalled);
}
// A GeneralSurface created with needsMigration == false must not call
// moveAllocationToGpuDomain() on the page fault manager in makeResident().
HWTEST_F(GeneralSurfaceTest, givenGeneralSurfaceWhenMigrationNotNeededThenMoveToGpuDomainNotCalled) {
    // Initialized because the CSR below receives this variable at construction.
    int32_t execStamp = 0;
    MockGraphicsAllocation allocation;

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto memoryManager = std::make_unique<MockMemoryManager>();
    executionEnvironment->memoryManager.reset(memoryManager.release());

    // Keep a raw pointer to the mock; the memory manager's smart pointer takes over the object.
    auto pageFaultManager = std::make_unique<MockPageFaultManager>();
    auto pageFaultManagerPtr = pageFaultManager.get();
    static_cast<MockMemoryManager *>(executionEnvironment->memoryManager.get())->pageFaultManager.reset(pageFaultManager.release());

    DeviceBitfield deviceBitfield(1);
    auto csr = std::make_unique<MockCsr<FamilyType>>(execStamp, *executionEnvironment, 0, deviceBitfield);
    auto hwInfo = *defaultHwInfo;
    auto engine = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0];
    auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(engine, PreemptionHelper::getDefaultPreemptionMode(hwInfo)));
    csr->setupContext(*osContext);

    auto surface = std::make_unique<GeneralSurface>(&allocation, false);
    surface->makeResident(*csr);

    EXPECT_EQ(0, pageFaultManagerPtr->moveAllocationToGpuDomainCalled);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -62,11 +62,16 @@ class MockPageFaultManager : public PageFaultManager {
// Returns the address of PageFaultManager::handleGpuDomainTransferForAubAndTbx as void *.
void *getAubAndTbxHandlerAddress() {
    auto handler = PageFaultManager::handleGpuDomainTransferForAubAndTbx;
    return reinterpret_cast<void *>(handler);
}
void moveAllocationToGpuDomain(void *ptr) override {
moveAllocationToGpuDomainCalled++;
PageFaultManager::moveAllocationToGpuDomain(ptr);
}
int allowMemoryAccessCalled = 0;
int protectMemoryCalled = 0;
int transferToCpuCalled = 0;
int transferToGpuCalled = 0;
int moveAllocationToGpuDomainCalled = 0;
void *transferToCpuAddress = nullptr;
void *transferToGpuAddress = nullptr;
void *allowedMemoryAccessAddress = nullptr;