Store indirect residency at command queue level

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>

Related-To: NEO-7211
This commit is contained in:
Maciej Plewka 2022-09-14 09:39:13 +00:00 committed by Compute-Runtime-Automation
parent fc9352cfcb
commit ffad5c6c09
18 changed files with 336 additions and 114 deletions

View File

@ -163,28 +163,6 @@ void CommandList::migrateSharedAllocations() {
}
}
// Makes indirect (USM) allocations resident for this command list's next submission.
// NOTE(review): command-list-level path removed by this commit in favor of the
// queue-level CommandQueueImp::handleIndirectAllocationResidency.
void CommandList::handleIndirectAllocationResidency() {
bool indirectAllocationsAllowed = this->hasIndirectAllocationsAllowed();
NEO::Device *neoDevice = this->device->getNEODevice();
if (indirectAllocationsAllowed) {
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
// Platform default; the debug flag below overrides it when set (!= -1).
auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
}
if (submitAsPack) {
// Pack path: mark all indirect allocations resident on the CSR for the upcoming task count.
svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u);
} else {
// Per-type path: append internal allocations matching the unified-memory
// mask to this command list's own residency container.
UnifiedMemoryControls unifiedMemoryControls = this->getUnifiedMemoryControls();
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
this->commandContainer.getResidencyContainer(),
unifiedMemoryControls.generateMask());
}
}
}
bool CommandList::setupTimestampEventForMultiTile(Event *signalEvent) {
if (this->partitionCount > 1 &&
signalEvent) {

View File

@ -217,8 +217,6 @@ struct CommandList : _ze_command_list_handle_t {
return indirectAllocationsAllowed;
}
void handleIndirectAllocationResidency();
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
std::vector<Kernel *> &getPrintfKernelContainer() {

View File

@ -91,8 +91,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
size_t commandStreamStart = this->cmdListCurrentStartOffset;
auto lockCSR = this->csr->obtainUniqueOwnership();
this->handleIndirectAllocationResidency();
std::unique_lock<std::recursive_mutex> lockForIndirect;
if (this->hasIndirectAllocationsAllowed()) {
this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect);
}
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());

View File

@ -256,4 +256,21 @@ NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::Comman
return waitStatus;
}
// Queue-level handling of indirect (USM) allocation residency.
// On the non-pack path the SVM manager's ownership lock is transferred to the
// caller via lockForIndirect, so the CSR residency container stays consistent
// until the caller releases the lock (presumably after submission — the caller
// controls the lock's lifetime).
void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::recursive_mutex> &lockForIndirect) {
NEO::Device *neoDevice = this->device->getNEODevice();
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
// Platform default; MakeIndirectAllocationsResidentAsPack (!= -1) overrides it.
auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
}
if (submitAsPack) {
// Pack path: no lock hand-off; lockForIndirect is left unlocked.
svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u);
} else {
// Acquire and hand back the SVM manager lock, then fill the CSR-level
// (not command-list-level) residency container.
lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership();
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
this->csr->getResidencyAllocations(),
unifiedMemoryControls.generateMask());
}
}
} // namespace L0

View File

@ -12,6 +12,7 @@
#include <level_zero/ze_api.h>
#include <atomic>
#include <mutex>
struct _ze_command_queue_handle_t {};
@ -19,6 +20,8 @@ namespace NEO {
class CommandStreamReceiver;
}
struct UnifiedMemoryControls;
namespace L0 {
struct Device;
@ -49,6 +52,8 @@ struct CommandQueue : _ze_command_queue_handle_t {
return static_cast<CommandQueue *>(handle);
}
virtual void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::recursive_mutex> &lockForIndirect) = 0;
ze_command_queue_handle_t toHandle() { return this; }
bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; }

View File

@ -9,6 +9,7 @@
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
@ -89,6 +90,8 @@ struct CommandQueueHw : public CommandQueueImp {
bool isMigrationRequested{};
bool isDirectSubmissionEnabled{};
bool isDispatchTaskCountPostSyncRequired{};
bool hasIndirectAccess{};
UnifiedMemoryControls unifiedMemoryControls;
};
ze_result_t validateCommandListsParams(CommandListExecutionContext &ctx,

View File

@ -93,7 +93,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
this->device->activateMetricGroups();
if (this->isCopyOnlyCommandQueue) {
ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence);
} else {
@ -117,9 +116,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->setupCmdListsAndContextParams(ctx, phCommandLists, numCommandLists, hFence);
ctx.isDirectSubmissionEnabled = this->csr->isDirectSubmissionEnabled();
std::unique_lock<std::recursive_mutex> lockForIndirect;
if (ctx.hasIndirectAccess) {
handleIndirectAllocationResidency(ctx.unifiedMemoryControls, lockForIndirect);
}
size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx, phCommandLists, numCommandLists);
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx);
this->setFrontEndStateProperties(ctx);
@ -443,6 +446,12 @@ CommandQueueHw<gfxCoreFamily>::CommandListExecutionContext::CommandListExecution
if (commandList->isMemoryPrefetchRequested()) {
this->performMemoryPrefetch = true;
}
hasIndirectAccess |= commandList->hasIndirectAllocationsAllowed();
if (commandList->hasIndirectAllocationsAllowed()) {
unifiedMemoryControls.indirectDeviceAllocationsAllowed |= commandList->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed;
unifiedMemoryControls.indirectHostAllocationsAllowed |= commandList->getUnifiedMemoryControls().indirectHostAllocationsAllowed;
unifiedMemoryControls.indirectSharedAllocationsAllowed |= commandList->getUnifiedMemoryControls().indirectSharedAllocationsAllowed;
}
}
this->isDevicePreemptionModeMidThread = device->getDevicePreemptionMode() == NEO::PreemptionMode::MidThread;
this->stateSipRequired = (this->isPreemptionModeInitial && this->isDevicePreemptionModeMidThread) ||
@ -522,10 +531,8 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
auto commandList = CommandList::fromHandle(phCommandLists[i]);
commandList->csr = this->csr;
commandList->handleIndirectAllocationResidency();
ctx.containsAnyRegularCmdList |= commandList->cmdListType == CommandList::CommandListType::TYPE_REGULAR;
ctx.spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
if (!isCopyOnlyCommandQueue) {
ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());

View File

@ -17,6 +17,8 @@
#include <vector>
struct UnifiedMemoryControls;
namespace NEO {
class LinearStream;
class GraphicsAllocation;
@ -81,6 +83,7 @@ struct CommandQueueImp : public CommandQueue {
MOCKABLE_VIRTUAL NEO::WaitStatus reserveLinearStreamSize(size_t size);
ze_command_queue_mode_t getSynchronousMode() const;
virtual bool getPreemptionCmdProgramming() = 0;
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::recursive_mutex> &lockForIndirect) override;
protected:
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,

View File

@ -132,6 +132,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreFamily> {
using CommandListCoreFamilyImmediate<gfxCoreFamily>::requiredStreamState;
using CommandListCoreFamilyImmediate<gfxCoreFamily>::containsAnyKernel;
using CommandListCoreFamilyImmediate<gfxCoreFamily>::indirectAllocationsAllowed;
};
template <>

View File

@ -19,6 +19,7 @@ namespace ult {
// White-box specialization exposing protected DriverHandleImp members
// (svmAllocsManager is needed so tests can swap in a mock SVM manager).
template <>
struct WhiteBox<::L0::DriverHandle> : public ::L0::DriverHandleImp {
using ::L0::DriverHandleImp::enableProgramDebugging;
using ::L0::DriverHandleImp::svmAllocsManager;
};
using DriverHandle = WhiteBox<::L0::DriverHandle>;

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -647,5 +648,52 @@ TEST(CommandList, whenAsMutableIsCalledNullptrIsReturned) {
EXPECT_EQ(nullptr, cmdList.asMutable());
}
// Mock command queue that only counts calls to handleIndirectAllocationResidency
// instead of performing real residency handling (lockForIndirect is left untouched).
class MockCommandQueueIndirectAccess : public Mock<CommandQueue> {
public:
MockCommandQueueIndirectAccess(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : Mock(device, csr, desc) {}
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::recursive_mutex> &lockForIndirect) override {
handleIndirectAllocationResidencyCalledTimes++;
}
uint32_t handleIndirectAllocationResidencyCalledTimes = 0;
};
// Immediate command list with indirect access enabled must delegate residency
// handling to its immediate command queue exactly once per flush-task execution.
HWTEST2_F(CommandListTest, givenCmdListWithIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessCalled, IsAtLeastSkl) {
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
MockCommandQueueIndirectAccess mockCommandQueue(device, &mockCommandStreamReceiver, &desc);
// Swap in the counting mock queue; restored at the end so destruction is clean.
auto oldCommandQueue = commandList->cmdQImmediate;
commandList->cmdQImmediate = &mockCommandQueue;
commandListImmediate.indirectAllocationsAllowed = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false);
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u);
commandList->cmdQImmediate = oldCommandQueue;
}
// Counterpart to the test above: with indirect access disabled, the queue-level
// residency handler must not be invoked at all.
HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessNotCalled, IsAtLeastSkl) {
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
MockCommandQueueIndirectAccess mockCommandQueue(device, &mockCommandStreamReceiver, &desc);
// Swap in the counting mock queue; restored at the end so destruction is clean.
auto oldCommandQueue = commandList->cmdQImmediate;
commandList->cmdQImmediate = &mockCommandQueue;
commandListImmediate.indirectAllocationsAllowed = false;
commandListImmediate.executeCommandListImmediateWithFlushTask(false);
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u);
commandList->cmdQImmediate = oldCommandQueue;
}
} // namespace ult
} // namespace L0

View File

@ -19,6 +19,7 @@
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
#include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h"
namespace L0 {
@ -1815,5 +1816,126 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingMultiReturnPointFl
commandQueue->destroy();
}
// Mock SVM allocations manager that records invocations of the two residency
// entry points instead of walking real allocations.
struct SVMAllocsManagerMock : public NEO::SVMAllocsManager {
using SVMAllocsManager::mtx;
SVMAllocsManagerMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {}
void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) override {
makeIndirectAllocationsResidentCalledTimes++;
}
void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
ResidencyContainer &residencyContainer,
uint32_t requestedTypesMask) override {
addInternalAllocationsToResidencyContainerCalledTimes++;
// Remember which container the caller passed so tests can verify it is the CSR's.
passedContainer = residencyContainer.data();
}
uint32_t makeIndirectAllocationsResidentCalledTimes = 0;
uint32_t addInternalAllocationsToResidencyContainerCalledTimes = 0;
// Fix: initialize to nullptr. Previously uninitialized — reading it in a test
// where addInternalAllocationsToResidencyContainer was never called would
// compare against an indeterminate pointer (UB).
GraphicsAllocation **passedContainer = nullptr;
};
// With MakeIndirectAllocationsResidentAsPack forced on, the queue must route
// residency through makeIndirectAllocationsResident (the "pack" path) and must
// not touch the per-type residency-container path.
// Fix: test-name typo "SubmiPack" -> "SubmitPack".
TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmitPackEnabledThenMakeIndirectAllocResidentCalled) {
DebugManagerStateRestore restore;
DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(1);
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
std::unique_lock<std::recursive_mutex> lock;
// Swap in the counting mock manager; the original is restored before destroy.
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
EXPECT_EQ(mockSvmAllocsManager->makeIndirectAllocationsResidentCalledTimes, 1u);
EXPECT_EQ(mockSvmAllocsManager->addInternalAllocationsToResidencyContainerCalledTimes, 0u);
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager;
commandQueue->destroy();
}
// With the pack mode forced off, the queue must use the per-type path:
// addInternalAllocationsToResidencyContainer is called, the pack API is not.
// Fix: test-name typos "SubmiPack"/"Disabeld" -> "SubmitPack"/"Disabled".
TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmitPackDisabledThenAddInternalAllocationsToResidencyContainer) {
DebugManagerStateRestore restore;
DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
std::unique_lock<std::recursive_mutex> lock;
// Swap in the counting mock manager; the original is restored before destroy.
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
EXPECT_EQ(mockSvmAllocsManager->makeIndirectAllocationsResidentCalledTimes, 0u);
EXPECT_EQ(mockSvmAllocsManager->addInternalAllocationsToResidencyContainerCalledTimes, 1u);
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager;
// The non-pack path handed us the SVM manager lock; release it before destroy.
lock.unlock();
commandQueue->destroy();
}
// On the non-pack path the queue must hand the SVM manager's mutex back locked:
// a second thread's try_lock on the mock's recursive mutex must fail while the
// returned unique_lock is held. (try_lock is attempted from another thread
// because a recursive_mutex would succeed on the owning thread.)
// Fix: test-name typos "SubmiPack"/"Disabeld" -> "SubmitPack"/"Disabled".
TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmitPackDisabledThenSVMAllocsMtxIsLocked) {
DebugManagerStateRestore restore;
DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
std::unique_lock<std::recursive_mutex> lock;
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
std::thread th([&] {
EXPECT_FALSE(mockSvmAllocsManager->mtx.try_lock());
});
th.join();
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager;
lock.unlock();
commandQueue->destroy();
}
// On the non-pack path the container passed to the SVM manager must be the
// CSR-level residency container (the queue-level change this commit makes),
// not a command-list-level one.
// Fix: test-name typos "SubmiPack"/"Disabeld" -> "SubmitPack"/"Disabled".
TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmitPackDisabledThenResidencyContainerFromCsrIsUsed) {
DebugManagerStateRestore restore;
DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
std::unique_lock<std::recursive_mutex> lock;
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
EXPECT_EQ(commandQueue->csr->getResidencyAllocations().data(), mockSvmAllocsManager->passedContainer);
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager;
lock.unlock();
commandQueue->destroy();
}
} // namespace ult
} // namespace L0

View File

@ -14,6 +14,8 @@
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@ -386,68 +388,6 @@ HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerP
}
using CommandQueueIndirectAllocations = Test<ModuleFixture>;
// Legacy end-to-end test (removed by this commit): verified that executing a
// command list with indirect device access adds the indirect USM allocation to
// the COMMAND LIST's residency container — the behavior this commit moves to
// the command queue / CSR level.
HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAddedToResidencyContainer) {
const ze_command_queue_desc_t desc = {};
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
// Mid-thread preemption (or active debugger) requires a preemption allocation.
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
csr.createPreemptionAllocation();
}
ze_result_t returnValue;
L0::CommandQueue *commandQueue = CommandQueue::create(productFamily,
device,
&csr,
&desc,
false,
false,
returnValue);
ASSERT_NE(nullptr, commandQueue);
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
void *deviceAlloc = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, gpuAlloc);
createKernel();
// Kernel declares indirect device access so the allocation is only reachable indirectly.
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(),
&groupCount,
nullptr,
0,
nullptr,
launchParams);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// Before execution the indirect allocation must not be in the residency container.
auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
std::end(commandList->commandContainer.getResidencyContainer()),
gpuAlloc);
EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
auto commandListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// After execution it must have been added.
itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
std::end(commandList->commandContainer.getResidencyContainer()),
gpuAlloc);
EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
commandQueue->destroy();
}
HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocationsAsOnePackWhenIndirectAccessIsUsedThenWholePackIsMadeResident) {
DebugManagerStateRestore restorer;
@ -776,6 +716,63 @@ HWTEST2_F(EngineInstancedDeviceExecuteTests, givenEngineInstancedDeviceWhenExecu
commandQueue->destroy();
}
// Mock queue exposing executeCommandListsRegular and counting calls to
// handleIndirectAllocationResidency (the override is a no-op otherwise;
// lockForIndirect is intentionally left unlocked).
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandQueueHandleIndirectAllocs : public MockCommandQueueHw<gfxCoreFamily> {
public:
using typename MockCommandQueueHw<gfxCoreFamily>::CommandListExecutionContext;
using MockCommandQueueHw<gfxCoreFamily>::executeCommandListsRegular;
MockCommandQueueHandleIndirectAllocs(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : MockCommandQueueHw<gfxCoreFamily>(device, csr, desc) {}
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::recursive_mutex> &lockForIndirect) override {
handleIndirectAllocationResidencyCalledTimes++;
}
uint32_t handleIndirectAllocationResidencyCalledTimes = 0;
};
// When the execution context reports indirect access, executeCommandListsRegular
// must call the queue's handleIndirectAllocationResidency exactly once.
// NOTE(review): the test name says "ExecutingCommandListImmediateWithFlushTask"
// but the body exercises executeCommandListsRegular — consider renaming.
HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWithIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessCalled, IsAtLeastSkl) {
ze_command_queue_desc_t desc = {};
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
auto commandQueue = new MockCommandQueueHandleIndirectAllocs<gfxCoreFamily>(device, csr, &desc);
commandQueue->initialize(false, false);
auto ctx = typename MockCommandQueueHandleIndirectAllocs<gfxCoreFamily>::CommandListExecutionContext{nullptr,
0,
csr->getPreemptionMode(),
device,
false,
csr->isProgramActivePartitionConfigRequired(),
false};
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
ctx.hasIndirectAccess = true;
ctx.isDispatchTaskCountPostSyncRequired = false;
auto cmdListHandle = commandList.get()->toHandle();
commandQueue->executeCommandListsRegular(ctx, 0, &cmdListHandle, nullptr);
EXPECT_EQ(commandQueue->handleIndirectAllocationResidencyCalledTimes, 1u);
commandQueue->destroy();
}
// When the execution context reports no indirect access,
// executeCommandListsRegular must not call handleIndirectAllocationResidency.
// Fix: test-name typo "givenCtxWitNohIndirectAccess" -> "givenCtxWithNoIndirectAccess".
HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWithNoIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessNotCalled, IsAtLeastSkl) {
ze_command_queue_desc_t desc = {};
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
auto commandQueue = new MockCommandQueueHandleIndirectAllocs<gfxCoreFamily>(device, csr, &desc);
commandQueue->initialize(false, false);
auto ctx = typename MockCommandQueueHandleIndirectAllocs<gfxCoreFamily>::CommandListExecutionContext{nullptr,
0,
csr->getPreemptionMode(),
device,
false,
csr->isProgramActivePartitionConfigRequired(),
false};
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
ctx.hasIndirectAccess = false;
ctx.isDispatchTaskCountPostSyncRequired = false;
auto cmdListHandle = commandList.get()->toHandle();
commandQueue->executeCommandListsRegular(ctx, 0, &cmdListHandle, nullptr);
EXPECT_EQ(commandQueue->handleIndirectAllocationResidencyCalledTimes, 0u);
commandQueue->destroy();
}
} // namespace ult
} // namespace L0

View File

@ -107,7 +107,7 @@ SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionP
void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
ResidencyContainer &residencyContainer,
uint32_t requestedTypesMask) {
std::shared_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
for (auto &allocation : this->SVMAllocs.allocations) {
if (rootDeviceIndex >= allocation.second.gpuAllocations.getGraphicsAllocations().size()) {
continue;
@ -124,7 +124,7 @@ void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootD
}
void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) {
std::shared_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
for (auto &allocation : this->SVMAllocs.allocations) {
if (allocation.second.memoryType & requestedTypesMask) {
auto gpuAllocation = allocation.second.gpuAllocations.getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex());
@ -211,7 +211,7 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
allocData.pageSizeForAlignment = pageSizeForAlignment;
allocData.setAllocId(this->allocationsCounter++);
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
this->SVMAllocs.insert(allocData);
return usmPtr;
@ -288,7 +288,7 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
allocData.device = memoryProperties.device;
allocData.setAllocId(this->allocationsCounter++);
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
this->SVMAllocs.insert(allocData);
return reinterpret_cast<void *>(unifiedMemoryAllocation->getGpuAddress());
}
@ -370,7 +370,7 @@ void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const Sv
allocData.pageSizeForAlignment = pageSizeForAlignment;
allocData.setAllocId(this->allocationsCounter++);
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
this->SVMAllocs.insert(allocData);
return allocationGpu->getUnderlyingBuffer();
}
@ -381,17 +381,17 @@ void SVMAllocsManager::setUnifiedAllocationProperties(GraphicsAllocation *alloca
}
SvmAllocationData *SVMAllocsManager::getSVMAlloc(const void *ptr) {
std::shared_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
return SVMAllocs.get(ptr);
}
void SVMAllocsManager::insertSVMAlloc(const SvmAllocationData &svmAllocData) {
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
SVMAllocs.insert(svmAllocData);
}
void SVMAllocsManager::removeSVMAlloc(const SvmAllocationData &svmAllocData) {
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
SVMAllocs.remove(svmAllocData);
}
@ -428,7 +428,7 @@ void SVMAllocsManager::freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationD
if (pageFaultManager) {
pageFaultManager->removeAllocation(ptr);
}
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
if (svmData->gpuAllocations.getAllocationType() == AllocationType::SVM_ZERO_COPY) {
freeZeroCopySvmAllocation(svmData);
} else {
@ -470,7 +470,7 @@ void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAlloca
}
allocData.size = size;
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
this->SVMAllocs.insert(allocData);
return usmPtr;
}
@ -534,7 +534,7 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co
allocData.size = size;
allocData.setAllocId(this->allocationsCounter++);
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
this->SVMAllocs.insert(allocData);
return svmPtr;
}
@ -564,7 +564,7 @@ void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svm
}
bool SVMAllocsManager::hasHostAllocations() {
std::shared_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
for (auto &allocation : this->SVMAllocs.allocations) {
if (allocation.second.memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) {
return true;
@ -574,7 +574,7 @@ bool SVMAllocsManager::hasHostAllocations() {
}
void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) {
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
bool parseAllAllocations = false;
auto entry = indirectAllocationsResidency.find(&commandStreamReceiver);
@ -608,7 +608,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
}
void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData) {
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
if (this->indirectAllocationsResidency.size() > 0u) {
for (auto &internalAllocationsHandling : this->indirectAllocationsResidency) {
auto commandStreamReceiver = internalAllocationsHandling.first;
@ -627,7 +627,7 @@ void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData
}
SvmMapOperation *SVMAllocsManager::getSvmMapOperation(const void *ptr) {
std::shared_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
return svmMapOperations.get(ptr);
}
@ -638,12 +638,12 @@ void SVMAllocsManager::insertSvmMapOperation(void *regionSvmPtr, size_t regionSi
svmMapOperation.offset = offset;
svmMapOperation.regionSize = regionSize;
svmMapOperation.readOnlyMap = readOnlyMap;
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
svmMapOperations.insert(svmMapOperation);
}
void SVMAllocsManager::removeSvmMapOperation(const void *regionSvmPtr) {
std::unique_lock<std::shared_mutex> lock(mtx);
std::unique_lock<std::recursive_mutex> lock(mtx);
svmMapOperations.remove(regionSvmPtr);
}
@ -686,4 +686,7 @@ void SVMAllocsManager::prefetchMemory(Device &device, SvmAllocationData &svmData
}
}
// Acquires exclusive ownership of the manager's allocation mutex. The caller
// holds the lock for as long as the returned unique_lock stays alive.
std::unique_lock<std::recursive_mutex> SVMAllocsManager::obtainOwnership() {
    std::unique_lock<std::recursive_mutex> ownershipLock(mtx);
    return ownershipLock;
}
} // namespace NEO

View File

@ -173,17 +173,18 @@ class SVMAllocsManager {
MOCKABLE_VIRTUAL void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap);
void removeSvmMapOperation(const void *regionSvmPtr);
SvmMapOperation *getSvmMapOperation(const void *regionPtr);
void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
ResidencyContainer &residencyContainer,
uint32_t requestedTypesMask);
MOCKABLE_VIRTUAL void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
ResidencyContainer &residencyContainer,
uint32_t requestedTypesMask);
void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask);
void *createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties);
void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData);
bool hasHostAllocations();
std::atomic<uint32_t> allocationsCounter = 0;
void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount);
MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount);
void prepareIndirectAllocationForDestruction(SvmAllocationData *);
void prefetchMemory(Device &device, SvmAllocationData &svmData);
std::unique_lock<std::recursive_mutex> obtainOwnership();
std::map<CommandStreamReceiver *, InternalAllocationsTracker> indirectAllocationsResidency;
@ -203,9 +204,9 @@ class SVMAllocsManager {
MapBasedAllocationTracker SVMAllocs;
MapOperationsTracker svmMapOperations;
MemoryManager *memoryManager;
std::shared_mutex mtx;
bool multiOsContextSupport;
SvmAllocationCache usmDeviceAllocationsCache;
bool usmDeviceAllocationsCacheEnabled = false;
std::recursive_mutex mtx;
};
} // namespace NEO

View File

@ -11,6 +11,7 @@ namespace NEO {
struct MockSVMAllocsManager : public SVMAllocsManager {
public:
using SVMAllocsManager::memoryManager;
using SVMAllocsManager::mtx;
using SVMAllocsManager::multiOsContextSupport;
using SVMAllocsManager::SVMAllocs;
using SVMAllocsManager::SVMAllocsManager;

View File

@ -27,6 +27,7 @@ target_sources(neo_shared_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/storage_info_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_cache_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_tests.cpp
)
add_subdirectories()

View File

@ -0,0 +1,34 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_svm_manager.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/test.h"
#include "gtest/gtest.h"
using namespace NEO;
// obtainOwnership must return a unique_lock that really holds the manager's
// recursive mutex: another thread's try_lock fails while the lock is held and
// succeeds after unlock. (try_lock runs on separate threads because a
// recursive_mutex would always succeed on the owning thread.)
// Fixes: duplicated word in test name ("givenGiven" -> "given"); raw new
// replaced with std::make_unique.
TEST(SvmDeviceAllocationTest, givenSvmAllocsManagerWhenObtainOwnershipCalledThenLockedUniqueLockReturned) {
auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 1);
auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
auto lock = svmManager->obtainOwnership();
std::thread th1([&] {
EXPECT_FALSE(svmManager->mtx.try_lock());
});
th1.join();
lock.unlock();
std::thread th2([&] {
EXPECT_TRUE(svmManager->mtx.try_lock());
svmManager->mtx.unlock();
});
th2.join();
}