Add pageFault migration support for immediate cmdlist submission via flushTask.

Move the makeResident and page-fault migration logic into CommandList so it can be re-used by both the command queue and immediate command list paths.

Signed-off-by: Vinod Tipparaju <vinod.tipparaju@intel.com>
This commit is contained in:
Vinod Tipparaju 2021-07-16 16:17:08 +05:30 committed by Compute-Runtime-Automation
parent 37670aeb91
commit a2012e04dc
6 changed files with 231 additions and 20 deletions

View File

@ -7,10 +7,13 @@
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/device/device_info.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "level_zero/core/source/device/device_imp.h"
namespace L0 {
CommandList::~CommandList() {
@ -124,4 +127,30 @@ NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
}
// Makes every allocation from this list's residency container resident on the CSR,
// skipping allocations the CSR already tracks. When performMigration is true, each
// newly added SVM_GPU / SVM_CPU allocation is additionally moved to the GPU domain
// through the page fault manager.
//
// performMigration: request page-fault migration for SVM allocations. If no page fault
//                   manager is available, allocations are still made resident but no
//                   migration is attempted.
void CommandList::makeResidentAndMigrate(bool performMigration) {
    // The page fault manager does not depend on the allocation, so resolve it once.
    // It is only looked up when migration was requested; a null manager simply
    // disables migration instead of being dereferenced.
    auto pageFaultManager = performMigration
                                ? device->getDriverHandle()->getMemoryManager()->getPageFaultManager()
                                : nullptr;

    for (auto alloc : commandContainer.getResidencyContainer()) {
        // Re-read the CSR residency list on every iteration: makeResident() below may grow it.
        const auto &csrResidency = csr->getResidencyAllocations();
        if (std::find(csrResidency.begin(), csrResidency.end(), alloc) != csrResidency.end()) {
            continue; // already resident on this CSR, nothing to do
        }

        csr->makeResident(*alloc);

        if (pageFaultManager == nullptr) {
            continue;
        }

        const auto allocationType = alloc->getAllocationType();
        if (allocationType == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
            allocationType == NEO::GraphicsAllocation::AllocationType::SVM_CPU) {
            pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
        }
    }
}
// Moves every allocation recorded in the driver handle's sharedMakeResidentAllocations
// map to the GPU domain via the page fault manager. The map is walked while holding
// sharedMakeResidentAllocationsLock. Does nothing when no page fault manager exists.
void CommandList::migrateSharedAllocations() {
    auto deviceImp = static_cast<DeviceImp *>(device);
    auto driverHandleImp = static_cast<DriverHandleImp *>(deviceImp->getDriverHandle());

    // Resolve the manager from the driver handle we already have, before taking the lock.
    auto pageFaultManager = driverHandleImp->getMemoryManager()->getPageFaultManager();
    if (pageFaultManager == nullptr) {
        return;
    }

    std::lock_guard<std::mutex> lock(driverHandleImp->sharedMakeResidentAllocationsLock);
    // Bind by const reference so map entries are not copied on each iteration.
    for (const auto &alloc : driverHandleImp->sharedMakeResidentAllocations) {
        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc.second->getGpuAddress()));
    }
}
} // namespace L0

View File

@ -248,6 +248,9 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
bool isFlushTaskSubmissionEnabled = false;
// Makes the allocations in this list's residency container resident on csr (skipping
// ones csr already tracks); when the flag is true, newly added SVM_GPU / SVM_CPU
// allocations are also moved to the GPU domain via the page fault manager.
void makeResidentAndMigrate(bool);
// Moves every allocation in the driver handle's sharedMakeResidentAllocations map to
// the GPU domain, holding sharedMakeResidentAllocationsLock while iterating.
void migrateSharedAllocations();
protected:
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
NEO::EngineGroupType engineGroupType;

View File

@ -65,6 +65,20 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadScratchSize());
if (performMigration) {
auto deviceImp = static_cast<DeviceImp *>(this->device);
auto pageFaultManager = deviceImp->getDriverHandle()->getMemoryManager()->getPageFaultManager();
if (pageFaultManager == nullptr) {
performMigration = false;
}
}
this->makeResidentAndMigrate(performMigration);
if (performMigration) {
this->migrateSharedAllocations();
}
auto completionStamp = this->csr->flushTask(
*commandStream,
commandStreamStart,
@ -319,9 +333,17 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
// Appends a page-fault copy from srcptr to dstptr using the base command list
// implementation and, on success, submits the immediate command list exactly once.
//
// When flush-task submission is enabled, available command stream space is checked
// before appending and the list is submitted via executeCommandListImmediateWithFlushTask;
// otherwise it is submitted via executeCommandListImmediate. Both submissions are issued
// with performMigration == false.
//
// Returns the result of the base appendPageFaultCopy call.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) {
    if (this->isFlushTaskSubmissionEnabled) {
        checkAvailableSpace();
    }

    auto ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstptr, srcptr, size, flushHost);
    if (ret == ZE_RESULT_SUCCESS) {
        // Submit through exactly one path; an additional unconditional
        // executeCommandListImmediate() here would submit the same commands twice.
        if (this->isFlushTaskSubmissionEnabled) {
            executeCommandListImmediateWithFlushTask(false);
        } else {
            executeCommandListImmediate(false);
        }
    }
    return ret;
}

View File

@ -359,28 +359,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
commandList->getPrintfFunctionContainer().begin(),
commandList->getPrintfFunctionContainer().end());
for (auto alloc : commandList->commandContainer.getResidencyContainer()) {
if (csr->getResidencyAllocations().end() ==
std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) {
csr->makeResident(*alloc);
if (performMigration) {
if (alloc &&
(alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) {
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
}
}
}
}
commandList->csr = csr;
commandList->makeResidentAndMigrate(performMigration);
}
if (performMigration) {
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(device->getDriverHandle());
std::lock_guard<std::mutex> lock(driverHandleImp->sharedMakeResidentAllocationsLock);
for (auto alloc : driverHandleImp->sharedMakeResidentAllocations) {
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc.second->getGpuAddress()));
}
auto commandList = CommandList::fromHandle(phCommandLists[0]);
commandList->migrateSharedAllocations();
}
if (stateSipRequired) {

View File

@ -7,9 +7,12 @@
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_compilers.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "opencl/test/unit_test/mocks/mock_svm_manager.h"
#include "test.h"
#include "level_zero/core/source/context/context_imp.h"
@ -396,6 +399,8 @@ struct ContextMakeMemoryResidentAndMigrationTests : public ContextMakeMemoryResi
ContextMakeMemoryResidentTests::SetUp();
mockMemoryManager = std::make_unique<MockMemoryManager>();
mockPageFaultManager = new MockResidentTestsPageFaultManager;
svmManager = std::make_unique<MockSVMAllocsManager>(mockMemoryManager.get(), false);
mockMemoryManager->pageFaultManager.reset(mockPageFaultManager);
memoryManager = device->getDriverHandle()->getMemoryManager();
device->getDriverHandle()->setMemoryManager(mockMemoryManager.get());
@ -420,6 +425,7 @@ struct ContextMakeMemoryResidentAndMigrationTests : public ContextMakeMemoryResi
void *ptr = nullptr;
std::unique_ptr<MockMemoryManager> mockMemoryManager;
std::unique_ptr<MockSVMAllocsManager> svmManager;
MockResidentTestsPageFaultManager *mockPageFaultManager = nullptr;
NEO::MemoryManager *memoryManager = nullptr;
};
@ -526,6 +532,126 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
context->freeMem(ptr);
}
// With EnableFlushTaskSubmission set, appending a memory fill to an immediate command
// list is expected to migrate the allocation registered through makeMemoryResident
// exactly once, and the migrated address must be the fixture's ptr.
HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
         whenExecutingImmediateCommandListsHavingSharedAllocationWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) {
    // Register the fixture allocation as resident and confirm the driver handle tracks it.
    auto *driverImp = static_cast<DriverHandleImp *>(hostDriverHandle.get());
    size_t residentCountBefore = driverImp->sharedMakeResidentAllocations.size();

    EXPECT_CALL(*mockMemoryInterface, makeResident)
        .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS));

    ze_result_t residencyResult = context->makeMemoryResident(device, ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, residencyResult);

    size_t residentCountAfter = driverImp->sharedMakeResidentAllocations.size();
    EXPECT_EQ(residentCountBefore + 1, residentCountAfter);

    const ze_command_queue_desc_t queueDesc = {};
    MockCsrHw2<FamilyType> mockCsr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    mockCsr.initializeTagAllocation();
    mockCsr.setupContext(*neoDevice->getDefaultEngine().osContext);

    // Create the immediate command list with flush-task submission switched on.
    ze_result_t status = ZE_RESULT_SUCCESS;
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    std::unique_ptr<L0::CommandList> immediateList(CommandList::createImmediate(productFamily,
                                                                                device,
                                                                                &queueDesc,
                                                                                false,
                                                                                NEO::EngineGroupType::RenderCompute,
                                                                                status));
    ASSERT_NE(nullptr, immediateList);

    // Shared allocation used as the fill destination.
    void *fillTarget = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    status = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4090u, &fillTarget);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    int pattern = 1;
    status = immediateList->appendMemoryFill(fillTarget, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4090u,
                                             nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Exactly one migration, and it targets the allocation made resident above.
    EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u);
    EXPECT_EQ(mockPageFaultManager->migratedAddress, ptr);

    EXPECT_CALL(*mockMemoryInterface, evict)
        .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS));
    residencyResult = context->evictMemory(device, ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, residencyResult);

    context->freeMem(ptr);
    context->freeMem(fillTarget);
}
// With EnableFlushTaskSubmission set, an immediate command list whose residency
// container holds the GPU and CPU allocations of a shared unified memory allocation is
// expected to trigger three moveAllocationToGpuDomain calls when a memory fill is
// appended: the fixture allocation registered through makeMemoryResident plus the two
// allocations added to the residency container.
HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
         whenExecutingImmediateCommandListsHavingHostAllocationWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) {
    MockCommandQueue cmdQ;
    DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(hostDriverHandle.get());
    size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size();

    EXPECT_CALL(*mockMemoryInterface, makeResident)
        .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS));

    ze_result_t res = context->makeMemoryResident(device, ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size();
    EXPECT_EQ(previousSize + 1, currentSize);

    const ze_command_queue_desc_t desc = {};
    MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);

    ze_result_t result = ZE_RESULT_SUCCESS;
    // A single restorer covers every debug flag changed in this test.
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               device,
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::RenderCompute,
                                                                               result));
    ASSERT_NE(nullptr, commandList0);

    NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true);

    std::set<uint32_t> rootDeviceIndices{mockRootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
    NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);

    // Both results are dereferenced below, so a failure here must stop the test
    // instead of continuing into a null dereference.
    auto sharedPtr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
    ASSERT_NE(nullptr, sharedPtr);
    auto allocation = svmManager->getSVMAlloc(sharedPtr);
    ASSERT_NE(nullptr, allocation);
    auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);

    auto &commandContainer = commandList0->commandContainer;
    commandContainer.addToResidencyContainer(gpuAllocation);
    commandContainer.addToResidencyContainer(allocation->cpuAllocation);

    void *dst_buffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 4096u, 0u, &dst_buffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    int one = 1;
    result = commandList0->appendMemoryFill(dst_buffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
                                            nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 3u);

    context->freeMem(ptr);
    svmManager->freeSVMAlloc(sharedPtr);
    context->freeMem(dst_buffer);
}
TEST_F(ContextTest, whenGettingDriverThenDriverIsRetrievedSuccessfully) {
ze_context_handle_t hContext;
ze_context_desc_t desc;

View File

@ -1087,6 +1087,52 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForRegularC
commandQueue->destroy();
}
// appendPageFaultCopy on an immediate command list must return success when
// EnableFlushTaskSubmission is set to true.
HWTEST2_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledCommandListAndAppendPageFaultCopyThenSuccessIsReturned, IsSklOrAbove) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    size_t allocSize = (sizeof(uint32_t) * 4);
    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    auto immediateList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
    ASSERT_NE(nullptr, immediateList);

    // Stack-constructed allocations serving as copy source and destination.
    NEO::GraphicsAllocation sourceAlloc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                        reinterpret_cast<void *>(0x1234), allocSize, 0, sizeof(uint32_t),
                                        MemoryPool::System4KBPages);
    NEO::GraphicsAllocation destAlloc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                      reinterpret_cast<void *>(0x2345), allocSize, 0, sizeof(uint32_t),
                                      MemoryPool::System4KBPages);

    auto copyResult = immediateList->appendPageFaultCopy(&destAlloc, &sourceAlloc, 0x100, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, copyResult);

    immediateList->destroy();
}
// appendPageFaultCopy on an immediate command list must return success when
// EnableFlushTaskSubmission is set to false.
HWTEST2_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledCommandListAndAppendPageFaultCopyThenSuccessIsReturned, IsSklOrAbove) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false);

    size_t allocSize = (sizeof(uint32_t) * 4);
    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    auto immediateList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
    ASSERT_NE(nullptr, immediateList);

    // Stack-constructed allocations serving as copy source and destination.
    NEO::GraphicsAllocation sourceAlloc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                        reinterpret_cast<void *>(0x1234), allocSize, 0, sizeof(uint32_t),
                                        MemoryPool::System4KBPages);
    NEO::GraphicsAllocation destAlloc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                      reinterpret_cast<void *>(0x2345), allocSize, 0, sizeof(uint32_t),
                                      MemoryPool::System4KBPages);

    auto copyResult = immediateList->appendPageFaultCopy(&destAlloc, &sourceAlloc, 0x100, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, copyResult);

    immediateList->destroy();
}
HWTEST_F(L0DebuggerSimpleTest, givenNonZeroGpuVasWhenProgrammingSbaTrackingThenCorrectCmdsAreAddedToStream) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);