Add pageFault migration support for immediate cmdlist submission via flushTask.
Move the logic for makeResident & pageFault migration to a common place for re-use.
Signed-off-by: Vinod Tipparaju <vinod.tipparaju@intel.com>
This commit is contained in:
parent
37670aeb91
commit
a2012e04dc
|
@ -7,10 +7,13 @@
|
|||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/device/device_info.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
#include "level_zero/core/source/device/device_imp.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
CommandList::~CommandList() {
|
||||
|
@ -124,4 +127,30 @@ NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
|
|||
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
|
||||
}
|
||||
|
||||
void CommandList::makeResidentAndMigrate(bool performMigration) {
|
||||
for (auto alloc : commandContainer.getResidencyContainer()) {
|
||||
if (csr->getResidencyAllocations().end() ==
|
||||
std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) {
|
||||
csr->makeResident(*alloc);
|
||||
|
||||
if (performMigration &&
|
||||
(alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
|
||||
alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) {
|
||||
auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
|
||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CommandList::migrateSharedAllocations() {
|
||||
auto deviceImp = static_cast<DeviceImp *>(device);
|
||||
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(deviceImp->getDriverHandle());
|
||||
std::lock_guard<std::mutex> lock(driverHandleImp->sharedMakeResidentAllocationsLock);
|
||||
auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
|
||||
for (auto alloc : driverHandleImp->sharedMakeResidentAllocations) {
|
||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc.second->getGpuAddress()));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
|
|
@ -248,6 +248,9 @@ struct CommandList : _ze_command_list_handle_t {
|
|||
uint32_t threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
|
||||
bool isFlushTaskSubmissionEnabled = false;
|
||||
|
||||
void makeResidentAndMigrate(bool);
|
||||
void migrateSharedAllocations();
|
||||
|
||||
protected:
|
||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||
NEO::EngineGroupType engineGroupType;
|
||||
|
|
|
@ -65,6 +65,20 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
|||
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadScratchSize());
|
||||
|
||||
if (performMigration) {
|
||||
auto deviceImp = static_cast<DeviceImp *>(this->device);
|
||||
auto pageFaultManager = deviceImp->getDriverHandle()->getMemoryManager()->getPageFaultManager();
|
||||
if (pageFaultManager == nullptr) {
|
||||
performMigration = false;
|
||||
}
|
||||
}
|
||||
|
||||
this->makeResidentAndMigrate(performMigration);
|
||||
|
||||
if (performMigration) {
|
||||
this->migrateSharedAllocations();
|
||||
}
|
||||
|
||||
auto completionStamp = this->csr->flushTask(
|
||||
*commandStream,
|
||||
commandStreamStart,
|
||||
|
@ -319,9 +333,17 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
|
|||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) {
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace();
|
||||
}
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstptr, srcptr, size, flushHost);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(false);
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
executeCommandListImmediateWithFlushTask(false);
|
||||
} else {
|
||||
executeCommandListImmediate(false);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -359,28 +359,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||
commandList->getPrintfFunctionContainer().begin(),
|
||||
commandList->getPrintfFunctionContainer().end());
|
||||
|
||||
for (auto alloc : commandList->commandContainer.getResidencyContainer()) {
|
||||
if (csr->getResidencyAllocations().end() ==
|
||||
std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) {
|
||||
csr->makeResident(*alloc);
|
||||
|
||||
if (performMigration) {
|
||||
if (alloc &&
|
||||
(alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
|
||||
alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) {
|
||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
commandList->csr = csr;
|
||||
commandList->makeResidentAndMigrate(performMigration);
|
||||
}
|
||||
|
||||
if (performMigration) {
|
||||
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
std::lock_guard<std::mutex> lock(driverHandleImp->sharedMakeResidentAllocationsLock);
|
||||
for (auto alloc : driverHandleImp->sharedMakeResidentAllocations) {
|
||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc.second->getGpuAddress()));
|
||||
}
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[0]);
|
||||
commandList->migrateSharedAllocations();
|
||||
}
|
||||
|
||||
if (stateSipRequired) {
|
||||
|
|
|
@ -7,9 +7,12 @@
|
|||
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_compilers.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h"
|
||||
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_svm_manager.h"
|
||||
#include "test.h"
|
||||
|
||||
#include "level_zero/core/source/context/context_imp.h"
|
||||
|
@ -396,6 +399,8 @@ struct ContextMakeMemoryResidentAndMigrationTests : public ContextMakeMemoryResi
|
|||
ContextMakeMemoryResidentTests::SetUp();
|
||||
mockMemoryManager = std::make_unique<MockMemoryManager>();
|
||||
mockPageFaultManager = new MockResidentTestsPageFaultManager;
|
||||
svmManager = std::make_unique<MockSVMAllocsManager>(mockMemoryManager.get(), false);
|
||||
|
||||
mockMemoryManager->pageFaultManager.reset(mockPageFaultManager);
|
||||
memoryManager = device->getDriverHandle()->getMemoryManager();
|
||||
device->getDriverHandle()->setMemoryManager(mockMemoryManager.get());
|
||||
|
@ -420,6 +425,7 @@ struct ContextMakeMemoryResidentAndMigrationTests : public ContextMakeMemoryResi
|
|||
void *ptr = nullptr;
|
||||
|
||||
std::unique_ptr<MockMemoryManager> mockMemoryManager;
|
||||
std::unique_ptr<MockSVMAllocsManager> svmManager;
|
||||
MockResidentTestsPageFaultManager *mockPageFaultManager = nullptr;
|
||||
NEO::MemoryManager *memoryManager = nullptr;
|
||||
};
|
||||
|
@ -526,6 +532,126 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
|
|||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
// With flush-task submission enabled, appending to an immediate command list
// must migrate the allocation that was made resident through the context:
// exactly one moveAllocationToGpuDomain call, targeting `ptr`.
HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
         whenExecutingImmediateCommandListsHavingSharedAllocationWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) {
    auto driverHandleImp = static_cast<DriverHandleImp *>(hostDriverHandle.get());
    const size_t sizeBefore = driverHandleImp->sharedMakeResidentAllocations.size();

    // Register `ptr` as a shared make-resident allocation.
    EXPECT_CALL(*mockMemoryInterface, makeResident)
        .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS));
    ze_result_t res = context->makeMemoryResident(device, ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    const size_t sizeAfter = driverHandleImp->sharedMakeResidentAllocations.size();
    EXPECT_EQ(sizeBefore + 1, sizeAfter);

    const ze_command_queue_desc_t queueDesc = {};
    MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);

    ze_result_t result = ZE_RESULT_SUCCESS;

    // Route immediate command list submissions through flushTask.
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    std::unique_ptr<L0::CommandList> immediateCmdList(
        CommandList::createImmediate(productFamily, device, &queueDesc, false,
                                     NEO::EngineGroupType::RenderCompute, result));
    ASSERT_NE(nullptr, immediateCmdList);

    void *fillTarget = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4090u, &fillTarget);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    int pattern = 1;
    result = immediateCmdList->appendMemoryFill(fillTarget, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4090u,
                                                nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u);
    EXPECT_EQ(mockPageFaultManager->migratedAddress, ptr);

    EXPECT_CALL(*mockMemoryInterface, evict)
        .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS));
    res = context->evictMemory(device, ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    context->freeMem(ptr);
    context->freeMem(fillTarget);
}
|
||||
|
||||
// With flush-task submission enabled, appending to an immediate command list
// whose residency container holds the GPU and CPU parts of a shared unified
// memory allocation must trigger page-fault migration for them in addition to
// the allocation made resident through the context.
HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
         whenExecutingImmediateCommandListsHavingHostAllocationWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) {
    MockCommandQueue cmdQ;
    DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(hostDriverHandle.get());
    size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size();

    EXPECT_CALL(*mockMemoryInterface, makeResident)
        .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS));
    ze_result_t res = context->makeMemoryResident(device, ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size();
    EXPECT_EQ(previousSize + 1, currentSize);

    const ze_command_queue_desc_t desc = {};
    MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);

    ze_result_t result = ZE_RESULT_SUCCESS;

    // A single restorer covers every debug flag changed in this test.
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               device,
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::RenderCompute,
                                                                               result));
    ASSERT_NE(nullptr, commandList0);

    DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true);

    std::set<uint32_t> rootDeviceIndices{mockRootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};

    NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
    auto sharedPtr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
    // Fatal checks: both results are dereferenced right below.
    ASSERT_NE(nullptr, sharedPtr);

    auto allocation = svmManager->getSVMAlloc(sharedPtr);
    ASSERT_NE(nullptr, allocation);
    auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);

    auto &commandContainer = commandList0->commandContainer;
    commandContainer.addToResidencyContainer(gpuAllocation);
    commandContainer.addToResidencyContainer(allocation->cpuAllocation);

    void *dst_buffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 4096u, 0u, &dst_buffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    int one = 1;
    result = commandList0->appendMemoryFill(dst_buffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
                                            nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // NOTE(review): presumably two migrations for the SVM GPU/CPU allocations added to the
    // residency container plus one for the shared make-resident allocation - confirm.
    EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 3u);

    context->freeMem(ptr);
    svmManager->freeSVMAlloc(sharedPtr);
    context->freeMem(dst_buffer);
}
|
||||
|
||||
TEST_F(ContextTest, whenGettingDriverThenDriverIsRetrievedSuccessfully) {
|
||||
ze_context_handle_t hContext;
|
||||
ze_context_desc_t desc;
|
||||
|
|
|
@ -1087,6 +1087,52 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForRegularC
|
|||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
// appendPageFaultCopy on an immediate command list must succeed when
// flush-task submission is enabled.
HWTEST2_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledCommandListAndAppendPageFaultCopyThenSuccessIsReturned, IsSklOrAbove) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    size_t size = (sizeof(uint32_t) * 4);
    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
    ASSERT_NE(nullptr, commandList);

    NEO::GraphicsAllocation srcPtr(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                   reinterpret_cast<void *>(0x1234), size, 0, sizeof(uint32_t),
                                   MemoryPool::System4KBPages);
    NEO::GraphicsAllocation dstPtr(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                   reinterpret_cast<void *>(0x2345), size, 0, sizeof(uint32_t),
                                   MemoryPool::System4KBPages);

    auto result = commandList->appendPageFaultCopy(&dstPtr, &srcPtr, 0x100, false);
    // Non-fatal check so the command list is still destroyed when the append fails.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    commandList->destroy();
}
|
||||
|
||||
// appendPageFaultCopy on an immediate command list must succeed when
// flush-task submission is disabled.
HWTEST2_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledCommandListAndAppendPageFaultCopyThenSuccessIsReturned, IsSklOrAbove) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false);

    size_t size = (sizeof(uint32_t) * 4);
    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
    ASSERT_NE(nullptr, commandList);

    NEO::GraphicsAllocation srcPtr(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                   reinterpret_cast<void *>(0x1234), size, 0, sizeof(uint32_t),
                                   MemoryPool::System4KBPages);
    NEO::GraphicsAllocation dstPtr(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                   reinterpret_cast<void *>(0x2345), size, 0, sizeof(uint32_t),
                                   MemoryPool::System4KBPages);

    auto result = commandList->appendPageFaultCopy(&dstPtr, &srcPtr, 0x100, false);
    // Non-fatal check so the command list is still destroyed when the append fails.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    commandList->destroy();
}
|
||||
|
||||
HWTEST_F(L0DebuggerSimpleTest, givenNonZeroGpuVasWhenProgrammingSbaTrackingThenCorrectCmdsAreAddedToStream) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);
|
||||
|
|
Loading…
Reference in New Issue