Add regkey to force prefetch of shared memory in cmd list execute

Add the regkey ForceMemoryPrefetchForKmdMigratedSharedAllocations
to force a memory prefetch of KMD-migrated shared allocations
in zeCommandQueueExecuteCommandLists().

Related-To: NEO-7841

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2023-04-03 08:39:06 +00:00
committed by Compute-Runtime-Automation
parent 1498ae30bd
commit 50da94dc56
5 changed files with 49 additions and 0 deletions

View File

@@ -28,6 +28,7 @@
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/residency_container.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/source/utilities/software_tags_manager.h"
@@ -60,6 +61,14 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto lockCSR = this->csr->obtainUniqueOwnership();
// Debug override: when ForceMemoryPrefetchForKmdMigratedSharedAllocations is set,
// issue a prefetch request for every allocation tracked by the SVM allocations
// manager, using this queue's command stream receiver.
if (NEO::DebugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.get()) {
    auto svmAllocMgr = device->getDriverHandle()->getSvmAllocsManager();
    // NOTE(review): the allocation container is walked without taking any lock
    // here — confirm whether the SVM manager requires one while iterating.
    for (auto &allocation : svmAllocMgr->getSVMAllocs()->allocations) {
        // Bind to the stored record instead of copying it on every iteration;
        // the record is only handed to prefetchMemory().
        NEO::SvmAllocationData &allocData = allocation.second;
        svmAllocMgr->prefetchMemory(*device->getNEODevice(), *csr, allocData);
    }
}
if (this->clientId == CommandQueue::clientNotRegistered) {
this->clientId = this->csr->registerClient();
}

View File

@@ -14,6 +14,7 @@
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"

View File

@@ -116,6 +116,43 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetc
context->freeMem(ptr);
}
// Checks that, with UseKmdMigration and ForceMemoryPrefetchForKmdMigratedSharedAllocations
// both enabled, executing a command list on a command queue results in the mock memory
// manager reporting setMemPrefetchCalled for an existing shared allocation.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenForceMemoryPrefetchForKmdMigratedSharedAllocationsWhenExecutingCommandListsOnCommandQueueThenMemoryPrefetchIsCalled, IsXeHpcCore) {
    // Restores the debug flags modified below when the test scope ends.
    DebugManagerStateRestore restore;
    DebugManager.flags.UseKmdMigration.set(true);
    DebugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.set(true);

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Create the shared allocation that the forced prefetch is expected to pick up.
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
    // Fatal assertions: the rest of the test is meaningless without a valid allocation.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, ptr);

    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    ze_command_queue_desc_t queueDesc = {};

    // Validate the created objects before dereferencing them.
    auto commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue);
    ASSERT_NE(nullptr, commandList);
    ze_command_list_handle_t commandListHandle = commandList->toHandle();

    auto commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);

    result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Downcast within the memory manager class hierarchy: static_cast, not reinterpret_cast.
    // Assumes the fixture installs a NEO::MockMemoryManager on neoDevice — as the original test did.
    auto mockMemoryManager = static_cast<NEO::MockMemoryManager *>(neoDevice->getMemoryManager());
    EXPECT_TRUE(mockMemoryManager->setMemPrefetchCalled);

    context->freeMem(ptr);
    commandList->destroy();
    commandQueue->destroy();
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenPrefetchApiIsCalledThenRequestMemoryPrefetch, IsXeHpcCore) {
DebugManagerStateRestore restore;

View File

@@ -79,6 +79,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableForceToStateless, false, "Do not force state
DECLARE_DEBUG_VARIABLE(bool, ForceTheoreticalMaxWorkGroupCount, false, "Do not apply any limitation to max cooperative/concurrent work-group count queries")
DECLARE_DEBUG_VARIABLE(bool, DontDisableZebinIfVmeUsed, false, "When enabled, driver will not add -cl-intel-disable-zebin internal option when vme is used")
DECLARE_DEBUG_VARIABLE(bool, AppendMemoryPrefetchForKmdMigratedSharedAllocations, true, "Allow prefetching shared memory to the device associated with the specified command list")
DECLARE_DEBUG_VARIABLE(bool, ForceMemoryPrefetchForKmdMigratedSharedAllocations, false, "Force prefetch of shared memory in command queue execute command lists")
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "Override device id in AUB/TBX mode")
DECLARE_DEBUG_VARIABLE(std::string, FilterDeviceId, std::string("unk"), "Device id filter, adapter matching device id will be opened; ignored when unk")
DECLARE_DEBUG_VARIABLE(std::string, FilterBdfPath, std::string("unk"), "Linux-only, BDF path filter, only matching paths will be opened; ignored when unk")

View File

@@ -434,6 +434,7 @@ LimitEngineCountForVirtualBcs = -1
LimitEngineCountForVirtualCcs = -1
ForceRunAloneContext = -1
AppendMemoryPrefetchForKmdMigratedSharedAllocations = 1
ForceMemoryPrefetchForKmdMigratedSharedAllocations = 0
CreateContextWithAccessCounters = -1
AccessCountersTrigger = -1
AccessCountersGranularity = -1