Extended regkey to force prefetch of shared memory in enqueue commands

Extended the regkey ForceMemoryPrefetchForKmdMigratedSharedAllocations
to force meory prefetch of kmd-migrated shared allocation
in clEnqueueNDRangeKernel(), clEnqueueMemFillINTEL, ...

Related-To: NEO-7841

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2023-04-10 22:40:54 +00:00
committed by Compute-Runtime-Automation
parent d31b950b9a
commit 01d03aa5b6
6 changed files with 78 additions and 4 deletions

View File

@@ -16,6 +16,7 @@
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/program/sync_buffer_handler.h"
#include "shared/source/program/sync_buffer_handler.inl"
@@ -147,6 +148,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
TagNodeBase *hwTimeStamps = nullptr;
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
if (NEO::DebugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.get()) {
auto pSvmAllocMgr = this->context->getSVMAllocsManager();
pSvmAllocMgr->prefetchSVMAllocs(this->getDevice(), computeCommandStreamReceiver);
}
EventBuilder eventBuilder;
setupEvent(eventBuilder, event, commandType);

View File

@@ -11,11 +11,13 @@
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/kernel_binary_helper.h"
#include "shared/test/common/helpers/raii_gfx_core_helper.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_submissions_aggregator.h"
#include "opencl/source/api/api.h"
@@ -1682,6 +1684,40 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIs
EXPECT_EQ(csr.recordedDispatchFlags.engineHints, 1u);
}
HWTEST_F(EnqueueKernelTest, GivenForceMemoryPrefetchForKmdMigratedSharedAllocationsWhenEnqueingKernelWithoutSharedAllocationsThenMemoryPrefetchIsNotCalled) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.UseKmdMigration.set(true);
DebugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.set(true);
MockKernelWithInternals mockKernel(*pClDevice);
size_t gws[3] = {1, 1, 1};
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
auto memoryManager = static_cast<MockMemoryManager *>(context->getMemoryManager());
EXPECT_FALSE(memoryManager->setMemPrefetchCalled);
}
HWTEST_F(EnqueueKernelTest, GivenForceMemoryPrefetchForKmdMigratedSharedAllocationsWhenEnqueingKernelWithSharedAllocationsThenMemoryPrefetchIsCalled) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.UseKmdMigration.set(true);
DebugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.set(true);
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields());
auto ptr = context->getSVMAllocsManager()->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, pCmdQ);
EXPECT_NE(nullptr, ptr);
MockKernelWithInternals mockKernel(*pClDevice);
size_t gws[3] = {1, 1, 1};
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
auto memoryManager = static_cast<MockMemoryManager *>(context->getMemoryManager());
EXPECT_TRUE(memoryManager->setMemPrefetchCalled);
context->getSVMAllocsManager()->freeSVMAlloc(ptr);
}
struct PauseOnGpuTests : public EnqueueKernelTest {
void SetUp() override {
EnqueueKernelTest::SetUp();