// Patch summary (unified diff touching three files; line breaks are collapsed in this copy).
//
// 1. level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl
//    executeCommandListImmediateWithFlushTaskImpl():
//    - A single `cmdQImp` cast of `cmdQ` is now declared at the top of the function and
//      reused by getStartingCmdBuffer(), getCsr() and clearHeapContainer(); the former
//      reinterpret_cast, the inline static_cast and the later `cmdQImp` declaration are removed.
//    - prefetchSVMAllocs() (still gated on isSharedAllocPrefetchEnabled()) and
//      cmdQ->registerCsrClient() are now executed only when
//      appendOperation != NEO::AppendOperations::cmdList.
//
// 2. level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp
//    - Adds a HWTEST2_F case that allocates shared memory, appends one closed regular
//      command list to an immediate command list via appendCommandLists(), and expects
//      setMemPrefetchCalled to be true with setMemPrefetchCalledCount == 1.
//
// 3. shared/test/common/mocks/mock_memory_manager.h
//    - Adds `uint32_t setMemPrefetchCalledCount = 0`, incremented on every
//      MockMemoryManager::setMemPrefetch() call.
//
// NOTE(review): registerCsrClient() is now skipped together with the prefetch for cmdList
//   appends — presumably the regular command-list path performs both itself; confirm.
// NOTE(review): template arguments (after static_cast / reinterpret_cast / std::unique_lock /
//   `template`) appear to be missing from this copy — verify against the original patch
//   before applying.
// NOTE(review): in the new test, `returnValue` filled by CommandList::create() is never checked.
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 2adeaa2667..946d625d24 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -432,12 +432,13 @@ template inline ze_result_t CommandListCoreFamilyImmediate::executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, NEO::AppendOperations appendOperation, bool requireTaskCountUpdate, CommandQueue *cmdQ, MutexLock *outerLock) { + auto cmdQImp = static_cast(cmdQ); this->commandContainer.removeDuplicatesFromResidencyContainer(); auto commandStream = this->commandContainer.getCommandStream(); size_t commandStreamStart = this->cmdListCurrentStartOffset; if (appendOperation == NEO::AppendOperations::cmdList && this->dispatchCmdListBatchBufferAsPrimary) { - auto cmdListStartCmdBufferStream = reinterpret_cast(cmdQ)->getStartingCmdBuffer(); + auto cmdListStartCmdBufferStream = cmdQImp->getStartingCmdBuffer(); // check if queue starting stream is the same as immediate, // if they are the same - immediate command list buffer has preamble in it including jump from immediate to regular cmdlist - proceed normal // if not - regular cmdlist is the starting command buffer - no queue preamble or waiting commands @@ -447,15 +448,17 @@ inline ze_result_t CommandListCoreFamilyImmediate::executeCommand } } - auto csr = static_cast(cmdQ)->getCsr(); + auto csr = cmdQImp->getCsr(); auto lockCSR = outerLock != nullptr ? 
std::move(*outerLock) : csr->obtainUniqueOwnership(); - if (NEO::ApiSpecificConfig::isSharedAllocPrefetchEnabled()) { - auto svmAllocMgr = this->device->getDriverHandle()->getSvmAllocsManager(); - svmAllocMgr->prefetchSVMAllocs(*this->device->getNEODevice(), *csr); - } + if (appendOperation != NEO::AppendOperations::cmdList) { + if (NEO::ApiSpecificConfig::isSharedAllocPrefetchEnabled()) { + auto svmAllocMgr = this->device->getDriverHandle()->getSvmAllocsManager(); + svmAllocMgr->prefetchSVMAllocs(*this->device->getNEODevice(), *csr); + } - cmdQ->registerCsrClient(); + cmdQ->registerCsrClient(); + } std::unique_lock lockForIndirect; if (this->hasIndirectAllocationsAllowed()) { @@ -501,7 +504,6 @@ inline ze_result_t CommandListCoreFamilyImmediate::executeCommand return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } - auto cmdQImp = static_cast(cmdQ); cmdQImp->clearHeapContainer(); // save offset from immediate stream - even when not used to dispatch commands, can be used for epilogue diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index 67a854e3f6..53c470f77c 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -223,6 +223,44 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenForceMemoryPrefetchForKmdMigra commandQueue->destroy(); } +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenForceMemoryPrefetchForKmdMigratedSharedAllocationsWhenExecutingCommandListsOnImmediateCommandListThenMemoryPrefetchIsCalledOnce, IsXeHpcCore) { + DebugManagerStateRestore restore; + debugManager.flags.UseKmdMigration.set(true); + debugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.set(true); + debugManager.flags.EnableBOChunkingPrefetch.set(false); + + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_device_mem_alloc_desc_t 
deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + ze_result_t returnValue; + ze_command_queue_desc_t queueDesc = {}; + + ze_command_list_handle_t commandListHandle = CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false)->toHandle(); + auto commandList = CommandList::fromHandle(commandListHandle); + commandList->close(); + + auto commandListImmediate = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, result); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandListImmediate->appendCommandLists(1, &commandListHandle, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto mockMemoryManager = reinterpret_cast(neoDevice->getMemoryManager()); + EXPECT_TRUE(mockMemoryManager->setMemPrefetchCalled); + EXPECT_EQ(1u, mockMemoryManager->setMemPrefetchCalledCount); + + context->freeMem(ptr); + commandList->destroy(); + commandListImmediate->destroy(); +} + HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenNoForceMemoryPrefetchForKmdMigratedSharedAllocationsAndNoEnableBOChunkingPrefetchWhenExecutingCommandListsOnCommandQueueThenMemoryPrefetchIsNotCalled, IsXeHpcCore) { DebugManagerStateRestore restore; debugManager.flags.UseKmdMigration.set(true); diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index 01d04dc434..eb42d03b77 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -215,6 +215,7 @@ class MockMemoryManager : public MemoryManagerCreate { bool setMemPrefetch(GraphicsAllocation *gfxAllocation, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) override { memPrefetchSubDeviceIds = subDeviceIds; setMemPrefetchCalled = true; + 
setMemPrefetchCalledCount++; return MemoryManager::setMemPrefetch(gfxAllocation, subDeviceIds, rootDeviceIndex); } @@ -318,6 +319,7 @@ class MockMemoryManager : public MemoryManagerCreate { uint32_t handleFenceCompletionCalled = 0u; uint32_t waitForEnginesCompletionCalled = 0u; uint32_t allocateGraphicsMemoryWithPropertiesCount = 0; + uint32_t setMemPrefetchCalledCount = 0; osHandle capturedSharedHandle = 0u; bool allocationCreated = false; bool allocation64kbPageCreated = false;