diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index 02962d5c72..563a5812f8 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -121,7 +121,7 @@ struct BcsSplit { result = appendCall(localDstPtr, localSrcPtr, localSize, eventHandle); if (cmdList->flushTaskSubmissionEnabled()) { - cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, false, cmdQsForSplit[i], nullptr, nullptr); + cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, true, cmdQsForSplit[i], nullptr, nullptr); } else { cmdList->executeCommandListImmediateImpl(performMigration, cmdQsForSplit[i]); } diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp index 4385fd5534..f7d95808ce 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp @@ -911,6 +911,89 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC context->freeMem(dstPtr); } +HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyThenUpdateTaskCount, IsXeHpcCore) { + DebugManagerStateRestore restorer; + debugManager.flags.SplitBcsCopy.set(1); + + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + ze_result_t returnValue; + auto hwInfo = *NEO::defaultHwInfo; + hwInfo.featureTable.ftrBcsInfo = 0b111111111; + hwInfo.capabilityTable.blitterOperationsSupported = true; + auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); + auto testL0Device = std::unique_ptr(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue)); + + ze_command_queue_desc_t desc = {}; + desc.ordinal = static_cast(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::copy)); + + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + testL0Device.get(), + &desc, + false, + NEO::EngineGroupType::copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + auto whiteBoxCmdList = static_cast(commandList0.get()); + + EXPECT_EQ(static_cast(testL0Device.get())->bcsSplit.cmdQs.size(), 4u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + + constexpr size_t alignment = 4096u; + constexpr size_t size = 8 * MemoryConstants::megaByte; + void *srcPtr; + void *dstPtr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + context->allocDeviceMem(device->toHandle(), + &deviceDesc, + size, alignment, &srcPtr); + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, size, alignment, &dstPtr); + auto ultCsr = static_cast *>(whiteBoxCmdList->getCsr(false)); + ultCsr->recordFlushedBatchBuffer = true; + int client; + ultCsr->registerClient(&client); + + auto cmdStream = commandList0->getCmdContainer().getCommandStream(); + auto offset = cmdStream->getUsed(); + + auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, copyParams); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto csr2 = static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getCsr(); + auto csr3 = static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr(); + + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getCsr()->peekTaskCount(), 0u); + EXPECT_EQ(csr2->peekTaskCount(), 1u); + EXPECT_EQ(csr3->peekTaskCount(), 1u); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); + + bool csr2TaskCountFound = false; + bool csr3TaskCountFound = false; + + for (auto &cmd : cmdList) { + if (auto miFlushCmd = genCmdCast(cmd)) { + if (miFlushCmd->getDestinationAddress() == csr2->getTagAllocation()->getGpuAddress()) { + csr2TaskCountFound = true; + } else if (miFlushCmd->getDestinationAddress() == csr3->getTagAllocation()->getGpuAddress()) { + csr3TaskCountFound = true; + } + } + } + + EXPECT_TRUE(csr2TaskCountFound); + EXPECT_TRUE(csr3TaskCountFound); + + context->freeMem(srcPtr); + context->freeMem(dstPtr); +} + HWTEST2_F(CommandQueueCommandsXeHpc, givenSyncCmdListAndSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsXeHpcCore) { DebugManagerStateRestore restorer; debugManager.flags.SplitBcsCopy.set(1); diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 3cdd731cba..c916f47564 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -92,7 +92,7 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu supportsMultiStorageResources = !!debugManager.flags.EnableMultiStorageResources.get(); } - if (debugManager.flags.UseSingleListForTemporaryAllocations.get() == 1) { + if (debugManager.flags.UseSingleListForTemporaryAllocations.get() != 0) { singleTemporaryAllocationsList = true; temporaryAllocations = std::make_unique(AllocationUsage::TEMPORARY_ALLOCATION); } @@ -115,14 +115,16 @@ void MemoryManager::cleanTemporaryAllocations(const CommandStreamReceiver &csr, auto *nextAlloc = currentAlloc->next; bool freeAllocation = false; - if (currentAlloc->isUsedByOsContext(waitedOsContextId)) { - if (currentAlloc->hostPtrTaskCountAssignment == 0 && currentAlloc->getTaskCount(waitedOsContextId) <= waitedTaskCount) { - if (!currentAlloc->isUsedByManyOsContexts() || !allocInUse(*currentAlloc)) { - freeAllocation = true; + if (currentAlloc->hostPtrTaskCountAssignment == 0) { + if (currentAlloc->isUsedByOsContext(waitedOsContextId)) { + if (currentAlloc->getTaskCount(waitedOsContextId) <= waitedTaskCount) { + if (!currentAlloc->isUsedByManyOsContexts() || !allocInUse(*currentAlloc)) { + freeAllocation = true; + } } + } else if (!allocInUse(*currentAlloc)) { + freeAllocation = true; } - } else if (!allocInUse(*currentAlloc)) { - freeAllocation = true; } if (freeAllocation) { diff --git a/shared/test/common/fixtures/memory_allocator_fixture.h b/shared/test/common/fixtures/memory_allocator_fixture.h index c3e50ff771..0720526cf4 100644 --- a/shared/test/common/fixtures/memory_allocator_fixture.h +++ b/shared/test/common/fixtures/memory_allocator_fixture.h @@ -31,15 +31,9 @@ class MemoryAllocatorFixture : public MemoryManagementFixture { executionEnvironment->calculateMaxOsContextCount(); device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0u)); - memoryManager = new MockMemoryManager(false, false, *executionEnvironment); - executionEnvironment->memoryManager.reset(memoryManager); - csr = &device->getGpgpuCommandStreamReceiver(); + memoryManager = static_cast(device->getMemoryManager()); - auto &gfxCoreHelper = device->getGfxCoreHelper(); - auto engineType = gfxCoreHelper.getGpgpuEngineInstances(device->getRootDeviceEnvironment())[0].first; - auto osContext = memoryManager->createAndRegisterOsContext(csr, EngineDescriptorHelper::getDefaultDescriptor({engineType, EngineUsage::regular}, - PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); - csr->setupContext(*osContext); + csr = &device->getGpgpuCommandStreamReceiver(); } void tearDown() { diff --git a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp index a2e29f6d37..dbc7b52a07 100644 --- a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp @@ -1129,18 +1129,18 @@ HWTEST_F(HostPtrAllocationTest, givenSingleTempAllocationsListWhenAddingToStorag TEST_F(HostPtrAllocationTest, givenDebugFlagSetWhenCreatingMemoryManagerThenEnableSingleTempAllocationsList) { DebugManagerStateRestore debugRestorer; - { - auto memoryManager = std::make_unique(executionEnvironment); - EXPECT_FALSE(memoryManager->isSingleTemporaryAllocationsListEnabled()); - EXPECT_EQ(nullptr, memoryManager->temporaryAllocations.get()); - } - - debugManager.flags.UseSingleListForTemporaryAllocations.set(1); { auto memoryManager = std::make_unique(executionEnvironment); EXPECT_TRUE(memoryManager->isSingleTemporaryAllocationsListEnabled()); EXPECT_NE(nullptr, memoryManager->temporaryAllocations.get()); } + + debugManager.flags.UseSingleListForTemporaryAllocations.set(0); + { + auto memoryManager = std::make_unique(executionEnvironment); + EXPECT_FALSE(memoryManager->isSingleTemporaryAllocationsListEnabled()); + EXPECT_EQ(nullptr, memoryManager->temporaryAllocations.get()); + } } TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentCannotBeDestroyedThenCheckForOverlappingReturnsError) { diff --git a/shared/test/unit_test/utilities/tag_allocator_tests.cpp b/shared/test/unit_test/utilities/tag_allocator_tests.cpp index 5ac1e583e9..caec5e01e3 100644 --- a/shared/test/unit_test/utilities/tag_allocator_tests.cpp +++ b/shared/test/unit_test/utilities/tag_allocator_tests.cpp @@ -179,6 +179,7 @@ TEST_F(TagAllocatorTest, WhenGettingAndReturningTagThenFreeAndUsedListsAreUpdate TEST_F(TagAllocatorTest, WhenTagAllocatorIsCreatedThenItPopulatesTagsWithProperDeviceBitfield) { size_t alignment = 64; + memoryManager->recentlyPassedDeviceBitfield = 0; EXPECT_NE(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); MockTagAllocator tagAllocator(memoryManager, 10, alignment, deviceBitfield); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield);