diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index b0add42c1b..9f988fdf44 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -326,14 +326,9 @@ NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::Comman void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect, bool performMigration) { NEO::Device *neoDevice = this->device->getNEODevice(); auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager(); - auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex()); - if (NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) { - submitAsPack = !!NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get(); - } + auto submittedAsPack = svmAllocsManager->submitIndirectAllocationsAsPack(*(this->csr)); - if (submitAsPack) { - svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u); - } else { + if (!submittedAsPack) { lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership(); NEO::ResidencyContainer residencyAllocations; svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(), diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 26a5e7c7b2..ac7a20a3bf 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1410,7 +1410,11 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { if (getHasIndirectAccess() && (unifiedMemoryControls.indirectDeviceAllocationsAllowed || unifiedMemoryControls.indirectHostAllocationsAllowed || unifiedMemoryControls.indirectSharedAllocationsAllowed)) { - this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask()); + auto svmAllocsManager = this->getContext().getSVMAllocsManager(); + auto submittedAsPack = svmAllocsManager->submitIndirectAllocationsAsPack(commandStreamReceiver); + if (!submittedAsPack) { + svmAllocsManager->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask()); + } } } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index ec0a30c651..1bb9a4b6d5 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -1414,7 +1414,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAl svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenOnlyHostAllocationsAreMadeResident) { +HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenAllAllocationsAreMadeResident) { MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); @@ -1430,14 +1430,13 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResident mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); - EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); - EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedHostMemoryAllocation)); + EXPECT_EQ(2u, commandStreamReceiver.getResidencyAllocations().size()); svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenOnlySharedAllocationsAreMadeResident) { +HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenAllSharedAllocationsAreMadeResident) { MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); @@ -1452,8 +1451,7 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeReside mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); - EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); - EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedSharedMemoryAllocation)); + EXPECT_EQ(2u, commandStreamReceiver.getResidencyAllocations().size()); svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); @@ -2465,6 +2463,74 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired); } +HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenIndirectAllocationsArePacked) { + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + kernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true); + + auto &csr = pDevice->getGpgpuCommandStreamReceiver(); + auto svmAllocationsManager = ctx.getSVMAllocsManager(); + auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, ctx.getRootDeviceIndices(), ctx.getDeviceBitfields()); + deviceProperties.device = pDevice; + auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); + auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); + auto graphicsAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation(); + + // Verify that indirect allocation is always resident + kernel->makeResident(csr); + EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId())); + + // Force to non-resident + graphicsAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, csr.getOsContext().getContextId()); + + // Verify that packed allocation is tracked and makeResident is called once + kernel->makeResident(csr); + EXPECT_EQ(GraphicsAllocation::objectNotResident, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId())); + + svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); +} + +HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledAndPackingIsDisabledThenIndirectAllocationsAreNotPacked) { + DebugManagerStateRestore dbgStateRestore; + debugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0); + + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + kernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true); + + auto &csr = pDevice->getGpgpuCommandStreamReceiver(); + auto svmAllocationsManager = ctx.getSVMAllocsManager(); + auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, ctx.getRootDeviceIndices(), ctx.getDeviceBitfields()); + deviceProperties.device = pDevice; + auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); + auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); + auto graphicsAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation(); + + kernel->makeResident(csr); + EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId())); + + // Force to non-resident + graphicsAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, csr.getOsContext().getContextId()); + + // Verify that makeResident is always called when allocation is not packed + kernel->makeResident(csr); + EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId())); + + svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); +} + struct KernelExecutionEnvironmentTest : public Test { void SetUp() override { ClDeviceFixture::setUp(); diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index aaa8eb5d78..2dd6d9ce55 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -909,4 +909,15 @@ void SVMAllocsManager::makeResidentForAllocationsWithId(uint32_t allocationId, C } } +bool SVMAllocsManager::submitIndirectAllocationsAsPack(CommandStreamReceiver &csr) { + auto submitAsPack = memoryManager->allowIndirectAllocationsAsPack(csr.getRootDeviceIndex()); + if (debugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) { + submitAsPack = !!NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get(); + } + + if (submitAsPack) { + makeIndirectAllocationsResident(csr, csr.peekTaskCount() + 1u); + } + return submitAsPack; +} } // namespace NEO diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 679e288522..3ba83d4ca1 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -238,6 +238,8 @@ class SVMAllocsManager { void initUsmAllocationsCaches(Device &device); + bool submitIndirectAllocationsAsPack(CommandStreamReceiver &csr); + protected: void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties, const RootDeviceIndicesContainer &rootDeviceIndices, diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 07d10eb603..5a4fc92935 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -464,7 +464,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentT EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size()); - svmManager->makeIndirectAllocationsResident(*csr, 1u); + EXPECT_TRUE(svmManager->submitIndirectAllocationsAsPack(*csr)); EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId())); EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(csr->getOsContext().getContextId())); EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->peekEvictable()); @@ -477,6 +477,39 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentT svmManager->freeSVMAlloc(ptr); } +TEST_F(SVMLocalMemoryAllocatorTest, whenSubmitIndirectAllocationsAsPackCalledButAllocationsAsPackNotAllowedThenDontMakeResident) { + DebugManagerStateRestore restore; + debugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0); + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + auto device = deviceFactory->rootDevices[0]; + auto memoryManager = static_cast(device->getMemoryManager()); + auto svmManager = std::make_unique(memoryManager, false); + auto csr = std::make_unique(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); + csr->setupContext(*device->getDefaultEngine().osContext); + + void *cmdQ = reinterpret_cast(0x12345); + auto mockPageFaultManager = new MockPageFaultManager(); + memoryManager->pageFaultManager.reset(mockPageFaultManager); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::sharedUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + + auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); + + ASSERT_NE(nullptr, ptr); + auto graphicsAllocation = svmManager->getSVMAlloc(ptr); + + EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId())); + EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size()); + + EXPECT_FALSE(svmManager->submitIndirectAllocationsAsPack(*csr)); + + EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId())); + EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size()); + EXPECT_EQ(svmManager->indirectAllocationsResidency.find(csr.get()), svmManager->indirectAllocationsResidency.end()); + + svmManager->freeSVMAlloc(ptr); +} + TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentThenSubsequentCallsDoNotCallResidency) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); auto device = deviceFactory->rootDevices[0];