diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index bb8f40b939..93c0423f03 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -635,10 +635,12 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh privateSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(csr.getDeferredAllocations().peekIsEmpty()); kernel.reset(nullptr); - EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), privateSurface); + EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(csr.getDeferredAllocations().peekIsEmpty()); + EXPECT_EQ(csr.getDeferredAllocations().peekHead(), privateSurface); } TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResourcesErrorIsReturned) { diff --git a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp index 197f459a94..b99b2803c3 100644 --- a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp +++ b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -127,12 +127,14 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe char newHeap[newHeapSize]; EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(commandStreamReceiver.getDeferredAllocations().peekIsEmpty()); kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; - EXPECT_FALSE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); - EXPECT_EQ(commandStreamReceiver.getTemporaryAllocations().peekHead(), firstAllocation); + EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(commandStreamReceiver.getDeferredAllocations().peekIsEmpty()); + EXPECT_EQ(commandStreamReceiver.getDeferredAllocations().peekHead(), firstAllocation); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation); commandStreamReceiver.getInternalAllocationStorage()->cleanAllocationList(notReadyTaskCount, TEMPORARY_ALLOCATION); } diff --git a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp index fbc55d60a5..02ff027377 100644 --- a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp +++ b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp @@ -159,7 +159,7 @@ TEST_P(MemObjAsyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsyn } else { makeMemObjNotReady(); } - auto &allocationList = csr->getTemporaryAllocations(); + auto &allocationList = csr->getDeferredAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); delete memObj; @@ -467,7 +467,7 @@ HWTEST_P(MemObjAsyncDestructionTest, givenMemObjWithMapAllocationWithoutMemUseHo makeMemObjUsed(); - auto &allocationList = mockCsr->getTemporaryAllocations(); + auto &allocationList = mockCsr->getDeferredAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); delete memObj; @@ -507,7 +507,7 @@ HWTEST_P(MemObjAsyncDestructionTest, givenMemObjWithMapAllocationWithMemUseHostP makeMemObjUsed(); - auto &allocationList = mockCsr->getTemporaryAllocations(); + auto &allocationList = mockCsr->getDeferredAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); delete memObj; diff --git a/opencl/test/unit_test/mem_obj/mem_obj_tests.cpp b/opencl/test/unit_test/mem_obj/mem_obj_tests.cpp index 1f6e214270..0ce89b195d 100644 --- a/opencl/test/unit_test/mem_obj/mem_obj_tests.cpp +++ b/opencl/test/unit_test/mem_obj/mem_obj_tests.cpp @@ -165,7 +165,7 @@ TEST(MemObj, givenNotReadyGraphicsAllocationWhenMemObjDestroysAllocationAsyncThe auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); - auto &allocationList = defaultEngine.commandStreamReceiver->getTemporaryAllocations(); + auto &allocationList = defaultEngine.commandStreamReceiver->getDeferredAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 43c803891c..d874ea4983 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -1965,8 +1965,9 @@ TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasUsedAndIsNotCompletedWhen usedAllocationAndNotGpuCompleted->updateTaskCount(*tagAddress + 1, csr->getOsContext().getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(usedAllocationAndNotGpuCompleted); - EXPECT_FALSE(csr->getTemporaryAllocations().peekIsEmpty()); - EXPECT_EQ(csr->getTemporaryAllocations().peekHead(), usedAllocationAndNotGpuCompleted); + EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(csr->getDeferredAllocations().peekIsEmpty()); + EXPECT_EQ(csr->getDeferredAllocations().peekHead(), usedAllocationAndNotGpuCompleted); // change task count so cleanup will not clear alloc in use usedAllocationAndNotGpuCompleted->updateTaskCount(csr->peekLatestFlushedTaskCount(), csr->getOsContext().getContextId()); @@ -2136,7 +2137,7 @@ HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultCsrWhenChec graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); - EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead()); + EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getDeferredAllocations().peekHead()); (*nonDefaultCsr->getTagAddress())++; // no need to call freeGraphicsAllocation } @@ -2156,9 +2157,11 @@ HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultDeviceWhenC nonDefaultCommandStreamReceiver.latestFlushedTaskCount = notReadyTaskCount; graphicsAllocation->updateTaskCount(notReadyTaskCount, nonDefaultCommandStreamReceiver.getOsContext().getContextId()); + EXPECT_TRUE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getDeferredAllocations().peekIsEmpty()); EXPECT_TRUE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); - EXPECT_FALSE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getDeferredAllocations().peekIsEmpty()); + EXPECT_TRUE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); (*nonDefaultCommandStreamReceiver.getTagAddress())++; // no need to call freeGraphicsAllocation } diff --git a/opencl/test/unit_test/program/program_data_tests.cpp b/opencl/test/unit_test/program/program_data_tests.cpp index 8bebb8ecdb..fe8f178232 100644 --- a/opencl/test/unit_test/program/program_data_tests.cpp +++ b/opencl/test/unit_test/program/program_data_tests.cpp @@ -334,10 +334,12 @@ TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBein constantSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(csr.getDeferredAllocations().peekIsEmpty()); delete pProgram; pProgram = nullptr; - EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_EQ(constantSurface, csr.getTemporaryAllocations().peekHead()); + EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(csr.getDeferredAllocations().peekIsEmpty()); + EXPECT_EQ(constantSurface, csr.getDeferredAllocations().peekHead()); } TEST_F(ProgramDataTest, givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) { @@ -351,10 +353,12 @@ TEST_F(ProgramDataTest, givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingD globalSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(csr.getDeferredAllocations().peekIsEmpty()); delete pProgram; pProgram = nullptr; - EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_EQ(globalSurface, csr.getTemporaryAllocations().peekHead()); + EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(csr.getDeferredAllocations().peekIsEmpty()); + EXPECT_EQ(globalSurface, csr.getDeferredAllocations().peekHead()); } TEST_F(ProgramDataTest, GivenDeviceForcing32BitMessagesWhenConstAllocationIsPresentInProgramBinariesThen32BitStorageIsAllocated) { diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 981fe92422..d3dc99e329 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -667,8 +667,9 @@ HWTEST_F(ProgramFromBinaryTest, givenProgramWhenCleanCurrentKernelInfoIsCalledBu kernelAllocation->updateTaskCount(100, csr.getOsContext().getContextId()); *csr.getTagAddress() = 0; pProgram->cleanCurrentKernelInfo(rootDeviceIndex); - EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), kernelAllocation); + EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(csr.getDeferredAllocations().peekIsEmpty()); + EXPECT_EQ(csr.getDeferredAllocations().peekHead(), kernelAllocation); EXPECT_TRUE(this->pDevice->getUltCommandStreamReceiver().requiresInstructionCacheFlush); } diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 0636120cc8..f7208ff0c6 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -724,6 +724,7 @@ std::unique_lock CommandStreamReceiver::obtain } AllocationsList &CommandStreamReceiver::getTemporaryAllocations() { return internalAllocationStorage->getTemporaryAllocations(); } AllocationsList &CommandStreamReceiver::getAllocationsForReuse() { return internalAllocationStorage->getAllocationsForReuse(); } +AllocationsList &CommandStreamReceiver::getDeferredAllocations() { return internalAllocationStorage->getDeferredAllocations(); } bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) { std::unique_lock lock = this->obtainHostPtrSurfaceCreationLock(); diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 164a33f7ce..e64ff28844 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -206,6 +206,7 @@ class CommandStreamReceiver { AllocationsList &getTemporaryAllocations(); AllocationsList &getAllocationsForReuse(); + AllocationsList &getDeferredAllocations(); InternalAllocationStorage *getInternalAllocationStorage() const { return internalAllocationStorage.get(); } MOCKABLE_VIRTUAL bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush); virtual size_t getPreferredTagPoolSize() const; diff --git a/shared/source/memory_manager/internal_allocation_storage.cpp b/shared/source/memory_manager/internal_allocation_storage.cpp index 3bda7377a5..9ea10ae9c7 100644 --- a/shared/source/memory_manager/internal_allocation_storage.cpp +++ b/shared/source/memory_manager/internal_allocation_storage.cpp @@ -14,9 +14,7 @@ namespace NEO { InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) - : commandStreamReceiver(commandStreamReceiver), - temporaryAllocations(TEMPORARY_ALLOCATION), - allocationsForReuse(REUSABLE_ALLOCATION){}; + : commandStreamReceiver(commandStreamReceiver){}; void InternalAllocationStorage::storeAllocation(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage) { uint32_t taskCount = gfxAllocation->getTaskCount(commandStreamReceiver.getOsContext().getContextId()); @@ -34,13 +32,17 @@ void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptrupdateTaskCount(taskCount, commandStreamReceiver.getOsContext().getContextId()); allocationsList.pushTailOne(*gfxAllocation.release()); } void InternalAllocationStorage::cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage) { - freeAllocationsList(waitTaskCount, (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse); + freeAllocationsList(waitTaskCount, allocationLists[allocationUsage]); + + if (allocationUsage == TEMPORARY_ALLOCATION) { + freeAllocationsList(waitTaskCount, allocationLists[DEFERRED_DEALLOCATION]); + } } void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList) { @@ -66,12 +68,12 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo } std::unique_ptr InternalAllocationStorage::obtainReusableAllocation(size_t requiredSize, AllocationType allocationType) { - auto allocation = allocationsForReuse.detachAllocation(requiredSize, nullptr, &commandStreamReceiver, allocationType); + auto allocation = allocationLists[REUSABLE_ALLOCATION].detachAllocation(requiredSize, nullptr, &commandStreamReceiver, allocationType); return allocation; } std::unique_ptr InternalAllocationStorage::obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType) { - auto allocation = temporaryAllocations.detachAllocation(requiredSize, requiredPtr, &commandStreamReceiver, allocationType); + auto allocation = allocationLists[TEMPORARY_ALLOCATION].detachAllocation(requiredSize, requiredPtr, &commandStreamReceiver, allocationType); return allocation; } diff --git a/shared/source/memory_manager/internal_allocation_storage.h b/shared/source/memory_manager/internal_allocation_storage.h index 4fd29dd1ab..cad4f5d8f9 100644 --- a/shared/source/memory_manager/internal_allocation_storage.h +++ b/shared/source/memory_manager/internal_allocation_storage.h @@ -9,6 +9,8 @@ #include "shared/source/helpers/common_types.h" #include "shared/source/memory_manager/allocations_list.h" +#include + namespace NEO { class InternalAllocationStorage { @@ -20,15 +22,15 @@ class InternalAllocationStorage { void storeAllocationWithTaskCount(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage, uint32_t taskCount); std::unique_ptr obtainReusableAllocation(size_t requiredSize, AllocationType allocationType); std::unique_ptr obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType); - AllocationsList &getTemporaryAllocations() { return temporaryAllocations; } - AllocationsList &getAllocationsForReuse() { return allocationsForReuse; } + AllocationsList &getTemporaryAllocations() { return allocationLists[TEMPORARY_ALLOCATION]; } + AllocationsList &getAllocationsForReuse() { return allocationLists[REUSABLE_ALLOCATION]; } + AllocationsList &getDeferredAllocations() { return allocationLists[DEFERRED_DEALLOCATION]; } DeviceBitfield getDeviceBitfield() const; protected: void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList); CommandStreamReceiver &commandStreamReceiver; - AllocationsList temporaryAllocations; - AllocationsList allocationsForReuse; + std::array allocationLists = {AllocationsList(TEMPORARY_ALLOCATION), AllocationsList(REUSABLE_ALLOCATION), AllocationsList(DEFERRED_DEALLOCATION)}; }; } // namespace NEO diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 86baf1a228..c9f29e37e3 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -227,7 +227,7 @@ void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocatio if (gfxAllocation->isUsedByOsContext(osContextId) && allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) { engine.commandStreamReceiver->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(gfxAllocation), - TEMPORARY_ALLOCATION); + DEFERRED_DEALLOCATION); return; } } diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index 00f028c389..8caa833245 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -40,7 +40,8 @@ class OsContext; enum AllocationUsage { TEMPORARY_ALLOCATION, - REUSABLE_ALLOCATION + REUSABLE_ALLOCATION, + DEFERRED_DEALLOCATION }; struct AlignedMallocRestrictions { diff --git a/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp b/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp index 1c7a09a116..24db47022b 100644 --- a/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp +++ b/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp @@ -87,6 +87,10 @@ TEST_F(InternalAllocationStorageTest, whenAllocationIsStoredAsReusableButIsStill storage->cleanAllocationList(2u, REUSABLE_ALLOCATION); } +TEST_F(InternalAllocationStorageTest, whenGetDeferredAllocationsThenReturnDeferredAllocationsListFromInternalStorage) { + EXPECT_EQ(&csr->getDeferredAllocations(), &csr->getInternalAllocationStorage()->getDeferredAllocations()); +} + TEST_F(InternalAllocationStorageTest, whenAllocationIsStoredAsTemporaryAndIsStillUsedThenCanBeObtained) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield});