diff --git a/opencl/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp b/opencl/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp index 28b8254959..8ccbbdb484 100644 --- a/opencl/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp +++ b/opencl/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp @@ -297,3 +297,25 @@ HWTEST_F(InternalAllocationStorageTest, givenMultipleActivePartitionsWhenDetachi memoryManager->freeGraphicsMemory(allocationReusable.release()); } +TEST_F(InternalAllocationStorageTest, givenInternalAllocationWhenTaskCountMetsExpectationAndItHasBeenAssignedThenAllocIsRemoved) { + auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); + uint32_t expectedTaskCount = 10u; + *csr->getTagAddress() = expectedTaskCount; + allocation->updateTaskCount(expectedTaskCount, csr->getOsContext().getContextId()); + allocation->hostPtrTaskCountAssignment = 0; + storage->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); + storage->cleanAllocationList(expectedTaskCount, TEMPORARY_ALLOCATION); + EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); +} + +TEST_F(InternalAllocationStorageTest, givenInternalAllocationWhenTaskCountMetsExpectationAndItHasNotBeenAssignedThenAllocIsNotRemoved) { + auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); + uint32_t expectedTaskCount = 10u; + *csr->getTagAddress() = expectedTaskCount; + allocation->updateTaskCount(expectedTaskCount, csr->getOsContext().getContextId()); + allocation->hostPtrTaskCountAssignment = 1; + storage->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); + storage->cleanAllocationList(expectedTaskCount, TEMPORARY_ALLOCATION); + EXPECT_FALSE(csr->getTemporaryAllocations().peekIsEmpty()); + allocation->hostPtrTaskCountAssignment = 0; +} \ No newline at end of file diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index fc13c9d607..9dd5e38a35 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -734,7 +734,8 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa if (allocation == nullptr) { return false; } - allocation->updateTaskCount(CompletionStamp::notReady, osContext->getContextId()); + allocation->hostPtrTaskCountAssignment++; + allocation->updateTaskCount(0u, osContext->getContextId()); surface.setAllocation(allocation.get()); internalAllocationStorage->storeAllocation(std::move(allocation), TEMPORARY_ALLOCATION); return true; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 873a385b69..2673d8f8af 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1072,7 +1072,8 @@ uint32_t CommandStreamReceiverHw::flushBcsTask(const BlitPropertiesCo } blitProperties.csrDependencies.makeResident(*this); - + blitProperties.srcAllocation->prepareHostPtrForResidency(this); + blitProperties.dstAllocation->prepareHostPtrForResidency(this); makeResident(*blitProperties.srcAllocation); makeResident(*blitProperties.dstAllocation); if (blitProperties.clearColorAllocation) { diff --git a/shared/source/memory_manager/graphics_allocation.cpp b/shared/source/memory_manager/graphics_allocation.cpp index 62821d6d54..825a7da816 100644 --- a/shared/source/memory_manager/graphics_allocation.cpp +++ b/shared/source/memory_manager/graphics_allocation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #include "graphics_allocation.h" +#include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" @@ -102,6 +103,17 @@ bool GraphicsAllocation::isTbxWritable(uint32_t banks) const { return isAnyBitSet(aubInfo.tbxWritable, banks); } +void GraphicsAllocation::prepareHostPtrForResidency(CommandStreamReceiver *csr) { + if (hostPtrTaskCountAssignment > 0) { + auto allocTaskCount = getTaskCount(csr->getOsContext().getContextId()); + auto currentTaskCount = *csr->getTagAddress() + 1; + if (currentTaskCount > allocTaskCount) { + updateTaskCount(currentTaskCount, csr->getOsContext().getContextId()); + hostPtrTaskCountAssignment--; + } + } +} + constexpr uint32_t GraphicsAllocation::objectNotUsed; constexpr uint32_t GraphicsAllocation::objectNotResident; constexpr uint32_t GraphicsAllocation::objectAlwaysResident; diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index 02fe659f5b..394f1499b1 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -43,6 +43,7 @@ constexpr auto nonSharedResource = 0u; class Gmm; class MemoryManager; +class CommandStreamReceiver; struct AubInfo { uint32_t aubWritable = std::numeric_limits::max(); @@ -155,8 +156,8 @@ class GraphicsAllocation : public IDNode { bool isUsed() const { return registeredContextsNum > 0; } bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; } bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); } - void updateTaskCount(uint32_t newTaskCount, uint32_t contextId); - uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; } + MOCKABLE_VIRTUAL void updateTaskCount(uint32_t newTaskCount, uint32_t contextId); + MOCKABLE_VIRTUAL uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; } void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); } uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; } void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; } @@ -215,6 +216,8 @@ class GraphicsAllocation : public IDNode { this->reservedAddressRangeInfo.rangeSize = size; } + void prepareHostPtrForResidency(CommandStreamReceiver *csr); + Gmm *getDefaultGmm() const { return getGmm(0u); } @@ -252,6 +255,7 @@ class GraphicsAllocation : public IDNode { constexpr static uint32_t objectNotResident = std::numeric_limits::max(); constexpr static uint32_t objectNotUsed = std::numeric_limits::max(); constexpr static uint32_t objectAlwaysResident = std::numeric_limits::max() - 1; + std::atomic hostPtrTaskCountAssignment{0}; protected: struct UsageInfo { diff --git a/shared/source/memory_manager/internal_allocation_storage.cpp b/shared/source/memory_manager/internal_allocation_storage.cpp index 66503e0565..26434fca75 100644 --- a/shared/source/memory_manager/internal_allocation_storage.cpp +++ b/shared/source/memory_manager/internal_allocation_storage.cpp @@ -52,7 +52,7 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo IDList allocationsLeft; while (curr != nullptr) { auto *next = curr->next; - if (curr->getTaskCount(commandStreamReceiver.getOsContext().getContextId()) <= waitTaskCount) { + if (curr->getTaskCount(commandStreamReceiver.getOsContext().getContextId()) <= waitTaskCount && curr->hostPtrTaskCountAssignment == 0) { memoryManager->freeGraphicsMemory(curr); } else { allocationsLeft.pushTailOne(*curr); diff --git a/shared/source/memory_manager/surface.h b/shared/source/memory_manager/surface.h index 0d159c1e76..ee2203cdcb 100644 --- a/shared/source/memory_manager/surface.h +++ b/shared/source/memory_manager/surface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,6 +9,7 @@ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/memory_manager/graphics_allocation.h" +#include "shared/source/os_interface/os_context.h" namespace NEO { @@ -49,6 +50,7 @@ class HostPtrSurface : public Surface { void makeResident(CommandStreamReceiver &csr) override { DEBUG_BREAK_IF(!gfxAllocation); + gfxAllocation->prepareHostPtrForResidency(&csr); csr.makeResidentHostPtrAllocation(gfxAllocation); } Surface *duplicate() override { diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index bd58f10a11..bb8dc0498d 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -318,7 +318,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { createAllocationForHostSurfaceCalled++; cpuCopyForHostPtrSurfaceAllowed = surface.peekIsPtrCopyAllowed(); - return BaseClass::createAllocationForHostSurface(surface, requiresL3Flush); + auto status = BaseClass::createAllocationForHostSurface(surface, requiresL3Flush); + if (status) + surface.getAllocation()->hostPtrTaskCountAssignment--; + return status; } void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize) override { diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 14d3d20594..f71ae1ecbc 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -15,6 +15,7 @@ #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" +#include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" @@ -151,7 +152,12 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { ++hostPtrSurfaceCreationMutexLockCount; return CommandStreamReceiver::obtainHostPtrSurfaceCreationLock(); } - + bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { + bool status = CommandStreamReceiver::createAllocationForHostSurface(surface, requiresL3Flush); + if (status) + surface.getAllocation()->hostPtrTaskCountAssignment--; + return status; + } void postInitFlagsSetup() override {} static constexpr size_t tagSize = 256; diff --git a/shared/test/common/mocks/mock_graphics_allocation.h b/shared/test/common/mocks/mock_graphics_allocation.h index 39eaf52e49..57c7abb277 100644 --- a/shared/test/common/mocks/mock_graphics_allocation.h +++ b/shared/test/common/mocks/mock_graphics_allocation.h @@ -51,6 +51,20 @@ class MockGraphicsAllocation : public MemoryAllocation { } }; +class MockGraphicsAllocationTaskCount : public MockGraphicsAllocation { + public: + uint32_t getTaskCount(uint32_t contextId) const override { + getTaskCountCalleedTimes++; + return MockGraphicsAllocation::getTaskCount(contextId); + } + void updateTaskCount(uint32_t newTaskCount, uint32_t contextId) override { + updateTaskCountCalleedTimes++; + MockGraphicsAllocation::updateTaskCount(newTaskCount, contextId); + } + static uint32_t getTaskCountCalleedTimes; + uint32_t updateTaskCountCalleedTimes = 0; +}; + namespace GraphicsAllocationHelper { static inline MultiGraphicsAllocation toMultiGraphicsAllocation(GraphicsAllocation *graphicsAllocation) { diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index a3d4ac3a6c..28dbdd7e9d 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -1502,6 +1502,50 @@ TEST_F(CreateAllocationForHostSurfaceTest, givenTemporaryAllocationWhenCreateAll EXPECT_EQ(allocationPtr, hostSurfaceAllocationPtr); } +class MockCommandStreamReceiverHostPtrCreate : public MockCommandStreamReceiver { + public: + MockCommandStreamReceiverHostPtrCreate(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) + : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {} + bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { + return CommandStreamReceiver::createAllocationForHostSurface(surface, requiresL3Flush); + } +}; +TEST_F(CreateAllocationForHostSurfaceTest, givenTemporaryAllocationWhenCreateAllocationForHostSurfaceThenHostPtrTaskCountAssignmentWillIncrease) { + auto mockCsr = std::make_unique(executionEnvironment, 0u, device->getDeviceBitfield()); + mockCsr->internalAllocationStorage = std::make_unique(*mockCsr.get()); + mockCsr->osContext = &commandStreamReceiver->getOsContext(); + auto hostPtr = reinterpret_cast(0x1234); + size_t size = 100; + auto temporaryAllocation = std::make_unique(0, + AllocationType::EXTERNAL_HOST_PTR, hostPtr, size, 0, MemoryPool::System4KBPages, MemoryManager::maxOsContextCount); + auto allocationPtr = temporaryAllocation.get(); + temporaryAllocation->updateTaskCount(0u, 0u); + mockCsr->getInternalAllocationStorage()->storeAllocation(std::move(temporaryAllocation), TEMPORARY_ALLOCATION); + *mockCsr->getTagAddress() = 1u; + HostPtrSurface hostSurface(hostPtr, size); + + uint32_t valueBefore = allocationPtr->hostPtrTaskCountAssignment; + mockCsr->createAllocationForHostSurface(hostSurface, false); + EXPECT_EQ(valueBefore + 1, hostSurface.getAllocation()->hostPtrTaskCountAssignment); + allocationPtr->hostPtrTaskCountAssignment--; +} + +TEST_F(CreateAllocationForHostSurfaceTest, givenTemporaryAllocationWhenCreateAllocationForHostSurfaceThenAllocTaskCountEqualZero) { + auto hostPtr = reinterpret_cast(0x1234); + size_t size = 100; + auto temporaryAllocation = std::make_unique(0, + AllocationType::EXTERNAL_HOST_PTR, hostPtr, size, 0, MemoryPool::System4KBPages, MemoryManager::maxOsContextCount); + auto allocationPtr = temporaryAllocation.get(); + temporaryAllocation->updateTaskCount(10u, 0u); + commandStreamReceiver->getInternalAllocationStorage()->storeAllocation(std::move(temporaryAllocation), TEMPORARY_ALLOCATION); + *commandStreamReceiver->getTagAddress() = 1u; + HostPtrSurface hostSurface(hostPtr, size); + + EXPECT_EQ(allocationPtr->getTaskCount(0u), 10u); + commandStreamReceiver->createAllocationForHostSurface(hostSurface, false); + EXPECT_EQ(allocationPtr->getTaskCount(0u), 0u); +} + TEST_F(CreateAllocationForHostSurfaceTest, whenCreatingAllocationFromHostPtrSurfaceThenLockMutex) { const char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8}; size_t size = sizeof(memory); diff --git a/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp b/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp index bf6f9f0af6..c542479fb1 100644 --- a/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp +++ b/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/test/common/mocks/mock_aub_csr.h" +#include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" @@ -294,7 +295,7 @@ HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationThatHasPageTablesClonin EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation)); - //modify non default bank + // modify non default bank graphicsAllocation.setAubWritable(false, 0x2); EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation)); @@ -399,3 +400,68 @@ HWTEST_F(GraphicsAllocationTests, givenMultiStorageGraphicsAllocationWhenTbxWrit EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b101)); EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b1010)); } + +uint32_t MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes = 0; + +TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenAssignedTaskCountEqualZeroThenPrepareForResidencyDoeNotCallGetTaskCount) { + MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes = 0; + MockGraphicsAllocationTaskCount graphicsAllocation; + graphicsAllocation.hostPtrTaskCountAssignment = 0; + graphicsAllocation.prepareHostPtrForResidency(nullptr); + EXPECT_EQ(MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes, 0u); + MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes = 0; +} + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationWhenAssignedTaskCountAbovelZeroThenPrepareForResidencyGetTaskCountWasCalled) { + executionEnvironment.initializeMemoryManager(); + auto osContext = std::unique_ptr(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor())); + MockCommandStreamReceiver csr(executionEnvironment, 0, 1); + csr.osContext = osContext.get(); + MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes = 0; + MockGraphicsAllocationTaskCount graphicsAllocation; + graphicsAllocation.hostPtrTaskCountAssignment = 1; + graphicsAllocation.prepareHostPtrForResidency(&csr); + EXPECT_EQ(MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes, 1u); + MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes = 0; +} + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationAllocTaskCountHigherThanInCsrThenUpdateTaskCountWasNotCalled) { + executionEnvironment.initializeMemoryManager(); + auto osContext = std::unique_ptr(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor())); + MockCommandStreamReceiver csr(executionEnvironment, 0, 1); + csr.osContext = osContext.get(); + MockGraphicsAllocationTaskCount graphicsAllocation; + graphicsAllocation.updateTaskCount(10u, 0u); + *csr.getTagAddress() = 5; + graphicsAllocation.hostPtrTaskCountAssignment = 1; + auto calledTimesBefore = graphicsAllocation.updateTaskCountCalleedTimes; + graphicsAllocation.prepareHostPtrForResidency(&csr); + EXPECT_EQ(graphicsAllocation.updateTaskCountCalleedTimes, calledTimesBefore); +} + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationAllocTaskCountLowerThanInCsrThenUpdateTaskCountWasCalled) { + executionEnvironment.initializeMemoryManager(); + auto osContext = std::unique_ptr(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor())); + MockCommandStreamReceiver csr(executionEnvironment, 0, 1); + csr.osContext = osContext.get(); + MockGraphicsAllocationTaskCount graphicsAllocation; + graphicsAllocation.updateTaskCount(5u, 0u); + *csr.getTagAddress() = 10; + graphicsAllocation.hostPtrTaskCountAssignment = 1; + auto calledTimesBefore = graphicsAllocation.updateTaskCountCalleedTimes; + graphicsAllocation.prepareHostPtrForResidency(&csr); + EXPECT_EQ(graphicsAllocation.updateTaskCountCalleedTimes, calledTimesBefore + 1u); +} + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationAllocTaskCountLowerThanInCsrThenAssignmentCountIsDecremented) { + executionEnvironment.initializeMemoryManager(); + auto osContext = std::unique_ptr(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor())); + MockCommandStreamReceiver csr(executionEnvironment, 0, 1); + csr.osContext = osContext.get(); + MockGraphicsAllocationTaskCount graphicsAllocation; + graphicsAllocation.updateTaskCount(5u, 0u); + *csr.getTagAddress() = 10; + graphicsAllocation.hostPtrTaskCountAssignment = 1; + graphicsAllocation.prepareHostPtrForResidency(&csr); + EXPECT_EQ(graphicsAllocation.hostPtrTaskCountAssignment, 0u); +} \ No newline at end of file