From 43fd32b3ad6f9ebd9fd23b343cea6aed3d98ffe8 Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Tue, 4 Dec 2018 15:11:29 +0100 Subject: [PATCH] Enable aggregating command buffers with multiple osContexts - Store inspectionId for each osContext in GraphicsAllocation - Pass osContextId to aggregateCommandBuffers and use it to select inspectionId Change-Id: I2c377ad7577a8c882cc89c1205430cb581c2c0d5 Signed-off-by: Maciej Dziuban --- .../command_stream_receiver_hw.inl | 4 +- .../command_stream/submissions_aggregator.cpp | 14 +-- .../command_stream/submissions_aggregator.h | 4 +- runtime/memory_manager/graphics_allocation.h | 4 +- .../submissions_aggregator_tests.cpp | 88 +++++++++++++++---- .../graphics_allocation_tests.cpp | 7 ++ unit_tests/mocks/mock_graphics_allocation.h | 9 +- unit_tests/mocks/mock_program.cpp | 2 +- 8 files changed, 102 insertions(+), 30 deletions(-) diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index 0565d21191..b5cd628729 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -505,7 +505,7 @@ inline void CommandStreamReceiverHw::flushBatchedSubmissions() { auto &commandBufferList = this->submissionAggregator->peekCmdBufferList(); if (!commandBufferList.peekIsEmpty()) { - auto &device = commandBufferList.peekHead()->device; + const auto totalMemoryBudget = static_cast<size_t>(commandBufferList.peekHead()->device.getDeviceInfo().globalMemSize / 2); ResidencyContainer surfacesForSubmit; ResourcePackage resourcePackage; @@ -515,7 +515,7 @@ inline void CommandStreamReceiverHw::flushBatchedSubmissions() { while (!commandBufferList.peekIsEmpty()) { size_t totalUsedSize = 0u; - this->submissionAggregator->aggregateCommandBuffers(resourcePackage, totalUsedSize, (size_t)device.getDeviceInfo().globalMemSize * 5 / 10); + this->submissionAggregator->aggregateCommandBuffers(resourcePackage, totalUsedSize,
totalMemoryBudget, osContext->getContextId()); auto primaryCmdBuffer = commandBufferList.removeFrontOne(); auto nextCommandBuffer = commandBufferList.peekHead(); auto currentBBendLocation = primaryCmdBuffer->batchBufferEndLocation; diff --git a/runtime/command_stream/submissions_aggregator.cpp b/runtime/command_stream/submissions_aggregator.cpp index cab47a294a..1e844cdb5c 100644 --- a/runtime/command_stream/submissions_aggregator.cpp +++ b/runtime/command_stream/submissions_aggregator.cpp @@ -13,7 +13,7 @@ void OCLRT::SubmissionAggregator::recordCommandBuffer(CommandBuffer *commandBuff this->cmdBuffers.pushTailOne(*commandBuffer); } -void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget) { +void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId) { auto primaryCommandBuffer = this->cmdBuffers.peekHead(); auto currentInspection = this->inspectionId; @@ -27,8 +27,8 @@ void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resou //primary command buffers must fix to budget for (auto &graphicsAllocation : primaryCommandBuffer->surfaces) { - if (graphicsAllocation->inspectionId < currentInspection) { - graphicsAllocation->inspectionId = currentInspection; + if (graphicsAllocation->getInspectionId(osContextId) < currentInspection) { + graphicsAllocation->setInspectionId(currentInspection, osContextId); resourcePackage.push_back(graphicsAllocation); totalUsedSize += graphicsAllocation->getUnderlyingBufferSize(); } @@ -62,16 +62,16 @@ void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resou if (graphicsAllocation == primaryBatchGraphicsAllocation) { continue; } - if (graphicsAllocation->inspectionId < currentInspection) { - graphicsAllocation->inspectionId = currentInspection; + if (graphicsAllocation->getInspectionId(osContextId) < 
currentInspection) { + graphicsAllocation->setInspectionId(currentInspection, osContextId); newResources.push_back(graphicsAllocation); nextCommandBufferNewResourcesSize += graphicsAllocation->getUnderlyingBufferSize(); } } if (nextCommandBuffer->batchBuffer.commandBufferAllocation && (nextCommandBuffer->batchBuffer.commandBufferAllocation != primaryBatchGraphicsAllocation)) { - if (nextCommandBuffer->batchBuffer.commandBufferAllocation->inspectionId < currentInspection) { - nextCommandBuffer->batchBuffer.commandBufferAllocation->inspectionId = currentInspection; + if (nextCommandBuffer->batchBuffer.commandBufferAllocation->getInspectionId(osContextId) < currentInspection) { + nextCommandBuffer->batchBuffer.commandBufferAllocation->setInspectionId(currentInspection, osContextId); newResources.push_back(nextCommandBuffer->batchBuffer.commandBufferAllocation); nextCommandBufferNewResourcesSize += nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBufferSize(); } diff --git a/runtime/command_stream/submissions_aggregator.h b/runtime/command_stream/submissions_aggregator.h index d317433df3..4b6243cbe1 100644 --- a/runtime/command_stream/submissions_aggregator.h +++ b/runtime/command_stream/submissions_aggregator.h @@ -43,7 +43,7 @@ struct BatchBuffer { }; struct CommandBuffer : public IDNode { - CommandBuffer(Device &); + CommandBuffer(Device &device); ResidencyContainer surfaces; BatchBuffer batchBuffer; void *batchBufferEndLocation = nullptr; @@ -62,7 +62,7 @@ using ResourcePackage = StackVec; class SubmissionAggregator { public: void recordCommandBuffer(CommandBuffer *commandBuffer); - void aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget); + void aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId); CommandBufferList &peekCmdBufferList() { return cmdBuffers; } protected: diff --git 
a/runtime/memory_manager/graphics_allocation.h b/runtime/memory_manager/graphics_allocation.h index cafb66004b..676e8ad956 100644 --- a/runtime/memory_manager/graphics_allocation.h +++ b/runtime/memory_manager/graphics_allocation.h @@ -123,6 +123,8 @@ class GraphicsAllocation : public IDNode { void updateTaskCount(uint32_t newTaskCount, uint32_t contextId); uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; } void resetTaskCount(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); } + uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; } + void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; } void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) { usageInfos[contextId].residencyTaskCount = newTaskCount; } uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; } @@ -135,6 +137,7 @@ class GraphicsAllocation : public IDNode { struct UsageInfo { uint32_t taskCount = objectNotUsed; uint32_t residencyTaskCount = objectNotResident; + uint32_t inspectionId = 0u; }; //this variable can only be modified from SubmissionAggregator @@ -148,7 +151,6 @@ class GraphicsAllocation : public IDNode { uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources bool evictable = true; MemoryPool::Type memoryPool = MemoryPool::MemoryNull; - uint32_t inspectionId = 0; AllocationType allocationType = AllocationType::UNKNOWN; bool aubWritable = true; bool allocDumpable = false; diff --git a/unit_tests/command_stream/submissions_aggregator_tests.cpp b/unit_tests/command_stream/submissions_aggregator_tests.cpp index 759029c521..9180930a2c 100644 --- a/unit_tests/command_stream/submissions_aggregator_tests.cpp +++ b/unit_tests/command_stream/submissions_aggregator_tests.cpp @@ -72,13 +72,13 @@ TEST(SubmissionsAggregator,
givenTwoCommandBuffersWhenMergeResourcesIsCalledThen size_t totalMemoryBudget = -1; ResourcePackage resourcePackage; - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(0u, totalUsedSize); submissionsAggregator.recordCommandBuffer(cmdBuffer); - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(15u, totalUsedSize); totalUsedSize = 0; @@ -92,7 +92,7 @@ TEST(SubmissionsAggregator, givenTwoCommandBuffersWhenMergeResourcesIsCalledThen EXPECT_EQ(5u, cmdBuffer->surfaces.size()); EXPECT_EQ(4u, cmdBuffer2->surfaces.size()); - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 2 is aggregated to command buffer 1 auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId; @@ -153,7 +153,7 @@ TEST(SubmissionsAggregator, givenSubmissionAggregatorWhenThreeCommandBuffersAreS EXPECT_EQ(4u, cmdBuffer2->surfaces.size()); EXPECT_EQ(2u, cmdBuffer3->surfaces.size()); - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 3 and 2 is aggregated to command buffer 1 auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId; @@ -207,7 +207,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenTheyAreAggreagateWith submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.recordCommandBuffer(cmdBuffer3); - 
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 2 is aggregated to command buffer 1, comand buffer 3 becomes command buffer 2 EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); @@ -263,7 +263,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenAggregateIsCalledMult submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.recordCommandBuffer(cmdBuffer3); - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffers not aggregated due to too low limit EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); @@ -275,7 +275,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenAggregateIsCalledMult resourcePackage.clear(); totalUsedSize = 0; - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //all cmd buffers are merged to 1 EXPECT_EQ(cmdBuffer3->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); @@ -313,7 +313,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWithDifferentGraphicsAllo submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(4u, resourcePackage.size()); EXPECT_EQ(15u, totalUsedSize); @@ -347,7 +347,7 @@ TEST(SubmissionsAggregator, 
givenTwoCommandBufferWhereSecondContainsFirstOnResou submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //resource pack shuold have 3 surfaces EXPECT_EQ(3u, resourcePackage.size()); @@ -379,7 +379,7 @@ TEST(SubmissionsAggregator, givenTwoCommandBufferWhereSecondContainsTheFirstComm submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //resource pack shuold have 3 surfaces EXPECT_EQ(2u, resourcePackage.size()); @@ -408,7 +408,7 @@ TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentCoherencySettin ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); @@ -437,7 +437,7 @@ TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentThrottleSetting ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); @@ -466,7 
+466,7 @@ TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentPrioritySetting ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; - submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); @@ -479,14 +479,72 @@ TEST(SubmissionsAggregator, dontAllocateFlushStamp) { EXPECT_EQ(nullptr, cmdBuffer.flushStamp->getStampReference()); } +TEST(SubmissionsAggregator, givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenUseInspectionIdCorrespondingWithOsContextId) { + SubmissionAggregator submissionsAggregator; + ResourcePackage resourcePackage; + const auto totalMemoryBudget = 3u; + size_t totalUsedSize = 0; + std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); + CommandBuffer *cmdBuffer0 = new CommandBuffer(*device); + CommandBuffer *cmdBuffer1 = new CommandBuffer(*device); + + MockGraphicsAllocation alloc0(nullptr, 1); + MockGraphicsAllocation alloc1(nullptr, 1); + MockGraphicsAllocation alloc2(nullptr, 1); + MockGraphicsAllocation alloc3(nullptr, 1); + + cmdBuffer0->surfaces.push_back(&alloc0); + cmdBuffer0->surfaces.push_back(&alloc1); + cmdBuffer1->surfaces.push_back(&alloc2); + cmdBuffer1->surfaces.push_back(&alloc3); + + submissionsAggregator.recordCommandBuffer(cmdBuffer0); + submissionsAggregator.recordCommandBuffer(cmdBuffer1); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 3u); + EXPECT_EQ(1u, alloc0.getInspectionId(3u)); + EXPECT_EQ(1u, alloc1.getInspectionId(3u)); + EXPECT_EQ(1u, alloc2.getInspectionId(3u)); + EXPECT_EQ(1u, alloc3.getInspectionId(3u)); +} + +TEST(SubmissionsAggregator, 
givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenDoNotUpdateInspectionIdsOfOtherContexts) { + SubmissionAggregator submissionsAggregator; + ResourcePackage resourcePackage; + const auto totalMemoryBudget = 2u; + size_t totalUsedSize = 0; + std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); + CommandBuffer *cmdBuffer0 = new CommandBuffer(*device); + CommandBuffer *cmdBuffer1 = new CommandBuffer(*device); + + MockGraphicsAllocation alloc0(nullptr, 1); + MockGraphicsAllocation alloc1(nullptr, 1); + + cmdBuffer0->surfaces.push_back(&alloc0); + cmdBuffer0->surfaces.push_back(&alloc1); + + submissionsAggregator.recordCommandBuffer(cmdBuffer0); + submissionsAggregator.recordCommandBuffer(cmdBuffer1); + submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 3u); + + for (auto osContextId = 0u; osContextId < alloc1.usageInfos.size(); osContextId++) { + if (osContextId != 3u) { + EXPECT_EQ(0u, alloc1.getInspectionId(osContextId)); + } + } + for (auto osContextId = 0u; osContextId < alloc0.usageInfos.size(); osContextId++) { + if (osContextId != 3u) { + EXPECT_EQ(0u, alloc0.getInspectionId(osContextId)); + } + } +} + struct SubmissionsAggregatorTests : public ::testing::Test { void SetUp() override { device.reset(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); context.reset(new MockContext(device.get())); } - template - void overrideCsr(T *newCsr) { + void overrideCsr(CommandStreamReceiver *newCsr) { device->resetCommandStreamReceiver(newCsr); newCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); } diff --git a/unit_tests/memory_manager/graphics_allocation_tests.cpp b/unit_tests/memory_manager/graphics_allocation_tests.cpp index 5c2102a8ab..d49e4c234f 100644 --- a/unit_tests/memory_manager/graphics_allocation_tests.cpp +++ b/unit_tests/memory_manager/graphics_allocation_tests.cpp @@ -10,6 +10,13 @@ using namespace OCLRT; +TEST(GraphicsAllocationTest,
givenGraphicsAllocationWhenIsCreatedThenAllInspectionIdsAreSetToZero) { + MockGraphicsAllocation graphicsAllocation(nullptr, 0u, 0u, maxOsContextCount, true); + for (auto i = 0u; i < maxOsContextCount; i++) { + EXPECT_EQ(0u, graphicsAllocation.getInspectionId(i)); + } +} + TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenIsCreatedThenTaskCountsAreInitializedProperly) { GraphicsAllocation graphicsAllocation1(nullptr, 0u, 0u, 0u, maxOsContextCount, true); GraphicsAllocation graphicsAllocation2(nullptr, 0u, 0u, maxOsContextCount, true); diff --git a/unit_tests/mocks/mock_graphics_allocation.h b/unit_tests/mocks/mock_graphics_allocation.h index 2631da2dc6..0f3266d6c0 100644 --- a/unit_tests/mocks/mock_graphics_allocation.h +++ b/unit_tests/mocks/mock_graphics_allocation.h @@ -14,14 +14,19 @@ class MockGraphicsAllocation : public GraphicsAllocation { using GraphicsAllocation::GraphicsAllocation; using GraphicsAllocation::objectNotResident; using GraphicsAllocation::objectNotUsed; + using GraphicsAllocation::usageInfos; MockGraphicsAllocation() : MockGraphicsAllocation(true) {} MockGraphicsAllocation(bool shareable) : GraphicsAllocation(nullptr, 0u, 0, maxOsContextCount, shareable) {} MockGraphicsAllocation(void *buffer, size_t sizeIn) : GraphicsAllocation(buffer, castToUint64(buffer), 0llu, sizeIn, maxOsContextCount, false) { } - void resetInspectionId() { - this->inspectionId = 0; + + void resetInspectionIds() { + for (auto &usageInfo : usageInfos) { + usageInfo.inspectionId = 0u; + } } + void overrideMemoryPool(MemoryPool::Type pool) { this->memoryPool = pool; } diff --git a/unit_tests/mocks/mock_program.cpp b/unit_tests/mocks/mock_program.cpp index b06a236f5e..a577f8b447 100644 --- a/unit_tests/mocks/mock_program.cpp +++ b/unit_tests/mocks/mock_program.cpp @@ -36,7 +36,7 @@ void GlobalMockSipProgram::resetAllocationState() { for (uint32_t index = 0u; index < maxOsContextCount; index++) { 
this->kernelInfoArray[0]->kernelAllocation->resetResidencyTaskCount(index); } - static_cast<MockGraphicsAllocation *>(this->kernelInfoArray[0]->kernelAllocation)->resetInspectionId(); + static_cast<MockGraphicsAllocation *>(this->kernelInfoArray[0]->kernelAllocation)->resetInspectionIds(); } void GlobalMockSipProgram::initSipProgram() { cl_int retVal = 0;