Enable aggregating command buffers with multiple osContexts

- Store inspectionId for each osContext in GraphicsAllocation
- Pass osContextId to aggregateCommandBuffer and use it to select inspectionId

Change-Id: I2c377ad7577a8c882cc89c1205430cb581c2c0d5
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
Maciej Dziuban 2018-12-04 15:11:29 +01:00 committed by sys_ocldev
parent a35e3b792d
commit 43fd32b3ad
8 changed files with 102 additions and 30 deletions

View File

@ -505,7 +505,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
auto &commandBufferList = this->submissionAggregator->peekCmdBufferList();
if (!commandBufferList.peekIsEmpty()) {
auto &device = commandBufferList.peekHead()->device;
const auto totalMemoryBudget = static_cast<size_t>(commandBufferList.peekHead()->device.getDeviceInfo().globalMemSize / 2);
ResidencyContainer surfacesForSubmit;
ResourcePackage resourcePackage;
@ -515,7 +515,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
while (!commandBufferList.peekIsEmpty()) {
size_t totalUsedSize = 0u;
this->submissionAggregator->aggregateCommandBuffers(resourcePackage, totalUsedSize, (size_t)device.getDeviceInfo().globalMemSize * 5 / 10);
this->submissionAggregator->aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, osContext->getContextId());
auto primaryCmdBuffer = commandBufferList.removeFrontOne();
auto nextCommandBuffer = commandBufferList.peekHead();
auto currentBBendLocation = primaryCmdBuffer->batchBufferEndLocation;

View File

@ -13,7 +13,7 @@ void OCLRT::SubmissionAggregator::recordCommandBuffer(CommandBuffer *commandBuff
this->cmdBuffers.pushTailOne(*commandBuffer);
}
void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget) {
void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId) {
auto primaryCommandBuffer = this->cmdBuffers.peekHead();
auto currentInspection = this->inspectionId;
@ -27,8 +27,8 @@ void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resou
//primary command buffers must fit to budget
for (auto &graphicsAllocation : primaryCommandBuffer->surfaces) {
if (graphicsAllocation->inspectionId < currentInspection) {
graphicsAllocation->inspectionId = currentInspection;
if (graphicsAllocation->getInspectionId(osContextId) < currentInspection) {
graphicsAllocation->setInspectionId(currentInspection, osContextId);
resourcePackage.push_back(graphicsAllocation);
totalUsedSize += graphicsAllocation->getUnderlyingBufferSize();
}
@ -62,16 +62,16 @@ void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resou
if (graphicsAllocation == primaryBatchGraphicsAllocation) {
continue;
}
if (graphicsAllocation->inspectionId < currentInspection) {
graphicsAllocation->inspectionId = currentInspection;
if (graphicsAllocation->getInspectionId(osContextId) < currentInspection) {
graphicsAllocation->setInspectionId(currentInspection, osContextId);
newResources.push_back(graphicsAllocation);
nextCommandBufferNewResourcesSize += graphicsAllocation->getUnderlyingBufferSize();
}
}
if (nextCommandBuffer->batchBuffer.commandBufferAllocation && (nextCommandBuffer->batchBuffer.commandBufferAllocation != primaryBatchGraphicsAllocation)) {
if (nextCommandBuffer->batchBuffer.commandBufferAllocation->inspectionId < currentInspection) {
nextCommandBuffer->batchBuffer.commandBufferAllocation->inspectionId = currentInspection;
if (nextCommandBuffer->batchBuffer.commandBufferAllocation->getInspectionId(osContextId) < currentInspection) {
nextCommandBuffer->batchBuffer.commandBufferAllocation->setInspectionId(currentInspection, osContextId);
newResources.push_back(nextCommandBuffer->batchBuffer.commandBufferAllocation);
nextCommandBufferNewResourcesSize += nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBufferSize();
}

View File

@ -43,7 +43,7 @@ struct BatchBuffer {
};
struct CommandBuffer : public IDNode<CommandBuffer> {
CommandBuffer(Device &);
CommandBuffer(Device &device);
ResidencyContainer surfaces;
BatchBuffer batchBuffer;
void *batchBufferEndLocation = nullptr;
@ -62,7 +62,7 @@ using ResourcePackage = StackVec<GraphicsAllocation *, 128>;
class SubmissionAggregator {
public:
void recordCommandBuffer(CommandBuffer *commandBuffer);
void aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget);
void aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId);
CommandBufferList &peekCmdBufferList() { return cmdBuffers; }
protected:

View File

@ -123,6 +123,8 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
void resetTaskCount(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); }
uint32_t getInspectionId(uint32_t contextId) { return usageInfos[contextId].inspectionId; }
void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }
void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) { usageInfos[contextId].residencyTaskCount = newTaskCount; }
uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; }
@ -135,6 +137,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
// Bookkeeping record for a single context; the accessors of the enclosing class index
// usageInfos by contextId to reach one of these.
struct UsageInfo {
// Starts as objectNotUsed; resetTaskCount() writes that same value back via updateTaskCount().
uint32_t taskCount = objectNotUsed;
// Starts as objectNotResident; overwritten by updateResidencyTaskCount().
uint32_t residencyTaskCount = objectNotResident;
// Starts at 0; read and written through getInspectionId()/setInspectionId().
uint32_t inspectionId = 0u;
};
//this variable can only be modified from SubmissionAggregator
@ -148,7 +151,6 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources
bool evictable = true;
MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
uint32_t inspectionId = 0;
AllocationType allocationType = AllocationType::UNKNOWN;
bool aubWritable = true;
bool allocDumpable = false;

View File

@ -72,13 +72,13 @@ TEST(SubmissionsAggregator, givenTwoCommandBuffersWhenMergeResourcesIsCalledThen
size_t totalMemoryBudget = -1;
ResourcePackage resourcePackage;
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
EXPECT_EQ(0u, totalUsedSize);
submissionsAggregator.recordCommandBuffer(cmdBuffer);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
EXPECT_EQ(15u, totalUsedSize);
totalUsedSize = 0;
@ -92,7 +92,7 @@ TEST(SubmissionsAggregator, givenTwoCommandBuffersWhenMergeResourcesIsCalledThen
EXPECT_EQ(5u, cmdBuffer->surfaces.size());
EXPECT_EQ(4u, cmdBuffer2->surfaces.size());
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
//command buffer 2 is aggregated to command buffer 1
auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId;
@ -153,7 +153,7 @@ TEST(SubmissionsAggregator, givenSubmissionAggregatorWhenThreeCommandBuffersAreS
EXPECT_EQ(4u, cmdBuffer2->surfaces.size());
EXPECT_EQ(2u, cmdBuffer3->surfaces.size());
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
//command buffer 3 and 2 is aggregated to command buffer 1
auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId;
@ -207,7 +207,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenTheyAreAggreagateWith
submissionsAggregator.recordCommandBuffer(cmdBuffer2);
submissionsAggregator.recordCommandBuffer(cmdBuffer3);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
//command buffer 2 is aggregated to command buffer 1, command buffer 3 becomes command buffer 2
EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer);
@ -263,7 +263,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenAggregateIsCalledMult
submissionsAggregator.recordCommandBuffer(cmdBuffer2);
submissionsAggregator.recordCommandBuffer(cmdBuffer3);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
//command buffers not aggregated due to too low limit
EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer);
@ -275,7 +275,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenAggregateIsCalledMult
resourcePackage.clear();
totalUsedSize = 0;
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
//all cmd buffers are merged to 1
EXPECT_EQ(cmdBuffer3->inspectionId, cmdBuffer2->inspectionId);
EXPECT_EQ(cmdBuffer->inspectionId, cmdBuffer2->inspectionId);
@ -313,7 +313,7 @@ TEST(SubmissionsAggregator, givenMultipleCommandBuffersWithDifferentGraphicsAllo
submissionsAggregator.recordCommandBuffer(cmdBuffer);
submissionsAggregator.recordCommandBuffer(cmdBuffer2);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
EXPECT_EQ(4u, resourcePackage.size());
EXPECT_EQ(15u, totalUsedSize);
@ -347,7 +347,7 @@ TEST(SubmissionsAggregator, givenTwoCommandBufferWhereSecondContainsFirstOnResou
submissionsAggregator.recordCommandBuffer(cmdBuffer);
submissionsAggregator.recordCommandBuffer(cmdBuffer2);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
//resource pack should have 3 surfaces
EXPECT_EQ(3u, resourcePackage.size());
@ -379,7 +379,7 @@ TEST(SubmissionsAggregator, givenTwoCommandBufferWhereSecondContainsTheFirstComm
submissionsAggregator.recordCommandBuffer(cmdBuffer);
submissionsAggregator.recordCommandBuffer(cmdBuffer2);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
//resource pack should have 2 surfaces
EXPECT_EQ(2u, resourcePackage.size());
@ -408,7 +408,7 @@ TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentCoherencySettin
ResourcePackage resourcePackage;
size_t totalUsedSize = 0;
size_t totalMemoryBudget = 200;
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
EXPECT_EQ(1u, totalUsedSize);
EXPECT_EQ(1u, resourcePackage.size());
EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId);
@ -437,7 +437,7 @@ TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentThrottleSetting
ResourcePackage resourcePackage;
size_t totalUsedSize = 0;
size_t totalMemoryBudget = 200;
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
EXPECT_EQ(1u, totalUsedSize);
EXPECT_EQ(1u, resourcePackage.size());
EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId);
@ -466,7 +466,7 @@ TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentPrioritySetting
ResourcePackage resourcePackage;
size_t totalUsedSize = 0;
size_t totalMemoryBudget = 200;
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u);
EXPECT_EQ(1u, totalUsedSize);
EXPECT_EQ(1u, resourcePackage.size());
EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId);
@ -479,14 +479,72 @@ TEST(SubmissionsAggregator, dontAllocateFlushStamp) {
EXPECT_EQ(nullptr, cmdBuffer.flushStamp->getStampReference());
}
// Records two command buffers with two one-byte surfaces each, aggregates them on behalf of
// context 3 and expects every surface to report inspection id 1 for that context.
TEST(SubmissionsAggregator, givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenUseInspectionIdCorrespondingWithOsContextId) {
    SubmissionAggregator aggregator;
    ResourcePackage package;
    const auto memoryBudget = 3u;
    size_t usedSize = 0;
    std::unique_ptr<Device> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    auto firstCmdBuffer = new CommandBuffer(*mockDevice);
    auto secondCmdBuffer = new CommandBuffer(*mockDevice);

    MockGraphicsAllocation firstSurfaceA(nullptr, 1);
    MockGraphicsAllocation firstSurfaceB(nullptr, 1);
    MockGraphicsAllocation secondSurfaceA(nullptr, 1);
    MockGraphicsAllocation secondSurfaceB(nullptr, 1);

    // Surfaces are attached in the same order as before: two per command buffer.
    firstCmdBuffer->surfaces.push_back(&firstSurfaceA);
    firstCmdBuffer->surfaces.push_back(&firstSurfaceB);
    secondCmdBuffer->surfaces.push_back(&secondSurfaceA);
    secondCmdBuffer->surfaces.push_back(&secondSurfaceB);

    aggregator.recordCommandBuffer(firstCmdBuffer);
    aggregator.recordCommandBuffer(secondCmdBuffer);

    aggregator.aggregateCommandBuffers(package, usedSize, memoryBudget, 3u);

    // Only the slot of context 3 is inspected here.
    EXPECT_EQ(1u, firstSurfaceA.getInspectionId(3u));
    EXPECT_EQ(1u, firstSurfaceB.getInspectionId(3u));
    EXPECT_EQ(1u, secondSurfaceA.getInspectionId(3u));
    EXPECT_EQ(1u, secondSurfaceB.getInspectionId(3u));
}
// Aggregates on behalf of context 3 and verifies that, for BOTH surfaces of the primary
// command buffer, the inspection ids of every other context stay at their initial value 0.
TEST(SubmissionsAggregator, givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenDoNotUpdateInspectionIdsOfOtherContexts) {
    SubmissionAggregator submissionsAggregator;
    ResourcePackage resourcePackage;
    const auto totalMemoryBudget = 2u;
    size_t totalUsedSize = 0;
    std::unique_ptr<Device> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    CommandBuffer *cmdBuffer0 = new CommandBuffer(*device);
    CommandBuffer *cmdBuffer1 = new CommandBuffer(*device);
    MockGraphicsAllocation alloc0(nullptr, 1);
    MockGraphicsAllocation alloc1(nullptr, 1);

    cmdBuffer0->surfaces.push_back(&alloc0);
    cmdBuffer0->surfaces.push_back(&alloc1);

    submissionsAggregator.recordCommandBuffer(cmdBuffer0);
    submissionsAggregator.recordCommandBuffer(cmdBuffer1);
    submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 3u);

    for (auto osContextId = 0u; osContextId < alloc0.usageInfos.size(); osContextId++) {
        if (osContextId != 3u) {
            EXPECT_EQ(0u, alloc0.getInspectionId(osContextId));
        }
    }
    // This loop previously re-checked alloc0 (copy-paste slip), leaving alloc1 unverified.
    for (auto osContextId = 0u; osContextId < alloc1.usageInfos.size(); osContextId++) {
        if (osContextId != 3u) {
            EXPECT_EQ(0u, alloc1.getInspectionId(osContextId));
        }
    }
}
struct SubmissionsAggregatorTests : public ::testing::Test {
// Fixture setup: creates the mock device first, then a mock context built on top of it.
void SetUp() override {
// MockDevice is created with a new execution environment for platformDevices[0].
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
// The context receives the raw device pointer, so the device must already be set at this point.
context.reset(new MockContext(device.get()));
}
template <typename T>
void overrideCsr(T *newCsr) {
void overrideCsr(CommandStreamReceiver *newCsr) {
device->resetCommandStreamReceiver(newCsr);
newCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
}

View File

@ -10,6 +10,13 @@
using namespace OCLRT;
// A freshly constructed allocation must report inspection id 0 for each of the
// maxOsContextCount context slots.
TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenIsCreatedThenAllInspectionIdsAreSetToZero) {
    MockGraphicsAllocation allocation(nullptr, 0u, 0u, maxOsContextCount, true);

    auto contextId = 0u;
    while (contextId < maxOsContextCount) {
        EXPECT_EQ(0u, allocation.getInspectionId(contextId));
        contextId++;
    }
}
TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenIsCreatedThenTaskCountsAreInitializedProperly) {
GraphicsAllocation graphicsAllocation1(nullptr, 0u, 0u, 0u, maxOsContextCount, true);
GraphicsAllocation graphicsAllocation2(nullptr, 0u, 0u, maxOsContextCount, true);

View File

@ -14,14 +14,19 @@ class MockGraphicsAllocation : public GraphicsAllocation {
using GraphicsAllocation::GraphicsAllocation;
using GraphicsAllocation::objectNotResident;
using GraphicsAllocation::objectNotUsed;
using GraphicsAllocation::usageInfos;
MockGraphicsAllocation() : MockGraphicsAllocation(true) {}
MockGraphicsAllocation(bool shareable) : GraphicsAllocation(nullptr, 0u, 0, maxOsContextCount, shareable) {}
MockGraphicsAllocation(void *buffer, size_t sizeIn) : GraphicsAllocation(buffer, castToUint64(buffer), 0llu, sizeIn, maxOsContextCount, false) {
}
void resetInspectionId() {
this->inspectionId = 0;
void resetInspectionIds() {
for (auto &usageInfo : usageInfos) {
usageInfo.inspectionId = 0u;
}
}
void overrideMemoryPool(MemoryPool::Type pool) {
this->memoryPool = pool;
}

View File

@ -36,7 +36,7 @@ void GlobalMockSipProgram::resetAllocationState() {
for (uint32_t index = 0u; index < maxOsContextCount; index++) {
this->kernelInfoArray[0]->kernelAllocation->resetResidencyTaskCount(index);
}
static_cast<MockGraphicsAllocation *>(this->kernelInfoArray[0]->kernelAllocation)->resetInspectionId();
static_cast<MockGraphicsAllocation *>(this->kernelInfoArray[0]->kernelAllocation)->resetInspectionIds();
}
void GlobalMockSipProgram::initSipProgram() {
cl_int retVal = 0;