Add support for batched dispatch to userspace AUBs

This commit aims to add support for batched dispatch,
but doesn't make it the default mode for AubCSR yet.

Change-Id: I4dc366ec5f01adf2c4793009da2100ba0230c60a
This commit is contained in:
Milczarek, Slawomir
2018-01-10 22:03:23 +01:00
committed by sys_ocldev
parent 920d952a4a
commit 7c42353c4c
14 changed files with 166 additions and 71 deletions

2
Jenkinsfile vendored
View File

@@ -2,4 +2,4 @@
neoDependenciesRev='727168-748'
strategy='EQUAL'
allowedF=49
allowedCD=373
allowedCD=371

View File

@@ -40,6 +40,9 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
void makeResident(GraphicsAllocation &gfxAllocation) override;
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
void processResidency(ResidencyContainer *allocationsForResidency) override;
void writeMemory(GraphicsAllocation &gfxAllocation);
// Family specific version
void submitLRCA(EngineType engineOrdinal, const MiContextDescriptorReg &contextDescriptor);
void pollForCompletion(EngineType engineOrdinal);

View File

@@ -206,6 +206,15 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
DEBUG_BREAK_IF(!engineInfo.pLRCA);
}
if (this->dispatchMode == CommandStreamReceiver::DispatchMode::ImmediateDispatch) {
makeResident(*batchBuffer.commandBufferAllocation);
} else {
allocationsForResidency->push_back(batchBuffer.commandBufferAllocation);
batchBuffer.commandBufferAllocation->residencyTaskCount = this->taskCount;
}
processResidency(allocationsForResidency);
// Write our batch buffer
auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
auto currentOffset = batchBuffer.usedSize;
@@ -349,6 +358,7 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
}
pollForCompletion(engineOrdinal);
return 0;
}
@@ -378,37 +388,50 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion(EngineType engineO
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::makeResident(GraphicsAllocation &gfxAllocation) {
if (gfxAllocation.residencyTaskCount < (int)this->taskCount) {
auto cpuAddress = gfxAllocation.getUnderlyingBuffer();
auto gpuAddress = gfxAllocation.getGpuAddress();
auto size = gfxAllocation.getUnderlyingBufferSize();
if (size == 0 || !(((MemoryAllocation *)&gfxAllocation)->allowAubFileWrite))
return;
{
std::ostringstream str;
str << "ppgtt: " << std::hex << std::showbase << gpuAddress;
stream.addComment(str.str().c_str());
}
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset) {
static const size_t pageSize = 4096;
auto vmAddr = (static_cast<uintptr_t>(gpuAddress) + offset) & ~(pageSize - 1);
auto pAddr = physAddress & ~(pageSize - 1);
AUB::reserveAddressPPGTT(stream, vmAddr, pageSize, pAddr);
AUB::addMemoryWrite(stream, physAddress,
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(cpuAddress) + offset),
size, AubMemDump::AddressSpaceValues::TraceNonlocal);
};
ppgtt.pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, walker);
this->getMemoryManager()->pushAllocationForResidency(&gfxAllocation);
}
gfxAllocation.residencyTaskCount = (int)this->taskCount;
}
// Records the contents of `gfxAllocation` in the AUB stream.
//
// The allocation is skipped when its underlying buffer size is zero or when its
// `allowAubFileWrite` flag is not set. Otherwise a "ppgtt: <gpu address>" comment is
// added to the stream and `ppgtt.pageWalk` is invoked over the allocation's GPU range;
// for every chunk the walker reports, the 4096-byte page containing it is reserved via
// `AUB::reserveAddressPPGTT` and the chunk's bytes are emitted via `AUB::addMemoryWrite`.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation) {
    auto hostBase = gfxAllocation.getUnderlyingBuffer();
    auto gpuBase = gfxAllocation.getGpuAddress();
    auto totalSize = gfxAllocation.getUnderlyingBufferSize();

    // Nothing to dump for an empty allocation.
    if (totalSize == 0) {
        return;
    }
    // The allocation is viewed as a MemoryAllocation to read its AUB write permission.
    auto *memoryAllocation = (MemoryAllocation *)&gfxAllocation;
    if (!memoryAllocation->allowAubFileWrite) {
        return;
    }

    // Annotate the stream with the GPU address that is about to be written.
    std::ostringstream comment;
    comment << "ppgtt: " << std::hex << std::showbase << gpuBase;
    const std::string commentText = comment.str();
    stream.addComment(commentText.c_str());

    // Invoked by the page walk once per chunk: map the enclosing page, then write the chunk.
    PageWalker dumpChunk = [&](uint64_t chunkPhysAddress, size_t chunkSize, size_t chunkOffset) {
        static const size_t pageSize = 4096;
        static const size_t pageMask = ~(pageSize - 1);

        auto pageVirtualAddress = (static_cast<uintptr_t>(gpuBase) + chunkOffset) & pageMask;
        auto pagePhysicalAddress = chunkPhysAddress & pageMask;
        AUB::reserveAddressPPGTT(stream, pageVirtualAddress, pageSize, pagePhysicalAddress);

        auto *chunkHostPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(hostBase) + chunkOffset);
        AUB::addMemoryWrite(stream, chunkPhysAddress, chunkHostPtr, chunkSize,
                            AubMemDump::AddressSpaceValues::TraceNonlocal);
    };

    ppgtt.pageWalk(static_cast<uintptr_t>(gpuBase), totalSize, 0, dumpChunk);
}
// Writes every allocation of the selected residency container via writeMemory() and
// stamps each one with the current task count.
//
// When `allocationsForResidency` is null, the memory manager's residency allocations
// are used instead of the caller-supplied container.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::processResidency(ResidencyContainer *allocationsForResidency) {
    auto &allocationsToWrite = allocationsForResidency ? *allocationsForResidency : this->getMemoryManager()->getResidencyAllocations();

    for (GraphicsAllocation *allocation : allocationsToWrite) {
        writeMemory(*allocation);
        allocation->residencyTaskCount = static_cast<int>(this->taskCount);
    }
}
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::makeNonResident(GraphicsAllocation &gfxAllocation) {
if (gfxAllocation.residencyTaskCount != ObjectNotResident) {

View File

@@ -72,12 +72,16 @@ void CommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) {
gfxAllocation.residencyTaskCount = ObjectNotResident;
}
void CommandStreamReceiver::makeSurfacePackNonResident() {
auto &surfacesForResidency = getMemoryManager()->getResidencyAllocations();
for (auto &surface : surfacesForResidency) {
void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer *allocationsForResidency) {
auto &residencyAllocations = allocationsForResidency ? *allocationsForResidency : this->getMemoryManager()->getResidencyAllocations();
for (auto &surface : residencyAllocations) {
this->makeNonResident(*surface);
}
getMemoryManager()->clearResidencyAllocations();
if (allocationsForResidency) {
residencyAllocations.clear();
} else {
this->getMemoryManager()->clearResidencyAllocations();
}
this->processEviction();
}

View File

@@ -64,7 +64,7 @@ class CommandStreamReceiver {
virtual void makeCoherent(void *address, size_t length){};
virtual void makeResident(GraphicsAllocation &gfxAllocation);
virtual void makeNonResident(GraphicsAllocation &gfxAllocation);
void makeSurfacePackNonResident();
void makeSurfacePackNonResident(ResidencyContainer *allocationsForResidency);
virtual void processResidency(ResidencyContainer *allocationsForResidency) {}
virtual void processEviction();

View File

@@ -336,7 +336,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
flushStamp->setStamp(this->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr));
this->latestFlushedTaskCount = this->taskCount + 1;
this->makeSurfacePackNonResident();
this->makeSurfacePackNonResident(nullptr);
} else {
auto commandBuffer = new CommandBuffer;
commandBuffer->batchBuffer = batchBuffer;
@@ -348,7 +348,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
this->submissionAggregator->recordCommandBuffer(commandBuffer);
}
} else {
this->makeSurfacePackNonResident();
this->makeSurfacePackNonResident(nullptr);
}
//check if we are not over the budget, if we are do implicit flush
@@ -436,13 +436,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
this->latestFlushedTaskCount = lastTaskCount;
this->flushStamp->setStamp(flushStamp);
for (auto &graphicsAllocation : surfacesForSubmit) {
this->makeNonResident(*graphicsAllocation);
}
surfacesForSubmit.clear();
this->makeSurfacePackNonResident(&surfacesForSubmit);
resourcePackage.clear();
this->processEviction();
}
this->totalMemoryUsed = 0;
}

View File

@@ -168,28 +168,28 @@ HWTEST_P(AUBMapImage, MapUpdateUnmapVerify) {
auto mappedPtr = pCmdQ->enqueueMapImage(srcImage, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ,
origin, region, &mappedRowPitch, &mappedSlicePitch,
0, nullptr, nullptr, retVal);
if (!srcImage->allowTiling()) {
pCommandStreamReceiver->makeResident(*srcImage->getGraphicsAllocation());
pCommandStreamReceiver->makeNonResident(*srcImage->getGraphicsAllocation());
}
EXPECT_EQ(CL_SUCCESS, retVal);
auto mappedPtrStart = static_cast<uint8_t *>(mappedPtr);
auto srcMemoryStart = srcMemory;
uint8_t *mappedPtrStart;
uint8_t *srcMemoryStart;
// validate mapped region
srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch * origin[2]);
srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch * origin[1]);
srcMemoryStart = ptrOffset(srcMemoryStart, elementSize * origin[0]);
for (size_t z = 0; z < region[2]; z++) {
for (size_t y = 0; y < region[1]; y++) {
AUBCommandStreamFixture::expectMemory<FamilyType>(mappedPtrStart, srcMemoryStart, elementSize * region[0]);
mappedPtrStart = ptrOffset(mappedPtrStart, mappedRowPitch);
srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch);
if (srcImage->allowTiling()) {
mappedPtrStart = static_cast<uint8_t *>(mappedPtr);
srcMemoryStart = srcMemory;
// validate mapped region
srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch * origin[2]);
srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch * origin[1]);
srcMemoryStart = ptrOffset(srcMemoryStart, elementSize * origin[0]);
for (size_t z = 0; z < region[2]; z++) {
for (size_t y = 0; y < region[1]; y++) {
AUBCommandStreamFixture::expectMemory<FamilyType>(mappedPtrStart, srcMemoryStart, elementSize * region[0]);
mappedPtrStart = ptrOffset(mappedPtrStart, mappedRowPitch);
srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch);
}
mappedPtrStart = ptrOffset(mappedPtrStart, mappedSlicePitch - (mappedRowPitch * region[1]));
srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch - (inputRowPitch * (region[1])));
}
mappedPtrStart = ptrOffset(mappedPtrStart, mappedSlicePitch - (mappedRowPitch * region[1]));
srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch - (inputRowPitch * (region[1])));
}
// write to mapped ptr

View File

@@ -107,14 +107,18 @@ TEST_F(AUBcommandstreamTests, makeResident) {
uint8_t buffer[0x10000];
size_t size = sizeof(buffer);
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
commandStreamReceiver.createAllocationAndHandleResidency(buffer, size);
auto graphicsAllocation = commandStreamReceiver.createAllocationAndHandleResidency(buffer, size);
ResidencyContainer allocationsForResidency = {graphicsAllocation};
commandStreamReceiver.processResidency(&allocationsForResidency);
}
HWTEST_F(AUBcommandstreamTests, expectMemorySingle) {
uint32_t buffer = 0xdeadbeef;
size_t size = sizeof(buffer);
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
commandStreamReceiver.createAllocationAndHandleResidency(&buffer, size);
auto graphicsAllocation = commandStreamReceiver.createAllocationAndHandleResidency(&buffer, size);
ResidencyContainer allocationsForResidency = {graphicsAllocation};
commandStreamReceiver.processResidency(&allocationsForResidency);
AUBCommandStreamFixture::expectMemory<FamilyType>(&buffer, &buffer, size);
}
@@ -128,7 +132,10 @@ HWTEST_F(AUBcommandstreamTests, expectMemoryLarge) {
}
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
commandStreamReceiver.createAllocationAndHandleResidency(buffer, sizeBuffer);
auto graphicsAllocation = commandStreamReceiver.createAllocationAndHandleResidency(buffer, sizeBuffer);
ResidencyContainer allocationsForResidency = {graphicsAllocation};
commandStreamReceiver.processResidency(&allocationsForResidency);
AUBCommandStreamFixture::expectMemory<FamilyType>(buffer, buffer, sizeBuffer);
delete[] buffer;
}

View File

@@ -31,6 +31,7 @@ using OCLRT::AUBCommandStreamReceiverHw;
using OCLRT::BatchBuffer;
using OCLRT::CommandStreamReceiver;
using OCLRT::GraphicsAllocation;
using OCLRT::ResidencyContainer;
using OCLRT::HardwareInfo;
using OCLRT::LinearStream;
using OCLRT::MemoryManager;
@@ -116,4 +117,66 @@ HWTEST_F(AubCommandStreamReceiverTests, flushShouldLeaveProperRingTailAlignment)
mm->freeGraphicsMemory(commandBuffer);
delete csr;
delete mm;
}
}
// With ImmediateDispatch, flush() is expected to stamp the command buffer allocation with
// the CSR's current task count, and makeSurfacePackNonResident(nullptr) is expected to
// reset it back to ObjectNotResident.
HWTEST_F(AubCommandStreamReceiverTests, flushShouldCallMakeResidentOnCommandBufferAllocation) {
    std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>> aubCsr(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0]));
    std::unique_ptr<MemoryManager> memoryManager(aubCsr->createMemoryManager(false));

    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, commandBuffer);

    LinearStream commandStream(commandBuffer);
    BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), 0, false, false, commandStream.getUsed(), &commandStream};

    // Freshly allocated memory starts out non-resident.
    EXPECT_EQ(ObjectNotResident, commandBuffer->residencyTaskCount);

    aubCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::ImmediateDispatch);
    aubCsr->flush(batchBuffer, OCLRT::ENGINE_RCS, nullptr);

    EXPECT_NE(ObjectNotResident, commandBuffer->residencyTaskCount);
    EXPECT_EQ(static_cast<int>(aubCsr->peekTaskCount()), commandBuffer->residencyTaskCount);

    aubCsr->makeSurfacePackNonResident(nullptr);

    EXPECT_EQ(ObjectNotResident, commandBuffer->residencyTaskCount);

    memoryManager->freeGraphicsMemoryImpl(commandBuffer);
    // Detach the memory manager from the CSR before both unique_ptrs go out of scope.
    aubCsr->setMemoryManager(nullptr);
}
// With BatchedDispatch, flush() is expected to stamp both the caller-supplied residency
// allocation and the command buffer allocation with the CSR's current task count, and
// makeSurfacePackNonResident(&allocationsForResidency) is expected to reset both back to
// ObjectNotResident.
HWTEST_F(AubCommandStreamReceiverTests, flushShouldCallMakeResidentOnResidencyAllocations) {
    std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>> csr(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0]));
    std::unique_ptr<MemoryManager> mm(csr->createMemoryManager(false));

    auto gfxAllocation = mm->allocateGraphicsMemory(sizeof(uint32_t), sizeof(uint32_t), false, false);
    // Fail the test cleanly instead of dereferencing a null allocation below.
    ASSERT_NE(nullptr, gfxAllocation);

    GraphicsAllocation *commandBuffer = mm->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
    auto engineOrdinal = OCLRT::ENGINE_RCS;
    ResidencyContainer allocationsForResidency = {gfxAllocation};

    // Both allocations start out non-resident.
    EXPECT_EQ(ObjectNotResident, gfxAllocation->residencyTaskCount);
    EXPECT_EQ(ObjectNotResident, commandBuffer->residencyTaskCount);

    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    csr->flush(batchBuffer, engineOrdinal, &allocationsForResidency);

    EXPECT_NE(ObjectNotResident, gfxAllocation->residencyTaskCount);
    EXPECT_EQ((int)csr->peekTaskCount(), gfxAllocation->residencyTaskCount);

    EXPECT_NE(ObjectNotResident, commandBuffer->residencyTaskCount);
    EXPECT_EQ((int)csr->peekTaskCount(), commandBuffer->residencyTaskCount);

    csr->makeSurfacePackNonResident(&allocationsForResidency);

    EXPECT_EQ(ObjectNotResident, gfxAllocation->residencyTaskCount);
    EXPECT_EQ(ObjectNotResident, commandBuffer->residencyTaskCount);

    mm->freeGraphicsMemory(commandBuffer);
    mm->freeGraphicsMemoryImpl(gfxAllocation);

    // Detach the memory manager from the CSR before both unique_ptrs go out of scope.
    csr->setMemoryManager(nullptr);
}

View File

@@ -275,7 +275,7 @@ HWTEST_F(KernelImageArgTest, givenImgWithMcsAllocWhenMakeResidentThenMakeMcsAllo
pKernel->makeResident(*csr.get());
EXPECT_TRUE(csr->isMadeResident(mcsAlloc));
csr->makeSurfacePackNonResident();
csr->makeSurfacePackNonResident(nullptr);
EXPECT_TRUE(csr->isMadeNonResident(mcsAlloc));

View File

@@ -503,7 +503,7 @@ TEST_F(KernelPrivateSurfaceTest, testPrivateSurface) {
pKernel->makeResident(*csr.get());
EXPECT_EQ(1u, csr->residency.size());
csr->makeSurfacePackNonResident();
csr->makeSurfacePackNonResident(nullptr);
pKernel->updateWithCompletionStamp(*csr.get(), nullptr);
EXPECT_EQ(0u, csr->residency.size());

View File

@@ -1194,7 +1194,7 @@ TEST_F(DrmCommandStreamLeaksTest, givenFragmentedAllocationsWithResuedFragmentsW
EXPECT_EQ(3u, residency->size());
tCsr->makeSurfacePackNonResident();
tCsr->makeSurfacePackNonResident(nullptr);
//check that each packet is not resident
EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident);
@@ -1216,7 +1216,7 @@ TEST_F(DrmCommandStreamLeaksTest, givenFragmentedAllocationsWithResuedFragmentsW
EXPECT_EQ(3u, residency->size());
tCsr->makeSurfacePackNonResident();
tCsr->makeSurfacePackNonResident(nullptr);
EXPECT_EQ(0u, residency->size());
@@ -1484,7 +1484,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushMultipleTimes) {
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer3{cs.getGraphicsAllocation(), 16, false, false, cs.getUsed(), &cs};
csr->flush(batchBuffer3, EngineType::ENGINE_RCS, nullptr);
csr->makeSurfacePackNonResident();
csr->makeSurfacePackNonResident(nullptr);
mm->freeGraphicsMemory(allocation);
mm->freeGraphicsMemory(allocation2);
@@ -1575,7 +1575,7 @@ TEST_F(DrmCommandStreamLeaksTest, MakeResidentClearResidencyAllocationsInMemoryM
EXPECT_NE(0u, mm->getResidencyAllocations().size());
csr->processResidency(nullptr);
csr->makeSurfacePackNonResident();
csr->makeSurfacePackNonResident(nullptr);
EXPECT_EQ(0u, mm->getResidencyAllocations().size());
mm->freeGraphicsMemory(allocation1);
@@ -1593,7 +1593,7 @@ TEST_F(DrmCommandStreamLeaksTest, givenMultipleMakeResidentWhenMakeNonResidentIs
EXPECT_NE(0u, mm->getResidencyAllocations().size());
csr->processResidency(nullptr);
csr->makeSurfacePackNonResident();
csr->makeSurfacePackNonResident(nullptr);
EXPECT_EQ(0u, mm->getResidencyAllocations().size());
EXPECT_FALSE(allocation1->isResident());

View File

@@ -408,7 +408,7 @@ TEST_F(WddmCommandStreamMockGdiTest, makeResidentClearsResidencyAllocations) {
csr->processResidency(nullptr);
csr->makeSurfacePackNonResident();
csr->makeSurfacePackNonResident(nullptr);
EXPECT_EQ(0u, mm->getResidencyAllocations().size());
EXPECT_EQ(0u, mm->getEvictionAllocations().size());

View File

@@ -1353,7 +1353,7 @@ HWTEST_F(PatchTokenTests, AllocateConstantSurface) {
EXPECT_EQ(*pDst, reinterpret_cast<uintptr_t>(constBuffGpuAddr));
pKernel->updateWithCompletionStamp(*pCommandStreamReceiver, nullptr);
pCommandStreamReceiver->makeSurfacePackNonResident();
pCommandStreamReceiver->makeSurfacePackNonResident(nullptr);
EXPECT_EQ(0u, pCommandStreamReceiver->residency.size());
std::vector<Surface *> surfaces;