Add support for batched dispatch to userspace AUBs

This commit adds support for batched dispatch,
but does not yet make it the default mode for AubCSR.

Change-Id: I4dc366ec5f01adf2c4793009da2100ba0230c60a
This commit is contained in:
Milczarek, Slawomir
2018-01-10 22:03:23 +01:00
committed by sys_ocldev
parent 920d952a4a
commit 7c42353c4c
14 changed files with 166 additions and 71 deletions

View File

@@ -40,6 +40,9 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
void makeResident(GraphicsAllocation &gfxAllocation) override;
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
void processResidency(ResidencyContainer *allocationsForResidency) override;
void writeMemory(GraphicsAllocation &gfxAllocation);
// Family specific version
void submitLRCA(EngineType engineOrdinal, const MiContextDescriptorReg &contextDescriptor);
void pollForCompletion(EngineType engineOrdinal);

View File

@@ -206,6 +206,15 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
DEBUG_BREAK_IF(!engineInfo.pLRCA);
}
if (this->dispatchMode == CommandStreamReceiver::DispatchMode::ImmediateDispatch) {
makeResident(*batchBuffer.commandBufferAllocation);
} else {
allocationsForResidency->push_back(batchBuffer.commandBufferAllocation);
batchBuffer.commandBufferAllocation->residencyTaskCount = this->taskCount;
}
processResidency(allocationsForResidency);
// Write our batch buffer
auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
auto currentOffset = batchBuffer.usedSize;
@@ -349,6 +358,7 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
}
pollForCompletion(engineOrdinal);
return 0;
}
@@ -378,37 +388,50 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion(EngineType engineO
// Makes an allocation resident for the current task count.
// When the allocation has not yet been stamped with the current taskCount, its
// contents are dumped to the AUB stream (a "ppgtt:" comment, a PPGTT reservation
// and a memory write for every chunk reported by the page walk) and the
// allocation is pushed to the memory manager's residency list. Afterwards the
// allocation's residencyTaskCount is set to the current taskCount.
// Note: the early return below leaves the function entirely, so an empty or
// non-AUB-writable allocation is neither pushed nor re-stamped.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::makeResident(GraphicsAllocation &gfxAllocation) {
// Skip allocations already stamped with (at least) the current task count.
if (gfxAllocation.residencyTaskCount < (int)this->taskCount) {
auto cpuAddress = gfxAllocation.getUnderlyingBuffer();
auto gpuAddress = gfxAllocation.getGpuAddress();
auto size = gfxAllocation.getUnderlyingBufferSize();
// Nothing to dump for zero-sized allocations or ones with allowAubFileWrite unset.
// NOTE(review): the C-style cast assumes gfxAllocation really is a MemoryAllocation - confirm.
if (size == 0 || !(((MemoryAllocation *)&gfxAllocation)->allowAubFileWrite))
return;
{
// Annotate the stream with the GPU address, formatted as hex with a base prefix.
std::ostringstream str;
str << "ppgtt: " << std::hex << std::showbase << gpuAddress;
stream.addComment(str.str().c_str());
}
// Callback handed to ppgtt.pageWalk; presumably invoked once per physical chunk - TODO confirm.
// Note: the `size` parameter shadows the outer `size` inside the lambda.
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset) {
static const size_t pageSize = 4096;
// Align both the virtual and the physical address down to a page boundary.
auto vmAddr = (static_cast<uintptr_t>(gpuAddress) + offset) & ~(pageSize - 1);
auto pAddr = physAddress & ~(pageSize - 1);
AUB::reserveAddressPPGTT(stream, vmAddr, pageSize, pAddr);
// Record the chunk's data, read from the CPU buffer at the same offset.
AUB::addMemoryWrite(stream, physAddress,
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(cpuAddress) + offset),
size, AubMemDump::AddressSpaceValues::TraceNonlocal);
};
ppgtt.pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, walker);
this->getMemoryManager()->pushAllocationForResidency(&gfxAllocation);
}
// Stamp the allocation with the current task count.
gfxAllocation.residencyTaskCount = (int)this->taskCount;
}
// Dumps the contents of an allocation into the AUB stream.
// Emits a "ppgtt: <gpu address>" comment, then walks the PPGTT range covering
// the allocation; for every chunk reported by the walk it reserves the page
// mapping and records a memory write with the data from the CPU-side buffer.
// Does nothing when the allocation is zero-sized or has allowAubFileWrite unset.
// NOTE(review): the C-style cast assumes gfxAllocation really is a
// MemoryAllocation - confirm against callers.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation) {
    auto cpuAddress = gfxAllocation.getUnderlyingBuffer();
    auto gpuAddress = gfxAllocation.getGpuAddress();
    auto size = gfxAllocation.getUnderlyingBufferSize();

    if (size == 0 || !(((MemoryAllocation *)&gfxAllocation)->allowAubFileWrite))
        return;

    {
        // Annotate the stream with the GPU address, formatted as hex with a base prefix.
        std::ostringstream str;
        str << "ppgtt: " << std::hex << std::showbase << gpuAddress;
        stream.addComment(str.str().c_str());
    }

    // Callback handed to ppgtt.pageWalk. The chunk size parameter is named
    // distinctly so it no longer shadows the allocation-wide `size` above.
    PageWalker walker = [&](uint64_t physAddress, size_t chunkSize, size_t offset) {
        static const size_t pageSize = 4096;
        // Build the physical mask in 64 bits: where size_t is 32 bits wide,
        // ~(pageSize - 1) would zero-extend and wipe the upper half of physAddress.
        static const uint64_t physPageMask = ~static_cast<uint64_t>(pageSize - 1);

        // Align both the virtual and the physical address down to a page boundary.
        auto vmAddr = (static_cast<uintptr_t>(gpuAddress) + offset) & ~(pageSize - 1);
        auto pAddr = physAddress & physPageMask;

        AUB::reserveAddressPPGTT(stream, vmAddr, pageSize, pAddr);
        // Record the chunk's data, read from the CPU buffer at the same offset.
        AUB::addMemoryWrite(stream, physAddress,
                            reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(cpuAddress) + offset),
                            chunkSize, AubMemDump::AddressSpaceValues::TraceNonlocal);
    };
    ppgtt.pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, walker);
}
// Writes every allocation of a residency list to the AUB stream and stamps it
// with the current task count.
// When allocationsForResidency is null, the memory manager's residency
// allocations are used instead of a caller-supplied container.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::processResidency(ResidencyContainer *allocationsForResidency) {
    auto &allocationsToWrite = (allocationsForResidency != nullptr)
                                   ? *allocationsForResidency
                                   : this->getMemoryManager()->getResidencyAllocations();

    for (auto &allocation : allocationsToWrite) {
        // Dump first, then mark the allocation as resident for this task.
        this->writeMemory(*allocation);
        allocation->residencyTaskCount = static_cast<int>(this->taskCount);
    }
}
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::makeNonResident(GraphicsAllocation &gfxAllocation) {
if (gfxAllocation.residencyTaskCount != ObjectNotResident) {

View File

@@ -72,12 +72,16 @@ void CommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) {
gfxAllocation.residencyTaskCount = ObjectNotResident;
}
void CommandStreamReceiver::makeSurfacePackNonResident() {
auto &surfacesForResidency = getMemoryManager()->getResidencyAllocations();
for (auto &surface : surfacesForResidency) {
void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer *allocationsForResidency) {
auto &residencyAllocations = allocationsForResidency ? *allocationsForResidency : this->getMemoryManager()->getResidencyAllocations();
for (auto &surface : residencyAllocations) {
this->makeNonResident(*surface);
}
getMemoryManager()->clearResidencyAllocations();
if (allocationsForResidency) {
residencyAllocations.clear();
} else {
this->getMemoryManager()->clearResidencyAllocations();
}
this->processEviction();
}

View File

@@ -64,7 +64,7 @@ class CommandStreamReceiver {
virtual void makeCoherent(void *address, size_t length){};
virtual void makeResident(GraphicsAllocation &gfxAllocation);
virtual void makeNonResident(GraphicsAllocation &gfxAllocation);
void makeSurfacePackNonResident();
void makeSurfacePackNonResident(ResidencyContainer *allocationsForResidency);
virtual void processResidency(ResidencyContainer *allocationsForResidency) {}
virtual void processEviction();

View File

@@ -336,7 +336,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
flushStamp->setStamp(this->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr));
this->latestFlushedTaskCount = this->taskCount + 1;
this->makeSurfacePackNonResident();
this->makeSurfacePackNonResident(nullptr);
} else {
auto commandBuffer = new CommandBuffer;
commandBuffer->batchBuffer = batchBuffer;
@@ -348,7 +348,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
this->submissionAggregator->recordCommandBuffer(commandBuffer);
}
} else {
this->makeSurfacePackNonResident();
this->makeSurfacePackNonResident(nullptr);
}
//check if we are not over the budget, if we are do implicit flush
@@ -436,13 +436,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
this->latestFlushedTaskCount = lastTaskCount;
this->flushStamp->setStamp(flushStamp);
for (auto &graphicsAllocation : surfacesForSubmit) {
this->makeNonResident(*graphicsAllocation);
}
surfacesForSubmit.clear();
this->makeSurfacePackNonResident(&surfacesForSubmit);
resourcePackage.clear();
this->processEviction();
}
this->totalMemoryUsed = 0;
}