mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Add support for batched dispatch to userspace AUBs
This commit aims to add support for batched dispatch, but does not yet make it the default mode for AubCSR. Change-Id: I4dc366ec5f01adf2c4793009da2100ba0230c60a
This commit is contained in:
committed by
sys_ocldev
parent
920d952a4a
commit
7c42353c4c
@@ -40,6 +40,9 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
|
||||
void makeResident(GraphicsAllocation &gfxAllocation) override;
|
||||
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
|
||||
|
||||
void processResidency(ResidencyContainer *allocationsForResidency) override;
|
||||
void writeMemory(GraphicsAllocation &gfxAllocation);
|
||||
|
||||
// Family specific version
|
||||
void submitLRCA(EngineType engineOrdinal, const MiContextDescriptorReg &contextDescriptor);
|
||||
void pollForCompletion(EngineType engineOrdinal);
|
||||
|
||||
@@ -206,6 +206,15 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
|
||||
DEBUG_BREAK_IF(!engineInfo.pLRCA);
|
||||
}
|
||||
|
||||
if (this->dispatchMode == CommandStreamReceiver::DispatchMode::ImmediateDispatch) {
|
||||
makeResident(*batchBuffer.commandBufferAllocation);
|
||||
} else {
|
||||
allocationsForResidency->push_back(batchBuffer.commandBufferAllocation);
|
||||
batchBuffer.commandBufferAllocation->residencyTaskCount = this->taskCount;
|
||||
}
|
||||
|
||||
processResidency(allocationsForResidency);
|
||||
|
||||
// Write our batch buffer
|
||||
auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
|
||||
auto currentOffset = batchBuffer.usedSize;
|
||||
@@ -349,6 +358,7 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
|
||||
}
|
||||
|
||||
pollForCompletion(engineOrdinal);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -378,37 +388,50 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion(EngineType engineO
|
||||
// Marks `gfxAllocation` as resident for the current task count.
//
// As rendered here, when the allocation's residencyTaskCount is lower than
// this->taskCount the body dumps the allocation into the AUB stream (comment,
// PPGTT reservation and memory write per page-walk chunk) and then calls
// pushAllocationForResidency() on the memory manager. In every non-early-return
// path residencyTaskCount is finally set to this->taskCount.
//
// NOTE(review): this page is a diff rendering whose +/- markers were lost. The
// hunk header (-378,37 +388,50) and the identical dump code in writeMemory()
// further down suggest the dump/page-walk rows shown inside this function are
// the ones this commit removes - confirm against the repository.
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::makeResident(GraphicsAllocation &gfxAllocation) {
|
||||
// Only act when the allocation has not yet been stamped with this task count.
if (gfxAllocation.residencyTaskCount < (int)this->taskCount) {
|
||||
auto cpuAddress = gfxAllocation.getUnderlyingBuffer();
|
||||
auto gpuAddress = gfxAllocation.getGpuAddress();
|
||||
auto size = gfxAllocation.getUnderlyingBufferSize();
|
||||
|
||||
// Nothing to dump for empty allocations or ones flagged as not AUB-writable.
// The C-style downcast assumes the object really is a MemoryAllocation - TODO confirm.
// Note: this return also skips the residencyTaskCount update at the end.
if (size == 0 || !(((MemoryAllocation *)&gfxAllocation)->allowAubFileWrite))
|
||||
return;
|
||||
|
||||
// Annotate the stream with the GPU address, formatted as 0x-prefixed hex.
{
|
||||
std::ostringstream str;
|
||||
str << "ppgtt: " << std::hex << std::showbase << gpuAddress;
|
||||
stream.addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
// Callback invoked by ppgtt.pageWalk(); its `size` parameter shadows the
// outer `size`, so the write below uses the per-call size passed by the walker.
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset) {
|
||||
static const size_t pageSize = 4096;
|
||||
// Round both the virtual and the physical address down to a 4096-byte boundary.
auto vmAddr = (static_cast<uintptr_t>(gpuAddress) + offset) & ~(pageSize - 1);
|
||||
auto pAddr = physAddress & ~(pageSize - 1);
|
||||
|
||||
AUB::reserveAddressPPGTT(stream, vmAddr, pageSize, pAddr);
|
||||
|
||||
// Source bytes are taken from the CPU buffer at the same offset.
AUB::addMemoryWrite(stream, physAddress,
|
||||
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(cpuAddress) + offset),
|
||||
size, AubMemDump::AddressSpaceValues::TraceNonlocal);
|
||||
};
|
||||
ppgtt.pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, walker);
|
||||
|
||||
this->getMemoryManager()->pushAllocationForResidency(&gfxAllocation);
|
||||
}
|
||||
gfxAllocation.residencyTaskCount = (int)this->taskCount;
|
||||
}
|
||||
|
||||
// Dumps the contents of `gfxAllocation` into the AUB stream.
//
// Reads the allocation's CPU pointer, GPU address and size, writes a
// "ppgtt: <gpuAddress>" comment to `stream`, then page-walks the GPU range and,
// for every chunk reported by the walker, calls AUB::reserveAddressPPGTT and
// AUB::addMemoryWrite. Returns without writing anything when the size is zero or
// allowAubFileWrite is false. Does not touch residencyTaskCount.
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation) {
|
||||
auto cpuAddress = gfxAllocation.getUnderlyingBuffer();
|
||||
auto gpuAddress = gfxAllocation.getGpuAddress();
|
||||
auto size = gfxAllocation.getUnderlyingBufferSize();
|
||||
|
||||
// Skip empty allocations and ones flagged as not AUB-writable.
// NOTE(review): the C-style downcast assumes every GraphicsAllocation reaching
// this point is a MemoryAllocation - confirm against callers.
if (size == 0 || !(((MemoryAllocation *)&gfxAllocation)->allowAubFileWrite))
|
||||
return;
|
||||
|
||||
// Annotate the stream with the GPU address, formatted as 0x-prefixed hex.
{
|
||||
std::ostringstream str;
|
||||
str << "ppgtt: " << std::hex << std::showbase << gpuAddress;
|
||||
stream.addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
// Callback invoked by ppgtt.pageWalk(). Its `size` parameter shadows the outer
// `size`, so addMemoryWrite receives the per-call size supplied by the walker,
// not the whole allocation size.
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset) {
|
||||
static const size_t pageSize = 4096;
|
||||
// Round both the virtual and the physical address down to a 4096-byte boundary.
auto vmAddr = (static_cast<uintptr_t>(gpuAddress) + offset) & ~(pageSize - 1);
|
||||
auto pAddr = physAddress & ~(pageSize - 1);
|
||||
|
||||
AUB::reserveAddressPPGTT(stream, vmAddr, pageSize, pAddr);
|
||||
|
||||
// Source bytes are taken from the CPU buffer at the same offset.
AUB::addMemoryWrite(stream, physAddress,
|
||||
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(cpuAddress) + offset),
|
||||
size, AubMemDump::AddressSpaceValues::TraceNonlocal);
|
||||
};
|
||||
// Walk the whole allocation starting at offset 0.
ppgtt.pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, walker);
|
||||
}
|
||||
|
||||
// Writes every allocation of a residency list into the AUB stream.
//
// allocationsForResidency: list to process; when it is null, the list returned
// by the memory manager's getResidencyAllocations() is used instead.
// Each allocation is passed to writeMemory() and then stamped with the current
// task count.
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::processResidency(ResidencyContainer *allocationsForResidency) {
|
||||
// Fall back to the memory manager's own list when the caller passes nullptr.
auto &residencyAllocations = allocationsForResidency ? *allocationsForResidency : this->getMemoryManager()->getResidencyAllocations();
|
||||
|
||||
for (auto &gfxAllocation : residencyAllocations) {
|
||||
writeMemory(*gfxAllocation);
|
||||
// Stamped even when writeMemory() returned early without writing.
gfxAllocation->residencyTaskCount = (int)this->taskCount;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::makeNonResident(GraphicsAllocation &gfxAllocation) {
|
||||
if (gfxAllocation.residencyTaskCount != ObjectNotResident) {
|
||||
|
||||
@@ -72,12 +72,16 @@ void CommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) {
|
||||
gfxAllocation.residencyTaskCount = ObjectNotResident;
|
||||
}
|
||||
|
||||
// NOTE(review): this hunk (-72,12 +72,16) is rendered without +/- markers, so the
// pre-commit and post-commit versions of makeSurfacePackNonResident are
// interleaved below. The old/new labels in the comments that follow are inferred
// from the hunk line counts - confirm against the repository.
//
// Presumably removed: parameterless version that always iterated the memory
// manager's residency list.
void CommandStreamReceiver::makeSurfacePackNonResident() {
|
||||
auto &surfacesForResidency = getMemoryManager()->getResidencyAllocations();
|
||||
for (auto &surface : surfacesForResidency) {
|
||||
// Presumably added: version taking an optional list. When
// allocationsForResidency is null, the memory manager's residency list is used.
void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer *allocationsForResidency) {
|
||||
auto &residencyAllocations = allocationsForResidency ? *allocationsForResidency : this->getMemoryManager()->getResidencyAllocations();
|
||||
for (auto &surface : residencyAllocations) {
|
||||
// Shared by both versions: make each listed allocation non-resident.
this->makeNonResident(*surface);
|
||||
}
|
||||
// Presumably removed: unconditional clear of the memory manager's list.
getMemoryManager()->clearResidencyAllocations();
|
||||
// Presumably added: clear whichever list was iterated - the caller's container
// when one was supplied, otherwise the memory manager's list.
if (allocationsForResidency) {
|
||||
residencyAllocations.clear();
|
||||
} else {
|
||||
this->getMemoryManager()->clearResidencyAllocations();
|
||||
}
|
||||
// Shared by both versions: processEviction() is called after the list is cleared.
this->processEviction();
|
||||
}
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ class CommandStreamReceiver {
|
||||
virtual void makeCoherent(void *address, size_t length){};
|
||||
virtual void makeResident(GraphicsAllocation &gfxAllocation);
|
||||
virtual void makeNonResident(GraphicsAllocation &gfxAllocation);
|
||||
void makeSurfacePackNonResident();
|
||||
void makeSurfacePackNonResident(ResidencyContainer *allocationsForResidency);
|
||||
virtual void processResidency(ResidencyContainer *allocationsForResidency) {}
|
||||
virtual void processEviction();
|
||||
|
||||
|
||||
@@ -336,7 +336,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
|
||||
flushStamp->setStamp(this->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr));
|
||||
this->latestFlushedTaskCount = this->taskCount + 1;
|
||||
this->makeSurfacePackNonResident();
|
||||
this->makeSurfacePackNonResident(nullptr);
|
||||
} else {
|
||||
auto commandBuffer = new CommandBuffer;
|
||||
commandBuffer->batchBuffer = batchBuffer;
|
||||
@@ -348,7 +348,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
this->submissionAggregator->recordCommandBuffer(commandBuffer);
|
||||
}
|
||||
} else {
|
||||
this->makeSurfacePackNonResident();
|
||||
this->makeSurfacePackNonResident(nullptr);
|
||||
}
|
||||
|
||||
//check if we are not over the budget, if we are do implicit flush
|
||||
@@ -436,13 +436,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
||||
|
||||
this->latestFlushedTaskCount = lastTaskCount;
|
||||
this->flushStamp->setStamp(flushStamp);
|
||||
|
||||
for (auto &graphicsAllocation : surfacesForSubmit) {
|
||||
this->makeNonResident(*graphicsAllocation);
|
||||
}
|
||||
surfacesForSubmit.clear();
|
||||
this->makeSurfacePackNonResident(&surfacesForSubmit);
|
||||
resourcePackage.clear();
|
||||
this->processEviction();
|
||||
}
|
||||
this->totalMemoryUsed = 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user