[26/n] Internal 4GB allocator.

- Change the way we handle blocked commands.
- Instead of allocating a CPU pointer and populating it with commands, create
a real IndirectHeap that may later be submitted to the GPU.
- This removes a lot of copy operations that were happening at submit time.
- For device enqueue, this requires DSH & SSH to be passed directly to the
underlying commands; in that scenario the device queue buffers are not used.

Change-Id: I1124a8edbb46777ea7f7d3a5946f302e7fdf9665
This commit is contained in:
Mrozek, Michal
2018-04-05 15:12:28 +02:00
committed by sys_ocldev
parent 100f559daa
commit ffa9b097f5
20 changed files with 331 additions and 319 deletions

View File

@@ -239,35 +239,7 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
}
if (!heapMemory) {
size_t reservedSize = 0;
auto finalHeapSize = defaultHeapSize;
minRequiredSize += reservedSize;
finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
if (!heapMemory) {
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
} else {
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
}
heapMemory->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
if (IndirectHeap::SURFACE_STATE == heapType) {
DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
finalHeapSize = maxSshSize;
}
if (heap) {
heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
heap->replaceGraphicsAllocation(heapMemory);
} else {
heap = new IndirectHeap(heapMemory);
heap->overrideMaxSize(finalHeapSize);
}
allocateHeapMemory(heapType, minRequiredSize, heap);
}
return *heap;
@@ -650,4 +622,37 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
return true;
}
// Obtains backing memory for an indirect heap and attaches it to indirectHeap.
//
// heapType        - kind of heap being backed; SURFACE_STATE gets special size handling.
// minRequiredSize - minimum number of bytes the caller needs in the heap.
// indirectHeap    - in/out: when non-null the existing heap is re-pointed at the new
//                   memory, when null a new IndirectHeap is created and stored here.
//
// NOTE(review): the result of allocateGraphicsMemory is used without a null check;
// confirm whether the memory manager can return nullptr on this path.
void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType,
                                      size_t minRequiredSize, IndirectHeap *&indirectHeap) {
    auto memoryManager = device->getMemoryManager();

    size_t reservedSize = 0;
    minRequiredSize += reservedSize;

    // Never go below the default heap size, and keep the size page aligned.
    auto heapSize = defaultHeapSize;
    heapSize = alignUp(std::max(heapSize, minRequiredSize), MemoryConstants::pageSize);

    // Prefer a reusable allocation; fall back to allocating new graphics memory.
    auto allocation = memoryManager->obtainReusableAllocation(heapSize).release();
    if (allocation) {
        // A reused allocation may be larger than requested - expose all of it.
        heapSize = std::max(allocation->getUnderlyingBufferSize(), heapSize);
    } else {
        allocation = memoryManager->allocateGraphicsMemory(heapSize, MemoryConstants::pageSize);
    }
    allocation->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);

    // Surface state heaps always report maxSshSize, regardless of the backing size.
    if (heapType == IndirectHeap::SURFACE_STATE) {
        DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
        heapSize = maxSshSize;
    }

    if (!indirectHeap) {
        indirectHeap = new IndirectHeap(allocation);
        indirectHeap->overrideMaxSize(heapSize);
        return;
    }

    indirectHeap->replaceBuffer(allocation->getUnderlyingBuffer(), heapSize);
    indirectHeap->replaceGraphicsAllocation(allocation);
}
} // namespace OCLRT

View File

@@ -336,6 +336,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
size_t minRequiredSize = 0u);
void allocateHeapMemory(IndirectHeap::Type heapType,
size_t minRequiredSize, IndirectHeap *&indirectHeap);
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
cl_command_queue_properties getCommandQueueProperties() const {

View File

@@ -275,6 +275,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
multiDispatchInfo.begin()->getKernel(),
(uint32_t)multiDispatchInfo.size(),
taskCount,
@@ -297,7 +298,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
*this,
*devQueueHw,
preemption,
scheduler);
scheduler,
&getIndirectHeap(IndirectHeap::SURFACE_STATE),
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
scheduler.makeResident(commandStreamReceiver);

View File

@@ -119,13 +119,6 @@ inline cl_uint computeDimensions(const size_t workItems[3]) {
return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1;
}
// Creates an IndirectHeap over freshly allocated, page-aligned memory.
// The heap size is whatever calc(args...) returns; the caller receives the raw
// IndirectHeap pointer produced by new.
template <typename SizeAndAllocCalcT, typename... CalcArgsT>
IndirectHeap *allocateIndirectHeap(SizeAndAllocCalcT &&calc, CalcArgsT &&... args) {
    const size_t heapSize = calc(std::forward<CalcArgsT>(args)...);
    const size_t pageAlignment = MemoryConstants::pageSize;
    auto storage = alignedMalloc(heapSize, pageAlignment);
    return new IndirectHeap(storage, heapSize);
}
template <typename GfxFamily>
class GpgpuWalkerHelper {
public:
@@ -227,7 +220,9 @@ class GpgpuWalkerHelper {
CommandQueue &commandQueue,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler);
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh);
};
template <typename GfxFamily, uint32_t eventType>

View File

@@ -458,20 +458,27 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
using KCH = KernelCommandsHelper<GfxFamily>;
commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
if (executionModelKernel) {
uint32_t offsetDsh = commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset();
uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;
dsh = allocateIndirectHeap([&multiDispatchInfo, offsetDsh] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo) + KCH::getTotalSizeRequiredIOH(multiDispatchInfo) + offsetDsh; });
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE,
commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(),
dsh);
dsh->getSpace(colorCalcSize);
ioh = dsh;
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE,
KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*(multiDispatchInfo.begin()->getKernel())) +
KCH::getTotalSizeRequiredSSH(multiDispatchInfo),
ssh);
} else {
dsh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo); });
ioh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIOH(multiDispatchInfo); });
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, KCH::getTotalSizeRequiredDSH(multiDispatchInfo), dsh);
commandQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, KCH::getTotalSizeRequiredIOH(multiDispatchInfo), ioh);
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, KCH::getTotalSizeRequiredSSH(multiDispatchInfo), ssh);
}
ssh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredSSH(multiDispatchInfo); });
using UniqueIH = std::unique_ptr<IndirectHeap>;
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh));
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh),
*commandQueue.getDevice().getMemoryManager());
if (executionModelKernel) {
(*blockedCommandsData)->doNotFreeISH = true;
}
@@ -671,7 +678,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
CommandQueue &commandQueue,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler) {
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh) {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
@@ -679,13 +688,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
OCLRT::LinearStream *commandStream = nullptr;
OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
OCLRT::IndirectHeap *ioh = nullptr;
commandStream = &commandQueue.getCS(0);
// note : below code assumes that caller to dispatchScheduler "preallocated" memory
// required for execution model in below heap managers
dsh = devQueueHw.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ssh = &commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE);
bool dcFlush = false;
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);