[26/n] Internal 4GB allocator.

- change the way we handle blocked commands.
- instead of allocating a CPU pointer and populating it with commands, create
a real IndirectHeap that may later be submitted to the GPU
- that removes a lot of copy operations that were happening at submit time
- for device enqueue, this requires dsh & ssh to be passed directly to the
underlying commands; in that scenario device queue buffers are not used

Change-Id: I1124a8edbb46777ea7f7d3a5946f302e7fdf9665
This commit is contained in:
Mrozek, Michal
2018-04-05 15:12:28 +02:00
committed by sys_ocldev
parent 100f559daa
commit ffa9b097f5
20 changed files with 331 additions and 319 deletions

View File

@@ -239,35 +239,7 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
}
if (!heapMemory) {
size_t reservedSize = 0;
auto finalHeapSize = defaultHeapSize;
minRequiredSize += reservedSize;
finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
if (!heapMemory) {
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
} else {
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
}
heapMemory->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
if (IndirectHeap::SURFACE_STATE == heapType) {
DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
finalHeapSize = maxSshSize;
}
if (heap) {
heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
heap->replaceGraphicsAllocation(heapMemory);
} else {
heap = new IndirectHeap(heapMemory);
heap->overrideMaxSize(finalHeapSize);
}
allocateHeapMemory(heapType, minRequiredSize, heap);
}
return *heap;
@@ -650,4 +622,37 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
return true;
}
// Obtains a graphics allocation for an indirect heap of the given type and
// attaches it to indirectHeap.
//
// heapType        - kind of heap being set up; for SURFACE_STATE the reported
//                   heap size is always forced to maxSshSize.
// minRequiredSize - minimum number of bytes the caller needs in the heap.
// indirectHeap    - in/out. When non-null, the existing heap object is re-pointed
//                   at the newly obtained allocation. When null, a new IndirectHeap
//                   is created with `new` and handed back through this reference;
//                   nothing in this function releases it.
void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType,
size_t minRequiredSize, IndirectHeap *&indirectHeap) {
auto memoryManager = device->getMemoryManager();
// reservedSize is always 0 here, so the addition below leaves minRequiredSize unchanged.
size_t reservedSize = 0;
auto finalHeapSize = defaultHeapSize;
minRequiredSize += reservedSize;
// Never go below defaultHeapSize; align the result up to MemoryConstants::pageSize.
finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
// Prefer an allocation from the reusable pool; allocate fresh memory only if none is returned.
auto heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
if (!heapMemory) {
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
} else {
// A reused allocation may be bigger than requested; use the larger of the two sizes.
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
}
// NOTE(review): heapMemory is dereferenced without a null check; presumably
// allocateGraphicsMemory is expected not to fail on this path - confirm.
heapMemory->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
if (IndirectHeap::SURFACE_STATE == heapType) {
// Surface state heaps report maxSshSize regardless of the size computed above.
DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
finalHeapSize = maxSshSize;
}
if (indirectHeap) {
// Reuse the caller's heap object: swap in the new buffer and its allocation.
indirectHeap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
indirectHeap->replaceGraphicsAllocation(heapMemory);
} else {
// No heap object yet: create one on top of the allocation and set its max size.
indirectHeap = new IndirectHeap(heapMemory);
indirectHeap->overrideMaxSize(finalHeapSize);
}
}
} // namespace OCLRT

View File

@@ -336,6 +336,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
size_t minRequiredSize = 0u);
void allocateHeapMemory(IndirectHeap::Type heapType,
size_t minRequiredSize, IndirectHeap *&indirectHeap);
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
cl_command_queue_properties getCommandQueueProperties() const {

View File

@@ -275,6 +275,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
multiDispatchInfo.begin()->getKernel(),
(uint32_t)multiDispatchInfo.size(),
taskCount,
@@ -297,7 +298,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
*this,
*devQueueHw,
preemption,
scheduler);
scheduler,
&getIndirectHeap(IndirectHeap::SURFACE_STATE),
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
scheduler.makeResident(commandStreamReceiver);

View File

@@ -119,13 +119,6 @@ inline cl_uint computeDimensions(const size_t workItems[3]) {
return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1;
}
// Creates a new IndirectHeap on top of CPU memory obtained from alignedMalloc.
//
// calc - callable invoked with the forwarded args; its result is used as the
//        heap size in bytes.
// args - arguments forwarded to calc.
//
// Returns a raw pointer to the heap created with `new`. Neither the heap object
// nor the alignedMalloc buffer is released here.
template <typename SizeAndAllocCalcT, typename... CalcArgsT>
IndirectHeap *allocateIndirectHeap(SizeAndAllocCalcT &&calc, CalcArgsT &&... args) {
// The backing buffer is aligned to MemoryConstants::pageSize.
size_t alignment = MemoryConstants::pageSize;
size_t size = calc(std::forward<CalcArgsT>(args)...);
return new IndirectHeap(alignedMalloc(size, alignment), size);
}
template <typename GfxFamily>
class GpgpuWalkerHelper {
public:
@@ -227,7 +220,9 @@ class GpgpuWalkerHelper {
CommandQueue &commandQueue,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler);
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh);
};
template <typename GfxFamily, uint32_t eventType>

View File

@@ -458,20 +458,27 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
using KCH = KernelCommandsHelper<GfxFamily>;
commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
if (executionModelKernel) {
uint32_t offsetDsh = commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset();
uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;
dsh = allocateIndirectHeap([&multiDispatchInfo, offsetDsh] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo) + KCH::getTotalSizeRequiredIOH(multiDispatchInfo) + offsetDsh; });
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE,
commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(),
dsh);
dsh->getSpace(colorCalcSize);
ioh = dsh;
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE,
KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*(multiDispatchInfo.begin()->getKernel())) +
KCH::getTotalSizeRequiredSSH(multiDispatchInfo),
ssh);
} else {
dsh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo); });
ioh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIOH(multiDispatchInfo); });
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, KCH::getTotalSizeRequiredDSH(multiDispatchInfo), dsh);
commandQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, KCH::getTotalSizeRequiredIOH(multiDispatchInfo), ioh);
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, KCH::getTotalSizeRequiredSSH(multiDispatchInfo), ssh);
}
ssh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredSSH(multiDispatchInfo); });
using UniqueIH = std::unique_ptr<IndirectHeap>;
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh));
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh),
*commandQueue.getDevice().getMemoryManager());
if (executionModelKernel) {
(*blockedCommandsData)->doNotFreeISH = true;
}
@@ -671,7 +678,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
CommandQueue &commandQueue,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler) {
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh) {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
@@ -679,13 +688,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
OCLRT::LinearStream *commandStream = nullptr;
OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
OCLRT::IndirectHeap *ioh = nullptr;
commandStream = &commandQueue.getCS(0);
// note : below code assumes that caller to dispatchScheduler "preallocated" memory
// required for execution model in below heap managers
dsh = devQueueHw.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ssh = &commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE);
bool dcFlush = false;
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);

View File

@@ -156,12 +156,12 @@ void DeviceQueue::initDeviceQueue() {
igilEventPool->m_size = caps.maxOnDeviceEvents;
}
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
setupIndirectState(surfaceStateHeap, parentKernel, parentCount);
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
}
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
return;
}
@@ -173,7 +173,7 @@ void DeviceQueue::resetDeviceQueue() {
return;
}
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
return;
}

View File

@@ -81,9 +81,9 @@ class DeviceQueue : public BaseObject<_device_queue> {
size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet);
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
@@ -93,7 +93,7 @@ class DeviceQueue : public BaseObject<_device_queue> {
}
virtual void resetDeviceQueue();
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode);
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh);
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
void acquireEMCriticalSection() {

View File

@@ -72,11 +72,11 @@ class DeviceQueueHw : public DeviceQueue {
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
void resetDeviceQueue() override;
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override;
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
uint32_t getSchedulerReturnInstance() {
return igilQueue->m_controls.m_SchedulerEarlyReturn;

View File

@@ -290,11 +290,8 @@ IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type)
}
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
void *pDSH = dshBuffer->getUnderlyingBuffer();
// Heap and dshBuffer shoud be the same if heap is created
DEBUG_BREAK_IF(!((heaps[IndirectHeap::DYNAMIC_STATE] == nullptr) || (heaps[IndirectHeap::DYNAMIC_STATE]->getCpuBase() == pDSH)));
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
void *pDSH = dynamicStateHeap.getCpuBase();
// Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries.
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
@@ -386,11 +383,13 @@ size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &sc
}
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(cmdQ,
*this,
preemptionMode,
scheduler);
scheduler,
ssh,
dsh);
return;
}

View File

@@ -28,6 +28,7 @@
#include "runtime/device_queue/device_queue.h"
#include "runtime/gtpin/gtpin_notify.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/memory_manager/surface.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/string.h"
@@ -35,13 +36,14 @@
namespace OCLRT {
KernelOperation::~KernelOperation() {
alignedFree(dsh->getCpuBase());
if (doNotFreeISH) {
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(dsh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
if (ioh.get() == dsh.get()) {
ioh.release();
} else {
alignedFree(ioh->getCpuBase());
}
alignedFree(ssh->getCpuBase());
if (ioh) {
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(ioh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
}
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(ssh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
alignedFree(commandStream->getCpuBase());
}
@@ -163,43 +165,9 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
//transfer the memory to commandStream of the queue.
memcpy_s(pDst, commandsSize, commandStream.getCpuBase(), commandsSize);
size_t requestedDshSize = kernelOperation->dsh->getUsed();
size_t requestedIohSize = kernelOperation->ioh->getUsed();
size_t requestedSshSize = kernelOperation->ssh->getUsed() + kernelOperation->surfaceStateHeapSizeEM;
IndirectHeap *dsh = nullptr;
IndirectHeap *ioh = nullptr;
IndirectHeap::Type trackedHeaps[] = {IndirectHeap::SURFACE_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE};
for (auto trackedHeap = 0u; trackedHeap < ARRAY_COUNT(trackedHeaps); trackedHeap++) {
if (commandQueue.getIndirectHeap(trackedHeaps[trackedHeap], 0).getUsed() > 0) {
commandQueue.releaseIndirectHeap(trackedHeaps[trackedHeap]);
}
}
if (executionModelKernel) {
dsh = devQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
ioh = dsh;
memcpy_s(dsh->getSpace(0), dsh->getAvailableSpace(), ptrOffset(kernelOperation->dsh->getCpuBase(), devQueue->colorCalcStateSize), kernelOperation->dsh->getUsed() - devQueue->colorCalcStateSize);
dsh->getSpace(kernelOperation->dsh->getUsed() - devQueue->colorCalcStateSize);
} else {
dsh = &commandQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, requestedDshSize);
ioh = &commandQueue.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, requestedIohSize);
memcpy_s(dsh->getCpuBase(), requestedDshSize, kernelOperation->dsh->getCpuBase(), kernelOperation->dsh->getUsed());
dsh->getSpace(requestedDshSize);
memcpy_s(ioh->getCpuBase(), requestedIohSize, kernelOperation->ioh->getCpuBase(), kernelOperation->ioh->getUsed());
ioh->getSpace(requestedIohSize);
}
IndirectHeap &ssh = commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, requestedSshSize);
memcpy_s(ssh.getCpuBase(), requestedSshSize, kernelOperation->ssh->getCpuBase(), kernelOperation->ssh->getUsed());
ssh.getSpace(kernelOperation->ssh->getUsed());
IndirectHeap *dsh = kernelOperation->dsh.get();
IndirectHeap *ioh = kernelOperation->ioh.get();
IndirectHeap *ssh = kernelOperation->ssh.get();
auto requiresCoherency = false;
for (auto &surface : surfaces) {
@@ -214,7 +182,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
if (executionModelKernel) {
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
devQueue->setupExecutionModelDispatch(ssh, kernel, kernelCount, taskCount, timestamp);
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount, taskCount, timestamp);
BuiltIns &builtIns = BuiltIns::getInstance();
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(commandQueue.getContext());
@@ -223,16 +191,18 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
devQueue->getStackBuffer(),
devQueue->getEventPoolBuffer(),
devQueue->getSlbBuffer(),
devQueue->getDshBuffer(),
dsh->getGraphicsAllocation(),
kernel->getKernelReflectionSurface(),
devQueue->getQueueStorageBuffer(),
ssh.getGraphicsAllocation(),
ssh->getGraphicsAllocation(),
devQueue->getDebugQueue());
devQueue->dispatchScheduler(
commandQueue,
scheduler,
preemptionMode);
preemptionMode,
ssh,
dsh);
scheduler.makeResident(commandStreamReceiver);
@@ -261,14 +231,13 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
offset,
*dsh,
*ioh,
ssh,
*ssh,
taskLevel,
dispatchFlags);
for (auto &surface : surfaces) {
surface->setCompletionStamp(completionStamp, nullptr, nullptr);
}
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
if (printfHandler) {
printfHandler.get()->printEnqueueOutput();
}

View File

@@ -39,6 +39,7 @@ class MemObj;
class Surface;
class PrintfHandler;
struct HwTimeStamps;
class MemoryManager;
enum MapOperationType {
MAP,
@@ -77,10 +78,11 @@ class CommandMapUnmap : public Command {
};
struct KernelOperation {
KernelOperation(std::unique_ptr<LinearStream> commandStream, std::unique_ptr<IndirectHeap> dsh, std::unique_ptr<IndirectHeap> ioh, std::unique_ptr<IndirectHeap> ssh)
KernelOperation(std::unique_ptr<LinearStream> commandStream, std::unique_ptr<IndirectHeap> dsh, std::unique_ptr<IndirectHeap> ioh, std::unique_ptr<IndirectHeap> ssh,
MemoryManager &memoryManager)
: commandStream(std::move(commandStream)), dsh(std::move(dsh)),
ioh(std::move(ioh)), ssh(std::move(ssh)),
surfaceStateHeapSizeEM(0), doNotFreeISH(false) {
surfaceStateHeapSizeEM(0), doNotFreeISH(false), memoryManager(memoryManager) {
}
~KernelOperation();
@@ -92,6 +94,7 @@ struct KernelOperation {
size_t surfaceStateHeapSizeEM;
bool doNotFreeISH;
MemoryManager &memoryManager;
};
class CommandComputeKernel : public Command {

View File

@@ -392,95 +392,55 @@ HWTEST_F(CommandQueueHwTest, GivenNotCompleteUserEventPassedToEnqueueWhenEventIs
}
typedef CommandQueueHwTest BlockedCommandQueueTest;
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUsedHeapsWhenBlockedCommandIsBeingSubmittedItReloadsThemToZeroToKeepProperOffsets) {
DebugManagerStateRestore debugStateRestore;
bool oldMemsetAllocationsFlag = MemoryManagement::memsetNewAllocations;
MemoryManagement::memsetNewAllocations = true;
DebugManager.flags.ForcePreemptionMode.set(-1); // allow default preemption mode
auto deviceWithDefaultPreemptionMode = std::unique_ptr<MockDevice>(DeviceHelper<>::create(nullptr));
this->pDevice->setPreemptionMode(deviceWithDefaultPreemptionMode->getPreemptionMode());
this->pDevice->getCommandStreamReceiver().setPreemptionCsrAllocation(deviceWithDefaultPreemptionMode->getPreemptionAllocation());
DebugManager.flags.DisableResourceRecycling.set(true);
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
UserEvent userEvent(context);
cl_event blockedEvent = &userEvent;
MockKernelWithInternals mockKernelWithInternals(*pDevice);
mockKernelWithInternals.kernelHeader.KernelHeapSize = sizeof(mockKernelWithInternals.kernelIsa);
auto mockKernel = mockKernelWithInternals.mockKernel;
IndirectHeap::Type heaps[] = {IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE, IndirectHeap::SURFACE_STATE};
size_t prealocatedHeapSize = 2 * 64 * KB;
for (auto heapType : heaps) {
auto &heap = pCmdQ->getIndirectHeap(heapType, prealocatedHeapSize);
heap.getSpace(16);
memset(heap.getCpuBase(), 0, prealocatedHeapSize);
}
// preallocating memsetted allocations to get predictable results
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
DebugManager.flags.DisableResourceRecycling.set(false);
std::set<void *> reusableHeaps;
for (unsigned int i = 0; i < 4; ++i) {
auto allocSize = prealocatedHeapSize;
void *mem = alignedMalloc(allocSize, 64);
reusableHeaps.insert(mem);
memset(mem, 0, allocSize);
std::unique_ptr<GraphicsAllocation> reusableAlloc{new MockGraphicsAllocation(mem, allocSize)};
pCmdQ->getDevice().getMemoryManager()->storeAllocation(std::move(reusableAlloc), REUSABLE_ALLOCATION);
}
// disable further allocation reuse
DebugManager.flags.DisableResourceRecycling.set(true);
size_t offset = 0;
size_t size = 1;
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // blocked command
cl_event blockedEvent = &userEvent;
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
userEvent.setStatus(CL_COMPLETE);
// make sure used heaps are from preallocated pool
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0).getCpuBase()));
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0).getCpuBase()));
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getCpuBase()));
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
std::unordered_map<int, std::vector<char>> blockedCommandHeaps;
int i = 0;
for (auto heapType : heaps) {
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
blockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getCpuBase()), reinterpret_cast<char *>(heap.getCpuBase()) + heap.getUsed());
EXPECT_EQ(0u, ioh.getUsed());
EXPECT_EQ(0u, dsh.getUsed());
EXPECT_EQ(0u, ssh.getUsed());
}
// prepare new heaps for nonblocked command
pCmdQ->releaseIndirectHeap(heapType);
++i;
}
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWithUsedHeapsWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
UserEvent userEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 0, nullptr, nullptr); // nonblocked command
i = 0;
std::unordered_map<int, std::vector<char>> nonblockedCommandHeaps;
for (auto heapType : heaps) {
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
nonblockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getCpuBase()), reinterpret_cast<char *>(heap.getCpuBase()) + heap.getUsed());
++i;
}
size_t offset = 0;
size_t size = 1;
// expecting blocked command to be programmed indentically to a non-blocked counterpart
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)],
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)]));
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)],
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)]));
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)],
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)]));
cl_event blockedEvent = &userEvent;
for (auto ptr : reusableHeaps) {
alignedFree(ptr);
}
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
BuiltIns::shutDown();
MemoryManagement::memsetNewAllocations = oldMemsetAllocationsFlag;
auto spaceToUse = 4u;
ioh.getSpace(spaceToUse);
dsh.getSpace(spaceToUse);
ssh.getSpace(spaceToUse);
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(spaceToUse, ioh.getUsed());
EXPECT_EQ(spaceToUse, dsh.getUsed());
EXPECT_EQ(spaceToUse, ssh.getUsed());
}
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) {

View File

@@ -616,6 +616,33 @@ TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetIndirectHeapIsCalle
EXPECT_EQ(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM, indirectHeapAllocation->getAllocationType());
}
// Checks that allocateHeapMemory, when given a null heap pointer, creates a new
// IndirectHeap that carries a graphics allocation. Runs once per heap type
// supplied as the test parameter.
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledThenHeapIsCreated) {
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
CommandQueue cmdQ(&context, pDevice, props);
// Start with no heap so the call has to create one.
IndirectHeap *indirectHeap = nullptr;
cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap);
EXPECT_NE(nullptr, indirectHeap);
EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation());
// The test releases both the graphics allocation and the heap object itself.
pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation());
delete indirectHeap;
}
// Checks that allocateHeapMemory keeps an already existing IndirectHeap object
// (the pointer is unchanged) and gives it a graphics allocation. Runs once per
// heap type supplied as the test parameter.
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledWithAlreadyAllocatedHeapThenGraphicsAllocationIsCreated) {
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
CommandQueue cmdQ(&context, pDevice, props);
// Pre-existing heap constructed with a null buffer pointer and a size of 100.
IndirectHeap heap(nullptr, 100);
IndirectHeap *indirectHeap = &heap;
cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap);
EXPECT_EQ(&heap, indirectHeap);
EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation());
// Only the graphics allocation is freed; the heap object lives on the stack.
pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation());
}
INSTANTIATE_TEST_CASE_P(
Device,
CommandQueueIndirectHeapTest,

View File

@@ -710,9 +710,9 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getSizeRequiredSSH(kernel);
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
EXPECT_EQ(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
EXPECT_EQ(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
EXPECT_EQ(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
delete blockedCommandsData;
}
@@ -745,9 +745,9 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
EXPECT_EQ(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
EXPECT_EQ(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
EXPECT_EQ(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
delete blockedCommandsData;
}

View File

@@ -534,7 +534,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectState) {
auto usedBeforeSSH = ssh->getUsed();
auto usedBeforeDSH = dsh->getUsed();
devQueueHw->setupIndirectState(*ssh, pKernel, 1);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1);
auto usedAfterSSH = ssh->getUsed();
auto usedAfterDSH = dsh->getUsed();
@@ -564,7 +564,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectStateSetsCorrectStartBlockID) {
uint32_t parentCount = 4;
devQueueHw->setupIndirectState(*ssh, pKernel, parentCount);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
@@ -594,7 +594,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectStateSetsCorrectDSHValues) {
uint32_t parentCount = 1;
devQueueHw->setupIndirectState(*ssh, pKernel, parentCount);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));

View File

@@ -40,7 +40,8 @@ TEST(DeviceQueueSimpleTest, setupExecutionModelDispatchDoesNothing) {
size_t size = 20;
IndirectHeap ssh(buffer, size);
devQueue.setupExecutionModelDispatch(ssh, nullptr, 0, 0, 0);
IndirectHeap dsh(buffer, size);
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0);
EXPECT_EQ(0u, ssh.getUsed());
@@ -320,7 +321,7 @@ TEST_F(DeviceQueueTest, dispatchScheduler) {
CommandQueue cmdQ(nullptr, nullptr, 0);
KernelInfo info;
MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device);
devQueue.dispatchScheduler(cmdQ, *kernel, device->getPreemptionMode());
devQueue.dispatchScheduler(cmdQ, *kernel, device->getPreemptionMode(), nullptr, nullptr);
delete kernel;
}

View File

@@ -449,60 +449,18 @@ class SurfaceMock : public Surface {
SurfaceMock(SurfaceMock *parent) : parent(parent){};
};
// Checks that submitting a kernel command whose DSH/IOH report more maximum
// available space than the command queue's heaps leaves the queue with heaps
// that are at least as large as the ones carried by the kernel operation.
TEST_F(InternalsEventTest, resizeCmdQueueHeapsWhenKernelOparationHeapsAreBigger) {
    CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);

    // Heaps currently owned by the command queue; used as the "before" baseline.
    IndirectHeap &cmdQueueDsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096);
    IndirectHeap &cmdQueueIoh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096);
    IndirectHeap &cmdQueueSsh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096);

    // Ask for twice what the queue's DSH can offer so the blocked heaps are bigger.
    const auto requestedSize = cmdQueueDsh.getMaxAvailableSpace() * 2;
    auto cmdStream = new LinearStream(alignedMalloc(requestedSize, requestedSize), requestedSize);

    // Builds a heap of the given size and immediately takes all of its available space.
    auto makeExhaustedHeap = [](size_t heapSize) -> IndirectHeap * {
        IndirectHeap *fullHeap = new IndirectHeap(alignedMalloc(heapSize, heapSize), heapSize);
        const auto remaining = fullHeap->getAvailableSpace();
        fullHeap->getSpace(remaining);
        return fullHeap;
    };

    IndirectHeap *dsh = makeExhaustedHeap(requestedSize);
    IndirectHeap *ioh = makeExhaustedHeap(requestedSize);
    IndirectHeap *ssh = makeExhaustedHeap(maxSshSize);

    // The kernel operation takes ownership of the stream and the three heaps.
    auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream),
                                               std::unique_ptr<IndirectHeap>(dsh),
                                               std::unique_ptr<IndirectHeap>(ioh),
                                               std::unique_ptr<IndirectHeap>(ssh));

    std::vector<Surface *> surfaces;
    surfaces.push_back(new SurfaceMock);

    PreemptionMode preemptionMode = pDevice->getPreemptionMode();
    auto cmdComputeKernel = new CommandComputeKernel(*pCmdQ,
                                                     pDevice->getCommandStreamReceiver(),
                                                     std::unique_ptr<KernelOperation>(kernelOperation),
                                                     surfaces,
                                                     false,
                                                     false,
                                                     false,
                                                     nullptr,
                                                     preemptionMode);

    // Before submission the queue heaps are smaller than the blocked DSH/IOH.
    EXPECT_LT(cmdQueueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace());
    EXPECT_LT(cmdQueueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace());
    EXPECT_EQ(maxSshSize, ssh->getMaxAvailableSpace());

    cmdComputeKernel->submit(0, false);

    // After submission every queue heap must be at least as large.
    EXPECT_GE(cmdQueueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace());
    EXPECT_GE(cmdQueueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace());
    EXPECT_GE(cmdQueueSsh.getMaxAvailableSpace(), ssh->getMaxAvailableSpace());

    delete pCmdQ;
    delete cmdComputeKernel;
}
TEST_F(InternalsEventTest, processBlockedCommandsKernelOperation) {
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
using UniqueIH = std::unique_ptr<IndirectHeap>;
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
UniqueIH(ioh), UniqueIH(ssh));
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
auto &csr = pDevice->getCommandStreamReceiver();
std::vector<Surface *> v;
@@ -534,12 +492,13 @@ TEST_F(InternalsEventTest, processBlockedCommandsAbortKernelOperation) {
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
using UniqueIH = std::unique_ptr<IndirectHeap>;
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
UniqueIH(ioh), UniqueIH(ssh));
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
auto &csr = pDevice->getCommandStreamReceiver();
std::vector<Surface *> v;
@@ -565,12 +524,13 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
using UniqueIH = std::unique_ptr<IndirectHeap>;
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
UniqueIH(ioh), UniqueIH(ssh));
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface();
pPrintfSurface->DataParamOffset = 0;
@@ -1477,12 +1437,13 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl
csr.flushStamp->setStamp(5);
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
using UniqueIH = std::unique_ptr<IndirectHeap>;
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
UniqueIH(ioh), UniqueIH(ssh));
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
std::vector<Surface *> v;
auto cmd = new CommandComputeKernel(*pCmdQ, csr, std::unique_ptr<KernelOperation>(blockedCommandsData), v, false, false, false, nullptr, preemptionMode);

View File

@@ -76,7 +76,9 @@ HWTEST_F(ExecutionModelSchedulerFixture, dispatchScheduler) {
*pCmdQ,
*pDevQueueHw,
pDevice->getPreemptionMode(),
scheduler);
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
@@ -192,7 +194,9 @@ HWTEST_F(ExecutionModelSchedulerFixture, dispatchSchedulerDoesNotUseStandardCmdQ
*pCmdQ,
*pDevQueueHw,
pDevice->getPreemptionMode(),
scheduler);
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);
@@ -223,7 +227,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, dispatchSchedulerWithEarlyReturnSetToF
*pCmdQ,
mockDevQueue,
device->getPreemptionMode(),
scheduler);
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);

View File

@@ -20,6 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_queue/gpgpu_walker.h"
#include "runtime/event/hw_timestamps.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/task_information.h"
@@ -65,18 +66,18 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
}
void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
indirectStateSetup = true;
return BaseClass::setupIndirectState(surfaceStateHeap, parentKernel, parentIDCount);
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
}
// Mock hook: notes that the clean-up section was requested and remembers the
// timestamp pointer it was called with, then delegates to the base class.
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
    // Bookkeeping that tests can inspect after the call.
    cleanupSectionAdded = true;
    timestampAddedInCleanupSection = hwTimeStamp;

    // Forward unchanged arguments to the base-class implementation.
    BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
}
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override {
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
schedulerDispatched = true;
return BaseClass::dispatchScheduler(cmdQ, scheduler, preemptionMode);
return BaseClass::dispatchScheduler(cmdQ, scheduler, preemptionMode, ssh, dsh);
}
uint32_t criticalSectioncheckCounter = 0;
@@ -98,17 +99,22 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentK
mockDevQueue.acquireEMCriticalSection();
size_t heapSize = 20;
size_t alignement = 64;
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
dsh->getSpace(mockDevQueue.getDshOffset());
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
std::unique_ptr<IndirectHeap>(dsh),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
std::unique_ptr<IndirectHeap>(ioh),
std::unique_ptr<IndirectHeap>(ssh),
*pCmdQ->getDevice().getMemoryManager());
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@@ -124,7 +130,7 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentK
}
}
HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenDeviceQueueDshIsUsed) {
HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed) {
if (device->getSupportedClVersion() >= 20) {
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*device);
@@ -135,14 +141,19 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
size_t heapSize = 20;
size_t alignement = 64;
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
// add initial offset of colorCalcState
dsh->getSpace(DeviceQueue::colorCalcStateSize);
uint64_t ValueToFillDsh = 5;
uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
// Fill Interface Descriptor Data
*dshVal = ValueToFillDsh;
@@ -155,15 +166,15 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
*dshVal = ValueToFillDsh;
size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
uint64_t *devQueueDshValue = (uint64_t *)dshOfDevQueue->getSpace(0);
uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
std::unique_ptr<IndirectHeap>(dsh),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
std::unique_ptr<IndirectHeap>(ioh),
std::unique_ptr<IndirectHeap>(ssh),
*pCmdQ->getDevice().getMemoryManager());
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
@@ -175,13 +186,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
cmdComputeKernel->submit(0, false);
//device queue dsh is not changed
size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
EXPECT_EQ(mockDevQueue.getDshOffset() + sizeof(uint64_t), usedDSHAfterSubmit);
EXPECT_EQ(ValueToFillDsh, *devQueueDshValue);
uint64_t *devQueueDshParent = (uint64_t *)ptrOffset((char *)dshOfDevQueue->getCpuBase(), mockDevQueue.getDshOffset());
EXPECT_EQ(ValueToFillDsh, *devQueueDshParent);
EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit);
delete cmdComputeKernel;
delete parentKernel;
@@ -197,15 +204,20 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
context->setDefaultDeviceQueue(&mockDevQueue);
size_t heapSize = 20;
size_t alignement = 64;
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
dsh->getSpace(mockDevQueue.getDshOffset());
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
std::unique_ptr<IndirectHeap>(dsh),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
std::unique_ptr<IndirectHeap>(ioh),
std::unique_ptr<IndirectHeap>(ssh),
*pCmdQ->getDevice().getMemoryManager());
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
@@ -234,15 +246,18 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingW
context->setDefaultDeviceQueue(&mockDevQueue);
size_t heapSize = 20;
size_t alignement = 64;
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
dsh->getSpace(mockDevQueue.getDshOffset());
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
std::unique_ptr<IndirectHeap>(dsh),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
std::unique_ptr<IndirectHeap>(ioh),
std::unique_ptr<IndirectHeap>(ssh),
*pCmdQ->getDevice().getMemoryManager());
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
@@ -274,15 +289,19 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
context->setDefaultDeviceQueue(&mockDevQueue);
size_t heapSize = 20;
size_t alignement = 64;
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
dsh->getSpace(mockDevQueue.getDshOffset());
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
std::unique_ptr<IndirectHeap>(dsh),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
std::unique_ptr<IndirectHeap>(ioh),
std::unique_ptr<IndirectHeap>(ssh),
*pCmdQ->getDevice().getMemoryManager());
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
@@ -301,7 +320,7 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
}
}
HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmittedThenNewSSHIsAllocated) {
HWTEST_F(ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapshenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
if (device->getSupportedClVersion() >= 20) {
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*device);
@@ -314,20 +333,30 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmitte
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
size_t heapSize = 20;
size_t alignement = 64;
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
dsh->getSpace(mockDevQueue.getDshOffset());
cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
// use some SSH
cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE).getSpace(4);
auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100);
auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100);
size_t usedSize = 4u;
queueSsh.getSpace(usedSize);
queueDsh.getSpace(usedSize);
queueIoh.getSpace(usedSize);
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
std::unique_ptr<IndirectHeap>(dsh),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
std::unique_ptr<IndirectHeap>(ioh),
std::unique_ptr<IndirectHeap>(ssh),
*pCmdQ->getDevice().getMemoryManager());
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@@ -337,7 +366,10 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmitte
cmdComputeKernel->submit(0, false);
EXPECT_TRUE(cmdQ.releaseIndirectHeapCalled);
EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
EXPECT_EQ(usedSize, queueDsh.getUsed());
EXPECT_EQ(usedSize, queueIoh.getUsed());
EXPECT_EQ(usedSize, queueSsh.getUsed());
delete cmdComputeKernel;
delete parentKernel;
@@ -355,14 +387,14 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
size_t heapSize = 20;
size_t alignement = 64;
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
dsh->getSpace(mockDevQueue.getDshOffset());
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
size_t sshSize = 1000;
IndirectHeap *ssh = new IndirectHeap(alignedMalloc(sshSize, 4096), sshSize);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
dsh->getSpace(mockDevQueue.getDshOffset());
EXPECT_EQ(0u, ssh->getUsed());
@@ -372,8 +404,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
std::unique_ptr<IndirectHeap>(dsh),
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
std::unique_ptr<IndirectHeap>(ssh));
std::unique_ptr<IndirectHeap>(ioh),
std::unique_ptr<IndirectHeap>(ssh),
*pCmdQ->getDevice().getMemoryManager());
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@@ -391,3 +424,43 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
delete parentKernel;
}
}
// Calls dispatchWalker for a parent kernel, asking it to return blocked-commands
// data through the out pointer, and checks the heaps stored in that data:
// the DSH's maximum available space equals the device queue DSH buffer size,
// DSH and IOH compare equal, and every heap has a graphics allocation attached.
// Only runs on devices reporting OpenCL 2.0 or newer.
HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes) {
    if (device->getSupportedClVersion() >= 20) {
        cl_queue_properties properties[3] = {0};
        std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*device));

        MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
        parentKernel->createReflectionSurface();
        context->setDefaultDeviceQueue(&mockDevQueue);

        KernelOperation *blockedCommandsData = nullptr;
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                      *parentKernel,
                                                      1,
                                                      globalOffsets,
                                                      workItems,
                                                      nullptr,
                                                      0,
                                                      nullptr,
                                                      &blockedCommandsData,
                                                      nullptr,
                                                      nullptr,
                                                      device->getPreemptionMode(),
                                                      true);

        // Own the returned object so it is released on every exit path,
        // including the early return taken by a failed ASSERT below.
        std::unique_ptr<KernelOperation> blockedCommandsOwner(blockedCommandsData);

        // Fatal check: everything below dereferences the pointer, so a
        // non-fatal EXPECT here would turn a test failure into a crash.
        ASSERT_NE(nullptr, blockedCommandsData);

        EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
        EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh);

        EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation());
        EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
        EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
        EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());
    }
}

View File

@@ -55,7 +55,9 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
*pCmdQ,
*pDevQueueHw,
pDevice->getPreemptionMode(),
scheduler);
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);