mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
[26/n] Internal 4GB allocator.
- Change the way we handle blocked commands: instead of allocating a CPU pointer and populating it with commands, create a real IndirectHeap that may later be submitted to the GPU. - This removes a lot of copy operations that were happening at submit time. - For device enqueue, this requires dsh & ssh to be passed directly to the underlying commands; in that scenario device queue buffers are not used. Change-Id: I1124a8edbb46777ea7f7d3a5946f302e7fdf9665
This commit is contained in:
committed by
sys_ocldev
parent
100f559daa
commit
ffa9b097f5
@@ -239,35 +239,7 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
|
||||
}
|
||||
|
||||
if (!heapMemory) {
|
||||
size_t reservedSize = 0;
|
||||
auto finalHeapSize = defaultHeapSize;
|
||||
|
||||
minRequiredSize += reservedSize;
|
||||
|
||||
finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
|
||||
|
||||
heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
|
||||
|
||||
if (!heapMemory) {
|
||||
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
|
||||
} else {
|
||||
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
|
||||
}
|
||||
|
||||
heapMemory->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
|
||||
|
||||
if (IndirectHeap::SURFACE_STATE == heapType) {
|
||||
DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
|
||||
finalHeapSize = maxSshSize;
|
||||
}
|
||||
|
||||
if (heap) {
|
||||
heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
|
||||
heap->replaceGraphicsAllocation(heapMemory);
|
||||
} else {
|
||||
heap = new IndirectHeap(heapMemory);
|
||||
heap->overrideMaxSize(finalHeapSize);
|
||||
}
|
||||
allocateHeapMemory(heapType, minRequiredSize, heap);
|
||||
}
|
||||
|
||||
return *heap;
|
||||
@@ -650,4 +622,37 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Backs an IndirectHeap with graphics memory obtained from the device's memory manager.
//
// heapType        - kind of heap being set up; for SURFACE_STATE the heap size is forced to
//                   maxSshSize.
// minRequiredSize - minimum number of bytes the caller needs. The requested size is never
//                   smaller than defaultHeapSize and is rounded up to a multiple of the page size.
// indirectHeap    - in/out. When non-null, the existing heap object is re-pointed at the new
//                   allocation. When null, a new IndirectHeap is created with `new` and handed
//                   back through this reference.
//
// NOTE(review): the pointer returned by allocateGraphicsMemory is dereferenced without a null
// check - confirm that an allocation failure cannot reach this point.
void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType,
                                      size_t minRequiredSize, IndirectHeap *&indirectHeap) {
    auto memoryManager = device->getMemoryManager();

    // Nothing is reserved on top of the caller's request at the moment.
    size_t reservedSize = 0;
    minRequiredSize += reservedSize;

    // Work on a local copy of the default size.
    auto heapSize = defaultHeapSize;
    heapSize = alignUp(std::max(heapSize, minRequiredSize), MemoryConstants::pageSize);

    // Ask for a reusable allocation first; fall back to a fresh one.
    auto allocation = memoryManager->obtainReusableAllocation(heapSize).release();
    if (allocation) {
        // The reused buffer may be larger than what was asked for - use its full size.
        heapSize = std::max(allocation->getUnderlyingBufferSize(), heapSize);
    } else {
        allocation = memoryManager->allocateGraphicsMemory(heapSize, MemoryConstants::pageSize);
    }

    allocation->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);

    if (heapType == IndirectHeap::SURFACE_STATE) {
        DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
        heapSize = maxSshSize;
    }

    if (!indirectHeap) {
        // No heap object yet: create one on top of the allocation.
        indirectHeap = new IndirectHeap(allocation);
        indirectHeap->overrideMaxSize(heapSize);
        return;
    }

    // Existing heap object: swap in the new buffer and allocation.
    indirectHeap->replaceBuffer(allocation->getUnderlyingBuffer(), heapSize);
    indirectHeap->replaceGraphicsAllocation(allocation);
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -336,6 +336,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
|
||||
size_t minRequiredSize = 0u);
|
||||
|
||||
void allocateHeapMemory(IndirectHeap::Type heapType,
|
||||
size_t minRequiredSize, IndirectHeap *&indirectHeap);
|
||||
|
||||
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
|
||||
|
||||
cl_command_queue_properties getCommandQueueProperties() const {
|
||||
|
||||
@@ -275,6 +275,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
||||
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
multiDispatchInfo.begin()->getKernel(),
|
||||
(uint32_t)multiDispatchInfo.size(),
|
||||
taskCount,
|
||||
@@ -297,7 +298,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
*this,
|
||||
*devQueueHw,
|
||||
preemption,
|
||||
scheduler);
|
||||
scheduler,
|
||||
&getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
|
||||
@@ -119,13 +119,6 @@ inline cl_uint computeDimensions(const size_t workItems[3]) {
|
||||
return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <typename SizeAndAllocCalcT, typename... CalcArgsT>
|
||||
IndirectHeap *allocateIndirectHeap(SizeAndAllocCalcT &&calc, CalcArgsT &&... args) {
|
||||
size_t alignment = MemoryConstants::pageSize;
|
||||
size_t size = calc(std::forward<CalcArgsT>(args)...);
|
||||
return new IndirectHeap(alignedMalloc(size, alignment), size);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
class GpgpuWalkerHelper {
|
||||
public:
|
||||
@@ -227,7 +220,9 @@ class GpgpuWalkerHelper {
|
||||
CommandQueue &commandQueue,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler);
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh);
|
||||
};
|
||||
|
||||
template <typename GfxFamily, uint32_t eventType>
|
||||
|
||||
@@ -458,20 +458,27 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
|
||||
using KCH = KernelCommandsHelper<GfxFamily>;
|
||||
commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
|
||||
if (executionModelKernel) {
|
||||
uint32_t offsetDsh = commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset();
|
||||
uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;
|
||||
|
||||
dsh = allocateIndirectHeap([&multiDispatchInfo, offsetDsh] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo) + KCH::getTotalSizeRequiredIOH(multiDispatchInfo) + offsetDsh; });
|
||||
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE,
|
||||
commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(),
|
||||
dsh);
|
||||
|
||||
dsh->getSpace(colorCalcSize);
|
||||
ioh = dsh;
|
||||
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE,
|
||||
KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*(multiDispatchInfo.begin()->getKernel())) +
|
||||
KCH::getTotalSizeRequiredSSH(multiDispatchInfo),
|
||||
ssh);
|
||||
} else {
|
||||
dsh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo); });
|
||||
ioh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIOH(multiDispatchInfo); });
|
||||
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, KCH::getTotalSizeRequiredDSH(multiDispatchInfo), dsh);
|
||||
commandQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, KCH::getTotalSizeRequiredIOH(multiDispatchInfo), ioh);
|
||||
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, KCH::getTotalSizeRequiredSSH(multiDispatchInfo), ssh);
|
||||
}
|
||||
|
||||
ssh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredSSH(multiDispatchInfo); });
|
||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh));
|
||||
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh),
|
||||
*commandQueue.getDevice().getMemoryManager());
|
||||
if (executionModelKernel) {
|
||||
(*blockedCommandsData)->doNotFreeISH = true;
|
||||
}
|
||||
@@ -671,7 +678,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
CommandQueue &commandQueue,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler) {
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh) {
|
||||
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
@@ -679,13 +688,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
|
||||
OCLRT::LinearStream *commandStream = nullptr;
|
||||
OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
OCLRT::IndirectHeap *ioh = nullptr;
|
||||
|
||||
commandStream = &commandQueue.getCS(0);
|
||||
// note : below code assumes that caller to dispatchScheduler "preallocated" memory
|
||||
// required for execution model in below heap managers
|
||||
dsh = devQueueHw.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ssh = &commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE);
|
||||
|
||||
bool dcFlush = false;
|
||||
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);
|
||||
|
||||
@@ -156,12 +156,12 @@ void DeviceQueue::initDeviceQueue() {
|
||||
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
||||
}
|
||||
|
||||
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
|
||||
setupIndirectState(surfaceStateHeap, parentKernel, parentCount);
|
||||
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
|
||||
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
|
||||
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
||||
}
|
||||
|
||||
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -173,7 +173,7 @@ void DeviceQueue::resetDeviceQueue() {
|
||||
return;
|
||||
}
|
||||
|
||||
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
|
||||
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -81,9 +81,9 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
||||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
|
||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
|
||||
|
||||
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
||||
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
||||
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
|
||||
|
||||
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
|
||||
@@ -93,7 +93,7 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
||||
}
|
||||
|
||||
virtual void resetDeviceQueue();
|
||||
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode);
|
||||
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh);
|
||||
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
|
||||
|
||||
void acquireEMCriticalSection() {
|
||||
|
||||
@@ -72,11 +72,11 @@ class DeviceQueueHw : public DeviceQueue {
|
||||
|
||||
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
|
||||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
||||
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
|
||||
void resetDeviceQueue() override;
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override;
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
|
||||
|
||||
uint32_t getSchedulerReturnInstance() {
|
||||
return igilQueue->m_controls.m_SchedulerEarlyReturn;
|
||||
|
||||
@@ -290,11 +290,8 @@ IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type)
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||
void *pDSH = dshBuffer->getUnderlyingBuffer();
|
||||
|
||||
// Heap and dshBuffer should be the same if heap is created
|
||||
DEBUG_BREAK_IF(!((heaps[IndirectHeap::DYNAMIC_STATE] == nullptr) || (heaps[IndirectHeap::DYNAMIC_STATE]->getCpuBase() == pDSH)));
|
||||
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||
void *pDSH = dynamicStateHeap.getCpuBase();
|
||||
|
||||
// Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries.
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
@@ -386,11 +383,13 @@ size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &sc
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(cmdQ,
|
||||
*this,
|
||||
preemptionMode,
|
||||
scheduler);
|
||||
scheduler,
|
||||
ssh,
|
||||
dsh);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "runtime/device_queue/device_queue.h"
|
||||
#include "runtime/gtpin/gtpin_notify.h"
|
||||
#include "runtime/mem_obj/mem_obj.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
@@ -35,13 +36,14 @@
|
||||
|
||||
namespace OCLRT {
|
||||
KernelOperation::~KernelOperation() {
|
||||
alignedFree(dsh->getCpuBase());
|
||||
if (doNotFreeISH) {
|
||||
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(dsh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
|
||||
if (ioh.get() == dsh.get()) {
|
||||
ioh.release();
|
||||
} else {
|
||||
alignedFree(ioh->getCpuBase());
|
||||
}
|
||||
alignedFree(ssh->getCpuBase());
|
||||
if (ioh) {
|
||||
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(ioh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
|
||||
}
|
||||
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(ssh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
|
||||
alignedFree(commandStream->getCpuBase());
|
||||
}
|
||||
|
||||
@@ -163,43 +165,9 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
//transfer the memory to commandStream of the queue.
|
||||
memcpy_s(pDst, commandsSize, commandStream.getCpuBase(), commandsSize);
|
||||
|
||||
size_t requestedDshSize = kernelOperation->dsh->getUsed();
|
||||
size_t requestedIohSize = kernelOperation->ioh->getUsed();
|
||||
size_t requestedSshSize = kernelOperation->ssh->getUsed() + kernelOperation->surfaceStateHeapSizeEM;
|
||||
|
||||
IndirectHeap *dsh = nullptr;
|
||||
IndirectHeap *ioh = nullptr;
|
||||
|
||||
IndirectHeap::Type trackedHeaps[] = {IndirectHeap::SURFACE_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE};
|
||||
|
||||
for (auto trackedHeap = 0u; trackedHeap < ARRAY_COUNT(trackedHeaps); trackedHeap++) {
|
||||
if (commandQueue.getIndirectHeap(trackedHeaps[trackedHeap], 0).getUsed() > 0) {
|
||||
commandQueue.releaseIndirectHeap(trackedHeaps[trackedHeap]);
|
||||
}
|
||||
}
|
||||
|
||||
if (executionModelKernel) {
|
||||
dsh = devQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
|
||||
ioh = dsh;
|
||||
|
||||
memcpy_s(dsh->getSpace(0), dsh->getAvailableSpace(), ptrOffset(kernelOperation->dsh->getCpuBase(), devQueue->colorCalcStateSize), kernelOperation->dsh->getUsed() - devQueue->colorCalcStateSize);
|
||||
dsh->getSpace(kernelOperation->dsh->getUsed() - devQueue->colorCalcStateSize);
|
||||
} else {
|
||||
dsh = &commandQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, requestedDshSize);
|
||||
ioh = &commandQueue.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, requestedIohSize);
|
||||
|
||||
memcpy_s(dsh->getCpuBase(), requestedDshSize, kernelOperation->dsh->getCpuBase(), kernelOperation->dsh->getUsed());
|
||||
dsh->getSpace(requestedDshSize);
|
||||
|
||||
memcpy_s(ioh->getCpuBase(), requestedIohSize, kernelOperation->ioh->getCpuBase(), kernelOperation->ioh->getUsed());
|
||||
ioh->getSpace(requestedIohSize);
|
||||
}
|
||||
|
||||
IndirectHeap &ssh = commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, requestedSshSize);
|
||||
|
||||
memcpy_s(ssh.getCpuBase(), requestedSshSize, kernelOperation->ssh->getCpuBase(), kernelOperation->ssh->getUsed());
|
||||
ssh.getSpace(kernelOperation->ssh->getUsed());
|
||||
IndirectHeap *dsh = kernelOperation->dsh.get();
|
||||
IndirectHeap *ioh = kernelOperation->ioh.get();
|
||||
IndirectHeap *ssh = kernelOperation->ssh.get();
|
||||
|
||||
auto requiresCoherency = false;
|
||||
for (auto &surface : surfaces) {
|
||||
@@ -214,7 +182,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
|
||||
if (executionModelKernel) {
|
||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||
devQueue->setupExecutionModelDispatch(ssh, kernel, kernelCount, taskCount, timestamp);
|
||||
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount, taskCount, timestamp);
|
||||
|
||||
BuiltIns &builtIns = BuiltIns::getInstance();
|
||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(commandQueue.getContext());
|
||||
@@ -223,16 +191,18 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
devQueue->getStackBuffer(),
|
||||
devQueue->getEventPoolBuffer(),
|
||||
devQueue->getSlbBuffer(),
|
||||
devQueue->getDshBuffer(),
|
||||
dsh->getGraphicsAllocation(),
|
||||
kernel->getKernelReflectionSurface(),
|
||||
devQueue->getQueueStorageBuffer(),
|
||||
ssh.getGraphicsAllocation(),
|
||||
ssh->getGraphicsAllocation(),
|
||||
devQueue->getDebugQueue());
|
||||
|
||||
devQueue->dispatchScheduler(
|
||||
commandQueue,
|
||||
scheduler,
|
||||
preemptionMode);
|
||||
preemptionMode,
|
||||
ssh,
|
||||
dsh);
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
@@ -261,14 +231,13 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
offset,
|
||||
*dsh,
|
||||
*ioh,
|
||||
ssh,
|
||||
*ssh,
|
||||
taskLevel,
|
||||
dispatchFlags);
|
||||
for (auto &surface : surfaces) {
|
||||
surface->setCompletionStamp(completionStamp, nullptr, nullptr);
|
||||
}
|
||||
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
|
||||
|
||||
if (printfHandler) {
|
||||
printfHandler.get()->printEnqueueOutput();
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ class MemObj;
|
||||
class Surface;
|
||||
class PrintfHandler;
|
||||
struct HwTimeStamps;
|
||||
class MemoryManager;
|
||||
|
||||
enum MapOperationType {
|
||||
MAP,
|
||||
@@ -77,10 +78,11 @@ class CommandMapUnmap : public Command {
|
||||
};
|
||||
|
||||
struct KernelOperation {
|
||||
KernelOperation(std::unique_ptr<LinearStream> commandStream, std::unique_ptr<IndirectHeap> dsh, std::unique_ptr<IndirectHeap> ioh, std::unique_ptr<IndirectHeap> ssh)
|
||||
KernelOperation(std::unique_ptr<LinearStream> commandStream, std::unique_ptr<IndirectHeap> dsh, std::unique_ptr<IndirectHeap> ioh, std::unique_ptr<IndirectHeap> ssh,
|
||||
MemoryManager &memoryManager)
|
||||
: commandStream(std::move(commandStream)), dsh(std::move(dsh)),
|
||||
ioh(std::move(ioh)), ssh(std::move(ssh)),
|
||||
surfaceStateHeapSizeEM(0), doNotFreeISH(false) {
|
||||
surfaceStateHeapSizeEM(0), doNotFreeISH(false), memoryManager(memoryManager) {
|
||||
}
|
||||
|
||||
~KernelOperation();
|
||||
@@ -92,6 +94,7 @@ struct KernelOperation {
|
||||
|
||||
size_t surfaceStateHeapSizeEM;
|
||||
bool doNotFreeISH;
|
||||
MemoryManager &memoryManager;
|
||||
};
|
||||
|
||||
class CommandComputeKernel : public Command {
|
||||
|
||||
@@ -392,95 +392,55 @@ HWTEST_F(CommandQueueHwTest, GivenNotCompleteUserEventPassedToEnqueueWhenEventIs
|
||||
}
|
||||
|
||||
typedef CommandQueueHwTest BlockedCommandQueueTest;
|
||||
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUsedHeapsWhenBlockedCommandIsBeingSubmittedItReloadsThemToZeroToKeepProperOffsets) {
|
||||
DebugManagerStateRestore debugStateRestore;
|
||||
bool oldMemsetAllocationsFlag = MemoryManagement::memsetNewAllocations;
|
||||
MemoryManagement::memsetNewAllocations = true;
|
||||
|
||||
DebugManager.flags.ForcePreemptionMode.set(-1); // allow default preemption mode
|
||||
auto deviceWithDefaultPreemptionMode = std::unique_ptr<MockDevice>(DeviceHelper<>::create(nullptr));
|
||||
this->pDevice->setPreemptionMode(deviceWithDefaultPreemptionMode->getPreemptionMode());
|
||||
this->pDevice->getCommandStreamReceiver().setPreemptionCsrAllocation(deviceWithDefaultPreemptionMode->getPreemptionAllocation());
|
||||
|
||||
DebugManager.flags.DisableResourceRecycling.set(true);
|
||||
|
||||
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
|
||||
UserEvent userEvent(context);
|
||||
cl_event blockedEvent = &userEvent;
|
||||
MockKernelWithInternals mockKernelWithInternals(*pDevice);
|
||||
mockKernelWithInternals.kernelHeader.KernelHeapSize = sizeof(mockKernelWithInternals.kernelIsa);
|
||||
auto mockKernel = mockKernelWithInternals.mockKernel;
|
||||
|
||||
IndirectHeap::Type heaps[] = {IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE, IndirectHeap::SURFACE_STATE};
|
||||
|
||||
size_t prealocatedHeapSize = 2 * 64 * KB;
|
||||
for (auto heapType : heaps) {
|
||||
auto &heap = pCmdQ->getIndirectHeap(heapType, prealocatedHeapSize);
|
||||
heap.getSpace(16);
|
||||
memset(heap.getCpuBase(), 0, prealocatedHeapSize);
|
||||
}
|
||||
|
||||
// preallocating memsetted allocations to get predictable results
|
||||
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
|
||||
DebugManager.flags.DisableResourceRecycling.set(false);
|
||||
|
||||
std::set<void *> reusableHeaps;
|
||||
for (unsigned int i = 0; i < 4; ++i) {
|
||||
auto allocSize = prealocatedHeapSize;
|
||||
void *mem = alignedMalloc(allocSize, 64);
|
||||
reusableHeaps.insert(mem);
|
||||
memset(mem, 0, allocSize);
|
||||
std::unique_ptr<GraphicsAllocation> reusableAlloc{new MockGraphicsAllocation(mem, allocSize)};
|
||||
pCmdQ->getDevice().getMemoryManager()->storeAllocation(std::move(reusableAlloc), REUSABLE_ALLOCATION);
|
||||
}
|
||||
|
||||
// disable further allocation reuse
|
||||
DebugManager.flags.DisableResourceRecycling.set(true);
|
||||
|
||||
size_t offset = 0;
|
||||
size_t size = 1;
|
||||
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // blocked command
|
||||
|
||||
cl_event blockedEvent = &userEvent;
|
||||
|
||||
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
|
||||
userEvent.setStatus(CL_COMPLETE);
|
||||
|
||||
// make sure used heaps are from preallocated pool
|
||||
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0).getCpuBase()));
|
||||
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0).getCpuBase()));
|
||||
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getCpuBase()));
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
|
||||
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
|
||||
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
|
||||
|
||||
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
|
||||
std::unordered_map<int, std::vector<char>> blockedCommandHeaps;
|
||||
int i = 0;
|
||||
for (auto heapType : heaps) {
|
||||
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
|
||||
blockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getCpuBase()), reinterpret_cast<char *>(heap.getCpuBase()) + heap.getUsed());
|
||||
EXPECT_EQ(0u, ioh.getUsed());
|
||||
EXPECT_EQ(0u, dsh.getUsed());
|
||||
EXPECT_EQ(0u, ssh.getUsed());
|
||||
}
|
||||
|
||||
// prepare new heaps for nonblocked command
|
||||
pCmdQ->releaseIndirectHeap(heapType);
|
||||
++i;
|
||||
}
|
||||
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWithUsedHeapsWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
|
||||
UserEvent userEvent(context);
|
||||
MockKernelWithInternals mockKernelWithInternals(*pDevice);
|
||||
auto mockKernel = mockKernelWithInternals.mockKernel;
|
||||
|
||||
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 0, nullptr, nullptr); // nonblocked command
|
||||
i = 0;
|
||||
std::unordered_map<int, std::vector<char>> nonblockedCommandHeaps;
|
||||
for (auto heapType : heaps) {
|
||||
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
|
||||
nonblockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getCpuBase()), reinterpret_cast<char *>(heap.getCpuBase()) + heap.getUsed());
|
||||
++i;
|
||||
}
|
||||
size_t offset = 0;
|
||||
size_t size = 1;
|
||||
|
||||
// expecting blocked command to be programmed identically to a non-blocked counterpart
|
||||
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)],
|
||||
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)]));
|
||||
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)],
|
||||
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)]));
|
||||
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)],
|
||||
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)]));
|
||||
cl_event blockedEvent = &userEvent;
|
||||
|
||||
for (auto ptr : reusableHeaps) {
|
||||
alignedFree(ptr);
|
||||
}
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
|
||||
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
|
||||
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
|
||||
|
||||
BuiltIns::shutDown();
|
||||
MemoryManagement::memsetNewAllocations = oldMemsetAllocationsFlag;
|
||||
auto spaceToUse = 4u;
|
||||
|
||||
ioh.getSpace(spaceToUse);
|
||||
dsh.getSpace(spaceToUse);
|
||||
ssh.getSpace(spaceToUse);
|
||||
|
||||
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
|
||||
userEvent.setStatus(CL_COMPLETE);
|
||||
|
||||
EXPECT_EQ(spaceToUse, ioh.getUsed());
|
||||
EXPECT_EQ(spaceToUse, dsh.getUsed());
|
||||
EXPECT_EQ(spaceToUse, ssh.getUsed());
|
||||
}
|
||||
|
||||
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) {
|
||||
|
||||
@@ -616,6 +616,33 @@ TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetIndirectHeapIsCalle
|
||||
EXPECT_EQ(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM, indirectHeapAllocation->getAllocationType());
|
||||
}
|
||||
|
||||
// Verifies that allocateHeapMemory creates a heap object, backed by a graphics allocation, when
// it is handed a null heap pointer.
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledThenHeapIsCreated) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    CommandQueue cmdQ(&context, pDevice, props);

    IndirectHeap *indirectHeap = nullptr;
    cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap);

    // Fatal assertion: the heap is dereferenced right below, so a null result must end the test
    // here instead of crashing the test binary.
    ASSERT_NE(nullptr, indirectHeap);
    EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation());

    // Release the backing allocation and the heap object created by the call.
    pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation());
    delete indirectHeap;
}
|
||||
|
||||
// Verifies that allocateHeapMemory keeps using a heap object supplied by the caller and attaches
// a graphics allocation to it.
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledWithAlreadyAllocatedHeapThenGraphicsAllocationIsCreated) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    CommandQueue queue(&context, pDevice, queueProperties);

    // Heap object that exists before the call, constructed with a null buffer.
    IndirectHeap preexistingHeap(nullptr, 100);
    IndirectHeap *heapPtr = &preexistingHeap;

    queue.allocateHeapMemory(this->GetParam(), 100, heapPtr);

    // The pointer must still refer to the caller's heap object...
    EXPECT_EQ(&preexistingHeap, heapPtr);

    // ...and that heap must now report a graphics allocation.
    auto *backingAllocation = heapPtr->getGraphicsAllocation();
    EXPECT_NE(nullptr, backingAllocation);

    pDevice->getMemoryManager()->freeGraphicsMemory(backingAllocation);
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
Device,
|
||||
CommandQueueIndirectHeapTest,
|
||||
|
||||
@@ -710,9 +710,9 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
|
||||
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getSizeRequiredSSH(kernel);
|
||||
|
||||
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
|
||||
EXPECT_EQ(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
||||
EXPECT_EQ(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
||||
EXPECT_EQ(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
||||
EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
||||
EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
||||
EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
||||
|
||||
delete blockedCommandsData;
|
||||
}
|
||||
@@ -745,9 +745,9 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen
|
||||
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
|
||||
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
|
||||
EXPECT_EQ(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
||||
EXPECT_EQ(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
||||
EXPECT_EQ(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
||||
EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
||||
EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
||||
EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
||||
|
||||
delete blockedCommandsData;
|
||||
}
|
||||
|
||||
@@ -534,7 +534,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectState) {
|
||||
auto usedBeforeSSH = ssh->getUsed();
|
||||
auto usedBeforeDSH = dsh->getUsed();
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, pKernel, 1);
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1);
|
||||
auto usedAfterSSH = ssh->getUsed();
|
||||
auto usedAfterDSH = dsh->getUsed();
|
||||
|
||||
@@ -564,7 +564,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectStateSetsCorrectStartBlockID) {
|
||||
|
||||
uint32_t parentCount = 4;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, pKernel, parentCount);
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
|
||||
@@ -594,7 +594,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectStateSetsCorrectDSHValues) {
|
||||
|
||||
uint32_t parentCount = 1;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, pKernel, parentCount);
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
|
||||
@@ -40,7 +40,8 @@ TEST(DeviceQueueSimpleTest, setupExecutionModelDispatchDoesNothing) {
|
||||
|
||||
size_t size = 20;
|
||||
IndirectHeap ssh(buffer, size);
|
||||
devQueue.setupExecutionModelDispatch(ssh, nullptr, 0, 0, 0);
|
||||
IndirectHeap dsh(buffer, size);
|
||||
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0);
|
||||
|
||||
EXPECT_EQ(0u, ssh.getUsed());
|
||||
|
||||
@@ -320,7 +321,7 @@ TEST_F(DeviceQueueTest, dispatchScheduler) {
|
||||
CommandQueue cmdQ(nullptr, nullptr, 0);
|
||||
KernelInfo info;
|
||||
MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device);
|
||||
devQueue.dispatchScheduler(cmdQ, *kernel, device->getPreemptionMode());
|
||||
devQueue.dispatchScheduler(cmdQ, *kernel, device->getPreemptionMode(), nullptr, nullptr);
|
||||
delete kernel;
|
||||
}
|
||||
|
||||
|
||||
@@ -449,60 +449,18 @@ class SurfaceMock : public Surface {
|
||||
SurfaceMock(SurfaceMock *parent) : parent(parent){};
|
||||
};
|
||||
|
||||
// Checks the effect of submitting a CommandComputeKernel whose KernelOperation carries
// DSH/IOH heaps larger than the command queue's own: before submit() the queue heaps
// report less max available space than the operation's heaps, after submit() they
// report at least as much. The SSH is compared against maxSshSize instead.
TEST_F(InternalsEventTest, resizeCmdQueueHeapsWhenKernelOparationHeapsAreBigger) {
    CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
    IndirectHeap &queueDsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096);
    IndirectHeap &queueIoh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096);
    IndirectHeap &queueSsh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096);

    // Twice what the queue's DSH currently offers, so the operation's heaps are strictly bigger.
    auto biggerSize = queueDsh.getMaxAvailableSpace() * 2;
    auto commandStream = new LinearStream(alignedMalloc(biggerSize, biggerSize), biggerSize);

    // Builds a heap of the given size and consumes all of its available space.
    auto makeExhaustedHeap = [](size_t size) {
        auto fullHeap = new IndirectHeap(alignedMalloc(size, size), size);
        fullHeap->getSpace(fullHeap->getAvailableSpace());
        return fullHeap;
    };

    auto dsh = makeExhaustedHeap(biggerSize);
    auto ioh = makeExhaustedHeap(biggerSize);
    auto ssh = makeExhaustedHeap(maxSshSize);

    // The raw heap pointers stay around for the assertions below; the unique_ptrs
    // handed to KernelOperation take the ownership.
    auto blockedKernelOp = new KernelOperation(std::unique_ptr<LinearStream>(commandStream),
                                               std::unique_ptr<IndirectHeap>(dsh),
                                               std::unique_ptr<IndirectHeap>(ioh),
                                               std::unique_ptr<IndirectHeap>(ssh));

    std::vector<Surface *> surfaces;
    SurfaceMock *mockSurface = new SurfaceMock;
    surfaces.push_back(mockSurface);

    PreemptionMode preemptionMode = pDevice->getPreemptionMode();
    auto cmdComputeKernel = new CommandComputeKernel(*pCmdQ, pDevice->getCommandStreamReceiver(),
                                                     std::unique_ptr<KernelOperation>(blockedKernelOp),
                                                     surfaces, false, false, false, nullptr, preemptionMode);

    // Precondition: the queue's heaps are smaller than the ones carried by the operation.
    EXPECT_LT(queueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace());
    EXPECT_LT(queueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace());
    EXPECT_EQ(maxSshSize, ssh->getMaxAvailableSpace());

    cmdComputeKernel->submit(0, false);

    // Postcondition: the queue's heaps are now at least as large as the operation's.
    EXPECT_GE(queueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace());
    EXPECT_GE(queueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace());
    EXPECT_GE(queueSsh.getMaxAvailableSpace(), ssh->getMaxAvailableSpace());

    // NOTE(review): the queue is deleted before the command that was built from it;
    // order kept exactly as in the original.
    delete pCmdQ;
    delete cmdComputeKernel;
}
|
||||
|
||||
TEST_F(InternalsEventTest, processBlockedCommandsKernelOperation) {
|
||||
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
|
||||
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
||||
|
||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||
UniqueIH(ioh), UniqueIH(ssh));
|
||||
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
auto &csr = pDevice->getCommandStreamReceiver();
|
||||
std::vector<Surface *> v;
|
||||
@@ -534,12 +492,13 @@ TEST_F(InternalsEventTest, processBlockedCommandsAbortKernelOperation) {
|
||||
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
||||
|
||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||
UniqueIH(ioh), UniqueIH(ssh));
|
||||
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
auto &csr = pDevice->getCommandStreamReceiver();
|
||||
std::vector<Surface *> v;
|
||||
@@ -565,12 +524,13 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
|
||||
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
||||
|
||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||
UniqueIH(ioh), UniqueIH(ssh));
|
||||
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface();
|
||||
pPrintfSurface->DataParamOffset = 0;
|
||||
@@ -1477,12 +1437,13 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl
|
||||
csr.flushStamp->setStamp(5);
|
||||
|
||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||
UniqueIH(ioh), UniqueIH(ssh));
|
||||
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||
std::vector<Surface *> v;
|
||||
auto cmd = new CommandComputeKernel(*pCmdQ, csr, std::unique_ptr<KernelOperation>(blockedCommandsData), v, false, false, false, nullptr, preemptionMode);
|
||||
|
||||
@@ -76,7 +76,9 @@ HWTEST_F(ExecutionModelSchedulerFixture, dispatchScheduler) {
|
||||
*pCmdQ,
|
||||
*pDevQueueHw,
|
||||
pDevice->getPreemptionMode(),
|
||||
scheduler);
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
|
||||
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
|
||||
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
|
||||
@@ -192,7 +194,9 @@ HWTEST_F(ExecutionModelSchedulerFixture, dispatchSchedulerDoesNotUseStandardCmdQ
|
||||
*pCmdQ,
|
||||
*pDevQueueHw,
|
||||
pDevice->getPreemptionMode(),
|
||||
scheduler);
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);
|
||||
|
||||
@@ -223,7 +227,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, dispatchSchedulerWithEarlyReturnSetToF
|
||||
*pCmdQ,
|
||||
mockDevQueue,
|
||||
device->getPreemptionMode(),
|
||||
scheduler);
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/gpgpu_walker.h"
|
||||
#include "runtime/event/hw_timestamps.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/task_information.h"
|
||||
@@ -65,18 +66,18 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
|
||||
return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
|
||||
}
|
||||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
|
||||
indirectStateSetup = true;
|
||||
return BaseClass::setupIndirectState(surfaceStateHeap, parentKernel, parentIDCount);
|
||||
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
|
||||
}
|
||||
// Mock override: remembers which timestamp object was passed in and flags that the
// clean-up section was requested, then forwards the call unchanged to the base class.
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
    timestampAddedInCleanupSection = hwTimeStamp;
    cleanupSectionAdded = true;
    BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
}
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override {
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
|
||||
schedulerDispatched = true;
|
||||
return BaseClass::dispatchScheduler(cmdQ, scheduler, preemptionMode);
|
||||
return BaseClass::dispatchScheduler(cmdQ, scheduler, preemptionMode, ssh, dsh);
|
||||
}
|
||||
|
||||
uint32_t criticalSectioncheckCounter = 0;
|
||||
@@ -98,17 +99,22 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentK
|
||||
mockDevQueue.acquireEMCriticalSection();
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t alignement = 64;
|
||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||
|
||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||
std::unique_ptr<IndirectHeap>(dsh),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
||||
std::unique_ptr<IndirectHeap>(ioh),
|
||||
std::unique_ptr<IndirectHeap>(ssh),
|
||||
*pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
@@ -124,7 +130,7 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentK
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenDeviceQueueDshIsUsed) {
|
||||
HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed) {
|
||||
if (device->getSupportedClVersion() >= 20) {
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*device);
|
||||
@@ -135,14 +141,19 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
||||
auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t alignement = 64;
|
||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
// add initial offset of colorCalState
|
||||
dsh->getSpace(DeviceQueue::colorCalcStateSize);
|
||||
|
||||
uint64_t ValueToFillDsh = 5;
|
||||
uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
|
||||
|
||||
// Fill Interface Descriptor Data
|
||||
*dshVal = ValueToFillDsh;
|
||||
|
||||
@@ -155,15 +166,15 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
||||
*dshVal = ValueToFillDsh;
|
||||
|
||||
size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
|
||||
uint64_t *devQueueDshValue = (uint64_t *)dshOfDevQueue->getSpace(0);
|
||||
|
||||
uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
|
||||
|
||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||
std::unique_ptr<IndirectHeap>(dsh),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
||||
std::unique_ptr<IndirectHeap>(ioh),
|
||||
std::unique_ptr<IndirectHeap>(ssh),
|
||||
*pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||
|
||||
@@ -175,13 +186,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
//device queue dsh is not changed
|
||||
size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
|
||||
|
||||
EXPECT_EQ(mockDevQueue.getDshOffset() + sizeof(uint64_t), usedDSHAfterSubmit);
|
||||
EXPECT_EQ(ValueToFillDsh, *devQueueDshValue);
|
||||
|
||||
uint64_t *devQueueDshParent = (uint64_t *)ptrOffset((char *)dshOfDevQueue->getCpuBase(), mockDevQueue.getDshOffset());
|
||||
EXPECT_EQ(ValueToFillDsh, *devQueueDshParent);
|
||||
EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit);
|
||||
|
||||
delete cmdComputeKernel;
|
||||
delete parentKernel;
|
||||
@@ -197,15 +204,20 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t alignement = 64;
|
||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||
std::unique_ptr<IndirectHeap>(dsh),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
||||
std::unique_ptr<IndirectHeap>(ioh),
|
||||
std::unique_ptr<IndirectHeap>(ssh),
|
||||
*pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||
|
||||
@@ -234,15 +246,18 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingW
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t alignement = 64;
|
||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||
std::unique_ptr<IndirectHeap>(dsh),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
||||
std::unique_ptr<IndirectHeap>(ioh),
|
||||
std::unique_ptr<IndirectHeap>(ssh),
|
||||
*pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||
|
||||
@@ -274,15 +289,19 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t alignement = 64;
|
||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||
std::unique_ptr<IndirectHeap>(dsh),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
||||
std::unique_ptr<IndirectHeap>(ioh),
|
||||
std::unique_ptr<IndirectHeap>(ssh),
|
||||
*pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||
|
||||
@@ -301,7 +320,7 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmittedThenNewSSHIsAllocated) {
|
||||
HWTEST_F(ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapshenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
|
||||
if (device->getSupportedClVersion() >= 20) {
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*device);
|
||||
@@ -314,20 +333,30 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmitte
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t alignement = 64;
|
||||
|
||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
|
||||
// use some SSH
|
||||
cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE).getSpace(4);
|
||||
auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
|
||||
auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100);
|
||||
auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100);
|
||||
|
||||
size_t usedSize = 4u;
|
||||
|
||||
queueSsh.getSpace(usedSize);
|
||||
queueDsh.getSpace(usedSize);
|
||||
queueIoh.getSpace(usedSize);
|
||||
|
||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||
std::unique_ptr<IndirectHeap>(dsh),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
||||
std::unique_ptr<IndirectHeap>(ioh),
|
||||
std::unique_ptr<IndirectHeap>(ssh),
|
||||
*pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
@@ -337,7 +366,10 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmitte
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
EXPECT_TRUE(cmdQ.releaseIndirectHeapCalled);
|
||||
EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
|
||||
EXPECT_EQ(usedSize, queueDsh.getUsed());
|
||||
EXPECT_EQ(usedSize, queueIoh.getUsed());
|
||||
EXPECT_EQ(usedSize, queueSsh.getUsed());
|
||||
|
||||
delete cmdComputeKernel;
|
||||
delete parentKernel;
|
||||
@@ -355,14 +387,14 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t alignement = 64;
|
||||
|
||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
size_t sshSize = 1000;
|
||||
IndirectHeap *ssh = new IndirectHeap(alignedMalloc(sshSize, 4096), sshSize);
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
EXPECT_EQ(0u, ssh->getUsed());
|
||||
|
||||
@@ -372,8 +404,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
|
||||
|
||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||
std::unique_ptr<IndirectHeap>(dsh),
|
||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
||||
std::unique_ptr<IndirectHeap>(ssh));
|
||||
std::unique_ptr<IndirectHeap>(ioh),
|
||||
std::unique_ptr<IndirectHeap>(ssh),
|
||||
*pCmdQ->getDevice().getMemoryManager());
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
@@ -391,3 +424,43 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
|
||||
delete parentKernel;
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatches a parent kernel through GpgpuWalkerHelper::dispatchWalker with an output
// slot for blocked-command data and checks the heaps of the resulting KernelOperation:
//  - the DSH's max available space equals the device queue DSH buffer size,
//  - DSH and IOH compare equal and report the same graphics allocation,
//  - DSH, IOH and SSH all have a non-null graphics allocation.
// The body only runs when the device reports OpenCL version >= 20.
HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes) {
    if (device->getSupportedClVersion() >= 20) {
        cl_queue_properties properties[3] = {0};
        std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*device));

        MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
        parentKernel->createReflectionSurface();
        context->setDefaultDeviceQueue(&mockDevQueue);

        KernelOperation *blockedCommandsData = nullptr;
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                      *parentKernel,
                                                      1,
                                                      globalOffsets,
                                                      workItems,
                                                      nullptr,
                                                      0,
                                                      nullptr,
                                                      &blockedCommandsData,
                                                      nullptr,
                                                      nullptr,
                                                      device->getPreemptionMode(),
                                                      true);

        // Fatal check: every assertion below dereferences blockedCommandsData, so a
        // null result must stop the test here instead of crashing the test binary.
        ASSERT_NE(nullptr, blockedCommandsData);

        EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
        EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh);

        EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation());
        EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
        EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
        EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());

        delete blockedCommandsData;
    }
}
|
||||
|
||||
@@ -55,7 +55,9 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
|
||||
*pCmdQ,
|
||||
*pDevQueueHw,
|
||||
pDevice->getPreemptionMode(),
|
||||
scheduler);
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
|
||||
Reference in New Issue
Block a user