mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
[26/n] Internal 4GB allocator.
- Change the way we handle blocked commands. - Instead of allocating a CPU pointer and populating it with commands, create a real IndirectHeap that may later be submitted to the GPU. - This removes a lot of copy operations that were happening at submit time. - For device enqueue, this requires dsh & ssh to be passed directly to the underlying commands; in that scenario the device queue buffers are not used. Change-Id: I1124a8edbb46777ea7f7d3a5946f302e7fdf9665
This commit is contained in:
committed by
sys_ocldev
parent
100f559daa
commit
ffa9b097f5
@@ -239,35 +239,7 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!heapMemory) {
|
if (!heapMemory) {
|
||||||
size_t reservedSize = 0;
|
allocateHeapMemory(heapType, minRequiredSize, heap);
|
||||||
auto finalHeapSize = defaultHeapSize;
|
|
||||||
|
|
||||||
minRequiredSize += reservedSize;
|
|
||||||
|
|
||||||
finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
|
|
||||||
|
|
||||||
heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
|
|
||||||
|
|
||||||
if (!heapMemory) {
|
|
||||||
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
|
|
||||||
} else {
|
|
||||||
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
heapMemory->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
|
|
||||||
|
|
||||||
if (IndirectHeap::SURFACE_STATE == heapType) {
|
|
||||||
DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
|
|
||||||
finalHeapSize = maxSshSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (heap) {
|
|
||||||
heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
|
|
||||||
heap->replaceGraphicsAllocation(heapMemory);
|
|
||||||
} else {
|
|
||||||
heap = new IndirectHeap(heapMemory);
|
|
||||||
heap->overrideMaxSize(finalHeapSize);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return *heap;
|
return *heap;
|
||||||
@@ -650,4 +622,37 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType,
|
||||||
|
size_t minRequiredSize, IndirectHeap *&indirectHeap) {
|
||||||
|
auto memoryManager = device->getMemoryManager();
|
||||||
|
size_t reservedSize = 0;
|
||||||
|
auto finalHeapSize = defaultHeapSize;
|
||||||
|
|
||||||
|
minRequiredSize += reservedSize;
|
||||||
|
|
||||||
|
finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
|
||||||
|
|
||||||
|
auto heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
|
||||||
|
|
||||||
|
if (!heapMemory) {
|
||||||
|
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
|
||||||
|
} else {
|
||||||
|
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
heapMemory->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
|
||||||
|
|
||||||
|
if (IndirectHeap::SURFACE_STATE == heapType) {
|
||||||
|
DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
|
||||||
|
finalHeapSize = maxSshSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (indirectHeap) {
|
||||||
|
indirectHeap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
|
||||||
|
indirectHeap->replaceGraphicsAllocation(heapMemory);
|
||||||
|
} else {
|
||||||
|
indirectHeap = new IndirectHeap(heapMemory);
|
||||||
|
indirectHeap->overrideMaxSize(finalHeapSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace OCLRT
|
} // namespace OCLRT
|
||||||
|
|||||||
@@ -336,6 +336,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
|
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
|
||||||
size_t minRequiredSize = 0u);
|
size_t minRequiredSize = 0u);
|
||||||
|
|
||||||
|
void allocateHeapMemory(IndirectHeap::Type heapType,
|
||||||
|
size_t minRequiredSize, IndirectHeap *&indirectHeap);
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
|
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
|
||||||
|
|
||||||
cl_command_queue_properties getCommandQueueProperties() const {
|
cl_command_queue_properties getCommandQueueProperties() const {
|
||||||
|
|||||||
@@ -275,6 +275,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
|
|
||||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||||
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
||||||
|
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||||
multiDispatchInfo.begin()->getKernel(),
|
multiDispatchInfo.begin()->getKernel(),
|
||||||
(uint32_t)multiDispatchInfo.size(),
|
(uint32_t)multiDispatchInfo.size(),
|
||||||
taskCount,
|
taskCount,
|
||||||
@@ -297,7 +298,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
*this,
|
*this,
|
||||||
*devQueueHw,
|
*devQueueHw,
|
||||||
preemption,
|
preemption,
|
||||||
scheduler);
|
scheduler,
|
||||||
|
&getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||||
|
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||||
|
|
||||||
scheduler.makeResident(commandStreamReceiver);
|
scheduler.makeResident(commandStreamReceiver);
|
||||||
|
|
||||||
|
|||||||
@@ -119,13 +119,6 @@ inline cl_uint computeDimensions(const size_t workItems[3]) {
|
|||||||
return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1;
|
return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename SizeAndAllocCalcT, typename... CalcArgsT>
|
|
||||||
IndirectHeap *allocateIndirectHeap(SizeAndAllocCalcT &&calc, CalcArgsT &&... args) {
|
|
||||||
size_t alignment = MemoryConstants::pageSize;
|
|
||||||
size_t size = calc(std::forward<CalcArgsT>(args)...);
|
|
||||||
return new IndirectHeap(alignedMalloc(size, alignment), size);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
class GpgpuWalkerHelper {
|
class GpgpuWalkerHelper {
|
||||||
public:
|
public:
|
||||||
@@ -227,7 +220,9 @@ class GpgpuWalkerHelper {
|
|||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||||
PreemptionMode preemptionMode,
|
PreemptionMode preemptionMode,
|
||||||
SchedulerKernel &scheduler);
|
SchedulerKernel &scheduler,
|
||||||
|
IndirectHeap *ssh,
|
||||||
|
IndirectHeap *dsh);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename GfxFamily, uint32_t eventType>
|
template <typename GfxFamily, uint32_t eventType>
|
||||||
|
|||||||
@@ -458,20 +458,27 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
|
|||||||
using KCH = KernelCommandsHelper<GfxFamily>;
|
using KCH = KernelCommandsHelper<GfxFamily>;
|
||||||
commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
|
commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
|
||||||
if (executionModelKernel) {
|
if (executionModelKernel) {
|
||||||
uint32_t offsetDsh = commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset();
|
|
||||||
uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;
|
uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;
|
||||||
|
|
||||||
dsh = allocateIndirectHeap([&multiDispatchInfo, offsetDsh] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo) + KCH::getTotalSizeRequiredIOH(multiDispatchInfo) + offsetDsh; });
|
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE,
|
||||||
|
commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(),
|
||||||
|
dsh);
|
||||||
|
|
||||||
dsh->getSpace(colorCalcSize);
|
dsh->getSpace(colorCalcSize);
|
||||||
ioh = dsh;
|
ioh = dsh;
|
||||||
|
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE,
|
||||||
|
KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*(multiDispatchInfo.begin()->getKernel())) +
|
||||||
|
KCH::getTotalSizeRequiredSSH(multiDispatchInfo),
|
||||||
|
ssh);
|
||||||
} else {
|
} else {
|
||||||
dsh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo); });
|
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, KCH::getTotalSizeRequiredDSH(multiDispatchInfo), dsh);
|
||||||
ioh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIOH(multiDispatchInfo); });
|
commandQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, KCH::getTotalSizeRequiredIOH(multiDispatchInfo), ioh);
|
||||||
|
commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, KCH::getTotalSizeRequiredSSH(multiDispatchInfo), ssh);
|
||||||
}
|
}
|
||||||
|
|
||||||
ssh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredSSH(multiDispatchInfo); });
|
|
||||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||||
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh));
|
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh),
|
||||||
|
*commandQueue.getDevice().getMemoryManager());
|
||||||
if (executionModelKernel) {
|
if (executionModelKernel) {
|
||||||
(*blockedCommandsData)->doNotFreeISH = true;
|
(*blockedCommandsData)->doNotFreeISH = true;
|
||||||
}
|
}
|
||||||
@@ -671,7 +678,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
|||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||||
PreemptionMode preemptionMode,
|
PreemptionMode preemptionMode,
|
||||||
SchedulerKernel &scheduler) {
|
SchedulerKernel &scheduler,
|
||||||
|
IndirectHeap *ssh,
|
||||||
|
IndirectHeap *dsh) {
|
||||||
|
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||||
@@ -679,13 +688,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
|||||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||||
|
|
||||||
OCLRT::LinearStream *commandStream = nullptr;
|
OCLRT::LinearStream *commandStream = nullptr;
|
||||||
OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
OCLRT::IndirectHeap *ioh = nullptr;
|
||||||
|
|
||||||
commandStream = &commandQueue.getCS(0);
|
commandStream = &commandQueue.getCS(0);
|
||||||
// note : below code assumes that caller to dispatchScheduler "preallocated" memory
|
|
||||||
// required for execution model in below heap managers
|
|
||||||
dsh = devQueueHw.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
|
||||||
ssh = &commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE);
|
|
||||||
|
|
||||||
bool dcFlush = false;
|
bool dcFlush = false;
|
||||||
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);
|
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);
|
||||||
|
|||||||
@@ -156,12 +156,12 @@ void DeviceQueue::initDeviceQueue() {
|
|||||||
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
|
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
|
||||||
setupIndirectState(surfaceStateHeap, parentKernel, parentCount);
|
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
|
||||||
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -173,7 +173,7 @@ void DeviceQueue::resetDeviceQueue() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
|
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -81,9 +81,9 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
|||||||
size_t paramValueSize, void *paramValue,
|
size_t paramValueSize, void *paramValue,
|
||||||
size_t *paramValueSizeRet);
|
size_t *paramValueSizeRet);
|
||||||
|
|
||||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
|
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
|
||||||
|
|
||||||
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
||||||
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
|
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
|
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
|
||||||
@@ -93,7 +93,7 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual void resetDeviceQueue();
|
virtual void resetDeviceQueue();
|
||||||
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode);
|
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh);
|
||||||
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
|
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
|
||||||
|
|
||||||
void acquireEMCriticalSection() {
|
void acquireEMCriticalSection() {
|
||||||
|
|||||||
@@ -72,11 +72,11 @@ class DeviceQueueHw : public DeviceQueue {
|
|||||||
|
|
||||||
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
|
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
|
||||||
|
|
||||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
||||||
|
|
||||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
|
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
|
||||||
void resetDeviceQueue() override;
|
void resetDeviceQueue() override;
|
||||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override;
|
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
|
||||||
|
|
||||||
uint32_t getSchedulerReturnInstance() {
|
uint32_t getSchedulerReturnInstance() {
|
||||||
return igilQueue->m_controls.m_SchedulerEarlyReturn;
|
return igilQueue->m_controls.m_SchedulerEarlyReturn;
|
||||||
|
|||||||
@@ -290,11 +290,8 @@ IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||||
void *pDSH = dshBuffer->getUnderlyingBuffer();
|
void *pDSH = dynamicStateHeap.getCpuBase();
|
||||||
|
|
||||||
// Heap and dshBuffer should be the same if heap is created
|
|
||||||
DEBUG_BREAK_IF(!((heaps[IndirectHeap::DYNAMIC_STATE] == nullptr) || (heaps[IndirectHeap::DYNAMIC_STATE]->getCpuBase() == pDSH)));
|
|
||||||
|
|
||||||
// Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries.
|
// Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries.
|
||||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||||
@@ -386,11 +383,13 @@ size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &sc
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
|
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(cmdQ,
|
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(cmdQ,
|
||||||
*this,
|
*this,
|
||||||
preemptionMode,
|
preemptionMode,
|
||||||
scheduler);
|
scheduler,
|
||||||
|
ssh,
|
||||||
|
dsh);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
#include "runtime/device_queue/device_queue.h"
|
#include "runtime/device_queue/device_queue.h"
|
||||||
#include "runtime/gtpin/gtpin_notify.h"
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/mem_obj/mem_obj.h"
|
#include "runtime/mem_obj/mem_obj.h"
|
||||||
|
#include "runtime/memory_manager/memory_manager.h"
|
||||||
#include "runtime/memory_manager/surface.h"
|
#include "runtime/memory_manager/surface.h"
|
||||||
#include "runtime/helpers/aligned_memory.h"
|
#include "runtime/helpers/aligned_memory.h"
|
||||||
#include "runtime/helpers/string.h"
|
#include "runtime/helpers/string.h"
|
||||||
@@ -35,13 +36,14 @@
|
|||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
KernelOperation::~KernelOperation() {
|
KernelOperation::~KernelOperation() {
|
||||||
alignedFree(dsh->getCpuBase());
|
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(dsh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
|
||||||
if (doNotFreeISH) {
|
if (ioh.get() == dsh.get()) {
|
||||||
ioh.release();
|
ioh.release();
|
||||||
} else {
|
|
||||||
alignedFree(ioh->getCpuBase());
|
|
||||||
}
|
}
|
||||||
alignedFree(ssh->getCpuBase());
|
if (ioh) {
|
||||||
|
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(ioh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
|
||||||
|
}
|
||||||
|
memoryManager.storeAllocation(std::unique_ptr<GraphicsAllocation>(ssh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
|
||||||
alignedFree(commandStream->getCpuBase());
|
alignedFree(commandStream->getCpuBase());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,43 +165,9 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
//transfer the memory to commandStream of the queue.
|
//transfer the memory to commandStream of the queue.
|
||||||
memcpy_s(pDst, commandsSize, commandStream.getCpuBase(), commandsSize);
|
memcpy_s(pDst, commandsSize, commandStream.getCpuBase(), commandsSize);
|
||||||
|
|
||||||
size_t requestedDshSize = kernelOperation->dsh->getUsed();
|
IndirectHeap *dsh = kernelOperation->dsh.get();
|
||||||
size_t requestedIohSize = kernelOperation->ioh->getUsed();
|
IndirectHeap *ioh = kernelOperation->ioh.get();
|
||||||
size_t requestedSshSize = kernelOperation->ssh->getUsed() + kernelOperation->surfaceStateHeapSizeEM;
|
IndirectHeap *ssh = kernelOperation->ssh.get();
|
||||||
|
|
||||||
IndirectHeap *dsh = nullptr;
|
|
||||||
IndirectHeap *ioh = nullptr;
|
|
||||||
|
|
||||||
IndirectHeap::Type trackedHeaps[] = {IndirectHeap::SURFACE_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE};
|
|
||||||
|
|
||||||
for (auto trackedHeap = 0u; trackedHeap < ARRAY_COUNT(trackedHeaps); trackedHeap++) {
|
|
||||||
if (commandQueue.getIndirectHeap(trackedHeaps[trackedHeap], 0).getUsed() > 0) {
|
|
||||||
commandQueue.releaseIndirectHeap(trackedHeaps[trackedHeap]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (executionModelKernel) {
|
|
||||||
dsh = devQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
|
||||||
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
|
|
||||||
ioh = dsh;
|
|
||||||
|
|
||||||
memcpy_s(dsh->getSpace(0), dsh->getAvailableSpace(), ptrOffset(kernelOperation->dsh->getCpuBase(), devQueue->colorCalcStateSize), kernelOperation->dsh->getUsed() - devQueue->colorCalcStateSize);
|
|
||||||
dsh->getSpace(kernelOperation->dsh->getUsed() - devQueue->colorCalcStateSize);
|
|
||||||
} else {
|
|
||||||
dsh = &commandQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, requestedDshSize);
|
|
||||||
ioh = &commandQueue.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, requestedIohSize);
|
|
||||||
|
|
||||||
memcpy_s(dsh->getCpuBase(), requestedDshSize, kernelOperation->dsh->getCpuBase(), kernelOperation->dsh->getUsed());
|
|
||||||
dsh->getSpace(requestedDshSize);
|
|
||||||
|
|
||||||
memcpy_s(ioh->getCpuBase(), requestedIohSize, kernelOperation->ioh->getCpuBase(), kernelOperation->ioh->getUsed());
|
|
||||||
ioh->getSpace(requestedIohSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
IndirectHeap &ssh = commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, requestedSshSize);
|
|
||||||
|
|
||||||
memcpy_s(ssh.getCpuBase(), requestedSshSize, kernelOperation->ssh->getCpuBase(), kernelOperation->ssh->getUsed());
|
|
||||||
ssh.getSpace(kernelOperation->ssh->getUsed());
|
|
||||||
|
|
||||||
auto requiresCoherency = false;
|
auto requiresCoherency = false;
|
||||||
for (auto &surface : surfaces) {
|
for (auto &surface : surfaces) {
|
||||||
@@ -214,7 +182,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
|
|
||||||
if (executionModelKernel) {
|
if (executionModelKernel) {
|
||||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||||
devQueue->setupExecutionModelDispatch(ssh, kernel, kernelCount, taskCount, timestamp);
|
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount, taskCount, timestamp);
|
||||||
|
|
||||||
BuiltIns &builtIns = BuiltIns::getInstance();
|
BuiltIns &builtIns = BuiltIns::getInstance();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(commandQueue.getContext());
|
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(commandQueue.getContext());
|
||||||
@@ -223,16 +191,18 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
devQueue->getStackBuffer(),
|
devQueue->getStackBuffer(),
|
||||||
devQueue->getEventPoolBuffer(),
|
devQueue->getEventPoolBuffer(),
|
||||||
devQueue->getSlbBuffer(),
|
devQueue->getSlbBuffer(),
|
||||||
devQueue->getDshBuffer(),
|
dsh->getGraphicsAllocation(),
|
||||||
kernel->getKernelReflectionSurface(),
|
kernel->getKernelReflectionSurface(),
|
||||||
devQueue->getQueueStorageBuffer(),
|
devQueue->getQueueStorageBuffer(),
|
||||||
ssh.getGraphicsAllocation(),
|
ssh->getGraphicsAllocation(),
|
||||||
devQueue->getDebugQueue());
|
devQueue->getDebugQueue());
|
||||||
|
|
||||||
devQueue->dispatchScheduler(
|
devQueue->dispatchScheduler(
|
||||||
commandQueue,
|
commandQueue,
|
||||||
scheduler,
|
scheduler,
|
||||||
preemptionMode);
|
preemptionMode,
|
||||||
|
ssh,
|
||||||
|
dsh);
|
||||||
|
|
||||||
scheduler.makeResident(commandStreamReceiver);
|
scheduler.makeResident(commandStreamReceiver);
|
||||||
|
|
||||||
@@ -261,14 +231,13 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
offset,
|
offset,
|
||||||
*dsh,
|
*dsh,
|
||||||
*ioh,
|
*ioh,
|
||||||
ssh,
|
*ssh,
|
||||||
taskLevel,
|
taskLevel,
|
||||||
dispatchFlags);
|
dispatchFlags);
|
||||||
for (auto &surface : surfaces) {
|
for (auto &surface : surfaces) {
|
||||||
surface->setCompletionStamp(completionStamp, nullptr, nullptr);
|
surface->setCompletionStamp(completionStamp, nullptr, nullptr);
|
||||||
}
|
}
|
||||||
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
|
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
|
||||||
|
|
||||||
if (printfHandler) {
|
if (printfHandler) {
|
||||||
printfHandler.get()->printEnqueueOutput();
|
printfHandler.get()->printEnqueueOutput();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ class MemObj;
|
|||||||
class Surface;
|
class Surface;
|
||||||
class PrintfHandler;
|
class PrintfHandler;
|
||||||
struct HwTimeStamps;
|
struct HwTimeStamps;
|
||||||
|
class MemoryManager;
|
||||||
|
|
||||||
enum MapOperationType {
|
enum MapOperationType {
|
||||||
MAP,
|
MAP,
|
||||||
@@ -77,10 +78,11 @@ class CommandMapUnmap : public Command {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct KernelOperation {
|
struct KernelOperation {
|
||||||
KernelOperation(std::unique_ptr<LinearStream> commandStream, std::unique_ptr<IndirectHeap> dsh, std::unique_ptr<IndirectHeap> ioh, std::unique_ptr<IndirectHeap> ssh)
|
KernelOperation(std::unique_ptr<LinearStream> commandStream, std::unique_ptr<IndirectHeap> dsh, std::unique_ptr<IndirectHeap> ioh, std::unique_ptr<IndirectHeap> ssh,
|
||||||
|
MemoryManager &memoryManager)
|
||||||
: commandStream(std::move(commandStream)), dsh(std::move(dsh)),
|
: commandStream(std::move(commandStream)), dsh(std::move(dsh)),
|
||||||
ioh(std::move(ioh)), ssh(std::move(ssh)),
|
ioh(std::move(ioh)), ssh(std::move(ssh)),
|
||||||
surfaceStateHeapSizeEM(0), doNotFreeISH(false) {
|
surfaceStateHeapSizeEM(0), doNotFreeISH(false), memoryManager(memoryManager) {
|
||||||
}
|
}
|
||||||
|
|
||||||
~KernelOperation();
|
~KernelOperation();
|
||||||
@@ -92,6 +94,7 @@ struct KernelOperation {
|
|||||||
|
|
||||||
size_t surfaceStateHeapSizeEM;
|
size_t surfaceStateHeapSizeEM;
|
||||||
bool doNotFreeISH;
|
bool doNotFreeISH;
|
||||||
|
MemoryManager &memoryManager;
|
||||||
};
|
};
|
||||||
|
|
||||||
class CommandComputeKernel : public Command {
|
class CommandComputeKernel : public Command {
|
||||||
|
|||||||
@@ -392,95 +392,55 @@ HWTEST_F(CommandQueueHwTest, GivenNotCompleteUserEventPassedToEnqueueWhenEventIs
|
|||||||
}
|
}
|
||||||
|
|
||||||
typedef CommandQueueHwTest BlockedCommandQueueTest;
|
typedef CommandQueueHwTest BlockedCommandQueueTest;
|
||||||
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUsedHeapsWhenBlockedCommandIsBeingSubmittedItReloadsThemToZeroToKeepProperOffsets) {
|
|
||||||
DebugManagerStateRestore debugStateRestore;
|
|
||||||
bool oldMemsetAllocationsFlag = MemoryManagement::memsetNewAllocations;
|
|
||||||
MemoryManagement::memsetNewAllocations = true;
|
|
||||||
|
|
||||||
DebugManager.flags.ForcePreemptionMode.set(-1); // allow default preemption mode
|
|
||||||
auto deviceWithDefaultPreemptionMode = std::unique_ptr<MockDevice>(DeviceHelper<>::create(nullptr));
|
|
||||||
this->pDevice->setPreemptionMode(deviceWithDefaultPreemptionMode->getPreemptionMode());
|
|
||||||
this->pDevice->getCommandStreamReceiver().setPreemptionCsrAllocation(deviceWithDefaultPreemptionMode->getPreemptionAllocation());
|
|
||||||
|
|
||||||
DebugManager.flags.DisableResourceRecycling.set(true);
|
|
||||||
|
|
||||||
|
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
|
||||||
UserEvent userEvent(context);
|
UserEvent userEvent(context);
|
||||||
cl_event blockedEvent = &userEvent;
|
|
||||||
MockKernelWithInternals mockKernelWithInternals(*pDevice);
|
MockKernelWithInternals mockKernelWithInternals(*pDevice);
|
||||||
mockKernelWithInternals.kernelHeader.KernelHeapSize = sizeof(mockKernelWithInternals.kernelIsa);
|
|
||||||
auto mockKernel = mockKernelWithInternals.mockKernel;
|
auto mockKernel = mockKernelWithInternals.mockKernel;
|
||||||
|
|
||||||
IndirectHeap::Type heaps[] = {IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE, IndirectHeap::SURFACE_STATE};
|
|
||||||
|
|
||||||
size_t prealocatedHeapSize = 2 * 64 * KB;
|
|
||||||
for (auto heapType : heaps) {
|
|
||||||
auto &heap = pCmdQ->getIndirectHeap(heapType, prealocatedHeapSize);
|
|
||||||
heap.getSpace(16);
|
|
||||||
memset(heap.getCpuBase(), 0, prealocatedHeapSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
// preallocating zero-filled (memset) allocations to get predictable results
|
|
||||||
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
|
|
||||||
DebugManager.flags.DisableResourceRecycling.set(false);
|
|
||||||
|
|
||||||
std::set<void *> reusableHeaps;
|
|
||||||
for (unsigned int i = 0; i < 4; ++i) {
|
|
||||||
auto allocSize = prealocatedHeapSize;
|
|
||||||
void *mem = alignedMalloc(allocSize, 64);
|
|
||||||
reusableHeaps.insert(mem);
|
|
||||||
memset(mem, 0, allocSize);
|
|
||||||
std::unique_ptr<GraphicsAllocation> reusableAlloc{new MockGraphicsAllocation(mem, allocSize)};
|
|
||||||
pCmdQ->getDevice().getMemoryManager()->storeAllocation(std::move(reusableAlloc), REUSABLE_ALLOCATION);
|
|
||||||
}
|
|
||||||
|
|
||||||
// disable further allocation reuse
|
|
||||||
DebugManager.flags.DisableResourceRecycling.set(true);
|
|
||||||
|
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
size_t size = 1;
|
size_t size = 1;
|
||||||
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // blocked command
|
|
||||||
|
cl_event blockedEvent = &userEvent;
|
||||||
|
|
||||||
|
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
|
||||||
userEvent.setStatus(CL_COMPLETE);
|
userEvent.setStatus(CL_COMPLETE);
|
||||||
|
|
||||||
// make sure used heaps are from preallocated pool
|
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
|
||||||
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0).getCpuBase()));
|
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
|
||||||
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0).getCpuBase()));
|
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
|
||||||
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getCpuBase()));
|
|
||||||
|
|
||||||
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
|
EXPECT_EQ(0u, ioh.getUsed());
|
||||||
std::unordered_map<int, std::vector<char>> blockedCommandHeaps;
|
EXPECT_EQ(0u, dsh.getUsed());
|
||||||
int i = 0;
|
EXPECT_EQ(0u, ssh.getUsed());
|
||||||
for (auto heapType : heaps) {
|
}
|
||||||
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
|
|
||||||
blockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getCpuBase()), reinterpret_cast<char *>(heap.getCpuBase()) + heap.getUsed());
|
|
||||||
|
|
||||||
// prepare new heaps for nonblocked command
|
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWithUsedHeapsWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
|
||||||
pCmdQ->releaseIndirectHeap(heapType);
|
UserEvent userEvent(context);
|
||||||
++i;
|
MockKernelWithInternals mockKernelWithInternals(*pDevice);
|
||||||
}
|
auto mockKernel = mockKernelWithInternals.mockKernel;
|
||||||
|
|
||||||
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 0, nullptr, nullptr); // nonblocked command
|
size_t offset = 0;
|
||||||
i = 0;
|
size_t size = 1;
|
||||||
std::unordered_map<int, std::vector<char>> nonblockedCommandHeaps;
|
|
||||||
for (auto heapType : heaps) {
|
|
||||||
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
|
|
||||||
nonblockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getCpuBase()), reinterpret_cast<char *>(heap.getCpuBase()) + heap.getUsed());
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// expecting blocked command to be programmed identically to a non-blocked counterpart
|
cl_event blockedEvent = &userEvent;
|
||||||
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)],
|
|
||||||
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)]));
|
|
||||||
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)],
|
|
||||||
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)]));
|
|
||||||
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)],
|
|
||||||
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)]));
|
|
||||||
|
|
||||||
for (auto ptr : reusableHeaps) {
|
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
|
||||||
alignedFree(ptr);
|
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
|
||||||
}
|
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
|
||||||
|
|
||||||
BuiltIns::shutDown();
|
auto spaceToUse = 4u;
|
||||||
MemoryManagement::memsetNewAllocations = oldMemsetAllocationsFlag;
|
|
||||||
|
ioh.getSpace(spaceToUse);
|
||||||
|
dsh.getSpace(spaceToUse);
|
||||||
|
ssh.getSpace(spaceToUse);
|
||||||
|
|
||||||
|
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
|
||||||
|
userEvent.setStatus(CL_COMPLETE);
|
||||||
|
|
||||||
|
EXPECT_EQ(spaceToUse, ioh.getUsed());
|
||||||
|
EXPECT_EQ(spaceToUse, dsh.getUsed());
|
||||||
|
EXPECT_EQ(spaceToUse, ssh.getUsed());
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) {
|
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) {
|
||||||
|
|||||||
@@ -616,6 +616,33 @@ TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetIndirectHeapIsCalle
|
|||||||
EXPECT_EQ(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM, indirectHeapAllocation->getAllocationType());
|
EXPECT_EQ(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM, indirectHeapAllocation->getAllocationType());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledThenHeapIsCreated) {
|
||||||
|
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||||
|
CommandQueue cmdQ(&context, pDevice, props);
|
||||||
|
|
||||||
|
IndirectHeap *indirectHeap = nullptr;
|
||||||
|
cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap);
|
||||||
|
EXPECT_NE(nullptr, indirectHeap);
|
||||||
|
EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation());
|
||||||
|
|
||||||
|
pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation());
|
||||||
|
delete indirectHeap;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledWithAlreadyAllocatedHeapThenGraphicsAllocationIsCreated) {
|
||||||
|
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||||
|
CommandQueue cmdQ(&context, pDevice, props);
|
||||||
|
|
||||||
|
IndirectHeap heap(nullptr, 100);
|
||||||
|
|
||||||
|
IndirectHeap *indirectHeap = &heap;
|
||||||
|
cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap);
|
||||||
|
EXPECT_EQ(&heap, indirectHeap);
|
||||||
|
EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation());
|
||||||
|
|
||||||
|
pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation());
|
||||||
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
Device,
|
Device,
|
||||||
CommandQueueIndirectHeapTest,
|
CommandQueueIndirectHeapTest,
|
||||||
|
|||||||
@@ -710,9 +710,9 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
|
|||||||
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getSizeRequiredSSH(kernel);
|
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getSizeRequiredSSH(kernel);
|
||||||
|
|
||||||
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
|
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
|
||||||
EXPECT_EQ(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
||||||
EXPECT_EQ(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
||||||
EXPECT_EQ(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
||||||
|
|
||||||
delete blockedCommandsData;
|
delete blockedCommandsData;
|
||||||
}
|
}
|
||||||
@@ -745,9 +745,9 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen
|
|||||||
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
auto expectedSizeSSH = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||||
|
|
||||||
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
|
EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
|
||||||
EXPECT_EQ(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
|
||||||
EXPECT_EQ(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
|
||||||
EXPECT_EQ(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
|
||||||
|
|
||||||
delete blockedCommandsData;
|
delete blockedCommandsData;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -534,7 +534,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectState) {
|
|||||||
auto usedBeforeSSH = ssh->getUsed();
|
auto usedBeforeSSH = ssh->getUsed();
|
||||||
auto usedBeforeDSH = dsh->getUsed();
|
auto usedBeforeDSH = dsh->getUsed();
|
||||||
|
|
||||||
devQueueHw->setupIndirectState(*ssh, pKernel, 1);
|
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1);
|
||||||
auto usedAfterSSH = ssh->getUsed();
|
auto usedAfterSSH = ssh->getUsed();
|
||||||
auto usedAfterDSH = dsh->getUsed();
|
auto usedAfterDSH = dsh->getUsed();
|
||||||
|
|
||||||
@@ -564,7 +564,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectStateSetsCorrectStartBlockID) {
|
|||||||
|
|
||||||
uint32_t parentCount = 4;
|
uint32_t parentCount = 4;
|
||||||
|
|
||||||
devQueueHw->setupIndirectState(*ssh, pKernel, parentCount);
|
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
|
||||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||||
|
|
||||||
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
|
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
|
||||||
@@ -594,7 +594,7 @@ HWTEST_P(DeviceQueueHwWithKernel, setupIndirectStateSetsCorrectDSHValues) {
|
|||||||
|
|
||||||
uint32_t parentCount = 1;
|
uint32_t parentCount = 1;
|
||||||
|
|
||||||
devQueueHw->setupIndirectState(*ssh, pKernel, parentCount);
|
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
|
||||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||||
|
|
||||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||||
|
|||||||
@@ -40,7 +40,8 @@ TEST(DeviceQueueSimpleTest, setupExecutionModelDispatchDoesNothing) {
|
|||||||
|
|
||||||
size_t size = 20;
|
size_t size = 20;
|
||||||
IndirectHeap ssh(buffer, size);
|
IndirectHeap ssh(buffer, size);
|
||||||
devQueue.setupExecutionModelDispatch(ssh, nullptr, 0, 0, 0);
|
IndirectHeap dsh(buffer, size);
|
||||||
|
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0);
|
||||||
|
|
||||||
EXPECT_EQ(0u, ssh.getUsed());
|
EXPECT_EQ(0u, ssh.getUsed());
|
||||||
|
|
||||||
@@ -320,7 +321,7 @@ TEST_F(DeviceQueueTest, dispatchScheduler) {
|
|||||||
CommandQueue cmdQ(nullptr, nullptr, 0);
|
CommandQueue cmdQ(nullptr, nullptr, 0);
|
||||||
KernelInfo info;
|
KernelInfo info;
|
||||||
MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device);
|
MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device);
|
||||||
devQueue.dispatchScheduler(cmdQ, *kernel, device->getPreemptionMode());
|
devQueue.dispatchScheduler(cmdQ, *kernel, device->getPreemptionMode(), nullptr, nullptr);
|
||||||
delete kernel;
|
delete kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -449,60 +449,18 @@ class SurfaceMock : public Surface {
|
|||||||
SurfaceMock(SurfaceMock *parent) : parent(parent){};
|
SurfaceMock(SurfaceMock *parent) : parent(parent){};
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(InternalsEventTest, resizeCmdQueueHeapsWhenKernelOparationHeapsAreBigger) {
|
|
||||||
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
|
||||||
IndirectHeap &cmdQueueDsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096);
|
|
||||||
IndirectHeap &cmdQueueIoh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096);
|
|
||||||
IndirectHeap &cmdQueueSsh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096);
|
|
||||||
|
|
||||||
auto requestedSize = cmdQueueDsh.getMaxAvailableSpace() * 2;
|
|
||||||
auto cmdStream = new LinearStream(alignedMalloc(requestedSize, requestedSize), requestedSize);
|
|
||||||
|
|
||||||
auto createFullHeap = [](size_t size) {
|
|
||||||
auto heap = new IndirectHeap(alignedMalloc(size, size), size);
|
|
||||||
heap->getSpace(heap->getAvailableSpace());
|
|
||||||
return heap;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto dsh = createFullHeap(requestedSize);
|
|
||||||
auto ioh = createFullHeap(requestedSize);
|
|
||||||
auto ssh = createFullHeap(maxSshSize);
|
|
||||||
|
|
||||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
|
||||||
auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
|
||||||
UniqueIH(ioh), UniqueIH(ssh));
|
|
||||||
std::vector<Surface *> v;
|
|
||||||
SurfaceMock *surface = new SurfaceMock;
|
|
||||||
v.push_back(surface);
|
|
||||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
|
||||||
auto cmdComputeKernel = new CommandComputeKernel(*pCmdQ, pDevice->getCommandStreamReceiver(),
|
|
||||||
std::unique_ptr<KernelOperation>(kernelOperation), v, false, false, false, nullptr, preemptionMode);
|
|
||||||
|
|
||||||
EXPECT_LT(cmdQueueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace());
|
|
||||||
EXPECT_LT(cmdQueueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace());
|
|
||||||
EXPECT_EQ(maxSshSize, ssh->getMaxAvailableSpace());
|
|
||||||
|
|
||||||
cmdComputeKernel->submit(0, false);
|
|
||||||
|
|
||||||
EXPECT_GE(cmdQueueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace());
|
|
||||||
EXPECT_GE(cmdQueueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace());
|
|
||||||
EXPECT_GE(cmdQueueSsh.getMaxAvailableSpace(), ssh->getMaxAvailableSpace());
|
|
||||||
|
|
||||||
delete pCmdQ;
|
|
||||||
delete cmdComputeKernel;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(InternalsEventTest, processBlockedCommandsKernelOperation) {
|
TEST_F(InternalsEventTest, processBlockedCommandsKernelOperation) {
|
||||||
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
|
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
|
||||||
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
||||||
|
|
||||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||||
UniqueIH(ioh), UniqueIH(ssh));
|
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
auto &csr = pDevice->getCommandStreamReceiver();
|
auto &csr = pDevice->getCommandStreamReceiver();
|
||||||
std::vector<Surface *> v;
|
std::vector<Surface *> v;
|
||||||
@@ -534,12 +492,13 @@ TEST_F(InternalsEventTest, processBlockedCommandsAbortKernelOperation) {
|
|||||||
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
||||||
|
|
||||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||||
UniqueIH(ioh), UniqueIH(ssh));
|
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
auto &csr = pDevice->getCommandStreamReceiver();
|
auto &csr = pDevice->getCommandStreamReceiver();
|
||||||
std::vector<Surface *> v;
|
std::vector<Surface *> v;
|
||||||
@@ -565,12 +524,13 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
|
|||||||
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);
|
||||||
|
|
||||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||||
UniqueIH(ioh), UniqueIH(ssh));
|
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface();
|
SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface();
|
||||||
pPrintfSurface->DataParamOffset = 0;
|
pPrintfSurface->DataParamOffset = 0;
|
||||||
@@ -1477,12 +1437,13 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl
|
|||||||
csr.flushStamp->setStamp(5);
|
csr.flushStamp->setStamp(5);
|
||||||
|
|
||||||
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
auto cmdStream = new LinearStream(alignedMalloc(4096, 4096), 4096);
|
||||||
auto dsh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
auto ioh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
|
||||||
auto ssh = new IndirectHeap(alignedMalloc(4096, 4096), 4096);
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
|
||||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||||
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
|
||||||
UniqueIH(ioh), UniqueIH(ssh));
|
UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getDevice().getMemoryManager());
|
||||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||||
std::vector<Surface *> v;
|
std::vector<Surface *> v;
|
||||||
auto cmd = new CommandComputeKernel(*pCmdQ, csr, std::unique_ptr<KernelOperation>(blockedCommandsData), v, false, false, false, nullptr, preemptionMode);
|
auto cmd = new CommandComputeKernel(*pCmdQ, csr, std::unique_ptr<KernelOperation>(blockedCommandsData), v, false, false, false, nullptr, preemptionMode);
|
||||||
|
|||||||
@@ -76,7 +76,9 @@ HWTEST_F(ExecutionModelSchedulerFixture, dispatchScheduler) {
|
|||||||
*pCmdQ,
|
*pCmdQ,
|
||||||
*pDevQueueHw,
|
*pDevQueueHw,
|
||||||
pDevice->getPreemptionMode(),
|
pDevice->getPreemptionMode(),
|
||||||
scheduler);
|
scheduler,
|
||||||
|
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||||
|
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||||
|
|
||||||
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
|
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
|
||||||
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
|
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
|
||||||
@@ -192,7 +194,9 @@ HWTEST_F(ExecutionModelSchedulerFixture, dispatchSchedulerDoesNotUseStandardCmdQ
|
|||||||
*pCmdQ,
|
*pCmdQ,
|
||||||
*pDevQueueHw,
|
*pDevQueueHw,
|
||||||
pDevice->getPreemptionMode(),
|
pDevice->getPreemptionMode(),
|
||||||
scheduler);
|
scheduler,
|
||||||
|
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||||
|
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||||
|
|
||||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);
|
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);
|
||||||
|
|
||||||
@@ -223,7 +227,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, dispatchSchedulerWithEarlyReturnSetToF
|
|||||||
*pCmdQ,
|
*pCmdQ,
|
||||||
mockDevQueue,
|
mockDevQueue,
|
||||||
device->getPreemptionMode(),
|
device->getPreemptionMode(),
|
||||||
scheduler);
|
scheduler,
|
||||||
|
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||||
|
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||||
|
|
||||||
HardwareParse hwParser;
|
HardwareParse hwParser;
|
||||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "runtime/command_queue/gpgpu_walker.h"
|
||||||
#include "runtime/event/hw_timestamps.h"
|
#include "runtime/event/hw_timestamps.h"
|
||||||
#include "runtime/helpers/kernel_commands.h"
|
#include "runtime/helpers/kernel_commands.h"
|
||||||
#include "runtime/helpers/task_information.h"
|
#include "runtime/helpers/task_information.h"
|
||||||
@@ -65,18 +66,18 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
|
|||||||
return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
|
return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
|
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
|
||||||
indirectStateSetup = true;
|
indirectStateSetup = true;
|
||||||
return BaseClass::setupIndirectState(surfaceStateHeap, parentKernel, parentIDCount);
|
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
|
||||||
}
|
}
|
||||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
|
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
|
||||||
cleanupSectionAdded = true;
|
cleanupSectionAdded = true;
|
||||||
timestampAddedInCleanupSection = hwTimeStamp;
|
timestampAddedInCleanupSection = hwTimeStamp;
|
||||||
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
||||||
}
|
}
|
||||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override {
|
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
|
||||||
schedulerDispatched = true;
|
schedulerDispatched = true;
|
||||||
return BaseClass::dispatchScheduler(cmdQ, scheduler, preemptionMode);
|
return BaseClass::dispatchScheduler(cmdQ, scheduler, preemptionMode, ssh, dsh);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t criticalSectioncheckCounter = 0;
|
uint32_t criticalSectioncheckCounter = 0;
|
||||||
@@ -98,17 +99,22 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentK
|
|||||||
mockDevQueue.acquireEMCriticalSection();
|
mockDevQueue.acquireEMCriticalSection();
|
||||||
|
|
||||||
size_t heapSize = 20;
|
size_t heapSize = 20;
|
||||||
size_t alignement = 64;
|
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
|
||||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||||
|
|
||||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||||
|
|
||||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||||
std::unique_ptr<IndirectHeap>(dsh),
|
std::unique_ptr<IndirectHeap>(dsh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
std::unique_ptr<IndirectHeap>(ioh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
std::unique_ptr<IndirectHeap>(ssh),
|
||||||
|
*pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||||
@@ -124,7 +130,7 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentK
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenDeviceQueueDshIsUsed) {
|
HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed) {
|
||||||
if (device->getSupportedClVersion() >= 20) {
|
if (device->getSupportedClVersion() >= 20) {
|
||||||
cl_queue_properties properties[3] = {0};
|
cl_queue_properties properties[3] = {0};
|
||||||
MockParentKernel *parentKernel = MockParentKernel::create(*device);
|
MockParentKernel *parentKernel = MockParentKernel::create(*device);
|
||||||
@@ -135,14 +141,19 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
|||||||
auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||||
|
|
||||||
size_t heapSize = 20;
|
size_t heapSize = 20;
|
||||||
size_t alignement = 64;
|
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
|
||||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||||
|
|
||||||
// add initial offset of colorCalcState
|
// add initial offset of colorCalcState
|
||||||
dsh->getSpace(DeviceQueue::colorCalcStateSize);
|
dsh->getSpace(DeviceQueue::colorCalcStateSize);
|
||||||
|
|
||||||
uint64_t ValueToFillDsh = 5;
|
uint64_t ValueToFillDsh = 5;
|
||||||
uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
|
uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
|
||||||
|
|
||||||
// Fill Interface Descriptor Data
|
// Fill Interface Descriptor Data
|
||||||
*dshVal = ValueToFillDsh;
|
*dshVal = ValueToFillDsh;
|
||||||
|
|
||||||
@@ -155,15 +166,15 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
|||||||
*dshVal = ValueToFillDsh;
|
*dshVal = ValueToFillDsh;
|
||||||
|
|
||||||
size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
|
size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
|
||||||
uint64_t *devQueueDshValue = (uint64_t *)dshOfDevQueue->getSpace(0);
|
|
||||||
|
|
||||||
uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
|
uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
|
||||||
EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
|
EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
|
||||||
|
|
||||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||||
std::unique_ptr<IndirectHeap>(dsh),
|
std::unique_ptr<IndirectHeap>(dsh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
std::unique_ptr<IndirectHeap>(ioh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
std::unique_ptr<IndirectHeap>(ssh),
|
||||||
|
*pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
@@ -175,13 +186,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
|||||||
|
|
||||||
cmdComputeKernel->submit(0, false);
|
cmdComputeKernel->submit(0, false);
|
||||||
|
|
||||||
|
//device queue dsh is not changed
|
||||||
size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
|
size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
|
||||||
|
EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit);
|
||||||
EXPECT_EQ(mockDevQueue.getDshOffset() + sizeof(uint64_t), usedDSHAfterSubmit);
|
|
||||||
EXPECT_EQ(ValueToFillDsh, *devQueueDshValue);
|
|
||||||
|
|
||||||
uint64_t *devQueueDshParent = (uint64_t *)ptrOffset((char *)dshOfDevQueue->getCpuBase(), mockDevQueue.getDshOffset());
|
|
||||||
EXPECT_EQ(ValueToFillDsh, *devQueueDshParent);
|
|
||||||
|
|
||||||
delete cmdComputeKernel;
|
delete cmdComputeKernel;
|
||||||
delete parentKernel;
|
delete parentKernel;
|
||||||
@@ -197,15 +204,20 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
|||||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||||
|
|
||||||
size_t heapSize = 20;
|
size_t heapSize = 20;
|
||||||
size_t alignement = 64;
|
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
|
||||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||||
|
|
||||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||||
|
|
||||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||||
std::unique_ptr<IndirectHeap>(dsh),
|
std::unique_ptr<IndirectHeap>(dsh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
std::unique_ptr<IndirectHeap>(ioh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
std::unique_ptr<IndirectHeap>(ssh),
|
||||||
|
*pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
@@ -234,15 +246,18 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingW
|
|||||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||||
|
|
||||||
size_t heapSize = 20;
|
size_t heapSize = 20;
|
||||||
size_t alignement = 64;
|
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||||
|
|
||||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||||
std::unique_ptr<IndirectHeap>(dsh),
|
std::unique_ptr<IndirectHeap>(dsh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
std::unique_ptr<IndirectHeap>(ioh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
std::unique_ptr<IndirectHeap>(ssh),
|
||||||
|
*pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
@@ -274,15 +289,19 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
|||||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||||
|
|
||||||
size_t heapSize = 20;
|
size_t heapSize = 20;
|
||||||
size_t alignement = 64;
|
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
|
||||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||||
|
|
||||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||||
std::unique_ptr<IndirectHeap>(dsh),
|
std::unique_ptr<IndirectHeap>(dsh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
std::unique_ptr<IndirectHeap>(ioh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
std::unique_ptr<IndirectHeap>(ssh),
|
||||||
|
*pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
@@ -301,7 +320,7 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmitte
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmittedThenNewSSHIsAllocated) {
|
HWTEST_F(ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapshenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
|
||||||
if (device->getSupportedClVersion() >= 20) {
|
if (device->getSupportedClVersion() >= 20) {
|
||||||
cl_queue_properties properties[3] = {0};
|
cl_queue_properties properties[3] = {0};
|
||||||
MockParentKernel *parentKernel = MockParentKernel::create(*device);
|
MockParentKernel *parentKernel = MockParentKernel::create(*device);
|
||||||
@@ -314,20 +333,30 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmitte
|
|||||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
size_t heapSize = 20;
|
size_t heapSize = 20;
|
||||||
size_t alignement = 64;
|
|
||||||
|
|
||||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||||
|
|
||||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||||
|
|
||||||
cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
|
auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
|
||||||
// use some SSH
|
auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100);
|
||||||
cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE).getSpace(4);
|
auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100);
|
||||||
|
|
||||||
|
size_t usedSize = 4u;
|
||||||
|
|
||||||
|
queueSsh.getSpace(usedSize);
|
||||||
|
queueDsh.getSpace(usedSize);
|
||||||
|
queueIoh.getSpace(usedSize);
|
||||||
|
|
||||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||||
std::unique_ptr<IndirectHeap>(dsh),
|
std::unique_ptr<IndirectHeap>(dsh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
std::unique_ptr<IndirectHeap>(ioh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)));
|
std::unique_ptr<IndirectHeap>(ssh),
|
||||||
|
*pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||||
@@ -337,7 +366,10 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenUsedSSHWhenParentKernelIsSubmitte
|
|||||||
|
|
||||||
cmdComputeKernel->submit(0, false);
|
cmdComputeKernel->submit(0, false);
|
||||||
|
|
||||||
EXPECT_TRUE(cmdQ.releaseIndirectHeapCalled);
|
EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
|
||||||
|
EXPECT_EQ(usedSize, queueDsh.getUsed());
|
||||||
|
EXPECT_EQ(usedSize, queueIoh.getUsed());
|
||||||
|
EXPECT_EQ(usedSize, queueSsh.getUsed());
|
||||||
|
|
||||||
delete cmdComputeKernel;
|
delete cmdComputeKernel;
|
||||||
delete parentKernel;
|
delete parentKernel;
|
||||||
@@ -355,14 +387,14 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
|
|||||||
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
size_t minSizeSSHForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
size_t heapSize = 20;
|
size_t heapSize = 20;
|
||||||
size_t alignement = 64;
|
|
||||||
|
|
||||||
size_t dshSize = heapSize + mockDevQueue.getDshOffset();
|
|
||||||
IndirectHeap *dsh = new IndirectHeap(alignedMalloc(dshSize, alignement), dshSize);
|
|
||||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
|
||||||
|
|
||||||
|
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||||
size_t sshSize = 1000;
|
size_t sshSize = 1000;
|
||||||
IndirectHeap *ssh = new IndirectHeap(alignedMalloc(sshSize, 4096), sshSize);
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||||
|
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
|
||||||
|
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||||
|
|
||||||
EXPECT_EQ(0u, ssh->getUsed());
|
EXPECT_EQ(0u, ssh->getUsed());
|
||||||
|
|
||||||
@@ -372,8 +404,9 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
|
|||||||
|
|
||||||
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream()),
|
||||||
std::unique_ptr<IndirectHeap>(dsh),
|
std::unique_ptr<IndirectHeap>(dsh),
|
||||||
std::unique_ptr<IndirectHeap>(new IndirectHeap(alignedMalloc(heapSize, alignement), heapSize)),
|
std::unique_ptr<IndirectHeap>(ioh),
|
||||||
std::unique_ptr<IndirectHeap>(ssh));
|
std::unique_ptr<IndirectHeap>(ssh),
|
||||||
|
*pCmdQ->getDevice().getMemoryManager());
|
||||||
|
|
||||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||||
@@ -391,3 +424,43 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmi
|
|||||||
delete parentKernel;
|
delete parentKernel;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes) {
|
||||||
|
if (device->getSupportedClVersion() >= 20) {
|
||||||
|
cl_queue_properties properties[3] = {0};
|
||||||
|
std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*device));
|
||||||
|
|
||||||
|
MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||||
|
parentKernel->createReflectionSurface();
|
||||||
|
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||||
|
|
||||||
|
KernelOperation *blockedCommandsData = nullptr;
|
||||||
|
const size_t globalOffsets[3] = {0, 0, 0};
|
||||||
|
const size_t workItems[3] = {1, 1, 1};
|
||||||
|
|
||||||
|
GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
|
||||||
|
*parentKernel,
|
||||||
|
1,
|
||||||
|
globalOffsets,
|
||||||
|
workItems,
|
||||||
|
nullptr,
|
||||||
|
0,
|
||||||
|
nullptr,
|
||||||
|
&blockedCommandsData,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
device->getPreemptionMode(),
|
||||||
|
true);
|
||||||
|
|
||||||
|
EXPECT_NE(nullptr, blockedCommandsData);
|
||||||
|
EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
|
||||||
|
EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh);
|
||||||
|
|
||||||
|
EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation());
|
||||||
|
EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
|
||||||
|
EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
|
||||||
|
EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());
|
||||||
|
|
||||||
|
delete blockedCommandsData;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -55,7 +55,9 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
|
|||||||
*pCmdQ,
|
*pCmdQ,
|
||||||
*pDevQueueHw,
|
*pDevQueueHw,
|
||||||
pDevice->getPreemptionMode(),
|
pDevice->getPreemptionMode(),
|
||||||
scheduler);
|
scheduler,
|
||||||
|
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||||
|
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||||
|
|
||||||
HardwareParse hwParser;
|
HardwareParse hwParser;
|
||||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||||
|
|||||||
Reference in New Issue
Block a user