Execution Model heaps programming cleanup

Change-Id: I501357f5ad3f7308397364073691b4efd3e7260d
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2019-12-17 10:43:19 +01:00 committed by sys_ocldev
parent 72d59aaff6
commit 559c67aa7c
12 changed files with 39 additions and 91 deletions

View File

@ -346,7 +346,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (blockQueue) {
if (parentKernel) {
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM;
}
@ -531,7 +531,7 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
TagNode<HwTimeStamps> *hwTimeStamps,
bool &blocking) {
auto parentKernel = multiDispatchInfo.peekParentKernel();
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
bool isCcsUsed = isCcs(gpgpuEngine->osContext->getEngineType());
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;

View File

@ -192,7 +192,7 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
if (heapType == IndirectHeap::SURFACE_STATE) {
expectedSize += HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(heapType, *parentKernel);
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
} else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
{
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());

View File

@ -200,7 +200,7 @@ void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueu
if (parentKernel) {
dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize();
sshSize += HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
sshSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
iohEqualsDsh = true;
colorCalcSize = static_cast<size_t>(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize);
} else {

View File

@ -150,18 +150,12 @@ void DeviceQueueHw<GfxFamily>::resetDSH() {
template <typename GfxFamily>
IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type) {
UNRECOVERABLE_IF(type != IndirectHeap::DYNAMIC_STATE);
if (!heaps[type]) {
switch (type) {
case IndirectHeap::DYNAMIC_STATE: {
heaps[type] = new IndirectHeap(dshBuffer);
// get space for colorCalc and 2 ID tables at the beginning
heaps[type]->getSpace(colorCalcStateSize);
break;
}
default:
break;
}
heaps[type] = new IndirectHeap(dshBuffer);
// get space for colorCalc and 2 ID tables at the beginning
heaps[type]->getSpace(colorCalcStateSize);
}
return heaps[type];
}

View File

@ -164,7 +164,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
static size_t getTotalSizeRequiredSSH(
const MultiDispatchInfo &multiDispatchInfo);
static size_t getSizeRequiredForExecutionModel(IndirectHeap::Type heapType, const Kernel &kernel);
static size_t getSshSizeForExecutionModel(const Kernel &kernel);
static void setInterfaceDescriptorOffset(
WALKER_TYPE<GfxFamily> *walkerCmd,
uint32_t &interfaceDescriptorIndex);

View File

@ -133,7 +133,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(
}
template <typename GfxFamily>
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::Type heapType, const Kernel &kernel) {
size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kernel &kernel) {
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
size_t totalSize = 0;
@ -141,31 +141,24 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(Indir
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
uint32_t maxBindingTableCount = 0;
if (heapType == IndirectHeap::SURFACE_STATE) {
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1;
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1;
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
totalSize += pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
totalSize += pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count);
}
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count);
}
if (heapType == IndirectHeap::INDIRECT_OBJECT || heapType == IndirectHeap::SURFACE_STATE) {
BuiltIns &builtIns = *kernel.getDevice().getExecutionEnvironment()->getBuiltIns();
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(kernel.getContext());
BuiltIns &builtIns = *kernel.getDevice().getExecutionEnvironment()->getBuiltIns();
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(kernel.getContext());
if (heapType == IndirectHeap::INDIRECT_OBJECT) {
totalSize += getSizeRequiredIOH(scheduler);
} else {
totalSize += getSizeRequiredSSH(scheduler);
totalSize += getSizeRequiredSSH(scheduler);
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
}
return totalSize;
}

View File

@ -464,18 +464,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, getIndirectHeapDSH) {
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, getIndirectHeapNonExistent) {
deviceQueue = createQueueObject();
ASSERT_NE(deviceQueue, nullptr);
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(deviceQueue);
auto heap = devQueueHw->getIndirectHeap(IndirectHeap::GENERAL_STATE);
EXPECT_EQ(nullptr, heap);
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, getDshOffset) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
@ -535,7 +523,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectState) {
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
auto usedBeforeSSH = ssh->getUsed();
@ -565,7 +553,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectStateSetsCorre
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
@ -595,7 +583,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectStateSetsCorre
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
@ -635,8 +623,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall
}
auto surfaceStateHeapSize =
HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE,
const_cast<const Kernel &>(*pKernel));
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);

View File

@ -140,7 +140,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
EXPECT_LE(pKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize, ssh.getMaxAvailableSpace());
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *pKernel);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
}
@ -172,7 +172,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
ASSERT_NE(nullptr, blockedCommandsData);
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *pKernel);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
size_t sshUsed = blockedCommandsData->ssh->getUsed();

View File

@ -53,7 +53,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchScheduler) {
EXPECT_NE(nullptr, executionModelDsh);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
@ -175,7 +175,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoe
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = pDevice->getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(*context);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);

View File

@ -94,7 +94,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritca
dsh->getSpace(mockDevQueue.getDshOffset());
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
@ -159,7 +159,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -200,7 +200,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -238,7 +238,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentK
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -279,7 +279,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -305,7 +305,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue
MockCommandQueue cmdQ(context, device, properties);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t heapSize = 20;
@ -357,7 +357,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhen
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t heapSize = 20;

View File

@ -30,7 +30,7 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = pDevice->getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(*context);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);

View File

@ -1187,33 +1187,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, getSizeRequiredF
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *pKernel));
}
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, getSizeRequiredForExecutionModelForIOHReturnsSchedulerSize) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
BuiltIns &builtIns = *pDevice->getExecutionEnvironment()->getBuiltIns();
auto &scheduler = builtIns.getSchedulerKernel(*pContext);
size_t totalSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(scheduler);
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::INDIRECT_OBJECT, *pKernel));
}
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, getSizeRequiredForExecutionModelForGSH) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
size_t totalSize = 0;
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::GENERAL_STATE, *pKernel));
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel));
}
}