Add scratch space support to global stateless heap model

Related-To: NEO-5055

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-03-08 14:08:34 +00:00
committed by Compute-Runtime-Automation
parent a6dfb3a6c2
commit 37768a15d3
3 changed files with 129 additions and 6 deletions

View File

@@ -542,12 +542,14 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
if (commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
}
for (auto element : commandContainer.sshAllocations) {
heapContainer.push_back(element);
if (commandList->getCmdListHeapAddressModel() == NEO::HeapAddressModel::PrivateHeaps) {
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
if (commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
}
for (auto &element : commandContainer.sshAllocations) {
heapContainer.push_back(element);
}
}
}
}

View File

@@ -150,6 +150,11 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) {
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) {
auto globalStatelessHeapAllocation = csr->getGlobalStatelessHeapAllocation();
scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState);
}
if (sshHeaps.size() > 0) {
uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u;
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h"
@@ -2139,5 +2140,120 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
EXPECT_EQ((statlessMocs << 1), sbaCmd->getStatelessDataPortAccessMemoryObjectControlState());
}
// Launches a kernel that requires per-thread scratch space through a regular command list,
// executes that list on the command queue and checks the commands dispatched to the queue stream:
//  - STATE_BASE_ADDRESS programs the global stateless heap base as surface state base address,
//  - exactly one CFE_STATE is dispatched and it carries the expected scratch surface state offset,
//  - the surface state found at that offset in the global stateless heap points at the scratch allocation.
HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
          givenGlobalStatelessKernelUsingScratchSpaceWhenExecutingRegularCommandListThenBaseAddressAndFrontEndStateCommandsProperlyDispatched,
          IsAtLeastXeHpCore) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using CFE_STATE = typename FamilyType::CFE_STATE;
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    // Non-zero per-thread scratch size in slot 0 makes the appended kernel a scratch user.
    mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100;

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    result = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Remember the queue stream usage around the execution so only the newly dispatched commands are parsed.
    auto &cmdQueueStream = commandQueue->commandStream;
    size_t queueBefore = cmdQueueStream.getUsed();

    ze_command_list_handle_t cmdListHandle = commandList->toHandle();
    result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    size_t queueAfter = cmdQueueStream.getUsed();

    // Fail the test instead of crashing when the global stateless heap is not available.
    auto globalSurfaceHeap = commandQueue->getCsr()->getGlobalStatelessHeap();
    ASSERT_NE(nullptr, globalSurfaceHeap);
    auto ssBaseAddress = globalSurfaceHeap->getHeapGpuBase();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdQueueStream.getCpuBase(), queueBefore),
        queueAfter - queueBefore));

    auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(expectedSbaCmds, sbaCmds.size());

    auto sbaCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
    EXPECT_TRUE(sbaCmd->getSurfaceStateBaseAddressModifyEnable());
    EXPECT_EQ(ssBaseAddress, sbaCmd->getSurfaceStateBaseAddress());

    auto frontEndCmds = findAll<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(1u, frontEndCmds.size());

    // Expected offset of the scratch surface state: two RENDER_SURFACE_STATE entries into the heap.
    // NOTE(review): presumably this matches the slot layout used by the scratch space controller - confirm.
    constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE);

    auto frontEndCmd = reinterpret_cast<CFE_STATE *>(*frontEndCmds[0]);
    EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer());

    auto scratchSpaceController = commandQueue->csr->getScratchSpaceController();
    ASSERT_NE(nullptr, scratchSpaceController);
    EXPECT_EQ(expectedScratchOffset, scratchSpaceController->getScratchPatchAddress());

    auto surfaceStateHeapAlloc = globalSurfaceHeap->getGraphicsAllocation();
    ASSERT_NE(nullptr, surfaceStateHeapAlloc);
    void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
    auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);

    // A missing scratch allocation must be reported as a test failure, not a null dereference.
    auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
    ASSERT_NE(nullptr, scratchAllocation);
    EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
}
// Launches a kernel that requires per-thread scratch space through an immediate command list
// and checks the commands that appeared in the command stream receiver's stream during the append:
//  - STATE_BASE_ADDRESS programs the global stateless heap base as surface state base address,
//  - exactly one CFE_STATE is dispatched and it carries the expected scratch surface state offset,
//  - the surface state found at that offset in the global stateless heap points at the scratch allocation.
HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
          givenGlobalStatelessKernelUsingScratchSpaceWhenExecutingImmediateCommandListThenBaseAddressAndFrontEndStateCommandsProperlyDispatched,
          IsAtLeastXeHpCore) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using CFE_STATE = typename FamilyType::CFE_STATE;
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    // Non-zero per-thread scratch size in slot 0 makes the appended kernel a scratch user.
    mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100;

    auto &csrImmediate = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &csrStream = csrImmediate.commandStream;

    // Fail the test instead of crashing when the global stateless heap is not available.
    auto globalSurfaceHeap = csrImmediate.getGlobalStatelessHeap();
    ASSERT_NE(nullptr, globalSurfaceHeap);

    // Remember the receiver stream usage around the append so only the newly dispatched commands are parsed.
    size_t csrUsedBefore = csrStream.getUsed();

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    size_t csrUsedAfter = csrStream.getUsed();

    auto ssBaseAddress = globalSurfaceHeap->getHeapGpuBase();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(csrStream.getCpuBase(), csrUsedBefore),
        csrUsedAfter - csrUsedBefore));

    auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(expectedSbaCmds, sbaCmds.size());

    auto sbaCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
    EXPECT_TRUE(sbaCmd->getSurfaceStateBaseAddressModifyEnable());
    EXPECT_EQ(ssBaseAddress, sbaCmd->getSurfaceStateBaseAddress());

    auto frontEndCmds = findAll<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(1u, frontEndCmds.size());

    // Expected offset of the scratch surface state: two RENDER_SURFACE_STATE entries into the heap.
    // NOTE(review): presumably this matches the slot layout used by the scratch space controller - confirm.
    constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE);

    auto frontEndCmd = reinterpret_cast<CFE_STATE *>(*frontEndCmds[0]);
    EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer());

    // NOTE(review): the controller is fetched through commandQueue->csr while the stream above comes from
    // csrImmediate; presumably both refer to the same receiver in this fixture - confirm.
    auto scratchSpaceController = commandQueue->csr->getScratchSpaceController();
    ASSERT_NE(nullptr, scratchSpaceController);
    EXPECT_EQ(expectedScratchOffset, scratchSpaceController->getScratchPatchAddress());

    auto surfaceStateHeapAlloc = globalSurfaceHeap->getGraphicsAllocation();
    ASSERT_NE(nullptr, surfaceStateHeapAlloc);
    void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
    auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);

    // A missing scratch allocation must be reported as a test failure, not a null dereference.
    auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
    ASSERT_NE(nullptr, scratchAllocation);
    EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
}
} // namespace ult
} // namespace L0