mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Add scratch space support to global stateless heap model
Related-To: NEO-5055
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a6dfb3a6c2
commit
37768a15d3
@@ -542,12 +542,14 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
|
||||
ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
|
||||
ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
|
||||
|
||||
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
|
||||
if (commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
|
||||
heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
|
||||
}
|
||||
for (auto element : commandContainer.sshAllocations) {
|
||||
heapContainer.push_back(element);
|
||||
if (commandList->getCmdListHeapAddressModel() == NEO::HeapAddressModel::PrivateHeaps) {
|
||||
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
|
||||
if (commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
|
||||
heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
|
||||
}
|
||||
for (auto &element : commandContainer.sshAllocations) {
|
||||
heapContainer.push_back(element);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,6 +150,11 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
|
||||
if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) {
|
||||
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) {
|
||||
auto globalStatelessHeapAllocation = csr->getGlobalStatelessHeapAllocation();
|
||||
scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
}
|
||||
if (sshHeaps.size() > 0) {
|
||||
uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u;
|
||||
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
@@ -2139,5 +2140,120 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
|
||||
EXPECT_EQ((statlessMocs << 1), sbaCmd->getStatelessDataPortAccessMemoryObjectControlState());
|
||||
}
|
||||
|
||||
// Regular (non-immediate) command list case: a kernel with a non-zero per-thread
// scratch size is appended, the list is closed and executed on the command queue,
// and the commands emitted into the queue's command stream by that execution are
// checked:
//  - STATE_BASE_ADDRESS must enable surface-state base modification and point at
//    the GPU base of the CSR's global stateless heap,
//  - exactly one CFE_STATE must be present and carry the expected scratch offset,
//  - the RENDER_SURFACE_STATE located at that offset in the global stateless heap
//    must reference the scratch allocation held by the scratch space controller.
HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
|
||||
givenGlobalStatelessKernelUsingScratchSpaceWhenExecutingRegularCommandListThenBaseAddressAndFrontEndStateCommandsProperlyDispatched,
|
||||
IsAtLeastXeHpCore) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
|
||||
// Give the mock kernel a non-zero scratch size in slot 0 before it is appended.
mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100;
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
result = commandList->close();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto &cmdQueueStream = commandQueue->commandStream;
|
||||
|
||||
// Snapshot the stream usage around executeCommandLists so that only the commands
// written by this execution are parsed below.
size_t queueBefore = cmdQueueStream.getUsed();
|
||||
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
|
||||
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
size_t queueAfter = cmdQueueStream.getUsed();
|
||||
|
||||
auto globalSurfaceHeap = commandQueue->getCsr()->getGlobalStatelessHeap();
|
||||
|
||||
auto ssBaseAddress = globalSurfaceHeap->getHeapGpuBase();
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdQueueStream.getCpuBase(), queueBefore),
|
||||
queueAfter - queueBefore));
|
||||
auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
|
||||
// NOTE(review): expectedSbaCmds is declared outside this block; sbaCmds[0] is
// dereferenced below, which presumes it is at least 1 here - confirm in the fixture.
ASSERT_EQ(expectedSbaCmds, sbaCmds.size());
|
||||
|
||||
auto sbaCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
|
||||
|
||||
EXPECT_TRUE(sbaCmd->getSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(ssBaseAddress, sbaCmd->getSurfaceStateBaseAddress());
|
||||
|
||||
auto frontEndCmds = findAll<CFE_STATE *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, frontEndCmds.size());
|
||||
|
||||
// The scratch surface state is expected two RENDER_SURFACE_STATE entries into the heap.
// NOTE(review): presumably the preceding slots are reserved by the heap setup - confirm.
constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
auto frontEndCmd = reinterpret_cast<CFE_STATE *>(*frontEndCmds[0]);
|
||||
EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer());
|
||||
|
||||
auto scratchSpaceController = commandQueue->csr->getScratchSpaceController();
|
||||
EXPECT_EQ(expectedScratchOffset, scratchSpaceController->getScratchPatchAddress());
|
||||
|
||||
// Read the surface state back from the heap's CPU buffer at the expected offset and
// check that it references the scratch allocation.
auto surfaceStateHeapAlloc = globalSurfaceHeap->getGraphicsAllocation();
|
||||
void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
|
||||
auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);
|
||||
|
||||
auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
|
||||
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
// Immediate command list case: a kernel with a non-zero per-thread scratch size is
// appended to the immediate command list and the commands written to the device
// CSR's command stream during that append are checked:
//  - STATE_BASE_ADDRESS must enable surface-state base modification and point at
//    the GPU base of the CSR's global stateless heap,
//  - exactly one CFE_STATE must be present and carry the expected scratch offset,
//  - the RENDER_SURFACE_STATE located at that offset in the global stateless heap
//    must reference the scratch allocation held by the scratch space controller.
HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
|
||||
givenGlobalStatelessKernelUsingScratchSpaceWhenExecutingImmediateCommandListThenBaseAddressAndFrontEndStateCommandsProperlyDispatched,
|
||||
IsAtLeastXeHpCore) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
|
||||
// Give the mock kernel a non-zero scratch size in slot 0 before it is appended.
mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100;
|
||||
|
||||
auto &csrImmediate = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto &csrStream = csrImmediate.commandStream;
|
||||
auto globalSurfaceHeap = csrImmediate.getGlobalStatelessHeap();
|
||||
|
||||
// Snapshot the CSR stream usage around the append so that only the commands
// written during this append are parsed below.
size_t csrUsedBefore = csrStream.getUsed();
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
size_t csrUsedAfter = csrStream.getUsed();
|
||||
|
||||
auto ssBaseAddress = globalSurfaceHeap->getHeapGpuBase();
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(csrStream.getCpuBase(), csrUsedBefore),
|
||||
csrUsedAfter - csrUsedBefore));
|
||||
auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
|
||||
// NOTE(review): expectedSbaCmds is declared outside this block; sbaCmds[0] is
// dereferenced below, which presumes it is at least 1 here - confirm in the fixture.
ASSERT_EQ(expectedSbaCmds, sbaCmds.size());
|
||||
|
||||
auto sbaCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
|
||||
|
||||
EXPECT_TRUE(sbaCmd->getSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(ssBaseAddress, sbaCmd->getSurfaceStateBaseAddress());
|
||||
|
||||
auto frontEndCmds = findAll<CFE_STATE *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, frontEndCmds.size());
|
||||
|
||||
// The scratch surface state is expected two RENDER_SURFACE_STATE entries into the heap.
// NOTE(review): presumably the preceding slots are reserved by the heap setup - confirm.
constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
auto frontEndCmd = reinterpret_cast<CFE_STATE *>(*frontEndCmds[0]);
|
||||
EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer());
|
||||
|
||||
// NOTE(review): the controller is fetched through commandQueue->csr while the rest of
// this test uses csrImmediate; this presumes both refer to the same CSR - confirm.
auto scratchSpaceController = commandQueue->csr->getScratchSpaceController();
|
||||
EXPECT_EQ(expectedScratchOffset, scratchSpaceController->getScratchPatchAddress());
|
||||
|
||||
// Read the surface state back from the heap's CPU buffer at the expected offset and
// check that it references the scratch allocation.
auto surfaceStateHeapAlloc = globalSurfaceHeap->getGraphicsAllocation();
|
||||
void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
|
||||
auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);
|
||||
|
||||
auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
|
||||
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user