feature: add field to reserve extra payload heap space

Related-To: NEO-10066

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2024-04-24 10:23:29 +00:00 committed by Compute-Runtime-Automation
parent 44dfa5187a
commit 149523b73c
10 changed files with 62 additions and 2 deletions

View File

@ -207,6 +207,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
launchParams.additionalSizeParam, // additionalSizeParam
0, // partitionCount
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
launchParams.isIndirect, // isIndirect
launchParams.isPredicate, // isPredicate
false, // isTimestampEvent

View File

@ -334,6 +334,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
launchParams.additionalSizeParam, // additionalSizeParam
this->partitionCount, // partitionCount
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
launchParams.isIndirect, // isIndirect
launchParams.isPredicate, // isPredicate
isTimestampEvent, // isTimestampEvent

View File

@ -54,6 +54,7 @@ struct CmdListKernelLaunchParams {
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
uint32_t numKernelsInSplitLaunch = 0;
uint32_t numKernelsExecutedInSplitLaunch = 0;
uint32_t reserveExtraPayloadSpace = 0;
bool isIndirect = false;
bool isPredicate = false;
bool isCooperative = false;

View File

@ -202,6 +202,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam
0, // partitionCount
0, // reserveExtraPayloadSpace
false, // isIndirect
false, // isPredicate
false, // isTimestampEvent

View File

@ -699,6 +699,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam
0, // partitionCount
0, // reserveExtraPayloadSpace
false, // isIndirect
false, // isPredicate
false, // isTimestampEvent

View File

@ -9,8 +9,10 @@
#include "shared/source/command_stream/scratch_space_controller_base.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/gmm_lib.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/os_interface/product_helper.h"
@ -2871,5 +2873,35 @@ HWTEST2_F(CommandListAppendLaunchKernel,
EXPECT_EQ(0, memcmp(walkerGfxMemory, launchParams.cmdWalkerBuffer, sizeof(DefaultWalkerType)));
}
// Checks that a non-zero CmdListKernelLaunchParams::reserveExtraPayloadSpace is reflected in the
// amount of indirect object heap used after appending a kernel launch: the used size is expected
// to equal the cross-thread data size plus the reservation, aligned up to the IOH alignment.
HWTEST2_F(CommandListAppendLaunchKernel,
          givenCmdListParamHasExtraSpaceReserveWhenAppendingKernelThenExtraSpaceIsConsumed,
          IsAtLeastXeHpCore) {
    constexpr uint32_t crossThreadPayloadSize = 64;
    constexpr uint32_t extraPayloadReservation = 1024;

    // Kernel set up with no per-thread data and no inline data, so the expectation below
    // only has to account for the cross-thread payload and the extra reservation.
    Mock<::L0::KernelImp> kernel;
    std::unique_ptr<Module> mockModule = std::make_unique<Mock<Module>>(device, nullptr);
    kernel.module = mockModule.get();
    kernel.descriptor.kernelAttributes.flags.passInlineData = false;
    kernel.perThreadDataSizeForWholeThreadGroup = 0;
    kernel.crossThreadDataSize = crossThreadPayloadSize;
    kernel.crossThreadData = std::make_unique<uint8_t[]>(kernel.crossThreadDataSize);

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    const auto initResult = commandList->initialize(device, NEO::EngineGroupType::compute, 0);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initResult);

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    launchParams.reserveExtraPayloadSpace = extraPayloadReservation;

    const auto appendResult = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, appendResult);

    auto indirectObjectHeap = commandList->getCmdContainer().getIndirectHeap(NEO::IndirectHeapType::indirectObject);

    size_t unalignedPayloadSize = extraPayloadReservation + crossThreadPayloadSize;
    size_t expectedUsedSize = alignUp(unalignedPayloadSize, device->getGfxCoreHelper().getIOHAlignment());
    EXPECT_EQ(expectedUsedSize, indirectObjectHeap->getUsed());
}
} // namespace ult
} // namespace L0

View File

@ -61,6 +61,7 @@ struct EncodeDispatchKernelArgs {
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
uint32_t partitionCount = 0u;
uint32_t reserveExtraPayloadSpace = 0;
bool isIndirect = false;
bool isPredicate = false;
bool isTimestampEvent = false;

View File

@ -241,7 +241,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, !localIdsGenerationByRuntime, rootDeviceEnvironment);
uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching;
uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching + args.reserveExtraPayloadSpace;
{
auto heap = container.getIndirectHeap(HeapType::indirectObject);
UNRECOVERABLE_IF(!heap);
@ -253,7 +253,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
ptr = container.getHeapSpaceAllowGrow(HeapType::indirectObject, iohRequiredSize);
}
UNRECOVERABLE_IF(!ptr);
offsetThreadData = (is64bit ? heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast<uint64_t>(heap->getUsed() - sizeThreadData);
offsetThreadData = (is64bit ? heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - args.reserveExtraPayloadSpace);
auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
if (pImplicitArgs) {
offsetThreadData -= ImplicitArgs::getSize();

View File

@ -1594,3 +1594,24 @@ HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenCpuWalkerPointer
EXPECT_EQ(0, memcmp(cmdWalkerGfxMemory, cpuWalkerPointer, sizeof(DefaultWalkerType)));
}
// Checks that EncodeDispatchKernelArgs::reserveExtraPayloadSpace is added to the indirect object
// heap consumption of EncodeDispatchKernel::encode: the used size is expected to equal the
// cross-thread data size plus the reservation, aligned up to the IOH alignment.
HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenRequestingExtraPayloadSpaceThenConsumeExtraIndirectHeapSpace, IsAtLeastXeHpCore) {
    using DefaultWalkerType = typename FamilyType::DefaultWalkerType;

    constexpr uint32_t crossThreadPayloadSize = 64;
    constexpr uint32_t extraPayloadReservation = 1024;

    uint32_t threadGroupDims[] = {1, 1, 1};
    auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
    dispatchInterface->kernelDescriptor.kernelAttributes.flags.passInlineData = false;
    dispatchInterface->getCrossThreadDataSizeResult = crossThreadPayloadSize;

    bool uncachedMocsRequired = false;
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), threadGroupDims, uncachedMocsRequired);
    dispatchArgs.reserveExtraPayloadSpace = extraPayloadReservation;

    EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);

    auto indirectObjectHeap = cmdContainer->getIndirectHeap(HeapType::indirectObject);

    size_t unalignedPayloadSize = crossThreadPayloadSize + extraPayloadReservation;
    size_t expectedConsumedSize = alignUp(unalignedPayloadSize, pDevice->getGfxCoreHelper().getIOHAlignment());
    EXPECT_EQ(expectedConsumedSize, indirectObjectHeap->getUsed());
}

View File

@ -56,6 +56,7 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam
1, // partitionCount
0, // reserveExtraPayloadSpace
false, // isIndirect
false, // isPredicate
false, // isTimestampEvent