feature: initial support for patching region params

Related-To: NEO-8070

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-12-19 15:08:54 +00:00 committed by Compute-Runtime-Automation
parent 27b988b124
commit f17f45d63f
8 changed files with 41 additions and 17 deletions

View File

@ -16,6 +16,7 @@ target_sources(${L0_STATIC_LIB_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_imp.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_launch_params.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_extended${BRANCH_DIR_SUFFIX}cmdlist_extended.inl
)

View File

@ -18,6 +18,7 @@
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/source/utilities/stackvec.h"
#include "level_zero/core/source/cmdlist/cmdlist_launch_params.h"
#include <level_zero/ze_api.h>
#include <level_zero/zet_api.h>
@ -34,23 +35,6 @@ struct Event;
struct Kernel;
struct CommandQueue;
struct CmdListKernelLaunchParams {
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
uint32_t numKernelsInSplitLaunch = 0;
uint32_t numKernelsExecutedInSplitLaunch = 0;
bool isIndirect = false;
bool isPredicate = false;
bool isCooperative = false;
bool isKernelSplitOperation = false;
bool isBuiltInKernel = false;
bool isDestinationAllocationInSystemMemory = false;
bool isHostSignalScopeEvent = false;
bool skipInOrderNonWalkerSignaling = false;
bool pipeControlSignalling = false;
};
struct CmdListReturnPoint {
NEO::StreamProperties configSnapshot;
uint64_t gpuAddress = 0;

View File

@ -163,6 +163,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
kernel->patchGlobalOffset();
kernel->patchRegionParams(launchParams);
this->allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
if (launchParams.isIndirect) {

View File

@ -0,0 +1,31 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include <cstdint>
namespace L0 {
struct CmdListKernelLaunchParams {
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
uint32_t numKernelsInSplitLaunch = 0;
uint32_t numKernelsExecutedInSplitLaunch = 0;
bool isIndirect = false;
bool isPredicate = false;
bool isCooperative = false;
bool isKernelSplitOperation = false;
bool isBuiltInKernel = false;
bool isDestinationAllocationInSystemMemory = false;
bool isHostSignalScopeEvent = false;
bool skipInOrderNonWalkerSignaling = false;
bool pipeControlSignalling = false;
};
} // namespace L0

View File

@ -30,6 +30,7 @@ class MemoryManager;
namespace L0 {
struct Device;
struct Module;
struct CmdListKernelLaunchParams;
struct KernelImmutableData {
KernelImmutableData(L0::Device *l0device = nullptr);
@ -137,6 +138,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
virtual uint32_t *getGlobalOffsets() = 0;
virtual ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) = 0;
virtual void patchGlobalOffset() = 0;
// Hook that receives the launch parameters of the kernel being appended.
// CommandListCoreFamily::appendLaunchKernelWithParams() calls it right after patchGlobalOffset().
// NOTE(review): presumably patches region-related kernel data based on launchParams; the only
// implementation visible here (KernelImp) is an empty stub - confirm intended semantics.
virtual void patchRegionParams(const CmdListKernelLaunchParams &launchParams) = 0;
virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
bool isEngineInstanced) = 0;

View File

@ -58,6 +58,7 @@ struct KernelImp : Kernel {
ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override;
void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override;
void patchRegionParams(const CmdListKernelLaunchParams &launchParams) override;
ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
uint32_t groupSizeZ) override;

View File

@ -5,6 +5,7 @@
*
*/
#include "level_zero/core/source/cmdlist/cmdlist_launch_params.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
namespace L0 {
@ -13,4 +14,6 @@ KernelExt *KernelImp::getExtension(uint32_t extensionType) { return nullptr; }
// Empty implementation: pExtendedProperties is neither read nor modified.
void KernelImp::getExtendedKernelProperties(ze_base_desc_t *pExtendedProperties) {}
// Empty implementation: launchParams is ignored and no kernel state is changed.
// NOTE(review): looks like a placeholder for the "initial support" named in the commit title - confirm.
void KernelImp::patchRegionParams(const CmdListKernelLaunchParams &launchParams) {}
} // namespace L0

View File

@ -88,6 +88,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
using KernelImp::kernelRequiresUncachedMocsCount;
using KernelImp::midThreadPreemptionDisallowedForRayTracingKernels;
using KernelImp::patchBindlessOffsetsInCrossThreadData;
using KernelImp::pImplicitArgs;
using KernelImp::printfBuffer;
using KernelImp::privateMemoryGraphicsAllocation;
using KernelImp::requiredWorkgroupOrder;