Add additional kernel flag + capability to pull extra patch tokens

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-12-12 13:35:16 +00:00
committed by Compute-Runtime-Automation
parent 470c6d4660
commit bcecd069b4
12 changed files with 42 additions and 10 deletions

View File

@@ -98,7 +98,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
numWorkGroups, walkerArgs.localWorkSizes, simd, dim,
false, false, 0u);
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), false};
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), false, kernel.getKernelInfo().kernelDescriptor};
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, encodeWalkerArgs);
*walkerCmdBuf = walkerCmd;
}

View File

@@ -118,7 +118,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
kernelSystemAllocation = kernel.isAnyKernelArgumentUsingSystemMemory();
}
bool requiredSystemFence = kernelSystemAllocation && walkerArgs.event != nullptr;
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence};
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence, kernelInfo.kernelDescriptor};
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, encodeWalkerArgs);
auto devices = queueCsr.getOsContext().getDeviceBitfield();

View File

@@ -80,7 +80,8 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, whenProgramComputeWalkerThenApplyL3WAFo
auto hwInfo = *defaultHwInfo;
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true};
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor};
{
hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs);
@@ -106,7 +107,8 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, givenDebugVariableSetWhenProgramCompute
auto walkerCmd = FamilyType::cmdInitGpgpuWalker;
auto hwInfo = *defaultHwInfo;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true};
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor};
for (auto forceL3PrefetchForComputeWalker : {false, true}) {
DebugManager.flags.ForceL3PrefetchForComputeWalker.set(forceL3PrefetchForComputeWalker);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs);

View File

@@ -33,6 +33,7 @@ class Gmm;
struct HardwareInfo;
struct KernelInfo;
struct StateComputeModeProperties;
struct KernelDescriptor;
struct EncodeDispatchKernelArgs {
uint64_t eventAddress = 0ull;
@@ -69,8 +70,11 @@ enum class CompareOperation : uint32_t {
};
// Bundle of per-dispatch inputs handed to
// EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields().
//
// Callers build it with brace initialization in member order:
//   EncodeWalkerArgs args{executionType, requiredSystemFence, kernelDescriptor};
// Because of the reference member below, all three initializers must be supplied.
struct EncodeWalkerArgs {
// An instance cannot be default-constructed; the reference member needs a target.
// NOTE(review): under C++20 a user-declared (even deleted) constructor makes the
// type a non-aggregate, which would break the brace initialization shown above.
// The reference member already deletes the implicit default constructor, so this
// declaration looks redundant - confirm before raising the language standard.
EncodeWalkerArgs() = delete;
// Execution type of the kernel being dispatched.
KernelExecutionType kernelExecutionType = KernelExecutionType::Default;
// Whether the caller requests a system fence for this walker.
bool requiredSystemFence = false;
// Non-owning reference to the descriptor of the dispatched kernel; the referenced
// object must stay alive for as long as this struct is in use.
const KernelDescriptor &kernelDescriptor;
};
template <typename GfxFamily>

View File

@@ -296,7 +296,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeWalkerArgs walkerArgs{
args.isCooperative ? KernelExecutionType::Concurrent : KernelExecutionType::Default,
args.isHostScopeSignalEvent && args.isKernelUsingSystemAllocation};
args.isHostScopeSignalEvent && args.isKernelUsingSystemAllocation,
kernelDescriptor};
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs);
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);

View File

@@ -17,6 +17,7 @@ set(NEO_CORE_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor_from_patchtokens_extra.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/local_ids_cache.cpp

View File

@@ -206,7 +206,7 @@ struct KernelDescriptor {
bool useStackCalls : 1;
bool hasRTCalls : 1;
bool isInvalid : 1;
bool reserved : 1;
bool hasSample : 1;
};
std::array<bool, 3> packed;
} flags = {};

View File

@@ -65,6 +65,8 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
dst.kernelAttributes.flags.hasRTCalls = (0 != execEnv.HasRTCalls);
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
populateKernelDescriptorExtra(dst, execEnv);
}
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) {

View File

@@ -11,7 +11,8 @@
namespace iOpenCL {
struct SPatchKernelAttributesInfo;
}
struct SPatchExecutionEnvironment;
} // namespace iOpenCL
namespace NEO {
struct KernelDescriptor;
@@ -22,5 +23,6 @@ struct KernelFromPatchtokens;
void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes);
void populateKernelDescriptor(KernelDescriptor &dst, const iOpenCL::SPatchKernelAttributesInfo &token);
void populateKernelDescriptorExtra(KernelDescriptor &dst, const iOpenCL::SPatchExecutionEnvironment &execEnv);
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/device_binary_format/patchtokens_decoder.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h"
namespace NEO {
// Extension point for filling additional KernelDescriptor fields from the
// execution-environment patch token.
//
// dst     - kernel descriptor that may be updated.
// execEnv - decoded SPatchExecutionEnvironment token for the kernel.
//
// This implementation has an empty body: it reads nothing from execEnv and
// leaves dst unmodified.
// NOTE(review): presumably other source branches supply a non-empty variant of
// this file - confirm against the build configuration.
void populateKernelDescriptorExtra(KernelDescriptor &dst, const iOpenCL::SPatchExecutionEnvironment &execEnv) {
}
} // namespace NEO

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
@@ -24,7 +25,8 @@ XE_HPC_CORETEST_F(WalkerDispatchTestsXeHpcCore, givenXeHpcWhenEncodeAdditionalWa
auto walkerCmd = FamilyType::cmdInitGpgpuWalker;
auto hwInfo = *defaultHwInfo;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true};
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor};
{
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs);
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());

View File

@@ -7,6 +7,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/xe_hpc_core/hw_cmds_pvc.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -37,7 +38,8 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcWhenEncodeAdditionalWalkerFieldsThenPo
auto &postSyncData = walkerCmd.getPostSync();
auto hwInfo = *defaultHwInfo;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true};
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor};
for (auto &testInput : testInputs) {
for (auto &deviceId : pvcXlDeviceIds) {
hwInfo.platform.usDeviceID = deviceId;
@@ -58,7 +60,8 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcSupportsSystemMemoryFenceWhenNoSystemF
auto hwInfo = *defaultHwInfo;
hwInfo.platform.usRevId = 0x3;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, false};
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, false, kernelDescriptor};
for (auto &deviceId : pvcXlDeviceIds) {
hwInfo.platform.usDeviceID = deviceId;