From bcecd069b466b43fbeac25385ee8d21bbf8e800c Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Mon, 12 Dec 2022 13:35:16 +0000 Subject: [PATCH] Add additional kernel flag + capability to pull extra patch tokens Signed-off-by: Dunajski, Bartosz --- .../hardware_interface_bdw_and_later.inl | 2 +- .../hardware_interface_xehp_and_later.inl | 2 +- .../dispatch_walker_tests_dg2_and_later.cpp | 6 ++++-- shared/source/command_container/command_encoder.h | 4 ++++ .../command_encoder_xehp_and_later.inl | 3 ++- shared/source/kernel/CMakeLists.txt | 1 + shared/source/kernel/kernel_descriptor.h | 2 +- .../kernel/kernel_descriptor_from_patchtokens.cpp | 2 ++ .../kernel/kernel_descriptor_from_patchtokens.h | 4 +++- .../kernel_descriptor_from_patchtokens_extra.cpp | 15 +++++++++++++++ .../dispatch_walker_tests_xe_hpc_core.cpp | 4 +++- .../xe_hpc_core/pvc/dispatch_walker_tests_pvc.cpp | 7 +++++-- 12 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 shared/source/kernel/kernel_descriptor_from_patchtokens_extra.cpp diff --git a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl index 4948c2fc30..03e261c1b8 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl @@ -98,7 +98,7 @@ inline void HardwareInterface::programWalker( numWorkGroups, walkerArgs.localWorkSizes, simd, dim, false, false, 0u); - EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), false}; + EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), false, kernel.getKernelInfo().kernelDescriptor}; EncodeDispatchKernel::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, encodeWalkerArgs); *walkerCmdBuf = walkerCmd; } diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 53814c65c2..56c3e2ec70 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -118,7 +118,7 @@ inline void HardwareInterface::programWalker( kernelSystemAllocation = kernel.isAnyKernelArgumentUsingSystemMemory(); } bool requiredSystemFence = kernelSystemAllocation && walkerArgs.event != nullptr; - EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence}; + EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence, kernelInfo.kernelDescriptor}; EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, encodeWalkerArgs); auto devices = queueCsr.getOsContext().getDeviceBitfield(); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp index ead1c54619..6d220016d4 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp @@ -80,7 +80,8 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, whenProgramComputeWalkerThenApplyL3WAFo auto hwInfo = *defaultHwInfo; const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); - EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true}; + KernelDescriptor kernelDescriptor; + EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor}; { hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs); @@ -106,7 +107,8 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, givenDebugVariableSetWhenProgramCompute auto walkerCmd = FamilyType::cmdInitGpgpuWalker; auto hwInfo = *defaultHwInfo; - EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true}; + KernelDescriptor kernelDescriptor; + EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor}; for (auto forceL3PrefetchForComputeWalker : {false, true}) { DebugManager.flags.ForceL3PrefetchForComputeWalker.set(forceL3PrefetchForComputeWalker); EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index d95eade370..8091e31b4d 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -33,6 +33,7 @@ class Gmm; struct HardwareInfo; struct KernelInfo; struct StateComputeModeProperties; +struct KernelDescriptor; struct EncodeDispatchKernelArgs { uint64_t eventAddress = 0ull; @@ -69,8 +70,11 @@ enum class CompareOperation : uint32_t { }; struct EncodeWalkerArgs { + EncodeWalkerArgs() = delete; + KernelExecutionType kernelExecutionType = KernelExecutionType::Default; bool requiredSystemFence = false; + const KernelDescriptor &kernelDescriptor; }; template diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 97a7f8e396..476116593a 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -296,7 +296,8 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis EncodeWalkerArgs walkerArgs{ args.isCooperative ? KernelExecutionType::Concurrent : KernelExecutionType::Default, - args.isHostScopeSignalEvent && args.isKernelUsingSystemAllocation}; + args.isHostScopeSignalEvent && args.isKernelUsingSystemAllocation, + kernelDescriptor}; EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs); PreemptionHelper::applyPreemptionWaCmdsBegin(listCmdBufferStream, *args.device); diff --git a/shared/source/kernel/CMakeLists.txt b/shared/source/kernel/CMakeLists.txt index 5c677cb96b..9fd16a8a09 100644 --- a/shared/source/kernel/CMakeLists.txt +++ b/shared/source/kernel/CMakeLists.txt @@ -17,6 +17,7 @@ set(NEO_CORE_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor_from_patchtokens_extra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_properties.h ${CMAKE_CURRENT_SOURCE_DIR}/local_ids_cache.cpp diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index b24c1d3f29..ca0b153eec 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -206,7 +206,7 @@ struct KernelDescriptor { bool useStackCalls : 1; bool hasRTCalls : 1; bool isInvalid : 1; - bool reserved : 1; + bool hasSample : 1; }; std::array packed; } flags = {}; diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp index fd80f03ee3..e09d27bb3a 100644 --- a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp @@ -65,6 +65,8 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro dst.kernelAttributes.flags.hasRTCalls = (0 != execEnv.HasRTCalls); dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber; + + populateKernelDescriptorExtra(dst, execEnv); } void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) { diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.h b/shared/source/kernel/kernel_descriptor_from_patchtokens.h index e14916c7a7..6fe23e42e5 100644 --- a/shared/source/kernel/kernel_descriptor_from_patchtokens.h +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.h @@ -11,7 +11,8 @@ namespace iOpenCL { struct SPatchKernelAttributesInfo; -} +struct SPatchExecutionEnvironment; +} // namespace iOpenCL namespace NEO { struct KernelDescriptor; @@ -22,5 +23,6 @@ struct KernelFromPatchtokens; void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes); void populateKernelDescriptor(KernelDescriptor &dst, const iOpenCL::SPatchKernelAttributesInfo &token); +void populateKernelDescriptorExtra(KernelDescriptor &dst, const iOpenCL::SPatchExecutionEnvironment &execEnv); } // namespace NEO diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens_extra.cpp b/shared/source/kernel/kernel_descriptor_from_patchtokens_extra.cpp new file mode 100644 index 0000000000..4e98b3b1e4 --- /dev/null +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens_extra.cpp @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/device_binary_format/patchtokens_decoder.h" +#include "shared/source/kernel/kernel_descriptor.h" +#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" + +namespace NEO { +void populateKernelDescriptorExtra(KernelDescriptor &dst, const iOpenCL::SPatchExecutionEnvironment &execEnv) { +} +} // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/xe_hpc_core/dispatch_walker_tests_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/dispatch_walker_tests_xe_hpc_core.cpp index 5feba0418e..91d0b81ddc 100644 --- a/shared/test/unit_test/xe_hpc_core/dispatch_walker_tests_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/dispatch_walker_tests_xe_hpc_core.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" @@ -24,7 +25,8 @@ XE_HPC_CORETEST_F(WalkerDispatchTestsXeHpcCore, givenXeHpcWhenEncodeAdditionalWa auto walkerCmd = FamilyType::cmdInitGpgpuWalker; auto hwInfo = *defaultHwInfo; - EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true}; + KernelDescriptor kernelDescriptor; + EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor}; { EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs); EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable()); diff --git a/shared/test/unit_test/xe_hpc_core/pvc/dispatch_walker_tests_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/dispatch_walker_tests_pvc.cpp index 2c97d9ae87..83666c64e8 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/dispatch_walker_tests_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/dispatch_walker_tests_pvc.cpp @@ -7,6 +7,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/xe_hpc_core/hw_cmds_pvc.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" @@ -37,7 +38,8 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcWhenEncodeAdditionalWalkerFieldsThenPo auto &postSyncData = walkerCmd.getPostSync(); auto hwInfo = *defaultHwInfo; - EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true}; + KernelDescriptor kernelDescriptor; + EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, true, kernelDescriptor}; for (auto &testInput : testInputs) { for (auto &deviceId : pvcXlDeviceIds) { hwInfo.platform.usDeviceID = deviceId; @@ -58,7 +60,8 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcSupportsSystemMemoryFenceWhenNoSystemF auto hwInfo = *defaultHwInfo; hwInfo.platform.usRevId = 0x3; - EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, false}; + KernelDescriptor kernelDescriptor; + EncodeWalkerArgs walkerArgs{KernelExecutionType::Default, false, kernelDescriptor}; for (auto &deviceId : pvcXlDeviceIds) { hwInfo.platform.usDeviceID = deviceId;