From ff79c84115d2b86cd7a75fc4043d82a63212f47a Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Thu, 13 Jan 2022 23:57:00 +0000 Subject: [PATCH] Correct INTERFACE_DESCRIPTOR_DATA definitions for XeHp and later Related-To: NEO-6466 Signed-off-by: Mateusz Jablonski --- .../source/helpers/hardware_commands_helper.h | 2 +- .../helpers/hardware_commands_helper_base.inl | 9 +- .../dispatch_walker_tests_xehp_and_later.cpp | 6 +- .../command_encoder_xehp_and_later.inl | 5 +- shared/source/gen11/hw_cmds_base.h | 3 +- shared/source/gen12lp/hw_cmds_base.h | 3 +- shared/source/gen8/hw_cmds_base.h | 3 +- shared/source/gen9/hw_cmds_base.h | 3 +- .../hw_cmds_generated_xe_hp_core.inl | 146 ++++++----- .../hw_cmds_generated_xe_hpc_core.inl | 245 ++++++++---------- .../hw_cmds_generated_xe_hpg_core.inl | 201 +++++++------- .../xe_hp_core/command_encoder_xe_hp_core.cpp | 7 +- shared/source/xe_hp_core/hw_cmds_base.h | 3 +- .../command_encoder_xe_hpc_core.cpp | 49 ++-- shared/source/xe_hpc_core/hw_cmds_base.h | 3 +- .../command_encoder_xe_hpg_core.cpp | 38 ++- shared/source/xe_hpg_core/hw_cmds_base.h | 3 +- .../pvc/test_encode_dispatch_kernel_pvc.cpp | 9 +- .../dg2/test_encode_dispatch_kernel_dg2.cpp | 17 +- ...t_encode_dispatch_kernel_dg2_and_later.cpp | 45 ++-- ...est_encode_dispatch_kernel_dg2_and_later.h | 13 +- ...t_encode_dispatch_kernel_pvc_and_later.cpp | 41 ++- ..._encode_dispatch_kernel_xehp_and_later.cpp | 29 ++- .../test/unit_test/xe_hpg_core/CMakeLists.txt | 3 +- ...est_encode_dispatch_kernel_xe_hpg_core.cpp | 28 ++ 25 files changed, 481 insertions(+), 433 deletions(-) create mode 100644 shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index 03ac2a29a1..21e74191dc 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -51,7 +51,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { size_t sizeCrossThreadData, size_t sizePerThreadData, size_t bindingTablePointer, - size_t offsetSamplerState, + [[maybe_unused]] size_t offsetSamplerState, uint32_t numSamplers, uint32_t numThreadsPerThreadGroup, const Kernel &kernel, diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index d6c80469a8..99a1f6bc85 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -140,7 +140,7 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( size_t sizeCrossThreadData, size_t sizePerThreadData, size_t bindingTablePointer, - size_t offsetSamplerState, + [[maybe_unused]] size_t offsetSamplerState, uint32_t numSamplers, uint32_t threadsPerThreadGroup, const Kernel &kernel, @@ -158,8 +158,7 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( auto interfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData; // Program the kernel start pointer - interfaceDescriptor.setKernelStartPointerHigh(kernelStartOffset >> 32); - interfaceDescriptor.setKernelStartPointer((uint32_t)kernelStartOffset); + interfaceDescriptor.setKernelStartPointer(static_cast(kernelStartOffset & std::numeric_limits::max())); // # of threads in thread group should be based on LWS. interfaceDescriptor.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); @@ -173,7 +172,9 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( interfaceDescriptor.setBindingTablePointer(static_cast(bindingTablePointer)); - interfaceDescriptor.setSamplerStatePointer(static_cast(offsetSamplerState)); + if constexpr (GfxFamily::supportsSampler) { + interfaceDescriptor.setSamplerStatePointer(static_cast(offsetSamplerState)); + } EncodeDispatchKernel::adjustBindingTablePrefetch(interfaceDescriptor, numSamplers, bindingTablePrefetchSize); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp index 26d5254d92..30ae3bd710 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -636,7 +636,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAutoLocal kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad; EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); - EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, false, *cmdQ.get(), multiDispatchInfo, false, false); @@ -710,7 +709,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch(); EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); - EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, false, *cmdQ.get(), multiDispatchInfo, false, false); @@ -1009,7 +1007,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad; EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); - EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh()); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } @@ -1062,7 +1059,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch(); EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); - EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh()); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index a6f5ca22f6..13cd2ce52c 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -87,7 +87,6 @@ void EncodeDispatchKernel::encode(CommandContainer &container, offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad; } idd.setKernelStartPointer(offset); - idd.setKernelStartPointerHigh(0u); } auto threadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup(); @@ -141,7 +140,9 @@ void EncodeDispatchKernel::encode(CommandContainer &container, } } - idd.setSamplerStatePointer(samplerStateOffset); + if constexpr (Family::supportsSampler) { + idd.setSamplerStatePointer(samplerStateOffset); + } EncodeDispatchKernel::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount); diff --git a/shared/source/gen11/hw_cmds_base.h b/shared/source/gen11/hw_cmds_base.h index 26a01401a1..0d9a53f107 100644 --- a/shared/source/gen11/hw_cmds_base.h +++ b/shared/source/gen11/hw_cmds_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,6 +20,7 @@ namespace NEO { struct GEN11 { #include "shared/source/generated/gen11/hw_cmds_generated_gen11.inl" + static constexpr bool supportsSampler = true; struct DataPortBindlessSurfaceExtendedMessageDescriptor { union { struct { diff --git a/shared/source/gen12lp/hw_cmds_base.h b/shared/source/gen12lp/hw_cmds_base.h index 032a3244a1..8666383ff6 100644 --- a/shared/source/gen12lp/hw_cmds_base.h +++ b/shared/source/gen12lp/hw_cmds_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ namespace NEO { struct GEN12LP { #include "shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl" + static constexpr bool supportsSampler = true; static constexpr uint32_t stateComputeModeForceNonCoherentMask = (0b11u << 3); struct DataPortBindlessSurfaceExtendedMessageDescriptor { diff --git a/shared/source/gen8/hw_cmds_base.h b/shared/source/gen8/hw_cmds_base.h index ab6481ce42..201202998f 100644 --- a/shared/source/gen8/hw_cmds_base.h +++ b/shared/source/gen8/hw_cmds_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ namespace NEO { struct GEN8 { #include "shared/source/generated/gen8/hw_cmds_generated_gen8.inl" + static constexpr bool supportsSampler = true; struct DataPortBindlessSurfaceExtendedMessageDescriptor { union { struct { diff --git a/shared/source/gen9/hw_cmds_base.h b/shared/source/gen9/hw_cmds_base.h index c80cb8566c..340d877097 100644 --- a/shared/source/gen9/hw_cmds_base.h +++ b/shared/source/gen9/hw_cmds_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ namespace NEO { struct GEN9 { #include "shared/source/generated/gen9/hw_cmds_generated_gen9.inl" + static constexpr bool supportsSampler = true; struct DataPortBindlessSurfaceExtendedMessageDescriptor { union { struct { diff --git a/shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl b/shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl index 93836d75d3..006752b0d7 100644 --- a/shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl +++ b/shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -5085,46 +5085,47 @@ STATIC_ASSERT(20 == sizeof(POSTSYNC_DATA)); typedef struct tagINTERFACE_DESCRIPTOR_DATA { union tagTheStructure { struct tagCommon { - uint32_t Reserved_0_0_5 : BITFIELD_RANGE(0, 5); - uint32_t KernelStartPointer : BITFIELD_RANGE(6, 31); - - uint32_t KernelStartPointerHigh : BITFIELD_RANGE(0, 15); - uint32_t Reserved_1_16_31 : BITFIELD_RANGE(16, 31); - - uint32_t Reserved_2_0_6 : BITFIELD_RANGE(0, 6); + // DWORD 0 + uint64_t Reserved_0 : BITFIELD_RANGE(0, 5); + uint64_t KernelStartPointer : BITFIELD_RANGE(6, 31); + // DWORD 1 + uint64_t Reserved_32 : BITFIELD_RANGE(32, 63); + // DWORD 2 + uint32_t Reserved_64 : BITFIELD_RANGE(0, 6); uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7); - uint32_t Reserved_2_8_10 : BITFIELD_RANGE(8, 10); + uint32_t Reserved_72 : BITFIELD_RANGE(8, 10); uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11); - uint32_t Reserved_2_12_12 : BITFIELD_RANGE(12, 12); + uint32_t Reserved_76 : BITFIELD_RANGE(12, 12); uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13); - uint32_t Reserved_2_14_15 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_78 : BITFIELD_RANGE(14, 15); uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16); - uint32_t Reserved_2_17_17 : BITFIELD_RANGE(17, 17); + uint32_t Reserved_81 : BITFIELD_RANGE(17, 17); uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18); uint32_t DenormMode : BITFIELD_RANGE(19, 19); uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20); - uint32_t Reserved_2_21_31 : BITFIELD_RANGE(21, 31); - - uint32_t Reserved_3_0_1 : BITFIELD_RANGE(0, 1); + uint32_t Reserved_85 : BITFIELD_RANGE(21, 31); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 1); uint32_t SamplerCount : BITFIELD_RANGE(2, 4); uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31); - + // DWORD 4 uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4); uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20); - uint32_t Reserved_4_21_31 : BITFIELD_RANGE(21, 31); - + uint32_t Reserved_149 : BITFIELD_RANGE(21, 31); + // DWORD 5 uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9); - uint32_t Reserved_5_10_15 : BITFIELD_RANGE(10, 15); + uint32_t Reserved_170 : BITFIELD_RANGE(10, 15); uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20); uint32_t BarrierEnable : BITFIELD_RANGE(21, 21); uint32_t RoundingMode : BITFIELD_RANGE(22, 23); - uint32_t Reserved_5_24_25 : BITFIELD_RANGE(24, 25); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 25); uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27); - uint32_t Reserved_5_28_31 : BITFIELD_RANGE(28, 31); - - uint32_t Reserved_6_0_31 : BITFIELD_RANGE(0, 31); - - uint32_t Reserved_7; + uint32_t Reserved_188 : BITFIELD_RANGE(28, 30); + uint32_t BtdMode : BITFIELD_RANGE(31, 31); + // DWORD 6 + uint32_t Reserved_192; + // DWORD 7 + uint32_t Reserved_224; } Common; uint32_t RawData[8]; } TheStructure; @@ -5151,6 +5152,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3, SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4, } SAMPLER_COUNT; + typedef enum tagBINDING_TABLE_ENTRY_COUNT { + BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED = 0x0, + BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MIN = 0x1, + BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MAX = 0x1f, + } BINDING_TABLE_ENTRY_COUNT; typedef enum tagSHARED_LOCAL_MEMORY_SIZE { SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0, SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1, @@ -5167,18 +5173,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { ROUNDING_MODE_RD = 0x2, ROUNDING_MODE_RTZ = 0x3, } ROUNDING_MODE; - + typedef enum tagTHREAD_GROUP_DISPATCH_SIZE { + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8 = 0x0, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4 = 0x1, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2 = 0x2, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 = 0x3, + } THREAD_GROUP_DISPATCH_SIZE; + typedef enum tagBTD_MODE { + BTD_MODE_DISABLE = 0x0, + BTD_MODE_ENABLE = 0x1, + } BTD_MODE; inline void init() { memset(&TheStructure, 0, sizeof(TheStructure)); TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754; TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE; TheStructure.Common.DenormMode = DENORM_MODE_FTZ; - TheStructure.Common.ThreadPreemptionDisable = - THREAD_PREEMPTION_DISABLE_DISABLE; + TheStructure.Common.ThreadPreemptionDisable = THREAD_PREEMPTION_DISABLE_DISABLE; TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED; - TheStructure.Common.SharedLocalMemorySize = - SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K; + TheStructure.Common.BindingTableEntryCount = BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED; + TheStructure.Common.SharedLocalMemorySize = SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K; TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE; + TheStructure.Common.ThreadGroupDispatchSize = THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8; + TheStructure.Common.BtdMode = BTD_MODE_DISABLE; } static tagINTERFACE_DESCRIPTOR_DATA sInit() { INTERFACE_DESCRIPTOR_DATA state; @@ -5186,7 +5202,7 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { return state; } inline uint32_t &getRawData(const uint32_t index) { - DEBUG_BREAK_IF(index >= 8); + UNRECOVERABLE_IF(index >= 8); return TheStructure.RawData[index]; } typedef enum tagKERNELSTARTPOINTER { @@ -5194,35 +5210,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { KERNELSTARTPOINTER_ALIGN_SIZE = 0x40, } KERNELSTARTPOINTER; inline void setKernelStartPointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.KernelStartPointer = (uint32_t)value >> KERNELSTARTPOINTER_BIT_SHIFT; + TheStructure.Common.KernelStartPointer = static_cast(value) >> KERNELSTARTPOINTER_BIT_SHIFT; } - inline uint32_t getKernelStartPointer() const { - return (TheStructure.Common.KernelStartPointer << KERNELSTARTPOINTER_BIT_SHIFT); + inline uint64_t getKernelStartPointer() const { + return static_cast(TheStructure.Common.KernelStartPointer) << KERNELSTARTPOINTER_BIT_SHIFT; // patched } - inline void setKernelStartPointerHigh(const uint32_t value) { - TheStructure.Common.KernelStartPointerHigh = value; - } - inline uint32_t getKernelStartPointerHigh() const { - return (TheStructure.Common.KernelStartPointerHigh); - } - inline void setSoftwareExceptionEnable(const uint32_t value) { + inline void setSoftwareExceptionEnable(const bool value) { TheStructure.Common.SoftwareExceptionEnable = value; } - inline uint32_t getSoftwareExceptionEnable() const { - return (TheStructure.Common.SoftwareExceptionEnable); + inline bool getSoftwareExceptionEnable() const { + return TheStructure.Common.SoftwareExceptionEnable; } - inline void setMaskStackExceptionEnable(const uint32_t value) { + inline void setMaskStackExceptionEnable(const bool value) { TheStructure.Common.MaskStackExceptionEnable = value; } - inline uint32_t getMaskStackExceptionEnable() const { - return (TheStructure.Common.MaskStackExceptionEnable); + inline bool getMaskStackExceptionEnable() const { + return TheStructure.Common.MaskStackExceptionEnable; } - inline void setIllegalOpcodeExceptionEnable(const uint32_t value) { + inline void setIllegalOpcodeExceptionEnable(const bool value) { TheStructure.Common.IllegalOpcodeExceptionEnable = value; } - inline uint32_t getIllegalOpcodeExceptionEnable() const { - return (TheStructure.Common.IllegalOpcodeExceptionEnable); + inline bool getIllegalOpcodeExceptionEnable() const { + return TheStructure.Common.IllegalOpcodeExceptionEnable; } inline void setFloatingPointMode(const FLOATING_POINT_MODE value) { TheStructure.Common.FloatingPointMode = value; @@ -5258,35 +5267,34 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5, SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20, } SAMPLERSTATEPOINTER; - inline void setSamplerStatePointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.SamplerStatePointer = (uint32_t)value >> SAMPLERSTATEPOINTER_BIT_SHIFT; + inline void setSamplerStatePointer(const uint32_t value) { + TheStructure.Common.SamplerStatePointer = static_cast(value) >> SAMPLERSTATEPOINTER_BIT_SHIFT; } inline uint32_t getSamplerStatePointer() const { - return (TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT); + return TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT; } inline void setBindingTableEntryCount(const uint32_t value) { TheStructure.Common.BindingTableEntryCount = value; } inline uint32_t getBindingTableEntryCount() const { - return (TheStructure.Common.BindingTableEntryCount); + return TheStructure.Common.BindingTableEntryCount; } typedef enum tagBINDINGTABLEPOINTER { BINDINGTABLEPOINTER_BIT_SHIFT = 0x5, BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20, } BINDINGTABLEPOINTER; - inline void setBindingTablePointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.BindingTablePointer = (uint32_t)value >> BINDINGTABLEPOINTER_BIT_SHIFT; + inline void setBindingTablePointer(const uint32_t value) { + TheStructure.Common.BindingTablePointer = static_cast(value) >> BINDINGTABLEPOINTER_BIT_SHIFT; } inline uint32_t getBindingTablePointer() const { - return (TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT); + return TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT; } inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x3ff); TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value; } inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const { - return (TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup); + return TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup; } inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) { TheStructure.Common.SharedLocalMemorySize = value; @@ -5294,11 +5302,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const { return static_cast(TheStructure.Common.SharedLocalMemorySize); } - inline void setBarrierEnable(const uint32_t value) { - TheStructure.Common.BarrierEnable = (value > 0u) ? 1u : 0u; + inline void setBarrierEnable(const bool value) { + TheStructure.Common.BarrierEnable = value; } inline bool getBarrierEnable() const { - return (TheStructure.Common.BarrierEnable); + return TheStructure.Common.BarrierEnable; } inline void setRoundingMode(const ROUNDING_MODE value) { TheStructure.Common.RoundingMode = value; @@ -5306,11 +5314,17 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { inline ROUNDING_MODE getRoundingMode() const { return static_cast(TheStructure.Common.RoundingMode); } - inline void setThreadGroupDispatchSize(const uint32_t value) { + inline void setThreadGroupDispatchSize(const THREAD_GROUP_DISPATCH_SIZE value) { TheStructure.Common.ThreadGroupDispatchSize = value; } - inline uint32_t getThreadGroupDispatchSize() const { - return (TheStructure.Common.ThreadGroupDispatchSize); + inline THREAD_GROUP_DISPATCH_SIZE getThreadGroupDispatchSize() const { + return static_cast(TheStructure.Common.ThreadGroupDispatchSize); + } + inline void setBtdMode(const BTD_MODE value) { + TheStructure.Common.BtdMode = value; + } + inline BTD_MODE getBtdMode() const { + return static_cast(TheStructure.Common.BtdMode); } } INTERFACE_DESCRIPTOR_DATA; STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA)); diff --git a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl index c0b6505241..6012af7175 100644 --- a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl +++ b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -5152,49 +5152,46 @@ STATIC_ASSERT(20 == sizeof(POSTSYNC_DATA)); typedef struct tagINTERFACE_DESCRIPTOR_DATA { union tagTheStructure { struct tagCommon { - uint32_t Reserved_0_0_5 : BITFIELD_RANGE(0, 5); - uint32_t KernelStartPointer : BITFIELD_RANGE(6, 31); - - uint32_t KernelStartPointerHigh : BITFIELD_RANGE(0, 15); - uint32_t Reserved_1_16_31 : BITFIELD_RANGE(16, 31); - - uint32_t Reserved_2_0_6 : BITFIELD_RANGE(0, 6); + // DWORD 0 + uint64_t Reserved_0 : BITFIELD_RANGE(0, 5); + uint64_t KernelStartPointer : BITFIELD_RANGE(6, 31); + // DWORD 1 + uint64_t Reserved_32 : BITFIELD_RANGE(32, 63); + // DWORD 2 + uint32_t Reserved_64 : BITFIELD_RANGE(0, 6); uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7); - uint32_t RegistersPerThread : BITFIELD_RANGE(8, 10); + uint32_t Reserved_72 : BITFIELD_RANGE(8, 10); uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11); - uint32_t Reserved_2_12_12 : BITFIELD_RANGE(12, 12); + uint32_t Reserved_76 : BITFIELD_RANGE(12, 12); uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13); - uint32_t Reserved_2_14_15 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_78 : BITFIELD_RANGE(14, 15); uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16); - uint32_t Reserved_2_17_17 : BITFIELD_RANGE(17, 17); + uint32_t Reserved_81 : BITFIELD_RANGE(17, 17); uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18); uint32_t DenormMode : BITFIELD_RANGE(19, 19); uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20); - uint32_t Reserved_2_21_31 : BITFIELD_RANGE(21, 31); - - uint32_t Reserved_3_0_1 : BITFIELD_RANGE(0, 1); - uint32_t SamplerCount : BITFIELD_RANGE(2, 4); - uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31); - + uint32_t Reserved_85 : BITFIELD_RANGE(21, 31); + // DWORD 3 + uint32_t Reserved_96; + // DWORD 4 uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4); uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20); - uint32_t Reserved_4_21_31 : BITFIELD_RANGE(21, 31); - + uint32_t Reserved_149 : BITFIELD_RANGE(21, 31); + // DWORD 5 uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9); - uint32_t Reserved_5_10_15 : BITFIELD_RANGE(10, 15); + uint32_t Reserved_170 : BITFIELD_RANGE(10, 15); uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20); - uint32_t Reserved_5_21_21 : BITFIELD_RANGE(21, 21); + uint32_t Reserved_181 : BITFIELD_RANGE(21, 21); uint32_t RoundingMode : BITFIELD_RANGE(22, 23); - uint32_t Reserved_5_24_25 : BITFIELD_RANGE(24, 25); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 25); uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27); uint32_t NumberOfBarriers : BITFIELD_RANGE(28, 30); - uint32_t Reserved_5_31_31 : BITFIELD_RANGE(31, 31); - - uint32_t PreferredSlmAllocationSizePerDss : BITFIELD_RANGE(0, 2); - uint32_t PreferredSlmSizeOverride : BITFIELD_RANGE(3, 3); - uint32_t Reserved_6_4_31 : BITFIELD_RANGE(4, 31); - - uint32_t Reserved_7; + uint32_t BtdMode : BITFIELD_RANGE(31, 31); + // DWORD 6 + uint32_t PreferredSlmAllocationSize : BITFIELD_RANGE(0, 3); + uint32_t Reserved_196 : BITFIELD_RANGE(4, 31); + // DWORD 7 + uint32_t Reserved_224; } Common; uint32_t RawData[8]; } TheStructure; @@ -5214,13 +5211,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { THREAD_PREEMPTION_DISABLE_DISABLE = 0x0, THREAD_PREEMPTION_DISABLE_ENABLE = 0x1, } THREAD_PREEMPTION_DISABLE; - typedef enum tagSAMPLER_COUNT { - SAMPLER_COUNT_NO_SAMPLERS_USED = 0x0, - SAMPLER_COUNT_BETWEEN_1_AND_4_SAMPLERS_USED = 0x1, - SAMPLER_COUNT_BETWEEN_5_AND_8_SAMPLERS_USED = 0x2, - SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3, - SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4, - } SAMPLER_COUNT; + typedef enum tagBINDING_TABLE_ENTRY_COUNT { + BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED = 0x0, + BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MIN = 0x1, + BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MAX = 0x1f, + } BINDING_TABLE_ENTRY_COUNT; typedef enum tagSHARED_LOCAL_MEMORY_SIZE { SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0, SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1, @@ -5228,12 +5223,12 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K = 0x3, SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K = 0x4, SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K = 0x5, - SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K = 0x8, SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K = 0x6, - SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K = 0x9, SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K = 0x7, - SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K = 0xA, - SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K = 0xB, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K = 0x8, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K = 0x9, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K = 0xa, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K = 0xb, } SHARED_LOCAL_MEMORY_SIZE; typedef enum tagROUNDING_MODE { ROUNDING_MODE_RTNE = 0x0, @@ -5241,42 +5236,51 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { ROUNDING_MODE_RD = 0x2, ROUNDING_MODE_RTZ = 0x3, } ROUNDING_MODE; - typedef enum tagREGISTERS_PER_THREAD { - REGISTERS_PER_THREAD_DEFAULT = 0x0, - REGISTERS_PER_THREAD_64_REGISTERS = 0x1, - REGISTERS_PER_THREAD_96_REGISTERS = 0x2, - REGISTERS_PER_THREAD_128_REGISTERS = 0x3, - REGISTERS_PER_THREAD_160_REGISTERS = 0x4, - REGISTERS_PER_THREAD_192_REGISTERS = 0x5, - REGISTERS_PER_THREAD_256_REGISTERS = 0x6, - } REGISTERS_PER_THREAD; - typedef enum tagPREFERRED_SLM_SIZE_OVERRIDE { - PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED = 0x0, - PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED = 0x1, - } PREFERRED_SLM_SIZE_OVERRIDE; - typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE_PER_DSS { - PREFERRED_SLM_SIZE_IS_0K = 0x0, - PREFERRED_SLM_SIZE_IS_16K = 0x1, - PREFERRED_SLM_SIZE_IS_32K = 0x2, - PREFERRED_SLM_SIZE_IS_64K = 0x3, - PREFERRED_SLM_SIZE_IS_96K = 0x4, - PREFERRED_SLM_SIZE_IS_128K = 0x5, - } PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; - + typedef enum tagTHREAD_GROUP_DISPATCH_SIZE { + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8 = 0x0, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4 = 0x1, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2 = 0x2, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 = 0x3, + } THREAD_GROUP_DISPATCH_SIZE; + typedef enum tagNUMBER_OF_BARRIERS { + NUMBER_OF_BARRIERS_NONE = 0x0, + NUMBER_OF_BARRIERS_B1 = 0x1, + NUMBER_OF_BARRIERS_B2 = 0x2, + NUMBER_OF_BARRIERS_B4 = 0x3, + NUMBER_OF_BARRIERS_B8 = 0x4, + NUMBER_OF_BARRIERS_B16 = 0x5, + NUMBER_OF_BARRIERS_B24 = 0x6, + NUMBER_OF_BARRIERS_B32 = 0x7, + } NUMBER_OF_BARRIERS; + typedef enum tagBTD_MODE { + BTD_MODE_DISABLE = 0x0, + BTD_MODE_ENABLE = 0x1, + } BTD_MODE; + typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE { + PREFERRED_SLM_ALLOCATION_SIZE_MAX = 0x0, + PREFERRED_SLM_ALLOCATION_SIZE_0K = 0x8, + PREFERRED_SLM_ALLOCATION_SIZE_16K = 0x9, + PREFERRED_SLM_ALLOCATION_SIZE_32K = 0xa, + PREFERRED_SLM_ALLOCATION_SIZE_64K = 0xb, + PREFERRED_SLM_ALLOCATION_SIZE_96K = 0xc, + PREFERRED_SLM_ALLOCATION_SIZE_128K = 0xd, + } PREFERRED_SLM_ALLOCATION_SIZE; + typedef enum tagSAMPLERSTATEPOINTER { + SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20, // patched + } SAMPLERSTATEPOINTER; inline void init() { memset(&TheStructure, 0, sizeof(TheStructure)); TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754; TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE; TheStructure.Common.DenormMode = DENORM_MODE_FTZ; - TheStructure.Common.ThreadPreemptionDisable = - THREAD_PREEMPTION_DISABLE_DISABLE; - TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED; - TheStructure.Common.SharedLocalMemorySize = - SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K; + TheStructure.Common.ThreadPreemptionDisable = THREAD_PREEMPTION_DISABLE_DISABLE; + TheStructure.Common.BindingTableEntryCount = BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED; + TheStructure.Common.SharedLocalMemorySize = SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K; TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE; - TheStructure.Common.RegistersPerThread = REGISTERS_PER_THREAD_DEFAULT; - TheStructure.Common.PreferredSlmSizeOverride = PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED; - TheStructure.Common.PreferredSlmAllocationSizePerDss = PREFERRED_SLM_SIZE_IS_0K; + TheStructure.Common.ThreadGroupDispatchSize = THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8; + TheStructure.Common.NumberOfBarriers = NUMBER_OF_BARRIERS_NONE; + TheStructure.Common.BtdMode = BTD_MODE_DISABLE; + TheStructure.Common.PreferredSlmAllocationSize = PREFERRED_SLM_ALLOCATION_SIZE_MAX; } static tagINTERFACE_DESCRIPTOR_DATA sInit() { INTERFACE_DESCRIPTOR_DATA state; @@ -5284,7 +5288,7 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { return state; } inline uint32_t &getRawData(const uint32_t index) { - DEBUG_BREAK_IF(index >= 8); + UNRECOVERABLE_IF(index >= 8); return TheStructure.RawData[index]; } typedef enum tagKERNELSTARTPOINTER { @@ -5292,35 +5296,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { KERNELSTARTPOINTER_ALIGN_SIZE = 0x40, } KERNELSTARTPOINTER; inline void setKernelStartPointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.KernelStartPointer = (uint32_t)value >> KERNELSTARTPOINTER_BIT_SHIFT; + TheStructure.Common.KernelStartPointer = static_cast(value) >> KERNELSTARTPOINTER_BIT_SHIFT; } - inline uint32_t getKernelStartPointer() const { - return (TheStructure.Common.KernelStartPointer << KERNELSTARTPOINTER_BIT_SHIFT); + inline uint64_t getKernelStartPointer() const { + return static_cast(TheStructure.Common.KernelStartPointer) << KERNELSTARTPOINTER_BIT_SHIFT; // patched } - inline void setKernelStartPointerHigh(const uint32_t value) { - TheStructure.Common.KernelStartPointerHigh = value; - } - inline uint32_t getKernelStartPointerHigh() const { - return (TheStructure.Common.KernelStartPointerHigh); - } - inline void setSoftwareExceptionEnable(const uint32_t value) { + inline void setSoftwareExceptionEnable(const bool value) { TheStructure.Common.SoftwareExceptionEnable = value; } - inline uint32_t getSoftwareExceptionEnable() const { - return (TheStructure.Common.SoftwareExceptionEnable); + inline bool getSoftwareExceptionEnable() const { + return TheStructure.Common.SoftwareExceptionEnable; } - inline void setMaskStackExceptionEnable(const uint32_t value) { + inline void setMaskStackExceptionEnable(const bool value) { TheStructure.Common.MaskStackExceptionEnable = value; } - inline uint32_t getMaskStackExceptionEnable() const { - return (TheStructure.Common.MaskStackExceptionEnable); + inline bool getMaskStackExceptionEnable() const { + return TheStructure.Common.MaskStackExceptionEnable; } - inline void setIllegalOpcodeExceptionEnable(const uint32_t value) { + inline void setIllegalOpcodeExceptionEnable(const bool value) { TheStructure.Common.IllegalOpcodeExceptionEnable = value; } - inline uint32_t getIllegalOpcodeExceptionEnable() const { - return (TheStructure.Common.IllegalOpcodeExceptionEnable); + inline bool getIllegalOpcodeExceptionEnable() const { + return TheStructure.Common.IllegalOpcodeExceptionEnable; } inline void setFloatingPointMode(const FLOATING_POINT_MODE value) { TheStructure.Common.FloatingPointMode = value; @@ -5346,46 +5343,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { inline THREAD_PREEMPTION_DISABLE getThreadPreemptionDisable() const { return static_cast(TheStructure.Common.ThreadPreemptionDisable); } - inline void setSamplerCount(const SAMPLER_COUNT value) { - TheStructure.Common.SamplerCount = value; - } - inline SAMPLER_COUNT getSamplerCount() const { - return static_cast(TheStructure.Common.SamplerCount); - } - typedef enum tagSAMPLERSTATEPOINTER { - SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5, - SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20, - } SAMPLERSTATEPOINTER; - inline void setSamplerStatePointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.SamplerStatePointer = (uint32_t)value >> SAMPLERSTATEPOINTER_BIT_SHIFT; - } - inline uint32_t getSamplerStatePointer() const { - return (TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT); - } inline void setBindingTableEntryCount(const uint32_t value) { TheStructure.Common.BindingTableEntryCount = value; } inline uint32_t getBindingTableEntryCount() const { - return (TheStructure.Common.BindingTableEntryCount); + return TheStructure.Common.BindingTableEntryCount; } typedef enum tagBINDINGTABLEPOINTER { BINDINGTABLEPOINTER_BIT_SHIFT = 0x5, BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20, } BINDINGTABLEPOINTER; - inline void setBindingTablePointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.BindingTablePointer = (uint32_t)value >> BINDINGTABLEPOINTER_BIT_SHIFT; + inline void setBindingTablePointer(const uint32_t value) { + TheStructure.Common.BindingTablePointer = static_cast(value) >> BINDINGTABLEPOINTER_BIT_SHIFT; } inline uint32_t getBindingTablePointer() const { - return (TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT); + return TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT; } inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) { - UNRECOVERABLE_IF(value > 128); + UNRECOVERABLE_IF(value > 0x3ff); TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value; } inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const { - return (TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup); + return TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup; } inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) { TheStructure.Common.SharedLocalMemorySize = value; @@ -5393,41 +5372,35 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const { return static_cast(TheStructure.Common.SharedLocalMemorySize); } - inline void setNumberOfBarriers(const uint32_t value) { - TheStructure.Common.NumberOfBarriers = value; - } - inline uint32_t getNumberOfBarriers() const { - return TheStructure.Common.NumberOfBarriers; - } inline void setRoundingMode(const ROUNDING_MODE value) { TheStructure.Common.RoundingMode = value; } inline ROUNDING_MODE getRoundingMode() const { return static_cast(TheStructure.Common.RoundingMode); } - inline void setThreadGroupDispatchSize(const uint32_t value) { + inline void setThreadGroupDispatchSize(const THREAD_GROUP_DISPATCH_SIZE value) { TheStructure.Common.ThreadGroupDispatchSize = value; } - inline uint32_t getThreadGroupDispatchSize() const { - return (TheStructure.Common.ThreadGroupDispatchSize); + inline THREAD_GROUP_DISPATCH_SIZE getThreadGroupDispatchSize() const { + return static_cast(TheStructure.Common.ThreadGroupDispatchSize); } - inline void setRegistersPerThread(const REGISTERS_PER_THREAD value) { - TheStructure.Common.RegistersPerThread = value; + inline void setNumberOfBarriers(const NUMBER_OF_BARRIERS value) { + TheStructure.Common.NumberOfBarriers = value; } - inline REGISTERS_PER_THREAD getRegistersPerThread() const { - return static_cast(TheStructure.Common.RegistersPerThread); + inline NUMBER_OF_BARRIERS getNumberOfBarriers() const { + return static_cast(TheStructure.Common.NumberOfBarriers); } - inline void setPreferredSlmSizeOverride(const PREFERRED_SLM_SIZE_OVERRIDE value) { - TheStructure.Common.PreferredSlmSizeOverride = value; + inline void setBtdMode(const BTD_MODE value) { + TheStructure.Common.BtdMode = value; } - inline PREFERRED_SLM_SIZE_OVERRIDE getPreferredSlmSizeOverride() const { - return static_cast(TheStructure.Common.PreferredSlmSizeOverride); + inline BTD_MODE getBtdMode() const { + return static_cast(TheStructure.Common.BtdMode); } - inline void setPreferredSlmAllocationSizePerDss(const PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS value) { - TheStructure.Common.PreferredSlmAllocationSizePerDss = value; + inline void setPreferredSlmAllocationSize(const PREFERRED_SLM_ALLOCATION_SIZE value) { + TheStructure.Common.PreferredSlmAllocationSize = value; } - inline PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS getPreferredSlmAllocationSizePerDss() const { - return static_cast(TheStructure.Common.PreferredSlmAllocationSizePerDss); + inline PREFERRED_SLM_ALLOCATION_SIZE getPreferredSlmAllocationSize() const { + return static_cast(TheStructure.Common.PreferredSlmAllocationSize); } } INTERFACE_DESCRIPTOR_DATA; STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA)); diff --git a/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl b/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl index 0d27a9370a..9690adecc1 100644 --- a/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl +++ b/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -5150,49 +5150,48 @@ STATIC_ASSERT(20 == sizeof(POSTSYNC_DATA)); typedef struct tagINTERFACE_DESCRIPTOR_DATA { union tagTheStructure { struct tagCommon { - uint32_t Reserved_0_0_5 : BITFIELD_RANGE(0, 5); - uint32_t KernelStartPointer : BITFIELD_RANGE(6, 31); - - uint32_t KernelStartPointerHigh : BITFIELD_RANGE(0, 15); - uint32_t Reserved_1_16_31 : BITFIELD_RANGE(16, 31); - - uint32_t Reserved_2_0_6 : BITFIELD_RANGE(0, 6); + // DWORD 0 + uint64_t Reserved_0 : BITFIELD_RANGE(0, 5); + uint64_t KernelStartPointer : BITFIELD_RANGE(6, 31); + // DWORD 1 + uint64_t Reserved_32 : BITFIELD_RANGE(32, 63); + // DWORD 2 + uint32_t Reserved_64 : BITFIELD_RANGE(0, 6); uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7); - uint32_t Reserved_2_8_10 : BITFIELD_RANGE(8, 10); + uint32_t Reserved_72 : BITFIELD_RANGE(8, 10); uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11); - uint32_t Reserved_2_12_12 : BITFIELD_RANGE(12, 12); + uint32_t Reserved_76 : BITFIELD_RANGE(12, 12); uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13); - uint32_t Reserved_2_14_15 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_78 : BITFIELD_RANGE(14, 15); uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16); - uint32_t Reserved_2_17_17 : BITFIELD_RANGE(17, 17); + uint32_t Reserved_81 : BITFIELD_RANGE(17, 17); uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18); uint32_t DenormMode : BITFIELD_RANGE(19, 19); uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20); - uint32_t Reserved_2_21_31 : BITFIELD_RANGE(21, 31); - - uint32_t Reserved_3_0_1 : BITFIELD_RANGE(0, 1); + uint32_t Reserved_85 : BITFIELD_RANGE(21, 31); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 1); uint32_t SamplerCount : BITFIELD_RANGE(2, 4); uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31); - + // DWORD 4 uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4); uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20); - uint32_t Reserved_4_21_31 : BITFIELD_RANGE(21, 31); - + uint32_t Reserved_149 : BITFIELD_RANGE(21, 31); + // DWORD 5 uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9); - uint32_t Reserved_5_10_15 : BITFIELD_RANGE(10, 15); + uint32_t Reserved_170 : BITFIELD_RANGE(10, 15); uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20); - uint32_t BarrierEnable : BITFIELD_RANGE(21, 21); + uint32_t Reserved_181 : BITFIELD_RANGE(21, 21); uint32_t RoundingMode : BITFIELD_RANGE(22, 23); - uint32_t Reserved_5_24_25 : BITFIELD_RANGE(24, 25); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 25); uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27); uint32_t NumberOfBarriers : BITFIELD_RANGE(28, 30); - uint32_t Reserved_5_31_31 : BITFIELD_RANGE(31, 31); - - uint32_t PreferredSlmAllocationSizePerDss : BITFIELD_RANGE(0, 2); - uint32_t PreferredSlmSizeOverride : BITFIELD_RANGE(3, 3); - uint32_t Reserved_6_4_31 : BITFIELD_RANGE(4, 31); - - uint32_t Reserved_7; + uint32_t BtdMode : BITFIELD_RANGE(31, 31); + // DWORD 6 + uint32_t PreferredSlmAllocationSize : BITFIELD_RANGE(0, 3); + uint32_t Reserved_196 : BITFIELD_RANGE(4, 31); + // DWORD 7 + uint32_t Reserved_224; } Common; uint32_t RawData[8]; } TheStructure; @@ -5219,6 +5218,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3, SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4, } SAMPLER_COUNT; + typedef enum tagBINDING_TABLE_ENTRY_COUNT { + BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED = 0x0, + BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MIN = 0x1, + BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MAX = 0x1f, + } BINDING_TABLE_ENTRY_COUNT; typedef enum tagSHARED_LOCAL_MEMORY_SIZE { SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0, SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1, @@ -5235,32 +5239,43 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { ROUNDING_MODE_RD = 0x2, ROUNDING_MODE_RTZ = 0x3, } ROUNDING_MODE; - typedef enum tagPREFERRED_SLM_SIZE_OVERRIDE { - PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED = 0x0, - PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED = 0x1, - } PREFERRED_SLM_SIZE_OVERRIDE; - typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE_PER_DSS { - PREFERRED_SLM_SIZE_IS_0K = 0x0, - PREFERRED_SLM_SIZE_IS_16K = 0x1, - PREFERRED_SLM_SIZE_IS_32K = 0x2, - PREFERRED_SLM_SIZE_IS_64K = 0x3, - PREFERRED_SLM_SIZE_IS_96K = 0x4, - PREFERRED_SLM_SIZE_IS_128K = 0x5, - } PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; - + typedef enum tagTHREAD_GROUP_DISPATCH_SIZE { + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8 = 0x0, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4 = 0x1, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2 = 0x2, + THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 = 0x3, + } THREAD_GROUP_DISPATCH_SIZE; + typedef enum tagNUMBER_OF_BARRIERS { + NUMBER_OF_BARRIERS_NONE = 0x0, + NUMBER_OF_BARRIERS_B1 = 0x1, + } NUMBER_OF_BARRIERS; + typedef enum tagBTD_MODE { + BTD_MODE_DISABLE = 0x0, + BTD_MODE_ENABLE = 0x1, + } BTD_MODE; + typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE { + PREFERRED_SLM_ALLOCATION_SIZE_MAX = 0x0, + PREFERRED_SLM_ALLOCATION_SIZE_0K = 0x8, + PREFERRED_SLM_ALLOCATION_SIZE_16K = 0x9, + PREFERRED_SLM_ALLOCATION_SIZE_32K = 0xa, + PREFERRED_SLM_ALLOCATION_SIZE_64K = 0xb, + PREFERRED_SLM_ALLOCATION_SIZE_96K = 0xc, + PREFERRED_SLM_ALLOCATION_SIZE_128K = 0xd, + } PREFERRED_SLM_ALLOCATION_SIZE; inline void init() { memset(&TheStructure, 0, sizeof(TheStructure)); TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754; TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE; TheStructure.Common.DenormMode = DENORM_MODE_FTZ; - TheStructure.Common.ThreadPreemptionDisable = - THREAD_PREEMPTION_DISABLE_DISABLE; + TheStructure.Common.ThreadPreemptionDisable = THREAD_PREEMPTION_DISABLE_DISABLE; TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED; - TheStructure.Common.SharedLocalMemorySize = - SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K; + TheStructure.Common.BindingTableEntryCount = BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED; + TheStructure.Common.SharedLocalMemorySize = SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K; TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE; - TheStructure.Common.PreferredSlmSizeOverride = PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED; - TheStructure.Common.PreferredSlmAllocationSizePerDss = PREFERRED_SLM_SIZE_IS_0K; + TheStructure.Common.ThreadGroupDispatchSize = THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8; + TheStructure.Common.NumberOfBarriers = NUMBER_OF_BARRIERS_NONE; + TheStructure.Common.BtdMode = BTD_MODE_DISABLE; + TheStructure.Common.PreferredSlmAllocationSize = PREFERRED_SLM_ALLOCATION_SIZE_MAX; } static tagINTERFACE_DESCRIPTOR_DATA sInit() { INTERFACE_DESCRIPTOR_DATA state; @@ -5268,7 +5283,7 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { return state; } inline uint32_t &getRawData(const uint32_t index) { - DEBUG_BREAK_IF(index >= 8); + UNRECOVERABLE_IF(index >= 8); return TheStructure.RawData[index]; } typedef enum tagKERNELSTARTPOINTER { @@ -5276,35 +5291,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { KERNELSTARTPOINTER_ALIGN_SIZE = 0x40, } KERNELSTARTPOINTER; inline void setKernelStartPointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.KernelStartPointer = (uint32_t)value >> KERNELSTARTPOINTER_BIT_SHIFT; + TheStructure.Common.KernelStartPointer = static_cast(value) >> KERNELSTARTPOINTER_BIT_SHIFT; } - inline uint32_t getKernelStartPointer() const { - return (TheStructure.Common.KernelStartPointer << KERNELSTARTPOINTER_BIT_SHIFT); + inline uint64_t getKernelStartPointer() const { + return static_cast(TheStructure.Common.KernelStartPointer) << KERNELSTARTPOINTER_BIT_SHIFT; // patched } - inline void setKernelStartPointerHigh(const uint32_t value) { - TheStructure.Common.KernelStartPointerHigh = value; - } - inline uint32_t getKernelStartPointerHigh() const { - return (TheStructure.Common.KernelStartPointerHigh); - } - inline void setSoftwareExceptionEnable(const uint32_t value) { + inline void setSoftwareExceptionEnable(const bool value) { TheStructure.Common.SoftwareExceptionEnable = value; } - inline uint32_t getSoftwareExceptionEnable() const { - return (TheStructure.Common.SoftwareExceptionEnable); + inline bool getSoftwareExceptionEnable() const { + return TheStructure.Common.SoftwareExceptionEnable; } - inline void setMaskStackExceptionEnable(const uint32_t value) { + inline void setMaskStackExceptionEnable(const bool value) { TheStructure.Common.MaskStackExceptionEnable = value; } - inline uint32_t getMaskStackExceptionEnable() const { - return (TheStructure.Common.MaskStackExceptionEnable); + inline bool getMaskStackExceptionEnable() const { + return TheStructure.Common.MaskStackExceptionEnable; } - inline void setIllegalOpcodeExceptionEnable(const uint32_t value) { + inline void setIllegalOpcodeExceptionEnable(const bool value) { TheStructure.Common.IllegalOpcodeExceptionEnable = value; } - inline uint32_t getIllegalOpcodeExceptionEnable() const { - return (TheStructure.Common.IllegalOpcodeExceptionEnable); + inline bool getIllegalOpcodeExceptionEnable() const { + return TheStructure.Common.IllegalOpcodeExceptionEnable; } inline void setFloatingPointMode(const FLOATING_POINT_MODE value) { TheStructure.Common.FloatingPointMode = value; @@ -5340,35 +5348,34 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5, SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20, } SAMPLERSTATEPOINTER; - inline void setSamplerStatePointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.SamplerStatePointer = (uint32_t)value >> SAMPLERSTATEPOINTER_BIT_SHIFT; + inline void setSamplerStatePointer(const uint32_t value) { + TheStructure.Common.SamplerStatePointer = static_cast(value) >> SAMPLERSTATEPOINTER_BIT_SHIFT; } inline uint32_t getSamplerStatePointer() const { - return (TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT); + return TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT; } inline void setBindingTableEntryCount(const uint32_t value) { TheStructure.Common.BindingTableEntryCount = value; } inline uint32_t getBindingTableEntryCount() const { - return (TheStructure.Common.BindingTableEntryCount); + return TheStructure.Common.BindingTableEntryCount; } typedef enum tagBINDINGTABLEPOINTER { BINDINGTABLEPOINTER_BIT_SHIFT = 0x5, BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20, } BINDINGTABLEPOINTER; - inline void setBindingTablePointer(const uint64_t value) { - DEBUG_BREAK_IF(value >= 0x100000000); - TheStructure.Common.BindingTablePointer = (uint32_t)value >> BINDINGTABLEPOINTER_BIT_SHIFT; + inline void setBindingTablePointer(const uint32_t value) { + TheStructure.Common.BindingTablePointer = static_cast(value) >> BINDINGTABLEPOINTER_BIT_SHIFT; } inline uint32_t getBindingTablePointer() const { - return (TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT); + return TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT; } inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x3ff); TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value; } inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const { - return (TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup); + return TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup; } inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) { TheStructure.Common.SharedLocalMemorySize = value; @@ -5376,41 +5383,35 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const { return static_cast(TheStructure.Common.SharedLocalMemorySize); } - inline void setBarrierEnable(const uint32_t value) { - TheStructure.Common.BarrierEnable = (value > 0u) ? 1u : 0u; - } - inline void setNumberOfBarriers(const uint32_t value) { - TheStructure.Common.NumberOfBarriers = value; - } - inline bool getBarrierEnable() const { - return (TheStructure.Common.BarrierEnable); - } - inline uint32_t getNumberOfBarriers() const { - return TheStructure.Common.NumberOfBarriers; - } inline void setRoundingMode(const ROUNDING_MODE value) { TheStructure.Common.RoundingMode = value; } inline ROUNDING_MODE getRoundingMode() const { return static_cast(TheStructure.Common.RoundingMode); } - inline void setThreadGroupDispatchSize(const uint32_t value) { + inline void setThreadGroupDispatchSize(const THREAD_GROUP_DISPATCH_SIZE value) { TheStructure.Common.ThreadGroupDispatchSize = value; } - inline uint32_t getThreadGroupDispatchSize() const { - return (TheStructure.Common.ThreadGroupDispatchSize); + inline THREAD_GROUP_DISPATCH_SIZE getThreadGroupDispatchSize() const { + return static_cast(TheStructure.Common.ThreadGroupDispatchSize); } - inline void setPreferredSlmSizeOverride(const PREFERRED_SLM_SIZE_OVERRIDE value) { - TheStructure.Common.PreferredSlmSizeOverride = value; + inline void setNumberOfBarriers(const NUMBER_OF_BARRIERS value) { + TheStructure.Common.NumberOfBarriers = value; } - inline PREFERRED_SLM_SIZE_OVERRIDE getPreferredSlmSizeOverride() const { - return static_cast(TheStructure.Common.PreferredSlmSizeOverride); + inline NUMBER_OF_BARRIERS getNumberOfBarriers() const { + return static_cast(TheStructure.Common.NumberOfBarriers); } - inline void setPreferredSlmAllocationSizePerDss(const PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS value) { - TheStructure.Common.PreferredSlmAllocationSizePerDss = value; + inline void setBtdMode(const BTD_MODE value) { + TheStructure.Common.BtdMode = value; } - inline PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS getPreferredSlmAllocationSizePerDss() const { - return static_cast(TheStructure.Common.PreferredSlmAllocationSizePerDss); + inline BTD_MODE getBtdMode() const { + return static_cast(TheStructure.Common.BtdMode); + } + inline void setPreferredSlmAllocationSize(const PREFERRED_SLM_ALLOCATION_SIZE value) { + TheStructure.Common.PreferredSlmAllocationSize = value; + } + inline PREFERRED_SLM_ALLOCATION_SIZE getPreferredSlmAllocationSize() const { + return static_cast(TheStructure.Common.PreferredSlmAllocationSize); } } INTERFACE_DESCRIPTOR_DATA; STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA)); diff --git a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp index c352bc427b..7e10b92352 100644 --- a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -44,11 +44,12 @@ template <> void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) { const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) { - interfaceDescriptor.setThreadGroupDispatchSize(3u); + interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1); } if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) { - interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get()); + interfaceDescriptor.setThreadGroupDispatchSize(static_cast( + DebugManager.flags.ForceThreadGroupDispatchSize.get())); } } diff --git a/shared/source/xe_hp_core/hw_cmds_base.h b/shared/source/xe_hp_core/hw_cmds_base.h index ea23bef90a..7b10794830 100644 --- a/shared/source/xe_hp_core/hw_cmds_base.h +++ b/shared/source/xe_hp_core/hw_cmds_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ struct XeHpCore { static constexpr bool isUsingL3Control = true; static constexpr bool isUsingMediaSamplerDopClockGate = true; + static constexpr bool supportsSampler = true; struct DataPortBindlessSurfaceExtendedMessageDescriptor { union { diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index adcddeaf52..e5d471e7b7 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -34,11 +34,12 @@ template <> void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) { const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) { - interfaceDescriptor.setThreadGroupDispatchSize(3u); + interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1); } if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) { - interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get()); + interfaceDescriptor.setThreadGroupDispatchSize(static_cast( + DebugManager.flags.ForceThreadGroupDispatchSize.get())); } } @@ -169,7 +170,7 @@ template <> void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { - interfaceDescriptor.setNumberOfBarriers(value); + interfaceDescriptor.setNumberOfBarriers(static_cast(value)); } template <> @@ -200,8 +201,7 @@ void EncodeDispatchKernel::encodeAdditionalWalkerFields(const HardwareIn template <> void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { - using PREFERRED_SLM_SIZE_OVERRIDE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE; - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount; const uint32_t workGroupCountPerDss = static_cast(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup)); @@ -221,18 +221,18 @@ void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTO struct SizeToPreferredSlmValue { uint32_t upperLimit; - PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS valueToProgram; + PREFERRED_SLM_ALLOCATION_SIZE valueToProgram; }; const std::array ranges = {{ // upper limit, retVal - {0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K}, - {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K}, - {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K}, - {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K}, - {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K}, + {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K}, + {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K}, + {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K}, + {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K}, + {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K}, }}; - auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K; + auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K; for (auto &range : ranges) { if (slmSize <= range.upperLimit) { programmableIdPreferredSlmSize = range.valueToProgram; @@ -241,16 +241,29 @@ void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTO } if ((slmSize == 0) && (Family::isXlA0(hwInfo))) { - programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K; + programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K; } - pInterfaceDescriptor->setPreferredSlmSizeOverride(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED); - pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(programmableIdPreferredSlmSize); + pInterfaceDescriptor->setPreferredSlmAllocationSize(programmableIdPreferredSlmSize); if (DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) { auto toProgram = - static_cast(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get()); - pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(toProgram); + static_cast(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get()); + pInterfaceDescriptor->setPreferredSlmAllocationSize(toProgram); + } +} + +template <> +void EncodeDispatchKernel::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) { + auto enablePrefetch = EncodeSurfaceState::doBindingTablePrefetch(); + if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) { + enablePrefetch = static_cast(DebugManager.flags.ForceBtpPrefetchMode.get()); + } + + if (enablePrefetch) { + interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u)); + } else { + interfaceDescriptor.setBindingTableEntryCount(0u); } } diff --git a/shared/source/xe_hpc_core/hw_cmds_base.h b/shared/source/xe_hpc_core/hw_cmds_base.h index 6ac494e9ce..3fa2286db1 100644 --- a/shared/source/xe_hpc_core/hw_cmds_base.h +++ b/shared/source/xe_hpc_core/hw_cmds_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -35,6 +35,7 @@ struct XE_HPC_CORE { static constexpr bool isUsingL3Control = false; static constexpr bool isUsingMediaSamplerDopClockGate = false; + static constexpr bool supportsSampler = false; static bool isXlA0(const HardwareInfo &hwInfo) { auto revId = hwInfo.platform.usRevId & pvcSteppingBits; diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index 195f07a523..84e3d75833 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -33,8 +33,7 @@ void EncodeDispatchKernel::adjustTimestampPacket(WALKER_TYPE &walkerCmd, template <> void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { - using PREFERRED_SLM_SIZE_OVERRIDE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE; - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount; const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup; @@ -54,18 +53,18 @@ void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTO struct SizeToPreferredSlmValue { uint32_t upperLimit; - PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS valueToProgram; + PREFERRED_SLM_ALLOCATION_SIZE valueToProgram; }; const std::array ranges = {{ // upper limit, retVal - {0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K}, - {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K}, - {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K}, - {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K}, - {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K}, + {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K}, + {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K}, + {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K}, + {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K}, + {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K}, }}; - auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K; + auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K; for (auto &range : ranges) { if (slmSize <= range.upperLimit) { programmableIdPreferredSlmSize = range.valueToProgram; @@ -73,18 +72,16 @@ void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTO } } - pInterfaceDescriptor->setPreferredSlmSizeOverride(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED); - if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isAllocationSizeAdjustmentRequired(hwInfo)) { - pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K); + pInterfaceDescriptor->setPreferredSlmAllocationSize(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K); } else { - pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(programmableIdPreferredSlmSize); + pInterfaceDescriptor->setPreferredSlmAllocationSize(programmableIdPreferredSlmSize); } if (DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) { auto toProgram = - static_cast(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get()); - pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(toProgram); + static_cast(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get()); + pInterfaceDescriptor->setPreferredSlmAllocationSize(toProgram); } } @@ -93,20 +90,21 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) { if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) { - interfaceDescriptor.setThreadGroupDispatchSize(2u); + interfaceDescriptor.setThreadGroupDispatchSize(static_cast(2u)); } else { - interfaceDescriptor.setThreadGroupDispatchSize(3u); + interfaceDescriptor.setThreadGroupDispatchSize(static_cast(3u)); } } if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) { - interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get()); + interfaceDescriptor.setThreadGroupDispatchSize( + static_cast(DebugManager.flags.ForceThreadGroupDispatchSize.get())); } } template <> void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { - interfaceDescriptor.setNumberOfBarriers(value); + interfaceDescriptor.setNumberOfBarriers(static_cast(value)); } template <> diff --git a/shared/source/xe_hpg_core/hw_cmds_base.h b/shared/source/xe_hpg_core/hw_cmds_base.h index 76b1dd3b8e..0d41b3ffdf 100644 --- a/shared/source/xe_hpg_core/hw_cmds_base.h +++ b/shared/source/xe_hpg_core/hw_cmds_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ struct XE_HPG_CORE { static constexpr bool isUsingL3Control = true; static constexpr bool isUsingMediaSamplerDopClockGate = false; + static constexpr bool supportsSampler = true; struct DataPortBindlessSurfaceExtendedMessageDescriptor { union { diff --git a/shared/test/common/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp b/shared/test/common/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp index 7d36a828dc..0f631aff26 100644 --- a/shared/test/common/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp +++ b/shared/test/common/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,8 +18,7 @@ using CommandEncodeStatesPvcTest = ::testing::Test; PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using PREFERRED_SLM_SIZE_OVERRIDE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE; - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; HardwareInfo hwInfo = *defaultHwInfo; uint32_t threadsCount = 1; @@ -42,9 +41,9 @@ PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalIn INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; EncodeDispatchKernel::appendAdditionalIDDFields(&idd, hwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone); if (revisionToTest.isWaRequired) { - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K, idd.getPreferredSlmAllocationSize()); } else { - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K, idd.getPreferredSlmAllocationSize()); } } } diff --git a/shared/test/common/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp b/shared/test/common/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp index cab80fb958..60f23e93a4 100644 --- a/shared/test/common/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp +++ b/shared/test/common/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp @@ -20,8 +20,7 @@ using CommandEncodeStatesDg2Test = ::testing::Test; DG2TEST_F(CommandEncodeStatesDg2Test, GivenSmallSlmTotalSizesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using PREFERRED_SLM_SIZE_OVERRIDE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE; - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; VariableBackup revisionId(&defaultHwInfo->platform.usRevId); uint32_t threadsCount = 1; @@ -31,19 +30,19 @@ DG2TEST_F(CommandEncodeStatesDg2Test, GivenSmallSlmTotalSizesWhenSetAdditionalIn revisionId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_A0, *defaultHwInfo); INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; EncodeDispatchKernel::appendAdditionalIDDFields(&idd, *defaultHwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone); - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K, idd.getPreferredSlmAllocationSize()); } { revisionId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_B, *defaultHwInfo); INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; EncodeDispatchKernel::appendAdditionalIDDFields(&idd, *defaultHwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone); - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K, idd.getPreferredSlmAllocationSize()); } } DG2TEST_F(CommandEncodeStatesDg2Test, givenNoWorkaroundNeededWhenSelectingPreferredSlmSizePerDssThenUseDssCount) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.platform.usRevId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_B, *defaultHwInfo); @@ -56,27 +55,27 @@ DG2TEST_F(CommandEncodeStatesDg2Test, givenNoWorkaroundNeededWhenSelectingPrefer const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; EncodeDispatchKernel::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm); - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K, idd.getPreferredSlmAllocationSize()); } { const uint32_t threadsPerThreadGroup = 8; // 16 groups will fit in one DSS const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; EncodeDispatchKernel::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm); - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize()); } { const uint32_t threadsPerThreadGroup = 9; // 14 groups will fit in one DSS const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; EncodeDispatchKernel::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm); - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize()); } { const uint32_t threadsPerThreadGroup = 50; // 2 groups will fit in one DSS const uint32_t slmSizePerThreadGroup = 16 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; EncodeDispatchKernel::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm); - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize()); } } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp index 0ce08832f7..7867368cb8 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp @@ -76,21 +76,21 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeAndDG2T } HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenVariousSlmTotalSizesAndSettingRevIDToDifferentValuesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet, IsXeHpgCore) { - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; const std::vector> valuesToTest = { - {0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K}, - {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K}, - {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K}, + {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K}, + {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K}, + {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K}, //since we can't set 48KB as SLM size for workgroup, we need to ask for 64KB here. - {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K}, + {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K}, }; const std::vector> valuesToTestForDg2AStep = { - {0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K}, - {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K}, - {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K}, - {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K}, + {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}, + {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}, + {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}, + {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}, }; const std::array revs{REVISION_A0, REVISION_B, REVISION_C, REVISION_D, REVISION_K}; @@ -106,12 +106,12 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenVariousSlmTotalSizesAndSettin } HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenDebugOverrideWhenSetAdditionalInfoIsCalledThenDebugValuesAreSet, IsAtLeastXeHpgCore) { - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; DebugManagerStateRestore stateRestore; - PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS debugOverrideValues[] = {PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K, - PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K, - PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K}; + PREFERRED_SLM_ALLOCATION_SIZE debugOverrideValues[] = {PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K, + PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, + PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}; for (auto debugOverrideValue : debugOverrideValues) { DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.set(debugOverrideValue); @@ -124,25 +124,10 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenDebugOverrideWhenSetAdditiona } } -HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsAtLeastXeHpgCore) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - MockDevice device; - auto hwInfo = device.getHardwareInfo(); - - uint32_t barrierCounts[] = {0, 1, 2, 7}; - - for (auto barrierCount : barrierCounts) { - EncodeDispatchKernel::programBarrierEnable(idd, barrierCount, hwInfo); - - EXPECT_EQ(barrierCount, idd.getNumberOfBarriers()); - } -} - HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocationSizePerDssWhenDispatchingKernelThenCorrectValueIsSet, IsAtLeastXeHpgCore) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; DebugManagerStateRestore restorer; DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.set(5); uint32_t dims[] = {2, 1, 1}; @@ -165,5 +150,5 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocatio auto cmd = genCmdCast(*itor); auto &idd = cmd->getInterfaceDescriptor(); - EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K, idd.getPreferredSlmAllocationSizePerDss()); + EXPECT_EQ(5u, static_cast(idd.getPreferredSlmAllocationSize())); } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h index 7b039ac61c..70810db9da 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,21 +15,19 @@ template struct PreferredSlmTestValues { uint32_t preferredSlmAllocationSizePerDss; - typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS expectedValueInIdd; + typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE expectedValueInIdd; }; template void verifyPreferredSlmValues(std::vector> valuesToTest, NEO::HardwareInfo &hwInfo) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using PREFERRED_SLM_SIZE_OVERRIDE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE; - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; auto threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount; uint32_t localWorkGroupsPerDssCounts[] = {1, 2, 4}; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - EXPECT_EQ(0u, idd.getPreferredSlmAllocationSizePerDss()); - EXPECT_EQ(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED, idd.getPreferredSlmSizeOverride()); + EXPECT_EQ(0u, idd.getPreferredSlmAllocationSize()); const std::array slmPolicies = { NEO::SlmPolicy::SlmPolicyNone, @@ -50,8 +48,7 @@ void verifyPreferredSlmValues(std::vector> va slmTotalSize, slmPolicy); - EXPECT_EQ(valueToTest.expectedValueInIdd, idd.getPreferredSlmAllocationSizePerDss()); - EXPECT_EQ(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED, idd.getPreferredSlmSizeOverride()); + EXPECT_EQ(valueToTest.expectedValueInIdd, idd.getPreferredSlmAllocationSize()); } } } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp index e8463e5c62..fd82e46576 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp @@ -53,6 +53,21 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari } } +HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsAtLeastXeHpcCore) { + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; + MockDevice device; + auto hwInfo = device.getHardwareInfo(); + + uint32_t barrierCounts[] = {0, 1, 2, 7}; + + for (auto barrierCount : barrierCounts) { + EncodeDispatchKernel::programBarrierEnable(idd, barrierCount, hwInfo); + + EXPECT_EQ(barrierCount, idd.getNumberOfBarriers()); + } +} + HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandContainerWhenNumGrfRequiredIsGreaterThanDefaultThenLargeGrfModeEnabled) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; @@ -71,24 +86,24 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandCon } HWTEST2_F(CommandEncodeStatesTestPvcAndLater, GivenVariousSlmTotalSizesAndSettingRevIDToDifferentValuesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet, IsXeHpcCore) { - using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS; + using PREFERRED_SLM_ALLOCATION_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE; const std::vector> valuesToTest = { - {0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K}, - {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K}, - {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K}, - {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K}, - {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K}, - {128 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K}, + {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K}, + {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K}, + {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K}, + {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K}, + {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K}, + {128 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}, }; const std::vector> valuesToTestForPvcAStep = { - {0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K}, - {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K}, - {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K}, - {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K}, - {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K}, - {128 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K}, + {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K}, + {16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K}, + {32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K}, + {64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K}, + {96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K}, + {128 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}, }; const std::array revs{REVISION_A0, REVISION_B, REVISION_C, REVISION_D, REVISION_K}; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 3340a7f830..113b6050ce 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -212,8 +212,17 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhe EXPECT_EQ(idd.getBindingTablePointer(), 0u); } +struct SamplerSupportedMatcher { + template + static constexpr bool isMatched() { + if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { + return HwMapper::GfxProduct::supportsSampler; + } + return false; + } +}; -HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensamplerStateWasCopied) { +HWTEST2_F(CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensamplerStateWasCopied, SamplerSupportedMatcher) { using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; @@ -370,10 +379,16 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD if (EncodeSurfaceState::doBindingTablePrefetch()) { EXPECT_NE(0u, idd.getBindingTableEntryCount()); - EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); } else { EXPECT_EQ(0u, idd.getBindingTableEntryCount()); - EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); + } + + if constexpr (FamilyType::supportsSampler) { + if (EncodeSurfaceState::doBindingTablePrefetch()) { + EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); + } else { + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); + } } } @@ -394,7 +409,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD auto &idd = cmd->getInterfaceDescriptor(); EXPECT_EQ(0u, idd.getBindingTableEntryCount()); - EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); + if constexpr (FamilyType::supportsSampler) { + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); + } } { @@ -414,7 +431,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD auto &idd = cmd->getInterfaceDescriptor(); EXPECT_NE(0u, idd.getBindingTableEntryCount()); - EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); + if constexpr (FamilyType::supportsSampler) { + EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount()); + } } } diff --git a/shared/test/unit_test/xe_hpg_core/CMakeLists.txt b/shared/test/unit_test/xe_hpg_core/CMakeLists.txt index 09a974db09..3bc1eb7e98 100644 --- a/shared/test/unit_test/xe_hpg_core/CMakeLists.txt +++ b/shared/test/unit_test/xe_hpg_core/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -8,6 +8,7 @@ if(TESTS_XE_HPG_CORE) set(NEO_SHARED_tests_xe_hpg_core ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds_xe_hpg_core_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel_xe_hpg_core.cpp ) if(DEFINED AUB_STREAM_PROJECT_NAME) diff --git a/shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp new file mode 100644 index 0000000000..94b4a23230 --- /dev/null +++ b/shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/xe_hpg_core/hw_cmds_base.h" +#include "shared/test/common/helpers/default_hw_info.h" +#include "shared/test/common/test_macros/test.h" + +using namespace NEO; + +using CommandEncodeStatesTestXeHpgCore = ::testing::Test; + +HWTEST2_F(CommandEncodeStatesTestXeHpgCore, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsXeHpgCore) { + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; + + uint32_t barrierCounts[] = {0, 1}; + + for (auto barrierCount : barrierCounts) { + EncodeDispatchKernel::programBarrierEnable(idd, barrierCount, *defaultHwInfo); + + EXPECT_EQ(barrierCount, idd.getNumberOfBarriers()); + } +}