Correct INTERFACE_DESCRIPTOR_DATA definitions for XeHp and later
Related-To: NEO-6466
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
parent
de5e1dcd1e
commit
ff79c84115
|
@ -51,7 +51,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||||
size_t sizeCrossThreadData,
|
size_t sizeCrossThreadData,
|
||||||
size_t sizePerThreadData,
|
size_t sizePerThreadData,
|
||||||
size_t bindingTablePointer,
|
size_t bindingTablePointer,
|
||||||
size_t offsetSamplerState,
|
[[maybe_unused]] size_t offsetSamplerState,
|
||||||
uint32_t numSamplers,
|
uint32_t numSamplers,
|
||||||
uint32_t numThreadsPerThreadGroup,
|
uint32_t numThreadsPerThreadGroup,
|
||||||
const Kernel &kernel,
|
const Kernel &kernel,
|
||||||
|
|
|
@ -140,7 +140,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||||
size_t sizeCrossThreadData,
|
size_t sizeCrossThreadData,
|
||||||
size_t sizePerThreadData,
|
size_t sizePerThreadData,
|
||||||
size_t bindingTablePointer,
|
size_t bindingTablePointer,
|
||||||
size_t offsetSamplerState,
|
[[maybe_unused]] size_t offsetSamplerState,
|
||||||
uint32_t numSamplers,
|
uint32_t numSamplers,
|
||||||
uint32_t threadsPerThreadGroup,
|
uint32_t threadsPerThreadGroup,
|
||||||
const Kernel &kernel,
|
const Kernel &kernel,
|
||||||
|
@ -158,8 +158,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||||
auto interfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData;
|
auto interfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData;
|
||||||
|
|
||||||
// Program the kernel start pointer
|
// Program the kernel start pointer
|
||||||
interfaceDescriptor.setKernelStartPointerHigh(kernelStartOffset >> 32);
|
interfaceDescriptor.setKernelStartPointer(static_cast<uint32_t>(kernelStartOffset & std::numeric_limits<uint32_t>::max()));
|
||||||
interfaceDescriptor.setKernelStartPointer((uint32_t)kernelStartOffset);
|
|
||||||
|
|
||||||
// # of threads in thread group should be based on LWS.
|
// # of threads in thread group should be based on LWS.
|
||||||
interfaceDescriptor.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
|
interfaceDescriptor.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
|
||||||
|
@ -173,7 +172,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||||
|
|
||||||
interfaceDescriptor.setBindingTablePointer(static_cast<uint32_t>(bindingTablePointer));
|
interfaceDescriptor.setBindingTablePointer(static_cast<uint32_t>(bindingTablePointer));
|
||||||
|
|
||||||
interfaceDescriptor.setSamplerStatePointer(static_cast<uint32_t>(offsetSamplerState));
|
if constexpr (GfxFamily::supportsSampler) {
|
||||||
|
interfaceDescriptor.setSamplerStatePointer(static_cast<uint32_t>(offsetSamplerState));
|
||||||
|
}
|
||||||
|
|
||||||
EncodeDispatchKernel<GfxFamily>::adjustBindingTablePrefetch(interfaceDescriptor, numSamplers, bindingTablePrefetchSize);
|
EncodeDispatchKernel<GfxFamily>::adjustBindingTablePrefetch(interfaceDescriptor, numSamplers, bindingTablePrefetchSize);
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -636,7 +636,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAutoLocal
|
||||||
kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||||
|
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh());
|
|
||||||
|
|
||||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
|
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
|
||||||
false, *cmdQ.get(), multiDispatchInfo, false, false);
|
false, *cmdQ.get(), multiDispatchInfo, false, false);
|
||||||
|
@ -710,7 +709,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
|
||||||
uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch();
|
uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch();
|
||||||
|
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh());
|
|
||||||
|
|
||||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
|
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
|
||||||
false, *cmdQ.get(), multiDispatchInfo, false, false);
|
false, *cmdQ.get(), multiDispatchInfo, false, false);
|
||||||
|
@ -1009,7 +1007,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
|
||||||
kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||||
|
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh());
|
|
||||||
|
|
||||||
memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation);
|
memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation);
|
||||||
}
|
}
|
||||||
|
@ -1062,7 +1059,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
|
||||||
uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch();
|
uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch();
|
||||||
|
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
||||||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset >> 32), idd.getKernelStartPointerHigh());
|
|
||||||
|
|
||||||
memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation);
|
memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation);
|
||||||
}
|
}
|
||||||
|
|
|
@ -87,7 +87,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||||
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||||
}
|
}
|
||||||
idd.setKernelStartPointer(offset);
|
idd.setKernelStartPointer(offset);
|
||||||
idd.setKernelStartPointerHigh(0u);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto threadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup();
|
auto threadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup();
|
||||||
|
@ -141,7 +140,9 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
idd.setSamplerStatePointer(samplerStateOffset);
|
if constexpr (Family::supportsSampler) {
|
||||||
|
idd.setSamplerStatePointer(samplerStateOffset);
|
||||||
|
}
|
||||||
|
|
||||||
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
|
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2019-2021 Intel Corporation
|
* Copyright (C) 2019-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -20,6 +20,7 @@ namespace NEO {
|
||||||
struct GEN11 {
|
struct GEN11 {
|
||||||
#include "shared/source/generated/gen11/hw_cmds_generated_gen11.inl"
|
#include "shared/source/generated/gen11/hw_cmds_generated_gen11.inl"
|
||||||
|
|
||||||
|
static constexpr bool supportsSampler = true;
|
||||||
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2019-2021 Intel Corporation
|
* Copyright (C) 2019-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -22,6 +22,7 @@ namespace NEO {
|
||||||
struct GEN12LP {
|
struct GEN12LP {
|
||||||
#include "shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl"
|
#include "shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl"
|
||||||
|
|
||||||
|
static constexpr bool supportsSampler = true;
|
||||||
static constexpr uint32_t stateComputeModeForceNonCoherentMask = (0b11u << 3);
|
static constexpr uint32_t stateComputeModeForceNonCoherentMask = (0b11u << 3);
|
||||||
|
|
||||||
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2021 Intel Corporation
|
* Copyright (C) 2018-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -22,6 +22,7 @@ namespace NEO {
|
||||||
struct GEN8 {
|
struct GEN8 {
|
||||||
#include "shared/source/generated/gen8/hw_cmds_generated_gen8.inl"
|
#include "shared/source/generated/gen8/hw_cmds_generated_gen8.inl"
|
||||||
|
|
||||||
|
static constexpr bool supportsSampler = true;
|
||||||
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2021 Intel Corporation
|
* Copyright (C) 2018-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -22,6 +22,7 @@ namespace NEO {
|
||||||
struct GEN9 {
|
struct GEN9 {
|
||||||
#include "shared/source/generated/gen9/hw_cmds_generated_gen9.inl"
|
#include "shared/source/generated/gen9/hw_cmds_generated_gen9.inl"
|
||||||
|
|
||||||
|
static constexpr bool supportsSampler = true;
|
||||||
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -5085,46 +5085,47 @@ STATIC_ASSERT(20 == sizeof(POSTSYNC_DATA));
|
||||||
typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
union tagTheStructure {
|
union tagTheStructure {
|
||||||
struct tagCommon {
|
struct tagCommon {
|
||||||
uint32_t Reserved_0_0_5 : BITFIELD_RANGE(0, 5);
|
// DWORD 0
|
||||||
uint32_t KernelStartPointer : BITFIELD_RANGE(6, 31);
|
uint64_t Reserved_0 : BITFIELD_RANGE(0, 5);
|
||||||
|
uint64_t KernelStartPointer : BITFIELD_RANGE(6, 31);
|
||||||
uint32_t KernelStartPointerHigh : BITFIELD_RANGE(0, 15);
|
// DWORD 1
|
||||||
uint32_t Reserved_1_16_31 : BITFIELD_RANGE(16, 31);
|
uint64_t Reserved_32 : BITFIELD_RANGE(32, 63);
|
||||||
|
// DWORD 2
|
||||||
uint32_t Reserved_2_0_6 : BITFIELD_RANGE(0, 6);
|
uint32_t Reserved_64 : BITFIELD_RANGE(0, 6);
|
||||||
uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7);
|
uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7);
|
||||||
uint32_t Reserved_2_8_10 : BITFIELD_RANGE(8, 10);
|
uint32_t Reserved_72 : BITFIELD_RANGE(8, 10);
|
||||||
uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11);
|
uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11);
|
||||||
uint32_t Reserved_2_12_12 : BITFIELD_RANGE(12, 12);
|
uint32_t Reserved_76 : BITFIELD_RANGE(12, 12);
|
||||||
uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13);
|
uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13);
|
||||||
uint32_t Reserved_2_14_15 : BITFIELD_RANGE(14, 15);
|
uint32_t Reserved_78 : BITFIELD_RANGE(14, 15);
|
||||||
uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16);
|
uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16);
|
||||||
uint32_t Reserved_2_17_17 : BITFIELD_RANGE(17, 17);
|
uint32_t Reserved_81 : BITFIELD_RANGE(17, 17);
|
||||||
uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18);
|
uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18);
|
||||||
uint32_t DenormMode : BITFIELD_RANGE(19, 19);
|
uint32_t DenormMode : BITFIELD_RANGE(19, 19);
|
||||||
uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20);
|
uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20);
|
||||||
uint32_t Reserved_2_21_31 : BITFIELD_RANGE(21, 31);
|
uint32_t Reserved_85 : BITFIELD_RANGE(21, 31);
|
||||||
|
// DWORD 3
|
||||||
uint32_t Reserved_3_0_1 : BITFIELD_RANGE(0, 1);
|
uint32_t Reserved_96 : BITFIELD_RANGE(0, 1);
|
||||||
uint32_t SamplerCount : BITFIELD_RANGE(2, 4);
|
uint32_t SamplerCount : BITFIELD_RANGE(2, 4);
|
||||||
uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31);
|
uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31);
|
||||||
|
// DWORD 4
|
||||||
uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4);
|
uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4);
|
||||||
uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20);
|
uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20);
|
||||||
uint32_t Reserved_4_21_31 : BITFIELD_RANGE(21, 31);
|
uint32_t Reserved_149 : BITFIELD_RANGE(21, 31);
|
||||||
|
// DWORD 5
|
||||||
uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9);
|
uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9);
|
||||||
uint32_t Reserved_5_10_15 : BITFIELD_RANGE(10, 15);
|
uint32_t Reserved_170 : BITFIELD_RANGE(10, 15);
|
||||||
uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20);
|
uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20);
|
||||||
uint32_t BarrierEnable : BITFIELD_RANGE(21, 21);
|
uint32_t BarrierEnable : BITFIELD_RANGE(21, 21);
|
||||||
uint32_t RoundingMode : BITFIELD_RANGE(22, 23);
|
uint32_t RoundingMode : BITFIELD_RANGE(22, 23);
|
||||||
uint32_t Reserved_5_24_25 : BITFIELD_RANGE(24, 25);
|
uint32_t Reserved_184 : BITFIELD_RANGE(24, 25);
|
||||||
uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27);
|
uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27);
|
||||||
uint32_t Reserved_5_28_31 : BITFIELD_RANGE(28, 31);
|
uint32_t Reserved_188 : BITFIELD_RANGE(28, 30);
|
||||||
|
uint32_t BtdMode : BITFIELD_RANGE(31, 31);
|
||||||
uint32_t Reserved_6_0_31 : BITFIELD_RANGE(0, 31);
|
// DWORD 6
|
||||||
|
uint32_t Reserved_192;
|
||||||
uint32_t Reserved_7;
|
// DWORD 7
|
||||||
|
uint32_t Reserved_224;
|
||||||
} Common;
|
} Common;
|
||||||
uint32_t RawData[8];
|
uint32_t RawData[8];
|
||||||
} TheStructure;
|
} TheStructure;
|
||||||
|
@ -5151,6 +5152,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3,
|
SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3,
|
||||||
SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4,
|
SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4,
|
||||||
} SAMPLER_COUNT;
|
} SAMPLER_COUNT;
|
||||||
|
typedef enum tagBINDING_TABLE_ENTRY_COUNT {
|
||||||
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED = 0x0,
|
||||||
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MIN = 0x1,
|
||||||
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MAX = 0x1f,
|
||||||
|
} BINDING_TABLE_ENTRY_COUNT;
|
||||||
typedef enum tagSHARED_LOCAL_MEMORY_SIZE {
|
typedef enum tagSHARED_LOCAL_MEMORY_SIZE {
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1,
|
||||||
|
@ -5167,18 +5173,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
ROUNDING_MODE_RD = 0x2,
|
ROUNDING_MODE_RD = 0x2,
|
||||||
ROUNDING_MODE_RTZ = 0x3,
|
ROUNDING_MODE_RTZ = 0x3,
|
||||||
} ROUNDING_MODE;
|
} ROUNDING_MODE;
|
||||||
|
typedef enum tagTHREAD_GROUP_DISPATCH_SIZE {
|
||||||
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8 = 0x0,
|
||||||
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4 = 0x1,
|
||||||
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2 = 0x2,
|
||||||
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 = 0x3,
|
||||||
|
} THREAD_GROUP_DISPATCH_SIZE;
|
||||||
|
typedef enum tagBTD_MODE {
|
||||||
|
BTD_MODE_DISABLE = 0x0,
|
||||||
|
BTD_MODE_ENABLE = 0x1,
|
||||||
|
} BTD_MODE;
|
||||||
inline void init() {
|
inline void init() {
|
||||||
memset(&TheStructure, 0, sizeof(TheStructure));
|
memset(&TheStructure, 0, sizeof(TheStructure));
|
||||||
TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754;
|
TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754;
|
||||||
TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE;
|
TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE;
|
||||||
TheStructure.Common.DenormMode = DENORM_MODE_FTZ;
|
TheStructure.Common.DenormMode = DENORM_MODE_FTZ;
|
||||||
TheStructure.Common.ThreadPreemptionDisable =
|
TheStructure.Common.ThreadPreemptionDisable = THREAD_PREEMPTION_DISABLE_DISABLE;
|
||||||
THREAD_PREEMPTION_DISABLE_DISABLE;
|
|
||||||
TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED;
|
TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED;
|
||||||
TheStructure.Common.SharedLocalMemorySize =
|
TheStructure.Common.BindingTableEntryCount = BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED;
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
|
TheStructure.Common.SharedLocalMemorySize = SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
|
||||||
TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE;
|
TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE;
|
||||||
|
TheStructure.Common.ThreadGroupDispatchSize = THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8;
|
||||||
|
TheStructure.Common.BtdMode = BTD_MODE_DISABLE;
|
||||||
}
|
}
|
||||||
static tagINTERFACE_DESCRIPTOR_DATA sInit() {
|
static tagINTERFACE_DESCRIPTOR_DATA sInit() {
|
||||||
INTERFACE_DESCRIPTOR_DATA state;
|
INTERFACE_DESCRIPTOR_DATA state;
|
||||||
|
@ -5186,7 +5202,7 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
inline uint32_t &getRawData(const uint32_t index) {
|
inline uint32_t &getRawData(const uint32_t index) {
|
||||||
DEBUG_BREAK_IF(index >= 8);
|
UNRECOVERABLE_IF(index >= 8);
|
||||||
return TheStructure.RawData[index];
|
return TheStructure.RawData[index];
|
||||||
}
|
}
|
||||||
typedef enum tagKERNELSTARTPOINTER {
|
typedef enum tagKERNELSTARTPOINTER {
|
||||||
|
@ -5194,35 +5210,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
KERNELSTARTPOINTER_ALIGN_SIZE = 0x40,
|
KERNELSTARTPOINTER_ALIGN_SIZE = 0x40,
|
||||||
} KERNELSTARTPOINTER;
|
} KERNELSTARTPOINTER;
|
||||||
inline void setKernelStartPointer(const uint64_t value) {
|
inline void setKernelStartPointer(const uint64_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.KernelStartPointer = static_cast<uint32_t>(value) >> KERNELSTARTPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.KernelStartPointer = (uint32_t)value >> KERNELSTARTPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getKernelStartPointer() const {
|
inline uint64_t getKernelStartPointer() const {
|
||||||
return (TheStructure.Common.KernelStartPointer << KERNELSTARTPOINTER_BIT_SHIFT);
|
return static_cast<uint64_t>(TheStructure.Common.KernelStartPointer) << KERNELSTARTPOINTER_BIT_SHIFT; // patched
|
||||||
}
|
}
|
||||||
inline void setKernelStartPointerHigh(const uint32_t value) {
|
inline void setSoftwareExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.KernelStartPointerHigh = value;
|
|
||||||
}
|
|
||||||
inline uint32_t getKernelStartPointerHigh() const {
|
|
||||||
return (TheStructure.Common.KernelStartPointerHigh);
|
|
||||||
}
|
|
||||||
inline void setSoftwareExceptionEnable(const uint32_t value) {
|
|
||||||
TheStructure.Common.SoftwareExceptionEnable = value;
|
TheStructure.Common.SoftwareExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getSoftwareExceptionEnable() const {
|
inline bool getSoftwareExceptionEnable() const {
|
||||||
return (TheStructure.Common.SoftwareExceptionEnable);
|
return TheStructure.Common.SoftwareExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setMaskStackExceptionEnable(const uint32_t value) {
|
inline void setMaskStackExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.MaskStackExceptionEnable = value;
|
TheStructure.Common.MaskStackExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getMaskStackExceptionEnable() const {
|
inline bool getMaskStackExceptionEnable() const {
|
||||||
return (TheStructure.Common.MaskStackExceptionEnable);
|
return TheStructure.Common.MaskStackExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setIllegalOpcodeExceptionEnable(const uint32_t value) {
|
inline void setIllegalOpcodeExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.IllegalOpcodeExceptionEnable = value;
|
TheStructure.Common.IllegalOpcodeExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getIllegalOpcodeExceptionEnable() const {
|
inline bool getIllegalOpcodeExceptionEnable() const {
|
||||||
return (TheStructure.Common.IllegalOpcodeExceptionEnable);
|
return TheStructure.Common.IllegalOpcodeExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setFloatingPointMode(const FLOATING_POINT_MODE value) {
|
inline void setFloatingPointMode(const FLOATING_POINT_MODE value) {
|
||||||
TheStructure.Common.FloatingPointMode = value;
|
TheStructure.Common.FloatingPointMode = value;
|
||||||
|
@ -5258,35 +5267,34 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5,
|
SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5,
|
||||||
SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20,
|
SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20,
|
||||||
} SAMPLERSTATEPOINTER;
|
} SAMPLERSTATEPOINTER;
|
||||||
inline void setSamplerStatePointer(const uint64_t value) {
|
inline void setSamplerStatePointer(const uint32_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.SamplerStatePointer = static_cast<uint32_t>(value) >> SAMPLERSTATEPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.SamplerStatePointer = (uint32_t)value >> SAMPLERSTATEPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getSamplerStatePointer() const {
|
inline uint32_t getSamplerStatePointer() const {
|
||||||
return (TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT);
|
return TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT;
|
||||||
}
|
}
|
||||||
inline void setBindingTableEntryCount(const uint32_t value) {
|
inline void setBindingTableEntryCount(const uint32_t value) {
|
||||||
TheStructure.Common.BindingTableEntryCount = value;
|
TheStructure.Common.BindingTableEntryCount = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getBindingTableEntryCount() const {
|
inline uint32_t getBindingTableEntryCount() const {
|
||||||
return (TheStructure.Common.BindingTableEntryCount);
|
return TheStructure.Common.BindingTableEntryCount;
|
||||||
}
|
}
|
||||||
typedef enum tagBINDINGTABLEPOINTER {
|
typedef enum tagBINDINGTABLEPOINTER {
|
||||||
BINDINGTABLEPOINTER_BIT_SHIFT = 0x5,
|
BINDINGTABLEPOINTER_BIT_SHIFT = 0x5,
|
||||||
BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20,
|
BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20,
|
||||||
} BINDINGTABLEPOINTER;
|
} BINDINGTABLEPOINTER;
|
||||||
inline void setBindingTablePointer(const uint64_t value) {
|
inline void setBindingTablePointer(const uint32_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.BindingTablePointer = static_cast<uint32_t>(value) >> BINDINGTABLEPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.BindingTablePointer = (uint32_t)value >> BINDINGTABLEPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getBindingTablePointer() const {
|
inline uint32_t getBindingTablePointer() const {
|
||||||
return (TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT);
|
return TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT;
|
||||||
}
|
}
|
||||||
inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) {
|
inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) {
|
||||||
|
UNRECOVERABLE_IF(value > 0x3ff);
|
||||||
TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value;
|
TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const {
|
inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const {
|
||||||
return (TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup);
|
return TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup;
|
||||||
}
|
}
|
||||||
inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) {
|
inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) {
|
||||||
TheStructure.Common.SharedLocalMemorySize = value;
|
TheStructure.Common.SharedLocalMemorySize = value;
|
||||||
|
@ -5294,11 +5302,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const {
|
inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const {
|
||||||
return static_cast<SHARED_LOCAL_MEMORY_SIZE>(TheStructure.Common.SharedLocalMemorySize);
|
return static_cast<SHARED_LOCAL_MEMORY_SIZE>(TheStructure.Common.SharedLocalMemorySize);
|
||||||
}
|
}
|
||||||
inline void setBarrierEnable(const uint32_t value) {
|
inline void setBarrierEnable(const bool value) {
|
||||||
TheStructure.Common.BarrierEnable = (value > 0u) ? 1u : 0u;
|
TheStructure.Common.BarrierEnable = value;
|
||||||
}
|
}
|
||||||
inline bool getBarrierEnable() const {
|
inline bool getBarrierEnable() const {
|
||||||
return (TheStructure.Common.BarrierEnable);
|
return TheStructure.Common.BarrierEnable;
|
||||||
}
|
}
|
||||||
inline void setRoundingMode(const ROUNDING_MODE value) {
|
inline void setRoundingMode(const ROUNDING_MODE value) {
|
||||||
TheStructure.Common.RoundingMode = value;
|
TheStructure.Common.RoundingMode = value;
|
||||||
|
@ -5306,11 +5314,17 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
inline ROUNDING_MODE getRoundingMode() const {
|
inline ROUNDING_MODE getRoundingMode() const {
|
||||||
return static_cast<ROUNDING_MODE>(TheStructure.Common.RoundingMode);
|
return static_cast<ROUNDING_MODE>(TheStructure.Common.RoundingMode);
|
||||||
}
|
}
|
||||||
inline void setThreadGroupDispatchSize(const uint32_t value) {
|
inline void setThreadGroupDispatchSize(const THREAD_GROUP_DISPATCH_SIZE value) {
|
||||||
TheStructure.Common.ThreadGroupDispatchSize = value;
|
TheStructure.Common.ThreadGroupDispatchSize = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getThreadGroupDispatchSize() const {
|
inline THREAD_GROUP_DISPATCH_SIZE getThreadGroupDispatchSize() const {
|
||||||
return (TheStructure.Common.ThreadGroupDispatchSize);
|
return static_cast<THREAD_GROUP_DISPATCH_SIZE>(TheStructure.Common.ThreadGroupDispatchSize);
|
||||||
|
}
|
||||||
|
inline void setBtdMode(const BTD_MODE value) {
|
||||||
|
TheStructure.Common.BtdMode = value;
|
||||||
|
}
|
||||||
|
inline BTD_MODE getBtdMode() const {
|
||||||
|
return static_cast<BTD_MODE>(TheStructure.Common.BtdMode);
|
||||||
}
|
}
|
||||||
} INTERFACE_DESCRIPTOR_DATA;
|
} INTERFACE_DESCRIPTOR_DATA;
|
||||||
STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA));
|
STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA));
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -5152,49 +5152,46 @@ STATIC_ASSERT(20 == sizeof(POSTSYNC_DATA));
|
||||||
typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
union tagTheStructure {
|
union tagTheStructure {
|
||||||
struct tagCommon {
|
struct tagCommon {
|
||||||
uint32_t Reserved_0_0_5 : BITFIELD_RANGE(0, 5);
|
// DWORD 0
|
||||||
uint32_t KernelStartPointer : BITFIELD_RANGE(6, 31);
|
uint64_t Reserved_0 : BITFIELD_RANGE(0, 5);
|
||||||
|
uint64_t KernelStartPointer : BITFIELD_RANGE(6, 31);
|
||||||
uint32_t KernelStartPointerHigh : BITFIELD_RANGE(0, 15);
|
// DWORD 1
|
||||||
uint32_t Reserved_1_16_31 : BITFIELD_RANGE(16, 31);
|
uint64_t Reserved_32 : BITFIELD_RANGE(32, 63);
|
||||||
|
// DWORD 2
|
||||||
uint32_t Reserved_2_0_6 : BITFIELD_RANGE(0, 6);
|
uint32_t Reserved_64 : BITFIELD_RANGE(0, 6);
|
||||||
uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7);
|
uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7);
|
||||||
uint32_t RegistersPerThread : BITFIELD_RANGE(8, 10);
|
uint32_t Reserved_72 : BITFIELD_RANGE(8, 10);
|
||||||
uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11);
|
uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11);
|
||||||
uint32_t Reserved_2_12_12 : BITFIELD_RANGE(12, 12);
|
uint32_t Reserved_76 : BITFIELD_RANGE(12, 12);
|
||||||
uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13);
|
uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13);
|
||||||
uint32_t Reserved_2_14_15 : BITFIELD_RANGE(14, 15);
|
uint32_t Reserved_78 : BITFIELD_RANGE(14, 15);
|
||||||
uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16);
|
uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16);
|
||||||
uint32_t Reserved_2_17_17 : BITFIELD_RANGE(17, 17);
|
uint32_t Reserved_81 : BITFIELD_RANGE(17, 17);
|
||||||
uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18);
|
uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18);
|
||||||
uint32_t DenormMode : BITFIELD_RANGE(19, 19);
|
uint32_t DenormMode : BITFIELD_RANGE(19, 19);
|
||||||
uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20);
|
uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20);
|
||||||
uint32_t Reserved_2_21_31 : BITFIELD_RANGE(21, 31);
|
uint32_t Reserved_85 : BITFIELD_RANGE(21, 31);
|
||||||
|
// DWORD 3
|
||||||
uint32_t Reserved_3_0_1 : BITFIELD_RANGE(0, 1);
|
uint32_t Reserved_96;
|
||||||
uint32_t SamplerCount : BITFIELD_RANGE(2, 4);
|
// DWORD 4
|
||||||
uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31);
|
|
||||||
|
|
||||||
uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4);
|
uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4);
|
||||||
uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20);
|
uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20);
|
||||||
uint32_t Reserved_4_21_31 : BITFIELD_RANGE(21, 31);
|
uint32_t Reserved_149 : BITFIELD_RANGE(21, 31);
|
||||||
|
// DWORD 5
|
||||||
uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9);
|
uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9);
|
||||||
uint32_t Reserved_5_10_15 : BITFIELD_RANGE(10, 15);
|
uint32_t Reserved_170 : BITFIELD_RANGE(10, 15);
|
||||||
uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20);
|
uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20);
|
||||||
uint32_t Reserved_5_21_21 : BITFIELD_RANGE(21, 21);
|
uint32_t Reserved_181 : BITFIELD_RANGE(21, 21);
|
||||||
uint32_t RoundingMode : BITFIELD_RANGE(22, 23);
|
uint32_t RoundingMode : BITFIELD_RANGE(22, 23);
|
||||||
uint32_t Reserved_5_24_25 : BITFIELD_RANGE(24, 25);
|
uint32_t Reserved_184 : BITFIELD_RANGE(24, 25);
|
||||||
uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27);
|
uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27);
|
||||||
uint32_t NumberOfBarriers : BITFIELD_RANGE(28, 30);
|
uint32_t NumberOfBarriers : BITFIELD_RANGE(28, 30);
|
||||||
uint32_t Reserved_5_31_31 : BITFIELD_RANGE(31, 31);
|
uint32_t BtdMode : BITFIELD_RANGE(31, 31);
|
||||||
|
// DWORD 6
|
||||||
uint32_t PreferredSlmAllocationSizePerDss : BITFIELD_RANGE(0, 2);
|
uint32_t PreferredSlmAllocationSize : BITFIELD_RANGE(0, 3);
|
||||||
uint32_t PreferredSlmSizeOverride : BITFIELD_RANGE(3, 3);
|
uint32_t Reserved_196 : BITFIELD_RANGE(4, 31);
|
||||||
uint32_t Reserved_6_4_31 : BITFIELD_RANGE(4, 31);
|
// DWORD 7
|
||||||
|
uint32_t Reserved_224;
|
||||||
uint32_t Reserved_7;
|
|
||||||
} Common;
|
} Common;
|
||||||
uint32_t RawData[8];
|
uint32_t RawData[8];
|
||||||
} TheStructure;
|
} TheStructure;
|
||||||
|
@ -5214,13 +5211,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
THREAD_PREEMPTION_DISABLE_DISABLE = 0x0,
|
THREAD_PREEMPTION_DISABLE_DISABLE = 0x0,
|
||||||
THREAD_PREEMPTION_DISABLE_ENABLE = 0x1,
|
THREAD_PREEMPTION_DISABLE_ENABLE = 0x1,
|
||||||
} THREAD_PREEMPTION_DISABLE;
|
} THREAD_PREEMPTION_DISABLE;
|
||||||
typedef enum tagSAMPLER_COUNT {
|
typedef enum tagBINDING_TABLE_ENTRY_COUNT {
|
||||||
SAMPLER_COUNT_NO_SAMPLERS_USED = 0x0,
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED = 0x0,
|
||||||
SAMPLER_COUNT_BETWEEN_1_AND_4_SAMPLERS_USED = 0x1,
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MIN = 0x1,
|
||||||
SAMPLER_COUNT_BETWEEN_5_AND_8_SAMPLERS_USED = 0x2,
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MAX = 0x1f,
|
||||||
SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3,
|
} BINDING_TABLE_ENTRY_COUNT;
|
||||||
SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4,
|
|
||||||
} SAMPLER_COUNT;
|
|
||||||
typedef enum tagSHARED_LOCAL_MEMORY_SIZE {
|
typedef enum tagSHARED_LOCAL_MEMORY_SIZE {
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1,
|
||||||
|
@ -5228,12 +5223,12 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K = 0x3,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K = 0x3,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K = 0x4,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K = 0x4,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K = 0x5,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K = 0x5,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K = 0x8,
|
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K = 0x6,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K = 0x6,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K = 0x9,
|
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K = 0x7,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K = 0x7,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K = 0xA,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K = 0x8,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K = 0xB,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K = 0x9,
|
||||||
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K = 0xa,
|
||||||
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K = 0xb,
|
||||||
} SHARED_LOCAL_MEMORY_SIZE;
|
} SHARED_LOCAL_MEMORY_SIZE;
|
||||||
typedef enum tagROUNDING_MODE {
|
typedef enum tagROUNDING_MODE {
|
||||||
ROUNDING_MODE_RTNE = 0x0,
|
ROUNDING_MODE_RTNE = 0x0,
|
||||||
|
@ -5241,42 +5236,51 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
ROUNDING_MODE_RD = 0x2,
|
ROUNDING_MODE_RD = 0x2,
|
||||||
ROUNDING_MODE_RTZ = 0x3,
|
ROUNDING_MODE_RTZ = 0x3,
|
||||||
} ROUNDING_MODE;
|
} ROUNDING_MODE;
|
||||||
typedef enum tagREGISTERS_PER_THREAD {
|
typedef enum tagTHREAD_GROUP_DISPATCH_SIZE {
|
||||||
REGISTERS_PER_THREAD_DEFAULT = 0x0,
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8 = 0x0,
|
||||||
REGISTERS_PER_THREAD_64_REGISTERS = 0x1,
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4 = 0x1,
|
||||||
REGISTERS_PER_THREAD_96_REGISTERS = 0x2,
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2 = 0x2,
|
||||||
REGISTERS_PER_THREAD_128_REGISTERS = 0x3,
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 = 0x3,
|
||||||
REGISTERS_PER_THREAD_160_REGISTERS = 0x4,
|
} THREAD_GROUP_DISPATCH_SIZE;
|
||||||
REGISTERS_PER_THREAD_192_REGISTERS = 0x5,
|
typedef enum tagNUMBER_OF_BARRIERS {
|
||||||
REGISTERS_PER_THREAD_256_REGISTERS = 0x6,
|
NUMBER_OF_BARRIERS_NONE = 0x0,
|
||||||
} REGISTERS_PER_THREAD;
|
NUMBER_OF_BARRIERS_B1 = 0x1,
|
||||||
typedef enum tagPREFERRED_SLM_SIZE_OVERRIDE {
|
NUMBER_OF_BARRIERS_B2 = 0x2,
|
||||||
PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED = 0x0,
|
NUMBER_OF_BARRIERS_B4 = 0x3,
|
||||||
PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED = 0x1,
|
NUMBER_OF_BARRIERS_B8 = 0x4,
|
||||||
} PREFERRED_SLM_SIZE_OVERRIDE;
|
NUMBER_OF_BARRIERS_B16 = 0x5,
|
||||||
typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE_PER_DSS {
|
NUMBER_OF_BARRIERS_B24 = 0x6,
|
||||||
PREFERRED_SLM_SIZE_IS_0K = 0x0,
|
NUMBER_OF_BARRIERS_B32 = 0x7,
|
||||||
PREFERRED_SLM_SIZE_IS_16K = 0x1,
|
} NUMBER_OF_BARRIERS;
|
||||||
PREFERRED_SLM_SIZE_IS_32K = 0x2,
|
typedef enum tagBTD_MODE {
|
||||||
PREFERRED_SLM_SIZE_IS_64K = 0x3,
|
BTD_MODE_DISABLE = 0x0,
|
||||||
PREFERRED_SLM_SIZE_IS_96K = 0x4,
|
BTD_MODE_ENABLE = 0x1,
|
||||||
PREFERRED_SLM_SIZE_IS_128K = 0x5,
|
} BTD_MODE;
|
||||||
} PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE {
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_MAX = 0x0,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_0K = 0x8,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_16K = 0x9,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_32K = 0xa,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_64K = 0xb,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_96K = 0xc,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_128K = 0xd,
|
||||||
|
} PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
|
typedef enum tagSAMPLERSTATEPOINTER {
|
||||||
|
SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20, // patched
|
||||||
|
} SAMPLERSTATEPOINTER;
|
||||||
inline void init() {
|
inline void init() {
|
||||||
memset(&TheStructure, 0, sizeof(TheStructure));
|
memset(&TheStructure, 0, sizeof(TheStructure));
|
||||||
TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754;
|
TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754;
|
||||||
TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE;
|
TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE;
|
||||||
TheStructure.Common.DenormMode = DENORM_MODE_FTZ;
|
TheStructure.Common.DenormMode = DENORM_MODE_FTZ;
|
||||||
TheStructure.Common.ThreadPreemptionDisable =
|
TheStructure.Common.ThreadPreemptionDisable = THREAD_PREEMPTION_DISABLE_DISABLE;
|
||||||
THREAD_PREEMPTION_DISABLE_DISABLE;
|
TheStructure.Common.BindingTableEntryCount = BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED;
|
||||||
TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED;
|
TheStructure.Common.SharedLocalMemorySize = SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
|
||||||
TheStructure.Common.SharedLocalMemorySize =
|
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
|
|
||||||
TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE;
|
TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE;
|
||||||
TheStructure.Common.RegistersPerThread = REGISTERS_PER_THREAD_DEFAULT;
|
TheStructure.Common.ThreadGroupDispatchSize = THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8;
|
||||||
TheStructure.Common.PreferredSlmSizeOverride = PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED;
|
TheStructure.Common.NumberOfBarriers = NUMBER_OF_BARRIERS_NONE;
|
||||||
TheStructure.Common.PreferredSlmAllocationSizePerDss = PREFERRED_SLM_SIZE_IS_0K;
|
TheStructure.Common.BtdMode = BTD_MODE_DISABLE;
|
||||||
|
TheStructure.Common.PreferredSlmAllocationSize = PREFERRED_SLM_ALLOCATION_SIZE_MAX;
|
||||||
}
|
}
|
||||||
static tagINTERFACE_DESCRIPTOR_DATA sInit() {
|
static tagINTERFACE_DESCRIPTOR_DATA sInit() {
|
||||||
INTERFACE_DESCRIPTOR_DATA state;
|
INTERFACE_DESCRIPTOR_DATA state;
|
||||||
|
@ -5284,7 +5288,7 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
inline uint32_t &getRawData(const uint32_t index) {
|
inline uint32_t &getRawData(const uint32_t index) {
|
||||||
DEBUG_BREAK_IF(index >= 8);
|
UNRECOVERABLE_IF(index >= 8);
|
||||||
return TheStructure.RawData[index];
|
return TheStructure.RawData[index];
|
||||||
}
|
}
|
||||||
typedef enum tagKERNELSTARTPOINTER {
|
typedef enum tagKERNELSTARTPOINTER {
|
||||||
|
@ -5292,35 +5296,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
KERNELSTARTPOINTER_ALIGN_SIZE = 0x40,
|
KERNELSTARTPOINTER_ALIGN_SIZE = 0x40,
|
||||||
} KERNELSTARTPOINTER;
|
} KERNELSTARTPOINTER;
|
||||||
inline void setKernelStartPointer(const uint64_t value) {
|
inline void setKernelStartPointer(const uint64_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.KernelStartPointer = static_cast<uint32_t>(value) >> KERNELSTARTPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.KernelStartPointer = (uint32_t)value >> KERNELSTARTPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getKernelStartPointer() const {
|
inline uint64_t getKernelStartPointer() const {
|
||||||
return (TheStructure.Common.KernelStartPointer << KERNELSTARTPOINTER_BIT_SHIFT);
|
return static_cast<uint64_t>(TheStructure.Common.KernelStartPointer) << KERNELSTARTPOINTER_BIT_SHIFT; // patched
|
||||||
}
|
}
|
||||||
inline void setKernelStartPointerHigh(const uint32_t value) {
|
inline void setSoftwareExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.KernelStartPointerHigh = value;
|
|
||||||
}
|
|
||||||
inline uint32_t getKernelStartPointerHigh() const {
|
|
||||||
return (TheStructure.Common.KernelStartPointerHigh);
|
|
||||||
}
|
|
||||||
inline void setSoftwareExceptionEnable(const uint32_t value) {
|
|
||||||
TheStructure.Common.SoftwareExceptionEnable = value;
|
TheStructure.Common.SoftwareExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getSoftwareExceptionEnable() const {
|
inline bool getSoftwareExceptionEnable() const {
|
||||||
return (TheStructure.Common.SoftwareExceptionEnable);
|
return TheStructure.Common.SoftwareExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setMaskStackExceptionEnable(const uint32_t value) {
|
inline void setMaskStackExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.MaskStackExceptionEnable = value;
|
TheStructure.Common.MaskStackExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getMaskStackExceptionEnable() const {
|
inline bool getMaskStackExceptionEnable() const {
|
||||||
return (TheStructure.Common.MaskStackExceptionEnable);
|
return TheStructure.Common.MaskStackExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setIllegalOpcodeExceptionEnable(const uint32_t value) {
|
inline void setIllegalOpcodeExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.IllegalOpcodeExceptionEnable = value;
|
TheStructure.Common.IllegalOpcodeExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getIllegalOpcodeExceptionEnable() const {
|
inline bool getIllegalOpcodeExceptionEnable() const {
|
||||||
return (TheStructure.Common.IllegalOpcodeExceptionEnable);
|
return TheStructure.Common.IllegalOpcodeExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setFloatingPointMode(const FLOATING_POINT_MODE value) {
|
inline void setFloatingPointMode(const FLOATING_POINT_MODE value) {
|
||||||
TheStructure.Common.FloatingPointMode = value;
|
TheStructure.Common.FloatingPointMode = value;
|
||||||
|
@ -5346,46 +5343,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
inline THREAD_PREEMPTION_DISABLE getThreadPreemptionDisable() const {
|
inline THREAD_PREEMPTION_DISABLE getThreadPreemptionDisable() const {
|
||||||
return static_cast<THREAD_PREEMPTION_DISABLE>(TheStructure.Common.ThreadPreemptionDisable);
|
return static_cast<THREAD_PREEMPTION_DISABLE>(TheStructure.Common.ThreadPreemptionDisable);
|
||||||
}
|
}
|
||||||
inline void setSamplerCount(const SAMPLER_COUNT value) {
|
|
||||||
TheStructure.Common.SamplerCount = value;
|
|
||||||
}
|
|
||||||
inline SAMPLER_COUNT getSamplerCount() const {
|
|
||||||
return static_cast<SAMPLER_COUNT>(TheStructure.Common.SamplerCount);
|
|
||||||
}
|
|
||||||
typedef enum tagSAMPLERSTATEPOINTER {
|
|
||||||
SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5,
|
|
||||||
SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20,
|
|
||||||
} SAMPLERSTATEPOINTER;
|
|
||||||
inline void setSamplerStatePointer(const uint64_t value) {
|
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
|
||||||
TheStructure.Common.SamplerStatePointer = (uint32_t)value >> SAMPLERSTATEPOINTER_BIT_SHIFT;
|
|
||||||
}
|
|
||||||
inline uint32_t getSamplerStatePointer() const {
|
|
||||||
return (TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT);
|
|
||||||
}
|
|
||||||
inline void setBindingTableEntryCount(const uint32_t value) {
|
inline void setBindingTableEntryCount(const uint32_t value) {
|
||||||
TheStructure.Common.BindingTableEntryCount = value;
|
TheStructure.Common.BindingTableEntryCount = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getBindingTableEntryCount() const {
|
inline uint32_t getBindingTableEntryCount() const {
|
||||||
return (TheStructure.Common.BindingTableEntryCount);
|
return TheStructure.Common.BindingTableEntryCount;
|
||||||
}
|
}
|
||||||
typedef enum tagBINDINGTABLEPOINTER {
|
typedef enum tagBINDINGTABLEPOINTER {
|
||||||
BINDINGTABLEPOINTER_BIT_SHIFT = 0x5,
|
BINDINGTABLEPOINTER_BIT_SHIFT = 0x5,
|
||||||
BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20,
|
BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20,
|
||||||
} BINDINGTABLEPOINTER;
|
} BINDINGTABLEPOINTER;
|
||||||
inline void setBindingTablePointer(const uint64_t value) {
|
inline void setBindingTablePointer(const uint32_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.BindingTablePointer = static_cast<uint32_t>(value) >> BINDINGTABLEPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.BindingTablePointer = (uint32_t)value >> BINDINGTABLEPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getBindingTablePointer() const {
|
inline uint32_t getBindingTablePointer() const {
|
||||||
return (TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT);
|
return TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT;
|
||||||
}
|
}
|
||||||
inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) {
|
inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) {
|
||||||
UNRECOVERABLE_IF(value > 128);
|
UNRECOVERABLE_IF(value > 0x3ff);
|
||||||
TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value;
|
TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const {
|
inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const {
|
||||||
return (TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup);
|
return TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup;
|
||||||
}
|
}
|
||||||
inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) {
|
inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) {
|
||||||
TheStructure.Common.SharedLocalMemorySize = value;
|
TheStructure.Common.SharedLocalMemorySize = value;
|
||||||
|
@ -5393,41 +5372,35 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const {
|
inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const {
|
||||||
return static_cast<SHARED_LOCAL_MEMORY_SIZE>(TheStructure.Common.SharedLocalMemorySize);
|
return static_cast<SHARED_LOCAL_MEMORY_SIZE>(TheStructure.Common.SharedLocalMemorySize);
|
||||||
}
|
}
|
||||||
inline void setNumberOfBarriers(const uint32_t value) {
|
|
||||||
TheStructure.Common.NumberOfBarriers = value;
|
|
||||||
}
|
|
||||||
inline uint32_t getNumberOfBarriers() const {
|
|
||||||
return TheStructure.Common.NumberOfBarriers;
|
|
||||||
}
|
|
||||||
inline void setRoundingMode(const ROUNDING_MODE value) {
|
inline void setRoundingMode(const ROUNDING_MODE value) {
|
||||||
TheStructure.Common.RoundingMode = value;
|
TheStructure.Common.RoundingMode = value;
|
||||||
}
|
}
|
||||||
inline ROUNDING_MODE getRoundingMode() const {
|
inline ROUNDING_MODE getRoundingMode() const {
|
||||||
return static_cast<ROUNDING_MODE>(TheStructure.Common.RoundingMode);
|
return static_cast<ROUNDING_MODE>(TheStructure.Common.RoundingMode);
|
||||||
}
|
}
|
||||||
inline void setThreadGroupDispatchSize(const uint32_t value) {
|
inline void setThreadGroupDispatchSize(const THREAD_GROUP_DISPATCH_SIZE value) {
|
||||||
TheStructure.Common.ThreadGroupDispatchSize = value;
|
TheStructure.Common.ThreadGroupDispatchSize = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getThreadGroupDispatchSize() const {
|
inline THREAD_GROUP_DISPATCH_SIZE getThreadGroupDispatchSize() const {
|
||||||
return (TheStructure.Common.ThreadGroupDispatchSize);
|
return static_cast<THREAD_GROUP_DISPATCH_SIZE>(TheStructure.Common.ThreadGroupDispatchSize);
|
||||||
}
|
}
|
||||||
inline void setRegistersPerThread(const REGISTERS_PER_THREAD value) {
|
inline void setNumberOfBarriers(const NUMBER_OF_BARRIERS value) {
|
||||||
TheStructure.Common.RegistersPerThread = value;
|
TheStructure.Common.NumberOfBarriers = value;
|
||||||
}
|
}
|
||||||
inline REGISTERS_PER_THREAD getRegistersPerThread() const {
|
inline NUMBER_OF_BARRIERS getNumberOfBarriers() const {
|
||||||
return static_cast<REGISTERS_PER_THREAD>(TheStructure.Common.RegistersPerThread);
|
return static_cast<NUMBER_OF_BARRIERS>(TheStructure.Common.NumberOfBarriers);
|
||||||
}
|
}
|
||||||
inline void setPreferredSlmSizeOverride(const PREFERRED_SLM_SIZE_OVERRIDE value) {
|
inline void setBtdMode(const BTD_MODE value) {
|
||||||
TheStructure.Common.PreferredSlmSizeOverride = value;
|
TheStructure.Common.BtdMode = value;
|
||||||
}
|
}
|
||||||
inline PREFERRED_SLM_SIZE_OVERRIDE getPreferredSlmSizeOverride() const {
|
inline BTD_MODE getBtdMode() const {
|
||||||
return static_cast<PREFERRED_SLM_SIZE_OVERRIDE>(TheStructure.Common.PreferredSlmSizeOverride);
|
return static_cast<BTD_MODE>(TheStructure.Common.BtdMode);
|
||||||
}
|
}
|
||||||
inline void setPreferredSlmAllocationSizePerDss(const PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS value) {
|
inline void setPreferredSlmAllocationSize(const PREFERRED_SLM_ALLOCATION_SIZE value) {
|
||||||
TheStructure.Common.PreferredSlmAllocationSizePerDss = value;
|
TheStructure.Common.PreferredSlmAllocationSize = value;
|
||||||
}
|
}
|
||||||
inline PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS getPreferredSlmAllocationSizePerDss() const {
|
inline PREFERRED_SLM_ALLOCATION_SIZE getPreferredSlmAllocationSize() const {
|
||||||
return static_cast<PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS>(TheStructure.Common.PreferredSlmAllocationSizePerDss);
|
return static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(TheStructure.Common.PreferredSlmAllocationSize);
|
||||||
}
|
}
|
||||||
} INTERFACE_DESCRIPTOR_DATA;
|
} INTERFACE_DESCRIPTOR_DATA;
|
||||||
STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA));
|
STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA));
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -5150,49 +5150,48 @@ STATIC_ASSERT(20 == sizeof(POSTSYNC_DATA));
|
||||||
typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
union tagTheStructure {
|
union tagTheStructure {
|
||||||
struct tagCommon {
|
struct tagCommon {
|
||||||
uint32_t Reserved_0_0_5 : BITFIELD_RANGE(0, 5);
|
// DWORD 0
|
||||||
uint32_t KernelStartPointer : BITFIELD_RANGE(6, 31);
|
uint64_t Reserved_0 : BITFIELD_RANGE(0, 5);
|
||||||
|
uint64_t KernelStartPointer : BITFIELD_RANGE(6, 31);
|
||||||
uint32_t KernelStartPointerHigh : BITFIELD_RANGE(0, 15);
|
// DWORD 1
|
||||||
uint32_t Reserved_1_16_31 : BITFIELD_RANGE(16, 31);
|
uint64_t Reserved_32 : BITFIELD_RANGE(32, 63);
|
||||||
|
// DWORD 2
|
||||||
uint32_t Reserved_2_0_6 : BITFIELD_RANGE(0, 6);
|
uint32_t Reserved_64 : BITFIELD_RANGE(0, 6);
|
||||||
uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7);
|
uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7);
|
||||||
uint32_t Reserved_2_8_10 : BITFIELD_RANGE(8, 10);
|
uint32_t Reserved_72 : BITFIELD_RANGE(8, 10);
|
||||||
uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11);
|
uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11);
|
||||||
uint32_t Reserved_2_12_12 : BITFIELD_RANGE(12, 12);
|
uint32_t Reserved_76 : BITFIELD_RANGE(12, 12);
|
||||||
uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13);
|
uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13);
|
||||||
uint32_t Reserved_2_14_15 : BITFIELD_RANGE(14, 15);
|
uint32_t Reserved_78 : BITFIELD_RANGE(14, 15);
|
||||||
uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16);
|
uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16);
|
||||||
uint32_t Reserved_2_17_17 : BITFIELD_RANGE(17, 17);
|
uint32_t Reserved_81 : BITFIELD_RANGE(17, 17);
|
||||||
uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18);
|
uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18);
|
||||||
uint32_t DenormMode : BITFIELD_RANGE(19, 19);
|
uint32_t DenormMode : BITFIELD_RANGE(19, 19);
|
||||||
uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20);
|
uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20);
|
||||||
uint32_t Reserved_2_21_31 : BITFIELD_RANGE(21, 31);
|
uint32_t Reserved_85 : BITFIELD_RANGE(21, 31);
|
||||||
|
// DWORD 3
|
||||||
uint32_t Reserved_3_0_1 : BITFIELD_RANGE(0, 1);
|
uint32_t Reserved_96 : BITFIELD_RANGE(0, 1);
|
||||||
uint32_t SamplerCount : BITFIELD_RANGE(2, 4);
|
uint32_t SamplerCount : BITFIELD_RANGE(2, 4);
|
||||||
uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31);
|
uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31);
|
||||||
|
// DWORD 4
|
||||||
uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4);
|
uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4);
|
||||||
uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20);
|
uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20);
|
||||||
uint32_t Reserved_4_21_31 : BITFIELD_RANGE(21, 31);
|
uint32_t Reserved_149 : BITFIELD_RANGE(21, 31);
|
||||||
|
// DWORD 5
|
||||||
uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9);
|
uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9);
|
||||||
uint32_t Reserved_5_10_15 : BITFIELD_RANGE(10, 15);
|
uint32_t Reserved_170 : BITFIELD_RANGE(10, 15);
|
||||||
uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20);
|
uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20);
|
||||||
uint32_t BarrierEnable : BITFIELD_RANGE(21, 21);
|
uint32_t Reserved_181 : BITFIELD_RANGE(21, 21);
|
||||||
uint32_t RoundingMode : BITFIELD_RANGE(22, 23);
|
uint32_t RoundingMode : BITFIELD_RANGE(22, 23);
|
||||||
uint32_t Reserved_5_24_25 : BITFIELD_RANGE(24, 25);
|
uint32_t Reserved_184 : BITFIELD_RANGE(24, 25);
|
||||||
uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27);
|
uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27);
|
||||||
uint32_t NumberOfBarriers : BITFIELD_RANGE(28, 30);
|
uint32_t NumberOfBarriers : BITFIELD_RANGE(28, 30);
|
||||||
uint32_t Reserved_5_31_31 : BITFIELD_RANGE(31, 31);
|
uint32_t BtdMode : BITFIELD_RANGE(31, 31);
|
||||||
|
// DWORD 6
|
||||||
uint32_t PreferredSlmAllocationSizePerDss : BITFIELD_RANGE(0, 2);
|
uint32_t PreferredSlmAllocationSize : BITFIELD_RANGE(0, 3);
|
||||||
uint32_t PreferredSlmSizeOverride : BITFIELD_RANGE(3, 3);
|
uint32_t Reserved_196 : BITFIELD_RANGE(4, 31);
|
||||||
uint32_t Reserved_6_4_31 : BITFIELD_RANGE(4, 31);
|
// DWORD 7
|
||||||
|
uint32_t Reserved_224;
|
||||||
uint32_t Reserved_7;
|
|
||||||
} Common;
|
} Common;
|
||||||
uint32_t RawData[8];
|
uint32_t RawData[8];
|
||||||
} TheStructure;
|
} TheStructure;
|
||||||
|
@ -5219,6 +5218,11 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3,
|
SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3,
|
||||||
SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4,
|
SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4,
|
||||||
} SAMPLER_COUNT;
|
} SAMPLER_COUNT;
|
||||||
|
typedef enum tagBINDING_TABLE_ENTRY_COUNT {
|
||||||
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED = 0x0,
|
||||||
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MIN = 0x1,
|
||||||
|
BINDING_TABLE_ENTRY_COUNT_PREFETCH_COUNT_MAX = 0x1f,
|
||||||
|
} BINDING_TABLE_ENTRY_COUNT;
|
||||||
typedef enum tagSHARED_LOCAL_MEMORY_SIZE {
|
typedef enum tagSHARED_LOCAL_MEMORY_SIZE {
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0,
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1,
|
SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1,
|
||||||
|
@ -5235,32 +5239,43 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
ROUNDING_MODE_RD = 0x2,
|
ROUNDING_MODE_RD = 0x2,
|
||||||
ROUNDING_MODE_RTZ = 0x3,
|
ROUNDING_MODE_RTZ = 0x3,
|
||||||
} ROUNDING_MODE;
|
} ROUNDING_MODE;
|
||||||
typedef enum tagPREFERRED_SLM_SIZE_OVERRIDE {
|
typedef enum tagTHREAD_GROUP_DISPATCH_SIZE {
|
||||||
PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED = 0x0,
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8 = 0x0,
|
||||||
PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED = 0x1,
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4 = 0x1,
|
||||||
} PREFERRED_SLM_SIZE_OVERRIDE;
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2 = 0x2,
|
||||||
typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE_PER_DSS {
|
THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 = 0x3,
|
||||||
PREFERRED_SLM_SIZE_IS_0K = 0x0,
|
} THREAD_GROUP_DISPATCH_SIZE;
|
||||||
PREFERRED_SLM_SIZE_IS_16K = 0x1,
|
typedef enum tagNUMBER_OF_BARRIERS {
|
||||||
PREFERRED_SLM_SIZE_IS_32K = 0x2,
|
NUMBER_OF_BARRIERS_NONE = 0x0,
|
||||||
PREFERRED_SLM_SIZE_IS_64K = 0x3,
|
NUMBER_OF_BARRIERS_B1 = 0x1,
|
||||||
PREFERRED_SLM_SIZE_IS_96K = 0x4,
|
} NUMBER_OF_BARRIERS;
|
||||||
PREFERRED_SLM_SIZE_IS_128K = 0x5,
|
typedef enum tagBTD_MODE {
|
||||||
} PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
BTD_MODE_DISABLE = 0x0,
|
||||||
|
BTD_MODE_ENABLE = 0x1,
|
||||||
|
} BTD_MODE;
|
||||||
|
typedef enum tagPREFERRED_SLM_ALLOCATION_SIZE {
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_MAX = 0x0,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_0K = 0x8,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_16K = 0x9,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_32K = 0xa,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_64K = 0xb,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_96K = 0xc,
|
||||||
|
PREFERRED_SLM_ALLOCATION_SIZE_128K = 0xd,
|
||||||
|
} PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
inline void init() {
|
inline void init() {
|
||||||
memset(&TheStructure, 0, sizeof(TheStructure));
|
memset(&TheStructure, 0, sizeof(TheStructure));
|
||||||
TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754;
|
TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754;
|
||||||
TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE;
|
TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE;
|
||||||
TheStructure.Common.DenormMode = DENORM_MODE_FTZ;
|
TheStructure.Common.DenormMode = DENORM_MODE_FTZ;
|
||||||
TheStructure.Common.ThreadPreemptionDisable =
|
TheStructure.Common.ThreadPreemptionDisable = THREAD_PREEMPTION_DISABLE_DISABLE;
|
||||||
THREAD_PREEMPTION_DISABLE_DISABLE;
|
|
||||||
TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED;
|
TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED;
|
||||||
TheStructure.Common.SharedLocalMemorySize =
|
TheStructure.Common.BindingTableEntryCount = BINDING_TABLE_ENTRY_COUNT_PREFETCH_DISABLED;
|
||||||
SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
|
TheStructure.Common.SharedLocalMemorySize = SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
|
||||||
TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE;
|
TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE;
|
||||||
TheStructure.Common.PreferredSlmSizeOverride = PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED;
|
TheStructure.Common.ThreadGroupDispatchSize = THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8;
|
||||||
TheStructure.Common.PreferredSlmAllocationSizePerDss = PREFERRED_SLM_SIZE_IS_0K;
|
TheStructure.Common.NumberOfBarriers = NUMBER_OF_BARRIERS_NONE;
|
||||||
|
TheStructure.Common.BtdMode = BTD_MODE_DISABLE;
|
||||||
|
TheStructure.Common.PreferredSlmAllocationSize = PREFERRED_SLM_ALLOCATION_SIZE_MAX;
|
||||||
}
|
}
|
||||||
static tagINTERFACE_DESCRIPTOR_DATA sInit() {
|
static tagINTERFACE_DESCRIPTOR_DATA sInit() {
|
||||||
INTERFACE_DESCRIPTOR_DATA state;
|
INTERFACE_DESCRIPTOR_DATA state;
|
||||||
|
@ -5268,7 +5283,7 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
inline uint32_t &getRawData(const uint32_t index) {
|
inline uint32_t &getRawData(const uint32_t index) {
|
||||||
DEBUG_BREAK_IF(index >= 8);
|
UNRECOVERABLE_IF(index >= 8);
|
||||||
return TheStructure.RawData[index];
|
return TheStructure.RawData[index];
|
||||||
}
|
}
|
||||||
typedef enum tagKERNELSTARTPOINTER {
|
typedef enum tagKERNELSTARTPOINTER {
|
||||||
|
@ -5276,35 +5291,28 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
KERNELSTARTPOINTER_ALIGN_SIZE = 0x40,
|
KERNELSTARTPOINTER_ALIGN_SIZE = 0x40,
|
||||||
} KERNELSTARTPOINTER;
|
} KERNELSTARTPOINTER;
|
||||||
inline void setKernelStartPointer(const uint64_t value) {
|
inline void setKernelStartPointer(const uint64_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.KernelStartPointer = static_cast<uint32_t>(value) >> KERNELSTARTPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.KernelStartPointer = (uint32_t)value >> KERNELSTARTPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getKernelStartPointer() const {
|
inline uint64_t getKernelStartPointer() const {
|
||||||
return (TheStructure.Common.KernelStartPointer << KERNELSTARTPOINTER_BIT_SHIFT);
|
return static_cast<uint64_t>(TheStructure.Common.KernelStartPointer) << KERNELSTARTPOINTER_BIT_SHIFT; // patched
|
||||||
}
|
}
|
||||||
inline void setKernelStartPointerHigh(const uint32_t value) {
|
inline void setSoftwareExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.KernelStartPointerHigh = value;
|
|
||||||
}
|
|
||||||
inline uint32_t getKernelStartPointerHigh() const {
|
|
||||||
return (TheStructure.Common.KernelStartPointerHigh);
|
|
||||||
}
|
|
||||||
inline void setSoftwareExceptionEnable(const uint32_t value) {
|
|
||||||
TheStructure.Common.SoftwareExceptionEnable = value;
|
TheStructure.Common.SoftwareExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getSoftwareExceptionEnable() const {
|
inline bool getSoftwareExceptionEnable() const {
|
||||||
return (TheStructure.Common.SoftwareExceptionEnable);
|
return TheStructure.Common.SoftwareExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setMaskStackExceptionEnable(const uint32_t value) {
|
inline void setMaskStackExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.MaskStackExceptionEnable = value;
|
TheStructure.Common.MaskStackExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getMaskStackExceptionEnable() const {
|
inline bool getMaskStackExceptionEnable() const {
|
||||||
return (TheStructure.Common.MaskStackExceptionEnable);
|
return TheStructure.Common.MaskStackExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setIllegalOpcodeExceptionEnable(const uint32_t value) {
|
inline void setIllegalOpcodeExceptionEnable(const bool value) {
|
||||||
TheStructure.Common.IllegalOpcodeExceptionEnable = value;
|
TheStructure.Common.IllegalOpcodeExceptionEnable = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getIllegalOpcodeExceptionEnable() const {
|
inline bool getIllegalOpcodeExceptionEnable() const {
|
||||||
return (TheStructure.Common.IllegalOpcodeExceptionEnable);
|
return TheStructure.Common.IllegalOpcodeExceptionEnable;
|
||||||
}
|
}
|
||||||
inline void setFloatingPointMode(const FLOATING_POINT_MODE value) {
|
inline void setFloatingPointMode(const FLOATING_POINT_MODE value) {
|
||||||
TheStructure.Common.FloatingPointMode = value;
|
TheStructure.Common.FloatingPointMode = value;
|
||||||
|
@ -5340,35 +5348,34 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5,
|
SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5,
|
||||||
SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20,
|
SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20,
|
||||||
} SAMPLERSTATEPOINTER;
|
} SAMPLERSTATEPOINTER;
|
||||||
inline void setSamplerStatePointer(const uint64_t value) {
|
inline void setSamplerStatePointer(const uint32_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.SamplerStatePointer = static_cast<uint32_t>(value) >> SAMPLERSTATEPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.SamplerStatePointer = (uint32_t)value >> SAMPLERSTATEPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getSamplerStatePointer() const {
|
inline uint32_t getSamplerStatePointer() const {
|
||||||
return (TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT);
|
return TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT;
|
||||||
}
|
}
|
||||||
inline void setBindingTableEntryCount(const uint32_t value) {
|
inline void setBindingTableEntryCount(const uint32_t value) {
|
||||||
TheStructure.Common.BindingTableEntryCount = value;
|
TheStructure.Common.BindingTableEntryCount = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getBindingTableEntryCount() const {
|
inline uint32_t getBindingTableEntryCount() const {
|
||||||
return (TheStructure.Common.BindingTableEntryCount);
|
return TheStructure.Common.BindingTableEntryCount;
|
||||||
}
|
}
|
||||||
typedef enum tagBINDINGTABLEPOINTER {
|
typedef enum tagBINDINGTABLEPOINTER {
|
||||||
BINDINGTABLEPOINTER_BIT_SHIFT = 0x5,
|
BINDINGTABLEPOINTER_BIT_SHIFT = 0x5,
|
||||||
BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20,
|
BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20,
|
||||||
} BINDINGTABLEPOINTER;
|
} BINDINGTABLEPOINTER;
|
||||||
inline void setBindingTablePointer(const uint64_t value) {
|
inline void setBindingTablePointer(const uint32_t value) {
|
||||||
DEBUG_BREAK_IF(value >= 0x100000000);
|
TheStructure.Common.BindingTablePointer = static_cast<uint32_t>(value) >> BINDINGTABLEPOINTER_BIT_SHIFT;
|
||||||
TheStructure.Common.BindingTablePointer = (uint32_t)value >> BINDINGTABLEPOINTER_BIT_SHIFT;
|
|
||||||
}
|
}
|
||||||
inline uint32_t getBindingTablePointer() const {
|
inline uint32_t getBindingTablePointer() const {
|
||||||
return (TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT);
|
return TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT;
|
||||||
}
|
}
|
||||||
inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) {
|
inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) {
|
||||||
|
UNRECOVERABLE_IF(value > 0x3ff);
|
||||||
TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value;
|
TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const {
|
inline uint32_t getNumberOfThreadsInGpgpuThreadGroup() const {
|
||||||
return (TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup);
|
return TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup;
|
||||||
}
|
}
|
||||||
inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) {
|
inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) {
|
||||||
TheStructure.Common.SharedLocalMemorySize = value;
|
TheStructure.Common.SharedLocalMemorySize = value;
|
||||||
|
@ -5376,41 +5383,35 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA {
|
||||||
inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const {
|
inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize() const {
|
||||||
return static_cast<SHARED_LOCAL_MEMORY_SIZE>(TheStructure.Common.SharedLocalMemorySize);
|
return static_cast<SHARED_LOCAL_MEMORY_SIZE>(TheStructure.Common.SharedLocalMemorySize);
|
||||||
}
|
}
|
||||||
inline void setBarrierEnable(const uint32_t value) {
|
|
||||||
TheStructure.Common.BarrierEnable = (value > 0u) ? 1u : 0u;
|
|
||||||
}
|
|
||||||
inline void setNumberOfBarriers(const uint32_t value) {
|
|
||||||
TheStructure.Common.NumberOfBarriers = value;
|
|
||||||
}
|
|
||||||
inline bool getBarrierEnable() const {
|
|
||||||
return (TheStructure.Common.BarrierEnable);
|
|
||||||
}
|
|
||||||
inline uint32_t getNumberOfBarriers() const {
|
|
||||||
return TheStructure.Common.NumberOfBarriers;
|
|
||||||
}
|
|
||||||
inline void setRoundingMode(const ROUNDING_MODE value) {
|
inline void setRoundingMode(const ROUNDING_MODE value) {
|
||||||
TheStructure.Common.RoundingMode = value;
|
TheStructure.Common.RoundingMode = value;
|
||||||
}
|
}
|
||||||
inline ROUNDING_MODE getRoundingMode() const {
|
inline ROUNDING_MODE getRoundingMode() const {
|
||||||
return static_cast<ROUNDING_MODE>(TheStructure.Common.RoundingMode);
|
return static_cast<ROUNDING_MODE>(TheStructure.Common.RoundingMode);
|
||||||
}
|
}
|
||||||
inline void setThreadGroupDispatchSize(const uint32_t value) {
|
inline void setThreadGroupDispatchSize(const THREAD_GROUP_DISPATCH_SIZE value) {
|
||||||
TheStructure.Common.ThreadGroupDispatchSize = value;
|
TheStructure.Common.ThreadGroupDispatchSize = value;
|
||||||
}
|
}
|
||||||
inline uint32_t getThreadGroupDispatchSize() const {
|
inline THREAD_GROUP_DISPATCH_SIZE getThreadGroupDispatchSize() const {
|
||||||
return (TheStructure.Common.ThreadGroupDispatchSize);
|
return static_cast<THREAD_GROUP_DISPATCH_SIZE>(TheStructure.Common.ThreadGroupDispatchSize);
|
||||||
}
|
}
|
||||||
inline void setPreferredSlmSizeOverride(const PREFERRED_SLM_SIZE_OVERRIDE value) {
|
inline void setNumberOfBarriers(const NUMBER_OF_BARRIERS value) {
|
||||||
TheStructure.Common.PreferredSlmSizeOverride = value;
|
TheStructure.Common.NumberOfBarriers = value;
|
||||||
}
|
}
|
||||||
inline PREFERRED_SLM_SIZE_OVERRIDE getPreferredSlmSizeOverride() const {
|
inline NUMBER_OF_BARRIERS getNumberOfBarriers() const {
|
||||||
return static_cast<PREFERRED_SLM_SIZE_OVERRIDE>(TheStructure.Common.PreferredSlmSizeOverride);
|
return static_cast<NUMBER_OF_BARRIERS>(TheStructure.Common.NumberOfBarriers);
|
||||||
}
|
}
|
||||||
inline void setPreferredSlmAllocationSizePerDss(const PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS value) {
|
inline void setBtdMode(const BTD_MODE value) {
|
||||||
TheStructure.Common.PreferredSlmAllocationSizePerDss = value;
|
TheStructure.Common.BtdMode = value;
|
||||||
}
|
}
|
||||||
inline PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS getPreferredSlmAllocationSizePerDss() const {
|
inline BTD_MODE getBtdMode() const {
|
||||||
return static_cast<PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS>(TheStructure.Common.PreferredSlmAllocationSizePerDss);
|
return static_cast<BTD_MODE>(TheStructure.Common.BtdMode);
|
||||||
|
}
|
||||||
|
inline void setPreferredSlmAllocationSize(const PREFERRED_SLM_ALLOCATION_SIZE value) {
|
||||||
|
TheStructure.Common.PreferredSlmAllocationSize = value;
|
||||||
|
}
|
||||||
|
inline PREFERRED_SLM_ALLOCATION_SIZE getPreferredSlmAllocationSize() const {
|
||||||
|
return static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(TheStructure.Common.PreferredSlmAllocationSize);
|
||||||
}
|
}
|
||||||
} INTERFACE_DESCRIPTOR_DATA;
|
} INTERFACE_DESCRIPTOR_DATA;
|
||||||
STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA));
|
STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA));
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -44,11 +44,12 @@ template <>
|
||||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {
|
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {
|
||||||
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||||
if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) {
|
if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) {
|
||||||
interfaceDescriptor.setThreadGroupDispatchSize(3u);
|
interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) {
|
if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) {
|
||||||
interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get());
|
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(
|
||||||
|
DebugManager.flags.ForceThreadGroupDispatchSize.get()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -29,6 +29,7 @@ struct XeHpCore {
|
||||||
|
|
||||||
static constexpr bool isUsingL3Control = true;
|
static constexpr bool isUsingL3Control = true;
|
||||||
static constexpr bool isUsingMediaSamplerDopClockGate = true;
|
static constexpr bool isUsingMediaSamplerDopClockGate = true;
|
||||||
|
static constexpr bool supportsSampler = true;
|
||||||
|
|
||||||
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
||||||
union {
|
union {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -34,11 +34,12 @@ template <>
|
||||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {
|
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {
|
||||||
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||||
if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) {
|
if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) {
|
||||||
interfaceDescriptor.setThreadGroupDispatchSize(3u);
|
interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) {
|
if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) {
|
||||||
interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get());
|
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(
|
||||||
|
DebugManager.flags.ForceThreadGroupDispatchSize.get()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -169,7 +170,7 @@ template <>
|
||||||
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor,
|
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor,
|
||||||
uint32_t value,
|
uint32_t value,
|
||||||
const HardwareInfo &hwInfo) {
|
const HardwareInfo &hwInfo) {
|
||||||
interfaceDescriptor.setNumberOfBarriers(value);
|
interfaceDescriptor.setNumberOfBarriers(static_cast<INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS>(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
@ -200,8 +201,7 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareIn
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||||
using PREFERRED_SLM_SIZE_OVERRIDE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
|
||||||
|
|
||||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||||
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
|
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
|
||||||
|
@ -221,18 +221,18 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTO
|
||||||
|
|
||||||
struct SizeToPreferredSlmValue {
|
struct SizeToPreferredSlmValue {
|
||||||
uint32_t upperLimit;
|
uint32_t upperLimit;
|
||||||
PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS valueToProgram;
|
PREFERRED_SLM_ALLOCATION_SIZE valueToProgram;
|
||||||
};
|
};
|
||||||
const std::array<SizeToPreferredSlmValue, 6> ranges = {{
|
const std::array<SizeToPreferredSlmValue, 6> ranges = {{
|
||||||
// upper limit, retVal
|
// upper limit, retVal
|
||||||
{0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K},
|
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||||
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K},
|
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||||
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K},
|
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||||
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K},
|
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||||
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K},
|
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||||
}};
|
}};
|
||||||
|
|
||||||
auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K;
|
auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K;
|
||||||
for (auto &range : ranges) {
|
for (auto &range : ranges) {
|
||||||
if (slmSize <= range.upperLimit) {
|
if (slmSize <= range.upperLimit) {
|
||||||
programmableIdPreferredSlmSize = range.valueToProgram;
|
programmableIdPreferredSlmSize = range.valueToProgram;
|
||||||
|
@ -241,16 +241,29 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTO
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((slmSize == 0) && (Family::isXlA0(hwInfo))) {
|
if ((slmSize == 0) && (Family::isXlA0(hwInfo))) {
|
||||||
programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K;
|
programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K;
|
||||||
}
|
}
|
||||||
|
|
||||||
pInterfaceDescriptor->setPreferredSlmSizeOverride(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED);
|
pInterfaceDescriptor->setPreferredSlmAllocationSize(programmableIdPreferredSlmSize);
|
||||||
pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(programmableIdPreferredSlmSize);
|
|
||||||
|
|
||||||
if (DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) {
|
if (DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) {
|
||||||
auto toProgram =
|
auto toProgram =
|
||||||
static_cast<PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS>(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get());
|
static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get());
|
||||||
pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(toProgram);
|
pInterfaceDescriptor->setPreferredSlmAllocationSize(toProgram);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) {
|
||||||
|
auto enablePrefetch = EncodeSurfaceState<Family>::doBindingTablePrefetch();
|
||||||
|
if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) {
|
||||||
|
enablePrefetch = static_cast<bool>(DebugManager.flags.ForceBtpPrefetchMode.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enablePrefetch) {
|
||||||
|
interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u));
|
||||||
|
} else {
|
||||||
|
interfaceDescriptor.setBindingTableEntryCount(0u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -35,6 +35,7 @@ struct XE_HPC_CORE {
|
||||||
|
|
||||||
static constexpr bool isUsingL3Control = false;
|
static constexpr bool isUsingL3Control = false;
|
||||||
static constexpr bool isUsingMediaSamplerDopClockGate = false;
|
static constexpr bool isUsingMediaSamplerDopClockGate = false;
|
||||||
|
static constexpr bool supportsSampler = false;
|
||||||
|
|
||||||
static bool isXlA0(const HardwareInfo &hwInfo) {
|
static bool isXlA0(const HardwareInfo &hwInfo) {
|
||||||
auto revId = hwInfo.platform.usRevId & pvcSteppingBits;
|
auto revId = hwInfo.platform.usRevId & pvcSteppingBits;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -33,8 +33,7 @@ void EncodeDispatchKernel<Family>::adjustTimestampPacket(WALKER_TYPE &walkerCmd,
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||||
using PREFERRED_SLM_SIZE_OVERRIDE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
|
||||||
|
|
||||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||||
const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup;
|
const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup;
|
||||||
|
@ -54,18 +53,18 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTO
|
||||||
|
|
||||||
struct SizeToPreferredSlmValue {
|
struct SizeToPreferredSlmValue {
|
||||||
uint32_t upperLimit;
|
uint32_t upperLimit;
|
||||||
PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS valueToProgram;
|
PREFERRED_SLM_ALLOCATION_SIZE valueToProgram;
|
||||||
};
|
};
|
||||||
const std::array<SizeToPreferredSlmValue, 6> ranges = {{
|
const std::array<SizeToPreferredSlmValue, 6> ranges = {{
|
||||||
// upper limit, retVal
|
// upper limit, retVal
|
||||||
{0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K},
|
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||||
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K},
|
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||||
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K},
|
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||||
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K},
|
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||||
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K},
|
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||||
}};
|
}};
|
||||||
|
|
||||||
auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K;
|
auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K;
|
||||||
for (auto &range : ranges) {
|
for (auto &range : ranges) {
|
||||||
if (slmSize <= range.upperLimit) {
|
if (slmSize <= range.upperLimit) {
|
||||||
programmableIdPreferredSlmSize = range.valueToProgram;
|
programmableIdPreferredSlmSize = range.valueToProgram;
|
||||||
|
@ -73,18 +72,16 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTO
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pInterfaceDescriptor->setPreferredSlmSizeOverride(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED);
|
|
||||||
|
|
||||||
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isAllocationSizeAdjustmentRequired(hwInfo)) {
|
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isAllocationSizeAdjustmentRequired(hwInfo)) {
|
||||||
pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K);
|
pInterfaceDescriptor->setPreferredSlmAllocationSize(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K);
|
||||||
} else {
|
} else {
|
||||||
pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(programmableIdPreferredSlmSize);
|
pInterfaceDescriptor->setPreferredSlmAllocationSize(programmableIdPreferredSlmSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) {
|
if (DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) {
|
||||||
auto toProgram =
|
auto toProgram =
|
||||||
static_cast<PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS>(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get());
|
static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get());
|
||||||
pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(toProgram);
|
pInterfaceDescriptor->setPreferredSlmAllocationSize(toProgram);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,20 +90,21 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
|
||||||
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||||
if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) {
|
if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) {
|
||||||
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
|
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
|
||||||
interfaceDescriptor.setThreadGroupDispatchSize(2u);
|
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(2u));
|
||||||
} else {
|
} else {
|
||||||
interfaceDescriptor.setThreadGroupDispatchSize(3u);
|
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(3u));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) {
|
if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) {
|
||||||
interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get());
|
interfaceDescriptor.setThreadGroupDispatchSize(
|
||||||
|
static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(DebugManager.flags.ForceThreadGroupDispatchSize.get()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) {
|
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) {
|
||||||
interfaceDescriptor.setNumberOfBarriers(value);
|
interfaceDescriptor.setNumberOfBarriers(static_cast<INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS>(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -29,6 +29,7 @@ struct XE_HPG_CORE {
|
||||||
|
|
||||||
static constexpr bool isUsingL3Control = true;
|
static constexpr bool isUsingL3Control = true;
|
||||||
static constexpr bool isUsingMediaSamplerDopClockGate = false;
|
static constexpr bool isUsingMediaSamplerDopClockGate = false;
|
||||||
|
static constexpr bool supportsSampler = true;
|
||||||
|
|
||||||
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
|
||||||
union {
|
union {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -18,8 +18,7 @@ using CommandEncodeStatesPvcTest = ::testing::Test;
|
||||||
|
|
||||||
PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet) {
|
PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet) {
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using PREFERRED_SLM_SIZE_OVERRIDE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
|
||||||
|
|
||||||
HardwareInfo hwInfo = *defaultHwInfo;
|
HardwareInfo hwInfo = *defaultHwInfo;
|
||||||
uint32_t threadsCount = 1;
|
uint32_t threadsCount = 1;
|
||||||
|
@ -42,9 +41,9 @@ PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalIn
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone);
|
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone);
|
||||||
if (revisionToTest.isWaRequired) {
|
if (revisionToTest.isWaRequired) {
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K, idd.getPreferredSlmAllocationSize());
|
||||||
} else {
|
} else {
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K, idd.getPreferredSlmAllocationSize());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,8 +20,7 @@ using CommandEncodeStatesDg2Test = ::testing::Test;
|
||||||
|
|
||||||
DG2TEST_F(CommandEncodeStatesDg2Test, GivenSmallSlmTotalSizesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet) {
|
DG2TEST_F(CommandEncodeStatesDg2Test, GivenSmallSlmTotalSizesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet) {
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using PREFERRED_SLM_SIZE_OVERRIDE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
|
||||||
|
|
||||||
VariableBackup<unsigned short> revisionId(&defaultHwInfo->platform.usRevId);
|
VariableBackup<unsigned short> revisionId(&defaultHwInfo->platform.usRevId);
|
||||||
uint32_t threadsCount = 1;
|
uint32_t threadsCount = 1;
|
||||||
|
@ -31,19 +30,19 @@ DG2TEST_F(CommandEncodeStatesDg2Test, GivenSmallSlmTotalSizesWhenSetAdditionalIn
|
||||||
revisionId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_A0, *defaultHwInfo);
|
revisionId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_A0, *defaultHwInfo);
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, *defaultHwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone);
|
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, *defaultHwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone);
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K, idd.getPreferredSlmAllocationSize());
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
revisionId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_B, *defaultHwInfo);
|
revisionId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_B, *defaultHwInfo);
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, *defaultHwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone);
|
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, *defaultHwInfo, threadsCount, slmTotalSize, SlmPolicy::SlmPolicyNone);
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K, idd.getPreferredSlmAllocationSize());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DG2TEST_F(CommandEncodeStatesDg2Test, givenNoWorkaroundNeededWhenSelectingPreferredSlmSizePerDssThenUseDssCount) {
|
DG2TEST_F(CommandEncodeStatesDg2Test, givenNoWorkaroundNeededWhenSelectingPreferredSlmSizePerDssThenUseDssCount) {
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
|
|
||||||
HardwareInfo hwInfo = *defaultHwInfo;
|
HardwareInfo hwInfo = *defaultHwInfo;
|
||||||
hwInfo.platform.usRevId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_B, *defaultHwInfo);
|
hwInfo.platform.usRevId = HwInfoConfig::get(productFamily)->getHwRevIdFromStepping(REVISION_B, *defaultHwInfo);
|
||||||
|
@ -56,27 +55,27 @@ DG2TEST_F(CommandEncodeStatesDg2Test, givenNoWorkaroundNeededWhenSelectingPrefer
|
||||||
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K, idd.getPreferredSlmAllocationSize());
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const uint32_t threadsPerThreadGroup = 8; // 16 groups will fit in one DSS
|
const uint32_t threadsPerThreadGroup = 8; // 16 groups will fit in one DSS
|
||||||
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize());
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const uint32_t threadsPerThreadGroup = 9; // 14 groups will fit in one DSS
|
const uint32_t threadsPerThreadGroup = 9; // 14 groups will fit in one DSS
|
||||||
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize());
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const uint32_t threadsPerThreadGroup = 50; // 2 groups will fit in one DSS
|
const uint32_t threadsPerThreadGroup = 50; // 2 groups will fit in one DSS
|
||||||
const uint32_t slmSizePerThreadGroup = 16 * MemoryConstants::kiloByte;
|
const uint32_t slmSizePerThreadGroup = 16 * MemoryConstants::kiloByte;
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::SlmPolicyLargeSlm);
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,21 +76,21 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeAndDG2T
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenVariousSlmTotalSizesAndSettingRevIDToDifferentValuesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet, IsXeHpgCore) {
|
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenVariousSlmTotalSizesAndSettingRevIDToDifferentValuesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet, IsXeHpgCore) {
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
|
|
||||||
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
|
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
|
||||||
{0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K},
|
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||||
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K},
|
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||||
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K},
|
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||||
//since we can't set 48KB as SLM size for workgroup, we need to ask for 64KB here.
|
//since we can't set 48KB as SLM size for workgroup, we need to ask for 64KB here.
|
||||||
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K},
|
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTestForDg2AStep = {
|
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTestForDg2AStep = {
|
||||||
{0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K},
|
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||||
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K},
|
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||||
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K},
|
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||||
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K},
|
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::array<REVID, 5> revs{REVISION_A0, REVISION_B, REVISION_C, REVISION_D, REVISION_K};
|
const std::array<REVID, 5> revs{REVISION_A0, REVISION_B, REVISION_C, REVISION_D, REVISION_K};
|
||||||
|
@ -106,12 +106,12 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenVariousSlmTotalSizesAndSettin
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenDebugOverrideWhenSetAdditionalInfoIsCalledThenDebugValuesAreSet, IsAtLeastXeHpgCore) {
|
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenDebugOverrideWhenSetAdditionalInfoIsCalledThenDebugValuesAreSet, IsAtLeastXeHpgCore) {
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
|
|
||||||
DebugManagerStateRestore stateRestore;
|
DebugManagerStateRestore stateRestore;
|
||||||
PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS debugOverrideValues[] = {PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K,
|
PREFERRED_SLM_ALLOCATION_SIZE debugOverrideValues[] = {PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K,
|
||||||
PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K,
|
PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K,
|
||||||
PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K};
|
PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K};
|
||||||
|
|
||||||
for (auto debugOverrideValue : debugOverrideValues) {
|
for (auto debugOverrideValue : debugOverrideValues) {
|
||||||
DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.set(debugOverrideValue);
|
DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.set(debugOverrideValue);
|
||||||
|
@ -124,25 +124,10 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenDebugOverrideWhenSetAdditiona
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsAtLeastXeHpgCore) {
|
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
|
||||||
MockDevice device;
|
|
||||||
auto hwInfo = device.getHardwareInfo();
|
|
||||||
|
|
||||||
uint32_t barrierCounts[] = {0, 1, 2, 7};
|
|
||||||
|
|
||||||
for (auto barrierCount : barrierCounts) {
|
|
||||||
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, barrierCount, hwInfo);
|
|
||||||
|
|
||||||
EXPECT_EQ(barrierCount, idd.getNumberOfBarriers());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocationSizePerDssWhenDispatchingKernelThenCorrectValueIsSet, IsAtLeastXeHpgCore) {
|
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocationSizePerDssWhenDispatchingKernelThenCorrectValueIsSet, IsAtLeastXeHpgCore) {
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
DebugManagerStateRestore restorer;
|
DebugManagerStateRestore restorer;
|
||||||
DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.set(5);
|
DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.set(5);
|
||||||
uint32_t dims[] = {2, 1, 1};
|
uint32_t dims[] = {2, 1, 1};
|
||||||
|
@ -165,5 +150,5 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocatio
|
||||||
auto cmd = genCmdCast<WALKER_TYPE *>(*itor);
|
auto cmd = genCmdCast<WALKER_TYPE *>(*itor);
|
||||||
auto &idd = cmd->getInterfaceDescriptor();
|
auto &idd = cmd->getInterfaceDescriptor();
|
||||||
|
|
||||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(5u, static_cast<uint32_t>(idd.getPreferredSlmAllocationSize()));
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021 Intel Corporation
|
* Copyright (C) 2021-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -15,21 +15,19 @@
|
||||||
template <typename FamilyType>
|
template <typename FamilyType>
|
||||||
struct PreferredSlmTestValues {
|
struct PreferredSlmTestValues {
|
||||||
uint32_t preferredSlmAllocationSizePerDss;
|
uint32_t preferredSlmAllocationSizePerDss;
|
||||||
typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS expectedValueInIdd;
|
typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE expectedValueInIdd;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename FamilyType>
|
template <typename FamilyType>
|
||||||
void verifyPreferredSlmValues(std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest, NEO::HardwareInfo &hwInfo) {
|
void verifyPreferredSlmValues(std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest, NEO::HardwareInfo &hwInfo) {
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using PREFERRED_SLM_SIZE_OVERRIDE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
|
||||||
|
|
||||||
auto threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount;
|
auto threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount;
|
||||||
uint32_t localWorkGroupsPerDssCounts[] = {1, 2, 4};
|
uint32_t localWorkGroupsPerDssCounts[] = {1, 2, 4};
|
||||||
|
|
||||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
EXPECT_EQ(0u, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(0u, idd.getPreferredSlmAllocationSize());
|
||||||
EXPECT_EQ(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_DISABLED, idd.getPreferredSlmSizeOverride());
|
|
||||||
|
|
||||||
const std::array<NEO::SlmPolicy, 3> slmPolicies = {
|
const std::array<NEO::SlmPolicy, 3> slmPolicies = {
|
||||||
NEO::SlmPolicy::SlmPolicyNone,
|
NEO::SlmPolicy::SlmPolicyNone,
|
||||||
|
@ -50,8 +48,7 @@ void verifyPreferredSlmValues(std::vector<PreferredSlmTestValues<FamilyType>> va
|
||||||
slmTotalSize,
|
slmTotalSize,
|
||||||
slmPolicy);
|
slmPolicy);
|
||||||
|
|
||||||
EXPECT_EQ(valueToTest.expectedValueInIdd, idd.getPreferredSlmAllocationSizePerDss());
|
EXPECT_EQ(valueToTest.expectedValueInIdd, idd.getPreferredSlmAllocationSize());
|
||||||
EXPECT_EQ(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED, idd.getPreferredSlmSizeOverride());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,6 +53,21 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsAtLeastXeHpcCore) {
|
||||||
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
|
MockDevice device;
|
||||||
|
auto hwInfo = device.getHardwareInfo();
|
||||||
|
|
||||||
|
uint32_t barrierCounts[] = {0, 1, 2, 7};
|
||||||
|
|
||||||
|
for (auto barrierCount : barrierCounts) {
|
||||||
|
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, barrierCount, hwInfo);
|
||||||
|
|
||||||
|
EXPECT_EQ(barrierCount, idd.getNumberOfBarriers());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandContainerWhenNumGrfRequiredIsGreaterThanDefaultThenLargeGrfModeEnabled) {
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandContainerWhenNumGrfRequiredIsGreaterThanDefaultThenLargeGrfModeEnabled) {
|
||||||
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
|
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
|
||||||
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||||
|
@ -71,24 +86,24 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandCon
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandEncodeStatesTestPvcAndLater, GivenVariousSlmTotalSizesAndSettingRevIDToDifferentValuesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet, IsXeHpcCore) {
|
HWTEST2_F(CommandEncodeStatesTestPvcAndLater, GivenVariousSlmTotalSizesAndSettingRevIDToDifferentValuesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet, IsXeHpcCore) {
|
||||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
using PREFERRED_SLM_ALLOCATION_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||||
|
|
||||||
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
|
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
|
||||||
{0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K},
|
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||||
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K},
|
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||||
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K},
|
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||||
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K},
|
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||||
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K},
|
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||||
{128 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K},
|
{128 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTestForPvcAStep = {
|
const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTestForPvcAStep = {
|
||||||
{0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K},
|
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||||
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K},
|
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||||
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K},
|
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||||
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K},
|
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||||
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K},
|
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||||
{128 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K},
|
{128 * KB, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::array<REVID, 5> revs{REVISION_A0, REVISION_B, REVISION_C, REVISION_D, REVISION_K};
|
const std::array<REVID, 5> revs{REVISION_A0, REVISION_B, REVISION_C, REVISION_D, REVISION_K};
|
||||||
|
|
|
@ -212,8 +212,17 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhe
|
||||||
|
|
||||||
EXPECT_EQ(idd.getBindingTablePointer(), 0u);
|
EXPECT_EQ(idd.getBindingTablePointer(), 0u);
|
||||||
}
|
}
|
||||||
|
struct SamplerSupportedMatcher {
|
||||||
|
template <PRODUCT_FAMILY productFamily>
|
||||||
|
static constexpr bool isMatched() {
|
||||||
|
if constexpr (HwMapper<productFamily>::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
||||||
|
return HwMapper<productFamily>::GfxProduct::supportsSampler;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensamplerStateWasCopied) {
|
HWTEST2_F(CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensamplerStateWasCopied, SamplerSupportedMatcher) {
|
||||||
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
|
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||||
|
@ -370,10 +379,16 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
|
||||||
|
|
||||||
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||||
EXPECT_NE(0u, idd.getBindingTableEntryCount());
|
EXPECT_NE(0u, idd.getBindingTableEntryCount());
|
||||||
EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
|
||||||
} else {
|
} else {
|
||||||
EXPECT_EQ(0u, idd.getBindingTableEntryCount());
|
EXPECT_EQ(0u, idd.getBindingTableEntryCount());
|
||||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
}
|
||||||
|
|
||||||
|
if constexpr (FamilyType::supportsSampler) {
|
||||||
|
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||||
|
EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
||||||
|
} else {
|
||||||
|
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -394,7 +409,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
|
||||||
auto &idd = cmd->getInterfaceDescriptor();
|
auto &idd = cmd->getInterfaceDescriptor();
|
||||||
|
|
||||||
EXPECT_EQ(0u, idd.getBindingTableEntryCount());
|
EXPECT_EQ(0u, idd.getBindingTableEntryCount());
|
||||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
if constexpr (FamilyType::supportsSampler) {
|
||||||
|
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -414,7 +431,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
|
||||||
auto &idd = cmd->getInterfaceDescriptor();
|
auto &idd = cmd->getInterfaceDescriptor();
|
||||||
|
|
||||||
EXPECT_NE(0u, idd.getBindingTableEntryCount());
|
EXPECT_NE(0u, idd.getBindingTableEntryCount());
|
||||||
EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
if constexpr (FamilyType::supportsSampler) {
|
||||||
|
EXPECT_NE(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd.getSamplerCount());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#
|
#
|
||||||
# Copyright (C) 2021 Intel Corporation
|
# Copyright (C) 2021-2022 Intel Corporation
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
#
|
#
|
||||||
|
@ -8,6 +8,7 @@ if(TESTS_XE_HPG_CORE)
|
||||||
set(NEO_SHARED_tests_xe_hpg_core
|
set(NEO_SHARED_tests_xe_hpg_core
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds_xe_hpg_core_tests.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds_xe_hpg_core_tests.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel_xe_hpg_core.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
if(DEFINED AUB_STREAM_PROJECT_NAME)
|
if(DEFINED AUB_STREAM_PROJECT_NAME)
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/command_container/command_encoder.h"
|
||||||
|
#include "shared/source/xe_hpg_core/hw_cmds_base.h"
|
||||||
|
#include "shared/test/common/helpers/default_hw_info.h"
|
||||||
|
#include "shared/test/common/test_macros/test.h"
|
||||||
|
|
||||||
|
using namespace NEO;
|
||||||
|
|
||||||
|
using CommandEncodeStatesTestXeHpgCore = ::testing::Test;
|
||||||
|
|
||||||
|
HWTEST2_F(CommandEncodeStatesTestXeHpgCore, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsXeHpgCore) {
|
||||||
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
|
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||||
|
|
||||||
|
uint32_t barrierCounts[] = {0, 1};
|
||||||
|
|
||||||
|
for (auto barrierCount : barrierCounts) {
|
||||||
|
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, barrierCount, *defaultHwInfo);
|
||||||
|
|
||||||
|
EXPECT_EQ(barrierCount, idd.getNumberOfBarriers());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue