refactor: change additional walker fields encoder 1/n

- move encoding of the L3 prefetch field into a dedicated function

Related-To: NEO-12639

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-10-29 10:57:38 +00:00
committed by Compute-Runtime-Automation
parent 2dccdd886a
commit 6f4994c269
7 changed files with 14 additions and 57 deletions

View File

@@ -148,6 +148,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence, kernelInfo.kernelDescriptor, kernelAttributes.walkOrder, kernelAttributes.additionalSize, maxFrontEndThreads};
EncodeDispatchKernel<GfxFamily>::template encodeAdditionalWalkerFields<WalkerType>(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
EncodeDispatchKernel<GfxFamily>::template overrideDefaultValues<WalkerType, InterfaceDescriptorType>(walkerCmd, *interfaceDescriptor);
auto devices = queueCsr.getOsContext().getDeviceBitfield();
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, true);

View File

@@ -81,14 +81,11 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, givenDebugVariableSetWhenProgramCompute
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
DebugManagerStateRestore restore;
auto walkerCmd = FamilyType::template getInitGpuWalker<COMPUTE_WALKER>();
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto idd = FamilyType::cmdInitInterfaceDescriptorData;
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::defaultType, true, kernelDescriptor, NEO::RequiredDispatchWalkOrder::none, 0, 0};
for (auto forceL3PrefetchForComputeWalker : {false, true}) {
debugManager.flags.ForceL3PrefetchForComputeWalker.set(forceL3PrefetchForComputeWalker);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<FamilyType>::overrideDefaultValues(walkerCmd, idd);
EXPECT_EQ(!forceL3PrefetchForComputeWalker, walkerCmd.getL3PrefetchDisable());
}
}

View File

@@ -1120,12 +1120,16 @@ void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t a
// Applies debug-flag overrides to an already encoded walker command.
//
// Reads the ForceL3PrefetchForComputeWalker debug flag; when it holds anything
// other than -1, the walker's L3 prefetch disable field is set to the logical
// inverse of the flag value. With the flag at -1 the walker is left untouched.
// interfaceDescriptor is accepted but not used by this body.
template <typename Family>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor) {
    const int32_t l3PrefetchOverride = debugManager.flags.ForceL3PrefetchForComputeWalker.get();
    const bool overrideRequested = (l3PrefetchOverride != -1);
    if (overrideRequested) {
        // A zero flag value means "do not force prefetch", i.e. disable it.
        const bool disablePrefetch = (l3PrefetchOverride == 0);
        walkerCmd.setL3PrefetchDisable(disablePrefetch);
    }
}
template <typename GfxFamily>
template <typename Family>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) {
void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) {
const auto &productHelper = device.getProductHelper();
if (productHelper.isDisableOverdispatchAvailable(hwInfo)) {

View File

@@ -240,11 +240,6 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
auto &postSyncData = walkerCmd.getPostSync();
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
int32_t forceL3PrefetchForComputeWalker = debugManager.flags.ForceL3PrefetchForComputeWalker.get();
if (forceL3PrefetchForComputeWalker != -1) {
walkerCmd.setL3PrefetchDisable(!forceL3PrefetchForComputeWalker);
}
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent;
int32_t overrideComputeDispatchAllWalkerEnable = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
if (overrideComputeDispatchAllWalkerEnable != -1) {

View File

@@ -174,11 +174,6 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
auto &postSyncData = walkerCmd.getPostSync();
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
int32_t forceL3PrefetchForComputeWalker = debugManager.flags.ForceL3PrefetchForComputeWalker.get();
if (forceL3PrefetchForComputeWalker != -1) {
walkerCmd.setL3PrefetchDisable(!forceL3PrefetchForComputeWalker);
}
int32_t overrideDispatchAllWalkerEnableInComputeWalker = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
if (overrideDispatchAllWalkerEnableInComputeWalker != -1) {
walkerCmd.setComputeDispatchAllWalkerEnable(overrideDispatchAllWalkerEnableInComputeWalker);

View File

@@ -59,13 +59,6 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
// Specialization that programs the L3 prefetch disable field of the walker command.
// The value starts from the release helper's isPrefetchDisablingRequired() result and is
// replaced by the logical inverse of the ForceL3PrefetchForComputeWalker debug flag when
// that flag is not -1. walkerArgs is not read in this body.
template <>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
// NOTE(review): releaseHelper is dereferenced below without a null check —
// confirm getReleaseHelper() cannot return nullptr for this family.
auto *releaseHelper = rootDeviceEnvironment.getReleaseHelper();
bool l3PrefetchDisable = releaseHelper->isPrefetchDisablingRequired();
// A debug flag value of -1 means "no override"; any other value wins over the release helper.
int32_t overrideL3PrefetchDisable = debugManager.flags.ForceL3PrefetchForComputeWalker.get();
if (overrideL3PrefetchDisable != -1) {
l3PrefetchDisable = !overrideL3PrefetchDisable;
}
walkerCmd.setL3PrefetchDisable(l3PrefetchDisable);
}
template <>

View File

@@ -59,41 +59,13 @@ DG2TEST_F(CommandEncodeDG2Test, whenProgrammingStateComputeModeThenProperFieldsA
EXPECT_TRUE(pScm->getLargeGrfMode());
}
DG2TEST_F(CommandEncodeDG2Test, whenProgramComputeWalkerThenApplyL3WAForDg2G10A0) {
DG2TEST_F(CommandEncodeDG2Test, whenProgramComputeWalkerThenSetL3PrefetchDefaultValue) {
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
auto walkerCmd = FamilyType::cmdInitGpgpuWalker;
MockExecutionEnvironment executionEnvironment{};
auto &compilerProductHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<CompilerProductHelper>();
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
auto idd = FamilyType::cmdInitInterfaceDescriptorData;
std::vector<std::pair<unsigned short, uint16_t>> dg2Configs =
{{dg2G10DeviceIds[0], revIdA0},
{dg2G10DeviceIds[0], revIdA1},
{dg2G10DeviceIds[0], revIdB0},
{dg2G10DeviceIds[0], revIdC0},
{dg2G11DeviceIds[0], revIdA0},
{dg2G11DeviceIds[0], revIdB0},
{dg2G11DeviceIds[0], revIdB1},
{dg2G12DeviceIds[0], revIdA0}};
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::defaultType, true, kernelDescriptor, NEO::RequiredDispatchWalkOrder::none, 0};
for (const auto &[deviceID, revisionID] : dg2Configs) {
hwInfo.platform.usRevId = revisionID;
hwInfo.platform.usDeviceID = deviceID;
hwInfo.ipVersion = compilerProductHelper.getHwIpVersion(hwInfo);
rootDeviceEnvironment.releaseHelper = ReleaseHelper::create(hwInfo.ipVersion);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
if (DG2::isG10(hwInfo) && revisionID < revIdB0) {
EXPECT_TRUE(walkerCmd.getL3PrefetchDisable());
} else {
EXPECT_FALSE(walkerCmd.getL3PrefetchDisable());
}
}
EncodeDispatchKernel<FamilyType>::overrideDefaultValues(walkerCmd, idd);
EXPECT_FALSE(walkerCmd.getL3PrefetchDisable());
}
using Dg2SbaTest = SbaTest;