refactor: change additional walker fields encoder 4/n
- move post sync system fence into dedicated encoder

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
64061b623b
commit
32fd00e150
|
@ -148,6 +148,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence, kernelInfo.kernelDescriptor, kernelAttributes.walkOrder, kernelAttributes.additionalSize, maxFrontEndThreads};
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::template encodeAdditionalWalkerFields<WalkerType>(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
|
||||
EncodeDispatchKernel<GfxFamily>::template encodeWalkerPostSyncFields<WalkerType>(walkerCmd, encodeWalkerArgs);
|
||||
EncodeDispatchKernel<GfxFamily>::template overrideDefaultValues<WalkerType, InterfaceDescriptorType>(walkerCmd, *interfaceDescriptor);
|
||||
|
||||
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
||||
|
|
|
@ -229,6 +229,8 @@ struct EncodeDispatchKernel {
|
|||
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
static void overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor);
|
||||
template <typename WalkerType>
|
||||
static void encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -278,6 +278,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
|
||||
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
|
||||
EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);
|
||||
EncodeWalkerArgs walkerArgs{
|
||||
KernelExecutionType::defaultType,
|
||||
args.requiresSystemMemoryFence(),
|
||||
kernelDescriptor,
|
||||
args.requiredDispatchWalkOrder,
|
||||
args.additionalSizeParam,
|
||||
args.device->getDeviceInfo().maxFrontEndThreads};
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(cmd, walkerArgs);
|
||||
|
||||
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
|
||||
|
||||
|
@ -404,6 +413,10 @@ template <typename Family>
|
|||
template <typename WalkerType>
|
||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
// Definition of encodeWalkerPostSyncFields with an empty body: it takes the
// walker command and the walker arguments but reads and writes nothing.
// NOTE(review): neighbouring definitions in this hunk spell the class as
// `EncodeDispatchKernel<Family>` without the `NEO::` qualifier — confirm the
// qualification here is intentional.
template <typename Family>
|
||||
template <typename WalkerType>
|
||||
inline void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
// Definition of setupPreferredSlmSize with an empty body: none of the
// parameters (interface descriptor, root device environment, thread count,
// SLM total size, SLM policy) is used, and the interface descriptor is left
// unmodified.
template <typename Family>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
|
|
|
@ -32,6 +32,7 @@ template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlush
|
|||
template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount,
|
||||
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
|
||||
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::INTERFACE_DESCRIPTOR_DATA>(Family::DefaultWalkerType &walkerCmd, Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
|
||||
template struct NEO::EncodeStates<Family>;
|
||||
template struct NEO::EncodeMath<Family>;
|
||||
|
|
|
@ -412,6 +412,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
args.additionalSizeParam,
|
||||
args.device->getDeviceInfo().maxFrontEndThreads};
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
|
||||
|
||||
EncodeDispatchKernel<Family>::overrideDefaultValues(walkerCmd, idd);
|
||||
|
||||
|
@ -1235,4 +1236,16 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
|
|||
}
|
||||
}
|
||||
|
||||
// Programs the system-memory-fence request bit in the walker's post-sync data.
//
// walkerCmd  - walker command whose post-sync structure (getPostSync()) is updated.
// walkerArgs - supplies requiredSystemFence, the initial value for the request.
//
// The debug flag ProgramGlobalFenceAsPostSyncOperationInComputeWalker takes
// precedence: any value other than -1 replaces walkerArgs.requiredSystemFence.
template <typename Family>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
// Start from the value requested by the caller.
auto programGlobalFenceAsPostSyncOperationInComputeWalker = walkerArgs.requiredSystemFence;
|
||||
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
||||
// -1 leaves the caller's value untouched; any other value forces the setting.
if (overrideProgramSystemMemoryFence != -1) {
|
||||
// `!!` collapses the integer flag to a bool (0 -> false, non-zero -> true).
programGlobalFenceAsPostSyncOperationInComputeWalker = !!overrideProgramSystemMemoryFence;
|
||||
}
|
||||
auto &postSyncData = walkerCmd.getPostSync();
|
||||
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -232,14 +232,6 @@ void EncodeSurfaceState<Family>::disableCompressionFlags(R_SURFACE_STATE *surfac
|
|||
template <>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = walkerArgs.requiredSystemFence;
|
||||
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
||||
if (overrideProgramSystemMemoryFence != -1) {
|
||||
programGlobalFenceAsPostSyncOperationInComputeWalker = !!overrideProgramSystemMemoryFence;
|
||||
}
|
||||
auto &postSyncData = walkerCmd.getPostSync();
|
||||
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
|
||||
|
||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent;
|
||||
int32_t overrideComputeDispatchAllWalkerEnable = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
||||
if (overrideComputeDispatchAllWalkerEnable != -1) {
|
||||
|
|
|
@ -163,17 +163,6 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
|||
template <>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = productHelper.isGlobalFenceInCommandStreamRequired(hwInfo) &&
|
||||
walkerArgs.requiredSystemFence;
|
||||
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
||||
if (overrideProgramSystemMemoryFence != -1) {
|
||||
programGlobalFenceAsPostSyncOperationInComputeWalker = !!overrideProgramSystemMemoryFence;
|
||||
}
|
||||
auto &postSyncData = walkerCmd.getPostSync();
|
||||
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
|
||||
|
||||
int32_t overrideDispatchAllWalkerEnableInComputeWalker = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
||||
if (overrideDispatchAllWalkerEnableInComputeWalker != -1) {
|
||||
walkerCmd.setComputeDispatchAllWalkerEnable(overrideDispatchAllWalkerEnableInComputeWalker);
|
||||
|
|
|
@ -58,8 +58,11 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
|||
|
||||
template <>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
}
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
// Explicit specialization (for the `Family` alias in scope in this file) of
// encodeWalkerPostSyncFields with an empty body: the walker command's
// post-sync data is not touched and walkerArgs is not read.
template <>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
template <>
|
||||
void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
|
|
|
@ -27,7 +27,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcWhenEncodeAdditionalWalkerFieldsThenPo
|
|||
int32_t programGlobalFenceAsPostSyncOperationInComputeWalker;
|
||||
bool expectSystemMemoryFenceRequest;
|
||||
} testInputs[] = {
|
||||
{0x0, -1, false},
|
||||
{0x0, -1, true},
|
||||
{0x3, -1, true},
|
||||
{0x0, 0, false},
|
||||
{0x3, 0, false},
|
||||
|
@ -53,7 +53,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcWhenEncodeAdditionalWalkerFieldsThenPo
|
|||
testInput.programGlobalFenceAsPostSyncOperationInComputeWalker);
|
||||
|
||||
postSyncData.setSystemMemoryFenceRequest(false);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<FamilyType>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
|
||||
EXPECT_EQ(testInput.expectSystemMemoryFenceRequest, postSyncData.getSystemMemoryFenceRequest());
|
||||
}
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcSupportsSystemMemoryFenceWhenNoSystemF
|
|||
hwInfo.platform.usDeviceID = deviceId;
|
||||
|
||||
postSyncData.setSystemMemoryFenceRequest(true);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<FamilyType>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -128,39 +128,3 @@ PVCTEST_F(CommandEncodeStatesTestPvc, GivenVariousSlmTotalSizesWhenSetPreferredS
|
|||
|
||||
verifyPreferredSlmValues<FamilyType>(valuesToTest, pDevice->getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
// Encodes a 1x1x1 kernel dispatch with the
// ProgramGlobalFenceAsPostSyncOperationInComputeWalker debug flag set to -1,
// the kernel marked as using a system allocation and the signal event marked
// as host-scope, then checks that the walker found in the command stream does
// not have the post-sync system memory fence request set.
PVCTEST_F(EncodeKernelPvcTest, givenDefaultSettingForFenceAsPostSyncOperationInComputeWalkerWhenEnqueueKernelIsCalledThenDoNotGenerateFenceCommands) {
|
||||
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
|
||||
// NOTE(review): this alias is not referenced anywhere in the test body.
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
// -1 is presumably the "no override" value for this flag — confirm against the flag definition.
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
|
||||
|
||||
auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
auto &productHelper = pDevice->getProductHelper();
|
||||
|
||||
// Configure the device as the first PVC XL device ID at the revision id mapped from stepping A0.
hwInfo.platform.usDeviceID = pvcXlDeviceIds[0];
|
||||
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
|
||||
hwRevId = productHelper.getHwRevIdFromStepping(REVISION_A0, hwInfo);
|
||||
|
||||
uint32_t dims[] = {1, 1, 1};
|
||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
||||
dispatchInterface->getCrossThreadDataSizeResult = 0u;
|
||||
|
||||
bool requiresUncachedMocs = false;
|
||||
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
|
||||
dispatchArgs.isKernelUsingSystemAllocation = true;
|
||||
dispatchArgs.isHostScopeSignalEvent = true;
|
||||
|
||||
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
|
||||
|
||||
// Parse everything written to the command stream and locate the first walker command.
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
|
||||
|
||||
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
// The encoded walker must not request a system memory fence.
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue