refactor: change additional walker fields encoder 4/n

- move post sync system fence into dedicated encoder

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
64061b623b
commit
32fd00e150
|
@ -148,6 +148,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||||
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence, kernelInfo.kernelDescriptor, kernelAttributes.walkOrder, kernelAttributes.additionalSize, maxFrontEndThreads};
|
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence, kernelInfo.kernelDescriptor, kernelAttributes.walkOrder, kernelAttributes.additionalSize, maxFrontEndThreads};
|
||||||
|
|
||||||
EncodeDispatchKernel<GfxFamily>::template encodeAdditionalWalkerFields<WalkerType>(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
|
EncodeDispatchKernel<GfxFamily>::template encodeAdditionalWalkerFields<WalkerType>(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
|
||||||
|
EncodeDispatchKernel<GfxFamily>::template encodeWalkerPostSyncFields<WalkerType>(walkerCmd, encodeWalkerArgs);
|
||||||
EncodeDispatchKernel<GfxFamily>::template overrideDefaultValues<WalkerType, InterfaceDescriptorType>(walkerCmd, *interfaceDescriptor);
|
EncodeDispatchKernel<GfxFamily>::template overrideDefaultValues<WalkerType, InterfaceDescriptorType>(walkerCmd, *interfaceDescriptor);
|
||||||
|
|
||||||
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
||||||
|
|
|
@ -229,6 +229,8 @@ struct EncodeDispatchKernel {
|
||||||
|
|
||||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||||
static void overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor);
|
static void overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor);
|
||||||
|
template <typename WalkerType>
|
||||||
|
static void encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|
|
@ -278,6 +278,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
|
|
||||||
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
|
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
|
||||||
EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);
|
EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);
|
||||||
|
EncodeWalkerArgs walkerArgs{
|
||||||
|
KernelExecutionType::defaultType,
|
||||||
|
args.requiresSystemMemoryFence(),
|
||||||
|
kernelDescriptor,
|
||||||
|
args.requiredDispatchWalkOrder,
|
||||||
|
args.additionalSizeParam,
|
||||||
|
args.device->getDeviceInfo().maxFrontEndThreads};
|
||||||
|
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
|
||||||
|
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(cmd, walkerArgs);
|
||||||
|
|
||||||
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
|
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
|
||||||
|
|
||||||
|
@ -404,6 +413,10 @@ template <typename Family>
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||||
|
|
||||||
|
// No-op definition of encodeWalkerPostSyncFields: the body is empty, so
// walkerCmd is not modified and walkerArgs is not read.
template <typename Family>
|
||||||
|
template <typename WalkerType>
|
||||||
|
inline void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
template <typename InterfaceDescriptorType>
|
template <typename InterfaceDescriptorType>
|
||||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||||
|
|
|
@ -32,6 +32,7 @@ template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlush
|
||||||
template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount,
|
template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount,
|
||||||
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
|
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::INTERFACE_DESCRIPTOR_DATA>(Family::DefaultWalkerType &walkerCmd, Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor);
|
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::INTERFACE_DESCRIPTOR_DATA>(Family::DefaultWalkerType &walkerCmd, Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor);
|
||||||
|
template void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||||
|
|
||||||
template struct NEO::EncodeStates<Family>;
|
template struct NEO::EncodeStates<Family>;
|
||||||
template struct NEO::EncodeMath<Family>;
|
template struct NEO::EncodeMath<Family>;
|
||||||
|
|
|
@ -412,6 +412,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
args.additionalSizeParam,
|
args.additionalSizeParam,
|
||||||
args.device->getDeviceInfo().maxFrontEndThreads};
|
args.device->getDeviceInfo().maxFrontEndThreads};
|
||||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||||
|
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
|
||||||
|
|
||||||
EncodeDispatchKernel<Family>::overrideDefaultValues(walkerCmd, idd);
|
EncodeDispatchKernel<Family>::overrideDefaultValues(walkerCmd, idd);
|
||||||
|
|
||||||
|
@ -1235,4 +1236,16 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Programs the "system memory fence request" field of the walker command's
// post-sync data.
//
// walkerCmd  - walker command whose post-sync data is updated in place.
// walkerArgs - only requiredSystemFence is read here; it supplies the
//              default value of the fence request.
//
// The debug flag ProgramGlobalFenceAsPostSyncOperationInComputeWalker can
// override the default: -1 leaves the caller's request untouched, any other
// value is converted to bool and used instead.
template <typename Family>
|
||||||
|
template <typename WalkerType>
|
||||||
|
void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||||
|
// Default: honor what the caller requested.
auto programGlobalFenceAsPostSyncOperationInComputeWalker = walkerArgs.requiredSystemFence;
|
||||||
|
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
||||||
|
// Any value other than -1 replaces the default (0 -> false, non-zero -> true).
if (overrideProgramSystemMemoryFence != -1) {
|
||||||
|
programGlobalFenceAsPostSyncOperationInComputeWalker = !!overrideProgramSystemMemoryFence;
|
||||||
|
}
|
||||||
|
// Write the final decision into the walker's post-sync data.
auto &postSyncData = walkerCmd.getPostSync();
|
||||||
|
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -232,14 +232,6 @@ void EncodeSurfaceState<Family>::disableCompressionFlags(R_SURFACE_STATE *surfac
|
||||||
template <>
|
template <>
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = walkerArgs.requiredSystemFence;
|
|
||||||
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
|
||||||
if (overrideProgramSystemMemoryFence != -1) {
|
|
||||||
programGlobalFenceAsPostSyncOperationInComputeWalker = !!overrideProgramSystemMemoryFence;
|
|
||||||
}
|
|
||||||
auto &postSyncData = walkerCmd.getPostSync();
|
|
||||||
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
|
|
||||||
|
|
||||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent;
|
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent;
|
||||||
int32_t overrideComputeDispatchAllWalkerEnable = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
int32_t overrideComputeDispatchAllWalkerEnable = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
||||||
if (overrideComputeDispatchAllWalkerEnable != -1) {
|
if (overrideComputeDispatchAllWalkerEnable != -1) {
|
||||||
|
|
|
@ -163,17 +163,6 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||||
template <>
|
template <>
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
|
||||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
|
||||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = productHelper.isGlobalFenceInCommandStreamRequired(hwInfo) &&
|
|
||||||
walkerArgs.requiredSystemFence;
|
|
||||||
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
|
||||||
if (overrideProgramSystemMemoryFence != -1) {
|
|
||||||
programGlobalFenceAsPostSyncOperationInComputeWalker = !!overrideProgramSystemMemoryFence;
|
|
||||||
}
|
|
||||||
auto &postSyncData = walkerCmd.getPostSync();
|
|
||||||
postSyncData.setSystemMemoryFenceRequest(programGlobalFenceAsPostSyncOperationInComputeWalker);
|
|
||||||
|
|
||||||
int32_t overrideDispatchAllWalkerEnableInComputeWalker = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
int32_t overrideDispatchAllWalkerEnableInComputeWalker = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
||||||
if (overrideDispatchAllWalkerEnableInComputeWalker != -1) {
|
if (overrideDispatchAllWalkerEnableInComputeWalker != -1) {
|
||||||
walkerCmd.setComputeDispatchAllWalkerEnable(overrideDispatchAllWalkerEnableInComputeWalker);
|
walkerCmd.setComputeDispatchAllWalkerEnable(overrideDispatchAllWalkerEnableInComputeWalker);
|
||||||
|
|
|
@ -58,8 +58,11 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
template <typename WalkerType>
|
template <typename WalkerType>
|
||||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||||
}
|
|
||||||
|
// Explicit specialization of encodeWalkerPostSyncFields with an empty body:
// nothing is written to walkerCmd and walkerArgs is ignored.
template <>
|
||||||
|
template <typename WalkerType>
|
||||||
|
void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||||
|
|
|
@ -27,7 +27,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcWhenEncodeAdditionalWalkerFieldsThenPo
|
||||||
int32_t programGlobalFenceAsPostSyncOperationInComputeWalker;
|
int32_t programGlobalFenceAsPostSyncOperationInComputeWalker;
|
||||||
bool expectSystemMemoryFenceRequest;
|
bool expectSystemMemoryFenceRequest;
|
||||||
} testInputs[] = {
|
} testInputs[] = {
|
||||||
{0x0, -1, false},
|
{0x0, -1, true},
|
||||||
{0x3, -1, true},
|
{0x3, -1, true},
|
||||||
{0x0, 0, false},
|
{0x0, 0, false},
|
||||||
{0x3, 0, false},
|
{0x3, 0, false},
|
||||||
|
@ -53,7 +53,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcWhenEncodeAdditionalWalkerFieldsThenPo
|
||||||
testInput.programGlobalFenceAsPostSyncOperationInComputeWalker);
|
testInput.programGlobalFenceAsPostSyncOperationInComputeWalker);
|
||||||
|
|
||||||
postSyncData.setSystemMemoryFenceRequest(false);
|
postSyncData.setSystemMemoryFenceRequest(false);
|
||||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
EncodeDispatchKernel<FamilyType>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
|
||||||
EXPECT_EQ(testInput.expectSystemMemoryFenceRequest, postSyncData.getSystemMemoryFenceRequest());
|
EXPECT_EQ(testInput.expectSystemMemoryFenceRequest, postSyncData.getSystemMemoryFenceRequest());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -75,7 +75,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcSupportsSystemMemoryFenceWhenNoSystemF
|
||||||
hwInfo.platform.usDeviceID = deviceId;
|
hwInfo.platform.usDeviceID = deviceId;
|
||||||
|
|
||||||
postSyncData.setSystemMemoryFenceRequest(true);
|
postSyncData.setSystemMemoryFenceRequest(true);
|
||||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
EncodeDispatchKernel<FamilyType>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
|
||||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -128,39 +128,3 @@ PVCTEST_F(CommandEncodeStatesTestPvc, GivenVariousSlmTotalSizesWhenSetPreferredS
|
||||||
|
|
||||||
verifyPreferredSlmValues<FamilyType>(valuesToTest, pDevice->getRootDeviceEnvironment());
|
verifyPreferredSlmValues<FamilyType>(valuesToTest, pDevice->getRootDeviceEnvironment());
|
||||||
}
|
}
|
||||||
|
|
||||||
PVCTEST_F(EncodeKernelPvcTest, givenDefaultSettingForFenceAsPostSyncOperationInComputeWalkerWhenEnqueueKernelIsCalledThenDoNotGenerateFenceCommands) {
|
|
||||||
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
|
|
||||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
|
||||||
|
|
||||||
DebugManagerStateRestore restore;
|
|
||||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
|
|
||||||
|
|
||||||
auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
|
||||||
auto &productHelper = pDevice->getProductHelper();
|
|
||||||
|
|
||||||
hwInfo.platform.usDeviceID = pvcXlDeviceIds[0];
|
|
||||||
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
|
|
||||||
hwRevId = productHelper.getHwRevIdFromStepping(REVISION_A0, hwInfo);
|
|
||||||
|
|
||||||
uint32_t dims[] = {1, 1, 1};
|
|
||||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
|
||||||
dispatchInterface->getCrossThreadDataSizeResult = 0u;
|
|
||||||
|
|
||||||
bool requiresUncachedMocs = false;
|
|
||||||
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
|
|
||||||
dispatchArgs.isKernelUsingSystemAllocation = true;
|
|
||||||
dispatchArgs.isHostScopeSignalEvent = true;
|
|
||||||
|
|
||||||
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
|
|
||||||
|
|
||||||
GenCmdList commands;
|
|
||||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
|
|
||||||
|
|
||||||
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
|
|
||||||
ASSERT_NE(itor, commands.end());
|
|
||||||
|
|
||||||
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
|
|
||||||
auto &postSyncData = walkerCmd->getPostSync();
|
|
||||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue