Move barrier programming to Encode class

Related-To: NEO-4576

Change-Id: I34b93b3118528b449c4e1b81826f9784633377a9
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-07-16 20:00:52 +02:00
committed by sys_ocldev
parent f38d5976cd
commit 134462919d
11 changed files with 45 additions and 45 deletions

View File

@@ -57,6 +57,8 @@ struct EncodeDispatchKernel {
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder);
// Programs the barrier-enable setting on the interface descriptor pointed to by
// pInterfaceDescriptor, using the supplied value.
// hwInfo describes the target hardware.
// NOTE(review): whether hwInfo influences the programmed value depends on the
// per-family definition - confirm.
static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
};
template <typename GfxFamily>

View File

@@ -66,7 +66,9 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
auto numThreadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup();
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
idd.setBarrierEnable(kernelDescriptor.kernelAttributes.flags.usesBarriers);
EncodeDispatchKernel<Family>::programBarrierEnable(&idd,
kernelDescriptor.kernelAttributes.hasBarriers,
container.getDevice()->getHardwareInfo());
auto slmSize = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
HwHelperHw<Family>::get().computeSlmValues(dispatchInterface->getSlmTotalSize()));
idd.setSharedLocalMemorySize(
@@ -339,6 +341,13 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadData(WALKER_TYPE &walkerCmd,
walkerCmd.setBottomExecutionMask(maxDword);
}
// Writes the barrier-enable setting into an interface descriptor.
//
// pInterfaceDescriptor: descriptor to update; dereferenced unconditionally, so it
//                       must not be null.
// value:                forwarded unchanged to setBarrierEnable().
// hwInfo:               not used by this implementation.
//
// NOTE(review): value is a uint32_t and is passed through as-is; confirm that
// setBarrierEnable() handles values greater than 1 the way callers expect.
template <typename GfxFamily>
void EncodeDispatchKernel<GfxFamily>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor,
                                                           uint32_t value,
                                                           const HardwareInfo &hwInfo) {
    auto &interfaceDescriptor = *pInterfaceDescriptor;
    interfaceDescriptor.setBarrierEnable(value);
}
// Post-processing step for an MI_FLUSH_DW command.
//
// This implementation is a no-op: miFlushDwCmd is neither read nor modified.
// NOTE(review): presumably a hook for family-specific specializations - confirm.
template <typename GfxFamily>
void EncodeMiFlushDW<GfxFamily>::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) {
    // Nothing to append.
}

View File

@@ -46,6 +46,7 @@ struct KernelDescriptor final {
uint32_t perThreadScratchSize[2] = {0U, 0U};
uint32_t perThreadPrivateMemorySize = 0U;
uint32_t perThreadSystemThreadSurfaceSize = 0U;
uint32_t hasBarriers = 0u;
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
uint16_t crossThreadDataSize = 0U;
uint16_t perThreadDataSize = 0U;

View File

@@ -49,6 +49,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
dst.kernelAttributes.flags.usesDeviceSideEnqueue = (0 != execEnv.HasDeviceEnqueue);
dst.kernelAttributes.flags.usesBarriers = (0 != execEnv.HasBarriers);
dst.kernelAttributes.hasBarriers = execEnv.HasBarriers;
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
dst.kernelAttributes.flags.usesFencesForReadWriteImages = (0 != execEnv.UsesFencesForReadWriteImages);