mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
Move barrier programming to Encode class
Related-To: NEO-4576
Change-Id: I34b93b3118528b449c4e1b81826f9784633377a9
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
f38d5976cd
commit
134462919d
@@ -57,6 +57,8 @@ struct EncodeDispatchKernel {
|
||||
bool inlineDataProgrammingRequired,
|
||||
bool isIndirect,
|
||||
uint32_t requiredWorkGroupOrder);
|
||||
|
||||
static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -66,7 +66,9 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
auto numThreadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup();
|
||||
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
|
||||
|
||||
idd.setBarrierEnable(kernelDescriptor.kernelAttributes.flags.usesBarriers);
|
||||
EncodeDispatchKernel<Family>::programBarrierEnable(&idd,
|
||||
kernelDescriptor.kernelAttributes.hasBarriers,
|
||||
container.getDevice()->getHardwareInfo());
|
||||
auto slmSize = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
|
||||
HwHelperHw<Family>::get().computeSlmValues(dispatchInterface->getSlmTotalSize()));
|
||||
idd.setSharedLocalMemorySize(
|
||||
@@ -339,6 +341,13 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadData(WALKER_TYPE &walkerCmd,
|
||||
walkerCmd.setBottomExecutionMask(maxDword);
|
||||
}
|
||||
|
||||
// Programs the barrier-enable field of an interface descriptor.
//
// pInterfaceDescriptor: descriptor to update; dereferenced without a null check.
// value:                passed unchanged to setBarrierEnable().
// hwInfo:               accepted but not read by this implementation.
template <typename GfxFamily>
|
||||
void EncodeDispatchKernel<GfxFamily>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor,
|
||||
uint32_t value,
|
||||
const HardwareInfo &hwInfo) {
|
||||
pInterfaceDescriptor->setBarrierEnable(value);
|
||||
}
|
||||
|
||||
// Generic definition of appendMiFlushDw: the body is empty, so miFlushDwCmd
// is left untouched here.
template <typename GfxFamily>
|
||||
void EncodeMiFlushDW<GfxFamily>::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) {}
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ struct KernelDescriptor final {
|
||||
uint32_t perThreadScratchSize[2] = {0U, 0U};
|
||||
uint32_t perThreadPrivateMemorySize = 0U;
|
||||
uint32_t perThreadSystemThreadSurfaceSize = 0U;
|
||||
uint32_t hasBarriers = 0u;
|
||||
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
|
||||
uint16_t crossThreadDataSize = 0U;
|
||||
uint16_t perThreadDataSize = 0U;
|
||||
|
||||
@@ -49,6 +49,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
|
||||
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
|
||||
dst.kernelAttributes.flags.usesDeviceSideEnqueue = (0 != execEnv.HasDeviceEnqueue);
|
||||
dst.kernelAttributes.flags.usesBarriers = (0 != execEnv.HasBarriers);
|
||||
dst.kernelAttributes.hasBarriers = execEnv.HasBarriers;
|
||||
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
|
||||
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
|
||||
dst.kernelAttributes.flags.usesFencesForReadWriteImages = (0 != execEnv.UsesFencesForReadWriteImages);
|
||||
|
||||
Reference in New Issue
Block a user