Add thread programming to Encode class

Related-To: NEO-4585

Change-Id: I45e57038af23a60f52b57eb1888f8220b77f5e56
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-06-26 16:03:30 +02:00
committed by sys_ocldev
parent 33faf4a1e7
commit c4cb8c1c81
4 changed files with 153 additions and 0 deletions

View File

@@ -38,6 +38,17 @@ struct EncodeDispatchKernel {
/// Declaration only; the definition is not shown next to this declaration.
/// NOTE(review): judging by the name, this presumably reports whether local ids have to be
/// generated by the runtime for the given work-group configuration - confirm against the definition.
/// `requiredWalkOrder` is taken by non-const reference, so it can be written by the callee.
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, size_t *lws, std::array<uint8_t, 3> walkOrder,
bool requireInputWalkOrder, uint32_t &requiredWalkOrder, uint32_t simd);
/// Programs the thread-dispatch related fields of `walkerCmd`.
///
/// @param walkerCmd        walker command that receives the programmed values
/// @param startWorkGroup   optional array of 3 starting group ids (X, Y, Z); may be null,
///                         in which case the starting ids are not programmed
/// @param numWorkGroups    array of 3 group counts (X, Y, Z); read only when `isIndirect` is false
/// @param workGroupSizes   array of 3 work-group sizes; their product is the local work size
/// @param simd             SIMD width used for the SIMD size field, the thread count per
///                         work group and the right execution mask
/// @param isIndirect       when true, indirect parameters are enabled instead of writing
///                         the group counts
///
/// `localIdDimensions`, `localIdsGenerationByRuntime`, `inlineDataProgrammingRequired` and
/// `requiredWorkGroupOrder` are part of the interface but are not read by the generic
/// definition of this function.
static void encodeThreadData(WALKER_TYPE &walkerCmd,
const size_t *startWorkGroup,
const size_t *numWorkGroups,
const size_t *workGroupSizes,
uint32_t simd,
uint32_t localIdDimensions,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder);
};
template <typename GfxFamily>

View File

@@ -292,6 +292,47 @@ bool EncodeDispatchKernel<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32
return true;
}
/// Fills in the thread-dispatch fields of a walker command.
///
/// - Direct dispatch (`isIndirect == false`): the three entries of `numWorkGroups` become the
///   X/Y/Z thread-group dimensions. Indirect dispatch only enables indirect parameters.
/// - When `startWorkGroup` is non-null, its three entries become the starting X, starting Y and
///   starting/resume Z thread-group ids.
/// - SIMD size, thread width counter maximum and both execution masks are always programmed.
///
/// `localIdDimensions`, `localIdsGenerationByRuntime`, `inlineDataProgrammingRequired` and
/// `requiredWorkGroupOrder` are accepted but not read by this definition.
template <typename GfxFamily>
void EncodeDispatchKernel<GfxFamily>::encodeThreadData(WALKER_TYPE &walkerCmd,
                                                       const size_t *startWorkGroup,
                                                       const size_t *numWorkGroups,
                                                       const size_t *workGroupSizes,
                                                       uint32_t simd,
                                                       uint32_t localIdDimensions,
                                                       bool localIdsGenerationByRuntime,
                                                       bool inlineDataProgrammingRequired,
                                                       bool isIndirect,
                                                       uint32_t requiredWorkGroupOrder) {
    // Group counts are written only for direct dispatch.
    if (!isIndirect) {
        walkerCmd.setThreadGroupIdXDimension(static_cast<uint32_t>(numWorkGroups[0]));
        walkerCmd.setThreadGroupIdYDimension(static_cast<uint32_t>(numWorkGroups[1]));
        walkerCmd.setThreadGroupIdZDimension(static_cast<uint32_t>(numWorkGroups[2]));
    } else {
        walkerCmd.setIndirectParameterEnable(true);
    }

    // The starting group offset is optional.
    if (startWorkGroup != nullptr) {
        walkerCmd.setThreadGroupIdStartingX(static_cast<uint32_t>(startWorkGroup[0]));
        walkerCmd.setThreadGroupIdStartingY(static_cast<uint32_t>(startWorkGroup[1]));
        walkerCmd.setThreadGroupIdStartingResumeZ(static_cast<uint32_t>(startWorkGroup[2]));
    }

    walkerCmd.setSimdSize(getSimdConfig<WALKER_TYPE>(simd));

    // Total number of work items in one work group.
    const auto itemsPerGroup = workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2];
    walkerCmd.setThreadWidthCounterMaximum(static_cast<uint32_t>(getThreadsPerWG(simd, itemsPerGroup)));

    // Work items left over after masking with (simd - 1).
    // NOTE(review): this equals itemsPerGroup % simd only when simd is a power of two - confirm
    // callers guarantee that.
    const auto leftoverLanes = itemsPerGroup & (simd - 1);

    // NOTE(review): maxNBitValue presumably yields a mask of the low `leftoverLanes` bits.
    // A zero result is replaced by its complement, i.e. all bits set.
    const uint64_t partialMask = maxNBitValue(leftoverLanes);
    const uint64_t rightMask = (partialMask != 0) ? partialMask : ~partialMask;
    walkerCmd.setRightExecutionMask(static_cast<uint32_t>(rightMask));

    constexpr uint32_t allBitsSet = 0xffffffffu;
    walkerCmd.setBottomExecutionMask(allBitsSet);
}
/// This definition has an empty body: `miFlushDwCmd` is neither read nor modified.
template <typename GfxFamily>
void EncodeMiFlushDW<GfxFamily>::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) {
}