Refactor programWalker.

- Pass variables computed in upper layers via args.
- declare variables prior to functions.
- Change some names for better verbosity.

Change-Id: I603b9ada1f62a08de5ac0fce177ccd840f2ce98c
This commit is contained in:
Mrozek, Michal
2019-01-11 11:32:27 +01:00
committed by sys_ocldev
parent 8ae7de7b0e
commit 15bfdc101f
3 changed files with 31 additions and 28 deletions

View File

@@ -88,12 +88,15 @@ class HardwareInterface {
IndirectHeap &dsh,
IndirectHeap &ioh,
IndirectHeap &ssh,
size_t globalWorkSizes[3],
size_t localWorkSizes[3],
PreemptionMode preemptionMode,
size_t currentDispatchIndex,
uint32_t &interfaceDescriptorIndex,
const DispatchInfo &dispatchInfo,
size_t offsetInterfaceDescriptorTable);
size_t offsetInterfaceDescriptorTable,
Vec3<size_t> &numberOfWorkgroups,
Vec3<size_t> &startOfWorkgroups);
static WALKER_TYPE<GfxFamily> *allocateWalkerSpace(LinearStream &commandStream,
const Kernel &kernel);

View File

@@ -144,15 +144,19 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
uint32_t dim = dispatchInfo.getDim();
Vec3<size_t> gws = dispatchInfo.getGWS();
Vec3<size_t> offset = dispatchInfo.getOffset();
Vec3<size_t> swgs = dispatchInfo.getStartOfWorkgroups();
Vec3<size_t> startOfWorkgroups = dispatchInfo.getStartOfWorkgroups();
// Compute local workgroup sizes
Vec3<size_t> lws = dispatchInfo.getLocalWorkgroupSize();
Vec3<size_t> elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws;
// Compute number of work groups
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups()
: generateWorkgroupsNumber(gws, lws);
Vec3<size_t> totalNumberOfWorkgroups = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups()
: generateWorkgroupsNumber(gws, lws);
Vec3<size_t> numberOfWorkgroups = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : totalNumberOfWorkgroups;
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
// Patch our kernel constants
*kernel.globalWorkOffsetX = static_cast<uint32_t>(offset.x);
@@ -178,9 +182,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
*kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z);
if (&kernel == mainKernel) {
*kernel.numWorkGroupsX = static_cast<uint32_t>(twgs.x);
*kernel.numWorkGroupsY = static_cast<uint32_t>(twgs.y);
*kernel.numWorkGroupsZ = static_cast<uint32_t>(twgs.z);
*kernel.numWorkGroupsX = static_cast<uint32_t>(totalNumberOfWorkgroups.x);
*kernel.numWorkGroupsY = static_cast<uint32_t>(totalNumberOfWorkgroups.y);
*kernel.numWorkGroupsZ = static_cast<uint32_t>(totalNumberOfWorkgroups.z);
}
*kernel.workDim = dim;
@@ -198,8 +202,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
}
programWalker(*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh,
localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, offsetInterfaceDescriptorTable);
programWalker(*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh, globalWorkSizes,
localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo,
offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups);
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
if (dispatchInfo.isPipeControlRequired()) {

View File

@@ -100,36 +100,34 @@ inline void HardwareInterface<GfxFamily>::programWalker(
IndirectHeap &dsh,
IndirectHeap &ioh,
IndirectHeap &ssh,
size_t globalWorkSizes[3],
size_t localWorkSizes[3],
PreemptionMode preemptionMode,
size_t currentDispatchIndex,
uint32_t &interfaceDescriptorIndex,
const DispatchInfo &dispatchInfo,
size_t offsetInterfaceDescriptorTable) {
size_t offsetInterfaceDescriptorTable,
Vec3<size_t> &numberOfWorkgroups,
Vec3<size_t> &startOfWorkgroups) {
auto walkerCmd = allocateWalkerSpace(commandStream, kernel);
uint32_t dim = dispatchInfo.getDim();
Vec3<size_t> lws = dispatchInfo.getLocalWorkgroupSize();
Vec3<size_t> gws = dispatchInfo.getGWS();
Vec3<size_t> swgs = dispatchInfo.getStartOfWorkgroups();
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() : generateWorkgroupsNumber(gws, lws);
Vec3<size_t> nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : twgs;
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z};
size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z};
size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z};
bool localIdsGenerationByRuntime = KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes);
bool inlineDataProgrammingRequired = KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
bool kernelUsesLocalIds = KernelCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
auto idd = obtainInterfaceDescriptorData(walkerCmd);
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
}
auto idd = obtainInterfaceDescriptorData(walkerCmd);
bool localIdsGenerationByRuntime = KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes);
bool inlineDataProgrammingRequired = KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
bool kernelUsesLocalIds = KernelCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
Vec3<size_t> offset = dispatchInfo.getOffset();
KernelCommandsHelper<GfxFamily>::sendIndirectState(
commandStream,
dsh,
@@ -147,9 +145,6 @@ inline void HardwareInterface<GfxFamily>::programWalker(
kernelUsesLocalIds,
inlineDataProgrammingRequired);
size_t globalOffsets[3] = {offset.x, offset.y, offset.z};
size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z};
size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z};
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
numWorkGroups, localWorkSizes, simd, dim,
localIdsGenerationByRuntime, inlineDataProgrammingRequired,