mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 15:03:02 +08:00
Refactor programWalker.
- Pass variables computed in upper layers via args. - Declare variables before the function calls that use them. - Rename some variables to be more descriptive. Change-Id: I603b9ada1f62a08de5ac0fce177ccd840f2ce98c
This commit is contained in:
committed by
sys_ocldev
parent
8ae7de7b0e
commit
15bfdc101f
@@ -88,12 +88,15 @@ class HardwareInterface {
|
||||
IndirectHeap &dsh,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
size_t globalWorkSizes[3],
|
||||
size_t localWorkSizes[3],
|
||||
PreemptionMode preemptionMode,
|
||||
size_t currentDispatchIndex,
|
||||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable);
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
Vec3<size_t> &numberOfWorkgroups,
|
||||
Vec3<size_t> &startOfWorkgroups);
|
||||
|
||||
static WALKER_TYPE<GfxFamily> *allocateWalkerSpace(LinearStream &commandStream,
|
||||
const Kernel &kernel);
|
||||
|
||||
@@ -144,15 +144,19 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
uint32_t dim = dispatchInfo.getDim();
|
||||
Vec3<size_t> gws = dispatchInfo.getGWS();
|
||||
Vec3<size_t> offset = dispatchInfo.getOffset();
|
||||
Vec3<size_t> swgs = dispatchInfo.getStartOfWorkgroups();
|
||||
Vec3<size_t> startOfWorkgroups = dispatchInfo.getStartOfWorkgroups();
|
||||
|
||||
// Compute local workgroup sizes
|
||||
Vec3<size_t> lws = dispatchInfo.getLocalWorkgroupSize();
|
||||
Vec3<size_t> elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws;
|
||||
|
||||
// Compute number of work groups
|
||||
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups()
|
||||
: generateWorkgroupsNumber(gws, lws);
|
||||
Vec3<size_t> totalNumberOfWorkgroups = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups()
|
||||
: generateWorkgroupsNumber(gws, lws);
|
||||
|
||||
Vec3<size_t> numberOfWorkgroups = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : totalNumberOfWorkgroups;
|
||||
|
||||
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
|
||||
|
||||
// Patch our kernel constants
|
||||
*kernel.globalWorkOffsetX = static_cast<uint32_t>(offset.x);
|
||||
@@ -178,9 +182,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
*kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z);
|
||||
|
||||
if (&kernel == mainKernel) {
|
||||
*kernel.numWorkGroupsX = static_cast<uint32_t>(twgs.x);
|
||||
*kernel.numWorkGroupsY = static_cast<uint32_t>(twgs.y);
|
||||
*kernel.numWorkGroupsZ = static_cast<uint32_t>(twgs.z);
|
||||
*kernel.numWorkGroupsX = static_cast<uint32_t>(totalNumberOfWorkgroups.x);
|
||||
*kernel.numWorkGroupsY = static_cast<uint32_t>(totalNumberOfWorkgroups.y);
|
||||
*kernel.numWorkGroupsZ = static_cast<uint32_t>(totalNumberOfWorkgroups.z);
|
||||
}
|
||||
|
||||
*kernel.workDim = dim;
|
||||
@@ -198,8 +202,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
|
||||
}
|
||||
|
||||
programWalker(*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh,
|
||||
localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, offsetInterfaceDescriptorTable);
|
||||
programWalker(*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh, globalWorkSizes,
|
||||
localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo,
|
||||
offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups);
|
||||
|
||||
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
|
||||
if (dispatchInfo.isPipeControlRequired()) {
|
||||
|
||||
@@ -100,36 +100,34 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
IndirectHeap &dsh,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
size_t globalWorkSizes[3],
|
||||
size_t localWorkSizes[3],
|
||||
PreemptionMode preemptionMode,
|
||||
size_t currentDispatchIndex,
|
||||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable) {
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
Vec3<size_t> &numberOfWorkgroups,
|
||||
Vec3<size_t> &startOfWorkgroups) {
|
||||
|
||||
auto walkerCmd = allocateWalkerSpace(commandStream, kernel);
|
||||
uint32_t dim = dispatchInfo.getDim();
|
||||
Vec3<size_t> lws = dispatchInfo.getLocalWorkgroupSize();
|
||||
Vec3<size_t> gws = dispatchInfo.getGWS();
|
||||
Vec3<size_t> swgs = dispatchInfo.getStartOfWorkgroups();
|
||||
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() : generateWorkgroupsNumber(gws, lws);
|
||||
Vec3<size_t> nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : twgs;
|
||||
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
|
||||
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
|
||||
|
||||
size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z};
|
||||
size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z};
|
||||
size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z};
|
||||
|
||||
bool localIdsGenerationByRuntime = KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes);
|
||||
bool inlineDataProgrammingRequired = KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
|
||||
bool kernelUsesLocalIds = KernelCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
|
||||
auto idd = obtainInterfaceDescriptorData(walkerCmd);
|
||||
|
||||
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
|
||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
|
||||
}
|
||||
|
||||
auto idd = obtainInterfaceDescriptorData(walkerCmd);
|
||||
|
||||
bool localIdsGenerationByRuntime = KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes);
|
||||
bool inlineDataProgrammingRequired = KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
|
||||
bool kernelUsesLocalIds = KernelCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
|
||||
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
|
||||
|
||||
Vec3<size_t> offset = dispatchInfo.getOffset();
|
||||
|
||||
KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
@@ -147,9 +145,6 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
kernelUsesLocalIds,
|
||||
inlineDataProgrammingRequired);
|
||||
|
||||
size_t globalOffsets[3] = {offset.x, offset.y, offset.z};
|
||||
size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z};
|
||||
size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z};
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
localIdsGenerationByRuntime, inlineDataProgrammingRequired,
|
||||
|
||||
Reference in New Issue
Block a user