From 15bfdc101ff2ed044bea2255fbf1610a3706e9c7 Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Fri, 11 Jan 2019 11:32:27 +0100 Subject: [PATCH] Refactor programWalker. - Pass variables computed in upper layers via args. - Declare variables before the function calls that use them. - Rename some variables to be more descriptive. Change-Id: I603b9ada1f62a08de5ac0fce177ccd840f2ce98c --- runtime/command_queue/hardware_interface.h | 5 ++- runtime/command_queue/hardware_interface.inl | 21 +++++++----- .../command_queue/hardware_interface_base.inl | 33 ++++++++----------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/runtime/command_queue/hardware_interface.h b/runtime/command_queue/hardware_interface.h index 956fdf5bf6..3937f97973 100644 --- a/runtime/command_queue/hardware_interface.h +++ b/runtime/command_queue/hardware_interface.h @@ -88,12 +88,15 @@ class HardwareInterface { IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, + size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, - size_t offsetInterfaceDescriptorTable); + size_t offsetInterfaceDescriptorTable, + Vec3 &numberOfWorkgroups, + Vec3 &startOfWorkgroups); static WALKER_TYPE *allocateWalkerSpace(LinearStream &commandStream, const Kernel &kernel); diff --git a/runtime/command_queue/hardware_interface.inl b/runtime/command_queue/hardware_interface.inl index 963e551c11..56a2375c6d 100644 --- a/runtime/command_queue/hardware_interface.inl +++ b/runtime/command_queue/hardware_interface.inl @@ -144,15 +144,19 @@ void HardwareInterface::dispatchWalker( uint32_t dim = dispatchInfo.getDim(); Vec3 gws = dispatchInfo.getGWS(); Vec3 offset = dispatchInfo.getOffset(); - Vec3 swgs = dispatchInfo.getStartOfWorkgroups(); + Vec3 startOfWorkgroups = dispatchInfo.getStartOfWorkgroups(); // Compute local workgroup sizes Vec3 lws = dispatchInfo.getLocalWorkgroupSize(); Vec3 elws = 
(dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws; // Compute number of work groups - Vec3 twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() - : generateWorkgroupsNumber(gws, lws); + Vec3 totalNumberOfWorkgroups = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() + : generateWorkgroupsNumber(gws, lws); + + Vec3 numberOfWorkgroups = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : totalNumberOfWorkgroups; + + size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z}; // Patch our kernel constants *kernel.globalWorkOffsetX = static_cast(offset.x); @@ -178,9 +182,9 @@ void HardwareInterface::dispatchWalker( *kernel.enqueuedLocalWorkSizeZ = static_cast(elws.z); if (&kernel == mainKernel) { - *kernel.numWorkGroupsX = static_cast(twgs.x); - *kernel.numWorkGroupsY = static_cast(twgs.y); - *kernel.numWorkGroupsZ = static_cast(twgs.z); + *kernel.numWorkGroupsX = static_cast(totalNumberOfWorkgroups.x); + *kernel.numWorkGroupsY = static_cast(totalNumberOfWorkgroups.y); + *kernel.numWorkGroupsZ = static_cast(totalNumberOfWorkgroups.z); } *kernel.workDim = dim; @@ -198,8 +202,9 @@ void HardwareInterface::dispatchWalker( GpgpuWalkerHelper::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker); } - programWalker(*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh, - localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, offsetInterfaceDescriptorTable); + programWalker(*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh, globalWorkSizes, + localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, + offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups); dispatchWorkarounds(commandStream, commandQueue, 
kernel, false); if (dispatchInfo.isPipeControlRequired()) { diff --git a/runtime/command_queue/hardware_interface_base.inl b/runtime/command_queue/hardware_interface_base.inl index c18f215626..efee7adf6b 100644 --- a/runtime/command_queue/hardware_interface_base.inl +++ b/runtime/command_queue/hardware_interface_base.inl @@ -100,36 +100,34 @@ inline void HardwareInterface::programWalker( IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, + size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, - size_t offsetInterfaceDescriptorTable) { + size_t offsetInterfaceDescriptorTable, + Vec3 &numberOfWorkgroups, + Vec3 &startOfWorkgroups) { auto walkerCmd = allocateWalkerSpace(commandStream, kernel); uint32_t dim = dispatchInfo.getDim(); - Vec3 lws = dispatchInfo.getLocalWorkgroupSize(); - Vec3 gws = dispatchInfo.getGWS(); - Vec3 swgs = dispatchInfo.getStartOfWorkgroups(); - Vec3 twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() : generateWorkgroupsNumber(gws, lws); - Vec3 nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? 
dispatchInfo.getNumberOfWorkgroups() : twgs; - size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z}; + uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); + + size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; + size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; + size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z}; + + bool localIdsGenerationByRuntime = KernelCommandsHelper::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes); + bool inlineDataProgrammingRequired = KernelCommandsHelper::inlineDataProgrammingRequired(kernel); + bool kernelUsesLocalIds = KernelCommandsHelper::kernelUsesLocalIds(kernel); + auto idd = obtainInterfaceDescriptorData(walkerCmd); if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag; GpgpuWalkerHelper::setupTimestampPacket(&commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker); } - auto idd = obtainInterfaceDescriptorData(walkerCmd); - - bool localIdsGenerationByRuntime = KernelCommandsHelper::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes); - bool inlineDataProgrammingRequired = KernelCommandsHelper::inlineDataProgrammingRequired(kernel); - bool kernelUsesLocalIds = KernelCommandsHelper::kernelUsesLocalIds(kernel); - uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); - - Vec3 offset = dispatchInfo.getOffset(); - KernelCommandsHelper::sendIndirectState( commandStream, dsh, @@ -147,9 +145,6 @@ inline void HardwareInterface::programWalker( kernelUsesLocalIds, inlineDataProgrammingRequired); - size_t globalOffsets[3] = {offset.x, offset.y, offset.z}; - size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z}; - size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z}; 
GpgpuWalkerHelper::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, localIdsGenerationByRuntime, inlineDataProgrammingRequired,