mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
Revert "fix: correct limitation for num threads per thread group"
This reverts commit 6ad4ad41b1.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d6849a5605
commit
593c9e76f2
@@ -69,7 +69,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(const Kernel &kerne
|
||||
requiredWalkOrder,
|
||||
simdSize);
|
||||
auto size = kernel.getCrossThreadDataSize() +
|
||||
- HardwareCommandsHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, numChannels, localWorkSize, rootDeviceEnvironment);
|
||||
+ HardwareCommandsHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, numChannels, localWorkSize, isHwLocalIdGeneration, rootDeviceEnvironment);
|
||||
|
||||
auto pImplicitArgs = kernel.getImplicitArgs();
|
||||
if (pImplicitArgs) {
|
||||
@@ -297,7 +297,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||
auto grfCount = kernel.getDescriptor().kernelAttributes.numGrfRequired;
|
||||
auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2];
|
||||
- auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfCount, device.getRootDeviceEnvironment());
|
||||
+ auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfCount, !localIdsGenerationByRuntime, device.getRootDeviceEnvironment());
|
||||
|
||||
uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();
|
||||
|
||||
|
||||
@@ -2235,9 +2235,10 @@ void Kernel::reconfigureKernel() {
|
||||
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
const auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto maxWorkGroupSize = gfxCoreHelper.calculateMaxWorkGroupSize(kernelDescriptor, this->maxKernelWorkGroupSize);
|
||||
+ bool isLocalIdsGeneratedByHw = false; // if local ids generated by runtime then more work groups available
|
||||
maxWorkGroupSize = static_cast<uint32_t>(kernelInfo.getMaxRequiredWorkGroupSize(maxWorkGroupSize));
|
||||
|
||||
- this->maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, maxWorkGroupSize, getDevice().getRootDeviceEnvironment());
|
||||
+ this->maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, isLocalIdsGeneratedByHw, maxWorkGroupSize, getDevice().getRootDeviceEnvironment());
|
||||
|
||||
this->containsStatelessWrites = kernelDescriptor.kernelAttributes.flags.usesStatelessWrites;
|
||||
this->systolicPipelineSelectMode = kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode;
|
||||
|
||||
Reference in New Issue
Block a user