Merge patchWorkDim method's logic into setGroupCount method

Related-To: NEO-5081
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2021-09-13 16:53:34 +00:00
committed by Compute-Runtime-Automation
parent 0d1c8be447
commit f958b053ab
6 changed files with 13 additions and 29 deletions

View File

@@ -116,8 +116,6 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
virtual ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) = 0;
virtual void patchGlobalOffset() = 0;
virtual void patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
bool isEngineInstanced) = 0;
virtual ze_result_t setCacheConfig(ze_cache_config_flags_t flags) = 0;

View File

@@ -280,6 +280,18 @@ void KernelImp::setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32
uint32_t groupCount[3] = {groupCountX, groupCountY, groupCountZ};
NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.numWorkGroups, groupCount);
uint32_t workDim = 1;
if (groupCountZ * groupSize[2] > 1) {
workDim = 3;
} else if (groupCountY * groupSize[1] > 1) {
workDim = 2;
}
auto workDimOffset = desc.payloadMappings.dispatchTraits.workDim;
if (NEO::isValidOffset(workDimOffset)) {
auto destinationBuffer = ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize);
NEO::patchNonPointer(destinationBuffer, desc.payloadMappings.dispatchTraits.workDim, workDim);
}
}
ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
@@ -924,21 +936,6 @@ void KernelImp::patchGlobalOffset() {
NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.globalWorkOffset, this->globalOffsets);
}
// Writes the work-dimension value for a dispatch into this kernel's cross-thread data.
//
// The value is chosen from the group counts passed in and the kernel's current groupSize:
//   3 when groupCountZ * groupSize[2] is greater than 1,
//   2 when (otherwise) groupCountY * groupSize[1] is greater than 1,
//   1 in every other case.
// groupCountX does not influence the result.
//
// Nothing is written when the kernel descriptor does not carry a valid workDim offset.
void KernelImp::patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) {
    const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor();
    auto dataOffset = kernelDescriptor.payloadMappings.dispatchTraits.workDim;
    if (!NEO::isValidOffset(dataOffset)) {
        return;
    }

    // The products are evaluated in 64 bits: a 32-bit multiplication can wrap around
    // (e.g. 4194304 groups * group size 1024 == 2^32 -> 0), which would make a
    // multi-dimensional dispatch look one-dimensional.
    // NOTE(review): assumes groupSize holds unsigned 32-bit values - confirm against the member declaration.
    const uint64_t totalSizeZ = static_cast<uint64_t>(groupCountZ) * groupSize[2];
    const uint64_t totalSizeY = static_cast<uint64_t>(groupCountY) * groupSize[1];

    uint32_t workDim = 1;
    if (totalSizeZ > 1) {
        workDim = 3;
    } else if (totalSizeY > 1) {
        workDim = 2;
    }

    auto destinationBuffer = ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize);
    NEO::patchNonPointer(destinationBuffer, kernelDescriptor.payloadMappings.dispatchTraits.workDim, workDim);
}
Kernel *Kernel::create(uint32_t productFamily, Module *module,
const ze_kernel_desc_t *desc, ze_result_t *res) {
UNRECOVERABLE_IF(productFamily >= IGFX_MAX_PRODUCT);

View File

@@ -127,8 +127,6 @@ struct KernelImp : Kernel {
ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) override;
void patchGlobalOffset() override;
void patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override;
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
bool usesRayTracing() {
return kernelImmData->getDescriptor().hasRTCalls();