diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index c18098d723..1dab1577f8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -95,9 +95,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z kernel->setGroupCount(pThreadGroupDimensions->groupCountX, pThreadGroupDimensions->groupCountY, pThreadGroupDimensions->groupCountZ); - kernel->patchWorkDim(pThreadGroupDimensions->groupCountX, - pThreadGroupDimensions->groupCountY, - pThreadGroupDimensions->groupCountZ); } if (isIndirect && pThreadGroupDimensions) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index e253e1c208..58d3b16b95 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -146,9 +146,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z kernel->setGroupCount(pThreadGroupDimensions->groupCountX, pThreadGroupDimensions->groupCountY, pThreadGroupDimensions->groupCountZ); - kernel->patchWorkDim(pThreadGroupDimensions->groupCountX, - pThreadGroupDimensions->groupCountY, - pThreadGroupDimensions->groupCountZ); } NEO::GraphicsAllocation *eventAlloc = nullptr; uint64_t eventAddress = 0; diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h index 77f906f08b..f966bb7e43 100644 --- a/level_zero/core/source/kernel/kernel.h +++ b/level_zero/core/source/kernel/kernel.h @@ -116,8 +116,6 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI { virtual ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) = 0; virtual void patchGlobalOffset() = 0; - virtual void patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0; - virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType, bool isEngineInstanced) = 0; virtual ze_result_t setCacheConfig(ze_cache_config_flags_t flags) = 0; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index a12d6b450e..c17005e65d 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -280,6 +280,18 @@ void KernelImp::setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32 uint32_t groupCount[3] = {groupCountX, groupCountY, groupCountZ}; NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.numWorkGroups, groupCount); + + uint32_t workDim = 1; + if (groupCountZ * groupSize[2] > 1) { + workDim = 3; + } else if (groupCountY * groupSize[1] > 1) { + workDim = 2; + } + auto workDimOffset = desc.payloadMappings.dispatchTraits.workDim; + if (NEO::isValidOffset(workDimOffset)) { + auto destinationBuffer = ArrayRef(crossThreadData.get(), crossThreadDataSize); + NEO::patchNonPointer(destinationBuffer, desc.payloadMappings.dispatchTraits.workDim, workDim); + } } ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, @@ -924,21 +936,6 @@ void KernelImp::patchGlobalOffset() { NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.globalWorkOffset, this->globalOffsets); } -void KernelImp::patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) { - const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor(); - auto dataOffset = kernelDescriptor.payloadMappings.dispatchTraits.workDim; - if (NEO::isValidOffset(dataOffset)) { - auto destinationBuffer = ArrayRef(crossThreadData.get(), crossThreadDataSize); - uint32_t workDim = 1; - if (groupCountZ * groupSize[2] > 1) { - workDim = 3; - } else if (groupCountY * groupSize[1] > 1) { - workDim = 2; - } - NEO::patchNonPointer(destinationBuffer, kernelDescriptor.payloadMappings.dispatchTraits.workDim, workDim); - } -} - Kernel *Kernel::create(uint32_t productFamily, Module *module, const ze_kernel_desc_t *desc, ze_result_t *res) { UNRECOVERABLE_IF(productFamily >= IGFX_MAX_PRODUCT); diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 72868b5a18..c3beaf2b02 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -127,8 +127,6 @@ struct KernelImp : Kernel { ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) override; void patchGlobalOffset() override; - void patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override; - ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override; bool usesRayTracing() { return kernelImmData->getDescriptor().hasRTCalls(); diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index faa1b5f8a1..8077fd6f76 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1929,8 +1929,6 @@ HWTEST_F(KernelWorkDimTests, givenGroupCountsWhenPatchingWorkDimThenCrossThreadD createKernel(kernel.get()); kernel->setCrossThreadData(sizeof(uint32_t)); - kernel->patchWorkDim(1, 1, 1); - mockKernelImmData->mockKernelDescriptor->payloadMappings.dispatchTraits.workDim = 0x0u; auto destinationBuffer = ArrayRef(kernel->getCrossThreadData(), kernel->getCrossThreadDataSize()); @@ -1950,8 +1948,7 @@ HWTEST_F(KernelWorkDimTests, givenGroupCountsWhenPatchingWorkDimThenCrossThreadD for (auto &[groupSizeX, groupSizeY, groupSizeZ, groupCountX, groupCountY, groupCountZ, expectedWorkDim] : sizesCountsWorkDim) { ze_result_t res = kernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); EXPECT_EQ(res, ZE_RESULT_SUCCESS); - - kernel->patchWorkDim(groupCountX, groupCountY, groupCountZ); + kernel->setGroupCount(groupCountX, groupCountY, groupCountZ); EXPECT_EQ(*workDimInCrossThreadDataPtr, expectedWorkDim); } }