Correct dimension order in local ids generated for implicit args
When local ids are generated by HW, use the same dimension order for runtime generation. Move common logic to a separate file. Related-To: NEO-5081 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
parent
ca5a8162eb
commit
b697d75695
|
@ -1028,19 +1028,17 @@ void KernelImp::patchImplicitArgs(void *&pOut) const {
|
|||
if (!pImplicitArgs) {
|
||||
return;
|
||||
}
|
||||
const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor();
|
||||
const auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
|
||||
auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
|
||||
auto dimensionOrder = NEO::ImplicitArgsHelper::getDimensionOrderForLocalIds(kernelAttributes.workgroupDimensionsOrder, kernelRequiresGenerationOfLocalIdsByRuntime, requiredWorkgroupOrder);
|
||||
|
||||
NEO::generateLocalIDs(
|
||||
pOut,
|
||||
static_cast<uint16_t>(kernelDescriptor.kernelAttributes.simdSize),
|
||||
static_cast<uint16_t>(kernelAttributes.simdSize),
|
||||
std::array<uint16_t, 3>{{static_cast<uint16_t>(groupSize[0]),
|
||||
static_cast<uint16_t>(groupSize[1]),
|
||||
static_cast<uint16_t>(groupSize[2])}},
|
||||
std::array<uint8_t, 3>{{
|
||||
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0],
|
||||
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1],
|
||||
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2],
|
||||
}},
|
||||
dimensionOrder,
|
||||
false, grfSize);
|
||||
auto sizeForLocalIdsProgramming = getSizeForImplicitArgsPatching() - sizeof(NEO::ImplicitArgs);
|
||||
pOut = ptrOffset(pOut, sizeForLocalIdsProgramming);
|
||||
|
|
|
@ -122,7 +122,9 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
|||
using KernelImp::crossThreadDataSize;
|
||||
using KernelImp::kernelArgHandlers;
|
||||
using KernelImp::kernelHasIndirectAccess;
|
||||
using KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
using KernelImp::privateMemoryGraphicsAllocation;
|
||||
using KernelImp::requiredWorkgroupOrder;
|
||||
|
||||
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
|
||||
}
|
||||
|
|
|
@ -998,6 +998,85 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsWhenAppendLa
|
|||
|
||||
alignedFree(expectedLocalIds);
|
||||
}
|
||||
HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsAndHwGeneratedLocalIdsWhenAppendLaunchKernelThenImplicitArgsLocalIdsRespectWalkOrder) {
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
|
||||
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
|
||||
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
auto simd = kernelDescriptor->kernelAttributes.simdSize;
|
||||
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[0] = 2;
|
||||
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[1] = 1;
|
||||
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[2] = 0;
|
||||
createModuleFromBinary(0u, false, mockKernelImmData.get());
|
||||
|
||||
auto kernel = std::make_unique<MockKernel>(module.get());
|
||||
|
||||
ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
|
||||
kernel->initialize(&kernelDesc);
|
||||
kernel->kernelRequiresGenerationOfLocalIdsByRuntime = false;
|
||||
kernel->requiredWorkgroupOrder = 2; // walk order 1 0 2
|
||||
|
||||
EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
|
||||
ASSERT_NE(nullptr, kernel->getImplicitArgs());
|
||||
|
||||
kernel->setGroupSize(4, 5, 6);
|
||||
kernel->setGroupCount(3, 2, 1);
|
||||
kernel->setGlobalOffsetExp(1, 2, 3);
|
||||
kernel->patchGlobalOffset();
|
||||
|
||||
ze_result_t result{};
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto indirectHeap = commandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
|
||||
memset(indirectHeap->getSpace(0), 0, kernel->getSizeForImplicitArgsPatching());
|
||||
|
||||
ze_group_count_t groupCount{3, 2, 1};
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
|
||||
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
|
||||
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + kernel->getSizeForImplicitArgsPatching());
|
||||
|
||||
ImplicitArgs expectedImplicitArgs{sizeof(ImplicitArgs)};
|
||||
expectedImplicitArgs.numWorkDim = 3;
|
||||
expectedImplicitArgs.simdWidth = simd;
|
||||
expectedImplicitArgs.localSizeX = 4;
|
||||
expectedImplicitArgs.localSizeY = 5;
|
||||
expectedImplicitArgs.localSizeZ = 6;
|
||||
expectedImplicitArgs.globalSizeX = 12;
|
||||
expectedImplicitArgs.globalSizeY = 10;
|
||||
expectedImplicitArgs.globalSizeZ = 6;
|
||||
expectedImplicitArgs.globalOffsetX = 1;
|
||||
expectedImplicitArgs.globalOffsetY = 2;
|
||||
expectedImplicitArgs.globalOffsetZ = 3;
|
||||
expectedImplicitArgs.groupCountX = 3;
|
||||
expectedImplicitArgs.groupCountY = 2;
|
||||
expectedImplicitArgs.groupCountZ = 1;
|
||||
expectedImplicitArgs.localIdTablePtr = indirectHeap->getGraphicsAllocation()->getGpuAddress();
|
||||
expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress();
|
||||
|
||||
auto sizeForImplicitArgPatching = kernel->getSizeForImplicitArgsPatching();
|
||||
|
||||
EXPECT_LT(0u, sizeForImplicitArgPatching);
|
||||
|
||||
auto localIdsProgrammingSize = sizeForImplicitArgPatching - sizeof(ImplicitArgs);
|
||||
|
||||
auto expectedLocalIds = alignedMalloc(localIdsProgrammingSize, 64);
|
||||
memset(expectedLocalIds, 0, localIdsProgrammingSize);
|
||||
constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
|
||||
NEO::generateLocalIDs(expectedLocalIds, simd,
|
||||
std::array<uint16_t, 3>{{4, 5, 6}},
|
||||
std::array<uint8_t, 3>{{1, 0, 2}},
|
||||
false, grfSize);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeap->getCpuBase(), localIdsProgrammingSize));
|
||||
auto pImplicitArgs = reinterpret_cast<ImplicitArgs *>(ptrOffset(indirectHeap->getCpuBase(), localIdsProgrammingSize));
|
||||
EXPECT_EQ(0, memcmp(&expectedImplicitArgs, pImplicitArgs, sizeof(ImplicitArgs)));
|
||||
|
||||
alignedFree(expectedLocalIds);
|
||||
}
|
||||
HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithoutImplicitArgsWhenAppendLaunchKernelThenImplicitArgsAreNotSentToIndirectHeap) {
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
|
||||
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
|
||||
|
|
|
@ -219,15 +219,28 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
|||
auto pImplicitArgs = kernel.getImplicitArgs();
|
||||
if (pImplicitArgs) {
|
||||
constexpr uint32_t grfSize = sizeof(typename GfxFamily::GRF);
|
||||
const auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes;
|
||||
uint32_t requiredWalkOrder = 0u;
|
||||
auto generationOfLocalIdsByRuntime = EncodeDispatchKernel<GfxFamily>::isRuntimeLocalIdsGenerationRequired(
|
||||
3,
|
||||
localWorkSize,
|
||||
std::array<uint8_t, 3>{
|
||||
{kernelAttributes.workgroupWalkOrder[0],
|
||||
kernelAttributes.workgroupWalkOrder[1],
|
||||
kernelAttributes.workgroupWalkOrder[2]}},
|
||||
kernelAttributes.flags.requiresWorkgroupWalkOrder,
|
||||
requiredWalkOrder,
|
||||
simd);
|
||||
|
||||
auto dimensionOrder = ImplicitArgsHelper::getDimensionOrderForLocalIds(kernelAttributes.workgroupDimensionsOrder, generationOfLocalIdsByRuntime, requiredWalkOrder);
|
||||
|
||||
auto offsetLocalIds = sendPerThreadData(
|
||||
ioh,
|
||||
simd,
|
||||
grfSize,
|
||||
3u, // all channels for implicit args
|
||||
std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSize[0]), static_cast<uint16_t>(localWorkSize[1]), static_cast<uint16_t>(localWorkSize[2])}},
|
||||
{{kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0],
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1],
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}},
|
||||
dimensionOrder,
|
||||
kernel.usesOnlyImages());
|
||||
|
||||
pImplicitArgs->localIdTablePtr = offsetLocalIds + ioh.getGraphicsAllocation()->getGpuAddress();
|
||||
|
|
|
@ -1261,7 +1261,9 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
|
|||
pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeapAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
|
||||
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableHwGenerationLocalIds.set(0);
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
uint32_t simd = 32;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd;
|
||||
|
@ -1330,6 +1332,74 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
|
|||
EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndHwLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
uint32_t simd = 32;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 2;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0;
|
||||
|
||||
MockContext context(pClDevice);
|
||||
CommandQueueHw<FamilyType> cmdQ(&context, pClDevice, 0, false);
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
|
||||
MockKernel kernel(&program, *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
const size_t localWorkSizeX = 2;
|
||||
const size_t localWorkSizeY = 3;
|
||||
const size_t localWorkSizeZ = 4;
|
||||
const size_t localWorkSizes[3]{localWorkSizeX, localWorkSizeY, localWorkSizeZ};
|
||||
std::array<uint8_t, 3> expectedDimOrder = {0, 2, 1};
|
||||
|
||||
auto &commandStream = cmdQ.getCS(1024);
|
||||
auto pWalkerCmd = reinterpret_cast<typename FamilyType::WALKER_TYPE *>(commandStream.getSpace(0));
|
||||
|
||||
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192);
|
||||
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192);
|
||||
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192);
|
||||
|
||||
dsh.align(EncodeStates<FamilyType>::alignInterfaceDescriptorData);
|
||||
auto interfaceDescriptor = reinterpret_cast<typename FamilyType::INTERFACE_DESCRIPTOR_DATA *>(dsh.getSpace(0));
|
||||
uint32_t interfaceDescriptorIndex = 0u;
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
kernel,
|
||||
0u,
|
||||
simd,
|
||||
localWorkSizes,
|
||||
0u,
|
||||
interfaceDescriptorIndex,
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
interfaceDescriptor,
|
||||
false,
|
||||
*pDevice);
|
||||
|
||||
constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
|
||||
size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
|
||||
size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, 3u, localWorkSize);
|
||||
ASSERT_LE(expectedIohSize, ioh.getUsed());
|
||||
|
||||
auto expectedLocalIds = alignedMalloc(expectedIohSize, 64);
|
||||
generateLocalIDs(expectedLocalIds, simd,
|
||||
std::array<uint16_t, 3>{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}},
|
||||
expectedDimOrder,
|
||||
false, grfSize);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize));
|
||||
alignedFree(expectedLocalIds);
|
||||
|
||||
auto pImplicitArgs = reinterpret_cast<ImplicitArgs *>(ptrOffset(ioh.getCpuBase(), alignUp(expectedIohSize, MemoryConstants::cacheLineSize)));
|
||||
EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr);
|
||||
}
|
||||
|
||||
using HardwareCommandsTestXeHpAndLater = HardwareCommandsTest;
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, givenIndirectHeapNotAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenOffsetZeroIsReturned) {
|
||||
|
|
|
@ -64,7 +64,7 @@ struct EncodeDispatchKernel {
|
|||
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
|
||||
|
||||
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
size_t *lws,
|
||||
const size_t *lws,
|
||||
std::array<uint8_t, 3> walkOrder,
|
||||
bool requireInputWalkOrder,
|
||||
uint32_t &requiredWalkOrder,
|
||||
|
|
|
@ -263,7 +263,7 @@ void EncodeMediaInterfaceDescriptorLoad<Family>::encode(CommandContainer &contai
|
|||
|
||||
template <typename Family>
|
||||
inline bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
size_t *lws,
|
||||
const size_t *lws,
|
||||
std::array<uint8_t, 3> walkOrder,
|
||||
bool requireInputWalkOrder,
|
||||
uint32_t &requiredWalkOrder,
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/hw_walk_order.h"
|
||||
#include "shared/source/helpers/pipe_control_args.h"
|
||||
#include "shared/source/helpers/pipeline_select_helper.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
|
@ -298,7 +299,7 @@ inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const Har
|
|||
|
||||
template <typename Family>
|
||||
bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
size_t *lws,
|
||||
const size_t *lws,
|
||||
std::array<uint8_t, 3> walkOrder,
|
||||
bool requireInputWalkOrder,
|
||||
uint32_t &requiredWalkOrder,
|
||||
|
@ -324,18 +325,6 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
|
|||
return true;
|
||||
}
|
||||
|
||||
//make sure table below matches Hardware Spec
|
||||
constexpr uint32_t walkOrderPossibilties = 6u;
|
||||
constexpr uint8_t X = 0;
|
||||
constexpr uint8_t Y = 1;
|
||||
constexpr uint8_t Z = 2;
|
||||
constexpr uint8_t possibleWalkOrders[walkOrderPossibilties][3] = {{X, Y, Z}, // 0 1 2
|
||||
{X, Z, Y}, // 0 2 1
|
||||
{Y, X, Z}, // 1 0 2
|
||||
{Z, X, Y}, // 1 2 0
|
||||
{Y, Z, X}, // 2 0 1
|
||||
{Z, Y, X}}; // 2 1 0
|
||||
|
||||
//check if we need to follow kernel requirements
|
||||
if (requireInputWalkOrder) {
|
||||
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
||||
|
@ -345,24 +334,24 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
|
|||
}
|
||||
|
||||
auto index = 0u;
|
||||
while (index < walkOrderPossibilties) {
|
||||
if (walkOrder[0] == possibleWalkOrders[index][0] &&
|
||||
walkOrder[1] == possibleWalkOrders[index][1]) {
|
||||
while (index < HwWalkOrderHelper::walkOrderPossibilties) {
|
||||
if (walkOrder[0] == HwWalkOrderHelper::compatibleDimensionOrders[index][0] &&
|
||||
walkOrder[1] == HwWalkOrderHelper::compatibleDimensionOrders[index][1]) {
|
||||
break;
|
||||
};
|
||||
index++;
|
||||
}
|
||||
DEBUG_BREAK_IF(index >= walkOrderPossibilties);
|
||||
DEBUG_BREAK_IF(index >= HwWalkOrderHelper::walkOrderPossibilties);
|
||||
|
||||
requiredWalkOrder = index;
|
||||
return false;
|
||||
}
|
||||
|
||||
//kernel doesn't specify any walk order requirements, check if we have any compatible
|
||||
for (uint32_t walkOrder = 0; walkOrder < walkOrderPossibilties; walkOrder++) {
|
||||
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
|
||||
bool allDimensionsCompatible = true;
|
||||
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
||||
if (!Math::isPow2<size_t>(lws[possibleWalkOrders[walkOrder][dimension]])) {
|
||||
if (!Math::isPow2<size_t>(lws[HwWalkOrderHelper::compatibleDimensionOrders[walkOrder][dimension]])) {
|
||||
allDimensionsCompatible = false;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -74,6 +74,7 @@ set(NEO_CORE_HELPERS
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}hw_info_extended.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_walk_order.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
namespace HwWalkOrderHelper {
|
||||
//make sure table below matches Hardware Spec
|
||||
constexpr uint32_t walkOrderPossibilties = 6u;
|
||||
constexpr uint8_t X = 0;
|
||||
constexpr uint8_t Y = 1;
|
||||
constexpr uint8_t Z = 2;
|
||||
constexpr std::array<uint8_t, 3> compatibleDimensionOrders[walkOrderPossibilties] = {{X, Y, Z}, // 0 1 2
|
||||
{X, Z, Y}, // 0 2 1
|
||||
{Y, X, Z}, // 1 0 2
|
||||
{Z, X, Y}, // 1 2 0
|
||||
{Y, Z, X}, // 2 0 1
|
||||
{Z, Y, X}}; // 2 1 0
|
||||
} // namespace HwWalkOrderHelper
|
||||
} // namespace NEO
|
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (C) 2019-2021 Intel Corporation
|
||||
# Copyright (C) 2019-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
@ -10,6 +10,7 @@ set(NEO_CORE_KERNEL
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_kernel_encoder_interface.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/grf_config.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/implicit_args.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/implicit_args_helper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_device_side_enqueue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_vme.h
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -7,6 +7,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
|
@ -36,4 +37,8 @@ static_assert((sizeof(ImplicitArgs) & 31) == 0, "Implicit args size need to be a
|
|||
static_assert(std::is_pod<ImplicitArgs>::value);
|
||||
|
||||
constexpr const char *implicitArgsRelocationSymbolName = "INTEL_PATCH_CROSS_THREAD_OFFSET_OFF_R0";
|
||||
|
||||
namespace ImplicitArgsHelper {
|
||||
std::array<uint8_t, 3> getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, bool generationOfLocalIdsByRuntime, uint32_t walkOrderForHwGenerationOfLocalIds);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/hw_walk_order.h"
|
||||
#include "shared/source/kernel/implicit_args.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
std::array<uint8_t, 3> ImplicitArgsHelper::getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, bool generationOfLocalIdsByRuntime, uint32_t walkOrderForHwGenerationOfLocalIds) {
|
||||
if (generationOfLocalIdsByRuntime) {
|
||||
UNRECOVERABLE_IF(!workgroupDimensionsOrder);
|
||||
return {{
|
||||
workgroupDimensionsOrder[0],
|
||||
workgroupDimensionsOrder[1],
|
||||
workgroupDimensionsOrder[2],
|
||||
}};
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(walkOrderForHwGenerationOfLocalIds >= HwWalkOrderHelper::walkOrderPossibilties);
|
||||
return HwWalkOrderHelper::compatibleDimensionOrders[walkOrderForHwGenerationOfLocalIds];
|
||||
}
|
||||
} // namespace NEO
|
|
@ -1,11 +1,12 @@
|
|||
#
|
||||
# Copyright (C) 2020-2021 Intel Corporation
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/implicit_args_helper_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens_tests.cpp
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/hw_walk_order.h"
|
||||
#include "shared/source/kernel/implicit_args.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
TEST(ImplicitArgsHelperTest, whenLocalIdsAreGeneratedByRuntimeThenDimensionOrderIsTakedFromInput) {
|
||||
for (auto i = 0u; i < HwWalkOrderHelper::walkOrderPossibilties; i++) {
|
||||
uint8_t inputDimensionOrder[3] = {2, 0, 1};
|
||||
auto dimOrderForImplicitArgs = ImplicitArgsHelper::getDimensionOrderForLocalIds(inputDimensionOrder, true, i);
|
||||
EXPECT_EQ(inputDimensionOrder[0], dimOrderForImplicitArgs[0]);
|
||||
EXPECT_EQ(inputDimensionOrder[1], dimOrderForImplicitArgs[1]);
|
||||
EXPECT_EQ(inputDimensionOrder[2], dimOrderForImplicitArgs[2]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(ImplicitArgsHelperTest, givenIncorrectcInputWhenGettingDimensionOrderThenAbortIsCalled) {
|
||||
EXPECT_THROW(ImplicitArgsHelper::getDimensionOrderForLocalIds(nullptr, true, 0), std::runtime_error);
|
||||
EXPECT_THROW(ImplicitArgsHelper::getDimensionOrderForLocalIds(nullptr, false, HwWalkOrderHelper::walkOrderPossibilties), std::runtime_error);
|
||||
}
|
||||
|
||||
TEST(ImplicitArgsHelperTest, whenLocalIdsAreGeneratedByHwThenProperDimensionOrderIsReturned) {
|
||||
for (auto i = 0u; i < HwWalkOrderHelper::walkOrderPossibilties; i++) {
|
||||
auto dimOrderForImplicitArgs = ImplicitArgsHelper::getDimensionOrderForLocalIds(nullptr, false, i);
|
||||
EXPECT_EQ(HwWalkOrderHelper::compatibleDimensionOrders[i], dimOrderForImplicitArgs);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue