Correct dimension order in local ids generated for implicit args

When local ids are generated by HW, use the same dimension order for the local ids that the runtime generates for implicit args.
Move the common logic to a separate file.

Related-To: NEO-5081
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2022-02-03 17:00:25 +00:00 committed by Compute-Runtime-Automation
parent ca5a8162eb
commit b697d75695
15 changed files with 282 additions and 35 deletions

View File

@ -1028,19 +1028,17 @@ void KernelImp::patchImplicitArgs(void *&pOut) const {
if (!pImplicitArgs) {
return;
}
const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor();
const auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
auto dimensionOrder = NEO::ImplicitArgsHelper::getDimensionOrderForLocalIds(kernelAttributes.workgroupDimensionsOrder, kernelRequiresGenerationOfLocalIdsByRuntime, requiredWorkgroupOrder);
NEO::generateLocalIDs(
pOut,
static_cast<uint16_t>(kernelDescriptor.kernelAttributes.simdSize),
static_cast<uint16_t>(kernelAttributes.simdSize),
std::array<uint16_t, 3>{{static_cast<uint16_t>(groupSize[0]),
static_cast<uint16_t>(groupSize[1]),
static_cast<uint16_t>(groupSize[2])}},
std::array<uint8_t, 3>{{
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0],
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1],
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2],
}},
dimensionOrder,
false, grfSize);
auto sizeForLocalIdsProgramming = getSizeForImplicitArgsPatching() - sizeof(NEO::ImplicitArgs);
pOut = ptrOffset(pOut, sizeForLocalIdsProgramming);

View File

@ -122,7 +122,9 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
using KernelImp::crossThreadDataSize;
using KernelImp::kernelArgHandlers;
using KernelImp::kernelHasIndirectAccess;
using KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
using KernelImp::privateMemoryGraphicsAllocation;
using KernelImp::requiredWorkgroupOrder;
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
}

View File

@ -998,6 +998,85 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsWhenAppendLa
alignedFree(expectedLocalIds);
}
HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsAndHwGeneratedLocalIdsWhenAppendLaunchKernelThenImplicitArgsLocalIdsRespectWalkOrder) {
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
auto simd = kernelDescriptor->kernelAttributes.simdSize;
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[0] = 2;
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[1] = 1;
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[2] = 0;
createModuleFromBinary(0u, false, mockKernelImmData.get());
auto kernel = std::make_unique<MockKernel>(module.get());
ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
kernel->initialize(&kernelDesc);
kernel->kernelRequiresGenerationOfLocalIdsByRuntime = false;
kernel->requiredWorkgroupOrder = 2; // walk order 1 0 2
EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
ASSERT_NE(nullptr, kernel->getImplicitArgs());
kernel->setGroupSize(4, 5, 6);
kernel->setGroupCount(3, 2, 1);
kernel->setGlobalOffsetExp(1, 2, 3);
kernel->patchGlobalOffset();
ze_result_t result{};
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto indirectHeap = commandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
memset(indirectHeap->getSpace(0), 0, kernel->getSizeForImplicitArgsPatching());
ze_group_count_t groupCount{3, 2, 1};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + kernel->getSizeForImplicitArgsPatching());
ImplicitArgs expectedImplicitArgs{sizeof(ImplicitArgs)};
expectedImplicitArgs.numWorkDim = 3;
expectedImplicitArgs.simdWidth = simd;
expectedImplicitArgs.localSizeX = 4;
expectedImplicitArgs.localSizeY = 5;
expectedImplicitArgs.localSizeZ = 6;
expectedImplicitArgs.globalSizeX = 12;
expectedImplicitArgs.globalSizeY = 10;
expectedImplicitArgs.globalSizeZ = 6;
expectedImplicitArgs.globalOffsetX = 1;
expectedImplicitArgs.globalOffsetY = 2;
expectedImplicitArgs.globalOffsetZ = 3;
expectedImplicitArgs.groupCountX = 3;
expectedImplicitArgs.groupCountY = 2;
expectedImplicitArgs.groupCountZ = 1;
expectedImplicitArgs.localIdTablePtr = indirectHeap->getGraphicsAllocation()->getGpuAddress();
expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress();
auto sizeForImplicitArgPatching = kernel->getSizeForImplicitArgsPatching();
EXPECT_LT(0u, sizeForImplicitArgPatching);
auto localIdsProgrammingSize = sizeForImplicitArgPatching - sizeof(ImplicitArgs);
auto expectedLocalIds = alignedMalloc(localIdsProgrammingSize, 64);
memset(expectedLocalIds, 0, localIdsProgrammingSize);
constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
NEO::generateLocalIDs(expectedLocalIds, simd,
std::array<uint16_t, 3>{{4, 5, 6}},
std::array<uint8_t, 3>{{1, 0, 2}},
false, grfSize);
EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeap->getCpuBase(), localIdsProgrammingSize));
auto pImplicitArgs = reinterpret_cast<ImplicitArgs *>(ptrOffset(indirectHeap->getCpuBase(), localIdsProgrammingSize));
EXPECT_EQ(0, memcmp(&expectedImplicitArgs, pImplicitArgs, sizeof(ImplicitArgs)));
alignedFree(expectedLocalIds);
}
HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithoutImplicitArgsWhenAppendLaunchKernelThenImplicitArgsAreNotSentToIndirectHeap) {
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;

View File

@ -219,15 +219,28 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
auto pImplicitArgs = kernel.getImplicitArgs();
if (pImplicitArgs) {
constexpr uint32_t grfSize = sizeof(typename GfxFamily::GRF);
const auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes;
uint32_t requiredWalkOrder = 0u;
auto generationOfLocalIdsByRuntime = EncodeDispatchKernel<GfxFamily>::isRuntimeLocalIdsGenerationRequired(
3,
localWorkSize,
std::array<uint8_t, 3>{
{kernelAttributes.workgroupWalkOrder[0],
kernelAttributes.workgroupWalkOrder[1],
kernelAttributes.workgroupWalkOrder[2]}},
kernelAttributes.flags.requiresWorkgroupWalkOrder,
requiredWalkOrder,
simd);
auto dimensionOrder = ImplicitArgsHelper::getDimensionOrderForLocalIds(kernelAttributes.workgroupDimensionsOrder, generationOfLocalIdsByRuntime, requiredWalkOrder);
auto offsetLocalIds = sendPerThreadData(
ioh,
simd,
grfSize,
3u, // all channels for implicit args
std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSize[0]), static_cast<uint16_t>(localWorkSize[1]), static_cast<uint16_t>(localWorkSize[2])}},
{{kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0],
kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1],
kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}},
dimensionOrder,
kernel.usesOnlyImages());
pImplicitArgs->localIdTablePtr = offsetLocalIds + ioh.getGraphicsAllocation()->getGpuAddress();

View File

@ -1261,7 +1261,9 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeapAllocation);
}
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableHwGenerationLocalIds.set(0);
auto pKernelInfo = std::make_unique<MockKernelInfo>();
uint32_t simd = 32;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd;
@ -1330,6 +1332,74 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndHwLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
auto pKernelInfo = std::make_unique<MockKernelInfo>();
uint32_t simd = 32;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd;
pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 2;
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1;
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0;
MockContext context(pClDevice);
CommandQueueHw<FamilyType> cmdQ(&context, pClDevice, 0, false);
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
MockKernel kernel(&program, *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
const size_t localWorkSizeX = 2;
const size_t localWorkSizeY = 3;
const size_t localWorkSizeZ = 4;
const size_t localWorkSizes[3]{localWorkSizeX, localWorkSizeY, localWorkSizeZ};
std::array<uint8_t, 3> expectedDimOrder = {0, 2, 1};
auto &commandStream = cmdQ.getCS(1024);
auto pWalkerCmd = reinterpret_cast<typename FamilyType::WALKER_TYPE *>(commandStream.getSpace(0));
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192);
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192);
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192);
dsh.align(EncodeStates<FamilyType>::alignInterfaceDescriptorData);
auto interfaceDescriptor = reinterpret_cast<typename FamilyType::INTERFACE_DESCRIPTOR_DATA *>(dsh.getSpace(0));
uint32_t interfaceDescriptorIndex = 0u;
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
kernel,
0u,
simd,
localWorkSizes,
0u,
interfaceDescriptorIndex,
pDevice->getPreemptionMode(),
pWalkerCmd,
interfaceDescriptor,
false,
*pDevice);
constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, 3u, localWorkSize);
ASSERT_LE(expectedIohSize, ioh.getUsed());
auto expectedLocalIds = alignedMalloc(expectedIohSize, 64);
generateLocalIDs(expectedLocalIds, simd,
std::array<uint16_t, 3>{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}},
expectedDimOrder,
false, grfSize);
EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize));
alignedFree(expectedLocalIds);
auto pImplicitArgs = reinterpret_cast<ImplicitArgs *>(ptrOffset(ioh.getCpuBase(), alignUp(expectedIohSize, MemoryConstants::cacheLineSize)));
EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr);
}
using HardwareCommandsTestXeHpAndLater = HardwareCommandsTest;
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, givenIndirectHeapNotAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenOffsetZeroIsReturned) {

View File

@ -64,7 +64,7 @@ struct EncodeDispatchKernel {
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
size_t *lws,
const size_t *lws,
std::array<uint8_t, 3> walkOrder,
bool requireInputWalkOrder,
uint32_t &requiredWalkOrder,

View File

@ -263,7 +263,7 @@ void EncodeMediaInterfaceDescriptorLoad<Family>::encode(CommandContainer &contai
template <typename Family>
inline bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
size_t *lws,
const size_t *lws,
std::array<uint8_t, 3> walkOrder,
bool requireInputWalkOrder,
uint32_t &requiredWalkOrder,

View File

@ -19,6 +19,7 @@
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_walk_order.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/pipeline_select_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h"
@ -298,7 +299,7 @@ inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const Har
template <typename Family>
bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
size_t *lws,
const size_t *lws,
std::array<uint8_t, 3> walkOrder,
bool requireInputWalkOrder,
uint32_t &requiredWalkOrder,
@ -324,18 +325,6 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
return true;
}
//make sure table below matches Hardware Spec
constexpr uint32_t walkOrderPossibilties = 6u;
constexpr uint8_t X = 0;
constexpr uint8_t Y = 1;
constexpr uint8_t Z = 2;
constexpr uint8_t possibleWalkOrders[walkOrderPossibilties][3] = {{X, Y, Z}, // 0 1 2
{X, Z, Y}, // 0 2 1
{Y, X, Z}, // 1 0 2
{Z, X, Y}, // 1 2 0
{Y, Z, X}, // 2 0 1
{Z, Y, X}}; // 2 1 0
//check if we need to follow kernel requirements
if (requireInputWalkOrder) {
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
@ -345,24 +334,24 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
}
auto index = 0u;
while (index < walkOrderPossibilties) {
if (walkOrder[0] == possibleWalkOrders[index][0] &&
walkOrder[1] == possibleWalkOrders[index][1]) {
while (index < HwWalkOrderHelper::walkOrderPossibilties) {
if (walkOrder[0] == HwWalkOrderHelper::compatibleDimensionOrders[index][0] &&
walkOrder[1] == HwWalkOrderHelper::compatibleDimensionOrders[index][1]) {
break;
};
index++;
}
DEBUG_BREAK_IF(index >= walkOrderPossibilties);
DEBUG_BREAK_IF(index >= HwWalkOrderHelper::walkOrderPossibilties);
requiredWalkOrder = index;
return false;
}
//kernel doesn't specify any walk order requirements, check if we have any compatible
for (uint32_t walkOrder = 0; walkOrder < walkOrderPossibilties; walkOrder++) {
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
bool allDimensionsCompatible = true;
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
if (!Math::isPow2<size_t>(lws[possibleWalkOrders[walkOrder][dimension]])) {
if (!Math::isPow2<size_t>(lws[HwWalkOrderHelper::compatibleDimensionOrders[walkOrder][dimension]])) {
allDimensionsCompatible = false;
break;
}

View File

@ -74,6 +74,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}hw_info_extended.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_walk_order.h
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <array>
#include <cstdint>
namespace NEO {
namespace HwWalkOrderHelper {
// Shared lookup data for hardware walk orders.
//
// compatibleDimensionOrders is indexed by the HW walk order value (0 .. walkOrderPossibilties - 1);
// each entry is the dimension order (X = 0, Y = 1, Z = 2) associated with that walk order.
//
// The variables are declared `inline` so that every translation unit including this header
// refers to the same objects instead of getting its own internal-linkage copy.
//
// NOTE(review): the trailing "a b c" comment on each row appears to list the position of
// X, Y and Z within the walk (the inverse permutation of the row) — confirm against the spec.
//make sure table below matches Hardware Spec
inline constexpr uint32_t walkOrderPossibilties = 6u;
inline constexpr uint8_t X = 0;
inline constexpr uint8_t Y = 1;
inline constexpr uint8_t Z = 2;
inline constexpr std::array<uint8_t, 3> compatibleDimensionOrders[walkOrderPossibilties] = {{X, Y, Z},  // 0 1 2
                                                                                            {X, Z, Y},  // 0 2 1
                                                                                            {Y, X, Z},  // 1 0 2
                                                                                            {Z, X, Y},  // 1 2 0
                                                                                            {Y, Z, X},  // 2 0 1
                                                                                            {Z, Y, X}}; // 2 1 0
} // namespace HwWalkOrderHelper
} // namespace NEO

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2021 Intel Corporation
# Copyright (C) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -10,6 +10,7 @@ set(NEO_CORE_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_kernel_encoder_interface.h
${CMAKE_CURRENT_SOURCE_DIR}/grf_config.h
${CMAKE_CURRENT_SOURCE_DIR}/implicit_args.h
${CMAKE_CURRENT_SOURCE_DIR}/implicit_args_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_device_side_enqueue.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_vme.h

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -7,6 +7,7 @@
#pragma once
#include <array>
#include <cstdint>
#include <type_traits>
@ -36,4 +37,8 @@ static_assert((sizeof(ImplicitArgs) & 31) == 0, "Implicit args size need to be a
static_assert(std::is_pod<ImplicitArgs>::value);
constexpr const char *implicitArgsRelocationSymbolName = "INTEL_PATCH_CROSS_THREAD_OFFSET_OFF_R0";
namespace ImplicitArgsHelper {
std::array<uint8_t, 3> getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, bool generationOfLocalIdsByRuntime, uint32_t walkOrderForHwGenerationOfLocalIds);
}
} // namespace NEO

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/hw_walk_order.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/source/kernel/kernel_descriptor.h"
namespace NEO {
std::array<uint8_t, 3> ImplicitArgsHelper::getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, bool generationOfLocalIdsByRuntime, uint32_t walkOrderForHwGenerationOfLocalIds) {
if (generationOfLocalIdsByRuntime) {
UNRECOVERABLE_IF(!workgroupDimensionsOrder);
return {{
workgroupDimensionsOrder[0],
workgroupDimensionsOrder[1],
workgroupDimensionsOrder[2],
}};
}
UNRECOVERABLE_IF(walkOrderForHwGenerationOfLocalIds >= HwWalkOrderHelper::walkOrderPossibilties);
return HwWalkOrderHelper::compatibleDimensionOrders[walkOrderForHwGenerationOfLocalIds];
}
} // namespace NEO

View File

@ -1,11 +1,12 @@
#
# Copyright (C) 2020-2021 Intel Corporation
# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/implicit_args_helper_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens_tests.cpp

View File

@ -0,0 +1,34 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/hw_walk_order.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/test/common/test_macros/test.h"
using namespace NEO;
// With runtime generation requested, the returned order must equal the input order for
// every valid HW walk order value passed alongside it.
TEST(ImplicitArgsHelperTest, whenLocalIdsAreGeneratedByRuntimeThenDimensionOrderIsTakedFromInput) {
    for (uint32_t hwWalkOrder = 0u; hwWalkOrder < HwWalkOrderHelper::walkOrderPossibilties; ++hwWalkOrder) {
        uint8_t requestedOrder[3] = {2, 0, 1};
        const auto returnedOrder = ImplicitArgsHelper::getDimensionOrderForLocalIds(requestedOrder, true, hwWalkOrder);
        for (auto dim = 0u; dim < 3u; ++dim) {
            EXPECT_EQ(requestedOrder[dim], returnedOrder[dim]);
        }
    }
}
// Both invalid inputs are expected to surface as std::runtime_error:
// a null dimension order on the runtime path, and an out-of-range walk order on the HW path.
TEST(ImplicitArgsHelperTest, givenIncorrectcInputWhenGettingDimensionOrderThenAbortIsCalled) {
    const uint8_t *missingDimensionOrder = nullptr;
    constexpr uint32_t outOfRangeWalkOrder = HwWalkOrderHelper::walkOrderPossibilties;

    EXPECT_THROW(ImplicitArgsHelper::getDimensionOrderForLocalIds(missingDimensionOrder, true, 0), std::runtime_error);
    EXPECT_THROW(ImplicitArgsHelper::getDimensionOrderForLocalIds(missingDimensionOrder, false, outOfRangeWalkOrder), std::runtime_error);
}
// With HW generation requested, each walk order value must map to the matching row of
// HwWalkOrderHelper::compatibleDimensionOrders; the input dimension order is not needed.
TEST(ImplicitArgsHelperTest, whenLocalIdsAreGeneratedByHwThenProperDimensionOrderIsReturned) {
    uint32_t hwWalkOrder = 0u;
    for (const auto &expectedOrder : HwWalkOrderHelper::compatibleDimensionOrders) {
        auto returnedOrder = ImplicitArgsHelper::getDimensionOrderForLocalIds(nullptr, false, hwWalkOrder);
        EXPECT_EQ(expectedOrder, returnedOrder);
        ++hwWalkOrder;
    }
}