mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Correct programming of implicit args on pre-XeHp platforms
On pre-XeHp platforms implicit args aren't at the beginning of indirect data, GPU address of implicit args buffer is programmed within cross thread data Related-To: NEO-5081, IGC-4710 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
06a4d2cc02
commit
a2386ad216
@@ -660,7 +660,20 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelS
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhenAppendingThenMiMathCommandsForWorkGroupCountAndGlobalWorkSizeAndWorkDimAreProgrammed) {
|
||||
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
|
||||
|
||||
template <typename FamilyType>
|
||||
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {
|
||||
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
||||
auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor(), neoDevice->getHardwareInfo());
|
||||
return implicitArgsProgrammingSize - sizeof(ImplicitArgs);
|
||||
} else {
|
||||
return 0u;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWithImplicitArgsWhenAppendingThenMiMathCommandsForWorkGroupCountAndGlobalWorkSizeAndWorkDimAreProgrammed) {
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
@@ -669,12 +682,12 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhe
|
||||
Mock<::L0::Kernel> kernel;
|
||||
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
|
||||
kernel.module = pMockModule.get();
|
||||
kernel.immutableData.crossThreadDataSize = sizeof(uint64_t);
|
||||
kernel.pImplicitArgs.reset(new ImplicitArgs());
|
||||
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(*kernel.immutableData.kernelDescriptor);
|
||||
|
||||
kernel.setGroupSize(1, 1, 1);
|
||||
|
||||
auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor(), neoDevice->getHardwareInfo());
|
||||
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
|
||||
@@ -688,7 +701,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhe
|
||||
nullptr, 0, nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
auto heap = commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT);
|
||||
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + implicitArgsProgrammingSize - sizeof(ImplicitArgs);
|
||||
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + getIndirectHeapOffsetForImplicitArgsBuffer<FamilyType>(kernel);
|
||||
|
||||
auto workDimStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
workDimStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R0);
|
||||
workDimStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA);
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "shared/source/helpers/per_thread_data.h"
|
||||
#include "shared/source/utilities/software_tags_manager.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_compilers.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
@@ -937,6 +938,7 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
|
||||
expectedImplicitArgs.groupCountZ = 3;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void dispatchKernelWithImplicitArgs() {
|
||||
expectedImplicitArgs.globalSizeX = expectedImplicitArgs.localSizeX * expectedImplicitArgs.groupCountX;
|
||||
expectedImplicitArgs.globalSizeY = expectedImplicitArgs.localSizeY * expectedImplicitArgs.groupCountY;
|
||||
@@ -944,7 +946,8 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
|
||||
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
|
||||
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
|
||||
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
|
||||
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(*kernelDescriptor);
|
||||
kernelDescriptor->kernelAttributes.simdSize = expectedImplicitArgs.simdWidth;
|
||||
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[0] = workgroupDimOrder[0];
|
||||
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[1] = workgroupDimOrder[1];
|
||||
@@ -957,6 +960,7 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
|
||||
kernel->initialize(&kernelDesc);
|
||||
kernel->kernelRequiresGenerationOfLocalIdsByRuntime = kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
kernel->requiredWorkgroupOrder = requiredWorkgroupOrder;
|
||||
kernel->setCrossThreadData(sizeof(uint64_t));
|
||||
|
||||
EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
|
||||
ASSERT_NE(nullptr, kernel->getImplicitArgs());
|
||||
@@ -982,7 +986,9 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
|
||||
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
|
||||
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize);
|
||||
|
||||
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
|
||||
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
||||
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
|
||||
}
|
||||
expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress();
|
||||
}
|
||||
std::unique_ptr<L0::CommandList> commandList;
|
||||
@@ -994,7 +1000,8 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
|
||||
bool kernelRequiresGenerationOfLocalIdsByRuntime = true;
|
||||
uint32_t requiredWorkgroupOrder = 0;
|
||||
};
|
||||
HWTEST_F(CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicitArgsWhenAppendLaunchKernelThenImplicitArgsAreSentToIndirectHeap) {
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithLocalIds) {
|
||||
std::array<uint16_t, 3> localSize{2, 3, 4};
|
||||
size_t totalLocalSize = localSize[0] * localSize[1] * localSize[2];
|
||||
|
||||
@@ -1002,7 +1009,7 @@ HWTEST_F(CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicit
|
||||
expectedImplicitArgs.localSizeY = localSize[1];
|
||||
expectedImplicitArgs.localSizeZ = localSize[2];
|
||||
|
||||
dispatchKernelWithImplicitArgs();
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF));
|
||||
auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
|
||||
@@ -1017,7 +1024,20 @@ HWTEST_F(CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicit
|
||||
auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize);
|
||||
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
|
||||
}
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicitArgsAndHwGeneratedLocalIdsWhenAppendLaunchKernelThenImplicitArgsLocalIdsRespectWalkOrder) {
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenPreXeHpPlatformWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithoutLocalIds) {
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
auto implicitArgsInIndirectData = indirectHeapAllocation->getUnderlyingBuffer();
|
||||
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
|
||||
|
||||
auto crossThreadDataInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 0x80);
|
||||
|
||||
auto programmedImplicitArgsGpuVA = reinterpret_cast<uint64_t *>(crossThreadDataInIndirectData)[0];
|
||||
EXPECT_EQ(indirectHeapAllocation->getGpuAddress(), programmedImplicitArgsGpuVA);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformAndHwGeneratedLocalIdsWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsLocalIdsRespectWalkOrder) {
|
||||
workgroupDimOrder[0] = 2;
|
||||
workgroupDimOrder[1] = 1;
|
||||
workgroupDimOrder[2] = 0;
|
||||
@@ -1034,7 +1054,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
|
||||
expectedImplicitArgs.localSizeY = localSize[1];
|
||||
expectedImplicitArgs.localSizeZ = localSize[2];
|
||||
|
||||
dispatchKernelWithImplicitArgs();
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF));
|
||||
auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
|
||||
@@ -1050,7 +1070,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
|
||||
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicitArgsWhenAppendLaunchKernelWithSimd1ThenLocalIdsAreGeneratedCorrectly) {
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformWhenAppendLaunchKernelWithImplicitArgsAndSimd1ThenLocalIdsAreGeneratedCorrectly) {
|
||||
workgroupDimOrder[0] = 2;
|
||||
workgroupDimOrder[1] = 1;
|
||||
workgroupDimOrder[2] = 0;
|
||||
@@ -1060,7 +1080,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
|
||||
expectedImplicitArgs.localSizeY = 2;
|
||||
expectedImplicitArgs.localSizeZ = 1;
|
||||
|
||||
dispatchKernelWithImplicitArgs();
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
uint16_t expectedLocalIds[][3] = {{0, 0, 0},
|
||||
{0, 1, 0},
|
||||
|
||||
Reference in New Issue
Block a user