Correct programming of implicit args on pre-XeHp platforms

On pre-XeHp platforms implicit args aren't at the beginning of indirect data;
the GPU address of the implicit args buffer is programmed within cross thread data.

Related-To: NEO-5081, IGC-4710
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-02-23 13:27:24 +00:00
committed by Compute-Runtime-Automation
parent 06a4d2cc02
commit a2386ad216
12 changed files with 230 additions and 61 deletions

View File

@ -1566,7 +1566,7 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
MockKernel kernelWithoutImplicitArgs(program.get(), kernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernelWithoutImplicitArgs.initialize());
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(kernelInfo.kernelDescriptor);
MockKernel kernelWithImplicitArgs(program.get(), kernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernelWithImplicitArgs.initialize());
@ -1612,11 +1612,9 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
auto simdSize = kernelInfo.getMaxSimdSize();
uint32_t grfSize = sizeof(typename FamilyType::GRF);
auto grfSizeForImplicitArgs = ImplicitArgsHelper::getGrfSize(simdSize, grfSize);
auto size = kernelWithImplicitArgs.getCrossThreadDataSize() +
HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSize, numChannels, Math::computeTotalElementsCount(localWorkgroupSize)) +
sizeof(ImplicitArgs) +
alignUp(HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSizeForImplicitArgs, 3u, Math::computeTotalElementsCount(localWorkgroupSize)), MemoryConstants::cacheLineSize);
ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernelWithImplicitArgs.getImplicitArgs(), kernelWithImplicitArgs.getDescriptor(), *defaultHwInfo);
size = alignUp(size, MemoryConstants::cacheLineSize);
EXPECT_EQ(size, iohSizeWithImplicitArgs);

View File

@ -633,6 +633,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUn
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = true;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor);
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
*mockKernel.mockKernel->pImplicitArgs = {};
@ -678,7 +679,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWithStringMapDisbaledAndI
mockKernel.kernelInfo.addToPrintfStringsMap(0, testString);
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = false;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor);
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
*mockKernel.mockKernel->pImplicitArgs = {};

View File

@ -14,6 +14,7 @@
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
@ -1240,7 +1241,7 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
auto pKernelInfo = std::make_unique<MockKernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = expectedImplicitArgs.simdWidth;
pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(pKernelInfo->kernelDescriptor);
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = workgroupDimOrder[0];
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = workgroupDimOrder[1];
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = workgroupDimOrder[2];
@ -1254,6 +1255,8 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
ASSERT_NE(nullptr, pImplicitArgs);
kernel.setCrossThreadData(nullptr, sizeof(uint64_t));
kernel.setWorkDim(expectedImplicitArgs.numWorkDim);
kernel.setLocalWorkSizeValues(expectedImplicitArgs.localSizeX, expectedImplicitArgs.localSizeY, expectedImplicitArgs.localSizeZ);
kernel.setGlobalWorkSizeValues(static_cast<uint32_t>(expectedImplicitArgs.globalSizeX), static_cast<uint32_t>(expectedImplicitArgs.globalSizeY), static_cast<uint32_t>(expectedImplicitArgs.globalSizeZ));
@ -1272,7 +1275,9 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
EXPECT_LE(implicitArgsProgrammingSize, indirectHeap.getUsed());
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
}
}
ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs)};
@ -1281,7 +1286,7 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
uint32_t implicitArgsProgrammingSize = 0u;
};
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingCrossThreadDataThenImplicitArgsAreSetAtTheBeginningOfIndirectData) {
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenXeHpAndLaterPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithLocalIds) {
dispatchKernelWithImplicitArgs<FamilyType>();
auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
@ -1289,6 +1294,18 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
}
// Registered for IGFX_GEN8_CORE: dispatches a kernel that requires implicit args and
// checks both the implicit args contents in the indirect heap and the implicit args
// GPU address that is programmed within cross thread data.
HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsImplicitArgsTests, givenPreXeHpPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithoutLocalIds) {
dispatchKernelWithImplicitArgs<FamilyType>();
// The first sizeof(ImplicitArgs) bytes of the indirect heap buffer must match the expected implicit args.
auto implicitArgsInIndirectData = indirectHeapAllocation->getUnderlyingBuffer();
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
// NOTE(review): 0x80 is presumably the offset at which cross thread data starts in this fixture - confirm.
auto crossThreadDataInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 0x80);
// The first uint64_t read at that offset must equal the GPU address of the indirect heap
// allocation, i.e. the address of the buffer whose start was compared against the implicit args above.
auto programmedImplicitArgsGpuVA = reinterpret_cast<uint64_t *>(crossThreadDataInIndirectData)[0];
EXPECT_EQ(indirectHeapAllocation->getGpuAddress(), programmedImplicitArgsGpuVA);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableHwGenerationLocalIds.set(0);