mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Correct programming of implicit args on pre-XeHp platforms
On pre-XeHp platforms, implicit args aren't at the beginning of indirect data; instead, the GPU address of the implicit args buffer is programmed within cross thread data.
Related-To: NEO-5081, IGC-4710
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
06a4d2cc02
commit
a2386ad216
@ -1566,7 +1566,7 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
|
||||
MockKernel kernelWithoutImplicitArgs(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernelWithoutImplicitArgs.initialize());
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(kernelInfo.kernelDescriptor);
|
||||
MockKernel kernelWithImplicitArgs(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernelWithImplicitArgs.initialize());
|
||||
|
||||
@ -1612,11 +1612,9 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
|
||||
auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
|
||||
auto simdSize = kernelInfo.getMaxSimdSize();
|
||||
uint32_t grfSize = sizeof(typename FamilyType::GRF);
|
||||
auto grfSizeForImplicitArgs = ImplicitArgsHelper::getGrfSize(simdSize, grfSize);
|
||||
auto size = kernelWithImplicitArgs.getCrossThreadDataSize() +
|
||||
HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSize, numChannels, Math::computeTotalElementsCount(localWorkgroupSize)) +
|
||||
sizeof(ImplicitArgs) +
|
||||
alignUp(HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSizeForImplicitArgs, 3u, Math::computeTotalElementsCount(localWorkgroupSize)), MemoryConstants::cacheLineSize);
|
||||
ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernelWithImplicitArgs.getImplicitArgs(), kernelWithImplicitArgs.getDescriptor(), *defaultHwInfo);
|
||||
|
||||
size = alignUp(size, MemoryConstants::cacheLineSize);
|
||||
EXPECT_EQ(size, iohSizeWithImplicitArgs);
|
||||
|
@ -633,6 +633,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUn
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = true;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
|
||||
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor);
|
||||
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
|
||||
*mockKernel.mockKernel->pImplicitArgs = {};
|
||||
|
||||
@ -678,7 +679,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWithStringMapDisbaledAndI
|
||||
mockKernel.kernelInfo.addToPrintfStringsMap(0, testString);
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = false;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor);
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
|
||||
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
|
||||
*mockKernel.mockKernel->pImplicitArgs = {};
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
@ -1240,7 +1241,7 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
|
||||
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = expectedImplicitArgs.simdWidth;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(pKernelInfo->kernelDescriptor);
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = workgroupDimOrder[0];
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = workgroupDimOrder[1];
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = workgroupDimOrder[2];
|
||||
@ -1254,6 +1255,8 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
|
||||
|
||||
ASSERT_NE(nullptr, pImplicitArgs);
|
||||
|
||||
kernel.setCrossThreadData(nullptr, sizeof(uint64_t));
|
||||
|
||||
kernel.setWorkDim(expectedImplicitArgs.numWorkDim);
|
||||
kernel.setLocalWorkSizeValues(expectedImplicitArgs.localSizeX, expectedImplicitArgs.localSizeY, expectedImplicitArgs.localSizeZ);
|
||||
kernel.setGlobalWorkSizeValues(static_cast<uint32_t>(expectedImplicitArgs.globalSizeX), static_cast<uint32_t>(expectedImplicitArgs.globalSizeY), static_cast<uint32_t>(expectedImplicitArgs.globalSizeZ));
|
||||
@ -1272,7 +1275,9 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
|
||||
|
||||
EXPECT_LE(implicitArgsProgrammingSize, indirectHeap.getUsed());
|
||||
|
||||
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
|
||||
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
||||
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
|
||||
}
|
||||
}
|
||||
|
||||
ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs)};
|
||||
@ -1281,7 +1286,7 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
|
||||
uint32_t implicitArgsProgrammingSize = 0u;
|
||||
};
|
||||
|
||||
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingCrossThreadDataThenImplicitArgsAreSetAtTheBeginningOfIndirectData) {
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenXeHpAndLaterPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithLocalIds) {
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
|
||||
@ -1289,6 +1294,18 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
|
||||
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsImplicitArgsTests, givenPreXeHpPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithoutLocalIds) {
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
auto implicitArgsInIndirectData = indirectHeapAllocation->getUnderlyingBuffer();
|
||||
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
|
||||
|
||||
auto crossThreadDataInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 0x80);
|
||||
|
||||
auto programmedImplicitArgsGpuVA = reinterpret_cast<uint64_t *>(crossThreadDataInIndirectData)[0];
|
||||
EXPECT_EQ(indirectHeapAllocation->getGpuAddress(), programmedImplicitArgsGpuVA);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableHwGenerationLocalIds.set(0);
|
||||
|
Reference in New Issue
Block a user