mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Correct dimension order in local ids generated for implicit args
when local ids are generated by HW, use same dim order for runtime generation move common logic to separated file Related-To: NEO-5081 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
ca5a8162eb
commit
b697d75695
@ -1261,7 +1261,9 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
|
||||
pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeapAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
|
||||
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableHwGenerationLocalIds.set(0);
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
uint32_t simd = 32;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd;
|
||||
@ -1330,6 +1332,74 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi
|
||||
EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndHwLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
uint32_t simd = 32;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 2;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0;
|
||||
|
||||
MockContext context(pClDevice);
|
||||
CommandQueueHw<FamilyType> cmdQ(&context, pClDevice, 0, false);
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
|
||||
MockKernel kernel(&program, *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
const size_t localWorkSizeX = 2;
|
||||
const size_t localWorkSizeY = 3;
|
||||
const size_t localWorkSizeZ = 4;
|
||||
const size_t localWorkSizes[3]{localWorkSizeX, localWorkSizeY, localWorkSizeZ};
|
||||
std::array<uint8_t, 3> expectedDimOrder = {0, 2, 1};
|
||||
|
||||
auto &commandStream = cmdQ.getCS(1024);
|
||||
auto pWalkerCmd = reinterpret_cast<typename FamilyType::WALKER_TYPE *>(commandStream.getSpace(0));
|
||||
|
||||
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192);
|
||||
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192);
|
||||
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192);
|
||||
|
||||
dsh.align(EncodeStates<FamilyType>::alignInterfaceDescriptorData);
|
||||
auto interfaceDescriptor = reinterpret_cast<typename FamilyType::INTERFACE_DESCRIPTOR_DATA *>(dsh.getSpace(0));
|
||||
uint32_t interfaceDescriptorIndex = 0u;
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
kernel,
|
||||
0u,
|
||||
simd,
|
||||
localWorkSizes,
|
||||
0u,
|
||||
interfaceDescriptorIndex,
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
interfaceDescriptor,
|
||||
false,
|
||||
*pDevice);
|
||||
|
||||
constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
|
||||
size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
|
||||
size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, 3u, localWorkSize);
|
||||
ASSERT_LE(expectedIohSize, ioh.getUsed());
|
||||
|
||||
auto expectedLocalIds = alignedMalloc(expectedIohSize, 64);
|
||||
generateLocalIDs(expectedLocalIds, simd,
|
||||
std::array<uint16_t, 3>{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}},
|
||||
expectedDimOrder,
|
||||
false, grfSize);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize));
|
||||
alignedFree(expectedLocalIds);
|
||||
|
||||
auto pImplicitArgs = reinterpret_cast<ImplicitArgs *>(ptrOffset(ioh.getCpuBase(), alignUp(expectedIohSize, MemoryConstants::cacheLineSize)));
|
||||
EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr);
|
||||
}
|
||||
|
||||
using HardwareCommandsTestXeHpAndLater = HardwareCommandsTest;
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, givenIndirectHeapNotAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenOffsetZeroIsReturned) {
|
||||
|
Reference in New Issue
Block a user