Bindless addressing support for OCL

Related-To: NEO-4607

Change-Id: Iaf4a8d45f22d134366e398a196bdd8dc906ab6ab
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2020-04-21 22:40:21 +02:00
committed by sys_ocldev
parent e01b9e0c86
commit 6e79105466
23 changed files with 519 additions and 32 deletions

View File

@ -5,6 +5,8 @@
*
*/
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
@ -207,3 +209,67 @@ TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpect
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
// With UseBindlessBuffers set, patching is expected to store
// (sshOffset + offsetHeap) << 6 in the buffer argument's cross-thread data slot.
TEST_F(KernelArgBufferTest, givenUsedBindlessBuffersWhenPatchingSurfaceStateOffsetsThenCorrectOffsetIsPatchedInCrossThreadData) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessBuffers.set(1);

    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    auto &bufferArg = pKernelInfo->kernelArgInfo[0];
    auto slotOffset = bufferArg.kernelArgPatchInfoVector[0].crossthreadOffset;
    bufferArg.offsetHeap = 64;
    bufferArg.isBuffer = true;

    // Seed the slot with a sentinel so that a successful patch is observable.
    auto patchedSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), slotOffset));
    *patchedSlot = 0xdead;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(sshOffset);
    const auto expectedForSmallOffset = (sshOffset + bufferArg.offsetHeap) << 6;
    EXPECT_EQ(expectedForSmallOffset, *patchedSlot);

    // Second pass: sshOffset + offsetHeap adds up to static_cast<uint32_t>(maxNBitValue(20)).
    sshOffset = static_cast<uint32_t>(maxNBitValue(20)) - 64;
    pKernel->patchBindlessSurfaceStateOffsets(sshOffset);
    const auto expectedForLargeOffset = (sshOffset + bufferArg.offsetHeap) << 6;
    EXPECT_EQ(expectedForLargeOffset, *patchedSlot);
}
// With UseBindlessBuffers set but the argument flagged as a non-buffer,
// the sentinel in cross-thread data is expected to survive patching.
TEST_F(KernelArgBufferTest, givenUsedBindlessBuffersAndNonBufferArgWhenPatchingSurfaceStateOffsetsThenCrossThreadDataIsNotPatched) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessBuffers.set(1);

    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    auto &nonBufferArg = pKernelInfo->kernelArgInfo[0];
    auto slotOffset = nonBufferArg.kernelArgPatchInfoVector[0].crossthreadOffset;
    nonBufferArg.offsetHeap = 64;
    nonBufferArg.isBuffer = false;

    // Sentinel value; it must remain untouched after the call below.
    auto untouchedSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), slotOffset));
    *untouchedSlot = 0xdead;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(sshOffset);

    EXPECT_EQ(0xdeadu, *untouchedSlot);
}
// UseBindlessBuffers is off (only UseBindlessImages is on): a buffer argument's
// cross-thread data slot is expected to keep its sentinel after patching.
TEST_F(KernelArgBufferTest, givenNotUsedBindlessBuffersAndBufferArgWhenPatchingSurfaceStateOffsetsThenCrossThreadDataIsNotPatched) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessBuffers.set(false);
    DebugManager.flags.UseBindlessImages.set(true);

    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    auto &bufferArg = pKernelInfo->kernelArgInfo[0];
    auto slotOffset = bufferArg.kernelArgPatchInfoVector[0].crossthreadOffset;
    bufferArg.offsetHeap = 64;
    bufferArg.isBuffer = true;

    // Sentinel value; it must remain untouched after the call below.
    auto untouchedSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), slotOffset));
    *untouchedSlot = 0xdead;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(sshOffset);

    EXPECT_EQ(0xdeadu, *untouchedSlot);
}

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/helpers/ptr_math.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/source/helpers/memory_properties_flags_helpers.h"
#include "opencl/source/kernel/kernel.h"
@ -324,3 +325,85 @@ TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAlloc
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
// With UseBindlessImages set, every argument still flagged as an image is expected
// to receive (sshOffset + offsetHeap) << 6, while the last argument (re-flagged as
// a non-image) must keep its sentinel.
TEST_F(KernelImageArgTest, givenUsedBindlessImagesWhenPatchingSurfaceStateOffsetsThenCorrectOffsetIsPatchedInCrossThreadData) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessImages.set(1);
    pKernelInfo->usesSsh = true;

    auto &args = pKernelInfo->kernelArgInfo;

    // Resolves the cross-thread data slot that belongs to the given argument.
    auto slotFor = [&](size_t argIndex) {
        auto slotOffset = pKernelInfo->kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset;
        return reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), slotOffset));
    };

    // Give each argument its own 4-byte slot starting at 0x20 and seed it with a sentinel.
    for (size_t argIndex = 0; argIndex < args.size(); ++argIndex) {
        args[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20 + static_cast<uint32_t>(4 * argIndex);
        *slotFor(argIndex) = 0xdead;
    }

    args[args.size() - 1].isImage = false;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(sshOffset);

    for (size_t argIndex = 0; argIndex < args.size(); ++argIndex) {
        auto slot = slotFor(argIndex);
        if (!args[argIndex].isImage) {
            EXPECT_EQ(0xdeadu, *slot);
            continue;
        }
        auto expectedOffset = (sshOffset + args[argIndex].offsetHeap) << 6;
        EXPECT_EQ(expectedOffset, *slot);
    }
}
// With UseBindlessImages set, an argument flagged as a non-image is expected to
// keep the sentinel in its cross-thread data slot after patching.
TEST_F(KernelImageArgTest, givenUsedBindlessImagesAndNonImageArgWhenPatchingSurfaceStateOffsetsThenCrossThreadDataIsNotPatched) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessImages.set(1);
    pKernelInfo->usesSsh = true;

    auto &args = pKernelInfo->kernelArgInfo;

    // Give each argument its own 4-byte slot starting at 0x20 and seed it with a sentinel.
    for (size_t argIndex = 0; argIndex < args.size(); ++argIndex) {
        auto &patchInfo = args[argIndex].kernelArgPatchInfoVector[0];
        patchInfo.crossthreadOffset = 0x20 + static_cast<uint32_t>(4 * argIndex);
        auto seededSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), patchInfo.crossthreadOffset));
        *seededSlot = 0xdead;
    }

    int nonImageArg = 1;
    args[nonImageArg].isImage = false;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(sshOffset);

    auto checkedOffset = args[nonImageArg].kernelArgPatchInfoVector[0].crossthreadOffset;
    auto checkedSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), checkedOffset));
    EXPECT_EQ(0xdeadu, *checkedSlot);
}
// UseBindlessImages is off (only UseBindlessBuffers is on): an argument explicitly
// flagged as an image is expected to keep the sentinel in its cross-thread data slot.
TEST_F(KernelImageArgTest, givenNotUsedBindlessImagesAndImageArgWhenPatchingSurfaceStateOffsetsThenCrossThreadDataIsNotPatched) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessImages.set(false);
    DebugManager.flags.UseBindlessBuffers.set(true);
    pKernelInfo->usesSsh = true;

    auto &args = pKernelInfo->kernelArgInfo;

    // Give each argument its own 4-byte slot starting at 0x20 and seed it with a sentinel.
    for (size_t argIndex = 0; argIndex < args.size(); ++argIndex) {
        auto &patchInfo = args[argIndex].kernelArgPatchInfoVector[0];
        patchInfo.crossthreadOffset = 0x20 + static_cast<uint32_t>(4 * argIndex);
        auto seededSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), patchInfo.crossthreadOffset));
        *seededSlot = 0xdead;
    }

    // The argument under test is deliberately marked as an image here.
    int imageArg = 1;
    args[imageArg].isImage = true;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(sshOffset);

    auto checkedOffset = args[imageArg].kernelArgPatchInfoVector[0].crossthreadOffset;
    auto checkedSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), checkedOffset));
    EXPECT_EQ(0xdeadu, *checkedSlot);
}