/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// Unit tests for setting buffer-type kernel arguments (Kernel::setArg /
// setArgSvmAlloc) and for the kernel queries that depend on them.
//
// NOTE(review): this copy of the file appears to have lost every
// angle-bracketed token: the last #include has no header name, and template
// arguments are missing after Test, std::make_unique, std::unique_ptr,
// static_cast, reinterpret_cast, .as() and undefined. Restore them from
// version control before building; the code below is otherwise unchanged.

#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"

#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/memory_management_fixture.h"
#include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/mocks/ult_cl_device_factory.h"
#include "test.h"

#include "CL/cl.h"
#include "gtest/gtest.h"
#include "hw_cmds.h"

// NOTE(review): header name missing here (see note at top of file).
#include

using namespace NEO;

// NOTE(review): template argument of Test is missing; presumably the fixture
// declared in kernel_arg_buffer_fixture.h - confirm.
typedef Test KernelArgBufferTest;

// Sets a MockBuffer as argument 0 and checks that the value written into the
// cross-thread data at the argument's stateless offset is the buffer's CPU
// address.
TEST_F(KernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
    Buffer *buffer = new MockBuffer();

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() +
                                  this->pKernelInfo->argAsPtr(0).stateless);
    EXPECT_EQ(buffer->getCpuAddress(), *pKernelArg);

    delete buffer;
}

// Fixture for multi-device kernel tests. Builds a context from
// deviceFactory.rootDevices[1], subDevices[4] and subDevices[5], two kernel
// infos (one buffer argument each), a page-sized buffer and a program.
struct MultiDeviceKernelArgBufferTest : public ::testing::Test {

    void SetUp() override {
        ClDeviceVector devicesForContext;
        devicesForContext.push_back(deviceFactory.rootDevices[1]);
        devicesForContext.push_back(deviceFactory.subDevices[4]);
        devicesForContext.push_back(deviceFactory.subDevices[5]);
        pContext = std::make_unique(devicesForContext);
        // kernelInfos is indexed with root device indices by the tests below;
        // slot 0 is left empty, slots 1 and 2 point at the fixture-owned infos.
        kernelInfos.resize(3);
        kernelInfos[0] = nullptr;
        pKernelInfosStorage[0] = std::make_unique();
        pKernelInfosStorage[1] = std::make_unique();
        kernelInfos[1] = pKernelInfosStorage[0].get();
        kernelInfos[2] = pKernelInfosStorage[1].get();

        auto &hwHelper = HwHelper::get(renderCoreFamily);

        for (auto i = 0u; i < 2; i++) {
            pKernelInfosStorage[i]->heapInfo.pSsh = pSshLocal[i];
            pKernelInfosStorage[i]->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal[i]);
            pKernelInfosStorage[i]->kernelDescriptor.kernelAttributes.simdSize = hwHelper.getMinimalSIMDSize();

            // Store the address of this cross-thread data array inside the array
            // itself, at the same offset that is registered for argument 0 below.
            auto crossThreadDataPointer = &pCrossThreadData[i];
            memcpy_s(ptrOffset(&pCrossThreadData[i], i * sizeof(void *)), sizeof(void *), &crossThreadDataPointer, sizeof(void *));
            pKernelInfosStorage[i]->crossThreadData = pCrossThreadData[i];

            // Argument 0 lives at a different offset in each kernel info
            // (i * sizeof(void *)); the cross-thread data size covers exactly
            // up to the end of that slot.
            pKernelInfosStorage[i]->addArgBuffer(0, static_cast(i * sizeof(void *)), sizeof(void *));
            pKernelInfosStorage[i]->setCrossThreadDataSize(static_cast((i + 1) * sizeof(void *)));
        }

        // Start from an error code so that success must be reported by create().
        auto retVal = CL_INVALID_PROGRAM;
        pBuffer = std::unique_ptr(Buffer::create(pContext.get(), 0u, MemoryConstants::pageSize, nullptr, retVal));
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_NE(nullptr, pBuffer);

        pProgram = std::make_unique(pContext.get(), false, pContext->getDevices());
    }

    void TearDown() override {
        // Detach the fixture-owned arrays from the kernel infos before they are
        // destroyed (presumably so the infos do not try to release them - confirm).
        for (auto i = 0u; i < 2; i++) {
            pKernelInfosStorage[i]->crossThreadData = nullptr;
        }
    }

    // NOTE(review): presumably 3 root devices with 2 sub-devices each - confirm
    // against UltClDeviceFactory.
    UltClDeviceFactory deviceFactory{3, 2};
    std::unique_ptr pContext;
    std::unique_ptr pKernelInfosStorage[2];
    char pCrossThreadData[2][64]{};
    char pSshLocal[2][64]{};
    KernelInfoContainer kernelInfos;
    std::unique_ptr pBuffer;
    std::unique_ptr pProgram;
};

// Sets the fixture buffer as argument 0 on a multi-device kernel and checks,
// for every root device of the context, that the patched value equals the
// GPU address to patch of that device's allocation.
TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
    int32_t retVal = CL_INVALID_VALUE;
    auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);
    cl_mem val = pBuffer.get();
    auto pVal = &val;

    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex));
        auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData() +
                                           kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as().stateless);
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg);
    }
}

// With the addressing mode forced to Stateless, setting a buffer argument
// succeeds and the kernel does not require coherency.
TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->requiresCoherency());

    delete buffer;
}

// After setting a buffer argument, the surface state found at the argument's
// bindful offset in the surface state heap holds the GPU address of the
// buffer's allocation.
HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->requiresCoherency());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));

    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
    EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);

    delete buffer;
}

// Multi-device variant of the stateful test: with bindful offset 0 in both
// kernel infos, each per-device kernel's surface state holds the GPU address
// of that device's allocation.
HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    cl_mem val = pBuffer.get();
    auto pVal = &val;

    int32_t retVal = CL_INVALID_VALUE;
    for (auto &kernelInfo : pKernelInfosStorage) {
        kernelInfo->argAsPtr(0).bindful = 0;
    }
    auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
        EXPECT_FALSE(pKernel->requiresCoherency());

        EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

        typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
        auto surfaceState = reinterpret_cast(
            ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as().bindful));

        auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress);
    }
}

// A buffer whose allocation is marked coherent makes the kernel report that
// it requires coherency once the buffer is set as an argument.
HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {

    Buffer *buffer = new MockBuffer();
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->requiresCoherency());

    delete buffer;
}

// Passing memory that is not a Buffer object as the argument value is
// rejected with CL_INVALID_MEM_OBJECT.
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
    // NOTE(review): the array is not initialized by this test, so setArg
    // inspects indeterminate bytes.
    char *ptr = new char[sizeof(Buffer)];

    auto val = (cl_mem *)ptr;
    auto pVal = &val;
    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);

    delete[] ptr;
}

// A pointer to a null cl_mem results in a null value in the cross-thread
// data. The return value of setArg is not checked here.
TEST_F(KernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
    auto val = (cl_mem *)nullptr;
    auto pVal = &val;
    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);

    auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() +
                                  this->pKernelInfo->argAsPtr(0).stateless);

    EXPECT_EQ(nullptr, *pKernelArg);
}

// Multi-device variant: a null argument value yields a null patched value in
// every per-device kernel. The return value of setArg is not checked here.
TEST_F(MultiDeviceKernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
    int32_t retVal = CL_INVALID_VALUE;
    auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);

    auto val = nullptr;
    auto pVal = &val;
    pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex));
        auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData() +
                                           kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as().stateless);
        EXPECT_EQ(nullptr, *pKernelArg);
    }
}

// With the argument's pointer size set to 4, a pointer to a null value only
// clears the first four bytes of the slot; the slot is pre-filled with 0xff
// bytes so that untouched bytes remain visible.
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4BytesAreBeingPatched) {
    auto val = (cl_mem *)nullptr;
    auto pVal = &val;

    auto &argAsPtr = pKernelInfo->argAsPtr(0);
    argAsPtr.pointerSize = 4;

    auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
    auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;

    *pKernelArg64bit = 0xffffffffffffffff;

    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    uint64_t expValue = 0u;

    EXPECT_EQ(0u, *pKernelArg32bit);
    EXPECT_NE(expValue, *pKernelArg64bit);
}

// Same as the test above, but the argument value pointer itself is null
// rather than pointing at a null value.
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAreBeingPatched) {
    auto pVal = nullptr;

    auto &argAsPtr = pKernelInfo->argAsPtr(0);
    argAsPtr.pointerSize = 4;

    auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
    auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;

    *pKernelArg64bit = 0xffffffffffffffff;

    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    uint64_t expValue = 0u;

    EXPECT_EQ(0u, *pKernelArg32bit);
    EXPECT_NE(expValue, *pKernelArg64bit);
}

// Writable-flags allocation without flush-L3-required: no entry is recorded
// in kernelArgRequiresCacheFlush for the argument.
TEST_F(KernelArgBufferTest, givenWritableBufferWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) {
    auto buffer = std::make_unique();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(true);
    buffer->mockGfxAllocation.setFlushL3Required(false);

    auto val = static_cast(buffer.get());
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}

// Allocation with flush-L3-required set: the allocation is recorded in
// kernelArgRequiresCacheFlush for the argument.
TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    auto buffer = std::make_unique();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false);
    buffer->mockGfxAllocation.setFlushL3Required(true);

    auto val = static_cast(buffer.get());
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(&buffer->mockGfxAllocation, pKernel->kernelArgRequiresCacheFlush[0]);
}

// Neither writable flags nor flush-L3-required: no entry is recorded in
// kernelArgRequiresCacheFlush for the argument.
TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    auto buffer = std::make_unique();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false);
    buffer->mockGfxAllocation.setFlushL3Required(false);

    auto val = static_cast(buffer.get());
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}

// A buffer of allocation type BUFFER never reports direct stateless access
// to host memory, regardless of the pure-stateful setting of the argument.
TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}

// A buffer of allocation type BUFFER_HOST_MEMORY reports direct stateless
// access to host memory exactly when the argument is not pure stateful.
TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}

// Same query for an SVM-alloc argument backed by a BUFFER_COMPRESSED
// allocation: always false.
TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}

// Same query for an SVM-alloc argument backed by a BUFFER_HOST_MEMORY
// allocation: true exactly when the argument is not pure stateful.
TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}

// The query returns false for a kernel built from a default KernelInfo, and
// for the fixture kernel when only the recorded argument type is changed
// (no setArg call is made in this test).
TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    KernelInfo kernelInfo;
    MockKernel emptyKernel(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory());

    pKernel->kernelArguments.at(0).type = Kernel::NONE_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    pKernel->kernelArguments.at(0).type = Kernel::BUFFER_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    pKernel->kernelArguments.at(0).type = Kernel::SVM_ALLOC_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
}

// hasIndirectStatelessAccessToHostMemory is false without the KernelInfo
// flag, false with the flag but no registered allocations, and with the flag
// set it is true only while a BUFFER_HOST_MEMORY allocation is registered
// through setUnifiedMemoryExecInfo.
TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
    KernelInfo kernelInfo;
    EXPECT_FALSE(kernelInfo.hasIndirectStatelessAccess);

    MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory());

    kernelInfo.hasIndirectStatelessAccess = true;

    MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory());

    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
                                  GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};

    MockKernel kernelWithIndirectUnifiedMemoryAllocation(pProgram, kernelInfo, *pClDevice);
    MockGraphicsAllocation gfxAllocation;
    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type);
        kernelWithIndirectUnifiedMemoryAllocation.setUnifiedMemoryExecInfo(&gfxAllocation);
        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
        } else {
            EXPECT_FALSE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
        }
        // Remove the allocation again so each type is checked in isolation.
        kernelWithIndirectUnifiedMemoryAllocation.clearUnifiedMemoryExecInfo();
    }
}

// With indirect host allocations allowed, the query stays false while only a
// device unified-memory allocation exists and becomes true once a host
// unified-memory allocation has been created in the SVM allocations manager.
TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
    KernelInfo kernelInfo;
    kernelInfo.hasIndirectStatelessAccess = true;

    MockKernel mockKernel(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    // The remaining checks need an SVM allocations manager; without one the
    // test ends here.
    auto svmAllocationsManager = mockKernel.getContext().getSVMAllocsManager();
    if (svmAllocationsManager == nullptr) {
        return;
    }

    mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true;
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
    auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
    auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties);
    EXPECT_TRUE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
    svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
}

// isAuxTranslationRequired returns whatever was last passed to
// setAuxTranslationRequired.
TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslationRequiredReturnsCorrectValue) {
    for (auto auxTranslationRequired : {false, true}) {
        pKernel->setAuxTranslationRequired(auxTranslationRequired);
        EXPECT_EQ(auxTranslationRequired, pKernel->isAuxTranslationRequired());
    }
}

// Variant of KernelArgBufferFixture that sets the UseBindlessMode debug flag
// to 1 before running the base fixture's SetUp.
class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
  public:
    // NOTE(review): unlike TearDown below, SetUp is not marked override.
    void SetUp() {
        DebugManager.flags.UseBindlessMode.set(1);
        KernelArgBufferFixture::SetUp();
    }
    void TearDown() override {
        KernelArgBufferFixture::TearDown();
    }
    // Presumably restores the debug flags when the fixture is destroyed - confirm.
    DebugManagerStateRestore restorer;
};

// NOTE(review): template argument of Test is missing; presumably
// KernelArgBufferFixtureBindless - confirm.
typedef Test KernelArgBufferTestBindless;

// Moves argument 0 from a stateless to a bindless offset (bindful = 64),
// writes a 0xdead sentinel at that offset and checks that
// patchBindlessSurfaceStateOffsets replaces it with the value produced by the
// hardware family's message descriptor for sshOffset + bindful. Checked for
// two different sshOffset values.
HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenPatchingSurfaceStateOffsetsThenCorrectOffsetIsPatchedInCrossThreadData) {
    using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;
    // NOTE(review): this local has the same name as the fixture member
    // `restorer`, and the flag was already set to 1 in the fixture's SetUp.
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessMode.set(1);

    auto crossThreadDataOffset = pKernelInfo->argAsPtr(0).stateless;
    pKernelInfo->argAsPtr(0).stateless = undefined;
    pKernelInfo->argAsPtr(0).bindless = crossThreadDataOffset;
    pKernelInfo->argAsPtr(0).bindful = 64;

    auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset));
    *patchLocation = 0xdead;

    uint32_t sshOffset = 0x1000;
    pKernel->patchBindlessSurfaceStateOffsets(*pDevice, sshOffset);
    DataPortBindlessSurfaceExtendedMessageDescriptor extMessageDesc;
    extMessageDesc.setBindlessSurfaceOffset(sshOffset + pKernelInfo->argAsPtr(0).bindful);
    auto expectedOffset = extMessageDesc.getBindlessSurfaceOffsetToPatch();
    EXPECT_EQ(expectedOffset, *patchLocation);

    // Second case: sshOffset + bindful (64) equals maxNBitValue(20) + 1.
    sshOffset = static_cast(maxNBitValue(20) + 1) - 64;
    pKernel->patchBindlessSurfaceStateOffsets(*pDevice, sshOffset);
    extMessageDesc.setBindlessSurfaceOffset(sshOffset + pKernelInfo->argAsPtr(0).bindful);
    expectedOffset = extMessageDesc.getBindlessSurfaceOffsetToPatch();
    EXPECT_EQ(expectedOffset, *patchLocation);
}

// Uses the regular (non-bindless) fixture and sets UseBindlessMode to 1 only
// after the fixture has been set up; the 0xdead sentinel is expected to stay
// untouched by patchBindlessSurfaceStateOffsets.
// NOTE(review): the name mentions a non-buffer argument, but nothing in this
// body changes the argument kind; it differs from the next test only in the
// flag value - confirm intent.
TEST_F(KernelArgBufferTest, givenUsedBindlessBuffersAndNonBufferArgWhenPatchingSurfaceStateOffsetsThenCrossThreadDataIsNotPatched) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessMode.set(1);

    auto crossThreadDataOffset = pKernelInfo->argAsPtr(0).stateless;
    pKernelInfo->argAsPtr(0).stateless = undefined;
    pKernelInfo->argAsPtr(0).bindless = crossThreadDataOffset;
    pKernelInfo->argAsPtr(0).bindful = 64;

    auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset));
    *patchLocation = 0xdead;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(*pDevice, sshOffset);
    EXPECT_EQ(0xdeadu, *patchLocation);
}

// With UseBindlessMode set to 0, patchBindlessSurfaceStateOffsets leaves the
// 0xdead sentinel in the cross-thread data untouched.
TEST_F(KernelArgBufferTest, givenNotUsedBindlessBuffersAndBufferArgWhenPatchingSurfaceStateOffsetsThenCrossThreadDataIsNotPatched) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseBindlessMode.set(0);

    auto crossThreadDataOffset = pKernelInfo->argAsPtr(0).stateless;
    pKernelInfo->argAsPtr(0).stateless = undefined;
    pKernelInfo->argAsPtr(0).bindless = crossThreadDataOffset;
    pKernelInfo->argAsPtr(0).bindful = 64;

    auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset));
    *patchLocation = 0xdead;

    uint32_t sshOffset = 4000;
    pKernel->patchBindlessSurfaceStateOffsets(*pDevice, sshOffset);
    EXPECT_EQ(0xdeadu, *patchLocation);
}

// With the bindless fixture and the kernel marked as built-in, the sentinel
// at the bindless offset is overwritten by patchBindlessSurfaceStateOffsets.
HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersAndBuiltinKernelWhenPatchingSurfaceStateOffsetsThenOffsetIsPatched) {
    using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;

    auto crossThreadDataOffset = pKernelInfo->argAsPtr(0).stateless;
    pKernelInfo->argAsPtr(0).stateless = undefined;
    pKernelInfo->argAsPtr(0).bindless = crossThreadDataOffset;
    pKernelInfo->argAsPtr(0).bindful = 64;

    auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset));
    *patchLocation = 0xdead;

    pKernel->isBuiltIn = true;

    uint32_t sshOffset = 0x1000;
    pKernel->patchBindlessSurfaceStateOffsets(*pDevice, sshOffset);
    EXPECT_NE(0xdeadu, *patchLocation);
}