/*
 * Copyright (C) 2017-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "test.h"

#include "CL/cl.h"
#include "gtest/gtest.h"

#include <cstring>
#include <memory>
#include <vector>

using namespace NEO;

// Single-device fixture: builds a context over one device and a MockKernel
// whose KernelInfo describes exactly one argument. That argument is a
// pointer-sized value patched into cross-thread data at offset 0x30, with a
// 64-byte local surface state heap attached.
class KernelArgSvmFixture_ : public ContextFixture, public ClDeviceFixture {

    using ContextFixture::SetUp;

  public:
    KernelArgSvmFixture_() {
    }

  protected:
    void SetUp() {
        ClDeviceFixture::SetUp();
        cl_device_id device = pClDevice;
        ContextFixture::SetUp(1, &device);

        // define kernel info
        pKernelInfo = std::make_unique<KernelInfo>();
        pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

        // setup kernel arg offsets
        KernelArgPatchInfo kernelArgPatchInfo;

        pKernelInfo->heapInfo.pSsh = pSshLocal;
        pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal);
        pKernelInfo->usesSsh = true;
        pKernelInfo->requiresSshForBuffers = true;

        pKernelInfo->kernelArgInfo.resize(1);
        pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);

        pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30;
        pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = static_cast<uint32_t>(sizeof(void *));

        pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice));

        pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
        ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
        pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
    }

    void TearDown() override {
        // Release in reverse order of creation: kernel, program, context, device.
        delete pKernel;

        delete pProgram;
        ContextFixture::TearDown();
        ClDeviceFixture::TearDown();
    }

    cl_int retVal = CL_SUCCESS;
    MockProgram *pProgram = nullptr;
    MockKernel *pKernel = nullptr;
    std::unique_ptr<KernelInfo> pKernelInfo;
    SKernelBinaryHeaderCommon kernelHeader;
    char pSshLocal[64];
    char pCrossThreadData[64];
};

typedef Test<KernelArgSvmFixture_> KernelArgSvmTest;

// setArgSvm must write the raw SVM pointer into cross-thread data at the
// argument's patch offset.
TEST_F(KernelArgSvmTest, GivenValidSvmPtrWhenSettingKernelArgThenSvmPtrIsCorrect) {
    std::unique_ptr<char[]> svmStorage(new char[256]);
    char *svmPtr = svmStorage.get();

    auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex) +
                                                pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
    EXPECT_EQ(svmPtr, *pKernelArg);
}

// With SSH usage switched off in the kernel info, the test expects the
// reported surface state heap size to be zero after setArgSvm.
TEST_F(KernelArgSvmTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    std::unique_ptr<char[]> svmStorage(new char[256]);
    char *svmPtr = svmStorage.get();

    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
}

// With SSH usage enabled, the surface state at the argument's heap offset must
// carry the SVM pointer as its base address.
HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    std::unique_ptr<char[]> svmStorage(new char[256]);
    char *svmPtr = svmStorage.get();

    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
                  pKernelInfo->kernelArgInfo[0].offsetHeap));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(svmPtr, surfaceAddress);
}

// setArgSvmAlloc must write the SVM pointer into cross-thread data at the
// argument's patch offset.
TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    std::unique_ptr<char[]> svmStorage(new char[256]);
    char *svmPtr = svmStorage.get();

    MockGraphicsAllocation svmAlloc(svmPtr, 256);

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex) +
                                                pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
    EXPECT_EQ(svmPtr, *pKernelArg);
}

// Stateless variant of the setArgSvmAlloc test: SSH usage is disabled and the
// reported surface state heap size is expected to be zero.
TEST_F(KernelArgSvmTest, GivenValidSvmAllocStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    std::unique_ptr<char[]> svmStorage(new char[256]);
    char *svmPtr = svmStorage.get();

    MockGraphicsAllocation svmAlloc(svmPtr, 256);

    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
}

// Stateful variant of the setArgSvmAlloc test: the programmed surface state
// must point at the SVM pointer.
HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    std::unique_ptr<char[]> svmStorage(new char[256]);
    char *svmPtr = svmStorage.get();

    MockGraphicsAllocation svmAlloc(svmPtr, 256);

    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
                  pKernelInfo->kernelArgInfo[0].offsetHeap));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(svmPtr, surfaceAddress);
}

// When the pointer passed to setArgSvmAlloc is offset into the allocation, the
// surface state base address is expected to be the offset pointer, not the
// allocation start.
HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThenProperSvmAddressIsPatched) {
    std::unique_ptr<char[]> svmPtr(new char[256]);

    auto offsetedPtr = svmPtr.get() + 4;

    MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);

    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc);

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
                  pKernelInfo->kernelArgInfo[0].offsetHeap));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(offsetedPtr, surfaceAddress);
}

// With all shared-system-memory capability bits set on the device and no
// graphics allocation supplied, the surface state is expected to carry the
// system pointer and the width/depth/height values asserted below.
HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetArgSvmIsCalledWithSurfaceStateThenSizeIsMaxAndAddressIsProgrammed) {
    this->pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL |
                                                              CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL |
                                                              CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL |
                                                              CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL;

    auto systemPointer = reinterpret_cast<void *>(0xfeedbac);

    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    pKernel->setArgSvmAlloc(0, systemPointer, nullptr);

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
                  pKernelInfo->kernelArgInfo[0].offsetHeap));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(systemPointer, surfaceAddress);
    EXPECT_EQ(128u, surfaceState->getWidth());
    EXPECT_EQ(2048u, surfaceState->getDepth());
    EXPECT_EQ(16384u, surfaceState->getHeight());
}

// setArgImmediate with a null value pointer must be rejected.
TEST_F(KernelArgSvmTest, WhenSettingKernelArgImmediateThenInvalidArgValueErrorIsReturned) {
    auto retVal = pKernel->setArgImmediate(0, 256, nullptr);
    EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal);
}

// patchWithImplicitSurface must patch both the cross-thread pointer and the
// surface state. It is then called once more with empty cross-thread data and
// SSH to exercise that path.
HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE);

    std::vector<char> svmPtr;
    svmPtr.resize(256);

    pKernel->setCrossThreadData(nullptr, sizeof(void *));
    pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex);
    pKernelInfo->requiresSshForBuffers = true;
    pKernelInfo->usesSsh = true;
    {
        MockGraphicsAllocation svmAlloc(svmPtr.data(), svmPtr.size());

        SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patch;
        memset(&patch, 0, sizeof(patch));
        patch.DataParamOffset = 0;
        patch.DataParamSize = sizeof(void *);
        patch.SurfaceStateHeapOffset = 0;

        constexpr size_t patchOffset = 16;
        void *ptrToPatch = svmPtr.data() + patchOffset;
        ASSERT_GE(pKernel->getCrossThreadDataSize(rootDeviceIndex), sizeof(void *));
        *reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex)) = nullptr;

        ASSERT_GE(pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize);
        RENDER_SURFACE_STATE *surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(pKernel->getSurfaceStateHeap(rootDeviceIndex));
        memset(surfState, 0, rendSurfSize);

        pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, *pDevice, patch);

        // verify cross thread data was properly patched
        EXPECT_EQ(ptrToPatch, *reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex)));

        // create surface state for comparison
        RENDER_SURFACE_STATE expectedSurfaceState;
        memset(&expectedSurfaceState, 0, rendSurfSize);
        {
            void *addressToPatch = svmAlloc.getUnderlyingBuffer();
            size_t sizeToPatch = svmAlloc.getUnderlyingBufferSize();
            Buffer::setSurfaceState(pDevice, &expectedSurfaceState, false, false,
                                    sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0);
        }

        // verify ssh was properly patched
        EXPECT_EQ(0, memcmp(&expectedSurfaceState, surfState, rendSurfSize));

        // when cross thread and ssh data is not available then should not do anything
        pKernel->setCrossThreadData(nullptr, 0);
        pKernel->setSshLocal(nullptr, 0, rootDeviceIndex);
        pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, *pDevice, patch);
    }
}

// Walks patchBufferOffset through five combinations of "offset slot
// present / absent" and "allocation present / absent", checking the returned
// base pointer and the 32-bit offset written to cross-thread data.
TEST_F(KernelArgSvmTest, WhenPatchingBufferOffsetThenPatchIsApplied) {
    std::vector<char> svmPtr;
    svmPtr.resize(256);

    pKernel->setCrossThreadData(nullptr, sizeof(uint32_t));

    {
        constexpr uint32_t initVal = 7U;
        constexpr uint32_t svmOffset = 13U;

        MockGraphicsAllocation svmAlloc(svmPtr.data(), 256);
        uint32_t *expectedPatchPtr = reinterpret_cast<uint32_t *>(pKernel->getCrossThreadData(rootDeviceIndex));

        KernelArgInfo kai;
        void *returnedPtr = nullptr;

        // offsetBufferOffset == uint32_t(-1), allocation given: nothing is patched
        kai.offsetBufferOffset = static_cast<uint32_t>(-1);
        *expectedPatchPtr = initVal;
        returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc, rootDeviceIndex);
        EXPECT_EQ(svmPtr.data(), returnedPtr);
        EXPECT_EQ(initVal, *expectedPatchPtr);

        // offsetBufferOffset == uint32_t(-1), no allocation: nothing is patched
        kai.offsetBufferOffset = static_cast<uint32_t>(-1);
        *expectedPatchPtr = initVal;
        returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), nullptr, rootDeviceIndex);
        EXPECT_EQ(svmPtr.data(), returnedPtr);
        EXPECT_EQ(initVal, *expectedPatchPtr);

        // offset slot at 0, pointer equals allocation start: offset 0 is written
        kai.offsetBufferOffset = 0U;
        *expectedPatchPtr = initVal;
        returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc, rootDeviceIndex);
        EXPECT_EQ(svmPtr.data(), returnedPtr);
        EXPECT_EQ(0U, *expectedPatchPtr);

        // offset slot at 0, no allocation, unaligned pointer
        kai.offsetBufferOffset = 0U;
        *expectedPatchPtr = initVal;
        returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, nullptr, rootDeviceIndex);
        void *expectedPtr = alignDown(svmPtr.data() + svmOffset, 4);
        // expecting to see DWORD alignment restriction in offset
        uint32_t expectedOffset = static_cast<uint32_t>(ptrDiff(svmPtr.data() + svmOffset, expectedPtr));
        EXPECT_EQ(expectedPtr, returnedPtr);
        EXPECT_EQ(expectedOffset, *expectedPatchPtr);

        // offset slot at 0, allocation given, pointer offset into the allocation
        kai.offsetBufferOffset = 0U;
        *expectedPatchPtr = initVal;
        returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, &svmAlloc, rootDeviceIndex);
        EXPECT_EQ(svmPtr.data(), returnedPtr);
        EXPECT_EQ(svmOffset, *expectedPatchPtr);
    }
}

// Typed-test wrapper so the same test body can run against each SetArgHandler*.
template <typename SetArgHandlerT>
class KernelArgSvmTestTyped : public KernelArgSvmTest {
};

// Handler that sets the argument through Kernel::setArgSvm.
struct SetArgHandlerSetArgSvm {
    static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, MultiGraphicsAllocation &alloc) {
        kernel.setArgSvm(argNum, allocSize, ptrToPatch, &alloc, 0u);
    }

    static constexpr bool supportsOffsets() {
        return true;
    }
};

// Handler that sets the argument through Kernel::setArgMultiDeviceSvmAlloc.
struct SetArgHandlerSetArgSvmAlloc {
    static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, MultiGraphicsAllocation &alloc) {
        kernel.setArgMultiDeviceSvmAlloc(argNum, ptrToPatch, &alloc);
    }

    static constexpr bool supportsOffsets() {
        return true;
    }
};

// Handler that wraps the default graphics allocation in a MockBuffer and sets
// it through Kernel::setArgBuffer; ptrToPatch and allocSize are not used.
struct SetArgHandlerSetArgBuffer {
    static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, MultiGraphicsAllocation &alloc) {
        MockBuffer mb{*alloc.getDefaultGraphicsAllocation()};
        cl_mem memObj = &mb;
        kernel.setArgBuffer(argNum, sizeof(cl_mem), &memObj);
    }

    static constexpr bool supportsOffsets() {
        return false;
    }
};

using SetArgHandlers = ::testing::Types<SetArgHandlerSetArgSvm, SetArgHandlerSetArgSvmAlloc, SetArgHandlerSetArgBuffer>;

TYPED_TEST_CASE(KernelArgSvmTestTyped, SetArgHandlers);

// For every handler: set an argument that points 16 bytes into a page-sized
// allocation, then check the patched pointer, the patched buffer offset and
// the programmed surface state.
HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsNeededTheSetArgSetsIt) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE);

    // NOTE(review): the explicit <MockDevice> argument is inferred from the
    // unique_ptr<MockDevice> being initialised — confirm against the mock header.
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    auto rootDeviceIndex = device->getRootDeviceIndex();

    uint32_t svmSize = MemoryConstants::pageSize;
    // Owned by a unique_ptr so the buffer is still released when one of the
    // ASSERT_* checks below returns early from the test body.
    std::unique_ptr<char, void (*)(void *)> svmStorage(
        reinterpret_cast<char *>(alignedMalloc(svmSize, MemoryConstants::pageSize)),
        [](void *ptr) { alignedFree(ptr); });
    char *svmPtr = svmStorage.get();

    KernelArgInfo &kai = this->pKernelInfo->kernelArgInfo[0];
    kai.offsetHeap = 0;
    kai.kernelArgPatchInfoVector[0].sourceOffset = 0;
    kai.kernelArgPatchInfoVector[0].crossthreadOffset = 0;
    kai.kernelArgPatchInfoVector[0].size = sizeof(void *);
    // The 32-bit buffer offset slot sits directly after the pointer slot.
    kai.offsetBufferOffset = kai.kernelArgPatchInfoVector[0].size;

    this->pKernel->setCrossThreadData(nullptr, kai.offsetBufferOffset + sizeof(uint32_t));
    this->pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex);
    this->pKernelInfo->requiresSshForBuffers = true;
    this->pKernelInfo->usesSsh = true;
    {
        MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

        constexpr size_t patchOffset = 16;
        void *ptrToPatch = svmPtr + patchOffset;
        size_t sizeToPatch = svmSize - patchOffset;
        ASSERT_GE(this->pKernel->getCrossThreadDataSize(rootDeviceIndex), kai.offsetBufferOffset + sizeof(uint32_t));

        void **expectedPointerPatchPtr = reinterpret_cast<void **>(this->pKernel->getCrossThreadData(rootDeviceIndex));
        uint32_t *expectedOffsetPatchPtr = reinterpret_cast<uint32_t *>(
            ptrOffset(this->pKernel->getCrossThreadData(rootDeviceIndex), kai.offsetBufferOffset));
        *expectedPointerPatchPtr = reinterpret_cast<void *>(0U);
        *expectedOffsetPatchPtr = 0U;

        ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize);
        RENDER_SURFACE_STATE *surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(this->pKernel->getSurfaceStateHeap(rootDeviceIndex));
        memset(surfState, 0, rendSurfSize);

        MultiGraphicsAllocation multiGraphicsAllocation(svmAlloc.getRootDeviceIndex());
        multiGraphicsAllocation.addAllocation(&svmAlloc);

        TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, multiGraphicsAllocation);

        // surface state for comparison
        RENDER_SURFACE_STATE expectedSurfaceState;
        memset(&expectedSurfaceState, 0, rendSurfSize);

        if (TypeParam::supportsOffsets()) {
            // setArgSvm, setArgSvmAlloc
            EXPECT_EQ(ptrToPatch, *expectedPointerPatchPtr);
            EXPECT_EQ(patchOffset, *expectedOffsetPatchPtr);
        } else {
            // setArgBuffer
            EXPECT_EQ(svmAlloc.getUnderlyingBuffer(), *expectedPointerPatchPtr);
            EXPECT_EQ(0U, *expectedOffsetPatchPtr);
        }

        Buffer::setSurfaceState(device.get(), &expectedSurfaceState, false, false, svmAlloc.getUnderlyingBufferSize(),
                                svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0);

        // verify ssh was properly patched
        int32_t cmpResult = memcmp(&expectedSurfaceState, surfState, rendSurfSize);
        EXPECT_EQ(0, cmpResult);
    }
}

// Allocation marked writable with L3 flush not required: the cache-flush slot
// for the argument is expected to stay null.
TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) {
    size_t svmSize = 4096;
    void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize);
    MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

    svmAlloc.setMemObjectsAllocationWithWritableFlags(true);
    svmAlloc.setFlushL3Required(false);

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]);

    alignedFree(svmPtr);
}

// Allocation with L3 flush required: the cache-flush slot for the argument is
// expected to hold the allocation.
TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    size_t svmSize = 4096;
    void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize);
    MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

    svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
    svmAlloc.setFlushL3Required(true);

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(&svmAlloc, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]);

    alignedFree(svmPtr);
}

// Allocation neither writable nor requiring L3 flush: the cache-flush slot for
// the argument is expected to stay null.
TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    size_t svmSize = 4096;
    void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize);
    MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

    svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
    svmAlloc.setFlushL3Required(false);

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]);

    alignedFree(svmPtr);
}

// setSvmKernelExecInfo with a writable, no-flush allocation: the kernel's
// svmAllocationsRequireCacheFlush flag is expected to be false.
TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenDoNotExpectSvmFlushFlagTrue) {
    size_t svmSize = 4096;
    void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize);
    MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

    svmAlloc.setMemObjectsAllocationWithWritableFlags(true);
    svmAlloc.setFlushL3Required(false);

    pKernel->setSvmKernelExecInfo(&svmAlloc);
    EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(svmPtr);
}

// setSvmKernelExecInfo with an allocation that requires L3 flush: the kernel's
// svmAllocationsRequireCacheFlush flag is expected to be true.
TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) {
    size_t svmSize = 4096;
    void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize);
    MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

    svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
    svmAlloc.setFlushL3Required(true);

    pKernel->setSvmKernelExecInfo(&svmAlloc);
    EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(svmPtr);
}

// setSvmKernelExecInfo with a non-writable, no-flush allocation: the kernel's
// svmAllocationsRequireCacheFlush flag is expected to be false.
TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagFalse) {
    size_t svmSize = 4096;
    void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize);
    MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

    svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
    svmAlloc.setFlushL3Required(false);

    pKernel->setSvmKernelExecInfo(&svmAlloc);
    EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(svmPtr);
}

// Allocation constructed with a null CPU pointer and a GPU address equal to
// svmPtr: the argument patched into cross-thread data must still be svmPtr.
TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvmArgUseGpuAddress) {
    char svmPtr[256];

    pKernelInfo->kernelArgInfo[0].offsetBufferOffset = 0u;

    MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast<uint64_t>(svmPtr), 256);

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex) +
                                                pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
    EXPECT_EQ(svmPtr, *pKernelArg);
}

// Same null-CPU-pointer allocation, exercised through patchBufferOffset: the
// returned pointer must equal the buffer start and the patched offset must be 0.
TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenPatchBufferOffsetWithGpuAddress) {
    std::vector<char> svmPtr;
    svmPtr.resize(256);

    pKernel->setCrossThreadData(nullptr, sizeof(uint32_t));

    constexpr uint32_t initVal = 7U;

    MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast<uint64_t>(svmPtr.data()), 256);
    uint32_t *expectedPatchPtr = reinterpret_cast<uint32_t *>(pKernel->getCrossThreadData(rootDeviceIndex));

    KernelArgInfo kai;
    void *returnedPtr = nullptr;

    kai.offsetBufferOffset = 0U;
    *expectedPatchPtr = initVal;
    returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc, rootDeviceIndex);
    EXPECT_EQ(svmPtr.data(), returnedPtr);
    EXPECT_EQ(0U, *expectedPatchPtr);
}

// Multi-root-device fixture: builds one KernelInfo per root device index
// reported by the context (same single-argument layout as the single-device
// fixture) and one MockKernel spanning all of them, with a separate
// cross-thread data buffer per root device.
struct KernelArgSvmMultiDeviceTest : public MultiRootDeviceWithSubDevicesFixture {
    void SetUp() override {
        MultiRootDeviceWithSubDevicesFixture::SetUp();
        program = std::make_unique<MockProgram>(context.get(), false, context->getDevices());
        KernelInfoContainer kernelInfos;
        kernelInfos.resize(3);
        for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
            pKernelInfo[rootDeviceIndex] = std::make_unique<KernelInfo>();
            pKernelInfo[rootDeviceIndex]->kernelDescriptor.kernelAttributes.simdSize = 1;

            // setup kernel arg offsets
            KernelArgPatchInfo kernelArgPatchInfo;

            pKernelInfo[rootDeviceIndex]->heapInfo.pSsh = pSshLocal[rootDeviceIndex];
            pKernelInfo[rootDeviceIndex]->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal[rootDeviceIndex]);
            pKernelInfo[rootDeviceIndex]->usesSsh = true;
            pKernelInfo[rootDeviceIndex]->requiresSshForBuffers = true;

            pKernelInfo[rootDeviceIndex]->kernelArgInfo.resize(1);
            pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);

            pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30;
            pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = static_cast<uint32_t>(sizeof(void *));

            kernelInfos[rootDeviceIndex] = pKernelInfo[rootDeviceIndex].get();
        }

        pKernel = new MockKernel(program.get(), kernelInfos);
        ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
        pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
        for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
            pKernel->setCrossThreadDataForRootDeviceIndex(rootDeviceIndex, &pCrossThreadData[rootDeviceIndex], sizeof(pCrossThreadData[rootDeviceIndex]));
        }
    }

    void TearDown() override {
        delete pKernel;

        MultiRootDeviceWithSubDevicesFixture::TearDown();
    }

    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<MockProgram> program;
    MockKernel *pKernel = nullptr;
    SKernelBinaryHeaderCommon kernelHeader;
    std::unique_ptr<KernelInfo> pKernelInfo[3];
    char pCrossThreadData[3][0x60];
    char pSshLocal[3][64];
};

// setArgSvm must patch the SVM pointer into the cross-thread data of every
// root device.
TEST_F(KernelArgSvmMultiDeviceTest, GivenValidSvmPtrWhenSettingKernelArgThenSvmPtrIsCorrect) {
    char svmPtr[256] = {};

    auto retVal = pKernel->setArgSvm(0, 256, &svmPtr, nullptr, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex) +
                                                    pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
        EXPECT_EQ(svmPtr, *pKernelArg);
    }
}

// setArgMultiDeviceSvmAlloc with one allocation per root device must patch the
// SVM pointer into the cross-thread data of every root device.
TEST_F(KernelArgSvmMultiDeviceTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    char svmPtr[256] = {};

    GraphicsAllocation graphicsAllocation1{1u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u};
    GraphicsAllocation graphicsAllocation2{2u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u};
    MultiGraphicsAllocation multiGraphicsAllocation(2);
    multiGraphicsAllocation.addAllocation(&graphicsAllocation1);
    multiGraphicsAllocation.addAllocation(&graphicsAllocation2);

    auto retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr, &multiGraphicsAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex) +
                                                    pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
        EXPECT_EQ(svmPtr, *pKernelArg);
    }
}

// Setting the same argument twice must leave the second pointer patched for
// every root device.
TEST_F(KernelArgSvmMultiDeviceTest, whenSettingArgTwiceThenOverrideWithCorrectValue) {
    char svmPtr[256] = {};
    char svmPtr2[256] = {};

    GraphicsAllocation graphicsAllocation1{1u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u};
    GraphicsAllocation graphicsAllocation2{2u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u};
    MultiGraphicsAllocation multiGraphicsAllocation(2);
    multiGraphicsAllocation.addAllocation(&graphicsAllocation1);
    multiGraphicsAllocation.addAllocation(&graphicsAllocation2);

    auto retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr, &multiGraphicsAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr2, &multiGraphicsAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex) +
                                                    pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
        EXPECT_EQ(svmPtr2, *pKernelArg);
    }
}

// Stateful multi-device variant: every root device's surface state must carry
// the SVM pointer as its base address.
HWTEST_F(KernelArgSvmMultiDeviceTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    char svmPtr[256] = {};

    GraphicsAllocation graphicsAllocation1{1u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u};
    GraphicsAllocation graphicsAllocation2{2u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u};
    MultiGraphicsAllocation multiGraphicsAllocation(2);
    multiGraphicsAllocation.addAllocation(&graphicsAllocation1);
    multiGraphicsAllocation.addAllocation(&graphicsAllocation2);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        pKernelInfo[rootDeviceIndex]->usesSsh = true;
        pKernelInfo[rootDeviceIndex]->requiresSshForBuffers = true;
    }

    auto retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr, &multiGraphicsAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));

        auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
            ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
                      pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].offsetHeap));

        void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
        EXPECT_EQ(svmPtr, surfaceAddress);
    }
}