/* * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "runtime/command_stream/command_stream_receiver_hw.h" #include "runtime/helpers/options.h" #include "runtime/helpers/surface_formats.h" #include "runtime/kernel/kernel.h" #include "runtime/mem_obj/image.h" #include "runtime/memory_manager/os_agnostic_memory_manager.h" #include "runtime/os_interface/debug_settings_manager.h" #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/fixtures/execution_model_fixture.h" #include "unit_tests/fixtures/memory_management_fixture.h" #include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/gtest_helpers.h" #include "test.h" #include "unit_tests/mocks/mock_kernel.h" #include "unit_tests/mocks/mock_program.h" #include "unit_tests/mocks/mock_context.h" #include "unit_tests/program/program_from_binary.h" #include "unit_tests/program/program_tests.h" #include using namespace OCLRT; class KernelTest : public ProgramFromBinaryTest { public: KernelTest() { } ~KernelTest() override = default; protected: void SetUp() override { ProgramFromBinaryTest::SetUp(); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id device = pDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, *pProgram->getKernelInfo(KernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); } void TearDown() override { delete pKernel; pKernel = nullptr; deleteDataReadFromFile(knownSource); ProgramFromBinaryTest::TearDown(); } Kernel *pKernel = nullptr; cl_int retVal = CL_SUCCESS; }; TEST(KernelTest, isMemObj) { EXPECT_TRUE(Kernel::isMemObj(Kernel::BUFFER_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::IMAGE_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::PIPE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SAMPLER_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::ACCELERATOR_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::NONE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SVM_ALLOC_OBJ)); } TEST_P(KernelTest, getKernelHeap) { EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeap, pKernel->getKernelHeap()); EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeader->KernelHeapSize, pKernel->getKernelHeapSize()); } TEST_P(KernelTest, Create_Simple) { // included in the setup of fixture } TEST_P(KernelTest, GetInfo_InvalidParamName) { size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( 0, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(KernelTest, GetInfo_Name) { cl_kernel_info paramName = CL_KERNEL_FUNCTION_NAME; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name paramValue = new char[paramValueSizeRet]; // get the name paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, strcmp(paramValue, KernelName)); EXPECT_EQ(CL_SUCCESS, retVal); delete[] paramValue; } TEST_P(KernelTest, GetInfo_BinaryProgramIntel) { cl_kernel_info paramName = CL_KERNEL_BINARY_PROGRAM_INTEL; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; const char *pKernelData = reinterpret_cast(pKernel->getKernelHeap()); EXPECT_NE(nullptr, pKernelData); // get size of kernel binary retVal = pKernel->getInfo( paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for kernel binary paramValue = new char[paramValueSizeRet]; // get kernel binary paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, memcmp(paramValue, pKernelData, paramValueSize)); delete[] paramValue; } TEST_P(KernelTest, GetInfo_NumArgs) { cl_kernel_info paramName = CL_KERNEL_NUM_ARGS; size_t paramValueSize = sizeof(cl_uint); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(2u, paramValue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(KernelTest, GetInfo_Program) { cl_kernel_info paramName = CL_KERNEL_PROGRAM; size_t paramValueSize = sizeof(cl_program); cl_program paramValue = 0; size_t paramValueSizeRet = 0; cl_program prog = pProgram; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_program), paramValueSizeRet); EXPECT_EQ(prog, paramValue); } TEST_P(KernelTest, GetInfo_Context) { cl_kernel_info paramName = CL_KERNEL_CONTEXT; cl_context paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; cl_context context = pContext; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(context, paramValue); } TEST_P(KernelTest, GetWorkGroupInfo_WorkgroupSize) { cl_kernel_info paramName = CL_KERNEL_WORK_GROUP_SIZE; size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getWorkGroupInfo( pDevice, paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, paramValue); } TEST_P(KernelTest, GetWorkGroupInfo_CompileWorkgroupSize) { cl_kernel_info paramName = CL_KERNEL_COMPILE_WORK_GROUP_SIZE; size_t paramValue[3]; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getWorkGroupInfo( pDevice, paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); } INSTANTIATE_TEST_CASE_P(KernelTests, KernelTest, ::testing::Combine( ::testing::ValuesIn(BinaryFileNames), ::testing::ValuesIn(KernelNames))); class KernelFromBinaryTest : public ProgramFromBinarySimpleTest, public MemoryManagementFixture { public: void SetUp() override { MemoryManagementFixture::SetUp(); ProgramFromBinarySimpleTest::SetUp(); } void TearDown() override { ProgramFromBinarySimpleTest::TearDown(); MemoryManagementFixture::TearDown(); } }; typedef Test KernelFromBinaryTests; TEST_F(KernelFromBinaryTests, getInfo_NumArgs) { cl_device_id device = pDevice; CreateProgramFromBinary(pContext, &device, "kernel_num_args"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test"); // create a kernel auto pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( CL_KERNEL_NUM_ARGS, sizeof(cl_uint), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(3u, paramValue); delete pKernel; } TEST_F(KernelFromBinaryTests, BuiltInIsSetToFalseForRegularKernels) { cl_device_id device = pDevice; CreateProgramFromBinary(pContext, &device, "simple_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("simple_kernel_0"); // create a kernel auto pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); // get builtIn property bool isBuiltIn = pKernel->isBuiltIn; EXPECT_FALSE(isBuiltIn); delete pKernel; } TEST(PatchInfo, Constructor) { PatchInfo patchInfo; EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorDataLoad); EXPECT_EQ(nullptr, patchInfo.localsurface); EXPECT_EQ(nullptr, patchInfo.mediavfestate); EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorData); EXPECT_EQ(nullptr, patchInfo.samplerStateArray); EXPECT_EQ(nullptr, patchInfo.bindingTableState); EXPECT_EQ(nullptr, patchInfo.dataParameterStream); EXPECT_EQ(nullptr, patchInfo.threadPayload); EXPECT_EQ(nullptr, patchInfo.executionEnvironment); EXPECT_EQ(nullptr, patchInfo.pKernelAttributesInfo); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrintfSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessEventPoolSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface); } typedef Test KernelPrivateSurfaceTest; typedef Test KernelGlobalSurfaceTest; typedef Test KernelConstantSurfaceTest; struct KernelWithDeviceQueueFixture : public DeviceFixture, public DeviceQueueFixture, public testing::Test { void SetUp() override { DeviceFixture::SetUp(); DeviceQueueFixture::SetUp(&context, pDevice); } void TearDown() override { DeviceQueueFixture::TearDown(); DeviceFixture::TearDown(); } MockContext context; }; typedef KernelWithDeviceQueueFixture KernelDefaultDeviceQueueSurfaceTest; typedef KernelWithDeviceQueueFixture KernelEventPoolSurfaceTest; class CommandStreamReceiverMock : public CommandStreamReceiver { typedef CommandStreamReceiver BaseClass; public: CommandStreamReceiverMock() : BaseClass() { } void makeResident(GraphicsAllocation &graphicsAllocation) override { residency[graphicsAllocation.getUnderlyingBuffer()] = graphicsAllocation.getUnderlyingBufferSize(); CommandStreamReceiver::makeResident(graphicsAllocation); } void makeNonResident(GraphicsAllocation &graphicsAllocation) override { residency.erase(graphicsAllocation.getUnderlyingBuffer()); CommandStreamReceiver::makeNonResident(graphicsAllocation); } FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override { return flushStamp->peekStamp(); } void addPipeControl(LinearStream &commandStream, bool dcFlush) override { } void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) override { } CompletionStamp flushTask( LinearStream &commandStream, size_t commandStreamStart, const LinearStream &dsh, const LinearStream &ih, const LinearStream &ioh, const LinearStream &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags) override { CompletionStamp cs = {}; return cs; } void flushBatchedSubmissions() override {} std::map residency; }; TEST_F(KernelPrivateSurfaceTest, testPrivateSurface) { ASSERT_NE(nullptr, pDevice); // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // create kernel MockContext context; MockProgram program(&context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // Test it std::unique_ptr memoryManager(new OsAgnosticMemoryManager()); std::unique_ptr csr(new CommandStreamReceiverMock()); csr->setMemoryManager(memoryManager.get()); csr->residency.clear(); EXPECT_EQ(0u, csr->residency.size()); pKernel->makeResident(*csr.get()); EXPECT_EQ(1u, csr->residency.size()); csr->makeSurfacePackNonResident(nullptr); pKernel->updateWithCompletionStamp(*csr.get(), nullptr); EXPECT_EQ(0u, csr->residency.size()); delete pKernel; delete pKernelInfo; } TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWhenKernelIsBeingDestroyedThenAllocationIsAddedToDefferedFreeList) { std::unique_ptr pKernelInfo(KernelInfo::create()); SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; MockContext context; MockProgram program(&context, false); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); pKernel->initialize(); auto memoryManager = pDevice->getMemoryManager(); auto privateSurface = pKernel->getPrivateSurface(); auto tagAddress = context.getDevice(0)->getTagAddress(); privateSurface->taskCount = *tagAddress + 1; EXPECT_TRUE(memoryManager->graphicsAllocations.peekIsEmpty()); pKernel.reset(nullptr); EXPECT_FALSE(memoryManager->graphicsAllocations.peekIsEmpty()); EXPECT_EQ(memoryManager->graphicsAllocations.peekHead(), privateSurface); } TEST_F(KernelPrivateSurfaceTest, testPrivateSurfaceAllocationFailure) { ASSERT_NE(nullptr, pDevice); // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // create kernel MockContext context; MockProgram program(&context, false); MemoryManagementFixture::InjectedFunction method = [&](size_t failureIndex) { MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); if (MemoryManagementFixture::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, pKernel->initialize()); } else { EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } delete pKernel; }; auto f = new MemoryManagementFixture(); f->SetUp(); f->injectFailures(method); f->TearDown(); delete f; delete pKernelInfo; } TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateSurfaceIs32BitAllocation) { if (is64bit) { pDevice->getMemoryManager()->setForce32BitAllocations(true); // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 4; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // create kernel MockContext context; MockProgram program(&context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getPrivateSurface()->is32BitAllocation); delete pKernel; delete pKernelInfo; } } HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup constant memory SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface; AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 0; AllocateStatelessPrivateMemorySurface.DataParamOffset = 0; AllocateStatelessPrivateMemorySurface.DataParamSize = 8; AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 16; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface; MockContext context; MockProgram program(&context, false); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); auto bufferAddress = pKernel->getPrivateSurface()->getGpuAddress(); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); delete pKernel; delete pKernelInfo; } TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsNotPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory char buffer[16]; GraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockContext context; MockProgram program(&context, false); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); program.setConstantSurface(nullptr); delete pKernel; delete pKernelInfo; } TEST_F(KernelPrivateSurfaceTest, givenNullDataParameterStreamGetConstantBufferSizeReturnsZero) { KernelInfo *pKernelInfo = KernelInfo::create(); EXPECT_EQ(0u, pKernelInfo->getConstantBufferSize()); delete pKernelInfo; } TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamGetConstantBufferSizeReturnsCorrectSize) { KernelInfo *pKernelInfo = KernelInfo::create(); SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; EXPECT_EQ(64u, pKernelInfo->getConstantBufferSize()); delete pKernelInfo; } TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithCpuAddress) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup global memory SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc((void *)buffer, (uint64_t)buffer - 8u, 8); uint64_t bufferAddress = (uint64_t)gfxAlloc.getUnderlyingBuffer(); // create kernel MockContext context; MockProgram program(&context, false); program.setGlobalSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); pKernel->isBuiltIn = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); delete pKernel; delete pKernelInfo; } TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithBaseAddressOffset) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup global memory SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc((void *)buffer, (uint64_t)buffer - 8u, 8); uint64_t bufferAddress = gfxAlloc.getGpuAddress(); // create kernel MockProgram program; program.setGlobalSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; char buffer[16]; GraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); void *bufferAddress = gfxAlloc.getUnderlyingBuffer(); MockContext context; MockProgram program(&context, false); program.setGlobalSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(bufferAddress, surfaceAddress); program.setGlobalSurface(nullptr); delete pKernel; delete pKernelInfo; } TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsNotPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory char buffer[16]; GraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockProgram program; program.setGlobalSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); program.setGlobalSurface(nullptr); delete pKernel; delete pKernelInfo; } TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithCpuAddress) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup constant memory SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc((void *)buffer, (uint64_t)buffer - 8u, 8); uint64_t bufferAddress = (uint64_t)gfxAlloc.getUnderlyingBuffer(); // create kernel MockProgram program; program.setConstantSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); pKernel->isBuiltIn = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); delete pKernel; delete pKernelInfo; } TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithBaseAddressOffset) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup constant memory SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc((void *)buffer, (uint64_t)buffer - 8u, 8); uint64_t bufferAddress = gfxAlloc.getGpuAddress(); // create kernel MockProgram program; program.setConstantSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup constant memory SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; char buffer[16]; GraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); void *bufferAddress = gfxAlloc.getUnderlyingBuffer(); MockContext context; MockProgram program(&context, false); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(bufferAddress, surfaceAddress); program.setConstantSurface(nullptr); delete pKernel; delete pKernelInfo; } TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsNotPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory char buffer[16]; GraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockProgram program; program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); program.setConstantSurface(nullptr); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsPatchedWithNullSurface) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program(&context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(nullptr, surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenEventPoolIsPatchedThenEventPoolSurfaceStateIsProgrammed) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program(&context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->patchEventPool(pDevQueue); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), (uint64_t)surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelEventPoolSurfaceTest, givenKernelWithNullEventPoolInKernelInfoWhenEventPoolIsPatchedThenAddressIsNotPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = nullptr; // create kernel MockProgram program; MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 123; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchEventPool(pDevQueue); EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; delete pKernelInfo; } TEST_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program; MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); if (pDevice->getSupportedClVersion() < 20) { EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); } else { } delete pKernel; delete pKernelInfo; } TEST_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program; MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 0; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchEventPool(pDevQueue); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsPatchedWithNullSurface) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program(&context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(nullptr, surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenDefaultDeviceQueueIsPatchedThenSurfaceStateIsCorrectlyProgrammed) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program(&context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->patchDefaultDeviceQueue(pDevQueue); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), (uint64_t)surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType); delete pKernel; delete pKernelInfo; } TEST_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsNotPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program; MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete pKernel; delete pKernelInfo; } TEST_F(KernelDefaultDeviceQueueSurfaceTest, givenKernelWithNullDeviceQueueKernelInfoWhenDefaultDeviceQueueIsPatchedThenAddressIsNotPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = nullptr; // create kernel MockProgram program; MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 123; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchDefaultDeviceQueue(pDevQueue); EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; delete pKernelInfo; } TEST_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenDefaultDeviceQueueIsPatchedThenCrossThreadDataIsPatched) { // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); SPatchExecutionEnvironment tokenEE; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program; MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 0; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchDefaultDeviceQueue(pDevQueue); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; delete pKernelInfo; } typedef Test KernelResidencyTest; TEST_F(KernelResidencyTest, test_MakeArgsResident) { ASSERT_NE(nullptr, pDevice); char pCrossThreadData[64]; // define kernel info KernelInfo *pKernelInfo = KernelInfo::create(); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; pKernelInfo->kernelArgInfo.resize(3); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x10; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; MockProgram program; MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); // Currently just a sandbox for testing pKernel->makeResident(pDevice->getCommandStreamReceiver()); delete pKernel; delete pKernelInfo; } HWTEST_F(KernelResidencyTest, test_MakeArgsResidentCheckImageFromImage) { ASSERT_NE(nullptr, pDevice); //create NV12 image cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_NV12_INTEL; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 16; imageDesc.image_height = 16; imageDesc.image_depth = 1; cl_int retVal; MockContext context; std::unique_ptr imageNV12(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal)); //create Y plane imageFormat.image_channel_order = CL_R; flags = CL_MEM_READ_ONLY; surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.mem_object = imageNV12.get(); std::unique_ptr imageY(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal)); std::unique_ptr pKernelInfo(KernelInfo::create()); KernelArgInfo kernelArgInfo; kernelArgInfo.isImage = true; pKernelInfo->kernelArgInfo.push_back(kernelArgInfo); std::unique_ptr program(new MockProgram); std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo, *pDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); pKernel->makeResident(pDevice->getCommandStreamReceiver()); EXPECT_FALSE(imageNV12->isImageFromImage()); EXPECT_TRUE(imageY->isImageFromImage()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.peekSamplerCacheFlushRequired()); } struct KernelExecutionEnvironmentTest : public Test { void SetUp() override { DeviceFixture::SetUp(); pKernelInfo = KernelInfo::create(); pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment; pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); } void TearDown() override { delete pKernelInfo; delete pKernel; DeviceFixture::TearDown(); } MockKernel *pKernel; MockProgram program; KernelInfo *pKernelInfo; SPatchExecutionEnvironment executionEnvironment; }; TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll32) { executionEnvironment.CompiledSIMD32 = true; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(32u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll16) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(16u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll8) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns8ByDefault) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = false; EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns1WhenExecutionEnvironmentNotAvailable) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = false; auto oldExcEnv = this->pKernelInfo->patchInfo.executionEnvironment; this->pKernelInfo->patchInfo.executionEnvironment = nullptr; EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize()); this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv; } TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsZero) { auto maxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = 0; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 0; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 0; EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsLowerThanMaxWorkGroupSize) { auto maxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast(maxWorkGroupSize / 2); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 1; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 1; EXPECT_EQ(maxWorkGroupSize / 2, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsGreaterThanMaxWorkGroupSize) { auto maxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast(maxWorkGroupSize); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = static_cast(maxWorkGroupSize); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = static_cast(maxWorkGroupSize); EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } struct KernelCrossThreadTests : Test { KernelCrossThreadTests() { } void SetUp() override { DeviceFixture::SetUp(); patchDataParameterStream.DataParameterStreamSize = 64 * sizeof(uint8_t); pKernelInfo = KernelInfo::create(); ASSERT_NE(nullptr, pKernelInfo); pKernelInfo->patchInfo.dataParameterStream = &patchDataParameterStream; pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment; } void TearDown() override { delete pKernelInfo; DeviceFixture::TearDown(); } MockProgram program; KernelInfo *pKernelInfo = nullptr; SPatchDataParameterStream patchDataParameterStream; SPatchExecutionEnvironment executionEnvironment; }; TEST_F(KernelCrossThreadTests, globalWorkOffset) { pKernelInfo->workloadInfo.globalWorkOffsetOffsets[1] = 4; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX); EXPECT_NE(nullptr, kernel.globalWorkOffsetY); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetZ); } TEST_F(KernelCrossThreadTests, localWorkSize) { pKernelInfo->workloadInfo.localWorkSizeOffsets[0] = 0xc; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.localWorkSizeX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ); } TEST_F(KernelCrossThreadTests, localWorkSize2) { pKernelInfo->workloadInfo.localWorkSizeOffsets2[1] = 0xd; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2); EXPECT_NE(nullptr, kernel.localWorkSizeY2); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeY2); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ2); } TEST_F(KernelCrossThreadTests, globalWorkSize) { pKernelInfo->workloadInfo.globalWorkSizeOffsets[2] = 8; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeY); EXPECT_NE(nullptr, kernel.globalWorkSizeZ); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkSizeZ); } TEST_F(KernelCrossThreadTests, workDim) { pKernelInfo->workloadInfo.workDimOffset = 12; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.workDim); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.workDim); } TEST_F(KernelCrossThreadTests, numWorkGroups) { pKernelInfo->workloadInfo.numWorkGroupsOffset[0] = 0 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[1] = 1 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[2] = 2 * sizeof(uint32_t); MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.numWorkGroupsX); EXPECT_NE(nullptr, kernel.numWorkGroupsY); EXPECT_NE(nullptr, kernel.numWorkGroupsZ); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsY); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsZ); } TEST_F(KernelCrossThreadTests, enqueuedLocalWorkSize) { pKernelInfo->workloadInfo.enqueuedLocalWorkSizeOffsets[0] = 0; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeZ); } TEST_F(KernelCrossThreadTests, maxWorkGroupSize) { pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.maxWorkGroupSize); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.maxWorkGroupSize); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.maxWorkGroupSizeOffset), static_cast(kernel.maxWorkGroupSize)); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSize); } TEST_F(KernelCrossThreadTests, dataParameterSimdSize) { pKernelInfo->workloadInfo.simdSizeOffset = 16; MockKernel kernel(&program, *pKernelInfo, *pDevice); executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = true; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.dataParameterSimdSize); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.dataParameterSimdSize); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.simdSizeOffset), static_cast(kernel.dataParameterSimdSize)); EXPECT_EQ_VAL(pKernelInfo->getMaxSimdSize(), *kernel.dataParameterSimdSize); } TEST_F(KernelCrossThreadTests, GIVENparentEventOffsetWHENinitializeKernelTHENparentEventInitWithInvalid) { pKernelInfo->workloadInfo.parentEventOffset = 16; MockKernel kernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.parentEventOffset); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.parentEventOffset); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.parentEventOffset), static_cast(kernel.parentEventOffset)); EXPECT_EQ(WorkloadInfo::invalidParentEvent, *kernel.parentEventOffset); } TEST_F(KernelCrossThreadTests, kernelAddRefCountToProgram) { auto refCount = program.getReference(); MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pDevice); auto refCount2 = program.getReference(); EXPECT_EQ(refCount2, refCount + 1); delete kernel; auto refCount3 = program.getReference(); EXPECT_EQ(refCount, refCount3); } TEST_F(KernelCrossThreadTests, kernelSetsTotalSLMSize) { pKernelInfo->workloadInfo.slmStaticSize = 1024; MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pDevice); EXPECT_EQ(1024u, kernel->slmTotalSize); delete kernel; } TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCurbeIsPatchedProperly) { SPatchAllocateStatelessPrivateSurface allocatePrivate; allocatePrivate.DataParamSize = 8; allocatePrivate.DataParamOffset = 0; allocatePrivate.PerThreadPrivateMemorySize = 1; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &allocatePrivate; MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pDevice); kernel->initialize(); auto privateSurface = kernel->getPrivateSurface(); auto constantBuffer = kernel->getCrossThreadData(); auto privateAddress = (uintptr_t)privateSurface->getGpuAddressToPatch(); auto ptrCurbe = (uint64_t *)constantBuffer; auto privateAddressFromCurbe = (uintptr_t)*ptrCurbe; EXPECT_EQ(privateAddressFromCurbe, privateAddress); delete kernel; } TEST_F(KernelCrossThreadTests, givenKernelWithPrefferedWkgMultipleWhenItIsCreatedThenCurbeIsPatchedProperly) { pKernelInfo->workloadInfo.prefferedWkgMultipleOffset = 8; MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pDevice); kernel->initialize(); auto *crossThread = kernel->getCrossThreadData(); uint32_t *prefferedWkgMultipleOffset = (uint32_t *)ptrOffset(crossThread, 8); EXPECT_EQ(pKernelInfo->getMaxSimdSize(), *prefferedWkgMultipleOffset); delete kernel; } TEST_F(KernelCrossThreadTests, patchBlocksSimdSize) { MockKernelWithInternals *kernel = new MockKernelWithInternals(*pDevice); // store offset to child's simd size in kernel info uint32_t crossThreadOffset = 0; //offset of simd size kernel->kernelInfo.childrenKernelsIdOffset.push_back({0, crossThreadOffset}); // add a new block kernel to program KernelInfo *infoBlock = new KernelInfo(); kernel->executionEnvironmentBlock.CompiledSIMD8 = 0; kernel->executionEnvironmentBlock.CompiledSIMD16 = 1; kernel->executionEnvironmentBlock.CompiledSIMD32 = 0; infoBlock->patchInfo.executionEnvironment = &kernel->executionEnvironmentBlock; kernel->mockProgram->addBlockKernel(infoBlock); // patch block's simd size kernel->mockKernel->patchBlocksSimdSize(); // obtain block's simd size from cross thread data void *blockSimdSize = ptrOffset(kernel->mockKernel->getCrossThreadData(), kernel->kernelInfo.childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); // check of block's simd size has been patched correctly EXPECT_EQ(kernel->mockProgram->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); delete kernel; } TEST(KernelInfoTest, borderColorOffset) { KernelInfo info; SPatchSamplerStateArray samplerState; samplerState.BorderColorOffset = 3; info.patchInfo.samplerStateArray = nullptr; EXPECT_EQ(0u, info.getBorderColorOffset()); info.patchInfo.samplerStateArray = &samplerState; EXPECT_EQ(3u, info.getBorderColorOffset()); } TEST(KernelInfoTest, getArgNumByName) { KernelInfo info; EXPECT_EQ(-1, info.getArgNumByName("")); KernelArgInfo kai; kai.name = "arg1"; info.kernelArgInfo.push_back(kai); EXPECT_EQ(-1, info.getArgNumByName("")); EXPECT_EQ(-1, info.getArgNumByName("arg2")); EXPECT_EQ(0, info.getArgNumByName("arg1")); kai.name = "arg2"; info.kernelArgInfo.push_back(kai); EXPECT_EQ(0, info.getArgNumByName("arg1")); EXPECT_EQ(1, info.getArgNumByName("arg2")); } TEST(KernelTest, getInstructionHeapSizeForExecutionModelReturnsZeroForNormalKernel) { auto device = std::unique_ptr(DeviceHelper<>::create(platformDevices[0])); MockKernelWithInternals kernel(*device); EXPECT_EQ(0u, kernel.mockKernel->getInstructionHeapSizeForExecutionModel()); } TEST(KernelTest, setKernelArgUsesBuiltinDispatchInfoBuilderIfAvailable) { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { MockBuiltinDispatchBuilder(BuiltIns &builtins) : BuiltinDispatchInfoBuilder(builtins) { } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { receivedArgs.push_back(std::make_tuple(argIndex, argSize, argVal)); err = errToReturn; return valueToReturn; } bool valueToReturn = false; cl_int errToReturn = CL_SUCCESS; mutable std::vector> receivedArgs; }; auto device = std::unique_ptr(DeviceHelper<>::create(platformDevices[0])); MockKernelWithInternals kernel(*device); kernel.kernelInfo.resizeKernelArgInfoAndRegisterParameter(1); kernel.mockKernel->initialize(); MockBuiltinDispatchBuilder mockBuilder(BuiltIns::getInstance()); kernel.kernelInfo.builtinDispatchBuilder = &mockBuilder; mockBuilder.valueToReturn = false; mockBuilder.errToReturn = CL_SUCCESS; EXPECT_EQ(0u, kernel.mockKernel->getPatchedArgumentsNum()); auto ret = kernel.mockKernel->setArg(1, 3, reinterpret_cast(5)); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = false; mockBuilder.errToReturn = CL_INVALID_ARG_SIZE; ret = kernel.mockKernel->setArg(7, 11, reinterpret_cast(13)); EXPECT_EQ(CL_INVALID_ARG_SIZE, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = true; mockBuilder.errToReturn = CL_SUCCESS; ret = kernel.mockKernel->setArg(17, 19, reinterpret_cast(23)); EXPECT_EQ(CL_INVALID_ARG_INDEX, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = true; mockBuilder.errToReturn = CL_INVALID_ARG_SIZE; ret = kernel.mockKernel->setArg(29, 31, reinterpret_cast(37)); EXPECT_EQ(CL_INVALID_ARG_INDEX, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); ASSERT_EQ(4U, mockBuilder.receivedArgs.size()); EXPECT_EQ(1U, std::get<0>(mockBuilder.receivedArgs[0])); EXPECT_EQ(3U, std::get<1>(mockBuilder.receivedArgs[0])); EXPECT_EQ(reinterpret_cast(5), std::get<2>(mockBuilder.receivedArgs[0])); EXPECT_EQ(7U, std::get<0>(mockBuilder.receivedArgs[1])); EXPECT_EQ(11U, std::get<1>(mockBuilder.receivedArgs[1])); EXPECT_EQ(reinterpret_cast(13), std::get<2>(mockBuilder.receivedArgs[1])); EXPECT_EQ(17U, std::get<0>(mockBuilder.receivedArgs[2])); EXPECT_EQ(19U, std::get<1>(mockBuilder.receivedArgs[2])); EXPECT_EQ(reinterpret_cast(23), std::get<2>(mockBuilder.receivedArgs[2])); EXPECT_EQ(29U, std::get<0>(mockBuilder.receivedArgs[3])); EXPECT_EQ(31U, std::get<1>(mockBuilder.receivedArgs[3])); EXPECT_EQ(reinterpret_cast(37), std::get<2>(mockBuilder.receivedArgs[3])); BuiltIns::shutDown(); } TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMaxWorkgroupSizeIsSimdSizeDependant) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.set(true); GT_SYSTEM_INFO mySysInfo = *platformDevices[0]->pSysInfo; FeatureTable mySkuTable = *platformDevices[0]->pSkuTable; HardwareInfo myHwInfo = {platformDevices[0]->pPlatform, &mySkuTable, platformDevices[0]->pWaTable, &mySysInfo, platformDevices[0]->capabilityTable}; mySysInfo.EUCount = 24; mySysInfo.SubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::unique_ptr(DeviceHelper<>::create(&myHwInfo)); MockKernelWithInternals kernel(*device); kernel.executionEnvironment.LargestCompiledSIMDSize = 32; size_t maxKernelWkgSize; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(1024u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 16; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(512u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 8; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(256u, maxKernelWkgSize); }