/* * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "core/debug_settings/debug_settings_manager.h" #include "core/gmm_helper/gmm_helper.h" #include "core/helpers/hw_helper.h" #include "core/helpers/options.h" #include "core/memory_manager/unified_memory_manager.h" #include "core/unit_tests/helpers/debug_manager_state_restore.h" #include "core/unit_tests/page_fault_manager/mock_cpu_page_fault_manager.h" #include "core/unit_tests/utilities/base_object_utils.h" #include "runtime/built_ins/builtins_dispatch_builder.h" #include "runtime/command_stream/command_stream_receiver_hw.h" #include "runtime/helpers/flush_stamp.h" #include "runtime/helpers/memory_properties_flags_helpers.h" #include "runtime/helpers/surface_formats.h" #include "runtime/kernel/kernel.h" #include "runtime/mem_obj/image.h" #include "runtime/memory_manager/allocations_list.h" #include "runtime/memory_manager/os_agnostic_memory_manager.h" #include "runtime/os_interface/os_context.h" #include "test.h" #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/fixtures/device_host_queue_fixture.h" #include "unit_tests/fixtures/execution_model_fixture.h" #include "unit_tests/fixtures/memory_management_fixture.h" #include "unit_tests/fixtures/multi_root_device_fixture.h" #include "unit_tests/helpers/gtest_helpers.h" #include "unit_tests/libult/ult_command_stream_receiver.h" #include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_context.h" #include "unit_tests/mocks/mock_graphics_allocation.h" #include "unit_tests/mocks/mock_kernel.h" #include "unit_tests/mocks/mock_memory_manager.h" #include "unit_tests/mocks/mock_program.h" #include "unit_tests/program/program_from_binary.h" #include "unit_tests/program/program_tests.h" #include using namespace NEO; using namespace DeviceHostQueue; class KernelTest : public ProgramFromBinaryTest { public: ~KernelTest() override = default; protected: void SetUp() override { 
// Overview: unit tests for the Kernel class (namespace NEO), written with the
// TEST / TEST_F / TEST_P / HWTEST_F / HWCMDTEST_F macros used below. The
// physical line above holds the license header, the include list and the
// opening of the KernelTest fixture (derived from ProgramFromBinaryTest).
// NOTE(review): this copy keeps many statements on one physical line, so every
// `//` comment lexically runs to the end of its line and swallows the code that
// follows it - confirm against the canonically formatted file.
// NOTE(review): several template argument lists appear to be missing (e.g.
// `std::make_unique()`, `reinterpret_cast(`, `static_cast(`, `typedef Test ...`,
// the bare `#include`) and `¶mValue...` looks like a corrupted
// `&paramValue...` - TODO confirm against upstream before building.
//
// Next line: KernelTest::SetUp builds pProgram for pDevice and creates pKernel
// from the kernel info named KernelName; TearDown deletes pKernel and resets
// knownSource before calling the base TearDown.
// Tests: isMemObj (BUFFER/IMAGE/PIPE are mem objects; SAMPLER/ACCELERATOR/NONE/
// SVM_ALLOC are not), getKernelHeap (heap pointer and size match the kernel
// info), GetInfo_InvalidParamName (param name 0 yields CL_INVALID_VALUE) and the
// start of GetInfo_Name (size query, then name query for CL_KERNEL_FUNCTION_NAME).
ProgramFromBinaryTest::SetUp(); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id device = pDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, *pProgram->getKernelInfo(KernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); } void TearDown() override { delete pKernel; pKernel = nullptr; knownSource.reset(); ProgramFromBinaryTest::TearDown(); } Kernel *pKernel = nullptr; cl_int retVal = CL_SUCCESS; }; TEST(KernelTest, isMemObj) { EXPECT_TRUE(Kernel::isMemObj(Kernel::BUFFER_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::IMAGE_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::PIPE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SAMPLER_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::ACCELERATOR_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::NONE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SVM_ALLOC_OBJ)); } TEST_P(KernelTest, getKernelHeap) { EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeap, pKernel->getKernelHeap()); EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeader->KernelHeapSize, pKernel->getKernelHeapSize()); } TEST_P(KernelTest, GetInfo_InvalidParamName) { size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( 0, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(KernelTest, GetInfo_Name) { cl_kernel_info paramName = CL_KERNEL_FUNCTION_NAME; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name paramValue = new char[paramValueSizeRet]; // get the name paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, strcmp(paramValue, KernelName)); 
// Next line: end of GetInfo_Name (frees the name buffer).
// GetInfo_BinaryProgramIntel: the bytes returned for
// CL_KERNEL_BINARY_PROGRAM_INTEL must memcmp-equal getKernelHeap().
// givenBinaryWhenItIsQueriedForGpuAddress...: CL_KERNEL_BINARY_GPU_ADDRESS_INTEL
// must equal GmmHelper::decanonize() of the kernel allocation's GPU address.
// GetInfo_NumArgs: CL_KERNEL_NUM_ARGS must report 2. GetInfo_Program begins.
EXPECT_EQ(CL_SUCCESS, retVal); delete[] paramValue; } TEST_P(KernelTest, GetInfo_BinaryProgramIntel) { cl_kernel_info paramName = CL_KERNEL_BINARY_PROGRAM_INTEL; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; const char *pKernelData = reinterpret_cast(pKernel->getKernelHeap()); EXPECT_NE(nullptr, pKernelData); // get size of kernel binary retVal = pKernel->getInfo( paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for kernel binary paramValue = new char[paramValueSizeRet]; // get kernel binary paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, memcmp(paramValue, pKernelData, paramValueSize)); delete[] paramValue; } TEST_P(KernelTest, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIsReturned) { cl_kernel_info paramName = CL_KERNEL_BINARY_GPU_ADDRESS_INTEL; uint64_t paramValue = 0llu; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); auto expectedGpuAddress = GmmHelper::decanonize(pKernel->getKernelInfo().kernelAllocation->getGpuAddress()); EXPECT_EQ(expectedGpuAddress, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_P(KernelTest, GetInfo_NumArgs) { cl_kernel_info paramName = CL_KERNEL_NUM_ARGS; size_t paramValueSize = sizeof(cl_uint); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(2u, paramValue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(KernelTest, GetInfo_Program) { cl_kernel_info paramName = CL_KERNEL_PROGRAM; size_t paramValueSize = sizeof(cl_program); cl_program paramValue = 0; size_t 
// Next line: GetInfo_Program / GetInfo_Context expect pProgram / pContext to be
// returned with a matching size.
// GetWorkGroupInfo_WorkgroupSize: sets maxKernelWorkGroupSize to the device
// maxWorkGroupSize minus one and expects CL_KERNEL_WORK_GROUP_SIZE to report it.
// GetWorkGroupInfo_CompileWorkgroupSize: checks only the status and returned
// size for a size_t[3] result.
// KernelTest is instantiated over Combine(BinaryFileNames, KernelNames).
// KernelFromBinaryTest: fixture whose SetUp/TearDown forward to
// ProgramSimpleFixture.
paramValueSizeRet = 0; cl_program prog = pProgram; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_program), paramValueSizeRet); EXPECT_EQ(prog, paramValue); } TEST_P(KernelTest, GetInfo_Context) { cl_kernel_info paramName = CL_KERNEL_CONTEXT; cl_context paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; cl_context context = pContext; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(context, paramValue); } TEST_P(KernelTest, GetWorkGroupInfo_WorkgroupSize) { cl_kernel_info paramName = CL_KERNEL_WORK_GROUP_SIZE; size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize - 1; pKernel->maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); retVal = pKernel->getWorkGroupInfo( pDevice, paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(kernelMaxWorkGroupSize, paramValue); } TEST_P(KernelTest, GetWorkGroupInfo_CompileWorkgroupSize) { cl_kernel_info paramName = CL_KERNEL_COMPILE_WORK_GROUP_SIZE; size_t paramValue[3]; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getWorkGroupInfo( pDevice, paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); } INSTANTIATE_TEST_CASE_P(KernelTests, KernelTest, ::testing::Combine( ::testing::ValuesIn(BinaryFileNames), ::testing::ValuesIn(KernelNames))); class KernelFromBinaryTest : public ProgramSimpleFixture { public: void SetUp() override { ProgramSimpleFixture::SetUp(); } void TearDown() override { ProgramSimpleFixture::TearDown(); } }; typedef Test 
// Next line: KernelFromBinaryTests.
// getInfo_NumArgs: builds "kernel_num_args" and expects kernel "test" to report
// 3 arguments.
// BuiltInIsSetToFalseForRegularKernels: isBuiltIn must be false for
// "simple_kernel_0" from "simple_kernels".
// givenArgumentDeclaredAsConstant...: kernelArgInfo[1] of simple_kernel_6 and
// kernelArgInfo[0] of simple_kernel_1 must be flagged isReadOnly.
// PatchInfo.Constructor begins: a default-constructed PatchInfo is expected to
// have every listed pointer member equal to nullptr.
KernelFromBinaryTests; TEST_F(KernelFromBinaryTests, getInfo_NumArgs) { cl_device_id device = pDevice; CreateProgramFromBinary(pContext, &device, "kernel_num_args"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test"); // create a kernel auto pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( CL_KERNEL_NUM_ARGS, sizeof(cl_uint), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(3u, paramValue); delete pKernel; } TEST_F(KernelFromBinaryTests, BuiltInIsSetToFalseForRegularKernels) { cl_device_id device = pDevice; CreateProgramFromBinary(pContext, &device, "simple_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("simple_kernel_0"); // create a kernel auto pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); // get builtIn property bool isBuiltIn = pKernel->isBuiltIn; EXPECT_FALSE(isBuiltIn); delete pKernel; } TEST_F(KernelFromBinaryTests, givenArgumentDeclaredAsConstantWhenKernelIsCreatedThenArgumentIsMarkedAsReadOnly) { cl_device_id device = pDevice; CreateProgramFromBinary(pContext, &device, "simple_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("simple_kernel_6"); EXPECT_TRUE(pKernelInfo->kernelArgInfo[1].isReadOnly); pKernelInfo = pProgram->getKernelInfo("simple_kernel_1"); EXPECT_TRUE(pKernelInfo->kernelArgInfo[0].isReadOnly); } TEST(PatchInfo, Constructor) { PatchInfo patchInfo; EXPECT_EQ(nullptr, 
// Next line: remainder of PatchInfo.Constructor, then the typedefs naming the
// private/global/constant surface test fixtures.
// KernelWithDeviceQueueFixture: sets up DeviceFixture then DeviceQueueFixture
// (with the member MockContext and pDevice) and tears them down in the reverse
// order; aliased as KernelDefaultDeviceQueueSurfaceTest and
// KernelEventPoolSurfaceTest.
// CommandStreamReceiverMock begins: its default constructor passes a
// `new ExecutionEnvironment` to the base class and stores the address of
// executionEnvironment in mockExecutionEnvironment via reset().
patchInfo.interfaceDescriptorDataLoad); EXPECT_EQ(nullptr, patchInfo.localsurface); EXPECT_EQ(nullptr, patchInfo.mediavfestate); EXPECT_EQ(nullptr, patchInfo.mediaVfeStateSlot1); EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorData); EXPECT_EQ(nullptr, patchInfo.samplerStateArray); EXPECT_EQ(nullptr, patchInfo.bindingTableState); EXPECT_EQ(nullptr, patchInfo.dataParameterStream); EXPECT_EQ(nullptr, patchInfo.threadPayload); EXPECT_EQ(nullptr, patchInfo.executionEnvironment); EXPECT_EQ(nullptr, patchInfo.pKernelAttributesInfo); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrintfSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessEventPoolSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface); } typedef Test KernelPrivateSurfaceTest; typedef Test KernelGlobalSurfaceTest; typedef Test KernelConstantSurfaceTest; struct KernelWithDeviceQueueFixture : public DeviceFixture, public DeviceQueueFixture, public testing::Test { void SetUp() override { DeviceFixture::SetUp(); DeviceQueueFixture::SetUp(&context, pDevice); } void TearDown() override { DeviceQueueFixture::TearDown(); DeviceFixture::TearDown(); } MockContext context; }; typedef KernelWithDeviceQueueFixture KernelDefaultDeviceQueueSurfaceTest; typedef KernelWithDeviceQueueFixture KernelEventPoolSurfaceTest; class CommandStreamReceiverMock : public CommandStreamReceiver { typedef CommandStreamReceiver BaseClass; public: using CommandStreamReceiver::executionEnvironment; using BaseClass::CommandStreamReceiver; bool isMultiOsContextCapable() const override { return false; } CommandStreamReceiverMock() : BaseClass(*(new ExecutionEnvironment), 0) { this->mockExecutionEnvironment.reset(&this->executionEnvironment); 
// Next line: rest of CommandStreamReceiverMock.
// makeResident / makeNonResident record / erase the allocation's underlying
// buffer (mapped to its size) in `residency`, and forward to the base class only
// while passResidencyCallToBaseClass is true.
// flush and flushBatchedSubmissions return true, flushTask returns an empty
// CompletionStamp, blitBuffer returns taskCount, the wait is a no-op and
// getType reports CSR_HW.
// testPrivateSurface begins: fills a private-surface token
// (PerThreadPrivateMemorySize = 112) and a data parameter stream token.
executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); } void makeResident(GraphicsAllocation &graphicsAllocation) override { residency[graphicsAllocation.getUnderlyingBuffer()] = graphicsAllocation.getUnderlyingBufferSize(); if (passResidencyCallToBaseClass) { CommandStreamReceiver::makeResident(graphicsAllocation); } } void makeNonResident(GraphicsAllocation &graphicsAllocation) override { residency.erase(graphicsAllocation.getUnderlyingBuffer()); if (passResidencyCallToBaseClass) { CommandStreamReceiver::makeNonResident(graphicsAllocation); } } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return true; } void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override { } uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { return taskCount; }; CompletionStamp flushTask( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { CompletionStamp cs = {}; return cs; } bool flushBatchedSubmissions() override { return true; } CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_HW; } std::map residency; bool passResidencyCallToBaseClass = true; std::unique_ptr mockExecutionEnvironment; }; TEST_F(KernelPrivateSurfaceTest, testPrivateSurface) { ASSERT_NE(nullptr, pDevice); // define kernel info auto pKernelInfo = std::make_unique(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; 
// Next line: testPrivateSurface continued - after initialize(), makeResident on
// the mock CSR must register exactly one allocation and
// makeSurfacePackNonResident must remove it again.
// givenKernelWithPrivateSurfaceThatIsInUseByGpu... begins: same token setup,
// kernel held in a unique_ptr, private surface fetched from the kernel.
tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // create kernel MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // Test it auto executionEnvironment = pDevice->getExecutionEnvironment(); std::unique_ptr csr(new CommandStreamReceiverMock(*executionEnvironment, 0)); csr->setupContext(*pDevice->getDefaultEngine().osContext); csr->residency.clear(); EXPECT_EQ(0u, csr->residency.size()); pKernel->makeResident(*csr.get()); EXPECT_EQ(1u, csr->residency.size()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->residency.size()); delete pKernel; } TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWhenKernelIsBeingDestroyedThenAllocationIsAddedToDefferedFreeList) { auto pKernelInfo = std::make_unique(); SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); pKernel->initialize(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); auto privateSurface = pKernel->getPrivateSurface(); auto 
// Next line: the in-use test continued - the private surface's task count is set
// to the current tag value + 1; destroying the kernel must then leave the
// surface at the head of the CSR temporary-allocations list.
// testPrivateSurfaceAllocationFailure: runs kernel creation under
// MemoryManagementFixture::injectFailures and expects CL_SUCCESS for the
// non-failing index, CL_OUT_OF_RESOURCES otherwise.
// given32BitDevice... begins (body runs only when is64bit): forces 32-bit
// allocations on the device memory manager.
tagAddress = csr.getTagAddress(); privateSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); pKernel.reset(nullptr); EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), privateSurface); } TEST_F(KernelPrivateSurfaceTest, testPrivateSurfaceAllocationFailure) { ASSERT_NE(nullptr, pDevice); // define kernel info auto pKernelInfo = std::make_unique(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // create kernel MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); MemoryManagementFixture::InjectedFunction method = [&](size_t failureIndex) { MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, pKernel->initialize()); } else { EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } delete pKernel; }; auto f = new MemoryManagementFixture(); f->SetUp(); f->injectFailures(method); f->TearDown(); delete f; } TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateSurfaceIs32BitAllocation) { if (is64bit) { pDevice->getMemoryManager()->setForce32BitAllocations(true); // define kernel info auto pKernelInfo = std::make_unique(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; 
// Next line: given32BitDevice... continued - with DataParamSize = 4 the
// initialized kernel's private surface must report is32BitAllocation().
// HWTEST givenStatefulKernel...PrivateMemorySurfaceState... begins: private
// surface token at SurfaceStateHeapOffset 0 plus a 0x80-byte local surface
// state heap wired into the kernel info.
tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 4; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // create kernel MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getPrivateSurface()->is32BitAllocation()); delete pKernel; } } HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup constant memory SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface; AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 0; AllocateStatelessPrivateMemorySurface.DataParamOffset = 0; AllocateStatelessPrivateMemorySurface.DataParamSize = 8; AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 16; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface; MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = 
// Next line: stateful private-surface test continued - with usesSsh and
// requiresSshForBuffers set, the surface state at the token's heap offset must
// carry the private surface's GPU address as its base address.
// givenStatelessKernel...PrivateMemorySurfaceStateIsNotPatched: with both flags
// false the kernel's surface state heap size must be 0.
// givenNullDataParameterStream...: getConstantBufferSize() is 0 when no data
// parameter stream is set. The non-null counterpart begins.
sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); auto bufferAddress = pKernel->getPrivateSurface()->getGpuAddress(); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); delete pKernel; } TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); program.setConstantSurface(nullptr); delete pKernel; } TEST_F(KernelPrivateSurfaceTest, givenNullDataParameterStreamGetConstantBufferSizeReturnsZero) { auto pKernelInfo = std::make_unique(); EXPECT_EQ(0u, pKernelInfo->getConstantBufferSize()); } TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamGetConstantBufferSizeReturnsCorrectSize) { auto pKernelInfo = 
// Next line: getConstantBufferSize() must return 64 when
// DataParameterStreamSize is 64.
// "PrivateSurfaceTooBig" tests: PerThreadPrivateMemorySize is set to
// std::numeric_limits::max() and initialize() must return CL_OUT_OF_RESOURCES.
// Each test sets computeUnitsUsedForScratch to 120 if the device reports 0.
// Variant 1: gpuPointerSize 4, forced 32-bit allocations off. Variant 2 begins.
std::make_unique(); SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; EXPECT_EQ(64u, pKernelInfo->getConstantBufferSize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) { auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface()); pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max(); auto executionEnvironment = std::unique_ptr(new SPatchExecutionEnvironment()); *executionEnvironment = {}; executionEnvironment->CompiledSIMD32 = 32; auto pKernelInfo = std::make_unique(); pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get(); pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get(); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); pKernelInfo->gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(false); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) { auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface()); pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max(); auto executionEnvironment = std::unique_ptr(new SPatchExecutionEnvironment()); *executionEnvironment = {}; executionEnvironment->CompiledSIMD32 = 32; auto pKernelInfo = std::make_unique(); pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get(); pKernelInfo->patchInfo.executionEnvironment = 
// Next line: variant 2 (gpuPointerSize 4, forced 32-bit allocations on) and
// variant 3 (gpuPointerSize 8, forced 32-bit allocations on) of the
// "PrivateSurfaceTooBig" tests.
// KernelGlobalSurfaceTest begins: the built-in kernel test sets up a global
// memory surface token (DataParamOffset 0, DataParamSize 8).
executionEnvironment.get(); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); pKernelInfo->gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) { auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface()); pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max(); auto executionEnvironment = std::unique_ptr(new SPatchExecutionEnvironment()); *executionEnvironment = {}; executionEnvironment->CompiledSIMD32 = 32; auto pKernelInfo = std::make_unique(); pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get(); pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get(); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); pKernelInfo->gpuPointerSize = 8; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithCpuAddress) { // define kernel info auto pKernelInfo = std::make_unique(); // setup global memory SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; 
// Next line: built-in global-surface test continued - the GraphicsAllocation is
// constructed with a GPU address of (buffer - 8), isBuiltIn is set, and the
// first 8 bytes of cross-thread data must equal the allocation's underlying
// buffer (CPU) address.
// givenNDRangeKernel...GlobalSurface... begins with the same token setup.
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, (osHandle)1u, MemoryPool::MemoryNull); uint64_t bufferAddress = (uint64_t)gfxAlloc.getUnderlyingBuffer(); // create kernel MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); program.setGlobalSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); pKernel->isBuiltIn = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); delete pKernel; } TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithBaseAddressOffset) { // define kernel info auto pKernelInfo = std::make_unique(); // setup global memory SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; 
// Next line: NDRange global-surface test continued - isBuiltIn is left at its
// default and cross-thread data must equal gfxAlloc.getGpuAddress().
// HWTEST givenStatefulKernel...GlobalMemorySurfaceState... begins: global
// surface token at SurfaceStateHeapOffset 0, MockGraphicsAllocation over a
// 16-byte buffer registered as the program's global surface.
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, MemoryPool::MemoryNull); uint64_t bufferAddress = gfxAlloc.getGpuAddress(); // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); program.setGlobalSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); delete pKernel; } HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); auto bufferAddress = gfxAlloc.getGpuAddress(); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); program.setGlobalSurface(&gfxAlloc); // create kernel MockKernel 
// Next line: stateful global-surface test continued - the surface state at the
// token's heap offset must have gfxAlloc's GPU address as its base address.
// givenStatelessKernel...GlobalMemorySurfaceStateIsNotPatched: with usesSsh and
// requiresSshForBuffers false the surface state heap size must be 0.
// KernelConstantSurfaceTest begins (built-in kernel variant).
*pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); program.setGlobalSurface(nullptr); delete pKernel; } TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockProgram program(*pDevice->getExecutionEnvironment()); program.setGlobalSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); program.setGlobalSurface(nullptr); delete pKernel; } TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithCpuAddress) { // define kernel info auto pKernelInfo = 
// Next line: built-in constant-surface test - mirrors the global-surface case
// using setConstantSurface; with isBuiltIn set, cross-thread data must equal the
// allocation's underlying buffer (CPU) address.
// givenNDRangeKernel...ConstantSurface... begins with the same token setup.
std::make_unique(); // setup constant memory SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, (osHandle)1u, MemoryPool::MemoryNull); uint64_t bufferAddress = (uint64_t)gfxAlloc.getUnderlyingBuffer(); // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); program.setConstantSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); pKernel->isBuiltIn = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); delete pKernel; } TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithBaseAddressOffset) { // define kernel info auto pKernelInfo = std::make_unique(); // setup constant memory SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; 
// Next line: NDRange constant-surface test continued - cross-thread data must
// equal gfxAlloc.getGpuAddress().
// HWTEST givenStatefulKernel...ConstantMemorySurfaceState... begins: constant
// surface token at SurfaceStateHeapOffset 0.
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; char buffer[16]; GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, MemoryPool::MemoryNull); uint64_t bufferAddress = gfxAlloc.getGpuAddress(); // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); program.setConstantSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); delete pKernel; } HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup constant memory SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; 
// Next line: stateful constant-surface test continued - the surface state at the
// token's heap offset must have gfxAlloc's GPU address as its base address.
// givenStatelessKernel...ConstantMemorySurfaceStateIsNotPatched begins: usesSsh
// and requiresSshForBuffers are cleared before initialize().
char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); auto bufferAddress = gfxAlloc.getGpuAddress(); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); program.setConstantSurface(nullptr); delete pKernel; } TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup global memory char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockProgram program(*pDevice->getExecutionEnvironment()); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, 
// Next line: stateless constant-surface test continued - the surface state heap
// size must be 0.
// HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, ...): for a stateful
// kernel with an event-pool surface token, the surface state at the token's
// heap offset must have base address 0 and surface type SURFTYPE_NULL.
pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); program.setConstantSurface(nullptr); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsPatchedWithNullSurface) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(0u, surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType); delete pKernel; } 
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenEventPoolIsPatchedThenEventPoolSurfaceStateIsProgrammed) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->patchEventPool(pDevQueue); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, 
givenKernelWithNullEventPoolInKernelInfoWhenEventPoolIsPatchedThenAddressIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = nullptr; // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 123; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchEventPool(pDevQueue); EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); if (pDevice->getSupportedClVersion() < 20) { EXPECT_EQ(0u, 
pKernel->getSurfaceStateHeapSize()); } else { } delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup event pool surface SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0; AllocateStatelessEventPoolSurface.DataParamOffset = 0; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 0; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchEventPool(pDevQueue); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsPatchedWithNullSurface) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; 
AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(0u, surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenDefaultDeviceQueueIsPatchedThenSurfaceStateIsCorrectlyProgrammed) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; 
AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment(), &context, false); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // setup surface state heap char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->patchDefaultDeviceQueue(pDevQueue); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), surfaceAddress); auto surfaceType = surfaceState->getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; 
AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWithNullDeviceQueueKernelInfoWhenDefaultDeviceQueueIsPatchedThenAddressIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = nullptr; // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 123; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchDefaultDeviceQueue(pDevQueue); EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenDefaultDeviceQueueIsPatchedThenCrossThreadDataIsPatched) { // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; 
pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // setup default device queue surface SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create kernel MockProgram program(*pDevice->getExecutionEnvironment()); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pDevice); // define stateful path pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; uint64_t crossThreadData = 0; pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); pKernel->patchDefaultDeviceQueue(pDevQueue); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } typedef Test KernelResidencyTest; HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIsMadeResident) { ASSERT_NE(nullptr, pDevice); char pCrossThreadData[64]; // define kernel info auto pKernelInfo = std::make_unique(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; pKernelInfo->kernelArgInfo.resize(3); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); 
pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x10; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; MockProgram program(*pDevice->getExecutionEnvironment()); MockContext ctx; program.setContext(&ctx); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.makeResidentAllocations.size()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo().getGraphicsAllocation())); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFunctionsIsaAllocationIsMadeResident) { auto pKernelInfo = std::make_unique(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); MockProgram program(*pDevice->getExecutionEnvironment()); auto exportedFunctionsSurface = std::make_unique(); program.exportedFunctionsSurface = exportedFunctionsSurface.get(); MockContext ctx; program.setContext(&ctx); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.exportedFunctionsSurface)); // check getResidency as well 
std::vector residencySurfaces; pKernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv; { CommandStreamReceiverMock csrMock; csrMock.passResidencyCallToBaseClass = false; for (const auto &s : residencySurfaces) { s->makeResident(csrMock); delete s; } EXPECT_EQ(1U, csrMock.residency.count(exportedFunctionsSurface->getUnderlyingBuffer())); mockCsrExecEnv = std::move(csrMock.mockExecutionEnvironment); } memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBufferIsMadeResident) { auto pKernelInfo = std::make_unique(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); MockProgram program(*pDevice->getExecutionEnvironment()); MockContext ctx; program.setContext(&ctx); program.globalSurface = new MockGraphicsAllocation(); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.globalSurface)); std::vector residencySurfaces; pKernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv; { CommandStreamReceiverMock csrMock; csrMock.passResidencyCallToBaseClass = false; for (const auto &s : residencySurfaces) { s->makeResident(csrMock); delete s; } EXPECT_EQ(1U, csrMock.residency.count(program.globalSurface->getUnderlyingBuffer())); mockCsrExecEnv = std::move(csrMock.mockExecutionEnvironment); } memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, 
givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAllocationThenTheyAreMadeResident) { MockKernelWithInternals mockKernel(*this->pDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY)); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation)); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_SVM_PTRS, true); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenOnlyHostAllocationsAreMadeResident) { MockKernelWithInternals mockKernel(*this->pDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY)); auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY)); 
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedHostMemoryAllocation)); svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenOnlySharedAllocationsAreMadeResident) { MockKernelWithInternals mockKernel(*this->pDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto unifiedSharedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue()); auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY)); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), 
castToUint64(unifiedSharedMemoryAllocation)); svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenDeviceUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsNotDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); MockKernelWithInternals mockKernel(*this->pDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY)); auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocation); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->makeResident(commandStreamReceiver); EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0); EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0); mockKernel.mockKernel->clearUnifiedMemoryExecInfo(); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); 
static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); MockKernelWithInternals mockKernel(*this->pDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue()); auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue()); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocation); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->makeResident(commandStreamReceiver); EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0); EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1); EXPECT_EQ(mockPageFaultManager->protectedMemoryAccessAddress, unifiedMemoryAllocation); EXPECT_EQ(mockPageFaultManager->protectedSize, 4096u); EXPECT_EQ(mockPageFaultManager->transferToGpuAddress, unifiedMemoryAllocation); mockKernel.mockKernel->clearUnifiedMemoryExecInfo(); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndNotRequiredMemSyncWhenMakeResidentIsCalledThenAllocationIsNotDecommited) { auto 
mockPageFaultManager = new MockPageFaultManager();
    // NOTE(review): the statements down to the first closing brace are the tail of a test whose
    // opening lines precede this chunk; they are reproduced unchanged.
    // NOTE(review): several casts and smart-pointer declarations in this chunk appear without
    // template arguments (e.g. `static_cast(...)`, `std::make_unique()`); presumably they were
    // lost before this text was captured - confirm against version control.

    // Hand the mock to the memory manager's pageFaultManager holder via reset().
    static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pDevice, nullptr, true);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue());
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue());

    // Baseline: one CPU transfer has been counted before the kernel touches the allocation.
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);

    // Register the shared allocation as kernel argument 0, then disable the sync requirement.
    mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, unifiedMemoryGraphicsAllocation->gpuAllocation, unifiedMemoryAllocation, 4096u, unifiedMemoryGraphicsAllocation->gpuAllocation, sizeof(uintptr_t)};
    mockKernel.mockKernel->setUnifiedMemorySyncRequirement(false);

    mockKernel.mockKernel->makeResident(commandStreamReceiver);

    // With the sync requirement off, makeResident must not have driven the page-fault manager.
    EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0);
    EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 0);
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0);

    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// With indirect shared allocations allowed on the kernel, makeResident is expected to protect the
// shared allocation and transfer it to the GPU exactly once, recording its address and size.
HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAllocPageFaultManagerAndIndirectAllocsAllowedWhenMakeResidentIsCalledThenAllocationIsDecommited) {
    auto mockPageFaultManager = new MockPageFaultManager();
    // Hand the mock to the memory manager's pageFaultManager holder via reset().
    static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue());
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue());

    // Baseline before the kernel is made resident.
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);

    // No explicit kernel argument is set here; only the indirect-access control is enabled.
    mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;

    mockKernel.mockKernel->makeResident(commandStreamReceiver);

    EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0);
    EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1);

    // The mock must have seen exactly the allocation created above.
    EXPECT_EQ(mockPageFaultManager->protectedMemoryAccessAddress, unifiedMemoryAllocation);
    EXPECT_EQ(mockPageFaultManager->protectedSize, 4096u);
    EXPECT_EQ(mockPageFaultManager->transferToGpuAddress, unifiedMemoryAllocation);

    mockKernel.mockKernel->clearUnifiedMemoryExecInfo();
    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// setUnifiedMemoryExecInfo() must store the allocation in the kernel, makeResident() must then add
// it to the command stream receiver's residency list, and clearUnifiedMemoryExecInfo() must empty
// the kernel-side list again.
HWTEST_F(KernelResidencyTest, givenKernelWhenSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) {
    MockKernelWithInternals mockKernel(*this->pDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);

    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());

    mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocation);

    EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation));

    mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
    EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());
    EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation));

    mockKernel.mockKernel->clearUnifiedMemoryExecInfo();
    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Same storage check through the public clSetKernelExecInfo entry point. A second call with a
// different pointer is expected to replace the stored allocation (list size stays 1).
HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) {
    MockKernelWithInternals mockKernel(*this->pDevice);

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));

    auto unifiedMemoryAllocation2 = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));

    auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(unifiedMemoryAllocation), &unifiedMemoryAllocation);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation));

    // NOTE(review): the size is taken from unifiedMemoryAllocation while the address passed is that
    // of unifiedMemoryAllocation2; both come from the same call so the sizes presumably match.
    status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(unifiedMemoryAllocation), &unifiedMemoryAllocation2);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation2));

    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation2);
}

// CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL must toggle
// unifiedMemoryControls.indirectDeviceAllocationsAllowed on and off.
HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryDevicePropertyIsCalledThenKernelControlIsChanged) {
    MockKernelWithInternals mockKernel(*this->pDevice);
    cl_bool enableIndirectDeviceAccess = CL_TRUE;
    auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    enableIndirectDeviceAccess = CL_FALSE;
    status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed);
}

// CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL must toggle
// unifiedMemoryControls.indirectHostAllocationsAllowed on and off.
HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryHostPropertyIsCalledThenKernelControlIsChanged) {
    MockKernelWithInternals mockKernel(*this->pDevice);
    cl_bool enableIndirectHostAccess = CL_TRUE;
    auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed);
    enableIndirectHostAccess = CL_FALSE;
    status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed);
}

// CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL must toggle
// unifiedMemoryControls.indirectSharedAllocationsAllowed on and off.
HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemorySharedPropertyIsCalledThenKernelControlIsChanged) {
    MockKernelWithInternals mockKernel(*this->pDevice);
    cl_bool enableIndirectSharedAccess = CL_TRUE;
    auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
    enableIndirectSharedAccess = CL_FALSE;
    status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
}

// A kernel whose three arguments are an image, a media block image and a media image must report
// usesOnlyImages() once initialize() has run; before initialization the flag is expected false.
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelArgInfo.resize(3);
    pKernelInfo->kernelArgInfo[2].isImage = true;
    pKernelInfo->kernelArgInfo[1].isMediaBlockImage = true;
    pKernelInfo->kernelArgInfo[0].isMediaImage = true;

    std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0]));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false));
    auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo, *device));
    EXPECT_FALSE(kernel->usesOnlyImages());
    kernel->initialize();
    EXPECT_TRUE(kernel->usesOnlyImages());
}
TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelArgInfo.resize(3); pKernelInfo->kernelArgInfo[2].isImage = true; pKernelInfo->kernelArgInfo[1].isBuffer = true; pKernelInfo->kernelArgInfo[0].isMediaImage = true; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false)); auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo, *device)); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); } TEST(KernelImageDetectionTests, givenKernelWithNoImagesWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].isBuffer = true; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false)); auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo, *device)); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); } HWTEST_F(KernelResidencyTest, test_MakeArgsResidentCheckImageFromImage) { ASSERT_NE(nullptr, pDevice); //create NV12 image cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_NV12_INTEL; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 16; imageDesc.image_height = 16; imageDesc.image_depth = 1; 
cl_int retVal; MockContext context; std::unique_ptr imageNV12(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(imageNV12->getMediaPlaneType(), 0u); //create Y plane imageFormat.image_channel_order = CL_R; flags = CL_MEM_READ_ONLY; surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.mem_object = imageNV12.get(); std::unique_ptr imageY(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(imageY->getMediaPlaneType(), 0u); auto pKernelInfo = std::make_unique(); KernelArgInfo kernelArgInfo; kernelArgInfo.isImage = true; pKernelInfo->kernelArgInfo.push_back(kernelArgInfo); auto program = std::make_unique(*pDevice->getExecutionEnvironment()); program->setContext(&context); std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo, *pDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(imageNV12->isImageFromImage()); EXPECT_TRUE(imageY->isImageFromImage()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired); } struct KernelExecutionEnvironmentTest : public Test { void SetUp() override { DeviceFixture::SetUp(); program = std::make_unique(*pDevice->getExecutionEnvironment()); pKernelInfo = std::make_unique(); pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment; pKernel = new MockKernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); } void TearDown() override { delete pKernel; 
DeviceFixture::TearDown(); } MockKernel *pKernel; std::unique_ptr program; std::unique_ptr pKernelInfo; SPatchExecutionEnvironment executionEnvironment = {}; }; TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll32) { executionEnvironment.CompiledSIMD32 = true; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(32u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll16) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(16u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll8) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns8ByDefault) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = false; EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns1WhenExecutionEnvironmentNotAvailable) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = false; auto oldExcEnv = this->pKernelInfo->patchInfo.executionEnvironment; this->pKernelInfo->patchInfo.executionEnvironment = nullptr; EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize()); this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv; } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns1WhenLargestCompilledSimdSizeEqualOne) { executionEnvironment.LargestCompiledSIMDSize = 1; auto oldExcEnv = this->pKernelInfo->patchInfo.executionEnvironment; EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize()); this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv; } 
TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsZero) { auto maxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = 0; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 0; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 0; EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsLowerThanMaxWorkGroupSize) { auto maxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast(maxWorkGroupSize / 2); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 1; 
const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 1; EXPECT_EQ(maxWorkGroupSize / 2, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsGreaterThanMaxWorkGroupSize) { auto maxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast(maxWorkGroupSize); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = static_cast(maxWorkGroupSize); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = static_cast(maxWorkGroupSize); EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } struct KernelCrossThreadTests : Test { KernelCrossThreadTests() { } void SetUp() override { DeviceFixture::SetUp(); program = 
std::make_unique(*pDevice->getExecutionEnvironment()); patchDataParameterStream.DataParameterStreamSize = 64 * sizeof(uint8_t); pKernelInfo = std::make_unique(); ASSERT_NE(nullptr, pKernelInfo); pKernelInfo->patchInfo.dataParameterStream = &patchDataParameterStream; pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment; } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr program; std::unique_ptr pKernelInfo; SPatchDataParameterStream patchDataParameterStream; SPatchExecutionEnvironment executionEnvironment = {}; }; TEST_F(KernelCrossThreadTests, globalWorkOffset) { pKernelInfo->workloadInfo.globalWorkOffsetOffsets[1] = 4; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX); EXPECT_NE(nullptr, kernel.globalWorkOffsetY); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetZ); } TEST_F(KernelCrossThreadTests, localWorkSize) { pKernelInfo->workloadInfo.localWorkSizeOffsets[0] = 0xc; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.localWorkSizeX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ); } TEST_F(KernelCrossThreadTests, localWorkSize2) { pKernelInfo->workloadInfo.localWorkSizeOffsets2[1] = 0xd; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2); EXPECT_NE(nullptr, kernel.localWorkSizeY2); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeY2); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ2); } TEST_F(KernelCrossThreadTests, globalWorkSize) { 
pKernelInfo->workloadInfo.globalWorkSizeOffsets[2] = 8; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeY); EXPECT_NE(nullptr, kernel.globalWorkSizeZ); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkSizeZ); } TEST_F(KernelCrossThreadTests, workDim) { pKernelInfo->workloadInfo.workDimOffset = 12; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.workDim); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.workDim); } TEST_F(KernelCrossThreadTests, numWorkGroups) { pKernelInfo->workloadInfo.numWorkGroupsOffset[0] = 0 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[1] = 1 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[2] = 2 * sizeof(uint32_t); MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.numWorkGroupsX); EXPECT_NE(nullptr, kernel.numWorkGroupsY); EXPECT_NE(nullptr, kernel.numWorkGroupsZ); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsY); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsZ); } TEST_F(KernelCrossThreadTests, enqueuedLocalWorkSize) { pKernelInfo->workloadInfo.enqueuedLocalWorkSizeOffsets[0] = 0; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeZ); } TEST_F(KernelCrossThreadTests, maxWorkGroupSize) { pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12; MockKernel kernel(program.get(), 
*pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.maxWorkGroupSizeOffset), static_cast(kernel.maxWorkGroupSizeForCrossThreadData)); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize); } TEST_F(KernelCrossThreadTests, dataParameterSimdSize) { pKernelInfo->workloadInfo.simdSizeOffset = 16; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = true; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.dataParameterSimdSize); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.dataParameterSimdSize); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.simdSizeOffset), static_cast(kernel.dataParameterSimdSize)); EXPECT_EQ_VAL(pKernelInfo->getMaxSimdSize(), *kernel.dataParameterSimdSize); } TEST_F(KernelCrossThreadTests, GIVENparentEventOffsetWHENinitializeKernelTHENparentEventInitWithInvalid) { pKernelInfo->workloadInfo.parentEventOffset = 16; MockKernel kernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.parentEventOffset); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.parentEventOffset); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.parentEventOffset), static_cast(kernel.parentEventOffset)); EXPECT_EQ(WorkloadInfo::invalidParentEvent, *kernel.parentEventOffset); } TEST_F(KernelCrossThreadTests, kernelAddRefCountToProgram) { auto refCount = program->getReference(); MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pDevice); 
auto refCount2 = program->getReference(); EXPECT_EQ(refCount2, refCount + 1); delete kernel; auto refCount3 = program->getReference(); EXPECT_EQ(refCount, refCount3); } TEST_F(KernelCrossThreadTests, kernelSetsTotalSLMSize) { pKernelInfo->workloadInfo.slmStaticSize = 1024; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pDevice); EXPECT_EQ(1024u, kernel->slmTotalSize); delete kernel; } TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCurbeIsPatchedProperly) { SPatchAllocateStatelessPrivateSurface allocatePrivate; allocatePrivate.DataParamSize = 8; allocatePrivate.DataParamOffset = 0; allocatePrivate.PerThreadPrivateMemorySize = 1; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &allocatePrivate; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pDevice); kernel->initialize(); auto privateSurface = kernel->getPrivateSurface(); auto constantBuffer = kernel->getCrossThreadData(); auto privateAddress = (uintptr_t)privateSurface->getGpuAddressToPatch(); auto ptrCurbe = (uint64_t *)constantBuffer; auto privateAddressFromCurbe = (uintptr_t)*ptrCurbe; EXPECT_EQ(privateAddressFromCurbe, privateAddress); delete kernel; } TEST_F(KernelCrossThreadTests, givenKernelWithPreferredWkgMultipleWhenItIsCreatedThenCurbeIsPatchedProperly) { pKernelInfo->workloadInfo.preferredWkgMultipleOffset = 8; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pDevice); kernel->initialize(); auto *crossThread = kernel->getCrossThreadData(); uint32_t *preferredWkgMultipleOffset = (uint32_t *)ptrOffset(crossThread, 8); EXPECT_EQ(pKernelInfo->getMaxSimdSize(), *preferredWkgMultipleOffset); delete kernel; } TEST_F(KernelCrossThreadTests, patchBlocksSimdSize) { MockKernelWithInternals *kernel = new MockKernelWithInternals(*pDevice); // store offset to child's simd size in kernel info uint32_t crossThreadOffset = 0; //offset of simd size kernel->kernelInfo.childrenKernelsIdOffset.push_back({0, crossThreadOffset}); // 
add a new block kernel to program auto infoBlock = new KernelInfo(); kernel->executionEnvironmentBlock.CompiledSIMD8 = 0; kernel->executionEnvironmentBlock.CompiledSIMD16 = 1; kernel->executionEnvironmentBlock.CompiledSIMD32 = 0; infoBlock->patchInfo.executionEnvironment = &kernel->executionEnvironmentBlock; kernel->mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock); // patch block's simd size kernel->mockKernel->patchBlocksSimdSize(); // obtain block's simd size from cross thread data void *blockSimdSize = ptrOffset(kernel->mockKernel->getCrossThreadData(), kernel->kernelInfo.childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); // check of block's simd size has been patched correctly EXPECT_EQ(kernel->mockProgram->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); delete kernel; } TEST(KernelInfoTest, borderColorOffset) { KernelInfo info; SPatchSamplerStateArray samplerState; samplerState.BorderColorOffset = 3; info.patchInfo.samplerStateArray = nullptr; EXPECT_EQ(0u, info.getBorderColorOffset()); info.patchInfo.samplerStateArray = &samplerState; EXPECT_EQ(3u, info.getBorderColorOffset()); } TEST(KernelInfoTest, getArgNumByName) { KernelInfo info; EXPECT_EQ(-1, info.getArgNumByName("")); KernelArgInfo kai; kai.name = "arg1"; info.kernelArgInfo.push_back(kai); EXPECT_EQ(-1, info.getArgNumByName("")); EXPECT_EQ(-1, info.getArgNumByName("arg2")); EXPECT_EQ(0, info.getArgNumByName("arg1")); kai.name = "arg2"; info.kernelArgInfo.push_back(kai); EXPECT_EQ(0, info.getArgNumByName("arg1")); EXPECT_EQ(1, info.getArgNumByName("arg2")); } TEST(KernelTest, getInstructionHeapSizeForExecutionModelReturnsZeroForNormalKernel) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); EXPECT_EQ(0u, kernel.mockKernel->getInstructionHeapSizeForExecutionModel()); } TEST(KernelTest, 
setKernelArgUsesBuiltinDispatchInfoBuilderIfAvailable) { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { MockBuiltinDispatchBuilder(BuiltIns &builtins) : BuiltinDispatchInfoBuilder(builtins) { } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { receivedArgs.push_back(std::make_tuple(argIndex, argSize, argVal)); err = errToReturn; return valueToReturn; } bool valueToReturn = false; cl_int errToReturn = CL_SUCCESS; mutable std::vector> receivedArgs; }; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); kernel.kernelInfo.resizeKernelArgInfoAndRegisterParameter(1); kernel.mockKernel->initialize(); MockBuiltinDispatchBuilder mockBuilder(*device->getExecutionEnvironment()->getBuiltIns()); kernel.kernelInfo.builtinDispatchBuilder = &mockBuilder; mockBuilder.valueToReturn = false; mockBuilder.errToReturn = CL_SUCCESS; EXPECT_EQ(0u, kernel.mockKernel->getPatchedArgumentsNum()); auto ret = kernel.mockKernel->setArg(1, 3, reinterpret_cast(5)); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = false; mockBuilder.errToReturn = CL_INVALID_ARG_SIZE; ret = kernel.mockKernel->setArg(7, 11, reinterpret_cast(13)); EXPECT_EQ(CL_INVALID_ARG_SIZE, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = true; mockBuilder.errToReturn = CL_SUCCESS; ret = kernel.mockKernel->setArg(17, 19, reinterpret_cast(23)); EXPECT_EQ(CL_INVALID_ARG_INDEX, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = true; mockBuilder.errToReturn = CL_INVALID_ARG_SIZE; ret = kernel.mockKernel->setArg(29, 31, reinterpret_cast(37)); EXPECT_EQ(CL_INVALID_ARG_INDEX, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); ASSERT_EQ(4U, mockBuilder.receivedArgs.size()); EXPECT_EQ(1U, 
std::get<0>(mockBuilder.receivedArgs[0])); EXPECT_EQ(3U, std::get<1>(mockBuilder.receivedArgs[0])); EXPECT_EQ(reinterpret_cast(5), std::get<2>(mockBuilder.receivedArgs[0])); EXPECT_EQ(7U, std::get<0>(mockBuilder.receivedArgs[1])); EXPECT_EQ(11U, std::get<1>(mockBuilder.receivedArgs[1])); EXPECT_EQ(reinterpret_cast(13), std::get<2>(mockBuilder.receivedArgs[1])); EXPECT_EQ(17U, std::get<0>(mockBuilder.receivedArgs[2])); EXPECT_EQ(19U, std::get<1>(mockBuilder.receivedArgs[2])); EXPECT_EQ(reinterpret_cast(23), std::get<2>(mockBuilder.receivedArgs[2])); EXPECT_EQ(29U, std::get<0>(mockBuilder.receivedArgs[3])); EXPECT_EQ(31U, std::get<1>(mockBuilder.receivedArgs[3])); EXPECT_EQ(reinterpret_cast(37), std::get<2>(mockBuilder.receivedArgs[3])); } TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMaxWorkgroupSizeIsSimdSizeDependant) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.set(true); HardwareInfo myHwInfo = *platformDevices[0]; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; mySysInfo.EUCount = 24; mySysInfo.SubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); MockKernelWithInternals kernel(*device); kernel.executionEnvironment.LargestCompiledSIMDSize = 32; size_t maxKernelWkgSize; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(1024u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 16; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(512u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 8; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(256u, maxKernelWkgSize); } TEST(KernelTest, 
givenKernelWithKernelInfoWith32bitPointerSizeThenReport32bit) { KernelInfo info; info.gpuPointerSize = 4; MockContext context; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment(), &context, false); std::unique_ptr kernel(new MockKernel(&program, info, *device.get())); EXPECT_TRUE(kernel->is32Bit()); } TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) { KernelInfo info; info.gpuPointerSize = 8; MockContext context; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment(), &context, false); std::unique_ptr kernel(new MockKernel(&program, info, *device.get())); EXPECT_FALSE(kernel->is32Bit()); } TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) { DebugManagerStateRestore restore; DebugManager.flags.DisableAuxTranslation.set(false); std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto hwInfo = device->getExecutionEnvironment()->getMutableHardwareInfo(); auto &capabilityTable = hwInfo->capabilityTable; auto context = clUniquePtr(new MockContext(device.get())); context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; MockKernelWithInternals kernel(*device, context.get()); kernel.kernelInfo.kernelArgInfo.resize(1); kernel.kernelInfo.kernelArgInfo.at(0).typeStr = "char *"; kernel.kernelInfo.kernelArgInfo.at(0).isBuffer = true; capabilityTable.ftrRenderCompressedBuffers = false; kernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = true; kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); kernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); capabilityTable.ftrRenderCompressedBuffers = true; 
kernel.mockKernel->initialize(); if (HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves()) { EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired()); } else { EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); } DebugManager.flags.DisableAuxTranslation.set(true); kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); } TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkKernelForAuxTranslation) { DebugManagerStateRestore restore; DebugManager.flags.RenderCompressedBuffersEnabled.set(1); HardwareInfo localHwInfo = *platformDevices[0]; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(&localHwInfo)); auto context = clUniquePtr(new MockContext(device.get())); MockKernelWithInternals kernel(*device, context.get()); kernel.kernelInfo.kernelArgInfo.resize(1); kernel.kernelInfo.kernelArgInfo.at(0).typeStr = "char *"; kernel.kernelInfo.kernelArgInfo.at(0).isBuffer = true; kernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; kernel.mockKernel->initialize(); if (HwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves()) { EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired()); } else { EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); } } TEST(KernelTest, givenKernelWithPairArgumentWhenItIsInitializedThenPatchImmediateIsUsedAsArgHandler) { HardwareInfo localHwInfo = *platformDevices[0]; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(&localHwInfo)); auto context = clUniquePtr(new MockContext(device.get())); MockKernelWithInternals kernel(*device, context.get()); kernel.kernelInfo.kernelArgInfo.resize(1); kernel.kernelInfo.kernelArgInfo.at(0).typeStr = "pair"; kernel.mockKernel->initialize(); EXPECT_EQ(&Kernel::setArgImmediate, kernel.mockKernel->kernelArgHandlers[0]); } TEST(KernelTest, 
whenNullAllocationThenAssignNullPointerToCacheFlushVector) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); kernel.mockKernel->addAllocationToCacheFlushVector(0, nullptr); EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockAllocation.setMemObjectsAllocationWithWritableFlags(false); mockAllocation.setFlushL3Required(true); kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { MockGraphicsAllocation mockAllocation; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); kernel.mockKernel->svmAllocationsRequireCacheFlush = true; MockCommandQueue queue; DebugManagerStateRestore debugRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(true); queue.requiresCacheFlushAfterWalker = true; EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); queue.requiresCacheFlushAfterWalker = false; EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); } TEST(KernelTest, whenAllocationWriteableThenDoNotAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals 
kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockAllocation.setMemObjectsAllocationWithWritableFlags(true); mockAllocation.setFlushL3Required(false); kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); mockAllocation.setMemObjectsAllocationWithWritableFlags(false); mockAllocation.setFlushL3Required(false); kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenKernelUsesMemoryManagerFromEnvironment) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); auto executionEnvironment = device->getExecutionEnvironment(); auto mockKernel = std::make_unique(*device); GraphicsAllocation *privateSurface = device->getExecutionEnvironment()->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); mockKernel->mockKernel->setPrivateSurface(privateSurface, 10); executionEnvironment->incRefInternal(); device.reset(nullptr); mockKernel.reset(nullptr); executionEnvironment->decRefInternal(); } TEST(KernelTest, givenAllArgumentsAreStatefulBuffersWhenInitializingThenAllBufferArgsStatefulIsTrue) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; kernelArgInfo[1].isBuffer = true; 
kernelArgInfo[0].pureStatefulBufferAccess = true; kernelArgInfo[1].pureStatefulBufferAccess = true; MockKernelWithInternals kernel{*device}; kernel.kernelInfo.kernelArgInfo = kernelArgInfo; kernel.mockKernel->initialize(); EXPECT_TRUE(kernel.mockKernel->allBufferArgsStateful); } TEST(KernelTest, givenAllArgumentsAreBuffersButNotAllAreStatefulWhenInitializingThenAllBufferArgsStatefulIsFalse) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; kernelArgInfo[1].isBuffer = true; kernelArgInfo[0].pureStatefulBufferAccess = true; kernelArgInfo[1].pureStatefulBufferAccess = false; MockKernelWithInternals kernel{*device}; kernel.kernelInfo.kernelArgInfo = kernelArgInfo; kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->allBufferArgsStateful); } TEST(KernelTest, givenNotAllArgumentsAreBuffersButAllBuffersAreStatefulWhenInitializingThenAllBufferArgsStatefulIsTrue) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; kernelArgInfo[1].isBuffer = false; kernelArgInfo[0].pureStatefulBufferAccess = true; kernelArgInfo[1].pureStatefulBufferAccess = false; MockKernelWithInternals kernel{*device}; kernel.kernelInfo.kernelArgInfo = kernelArgInfo; kernel.mockKernel->initialize(); EXPECT_TRUE(kernel.mockKernel->allBufferArgsStateful); } TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals mockKernel(*device); SPatchMediaVFEState mediaVFEstate; SPatchMediaVFEState mediaVFEstateSlot1; mediaVFEstateSlot1.PerThreadScratchSpace = 1024u; mediaVFEstate.PerThreadScratchSpace = 512u; mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; 
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstateSlot1; EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize()); } TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals mockKernel(*device); mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = nullptr; EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize()); } TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) { DebugManagerStateRestore restore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; uint64_t crossThreadData = 0; EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, patchToken); EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size()); } TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; uint64_t crossThreadData = 0; EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, patchToken); EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); } TEST(KernelTest, givenDefaultKernelWhenItIsCreatedThenItReportsStatelessWrites) { std::unique_ptr 
device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); MockKernelWithInternals kernel(*device); EXPECT_TRUE(kernel.mockKernel->areStatelessWritesUsed()); } namespace NEO { template class DeviceQueueHwMock : public DeviceQueueHw { using BaseClass = DeviceQueueHw; public: using BaseClass::buildSlbDummyCommands; using BaseClass::getCSPrefetchSize; using BaseClass::getExecutionModelCleanupSectionSize; using BaseClass::getMediaStateClearCmdsSize; using BaseClass::getMinimumSlbSize; using BaseClass::getProfilingEndCmdsSize; using BaseClass::getSlbCS; using BaseClass::getWaCommandsSize; using BaseClass::offsetDsh; DeviceQueueHwMock(Context *context, Device *device, cl_queue_properties &properties) : BaseClass(context, device, properties) { auto slb = this->getSlbBuffer(); LinearStream *slbCS = getSlbCS(); slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); // reset }; }; } // namespace NEO HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, whenSlbEndOffsetGreaterThanZeroThenOverwriteOneEnqueue) { std::unique_ptr> mockDeviceQueueHw(new DeviceQueueHwMock(pContext, device, deviceQueueProperties::minimumProperties[0])); auto slb = mockDeviceQueueHw->getSlbBuffer(); auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); auto slbCopy = malloc(slb->getUnderlyingBufferSize()); memset(slb->getUnderlyingBuffer(), 0xFE, slb->getUnderlyingBufferSize()); memcpy(slbCopy, slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); // slbEndOffset < commandsSize * 128 // always fill only 1 enqueue (after offset) auto offset = static_cast(commandsSize) * 50; igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = offset; mockDeviceQueueHw->resetDeviceQueue(); EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, offset)); // dont touch memory before offset EXPECT_NE(0, 
memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset), slbCopy, commandsSize)); // change 1 enqueue EXPECT_EQ(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset + commandsSize), slbCopy, offset)); // dont touch memory after (offset + 1 enqueue) // slbEndOffset == commandsSize * 128 // dont fill commands memset(slb->getUnderlyingBuffer(), 0xFEFEFEFE, slb->getUnderlyingBufferSize()); offset = static_cast(commandsSize) * 128; igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = static_cast(commandsSize); mockDeviceQueueHw->resetDeviceQueue(); EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, commandsSize * 128)); // dont touch memory for enqueues free(slbCopy); } using KernelMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(KernelMultiRootDeviceTest, privateSurfaceHasCorrectRootDeviceIndex) { auto kernelInfo = std::make_unique(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; kernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; MockProgram program(*device->getExecutionEnvironment(), context.get(), false); std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *device.get())); kernel->initialize(); auto privateSurface = kernel->getPrivateSurface(); ASSERT_NE(nullptr, privateSurface); EXPECT_EQ(expectedRootDeviceIndex, privateSurface->getRootDeviceIndex()); }