/*
 * Copyright (C) 2017-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"

#include "opencl/source/accelerators/intel_accelerator.h"
#include "opencl/source/accelerators/intel_motion_estimation.h"
#include "opencl/source/helpers/sampler_helpers.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/pipe.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_pipe.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/mocks/mock_sampler.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "test.h"

#include "CL/cl.h"
#include "gtest/gtest.h"

#include <memory>

using namespace NEO;

// NOTE(review): the reviewed text had every "<...>" stripped (bare "#include", "std::make_unique(",
// ".as()", "reinterpret_cast(" and so on). The template arguments and cast targets below were
// reconstructed from how each value is used in this file - confirm them against the project headers.

// Fixture for MultiDeviceKernel::cloneKernel tests.
// For every root device index of the context, SetUp creates one "source" and one "cloned" MockKernel
// from the same kernel info and wraps each set in a MultiDeviceKernel. Tests set a single argument
// (index 0) on the source, clone, and compare the cloned kernel's state with the source's.
class CloneKernelTest : public MultiRootDeviceWithSubDevicesFixture {
  public:
    CloneKernelTest() {
    }

  protected:
    void SetUp() override {
        MultiRootDeviceWithSubDevicesFixture::SetUp();
        pProgram = std::make_unique<MockProgram>(context.get(), false, context->getDevices());

        // define kernel info
        pKernelInfo = std::make_unique<MockKernelInfo>();
        pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.resize(1);
        pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors.resize(1);
        pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

        pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
        pKernelInfo->heapInfo.pSsh = surfaceStateHeap;

        // All three slots share the single kernel info created above.
        KernelInfoContainer kernelInfos;
        kernelInfos.resize(3);
        kernelInfos[0] = kernelInfos[1] = kernelInfos[2] = pKernelInfo.get();

        KernelVectorType sourceKernels;
        sourceKernels.resize(3);

        KernelVectorType clonedKernels;
        clonedKernels.resize(3);

        for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
            pSourceKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]);
            ASSERT_EQ(CL_SUCCESS, pSourceKernel[rootDeviceIndex]->initialize());
            char pSourceCrossThreadData[64] = {};
            sourceKernels[rootDeviceIndex] = pSourceKernel[rootDeviceIndex];

            pClonedKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]);
            ASSERT_EQ(CL_SUCCESS, pClonedKernel[rootDeviceIndex]->initialize());
            char pClonedCrossThreadData[64] = {};
            clonedKernels[rootDeviceIndex] = pClonedKernel[rootDeviceIndex];

            // Both kernels start from 64 zeroed bytes of cross-thread data.
            pSourceKernel[rootDeviceIndex]->setCrossThreadData(pSourceCrossThreadData, sizeof(pSourceCrossThreadData));
            pClonedKernel[rootDeviceIndex]->setCrossThreadData(pClonedCrossThreadData, sizeof(pClonedCrossThreadData));
        }

        pSourceMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(sourceKernels, kernelInfos);
        pClonedMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(clonedKernels, kernelInfos);
    }

    void TearDown() override {
        MultiRootDeviceWithSubDevicesFixture::TearDown();
    }

    // Checks that argument 0 of the source kernel for rootDeviceIndex is set, patched and of expectedType.
    void expectSourceArgIsPatched(uint32_t rootDeviceIndex, decltype(Kernel::NONE_OBJ) expectedType) {
        SCOPED_TRACE(::testing::Message() << "rootDeviceIndex: " << rootDeviceIndex);
        MockKernel *sourceKernel = pSourceKernel[rootDeviceIndex];

        EXPECT_EQ(1u, sourceKernel->getKernelArguments().size());
        EXPECT_EQ(expectedType, sourceKernel->getKernelArgInfo(0).type);
        EXPECT_NE(0u, sourceKernel->getKernelArgInfo(0).size);
        EXPECT_EQ(1u, sourceKernel->getPatchedArgumentsNum());
        EXPECT_TRUE(sourceKernel->getKernelArgInfo(0).isPatched);
    }

    // Checks that the argument bookkeeping of the cloned kernel for rootDeviceIndex equals the source kernel's.
    void expectClonedArgMatchesSource(uint32_t rootDeviceIndex) {
        SCOPED_TRACE(::testing::Message() << "rootDeviceIndex: " << rootDeviceIndex);
        MockKernel *sourceKernel = pSourceKernel[rootDeviceIndex];
        MockKernel *clonedKernel = pClonedKernel[rootDeviceIndex];

        EXPECT_EQ(sourceKernel->getKernelArguments().size(), clonedKernel->getKernelArguments().size());
        EXPECT_EQ(sourceKernel->getKernelArgInfo(0).type, clonedKernel->getKernelArgInfo(0).type);
        EXPECT_EQ(sourceKernel->getKernelArgInfo(0).object, clonedKernel->getKernelArgInfo(0).object);
        EXPECT_EQ(sourceKernel->getKernelArgInfo(0).value, clonedKernel->getKernelArgInfo(0).value);
        EXPECT_EQ(sourceKernel->getKernelArgInfo(0).size, clonedKernel->getKernelArgInfo(0).size);
        EXPECT_EQ(sourceKernel->getPatchedArgumentsNum(), clonedKernel->getPatchedArgumentsNum());
        EXPECT_EQ(sourceKernel->getKernelArgInfo(0).isPatched, clonedKernel->getKernelArgInfo(0).isPatched);
    }

    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<MockProgram> pProgram;
    std::unique_ptr<MultiDeviceKernel> pSourceMultiDeviceKernel;
    std::unique_ptr<MultiDeviceKernel> pClonedMultiDeviceKernel;
    // Raw pointers indexed by root device index; never deleted in this file -
    // presumably owned by the MultiDeviceKernel wrappers above (confirm).
    MockKernel *pSourceKernel[3] = {nullptr};
    MockKernel *pClonedKernel[3] = {nullptr};
    std::unique_ptr<MockKernelInfo> pKernelInfo;
    // Backing storage published through pKernelInfo->heapInfo.pSsh; zeroed so its contents are deterministic.
    char surfaceStateHeap[128] = {};
};

// An argument that was declared but never set is cloned as "unset".
TEST_F(CloneKernelTest, GivenUnsetArgWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgBuffer(0);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(Kernel::NONE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(nullptr, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(nullptr, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_FALSE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectClonedArgMatchesSource(rootDeviceIndex);
    }
}

// A local (SLM) argument is cloned and the cloned kernel's slmTotalSize reflects the aligned size.
TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) {
    const size_t slmSize = 0x800;

    pKernelInfo->addArgLocal(0, 0, 1);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgLocal);
        pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgLocal);
    }

    retVal = pSourceMultiDeviceKernel->setArg(0, slmSize, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectSourceArgIsPatched(rootDeviceIndex, Kernel::SLM_OBJ);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectClonedArgMatchesSource(rootDeviceIndex);

        EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel[rootDeviceIndex]->slmTotalSize);
    }
}

// A buffer argument is cloned and the buffer's GPU address is patched into the clone's cross-thread data.
TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *));

    auto buffer = clUniquePtr(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal));
    // The buffer is dereferenced below, so a failed creation must stop the test here.
    ASSERT_EQ(CL_SUCCESS, retVal);
    cl_mem memObj = buffer.get();

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgBuffer);
        pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgBuffer);
    }

    retVal = pSourceMultiDeviceKernel->setArg(0, sizeof(cl_mem), &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectSourceArgIsPatched(rootDeviceIndex, Kernel::BUFFER_OBJ);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectClonedArgMatchesSource(rootDeviceIndex);

        auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                     pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast<uint64_t>(*pKernelArg));
    }
}

// A pipe argument (set on the first root device only) is cloned and its GPU address is patched.
TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgPipe(0, 0x20, sizeof(void *));

    auto pipe = clUniquePtr(Pipe::create(context.get(), 0, 1, 20, nullptr, retVal));
    // The pipe is dereferenced below, so a failed creation must stop the test here.
    ASSERT_EQ(CL_SUCCESS, retVal);
    cl_mem memObj = pipe.get();

    auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
    pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgPipe);
    pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgPipe);

    retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_mem), &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    expectSourceArgIsPatched(rootDeviceIndex, Kernel::PIPE_OBJ);

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectClonedArgMatchesSource(rootDeviceIndex);

    auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                 pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
    EXPECT_EQ(pipe->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast<uint64_t>(*pKernelArg));
}

// An image argument is cloned and width/height/depth are patched at the metadata payload offsets.
TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgImage(0, 0x20);
    auto &metaPayload = pKernelInfo->argAsImg(0).metadataPayload;
    metaPayload.imgWidth = 0x4;
    metaPayload.imgHeight = 0x8;
    metaPayload.imgDepth = 0xc;
    pKernelInfo->addExtendedDeviceSideEnqueueDescriptor(0, 0);

    auto image = std::unique_ptr<Image>(Image2dHelper<>::create(context.get()));
    ASSERT_NE(nullptr, image);

    auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
    size_t imageWidth = image->getImageDesc().image_width;
    size_t imageHeight = image->getImageDesc().image_height;
    size_t imageDepth = image->getImageDesc().image_depth;

    cl_mem memObj = image.get();

    pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgImage);
    pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgImage);

    retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_mem), &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    expectSourceArgIsPatched(rootDeviceIndex, Kernel::IMAGE_OBJ);

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectClonedArgMatchesSource(rootDeviceIndex);

    auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
    auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescImage>();
    EXPECT_EQ(clonedArg.bindful, *crossThreadData);

    auto pImgWidth = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgWidth);
    EXPECT_EQ(imageWidth, *pImgWidth);

    auto pImgHeight = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgHeight);
    EXPECT_EQ(imageHeight, *pImgHeight);

    auto pImgDepth = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgDepth);
    EXPECT_EQ(imageDepth, *pImgDepth);
}

// A VME accelerator argument is cloned and its descriptor fields are patched at the VME descriptor offsets.
TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgAccelerator(0, undefined<SurfaceStateHeapOffset>, 0x4, 0x14, 0x1c, 0xc);

    cl_motion_estimation_desc_intel desc = {
        CL_ME_MB_TYPE_4x4_INTEL,
        CL_ME_SUBPIXEL_MODE_QPEL_INTEL,
        CL_ME_SAD_ADJUST_MODE_HAAR_INTEL,
        CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL};

    cl_accelerator_intel accelerator = VmeAccelerator::create(
        context.get(),
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(desc),
        &desc,
        retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, accelerator);

    auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
    pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgAccelerator);
    pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgAccelerator);

    retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_accelerator_intel), &accelerator);
    ASSERT_EQ(CL_SUCCESS, retVal);

    expectSourceArgIsPatched(rootDeviceIndex, Kernel::ACCELERATOR_OBJ);

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectClonedArgMatchesSource(rootDeviceIndex);

    auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());

    ASSERT_TRUE(pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).getExtendedTypeInfo().hasVmeExtendedDescriptor);
    const auto clonedArgDescVme = reinterpret_cast<const ArgDescVme *>(pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[0].get());

    uint32_t *pMbBlockType = ptrOffset(crossThreadData, clonedArgDescVme->mbBlockType);
    EXPECT_EQ(desc.mb_block_type, *pMbBlockType);

    uint32_t *pSubpixelMode = ptrOffset(crossThreadData, clonedArgDescVme->subpixelMode);
    EXPECT_EQ(desc.subpixel_mode, *pSubpixelMode);

    uint32_t *pSadAdjustMode = ptrOffset(crossThreadData, clonedArgDescVme->sadAdjustMode);
    EXPECT_EQ(desc.sad_adjust_mode, *pSadAdjustMode);

    uint32_t *pSearchPathType = ptrOffset(crossThreadData, clonedArgDescVme->searchPathType);
    EXPECT_EQ(desc.search_path_type, *pSearchPathType);

    retVal = clReleaseAcceleratorINTEL(accelerator);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A sampler argument is cloned, its metadata is patched, and the sampler's internal ref count is 3 afterwards.
TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) {
    auto sampler = clUniquePtr(new MockSampler(context.get(),
                                               true,
                                               (cl_addressing_mode)CL_ADDRESS_MIRRORED_REPEAT,
                                               (cl_filter_mode)CL_FILTER_NEAREST));

    pKernelInfo->addArgSampler(0, 0x20, 0x8, 0x10, 0x4);
    pKernelInfo->addExtendedDeviceSideEnqueueDescriptor(0, 0);

    cl_sampler samplerObj = sampler.get();

    auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
    pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgSampler);
    pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgSampler);

    retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_sampler), &samplerObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    expectSourceArgIsPatched(rootDeviceIndex, Kernel::SAMPLER_OBJ);

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectClonedArgMatchesSource(rootDeviceIndex);

    auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
    const auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescSampler>();
    EXPECT_EQ(SAMPLER_OBJECT_ID_SHIFT + clonedArg.bindful, *crossThreadData);

    auto pSnapWa = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerSnapWa);
    EXPECT_EQ(sampler->getSnapWaValue(), *pSnapWa);

    auto pAddressingMode = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerAddressingMode);
    EXPECT_EQ(GetAddrModeEnum(sampler->addressingMode), *pAddressingMode);

    auto pNormalizedCoords = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerNormalizedCoords);
    EXPECT_EQ(GetNormCoordsEnum(sampler->normalizedCoordinates), *pNormalizedCoords);

    EXPECT_EQ(3, sampler->getRefInternalCount());
}

// A device-queue argument is cloned and the queue buffer's GPU address is patched.
HWCMDTEST_F(IGFX_GEN8_CORE, CloneKernelTest, GivenArgDeviceQueueWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgDevQueue(0, 0x20, sizeof(void *));

    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device1);

    cl_queue_properties queueProps[5] = {
        CL_QUEUE_PROPERTIES,
        CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
        0, 0, 0};

    MockDeviceQueueHw<FamilyType> mockDevQueue(context.get(), device1, queueProps[0]);
    auto clDeviceQueue = static_cast<cl_command_queue>(&mockDevQueue);

    auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
    pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgDevQueue);
    pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgDevQueue);

    retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_command_queue), &clDeviceQueue);
    ASSERT_EQ(CL_SUCCESS, retVal);

    expectSourceArgIsPatched(rootDeviceIndex, Kernel::DEVICE_QUEUE_OBJ);

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectClonedArgMatchesSource(rootDeviceIndex);

    auto pKernelArg = (uintptr_t *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                    pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
    EXPECT_EQ(static_cast<uintptr_t>(mockDevQueue.getQueueBuffer()->getGpuAddressToPatch()), *pKernelArg);
}

// A raw SVM pointer argument is cloned and the pointer value is patched into the clone.
TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
    // Owned by a smart pointer so the storage is released even when an ASSERT_* returns early.
    auto svmStorage = std::make_unique<char[]>(256);
    char *svmPtr = svmStorage.get();

    pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *));

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        retVal = pSourceKernel[rootDeviceIndex]->setArgSvm(0, 256, svmPtr, nullptr, 0u);
        ASSERT_EQ(CL_SUCCESS, retVal);

        expectSourceArgIsPatched(rootDeviceIndex, Kernel::SVM_OBJ);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectClonedArgMatchesSource(rootDeviceIndex);

        auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                    pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(svmPtr, *pKernelArg);
    }
}

// An SVM-allocation argument is cloned and the allocation's pointer is patched into the clone.
TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *));

    char memory[100] = {};
    // Owners of the per-root-device allocations, indexed by root device index; declared before
    // multiGraphicsAllocation so they are destroyed after it, including on early ASSERT_* return.
    std::unique_ptr<MockGraphicsAllocation> svmAllocs[3];
    MultiGraphicsAllocation multiGraphicsAllocation(3);
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        svmAllocs[rootDeviceIndex].reset(new MockGraphicsAllocation(rootDeviceIndex, memory, 100));
        multiGraphicsAllocation.addAllocation(svmAllocs[rootDeviceIndex].get());
    }

    retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        expectSourceArgIsPatched(rootDeviceIndex, Kernel::SVM_ALLOC_OBJ);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectClonedArgMatchesSource(rootDeviceIndex);

        auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                    pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(memory, *pKernelArg);
    }
}

// An immediate (by-value) argument is cloned and the value is patched at the element offset.
TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgImmediate(0, sizeof(void *), 0x20);

    using TypeParam = unsigned long;
    auto value = (TypeParam)0xAA55AA55UL;

    retVal = pSourceMultiDeviceKernel->setArg(0, sizeof(TypeParam), &value);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectSourceArgIsPatched(rootDeviceIndex, Kernel::NONE_OBJ);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        expectClonedArgMatchesSource(rootDeviceIndex);

        auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                        pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescValue>().elements[0].offset);
        EXPECT_EQ(value, *pKernelArg);
    }
}

// SVM allocations registered through setSvmKernelExecInfo are carried over to the clone.
TEST_F(CloneKernelTest, GivenExecInfoWhenCloningKernelThenSvmAllocationIsCorrect) {
    REQUIRE_SVM_OR_SKIP(device1);
    void *ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    ASSERT_NE(nullptr, ptrSVM);

    auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
    ASSERT_NE(nullptr, svmData);
    auto &pSvmAllocs = svmData->gpuAllocations;

    pSourceMultiDeviceKernel->setSvmKernelExecInfo(pSvmAllocs);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size());
        EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
        EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size(), pClonedKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0), pClonedKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
    }

    context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
}

// Allocations registered through setUnifiedMemoryExecInfo are carried over to the clone.
TEST_F(CloneKernelTest, GivenUnifiedMemoryExecInfoWhenCloningKernelThenUnifiedMemoryAllocationIsCorrect) {
    REQUIRE_SVM_OR_SKIP(device1);
    void *ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    ASSERT_NE(nullptr, ptrSVM);

    auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
    ASSERT_NE(nullptr, svmData);
    auto &pSvmAllocs = svmData->gpuAllocations;

    pSourceMultiDeviceKernel->setUnifiedMemoryExecInfo(pSvmAllocs);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size());
        EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
        EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size(), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
    }

    context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
}

// The isBuiltIn flag of the source kernels is propagated to the cloned kernels.
TEST_F(CloneKernelTest, givenBuiltinSourceKernelWhenCloningThenSetBuiltinFlagToClonedKernel) {
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        pSourceKernel[rootDeviceIndex]->isBuiltIn = true;
    }

    // Check the clone result as every other test here does, instead of ignoring it.
    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_TRUE(pClonedKernel[rootDeviceIndex]->isBuiltIn);
    }
}