/*
 * Copyright (C) 2017-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "core/memory_manager/memory_constants.h"
#include "runtime/built_ins/built_ins.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/helpers/dispatch_info.h"
#include "test.h"
#include "unit_tests/command_queue/enqueue_copy_buffer_rect_fixture.h"
#include "unit_tests/gen_common/gen_commands_common_validation.h"
#include "unit_tests/mocks/mock_buffer.h"

#include "reg_configs_common.h"

using namespace NEO;

// Unit tests for clEnqueueCopyBufferRect: API argument validation, command
// stream / heap programming for the 2D and 3D copy paths, and selection of the
// stateless vs. stateful command queue implementation.
//
// NOTE(review): this file was recovered from a copy in which all line breaks
// and all template argument lists had been lost. The template arguments below
// (<FamilyType> on fixture/validation helpers, <MockDevice>, <Buffer>,
// BufferHelper<>, std::array<size_t, 3>, static_cast<size_t>, ...) were
// reconstructed; confirm them against the fixture and helper headers.

const size_t EnqueueCopyBufferRectTest::BufferRect::sizeInBytes = 100 * 100 * 100 * sizeof(cl_char);

namespace {

// Returns the number of lanes enabled in the walker's right execution mask,
// considering only the lanes that exist for the walker's SIMD size
// (32, 16, or 8 for any other value).
template <typename WalkerType>
int countRightExecutionLanes(WalkerType &walker) {
    size_t simd = 8;
    if (walker.getSimdSize() == WalkerType::SIMD_SIZE_SIMD32) {
        simd = 32;
    } else if (walker.getSimdSize() == WalkerType::SIMD_SIZE_SIMD16) {
        simd = 16;
    }
    const uint64_t simdMask = (1ull << simd) - 1;

    // Mask off lanes based on the execution mask, then count the set bits.
    uint64_t laneMaskRight = walker.getRightExecutionMask() & simdMask;
    int lanesPerThreadX = 0;
    while (laneMaskRight) {
        lanesPerThreadX += static_cast<int>(laneMaskRight & 1);
        laneMaskRight >>= 1;
    }
    return lanesPerThreadX;
}

} // namespace

// A null source buffer must be rejected with CL_INVALID_MEM_OBJECT.
HWTEST_F(EnqueueCopyBufferRectTest, GivenNullSrcMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) {
    size_t srcOrigin[] = {0, 0, 0};
    size_t dstOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 0};
    const auto retVal = clEnqueueCopyBufferRect(
        pCmdQ,
        nullptr,
        dstBuffer,
        srcOrigin,
        dstOrigin,
        region,
        10,
        0,
        10,
        0,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}

// A null destination buffer must be rejected with CL_INVALID_MEM_OBJECT.
HWTEST_F(EnqueueCopyBufferRectTest, GivenNullDstMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) {
    size_t srcOrigin[] = {0, 0, 0};
    size_t dstOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 0};
    const auto retVal = clEnqueueCopyBufferRect(
        pCmdQ,
        srcBuffer,
        nullptr,
        srcOrigin,
        dstOrigin,
        region,
        10,
        0,
        10,
        0,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}

// Valid buffers and a non-empty 1x1x1 region must succeed.
HWTEST_F(EnqueueCopyBufferRectTest, GivenValidParametersWhenCopyingBufferRectThenSuccessIsReturned) {
    size_t srcOrigin[] = {0, 0, 0};
    size_t dstOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};
    const auto retVal = clEnqueueCopyBufferRect(
        pCmdQ,
        srcBuffer,
        dstBuffer,
        srcOrigin,
        dstOrigin,
        region,
        10,
        0,
        10,
        0,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskCountIsAlignedWithCsr) {
    // This test case assumes IOQ.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.taskCount = pCmdQ->taskCount + 100;
    csr.taskLevel = pCmdQ->taskLevel + 50;

    enqueueCopyBufferRect2D<FamilyType>();
    EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
    EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenGpgpuWalkerIsCorrect) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
    enqueueCopyBufferRect2D<FamilyType>();

    auto *cmd = (GPGPU_WALKER *)cmdWalker;
    ASSERT_NE(nullptr, cmd);

    // Verify GPGPU_WALKER parameters; a 2D copy uses a single group in Z.
    EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
    EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension());
    EXPECT_NE(0u, cmd->getRightExecutionMask());
    EXPECT_NE(0u, cmd->getBottomExecutionMask());
    EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
    EXPECT_NE(0u, cmd->getIndirectDataLength());
    EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
    EXPECT_FALSE(cmd->getIndirectParameterEnable());

    // At least one lane inside the SIMD width must be enabled.
    EXPECT_NE(0, countRightExecutionLanes(*cmd));
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskLevelIsIncremented) {
    const auto taskLevelBefore = pCmdQ->taskLevel;

    enqueueCopyBufferRect2D<FamilyType>();
    EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore);
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenCommandsAreAdded) {
    const auto usedCmdBufferBefore = pCS->getUsed();

    enqueueCopyBufferRect2D<FamilyType>();
    EXPECT_NE(usedCmdBufferBefore, pCS->getUsed());
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenIndirectDataGetsAdded) {
    const auto dshBefore = pDSH->getUsed();
    const auto iohBefore = pIOH->getUsed();
    const auto sshBefore = pSSH->getUsed();

    enqueueCopyBufferRect2D<FamilyType>();

    // Extract the kernel used by rebuilding the dispatch info for the same copy.
    MultiDispatchInfo multiDispatchInfo;
    auto &builder = pCmdQ->getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
                                                                                                             pCmdQ->getContext(), pCmdQ->getDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;
    builder.buildDispatchInfos(multiDispatchInfo, dc);
    // ASSERT (not EXPECT): begin() is dereferenced on the next line.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    EXPECT_NE(dshBefore, pDSH->getUsed());
    EXPECT_NE(iohBefore, pIOH->getUsed());
    if (kernel->requiresSshForBuffers()) {
        EXPECT_NE(sshBefore, pSSH->getUsed());
    }
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessKernelIsUsed) {
    auto &builder = pCmdQ->getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRectStateless,
                                                                                                             pCmdQ->getContext(), pCmdQ->getDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;

    MultiDispatchInfo multiDispatchInfo;
    builder.buildDispatchInfos(multiDispatchInfo, dc);
    // ASSERT (not EXPECT): begin() is dereferenced on the next line.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);
    EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
    EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
    enqueueCopyBufferRect2D<FamilyType>();
    validateL3Programming<FamilyType>(cmdList, itorWalker);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) {
    enqueueCopyBufferRect2D<FamilyType>();
    auto &ultCsr = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
    validateStateBaseAddress<FamilyType>(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex),
                                         pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaInterfaceDescriptorLoadIsCorrect) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    enqueueCopyBufferRect2D<FamilyType>();

    auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad;
    ASSERT_NE(nullptr, cmd);

    // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));

    // Validate the start address
    const size_t alignmentStartAddress = 64 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress);

    // Validate the length
    EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength());
    const size_t alignmentTotalLength = 32 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength);

    // Generically validate this command
    FamilyType::PARSE::template validateCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(cmdList.begin(), itorMediaInterfaceDescriptorLoad);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenInterfaceDescriptorDataIsCorrect) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    enqueueCopyBufferRect2D<FamilyType>();

    // Both parsed commands are dereferenced below, so they must be present.
    ASSERT_NE(nullptr, cmdStateBaseAddress);
    ASSERT_NE(nullptr, cmdInterfaceDescriptorData);
    auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
    auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;

    // The 64-bit kernel start pointer must not exceed the instruction buffer
    // size programmed in STATE_BASE_ADDRESS (scaled by the page size).
    const auto kernelStartPointer = (static_cast<uint64_t>(IDD.getKernelStartPointerHigh()) << 32) + IDD.getKernelStartPointer();
    EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);

    EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
    EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
    EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength());
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenNumberOfPipelineSelectsIsOne) {
    enqueueCopyBufferRect2D<FamilyType>();
    const int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
    EXPECT_EQ(1, numCommands);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaVfeStateIsSetCorrectly) {
    enqueueCopyBufferRect2D<FamilyType>();
    validateMediaVFEState<FamilyType>(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenGpgpuWalkerIsCorrect) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
    enqueueCopyBufferRect3D<FamilyType>();

    auto *cmd = (GPGPU_WALKER *)cmdWalker;
    ASSERT_NE(nullptr, cmd);

    // Verify GPGPU_WALKER parameters; a 3D copy needs more than one group in Z.
    EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
    EXPECT_LT(1u, cmd->getThreadGroupIdZDimension());
    EXPECT_NE(0u, cmd->getRightExecutionMask());
    EXPECT_NE(0u, cmd->getBottomExecutionMask());
    EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
    EXPECT_NE(0u, cmd->getIndirectDataLength());
    EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
    EXPECT_FALSE(cmd->getIndirectParameterEnable());

    // At least one lane inside the SIMD width must be enabled.
    EXPECT_NE(0, countRightExecutionLanes(*cmd));
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenTaskLevelIsIncremented) {
    const auto taskLevelBefore = pCmdQ->taskLevel;

    enqueueCopyBufferRect3D<FamilyType>();
    EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore);
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenCommandsAreAdded) {
    const auto usedCmdBufferBefore = pCS->getUsed();

    enqueueCopyBufferRect3D<FamilyType>();
    EXPECT_NE(usedCmdBufferBefore, pCS->getUsed());
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenIndirectDataIsAdded) {
    const auto usedIndirectHeapBefore = pDSH->getUsed();

    enqueueCopyBufferRect3D<FamilyType>();
    EXPECT_NE(usedIndirectHeapBefore, pDSH->getUsed());
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenL3ProgrammingIsCorrect) {
    enqueueCopyBufferRect3D<FamilyType>();
    validateL3Programming<FamilyType>(cmdList, itorWalker);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When3DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) {
    enqueueCopyBufferRect3D<FamilyType>();
    auto &ultCsr = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
    validateStateBaseAddress<FamilyType>(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex),
                                         pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaInterfaceDescriptorLoadIsCorrect) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    enqueueCopyBufferRect3D<FamilyType>();

    auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad;
    ASSERT_NE(nullptr, cmd);

    // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));

    // Validate the start address
    const size_t alignmentStartAddress = 64 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress);

    // Validate the length
    EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength());
    const size_t alignmentTotalLength = 32 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength);

    // Generically validate this command
    FamilyType::PARSE::template validateCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(cmdList.begin(), itorMediaInterfaceDescriptorLoad);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenInterfaceDescriptorDataIsCorrect) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    enqueueCopyBufferRect3D<FamilyType>();

    // Both parsed commands are dereferenced below, so they must be present.
    ASSERT_NE(nullptr, cmdStateBaseAddress);
    ASSERT_NE(nullptr, cmdInterfaceDescriptorData);
    auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
    auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;

    // The 64-bit kernel start pointer must not exceed the instruction buffer
    // size programmed in STATE_BASE_ADDRESS (scaled by the page size).
    const auto kernelStartPointer = (static_cast<uint64_t>(IDD.getKernelStartPointerHigh()) << 32) + IDD.getKernelStartPointer();
    EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);

    EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
    EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
    EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength());
}

HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenNumberOfPipelineSelectsIsOne) {
    enqueueCopyBufferRect3D<FamilyType>();
    const int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
    EXPECT_EQ(1, numCommands);
}

HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaVfeStateIsSetCorrectly) {
    enqueueCopyBufferRect3D<FamilyType>();
    validateMediaVFEState<FamilyType>(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState);
}

// Fixture for the stateless/stateful command queue tests. It owns its own
// device and context, uses a MockBuffer as the copy source so its size can be
// overridden per test, and is skipped when is32bit is set.
struct EnqueueCopyBufferRectHw : public ::testing::Test {

    void SetUp() override {
        if (is32bit) {
            GTEST_SKIP();
        }
        device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(*platformDevices));
        context.reset(new MockContext(device.get()));
        dstBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create(context.get()));
    }

    std::unique_ptr<MockDevice> device;
    std::unique_ptr<MockContext> context;
    MockBuffer srcBuffer;
    std::unique_ptr<Buffer> dstBuffer;

    const size_t rowPitch = 100;
    const size_t slicePitch = 100 * 100;
    std::array<size_t, 3> srcOrigin = {{0, 0, 0}};
    std::array<size_t, 3> dstOrigin = {{0, 0, 0}};
    std::array<size_t, 3> region = {{50, 50, 1}};
    // Source sizes used by the tests: bigSize is assigned in the stateless
    // test, smallSize (one byte less) in the stateful test.
    uint64_t bigSize = 4ull * MemoryConstants::gigaByte;
    uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1;

  protected:
    // Enqueues a 50x50x1 rect copy from srcBuffer to dstBuffer on cmdQ using
    // the fixture's pitches and origins; returns the API status code.
    template <typename FamilyType>
    cl_int enqueueCopyBufferRectHw(CommandQueueHw<FamilyType> *cmdQ) {
        return clEnqueueCopyBufferRect(
            cmdQ,
            &srcBuffer,
            dstBuffer.get(),
            srcOrigin.data(),
            dstOrigin.data(),
            region.data(),
            rowPitch,
            slicePitch,
            rowPitch,
            slicePitch,
            0,
            nullptr,
            nullptr);
    }
};

using EnqueueCopyBufferRectStateless = EnqueueCopyBufferRectHw;

HWTEST_F(EnqueueCopyBufferRectStateless, GivenValidParametersWhenCopyingBufferRectStatelessThenSuccessIsReturned) {
    std::unique_ptr<CommandQueueHw<FamilyType>> cmdQ(new CommandQueueStateless<FamilyType>(context.get(), device.get()));
    srcBuffer.size = static_cast<size_t>(bigSize);
    const auto retVal = enqueueCopyBufferRectHw(cmdQ.get());
    EXPECT_EQ(CL_SUCCESS, retVal);
}

using EnqueueCopyBufferRectStateful = EnqueueCopyBufferRectHw;

HWTEST_F(EnqueueCopyBufferRectStateful, GivenValidParametersWhenCopyingBufferRectStatefulThenSuccessIsReturned) {
    std::unique_ptr<CommandQueueHw<FamilyType>> cmdQ(new CommandQueueStateful<FamilyType>(context.get(), device.get()));
    srcBuffer.size = static_cast<size_t>(smallSize);
    const auto retVal = enqueueCopyBufferRectHw(cmdQ.get());
    EXPECT_EQ(CL_SUCCESS, retVal);
}