/*
 * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "runtime/built_ins/built_ins.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "reg_configs_common.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/memory_manager/memory_constants.h"
#include "unit_tests/command_queue/enqueue_read_buffer_rect_fixture.h"
#include "unit_tests/gen_common/gen_commands_common_validation.h"
#include "test.h"

#include <memory>

using namespace OCLRT;

// NOTE(review): the explicit template arguments used throughout this file
// (<FamilyType>, find<CMD *>, validateCommand<CMD *>, ...) were absent in the
// previous revision and have been reconstructed from the surrounding code
// (the cast applied to each find() result and the FamilyType aliases in scope).
// Confirm them against the fixture/helper declarations.

// A null buffer handle passed to clEnqueueReadBufferRect must be rejected
// with CL_INVALID_MEM_OBJECT.
HWTEST_F(EnqueueReadBufferRectTest, null_src_mem_object) {
    auto retVal = CL_SUCCESS;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 0};

    retVal = clEnqueueReadBufferRect(
        pCmdQ,
        nullptr,
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        10,
        0,
        10,
        0,
        hostPtr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}

// A null host pointer passed to clEnqueueReadBufferRect must be rejected
// with CL_INVALID_VALUE.
HWTEST_F(EnqueueReadBufferRectTest, nullHostPtr) {
    auto retVal = CL_SUCCESS;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 0};

    retVal = clEnqueueReadBufferRect(
        pCmdQ,
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        10,
        0,
        10,
        0,
        nullptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// A 1x1x1 non-blocking read with valid buffer and host pointer must succeed.
HWTEST_F(EnqueueReadBufferRectTest, returnSuccess) {
    auto retVal = CL_SUCCESS;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    retVal = clEnqueueReadBufferRect(
        pCmdQ,
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        10,
        0,
        10,
        0,
        hostPtr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
}

// After a blocking enqueue the queue's task count must match the CSR's and
// the queue's task level must equal the CSR task level sampled before the enqueue.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, alignsToCSR_Blocking) {
    // this test case assumes IOQ
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.taskCount = pCmdQ->taskCount + 100;
    csr.taskLevel = pCmdQ->taskLevel + 50;
    auto oldCsrTaskLevel = csr.peekTaskLevel();

    enqueueReadBufferRect2D<FamilyType>(CL_TRUE);

    EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
    EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel);
}

// After a non-blocking enqueue the queue's task count must match the CSR's and
// the CSR task level must be one ahead of the queue's.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, alignsToCSR_NonBlocking) {
    // this test case assumes IOQ
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.taskCount = pCmdQ->taskCount + 100;
    csr.taskLevel = pCmdQ->taskLevel + 50;

    enqueueReadBufferRect2D<FamilyType>(CL_FALSE);

    EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
    EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1);
}

// Checks the GPGPU_WALKER command emitted for a 2D read.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_GPGPUWalker) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;

    enqueueReadBufferRect2D<FamilyType>();

    ASSERT_NE(cmdList.end(), itorWalker);
    auto *cmd = (GPGPU_WALKER *)*itorWalker;

    // Verify GPGPU_WALKER parameters
    EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
    EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension());
    EXPECT_NE(0u, cmd->getRightExecutionMask());
    EXPECT_NE(0u, cmd->getBottomExecutionMask());
    EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
    EXPECT_NE(0u, cmd->getIndirectDataLength());
    EXPECT_FALSE(cmd->getIndirectParameterEnable());

    // Compute the SIMD lane mask
    size_t simd =
        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32
            ? 32
            : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16
                  ? 16
                  : 8;
    uint64_t simdMask = (1ull << simd) - 1;

    // Mask off lanes based on the execution masks.
    // The resulting lane count is computed but not checked by any expectation here.
    auto laneMaskRight = cmd->getRightExecutionMask() & simdMask;
    auto lanesPerThreadX = 0;
    while (laneMaskRight) {
        lanesPerThreadX += laneMaskRight & 1;
        laneMaskRight >>= 1;
    }
}

// The queue's task level must grow after a 2D read is enqueued.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_bumpsTaskLevel) {
    auto taskLevelBefore = pCmdQ->taskLevel;

    enqueueReadBufferRect2D<FamilyType>();

    EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore);
}

// The command stream usage must change after a 2D read is enqueued.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_addsCommands) {
    auto usedCmdBufferBefore = pCS->getUsed();

    enqueueReadBufferRect2D<FamilyType>();

    EXPECT_NE(usedCmdBufferBefore, pCS->getUsed());
}

// DSH and IOH usage must change after a 2D read; SSH usage must change too
// when the built-in kernel reports that it requires SSH for buffers.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_addsIndirectData) {
    auto dshBefore = pDSH->getUsed();
    auto iohBefore = pIOH->getUsed();
    auto sshBefore = pSSH->getUsed();

    enqueueReadBufferRect2D<FamilyType>();

    // Extract the kernel used
    MultiDispatchInfo multiDispatchInfo;
    auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
                                                                          pCmdQ->getContext(), pCmdQ->getDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
    dc.srcMemObj = buffer.get();
    dc.dstPtr = hostPtr;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;
    builder.buildDispatchInfos(multiDispatchInfo, dc);

    // Fatal check: begin() is dereferenced right below.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    EXPECT_NE(dshBefore, pDSH->getUsed());
    EXPECT_NE(iohBefore, pIOH->getUsed());
    if (kernel->requiresSshForBuffers()) {
        EXPECT_NE(sshBefore, pSSH->getUsed());
    }
}

// Delegates L3 programming validation to the common helper.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_LoadRegisterImmediateL3CNTLREG) {
    enqueueReadBufferRect2D<FamilyType>();
    validateL3Programming<FamilyType>(cmdList, itorWalker);
}

// Delegates STATE_BASE_ADDRESS validation to the common helper.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) {
    enqueueReadBufferRect2D<FamilyType>();
    validateStateBaseAddress<FamilyType>(this->pDevice->getCommandStreamReceiver().getMemoryManager()->getInternalHeapBaseAddress(),
                                         pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu);
}

// Checks alignment and length of the MEDIA_INTERFACE_DESCRIPTOR_LOAD command
// found between PIPELINE_SELECT and the walker.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_MediaInterfaceDescriptorLoad) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    enqueueReadBufferRect2D<FamilyType>();

    // All state should be programmed before walker
    auto itorCmd = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(itorPipelineSelect, itorWalker);
    ASSERT_NE(itorWalker, itorCmd);

    auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd;

    // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));

    // Validate the start address
    size_t alignmentStartAddress = 64 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress);

    // Validate the length
    EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength());
    size_t alignmentTotalLength = 32 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength);

    // Generically validate this command
    FamilyType::PARSE::template validateCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(cmdList.begin(), itorCmd);
}

// Locates the interface descriptor through MIDL + SBA and checks its fields.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_InterfaceDescriptorData) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    enqueueReadBufferRect2D<FamilyType>();

    // Extract the MIDL command
    auto itorCmd = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(itorPipelineSelect, itorWalker);
    ASSERT_NE(itorWalker, itorCmd);
    auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd;

    // Extract the SBA command
    itorCmd = find<STATE_BASE_ADDRESS *>(cmdList.begin(), itorWalker);
    ASSERT_NE(itorWalker, itorCmd);
    auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd;

    // Extract the DSH
    auto DSH = cmdSBA->getDynamicStateBaseAddress();
    ASSERT_NE(0u, DSH);

    // IDD should be located within DSH
    auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress();
    auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength();
    ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize);

    // Extract the IDD
    auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)(DSH + iddStart);

    // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
    auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
    EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);

    EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
    EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
    EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength());
}

// Exactly one PIPELINE_SELECT enabling the pipeline is expected.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_PipelineSelect) {
    enqueueReadBufferRect2D<FamilyType>();
    int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
    EXPECT_EQ(1, numCommands);
}

// Checks the MEDIA_VFE_STATE command found between PIPELINE_SELECT and the walker.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_MediaVFEState) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    enqueueReadBufferRect2D<FamilyType>();

    // All state should be programmed before walker
    auto itorCmd = find<MEDIA_VFE_STATE *>(itorPipelineSelect, itorWalker);
    ASSERT_NE(itorWalker, itorCmd);

    auto *cmd = (MEDIA_VFE_STATE *)*itorCmd;

    // Verify we have a valid length
    EXPECT_EQ(pDevice->getHardwareInfo().pSysInfo->ThreadCount, cmd->getMaximumNumberOfThreads());
    EXPECT_NE(0u, cmd->getNumberOfUrbEntries());
    EXPECT_NE(0u, cmd->getUrbEntryAllocationSize());

    // Generically validate this command
    FamilyType::PARSE::template validateCommand<MEDIA_VFE_STATE *>(cmdList.begin(), itorCmd);
}

// A blocking read must be followed by PIPE_CONTROL(s) after the walker, with
// DC flush set on the first one.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, blockingRequiresPipeControlAfterWalkerWithDCFlushSet) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto blocking = CL_TRUE;
    enqueueReadBufferRect2D<FamilyType>(blocking);

    auto itorWalker = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker);

    // All state should be programmed after walker
    auto itorCmd = find<PIPE_CONTROL *>(itorWalker, cmdList.end());
    // Must be fatal and must precede the dereference below.
    ASSERT_NE(cmdList.end(), itorCmd);
    auto *cmd = (PIPE_CONTROL *)*itorCmd;

    if (::renderCoreFamily != IGFX_GEN8_CORE) {
        // SKL+: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag
        EXPECT_TRUE(cmd->getDcFlushEnable());

        // Move to next PPC
        auto itorCmdP = ++((GenCmdList::iterator)itorCmd);
        ASSERT_NE(cmdList.end(), itorCmdP);

        auto itorCmd2 = find<PIPE_CONTROL *>(itorCmdP, cmdList.end());
        ASSERT_NE(cmdList.end(), itorCmd2);
        cmd = (PIPE_CONTROL *)*itorCmd2;
        EXPECT_FALSE(cmd->getDcFlushEnable());
    } else {
        // BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag
        EXPECT_TRUE(cmd->getDcFlushEnable());
    }
}

// In-order queue, host pointer equal to the buffer's CPU address, two wait
// events (levels 8 and 19): the output event and the queue must both end at level 19.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    uint32_t taskLevelCmdQ = 17;
    pCmdQ->taskLevel = taskLevelCmdQ;

    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

    cl_event eventWaitList[] = {
        &event1,
        &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};
    void *ptr = buffer->getCpuAddressForMemoryTransfer();

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        numEventsInWaitList,
        eventWaitList,
        &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdQ->taskLevel);
    EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType());

    pEvent->release();
}

// Same scenario as above, executed on a queue created with
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.
HWTEST_F(EnqueueReadBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    // NOTE(review): element type reconstructed (std::unique_ptr cannot deduce it
    // from a raw pointer) -- confirm against createCommandQueue()'s return type.
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    uint32_t taskLevelCmdQ = 17;
    pCmdOOQ->taskLevel = taskLevelCmdQ;

    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

    cl_event eventWaitList[] = {
        &event1,
        &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};
    void *ptr = buffer->getCpuAddressForMemoryTransfer();

    retVal = pCmdOOQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        numEventsInWaitList,
        eventWaitList,
        &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdOOQ->taskLevel);
    EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType());

    pEvent->release();
}

// Row pitches of zero with host pointer equal to the buffer's CPU address:
// the queue's task level is expected to stay at 0.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndRowPitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        0,
        slicePitch,
        0,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

// Slice pitches of zero with host pointer equal to the buffer's CPU address:
// the queue's task level is expected to stay at 0.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndSlicePitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        0,
        rowPitch,
        0,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

// The host pointer is shifted by the difference between buffer and host
// origins, so both origins address the same storage: task level is expected to stay at 0.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointTheSameStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {50, 50, 0};
    size_t hostOrigin[] = {20, 20, 0};
    size_t region[] = {50, 50, 1};

    size_t hostOffset = (bufferOrigin[2] - hostOrigin[2]) * slicePitch + (bufferOrigin[1] - hostOrigin[1]) * rowPitch + (bufferOrigin[0] - hostOrigin[0]);
    auto hostStorage = ptrOffset(ptr, hostOffset);

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        hostStorage,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

// Buffer and host origins differ while the host pointer is the buffer's CPU
// address: the queue's task level is expected to become 1.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointDiffrentStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {1, 1, 0};
    size_t region[] = {1, 1, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 1u);
}

// Reading nonZeroCopyBuffer into its own CPU address: the queue's task level
// is expected to become 1.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        nonZeroCopyBuffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 1u);
}

// NOTE(review): fixture argument reconstructed -- the alias had no template
// argument; confirm that this is the fixture providing `buffer`, `ptr` and `pCmdQ`.
using NegativeFailAllocationTest = Test<NegativeFailAllocationCommandEnqueueBaseFixture>;

// With this fixture the enqueue is expected to fail with CL_OUT_OF_RESOURCES.
HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    cl_int retVal = CL_SUCCESS;
    constexpr size_t rowPitch = 100;
    constexpr size_t slicePitch = 100 * 100;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
}