compute-runtime/unit_tests/command_queue/enqueue_read_buffer_rect_te...

560 lines
18 KiB
C++

/*
* Copyright (C) 2017-2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/built_ins/built_ins.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/event/event.h"
#include "reg_configs_common.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/memory_manager/memory_constants.h"
#include "unit_tests/command_queue/enqueue_read_buffer_rect_fixture.h"
#include "unit_tests/gen_common/gen_commands_common_validation.h"
#include "test.h"
using namespace OCLRT;
HWTEST_F(EnqueueReadBufferRectTest, null_src_mem_object) {
auto retVal = CL_SUCCESS;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {1, 1, 0};
retVal = clEnqueueReadBufferRect(
pCmdQ,
nullptr,
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
10,
0,
10,
0,
hostPtr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}
HWTEST_F(EnqueueReadBufferRectTest, nullHostPtr) {
auto retVal = CL_SUCCESS;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {1, 1, 0};
retVal = clEnqueueReadBufferRect(
pCmdQ,
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
10,
0,
10,
0,
nullptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
HWTEST_F(EnqueueReadBufferRectTest, returnSuccess) {
auto retVal = CL_SUCCESS;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {1, 1, 1};
retVal = clEnqueueReadBufferRect(
pCmdQ,
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
10,
0,
10,
0,
hostPtr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueReadBufferRectTest, alignsToCSR_Blocking) {
//this test case assumes IOQ
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.taskCount = pCmdQ->taskCount + 100;
csr.taskLevel = pCmdQ->taskLevel + 50;
auto oldCsrTaskLevel = csr.peekTaskLevel();
enqueueReadBufferRect2D<FamilyType>(CL_TRUE);
EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel);
}
HWTEST_F(EnqueueReadBufferRectTest, alignsToCSR_NonBlocking) {
//this test case assumes IOQ
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.taskCount = pCmdQ->taskCount + 100;
csr.taskLevel = pCmdQ->taskLevel + 50;
enqueueReadBufferRect2D<FamilyType>(CL_FALSE);
EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1);
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_GPGPUWalker) {
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
enqueueReadBufferRect2D<FamilyType>();
ASSERT_NE(cmdList.end(), itorWalker);
auto *cmd = (GPGPU_WALKER *)*itorWalker;
// Verify GPGPU_WALKER parameters
EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension());
EXPECT_NE(0u, cmd->getRightExecutionMask());
EXPECT_NE(0u, cmd->getBottomExecutionMask());
EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
EXPECT_NE(0u, cmd->getIndirectDataLength());
EXPECT_FALSE(cmd->getIndirectParameterEnable());
// Compute the SIMD lane mask
size_t simd =
cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8;
uint64_t simdMask = (1ull << simd) - 1;
// Mask off lanes based on the execution masks
auto laneMaskRight = cmd->getRightExecutionMask() & simdMask;
auto lanesPerThreadX = 0;
while (laneMaskRight) {
lanesPerThreadX += laneMaskRight & 1;
laneMaskRight >>= 1;
}
}
HWTEST_F(EnqueueReadBufferRectTest, 2D_bumpsTaskLevel) {
auto taskLevelBefore = pCmdQ->taskLevel;
enqueueReadBufferRect2D<FamilyType>();
EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore);
}
HWTEST_F(EnqueueReadBufferRectTest, 2D_addsCommands) {
auto usedCmdBufferBefore = pCS->getUsed();
enqueueReadBufferRect2D<FamilyType>();
EXPECT_NE(usedCmdBufferBefore, pCS->getUsed());
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_addsIndirectData) {
auto dshBefore = pDSH->getUsed();
auto iohBefore = pIOH->getUsed();
auto sshBefore = pSSH->getUsed();
enqueueReadBufferRect2D<FamilyType>();
// Extract the kernel used
MultiDispatchInfo multiDispatchInfo;
auto &builder = pCmdQ->getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
pCmdQ->getContext(), pCmdQ->getDevice());
ASSERT_NE(nullptr, &builder);
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
dc.srcMemObj = buffer.get();
dc.dstPtr = hostPtr;
dc.srcOffset = {0, 0, 0};
dc.dstOffset = {0, 0, 0};
dc.size = {50, 50, 1};
dc.srcRowPitch = rowPitch;
dc.srcSlicePitch = slicePitch;
dc.dstRowPitch = rowPitch;
dc.dstSlicePitch = slicePitch;
builder.buildDispatchInfos(multiDispatchInfo, dc);
EXPECT_NE(0u, multiDispatchInfo.size());
auto kernel = multiDispatchInfo.begin()->getKernel();
ASSERT_NE(nullptr, kernel);
EXPECT_NE(dshBefore, pDSH->getUsed());
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}
HWTEST_F(EnqueueReadBufferRectTest, 2D_LoadRegisterImmediateL3CNTLREG) {
enqueueReadBufferRect2D<FamilyType>();
validateL3Programming<FamilyType>(cmdList, itorWalker);
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) {
enqueueReadBufferRect2D<FamilyType>();
validateStateBaseAddress<FamilyType>(this->pCmdQ->getCommandStreamReceiver().getMemoryManager()->getInternalHeapBaseAddress(),
pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu);
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_MediaInterfaceDescriptorLoad) {
typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
enqueueReadBufferRect2D<FamilyType>();
// All state should be programmed before walker
auto itorCmd = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(itorPipelineSelect, itorWalker);
ASSERT_NE(itorWalker, itorCmd);
auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd;
// Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs
EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));
// Validate the start address
size_t alignmentStartAddress = 64 * sizeof(uint8_t);
EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress);
// Validate the length
EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength());
size_t alignmentTotalLength = 32 * sizeof(uint8_t);
EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength);
// Generically validate this command
FamilyType::PARSE::template validateCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(cmdList.begin(), itorCmd);
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_InterfaceDescriptorData) {
typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
enqueueReadBufferRect2D<FamilyType>();
// Extract the MIDL command
auto itorCmd = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(itorPipelineSelect, itorWalker);
ASSERT_NE(itorWalker, itorCmd);
auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd;
// Extract the SBA command
itorCmd = find<STATE_BASE_ADDRESS *>(cmdList.begin(), itorWalker);
ASSERT_NE(itorWalker, itorCmd);
auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd;
// Extrach the DSH
auto DSH = cmdSBA->getDynamicStateBaseAddress();
ASSERT_NE(0u, DSH);
// IDD should be located within DSH
auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress();
auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength();
ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize);
// Extract the IDD
auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)(DSH + iddStart);
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength());
}
HWTEST_F(EnqueueReadBufferRectTest, 2D_PipelineSelect) {
enqueueReadBufferRect2D<FamilyType>();
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 2D_MediaVFEState) {
enqueueReadBufferRect2D<FamilyType>();
validateMediaVFEState<FamilyType>(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState);
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, blockingRequiresPipeControlAfterWalkerWithDCFlushSet) {
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
auto blocking = CL_TRUE;
enqueueReadBufferRect2D<FamilyType>(blocking);
auto itorWalker = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
// All state should be programmed after walker
auto itorCmd = find<PIPE_CONTROL *>(itorWalker, cmdList.end());
auto *cmd = (PIPE_CONTROL *)*itorCmd;
EXPECT_NE(cmdList.end(), itorCmd);
if (::renderCoreFamily != IGFX_GEN8_CORE) {
// SKL+: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag
EXPECT_TRUE(cmd->getDcFlushEnable());
// Move to next PPC
auto itorCmdP = ++((GenCmdList::iterator)itorCmd);
EXPECT_NE(cmdList.end(), itorCmdP);
auto itorCmd2 = find<PIPE_CONTROL *>(itorCmdP, cmdList.end());
cmd = (PIPE_CONTROL *)*itorCmd2;
EXPECT_FALSE(cmd->getDcFlushEnable());
} else {
// BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag
EXPECT_TRUE(cmd->getDcFlushEnable());
}
}
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
cl_int retVal = CL_SUCCESS;
uint32_t taskLevelCmdQ = 17;
pCmdQ->taskLevel = taskLevelCmdQ;
uint32_t taskLevelEvent1 = 8;
uint32_t taskLevelEvent2 = 19;
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
cl_event eventWaitList[] =
{
&event1,
&event2};
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
cl_event event = nullptr;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {50, 50, 1};
void *ptr = buffer->getCpuAddressForMemoryTransfer();
retVal = pCmdQ->enqueueReadBufferRect(
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
rowPitch,
slicePitch,
rowPitch,
slicePitch,
ptr,
numEventsInWaitList,
eventWaitList,
&event);
;
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, event);
auto pEvent = (Event *)event;
EXPECT_EQ(19u, pEvent->taskLevel);
EXPECT_EQ(19u, pCmdQ->taskLevel);
EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType());
pEvent->release();
}
HWTEST_F(EnqueueReadBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
uint32_t taskLevelCmdQ = 17;
pCmdOOQ->taskLevel = taskLevelCmdQ;
uint32_t taskLevelEvent1 = 8;
uint32_t taskLevelEvent2 = 19;
Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
cl_event eventWaitList[] =
{
&event1,
&event2};
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
cl_event event = nullptr;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {50, 50, 1};
void *ptr = buffer->getCpuAddressForMemoryTransfer();
retVal = pCmdOOQ->enqueueReadBufferRect(
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
rowPitch,
slicePitch,
rowPitch,
slicePitch,
ptr,
numEventsInWaitList,
eventWaitList,
&event);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, event);
auto pEvent = (Event *)event;
EXPECT_EQ(19u, pEvent->taskLevel);
EXPECT_EQ(19u, pCmdOOQ->taskLevel);
EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType());
pEvent->release();
}
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndRowPitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
cl_int retVal = CL_SUCCESS;
void *ptr = buffer->getCpuAddressForMemoryTransfer();
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {50, 50, 1};
retVal = pCmdQ->enqueueReadBufferRect(
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
0,
slicePitch,
0,
slicePitch,
ptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(pCmdQ->taskLevel, 0u);
}
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndSlicePitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
cl_int retVal = CL_SUCCESS;
void *ptr = buffer->getCpuAddressForMemoryTransfer();
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {50, 50, 1};
retVal = pCmdQ->enqueueReadBufferRect(
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
rowPitch,
0,
rowPitch,
0,
ptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(pCmdQ->taskLevel, 0u);
}
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointTheSameStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
cl_int retVal = CL_SUCCESS;
void *ptr = buffer->getCpuAddressForMemoryTransfer();
size_t bufferOrigin[] = {50, 50, 0};
size_t hostOrigin[] = {20, 20, 0};
size_t region[] = {50, 50, 1};
size_t hostOffset = (bufferOrigin[2] - hostOrigin[2]) * slicePitch + (bufferOrigin[1] - hostOrigin[1]) * rowPitch + (bufferOrigin[0] - hostOrigin[0]);
auto hostStorage = ptrOffset(ptr, hostOffset);
retVal = pCmdQ->enqueueReadBufferRect(
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
rowPitch,
slicePitch,
rowPitch,
slicePitch,
hostStorage,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(pCmdQ->taskLevel, 0u);
}
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointDiffrentStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
cl_int retVal = CL_SUCCESS;
void *ptr = buffer->getCpuAddressForMemoryTransfer();
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {1, 1, 0};
size_t region[] = {1, 1, 1};
retVal = pCmdQ->enqueueReadBufferRect(
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
rowPitch,
slicePitch,
rowPitch,
slicePitch,
ptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(pCmdQ->taskLevel, 1u);
}
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
cl_int retVal = CL_SUCCESS;
void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {1, 1, 1};
retVal = pCmdQ->enqueueReadBufferRect(
nonZeroCopyBuffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
rowPitch,
slicePitch,
rowPitch,
slicePitch,
ptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(pCmdQ->taskLevel, 1u);
}
using NegativeFailAllocationTest = Test<NegativeFailAllocationCommandEnqueueBaseFixture>;
HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
cl_int retVal = CL_SUCCESS;
constexpr size_t rowPitch = 100;
constexpr size_t slicePitch = 100 * 100;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {50, 50, 1};
retVal = pCmdQ->enqueueReadBufferRect(
buffer.get(),
CL_FALSE,
bufferOrigin,
hostOrigin,
region,
rowPitch,
slicePitch,
rowPitch,
slicePitch,
ptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
}