compute-runtime/unit_tests/command_queue/enqueue_copy_buffer_rect_te...

469 lines
18 KiB
C++

/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/memory_manager/memory_constants.h"
#include "runtime/built_ins/built_ins.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/helpers/dispatch_info.h"
#include "test.h"
#include "unit_tests/command_queue/enqueue_copy_buffer_rect_fixture.h"
#include "unit_tests/gen_common/gen_commands_common_validation.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "reg_configs_common.h"
using namespace NEO;
// Byte size of the BufferRect test buffer: 100 * 100 * 100 elements of cl_char.
const size_t EnqueueCopyBufferRectTest::BufferRect::sizeInBytes =
    sizeof(cl_char) * 100 * 100 * 100;
// Passing a null source buffer to clEnqueueCopyBufferRect must be rejected
// with CL_INVALID_MEM_OBJECT.
HWTEST_F(EnqueueCopyBufferRectTest, GivenNullSrcMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) {
    const size_t srcOrigin[] = {0, 0, 0};
    const size_t dstOrigin[] = {0, 0, 0};
    const size_t region[] = {1, 1, 0};

    const cl_int status = clEnqueueCopyBufferRect(
        pCmdQ,
        nullptr, // src_buffer: intentionally missing
        dstBuffer,
        srcOrigin,
        dstOrigin,
        region,
        10,       // src_row_pitch
        0,        // src_slice_pitch
        10,       // dst_row_pitch
        0,        // dst_slice_pitch
        0,        // num_events_in_wait_list
        nullptr,  // event_wait_list
        nullptr); // event

    EXPECT_EQ(CL_INVALID_MEM_OBJECT, status);
}
// Passing a null destination buffer to clEnqueueCopyBufferRect must be
// rejected with CL_INVALID_MEM_OBJECT.
HWTEST_F(EnqueueCopyBufferRectTest, GivenNullDstMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) {
    const size_t srcOrigin[] = {0, 0, 0};
    const size_t dstOrigin[] = {0, 0, 0};
    const size_t region[] = {1, 1, 0};

    const cl_int status = clEnqueueCopyBufferRect(
        pCmdQ,
        srcBuffer,
        nullptr, // dst_buffer: intentionally missing
        srcOrigin,
        dstOrigin,
        region,
        10,       // src_row_pitch
        0,        // src_slice_pitch
        10,       // dst_row_pitch
        0,        // dst_slice_pitch
        0,        // num_events_in_wait_list
        nullptr,  // event_wait_list
        nullptr); // event

    EXPECT_EQ(CL_INVALID_MEM_OBJECT, status);
}
// With both buffers present and a 1x1x1 region, the enqueue call is expected
// to report CL_SUCCESS.
HWTEST_F(EnqueueCopyBufferRectTest, GivenValidParametersWhenCopyingBufferRectThenSuccessIsReturned) {
    const size_t srcOrigin[] = {0, 0, 0};
    const size_t dstOrigin[] = {0, 0, 0};
    const size_t region[] = {1, 1, 1};

    const cl_int status = clEnqueueCopyBufferRect(
        pCmdQ,
        srcBuffer,
        dstBuffer,
        srcOrigin,
        dstOrigin,
        region,
        10,       // src_row_pitch
        0,        // src_slice_pitch
        10,       // dst_row_pitch
        0,        // dst_slice_pitch
        0,        // num_events_in_wait_list
        nullptr,  // event_wait_list
        nullptr); // event

    EXPECT_EQ(CL_SUCCESS, status);
}
// After a 2D copy the queue's task count must equal the CSR's task count, and
// the CSR's task level must be one above the queue's.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskCountIsAlignedWithCsr) {
    // This test case assumes an in-order queue (IOQ).
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Move the CSR counters ahead of the queue's before enqueueing.
    commandStreamReceiver.taskCount = pCmdQ->taskCount + 100;
    commandStreamReceiver.taskLevel = pCmdQ->taskLevel + 50;

    enqueueCopyBufferRect2D<FamilyType>();

    EXPECT_EQ(commandStreamReceiver.peekTaskCount(), pCmdQ->taskCount);
    EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel + 1);
}
// Checks the GPGPU_WALKER command produced by a 2D rectangular buffer copy:
// thread-group dimensions, execution masks, SIMD size and indirect data
// length/alignment, plus the number of lanes enabled by the right execution mask.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenGpgpuWalkerIsCorrect) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;

    enqueueCopyBufferRect2D<FamilyType>();

    auto *cmd = (GPGPU_WALKER *)cmdWalker;
    ASSERT_NE(nullptr, cmd);

    // Verify GPGPU_WALKER parameters; a 2D copy must use exactly one group in Z.
    EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
    EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension());
    EXPECT_NE(0u, cmd->getRightExecutionMask());
    EXPECT_NE(0u, cmd->getBottomExecutionMask());
    EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
    EXPECT_NE(0u, cmd->getIndirectDataLength());
    EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
    EXPECT_FALSE(cmd->getIndirectParameterEnable());

    // Compute the SIMD lane mask
    size_t simd =
        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8;
    uint64_t simdMask = (1ull << simd) - 1;

    // Count the lanes left enabled by the right execution mask. An unsigned
    // counter is used so the uint64_t bit test is not narrowed into an int.
    uint64_t laneMaskRight = cmd->getRightExecutionMask() & simdMask;
    size_t lanesPerThreadX = 0;
    while (laneMaskRight) {
        lanesPerThreadX += static_cast<size_t>(laneMaskRight & 1);
        laneMaskRight >>= 1;
    }

    // The lane count was previously computed but never checked. At least one
    // lane must be active and the count can never exceed the SIMD width.
    EXPECT_NE(0u, lanesPerThreadX);
    EXPECT_LE(lanesPerThreadX, simd);
}
// A 2D copy must leave the queue's task level higher than it was before the enqueue.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskLevelIsIncremented) {
    const auto initialTaskLevel = pCmdQ->taskLevel;

    enqueueCopyBufferRect2D<FamilyType>();

    EXPECT_GT(pCmdQ->taskLevel, initialTaskLevel);
}
// A 2D copy must change the amount of command stream space in use.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenCommandsAreAdded) {
    const auto commandStreamUsedBefore = pCS->getUsed();

    enqueueCopyBufferRect2D<FamilyType>();

    EXPECT_NE(commandStreamUsedBefore, pCS->getUsed());
}
// A 2D copy must consume space in the DSH and IOH heaps, and in the SSH heap
// when the CopyBufferRect built-in kernel reports that it requires SSH for buffers.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenIndirectDataGetsAdded) {
    auto dshBefore = pDSH->getUsed();
    auto iohBefore = pIOH->getUsed();
    auto sshBefore = pSSH->getUsed();

    enqueueCopyBufferRect2D<FamilyType>();

    // Extract the kernel used
    MultiDispatchInfo multiDispatchInfo;
    auto &builder = pCmdQ->getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
                                                                                                                pCmdQ->getContext(), pCmdQ->getDevice());
    // NOTE(review): the address of a reference can never be nullptr in
    // well-defined C++, so this check is effectively a no-op.
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;
    builder.buildDispatchInfos(multiDispatchInfo, dc);

    // Fatal assertion: begin() is dereferenced on the next line, which must
    // not happen when no dispatch info was produced.
    ASSERT_NE(0u, multiDispatchInfo.size());
    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    EXPECT_NE(dshBefore, pDSH->getUsed());
    EXPECT_NE(iohBefore, pIOH->getUsed());
    if (kernel->requiresSshForBuffers()) {
        EXPECT_NE(sshBefore, pSSH->getUsed());
    }
}
// The CopyBufferRectStateless builder must produce a kernel that is flagged as
// compiled for >4GB buffers and whose first argument is not a pure stateful
// buffer access.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessKernelIsUsed) {
    auto &builder = pCmdQ->getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRectStateless,
                                                                                                                pCmdQ->getContext(),
                                                                                                                pCmdQ->getDevice());
    // NOTE(review): the address of a reference can never be nullptr in
    // well-defined C++, so this check is effectively a no-op.
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;

    MultiDispatchInfo multiDispatchInfo;
    builder.buildDispatchInfos(multiDispatchInfo, dc);

    // Fatal assertion: begin() is dereferenced on the next line, which must
    // not happen when no dispatch info was produced.
    ASSERT_NE(0u, multiDispatchInfo.size());
    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
    EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
}
// Runs the shared L3 programming validation on the command list parsed after
// a 2D copy.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
    // Enqueue first so that cmdList and itorWalker refer to this copy's commands.
    enqueueCopyBufferRect2D<FamilyType>();

    validateL3Programming<FamilyType>(cmdList, itorWalker);
}
// Runs the shared STATE_BASE_ADDRESS validation after a 2D copy, using the
// internal heap base address reported by the CSR's memory manager.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) {
    enqueueCopyBufferRect2D<FamilyType>();

    auto &ultCsr = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
    const auto internalHeapBase = ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex);

    validateStateBaseAddress<FamilyType>(internalHeapBase,
                                         pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu);
}
// Checks the MEDIA_INTERFACE_DESCRIPTOR_LOAD command emitted by a 2D copy:
// non-zero total length, length/start-address alignment, and generic validation.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaInterfaceDescriptorLoadIsCorrect) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    enqueueCopyBufferRect2D<FamilyType>();

    auto *midl = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad;
    ASSERT_NE(nullptr, midl);

    // The total length has to be a whole number of INTERFACE_DESCRIPTOR_DATA entries.
    EXPECT_EQ(0u, midl->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));

    // The start address has to be 64-byte aligned.
    const size_t startAddressAlignment = 64 * sizeof(uint8_t);
    EXPECT_EQ(0u, midl->getInterfaceDescriptorDataStartAddress() % startAddressAlignment);

    // The total length has to be non-zero and 32-byte aligned.
    EXPECT_NE(0u, midl->getInterfaceDescriptorTotalLength());
    const size_t totalLengthAlignment = 32 * sizeof(uint8_t);
    EXPECT_EQ(0u, midl->getInterfaceDescriptorTotalLength() % totalLengthAlignment);

    // Generic command validation.
    FamilyType::PARSE::template validateCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(cmdList.begin(), itorMediaInterfaceDescriptorLoad);
}
// Checks the INTERFACE_DESCRIPTOR_DATA produced by a 2D copy: the kernel start
// pointer stays within the instruction buffer size taken from
// STATE_BASE_ADDRESS, and thread count / constant data read lengths are non-zero.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenInterfaceDescriptorDataIsCorrect) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    enqueueCopyBufferRect2D<FamilyType>();

    // Both commands are dereferenced below, so fail cleanly if either was not found.
    auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
    ASSERT_NE(nullptr, cmdSBA);
    auto *pIDD = (INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;
    ASSERT_NE(nullptr, pIDD);
    auto &IDD = *pIDD;

    // Validate the kernel start pointer: the combined high/low value must not
    // exceed the instruction buffer size scaled by MemoryConstants::pageSize.
    // A start pointer of 0 is accepted.
    auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
    EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);

    EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
    EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
    EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength());
}
// A 2D copy must yield exactly one PIPELINE_SELECT that enables pipeline select,
// as counted by the fixture helper.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenNumberOfPipelineSelectsIsOne) {
    enqueueCopyBufferRect2D<FamilyType>();

    const int enablingPipelineSelects = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
    EXPECT_EQ(1, enablingPipelineSelects);
}
// Runs the shared MEDIA_VFE_STATE validation against the device's hardware
// info after a 2D copy.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaVfeStateIsSetCorrectly) {
    enqueueCopyBufferRect2D<FamilyType>();

    auto *hardwareInfo = &pDevice->getHardwareInfo();
    validateMediaVFEState<FamilyType>(hardwareInfo, cmdMediaVfeState, cmdList, itorMediaVfeState);
}
// Checks the GPGPU_WALKER command produced by a 3D rectangular buffer copy:
// thread-group dimensions, execution masks, SIMD size and indirect data
// length/alignment, plus the number of lanes enabled by the right execution mask.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenGpgpuWalkerIsCorrect) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;

    enqueueCopyBufferRect3D<FamilyType>();

    auto *cmd = (GPGPU_WALKER *)cmdWalker;
    ASSERT_NE(nullptr, cmd);

    // Verify GPGPU_WALKER parameters; a 3D copy must span more than one group in Z.
    EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
    EXPECT_LT(1u, cmd->getThreadGroupIdZDimension());
    EXPECT_NE(0u, cmd->getRightExecutionMask());
    EXPECT_NE(0u, cmd->getBottomExecutionMask());
    EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
    EXPECT_NE(0u, cmd->getIndirectDataLength());
    EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
    EXPECT_FALSE(cmd->getIndirectParameterEnable());

    // Compute the SIMD lane mask
    size_t simd =
        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8;
    uint64_t simdMask = (1ull << simd) - 1;

    // Count the lanes left enabled by the right execution mask. An unsigned
    // counter is used so the uint64_t bit test is not narrowed into an int.
    uint64_t laneMaskRight = cmd->getRightExecutionMask() & simdMask;
    size_t lanesPerThreadX = 0;
    while (laneMaskRight) {
        lanesPerThreadX += static_cast<size_t>(laneMaskRight & 1);
        laneMaskRight >>= 1;
    }

    // The lane count was previously computed but never checked. At least one
    // lane must be active and the count can never exceed the SIMD width.
    EXPECT_NE(0u, lanesPerThreadX);
    EXPECT_LE(lanesPerThreadX, simd);
}
// A 3D copy must leave the queue's task level higher than it was before the enqueue.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenTaskLevelIsIncremented) {
    const auto initialTaskLevel = pCmdQ->taskLevel;

    enqueueCopyBufferRect3D<FamilyType>();

    EXPECT_GT(pCmdQ->taskLevel, initialTaskLevel);
}
// A 3D copy must change the amount of command stream space in use.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenCommandsAreAdded) {
    const auto commandStreamUsedBefore = pCS->getUsed();

    enqueueCopyBufferRect3D<FamilyType>();

    EXPECT_NE(commandStreamUsedBefore, pCS->getUsed());
}
// A 3D copy must change the amount of DSH heap space in use.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenIndirectDataIsAdded) {
    const auto dshUsedBefore = pDSH->getUsed();

    enqueueCopyBufferRect3D<FamilyType>();

    EXPECT_NE(dshUsedBefore, pDSH->getUsed());
}
// Runs the shared L3 programming validation on the command list parsed after
// a 3D copy.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenL3ProgrammingIsCorrect) {
    // Enqueue first so that cmdList and itorWalker refer to this copy's commands.
    enqueueCopyBufferRect3D<FamilyType>();

    validateL3Programming<FamilyType>(cmdList, itorWalker);
}
// Runs the shared STATE_BASE_ADDRESS validation after a 3D copy, using the
// internal heap base address reported by the CSR's memory manager.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When3DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) {
    enqueueCopyBufferRect3D<FamilyType>();

    auto &ultCsr = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
    const auto internalHeapBase = ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex);

    validateStateBaseAddress<FamilyType>(internalHeapBase,
                                         pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu);
}
// Checks the MEDIA_INTERFACE_DESCRIPTOR_LOAD command emitted by a 3D copy:
// non-zero total length, length/start-address alignment, and generic validation.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaInterfaceDescriptorLoadIsCorrect) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    enqueueCopyBufferRect3D<FamilyType>();

    auto *midl = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad;
    ASSERT_NE(nullptr, midl);

    // The total length has to be a whole number of INTERFACE_DESCRIPTOR_DATA entries.
    EXPECT_EQ(0u, midl->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));

    // The start address has to be 64-byte aligned.
    const size_t startAddressAlignment = 64 * sizeof(uint8_t);
    EXPECT_EQ(0u, midl->getInterfaceDescriptorDataStartAddress() % startAddressAlignment);

    // The total length has to be non-zero and 32-byte aligned.
    EXPECT_NE(0u, midl->getInterfaceDescriptorTotalLength());
    const size_t totalLengthAlignment = 32 * sizeof(uint8_t);
    EXPECT_EQ(0u, midl->getInterfaceDescriptorTotalLength() % totalLengthAlignment);

    // Generic command validation.
    FamilyType::PARSE::template validateCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(cmdList.begin(), itorMediaInterfaceDescriptorLoad);
}
// Checks the INTERFACE_DESCRIPTOR_DATA produced by a 3D copy: the kernel start
// pointer stays within the instruction buffer size taken from
// STATE_BASE_ADDRESS, and thread count / constant data read lengths are non-zero.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenInterfaceDescriptorDataIsCorrect) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    enqueueCopyBufferRect3D<FamilyType>();

    // Both commands are dereferenced below, so fail cleanly if either was not found.
    auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
    ASSERT_NE(nullptr, cmdSBA);
    auto *pIDD = (INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;
    ASSERT_NE(nullptr, pIDD);
    auto &IDD = *pIDD;

    // Validate the kernel start pointer: the combined high/low value must not
    // exceed the instruction buffer size scaled by MemoryConstants::pageSize.
    // A start pointer of 0 is accepted.
    auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
    EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);

    EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
    EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
    EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength());
}
// A 3D copy must yield exactly one PIPELINE_SELECT that enables pipeline select,
// as counted by the fixture helper.
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenNumberOfPipelineSelectsIsOne) {
    enqueueCopyBufferRect3D<FamilyType>();

    const int enablingPipelineSelects = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
    EXPECT_EQ(1, enablingPipelineSelects);
}
// Runs the shared MEDIA_VFE_STATE validation against the device's hardware
// info after a 3D copy.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaVfeStateIsSetCorrectly) {
    enqueueCopyBufferRect3D<FamilyType>();

    auto *hardwareInfo = &pDevice->getHardwareInfo();
    validateMediaVFEState<FamilyType>(hardwareInfo, cmdMediaVfeState, cmdList, itorMediaVfeState);
}
// Fixture for the stateless/stateful queue tests. It owns a mock device and
// context, a MockBuffer used as the copy source (tests adjust its size), and a
// destination buffer created through BufferHelper.
struct EnqueueCopyBufferRectHw : public ::testing::Test {
    // Creates the device, the context and the destination buffer.
    // The whole test is skipped when is32bit is set.
    void SetUp() override {
        if (is32bit) {
            GTEST_SKIP();
        }

        device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(*platformDevices));
        context.reset(new MockContext(device.get()));
        dstBuffer = std::unique_ptr<Buffer>(BufferHelper<EnqueueCopyBufferRectTest::BufferRect>::create(context.get()));
    }

    // Declaration order is significant: members are destroyed in reverse order,
    // so the destination buffer goes away before the context and the device.
    std::unique_ptr<MockDevice> device;
    std::unique_ptr<MockContext> context;
    MockBuffer srcBuffer;
    std::unique_ptr<Buffer> dstBuffer;

    // Pitches passed for both the source and the destination of the copy.
    const size_t rowPitch = 100;
    const size_t slicePitch = 100 * 100;

    // Copy geometry: a 50 x 50 x 1 region taken from the origin of both buffers.
    std::array<size_t, 3> srcOrigin = {{0, 0, 0}};
    std::array<size_t, 3> dstOrigin = {{0, 0, 0}};
    std::array<size_t, 3> region = {{50, 50, 1}};

    // Source buffer sizes used by the tests: four times MemoryConstants::gigaByte,
    // and one byte less than that.
    uint64_t bigSize = 4ull * MemoryConstants::gigaByte;
    uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1;

  protected:
    // Enqueues the rectangular copy from srcBuffer to dstBuffer on cmdQ using
    // the fixture's origins, region and pitches, with no event dependencies,
    // and returns the status reported by clEnqueueCopyBufferRect.
    template <typename FamilyType>
    cl_int enqueueCopyBufferRectHw(CommandQueueHw<FamilyType> *cmdQ) {
        return clEnqueueCopyBufferRect(
            cmdQ,
            &srcBuffer,
            dstBuffer.get(),
            srcOrigin.data(),
            dstOrigin.data(),
            region.data(),
            rowPitch,   // src_row_pitch
            slicePitch, // src_slice_pitch
            rowPitch,   // dst_row_pitch
            slicePitch, // dst_slice_pitch
            0,          // num_events_in_wait_list
            nullptr,    // event_wait_list
            nullptr);   // event
    }
};
// Stateless variant: the source buffer size is set to bigSize and the copy is
// enqueued on a CommandQueueStateless; the call is expected to succeed.
using EnqueueCopyBufferRectStateless = EnqueueCopyBufferRectHw;

HWTEST_F(EnqueueCopyBufferRectStateless, GivenValidParametersWhenCopyingBufferRectStatelessThenSuccessIsReturned) {
    std::unique_ptr<CommandQueueHw<FamilyType>> cmdQ(new CommandQueueStateless<FamilyType>(context.get(), device.get()));

    srcBuffer.size = static_cast<size_t>(bigSize);

    const cl_int status = enqueueCopyBufferRectHw(cmdQ.get());
    EXPECT_EQ(CL_SUCCESS, status);
}
// Stateful variant: the source buffer size is set to smallSize and the copy is
// enqueued on a CommandQueueStateful; the call is expected to succeed.
using EnqueueCopyBufferRectStateful = EnqueueCopyBufferRectHw;

HWTEST_F(EnqueueCopyBufferRectStateful, GivenValidParametersWhenCopyingBufferRectStatefulThenSuccessIsReturned) {
    std::unique_ptr<CommandQueueHw<FamilyType>> cmdQ(new CommandQueueStateful<FamilyType>(context.get(), device.get()));

    srcBuffer.size = static_cast<size_t>(smallSize);

    const cl_int status = enqueueCopyBufferRectHw(cmdQ.get());
    EXPECT_EQ(CL_SUCCESS, status);
}