compute-runtime/unit_tests/command_queue/command_queue_hw_tests.cpp

1257 lines
48 KiB
C++

/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/unit_tests/helpers/debug_manager_state_restore.h"
#include "core/unit_tests/utilities/base_object_utils.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/helpers/dispatch_info_builder.h"
#include "test.h"
#include "unit_tests/command_queue/command_queue_fixture.h"
#include "unit_tests/fixtures/buffer_fixture.h"
#include "unit_tests/fixtures/context_fixture.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/image_fixture.h"
#include "unit_tests/helpers/unit_test_helper.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_builtins.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_event.h"
#include "unit_tests/mocks/mock_kernel.h"
using namespace NEO;
struct CommandQueueHwTest
: public DeviceFixture,
public ContextFixture,
public CommandQueueHwFixture,
::testing::Test {
using ContextFixture::SetUp;
void SetUp() override {
DeviceFixture::SetUp();
cl_device_id device = pDevice;
ContextFixture::SetUp(1, &device);
CommandQueueHwFixture::SetUp(pDevice, 0);
}
void TearDown() override {
CommandQueueHwFixture::TearDown();
ContextFixture::TearDown();
DeviceFixture::TearDown();
}
cl_command_queue_properties properties;
const HardwareInfo *pHwInfo = nullptr;
};
struct OOQueueHwTest : public DeviceFixture,
public ContextFixture,
public OOQueueFixture,
::testing::Test {
using ContextFixture::SetUp;
OOQueueHwTest() {
}
void SetUp() override {
DeviceFixture::SetUp();
cl_device_id device = pDevice;
ContextFixture::SetUp(1, &device);
OOQueueFixture::SetUp(pDevice, 0);
}
void SetUp(Device *pDevice, cl_command_queue_properties properties) override {
}
void TearDown() override {
OOQueueFixture::TearDown();
ContextFixture::TearDown();
DeviceFixture::TearDown();
}
};
HWTEST_F(CommandQueueHwTest, WhenEnqueuingBlockedMapUnmapOperationThenVirtualEventIsCreated) {
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockBuffer buffer;
pHwQ->virtualEvent = nullptr;
MockEventBuilder eventBuilder;
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
&buffer,
size, offset, false,
eventBuilder);
ASSERT_NE(nullptr, pHwQ->virtualEvent);
pHwQ->virtualEvent->decRefInternal();
pHwQ->virtualEvent = nullptr;
}
HWTEST_F(CommandQueueHwTest, givenBlockedMapBufferCallWhenMemObjectIsPassedToCommandThenItsRefCountIsBeingIncreased) {
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockBuffer buffer;
pHwQ->virtualEvent = nullptr;
auto currentRefCount = buffer.getRefInternalCount();
MockEventBuilder eventBuilder;
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
&buffer,
size, offset, false,
eventBuilder);
EXPECT_EQ(currentRefCount + 1, buffer.getRefInternalCount());
ASSERT_NE(nullptr, pHwQ->virtualEvent);
pHwQ->virtualEvent->decRefInternal();
pHwQ->virtualEvent = nullptr;
EXPECT_EQ(currentRefCount, buffer.getRefInternalCount());
}
HWTEST_F(CommandQueueHwTest, givenNoReturnEventWhenCallingEnqueueBlockedMapUnmapOperationThenVirtualEventIncrementsCommandQueueInternalRefCount) {
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockBuffer buffer;
pHwQ->virtualEvent = nullptr;
auto initialRefCountInternal = pHwQ->getRefInternalCount();
MockEventBuilder eventBuilder;
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
&buffer,
size, offset, false,
eventBuilder);
ASSERT_NE(nullptr, pHwQ->virtualEvent);
auto refCountInternal = pHwQ->getRefInternalCount();
EXPECT_EQ(initialRefCountInternal + 1, refCountInternal);
pHwQ->virtualEvent->decRefInternal();
pHwQ->virtualEvent = nullptr;
}
HWTEST_F(CommandQueueHwTest, WhenAddMapUnmapToWaitlistEventsThenDependenciesAreNotAddedIntoChild) {
auto buffer = new MockBuffer;
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
auto returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0);
auto event = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0);
const cl_event eventWaitList = event;
pHwQ->virtualEvent = nullptr;
MockEventBuilder eventBuilder(returnEvent);
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(&eventWaitList,
1,
MAP,
buffer,
size, offset, false,
eventBuilder);
EXPECT_EQ(returnEvent, pHwQ->virtualEvent);
ASSERT_EQ(nullptr, event->peekChildEvents());
// Release API refcount (i.e. from workload's perspective)
returnEvent->release();
event->decRefInternal();
buffer->decRefInternal();
}
HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedThenTaskCountIsNotBeingWaited) {
auto buffer = new MockBuffer;
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockEventBuilder eventBuilder;
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
EXPECT_NE(nullptr, pHwQ->virtualEvent);
pHwQ->virtualEvent->setStatus(CL_COMPLETE);
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), pHwQ->latestTaskCountWaited);
buffer->decRefInternal();
}
HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedOnNonZeroCopyBufferThenTaskCountIsBeingWaited) {
auto buffer = new MockBuffer;
buffer->isZeroCopy = false;
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockEventBuilder eventBuilder;
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
EXPECT_NE(nullptr, pHwQ->virtualEvent);
pHwQ->virtualEvent->setStatus(CL_COMPLETE);
EXPECT_EQ(1u, pHwQ->latestTaskCountWaited);
buffer->decRefInternal();
}
HWTEST_F(CommandQueueHwTest, GivenEventWhenEnqueuingBlockedMapUnmapOperationThenEventIsRetained) {
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
Event *returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0);
auto buffer = new MockBuffer;
pHwQ->virtualEvent = nullptr;
MockEventBuilder eventBuilder(returnEvent);
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
eventBuilder.finalizeAndRelease();
EXPECT_EQ(returnEvent, pHwQ->virtualEvent);
EXPECT_NE(nullptr, returnEvent->peekCommand());
// CommandQueue has retained this event, release it
returnEvent->release();
pHwQ->virtualEvent = nullptr;
delete returnEvent;
buffer->decRefInternal();
}
HWTEST_F(CommandQueueHwTest, GivenEventWhenEnqueuingBlockedMapUnmapOperationThenChildIsUnaffected) {
auto buffer = new MockBuffer;
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
Event *returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0);
Event event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0);
pHwQ->virtualEvent = nullptr;
pHwQ->virtualEvent = &event;
//virtual event from regular event to stored in previousVirtualEvent
pHwQ->virtualEvent->incRefInternal();
MockEventBuilder eventBuilder(returnEvent);
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
EXPECT_EQ(returnEvent, pHwQ->virtualEvent);
ASSERT_EQ(nullptr, event.peekChildEvents());
returnEvent->release();
buffer->decRefInternal();
}
HWTEST_F(CommandQueueHwTest, GivenNonEmptyQueueOnBlockingMapBufferWillWaitForPrecedingCommandsToComplete) {
struct MockCmdQ : CommandQueueHw<FamilyType> {
MockCmdQ(Context *context, Device *device)
: CommandQueueHw<FamilyType>(context, device, 0) {
finishWasCalled = false;
}
cl_int finish() override {
finishWasCalled = true;
return 0;
}
bool finishWasCalled;
};
MockCmdQ cmdQ(context, &pCmdQ->getDevice());
auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr);
auto b2 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr);
auto gatingEvent = clCreateUserEvent(context, nullptr);
void *ptr1 = clEnqueueMapBuffer(&cmdQ, b1, CL_FALSE, CL_MAP_READ, 0, 8, 1, &gatingEvent, nullptr, nullptr);
clEnqueueUnmapMemObject(&cmdQ, b1, ptr1, 0, nullptr, nullptr);
ASSERT_FALSE(cmdQ.finishWasCalled);
void *ptr2 = clEnqueueMapBuffer(&cmdQ, b2, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, nullptr);
ASSERT_TRUE(cmdQ.finishWasCalled);
clSetUserEventStatus(gatingEvent, CL_COMPLETE);
clEnqueueUnmapMemObject(pCmdQ, b2, ptr2, 0, nullptr, nullptr);
clReleaseMemObject(b1);
clReleaseMemObject(b2);
clReleaseEvent(gatingEvent);
}
HWTEST_F(CommandQueueHwTest, GivenEventsWaitlistOnBlockingMapBufferWillWaitForEvents) {
struct MockEvent : UserEvent {
MockEvent(Context *ctx, uint32_t updateCountBeforeCompleted)
: UserEvent(ctx),
updateCount(0), updateCountBeforeCompleted(updateCountBeforeCompleted) {
this->updateTaskCount(0);
this->taskLevel = 0;
}
void updateExecutionStatus() override {
++updateCount;
if (updateCount == updateCountBeforeCompleted) {
transitionExecutionStatus(CL_COMPLETE);
}
unblockEventsBlockedByThis(executionStatus);
}
uint32_t updateCount;
uint32_t updateCountBeforeCompleted;
};
MockEvent *me = new MockEvent(context, 1024);
auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr);
cl_event meAsClEv = me;
void *ptr1 = clEnqueueMapBuffer(pCmdQ, b1, CL_TRUE, CL_MAP_READ, 0, 8, 1, &meAsClEv, nullptr, nullptr);
ASSERT_TRUE(me->updateStatusAndCheckCompletion());
ASSERT_LE(me->updateCountBeforeCompleted, me->updateCount);
clEnqueueUnmapMemObject(pCmdQ, b1, ptr1, 0, nullptr, nullptr);
clReleaseMemObject(b1);
me->release();
}
HWTEST_F(CommandQueueHwTest, GivenNotCompleteUserEventPassedToEnqueueWhenEventIsUnblockedThenAllSurfacesForBlockedCommandsAreMadeResident) {
int32_t executionStamp = 0;
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment);
pDevice->resetCommandStreamReceiver(mockCSR);
auto userEvent = make_releaseable<UserEvent>(context);
KernelInfo kernelInfo;
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
auto mockProgram = mockKernelWithInternals.mockProgram;
size_t offset = 0;
size_t size = 1;
GraphicsAllocation *constantSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
mockProgram->setConstantSurface(constantSurface);
GraphicsAllocation *printfSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
GraphicsAllocation *privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
mockKernel->setPrivateSurface(privateSurface, 10);
cl_event blockedEvent = userEvent.get();
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
userEvent->setStatus(CL_COMPLETE);
EXPECT_TRUE(mockCSR->isMadeResident(constantSurface));
EXPECT_TRUE(mockCSR->isMadeResident(privateSurface));
mockKernel->setPrivateSurface(nullptr, 0);
mockProgram->setConstantSurface(nullptr);
mockCSR->getMemoryManager()->freeGraphicsMemory(privateSurface);
mockCSR->getMemoryManager()->freeGraphicsMemory(printfSurface);
mockCSR->getMemoryManager()->freeGraphicsMemory(constantSurface);
}
typedef CommandQueueHwTest BlockedCommandQueueTest;
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
UserEvent userEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event blockedEvent = &userEvent;
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
userEvent.setStatus(CL_COMPLETE);
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
uint32_t defaultSshUse = UnitTestHelper<FamilyType>::getDefaultSshUsage();
EXPECT_EQ(0u, ioh.getUsed());
EXPECT_EQ(0u, dsh.getUsed());
EXPECT_EQ(defaultSshUse, ssh.getUsed());
pCmdQ->isQueueBlocked();
}
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWithUsedHeapsWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) {
UserEvent userEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event blockedEvent = &userEvent;
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
auto spaceToUse = 4u;
ioh.getSpace(spaceToUse);
dsh.getSpace(spaceToUse);
ssh.getSpace(spaceToUse);
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
userEvent.setStatus(CL_COMPLETE);
uint32_t sshSpaceUse = spaceToUse + UnitTestHelper<FamilyType>::getDefaultSshUsage();
EXPECT_EQ(spaceToUse, ioh.getUsed());
EXPECT_EQ(spaceToUse, dsh.getUsed());
EXPECT_EQ(sshSpaceUse, ssh.getUsed());
pCmdQ->isQueueBlocked();
}
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) {
UserEvent userEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event blockedEvent = &userEvent;
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
auto iohBase = ioh.getCpuBase();
auto dshBase = dsh.getCpuBase();
auto sshBase = ssh.getCpuBase();
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(iohBase, ioh.getCpuBase());
EXPECT_EQ(dshBase, dsh.getCpuBase());
EXPECT_EQ(sshBase, ssh.getCpuBase());
pCmdQ->isQueueBlocked();
}
HWTEST_F(BlockedCommandQueueTest, givenEnqueueBlockedByUserEventWhenItIsEnqueuedThenKernelReferenceCountIsIncreased) {
UserEvent userEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event blockedEvent = &userEvent;
auto currentRefCount = mockKernel->getRefInternalCount();
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
EXPECT_EQ(currentRefCount + 1, mockKernel->getRefInternalCount());
userEvent.setStatus(CL_COMPLETE);
pCmdQ->isQueueBlocked();
EXPECT_EQ(currentRefCount, mockKernel->getRefInternalCount());
}
typedef CommandQueueHwTest CommandQueueHwRefCountTest;
HWTEST_F(CommandQueueHwRefCountTest, givenBlockedCmdQWhenNewBlockedEnqueueReplacesVirtualEventThenPreviousVirtualEventDecrementsCmdQRefCount) {
cl_int retVal = 0;
auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);
UserEvent userEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event blockedEvent = &userEvent;
EXPECT_EQ(1, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// UserEvent on waitlist doesn't increments cmdQ refCount, virtualEvent increments refCount
EXPECT_EQ(2, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// new virtual event increments refCount
EXPECT_EQ(3, mockCmdQ->getRefInternalCount());
userEvent.setStatus(CL_COMPLETE);
// UserEvent is set to complete and event tree is unblocked, queue has only 1 refference to itself after this operation
EXPECT_EQ(2, mockCmdQ->getRefInternalCount());
//this call will release the queue
releaseQueue<CommandQueue>(mockCmdQ, retVal);
}
HWTEST_F(CommandQueueHwRefCountTest, givenBlockedCmdQWithOutputEventAsVirtualEventWhenNewBlockedEnqueueReplacesVirtualEventCreatedFromOutputEventThenPreviousVirtualEventDoesntDecrementRefCount) {
cl_int retVal = 0;
auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);
UserEvent userEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event eventOut = nullptr;
cl_event blockedEvent = &userEvent;
EXPECT_EQ(1, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount
EXPECT_EQ(2, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut);
//output event increments
EXPECT_EQ(3, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// previous virtualEvent which was outputEvent DOES NOT decrement refCount,
// new virtual event increments refCount
EXPECT_EQ(4, mockCmdQ->getRefInternalCount());
// unblocking deletes 2 virtualEvents
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(3, mockCmdQ->getRefInternalCount());
auto pEventOut = castToObject<Event>(eventOut);
pEventOut->release();
// releasing output event decrements refCount
EXPECT_EQ(2, mockCmdQ->getRefInternalCount());
mockCmdQ->isQueueBlocked();
releaseQueue<CommandQueue>(mockCmdQ, retVal);
}
HWTEST_F(CommandQueueHwRefCountTest, givenSeriesOfBlockedEnqueuesWhenEveryEventIsDeletedAndCmdQIsReleasedThenCmdQIsDeleted) {
cl_int retVal = 0;
auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);
UserEvent *userEvent = new UserEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event eventOut = nullptr;
cl_event blockedEvent = userEvent;
EXPECT_EQ(1, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount
EXPECT_EQ(2, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut);
//output event increments refCount
EXPECT_EQ(3, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// previous virtualEvent which was outputEvent DOES NOT decrement refCount,
// new virtual event increments refCount
EXPECT_EQ(4, mockCmdQ->getRefInternalCount());
// unblocking deletes 2 virtualEvents
userEvent->setStatus(CL_COMPLETE);
userEvent->release();
EXPECT_EQ(3, mockCmdQ->getRefInternalCount());
auto pEventOut = castToObject<Event>(eventOut);
pEventOut->release();
// releasing output event decrements refCount
EXPECT_EQ(2, mockCmdQ->getRefInternalCount());
mockCmdQ->isQueueBlocked();
EXPECT_EQ(1, mockCmdQ->getRefInternalCount());
releaseQueue<CommandQueue>(mockCmdQ, retVal);
}
HWTEST_F(CommandQueueHwRefCountTest, givenSeriesOfBlockedEnqueuesWhenCmdQIsReleasedBeforeOutputEventThenOutputEventDeletesCmdQ) {
cl_int retVal = 0;
auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);
UserEvent *userEvent = new UserEvent(context);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event eventOut = nullptr;
cl_event blockedEvent = userEvent;
EXPECT_EQ(1, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount
EXPECT_EQ(2, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut);
//output event increments refCount
EXPECT_EQ(3, mockCmdQ->getRefInternalCount());
mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
// previous virtualEvent which was outputEvent DOES NOT decrement refCount,
// new virtual event increments refCount
EXPECT_EQ(4, mockCmdQ->getRefInternalCount());
userEvent->setStatus(CL_COMPLETE);
userEvent->release();
// releasing UserEvent doesn't change the queue refCount
EXPECT_EQ(3, mockCmdQ->getRefInternalCount());
releaseQueue<CommandQueue>(mockCmdQ, retVal);
// releasing cmdQ decrements refCount
EXPECT_EQ(1, mockCmdQ->getRefInternalCount());
auto pEventOut = castToObject<Event>(eventOut);
pEventOut->release();
}
HWTEST_F(CommandQueueHwTest, GivenEventThatIsNotCompletedWhenFinishIsCalledAndItGetsCompletedThenItStatusIsUpdatedAfterFinishCall) {
cl_int ret;
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
struct ClbFuncTempStruct {
static void CL_CALLBACK ClbFuncT(cl_event e, cl_int execStatus, void *valueForUpdate) {
*((cl_int *)valueForUpdate) = 1;
}
};
auto Value = 0u;
auto ev = new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, Event::eventNotReady + 1);
clSetEventCallback(ev, CL_COMPLETE, ClbFuncTempStruct::ClbFuncT, &Value);
auto &csr = this->pCmdQ->getGpgpuCommandStreamReceiver();
EXPECT_GT(3u, csr.peekTaskCount());
*csr.getTagAddress() = Event::eventNotReady + 1;
ret = clFinish(this->pCmdQ);
ASSERT_EQ(CL_SUCCESS, ret);
ev->updateExecutionStatus();
EXPECT_EQ(1u, Value);
ev->decRefInternal();
}
void CloneMdi(MultiDispatchInfo &dst, const MultiDispatchInfo &src) {
for (auto &srcDi : src) {
dst.push(srcDi);
}
}
struct MockBuilder : BuiltinDispatchInfoBuilder {
MockBuilder(NEO::BuiltIns &builtins) : BuiltinDispatchInfoBuilder(builtins) {
}
bool buildDispatchInfos(MultiDispatchInfo &d, const BuiltinOpParams &conf) const override {
wasBuildDispatchInfosWithBuiltinOpParamsCalled = true;
paramsReceived.multiDispatchInfo.setBuiltinOpParams(conf);
return true;
}
bool buildDispatchInfos(MultiDispatchInfo &d, Kernel *kernel,
const uint32_t dim, const Vec3<size_t> &gws, const Vec3<size_t> &elws, const Vec3<size_t> &offset) const override {
paramsReceived.kernel = kernel;
paramsReceived.gws = gws;
paramsReceived.elws = elws;
paramsReceived.offset = offset;
wasBuildDispatchInfosWithKernelParamsCalled = true;
DispatchInfoBuilder<NEO::SplitDispatch::Dim::d3D, NEO::SplitDispatch::SplitMode::NoSplit> dib;
dib.setKernel(paramsToUse.kernel);
dib.setDispatchGeometry(dim, paramsToUse.gws, paramsToUse.elws, paramsToUse.offset);
dib.bake(d);
CloneMdi(paramsReceived.multiDispatchInfo, d);
return true;
}
mutable bool wasBuildDispatchInfosWithBuiltinOpParamsCalled = false;
mutable bool wasBuildDispatchInfosWithKernelParamsCalled = false;
struct Params {
MultiDispatchInfo multiDispatchInfo;
Kernel *kernel = nullptr;
Vec3<size_t> gws = Vec3<size_t>{0, 0, 0};
Vec3<size_t> elws = Vec3<size_t>{0, 0, 0};
Vec3<size_t> offset = Vec3<size_t>{0, 0, 0};
};
mutable Params paramsReceived;
Params paramsToUse;
};
struct BuiltinParamsCommandQueueHwTests : public CommandQueueHwTest {
void SetUpImpl(EBuiltInOps::Type operation) {
auto builtIns = new MockBuiltins();
pCmdQ->getDevice().getExecutionEnvironment()->builtins.reset(builtIns);
auto swapBuilder = builtIns->setBuiltinDispatchInfoBuilder(
operation,
*pContext,
*pDevice,
std::unique_ptr<NEO::BuiltinDispatchInfoBuilder>(new MockBuilder(*builtIns)));
mockBuilder = static_cast<MockBuilder *>(&builtIns->getBuiltinDispatchInfoBuilder(
operation,
*pContext,
*pDevice));
}
MockBuilder *mockBuilder;
};
HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadWriteBufferCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) {
SetUpImpl(EBuiltInOps::CopyBufferToBuffer);
BufferDefaults::context = context;
auto buffer = clUniquePtr(BufferHelper<>::create());
char array[3 * MemoryConstants::cacheLineSize];
char *ptr = &array[MemoryConstants::cacheLineSize];
ptr = alignUp(ptr, MemoryConstants::cacheLineSize);
ptr -= 1;
cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 0, ptr, nullptr, 0, 0, nullptr);
EXPECT_EQ(CL_SUCCESS, status);
void *alignedPtr = alignDown(ptr, 4);
size_t ptrOffset = ptrDiff(ptr, alignedPtr);
Vec3<size_t> offset = {0, 0, 0};
auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams();
EXPECT_EQ(alignedPtr, builtinParams.dstPtr);
EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x);
EXPECT_EQ(offset, builtinParams.srcOffset);
status = pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, 0, ptr, nullptr, 0, 0, nullptr);
EXPECT_EQ(CL_SUCCESS, status);
builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams();
EXPECT_EQ(alignedPtr, builtinParams.srcPtr);
EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x);
EXPECT_EQ(offset, builtinParams.dstOffset);
}
HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueWriteImageCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) {
SetUpImpl(EBuiltInOps::CopyBufferToImage3d);
std::unique_ptr<Image> dstImage(ImageHelper<ImageUseHostPtr<Image2dDefaults>>::create(context));
auto imageDesc = dstImage->getImageDesc();
size_t origin[] = {0, 0, 0};
size_t region[] = {imageDesc.image_width, imageDesc.image_height, 0};
size_t rowPitch = dstImage->getHostPtrRowPitch();
size_t slicePitch = dstImage->getHostPtrSlicePitch();
char array[3 * MemoryConstants::cacheLineSize];
char *ptr = &array[MemoryConstants::cacheLineSize];
ptr = alignUp(ptr, MemoryConstants::cacheLineSize);
ptr -= 1;
void *alignedPtr = alignDown(ptr, 4);
size_t ptrOffset = ptrDiff(ptr, alignedPtr);
Vec3<size_t> offset = {0, 0, 0};
cl_int status = pCmdQ->enqueueWriteImage(dstImage.get(),
CL_FALSE,
origin,
region,
rowPitch,
slicePitch,
ptr,
nullptr,
0,
0,
nullptr);
EXPECT_EQ(CL_SUCCESS, status);
auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams();
EXPECT_EQ(alignedPtr, builtinParams.srcPtr);
EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x);
EXPECT_EQ(offset, builtinParams.dstOffset);
}
HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadImageCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) {
SetUpImpl(EBuiltInOps::CopyImage3dToBuffer);
std::unique_ptr<Image> dstImage(ImageHelper<ImageUseHostPtr<Image2dDefaults>>::create(context));
auto imageDesc = dstImage->getImageDesc();
size_t origin[] = {0, 0, 0};
size_t region[] = {imageDesc.image_width, imageDesc.image_height, 0};
size_t rowPitch = dstImage->getHostPtrRowPitch();
size_t slicePitch = dstImage->getHostPtrSlicePitch();
char array[3 * MemoryConstants::cacheLineSize];
char *ptr = &array[MemoryConstants::cacheLineSize];
ptr = alignUp(ptr, MemoryConstants::cacheLineSize);
ptr -= 1;
void *alignedPtr = alignDown(ptr, 4);
size_t ptrOffset = ptrDiff(ptr, alignedPtr);
Vec3<size_t> offset = {0, 0, 0};
cl_int status = pCmdQ->enqueueReadImage(dstImage.get(),
CL_FALSE,
origin,
region,
rowPitch,
slicePitch,
ptr,
nullptr,
0,
0,
nullptr);
EXPECT_EQ(CL_SUCCESS, status);
auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams();
EXPECT_EQ(alignedPtr, builtinParams.dstPtr);
EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x);
EXPECT_EQ(offset, builtinParams.srcOffset);
}
HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadWriteBufferRectCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) {
SetUpImpl(EBuiltInOps::CopyBufferRect);
BufferDefaults::context = context;
auto buffer = clUniquePtr(BufferHelper<>::create());
size_t bufferOrigin[3] = {0, 0, 0};
size_t hostOrigin[3] = {0, 0, 0};
size_t region[3] = {0, 0, 0};
char array[3 * MemoryConstants::cacheLineSize];
char *ptr = &array[MemoryConstants::cacheLineSize];
ptr = alignUp(ptr, MemoryConstants::cacheLineSize);
ptr -= 1;
cl_int status = pCmdQ->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, 0, nullptr);
void *alignedPtr = alignDown(ptr, 4);
size_t ptrOffset = ptrDiff(ptr, alignedPtr);
Vec3<size_t> offset = {0, 0, 0};
auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams();
EXPECT_EQ(alignedPtr, builtinParams.dstPtr);
EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x);
EXPECT_EQ(offset, builtinParams.srcOffset);
status = pCmdQ->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, 0, nullptr);
EXPECT_EQ(CL_SUCCESS, status);
builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams();
EXPECT_EQ(alignedPtr, builtinParams.srcPtr);
EXPECT_EQ(offset, builtinParams.dstOffset);
EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x);
}
HWTEST_F(CommandQueueHwTest, givenCommandQueueThatIsBlockedAndUsesCpuCopyWhenEventIsReturnedItIsNotReady) {
CommandQueueHw<FamilyType> *cmdQHw = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
MockBuffer buffer;
cl_event returnEvent = nullptr;
auto retVal = CL_SUCCESS;
cmdQHw->taskLevel = Event::eventNotReady;
size_t offset = 0;
size_t size = 4096u;
TransferProperties transferProperties(&buffer, CL_COMMAND_READ_BUFFER, 0, false, &offset, &size, nullptr, false);
EventsRequest eventsRequest(0, nullptr, &returnEvent);
cmdQHw->cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(Event::eventNotReady, castToObject<Event>(returnEvent)->peekTaskCount());
clReleaseEvent(returnEvent);
}
HWTEST_F(CommandQueueHwTest, givenEventWithRecordedCommandWhenSubmitCommandIsCalledThenTaskCountMustBeUpdatedFromOtherThread) {
std::atomic_bool go{false};
struct mockEvent : public Event {
using Event::Event;
using Event::eventWithoutCommand;
using Event::submitCommand;
void synchronizeTaskCount() override {
*atomicFence = true;
Event::synchronizeTaskCount();
}
uint32_t synchronizeCallCount = 0u;
std::atomic_bool *atomicFence = nullptr;
};
mockEvent neoEvent(this->pCmdQ, CL_COMMAND_MAP_BUFFER, Event::eventNotReady, Event::eventNotReady);
neoEvent.atomicFence = &go;
EXPECT_TRUE(neoEvent.eventWithoutCommand);
neoEvent.eventWithoutCommand = false;
EXPECT_EQ(Event::eventNotReady, neoEvent.peekTaskCount());
std::thread t([&]() {
while (!go)
;
neoEvent.updateTaskCount(77u);
});
neoEvent.submitCommand(false);
EXPECT_EQ(77u, neoEvent.peekTaskCount());
t.join();
}
HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsProvidedThenThisBuilderIsUsedForCreatingDispatchInfo) {
CommandQueueHw<FamilyType> *cmdQHw = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
MockKernelWithInternals mockKernelToUse(*pDevice);
MockBuilder builder(*pDevice->getExecutionEnvironment()->getBuiltIns());
builder.paramsToUse.gws.x = 11;
builder.paramsToUse.elws.x = 13;
builder.paramsToUse.offset.x = 17;
builder.paramsToUse.kernel = mockKernelToUse.mockKernel;
MockKernelWithInternals mockKernelToSend(*pDevice);
mockKernelToSend.kernelInfo.builtinDispatchBuilder = &builder;
NullSurface s;
Surface *surfaces[] = {&s};
size_t gws[3] = {3, 0, 0};
size_t lws[3] = {5, 0, 0};
size_t off[3] = {7, 0, 0};
EXPECT_FALSE(builder.wasBuildDispatchInfosWithBuiltinOpParamsCalled);
EXPECT_FALSE(builder.wasBuildDispatchInfosWithKernelParamsCalled);
cmdQHw->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(surfaces, false, mockKernelToSend.mockKernel, 1, off, gws, lws, lws, 0, nullptr, nullptr);
EXPECT_FALSE(builder.wasBuildDispatchInfosWithBuiltinOpParamsCalled);
EXPECT_TRUE(builder.wasBuildDispatchInfosWithKernelParamsCalled);
EXPECT_EQ(Vec3<size_t>(gws[0], gws[1], gws[2]), builder.paramsReceived.gws);
EXPECT_EQ(Vec3<size_t>(lws[0], lws[1], lws[2]), builder.paramsReceived.elws);
EXPECT_EQ(Vec3<size_t>(off[0], off[1], off[2]), builder.paramsReceived.offset);
EXPECT_EQ(mockKernelToSend.mockKernel, builder.paramsReceived.kernel);
auto dispatchInfo = builder.paramsReceived.multiDispatchInfo.begin();
EXPECT_EQ(1U, builder.paramsReceived.multiDispatchInfo.size());
EXPECT_EQ(builder.paramsToUse.gws.x, dispatchInfo->getGWS().x);
EXPECT_EQ(builder.paramsToUse.elws.x, dispatchInfo->getEnqueuedWorkgroupSize().x);
EXPECT_EQ(builder.paramsToUse.offset.x, dispatchInfo->getOffset().x);
EXPECT_EQ(builder.paramsToUse.kernel, dispatchInfo->getKernel());
}
HWTEST_F(CommandQueueHwTest, givenNonBlockedEnqueueWhenEventIsPassedThenUpdateItsFlushStamp) {
CommandQueueHw<FamilyType> *cmdQHw = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.flushStamp->setStamp(5);
size_t offset = 0;
size_t size = 1;
cl_event event;
auto retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 0, nullptr, &event);
ASSERT_EQ(CL_SUCCESS, retVal);
auto eventObj = castToObject<Event>(event);
EXPECT_EQ(csr.flushStamp->peekStamp(), eventObj->flushStamp->peekStamp());
EXPECT_EQ(csr.flushStamp->peekStamp(), pCmdQ->flushStamp->peekStamp());
eventObj->release();
}
HWTEST_F(CommandQueueHwTest, givenBlockedEnqueueWhenEventIsPassedThenDontUpdateItsFlushStamp) {
UserEvent userEvent;
cl_event event, clUserEvent;
CommandQueueHw<FamilyType> *cmdQHw = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.flushStamp->setStamp(5);
size_t offset = 0;
size_t size = 1;
clUserEvent = &userEvent;
auto retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 1, &clUserEvent, nullptr);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(cmdQHw->isQueueBlocked());
retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 0, nullptr, &event);
ASSERT_EQ(CL_SUCCESS, retVal);
FlushStamp expectedFlushStamp = 0;
auto eventObj = castToObject<Event>(event);
EXPECT_EQ(expectedFlushStamp, eventObj->flushStamp->peekStamp());
EXPECT_EQ(expectedFlushStamp, pCmdQ->flushStamp->peekStamp());
eventObj->release();
}
HWTEST_F(CommandQueueHwTest, givenBlockedInOrderCmdQueueAndAsynchronouslyCompletedEventWhenEnqueueCompletesVirtualEventThenUpdatedTaskLevelIsPassedToEnqueueAndFlushTask) {
CommandQueueHw<FamilyType> *cmdQHw = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
int32_t executionStamp = 0;
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment);
pDevice->resetCommandStreamReceiver(mockCSR);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
auto event = new Event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, 10, 0);
uint32_t virtualEventTaskLevel = 77;
uint32_t virtualEventTaskCount = 80;
auto virtualEvent = new Event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, virtualEventTaskLevel, virtualEventTaskCount);
cl_event blockedEvent = event;
// Put Queue in blocked state by assigning virtualEvent
event->addChild(*virtualEvent);
virtualEvent->incRefInternal();
cmdQHw->virtualEvent = virtualEvent;
*mockCSR->getTagAddress() = 0u;
cmdQHw->taskLevel = 23;
cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
//new virtual event is created on enqueue, bind it to the created virtual event
EXPECT_NE(cmdQHw->virtualEvent, virtualEvent);
EXPECT_EQ(virtualEvent->peekExecutionStatus(), CL_QUEUED);
event->setStatus(CL_SUBMITTED);
EXPECT_EQ(virtualEvent->peekExecutionStatus(), CL_SUBMITTED);
EXPECT_FALSE(cmdQHw->isQueueBlocked());
// +1 for next level after virtualEvent is unblocked
// +1 as virtualEvent was a parent for event with actual command that is being submitted
EXPECT_EQ(virtualEventTaskLevel + 2, cmdQHw->taskLevel);
//command being submitted was dependant only on virtual event hence only +1
EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask);
*mockCSR->getTagAddress() = initialHardwareTag;
virtualEvent->decRefInternal();
event->decRefInternal();
}
HWTEST_F(CommandQueueHwTest, givenBlockedOutOfOrderQueueWhenUserEventIsSubmittedThenNDREventIsSubmittedAsWell) {
CommandQueueHw<FamilyType> *cmdQHw = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
auto &mockCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
cl_event userEvent = clCreateUserEvent(this->pContext, nullptr);
cl_event blockedEvent = nullptr;
*mockCsr.getTagAddress() = 0u;
cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &userEvent, &blockedEvent);
auto neoEvent = castToObject<Event>(blockedEvent);
EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_QUEUED);
neoEvent->updateExecutionStatus();
EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_QUEUED);
EXPECT_EQ(neoEvent->peekTaskCount(), Event::eventNotReady);
clSetUserEventStatus(userEvent, 0u);
EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_SUBMITTED);
EXPECT_EQ(neoEvent->peekTaskCount(), 1u);
*mockCsr.getTagAddress() = initialHardwareTag;
clReleaseEvent(blockedEvent);
clReleaseEvent(userEvent);
}
HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompletedEventWhenEnqueueCompletesVirtualEventThenUpdatedTaskLevelIsPassedToEnqueueAndFlushTask) {
CommandQueueHw<FamilyType> *cmdQHw = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
int32_t executionStamp = 0;
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment);
pDevice->resetCommandStreamReceiver(mockCSR);
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
size_t offset = 0;
size_t size = 1;
class MockEventWithSetCompleteOnUpdate : public Event {
public:
MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType,
uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
}
void updateExecutionStatus() override {
setStatus(CL_COMPLETE);
}
};
Event event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, 10, 0);
uint32_t virtualEventTaskLevel = 77;
uint32_t virtualEventTaskCount = 80;
MockEventWithSetCompleteOnUpdate virtualEvent(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, virtualEventTaskLevel, virtualEventTaskCount);
cl_event blockedEvent = &event;
// Put Queue in blocked state by assigning virtualEvent
virtualEvent.incRefInternal();
event.addChild(virtualEvent);
cmdQHw->virtualEvent = &virtualEvent;
cmdQHw->taskLevel = 23;
cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
//new virtual event is created on enqueue, bind it to the created virtual event
EXPECT_NE(cmdQHw->virtualEvent, &virtualEvent);
event.setStatus(CL_SUBMITTED);
virtualEvent.Event::updateExecutionStatus();
EXPECT_FALSE(cmdQHw->isQueueBlocked());
//+1 due to dependency between virtual event & new virtual event
//new virtual event is actually responsible for command delivery
EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel);
EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask);
}
HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKernelMakeResidentCalledOnce) {
KernelInfo kernelInfo;
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
auto mockProgram = mockKernelWithInternals.mockProgram;
mockProgram->setAllowNonUniform(true);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
size_t offset = 0;
size_t gws = 63;
size_t lws = 16;
cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, status);
EXPECT_EQ(1u, mockKernel->makeResidentCalls);
}
HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) {
UserEvent userEvent(context);
KernelInfo kernelInfo;
MockKernelWithInternals mockKernelWithInternals(*pDevice);
auto mockKernel = mockKernelWithInternals.mockKernel;
auto mockProgram = mockKernelWithInternals.mockProgram;
mockProgram->setAllowNonUniform(true);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
size_t offset = 0;
size_t gws = 63;
size_t lws = 16;
cl_event blockedEvent = &userEvent;
cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 1, &blockedEvent, nullptr);
EXPECT_EQ(CL_SUCCESS, status);
EXPECT_EQ(1u, mockKernel->getResidencyCalls);
userEvent.setStatus(CL_COMPLETE);
pCmdQ->isQueueBlocked();
}
HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) {
UserEvent userEvent(context);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
csr.timestampPacketWriteEnabled = false;
BufferDefaults::context = context;
auto buffer = clUniquePtr(BufferHelper<>::create());
GraphicsAllocation *bufferAllocation = buffer->getGraphicsAllocation();
char array[3 * MemoryConstants::cacheLineSize];
char *ptr = &array[MemoryConstants::cacheLineSize];
ptr = alignUp(ptr, MemoryConstants::cacheLineSize);
ptr -= 1;
cl_event blockedEvent = &userEvent;
cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 1, &blockedEvent, nullptr);
EXPECT_EQ(CL_SUCCESS, status);
userEvent.setStatus(CL_COMPLETE);
std::map<GraphicsAllocation *, uint32_t>::iterator it = csr.makeResidentAllocations.begin();
for (; it != csr.makeResidentAllocations.end(); it++) {
uint32_t expected = 1u;
//Buffer surface will be added three times (for each kernel from split and as a base range of enqueueReadBuffer call)
if (it->first == bufferAllocation) {
expected = 3u;
}
EXPECT_EQ(expected, it->second);
}
pCmdQ->isQueueBlocked();
}
HWTEST_F(CommandQueueHwTest, givenDefaultHwCommandQueueThenCacheFlushAfterWalkerIsNotNeeded) {
EXPECT_FALSE(pCmdQ->getRequiresCacheFlushAfterWalker());
}