mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Optimize timestamp packet dependencies
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
882ae8088f
commit
95585a81f7
@@ -233,6 +233,12 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
return commandItor;
|
||||
}
|
||||
|
||||
template <typename Command>
|
||||
void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
auto commandItor = find<Command *>(itorStart, itorEnd);
|
||||
EXPECT_TRUE(commandItor == itorEnd);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
|
||||
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
|
||||
@@ -1042,13 +1048,10 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithNoTimestampPacketTests, givenNoTimestampPacket
|
||||
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(bcsCommands.begin(), bcsCommands.end());
|
||||
|
||||
cmdFound = expectMiFlush<MI_FLUSH_DW>(cmdFound++, bcsCommands.end());
|
||||
auto miflushDwCmd = genCmdCast<MI_FLUSH_DW *>(*cmdFound);
|
||||
const auto bcsSignalAddress = miflushDwCmd->getDestinationAddress();
|
||||
|
||||
cmdFound = expectCommand<WALKER_TYPE>(ccsCommands.begin(), ccsCommands.end());
|
||||
|
||||
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdFound++, ccsCommands.end());
|
||||
verifySemaphore<FamilyType>(cmdFound, bcsSignalAddress);
|
||||
expectNoCommand<MI_SEMAPHORE_WAIT>(cmdFound++, ccsCommands.end());
|
||||
}
|
||||
|
||||
struct BlitEnqueueWithDebugCapabilityTests : public BlitEnqueueTests<0> {
|
||||
@@ -1803,7 +1806,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushR
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenDontClearDependencies) {
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) {
|
||||
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
|
||||
const bool clearDependencies = true;
|
||||
|
||||
@@ -1813,12 +1816,6 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionT
|
||||
EXPECT_EQ(0u, previousNodes.peekNodes().size());
|
||||
}
|
||||
|
||||
{
|
||||
TimestampPacketContainer previousNodes;
|
||||
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
|
||||
EXPECT_EQ(1u, previousNodes.peekNodes().size());
|
||||
}
|
||||
|
||||
{
|
||||
TimestampPacketContainer previousNodes;
|
||||
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
@@ -1500,20 +1501,23 @@ HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErro
|
||||
EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode);
|
||||
}
|
||||
|
||||
template <bool ooq>
|
||||
struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwFixture, ::testing::Test {
|
||||
using ContextFixture::SetUp;
|
||||
|
||||
void SetUp() override {
|
||||
REQUIRE_FULL_BLITTER_OR_SKIP(defaultHwInfo.get());
|
||||
hwInfo = *::defaultHwInfo;
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
|
||||
|
||||
DebugManager.flags.EnableBlitterOperationsSupport.set(1);
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
ClDeviceFixture::SetUp();
|
||||
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
|
||||
ClDeviceFixture::SetUpImpl(&hwInfo);
|
||||
cl_device_id device = pClDevice;
|
||||
ContextFixture::SetUp(1, &device);
|
||||
CommandQueueHwFixture::SetUp(pClDevice, 0);
|
||||
cl_command_queue_properties queueProperties = ooq ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
|
||||
CommandQueueHwFixture::SetUp(pClDevice, queueProperties);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
@@ -1522,10 +1526,14 @@ struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwF
|
||||
ClDeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
HardwareInfo hwInfo{};
|
||||
DebugManagerStateRestore state{};
|
||||
};
|
||||
|
||||
HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
|
||||
using IoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<false>;
|
||||
using OoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<true>;
|
||||
|
||||
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
|
||||
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||
@@ -1555,7 +1563,7 @@ HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenG
|
||||
EXPECT_EQ(0, gpgpuCsr.ensureCommandBufferAllocationCalled);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenGpgpuCommandStreamIsObtained) {
|
||||
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenGpgpuCommandStreamIsObtained) {
|
||||
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||
@@ -1580,3 +1588,185 @@ HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenG
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_NE(ensureCommandBufferAllocationCalledAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled);
|
||||
}
|
||||
|
||||
// Out-of-order queue scenario: two kernels -> barrier -> non-blocking read buffer.
// Checks performed:
//  - the compute command stream recorded from ccsStart contains a PIPE_CONTROL with an
//    immediate-data post-sync write and no MI_SEMAPHORE_WAIT in front of it,
//  - the first MI_SEMAPHORE_WAIT in the BCS command stream targets the address programmed in that
//    PIPE_CONTROL, and no PIPE_CONTROL follows the semaphore in the BCS stream.
// Lookups that are dereferenced afterwards are guarded with fatal ASSERTs so that a missing
// command fails the test instead of dereferencing end() or a null pointer.
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitAfterBarrierWhenEnqueueingCommandThenWaitForBarrierOnBlit) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Nothing to verify when the queue has no timestamp packet container.
    if (pCmdQ->getTimestampPacketContainer() == nullptr) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore restore{};
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.ForceCacheFlushForBcs.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    MockKernel *kernel = mockKernelWithInternals.mockKernel;
    size_t offset = 0;
    size_t gws = 1;
    BufferDefaults::context = context;
    auto buffer = clUniquePtr(BufferHelper<>::create());
    char ptr[1] = {};

    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    // Only commands recorded after this offset are parsed on the compute side.
    auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));

    uint64_t barrierNodeAddress = 0u;
    {
        HardwareParse ccsHwParser;
        ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);

        const auto pipeControlItor = find<PIPE_CONTROL *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
        ASSERT_NE(ccsHwParser.cmdList.end(), pipeControlItor);
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        // Combine the low and high address parts into the full 64-bit post-sync address.
        barrierNodeAddress = pipeControl->getAddress() | (static_cast<uint64_t>(pipeControl->getAddressHigh()) << 32);

        // There shouldn't be any semaphores before the barrier
        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(ccsHwParser.cmdList.begin(), pipeControlItor);
        EXPECT_EQ(pipeControlItor, semaphoreItor);
    }

    {
        HardwareParse bcsHwParser;
        bcsHwParser.parseCommands<FamilyType>(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), 0u);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
        ASSERT_NE(bcsHwParser.cmdList.end(), semaphoreItor);
        auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress());

        const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, bcsHwParser.cmdList.end());
        EXPECT_EQ(bcsHwParser.cmdList.end(), pipeControlItor);
    }

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
|
||||
|
||||
// Out-of-order queue scenario: read, read, kernel -> barrier -> kernel, read, read.
// Checks performed on the commands recorded after ccsStart / bcsStart:
//  - compute stream: the first MI_SEMAPHORE_WAIT targets the context-end address of the node
//    sampled from the queue's timestamp packet container after the two initial reads; a
//    PIPE_CONTROL with an immediate-data post-sync write follows it, with no further semaphore
//    between the two,
//  - BCS stream: the first MI_SEMAPHORE_WAIT targets the address programmed in that PIPE_CONTROL,
//    and no PIPE_CONTROL follows it.
// Lookups that are dereferenced or indexed afterwards are guarded with fatal ASSERTs.
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Nothing to verify when the queue has no timestamp packet container.
    if (pCmdQ->getTimestampPacketContainer() == nullptr) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore restore{};
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.ForceCacheFlushForBcs.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    MockKernel *kernel = mockKernelWithInternals.mockKernel;
    size_t offset = 0;
    size_t gws = 1;
    BufferDefaults::context = context;
    auto buffer = clUniquePtr(BufferHelper<>::create());
    char ptr[1] = {};

    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    // Indexing an empty node list below would be out of bounds.
    ASSERT_FALSE(pCmdQ->getTimestampPacketContainer()->peekNodes().empty());
    uint64_t lastBlitNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*pCmdQ->getTimestampPacketContainer()->peekNodes()[0]);
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    // Only commands recorded after these offsets are parsed.
    auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
    auto bcsStart = pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0).getUsed();
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));

    uint64_t barrierNodeAddress = 0u;
    {
        HardwareParse ccsHwParser;
        ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
        ASSERT_NE(ccsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(lastBlitNodeAddress, semaphore->getSemaphoreGraphicsAddress());

        const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, ccsHwParser.cmdList.end());
        ASSERT_NE(ccsHwParser.cmdList.end(), pipeControlItor);
        const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        // Combine the low and high address parts into the full 64-bit post-sync address.
        barrierNodeAddress = pipeControl->getAddress() | (static_cast<uint64_t>(pipeControl->getAddressHigh()) << 32);

        // There shouldn't be any more semaphores before the barrier
        EXPECT_EQ(pipeControlItor, find<MI_SEMAPHORE_WAIT *>(std::next(semaphoreItor), pipeControlItor));
    }

    {
        HardwareParse bcsHwParser;
        bcsHwParser.parseCommands<FamilyType>(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), bcsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
        ASSERT_NE(bcsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress());
        EXPECT_EQ(bcsHwParser.cmdList.end(), find<PIPE_CONTROL *>(semaphoreItor, bcsHwParser.cmdList.end()));
    }

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
|
||||
|
||||
// Out-of-order queue scenario: read, read, kernel -> barrier -> read blocked on a user event,
// which is then completed. Checks performed on the commands recorded after ccsStart / bcsStart:
//  - compute stream: the first MI_SEMAPHORE_WAIT targets the context-end address of the node
//    sampled from the queue's timestamp packet container after the two initial reads; a
//    PIPE_CONTROL with an immediate-data post-sync write follows it, with no further semaphore
//    between the two,
//  - BCS stream: the first MI_SEMAPHORE_WAIT targets the address programmed in that PIPE_CONTROL,
//    and no PIPE_CONTROL follows it.
// Lookups that are dereferenced or indexed afterwards are guarded with fatal ASSERTs.
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlockedBlitAfterBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Nothing to verify when the queue has no timestamp packet container.
    if (pCmdQ->getTimestampPacketContainer() == nullptr) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore restore{};
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.ForceCacheFlushForBcs.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    UserEvent userEvent;
    cl_event userEventWaitlist[] = {&userEvent};
    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    MockKernel *kernel = mockKernelWithInternals.mockKernel;
    size_t offset = 0;
    size_t gws = 1;
    BufferDefaults::context = context;
    auto buffer = clUniquePtr(BufferHelper<>::create());
    char ptr[1] = {};

    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    // Indexing an empty node list below would be out of bounds.
    ASSERT_FALSE(pCmdQ->getTimestampPacketContainer()->peekNodes().empty());
    uint64_t lastBlitNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*pCmdQ->getTimestampPacketContainer()->peekNodes()[0]);
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    // Only commands recorded after these offsets are parsed.
    auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
    auto bcsStart = pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0).getUsed();
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 1, userEventWaitlist, nullptr));

    // Complete the user event the last read was enqueued against.
    userEvent.setStatus(CL_COMPLETE);

    uint64_t barrierNodeAddress = 0u;
    {
        HardwareParse ccsHwParser;
        ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
        ASSERT_NE(ccsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(lastBlitNodeAddress, semaphore->getSemaphoreGraphicsAddress());

        const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, ccsHwParser.cmdList.end());
        ASSERT_NE(ccsHwParser.cmdList.end(), pipeControlItor);
        const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        // Combine the low and high address parts into the full 64-bit post-sync address.
        barrierNodeAddress = pipeControl->getAddress() | (static_cast<uint64_t>(pipeControl->getAddressHigh()) << 32);

        // There shouldn't be any more semaphores before the barrier
        EXPECT_EQ(pipeControlItor, find<MI_SEMAPHORE_WAIT *>(std::next(semaphoreItor), pipeControlItor));
    }

    {
        HardwareParse bcsHwParser;
        bcsHwParser.parseCommands<FamilyType>(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), bcsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
        ASSERT_NE(bcsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress());
        EXPECT_EQ(bcsHwParser.cmdList.end(), find<PIPE_CONTROL *>(semaphoreItor, bcsHwParser.cmdList.end()));
    }

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
|
||||
|
||||
@@ -1849,6 +1849,204 @@ TEST(CommandQueue, givenSupportForOutEventAndOutEventIsPassedWhenValidatingSuppo
|
||||
EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, &outEvent));
|
||||
}
|
||||
|
||||
struct CommandQueueWithTimestampPacketTests : ::testing::Test {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restore{};
|
||||
};
|
||||
|
||||
// Default-constructed (in-order) mock queue: setupBarrierTimestampForBcsEngines() is expected to
// add nothing while no stalling command is requested, to add exactly one barrier node once it is,
// to keep that same node on a repeated call, and to leave every lastBarrierToWaitFor container empty.
// Sizes are checked with fatal ASSERTs before indexing so a failure cannot read out of bounds.
TEST_F(CommandQueueWithTimestampPacketTests, givenInOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledThenEnsureBarrierNodeIsPresent) {
    MockContext context{};
    MockCommandQueue queue{context};
    TimestampPacketDependencies dependencies{};
    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }

    // No pending barrier, skip
    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies);
    EXPECT_EQ(0u, dependencies.barrierNodes.peekNodes().size());

    // Add barrier node
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies);
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    auto node1 = dependencies.barrierNodes.peekNodes()[0];

    // Do not add new node, if it exists
    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies);
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    auto node2 = dependencies.barrierNodes.peekNodes()[0];
    EXPECT_EQ(node2, node1);

    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }
}
|
||||
|
||||
// Out-of-order queue with a stalling command requested: calling
// setupBarrierTimestampForBcsEngines() for ENGINE_BCS is expected to place one barrier node in
// the dependencies and to store that node in lastBarrierToWaitFor of every BCS container except
// index 0. The value returned by refCountFetchSub(0) is expected to equal the container count.
// Sizes are checked with fatal ASSERTs before indexing so a failure cannot read out of bounds.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledOnBcsEngineThenEnsureBarrierNodeIsPresentAndSaveItForOtherBcses) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), props, false};
    TimestampPacketDependencies dependencies{};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }

    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_BCS, dependencies);
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    auto barrierNode = dependencies.barrierNodes.peekNodes()[0];

    for (auto currentBcsIndex = 0u; currentBcsIndex < queue.bcsTimestampPacketContainers.size(); currentBcsIndex++) {
        auto &containers = queue.bcsTimestampPacketContainers[currentBcsIndex];
        if (currentBcsIndex == 0) {
            // Index 0 is expected to keep no saved barrier after the ENGINE_BCS call.
            EXPECT_EQ(0u, containers.lastBarrierToWaitFor.peekNodes().size());
        } else {
            ASSERT_EQ(1u, containers.lastBarrierToWaitFor.peekNodes().size());
            EXPECT_EQ(barrierNode, containers.lastBarrierToWaitFor.peekNodes()[0]);
        }
    }
    EXPECT_EQ(queue.bcsTimestampPacketContainers.size(), barrierNode->refCountFetchSub(0));
}
|
||||
|
||||
// Out-of-order queue with a stalling command requested: calling
// setupBarrierTimestampForBcsEngines() for ENGINE_RCS and then ENGINE_CCS (sharing one
// dependencies object) is expected to leave exactly one barrier node in the dependencies and the
// same node in lastBarrierToWaitFor of every BCS container. The value returned by
// refCountFetchSub(0) is expected to equal the container count plus one.
// Sizes are checked with fatal ASSERTs before indexing so a failure cannot read out of bounds.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledOnNonBcsEngineThenEnsureBarrierNodeIsPresentAndSaveItForBcses) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), props, false};
    TimestampPacketDependencies dependencies{};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }

    for (auto engineType : {aub_stream::EngineType::ENGINE_RCS,
                            aub_stream::EngineType::ENGINE_CCS}) {
        queue.setupBarrierTimestampForBcsEngines(engineType, dependencies);
        ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
        auto barrierNode = dependencies.barrierNodes.peekNodes()[0];

        for (auto &containers : queue.bcsTimestampPacketContainers) {
            ASSERT_EQ(1u, containers.lastBarrierToWaitFor.peekNodes().size());
            EXPECT_EQ(barrierNode, containers.lastBarrierToWaitFor.peekNodes()[0]);
        }
        EXPECT_EQ(1u + queue.bcsTimestampPacketContainers.size(), barrierNode->refCountFetchSub(0));
    }
}
|
||||
|
||||
// processBarrierTimestampForBcsEngine() is expected to add nothing when no barrier is saved, and
// otherwise to transfer the node saved in lastBarrierToWaitFor of BCS container 0 into the
// dependencies' barrierNodes, leaving the saved container empty.
// The size is checked with a fatal ASSERT before indexing so a failure cannot read out of bounds.
TEST_F(CommandQueueWithTimestampPacketTests, givenSavedBarrierWhenProcessBarrierTimestampForBcsEngineCalledThenMoveSaveBarrierPacketToBarrierNodes) {
    MockContext context{};
    MockCommandQueue queue{context};
    TimestampPacketDependencies dependencies{};

    // No saved barriers
    queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, dependencies);
    EXPECT_TRUE(dependencies.barrierNodes.peekNodes().empty());

    // Save barrier
    TagNodeBase *node = queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
    queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.add(node);
    queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, dependencies);
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    EXPECT_EQ(node, dependencies.barrierNodes.peekNodes()[0]);
    EXPECT_TRUE(queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.peekNodes().empty());
}
|
||||
|
||||
// Out-of-order queue with a stalling command requested: for each compute engine type (RCS, CCS)
// the barrier is set up on that engine and then processed for ENGINE_BCS; the blit-side
// dependencies are expected to end up with exactly one barrier node each time.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenBarrierTimestampAreSetupOnComputeEngineAndProcessedOnBcsThenPacketIsInBarrierNodes) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();

    const aub_stream::EngineType computeEngines[] = {aub_stream::EngineType::ENGINE_RCS,
                                                     aub_stream::EngineType::ENGINE_CCS};
    for (const auto computeEngine : computeEngines) {
        // Fresh dependency objects per engine, as both are scoped to one iteration.
        TimestampPacketDependencies computeDependencies{};
        queue.setupBarrierTimestampForBcsEngines(computeEngine, computeDependencies);

        TimestampPacketDependencies blitDependencies{};
        queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, blitDependencies);

        const auto blitBarrierCount = blitDependencies.barrierNodes.peekNodes().size();
        EXPECT_EQ(1u, blitBarrierCount);
    }
}
|
||||
|
||||
// Out-of-order queue with a stalling command requested: setting the barrier up and processing it
// on the same ENGINE_BCS with a shared dependencies object is expected to leave exactly one
// barrier node in that object.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenBarrierTimestampAreSetupOnBcsEngineAndProcessedOnBcsThenPacketIsInBarrierNodes) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();

    const auto bcsEngine = aub_stream::EngineType::ENGINE_BCS;
    TimestampPacketDependencies bcsDependencies{};
    queue.setupBarrierTimestampForBcsEngines(bcsEngine, bcsDependencies);
    queue.processBarrierTimestampForBcsEngine(bcsEngine, bcsDependencies);

    const auto barrierCount = bcsDependencies.barrierNodes.peekNodes().size();
    EXPECT_EQ(1u, barrierCount);
}
|
||||
|
||||
// Default-constructed (in-order) mock queue: setLastBcsPacket() is expected to leave
// lastSignalledPacket of BCS container 0 empty.
TEST_F(CommandQueueWithTimestampPacketTests, givenInOrderQueueWhenSettingLastBcsPacketThenDoNotSaveThePacket) {
    MockContext context{};
    MockCommandQueue queue{context};

    queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS);

    const bool nothingSaved = queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes().empty();
    EXPECT_TRUE(nothingSaved);
}
|
||||
|
||||
// Out-of-order queue: after each setLastBcsPacket() call, lastSignalledPacket of BCS container 0
// is expected to hold the same nodes as the queue's timestamp packet container, which holds one
// node. The sequence is run twice, with the queue's nodes moved to the deferred container between.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSettingLastBcsPacketThenSaveOnlyOneLastPacket) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};

    auto &queuePackets = *queue.timestampPacketContainer;
    auto &lastSignalledPacket = queue.bcsTimestampPacketContainers[0].lastSignalledPacket;

    // First packet
    queuePackets.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
    queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS);
    EXPECT_EQ(queuePackets.peekNodes(), lastSignalledPacket.peekNodes());
    EXPECT_EQ(1u, queuePackets.peekNodes().size());

    // Hand the current nodes over to the deferred container before adding the next packet.
    queuePackets.moveNodesToNewContainer(*queue.getDeferredTimestampPackets());

    // Second packet
    queuePackets.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
    queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS);
    EXPECT_EQ(queuePackets.peekNodes(), lastSignalledPacket.peekNodes());
    EXPECT_EQ(1u, queuePackets.peekNodes().size());
}
|
||||
|
||||
// With one node stored in lastSignalledPacket of BCS container 0,
// fillCsrDependenciesWithLastBcsPackets() is expected to keep that node in place and to register
// the address of that lastSignalledPacket container as the first CSR dependency entry.
// The dependency list is checked with a fatal ASSERT before indexing so a failure cannot read
// out of bounds.
TEST_F(CommandQueueWithTimestampPacketTests, givenLastSignalledPacketWhenFillingCsrDependenciesThenMovePacketToCsrDependencies) {
    MockContext context{};
    MockCommandQueue queue{context};
    queue.bcsTimestampPacketContainers[0].lastSignalledPacket.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());

    CsrDependencies csrDeps;
    queue.fillCsrDependenciesWithLastBcsPackets(csrDeps);
    EXPECT_EQ(1u, queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes().size());
    ASSERT_FALSE(csrDeps.timestampPacketContainer.empty());
    EXPECT_EQ(&queue.bcsTimestampPacketContainers[0].lastSignalledPacket, csrDeps.timestampPacketContainer[0]);
}
|
||||
|
||||
// With one node stored in lastSignalledPacket of BCS container 0, clearLastBcsPackets() is
// expected to leave that container empty.
// The expectation targets lastSignalledPacket - the container this test populates - so the test
// cannot pass vacuously on a container that never held a node.
TEST_F(CommandQueueWithTimestampPacketTests, givenLastSignalledPacketWhenClearingPacketsThenClearThePacket) {
    MockContext context{};
    MockCommandQueue queue{context};
    auto &bcsContainers = queue.bcsTimestampPacketContainers[0];
    bcsContainers.lastSignalledPacket.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
    // Precondition: there really is a packet to clear.
    ASSERT_EQ(1u, bcsContainers.lastSignalledPacket.peekNodes().size());

    queue.clearLastBcsPackets();
    EXPECT_EQ(0u, bcsContainers.lastSignalledPacket.peekNodes().size());
    // Never populated in this test; kept from the previous version of the check.
    EXPECT_EQ(0u, bcsContainers.lastBarrierToWaitFor.peekNodes().size());
}
|
||||
|
||||
// Out-of-order queue round trip: after a packet is added and setLastBcsPacket() is called,
// fillCsrDependenciesWithLastBcsPackets() is expected to produce a non-empty dependency list, and
// clearLastBcsPackets() is expected to empty lastSignalledPacket of every BCS container.
TEST_F(CommandQueueWithTimestampPacketTests, givenQueueWhenSettingAndQueryingLastBcsPacketThenReturnCorrectResults) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};
    queue.timestampPacketContainer->add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());

    // Set
    queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS);

    // Query
    CsrDependencies csrDeps;
    queue.fillCsrDependenciesWithLastBcsPackets(csrDeps);
    EXPECT_FALSE(csrDeps.timestampPacketContainer.empty());

    // Clear
    queue.clearLastBcsPackets();
    for (auto &bcsContainers : queue.bcsTimestampPacketContainers) {
        const auto &signalledNodes = bcsContainers.lastSignalledPacket.peekNodes();
        EXPECT_TRUE(signalledNodes.empty());
    }
}
|
||||
|
||||
using KernelExecutionTypesTests = DispatchFlagsTests;
|
||||
HWTEST_F(KernelExecutionTypesTests, givenConcurrentKernelWhileDoingNonBlockedEnqueueThenCorrectKernelTypeIsSetInCSR) {
|
||||
using CsrType = MockCsrHw2<FamilyType>;
|
||||
|
||||
Reference in New Issue
Block a user