Do not track split internal events as a barrier

Related-To: NEO-7723, NEO-7716

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2023-02-13 07:10:45 +00:00
committed by Compute-Runtime-Automation
parent 6b6e112412
commit 073de9db62
17 changed files with 154 additions and 62 deletions

View File

@@ -136,7 +136,7 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) = 0;
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) = 0;
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,

View File

@@ -165,7 +165,7 @@ struct CommandListCoreFamily : CommandListImp {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
@@ -174,7 +174,7 @@ struct CommandListCoreFamily : CommandListImp {
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
void appendMultiPartitionEpilogue() override;
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies);
ze_result_t reserveSpace(size_t size, void **ptr) override;
ze_result_t reset() override;

View File

@@ -265,7 +265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
if (ret) {
return ret;
}
@@ -299,7 +299,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch);
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch, true);
if (ret) {
return ret;
}
@@ -323,7 +323,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
if (ret) {
return ret;
}
@@ -353,7 +353,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
if (ret) {
return ret;
}
@@ -440,7 +440,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true);
if (ret) {
return ret;
}
@@ -1027,7 +1027,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
blitProperties.srcSize = srcSize;
blitProperties.dstSize = dstSize;
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
if (ret) {
return ret;
}
@@ -1180,7 +1180,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
isStateless = true;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
if (ret) {
return ret;
@@ -1575,7 +1575,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
return appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
if (res) {
return res;
}
@@ -1763,7 +1763,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
if (gfxCoreHelper.getMaxFillPaternSizeForCopyEngine() < patternSize) {
return ZE_RESULT_ERROR_INVALID_SIZE;
} else {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
if (ret) {
return ret;
}
@@ -1929,11 +1929,11 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::getAllocationOffsetForAppend
}
template <GFXCORE_FAMILY gfxCoreFamily>
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed) {
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
if (numWaitEvents > 0) {
if (phWaitEvents) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
} else {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@@ -1975,7 +1975,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) {
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
NEO::Device *neoDevice = device->getNEODevice();
@@ -2040,7 +2040,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
}
}
if (this->cmdListType == TYPE_IMMEDIATE && isCopyOnly()) {
if (this->cmdListType == TYPE_IMMEDIATE && isCopyOnly() && trackDependencies) {
NEO::MiFlushArgs args;
args.commandWithPostSync = true;
const auto &productHelper = this->device->getProductHelper();
@@ -2146,7 +2146,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
if (numWaitEvents > 0) {
if (phWaitEvents) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true);
} else {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@@ -2567,7 +2567,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true);
if (ret) {
return ret;
}

View File

@@ -91,7 +91,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
NEO::GraphicsAllocation *srcAllocation,
size_t size, bool flushHost) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;

View File

@@ -529,7 +529,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed) {
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
bool allSignaled = true;
for (auto i = 0u; i < numEvents; i++) {
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
@@ -541,7 +541,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
checkAvailableSpace(numEvents);
checkWaitEventsState(numEvents, phWaitEvents);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
this->dependenciesPresent = true;
return flushImmediate(ret, true, true, false, nullptr);
}

View File

@@ -83,7 +83,7 @@ struct BcsSplit {
for (size_t i = 0; i < this->cmdQs.size(); i++) {
if (barrierRequired) {
auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle();
cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies);
cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies, false);
}
auto localSize = totalSize / engineCount;
@@ -105,7 +105,7 @@ struct BcsSplit {
engineCount--;
}
cmdList->addEventsToCmdList(static_cast<uint32_t>(this->cmdQs.size()), eventHandles.data(), hasRelaxedOrderingDependencies);
cmdList->addEventsToCmdList(static_cast<uint32_t>(this->cmdQs.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false);
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true);
if (hSignalEvent) {

View File

@@ -331,7 +331,7 @@ struct MockCommandList : public CommandList {
ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t numEvents,
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed));
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies));
ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS,
(uint64_t * dstptr,

View File

@@ -1041,7 +1041,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false), true, true);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), true, true);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, 0, nullptr), true, true);
@@ -1172,7 +1172,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false), false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), false, false);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
false, false);
@@ -1283,7 +1283,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
ASSERT_NE(nullptr, eventObject->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr);
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendBarrier(nullptr, 1, &event);
@@ -1345,7 +1345,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnExecutingCommandListsWhenCreatingImme
commandList->csr = &mockCommandStreamReceiver;
static_cast<WhiteBox<::L0::CommandQueue> *>(commandList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
commandList->csr = oldCsr;
@@ -1421,7 +1421,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
commandList->csr = &mockCommandStreamReceiver;
static_cast<WhiteBox<::L0::CommandQueue> *>(commandList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
commandList->csr = oldCsr;
static_cast<WhiteBox<::L0::CommandQueue> *>(commandList->cmdQImmediate)->csr = oldCsr;
@@ -1471,7 +1471,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
auto oldCommandQueue = commandList->cmdQImmediate;
commandList->cmdQImmediate = &mockCommandQueue;
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
commandList->cmdQImmediate = oldCommandQueue;
}
@@ -1520,7 +1520,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
auto oldCommandQueue = commandList->cmdQImmediate;
commandList->cmdQImmediate = &mockCommandQueue;
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
commandList->cmdQImmediate = oldCommandQueue;
}
@@ -1563,7 +1563,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
ASSERT_NE(nullptr, eventObject->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr);
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendBarrier(nullptr, 1, &event);
@@ -1656,7 +1656,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangAndEnabledFlushTaskSubmissionFlagWhenCre
commandList->csr = &mockCommandStreamReceiver;
static_cast<WhiteBox<::L0::CommandQueue> *>(commandList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
commandList->csr = oldCsr;

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/built_ins/sip.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
@@ -590,7 +591,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue));
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, false);
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
context->destroy();
@@ -703,6 +704,97 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
context->destroy();
}
// Checks that appendBarrier on an immediate copy-only command list consumes a CSR barrier count
// and emits an MI_FLUSH_DW whose post-sync write targets the CSR barrier-count address.
HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendBarrierThenIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    // Every statement below dereferences commandList, so a failed creation must abort the test.
    ASSERT_NE(nullptr, commandList.get());

    // NOTE(review): the 0 -> 2 sequence suggests getNextBarrierCount() advances the counter on
    // every call (once here, once inside appendBarrier) - confirm against the CSR implementation.
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u);

    auto result = commandList->appendBarrier(nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 2u);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    // Fatal check before touching the iterator: incrementing or dereferencing end() is undefined.
    ASSERT_NE(cmdList.end(), itor);
    if (EncodeMiFlushDW<FamilyType>::getMiFlushDwWaSize()) {
        // Presumably the first MI_FLUSH_DW is the workaround command; the tag update follows it.
        itor++;
    }
    ASSERT_NE(cmdList.end(), itor);

    auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
    // Guard the accessors below in case the command at this position is not an MI_FLUSH_DW.
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
    EXPECT_EQ(cmd->getDestinationAddress(), commandList->csr->getBarrierCountGpuAddress());
    EXPECT_EQ(cmd->getImmediateData(), 2u);
}
// Checks that appendWaitOnEvents with trackDependencies == true on an immediate copy-only
// command list consumes a CSR barrier count and emits an MI_FLUSH_DW whose post-sync write
// targets the CSR barrier-count address.
HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEventsThenIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    // Every statement below dereferences commandList, so a failed creation must abort the test.
    ASSERT_NE(nullptr, commandList.get());

    // NOTE(review): the 0 -> 2 sequence suggests getNextBarrierCount() advances the counter on
    // every call (once here, once inside appendWaitOnEvents) - confirm against the CSR code.
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // The pool is used to create the event right below; stop here if it could not be created.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());
    auto eventHandle = event->toHandle();

    result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 2u);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    // Fatal check before touching the iterator: incrementing or dereferencing end() is undefined.
    ASSERT_NE(cmdList.end(), itor);
    if (EncodeMiFlushDW<FamilyType>::getMiFlushDwWaSize()) {
        // Presumably the first MI_FLUSH_DW is the workaround command; the tag update follows it.
        itor++;
    }
    ASSERT_NE(cmdList.end(), itor);

    auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
    // Guard the accessors below in case the command at this position is not an MI_FLUSH_DW.
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
    EXPECT_EQ(cmd->getDestinationAddress(), commandList->csr->getBarrierCountGpuAddress());
    EXPECT_EQ(cmd->getImmediateData(), 2u);
}
// Checks that appendWaitOnEvents with trackDependencies == false on an immediate copy-only
// command list does not consume a CSR barrier count: the only counter values handed out are
// the two requested by this test itself (0, then 1).
// NOTE(review): the test name also promises that no barrier tag update is dispatched, but the
// command stream is not inspected here - consider adding that check.
HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEventsWithTrackDependenciesSetToFalseThenDoNotIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) {
    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    // Every statement below dereferences commandList, so a failed creation must abort the test.
    ASSERT_NE(nullptr, commandList.get());

    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // The pool is used to create the event right below; stop here if it could not be created.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());
    auto eventHandle = event->toHandle();

    // trackDependencies == false: the wait must not be recorded as a barrier.
    result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 1u);
}
HWTEST_F(CommandListCreate, GivenCommandListWhenUnalignedPtrThenLeftMiddleAndRightCopyAdded) {
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
ze_result_t returnValue;

View File

@@ -610,7 +610,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith
event.signalScope = 0;
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
auto eventHandle = event.toHandle();
commandList->appendWaitOnEvents(1, &eventHandle, false);
commandList->appendWaitOnEvents(1, &eventHandle, false, true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -634,7 +634,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen
event.signalScope = 0;
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
auto eventHandle = event.toHandle();
commandList->appendWaitOnEvents(1, &eventHandle, false);
commandList->appendWaitOnEvents(1, &eventHandle, false, true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -665,7 +665,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP
event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
ze_event_handle_t events[] = {&event, &event2};
commandList->appendWaitOnEvents(2, events, false);
commandList->appendWaitOnEvents(2, events, false, true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -717,7 +717,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
ze_event_handle_t events[] = {&event, &event2};
size_t startOffset = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false);
commandList->appendWaitOnEvents(2, events, false, true);
size_t endOffset = commandContainer.getCommandStream()->getUsed();
size_t usedBufferSize = (endOffset - startOffset);
@@ -763,7 +763,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
ze_event_handle_t events[] = {&event, &event2};
size_t startOffset = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false);
commandList->appendWaitOnEvents(2, events, false, true);
size_t endOffset = commandContainer.getCommandStream()->getUsed();
size_t usedBufferSize = (endOffset - startOffset);
@@ -803,7 +803,7 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly
ze_event_handle_t events[] = {&event, &event2};
auto used = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false);
commandList->appendWaitOnEvents(2, events, false, true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -834,7 +834,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe
ze_event_handle_t events[] = {&event, &event2};
auto used = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false);
commandList->appendWaitOnEvents(2, events, false, true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@@ -866,7 +866,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateComm
event2.waitScope = 0;
ze_event_handle_t events[] = {&event, &event2};
auto ret = commandList->appendWaitOnEvents(2, events, false);
auto ret = commandList->appendWaitOnEvents(2, events, false, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
}

View File

@@ -2360,7 +2360,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnE
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
cmdList.appendWaitOnEvents(1, &eventHandle, false);
cmdList.appendWaitOnEvents(1, &eventHandle, false, true);
EXPECT_TRUE(cmdList.dependenciesPresent);

View File

@@ -636,7 +636,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendWaitOnE
eventPool->createEvent(&eventDesc, &hEvent);
auto result = commandList->appendWaitOnEvents(1, &hEvent, false);
auto result = commandList->appendWaitOnEvents(1, &hEvent, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = cmdStream->getUsed();

View File

@@ -31,7 +31,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWait
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -85,7 +85,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
ultCsr->directSubmission.reset(directSubmission);
ze_event_handle_t hEventHandle = event->toHandle();
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true);
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = immCommandList->commandContainer.getCommandStream()->getUsed();
@@ -270,7 +270,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
ze_event_handle_t handles[2] = {event->toHandle(), event->toHandle()};
auto result = commandList->appendWaitOnEvents(2, handles, false);
auto result = commandList->appendWaitOnEvents(2, handles, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -301,7 +301,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphicsAllocationIsAddedToResidencyContainer) {
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto &residencyContainer = commandList->commandContainer.getResidencyContainer();
@@ -328,7 +328,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -373,7 +373,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
event->setPacketsInUse(3u);
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, false);
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@@ -436,7 +436,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
ASSERT_EQ(9u, event->getPacketsInUse());
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, false);
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@@ -542,7 +542,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWai
ze_event_handle_t hEventHandle = event->toHandle();
auto oldCommandBuffer = commandList->commandContainer.getCommandStream()->getGraphicsAllocation();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -604,7 +604,7 @@ HWTEST2_F(MultTileCommandListAppendWaitOnEvent,
ze_event_handle_t eventHandle = event->toHandle();
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
auto result = commandList->appendWaitOnEvents(1, &eventHandle, false);
auto result = commandList->appendWaitOnEvents(1, &eventHandle, false, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -643,7 +643,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
ze_event_handle_t eventHandle = event->toHandle();
EXPECT_FALSE(cmdList.dependenciesPresent);
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false));
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true));
EXPECT_TRUE(cmdList.dependenciesPresent);
}
@@ -657,7 +657,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
ze_event_handle_t eventHandle = event->toHandle();
EXPECT_FALSE(cmdList.dependenciesPresent);
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false));
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true));
EXPECT_FALSE(cmdList.dependenciesPresent);
}
@@ -738,7 +738,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
auto eventHandle = event->toHandle();
commandListImmediate->appendWaitOnEvents(1, &eventHandle, false);
commandListImmediate->appendWaitOnEvents(1, &eventHandle, false, true);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}

View File

@@ -1055,7 +1055,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
size_t sizeBefore = cmdStream->getUsed();
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &eventHandle, false);
result = commandList->appendWaitOnEvents(1, &eventHandle, false, true);
size_t sizeAfter = cmdStream->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

View File

@@ -271,7 +271,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat
ASSERT_NE(nullptr, eventObject->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr);
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
returnValue = commandList->appendBarrier(nullptr, 1, &event);
@@ -334,7 +334,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia
ASSERT_NE(nullptr, eventObject->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr);
returnValue = commandList->appendWaitOnEvents(1, &event, false);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
returnValue = commandList->appendBarrier(nullptr, 1, &event);