feature: Use Event allocation for cross in-order CL synchronization

Related-To: LOCI-4332

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-05-09 10:06:53 +00:00
committed by Compute-Runtime-Automation
parent 273635e8e7
commit 36d3c65284
4 changed files with 52 additions and 9 deletions

View File

@@ -168,7 +168,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
void appendWaitOnInOrderDependency(uint32_t waitValue, bool relaxedOrderingAllowed);
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, bool relaxedOrderingAllowed);
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,

View File

@@ -2029,7 +2029,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
}
if (inOrderDependencyCounter > 0) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderDependencyCounter, relaxedOrderingAllowed);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, inOrderDependencyCounter, relaxedOrderingAllowed);
}
if (numWaitEvents > 0) {
@@ -2076,12 +2076,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(uint32_t waitValue, bool relaxedOrderingAllowed) {
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, bool relaxedOrderingAllowed) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
commandContainer.addToResidencyContainer(dependencyCounterAllocation);
uint64_t gpuAddress = this->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t gpuAddress = dependencyCounterAllocation->getGpuAddress();
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue,
@@ -2135,7 +2135,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
auto event = Event::fromHandle(phEvent[i]);
if (event->isInOrderExecEvent()) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(event->getInOrderExecSignalValue(), relaxedOrderingAllowed);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), relaxedOrderingAllowed);
continue;
}

View File

@@ -206,6 +206,7 @@ struct Event : _ze_event_handle_t {
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue);
bool isInOrderExecEvent() const { return inOrderExecEvent; }
uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; }
protected:
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}

View File

@@ -18,6 +18,7 @@
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
@@ -702,9 +703,9 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
ze_command_queue_desc_t desc = {};
mockCmdQ = std::make_unique<Mock<CommandQueue>>(device, csr, &desc);
mockCmdQs.emplace_back(std::make_unique<Mock<CommandQueue>>(device, csr, &desc));
cmdList->cmdQImmediate = mockCmdQ.get();
cmdList->cmdQImmediate = mockCmdQs[createdCmdLists].get();
cmdList->isFlushTaskSubmissionEnabled = true;
cmdList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
cmdList->csr = csr;
@@ -712,12 +713,15 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
cmdList->commandContainer.setImmediateCmdListCsr(csr);
cmdList->enableInOrderExecution();
createdCmdLists++;
return cmdList;
}
DebugManagerStateRestore restorer;
std::unique_ptr<Mock<CommandQueue>> mockCmdQ;
uint32_t createdCmdLists = 0;
std::vector<std::unique_ptr<Mock<CommandQueue>>> mockCmdQs;
ze_result_t returnValue = ZE_RESULT_SUCCESS;
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
@@ -811,6 +815,44 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSem
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation());
}
// Two in-order immediate command lists share one event: the first list signals it, the second waits on it.
// The wait must be programmed as an MI_SEMAPHORE_WAIT that targets the FIRST list's dependency counter
// allocation (not the waiting list's own one), and that allocation must be made resident for both submissions.
HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingFromDifferentCmdListThenProgramSemaphoreForEvent, IsAtLeastSkl) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto signalingCmdList = createImmCmdList<gfxCoreFamily>();
    auto waitingCmdList = createImmCmdList<gfxCoreFamily>();

    auto eventPool = createEvents(1);

    // Only the waiting list's stream is inspected later on.
    auto waitingStream = waitingCmdList->getCmdContainer().getCommandStream();
    auto sharedEventHandle = events[0]->toHandle();

    // Record every makeResident call so residency of the counter allocation can be counted per submission.
    auto *defaultCsr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
    auto *ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(defaultCsr);
    ultCsr->storeMakeResidentAllocations = true;

    // First list launches a kernel and signals the shared event.
    signalingCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, sharedEventHandle, 0, nullptr, launchParams, false);
    EXPECT_EQ(1u, ultCsr->makeResidentAllocations[signalingCmdList->inOrderDependencyCounterAllocation]);

    // Second list waits on that event; the signaling list's counter allocation is made resident once more.
    waitingCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 1, &sharedEventHandle, launchParams, false);
    EXPECT_EQ(2u, ultCsr->makeResidentAllocations[signalingCmdList->inOrderDependencyCounterAllocation]);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, waitingStream->getCpuBase(), waitingStream->getUsed()));

    auto semaphoreIt = find<MI_SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(parsedCommands.end(), semaphoreIt);

    auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreIt);
    EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());

    // Each list owns a distinct counter allocation; the semaphore must point at the signaling list's one.
    const auto signalingCounterGpuAddress = signalingCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
    const auto waitingCounterGpuAddress = waitingCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
    EXPECT_NE(signalingCounterGpuAddress, waitingCounterGpuAddress);
    EXPECT_EQ(signalingCounterGpuAddress, semaphoreCmd->getSemaphoreGraphicsAddress());

    EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation());
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastSkl) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();