Fix event signaling in the appendWaitOnMemory command list extension function

Related-To: NEO-7490

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-01-12 14:21:51 +00:00
committed by Compute-Runtime-Automation
parent d17b1e9019
commit ee99df18aa
6 changed files with 73 additions and 35 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -167,7 +167,7 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendMINoop() = 0;
virtual ze_result_t appendPipeControl(void *dstPtr, uint64_t value) = 0;
virtual ze_result_t appendWaitOnMemory(void *desc, void *ptr,
uint32_t data, ze_event_handle_t hSignalEvent) = 0;
uint32_t data, ze_event_handle_t signalEventHandle) = 0;
virtual ze_result_t appendWriteToMemory(void *desc, void *ptr,
uint64_t data) = 0;

View File

@@ -156,7 +156,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendMINoop() override;
ze_result_t appendPipeControl(void *dstPtr, uint64_t value) override;
ze_result_t appendWaitOnMemory(void *desc, void *ptr,
uint32_t data, ze_event_handle_t hSignalEvent) override;
uint32_t data, ze_event_handle_t signalEventHandle) override;
ze_result_t appendWriteToMemory(void *desc, void *ptr,
uint64_t data) override;

View File

@@ -2589,7 +2589,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
void *ptr,
uint32_t data,
ze_event_handle_t hSignalEvent) {
ze_event_handle_t signalEventHandle) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
auto descriptor = reinterpret_cast<zex_wait_on_mem_desc_t *>(desc);
@@ -2617,11 +2617,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
Event *signalEvent = nullptr;
if (signalEventHandle) {
signalEvent = Event::fromHandle(signalEventHandle);
}
auto srcAllocationStruct = getAlignedAllocation(this->device, ptr, sizeof(uint32_t), true);
if (srcAllocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
appendEventForProfiling(signalEvent, true);
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
NEO::EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
@@ -2638,27 +2646,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, hwInfo);
}
if (hSignalEvent) {
auto event = Event::fromHandle(hSignalEvent);
appendSignalEventPostWalker(signalEvent);
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
uint64_t eventGpuAddr = event->getCompletionFieldGpuAddress(this->device);
if (isCopyOnly()) {
NEO::MiFlushArgs args;
args.commandWithPostSync = true;
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), eventGpuAddr,
Event::STATE_SIGNALED, args, hwInfo);
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = getDcFlushRequired(!!event->signalScope);
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*commandContainer.getCommandStream(), NEO::PostSyncMode::ImmediateData,
eventGpuAddr, Event::STATE_SIGNALED,
hwInfo,
args);
}
}
return ZE_RESULT_SUCCESS;
}

View File

@@ -166,10 +166,10 @@ struct Event : _ze_event_handle_t {
return maxKernelCount;
}
uint64_t globalStartTS;
uint64_t globalEndTS;
uint64_t contextStartTS;
uint64_t contextEndTS;
uint64_t globalStartTS = 1;
uint64_t globalEndTS = 1;
uint64_t contextStartTS = 1;
uint64_t contextEndTS = 1;
std::chrono::microseconds gpuHangCheckPeriod{500'000};
// Metric streamer instance associated with the event.
@@ -205,10 +205,11 @@ struct Event : _ze_event_handle_t {
uint32_t maxPacketCount = 0;
uint32_t totalEventSize = 0;
std::atomic<State> isCompleted{STATE_INITIAL};
bool isTimestampEvent = false;
bool usingContextEndOffset = false;
bool signalAllEventPackets = false;
std::atomic<State> isCompleted{STATE_INITIAL};
};
template <typename TagSizeT>

View File

@@ -405,7 +405,7 @@ struct MockCommandList : public CommandList {
uint64_t value));
ADDMETHOD_NOBASE(appendWaitOnMemory, ze_result_t, ZE_RESULT_SUCCESS,
(void *desc, void *ptr,
uint32_t data, ze_event_handle_t hSignalEvent));
uint32_t data, ze_event_handle_t signalEventHandle));
ADDMETHOD_NOBASE(appendWriteToMemory, ze_result_t, ZE_RESULT_SUCCESS,
(void *desc, void *ptr,

View File

@@ -12,6 +12,7 @@
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/api/driver_experimental/public/zex_api.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@@ -47,6 +48,8 @@ class CommandListWaitOnMemFixture : public DeviceFixture {
size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
signalAllPackets = L0GfxCoreHelper::useSignalAllEventPackets(device->getHwInfo());
}
void tearDown() {
@@ -62,8 +65,9 @@ class CommandListWaitOnMemFixture : public DeviceFixture {
std::unique_ptr<L0::ult::CommandList> commandListBcs;
std::unique_ptr<EventPool> eventPool;
std::unique_ptr<Event> event;
uint32_t waitMemData = 1u;
void *ptr = nullptr;
uint32_t waitMemData = 1u;
bool signalAllPackets = false;
};
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -393,6 +397,7 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndHostS
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
ze_result_t result = ZE_RESULT_SUCCESS;
auto &commandContainer = commandList->commandContainer;
@@ -421,19 +426,40 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndHostS
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
size_t expectedPostSyncStoreDataImm = 0;
uint64_t storeDataImmAddress = gpuAddress;
if (signalAllPackets) {
expectedPostSyncStoreDataImm = event->getMaxPacketsCount() - 1;
}
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(expectedPostSyncStoreDataImm, itorStoreDataImm.size());
for (size_t i = 0; i < expectedPostSyncStoreDataImm; i++) {
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
EXPECT_EQ(storeDataImmAddress, cmd->getAddress());
EXPECT_FALSE(cmd->getStoreQword());
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0());
storeDataImmAddress += event->getSinglePacketSize();
}
auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
auto itorPC = findAll<PIPE_CONTROL *>(itor, cmdList.end());
ASSERT_NE(0u, itorPC.size());
auto pipeControlAddress = storeDataImmAddress;
bool postSyncFound = false;
for (auto it : itorPC) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
EXPECT_EQ(pipeControlAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
EXPECT_EQ(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
postSyncFound = true;
}
@@ -445,6 +471,7 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndNoSco
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
ze_result_t result = ZE_RESULT_SUCCESS;
auto &commandContainer = commandList->commandContainer;
@@ -471,19 +498,40 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndNoSco
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
size_t expectedPostSyncStoreDataImm = 0;
uint64_t storeDataImmAddress = gpuAddress;
if (signalAllPackets) {
expectedPostSyncStoreDataImm = event->getMaxPacketsCount() - 1;
}
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(expectedPostSyncStoreDataImm, itorStoreDataImm.size());
for (size_t i = 0; i < expectedPostSyncStoreDataImm; i++) {
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
EXPECT_EQ(storeDataImmAddress, cmd->getAddress());
EXPECT_FALSE(cmd->getStoreQword());
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0());
storeDataImmAddress += event->getSinglePacketSize();
}
auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
auto itorPC = findAll<PIPE_CONTROL *>(itor, cmdList.end());
ASSERT_NE(0u, itorPC.size());
auto pipeControlAddress = storeDataImmAddress;
bool postSyncFound = false;
for (auto it : itorPC) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
EXPECT_EQ(pipeControlAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
EXPECT_FALSE(cmd->getDcFlushEnable());
postSyncFound = true;
}