mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Fix event signaling in command list extension function
Related-To: NEO-7490
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d17b1e9019
commit
ee99df18aa
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -167,7 +167,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
virtual ze_result_t appendMINoop() = 0;
|
||||
virtual ze_result_t appendPipeControl(void *dstPtr, uint64_t value) = 0;
|
||||
virtual ze_result_t appendWaitOnMemory(void *desc, void *ptr,
|
||||
uint32_t data, ze_event_handle_t hSignalEvent) = 0;
|
||||
uint32_t data, ze_event_handle_t signalEventHandle) = 0;
|
||||
virtual ze_result_t appendWriteToMemory(void *desc, void *ptr,
|
||||
uint64_t data) = 0;
|
||||
|
||||
|
||||
@@ -156,7 +156,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
ze_result_t appendMINoop() override;
|
||||
ze_result_t appendPipeControl(void *dstPtr, uint64_t value) override;
|
||||
ze_result_t appendWaitOnMemory(void *desc, void *ptr,
|
||||
uint32_t data, ze_event_handle_t hSignalEvent) override;
|
||||
uint32_t data, ze_event_handle_t signalEventHandle) override;
|
||||
ze_result_t appendWriteToMemory(void *desc, void *ptr,
|
||||
uint64_t data) override;
|
||||
|
||||
|
||||
@@ -2589,7 +2589,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
void *ptr,
|
||||
uint32_t data,
|
||||
ze_event_handle_t hSignalEvent) {
|
||||
ze_event_handle_t signalEventHandle) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
auto descriptor = reinterpret_cast<zex_wait_on_mem_desc_t *>(desc);
|
||||
@@ -2617,11 +2617,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
Event *signalEvent = nullptr;
|
||||
if (signalEventHandle) {
|
||||
signalEvent = Event::fromHandle(signalEventHandle);
|
||||
}
|
||||
|
||||
auto srcAllocationStruct = getAlignedAllocation(this->device, ptr, sizeof(uint32_t), true);
|
||||
if (srcAllocationStruct.alloc == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
|
||||
|
||||
appendEventForProfiling(signalEvent, true);
|
||||
|
||||
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
|
||||
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
|
||||
NEO::EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
@@ -2638,27 +2646,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, hwInfo);
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
auto event = Event::fromHandle(hSignalEvent);
|
||||
appendSignalEventPostWalker(signalEvent);
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
uint64_t eventGpuAddr = event->getCompletionFieldGpuAddress(this->device);
|
||||
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), eventGpuAddr,
|
||||
Event::STATE_SIGNALED, args, hwInfo);
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = getDcFlushRequired(!!event->signalScope);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(), NEO::PostSyncMode::ImmediateData,
|
||||
eventGpuAddr, Event::STATE_SIGNALED,
|
||||
hwInfo,
|
||||
args);
|
||||
}
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -166,10 +166,10 @@ struct Event : _ze_event_handle_t {
|
||||
return maxKernelCount;
|
||||
}
|
||||
|
||||
uint64_t globalStartTS;
|
||||
uint64_t globalEndTS;
|
||||
uint64_t contextStartTS;
|
||||
uint64_t contextEndTS;
|
||||
uint64_t globalStartTS = 1;
|
||||
uint64_t globalEndTS = 1;
|
||||
uint64_t contextStartTS = 1;
|
||||
uint64_t contextEndTS = 1;
|
||||
std::chrono::microseconds gpuHangCheckPeriod{500'000};
|
||||
|
||||
// Metric streamer instance associated with the event.
|
||||
@@ -205,10 +205,11 @@ struct Event : _ze_event_handle_t {
|
||||
uint32_t maxPacketCount = 0;
|
||||
uint32_t totalEventSize = 0;
|
||||
|
||||
std::atomic<State> isCompleted{STATE_INITIAL};
|
||||
|
||||
bool isTimestampEvent = false;
|
||||
bool usingContextEndOffset = false;
|
||||
bool signalAllEventPackets = false;
|
||||
std::atomic<State> isCompleted{STATE_INITIAL};
|
||||
};
|
||||
|
||||
template <typename TagSizeT>
|
||||
|
||||
@@ -405,7 +405,7 @@ struct MockCommandList : public CommandList {
|
||||
uint64_t value));
|
||||
ADDMETHOD_NOBASE(appendWaitOnMemory, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(void *desc, void *ptr,
|
||||
uint32_t data, ze_event_handle_t hSignalEvent));
|
||||
uint32_t data, ze_event_handle_t signalEventHandle));
|
||||
|
||||
ADDMETHOD_NOBASE(appendWriteToMemory, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(void *desc, void *ptr,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/api/driver_experimental/public/zex_api.h"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
|
||||
@@ -47,6 +48,8 @@ class CommandListWaitOnMemFixture : public DeviceFixture {
|
||||
size, alignment, &ptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
signalAllPackets = L0GfxCoreHelper::useSignalAllEventPackets(device->getHwInfo());
|
||||
}
|
||||
|
||||
void tearDown() {
|
||||
@@ -62,8 +65,9 @@ class CommandListWaitOnMemFixture : public DeviceFixture {
|
||||
std::unique_ptr<L0::ult::CommandList> commandListBcs;
|
||||
std::unique_ptr<EventPool> eventPool;
|
||||
std::unique_ptr<Event> event;
|
||||
uint32_t waitMemData = 1u;
|
||||
void *ptr = nullptr;
|
||||
uint32_t waitMemData = 1u;
|
||||
bool signalAllPackets = false;
|
||||
};
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -393,6 +397,7 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndHostS
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
@@ -421,19 +426,40 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndHostS
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
|
||||
|
||||
size_t expectedPostSyncStoreDataImm = 0;
|
||||
uint64_t storeDataImmAddress = gpuAddress;
|
||||
if (signalAllPackets) {
|
||||
expectedPostSyncStoreDataImm = event->getMaxPacketsCount() - 1;
|
||||
}
|
||||
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(expectedPostSyncStoreDataImm, itorStoreDataImm.size());
|
||||
|
||||
for (size_t i = 0; i < expectedPostSyncStoreDataImm; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(storeDataImmAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0());
|
||||
storeDataImmAddress += event->getSinglePacketSize();
|
||||
}
|
||||
|
||||
auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
|
||||
itor++;
|
||||
auto itorPC = findAll<PIPE_CONTROL *>(itor, cmdList.end());
|
||||
ASSERT_NE(0u, itorPC.size());
|
||||
|
||||
auto pipeControlAddress = storeDataImmAddress;
|
||||
bool postSyncFound = false;
|
||||
for (auto it : itorPC) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(pipeControlAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
|
||||
postSyncFound = true;
|
||||
}
|
||||
@@ -445,6 +471,7 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndNoSco
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
@@ -471,19 +498,40 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndNoSco
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
|
||||
|
||||
size_t expectedPostSyncStoreDataImm = 0;
|
||||
uint64_t storeDataImmAddress = gpuAddress;
|
||||
if (signalAllPackets) {
|
||||
expectedPostSyncStoreDataImm = event->getMaxPacketsCount() - 1;
|
||||
}
|
||||
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(expectedPostSyncStoreDataImm, itorStoreDataImm.size());
|
||||
|
||||
for (size_t i = 0; i < expectedPostSyncStoreDataImm; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(storeDataImmAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0());
|
||||
storeDataImmAddress += event->getSinglePacketSize();
|
||||
}
|
||||
|
||||
auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
|
||||
itor++;
|
||||
auto itorPC = findAll<PIPE_CONTROL *>(itor, cmdList.end());
|
||||
ASSERT_NE(0u, itorPC.size());
|
||||
|
||||
auto pipeControlAddress = storeDataImmAddress;
|
||||
bool postSyncFound = false;
|
||||
for (auto it : itorPC) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(this->device);
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(pipeControlAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
||||
postSyncFound = true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user