mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Introduce barrier tracking mechanism
Related-To: NEO-7696 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6f3503af38
commit
9f574b6fba
@@ -2036,6 +2036,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
}
|
||||
}
|
||||
|
||||
if (this->cmdListType == TYPE_IMMEDIATE && isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
const auto &productHelper = this->device->getProductHelper();
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), this->csr->getBarrierCountGpuAddress(), this->csr->getNextBarrierCount() + 1, args, productHelper);
|
||||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||
*commandContainer.getCommandStream(),
|
||||
@@ -2563,8 +2571,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
uint64_t gpuAddress = 0u;
|
||||
TaskCountType value = 0u;
|
||||
|
||||
if (this->cmdListType == TYPE_IMMEDIATE) {
|
||||
args.commandWithPostSync = true;
|
||||
gpuAddress = this->csr->getBarrierCountGpuAddress();
|
||||
value = this->csr->getNextBarrierCount() + 1;
|
||||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||
}
|
||||
|
||||
const auto &productHelper = this->device->getProductHelper();
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, productHelper);
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), gpuAddress, value, args, productHelper);
|
||||
} else {
|
||||
appendComputeBarrierCommand();
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_container/encode_surface_state.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
@@ -704,6 +705,71 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
// Checks that appendBarrier() on an immediate, copy-only command list takes a fresh barrier
// count from the CSR and programs an MI_FLUSH_DW whose post-sync write stores that count at
// the CSR's barrier-count GPU address.
HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendBarrierThenIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    // Everything below dereferences the command list, so a failed creation must stop the test.
    ASSERT_NE(nullptr, commandList.get());
    // getNextBarrierCount() returns the current count and increments it, so this check
    // itself consumes the value 0.
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u);

    auto result = commandList->appendBarrier(nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    // appendBarrier() is expected to have consumed the value 1, leaving 2 to be observed here.
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 2u);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    // Fatal check: the iterator is advanced and dereferenced below.
    ASSERT_NE(cmdList.end(), itor);
    if (EncodeMiFlushDW<FamilyType>::getMiFlushDwWaSize()) {
        // NOTE(review): presumably the first MI_FLUSH_DW is the workaround flush on such
        // platforms, so the command under test is the next one - confirm against the encoder.
        ++itor;
        ASSERT_NE(cmdList.end(), itor);
    }

    auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
    EXPECT_EQ(cmd->getDestinationAddress(), commandList->csr->getBarrierCountGpuAddress());
    EXPECT_EQ(cmd->getImmediateData(), 2u);
}
|
||||
|
||||
// Checks that appendWaitOnEvents() on an immediate, copy-only command list takes a fresh
// barrier count from the CSR and programs an MI_FLUSH_DW whose post-sync write stores that
// count at the CSR's barrier-count GPU address.
HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEventsThenIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    // Everything below dereferences the command list, so a failed creation must stop the test.
    ASSERT_NE(nullptr, commandList.get());
    // getNextBarrierCount() returns the current count and increments it, so this check
    // itself consumes the value 0.
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // The pool is handed to Event::create right away, so a creation failure must be fatal.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());
    auto eventHandle = event->toHandle();

    result = commandList->appendWaitOnEvents(1u, &eventHandle, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    // appendWaitOnEvents() is expected to have consumed the value 1, leaving 2 to be observed here.
    EXPECT_EQ(commandList->csr->getNextBarrierCount(), 2u);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    // Fatal check: the iterator is advanced and dereferenced below.
    ASSERT_NE(cmdList.end(), itor);
    if (EncodeMiFlushDW<FamilyType>::getMiFlushDwWaSize()) {
        // NOTE(review): presumably the first MI_FLUSH_DW is the workaround flush on such
        // platforms, so the command under test is the next one - confirm against the encoder.
        ++itor;
        ASSERT_NE(cmdList.end(), itor);
    }

    auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
    EXPECT_EQ(cmd->getDestinationAddress(), commandList->csr->getBarrierCountGpuAddress());
    EXPECT_EQ(cmd->getImmediateData(), 2u);
}
|
||||
|
||||
HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionDisabledForImmediateWhenAppendBarrierWithEventThenSuccessIsReturned) {
|
||||
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0);
|
||||
|
||||
|
||||
@@ -743,6 +743,8 @@ bool CommandStreamReceiver::initializeTagAllocation() {
|
||||
userPauseConfirmation = Thread::create(CommandStreamReceiver::asyncDebugBreakConfirmation, reinterpret_cast<void *>(this));
|
||||
}
|
||||
|
||||
this->barrierCountTagAddress = ptrOffset(this->tagAddress, TagAllocationLayout::barrierCountOffset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1009,7 +1011,7 @@ TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionS
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the GPU address of the barrier-count slot: the tag allocation's GPU address
// advanced by TagAllocationLayout::barrierCountOffset.
uint64_t CommandStreamReceiver::getBarrierCountGpuAddress() const {
    const auto tagGpuAddress = this->tagAllocation->getGpuAddress();
    return ptrOffset(tagGpuAddress, TagAllocationLayout::barrierCountOffset);
}
|
||||
// Returns the GPU address of the debug-pause-state slot: the tag allocation's GPU address
// plus TagAllocationLayout::debugPauseStateAddressOffset.
uint64_t CommandStreamReceiver::getDebugPauseStateGPUAddress() const {
    const auto tagGpuAddress = tagAllocation->getGpuAddress();
    return tagGpuAddress + TagAllocationLayout::debugPauseStateAddressOffset;
}
|
||||
uint64_t CommandStreamReceiver::getCompletionAddress() const {
|
||||
uint64_t completionFenceAddress = castToUint64(const_cast<TagAddressType *>(tagAddress));
|
||||
|
||||
@@ -132,7 +132,11 @@ class CommandStreamReceiver {
|
||||
return tagsMultiAllocation;
|
||||
}
|
||||
MultiGraphicsAllocation &createTagsMultiAllocation();
|
||||
|
||||
// Returns the barrier count held before this call and increments the counter by one.
// Despite the getter-like name this modifies state: each call consumes one count value.
TaskCountType getNextBarrierCount() {
    const TaskCountType previousCount = this->barrierCount.fetch_add(1u);
    return previousCount;
}
|
||||
// Returns the stored tag address pointer.
volatile TagAddressType *getTagAddress() const {
    return this->tagAddress;
}
|
||||
// Returns the stored barrier-count tag address pointer.
volatile TagAddressType *getBarrierCountTagAddress() const {
    return barrierCountTagAddress;
}
|
||||
uint64_t getBarrierCountGpuAddress() const;
|
||||
uint64_t getDebugPauseStateGPUAddress() const;
|
||||
|
||||
// Default implementation: does not inspect flushStampToWait and always returns true.
// Declared virtual so derived classes can override it.
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) {
    return true;
}
|
||||
@@ -424,6 +428,7 @@ class CommandStreamReceiver {
|
||||
uint64_t totalMemoryUsed = 0u;
|
||||
|
||||
volatile TagAddressType *tagAddress = nullptr;
|
||||
volatile TagAddressType *barrierCountTagAddress = nullptr;
|
||||
volatile DebugPauseState *debugPauseStateAddress = nullptr;
|
||||
SpinLock debugPauseStateLock;
|
||||
static void *asyncDebugBreakConfirmation(void *arg);
|
||||
@@ -445,6 +450,7 @@ class CommandStreamReceiver {
|
||||
OsContext *osContext = nullptr;
|
||||
TaskCountType *completionFenceValuePointer = nullptr;
|
||||
|
||||
std::atomic<TaskCountType> barrierCount{0};
|
||||
// current taskLevel. Used for determining if a PIPE_CONTROL is needed.
|
||||
std::atomic<TaskCountType> taskLevel{0};
|
||||
std::atomic<TaskCountType> latestSentTaskCount{0};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,6 +12,6 @@ namespace NEO {
|
||||
namespace TagAllocationLayout {
|
||||
inline constexpr uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;
|
||||
inline constexpr uint64_t completionFenceOffset = 2 * MemoryConstants::kiloByte;
|
||||
|
||||
// Byte offset of the barrier-count slot from the start of the tag allocation (3 KB).
inline constexpr uint64_t barrierCountOffset = 3 * MemoryConstants::kiloByte;
|
||||
} // namespace TagAllocationLayout
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1454,6 +1454,24 @@ TEST(CommandStreamReceiverSimpleTest, givenPrintfTagAllocationAddressFlagEnabled
|
||||
EXPECT_TRUE(hasSubstr(output, std::string(expectedStr)));
|
||||
}
|
||||
|
||||
// Checks that initializeTagAllocation() places both the CPU-side barrier-count tag pointer
// and the barrier-count GPU address at TagAllocationLayout::barrierCountOffset from the
// corresponding tag base address.
TEST(CommandStreamReceiverSimpleTest, whenInitializeTagAllocationThenBarrierCountAddressAreSet) {
    DeviceBitfield deviceBitfield(1);
    auto osContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0, 0,
                                                                  EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})));

    MockExecutionEnvironment executionEnvironment;
    executionEnvironment.prepareRootDeviceEnvironments(1);
    executionEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield);
    csr.setupContext(*osContext);

    // The checks below dereference the tag allocation, so a failed initialization must be fatal.
    ASSERT_TRUE(csr.initializeTagAllocation());
    ASSERT_NE(nullptr, csr.getTagAllocation());

    EXPECT_EQ(csr.getBarrierCountTagAddress(), ptrOffset(csr.getTagAddress(), TagAllocationLayout::barrierCountOffset));
    EXPECT_EQ(csr.getBarrierCountGpuAddress(), ptrOffset(csr.getTagAllocation()->getGpuAddress(), TagAllocationLayout::barrierCountOffset));
}
|
||||
|
||||
TEST(CommandStreamReceiverSimpleTest, givenGpuIdleImplicitFlushCheckDisabledWhenGpuIsIdleThenReturnFalse) {
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||
|
||||
Reference in New Issue
Block a user