feature: get command list information for in order noop data

Related-To: NEO-15376

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-08-08 12:42:03 +00:00
committed by Compute-Runtime-Automation
parent c2d64cae6a
commit 092f2bb333
10 changed files with 127 additions and 7 deletions

View File

@@ -374,6 +374,38 @@ void CommandListImp::clearInOrderExecCounterAllocation() {
} }
} }
// Returns the device node write size reported by inOrderExecInfo.
// Yields 0 when in-order execution is not enabled for this command list.
size_t CommandListImp::getInOrderExecDeviceRequiredSize() const {
    if (!isInOrderExecutionEnabled()) {
        return 0;
    }
    return inOrderExecInfo->getDeviceNodeWriteSize();
}
// Returns the device node GPU address reported by inOrderExecInfo.
// Yields 0 when in-order execution is not enabled for this command list.
uint64_t CommandListImp::getInOrderExecDeviceGpuAddress() const {
    if (!isInOrderExecutionEnabled()) {
        return 0;
    }
    return inOrderExecInfo->getDeviceNodeGpuAddress();
}
// Returns the host node write size reported by inOrderExecInfo.
// Yields 0 when in-order execution is not enabled for this command list.
size_t CommandListImp::getInOrderExecHostRequiredSize() const {
    if (!isInOrderExecutionEnabled()) {
        return 0;
    }
    return inOrderExecInfo->getHostNodeWriteSize();
}
// Returns the host node GPU address reported by inOrderExecInfo.
// Yields 0 when in-order execution is not enabled for this command list.
uint64_t CommandListImp::getInOrderExecHostGpuAddress() const {
    if (!isInOrderExecutionEnabled()) {
        return 0;
    }
    return inOrderExecInfo->getHostNodeGpuAddress();
}
void CommandListImp::enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode) { void CommandListImp::enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode) {
if (!device->isImplicitScalingCapable() || this->synchronizedDispatchMode != NEO::SynchronizedDispatchMode::disabled) { if (!device->isImplicitScalingCapable() || this->synchronizedDispatchMode != NEO::SynchronizedDispatchMode::disabled) {
return; return;

View File

@@ -52,6 +52,10 @@ struct CommandListImp : public CommandList {
void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr); void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr);
virtual bool kernelMemoryPrefetchEnabled() const = 0; virtual bool kernelMemoryPrefetchEnabled() const = 0;
std::shared_ptr<NEO::InOrderExecInfo> &getInOrderExecInfo() { return inOrderExecInfo; } std::shared_ptr<NEO::InOrderExecInfo> &getInOrderExecInfo() { return inOrderExecInfo; }
// Device node write size taken from inOrderExecInfo; 0 when in-order execution is disabled.
size_t getInOrderExecDeviceRequiredSize() const;
// Device node GPU address taken from inOrderExecInfo; 0 when in-order execution is disabled.
uint64_t getInOrderExecDeviceGpuAddress() const;
// Host node write size taken from inOrderExecInfo; 0 when in-order execution is disabled.
size_t getInOrderExecHostRequiredSize() const;
// Host node GPU address taken from inOrderExecInfo; 0 when in-order execution is disabled.
uint64_t getInOrderExecHostGpuAddress() const;
protected: protected:
std::shared_ptr<NEO::InOrderExecInfo> inOrderExecInfo; std::shared_ptr<NEO::InOrderExecInfo> inOrderExecInfo;

View File

@@ -23,6 +23,7 @@
#include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/product_helper.h" #include "shared/source/os_interface/product_helper.h"
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
#include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/device/device_imp.h"
@@ -400,4 +401,13 @@ void CommandQueueImp::makeResidentForResidencyContainer(const NEO::ResidencyCont
} }
} }
// Applies the in-order bookkeeping for a single command list.
// With in-order command patching enabled, the regular command list submission
// counter is bumped first and the in-order commands are patched afterwards.
// Otherwise the in-order exec counter allocation is cleared.
void CommandQueueImp::prepareInOrderCommandList(CommandListImp *commandList) {
    if (!commandList->inOrderCmdsPatchingEnabled()) {
        commandList->clearInOrderExecCounterAllocation();
        return;
    }

    commandList->addRegularCmdListSubmissionCounter();
    commandList->patchInOrderCmds();
}
} // namespace L0 } // namespace L0

View File

@@ -788,12 +788,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
} }
commandList->storeReferenceTsToMappedEvents(false); commandList->storeReferenceTsToMappedEvents(false);
if (commandList->inOrderCmdsPatchingEnabled()) { this->prepareInOrderCommandList(commandList);
commandList->addRegularCmdListSubmissionCounter();
commandList->patchInOrderCmds();
} else {
commandList->clearInOrderExecCounterAllocation();
}
commandList->setInterruptEventsCsr(*this->csr); commandList->setInterruptEventsCsr(*this->csr);

View File

@@ -34,6 +34,7 @@ class MemoryManager;
namespace L0 { namespace L0 {
struct CommandList; struct CommandList;
struct CommandListImp;
struct Kernel; struct Kernel;
struct CommandQueueImp : public CommandQueue { struct CommandQueueImp : public CommandQueue {
class CommandBufferManager { class CommandBufferManager {
@@ -126,6 +127,7 @@ struct CommandQueueImp : public CommandQueue {
ze_result_t synchronizeByPollingForTaskCount(uint64_t timeoutNanoseconds); ze_result_t synchronizeByPollingForTaskCount(uint64_t timeoutNanoseconds);
void postSyncOperations(bool hangDetected); void postSyncOperations(bool hangDetected);
// When in-order command patching is enabled on commandList, increments its regular
// command list submission counter and patches its in-order commands; otherwise
// clears its in-order exec counter allocation.
void prepareInOrderCommandList(CommandListImp *commandList);
static constexpr uint32_t defaultCommandListStateChangeListSize = 10; static constexpr uint32_t defaultCommandListStateChangeListSize = 10;
struct CommandListDirtyFlags { struct CommandListDirtyFlags {

View File

@@ -1704,5 +1704,17 @@ HWTEST_F(ImmediateCommandListTest,
EXPECT_TRUE(ultCsr.isMadeResident(cmdBufferAllocation)); EXPECT_TRUE(ultCsr.isMadeResident(cmdBufferAllocation));
} }
// A regular command list created with flags == 0 must report zero for every
// in-order size and GPU address query.
HWTEST_F(CommandListCreateTests, givenRegularOutOfOrderCommandListWhenGettingInOrderPropertiesThenReturnZeros) {
    ze_result_t result;
    std::unique_ptr<L0::CommandList> cmdList(CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, result, false));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *cmdListImp = static_cast<L0::CommandListImp *>(cmdList.get());

    // Device side queries.
    EXPECT_EQ(0u, cmdListImp->getInOrderExecDeviceRequiredSize());
    EXPECT_EQ(0u, cmdListImp->getInOrderExecDeviceGpuAddress());

    // Host side queries.
    EXPECT_EQ(0u, cmdListImp->getInOrderExecHostRequiredSize());
    EXPECT_EQ(0u, cmdListImp->getInOrderExecHostGpuAddress());
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -318,5 +318,29 @@ HWTEST_F(InOrderIpcTests, givenIncorrectParamsWhenUsingIpcApisThenReturnError) {
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zexCounterBasedEventOpenIpcHandle(context->toHandle(), zexIpcData, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zexCounterBasedEventOpenIpcHandle(context->toHandle(), zexIpcData, nullptr));
} }
using InOrderRegularCmdListTests = InOrderCmdListFixture;

// Checks the in-order size/address queries of a regular command list, first with
// InOrderDuplicatedCounterStorageEnabled set to 0 and then with it set to 1.
HWTEST_F(InOrderRegularCmdListTests, givenInOrderCmdListWhenQueryingRequiredSizeThenExpectCorrectValues) {
    // Flag set to 0: device node values are reported, host node values are zero.
    debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(0);
    auto regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(false);

    EXPECT_EQ(sizeof(uint64_t), regularCmdList->getInOrderExecDeviceRequiredSize());
    EXPECT_NE(0u, regularCmdList->getInOrderExecDeviceGpuAddress());
    EXPECT_EQ(0u, regularCmdList->getInOrderExecHostRequiredSize());
    EXPECT_EQ(0u, regularCmdList->getInOrderExecHostGpuAddress());

    // Flag set to 1: a freshly created list reports host node values as well.
    debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
    regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(false);

    EXPECT_EQ(sizeof(uint64_t), regularCmdList->getInOrderExecHostRequiredSize());
    EXPECT_NE(0u, regularCmdList->getInOrderExecHostGpuAddress());
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -165,4 +165,18 @@ void InOrderExecInfo::releaseNotUsedTempTimestampNodes(bool forceReturn) {
tempTimestampNodes.swap(tempVector); tempTimestampNodes.swap(tempVector);
} }
// Returns the host counter node GPU address advanced by allocationOffset,
// or 0 when no host counter node is set.
uint64_t InOrderExecInfo::getHostNodeGpuAddress() const {
    if (!hostCounterNode) {
        return 0;
    }
    return hostCounterNode->getGpuAddress() + allocationOffset;
}
// Returns the device counter node GPU address advanced by allocationOffset,
// or 0 when no device counter node is set.
uint64_t InOrderExecInfo::getDeviceNodeGpuAddress() const {
    if (!deviceCounterNode) {
        return 0;
    }
    return deviceCounterNode->getGpuAddress() + allocationOffset;
}
} // namespace NEO } // namespace NEO

View File

@@ -63,6 +63,23 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
uint64_t getBaseDeviceAddress() const { return deviceAddress; } uint64_t getBaseDeviceAddress() const { return deviceAddress; }
uint64_t getBaseHostGpuAddress() const; uint64_t getBaseHostGpuAddress() const;
// GPU address of the device counter node plus allocationOffset; 0 when there is no device counter node.
uint64_t getDeviceNodeGpuAddress() const;
// GPU address of the host counter node plus allocationOffset; 0 when there is no host counter node.
uint64_t getHostNodeGpuAddress() const;
// Returns one uint64_t per device partition to wait on,
// or 0 when no device counter node is set.
size_t getDeviceNodeWriteSize() const {
    if (!deviceCounterNode) {
        return 0;
    }
    return sizeof(uint64_t) * numDevicePartitionsToWait;
}
// Returns one uint64_t per host partition to wait on,
// or 0 when no host counter node is set.
size_t getHostNodeWriteSize() const {
    if (!hostCounterNode) {
        return 0;
    }
    return sizeof(uint64_t) * numHostPartitionsToWait;
}
uint64_t getCounterValue() const { return counterValue; } uint64_t getCounterValue() const { return counterValue; }
void addCounterValue(uint64_t addValue) { counterValue += addValue; } void addCounterValue(uint64_t addValue) { counterValue += addValue; }
void resetCounterValue() { counterValue = 0; } void resetCounterValue() { counterValue = 0; }

View File

@@ -82,6 +82,10 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingStandaloneInO
EXPECT_TRUE(inOrderExecInfo->isExternalMemoryExecInfo()); EXPECT_TRUE(inOrderExecInfo->isExternalMemoryExecInfo());
EXPECT_EQ(2u, inOrderExecInfo->getNumDevicePartitionsToWait()); EXPECT_EQ(2u, inOrderExecInfo->getNumDevicePartitionsToWait());
EXPECT_EQ(3u, inOrderExecInfo->getNumHostPartitionsToWait()); EXPECT_EQ(3u, inOrderExecInfo->getNumHostPartitionsToWait());
EXPECT_EQ(0u, inOrderExecInfo->getDeviceNodeWriteSize());
EXPECT_EQ(0u, inOrderExecInfo->getHostNodeWriteSize());
EXPECT_EQ(0u, inOrderExecInfo->getDeviceNodeGpuAddress());
EXPECT_EQ(0u, inOrderExecInfo->getHostNodeGpuAddress());
inOrderExecInfo->reset(); inOrderExecInfo->reset();
@@ -215,11 +219,17 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1); debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, 2, false); constexpr uint32_t partitionCount = 2u;
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, partitionCount, false);
EXPECT_EQ(inOrderExecInfo->getBaseHostGpuAddress(), hostNode->getGpuAddress()); EXPECT_EQ(inOrderExecInfo->getBaseHostGpuAddress(), hostNode->getGpuAddress());
EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation());
EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation());
EXPECT_NE(0u, inOrderExecInfo->getDeviceNodeGpuAddress());
size_t deviceNodeSize = sizeof(uint64_t) * (mockDevice.getGfxCoreHelper().inOrderAtomicSignallingEnabled(mockDevice.getRootDeviceEnvironment()) ? 1u : partitionCount);
EXPECT_EQ(deviceNodeSize, inOrderExecInfo->getDeviceNodeWriteSize());
EXPECT_NE(0u, inOrderExecInfo->getHostNodeGpuAddress());
EXPECT_EQ(sizeof(uint64_t) * partitionCount, inOrderExecInfo->getHostNodeWriteSize());
EXPECT_NE(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress()); EXPECT_NE(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress());
EXPECT_EQ(ptrOffset(inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), offset), inOrderExecInfo->getBaseHostAddress()); EXPECT_EQ(ptrOffset(inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), offset), inOrderExecInfo->getBaseHostAddress());