fix: InOrderExecInfo upload to tbx

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-09-20 14:53:27 +00:00
committed by Compute-Runtime-Automation
parent 4ab60311c4
commit d7ce841081
5 changed files with 82 additions and 25 deletions

View File

@@ -39,7 +39,7 @@ NEO::TagAllocatorBase *getInOrderCounterAllocator(std::unique_ptr<NEO::TagAlloca
DEBUG_BREAK_IF(alignUp(nodeSize, MemoryConstants::cacheLineSize) * NodeT::defaultAllocatorTagCount > MemoryConstants::pageSize64k);
allocator = std::make_unique<NEO::TagAllocator<NodeT>>(rootDeviceIndices, neoDevice.getMemoryManager(), NodeT::defaultAllocatorTagCount,
MemoryConstants::cacheLineSize, nodeSize, false, true, neoDevice.getDeviceBitfield());
MemoryConstants::cacheLineSize, nodeSize, false, false, neoDevice.getDeviceBitfield());
}
}

View File

@@ -3445,7 +3445,7 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI
auto mockNode = tagAllocator.getTag();
auto syncAllocation = mockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(mockNode, nullptr, *neoDevice->getMemoryManager(), 1, 0, false, false);
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(mockNode, nullptr, *neoDevice, 1, false, false);
*inOrderExecInfo->getBaseHostAddress() = 1;
event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);
@@ -3547,7 +3547,7 @@ HWTEST_F(EventTests, givenInOrderEventWithHostAllocWhenHostSynchronizeIsCalledTh
auto deviceSyncAllocation = deviceMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto hostSyncAllocation = hostMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(deviceMockNode, hostMockNode, *neoDevice->getMemoryManager(), 1, 0, false, false);
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(deviceMockNode, hostMockNode, *neoDevice, 1, false, false);
*inOrderExecInfo->getBaseHostAddress() = 1;
event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);

View File

@@ -7,10 +7,10 @@
#include "shared/source/helpers/in_order_cmd_helpers.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/utilities/tag_allocator.h"
#include <cstdint>
@@ -24,11 +24,11 @@ std::shared_ptr<InOrderExecInfo> InOrderExecInfo::create(TagNodeBase *deviceCoun
UNRECOVERABLE_IF(!deviceCounterNode);
return std::make_shared<NEO::InOrderExecInfo>(deviceCounterNode, hostCounterNode, *device.getMemoryManager(), partitionCount, device.getRootDeviceIndex(), regularCmdList, atomicDeviceSignalling);
return std::make_shared<NEO::InOrderExecInfo>(deviceCounterNode, hostCounterNode, device, partitionCount, regularCmdList, atomicDeviceSignalling);
}
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, NEO::GraphicsAllocation *hostAllocation, uint64_t *hostAddress, uint64_t counterValue) {
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(nullptr, nullptr, *device.getMemoryManager(), 1, device.getRootDeviceIndex(), false, true);
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(nullptr, nullptr, device, 1, false, true);
inOrderExecInfo->counterValue = counterValue;
inOrderExecInfo->externalHostAllocation = hostAllocation;
@@ -51,9 +51,8 @@ InOrderExecInfo::~InOrderExecInfo() {
releaseNotUsedTempTimestampNodes(true);
}
InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
bool regularCmdList, bool atomicDeviceSignalling)
: memoryManager(memoryManager), deviceCounterNode(deviceCounterNode), hostCounterNode(hostCounterNode), rootDeviceIndex(rootDeviceIndex),
InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling)
: device(device), deviceCounterNode(deviceCounterNode), hostCounterNode(hostCounterNode), rootDeviceIndex(device.getRootDeviceIndex()),
regularCmdList(regularCmdList), atomicDeviceSignalling(atomicDeviceSignalling) {
numDevicePartitionsToWait = atomicDeviceSignalling ? 1 : partitionCount;
@@ -70,18 +69,47 @@ InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *ho
deviceAddress = deviceCounterNode->getGpuAddress();
}
isTbx = device.getDefaultEngine().commandStreamReceiver->isTbxMode();
reset();
}
// Writes the counter storage behind `node` to the default engine's command stream receiver.
// node - tag node whose graphics allocation (for this object's rootDeviceIndex) is written
// size - number of bytes sent when the chunk-write path is taken
void InOrderExecInfo::uploadToTbx(TagNodeBase &node, size_t size) {
    constexpr uint32_t everyBank = std::numeric_limits<uint32_t>::max();

    auto receiver = device.getDefaultEngine().commandStreamReceiver;
    auto gfxAllocation = node.getBaseGraphicsAllocation()->getGraphicsAllocation(rootDeviceIndex);

    // position of this node's counters inside the allocation, including this object's allocationOffset
    auto chunkOffset = ptrDiff(node.getGpuAddress(), gfxAllocation->getGpuAddress()) + this->allocationOffset;

    const bool alreadyInitialized = !gfxAllocation->isTbxWritable(everyBank);

    if (alreadyInitialized) {
        // chunk write if allocation already initialized
        gfxAllocation->setTbxWritable(true, everyBank);
        receiver->writeMemory(*gfxAllocation, true, chunkOffset, size);
    } else {
        // initialize full page tables for the first time
        receiver->writeMemory(*gfxAllocation, false, 0, 0);
    }

    // both paths finish with the writable flag cleared for all banks
    gfxAllocation->setTbxWritable(false, everyBank);
}
void InOrderExecInfo::initializeAllocationsFromHost() {
if (deviceCounterNode) {
const size_t deviceAllocationWriteSize = sizeof(uint64_t) * numDevicePartitionsToWait;
memset(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset), 0, deviceAllocationWriteSize);
if (isTbx) {
uploadToTbx(*deviceCounterNode, deviceAllocationWriteSize);
}
}
if (hostCounterNode) {
const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait;
memset(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset), 0, hostAllocationWriteSize);
if (isTbx) {
uploadToTbx(*hostCounterNode, hostAllocationWriteSize);
}
}
}

View File

@@ -52,8 +52,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList);
static std::shared_ptr<InOrderExecInfo> createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, NEO::GraphicsAllocation *hostAllocation, uint64_t *hostAddress, uint64_t counterValue);
InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
bool regularCmdList, bool atomicDeviceSignalling);
InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling);
NEO::GraphicsAllocation *getDeviceCounterAllocation() const;
NEO::GraphicsAllocation *getHostCounterAllocation() const;
@@ -95,7 +94,9 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
void releaseNotUsedTempTimestampNodes(bool forceReturn);
protected:
NEO::MemoryManager &memoryManager;
void uploadToTbx(TagNodeBase &node, size_t size);
NEO::Device &device;
NEO::TagNodeBase *deviceCounterNode = nullptr;
NEO::TagNodeBase *hostCounterNode = nullptr;
NEO::GraphicsAllocation *externalHostAllocation = nullptr;
@@ -116,6 +117,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
bool regularCmdList = false;
bool duplicatedHostStorage = false;
bool atomicDeviceSignalling = false;
bool isTbx = false;
};
namespace InOrderPatchCommandHelpers {

View File

@@ -23,6 +23,7 @@
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
@@ -89,7 +90,6 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
AllocatorT tsAllocator(0, mockDevice.getMemoryManager());
auto &memoryManager = *mockDevice.getMemoryManager();
auto node0 = static_cast<AllocatorT::NodeType *>(tsAllocator.getTag());
auto node1 = static_cast<AllocatorT::NodeType *>(tsAllocator.getTag());
@@ -97,7 +97,7 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
{
MyMockInOrderExecInfo inOrderExecInfo(nullptr, nullptr, memoryManager, 1, 0, false, false);
MyMockInOrderExecInfo inOrderExecInfo(nullptr, nullptr, mockDevice, 1, false, false);
inOrderExecInfo.lastWaitedCounterValue = 0;
@@ -147,7 +147,6 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, mockDevice.getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> hostTagAllocator(0, mockDevice.getMemoryManager());
auto &memoryManager = *mockDevice.getMemoryManager();
auto tempNode1 = deviceTagAllocator.getTag();
auto tempNode2 = hostTagAllocator.getTag();
@@ -184,7 +183,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
{
auto deviceNode = deviceTagAllocator.getTag();
InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, true);
InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, mockDevice, 2, true, true);
EXPECT_TRUE(inOrderExecInfo.isRegularCmdList());
EXPECT_TRUE(inOrderExecInfo.isAtomicDeviceSignalling());
EXPECT_EQ(1u, inOrderExecInfo.getNumDevicePartitionsToWait());
@@ -214,7 +213,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
auto deviceNode = deviceTagAllocator.getTag();
auto hostNode = hostTagAllocator.getTag();
InOrderExecInfo inOrderExecInfo(deviceNode, hostNode, memoryManager, 1, 0, false, false);
InOrderExecInfo inOrderExecInfo(deviceNode, hostNode, mockDevice, 1, false, false);
auto deviceAllocHostAddress = reinterpret_cast<uint64_t *>(deviceNode->getCpuBase());
EXPECT_EQ(0u, inOrderExecInfo.getCounterValue());
EXPECT_EQ(0u, inOrderExecInfo.getRegularCmdListSubmissionCounter());
@@ -244,7 +243,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
{
auto deviceNode = deviceTagAllocator.getTag();
InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, false);
InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, mockDevice, 2, true, false);
EXPECT_EQ(0u, InOrderPatchCommandHelpers::getAppendCounterValue(inOrderExecInfo));
inOrderExecInfo.addCounterValue(2);
@@ -265,12 +264,11 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenSetLastCounterValueIs
MockDevice mockDevice;
MockExecutionEnvironment mockExecutionEnvironment{};
MockMemoryManager memoryManager(mockExecutionEnvironment);
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, mockDevice.getMemoryManager());
auto node = tagAllocator.getTag();
auto inOrderExecInfo = std::make_unique<InOrderExecInfo>(node, nullptr, memoryManager, 2, 0, true, false);
auto inOrderExecInfo = std::make_unique<InOrderExecInfo>(node, nullptr, mockDevice, 2, true, false);
inOrderExecInfo->setLastWaitedCounterValue(1u);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
@@ -292,6 +290,35 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenSetLastCounterValueIs
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
}
// Checks how many CSR writeMemory calls InOrderExecInfo issues when the CSR type is TBX,
// both on construction and on explicit reset(), and how many of those are chunk writes.
// All counters checked below are cumulative over the whole test.
HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenResetCalledThenUploadToTbx) {
MockDevice mockDevice;
auto &csr = mockDevice.getUltCommandStreamReceiver<FamilyType>();
// switch the CSR type to TBX before any InOrderExecInfo is created
csr.commandStreamReceiverType = CommandStreamReceiverType::tbx;
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, mockDevice.getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<false>> hostTagAllocator(0, mockDevice.getMemoryManager());
auto deviceNode = deviceTagAllocator.getTag();
auto hostNode = hostTagAllocator.getTag();
// nothing has been written yet
EXPECT_EQ(0u, csr.writeMemoryParams.totalCallCount);
// construction with device and host nodes: one write per node, neither is a chunk write
auto inOrderExecInfo = std::make_unique<InOrderExecInfo>(deviceNode, hostNode, mockDevice, 2, true, false);
EXPECT_EQ(2u, csr.writeMemoryParams.totalCallCount);
EXPECT_EQ(0u, csr.writeMemoryParams.chunkWriteCallCount);
// reset on the same object: both nodes are written again, this time as chunk writes
inOrderExecInfo->reset();
EXPECT_EQ(4u, csr.writeMemoryParams.totalCallCount);
EXPECT_EQ(2u, csr.writeMemoryParams.chunkWriteCallCount);
// new object reusing the same device node and no host node: one additional chunk write
inOrderExecInfo = std::make_unique<InOrderExecInfo>(deviceNode, nullptr, mockDevice, 2, true, false);
EXPECT_EQ(5u, csr.writeMemoryParams.totalCallCount);
EXPECT_EQ(3u, csr.writeMemoryParams.chunkWriteCallCount);
// reset with only the device node present: one more chunk write
inOrderExecInfo->reset();
EXPECT_EQ(6u, csr.writeMemoryParams.totalCallCount);
EXPECT_EQ(4u, csr.writeMemoryParams.chunkWriteCallCount);
}
HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValues) {
MockDevice mockDevice;
@@ -301,7 +328,7 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValu
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, mockDevice.getMemoryManager());
auto node = tagAllocator.getTag();
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, memoryManager, 2, 0, true, false);
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, mockDevice, 2, true, false);
inOrderExecInfo->addCounterValue(1);
{
@@ -371,7 +398,7 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingWalkerThenSetCorre
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, mockDevice.getMemoryManager());
auto node = tagAllocator.getTag();
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, memoryManager, 2, 0, false, false);
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, mockDevice, 2, false, false);
auto cmd = FamilyType::template getInitGpuWalker<DefaultWalkerType>();
@@ -395,7 +422,7 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingDisabledThenNoCmdB
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, mockDevice.getMemoryManager());
auto node = tagAllocator.getTag();
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, memoryManager, 1, 0, true, false);
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, mockDevice, 1, true, false);
inOrderExecInfo->addRegularCmdListSubmissionCounter(4);
inOrderExecInfo->addCounterValue(1);
@@ -423,7 +450,7 @@ HWTEST_F(CommandEncoderTests, givenNewInOrderExecInfoWhenChangingInOrderExecInfo
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, mockDevice.getMemoryManager());
auto node = tagAllocator.getTag();
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, memoryManager, 1, 0, true, false);
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(node, nullptr, mockDevice, 1, true, false);
inOrderExecInfo->addRegularCmdListSubmissionCounter(4);
inOrderExecInfo->addCounterValue(1);
@@ -437,7 +464,7 @@ HWTEST_F(CommandEncoderTests, givenNewInOrderExecInfoWhenChangingInOrderExecInfo
EXPECT_EQ(4u, cmd.getSemaphoreDataDword());
auto node2 = tagAllocator.getTag();
auto inOrderExecInfo2 = std::make_shared<InOrderExecInfo>(node2, nullptr, memoryManager, 1, 0, true, false);
auto inOrderExecInfo2 = std::make_shared<InOrderExecInfo>(node2, nullptr, mockDevice, 1, true, false);
inOrderExecInfo2->addRegularCmdListSubmissionCounter(6);
inOrderExecInfo2->addCounterValue(1);