Refactor TagAllocator

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>

Author: Bartosz Dunajski
Date: 2021-03-24 18:21:13 +00:00
Committed by: Compute-Runtime-Automation
Parent: cb4db7767e
Commit: 5a50ad098c
49 changed files with 868 additions and 430 deletions

View File

@@ -606,15 +606,15 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
return true;
}
TagAllocator<HwTimeStamps> *CommandStreamReceiver::getEventTsAllocator() {
TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() {
if (profilingTimeStampAllocator.get() == nullptr) {
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false, osContext->getDeviceBitfield());
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize,
sizeof(HwTimeStamps), false, osContext->getDeviceBitfield());
}
return profilingTimeStampAllocator.get();
}
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
TagAllocatorBase *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
if (perfCounterAllocator.get() == nullptr) {
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false, osContext->getDeviceBitfield());
@@ -622,15 +622,15 @@ TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(c
return perfCounterAllocator.get();
}
TagAllocator<TimestampPacketStorage> *CommandStreamReceiver::getTimestampPacketAllocator() {
TagAllocatorBase *CommandStreamReceiver::getTimestampPacketAllocator() {
if (timestampPacketAllocator.get() == nullptr) {
// don't release nodes in aub/tbx mode, to avoid removing semaphores optimization or reusing returned tags
bool doNotReleaseNodes = (getType() > CommandStreamReceiverType::CSR_HW) ||
DebugManager.flags.DisableTimestampPacketOptimizations.get();
timestampPacketAllocator = std::make_unique<TagAllocator<TimestampPacketStorage>>(
timestampPacketAllocator = std::make_unique<TagAllocator<NEO::TimestampPackets<uint32_t>>>(
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize * 4,
sizeof(TimestampPacketStorage), doNotReleaseNodes, osContext->getDeviceBitfield());
sizeof(NEO::TimestampPackets<uint32_t>), doNotReleaseNodes, osContext->getDeviceBitfield());
}
return timestampPacketAllocator.get();
}
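
A minimal standalone sketch of the accessor pattern above, assuming simplified stand-in types (AllocatorBase, Allocator, MyTag, Receiver are illustrative, not the NEO classes): the receiver owns a std::unique_ptr to the base class and lazily constructs the concrete templated allocator on first use, so callers only ever see the base pointer.

#include <cstddef>
#include <cstdint>
#include <memory>

class AllocatorBase {
  public:
    virtual ~AllocatorBase() = default;
};

template <typename TagType>
class Allocator : public AllocatorBase {
  public:
    explicit Allocator(size_t tagSize) : tagSize(tagSize) {}

  protected:
    size_t tagSize;
};

struct MyTag {
    uint64_t start = 0;
    uint64_t end = 0;
};

class Receiver {
  public:
    AllocatorBase *getTagAllocator() {
        if (tagAllocator == nullptr) {
            // the concrete type is named only here; callers work with AllocatorBase*
            tagAllocator = std::make_unique<Allocator<MyTag>>(sizeof(MyTag));
        }
        return tagAllocator.get();
    }

  protected:
    std::unique_ptr<AllocatorBase> tagAllocator;
};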

View File

@@ -43,8 +43,9 @@ class MultiGraphicsAllocation;
class OsContext;
class OSInterface;
class ScratchSpaceController;
struct HwPerfCounter;
struct HwTimeStamps;
class HwPerfCounter;
class HwTimeStamps;
class TagAllocatorBase;
template <typename TSize>
class TimestampPackets;
@@ -192,9 +193,9 @@ class CommandStreamReceiver {
virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; }
OsContext &getOsContext() const { return *osContext; }
TagAllocator<HwTimeStamps> *getEventTsAllocator();
TagAllocator<HwPerfCounter> *getEventPerfCountAllocator(const uint32_t tagSize);
TagAllocator<TimestampPacketStorage> *getTimestampPacketAllocator();
TagAllocatorBase *getEventTsAllocator();
TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize);
TagAllocatorBase *getTimestampPacketAllocator();
virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);
@@ -261,9 +262,9 @@ class CommandStreamReceiver {
std::unique_ptr<InternalAllocationStorage> internalAllocationStorage;
std::unique_ptr<KmdNotifyHelper> kmdNotifyHelper;
std::unique_ptr<ScratchSpaceController> scratchSpaceController;
std::unique_ptr<TagAllocator<HwTimeStamps>> profilingTimeStampAllocator;
std::unique_ptr<TagAllocator<HwPerfCounter>> perfCounterAllocator;
std::unique_ptr<TagAllocator<TimestampPacketStorage>> timestampPacketAllocator;
std::unique_ptr<TagAllocatorBase> profilingTimeStampAllocator;
std::unique_ptr<TagAllocatorBase> perfCounterAllocator;
std::unique_ptr<TagAllocatorBase> timestampPacketAllocator;
std::unique_ptr<Thread> userPauseConfirmation;
ResidencyContainer residencyAllocations;

View File

@@ -26,16 +26,17 @@ class LinearStream;
struct RootDeviceEnvironment;
template <typename TagType>
struct TagNode;
class TagNode;
template <typename TSize>
class TimestampPackets;
class TagNodeBase;
struct BlitProperties;
struct HardwareInfo;
struct TimestampPacketDependencies;
using BlitPropertiesContainer = StackVec<BlitProperties, 16>;
using TimestampPacketStorage = TimestampPackets<uint32_t>;
struct BlitProperties {
static BlitProperties constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
@@ -60,7 +61,7 @@ struct BlitProperties {
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr);
TagNode<TimestampPacketStorage> *outputTimestampPacket = nullptr;
TagNodeBase *outputTimestampPacket = nullptr;
BlitterConstants::BlitDirection blitDirection;
CsrDependencies csrDependencies;
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -23,4 +23,13 @@ enum class DebugPauseState : uint32_t {
hasUserEndConfirmation,
terminate
};
class TagTypeBase {
};
enum class TagNodeType {
TimestampPacket,
HwTimeStamps,
HwPerfCounter
};
} // namespace NEO
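
As a hypothetical illustration of how these additions are meant to be consumed (MyTimestampTag is an invented example type, not part of the commit): a tag type derives from TagTypeBase and reports its kind through a constexpr getTagNodeType(), which the templated node code can then branch on at compile time.

enum class TagNodeType { TimestampPacket, HwTimeStamps, HwPerfCounter };

class TagTypeBase {
};

class MyTimestampTag : public TagTypeBase {
  public:
    static constexpr TagNodeType getTagNodeType() { return TagNodeType::TimestampPacket; }
};

static_assert(MyTimestampTag::getTagNodeType() == TagNodeType::TimestampPacket,
              "the tag kind is known at compile time");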

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -13,7 +13,7 @@
using namespace NEO;
void TimestampPacketContainer::add(Node *timestampPacketNode) {
void TimestampPacketContainer::add(TagNodeBase *timestampPacketNode) {
timestampPacketNodes.push_back(timestampPacketNode);
}
@@ -28,7 +28,7 @@ void TimestampPacketContainer::swapNodes(TimestampPacketContainer &timestampPack
}
void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
std::vector<Node *> pendingNodes;
std::vector<TagNodeBase *> pendingNodes;
for (auto node : timestampPacketNodes) {
if (node->canBeReleased() || clearAllDependencies) {

View File

@@ -31,7 +31,7 @@ constexpr uint32_t preferredPacketCount = 16u;
#pragma pack(1)
template <typename TSize>
class TimestampPackets {
class TimestampPackets : public TagTypeBase {
public:
struct Packet {
TSize contextStart = 1u;
@@ -40,10 +40,14 @@ class TimestampPackets {
TSize globalEnd = 1u;
};
static GraphicsAllocation::AllocationType getAllocationType() {
static constexpr GraphicsAllocation::AllocationType getAllocationType() {
return GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
}
static constexpr TagNodeType getTagNodeType() { return TagNodeType::TimestampPacket; }
size_t getSinglePacketSize() const { return sizeof(Packet); }
bool isCompleted() const {
if (DebugManager.flags.DisableAtomicForPostSyncs.get()) {
return false;
@@ -96,29 +100,25 @@ class TimestampPackets {
};
#pragma pack()
using TimestampPacketStorage = TimestampPackets<uint32_t>;
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPacketStorage),
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPackets<uint32_t>),
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
class TimestampPacketContainer : public NonCopyableClass {
public:
using Node = TagNode<TimestampPacketStorage>;
TimestampPacketContainer() = default;
TimestampPacketContainer(TimestampPacketContainer &&) = default;
TimestampPacketContainer &operator=(TimestampPacketContainer &&) = default;
MOCKABLE_VIRTUAL ~TimestampPacketContainer();
const std::vector<Node *> &peekNodes() const { return timestampPacketNodes; }
void add(Node *timestampPacketNode);
const std::vector<TagNodeBase *> &peekNodes() const { return timestampPacketNodes; }
void add(TagNodeBase *timestampPacketNode);
void swapNodes(TimestampPacketContainer &timestampPacketContainer);
void assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer);
void resolveDependencies(bool clearAllDependencies);
void makeResident(CommandStreamReceiver &commandStreamReceiver);
protected:
std::vector<Node *> timestampPacketNodes;
std::vector<TagNodeBase *> timestampPacketNodes;
};
struct TimestampPacketDependencies : public NonCopyableClass {
@@ -130,27 +130,27 @@ struct TimestampPacketDependencies : public NonCopyableClass {
};
struct TimestampPacketHelper {
static uint64_t getContextEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextEndOffset();
static uint64_t getContextEndGpuAddress(const TagNodeBase &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getContextEndOffset();
}
static uint64_t getContextStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextStartOffset();
static uint64_t getContextStartGpuAddress(const TagNodeBase &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getContextStartOffset();
}
static uint64_t getGlobalEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalEndOffset();
static uint64_t getGlobalEndGpuAddress(const TagNodeBase &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getGlobalEndOffset();
}
static uint64_t getGlobalStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalStartOffset();
static uint64_t getGlobalStartGpuAddress(const TagNodeBase &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getGlobalStartOffset();
}
static uint64_t getGpuDependenciesCountGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getImplicitGpuDependenciesCountOffset();
static uint64_t getGpuDependenciesCountGpuAddress(const TagNodeBase &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getImplicitGpuDependenciesCountOffset();
}
static void overrideSupportedDevicesCount(uint32_t &numSupportedDevices);
template <typename GfxFamily>
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNode<TimestampPacketStorage> &timestampPacketNode, uint32_t numSupportedDevices) {
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNodeBase &timestampPacketNode, uint32_t numSupportedDevices) {
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
@@ -158,8 +158,8 @@ struct TimestampPacketHelper {
auto compareAddress = getContextEndGpuAddress(timestampPacketNode);
auto dependenciesCountAddress = getGpuDependenciesCountGpuAddress(timestampPacketNode);
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->getPacketsUsed(); packetId++) {
uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet);
for (uint32_t packetId = 0; packetId < timestampPacketNode.getPacketsUsed(); packetId++) {
uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize();
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
}
@@ -231,8 +231,8 @@ struct TimestampPacketHelper {
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode<TimestampPacketStorage> &timestampPacketNode) {
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNodeBase &timestampPacketNode) {
size_t totalMiSemaphoreWaitSize = timestampPacketNode.getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
}
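
The helper changes above all follow one shape: the GPU address of a timestamp field is the node's base GPU address plus a field offset that the node now reports through a virtual call, so the helpers no longer need the concrete TagType. A minimal sketch of that computation, assuming simplified stand-in types rather than the NEO classes:

#include <cstddef>
#include <cstdint>

class NodeBase {
  public:
    virtual ~NodeBase() = default;
    virtual uint64_t getGpuAddress() const = 0;      // base GPU address of the tag
    virtual size_t getContextEndOffset() const = 0;  // field offset inside the tag
};

// Same shape as getContextEndGpuAddress() above: base address plus reported offset.
inline uint64_t getContextEndGpuAddress(const NodeBase &node) {
    return node.getGpuAddress() + node.getContextEndOffset();
}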

View File

@@ -34,7 +34,9 @@ set(NEO_CORE_UTILITIES
${CMAKE_CURRENT_SOURCE_DIR}/software_tags_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/spinlock.h
${CMAKE_CURRENT_SOURCE_DIR}/stackvec.h
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.h
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.inl
${CMAKE_CURRENT_SOURCE_DIR}/time_measure_wrapper.h
${CMAKE_CURRENT_SOURCE_DIR}/timer_util.h
)

View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/utilities/tag_allocator.h"
namespace NEO {
TagAllocatorBase::TagAllocatorBase(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
: deviceBitfield(deviceBitfield), rootDeviceIndex(rootDeviceIndex), memoryManager(memMngr), tagCount(tagCount), tagSize(tagSize), doNotReleaseNodes(doNotReleaseNodes) {
this->tagSize = alignUp(tagSize, tagAlignment);
}
void TagAllocatorBase::cleanUpResources() {
for (auto gfxAllocation : gfxAllocations) {
memoryManager->freeGraphicsMemory(gfxAllocation);
}
gfxAllocations.clear();
}
void TagNodeBase::returnTag() {
allocator->returnTag(this);
}
bool TagNodeBase::canBeReleased() const {
return (!doNotReleaseNodes) &&
(isCompleted()) &&
(getImplicitGpuDependenciesCount() == getImplicitCpuDependenciesCount());
}
} // namespace NEO
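
A simplified sketch of the release decision implemented by returnTag() and canBeReleased() above, assuming invented stand-in types (Node, Pool) and omitting the implicit dependency-count check: when the last reference is dropped, the node either goes back to the free pool or is deferred until it can actually be released.

#include <atomic>
#include <cstdint>

struct Node {
    std::atomic<uint32_t> refCount{1};
    bool completed = false;     // stands in for isCompleted()
    bool doNotRelease = false;  // stands in for doNotReleaseNodes

    bool canBeReleased() const { return !doNotRelease && completed; }
};

enum class Pool { Free, Deferred, StillReferenced };

Pool returnTag(Node &node) {
    if (node.refCount.fetch_sub(1) == 1) {  // last reference dropped
        return node.canBeReleased() ? Pool::Free : Pool::Deferred;
    }
    return Pool::StillReferenced;  // other owners still hold the node
}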

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -14,6 +14,7 @@
#include <atomic>
#include <cstdint>
#include <mutex>
#include <type_traits>
#include <vector>
namespace NEO {
@@ -23,28 +24,31 @@ template <typename TagType>
class TagAllocator;
template <typename TagType>
struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
class TagNode;
class TagAllocatorBase;
class TagNodeBase : public NonCopyableOrMovableClass {
public:
TagType *tagForCpuAccess;
virtual ~TagNodeBase() = default;
GraphicsAllocation *getBaseGraphicsAllocation() const { return gfxAllocation; }
uint64_t getGpuAddress() const { return gpuAddress; }
void incRefCount() { refCount++; }
MOCKABLE_VIRTUAL void returnTag() {
allocator->returnTag(this);
}
uint32_t refCountFetchSub(uint32_t value) { return refCount.fetch_sub(value); }
bool canBeReleased() const {
return (!doNotReleaseNodes) &&
(tagForCpuAccess->isCompleted()) &&
(tagForCpuAccess->getImplicitGpuDependenciesCount() == getImplicitCpuDependenciesCount());
}
MOCKABLE_VIRTUAL void returnTag();
void setDoNotReleaseNodes(bool doNotRelease) {
doNotReleaseNodes = doNotRelease;
}
virtual void initialize() = 0;
bool canBeReleased() const;
virtual void *getCpuBase() const = 0;
void setDoNotReleaseNodes(bool doNotRelease) { doNotReleaseNodes = doNotRelease; }
void setProfilingCapable(bool capable) { profilingCapable = capable; }
@@ -52,18 +56,42 @@ struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
void incImplicitCpuDependenciesCount() { implicitCpuDependenciesCount++; }
void initialize() {
tagForCpuAccess->initialize();
implicitCpuDependenciesCount.store(0);
setProfilingCapable(true);
}
uint32_t getImplicitCpuDependenciesCount() const { return implicitCpuDependenciesCount.load(); }
const TagAllocator<TagType> *getAllocator() const { return allocator; }
const TagAllocatorBase *getAllocator() const { return allocator; }
// TagType specific calls
virtual bool isCompleted() const = 0;
virtual void assignDataToAllTimestamps(uint32_t packetIndex, void *source) = 0;
virtual size_t getGlobalStartOffset() const = 0;
virtual size_t getContextStartOffset() const = 0;
virtual size_t getContextEndOffset() const = 0;
virtual size_t getGlobalEndOffset() const = 0;
virtual size_t getImplicitGpuDependenciesCountOffset() const = 0;
virtual uint64_t getContextStartValue(uint32_t packetIndex) const = 0;
virtual uint64_t getGlobalStartValue(uint32_t packetIndex) const = 0;
virtual uint64_t getContextEndValue(uint32_t packetIndex) const = 0;
virtual uint64_t getGlobalEndValue(uint32_t packetIndex) const = 0;
virtual uint64_t &getGlobalEndRef() const = 0;
virtual uint64_t &getContextCompleteRef() const = 0;
virtual void setPacketsUsed(uint32_t used) = 0;
virtual uint32_t getPacketsUsed() const = 0;
virtual size_t getSinglePacketSize() const = 0;
virtual uint32_t getImplicitGpuDependenciesCount() const = 0;
virtual MetricsLibraryApi::QueryHandle_1_0 &getQueryHandleRef() const = 0;
protected:
TagAllocator<TagType> *allocator = nullptr;
TagNodeBase() = default;
TagAllocatorBase *allocator = nullptr;
GraphicsAllocation *gfxAllocation = nullptr;
uint64_t gpuAddress = 0;
std::atomic<uint32_t> refCount{0};
@@ -71,71 +99,78 @@ struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
bool doNotReleaseNodes = false;
bool profilingCapable = true;
template <typename TagType2>
template <typename TagType>
friend class TagAllocator;
};
template <typename TagType>
class TagAllocator {
class TagNode : public TagNodeBase, public IDNode<TagNode<TagType>> {
static_assert(!std::is_polymorphic<TagType>::value,
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
public:
using NodeType = TagNode<TagType>;
TagType *tagForCpuAccess;
TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
DeviceBitfield deviceBitfield) : deviceBitfield(deviceBitfield),
rootDeviceIndex(rootDeviceIndex),
memoryManager(memMngr),
tagCount(tagCount),
doNotReleaseNodes(doNotReleaseNodes) {
this->tagSize = alignUp(tagSize, tagAlignment);
populateFreeTags();
void initialize() override {
tagForCpuAccess->initialize();
implicitCpuDependenciesCount.store(0);
setProfilingCapable(true);
}
MOCKABLE_VIRTUAL ~TagAllocator() {
cleanUpResources();
}
void *getCpuBase() const override { return tagForCpuAccess; }
void cleanUpResources() {
for (auto gfxAllocation : gfxAllocations) {
memoryManager->freeGraphicsMemory(gfxAllocation);
}
gfxAllocations.clear();
}
void assignDataToAllTimestamps(uint32_t packetIndex, void *source) override;
NodeType *getTag() {
if (freeTags.peekIsEmpty()) {
releaseDeferredTags();
}
NodeType *node = freeTags.removeFrontOne().release();
if (!node) {
std::unique_lock<std::mutex> lock(allocatorMutex);
populateFreeTags();
node = freeTags.removeFrontOne().release();
}
usedTags.pushFrontOne(*node);
node->incRefCount();
node->initialize();
return node;
}
bool isCompleted() const override;
MOCKABLE_VIRTUAL void returnTag(NodeType *node) {
if (node->refCount.fetch_sub(1) == 1) {
if (node->canBeReleased()) {
returnTagToFreePool(node);
} else {
returnTagToDeferredPool(node);
}
}
}
size_t getGlobalStartOffset() const override;
size_t getContextStartOffset() const override;
size_t getContextEndOffset() const override;
size_t getGlobalEndOffset() const override;
size_t getImplicitGpuDependenciesCountOffset() const override;
uint64_t getContextStartValue(uint32_t packetIndex) const override;
uint64_t getGlobalStartValue(uint32_t packetIndex) const override;
uint64_t getContextEndValue(uint32_t packetIndex) const override;
uint64_t getGlobalEndValue(uint32_t packetIndex) const override;
uint64_t &getGlobalEndRef() const override;
uint64_t &getContextCompleteRef() const override;
void setPacketsUsed(uint32_t used) override;
uint32_t getPacketsUsed() const override;
size_t getSinglePacketSize() const override;
uint32_t getImplicitGpuDependenciesCount() const override;
MetricsLibraryApi::QueryHandle_1_0 &getQueryHandleRef() const override;
};
class TagAllocatorBase {
public:
virtual ~TagAllocatorBase() { cleanUpResources(); };
virtual void returnTag(TagNodeBase *node) = 0;
virtual TagNodeBase *getTag() = 0;
protected:
IDList<NodeType> freeTags;
IDList<NodeType> usedTags;
IDList<NodeType> deferredTags;
std::vector<GraphicsAllocation *> gfxAllocations;
std::vector<std::unique_ptr<NodeType[]>> tagPoolMemory;
TagAllocatorBase() = delete;
TagAllocatorBase(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
DeviceBitfield deviceBitfield);
virtual void returnTagToFreePool(TagNodeBase *node) = 0;
virtual void returnTagToDeferredPool(TagNodeBase *node) = 0;
virtual void releaseDeferredTags() = 0;
void cleanUpResources();
std::vector<GraphicsAllocation *> gfxAllocations;
const DeviceBitfield deviceBitfield;
const uint32_t rootDeviceIndex;
MemoryManager *memoryManager;
@@ -144,66 +179,38 @@ class TagAllocator {
bool doNotReleaseNodes = false;
std::mutex allocatorMutex;
};
MOCKABLE_VIRTUAL void returnTagToFreePool(NodeType *node) {
NodeType *usedNode = usedTags.removeOne(*node).release();
DEBUG_BREAK_IF(usedNode == nullptr);
UNUSED_VARIABLE(usedNode);
freeTags.pushFrontOne(*node);
}
template <typename TagType>
class TagAllocator : public TagAllocatorBase {
public:
using NodeType = TagNode<TagType>;
void returnTagToDeferredPool(NodeType *node) {
NodeType *usedNode = usedTags.removeOne(*node).release();
DEBUG_BREAK_IF(!usedNode);
deferredTags.pushFrontOne(*usedNode);
}
TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
DeviceBitfield deviceBitfield);
void populateFreeTags() {
size_t allocationSizeRequired = tagCount * tagSize;
TagNodeBase *getTag() override;
auto allocationType = TagType::getAllocationType();
AllocationProperties allocationProperties{rootDeviceIndex, allocationSizeRequired, allocationType, deviceBitfield};
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
gfxAllocations.push_back(graphicsAllocation);
void returnTag(TagNodeBase *node) override;
auto nodesMemory = std::make_unique<NodeType[]>(tagCount);
protected:
TagAllocator() = delete;
for (size_t i = 0; i < tagCount; ++i) {
auto tagOffset = i * tagSize;
void returnTagToFreePool(TagNodeBase *node) override;
nodesMemory[i].allocator = this;
nodesMemory[i].gfxAllocation = graphicsAllocation;
nodesMemory[i].tagForCpuAccess = reinterpret_cast<TagType *>(ptrOffset(graphicsAllocation->getUnderlyingBuffer(), tagOffset));
nodesMemory[i].gpuAddress = graphicsAllocation->getGpuAddress() + tagOffset;
nodesMemory[i].setDoNotReleaseNodes(doNotReleaseNodes);
void returnTagToDeferredPool(TagNodeBase *node) override;
freeTags.pushTailOne(nodesMemory[i]);
}
void releaseDeferredTags() override;
tagPoolMemory.push_back(std::move(nodesMemory));
}
void populateFreeTags();
void releaseDeferredTags() {
IDList<NodeType, false> pendingFreeTags;
IDList<NodeType, false> pendingDeferredTags;
auto currentNode = deferredTags.detachNodes();
IDList<NodeType> freeTags;
IDList<NodeType> usedTags;
IDList<NodeType> deferredTags;
while (currentNode != nullptr) {
auto nextNode = currentNode->next;
if (currentNode->canBeReleased()) {
pendingFreeTags.pushFrontOne(*currentNode);
} else {
pendingDeferredTags.pushFrontOne(*currentNode);
}
currentNode = nextNode;
}
if (!pendingFreeTags.peekIsEmpty()) {
freeTags.splice(*pendingFreeTags.detachNodes());
}
if (!pendingDeferredTags.peekIsEmpty()) {
deferredTags.splice(*pendingDeferredTags.detachNodes());
}
}
std::vector<std::unique_ptr<NodeType[]>> tagPoolMemory;
};
} // namespace NEO
#include "shared/source/utilities/tag_allocator.inl"

View File

@@ -0,0 +1,282 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/utilities/tag_allocator.h"
namespace NEO {
template <typename TagType>
TagAllocator<TagType>::TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount, size_t tagAlignment,
size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
: TagAllocatorBase(rootDeviceIndex, memMngr, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
populateFreeTags();
}
template <typename TagType>
TagNodeBase *TagAllocator<TagType>::getTag() {
if (freeTags.peekIsEmpty()) {
releaseDeferredTags();
}
auto node = freeTags.removeFrontOne().release();
if (!node) {
std::unique_lock<std::mutex> lock(allocatorMutex);
populateFreeTags();
node = freeTags.removeFrontOne().release();
}
usedTags.pushFrontOne(*node);
node->incRefCount();
node->initialize();
return node;
}
template <typename TagType>
void TagAllocator<TagType>::returnTagToFreePool(TagNodeBase *node) {
auto nodeT = static_cast<NodeType *>(node);
auto usedNode = usedTags.removeOne(*nodeT).release();
DEBUG_BREAK_IF(usedNode == nullptr);
UNUSED_VARIABLE(usedNode);
freeTags.pushFrontOne(*nodeT);
}
template <typename TagType>
void TagAllocator<TagType>::returnTagToDeferredPool(TagNodeBase *node) {
auto nodeT = static_cast<NodeType *>(node);
auto usedNode = usedTags.removeOne(*nodeT).release();
DEBUG_BREAK_IF(!usedNode);
deferredTags.pushFrontOne(*usedNode);
}
template <typename TagType>
void TagAllocator<TagType>::releaseDeferredTags() {
IDList<NodeType, false> pendingFreeTags;
IDList<NodeType, false> pendingDeferredTags;
auto currentNode = deferredTags.detachNodes();
while (currentNode != nullptr) {
auto nextNode = currentNode->next;
if (currentNode->canBeReleased()) {
pendingFreeTags.pushFrontOne(*currentNode);
} else {
pendingDeferredTags.pushFrontOne(*currentNode);
}
currentNode = nextNode;
}
if (!pendingFreeTags.peekIsEmpty()) {
freeTags.splice(*pendingFreeTags.detachNodes());
}
if (!pendingDeferredTags.peekIsEmpty()) {
deferredTags.splice(*pendingDeferredTags.detachNodes());
}
}
template <typename TagType>
void TagAllocator<TagType>::populateFreeTags() {
size_t allocationSizeRequired = tagCount * tagSize;
AllocationProperties allocationProperties{rootDeviceIndex, allocationSizeRequired, TagType::getAllocationType(), deviceBitfield};
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
gfxAllocations.push_back(graphicsAllocation);
auto nodesMemory = std::make_unique<NodeType[]>(tagCount);
for (size_t i = 0; i < tagCount; ++i) {
auto tagOffset = i * tagSize;
nodesMemory[i].allocator = this;
nodesMemory[i].gfxAllocation = graphicsAllocation;
nodesMemory[i].tagForCpuAccess = reinterpret_cast<TagType *>(ptrOffset(graphicsAllocation->getUnderlyingBuffer(), tagOffset));
nodesMemory[i].gpuAddress = graphicsAllocation->getGpuAddress() + tagOffset;
nodesMemory[i].setDoNotReleaseNodes(doNotReleaseNodes);
freeTags.pushTailOne(nodesMemory[i]);
}
tagPoolMemory.push_back(std::move(nodesMemory));
}
template <typename TagType>
void TagAllocator<TagType>::returnTag(TagNodeBase *node) {
if (node->refCountFetchSub(1) == 1) {
if (node->canBeReleased()) {
returnTagToFreePool(node);
} else {
returnTagToDeferredPool(node);
}
}
}
template <typename TagType>
size_t TagNode<TagType>::getGlobalStartOffset() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getGlobalStartOffset();
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
size_t TagNode<TagType>::getContextStartOffset() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getContextStartOffset();
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
size_t TagNode<TagType>::getContextEndOffset() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getContextEndOffset();
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
size_t TagNode<TagType>::getGlobalEndOffset() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getGlobalEndOffset();
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
size_t TagNode<TagType>::getImplicitGpuDependenciesCountOffset() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getImplicitGpuDependenciesCountOffset();
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint64_t TagNode<TagType>::getContextStartValue(uint32_t packetIndex) const {
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
return tagForCpuAccess->getContextStartValue(packetIndex);
} else {
UNUSED_VARIABLE(packetIndex);
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint64_t TagNode<TagType>::getGlobalStartValue(uint32_t packetIndex) const {
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
return tagForCpuAccess->getGlobalStartValue(packetIndex);
} else {
UNUSED_VARIABLE(packetIndex);
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint64_t TagNode<TagType>::getContextEndValue(uint32_t packetIndex) const {
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
return tagForCpuAccess->getContextEndValue(packetIndex);
} else {
UNUSED_VARIABLE(packetIndex);
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint64_t TagNode<TagType>::getGlobalEndValue(uint32_t packetIndex) const {
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
return tagForCpuAccess->getGlobalEndValue(packetIndex);
} else {
UNUSED_VARIABLE(packetIndex);
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint64_t &TagNode<TagType>::getContextCompleteRef() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::HwTimeStamps) {
return tagForCpuAccess->ContextCompleteTS;
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint64_t &TagNode<TagType>::getGlobalEndRef() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::HwTimeStamps) {
return tagForCpuAccess->GlobalEndTS;
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
void TagNode<TagType>::setPacketsUsed(uint32_t used) {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->setPacketsUsed(used);
} else {
UNUSED_VARIABLE(used);
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint32_t TagNode<TagType>::getPacketsUsed() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getPacketsUsed();
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
uint32_t TagNode<TagType>::getImplicitGpuDependenciesCount() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getImplicitGpuDependenciesCount();
} else {
return 0;
}
}
template <typename TagType>
size_t TagNode<TagType>::getSinglePacketSize() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->getSinglePacketSize();
} else {
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
void TagNode<TagType>::assignDataToAllTimestamps(uint32_t packetIndex, void *source) {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->assignDataToAllTimestamps(packetIndex, source);
} else {
UNUSED_VARIABLE(packetIndex);
UNUSED_VARIABLE(source);
UNRECOVERABLE_IF(true);
}
}
template <typename TagType>
bool TagNode<TagType>::isCompleted() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
return tagForCpuAccess->isCompleted();
} else {
return true;
}
}
template <typename TagType>
MetricsLibraryApi::QueryHandle_1_0 &TagNode<TagType>::getQueryHandleRef() const {
if constexpr (TagType::getTagNodeType() == TagNodeType::HwPerfCounter) {
return tagForCpuAccess->query.handle;
} else {
UNRECOVERABLE_IF(true);
}
}
} // namespace NEO
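
All of the member definitions above use the same compile-time dispatch: if constexpr on TagType::getTagNodeType() keeps only the branch that is valid for that tag kind, so unsupported calls never touch the tag and instead fail hard at run time. A standalone sketch of the pattern, with invented tag types and assert standing in for UNRECOVERABLE_IF:

#include <cassert>
#include <cstdint>

enum class TagNodeType { TimestampPacket, HwTimeStamps, HwPerfCounter };

struct TimestampTag {
    static constexpr TagNodeType getTagNodeType() { return TagNodeType::TimestampPacket; }
    uint32_t getPacketsUsed() const { return packetsUsed; }
    uint32_t packetsUsed = 1;
};

struct PerfCounterTag {
    static constexpr TagNodeType getTagNodeType() { return TagNodeType::HwPerfCounter; }
};

template <typename TagType>
uint32_t packetsUsed(const TagType &tag) {
    if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
        return tag.getPacketsUsed();  // this branch is compiled only for timestamp tags
    } else {
        assert(false && "not supported for this tag kind");  // analogue of UNRECOVERABLE_IF(true)
        return 0;
    }
}

int main() {
    TimestampTag ts;
    return packetsUsed(ts) == 1 ? 0 : 1;  // PerfCounterTag would hit the assert instead
}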