mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Refactor TagAllocator
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
cb4db7767e
commit
5a50ad098c
@@ -606,15 +606,15 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
|
||||
return true;
|
||||
}
|
||||
|
||||
TagAllocator<HwTimeStamps> *CommandStreamReceiver::getEventTsAllocator() {
|
||||
TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() {
|
||||
if (profilingTimeStampAllocator.get() == nullptr) {
|
||||
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(
|
||||
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false, osContext->getDeviceBitfield());
|
||||
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize,
|
||||
sizeof(HwTimeStamps), false, osContext->getDeviceBitfield());
|
||||
}
|
||||
return profilingTimeStampAllocator.get();
|
||||
}
|
||||
|
||||
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
|
||||
TagAllocatorBase *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
|
||||
if (perfCounterAllocator.get() == nullptr) {
|
||||
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(
|
||||
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false, osContext->getDeviceBitfield());
|
||||
@@ -622,15 +622,15 @@ TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(c
|
||||
return perfCounterAllocator.get();
|
||||
}
|
||||
|
||||
TagAllocator<TimestampPacketStorage> *CommandStreamReceiver::getTimestampPacketAllocator() {
|
||||
TagAllocatorBase *CommandStreamReceiver::getTimestampPacketAllocator() {
|
||||
if (timestampPacketAllocator.get() == nullptr) {
|
||||
// don't release nodes in aub/tbx mode, to avoid removing semaphores optimization or reusing returned tags
|
||||
bool doNotReleaseNodes = (getType() > CommandStreamReceiverType::CSR_HW) ||
|
||||
DebugManager.flags.DisableTimestampPacketOptimizations.get();
|
||||
|
||||
timestampPacketAllocator = std::make_unique<TagAllocator<TimestampPacketStorage>>(
|
||||
timestampPacketAllocator = std::make_unique<TagAllocator<NEO::TimestampPackets<uint32_t>>>(
|
||||
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize * 4,
|
||||
sizeof(TimestampPacketStorage), doNotReleaseNodes, osContext->getDeviceBitfield());
|
||||
sizeof(NEO::TimestampPackets<uint32_t>), doNotReleaseNodes, osContext->getDeviceBitfield());
|
||||
}
|
||||
return timestampPacketAllocator.get();
|
||||
}
|
||||
|
||||
@@ -43,8 +43,9 @@ class MultiGraphicsAllocation;
|
||||
class OsContext;
|
||||
class OSInterface;
|
||||
class ScratchSpaceController;
|
||||
struct HwPerfCounter;
|
||||
struct HwTimeStamps;
|
||||
class HwPerfCounter;
|
||||
class HwTimeStamps;
|
||||
class TagAllocatorBase;
|
||||
|
||||
template <typename TSize>
|
||||
class TimestampPackets;
|
||||
@@ -192,9 +193,9 @@ class CommandStreamReceiver {
|
||||
virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; }
|
||||
OsContext &getOsContext() const { return *osContext; }
|
||||
|
||||
TagAllocator<HwTimeStamps> *getEventTsAllocator();
|
||||
TagAllocator<HwPerfCounter> *getEventPerfCountAllocator(const uint32_t tagSize);
|
||||
TagAllocator<TimestampPacketStorage> *getTimestampPacketAllocator();
|
||||
TagAllocatorBase *getEventTsAllocator();
|
||||
TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize);
|
||||
TagAllocatorBase *getTimestampPacketAllocator();
|
||||
|
||||
virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);
|
||||
|
||||
@@ -261,9 +262,9 @@ class CommandStreamReceiver {
|
||||
std::unique_ptr<InternalAllocationStorage> internalAllocationStorage;
|
||||
std::unique_ptr<KmdNotifyHelper> kmdNotifyHelper;
|
||||
std::unique_ptr<ScratchSpaceController> scratchSpaceController;
|
||||
std::unique_ptr<TagAllocator<HwTimeStamps>> profilingTimeStampAllocator;
|
||||
std::unique_ptr<TagAllocator<HwPerfCounter>> perfCounterAllocator;
|
||||
std::unique_ptr<TagAllocator<TimestampPacketStorage>> timestampPacketAllocator;
|
||||
std::unique_ptr<TagAllocatorBase> profilingTimeStampAllocator;
|
||||
std::unique_ptr<TagAllocatorBase> perfCounterAllocator;
|
||||
std::unique_ptr<TagAllocatorBase> timestampPacketAllocator;
|
||||
std::unique_ptr<Thread> userPauseConfirmation;
|
||||
|
||||
ResidencyContainer residencyAllocations;
|
||||
|
||||
@@ -26,16 +26,17 @@ class LinearStream;
|
||||
struct RootDeviceEnvironment;
|
||||
|
||||
template <typename TagType>
|
||||
struct TagNode;
|
||||
class TagNode;
|
||||
|
||||
template <typename TSize>
|
||||
class TimestampPackets;
|
||||
|
||||
class TagNodeBase;
|
||||
|
||||
struct BlitProperties;
|
||||
struct HardwareInfo;
|
||||
struct TimestampPacketDependencies;
|
||||
using BlitPropertiesContainer = StackVec<BlitProperties, 16>;
|
||||
using TimestampPacketStorage = TimestampPackets<uint32_t>;
|
||||
|
||||
struct BlitProperties {
|
||||
static BlitProperties constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
|
||||
@@ -60,7 +61,7 @@ struct BlitProperties {
|
||||
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
|
||||
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr);
|
||||
|
||||
TagNode<TimestampPacketStorage> *outputTimestampPacket = nullptr;
|
||||
TagNodeBase *outputTimestampPacket = nullptr;
|
||||
BlitterConstants::BlitDirection blitDirection;
|
||||
CsrDependencies csrDependencies;
|
||||
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -23,4 +23,13 @@ enum class DebugPauseState : uint32_t {
|
||||
hasUserEndConfirmation,
|
||||
terminate
|
||||
};
|
||||
|
||||
class TagTypeBase {
|
||||
};
|
||||
|
||||
enum class TagNodeType {
|
||||
TimestampPacket,
|
||||
HwTimeStamps,
|
||||
HwPerfCounter
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
void TimestampPacketContainer::add(Node *timestampPacketNode) {
|
||||
void TimestampPacketContainer::add(TagNodeBase *timestampPacketNode) {
|
||||
timestampPacketNodes.push_back(timestampPacketNode);
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ void TimestampPacketContainer::swapNodes(TimestampPacketContainer &timestampPack
|
||||
}
|
||||
|
||||
void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
|
||||
std::vector<Node *> pendingNodes;
|
||||
std::vector<TagNodeBase *> pendingNodes;
|
||||
|
||||
for (auto node : timestampPacketNodes) {
|
||||
if (node->canBeReleased() || clearAllDependencies) {
|
||||
|
||||
@@ -31,7 +31,7 @@ constexpr uint32_t preferredPacketCount = 16u;
|
||||
|
||||
#pragma pack(1)
|
||||
template <typename TSize>
|
||||
class TimestampPackets {
|
||||
class TimestampPackets : public TagTypeBase {
|
||||
public:
|
||||
struct Packet {
|
||||
TSize contextStart = 1u;
|
||||
@@ -40,10 +40,14 @@ class TimestampPackets {
|
||||
TSize globalEnd = 1u;
|
||||
};
|
||||
|
||||
static GraphicsAllocation::AllocationType getAllocationType() {
|
||||
static constexpr GraphicsAllocation::AllocationType getAllocationType() {
|
||||
return GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
|
||||
}
|
||||
|
||||
static constexpr TagNodeType getTagNodeType() { return TagNodeType::TimestampPacket; }
|
||||
|
||||
size_t getSinglePacketSize() const { return sizeof(Packet); }
|
||||
|
||||
bool isCompleted() const {
|
||||
if (DebugManager.flags.DisableAtomicForPostSyncs.get()) {
|
||||
return false;
|
||||
@@ -96,29 +100,25 @@ class TimestampPackets {
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
using TimestampPacketStorage = TimestampPackets<uint32_t>;
|
||||
|
||||
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPacketStorage),
|
||||
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPackets<uint32_t>),
|
||||
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
|
||||
|
||||
class TimestampPacketContainer : public NonCopyableClass {
|
||||
public:
|
||||
using Node = TagNode<TimestampPacketStorage>;
|
||||
|
||||
TimestampPacketContainer() = default;
|
||||
TimestampPacketContainer(TimestampPacketContainer &&) = default;
|
||||
TimestampPacketContainer &operator=(TimestampPacketContainer &&) = default;
|
||||
MOCKABLE_VIRTUAL ~TimestampPacketContainer();
|
||||
|
||||
const std::vector<Node *> &peekNodes() const { return timestampPacketNodes; }
|
||||
void add(Node *timestampPacketNode);
|
||||
const std::vector<TagNodeBase *> &peekNodes() const { return timestampPacketNodes; }
|
||||
void add(TagNodeBase *timestampPacketNode);
|
||||
void swapNodes(TimestampPacketContainer &timestampPacketContainer);
|
||||
void assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer);
|
||||
void resolveDependencies(bool clearAllDependencies);
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
|
||||
protected:
|
||||
std::vector<Node *> timestampPacketNodes;
|
||||
std::vector<TagNodeBase *> timestampPacketNodes;
|
||||
};
|
||||
|
||||
struct TimestampPacketDependencies : public NonCopyableClass {
|
||||
@@ -130,27 +130,27 @@ struct TimestampPacketDependencies : public NonCopyableClass {
|
||||
};
|
||||
|
||||
struct TimestampPacketHelper {
|
||||
static uint64_t getContextEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextEndOffset();
|
||||
static uint64_t getContextEndGpuAddress(const TagNodeBase &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getContextEndOffset();
|
||||
}
|
||||
static uint64_t getContextStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextStartOffset();
|
||||
static uint64_t getContextStartGpuAddress(const TagNodeBase &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getContextStartOffset();
|
||||
}
|
||||
static uint64_t getGlobalEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalEndOffset();
|
||||
static uint64_t getGlobalEndGpuAddress(const TagNodeBase &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getGlobalEndOffset();
|
||||
}
|
||||
static uint64_t getGlobalStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalStartOffset();
|
||||
static uint64_t getGlobalStartGpuAddress(const TagNodeBase &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getGlobalStartOffset();
|
||||
}
|
||||
|
||||
static uint64_t getGpuDependenciesCountGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getImplicitGpuDependenciesCountOffset();
|
||||
static uint64_t getGpuDependenciesCountGpuAddress(const TagNodeBase &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getImplicitGpuDependenciesCountOffset();
|
||||
}
|
||||
|
||||
static void overrideSupportedDevicesCount(uint32_t &numSupportedDevices);
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNode<TimestampPacketStorage> &timestampPacketNode, uint32_t numSupportedDevices) {
|
||||
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNodeBase &timestampPacketNode, uint32_t numSupportedDevices) {
|
||||
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
@@ -158,8 +158,8 @@ struct TimestampPacketHelper {
|
||||
auto compareAddress = getContextEndGpuAddress(timestampPacketNode);
|
||||
auto dependenciesCountAddress = getGpuDependenciesCountGpuAddress(timestampPacketNode);
|
||||
|
||||
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->getPacketsUsed(); packetId++) {
|
||||
uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet);
|
||||
for (uint32_t packetId = 0; packetId < timestampPacketNode.getPacketsUsed(); packetId++) {
|
||||
uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize();
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
}
|
||||
|
||||
@@ -231,8 +231,8 @@ struct TimestampPacketHelper {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNodeBase &timestampPacketNode) {
|
||||
size_t totalMiSemaphoreWaitSize = timestampPacketNode.getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
|
||||
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
|
||||
}
|
||||
|
||||
@@ -34,7 +34,9 @@ set(NEO_CORE_UTILITIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/software_tags_manager.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/spinlock.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/stackvec.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/time_measure_wrapper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timer_util.h
|
||||
)
|
||||
|
||||
35
shared/source/utilities/tag_allocator.cpp
Normal file
35
shared/source/utilities/tag_allocator.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Stores the allocator configuration shared by every TagAllocator<TagType>.
// The stored tagSize is the requested tagSize rounded up to tagAlignment, so
// it is the per-tag stride rather than the raw size passed by the caller.
TagAllocatorBase::TagAllocatorBase(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
    : deviceBitfield(deviceBitfield),
      rootDeviceIndex(rootDeviceIndex),
      memoryManager(memMngr),
      tagCount(tagCount),
      tagSize(alignUp(tagSize, tagAlignment)), // here `tagSize` names the constructor parameter
      doNotReleaseNodes(doNotReleaseNodes) {
}
|
||||
|
||||
void TagAllocatorBase::cleanUpResources() {
|
||||
for (auto gfxAllocation : gfxAllocations) {
|
||||
memoryManager->freeGraphicsMemory(gfxAllocation);
|
||||
}
|
||||
gfxAllocations.clear();
|
||||
}
|
||||
|
||||
void TagNodeBase::returnTag() {
|
||||
allocator->returnTag(this);
|
||||
}
|
||||
|
||||
bool TagNodeBase::canBeReleased() const {
|
||||
return (!doNotReleaseNodes) &&
|
||||
(isCompleted()) &&
|
||||
(getImplicitGpuDependenciesCount() == getImplicitCpuDependenciesCount());
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
@@ -23,28 +24,31 @@ template <typename TagType>
|
||||
class TagAllocator;
|
||||
|
||||
template <typename TagType>
|
||||
struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
|
||||
class TagNode;
|
||||
|
||||
class TagAllocatorBase;
|
||||
|
||||
class TagNodeBase : public NonCopyableOrMovableClass {
|
||||
public:
|
||||
TagType *tagForCpuAccess;
|
||||
virtual ~TagNodeBase() = default;
|
||||
|
||||
GraphicsAllocation *getBaseGraphicsAllocation() const { return gfxAllocation; }
|
||||
|
||||
uint64_t getGpuAddress() const { return gpuAddress; }
|
||||
|
||||
void incRefCount() { refCount++; }
|
||||
|
||||
MOCKABLE_VIRTUAL void returnTag() {
|
||||
allocator->returnTag(this);
|
||||
}
|
||||
uint32_t refCountFetchSub(uint32_t value) { return refCount.fetch_sub(value); }
|
||||
|
||||
bool canBeReleased() const {
|
||||
return (!doNotReleaseNodes) &&
|
||||
(tagForCpuAccess->isCompleted()) &&
|
||||
(tagForCpuAccess->getImplicitGpuDependenciesCount() == getImplicitCpuDependenciesCount());
|
||||
}
|
||||
MOCKABLE_VIRTUAL void returnTag();
|
||||
|
||||
void setDoNotReleaseNodes(bool doNotRelease) {
|
||||
doNotReleaseNodes = doNotRelease;
|
||||
}
|
||||
virtual void initialize() = 0;
|
||||
|
||||
bool canBeReleased() const;
|
||||
|
||||
virtual void *getCpuBase() const = 0;
|
||||
|
||||
void setDoNotReleaseNodes(bool doNotRelease) { doNotReleaseNodes = doNotRelease; }
|
||||
|
||||
void setProfilingCapable(bool capable) { profilingCapable = capable; }
|
||||
|
||||
@@ -52,18 +56,42 @@ struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
|
||||
|
||||
void incImplicitCpuDependenciesCount() { implicitCpuDependenciesCount++; }
|
||||
|
||||
void initialize() {
|
||||
tagForCpuAccess->initialize();
|
||||
implicitCpuDependenciesCount.store(0);
|
||||
setProfilingCapable(true);
|
||||
}
|
||||
|
||||
uint32_t getImplicitCpuDependenciesCount() const { return implicitCpuDependenciesCount.load(); }
|
||||
|
||||
const TagAllocator<TagType> *getAllocator() const { return allocator; }
|
||||
const TagAllocatorBase *getAllocator() const { return allocator; }
|
||||
|
||||
// TagType specific calls
|
||||
virtual bool isCompleted() const = 0;
|
||||
virtual void assignDataToAllTimestamps(uint32_t packetIndex, void *source) = 0;
|
||||
|
||||
virtual size_t getGlobalStartOffset() const = 0;
|
||||
virtual size_t getContextStartOffset() const = 0;
|
||||
virtual size_t getContextEndOffset() const = 0;
|
||||
virtual size_t getGlobalEndOffset() const = 0;
|
||||
virtual size_t getImplicitGpuDependenciesCountOffset() const = 0;
|
||||
|
||||
virtual uint64_t getContextStartValue(uint32_t packetIndex) const = 0;
|
||||
virtual uint64_t getGlobalStartValue(uint32_t packetIndex) const = 0;
|
||||
virtual uint64_t getContextEndValue(uint32_t packetIndex) const = 0;
|
||||
virtual uint64_t getGlobalEndValue(uint32_t packetIndex) const = 0;
|
||||
|
||||
virtual uint64_t &getGlobalEndRef() const = 0;
|
||||
virtual uint64_t &getContextCompleteRef() const = 0;
|
||||
|
||||
virtual void setPacketsUsed(uint32_t used) = 0;
|
||||
virtual uint32_t getPacketsUsed() const = 0;
|
||||
|
||||
virtual size_t getSinglePacketSize() const = 0;
|
||||
|
||||
virtual uint32_t getImplicitGpuDependenciesCount() const = 0;
|
||||
|
||||
virtual MetricsLibraryApi::QueryHandle_1_0 &getQueryHandleRef() const = 0;
|
||||
|
||||
protected:
|
||||
TagAllocator<TagType> *allocator = nullptr;
|
||||
TagNodeBase() = default;
|
||||
|
||||
TagAllocatorBase *allocator = nullptr;
|
||||
|
||||
GraphicsAllocation *gfxAllocation = nullptr;
|
||||
uint64_t gpuAddress = 0;
|
||||
std::atomic<uint32_t> refCount{0};
|
||||
@@ -71,71 +99,78 @@ struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
|
||||
bool doNotReleaseNodes = false;
|
||||
bool profilingCapable = true;
|
||||
|
||||
template <typename TagType2>
|
||||
template <typename TagType>
|
||||
friend class TagAllocator;
|
||||
};
|
||||
|
||||
template <typename TagType>
|
||||
class TagAllocator {
|
||||
class TagNode : public TagNodeBase, public IDNode<TagNode<TagType>> {
|
||||
static_assert(!std::is_polymorphic<TagType>::value,
|
||||
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
|
||||
|
||||
public:
|
||||
using NodeType = TagNode<TagType>;
|
||||
TagType *tagForCpuAccess;
|
||||
|
||||
TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
|
||||
DeviceBitfield deviceBitfield) : deviceBitfield(deviceBitfield),
|
||||
rootDeviceIndex(rootDeviceIndex),
|
||||
memoryManager(memMngr),
|
||||
tagCount(tagCount),
|
||||
doNotReleaseNodes(doNotReleaseNodes) {
|
||||
|
||||
this->tagSize = alignUp(tagSize, tagAlignment);
|
||||
populateFreeTags();
|
||||
void initialize() override {
|
||||
tagForCpuAccess->initialize();
|
||||
implicitCpuDependenciesCount.store(0);
|
||||
setProfilingCapable(true);
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL ~TagAllocator() {
|
||||
cleanUpResources();
|
||||
}
|
||||
void *getCpuBase() const override { return tagForCpuAccess; }
|
||||
|
||||
void cleanUpResources() {
|
||||
for (auto gfxAllocation : gfxAllocations) {
|
||||
memoryManager->freeGraphicsMemory(gfxAllocation);
|
||||
}
|
||||
gfxAllocations.clear();
|
||||
}
|
||||
void assignDataToAllTimestamps(uint32_t packetIndex, void *source) override;
|
||||
|
||||
NodeType *getTag() {
|
||||
if (freeTags.peekIsEmpty()) {
|
||||
releaseDeferredTags();
|
||||
}
|
||||
NodeType *node = freeTags.removeFrontOne().release();
|
||||
if (!node) {
|
||||
std::unique_lock<std::mutex> lock(allocatorMutex);
|
||||
populateFreeTags();
|
||||
node = freeTags.removeFrontOne().release();
|
||||
}
|
||||
usedTags.pushFrontOne(*node);
|
||||
node->incRefCount();
|
||||
node->initialize();
|
||||
return node;
|
||||
}
|
||||
bool isCompleted() const override;
|
||||
|
||||
MOCKABLE_VIRTUAL void returnTag(NodeType *node) {
|
||||
if (node->refCount.fetch_sub(1) == 1) {
|
||||
if (node->canBeReleased()) {
|
||||
returnTagToFreePool(node);
|
||||
} else {
|
||||
returnTagToDeferredPool(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
size_t getGlobalStartOffset() const override;
|
||||
size_t getContextStartOffset() const override;
|
||||
size_t getContextEndOffset() const override;
|
||||
size_t getGlobalEndOffset() const override;
|
||||
size_t getImplicitGpuDependenciesCountOffset() const override;
|
||||
|
||||
uint64_t getContextStartValue(uint32_t packetIndex) const override;
|
||||
uint64_t getGlobalStartValue(uint32_t packetIndex) const override;
|
||||
uint64_t getContextEndValue(uint32_t packetIndex) const override;
|
||||
uint64_t getGlobalEndValue(uint32_t packetIndex) const override;
|
||||
|
||||
uint64_t &getGlobalEndRef() const override;
|
||||
uint64_t &getContextCompleteRef() const override;
|
||||
|
||||
void setPacketsUsed(uint32_t used) override;
|
||||
uint32_t getPacketsUsed() const override;
|
||||
|
||||
size_t getSinglePacketSize() const override;
|
||||
|
||||
uint32_t getImplicitGpuDependenciesCount() const override;
|
||||
|
||||
MetricsLibraryApi::QueryHandle_1_0 &getQueryHandleRef() const override;
|
||||
};
|
||||
|
||||
class TagAllocatorBase {
|
||||
public:
|
||||
virtual ~TagAllocatorBase() { cleanUpResources(); };
|
||||
|
||||
virtual void returnTag(TagNodeBase *node) = 0;
|
||||
|
||||
virtual TagNodeBase *getTag() = 0;
|
||||
|
||||
protected:
|
||||
IDList<NodeType> freeTags;
|
||||
IDList<NodeType> usedTags;
|
||||
IDList<NodeType> deferredTags;
|
||||
std::vector<GraphicsAllocation *> gfxAllocations;
|
||||
std::vector<std::unique_ptr<NodeType[]>> tagPoolMemory;
|
||||
TagAllocatorBase() = delete;
|
||||
|
||||
TagAllocatorBase(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
|
||||
DeviceBitfield deviceBitfield);
|
||||
|
||||
virtual void returnTagToFreePool(TagNodeBase *node) = 0;
|
||||
|
||||
virtual void returnTagToDeferredPool(TagNodeBase *node) = 0;
|
||||
|
||||
virtual void releaseDeferredTags() = 0;
|
||||
|
||||
void cleanUpResources();
|
||||
|
||||
std::vector<GraphicsAllocation *> gfxAllocations;
|
||||
const DeviceBitfield deviceBitfield;
|
||||
const uint32_t rootDeviceIndex;
|
||||
MemoryManager *memoryManager;
|
||||
@@ -144,66 +179,38 @@ class TagAllocator {
|
||||
bool doNotReleaseNodes = false;
|
||||
|
||||
std::mutex allocatorMutex;
|
||||
};
|
||||
|
||||
MOCKABLE_VIRTUAL void returnTagToFreePool(NodeType *node) {
|
||||
NodeType *usedNode = usedTags.removeOne(*node).release();
|
||||
DEBUG_BREAK_IF(usedNode == nullptr);
|
||||
UNUSED_VARIABLE(usedNode);
|
||||
freeTags.pushFrontOne(*node);
|
||||
}
|
||||
template <typename TagType>
|
||||
class TagAllocator : public TagAllocatorBase {
|
||||
public:
|
||||
using NodeType = TagNode<TagType>;
|
||||
|
||||
void returnTagToDeferredPool(NodeType *node) {
|
||||
NodeType *usedNode = usedTags.removeOne(*node).release();
|
||||
DEBUG_BREAK_IF(!usedNode);
|
||||
deferredTags.pushFrontOne(*usedNode);
|
||||
}
|
||||
TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
|
||||
DeviceBitfield deviceBitfield);
|
||||
|
||||
void populateFreeTags() {
|
||||
size_t allocationSizeRequired = tagCount * tagSize;
|
||||
TagNodeBase *getTag() override;
|
||||
|
||||
auto allocationType = TagType::getAllocationType();
|
||||
AllocationProperties allocationProperties{rootDeviceIndex, allocationSizeRequired, allocationType, deviceBitfield};
|
||||
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
|
||||
gfxAllocations.push_back(graphicsAllocation);
|
||||
void returnTag(TagNodeBase *node) override;
|
||||
|
||||
auto nodesMemory = std::make_unique<NodeType[]>(tagCount);
|
||||
protected:
|
||||
TagAllocator() = delete;
|
||||
|
||||
for (size_t i = 0; i < tagCount; ++i) {
|
||||
auto tagOffset = i * tagSize;
|
||||
void returnTagToFreePool(TagNodeBase *node) override;
|
||||
|
||||
nodesMemory[i].allocator = this;
|
||||
nodesMemory[i].gfxAllocation = graphicsAllocation;
|
||||
nodesMemory[i].tagForCpuAccess = reinterpret_cast<TagType *>(ptrOffset(graphicsAllocation->getUnderlyingBuffer(), tagOffset));
|
||||
nodesMemory[i].gpuAddress = graphicsAllocation->getGpuAddress() + tagOffset;
|
||||
nodesMemory[i].setDoNotReleaseNodes(doNotReleaseNodes);
|
||||
void returnTagToDeferredPool(TagNodeBase *node) override;
|
||||
|
||||
freeTags.pushTailOne(nodesMemory[i]);
|
||||
}
|
||||
void releaseDeferredTags() override;
|
||||
|
||||
tagPoolMemory.push_back(std::move(nodesMemory));
|
||||
}
|
||||
void populateFreeTags();
|
||||
|
||||
void releaseDeferredTags() {
|
||||
IDList<NodeType, false> pendingFreeTags;
|
||||
IDList<NodeType, false> pendingDeferredTags;
|
||||
auto currentNode = deferredTags.detachNodes();
|
||||
IDList<NodeType> freeTags;
|
||||
IDList<NodeType> usedTags;
|
||||
IDList<NodeType> deferredTags;
|
||||
|
||||
while (currentNode != nullptr) {
|
||||
auto nextNode = currentNode->next;
|
||||
if (currentNode->canBeReleased()) {
|
||||
pendingFreeTags.pushFrontOne(*currentNode);
|
||||
} else {
|
||||
pendingDeferredTags.pushFrontOne(*currentNode);
|
||||
}
|
||||
currentNode = nextNode;
|
||||
}
|
||||
|
||||
if (!pendingFreeTags.peekIsEmpty()) {
|
||||
freeTags.splice(*pendingFreeTags.detachNodes());
|
||||
}
|
||||
if (!pendingDeferredTags.peekIsEmpty()) {
|
||||
deferredTags.splice(*pendingDeferredTags.detachNodes());
|
||||
}
|
||||
}
|
||||
std::vector<std::unique_ptr<NodeType[]>> tagPoolMemory;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
#include "shared/source/utilities/tag_allocator.inl"
|
||||
|
||||
282
shared/source/utilities/tag_allocator.inl
Normal file
282
shared/source/utilities/tag_allocator.inl
Normal file
@@ -0,0 +1,282 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename TagType>
|
||||
TagAllocator<TagType>::TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount, size_t tagAlignment,
|
||||
size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
|
||||
: TagAllocatorBase(rootDeviceIndex, memMngr, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
|
||||
|
||||
populateFreeTags();
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
TagNodeBase *TagAllocator<TagType>::getTag() {
|
||||
if (freeTags.peekIsEmpty()) {
|
||||
releaseDeferredTags();
|
||||
}
|
||||
auto node = freeTags.removeFrontOne().release();
|
||||
if (!node) {
|
||||
std::unique_lock<std::mutex> lock(allocatorMutex);
|
||||
populateFreeTags();
|
||||
node = freeTags.removeFrontOne().release();
|
||||
}
|
||||
usedTags.pushFrontOne(*node);
|
||||
node->incRefCount();
|
||||
node->initialize();
|
||||
return node;
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
void TagAllocator<TagType>::returnTagToFreePool(TagNodeBase *node) {
|
||||
auto nodeT = static_cast<NodeType *>(node);
|
||||
auto usedNode = usedTags.removeOne(*nodeT).release();
|
||||
DEBUG_BREAK_IF(usedNode == nullptr);
|
||||
UNUSED_VARIABLE(usedNode);
|
||||
freeTags.pushFrontOne(*nodeT);
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
void TagAllocator<TagType>::returnTagToDeferredPool(TagNodeBase *node) {
|
||||
auto nodeT = static_cast<NodeType *>(node);
|
||||
auto usedNode = usedTags.removeOne(*nodeT).release();
|
||||
DEBUG_BREAK_IF(!usedNode);
|
||||
deferredTags.pushFrontOne(*usedNode);
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
void TagAllocator<TagType>::releaseDeferredTags() {
|
||||
IDList<NodeType, false> pendingFreeTags;
|
||||
IDList<NodeType, false> pendingDeferredTags;
|
||||
auto currentNode = deferredTags.detachNodes();
|
||||
|
||||
while (currentNode != nullptr) {
|
||||
auto nextNode = currentNode->next;
|
||||
if (currentNode->canBeReleased()) {
|
||||
pendingFreeTags.pushFrontOne(*currentNode);
|
||||
} else {
|
||||
pendingDeferredTags.pushFrontOne(*currentNode);
|
||||
}
|
||||
currentNode = nextNode;
|
||||
}
|
||||
|
||||
if (!pendingFreeTags.peekIsEmpty()) {
|
||||
freeTags.splice(*pendingFreeTags.detachNodes());
|
||||
}
|
||||
if (!pendingDeferredTags.peekIsEmpty()) {
|
||||
deferredTags.splice(*pendingDeferredTags.detachNodes());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
void TagAllocator<TagType>::populateFreeTags() {
|
||||
size_t allocationSizeRequired = tagCount * tagSize;
|
||||
|
||||
AllocationProperties allocationProperties{rootDeviceIndex, allocationSizeRequired, TagType::getAllocationType(), deviceBitfield};
|
||||
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
|
||||
gfxAllocations.push_back(graphicsAllocation);
|
||||
|
||||
auto nodesMemory = std::make_unique<NodeType[]>(tagCount);
|
||||
|
||||
for (size_t i = 0; i < tagCount; ++i) {
|
||||
auto tagOffset = i * tagSize;
|
||||
|
||||
nodesMemory[i].allocator = this;
|
||||
nodesMemory[i].gfxAllocation = graphicsAllocation;
|
||||
nodesMemory[i].tagForCpuAccess = reinterpret_cast<TagType *>(ptrOffset(graphicsAllocation->getUnderlyingBuffer(), tagOffset));
|
||||
nodesMemory[i].gpuAddress = graphicsAllocation->getGpuAddress() + tagOffset;
|
||||
nodesMemory[i].setDoNotReleaseNodes(doNotReleaseNodes);
|
||||
|
||||
freeTags.pushTailOne(nodesMemory[i]);
|
||||
}
|
||||
|
||||
tagPoolMemory.push_back(std::move(nodesMemory));
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
void TagAllocator<TagType>::returnTag(TagNodeBase *node) {
|
||||
if (node->refCountFetchSub(1) == 1) {
|
||||
if (node->canBeReleased()) {
|
||||
returnTagToFreePool(node);
|
||||
} else {
|
||||
returnTagToDeferredPool(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getGlobalStartOffset() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getGlobalStartOffset();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getContextStartOffset() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getContextStartOffset();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getContextEndOffset() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getContextEndOffset();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getGlobalEndOffset() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getGlobalEndOffset();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getImplicitGpuDependenciesCountOffset() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getImplicitGpuDependenciesCountOffset();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint64_t TagNode<TagType>::getContextStartValue(uint32_t packetIndex) const {
|
||||
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
|
||||
return tagForCpuAccess->getContextStartValue(packetIndex);
|
||||
} else {
|
||||
UNUSED_VARIABLE(packetIndex);
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint64_t TagNode<TagType>::getGlobalStartValue(uint32_t packetIndex) const {
|
||||
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
|
||||
return tagForCpuAccess->getGlobalStartValue(packetIndex);
|
||||
} else {
|
||||
UNUSED_VARIABLE(packetIndex);
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint64_t TagNode<TagType>::getContextEndValue(uint32_t packetIndex) const {
|
||||
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
|
||||
return tagForCpuAccess->getContextEndValue(packetIndex);
|
||||
} else {
|
||||
UNUSED_VARIABLE(packetIndex);
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint64_t TagNode<TagType>::getGlobalEndValue(uint32_t packetIndex) const {
|
||||
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
|
||||
return tagForCpuAccess->getGlobalEndValue(packetIndex);
|
||||
} else {
|
||||
UNUSED_VARIABLE(packetIndex);
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint64_t &TagNode<TagType>::getContextCompleteRef() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::HwTimeStamps) {
|
||||
return tagForCpuAccess->ContextCompleteTS;
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint64_t &TagNode<TagType>::getGlobalEndRef() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::HwTimeStamps) {
|
||||
return tagForCpuAccess->GlobalEndTS;
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
void TagNode<TagType>::setPacketsUsed(uint32_t used) {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->setPacketsUsed(used);
|
||||
} else {
|
||||
UNUSED_VARIABLE(used);
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint32_t TagNode<TagType>::getPacketsUsed() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getPacketsUsed();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint32_t TagNode<TagType>::getImplicitGpuDependenciesCount() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getImplicitGpuDependenciesCount();
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getSinglePacketSize() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getSinglePacketSize();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
void TagNode<TagType>::assignDataToAllTimestamps(uint32_t packetIndex, void *source) {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->assignDataToAllTimestamps(packetIndex, source);
|
||||
} else {
|
||||
UNUSED_VARIABLE(packetIndex);
|
||||
UNUSED_VARIABLE(source);
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
bool TagNode<TagType>::isCompleted() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->isCompleted();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
MetricsLibraryApi::QueryHandle_1_0 &TagNode<TagType>::getQueryHandleRef() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::HwPerfCounter) {
|
||||
return tagForCpuAccess->query.handle;
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user