mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-10 15:12:56 +08:00
Disable TimestampPacket optimizations in Aub/Tbx mode
Avoid removing semaphores and reusing returned tags.
Change-Id: Ic26167953c5d5a9ccceaae49f4921af11a375fab
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
54f65c0243
commit
0527c9113c
@@ -416,21 +416,28 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
|
||||
|
||||
// Returns the receiver's HwTimeStamps tag allocator, creating it on the first call
// (i.e. while profilingTimeStampAllocator still holds nullptr).
// The returned pointer is non-owning; the allocator stays owned by profilingTimeStampAllocator.
// NOTE(review): this is a rendered diff hunk without +/- markers. The single-line
// make_unique call and the two-line call that additionally passes sizeof(HwTimeStamps)
// and false (the tagSize and doNotReleaseNodes constructor arguments) look like the
// removed and the added version of the same statement - confirm against the repository.
TagAllocator<HwTimeStamps> *CommandStreamReceiver::getEventTsAllocator() {
|
||||
if (profilingTimeStampAllocator.get() == nullptr) {
|
||||
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize);
|
||||
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(
|
||||
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false);
|
||||
}
|
||||
return profilingTimeStampAllocator.get();
|
||||
}
|
||||
|
||||
// Returns the receiver's HwPerfCounter tag allocator, creating it on the first call
// (i.e. while perfCounterAllocator still holds nullptr).
// tagSize is forwarded to the TagAllocator constructor only when the allocator is
// created; on later calls the argument is not used.
// The returned pointer is non-owning; the allocator stays owned by perfCounterAllocator.
// NOTE(review): this is a rendered diff hunk without +/- markers. The single-line
// make_unique call and the two-line call that additionally passes false (the
// doNotReleaseNodes constructor argument) look like the removed and the added version
// of the same statement - confirm against the repository.
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
|
||||
if (perfCounterAllocator.get() == nullptr) {
|
||||
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize);
|
||||
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(
|
||||
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false);
|
||||
}
|
||||
return perfCounterAllocator.get();
|
||||
}
|
||||
|
||||
// Returns the receiver's TimestampPacketStorage tag allocator, creating it on the first
// call (i.e. while timestampPacketAllocator still holds nullptr).
// The returned pointer is non-owning; the allocator stays owned by timestampPacketAllocator.
// NOTE(review): this is a rendered diff hunk without +/- markers. The single-line
// make_unique call and the later multi-line call that passes sizeof(TimestampPacketStorage)
// and doNotReleaseNodes look like the removed and the added version of the same
// statement - confirm against the repository.
TagAllocator<TimestampPacketStorage> *CommandStreamReceiver::getTimestampPacketAllocator() {
|
||||
if (timestampPacketAllocator.get() == nullptr) {
|
||||
timestampPacketAllocator = std::make_unique<TagAllocator<TimestampPacketStorage>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize);
|
||||
// Do not release nodes in AUB/TBX mode, so that the semaphore-removal optimization is not applied and returned tags are not reused.
|
||||
// NOTE(review): the comparison below treats every receiver type that orders after CSR_HW as AUB/TBX - confirm the enum declaration order.
bool doNotReleaseNodes = (getType() > CommandStreamReceiverType::CSR_HW);
|
||||
|
||||
timestampPacketAllocator = std::make_unique<TagAllocator<TimestampPacketStorage>>(
|
||||
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize,
|
||||
sizeof(TimestampPacketStorage), doNotReleaseNodes);
|
||||
}
|
||||
return timestampPacketAllocator.get();
|
||||
}
|
||||
|
||||
@@ -29,6 +29,13 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
|
||||
AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) override;
|
||||
void setupContext(OsContext &osContext) override;
|
||||
|
||||
// Reports the type of this AUB-dumping wrapper, derived from the wrapped BaseCSR:
// CSR_TBX_WITH_AUB when BaseCSR::getType() is CSR_TBX, and CSR_HW_WITH_AUB for every
// other base type.
CommandStreamReceiverType getType() override {
|
||||
if (BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX) {
|
||||
return CommandStreamReceiverType::CSR_TBX_WITH_AUB;
|
||||
}
|
||||
return CommandStreamReceiverType::CSR_HW_WITH_AUB;
|
||||
}
|
||||
|
||||
std::unique_ptr<CommandStreamReceiver> aubCSR;
|
||||
};
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ struct HwTimeStamps {
|
||||
GlobalCompleteTS = 0;
|
||||
ContextCompleteTS = 0;
|
||||
}
|
||||
bool canBeReleased() const { return true; }
|
||||
bool isCompleted() const { return true; }
|
||||
static GraphicsAllocation::AllocationType getAllocationType() {
|
||||
return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER;
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ struct HwPerfCounter {
|
||||
static GraphicsAllocation::AllocationType getAllocationType() {
|
||||
return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER;
|
||||
}
|
||||
bool canBeReleased() const { return true; }
|
||||
bool isCompleted() const { return true; }
|
||||
|
||||
// Gpu report size is not known during compile time.
|
||||
// Such information will be provided by metrics library dll.
|
||||
|
||||
@@ -33,7 +33,7 @@ void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
|
||||
std::vector<Node *> pendingNodes;
|
||||
|
||||
for (auto node : timestampPacketNodes) {
|
||||
if (node->tagForCpuAccess->canBeReleased() || clearAllDependencies) {
|
||||
if (node->canBeReleased() || clearAllDependencies) {
|
||||
node->returnTag();
|
||||
} else {
|
||||
pendingNodes.push_back(node);
|
||||
|
||||
@@ -42,17 +42,13 @@ struct TimestampPacketStorage {
|
||||
return GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
|
||||
}
|
||||
|
||||
bool canBeReleased() const {
|
||||
return isCompleted() && implicitDependenciesCount.load() == 0;
|
||||
}
|
||||
|
||||
bool isCompleted() const {
|
||||
for (uint32_t i = 0; i < packetsUsed; i++) {
|
||||
if ((packets[i].contextEnd & 1) || (packets[i].globalEnd & 1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return implicitDependenciesCount.load() == 0;
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
|
||||
@@ -36,11 +36,20 @@ struct TagNode : public IDNode<TagNode<TagType>> {
|
||||
allocator->returnTag(this);
|
||||
}
|
||||
|
||||
// Returns true only when this node is not flagged doNotReleaseNodes and the
// CPU-accessible tag reports isCompleted(). When the flag is set the result is always
// false and isCompleted() is not evaluated (short-circuit).
bool canBeReleased() const {
|
||||
return !doNotReleaseNodes && tagForCpuAccess->isCompleted();
|
||||
}
|
||||
|
||||
// Sets the doNotReleaseNodes flag of this node. While the flag is true,
// canBeReleased() returns false regardless of the tag's completion state.
void setDoNotReleaseNodes(bool doNotRelease) {
|
||||
doNotReleaseNodes = doNotRelease;
|
||||
}
|
||||
|
||||
protected:
|
||||
TagAllocator<TagType> *allocator = nullptr;
|
||||
GraphicsAllocation *gfxAllocation = nullptr;
|
||||
uint64_t gpuAddress = 0;
|
||||
std::atomic<uint32_t> refCount{0};
|
||||
bool doNotReleaseNodes = false;
|
||||
|
||||
template <typename TagType2>
|
||||
friend class TagAllocator;
|
||||
@@ -52,10 +61,11 @@ class TagAllocator {
|
||||
using NodeType = TagNode<TagType>;
|
||||
|
||||
TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
|
||||
size_t tagAlignment, size_t tagSize = sizeof(TagType)) : rootDeviceIndex(rootDeviceIndex),
|
||||
memoryManager(memMngr),
|
||||
tagCount(tagCount),
|
||||
tagAlignment(tagAlignment) {
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes) : rootDeviceIndex(rootDeviceIndex),
|
||||
memoryManager(memMngr),
|
||||
tagCount(tagCount),
|
||||
tagAlignment(tagAlignment),
|
||||
doNotReleaseNodes(doNotReleaseNodes) {
|
||||
|
||||
this->tagSize = alignUp(tagSize, tagAlignment);
|
||||
populateFreeTags();
|
||||
@@ -95,7 +105,7 @@ class TagAllocator {
|
||||
|
||||
MOCKABLE_VIRTUAL void returnTag(NodeType *node) {
|
||||
if (node->refCount.fetch_sub(1) == 1) {
|
||||
if (node->tagForCpuAccess->canBeReleased()) {
|
||||
if (node->canBeReleased()) {
|
||||
returnTagToFreePool(node);
|
||||
} else {
|
||||
returnTagToDeferredPool(node);
|
||||
@@ -115,6 +125,7 @@ class TagAllocator {
|
||||
size_t tagCount;
|
||||
size_t tagAlignment;
|
||||
size_t tagSize;
|
||||
bool doNotReleaseNodes = false;
|
||||
|
||||
std::mutex allocatorMutex;
|
||||
|
||||
@@ -150,6 +161,7 @@ class TagAllocator {
|
||||
nodesMemory[i].gfxAllocation = graphicsAllocation;
|
||||
nodesMemory[i].tagForCpuAccess = reinterpret_cast<TagType *>(Start);
|
||||
nodesMemory[i].gpuAddress = gpuBaseAddress + (i * tagSize);
|
||||
nodesMemory[i].setDoNotReleaseNodes(doNotReleaseNodes);
|
||||
freeTags.pushTailOne(nodesMemory[i]);
|
||||
Start += tagSize;
|
||||
}
|
||||
@@ -165,7 +177,7 @@ class TagAllocator {
|
||||
|
||||
while (currentNode != nullptr) {
|
||||
auto nextNode = currentNode->next;
|
||||
if (currentNode->tagForCpuAccess->canBeReleased()) {
|
||||
if (currentNode->canBeReleased()) {
|
||||
pendingFreeTags.pushFrontOne(*currentNode);
|
||||
} else {
|
||||
pendingDeferredTags.pushFrontOne(*currentNode);
|
||||
|
||||
Reference in New Issue
Block a user