Disable TimestampPacket optimizations in Aub/Tbx mode

Avoid removing semaphores and reusing returned tags

Change-Id: Ic26167953c5d5a9ccceaae49f4921af11a375fab
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-12-02 08:37:42 +01:00
committed by sys_ocldev
parent 54f65c0243
commit 0527c9113c
16 changed files with 149 additions and 38 deletions

View File

@@ -416,21 +416,28 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
// Returns the allocator for HwTimeStamps tags, creating it on first use
// (i.e. when profilingTimeStampAllocator does not hold an object yet).
// NOTE(review): the two consecutive assignments below look like the removed and
// added lines of a diff hunk shown without +/- markers - confirm that only the
// six-argument form belongs in the real file.
TagAllocator<HwTimeStamps> *CommandStreamReceiver::getEventTsAllocator() {
if (profilingTimeStampAllocator.get() == nullptr) {
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize);
// Six-argument form: explicit tag size (sizeof(HwTimeStamps)) followed by
// `false` for the trailing bool argument.
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false);
}
return profilingTimeStampAllocator.get();
}
// Returns the allocator for HwPerfCounter tags, creating it on first use.
// tagSize is forwarded to the allocator only when it is created; once
// perfCounterAllocator holds an object, the argument is not looked at.
// NOTE(review): the two consecutive assignments below look like the removed and
// added lines of a diff hunk shown without +/- markers - confirm that only the
// six-argument form belongs in the real file.
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
if (perfCounterAllocator.get() == nullptr) {
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize);
// Six-argument form: same arguments plus `false` for the trailing bool argument.
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false);
}
return perfCounterAllocator.get();
}
// Returns the allocator for TimestampPacketStorage tags, creating it on first use.
// NOTE(review): the four-argument assignment directly below looks like the
// removed line of a diff hunk shown without +/- markers - confirm that only the
// six-argument form belongs in the real file.
TagAllocator<TimestampPacketStorage> *CommandStreamReceiver::getTimestampPacketAllocator() {
if (timestampPacketAllocator.get() == nullptr) {
timestampPacketAllocator = std::make_unique<TagAllocator<TimestampPacketStorage>>(rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize);
// Do not release nodes in AUB/TBX mode: this avoids the optimization that
// removes semaphores and avoids reusing returned tags.
// The flag is set for every receiver type that compares greater than CSR_HW;
// presumably all AUB/TBX variants are ordered after CSR_HW in the enum - TODO confirm.
bool doNotReleaseNodes = (getType() > CommandStreamReceiverType::CSR_HW);
timestampPacketAllocator = std::make_unique<TagAllocator<TimestampPacketStorage>>(
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize,
sizeof(TimestampPacketStorage), doNotReleaseNodes);
}
return timestampPacketAllocator.get();
}

View File

@@ -29,6 +29,13 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) override;
void setupContext(OsContext &osContext) override;
// Reports the receiver type of this AUB-dumping wrapper: the TBX-with-AUB
// variant when the wrapped base receiver is TBX, the HW-with-AUB variant otherwise.
CommandStreamReceiverType getType() override {
    const bool baseIsTbx = (BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX);
    return baseIsTbx ? CommandStreamReceiverType::CSR_TBX_WITH_AUB
                     : CommandStreamReceiverType::CSR_HW_WITH_AUB;
}
std::unique_ptr<CommandStreamReceiver> aubCSR;
};

View File

@@ -21,7 +21,7 @@ struct HwTimeStamps {
GlobalCompleteTS = 0;
ContextCompleteTS = 0;
}
bool canBeReleased() const { return true; }
bool isCompleted() const { return true; }
static GraphicsAllocation::AllocationType getAllocationType() {
return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER;
}

View File

@@ -20,7 +20,7 @@ struct HwPerfCounter {
static GraphicsAllocation::AllocationType getAllocationType() {
return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER;
}
bool canBeReleased() const { return true; }
bool isCompleted() const { return true; }
// Gpu report size is not known during compile time.
// Such information will be provided by metrics library dll.

View File

@@ -33,7 +33,7 @@ void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
std::vector<Node *> pendingNodes;
for (auto node : timestampPacketNodes) {
if (node->tagForCpuAccess->canBeReleased() || clearAllDependencies) {
if (node->canBeReleased() || clearAllDependencies) {
node->returnTag();
} else {
pendingNodes.push_back(node);

View File

@@ -42,17 +42,13 @@ struct TimestampPacketStorage {
return GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
}
// True only when isCompleted() holds and implicitDependenciesCount is zero.
bool canBeReleased() const {
return isCompleted() && implicitDependenciesCount.load() == 0;
}
// Checks the first packetsUsed entries of packets: if the lowest bit of either
// contextEnd or globalEnd is set in any of them, the storage is reported as
// not completed.
bool isCompleted() const {
for (uint32_t i = 0; i < packetsUsed; i++) {
if ((packets[i].contextEnd & 1) || (packets[i].globalEnd & 1)) {
return false;
}
}
return true;
// NOTE(review): as written, the next return is unreachable. The two returns
// look like the removed and added lines of a diff hunk shown without +/-
// markers - confirm which one belongs in the real file.
return implicitDependenciesCount.load() == 0;
}
void initialize() {

View File

@@ -36,11 +36,20 @@ struct TagNode : public IDNode<TagNode<TagType>> {
allocator->returnTag(this);
}
// Reports whether this node may be released: never while doNotReleaseNodes is
// set, otherwise whenever the CPU-visible tag reports completion.
bool canBeReleased() const {
    if (doNotReleaseNodes) {
        return false;
    }
    return tagForCpuAccess->isCompleted();
}
void setDoNotReleaseNodes(bool doNotRelease) {
doNotReleaseNodes = doNotRelease;
}
protected:
TagAllocator<TagType> *allocator = nullptr;
GraphicsAllocation *gfxAllocation = nullptr;
uint64_t gpuAddress = 0;
std::atomic<uint32_t> refCount{0};
bool doNotReleaseNodes = false;
template <typename TagType2>
friend class TagAllocator;
@@ -52,10 +61,11 @@ class TagAllocator {
using NodeType = TagNode<TagType>;
TagAllocator(uint32_t rootDeviceIndex, MemoryManager *memMngr, size_t tagCount,
size_t tagAlignment, size_t tagSize = sizeof(TagType)) : rootDeviceIndex(rootDeviceIndex),
memoryManager(memMngr),
tagCount(tagCount),
tagAlignment(tagAlignment) {
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes) : rootDeviceIndex(rootDeviceIndex),
memoryManager(memMngr),
tagCount(tagCount),
tagAlignment(tagAlignment),
doNotReleaseNodes(doNotReleaseNodes) {
this->tagSize = alignUp(tagSize, tagAlignment);
populateFreeTags();
@@ -95,7 +105,7 @@ class TagAllocator {
MOCKABLE_VIRTUAL void returnTag(NodeType *node) {
if (node->refCount.fetch_sub(1) == 1) {
if (node->tagForCpuAccess->canBeReleased()) {
if (node->canBeReleased()) {
returnTagToFreePool(node);
} else {
returnTagToDeferredPool(node);
@@ -115,6 +125,7 @@ class TagAllocator {
size_t tagCount;
size_t tagAlignment;
size_t tagSize;
bool doNotReleaseNodes = false;
std::mutex allocatorMutex;
@@ -150,6 +161,7 @@ class TagAllocator {
nodesMemory[i].gfxAllocation = graphicsAllocation;
nodesMemory[i].tagForCpuAccess = reinterpret_cast<TagType *>(Start);
nodesMemory[i].gpuAddress = gpuBaseAddress + (i * tagSize);
nodesMemory[i].setDoNotReleaseNodes(doNotReleaseNodes);
freeTags.pushTailOne(nodesMemory[i]);
Start += tagSize;
}
@@ -165,7 +177,7 @@ class TagAllocator {
while (currentNode != nullptr) {
auto nextNode = currentNode->next;
if (currentNode->tagForCpuAccess->canBeReleased()) {
if (currentNode->canBeReleased()) {
pendingFreeTags.pushFrontOne(*currentNode);
} else {
pendingDeferredTags.pushFrontOne(*currentNode);