Files
compute-runtime/shared/source/helpers/in_order_cmd_helpers.h
Lukasz Jobczyk 09e23804dc refactor: Use timestamp types for events and in order nodes
Switching tagAllocation into UC causes regressions in non-event
scenarios. It is no longer used as a dependent type for semaphores.
Restoring previous GMM_USAGE settings for tag.
Marking events and in order nodes using only timestamp types as they
have proper GMM_USAGE settings already and can be both in smem and lmem.

Resolves: NEO-13847

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
2025-01-30 12:20:38 +01:00

252 lines
9.3 KiB
C++

/*
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/allocation_type.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <utility>
#include <vector>
namespace NEO {
class GraphicsAllocation;
class MemoryManager;
class Device;
class TagNodeBase;
// Tag payload holding a single 64-bit counter value.
//
// The template flag only affects which allocation type is reported:
// deviceAlloc == true selects gpuTimestampDeviceBuffer, otherwise
// timestampPacketTagBuffer. The object must stay exactly as large as one
// uint64_t (enforced by the static_asserts below), so no members may be added.
template <bool deviceAlloc>
class DeviceAllocNodeType {
  public:
    using ValueT = uint64_t;

    static constexpr size_t defaultAllocatorTagCount = 128;

    // Allocation type requested for the storage backing nodes of this kind.
    static constexpr AllocationType getAllocationType() {
        return deviceAlloc ? NEO::AllocationType::gpuTimestampDeviceBuffer
                           : NEO::AllocationType::timestampPacketTagBuffer;
    }

    // Nodes of this kind are always reported as 64-bit counters.
    static constexpr TagNodeType getTagNodeType() {
        return TagNodeType::counter64b;
    }

    // Size in bytes of one packet, i.e. of the single counter value.
    static constexpr size_t getSinglePacketSize() {
        return sizeof(ValueT);
    }

    // Overwrites the stored counter with initValue.
    void initialize(uint64_t initValue) {
        data = initValue;
    }

  protected:
    ValueT data = 0;
};

static_assert(sizeof(DeviceAllocNodeType<true>) == sizeof(uint64_t), "This structure is consumed by GPU and has to follow specific restrictions for padding and size");
static_assert(sizeof(DeviceAllocNodeType<false>) == sizeof(uint64_t), "This structure is consumed by GPU and has to follow specific restrictions for padding and size");
// Bookkeeping for one in-order execution counter.
//
// Holds the device/host counter storage (either tag nodes or externally
// provided allocations), the current counter value, the highest value already
// waited for, and the partition counts to wait on. Instances are non-copyable
// and handed out as shared_ptr by the static factory functions.
// Members that are only declared here are defined out of line; their exact
// behaviour is not visible in this header.
class InOrderExecInfo : public NEO::NonCopyableClass {
  public:
    ~InOrderExecInfo();
    InOrderExecInfo() = delete;

    // Factory for exec info backed by tag nodes.
    static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList);

    // Factory for exec info backed by caller-provided allocations and addresses.
    static std::shared_ptr<InOrderExecInfo> createFromExternalAllocation(NEO::Device &device, NEO::GraphicsAllocation *deviceAllocation, uint64_t deviceAddress, NEO::GraphicsAllocation *hostAllocation,
                                                                         uint64_t *hostAddress, uint64_t counterValue, uint32_t devicePartitions, uint32_t hostPartitions);

    InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling);

    NEO::GraphicsAllocation *getDeviceCounterAllocation() const;
    NEO::GraphicsAllocation *getHostCounterAllocation() const;

    // Plain accessors for the cached base addresses.
    uint64_t *getBaseHostAddress() const {
        return hostAddress;
    }
    uint64_t getBaseDeviceAddress() const {
        return deviceAddress;
    }
    uint64_t getBaseHostGpuAddress() const;

    // Current counter value and its mutators.
    uint64_t getCounterValue() const {
        return counterValue;
    }
    void addCounterValue(uint64_t addValue) {
        counterValue += addValue;
    }
    void resetCounterValue() {
        counterValue = 0;
    }

    // Submission counter kept for regular command lists.
    uint64_t getRegularCmdListSubmissionCounter() const {
        return regularCmdListSubmissionCounter;
    }
    void addRegularCmdListSubmissionCounter(uint64_t addValue) {
        regularCmdListSubmissionCounter += addValue;
    }

    // Configuration flags.
    bool isRegularCmdList() const {
        return regularCmdList;
    }
    bool isHostStorageDuplicated() const {
        return duplicatedHostStorage;
    }
    bool isAtomicDeviceSignalling() const {
        return atomicDeviceSignalling;
    }

    uint32_t getNumDevicePartitionsToWait() const {
        return numDevicePartitionsToWait;
    }
    uint32_t getNumHostPartitionsToWait() const {
        return numHostPartitionsToWait;
    }

    void setAllocationOffset(uint32_t newOffset) {
        allocationOffset = newOffset;
    }
    void initializeAllocationsFromHost();
    uint32_t getAllocationOffset() const {
        return allocationOffset;
    }

    void reset();

    // True when no device counter node is attached.
    bool isExternalMemoryExecInfo() const {
        return nullptr == deviceCounterNode;
    }

    // Records a waited-for value; the stored value never decreases through this setter.
    void setLastWaitedCounterValue(uint64_t value) {
        lastWaitedCounterValue = std::max(lastWaitedCounterValue, value);
    }

    // A wait value counts as done only while the allocation offset is zero
    // and a value at least as large has already been recorded as waited for.
    bool isCounterAlreadyDone(uint64_t waitValue) const {
        if (allocationOffset != 0u) {
            return false;
        }
        return waitValue <= lastWaitedCounterValue;
    }

    NEO::GraphicsAllocation *getExternalHostAllocation() const {
        return externalHostAllocation;
    }
    NEO::GraphicsAllocation *getExternalDeviceAllocation() const {
        return externalDeviceAllocation;
    }

    void pushTempTimestampNode(TagNodeBase *node, uint64_t value);
    void releaseNotUsedTempTimestampNodes(bool forceReturn);

  protected:
    void uploadToTbx(TagNodeBase &node, size_t size);

    NEO::Device &device;
    NEO::TagNodeBase *deviceCounterNode = nullptr;
    NEO::TagNodeBase *hostCounterNode = nullptr;
    NEO::GraphicsAllocation *externalHostAllocation = nullptr;
    NEO::GraphicsAllocation *externalDeviceAllocation = nullptr;
    // Pairs of (node, value) stored by pushTempTimestampNode.
    std::vector<std::pair<NEO::TagNodeBase *, uint64_t>> tempTimestampNodes;

    std::mutex mutex;

    uint64_t counterValue = 0;
    uint64_t lastWaitedCounterValue = 0;
    uint64_t regularCmdListSubmissionCounter = 0;
    uint64_t deviceAddress = 0;
    uint64_t *hostAddress = nullptr;
    uint32_t numDevicePartitionsToWait = 0;
    uint32_t numHostPartitionsToWait = 0;
    uint32_t allocationOffset = 0;
    uint32_t rootDeviceIndex = 0;
    bool regularCmdList = false;
    bool duplicatedHostStorage = false;
    bool atomicDeviceSignalling = false;
    bool isTbx = false;
};
namespace InOrderPatchCommandHelpers {
// Returns the value to add on top of a patch command's base counter.
//
// The result is non-zero only for regular command lists that have been
// submitted more than once: counterValue * (submissionCounter - 1).
// In every other case it is 0.
inline uint64_t getAppendCounterValue(const InOrderExecInfo &inOrderExecInfo) {
    if (!inOrderExecInfo.isRegularCmdList()) {
        return 0;
    }

    const uint64_t submissionCount = inOrderExecInfo.getRegularCmdListSubmissionCounter();
    if (submissionCount <= 1) {
        return 0;
    }

    const uint64_t previousSubmissions = submissionCount - 1;
    return inOrderExecInfo.getCounterValue() * previousSubmissions;
}
// Kind of GPU command a PatchCmd refers to; PatchCmd::patch dispatches on it.
enum class PatchCmdType {
    none,        // default value; PatchCmd::patch treats it as unrecoverable
    lri64b,      // pair of MI_LOAD_REGISTER_IMM commands (low / high dword)
    sdi,         // MI_STORE_DATA_IMM
    semaphore,   // MI_SEMAPHORE_WAIT
    walker,      // handled by PatchCmd::patchComputeWalker
    pipeControl, // PIPE_CONTROL
};
template <typename GfxFamily>
struct PatchCmd {
PatchCmd(std::shared_ptr<InOrderExecInfo> *inOrderExecInfo, void *cmd1, void *cmd2, uint64_t baseCounterValue, PatchCmdType patchCmdType, bool deviceAtomicSignaling, bool duplicatedHostStorage)
: cmd1(cmd1), cmd2(cmd2), baseCounterValue(baseCounterValue), patchCmdType(patchCmdType), deviceAtomicSignaling(deviceAtomicSignaling), duplicatedHostStorage(duplicatedHostStorage) {
if (inOrderExecInfo) {
this->inOrderExecInfo = *inOrderExecInfo;
}
}
void patch(uint64_t appendCounterValue) {
if (skipPatching) {
return;
}
switch (patchCmdType) {
case PatchCmdType::sdi:
patchSdi(appendCounterValue);
break;
case PatchCmdType::semaphore:
patchSemaphore(appendCounterValue);
break;
case PatchCmdType::walker:
patchComputeWalker(appendCounterValue);
break;
case PatchCmdType::lri64b:
patchLri64b(appendCounterValue);
break;
case PatchCmdType::pipeControl:
patchPipeControl(appendCounterValue);
break;
default:
UNRECOVERABLE_IF(true);
break;
}
}
void updateInOrderExecInfo(std::shared_ptr<InOrderExecInfo> *inOrderExecInfo) {
this->inOrderExecInfo = *inOrderExecInfo;
}
void setSkipPatching(bool value) {
skipPatching = value;
}
bool isExternalDependency() const { return inOrderExecInfo.get(); }
std::shared_ptr<InOrderExecInfo> inOrderExecInfo;
void *cmd1 = nullptr;
void *cmd2 = nullptr;
const uint64_t baseCounterValue = 0;
const PatchCmdType patchCmdType = PatchCmdType::none;
bool deviceAtomicSignaling = false;
bool duplicatedHostStorage = false;
bool skipPatching = false;
protected:
void patchSdi(uint64_t appendCounterValue) {
auto sdiCmd = reinterpret_cast<typename GfxFamily::MI_STORE_DATA_IMM *>(cmd1);
sdiCmd->setDataDword0(getLowPart(baseCounterValue + appendCounterValue));
sdiCmd->setDataDword1(getHighPart(baseCounterValue + appendCounterValue));
}
void patchSemaphore(uint64_t appendCounterValue) {
if (isExternalDependency()) {
appendCounterValue = InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo);
if (appendCounterValue == 0) {
return;
}
}
auto semaphoreCmd = reinterpret_cast<typename GfxFamily::MI_SEMAPHORE_WAIT *>(cmd1);
semaphoreCmd->setSemaphoreDataDword(static_cast<uint32_t>(baseCounterValue + appendCounterValue));
}
void patchComputeWalker(uint64_t appendCounterValue);
void patchPipeControl(uint64_t appendCounterValue) {
auto pcCmd = reinterpret_cast<typename GfxFamily::PIPE_CONTROL *>(cmd1);
pcCmd->setImmediateData(static_cast<uint64_t>(baseCounterValue + appendCounterValue));
}
void patchLri64b(uint64_t appendCounterValue) {
if (isExternalDependency()) {
appendCounterValue = InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo);
if (appendCounterValue == 0) {
return;
}
}
const uint64_t counterValue = baseCounterValue + appendCounterValue;
auto lri1 = reinterpret_cast<typename GfxFamily::MI_LOAD_REGISTER_IMM *>(cmd1);
lri1->setDataDword(getLowPart(counterValue));
auto lri2 = reinterpret_cast<typename GfxFamily::MI_LOAD_REGISTER_IMM *>(cmd2);
lri2->setDataDword(getHighPart(counterValue));
}
PatchCmd() = delete;
};
} // namespace InOrderPatchCommandHelpers
// Vector of PatchCmd entries for the given GfxFamily.
template <typename GfxFamily>
using InOrderPatchCommandsContainer = std::vector<NEO::InOrderPatchCommandHelpers::PatchCmd<GfxFamily>>;
} // namespace NEO