mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-10 15:12:56 +08:00
feature direct submission: use tag allocation as a completion fence
Use the tag allocation address as the completion address in the exec call. Wait for the completion value before destroying the DRM direct submission. Related-To: NEO-6643 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
38190c5d17
commit
03185f7111
@@ -321,7 +321,7 @@ class CommandStreamReceiver {
|
||||
|
||||
MOCKABLE_VIRTUAL bool isGpuHangDetected() const;
|
||||
|
||||
uint64_t getCompletionAddress() {
|
||||
uint64_t getCompletionAddress() const {
|
||||
uint64_t completionFenceAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
|
||||
if (completionFenceAddress == 0) {
|
||||
return 0;
|
||||
|
||||
@@ -14,6 +14,7 @@ DirectSubmissionInputParams::DirectSubmissionInputParams(const CommandStreamRece
|
||||
memoryManager = commandStreamReceiver.getMemoryManager();
|
||||
globalFenceAllocation = commandStreamReceiver.getGlobalFenceAllocation();
|
||||
workPartitionAllocation = commandStreamReceiver.getWorkPartitionAllocation();
|
||||
completionFenceAllocation = commandStreamReceiver.getTagAllocation();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -64,6 +64,7 @@ struct DirectSubmissionInputParams : NonCopyableClass {
|
||||
MemoryManager *memoryManager = nullptr;
|
||||
const GraphicsAllocation *globalFenceAllocation = nullptr;
|
||||
GraphicsAllocation *workPartitionAllocation = nullptr;
|
||||
GraphicsAllocation *completionFenceAllocation = nullptr;
|
||||
const uint32_t rootDeviceIndex;
|
||||
};
|
||||
|
||||
@@ -160,6 +161,7 @@ class DirectSubmissionHw {
|
||||
MemoryOperationsHandler *memoryOperationHandler = nullptr;
|
||||
const HardwareInfo *hwInfo = nullptr;
|
||||
const GraphicsAllocation *globalFenceAllocation = nullptr;
|
||||
GraphicsAllocation *completionFenceAllocation = nullptr;
|
||||
GraphicsAllocation *ringBuffer = nullptr;
|
||||
GraphicsAllocation *ringBuffer2 = nullptr;
|
||||
GraphicsAllocation *semaphores = nullptr;
|
||||
|
||||
@@ -100,6 +100,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
|
||||
allocations.push_back(workPartitionAllocation);
|
||||
}
|
||||
|
||||
if (completionFenceAllocation != nullptr) {
|
||||
allocations.push_back(completionFenceAllocation);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
|
||||
printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
|
||||
ringBuffer->getGpuAddress(),
|
||||
|
||||
@@ -35,7 +35,8 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
|
||||
|
||||
MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait);
|
||||
|
||||
TagData currentTagData;
|
||||
TagData currentTagData{};
|
||||
volatile uint32_t *tagAddress;
|
||||
uint32_t completionFenceValue{};
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -39,12 +39,17 @@ DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(const DirectSubm
|
||||
this->partitionedMode = this->activeTiles > 1u;
|
||||
this->partitionConfigSet = !this->partitionedMode;
|
||||
|
||||
osContextLinux->getDrm().setDirectSubmissionActive(true);
|
||||
auto &drm = osContextLinux->getDrm();
|
||||
drm.setDirectSubmissionActive(true);
|
||||
|
||||
if (this->partitionedMode) {
|
||||
this->workPartitionAllocation = inputParams.workPartitionAllocation;
|
||||
UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr);
|
||||
}
|
||||
|
||||
if (drm.completionFenceSupport()) {
|
||||
this->completionFenceAllocation = inputParams.completionFenceAllocation;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
@@ -53,6 +58,24 @@ inline DrmDirectSubmission<GfxFamily, Dispatcher>::~DrmDirectSubmission() {
|
||||
this->stopRingBuffer();
|
||||
this->wait(static_cast<uint32_t>(this->currentTagData.tagValue));
|
||||
}
|
||||
if (this->completionFenceAllocation) {
|
||||
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
|
||||
auto &drm = osContextLinux->getDrm();
|
||||
auto &drmContextIds = osContextLinux->getDrmContextIds();
|
||||
uint32_t drmContextId = 0u;
|
||||
auto completionFenceCpuAddress = reinterpret_cast<uint64_t>(this->completionFenceAllocation->getUnderlyingBuffer()) + Drm::completionFenceOffset;
|
||||
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
|
||||
if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
|
||||
if (*reinterpret_cast<uint32_t *>(completionFenceCpuAddress) < completionFenceValue) {
|
||||
constexpr int64_t timeout = -1;
|
||||
constexpr uint16_t flags = 0;
|
||||
drm.waitUserFence(drmContextIds[drmContextId], completionFenceCpuAddress, completionFenceValue, Drm::ValueWidth::U32, timeout, flags);
|
||||
}
|
||||
drmContextId++;
|
||||
completionFenceCpuAddress = ptrOffset(completionFenceCpuAddress, this->postSyncOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
this->deallocateResources();
|
||||
}
|
||||
|
||||
@@ -81,6 +104,14 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
|
||||
|
||||
bool ret = false;
|
||||
uint32_t drmContextId = 0u;
|
||||
|
||||
uint32_t completionValue = 0u;
|
||||
uint64_t completionFenceGpuAddress = 0u;
|
||||
if (this->completionFenceAllocation) {
|
||||
completionValue = ++completionFenceValue;
|
||||
completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + Drm::completionFenceOffset;
|
||||
}
|
||||
|
||||
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
|
||||
if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
|
||||
ret |= !!bb->exec(static_cast<uint32_t>(size),
|
||||
@@ -93,9 +124,12 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
|
||||
nullptr,
|
||||
0,
|
||||
&execObject,
|
||||
0,
|
||||
0);
|
||||
completionFenceGpuAddress,
|
||||
completionValue);
|
||||
drmContextId++;
|
||||
if (completionFenceGpuAddress) {
|
||||
completionFenceGpuAddress += this->postSyncOffset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user