refactor: extract common logic for waiting on user fences in Drm

Related-To: NEO-6643
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-04-25 11:49:22 +00:00
committed by Compute-Runtime-Automation
parent 0a5f78fbfe
commit 4a5f1b9358
5 changed files with 20 additions and 25 deletions

View File

@@ -1568,7 +1568,6 @@ void DrmMemoryManager::waitOnCompletionFence(GraphicsAllocation *allocation) {
OsContext *osContext = engine.osContext;
CommandStreamReceiver *csr = engine.commandStreamReceiver;
uint32_t activeHwContexts = csr->getActivePartitions();
auto osContextId = osContext->getContextId();
auto allocationTaskCount = csr->getCompletionValue(*allocation);
uint64_t completionFenceAddress = csr->getCompletionAddress();
@@ -1578,17 +1577,7 @@ void DrmMemoryManager::waitOnCompletionFence(GraphicsAllocation *allocation) {
if (allocation->isUsedByOsContext(osContextId)) {
Drm &drm = getDrm(csr->getRootDeviceIndex());
auto &ctxVector = static_cast<const OsContextLinux *>(osContext)->getDrmContextIds();
for (uint32_t i = 0; i < activeHwContexts; i++) {
uint32_t *fenceValue = reinterpret_cast<uint32_t *>(completionFenceAddress);
if (*fenceValue < allocationTaskCount) {
constexpr int64_t timeout = -1;
constexpr uint16_t flags = 0;
drm.waitUserFence(ctxVector[i], completionFenceAddress, allocationTaskCount, Drm::ValueWidth::U32, timeout, flags);
}
completionFenceAddress += csr->getPostSyncWriteOffset();
}
drm.waitOnUserFences(static_cast<const OsContextLinux &>(*osContext), completionFenceAddress, allocationTaskCount, csr->getActivePartitions(), csr->getPostSyncWriteOffset());
}
}
} else {

View File

@@ -1621,4 +1621,18 @@ PhyicalDevicePciSpeedInfo Drm::getPciSpeedInfo() const {
return pciSpeedInfo;
}
// Walks numActiveTiles consecutive completion fences, beginning at `address` and
// advancing by `postSyncOffset` after each one. Every fence is read as a 32-bit
// value; when it is still below `value`, waitUserFence is issued on the DRM
// context id with the same index as the tile. Requesting more tiles than the OS
// context has DRM context ids trips UNRECOVERABLE_IF.
void Drm::waitOnUserFences(const OsContextLinux &osContext, uint64_t address, uint64_t value, uint32_t numActiveTiles, uint32_t postSyncOffset) {
constexpr int64_t timeout = -1; // NOTE(review): presumably "wait without a deadline" - confirm against waitUserFence
constexpr uint16_t flags = 0;

const auto &contextIds = osContext.getDrmContextIds();
UNRECOVERABLE_IF(numActiveTiles > contextIds.size());

uint64_t fenceAddress = address;
for (uint32_t tile = 0u; tile < numActiveTiles; ++tile, fenceAddress = ptrOffset(fenceAddress, postSyncOffset)) {
const auto *fence = reinterpret_cast<uint32_t *>(fenceAddress);
if (*fence >= value) {
// This tile's fence already holds the expected value; nothing to wait for.
continue;
}
waitUserFence(contextIds[tile], fenceAddress, value, Drm::ValueWidth::U32, timeout, flags);
}
}
} // namespace NEO

View File

@@ -44,6 +44,7 @@ enum class AllocationType;
class BufferObject;
class DeviceFactory;
class OsContext;
class OsContextLinux;
class Gmm;
struct HardwareInfo;
struct RootDeviceEnvironment;
@@ -230,6 +231,8 @@ class Drm : public DriverModel {
};
MOCKABLE_VIRTUAL int waitUserFence(uint32_t ctxId, uint64_t address, uint64_t value, ValueWidth dataWidth, int64_t timeout, uint16_t flags);
// Waits on the per-tile user fences of the given OS context. Starting at `address`
// and stepping by `postSyncOffset` per tile, each of the `numActiveTiles` fences is
// read as a 32-bit value; when it is below `value`, waitUserFence is called with the
// DRM context id at the same index. `numActiveTiles` must not exceed the number of
// DRM context ids held by `osContext`.
void waitOnUserFences(const OsContextLinux &osContext, uint64_t address, uint64_t value, uint32_t numActiveTiles, uint32_t postSyncOffset);
void setNewResourceBoundToVM(uint32_t vmHandleId);
const std::vector<int> &getSliceMappings(uint32_t deviceIndex);