feature: add peer access check on driver init

Related-To: NEO-14885, HSD-14024947073

Signed-off-by: Alicja Lukaszewicz <alicja.lukaszewicz@intel.com>
This commit is contained in:
Alicja Lukaszewicz
2025-08-27 13:54:40 +00:00
committed by Compute-Runtime-Automation
parent 2e58669fe9
commit bca503548a
54 changed files with 486 additions and 86 deletions

View File

@@ -1126,6 +1126,10 @@ bool CommandStreamReceiver::isAubMode() const {
return (getType() == NEO::CommandStreamReceiverType::aub || getType() == NEO::CommandStreamReceiverType::tbxWithAub || getType() == NEO::CommandStreamReceiverType::hardwareWithAub || getType() == NEO::CommandStreamReceiverType::nullAub);
}
// Returns true only when this receiver's type is exactly
// CommandStreamReceiverType::hardware. The hardwareWithAub type is not
// matched by this check.
bool CommandStreamReceiver::isHardwareMode() const {
    const auto csrType = getType();
    return NEO::CommandStreamReceiverType::hardware == csrType;
}
TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) {
switch (status) {
case SubmissionStatus::outOfHostMemory:

View File

@@ -501,6 +501,7 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass {
bool isTbxMode() const;
bool isAubMode() const;
bool isHardwareMode() const;
bool ensureTagAllocationForRootDeviceIndex(uint32_t rootDeviceIndex);
L1CachePolicy *getStoredL1CachePolicy() {

View File

@@ -468,7 +468,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTaskHeapful(
handlePipelineSelectStateTransition(dispatchFlags);
this->streamProperties.stateComputeMode.setPropertiesAll(false, dispatchFlags.numGrfRequired,
dispatchFlags.threadArbitrationPolicy, device.getPreemptionMode());
dispatchFlags.threadArbitrationPolicy, device.getPreemptionMode(), device.hasAnyPeerAccess());
csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config;
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;

View File

@@ -7,6 +7,8 @@
#include "shared/source/command_stream/stream_property.h"
#include <optional>
namespace NEO {
enum PreemptionMode : uint32_t;
struct HardwareInfo;
@@ -37,8 +39,8 @@ struct StateComputeModeProperties {
void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment);
void resetState();
void setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode);
void setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState);
void setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode, std::optional<bool> hasPeerAccess);
void setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState, std::optional<bool> hasPeerAccess);
void setPropertiesGrfNumberThreadArbitration(uint32_t numGrfRequired, int32_t threadArbitrationPolicy);
void copyPropertiesAll(const StateComputeModeProperties &properties);
@@ -55,7 +57,7 @@ struct StateComputeModeProperties {
bool isDirtyExtra() const;
void resetStateExtra();
void setPropertiesExtraPerContext();
void setPropertiesExtraPerContext(std::optional<bool> hasPeerAccess);
void copyPropertiesExtra(const StateComputeModeProperties &properties);

View File

@@ -16,7 +16,7 @@
using namespace NEO;
void StateComputeModeProperties::setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode) {
void StateComputeModeProperties::setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode, std::optional<bool> hasPeerAccess) {
DEBUG_BREAK_IF(!this->propertiesSupportLoaded);
clearIsDirty();
@@ -47,7 +47,7 @@ void StateComputeModeProperties::setPropertiesAll(bool requiresCoherency, uint32
this->memoryAllocationForScratchAndMidthreadPreemptionBuffers.set(memoryAllocationForScratchAndMidthreadPreemptionBuffers);
}
setPropertiesPerContext(requiresCoherency, devicePreemptionMode, false);
setPropertiesPerContext(requiresCoherency, devicePreemptionMode, false, hasPeerAccess);
}
void StateComputeModeProperties::copyPropertiesAll(const StateComputeModeProperties &properties) {
@@ -166,7 +166,7 @@ void StateComputeModeProperties::resetState() {
resetStateExtra();
}
void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState) {
void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState, std::optional<bool> hasPeerAccess) {
DEBUG_BREAK_IF(!this->propertiesSupportLoaded);
if (!clearDirtyState) {
@@ -183,7 +183,7 @@ void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency,
setPipelinedEuThreadArbitration();
}
setPropertiesExtraPerContext();
setPropertiesExtraPerContext(hasPeerAccess);
if (clearDirtyState) {
clearIsDirtyPerContext();
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,7 @@
using namespace NEO;
void StateComputeModeProperties::setPropertiesExtraPerContext() {
void StateComputeModeProperties::setPropertiesExtraPerContext(std::optional<bool> hasPeerAccess) {
    // No extra per-context properties are set by this implementation;
    // hasPeerAccess is accepted to match the declaration but is unused here.
}
void StateComputeModeProperties::copyPropertiesExtra(const StateComputeModeProperties &properties) {

View File

@@ -1363,18 +1363,42 @@ bool Device::canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevi
return retVal;
}
auto setPeerAccess = [&](bool value) {
this->crossAccessEnabledDevices[peerRootDeviceIndex] = value;
peerDevice->crossAccessEnabledDevices[rootDeviceIndex] = value;
};
auto lock = executionEnvironment->obtainPeerAccessQueryLock();
if (this->crossAccessEnabledDevices.find(peerRootDeviceIndex) == this->crossAccessEnabledDevices.end()) {
retVal = queryPeerAccess(*this, *peerDevice, canAccess);
setPeerAccess(canAccess);
this->updatePeerAccessCache(peerDevice, canAccess);
}
canAccess = this->crossAccessEnabledDevices[peerRootDeviceIndex];
return retVal;
}
// Pre-populates the peer access cache for every device whose release helper
// requests it (shouldQueryPeerAccess() == true) and records in hasPeerAccess
// whether the device can access at least one other device in `devices`.
//
// queryPeerAccess - callback that writes the access result for a
//                   (device, peer) pair into its bool out-parameter.
// devices         - devices to evaluate against each other; a device is never
//                   queried against an entry with the same root device index.
//
// Devices without a release helper, or whose release helper does not request
// the query, are left untouched (hasPeerAccess is not modified for them).
void Device::initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector<NEO::Device *> &devices) {
    for (auto &device : devices) {
        auto *releaseHelper = device->getReleaseHelper();
        if (!releaseHelper || !releaseHelper->shouldQueryPeerAccess()) {
            continue;
        }

        device->hasPeerAccess = false;
        const auto rootDeviceIndex = device->getRootDeviceIndex();

        for (auto &peerDevice : devices) {
            const auto peerRootDeviceIndex = peerDevice->getRootDeviceIndex();
            if (rootDeviceIndex == peerRootDeviceIndex) {
                continue;
            }

            bool canAccess = false;
            {
                // Take the lock before consulting the cache, as canAccessPeer does:
                // updatePeerAccessCache lets a peer write into this device's map, so
                // both the lookup and the cached read must happen under the lock.
                auto lock = device->getExecutionEnvironment()->obtainPeerAccessQueryLock();

                const auto cachedEntry = device->crossAccessEnabledDevices.find(peerRootDeviceIndex);
                if (cachedEntry == device->crossAccessEnabledDevices.end()) {
                    queryPeerAccess(*device, *peerDevice, canAccess);
                    device->updatePeerAccessCache(peerDevice, canAccess);
                } else {
                    // Reuse the iterator instead of a second hash lookup via operator[].
                    canAccess = cachedEntry->second;
                }
            }

            if (canAccess) {
                device->hasPeerAccess = true;
            }
        }
    }
}
} // namespace NEO

View File

@@ -264,6 +264,16 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
std::unordered_map<uint32_t, bool> crossAccessEnabledDevices;
bool canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevice, bool &canAccess);
static void initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector<NEO::Device *> &devices);
std::optional<bool> hasAnyPeerAccess() const {
return hasPeerAccess;
}
// Stores `value` symmetrically in both devices' crossAccessEnabledDevices
// maps: this device's entry keyed by the peer's root device index, and the
// peer's entry keyed by this device's root device index.
void updatePeerAccessCache(Device *peerDevice, bool value) {
    const auto peerIndex = peerDevice->getRootDeviceIndex();
    const auto ownIndex = this->getRootDeviceIndex();

    this->crossAccessEnabledDevices[peerIndex] = value;
    peerDevice->crossAccessEnabledDevices[ownIndex] = value;
}
protected:
Device() = delete;
@@ -348,6 +358,8 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
uint32_t maxBufferPoolCount = 0u;
uint32_t microsecondResolution = 1000u;
std::optional<bool> hasPeerAccess = std::nullopt;
struct {
bool isValid = false;
std::array<uint8_t, ProductHelper::uuidSize> id;

View File

@@ -68,6 +68,7 @@ class ReleaseHelper {
virtual bool isBlitImageAllowedForDepthFormat() const = 0;
virtual bool isPostImageWriteFlushRequired() const = 0;
virtual uint32_t adjustMaxThreadsPerEuCount(uint32_t maxThreadsPerEuCount, uint32_t grfCount) const = 0;
virtual bool shouldQueryPeerAccess() const = 0;
protected:
ReleaseHelper(HardwareIpVersion hardwareIpVersion) : hardwareIpVersion(hardwareIpVersion) {}
@@ -115,6 +116,7 @@ class ReleaseHelperHw : public ReleaseHelper {
bool isBlitImageAllowedForDepthFormat() const override;
bool isPostImageWriteFlushRequired() const override;
uint32_t adjustMaxThreadsPerEuCount(uint32_t maxThreadsPerEuCount, uint32_t grfCount) const override;
bool shouldQueryPeerAccess() const override;
protected:
ReleaseHelperHw(HardwareIpVersion hardwareIpVersion) : ReleaseHelper(hardwareIpVersion) {}

View File

@@ -186,4 +186,8 @@ uint32_t ReleaseHelperHw<releaseType>::adjustMaxThreadsPerEuCount(uint32_t maxTh
return maxThreadsPerEuCount;
}
// Generic implementation: reports that peer access should not be queried.
// Device::initializePeerAccessForDevices skips any device whose release
// helper returns false from this method.
template <ReleaseType releaseType>
bool ReleaseHelperHw<releaseType>::shouldQueryPeerAccess() const {
    constexpr bool queryPeerAccess = false;
    return queryPeerAccess;
}
} // namespace NEO