diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 08da25aac0..49ab9ae6dd 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -3875,7 +3875,7 @@ void CommandListCoreFamily::updateStreamPropertiesForRegularComma finalStreamState = requiredStreamState; } else { finalStreamState = requiredStreamState; - requiredStreamState.stateComputeMode.setPropertiesAll(cmdListDefaultCoherency, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode()); + requiredStreamState.stateComputeMode.setPropertiesAll(cmdListDefaultCoherency, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), device->getNEODevice()->hasAnyPeerAccess()); } containsAnyKernel = true; } @@ -3914,7 +3914,7 @@ void CommandListCoreFamily::updateStreamPropertiesForRegularComma if (this->stateComputeModeTracking) { finalStreamState.stateComputeMode.setPropertiesGrfNumberThreadArbitration(kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy); } else { - finalStreamState.stateComputeMode.setPropertiesAll(cmdListDefaultCoherency, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode()); + finalStreamState.stateComputeMode.setPropertiesAll(cmdListDefaultCoherency, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), device->getNEODevice()->hasAnyPeerAccess()); } if (finalStreamState.stateComputeMode.isDirty()) { bool isRcs = (this->engineGroupType == NEO::EngineGroupType::renderCompute); diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 9eb6591edd..13fa6a9739 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -29,6 +29,8 @@ #include 
"log_manager.h" #include "neo_igfxfmid.h" +#include + namespace L0 { CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) { @@ -319,7 +321,7 @@ void CommandListImp::enableCopyOperationOffload() { void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties) { if (this->stateComputeModeTracking) { - streamProperties.stateComputeMode.setPropertiesPerContext(cmdListDefaultCoherency, this->commandListPreemptionMode, true); + streamProperties.stateComputeMode.setPropertiesPerContext(cmdListDefaultCoherency, this->commandListPreemptionMode, true, std::nullopt); } streamProperties.frontEndState.setPropertiesDisableOverdispatch(cmdListDefaultDisableOverdispatch, true); diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index f07e746b3f..0ec0c8d4d5 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -189,6 +189,12 @@ ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t * bool DeviceImp::queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, bool &canAccess) { ze_result_t retVal = ZE_RESULT_SUCCESS; + auto csr = device.getInternalEngine().commandStreamReceiver; + if (!csr->isHardwareMode()) { + canAccess = false; + return false; + } + auto deviceImp = device.getSpecializedDevice(); auto peerDeviceImp = peerDevice.getSpecializedDevice(); diff --git a/level_zero/core/source/device/device_imp.h b/level_zero/core/source/device/device_imp.h index 4e4bf8ccc3..245780fbe8 100644 --- a/level_zero/core/source/device/device_imp.h +++ b/level_zero/core/source/device/device_imp.h @@ -182,9 +182,9 @@ struct DeviceImp : public Device, NEO::NonCopyableAndNonMovableClass { uint32_t getCopyEngineOrdinal() const; std::optional tryGetCopyEngineOrdinal() const; void bcsSplitReleaseResources() override; + static bool queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, bool 
&canAccess); protected: - static bool queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, bool &canAccess); bool submitCopyForP2P(DeviceImp *hPeerDevice, ze_result_t &result); ze_result_t getGlobalTimestampsUsingSubmission(uint64_t *hostTimestamp, uint64_t *deviceTimestamp); ze_result_t getGlobalTimestampsUsingOsInterface(uint64_t *hostTimestamp, uint64_t *deviceTimestamp); diff --git a/level_zero/core/source/driver/driver.cpp b/level_zero/core/source/driver/driver.cpp index 5fe70eb478..febf5f06a3 100644 --- a/level_zero/core/source/driver/driver.cpp +++ b/level_zero/core/source/driver/driver.cpp @@ -89,6 +89,15 @@ void DriverImp::initialize(ze_result_t *result) { auto driverHandle = DriverHandle::create(std::move(devices), envVariables, result); if (driverHandle) { globalDriverHandles->push_back(driverHandle); + + auto &devicesToExpose = static_cast(driverHandle)->devicesToExpose; + std::vector neoDeviceToExpose; + neoDeviceToExpose.reserve(devicesToExpose.size()); + for (auto deviceToExpose : devicesToExpose) { + neoDeviceToExpose.push_back(Device::fromHandle(deviceToExpose)->getNEODevice()); + } + + NEO::Device::initializePeerAccessForDevices(DeviceImp::queryPeerAccess, neoDeviceToExpose); } } diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index 9e4c1eaa3c..8d5cbbcc71 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -3547,6 +3547,59 @@ TEST_F(MultipleDevicesTest, givenCanAccessPeerFailsThenReturnsDeviceLost) { delete device0; } +HWTEST_F(MultipleDevicesTest, givenCsrModeDifferentThanHardwareWhenQueryPeerAccessThenReturnsFalse) { + struct MockDeviceFail : public MockDeviceImp { + MockDeviceFail(L0::Device *device) : MockDeviceImp(device->getNEODevice()) { + this->driverHandle = device->getDriverHandle(); + this->neoDevice->template 
setSpecializedDevice(this); + } + + ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) override { + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc, + ze_command_queue_handle_t *commandQueue) override { + *commandQueue = &this->commandQueue; + return ZE_RESULT_SUCCESS; + } + + ze_result_t createCommandList(const ze_command_list_desc_t *desc, + ze_command_list_handle_t *commandList) override { + *commandList = &this->commandList; + return ZE_RESULT_SUCCESS; + } + ze_result_t createInternalCommandQueue(const ze_command_queue_desc_t *desc, + ze_command_queue_handle_t *commandQueue) override { + *commandQueue = &this->commandQueue; + return ZE_RESULT_SUCCESS; + } + + ze_result_t createInternalCommandList(const ze_command_list_desc_t *desc, + ze_command_list_handle_t *commandList) override { + *commandList = &this->commandList; + return ZE_RESULT_SUCCESS; + } + + MockCommandList commandList; + Mock commandQueue; + }; + + MockDeviceFail *device0 = new MockDeviceFail(driverHandle->devices[0]); + L0::Device *device1 = driverHandle->devices[1]; + + auto deviceInternalEngine = device0->getNEODevice()->getInternalEngine(); + auto hwCsr = static_cast *>(deviceInternalEngine.commandStreamReceiver); + auto ultCsr = static_cast *>(hwCsr); + ultCsr->commandStreamReceiverType = CommandStreamReceiverType::tbx; + + bool canAccess = false; + bool res = MockDeviceImp::queryPeerAccess(*device0->getNEODevice(), *device1->getNEODevice(), canAccess); + EXPECT_FALSE(res); + EXPECT_FALSE(canAccess); + delete device0; +} + TEST_F(MultipleDevicesTest, givenDeviceFailsExecuteCommandListThenQueryPeerAccessReturnsFalse) { struct MockDeviceFail : public MockDeviceImp { struct MockCommandQueueImp : public Mock { diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index ac1986ccd2..f8af32d4de 100644 --- 
a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -1033,7 +1033,7 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp for (size_t i = 0; i < numberOfNodes; i++) { auto newTag = allocator->getTag(); - if (csr.getType() != CommandStreamReceiverType::hardware) { + if (!csr.isHardwareMode()) { auto tagAlloc = newTag->getBaseGraphicsAllocation()->getGraphicsAllocation(csr.getRootDeviceIndex()); // initialize full page tables for the first time diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 82f51944ca..87838efd85 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -912,7 +912,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleWh csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble); commandStreamReceiver.streamProperties.stateComputeMode.setPropertiesAll(false, flushTaskFlags.numGrfRequired, - flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled); + flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled, false); flushTask(commandStreamReceiver); EXPECT_GE(sizeNeeded, csrCS.getUsed()); @@ -952,7 +952,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble - sizeNeededForStateBaseAddress); commandStreamReceiver.streamProperties.stateComputeMode.setPropertiesAll(false, flushTaskFlags.numGrfRequired, - flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled); + flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled, false); flushTask(commandStreamReceiver); EXPECT_GE(sizeNeeded, csrCS.getUsed()); @@ -992,7 +992,7 @@ 
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(mockDevice->getHardwareInfo()); commandStreamReceiver.streamProperties.stateComputeMode.setPropertiesAll(false, flushTaskFlags.numGrfRequired, - flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled); + flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled, false); commandStreamReceiver.flushTask( commandStream, 0, diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index c64bca6444..279e51c090 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -1075,7 +1075,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandStreamReceiverFlushTaskTests, GivenPreambl expectedUsed = alignUp(expectedUsed, MemoryConstants::cacheLineSize); commandStreamReceiver.streamProperties.stateComputeMode.setPropertiesAll(false, flushTaskFlags.numGrfRequired, - flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled); + flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled, false); commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, flushTaskFlags, *pDevice); // Verify that we didn't grab a new CS buffer diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp index c1a2d2e2b6..3f3097a429 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp @@ -96,7 +96,7 @@ HWTEST2_F(CommandStreamReceiverFlushTaskDg2AndLaterTests, givenProgramExtendedPi 
dispatchFlags.threadArbitrationPolicy = gfxCoreHelper.getDefaultThreadArbitrationPolicy(); commandStreamReceiver.streamProperties.stateComputeMode.setPropertiesAll(false, dispatchFlags.numGrfRequired, - dispatchFlags.threadArbitrationPolicy, PreemptionMode::Disabled); + dispatchFlags.threadArbitrationPolicy, PreemptionMode::Disabled, false); auto cmdSizeForAllCommands = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); commandStreamReceiver.flushTask(commandStream, 0, diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index ead4c45c71..34c01bf3e2 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -167,7 +167,7 @@ struct UltCommandStreamReceiverTest commandStreamReceiver.streamProperties.pipelineSelect.setPropertiesAll(true, false); commandStreamReceiver.streamProperties.stateComputeMode.setPropertiesAll(0, GrfConfig::defaultGrfNumber, - gfxCoreHelper.getDefaultThreadArbitrationPolicy(), pDevice->getPreemptionMode()); + gfxCoreHelper.getDefaultThreadArbitrationPolicy(), pDevice->getPreemptionMode(), false); commandStreamReceiver.streamProperties.frontEndState.setPropertiesAll(false, false, false); } diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 91c6cb6237..93c34ecad3 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -1126,6 +1126,10 @@ bool CommandStreamReceiver::isAubMode() const { return (getType() == NEO::CommandStreamReceiverType::aub || getType() == NEO::CommandStreamReceiverType::tbxWithAub || getType() == NEO::CommandStreamReceiverType::hardwareWithAub || getType() == NEO::CommandStreamReceiverType::nullAub); } +bool CommandStreamReceiver::isHardwareMode() 
const { + return (getType() == NEO::CommandStreamReceiverType::hardware); +} + TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) { switch (status) { case SubmissionStatus::outOfHostMemory: diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index ad3ef8c479..47c8f21627 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -501,6 +501,7 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { bool isTbxMode() const; bool isAubMode() const; + bool isHardwareMode() const; bool ensureTagAllocationForRootDeviceIndex(uint32_t rootDeviceIndex); L1CachePolicy *getStoredL1CachePolicy() { diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index e5186d38a0..ac11dea606 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -468,7 +468,7 @@ CompletionStamp CommandStreamReceiverHw::flushTaskHeapful( handlePipelineSelectStateTransition(dispatchFlags); this->streamProperties.stateComputeMode.setPropertiesAll(false, dispatchFlags.numGrfRequired, - dispatchFlags.threadArbitrationPolicy, device.getPreemptionMode()); + dispatchFlags.threadArbitrationPolicy, device.getPreemptionMode(), device.hasAnyPeerAccess()); csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; diff --git a/shared/source/command_stream/definitions/stream_properties.inl b/shared/source/command_stream/definitions/stream_properties.inl index 320b0e8540..1d4fe3ccfd 100644 --- a/shared/source/command_stream/definitions/stream_properties.inl +++ 
b/shared/source/command_stream/definitions/stream_properties.inl @@ -7,6 +7,8 @@ #include "shared/source/command_stream/stream_property.h" +#include + namespace NEO { enum PreemptionMode : uint32_t; struct HardwareInfo; @@ -37,8 +39,8 @@ struct StateComputeModeProperties { void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment); void resetState(); - void setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode); - void setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState); + void setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode, std::optional hasPeerAccess); + void setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState, std::optional hasPeerAccess); void setPropertiesGrfNumberThreadArbitration(uint32_t numGrfRequired, int32_t threadArbitrationPolicy); void copyPropertiesAll(const StateComputeModeProperties &properties); @@ -55,7 +57,7 @@ struct StateComputeModeProperties { bool isDirtyExtra() const; void resetStateExtra(); - void setPropertiesExtraPerContext(); + void setPropertiesExtraPerContext(std::optional hasPeerAccess); void copyPropertiesExtra(const StateComputeModeProperties &properties); diff --git a/shared/source/command_stream/stream_properties.cpp b/shared/source/command_stream/stream_properties.cpp index 1baa6e5442..e239b40fbb 100644 --- a/shared/source/command_stream/stream_properties.cpp +++ b/shared/source/command_stream/stream_properties.cpp @@ -16,7 +16,7 @@ using namespace NEO; -void StateComputeModeProperties::setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode) { +void StateComputeModeProperties::setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t 
threadArbitrationPolicy, PreemptionMode devicePreemptionMode, std::optional hasPeerAccess) { DEBUG_BREAK_IF(!this->propertiesSupportLoaded); clearIsDirty(); @@ -47,7 +47,7 @@ void StateComputeModeProperties::setPropertiesAll(bool requiresCoherency, uint32 this->memoryAllocationForScratchAndMidthreadPreemptionBuffers.set(memoryAllocationForScratchAndMidthreadPreemptionBuffers); } - setPropertiesPerContext(requiresCoherency, devicePreemptionMode, false); + setPropertiesPerContext(requiresCoherency, devicePreemptionMode, false, hasPeerAccess); } void StateComputeModeProperties::copyPropertiesAll(const StateComputeModeProperties &properties) { @@ -166,7 +166,7 @@ void StateComputeModeProperties::resetState() { resetStateExtra(); } -void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState) { +void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState, std::optional hasPeerAccess) { DEBUG_BREAK_IF(!this->propertiesSupportLoaded); if (!clearDirtyState) { @@ -183,7 +183,7 @@ void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency, setPipelinedEuThreadArbitration(); } - setPropertiesExtraPerContext(); + setPropertiesExtraPerContext(hasPeerAccess); if (clearDirtyState) { clearIsDirtyPerContext(); } diff --git a/shared/source/command_stream/stream_properties_extra.cpp b/shared/source/command_stream/stream_properties_extra.cpp index 7f3b80b40b..981113f6ad 100644 --- a/shared/source/command_stream/stream_properties_extra.cpp +++ b/shared/source/command_stream/stream_properties_extra.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,7 @@ using namespace NEO; -void StateComputeModeProperties::setPropertiesExtraPerContext() { +void 
StateComputeModeProperties::setPropertiesExtraPerContext(std::optional hasPeerAccess) { } void StateComputeModeProperties::copyPropertiesExtra(const StateComputeModeProperties &properties) { diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index cb61520917..771bea6c99 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -1363,18 +1363,44 @@ bool Device::canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevi return retVal; } - auto setPeerAccess = [&](bool value) { - this->crossAccessEnabledDevices[peerRootDeviceIndex] = value; - peerDevice->crossAccessEnabledDevices[rootDeviceIndex] = value; - }; - auto lock = executionEnvironment->obtainPeerAccessQueryLock(); if (this->crossAccessEnabledDevices.find(peerRootDeviceIndex) == this->crossAccessEnabledDevices.end()) { retVal = queryPeerAccess(*this, *peerDevice, canAccess); - setPeerAccess(canAccess); + this->updatePeerAccessCache(peerDevice, canAccess); } canAccess = this->crossAccessEnabledDevices[peerRootDeviceIndex]; return retVal; } + +// Seeds the peer-access cache and hasPeerAccess for every device whose release helper requests the query; other devices keep hasPeerAccess unset. +void Device::initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector &devices) { + for (auto &device : devices) { + if (device->getReleaseHelper() && device->getReleaseHelper()->shouldQueryPeerAccess()) { + device->hasPeerAccess = false; + auto rootDeviceIndex = device->getRootDeviceIndex(); + + for (auto &peerDevice : devices) { + auto peerRootDeviceIndex = peerDevice->getRootDeviceIndex(); + if (rootDeviceIndex == peerRootDeviceIndex) { + continue; + } + + bool canAccess = false; + // Take the lock before probing the cache, as canAccessPeer does, so the lookup and the update happen under the same lock. + auto lock = device->getExecutionEnvironment()->obtainPeerAccessQueryLock(); + if (device->crossAccessEnabledDevices.find(peerRootDeviceIndex) == device->crossAccessEnabledDevices.end()) { + queryPeerAccess(*device, *peerDevice, canAccess); + device->updatePeerAccessCache(peerDevice, canAccess); + } else { + canAccess = device->crossAccessEnabledDevices[peerRootDeviceIndex]; + } + + if (canAccess) {
+ device->hasPeerAccess = true; + } + } + } + } +} } // namespace NEO diff --git a/shared/source/device/device.h b/shared/source/device/device.h index c5e4afa45d..c12246520d 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -264,6 +264,16 @@ class Device : public ReferenceTrackedObject, NEO::NonCopyableAndNonMova std::unordered_map crossAccessEnabledDevices; bool canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevice, bool &canAccess); + static void initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector &devices); + + std::optional hasAnyPeerAccess() const { + return hasPeerAccess; + } + + void updatePeerAccessCache(Device *peerDevice, bool value) { + this->crossAccessEnabledDevices[peerDevice->getRootDeviceIndex()] = value; + peerDevice->crossAccessEnabledDevices[this->getRootDeviceIndex()] = value; + } protected: Device() = delete; @@ -348,6 +358,8 @@ class Device : public ReferenceTrackedObject, NEO::NonCopyableAndNonMova uint32_t maxBufferPoolCount = 0u; uint32_t microsecondResolution = 1000u; + std::optional hasPeerAccess = std::nullopt; + struct { bool isValid = false; std::array id; diff --git a/shared/source/release_helper/release_helper.h b/shared/source/release_helper/release_helper.h index 336ae52e9a..6ddf1934bb 100644 --- a/shared/source/release_helper/release_helper.h +++ b/shared/source/release_helper/release_helper.h @@ -68,6 +68,7 @@ class ReleaseHelper { virtual bool isBlitImageAllowedForDepthFormat() const = 0; virtual bool isPostImageWriteFlushRequired() const = 0; virtual uint32_t adjustMaxThreadsPerEuCount(uint32_t maxThreadsPerEuCount, uint32_t grfCount) const = 0; + virtual bool shouldQueryPeerAccess() const = 0; protected: ReleaseHelper(HardwareIpVersion hardwareIpVersion) : hardwareIpVersion(hardwareIpVersion) {} @@ -115,6 +116,7 @@ class ReleaseHelperHw : public ReleaseHelper { bool isBlitImageAllowedForDepthFormat() const override; bool
isPostImageWriteFlushRequired() const override; uint32_t adjustMaxThreadsPerEuCount(uint32_t maxThreadsPerEuCount, uint32_t grfCount) const override; + bool shouldQueryPeerAccess() const override; protected: ReleaseHelperHw(HardwareIpVersion hardwareIpVersion) : ReleaseHelper(hardwareIpVersion) {} diff --git a/shared/source/release_helper/release_helper_base.inl b/shared/source/release_helper/release_helper_base.inl index 8d5cdf99c2..f6393beb2b 100644 --- a/shared/source/release_helper/release_helper_base.inl +++ b/shared/source/release_helper/release_helper_base.inl @@ -186,4 +186,8 @@ uint32_t ReleaseHelperHw::adjustMaxThreadsPerEuCount(uint32_t maxTh return maxThreadsPerEuCount; } +template +bool ReleaseHelperHw::shouldQueryPeerAccess() const { + return false; +} } // namespace NEO diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index 835fa4d0d9..d917c36d46 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -69,6 +69,7 @@ class MockDevice : public RootDevice { using Device::generateUuidFromPciBusInfo; using Device::getGlobalMemorySize; using Device::initializeCaps; + using Device::initializePeerAccessForDevices; using Device::initUsmReuseLimits; using Device::maxBufferPoolCount; using Device::microsecondResolution; diff --git a/shared/test/common/mocks/mock_release_helper.h b/shared/test/common/mocks/mock_release_helper.h index 952f7a863d..64d3860df7 100644 --- a/shared/test/common/mocks/mock_release_helper.h +++ b/shared/test/common/mocks/mock_release_helper.h @@ -46,6 +46,7 @@ class MockReleaseHelper : public ReleaseHelper { ADDMETHOD_CONST_NOBASE(isPostImageWriteFlushRequired, bool, false, ()); ADDMETHOD_CONST_NOBASE(adjustMaxThreadsPerEuCount, uint32_t, 8u, (uint32_t maxThreadsPerEuCount, uint32_t grfCount)); ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels)); + 
ADDMETHOD_CONST_NOBASE(shouldQueryPeerAccess, bool, false, ()); const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override { static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {}; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index a9a5f6241e..1d55b9a95f 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -3972,7 +3972,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, EXPECT_TRUE(commandStreamReceiver.getStateComputeModeDirty()); - this->requiredStreamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, ThreadArbitrationPolicy::AgeBased, NEO::PreemptionMode::ThreadGroup); + this->requiredStreamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, ThreadArbitrationPolicy::AgeBased, NEO::PreemptionMode::ThreadGroup, false); commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice); @@ -4017,7 +4017,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, EXPECT_TRUE(commandStreamReceiver.getStateComputeModeDirty()); - this->requiredStreamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, ThreadArbitrationPolicy::AgeBased, NEO::PreemptionMode::ThreadGroup); + this->requiredStreamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, ThreadArbitrationPolicy::AgeBased, NEO::PreemptionMode::ThreadGroup, false); immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::nonKernel; commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice); @@ -6471,3 +6471,22 @@ HWTEST_F(CommandStreamReceiverHwTest, GivenWaitOnWalkerPostSyncWhenImmediateFlus 
EXPECT_TRUE(commandStreamReceiver.isWalkerWithProfilingEnqueued); } } + +HWTEST_F(CommandStreamReceiverHwTest, givenVariousCsrModeWhenGettingHardwareModeThenExpectOnlyWhenModeIsHardware) { + auto &ultCsr = pDevice->getUltCommandStreamReceiver(); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::hardware; + EXPECT_TRUE(ultCsr.isHardwareMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::hardwareWithAub; + EXPECT_FALSE(ultCsr.isHardwareMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::aub; + EXPECT_FALSE(ultCsr.isHardwareMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::tbx; + EXPECT_FALSE(ultCsr.isHardwareMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::tbxWithAub; + EXPECT_FALSE(ultCsr.isHardwareMode()); +} diff --git a/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp index 46cafbd6f4..1fe53b3d07 100644 --- a/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp +++ b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp @@ -136,7 +136,7 @@ HWTEST2_F(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrec for (auto requiresCoherency : ::testing::Bool()) { for (auto largeGrf : ::testing::Bool()) { for (auto threadArbitrationPolicy : threadArbitrationPolicyValues) { - properties.stateComputeMode.setPropertiesAll(requiresCoherency, largeGrf ? 256 : 128, threadArbitrationPolicy, preemptionMode); + properties.stateComputeMode.setPropertiesAll(requiresCoherency, largeGrf ?
256 : 128, threadArbitrationPolicy, preemptionMode, false); if constexpr (TestTraits::largeGrfModeInStateComputeModeSupported) { EXPECT_EQ(largeGrf, properties.stateComputeMode.largeGrfMode.value); } else { @@ -163,7 +163,7 @@ HWTEST2_F(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrec for (auto forceZPassAsyncComputeThreadLimit : ::testing::Bool()) { debugManager.flags.ForceZPassAsyncComputeThreadLimit.set(forceZPassAsyncComputeThreadLimit); - properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch); + properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch, false); if (scmPropertiesSupport.zPassAsyncComputeThreadLimit) { EXPECT_EQ(forceZPassAsyncComputeThreadLimit, properties.stateComputeMode.zPassAsyncComputeThreadLimit.value); } else { @@ -173,7 +173,7 @@ HWTEST2_F(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrec for (auto forcePixelAsyncComputeThreadLimit : ::testing::Bool()) { debugManager.flags.ForcePixelAsyncComputeThreadLimit.set(forcePixelAsyncComputeThreadLimit); - properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch); + properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch, false); if (scmPropertiesSupport.pixelAsyncComputeThreadLimit) { EXPECT_EQ(forcePixelAsyncComputeThreadLimit, properties.stateComputeMode.pixelAsyncComputeThreadLimit.value); } else { @@ -183,7 +183,7 @@ HWTEST2_F(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrec for (auto threadArbitrationPolicy : threadArbitrationPolicyValues) { debugManager.flags.OverrideThreadArbitrationPolicy.set(threadArbitrationPolicy); - properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch); + properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch, false); if (scmPropertiesSupport.threadArbitrationPolicy) { EXPECT_EQ(threadArbitrationPolicy, 
properties.stateComputeMode.threadArbitrationPolicy.value); } else { @@ -193,7 +193,7 @@ HWTEST2_F(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrec for (auto forceScratchAndMTPBufferSizeMode : ::testing::Bool()) { debugManager.flags.ForceScratchAndMTPBufferSizeMode.set(forceScratchAndMTPBufferSizeMode); - properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch); + properties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::MidBatch, false); if (scmPropertiesSupport.allocationForScratchAndMidthreadPreemption) { EXPECT_EQ(forceScratchAndMTPBufferSizeMode, properties.stateComputeMode.memoryAllocationForScratchAndMidthreadPreemptionBuffers.value); } else { @@ -301,36 +301,36 @@ TEST(StreamPropertiesTests, givenVariousDevicePreemptionComputeModesWhenSettingP bool coherencyRequired = false; PreemptionMode devicePreemptionMode = PreemptionMode::Disabled; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(-1, scmProperties.devicePreemptionMode.value); scmProperties.scmPropertiesSupport.devicePreemptionMode = true; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(static_cast(devicePreemptionMode), scmProperties.devicePreemptionMode.value); devicePreemptionMode = PreemptionMode::Initial; - scmProperties.setPropertiesAll(coherencyRequired, -1, -1, devicePreemptionMode); + scmProperties.setPropertiesAll(coherencyRequired, -1, -1, devicePreemptionMode, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(static_cast(devicePreemptionMode), scmProperties.devicePreemptionMode.value); - 
scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(static_cast(devicePreemptionMode), scmProperties.devicePreemptionMode.value); devicePreemptionMode = PreemptionMode::MidThread; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(static_cast(devicePreemptionMode), scmProperties.devicePreemptionMode.value); - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(static_cast(devicePreemptionMode), scmProperties.devicePreemptionMode.value); clearDirtyState = true; devicePreemptionMode = PreemptionMode::ThreadGroup; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(static_cast(devicePreemptionMode), scmProperties.devicePreemptionMode.value); } @@ -343,35 +343,35 @@ TEST(StreamPropertiesTests, givenVariousCoherencyRequirementsWhenSettingProperty bool coherencyRequired = false; PreemptionMode devicePreemptionMode = PreemptionMode::Disabled; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(-1, scmProperties.isCoherencyRequired.value); 
scmProperties.scmPropertiesSupport.coherencyRequired = true; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(0, scmProperties.isCoherencyRequired.value); - scmProperties.setPropertiesAll(coherencyRequired, -1, -1, devicePreemptionMode); + scmProperties.setPropertiesAll(coherencyRequired, -1, -1, devicePreemptionMode, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(0, scmProperties.isCoherencyRequired.value); - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(0, scmProperties.isCoherencyRequired.value); coherencyRequired = true; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(1, scmProperties.isCoherencyRequired.value); - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(1, scmProperties.isCoherencyRequired.value); clearDirtyState = true; coherencyRequired = false; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(0, scmProperties.isCoherencyRequired.value); } @@ -385,20 +385,20 @@ TEST(StreamPropertiesTests, 
givenVariableRegisterSizeAllocationSettingWhenSettin scmProperties.propertiesSupportLoaded = true; scmProperties.scmPropertiesSupport.enableVariableRegisterSizeAllocation = false; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(-1, scmProperties.enableVariableRegisterSizeAllocation.value); scmProperties.scmPropertiesSupport.enableVariableRegisterSizeAllocation = true; - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(1, scmProperties.enableVariableRegisterSizeAllocation.value); - scmProperties.setPropertiesAll(coherencyRequired, -1, -1, devicePreemptionMode); + scmProperties.setPropertiesAll(coherencyRequired, -1, -1, devicePreemptionMode, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(1, scmProperties.enableVariableRegisterSizeAllocation.value); - scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState); + scmProperties.setPropertiesPerContext(coherencyRequired, devicePreemptionMode, clearDirtyState, false); EXPECT_FALSE(scmProperties.isDirty()); EXPECT_EQ(1, scmProperties.enableVariableRegisterSizeAllocation.value); } @@ -418,7 +418,7 @@ TEST(StreamPropertiesTests, givenGrfNumberAndThreadArbitrationStateComputeModePr scmProperties.scmPropertiesSupport.largeGrfMode = true; scmProperties.scmPropertiesSupport.threadArbitrationPolicy = true; - scmProperties.setPropertiesAll(false, static_cast(grfNumber), threadArbitration, PreemptionMode::Initial); + scmProperties.setPropertiesAll(false, static_cast(grfNumber), threadArbitration, PreemptionMode::Initial, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(0, 
scmProperties.largeGrfMode.value); EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value); @@ -462,7 +462,7 @@ TEST(StreamPropertiesTests, givenSetAllStateComputeModePropertiesWhenResettingSt int32_t threadArbitration = 1; PreemptionMode devicePreemptionMode = PreemptionMode::Initial; bool coherency = false; - scmProperties.setPropertiesAll(coherency, static_cast(grfNumber), threadArbitration, devicePreemptionMode); + scmProperties.setPropertiesAll(coherency, static_cast(grfNumber), threadArbitration, devicePreemptionMode, false); EXPECT_TRUE(scmProperties.isDirty()); EXPECT_EQ(0, scmProperties.largeGrfMode.value); EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value); @@ -1446,7 +1446,7 @@ TEST(StreamPropertiesTests, givenAllStreamPropertiesSetWhenAllStreamPropertiesRe uint32_t grfNumber = 128; int32_t threadArbitration = 1; - globalStreamProperties.stateComputeMode.setPropertiesAll(false, grfNumber, threadArbitration, PreemptionMode::Initial); + globalStreamProperties.stateComputeMode.setPropertiesAll(false, grfNumber, threadArbitration, PreemptionMode::Initial, false); bool isCooperativeKernel = false; bool disableEuFusion = true; diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index a715ae947a..f3d121009e 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -2953,3 +2953,197 @@ TEST(DeviceCanAccessPeerTest, givenTwoSubDevicesFromTheSameRootDeviceThenCanAcce EXPECT_TRUE(res); EXPECT_TRUE(canAccess); } + +TEST(DevicePeerAccessInitializationTest, givenDeviceListWhenInitializePeerAccessThenQueryOnlyRelevantPeers) { + UltDeviceFactory deviceFactory{3, 0}; + std::vector rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1], deviceFactory.rootDevices[2]}; + + auto releaseHelper0 = std::make_unique(); + releaseHelper0->shouldQueryPeerAccessResult = false; + 
rootDevices[0]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper0); + + auto releaseHelper1 = std::make_unique(); + releaseHelper1->shouldQueryPeerAccessResult = true; + rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1); + + auto releaseHelper2 = std::make_unique(); + releaseHelper2->shouldQueryPeerAccessResult = true; + rootDevices[2]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper2); + + uint32_t queryCalled = 0; + auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool { + queryCalled++; + canAccess = true; + return true; + }; + + MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices); + + // Check device[0] with none + // Check device[1] with device[0] and device[2] - 2 calls + // Check device[2] with device[0] and device[1] (cached) - 1 call + uint32_t numberOfCalls = 3; + EXPECT_EQ(numberOfCalls, queryCalled); +} + +TEST(DevicePeerAccessInitializationTest, givenSubDevicesWhenInitializePeerAccessThenSkipPeerAccessQuery) { + UltDeviceFactory deviceFactory{1, 2}; + std::vector subDevices = {deviceFactory.subDevices[0], deviceFactory.subDevices[1]}; + + auto releaseHelper0 = std::make_unique(); + releaseHelper0->shouldQueryPeerAccessResult = true; + subDevices[0]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper0); + + auto releaseHelper1 = std::make_unique(); + releaseHelper1->shouldQueryPeerAccessResult = true; + subDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1); + + uint32_t queryCalled = 0; + auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool { + queryCalled++; + canAccess = true; + return true; + }; + + MockDevice::initializePeerAccessForDevices(queryPeerAccess, subDevices); + + EXPECT_EQ(0u, queryCalled); +} + +TEST(DevicePeerAccessInitializationTest, 
givenDevicesWithPeerAccessCachedWhenInitializePeerAccessForDevicesThenSkipPeerAccessQuery) { + UltDeviceFactory deviceFactory{2, 0}; + std::vector rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; + + auto releaseHelper0 = std::make_unique(); + releaseHelper0->shouldQueryPeerAccessResult = true; + rootDevices[0]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper0); + + auto releaseHelper1 = std::make_unique(); + releaseHelper1->shouldQueryPeerAccessResult = true; + rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1); + + rootDevices[0]->updatePeerAccessCache(rootDevices[1], true); + + uint32_t queryCalled = 0; + auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool { + queryCalled++; + canAccess = false; + return true; + }; + + MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices); + + EXPECT_EQ(0u, queryCalled); +} + +TEST(DevicePeerAccessInitializationTest, givenDevicesWhenInitializePeerAccessForDevicesThenSetsHasAnyPeerAccessAccordingToP2PConnection) { + // Devices have P2P connection + { + UltDeviceFactory deviceFactory{2, 0}; + std::vector rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; + + auto releaseHelper0 = std::make_unique(); + releaseHelper0->shouldQueryPeerAccessResult = true; + rootDevices[0]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper0); + + auto releaseHelper1 = std::make_unique(); + releaseHelper1->shouldQueryPeerAccessResult = true; + rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1); + + uint32_t queryCalled = 0; + auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool { + queryCalled++; + canAccess = true; + return true; + }; + + MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices); + + EXPECT_EQ(1u, queryCalled); + 
ASSERT_TRUE(rootDevices[0]->hasAnyPeerAccess().has_value()); + ASSERT_TRUE(rootDevices[1]->hasAnyPeerAccess().has_value()); + EXPECT_TRUE(rootDevices[0]->hasAnyPeerAccess().value()); + EXPECT_TRUE(rootDevices[1]->hasAnyPeerAccess().value()); + } + + // Devices do not have P2P connection + { + UltDeviceFactory deviceFactory{2, 0}; + std::vector rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; + + auto releaseHelper0 = std::make_unique(); + releaseHelper0->shouldQueryPeerAccessResult = true; + rootDevices[0]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper0); + + auto releaseHelper1 = std::make_unique(); + releaseHelper1->shouldQueryPeerAccessResult = true; + rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1); + + uint32_t queryCalled = 0; + auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool { + queryCalled++; + canAccess = false; + return true; + }; + + MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices); + + EXPECT_EQ(1u, queryCalled); + ASSERT_TRUE(rootDevices[0]->hasAnyPeerAccess().has_value()); + ASSERT_TRUE(rootDevices[1]->hasAnyPeerAccess().has_value()); + EXPECT_FALSE(rootDevices[0]->hasAnyPeerAccess().value()); + EXPECT_FALSE(rootDevices[1]->hasAnyPeerAccess().value()); + } +} + +TEST(DevicePeerAccessInitializationTest, givenDevicesThatDontRequirePeerAccessQueryWhenInitializePeerAccessThenDontSetHasPeerAccess) { + UltDeviceFactory deviceFactory{2, 0}; + std::vector rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; + + auto releaseHelper0 = std::make_unique(); + releaseHelper0->shouldQueryPeerAccessResult = false; + rootDevices[0]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper0); + + auto releaseHelper1 = std::make_unique(); + releaseHelper1->shouldQueryPeerAccessResult = false; + rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = 
std::move(releaseHelper1); + + uint32_t queryCalled = 0; + auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool { + queryCalled++; + canAccess = true; + return true; + }; + + MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices); + + EXPECT_EQ(0u, queryCalled); + + EXPECT_FALSE(rootDevices[0]->hasAnyPeerAccess().has_value()); + EXPECT_FALSE(rootDevices[1]->hasAnyPeerAccess().has_value()); +} + +TEST(DevicePeerAccessInitializationTest, givenDevicesWithoutReleaseHelperWhenInitializePeerAccessCalledThenDontSetHasPeerAccess) { + UltDeviceFactory deviceFactory{2, 0}; + std::vector rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; + + rootDevices[0]->getRootDeviceEnvironmentRef().releaseHelper.reset(); + rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper.reset(); + + ASSERT_EQ(nullptr, rootDevices[0]->getRootDeviceEnvironmentRef().releaseHelper); + ASSERT_EQ(nullptr, rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper); + + uint32_t queryCalled = 0; + auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool { + queryCalled++; + canAccess = true; + return true; + }; + + MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices); + + EXPECT_EQ(0u, queryCalled); + + EXPECT_FALSE(rootDevices[0]->hasAnyPeerAccess().has_value()); + EXPECT_FALSE(rootDevices[1]->hasAnyPeerAccess().has_value()); +} diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp index 294324bf98..987b29d3bb 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp @@ -98,7 +98,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandCon StreamProperties streamProperties{}; 
streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::largeGrfNumber, 0u, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::largeGrfNumber, 0u, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, rootDeviceEnvironment); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 44bdfb11d0..59682c86e3 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -574,11 +574,11 @@ HWTEST2_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenNumRequiredGrfIsNot StreamProperties streamProperties{}; auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 128, 0u, PreemptionMode::Disabled); - streamProperties.stateComputeMode.setPropertiesAll(false, 128, 0u, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 128, 0u, PreemptionMode::Disabled, false); + streamProperties.stateComputeMode.setPropertiesAll(false, 128, 0u, PreemptionMode::Disabled, false); EXPECT_FALSE(streamProperties.stateComputeMode.isDirty()); - streamProperties.stateComputeMode.setPropertiesAll(false, 256, 0u, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 256, 0u, PreemptionMode::Disabled, false); if constexpr (TestTraits::largeGrfModeInStateComputeModeSupported) { 
EXPECT_TRUE(streamProperties.stateComputeMode.isDirty()); } else { diff --git a/shared/test/unit_test/encoders/test_encode_grf_mode_xe_hp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_grf_mode_xe_hp_and_later.cpp index 5fd2d7956d..ac324044f8 100644 --- a/shared/test/unit_test/encoders/test_encode_grf_mode_xe_hp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_grf_mode_xe_hp_and_later.cpp @@ -25,7 +25,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCommandContainerWhenN StreamProperties streamProperties{}; auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, 0u, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, 0u, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, rootDeviceEnvironment); GenCmdList commands; diff --git a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp index bd0aeeff89..d5225100e6 100644 --- a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp @@ -41,7 +41,7 @@ GEN12LPTEST_F(CommandEncoderTest, WhenAdjustComputeModeIsCalledThenStateComputeM // Adjust the State Compute Mode which sets FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT StreamProperties properties{}; properties.initSupport(rootDeviceEnvironment); - properties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, 0, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, 0, PreemptionMode::Disabled, false); NEO::EncodeComputeMode::programComputeModeCommand(*cmdContainer.getCommandStream(), 
properties.stateComputeMode, rootDeviceEnvironment); diff --git a/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp index bc72e95afe..14ac64a12b 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp @@ -121,3 +121,7 @@ TEST_F(ReleaseHelper1255Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper1255Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1255Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp index f43dfc16c3..8092bf6efa 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp @@ -121,3 +121,7 @@ TEST_F(ReleaseHelper1256Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper1256Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1256Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp index 1ba2436ca6..3c62fa29fa 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp @@ -121,3 +121,7 @@ TEST_F(ReleaseHelper1257Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr 
TEST_F(ReleaseHelper1257Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1257Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp index 27fe954907..f9515af047 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp @@ -105,3 +105,7 @@ TEST_F(ReleaseHelper1260Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper1260Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1260Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp index 0ac52dcd3f..43b16f7db8 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp @@ -104,3 +104,7 @@ TEST_F(ReleaseHelper1261Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper1261Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1261Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp index 588cf30880..3d291a31ed 100644 --- 
a/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp @@ -115,3 +115,7 @@ TEST_F(ReleaseHelper1270Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper1270Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1270Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp index a0cbcfec56..71f77c807f 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp @@ -115,3 +115,7 @@ TEST_F(ReleaseHelper1271Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper1271Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1271Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp index fcf946fc4e..af7c00beb0 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp @@ -121,3 +121,7 @@ TEST_F(ReleaseHelper1274Tests, whenIsBlitImageAllowedForDepthFormatCalledThenFal TEST_F(ReleaseHelper1274Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper1274Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + 
whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_20_01_tests.cpp b/shared/test/unit_test/release_helper/release_helper_20_01_tests.cpp index 9e588de9ee..5476d8742b 100644 --- a/shared/test/unit_test/release_helper/release_helper_20_01_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_20_01_tests.cpp @@ -114,3 +114,7 @@ TEST_F(ReleaseHelper2001Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper2001Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper2001Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_20_02_tests.cpp b/shared/test/unit_test/release_helper/release_helper_20_02_tests.cpp index 7890341453..a62fee16ab 100644 --- a/shared/test/unit_test/release_helper/release_helper_20_02_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_20_02_tests.cpp @@ -114,3 +114,7 @@ TEST_F(ReleaseHelper2002Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper2002Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper2002Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp b/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp index f0844a6dd0..b6766465b3 100644 --- a/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp @@ -117,3 +117,7 @@ TEST_F(ReleaseHelper2004Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr 
TEST_F(ReleaseHelper2004Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper2004Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp b/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp index 4d43fe968b..77b19a8907 100644 --- a/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp @@ -115,3 +115,7 @@ TEST_F(ReleaseHelper3000Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper3000Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper3000Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp b/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp index c665e0257f..442b910e71 100644 --- a/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp @@ -115,3 +115,7 @@ TEST_F(ReleaseHelper3001Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper3001Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper3001Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_30_03_tests.cpp b/shared/test/unit_test/release_helper/release_helper_30_03_tests.cpp index b184e5fdfb..5106619350 100644 --- 
a/shared/test/unit_test/release_helper/release_helper_30_03_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_30_03_tests.cpp @@ -115,3 +115,7 @@ TEST_F(ReleaseHelper3003Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorr TEST_F(ReleaseHelper3003Tests, whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned) { whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); } + +TEST_F(ReleaseHelper3003Tests, whenShouldQueryPeerAccessCalledThenFalseReturned) { + whenShouldQueryPeerAccessCalledThenFalseReturned(); +} diff --git a/shared/test/unit_test/release_helper/release_helper_tests_base.cpp b/shared/test/unit_test/release_helper/release_helper_tests_base.cpp index 01aaac9de9..56217b74b6 100644 --- a/shared/test/unit_test/release_helper/release_helper_tests_base.cpp +++ b/shared/test/unit_test/release_helper/release_helper_tests_base.cpp @@ -196,3 +196,12 @@ void ReleaseHelperTestsBase::whenCallingAdjustMaxThreadsPerEuCountThenCorrectVal } } } + +void ReleaseHelperTestsBase::whenShouldQueryPeerAccessCalledThenFalseReturned() { + for (auto &revision : getRevisions()) { + ipVersion.revision = revision; + releaseHelper = ReleaseHelper::create(ipVersion); + ASSERT_NE(nullptr, releaseHelper); + EXPECT_FALSE(releaseHelper->shouldQueryPeerAccess()); + } +} diff --git a/shared/test/unit_test/release_helper/release_helper_tests_base.h b/shared/test/unit_test/release_helper/release_helper_tests_base.h index e679f910b3..a9d5efbad9 100644 --- a/shared/test/unit_test/release_helper/release_helper_tests_base.h +++ b/shared/test/unit_test/release_helper/release_helper_tests_base.h @@ -37,6 +37,7 @@ struct ReleaseHelperTestsBase : public ::testing::Test { void whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); void whenIsPostImageWriteFlushRequiredCalledThenFalseReturned(); void whenCallingAdjustMaxThreadsPerEuCountThenCorrectValueIsReturned(); + void whenShouldQueryPeerAccessCalledThenFalseReturned(); virtual 
std::vector getRevisions() = 0; std::unique_ptr releaseHelper; diff --git a/shared/test/unit_test/xe2_hpg_core/compute_mode_tests_xe2_hpg_core.cpp b/shared/test/unit_test/xe2_hpg_core/compute_mode_tests_xe2_hpg_core.cpp index 78877a9ff6..3874710833 100644 --- a/shared/test/unit_test/xe2_hpg_core/compute_mode_tests_xe2_hpg_core.cpp +++ b/shared/test/unit_test/xe2_hpg_core/compute_mode_tests_xe2_hpg_core.cpp @@ -185,7 +185,7 @@ XE2_HPG_CORETEST_F(ComputeModeRequirementsXe2HpgCore, giventhreadArbitrationPoli }; getCsrHw()->streamProperties.stateComputeMode.setPropertiesAll(false, flags.numGrfRequired, - flags.threadArbitrationPolicy, PreemptionMode::Disabled); + flags.threadArbitrationPolicy, PreemptionMode::Disabled, false); flushTask(true); findCmd(true); // first time diff --git a/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp b/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp index 2eaa0042f4..f309747ac3 100644 --- a/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp +++ b/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp @@ -267,25 +267,25 @@ XE2_HPG_CORETEST_F(CommandEncodeXe2HpgCoreTest, whenAdjustComputeModeIsCalledThe properties.initSupport(rootDeviceEnvironment); auto pLinearStream = std::make_unique(buffer, sizeof(buffer)); - properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::AgeBased, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::AgeBased, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); auto pScm = reinterpret_cast(pLinearStream->getCpuBase()); EXPECT_EQ(EU_THREAD_SCHEDULING_MODE::EU_THREAD_SCHEDULING_MODE_OLDEST_FIRST, pScm->getEuThreadSchedulingMode()); pLinearStream = std::make_unique(buffer, sizeof(buffer)); - properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::RoundRobin, 
PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::RoundRobin, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); pScm = reinterpret_cast(pLinearStream->getCpuBase()); EXPECT_EQ(EU_THREAD_SCHEDULING_MODE::EU_THREAD_SCHEDULING_MODE_ROUND_ROBIN, pScm->getEuThreadSchedulingMode()); pLinearStream = std::make_unique(buffer, sizeof(buffer)); - properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); pScm = reinterpret_cast(pLinearStream->getCpuBase()); EXPECT_EQ(EU_THREAD_SCHEDULING_MODE::EU_THREAD_SCHEDULING_MODE_STALL_BASED_ROUND_ROBIN, pScm->getEuThreadSchedulingMode()); pLinearStream = std::make_unique(buffer, sizeof(buffer)); - properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::NotPresent, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::NotPresent, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); pScm = reinterpret_cast(pLinearStream->getCpuBase()); EXPECT_EQ(EU_THREAD_SCHEDULING_MODE::EU_THREAD_SCHEDULING_MODE_HW_DEFAULT, pScm->getEuThreadSchedulingMode()); @@ -488,7 +488,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDebugFlagSetWhenAdjustIsCall StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, 
PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, rootDeviceEnvironment); auto &stateComputeModeCmd = *reinterpret_cast(linearStream.getCpuBase()); @@ -504,7 +504,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDebugFlagSetWhenAdjustIsCall StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, rootDeviceEnvironment); auto &stateComputeModeCmd = *reinterpret_cast(linearStream.getCpuBase()); @@ -520,7 +520,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDebugFlagSetWhenAdjustIsCall StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, rootDeviceEnvironment); auto &stateComputeModeCmd = *reinterpret_cast(linearStream.getCpuBase()); diff --git a/shared/test/unit_test/xe3_core/compute_mode_tests_xe3_core.cpp b/shared/test/unit_test/xe3_core/compute_mode_tests_xe3_core.cpp index fa6accd984..5e44954b81 100644 --- a/shared/test/unit_test/xe3_core/compute_mode_tests_xe3_core.cpp +++ b/shared/test/unit_test/xe3_core/compute_mode_tests_xe3_core.cpp @@ -170,7 +170,7 @@ XE3_CORETEST_F(ComputeModeRequirementsXe3Core, giventhreadArbitrationPolicyWitho }; getCsrHw()->streamProperties.stateComputeMode.setPropertiesAll(false, flags.numGrfRequired, - flags.threadArbitrationPolicy, PreemptionMode::Disabled); + flags.threadArbitrationPolicy, 
PreemptionMode::Disabled, false); flushTask(true); findCmd(true); // first time diff --git a/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp b/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp index bf480208dc..e00be946f6 100644 --- a/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp +++ b/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp @@ -397,7 +397,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagSetWhenSetPropertiesAllCal StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false); EXPECT_TRUE(streamProperties.stateComputeMode.isPipelinedEuThreadArbitrationEnabled()); } @@ -407,7 +407,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagSetWhenSetPropertiesAllCal StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false); EXPECT_FALSE(streamProperties.stateComputeMode.isPipelinedEuThreadArbitrationEnabled()); } } @@ -427,7 +427,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagWhenProgrammingStateComput StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, rootDeviceEnvironment); auto &stateComputeModeCmd = *reinterpret_cast(linearStream.getCpuBase()); @@ -442,7 +442,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagWhenProgrammingStateComput 
StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, rootDeviceEnvironment); auto &stateComputeModeCmd = *reinterpret_cast(linearStream.getCpuBase()); @@ -457,7 +457,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagWhenProgrammingStateComput StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, rootDeviceEnvironment); auto &stateComputeModeCmd = *reinterpret_cast(linearStream.getCpuBase()); @@ -558,7 +558,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenCommandContainerWhenNumGrfRequiredI auto &productHelper = rootDeviceEnvironment.getHelper(); StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); - streamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::largeGrfNumber, 0u, PreemptionMode::Disabled); + streamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::largeGrfNumber, 0u, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, rootDeviceEnvironment); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); diff --git a/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp 
b/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp index 7be6ffd203..77450b70cb 100644 --- a/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp @@ -189,7 +189,7 @@ HWTEST2_F(XeHpcComputeModeRequirements, giventhreadArbitrationPolicyWithoutShare }; getCsrHw()->streamProperties.stateComputeMode.setPropertiesAll(false, flags.numGrfRequired, - flags.threadArbitrationPolicy, PreemptionMode::Disabled); + flags.threadArbitrationPolicy, PreemptionMode::Disabled, false); flushTask(true); findCmd(productHelper.isThreadArbitrationPolicyReportedWithScm()); // first time diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index 6dce58f632..9a2c7f456d 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -272,7 +272,7 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, whenAdjustComputeModeIsCalledThenC auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; auto &productHelper = rootDeviceEnvironment.getHelper(); properties.initSupport(rootDeviceEnvironment); - properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::AgeBased, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::AgeBased, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); auto pScm = reinterpret_cast(pLinearStream->getCpuBase()); if (productHelper.isThreadArbitrationPolicyReportedWithScm()) { @@ -282,7 +282,7 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, whenAdjustComputeModeIsCalledThenC } pLinearStream = std::make_unique(buffer, sizeof(buffer)); - properties.stateComputeMode.setPropertiesAll(false, 0, 
ThreadArbitrationPolicy::RoundRobin, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::RoundRobin, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); pScm = reinterpret_cast(pLinearStream->getCpuBase()); if (productHelper.isThreadArbitrationPolicyReportedWithScm()) { @@ -292,7 +292,7 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, whenAdjustComputeModeIsCalledThenC } pLinearStream = std::make_unique(buffer, sizeof(buffer)); - properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); pScm = reinterpret_cast(pLinearStream->getCpuBase()); if (productHelper.isThreadArbitrationPolicyReportedWithScm()) { @@ -302,7 +302,7 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, whenAdjustComputeModeIsCalledThenC } pLinearStream = std::make_unique(buffer, sizeof(buffer)); - properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::NotPresent, PreemptionMode::Disabled); + properties.stateComputeMode.setPropertiesAll(false, 0, ThreadArbitrationPolicy::NotPresent, PreemptionMode::Disabled, false); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, rootDeviceEnvironment); pScm = reinterpret_cast(pLinearStream->getCpuBase()); EXPECT_EQ(EU_THREAD_SCHEDULING_MODE::EU_THREAD_SCHEDULING_MODE_HW_DEFAULT, pScm->getEuThreadSchedulingMode()); diff --git a/shared/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp index bc983a6bf0..a24718ab97 100644 --- 
a/shared/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp +++ b/shared/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -68,7 +68,7 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp pCsr->streamProperties.stateComputeMode = {}; pCsr->streamProperties.stateComputeMode.initSupport(rootDeviceEnvironment); - pCsr->streamProperties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::Disabled); + pCsr->streamProperties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::Disabled, false); LinearStream stream(buff, 1024); pCsr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -90,7 +90,7 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp pCsr->streamProperties.stateComputeMode = {}; pCsr->streamProperties.stateComputeMode.initSupport(rootDeviceEnvironment); - pCsr->streamProperties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::Disabled); + pCsr->streamProperties.stateComputeMode.setPropertiesAll(false, 0u, 0u, PreemptionMode::Disabled, false); LinearStream stream(buff, 1024); pCsr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); diff --git a/shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp index 5ec4f182fc..032b0bccf5 100644 --- a/shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp +++ b/shared/test/unit_test/xe_hpg_core/test_encode_dispatch_kernel_xe_hpg_core.cpp @@ -37,7 +37,7 @@ HWTEST2_F(CommandEncodeStatesTestXeHpgCore, givenVariousValuesWhenCallingSetBarr } } template -struct MockProductHelper : NEO::ProductHelperHw { +struct TempMockProductHelper : 
NEO::ProductHelperHw { bool isAdjustWalkOrderAvailable(const ReleaseHelper *releaseHelper) const override { return true; } }; @@ -47,7 +47,7 @@ HWTEST2_F(CommandEncodeStatesTestXeHpgCore, givenRequiredWorkGroupOrderAndIsAdju MockExecutionEnvironment executionEnvironment{}; auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; - RAIIProductHelperFactory> raii(rootDeviceEnvironment); + RAIIProductHelperFactory> raii(rootDeviceEnvironment); DefaultWalkerType walkerCmd{}; DefaultWalkerType walkerOnStart{};