feature: add support for HP copy engine context

- add support for context group with HP copy engine
- choose HP copy engine when available

Related-To: NEO-11983

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-07-19 10:20:37 +00:00
committed by Compute-Runtime-Automation
parent 4fc37f9afd
commit b6299b8a21
13 changed files with 510 additions and 46 deletions

View File

@@ -1684,7 +1684,7 @@ ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr
auto engineGroupType = getEngineGroupTypeForOrdinal(ordinal);
bool copyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType);
if (secondaryContextsEnabled && priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH) {
if (priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH) {
contextPriority = NEO::EngineUsage::highPriority;
} else if (isSuitableForLowPriority(priority, copyOnly)) {
contextPriority = NEO::EngineUsage::lowPriority;
@@ -1709,6 +1709,10 @@ ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr
*csr = this->subDeviceCopyEngineGroups[subDeviceOrdinal].engines[index].commandStreamReceiver;
}
if (copyOnly && contextPriority == NEO::EngineUsage::highPriority) {
getCsrForHighPriority(csr, copyOnly);
}
auto &osContext = (*csr)->getOsContext();
if (secondaryContextsEnabled) {
@@ -1747,6 +1751,18 @@ ze_result_t DeviceImp::getCsrForLowPriority(NEO::CommandStreamReceiver **csr, bo
UNRECOVERABLE_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
// Looks through every engine of the active device for a high priority OS context
// whose "is BCS" status equals copyOnly. On the first match the engine's command
// stream receiver is written to *csr and ZE_RESULT_SUCCESS is returned; *csr is
// left untouched and ZE_RESULT_ERROR_UNKNOWN is returned when nothing matches.
ze_result_t DeviceImp::getCsrForHighPriority(NEO::CommandStreamReceiver **csr, bool copyOnly) {
    for (auto &engine : getActiveDevice()->getAllEngines()) {
        auto *context = engine.osContext;
        const bool bcsMatchesRequest = (NEO::EngineHelpers::isBcs(context->getEngineType()) == copyOnly);
        const bool highPriority = context->isHighPriority();
        if (!(highPriority && bcsMatchesRequest)) {
            continue;
        }
        *csr = engine.commandStreamReceiver;
        return ZE_RESULT_SUCCESS;
    }

    // Reaching this point means neoDevice created no matching high priority context.
    return ZE_RESULT_ERROR_UNKNOWN;
}
bool DeviceImp::isSuitableForLowPriority(ze_command_queue_priority_t priority, bool copyOnly) {
bool engineSuitable = copyOnly ? getGfxCoreHelper().getContextGroupContextsCount() > 0 : !this->implicitScalingCapable;

View File

@@ -117,6 +117,7 @@ struct DeviceImp : public Device, NEO::NonCopyableOrMovableClass {
SysmanDevice *getSysmanHandle() override;
ze_result_t getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index, ze_command_queue_priority_t priority, bool allocateInterrupt) override;
ze_result_t getCsrForLowPriority(NEO::CommandStreamReceiver **csr, bool copyOnly) override;
// Stores in *csr the command stream receiver of the first active-device engine whose OS context is
// high priority and whose copy-engine (BCS) status equals copyOnly.
// Returns ZE_RESULT_SUCCESS on a match; otherwise returns ZE_RESULT_ERROR_UNKNOWN and leaves *csr unchanged.
ze_result_t getCsrForHighPriority(NEO::CommandStreamReceiver **csr, bool copyOnly);
bool isSuitableForLowPriority(ze_command_queue_priority_t priority, bool copyOnly);
NEO::GraphicsAllocation *obtainReusableAllocation(size_t requiredSize, NEO::AllocationType type) override;
void storeReusableAllocation(NEO::GraphicsAllocation &alloc) override;

View File

@@ -4676,6 +4676,115 @@ HWTEST_F(DeviceTest, givenContextGroupSupportedWhenGettingHighPriorityCsrThenCor
}
}
HWTEST2_F(DeviceTest, givenHpCopyEngineWhenGettingHighPriorityCsrThenCorrectCsrAndContextIsReturned, IsAtLeastXeHpcCore) {
    // Core helper double: reports the highest enabled BCS slot (slot index > 0) as a
    // high priority engine, both in the engine instance list and as the default HP copy engine.
    struct MockGfxCoreHelper : public NEO::GfxCoreHelperHw<FamilyType> {
        const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const override {
            auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
            auto defaultEngine = getChosenEngineType(hwInfo);
            const auto &bcsInfo = hwInfo.featureTable.ftrBcsInfo;
            const auto bcsSlotCount = static_cast<uint32_t>(bcsInfo.size());

            EngineInstancesContainer engines;

            // Regular compute engine: RCS when the RCS node is present, the chosen default otherwise.
            const aub_stream::EngineType primaryEngine = hwInfo.featureTable.flags.ftrRcsNode ? aub_stream::ENGINE_RCS : defaultEngine;
            engines.push_back({primaryEngine, EngineUsage::regular});
            engines.push_back({defaultEngine, EngineUsage::lowPriority});
            engines.push_back({defaultEngine, EngineUsage::internal});

            // Main copy engine (slot 0) gets a regular and an internal instance.
            if (hwInfo.capabilityTable.blitterOperationsSupported && bcsInfo.test(0)) {
                engines.push_back({aub_stream::ENGINE_BCS, EngineUsage::regular});
                engines.push_back({aub_stream::ENGINE_BCS, EngineUsage::internal}); // internal usage
            }

            // Highest enabled slot above 0 becomes the high priority copy engine; 0 means "none found".
            uint32_t highPrioritySlot = 0;
            for (uint32_t slot = bcsSlotCount - 1; slot > 0; --slot) {
                if (bcsInfo.test(slot)) {
                    highPrioritySlot = slot;
                    break;
                }
            }

            for (uint32_t slot = 1; slot < bcsSlotCount; ++slot) {
                auto engineType = EngineHelpers::getBcsEngineAtIdx(slot);
                if (slot == highPrioritySlot) {
                    engines.push_back({engineType, EngineUsage::highPriority});
                } else if (bcsInfo.test(slot)) {
                    engines.push_back({engineType, EngineUsage::regular});
                }
            }

            return engines;
        }

        aub_stream::EngineType getDefaultHpCopyEngine(const HardwareInfo &hwInfo) const override {
            const auto &bcsInfo = hwInfo.featureTable.ftrBcsInfo;
            // Scan from the top slot downwards; slot 0 is never reported as the HP copy engine.
            for (auto slot = static_cast<uint32_t>(bcsInfo.size()) - 1; slot > 0; --slot) {
                if (bcsInfo.test(slot)) {
                    return EngineHelpers::getBcsEngineAtIdx(slot);
                }
            }
            return aub_stream::EngineType::NUM_ENGINES;
        }
    };

    const uint32_t rootDeviceIndex = 0u;

    // Hardware description: single CCS, no RCS, blitter enabled with BCS slots 0..2 present.
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.flags.ftrCCSNode = true;
    hwInfo.featureTable.flags.ftrRcsNode = false;
    hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS;
    hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1;
    hwInfo.featureTable.ftrBcsInfo = 0b111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    MockExecutionEnvironment mockExecutionEnvironment{&hwInfo};
    RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*mockExecutionEnvironment.rootDeviceEnvironments[rootDeviceIndex]);

    {
        MockExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment{&hwInfo};
        auto *neoMockDevice = NEO::MockDevice::createWithExecutionEnvironment<NEO::MockDevice>(&hwInfo, executionEnvironment, rootDeviceIndex);
        MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());

        NEO::CommandStreamReceiver *highPriorityCsr = nullptr;
        auto &engineGroups = neoMockDevice->getRegularEngineGroups();
        const auto groupCount = static_cast<uint32_t>(engineGroups.size());

        // Every copy / linked-copy group must hand out the BCS2 high priority primary context.
        for (uint32_t groupOrdinal = 0; groupOrdinal < groupCount; ++groupOrdinal) {
            const auto groupType = engineGroups[groupOrdinal].engineGroupType;
            const bool isCopyGroup = groupType == NEO::EngineGroupType::copy || groupType == NEO::EngineGroupType::linkedCopy;
            if (!isCopyGroup) {
                continue;
            }

            uint32_t engineIndex = 0;
            auto result = deviceImp.getCsrForOrdinalAndIndex(&highPriorityCsr, groupOrdinal, engineIndex, ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH, false);

            EXPECT_EQ(ZE_RESULT_SUCCESS, result);
            ASSERT_NE(nullptr, highPriorityCsr);

            EXPECT_TRUE(highPriorityCsr->getOsContext().getIsPrimaryEngine());
            EXPECT_TRUE(highPriorityCsr->getOsContext().isHighPriority());
            EXPECT_EQ(aub_stream::ENGINE_BCS2, highPriorityCsr->getOsContext().getEngineType());
        }
    }
}
TEST(DevicePropertyFlagIsIntegratedTest, givenIntegratedDeviceThenCorrectDevicePropertyFlagSet) {
std::unique_ptr<Mock<L0::DriverHandleImp>> driverHandle;
NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();