mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 05:56:36 +08:00
feature: enable BCS split for copy offload queues
Related-To: NEO-14557
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e01c47152e
commit
2e58669fe9
@@ -353,6 +353,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
enableCopyOperationOffload();
|
||||
}
|
||||
|
||||
enableImmediateBcsSplit();
|
||||
|
||||
return returnType;
|
||||
}
|
||||
|
||||
|
||||
@@ -257,10 +257,6 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
commandList->isTbxMode = csr->isTbxMode();
|
||||
commandList->commandListPreemptionMode = device->getDevicePreemptionMode();
|
||||
|
||||
if (!internalUsage) {
|
||||
commandList->isBcsSplitNeeded = deviceImp->bcsSplit->setupDevice(csr);
|
||||
}
|
||||
|
||||
commandList->copyThroughLockedPtrEnabled = gfxCoreHelper.copyThroughLockedPtrEnabled(hwInfo, productHelper);
|
||||
|
||||
const bool cmdListSupportsCopyOffload = commandList->isInOrderExecutionEnabled() && !productHelper.isDcFlushAllowed();
|
||||
@@ -269,9 +265,17 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
commandList->enableCopyOperationOffload();
|
||||
}
|
||||
|
||||
commandList->enableImmediateBcsSplit();
|
||||
|
||||
return commandList;
|
||||
}
|
||||
|
||||
void CommandListImp::enableImmediateBcsSplit() {
|
||||
if (device->getNEODevice()->isBcsSplitSupported() && isImmediateType() && !internalUsage && !isBcsSplitNeeded) {
|
||||
isBcsSplitNeeded = static_cast<DeviceImp *>(getDevice())->bcsSplit->setupDevice(getCsr(false), isCopyOffloadEnabled());
|
||||
}
|
||||
}
|
||||
|
||||
void CommandListImp::enableCopyOperationOffload() {
|
||||
if (isCopyOnly(false) || !static_cast<DeviceImp *>(device)->tryGetCopyEngineOrdinal().has_value()) {
|
||||
return;
|
||||
|
||||
@@ -56,6 +56,7 @@ struct CommandListImp : public CommandList {
|
||||
uint64_t getInOrderExecDeviceGpuAddress() const;
|
||||
size_t getInOrderExecHostRequiredSize() const;
|
||||
uint64_t getInOrderExecHostGpuAddress() const;
|
||||
void enableImmediateBcsSplit();
|
||||
|
||||
protected:
|
||||
std::shared_ptr<NEO::InOrderExecInfo> inOrderExecInfo;
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
|
||||
namespace L0 {
|
||||
|
||||
bool BcsSplit::setupDevice(NEO::CommandStreamReceiver *csr) {
|
||||
bool BcsSplit::setupDevice(NEO::CommandStreamReceiver *csr, bool copyOffloadEnabled) {
|
||||
auto &productHelper = this->device.getProductHelper();
|
||||
auto bcsSplitSettings = productHelper.getBcsSplitSettings(this->device.getHwInfo());
|
||||
|
||||
@@ -29,12 +29,12 @@ bool BcsSplit::setupDevice(NEO::CommandStreamReceiver *csr) {
|
||||
|
||||
// If expectedTileCount==1, route root device to Tile0, otherwise use all Tiles
|
||||
bool tileCountMatch = (bcsSplitSettings.requiredTileCount == 1) || (this->device.getNEODevice()->getNumSubDevices() == bcsSplitSettings.requiredTileCount);
|
||||
bool engineMatch = (csr->getOsContext().getEngineType() == productHelper.getDefaultCopyEngine());
|
||||
if (copyOffloadEnabled && NEO::debugManager.flags.SplitBcsForCopyOffload.get() != 0) {
|
||||
engineMatch = NEO::EngineHelpers::isComputeEngine(csr->getOsContext().getEngineType());
|
||||
}
|
||||
|
||||
auto initializeBcsSplit = this->device.getNEODevice()->isBcsSplitSupported() &&
|
||||
(csr->getOsContext().getEngineType() == productHelper.getDefaultCopyEngine()) &&
|
||||
tileCountMatch;
|
||||
|
||||
if (!initializeBcsSplit) {
|
||||
if (!(engineMatch && tileCountMatch)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@ struct BcsSplit {
|
||||
return result;
|
||||
}
|
||||
|
||||
bool setupDevice(NEO::CommandStreamReceiver *csr);
|
||||
bool setupDevice(NEO::CommandStreamReceiver *csr, bool copyOffloadEnabled);
|
||||
void releaseResources();
|
||||
std::vector<CommandList *> &getCmdListsForSplit(NEO::TransferDirection direction);
|
||||
void setupEnginesMask(NEO::BcsSplitSettings &settings);
|
||||
|
||||
@@ -1110,13 +1110,38 @@ HWTEST2_F(AggregatedBcsSplitTests, givenLimitedEnginesCountWhenCreatingBcsSplitT
|
||||
|
||||
BcsSplit bcsSplit(static_cast<L0::DeviceImp &>(*device));
|
||||
|
||||
bcsSplit.setupDevice(cmdList->getCsr(false));
|
||||
bcsSplit.setupDevice(cmdList->getCsr(false), false);
|
||||
|
||||
EXPECT_EQ(expectedEnginesCount, bcsSplit.cmdLists.size());
|
||||
|
||||
bcsSplit.releaseResources();
|
||||
}
|
||||
|
||||
HWTEST2_F(AggregatedBcsSplitTests, givenCopyOffloadEnabledWhenCreatingCmdListThenEnableBcsSplit, IsAtLeastXeHpcCore) {
|
||||
debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1);
|
||||
|
||||
ze_result_t returnValue;
|
||||
ze_command_queue_desc_t desc = {
|
||||
.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER,
|
||||
};
|
||||
std::unique_ptr<L0::CommandList> commandList1(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::compute, returnValue));
|
||||
auto mockCmdList1 = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>> *>(commandList1.get());
|
||||
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
EXPECT_NE(device->getProductHelper().isDcFlushAllowed(), commandList1->isCopyOffloadEnabled());
|
||||
EXPECT_EQ(commandList1->isCopyOffloadEnabled(), mockCmdList1->isBcsSplitNeeded);
|
||||
|
||||
debugManager.flags.SplitBcsForCopyOffload.set(0);
|
||||
|
||||
std::unique_ptr<L0::CommandList> commandList2(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::compute, returnValue));
|
||||
auto mockCmdList2 = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>> *>(commandList2.get());
|
||||
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
EXPECT_FALSE(mockCmdList2->isBcsSplitNeeded);
|
||||
}
|
||||
|
||||
HWTEST_F(AggregatedBcsSplitTests, givenTransferDirectionWhenAskingIfSplitIsNeededThenReturnCorrectValue) {
|
||||
debugManager.flags.SplitBcsTransferDirectionMask.set(-1);
|
||||
|
||||
@@ -1133,7 +1158,7 @@ HWTEST2_F(AggregatedBcsSplitTests, givenPlatformSupporingAggregatedSplitModeWhen
|
||||
|
||||
BcsSplit bcsSplit(static_cast<L0::DeviceImp &>(*device));
|
||||
|
||||
bcsSplit.setupDevice(cmdList->getCsr(false));
|
||||
bcsSplit.setupDevice(cmdList->getCsr(false), false);
|
||||
|
||||
EXPECT_EQ(device->getL0GfxCoreHelper().bcsSplitAggregatedModeEnabled(), bcsSplit.events.aggregatedEventsMode);
|
||||
|
||||
|
||||
@@ -4042,7 +4042,7 @@ struct BcsSplitInOrderCmdListTests : public InOrderCmdListFixture {
|
||||
|
||||
auto bcsSplit = static_cast<DeviceImp *>(device)->bcsSplit.get();
|
||||
|
||||
cmdList->isBcsSplitNeeded = bcsSplit->setupDevice(cmdList->getCsr(false));
|
||||
cmdList->isBcsSplitNeeded = bcsSplit->setupDevice(cmdList->getCsr(false), false);
|
||||
|
||||
return cmdList;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user