Add bcs split control mask

Introduce a debug variable to control which engines
the transfer will be split across

Related-To: NEO-7173

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2022-08-19 10:36:31 +00:00 committed by Compute-Runtime-Automation
parent 908ddd0c92
commit 82e29fd048
7 changed files with 65 additions and 17 deletions

View File

@ -305,21 +305,28 @@ void CommandQueue::initializeBcsEngine(bool internalUsage) {
}
// Acquires the extra BCS engines that split copies are distributed over.
// NOTE(review): this hunk appears to list the removed and the added implementation
// back to back without +/- markers; the comments below mark the presumed boundary.
void CommandQueue::constructBcsEnginesForSplit() {
// Presumably the previous implementation: walks ENGINE_BCS2..ENGINE_BCS8 in steps of two
// and fills every slot that is still empty.
if (!this->bcsSplitInitialized) {
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
auto index = EngineHelpers::getBcsIndex(static_cast<aub_stream::EngineType>(i));
if (!bcsEngines[index]) {
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
bcsEngines[index] = neoDevice.tryGetEngine(static_cast<aub_stream::EngineType>(i), EngineUsage::Regular);
bcsEngineTypes.push_back(static_cast<aub_stream::EngineType>(i));
if (bcsEngines[index]) {
bcsEngines[index]->osContext->ensureContextInitialized();
bcsEngines[index]->commandStreamReceiver->initDirectSubmission();
}
// Presumably the new implementation starts here: nothing to do once the split engines were set up.
if (this->bcsSplitInitialized) {
return;
}
// A positive SplitBcsMask debug flag replaces the default engine mask.
if (DebugManager.flags.SplitBcsMask.get() > 0) {
this->splitEngines = DebugManager.flags.SplitBcsMask.get();
}
// Visit every bit index of the mask; only selected slots that are still empty get filled.
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
if (this->splitEngines.test(i) && !bcsEngines[i]) {
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
bcsEngines[i] = neoDevice.tryGetEngine(engineType, EngineUsage::Regular);
// The engine type is recorded before the null check below, i.e. regardless of its outcome.
bcsEngineTypes.push_back(engineType);
// Context and direct submission are initialized only when an engine was actually obtained.
if (bcsEngines[i]) {
bcsEngines[i]->osContext->ensureContextInitialized();
bcsEngines[i]->commandStreamReceiver->initDirectSubmission();
}
}
this->bcsSplitInitialized = true;
}
this->bcsSplitInitialized = true;
}
void CommandQueue::prepareHostPtrSurfaceForSplit(bool split, GraphicsAllocation &allocation) {

View File

@ -420,7 +420,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
bool isCopyOnly = false;
bool bcsAllowed = false;
bool bcsInitialized = false;
// Set by constructBcsEnginesForSplit() once it has run, so later calls return without repeating the setup.
bool bcsSplitInitialized = false;
// One bit per bcsEngines slot index; a set bit marks that slot as a target for split copies.
// Defaults to the even linked copy engines and is overwritten with the SplitBcsMask debug flag
// when that flag is greater than 0.
BcsInfoMask splitEngines = EngineHelpers::evenLinkedCopyEnginesMask;
LinearStream *commandStream = nullptr;

View File

@ -1136,13 +1136,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 3u> locks;
StackVec<CommandStreamReceiver *, 3u> copyEngines;
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
auto bcs = getBcsCommandStreamReceiver(static_cast<aub_stream::EngineType>(i));
if (bcs) {
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
copyEngines.push_back(bcs);
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
if (this->splitEngines.test(i)) {
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
auto bcs = getBcsCommandStreamReceiver(engineType);
if (bcs) {
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
copyEngines.push_back(bcs);
}
}
}
DEBUG_BREAK_IF(copyEngines.size() == 0);
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);

View File

@ -129,6 +129,37 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, whenConstructBcsEnginesForSplitThenConta
EXPECT_EQ(4u, queue->countBcsEngines());
}
// Verifies that a SplitBcsMask debug override makes constructBcsEnginesForSplit()
// populate exactly the BCS slots whose bits are set in the mask.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenSplitBcsMaskWhenConstructBcsEnginesForSplitThenContainsGivenBcsEngines, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;

    // Five slots requested: bit indices 0, 2, 4, 5 and 8.
    std::bitset<bcsInfoMaskSize> requestedEngines = 0b100110101;
    DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
    DebugManager.flags.SplitBcsMask.set(static_cast<int>(requestedEngines.to_ulong()));

    // Hardware description that advertises nine copy engines and blitter support.
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
    MockClDevice clDevice{device};
    cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
    ClDeviceVector clDevices{&clDeviceId, 1u};

    cl_int retVal{};
    std::unique_ptr<Context> context{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto queue = std::make_unique<MockCommandQueue>(*context);

    // With deferred BCS initialization the queue starts without any copy engine.
    EXPECT_EQ(0u, queue->countBcsEngines());

    queue->constructBcsEnginesForSplit();

    EXPECT_EQ(5u, queue->countBcsEngines());

    // Each slot must be populated if and only if its bit was requested.
    for (uint32_t slot = 0; slot < bcsInfoMaskSize; ++slot) {
        if (!requestedEngines.test(slot)) {
            EXPECT_EQ(queue->bcsEngines[slot], nullptr);
            continue;
        }
        EXPECT_NE(queue->bcsEngines[slot], nullptr);
    }
}
HWTEST2_F(CommandQueuePvcAndLaterTests, whenSelectCsrForHostPtrAllocationThenReturnProperEngine, IsAtLeastXeHpcCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);

View File

@ -288,6 +288,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default,
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, PreferInternalBcsEngine, -1, "-1: default, 0:disabled, 1: enabled. When enabled use internal BCS engine for internal transfers, when disabled use regular engine")
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsCopy, -1, "-1: default, 0:disabled, 1: enabled. When enqueues copy to main copy engine then split between even linked copy engines")
/* SplitBcsMask: when greater than 0, CommandQueue::constructBcsEnginesForSplit copies this value into the queue's split-engine mask; bit i of the mask is then tested for BCS slot index i */
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMask, 0, "0: default, >0: bitmask: indicates bcs engines for split")
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
/*DIRECT SUBMISSION FLAGS*/

View File

@ -65,6 +65,7 @@ constexpr bool isLinkBcs(aub_stream::EngineType engineType) {
}
constexpr uint32_t numLinkedCopyEngines = 8u;
// Bits 2, 4, 6 and 8 are set. Used as the default value of CommandQueue::splitEngines.
// NOTE(review): presumably these bit positions stand for ENGINE_BCS2/4/6/8, the engines the
// earlier split loop iterated over; confirm against EngineHelpers::mapBcsIndexToEngineType.
constexpr size_t evenLinkedCopyEnginesMask = 0b101010100;
bool linkCopyEnginesSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield);

View File

@ -394,6 +394,7 @@ AssignBCSAtEnqueue = -1
DeferCmdQGpgpuInitialization = -1
DeferCmdQBcsInitialization = -1
SplitBcsCopy = -1
SplitBcsMask = 0
PreferInternalBcsEngine = -1
ReuseKernelBinaries = -1
EnableChipsetUniqueUUID = -1