Add bcs split control mask
Introduce a debug variable to control which engines the transfer will be split across. Related-To: NEO-7173 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
908ddd0c92
commit
82e29fd048
|
@ -305,21 +305,28 @@ void CommandQueue::initializeBcsEngine(bool internalUsage) {
|
|||
}
|
||||
|
||||
void CommandQueue::constructBcsEnginesForSplit() {
|
||||
if (!this->bcsSplitInitialized) {
|
||||
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
|
||||
auto index = EngineHelpers::getBcsIndex(static_cast<aub_stream::EngineType>(i));
|
||||
if (!bcsEngines[index]) {
|
||||
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
|
||||
bcsEngines[index] = neoDevice.tryGetEngine(static_cast<aub_stream::EngineType>(i), EngineUsage::Regular);
|
||||
bcsEngineTypes.push_back(static_cast<aub_stream::EngineType>(i));
|
||||
if (bcsEngines[index]) {
|
||||
bcsEngines[index]->osContext->ensureContextInitialized();
|
||||
bcsEngines[index]->commandStreamReceiver->initDirectSubmission();
|
||||
}
|
||||
if (this->bcsSplitInitialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (DebugManager.flags.SplitBcsMask.get() > 0) {
|
||||
this->splitEngines = DebugManager.flags.SplitBcsMask.get();
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||
if (this->splitEngines.test(i) && !bcsEngines[i]) {
|
||||
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
|
||||
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
|
||||
bcsEngines[i] = neoDevice.tryGetEngine(engineType, EngineUsage::Regular);
|
||||
bcsEngineTypes.push_back(engineType);
|
||||
if (bcsEngines[i]) {
|
||||
bcsEngines[i]->osContext->ensureContextInitialized();
|
||||
bcsEngines[i]->commandStreamReceiver->initDirectSubmission();
|
||||
}
|
||||
}
|
||||
this->bcsSplitInitialized = true;
|
||||
}
|
||||
|
||||
this->bcsSplitInitialized = true;
|
||||
}
|
||||
|
||||
void CommandQueue::prepareHostPtrSurfaceForSplit(bool split, GraphicsAllocation &allocation) {
|
||||
|
|
|
@ -420,7 +420,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||
bool isCopyOnly = false;
|
||||
bool bcsAllowed = false;
|
||||
bool bcsInitialized = false;
|
||||
|
||||
bool bcsSplitInitialized = false;
|
||||
BcsInfoMask splitEngines = EngineHelpers::evenLinkedCopyEnginesMask;
|
||||
|
||||
LinearStream *commandStream = nullptr;
|
||||
|
||||
|
|
|
@ -1136,13 +1136,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
|
|||
|
||||
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 3u> locks;
|
||||
StackVec<CommandStreamReceiver *, 3u> copyEngines;
|
||||
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
|
||||
auto bcs = getBcsCommandStreamReceiver(static_cast<aub_stream::EngineType>(i));
|
||||
if (bcs) {
|
||||
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
|
||||
copyEngines.push_back(bcs);
|
||||
|
||||
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||
if (this->splitEngines.test(i)) {
|
||||
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
|
||||
auto bcs = getBcsCommandStreamReceiver(engineType);
|
||||
if (bcs) {
|
||||
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
|
||||
copyEngines.push_back(bcs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(copyEngines.size() == 0);
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
|
|
|
@ -129,6 +129,37 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, whenConstructBcsEnginesForSplitThenConta
|
|||
EXPECT_EQ(4u, queue->countBcsEngines());
|
||||
}
|
||||
|
||||
// Checks that a non-zero SplitBcsMask debug flag decides which bcsEngines slots
// constructBcsEnginesForSplit() populates on a queue that starts with no BCS engines.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenSplitBcsMaskWhenConstructBcsEnginesForSplitThenContainsGivenBcsEngines, IsAtLeastXeHpcCore) {
|
||||
DebugManagerStateRestore restorer;
|
||||
// Bits 0, 2, 4, 5 and 8 are set, so five engine slots are requested.
std::bitset<bcsInfoMaskSize> bcsMask = 0b100110101;
|
||||
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
|
||||
DebugManager.flags.SplitBcsMask.set(static_cast<int>(bcsMask.to_ulong()));
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
// NOTE(review): presumably marks the nine lowest BCS engines as available so every
// masked index can be resolved - confirm against maxNBitValue.
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
|
||||
MockClDevice clDevice{device};
|
||||
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
|
||||
ClDeviceVector clDevices{&clDeviceId, 1u};
|
||||
cl_int retVal{};
|
||||
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
auto queue = std::make_unique<MockCommandQueue>(*context);
|
||||
// Precondition: the freshly created queue reports no BCS engines.
EXPECT_EQ(0u, queue->countBcsEngines());
|
||||
|
||||
queue->constructBcsEnginesForSplit();
|
||||
|
||||
// One engine is expected per bit set in bcsMask.
EXPECT_EQ(5u, queue->countBcsEngines());
|
||||
|
||||
// Each bcsEngines slot must be non-null exactly when its mask bit is set.
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||
if (bcsMask.test(i)) {
|
||||
EXPECT_NE(queue->bcsEngines[i], nullptr);
|
||||
} else {
|
||||
EXPECT_EQ(queue->bcsEngines[i], nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandQueuePvcAndLaterTests, whenSelectCsrForHostPtrAllocationThenReturnProperEngine, IsAtLeastXeHpcCore) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
|
||||
|
|
|
@ -288,6 +288,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default,
|
|||
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PreferInternalBcsEngine, -1, "-1: default, 0:disabled, 1: enabled. When enabled use internal BCS engine for internal transfers, when disabled use regular engine")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsCopy, -1, "-1: default, 0:disabled, 1: enabled. When enqueues copy to main copy engine then split between even linked copy engines")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMask, 0, "0: default, >0: bitmask: indicates bcs engines for split")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
|
||||
|
||||
/*DIRECT SUBMISSION FLAGS*/
|
||||
|
|
|
@ -65,6 +65,7 @@ constexpr bool isLinkBcs(aub_stream::EngineType engineType) {
|
|||
}
|
||||
|
||||
constexpr uint32_t numLinkedCopyEngines = 8u;
|
||||
constexpr size_t evenLinkedCopyEnginesMask = 0b101010100;
|
||||
|
||||
bool linkCopyEnginesSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield);
|
||||
|
||||
|
|
|
@ -394,6 +394,7 @@ AssignBCSAtEnqueue = -1
|
|||
DeferCmdQGpgpuInitialization = -1
|
||||
DeferCmdQBcsInitialization = -1
|
||||
SplitBcsCopy = -1
|
||||
SplitBcsMask = 0
|
||||
PreferInternalBcsEngine = -1
|
||||
ReuseKernelBinaries = -1
|
||||
EnableChipsetUniqueUUID = -1
|
||||
|
|
Loading…
Reference in New Issue