Add bcs split control mask
Introduce a debug variable to control which engines the transfer will be split across. Related-To: NEO-7173 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
908ddd0c92
commit
82e29fd048
|
@ -305,22 +305,29 @@ void CommandQueue::initializeBcsEngine(bool internalUsage) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandQueue::constructBcsEnginesForSplit() {
|
void CommandQueue::constructBcsEnginesForSplit() {
|
||||||
if (!this->bcsSplitInitialized) {
|
if (this->bcsSplitInitialized) {
|
||||||
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
|
return;
|
||||||
auto index = EngineHelpers::getBcsIndex(static_cast<aub_stream::EngineType>(i));
|
}
|
||||||
if (!bcsEngines[index]) {
|
|
||||||
|
if (DebugManager.flags.SplitBcsMask.get() > 0) {
|
||||||
|
this->splitEngines = DebugManager.flags.SplitBcsMask.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||||
|
if (this->splitEngines.test(i) && !bcsEngines[i]) {
|
||||||
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
|
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
|
||||||
bcsEngines[index] = neoDevice.tryGetEngine(static_cast<aub_stream::EngineType>(i), EngineUsage::Regular);
|
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
|
||||||
bcsEngineTypes.push_back(static_cast<aub_stream::EngineType>(i));
|
bcsEngines[i] = neoDevice.tryGetEngine(engineType, EngineUsage::Regular);
|
||||||
if (bcsEngines[index]) {
|
bcsEngineTypes.push_back(engineType);
|
||||||
bcsEngines[index]->osContext->ensureContextInitialized();
|
if (bcsEngines[i]) {
|
||||||
bcsEngines[index]->commandStreamReceiver->initDirectSubmission();
|
bcsEngines[i]->osContext->ensureContextInitialized();
|
||||||
|
bcsEngines[i]->commandStreamReceiver->initDirectSubmission();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this->bcsSplitInitialized = true;
|
this->bcsSplitInitialized = true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void CommandQueue::prepareHostPtrSurfaceForSplit(bool split, GraphicsAllocation &allocation) {
|
void CommandQueue::prepareHostPtrSurfaceForSplit(bool split, GraphicsAllocation &allocation) {
|
||||||
if (split) {
|
if (split) {
|
||||||
|
|
|
@ -420,7 +420,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||||
bool isCopyOnly = false;
|
bool isCopyOnly = false;
|
||||||
bool bcsAllowed = false;
|
bool bcsAllowed = false;
|
||||||
bool bcsInitialized = false;
|
bool bcsInitialized = false;
|
||||||
|
|
||||||
bool bcsSplitInitialized = false;
|
bool bcsSplitInitialized = false;
|
||||||
|
BcsInfoMask splitEngines = EngineHelpers::evenLinkedCopyEnginesMask;
|
||||||
|
|
||||||
LinearStream *commandStream = nullptr;
|
LinearStream *commandStream = nullptr;
|
||||||
|
|
||||||
|
|
|
@ -1136,13 +1136,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
|
||||||
|
|
||||||
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 3u> locks;
|
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 3u> locks;
|
||||||
StackVec<CommandStreamReceiver *, 3u> copyEngines;
|
StackVec<CommandStreamReceiver *, 3u> copyEngines;
|
||||||
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
|
|
||||||
auto bcs = getBcsCommandStreamReceiver(static_cast<aub_stream::EngineType>(i));
|
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||||
|
if (this->splitEngines.test(i)) {
|
||||||
|
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
|
||||||
|
auto bcs = getBcsCommandStreamReceiver(engineType);
|
||||||
if (bcs) {
|
if (bcs) {
|
||||||
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
|
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
|
||||||
copyEngines.push_back(bcs);
|
copyEngines.push_back(bcs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG_BREAK_IF(copyEngines.size() == 0);
|
DEBUG_BREAK_IF(copyEngines.size() == 0);
|
||||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||||
|
|
||||||
|
|
|
@ -129,6 +129,37 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, whenConstructBcsEnginesForSplitThenConta
|
||||||
EXPECT_EQ(4u, queue->countBcsEngines());
|
EXPECT_EQ(4u, queue->countBcsEngines());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(CommandQueuePvcAndLaterTests, givenSplitBcsMaskWhenConstructBcsEnginesForSplitThenContainsGivenBcsEngines, IsAtLeastXeHpcCore) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
std::bitset<bcsInfoMaskSize> bcsMask = 0b100110101;
|
||||||
|
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
|
||||||
|
DebugManager.flags.SplitBcsMask.set(static_cast<int>(bcsMask.to_ulong()));
|
||||||
|
HardwareInfo hwInfo = *defaultHwInfo;
|
||||||
|
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
|
||||||
|
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||||
|
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
|
||||||
|
MockClDevice clDevice{device};
|
||||||
|
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
|
||||||
|
ClDeviceVector clDevices{&clDeviceId, 1u};
|
||||||
|
cl_int retVal{};
|
||||||
|
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
auto queue = std::make_unique<MockCommandQueue>(*context);
|
||||||
|
EXPECT_EQ(0u, queue->countBcsEngines());
|
||||||
|
|
||||||
|
queue->constructBcsEnginesForSplit();
|
||||||
|
|
||||||
|
EXPECT_EQ(5u, queue->countBcsEngines());
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||||
|
if (bcsMask.test(i)) {
|
||||||
|
EXPECT_NE(queue->bcsEngines[i], nullptr);
|
||||||
|
} else {
|
||||||
|
EXPECT_EQ(queue->bcsEngines[i], nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandQueuePvcAndLaterTests, whenSelectCsrForHostPtrAllocationThenReturnProperEngine, IsAtLeastXeHpcCore) {
|
HWTEST2_F(CommandQueuePvcAndLaterTests, whenSelectCsrForHostPtrAllocationThenReturnProperEngine, IsAtLeastXeHpcCore) {
|
||||||
DebugManagerStateRestore restorer;
|
DebugManagerStateRestore restorer;
|
||||||
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
|
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
|
||||||
|
|
|
@ -288,6 +288,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default,
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
|
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, PreferInternalBcsEngine, -1, "-1: default, 0:disabled, 1: enabled. When enabled use internal BCS engine for internal transfers, when disabled use regular engine")
|
DECLARE_DEBUG_VARIABLE(int32_t, PreferInternalBcsEngine, -1, "-1: default, 0:disabled, 1: enabled. When enabled use internal BCS engine for internal transfers, when disabled use regular engine")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsCopy, -1, "-1: default, 0:disabled, 1: enabled. When enqueues copy to main copy engine then split between even linked copy engines")
|
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsCopy, -1, "-1: default, 0:disabled, 1: enabled. When enqueues copy to main copy engine then split between even linked copy engines")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMask, 0, "0: default, >0: bitmask: indicates bcs engines for split")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
|
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
|
||||||
|
|
||||||
/*DIRECT SUBMISSION FLAGS*/
|
/*DIRECT SUBMISSION FLAGS*/
|
||||||
|
|
|
@ -65,6 +65,7 @@ constexpr bool isLinkBcs(aub_stream::EngineType engineType) {
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr uint32_t numLinkedCopyEngines = 8u;
|
constexpr uint32_t numLinkedCopyEngines = 8u;
|
||||||
|
constexpr size_t evenLinkedCopyEnginesMask = 0b101010100;
|
||||||
|
|
||||||
bool linkCopyEnginesSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield);
|
bool linkCopyEnginesSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield);
|
||||||
|
|
||||||
|
|
|
@ -394,6 +394,7 @@ AssignBCSAtEnqueue = -1
|
||||||
DeferCmdQGpgpuInitialization = -1
|
DeferCmdQGpgpuInitialization = -1
|
||||||
DeferCmdQBcsInitialization = -1
|
DeferCmdQBcsInitialization = -1
|
||||||
SplitBcsCopy = -1
|
SplitBcsCopy = -1
|
||||||
|
SplitBcsMask = 0
|
||||||
PreferInternalBcsEngine = -1
|
PreferInternalBcsEngine = -1
|
||||||
ReuseKernelBinaries = -1
|
ReuseKernelBinaries = -1
|
||||||
EnableChipsetUniqueUUID = -1
|
EnableChipsetUniqueUUID = -1
|
||||||
|
|
Loading…
Reference in New Issue