fix: revert use dedicated cmd lists for bcs split

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-08-19 15:43:59 +00:00
committed by Compute-Runtime-Automation
parent a2f60af5c6
commit bc223b9052
12 changed files with 366 additions and 419 deletions

View File

@@ -42,7 +42,7 @@ bool BcsSplit::setupDevice(NEO::CommandStreamReceiver *csr) {
this->clientCount++;
if (!this->cmdLists.empty()) {
if (!this->cmdQs.empty()) {
return true;
}
@@ -82,27 +82,24 @@ bool BcsSplit::setupQueues(const NEO::BcsSplitSettings &settings) {
return false;
}
ze_command_queue_flags_t flags = events.aggregatedEventsMode ? static_cast<ze_command_queue_flags_t>(ZE_COMMAND_QUEUE_FLAG_IN_ORDER) : 0u;
const ze_command_queue_desc_t splitDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, .flags = flags, .mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS};
const ze_command_queue_desc_t splitDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, .mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS};
auto productFamily = this->device.getHwInfo().platform.eProductFamily;
for (const auto &csr : csrs) {
ze_result_t result;
auto cmdList = CommandList::createImmediate(productFamily, &device, &splitDesc, true, NEO::EngineHelpers::engineTypeToEngineGroupType(csr->getOsContext().getEngineType()), csr, result);
auto commandQueue = CommandQueue::create(productFamily, &device, csr, &splitDesc, true, false, true, result);
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
cmdList->forceDisableInOrderWaits();
this->cmdLists.push_back(cmdList);
this->cmdQs.push_back(commandQueue);
auto engineType = csr->getOsContext().getEngineType();
auto bcsId = NEO::EngineHelpers::getBcsIndex(engineType);
if (settings.h2dEngines.test(bcsId)) {
this->h2dCmdLists.push_back(cmdList);
this->h2dCmdQs.push_back(commandQueue);
}
if (settings.d2hEngines.test(bcsId)) {
this->d2hCmdLists.push_back(cmdList);
this->d2hCmdQs.push_back(commandQueue);
}
}
@@ -130,24 +127,24 @@ void BcsSplit::releaseResources() {
this->clientCount--;
if (this->clientCount == 0u) {
for (auto cmdList : cmdLists) {
cmdList->destroy();
for (auto cmdQ : cmdQs) {
cmdQ->destroy();
}
cmdLists.clear();
d2hCmdLists.clear();
h2dCmdLists.clear();
cmdQs.clear();
d2hCmdQs.clear();
h2dCmdQs.clear();
this->events.releaseResources();
}
}
std::vector<CommandList *> &BcsSplit::getCmdListsForSplit(NEO::TransferDirection direction) {
std::vector<CommandQueue *> &BcsSplit::getCmdQsForSplit(NEO::TransferDirection direction) {
if (direction == NEO::TransferDirection::hostToLocal) {
return this->h2dCmdLists;
return this->h2dCmdQs;
} else if (direction == NEO::TransferDirection::localToHost) {
return this->d2hCmdLists;
return this->d2hCmdQs;
}
return this->cmdLists;
return this->cmdQs;
}
size_t BcsSplit::Events::obtainAggregatedEventsForSplit(Context *context) {
@@ -215,7 +212,7 @@ size_t BcsSplit::Events::createAggregatedEvent(Context *context) {
zex_counter_based_event_external_storage_properties_t externalStorageAllocProperties = {.stype = ZEX_STRUCTURE_COUNTER_BASED_EVENT_EXTERNAL_STORAGE_ALLOC_PROPERTIES,
.incrementValue = 1,
.completionValue = static_cast<uint64_t>(bcsSplit.cmdLists.size())};
.completionValue = static_cast<uint64_t>(bcsSplit.cmdQs.size())};
const zex_counter_based_event_desc_t counterBasedDesc = {.stype = ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC,
.pNext = &externalStorageAllocProperties,
@@ -272,7 +269,7 @@ std::optional<size_t> BcsSplit::Events::createFromPool(Context *context, size_t
* - 1 event to handle barrier (vector of barrier events).
*/
const size_t neededEvents = this->bcsSplit.cmdLists.size() + 2;
const size_t neededEvents = this->bcsSplit.cmdQs.size() + 2;
if (!allocatePool(context, maxEventCountInPool, neededEvents)) {
return std::nullopt;
@@ -314,8 +311,8 @@ std::optional<size_t> BcsSplit::Events::createFromPool(Context *context, size_t
void BcsSplit::Events::resetEventPackage(size_t index) {
this->marker[index]->reset();
this->barrier[index]->reset();
for (size_t j = 0; j < this->bcsSplit.cmdLists.size(); j++) {
this->subcopy[index * this->bcsSplit.cmdLists.size() + j]->reset();
for (size_t j = 0; j < this->bcsSplit.cmdQs.size(); j++) {
this->subcopy[index * this->bcsSplit.cmdQs.size() + j]->reset();
}
}

View File

@@ -61,9 +61,9 @@ struct BcsSplit {
Events(BcsSplit &bcsSplit) : bcsSplit(bcsSplit) {}
} events;
std::vector<CommandList *> cmdLists;
std::vector<CommandList *> h2dCmdLists;
std::vector<CommandList *> d2hCmdLists;
std::vector<CommandQueue *> cmdQs;
std::vector<CommandQueue *> h2dCmdQs;
std::vector<CommandQueue *> d2hCmdQs;
template <GFXCORE_FAMILY gfxCoreFamily, typename T, typename K>
ze_result_t appendSplitCall(CommandListCoreFamilyImmediate<gfxCoreFamily> *cmdList,
@@ -76,8 +76,9 @@ struct BcsSplit {
bool performMigration,
bool hasRelaxedOrderingDependencies,
NEO::TransferDirection direction,
std::function<ze_result_t(CommandListCoreFamilyImmediate<gfxCoreFamily> *, T, K, size_t, ze_event_handle_t)> appendCall) {
std::function<ze_result_t(T, K, size_t, ze_event_handle_t)> appendCall) {
ze_result_t result = ZE_RESULT_SUCCESS;
const bool hasStallingCmds = !hasRelaxedOrderingDependencies;
auto markerEventIndexRet = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
if (!markerEventIndexRet.has_value()) {
@@ -91,10 +92,10 @@ struct BcsSplit {
cmdList->appendSignalEvent(this->events.barrier[markerEventIndex]->toHandle(), false);
}
auto subcopyEventIndex = markerEventIndex * this->cmdLists.size();
StackVec<ze_event_handle_t, 16> eventHandles;
auto subcopyEventIndex = markerEventIndex * this->cmdQs.size();
StackVec<ze_event_handle_t, 4> eventHandles;
auto &cmdListsForSplit = this->getCmdListsForSplit(direction);
auto &cmdQsForSplit = this->getCmdQsForSplit(direction);
auto signalEvent = Event::fromHandle(hSignalEvent);
@@ -105,23 +106,17 @@ struct BcsSplit {
const auto aggregatedEventsMode = this->events.aggregatedEventsMode;
auto totalSize = size;
auto engineCount = cmdListsForSplit.size();
for (size_t i = 0; i < cmdListsForSplit.size(); i++) {
auto subCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(cmdListsForSplit[i]);
auto engineCount = cmdQsForSplit.size();
for (size_t i = 0; i < cmdQsForSplit.size(); i++) {
if (barrierRequired) {
auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle();
subCmdList->addEventsToCmdList(1u, &barrierEventHandle, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
cmdList->addEventsToCmdList(1u, &barrierEventHandle, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
}
if (cmdList->hasInOrderDependencies()) {
auto &inOrderExecInfo = cmdList->getInOrderExecInfo();
subCmdList->appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset(), hasRelaxedOrderingDependencies, false, false, false, false);
}
subCmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, false, false, false);
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
if (signalEvent && i == 0u) {
subCmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false, true);
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false, true);
}
auto localSize = totalSize / engineCount;
@@ -130,7 +125,9 @@ struct BcsSplit {
auto copyEventIndex = aggregatedEventsMode ? markerEventIndex : subcopyEventIndex + i;
auto eventHandle = this->events.subcopy[copyEventIndex]->toHandle();
result = appendCall(subCmdList, localDstPtr, localSrcPtr, localSize, eventHandle);
result = appendCall(localDstPtr, localSrcPtr, localSize, eventHandle);
cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, true, cmdQsForSplit[i], nullptr, nullptr);
if ((aggregatedEventsMode && i == 0) || !aggregatedEventsMode) {
eventHandles.push_back(eventHandle);
@@ -140,7 +137,7 @@ struct BcsSplit {
engineCount--;
if (signalEvent) {
signalEvent->appendAdditionalCsr(subCmdList->getCsr(false));
signalEvent->appendAdditionalCsr(static_cast<CommandQueueImp *>(cmdQsForSplit[i])->getCsr());
}
}
@@ -167,7 +164,7 @@ struct BcsSplit {
bool setupDevice(NEO::CommandStreamReceiver *csr);
void releaseResources();
std::vector<CommandList *> &getCmdListsForSplit(NEO::TransferDirection direction);
std::vector<CommandQueue *> &getCmdQsForSplit(NEO::TransferDirection direction);
void setupEnginesMask(NEO::BcsSplitSettings &settings);
bool setupQueues(const NEO::BcsSplitSettings &settings);