fix: correct bcs split subCmdList commands estimation

Related-To: HSD-18043198483

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-08-26 15:43:50 +00:00
committed by Compute-Runtime-Automation
parent 5eddca89c4
commit 4de68a67c7
3 changed files with 31 additions and 3 deletions

View File

@@ -687,7 +687,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, memoryCopyParams);
};
ret = static_cast<DeviceImp *>(this->device)->bcsSplit->appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, true, memoryCopyParams.relaxedOrderingDispatch, direction, splitCall);
ret = static_cast<DeviceImp *>(this->device)->bcsSplit->appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, true, memoryCopyParams.relaxedOrderingDispatch, direction, estimatedSize, splitCall);
} else if (this->isValidForStagingTransfer(dstptr, srcptr, size, numWaitEvents > 0)) {
return this->appendStagingMemoryCopy(dstptr, srcptr, size, hSignalEvent, memoryCopyParams);
} else {
@@ -749,7 +749,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
hSignalEventParam, 0u, nullptr, memoryCopyParams);
};
ret = static_cast<DeviceImp *>(this->device)->bcsSplit->appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, numWaitEvents, phWaitEvents, true, memoryCopyParams.relaxedOrderingDispatch, direction, splitCall);
ret = static_cast<DeviceImp *>(this->device)->bcsSplit->appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, numWaitEvents, phWaitEvents, true, memoryCopyParams.relaxedOrderingDispatch, direction, estimatedSize, splitCall);
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch,
srcPtr, srcRegion, srcPitch, srcSlicePitch,
@@ -822,7 +822,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEventParam, false);
};
ret = static_cast<DeviceImp *>(this->device)->bcsSplit->appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, 0u, nullptr, false, relaxedOrdering, direction, splitCall);
ret = static_cast<DeviceImp *>(this->device)->bcsSplit->appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, 0u, nullptr, false, relaxedOrdering, direction, commonImmediateCommandSize, splitCall);
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
}

View File

@@ -76,6 +76,7 @@ struct BcsSplit {
bool performMigration,
bool hasRelaxedOrderingDependencies,
NEO::TransferDirection direction,
size_t estimatedCmdBufferSize,
std::function<ze_result_t(CommandListCoreFamilyImmediate<gfxCoreFamily> *, T, K, size_t, ze_event_handle_t)> appendCall) {
ze_result_t result = ZE_RESULT_SUCCESS;
@@ -109,6 +110,8 @@ struct BcsSplit {
for (size_t i = 0; i < cmdListsForSplit.size(); i++) {
auto subCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(cmdListsForSplit[i]);
subCmdList->checkAvailableSpace(numWaitEvents, hasRelaxedOrderingDependencies, estimatedCmdBufferSize, false);
if (barrierRequired) {
auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle();
subCmdList->addEventsToCmdList(1u, &barrierEventHandle, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);

View File

@@ -1261,6 +1261,31 @@ HWTEST2_F(AggregatedBcsSplitTests, givenMarkerEventWhenCheckingCompletionThenRes
context->freeMem(ptr);
}
HWTEST2_F(AggregatedBcsSplitTests, givenFullCmdBufferWhenAppendCalledThenAllocateNewBuffer, IsAtLeastXeHpcCore) {
auto ptr = allocHostMem();
auto cmdListHw = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>> *>(cmdList.get());
cmdListHw->appendMemoryCopy(ptr, ptr, copySize, nullptr, 0, nullptr, copyParams);
std::vector<void *> cmdBuffers;
for (auto cmdList : bcsSplit->cmdLists) {
auto cmdStream = cmdList->getCmdContainer().getCommandStream();
cmdStream->getSpace(cmdStream->getAvailableSpace());
cmdBuffers.push_back(cmdStream->getCpuBase());
}
cmdListHw->appendMemoryCopy(ptr, ptr, copySize, nullptr, 0, nullptr, copyParams);
for (size_t i = 0; i < bcsSplit->cmdLists.size(); i++) {
auto cmdStream = bcsSplit->cmdLists[i]->getCmdContainer().getCommandStream();
EXPECT_NE(cmdBuffers[i], cmdStream->getCpuBase());
}
context->freeMem(ptr);
}
struct MultiRootAggregatedBcsSplitTests : public AggregatedBcsSplitTests {
void SetUp() override {
expectedNumRootDevices = 2;