Revert "fix: add missing lock in bcs split path"

This reverts commit 8ea8e78471.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-10-04 04:39:52 +02:00
committed by Compute-Runtime-Automation
parent 42364dd070
commit d3ac2f871b
6 changed files with 34 additions and 88 deletions

View File

@@ -1327,17 +1327,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
const auto copyOffloadModeForOperation = getCopyOffloadModeForOperation(copyOffloadSubmission);
auto queue = getCmdQImmediate(copyOffloadModeForOperation);
this->latestFlushIsDualCopyOffload = (copyOffloadModeForOperation == CopyOffloadModes::dualStream);
this->latestFlushIsHostVisible = !this->dcFlushSupport;
if (NEO::debugManager.flags.DeferStateInitSubmissionToFirstRegularUsage.get() == 1) {
static_cast<CommandQueueImp *>(queue)->getCsr()->ensurePrimaryCsrInitialized(*this->device->getNEODevice());
}
if (signalEvent) {
signalEvent->setCsr(static_cast<CommandQueueImp *>(queue)->getCsr(), isInOrderExecutionEnabled());
this->latestFlushIsHostVisible |= signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST) && !this->latestFlushIsDualCopyOffload;
}
if (inputRet == ZE_RESULT_SUCCESS) {
if (signalEvent && (NEO::debugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
signalEvent->setLatestUsedCmdQueue(queue);
@@ -1346,6 +1340,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
outerLock, outerLockForIndirect);
}
this->latestFlushIsHostVisible = !this->dcFlushSupport;
if (signalEvent) {
signalEvent->setCsr(static_cast<CommandQueueImp *>(queue)->getCsr(), isInOrderExecutionEnabled());
this->latestFlushIsHostVisible |= signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST) && !this->latestFlushIsDualCopyOffload;
}
return inputRet;
}

View File

@@ -121,8 +121,6 @@ struct BcsSplit {
for (size_t i = 0; i < cmdListsForSplit.size(); i++) {
auto subCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(cmdListsForSplit[i]);
auto lock = subCmdList->getCsr(false)->obtainUniqueOwnership();
subCmdList->checkAvailableSpace(numWaitEvents, hasRelaxedOrderingDependencies, estimatedCmdBufferSize, false);
if (barrierRequired) {
@@ -147,7 +145,7 @@ struct BcsSplit {
auto copyEventIndex = aggregatedEventsMode ? markerEventIndex : subcopyEventIndex + i;
auto eventHandle = useSignalEventForSubcopy ? signalEvent : this->events.subcopy[copyEventIndex]->toHandle();
result = appendCall(subCmdList, localDstPtr, localSrcPtr, localSize, eventHandle, aggregatedEventIncrementVal);
subCmdList->flushImmediate(result, true, !hasRelaxedOrderingDependencies, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, false, nullptr, true, &lock, nullptr);
subCmdList->flushImmediate(result, true, !hasRelaxedOrderingDependencies, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, false, nullptr, true, nullptr, nullptr);
if ((aggregatedEventsMode && i == 0) || !aggregatedEventsMode) {
eventHandles.push_back(eventHandle);
@@ -181,7 +179,6 @@ struct BcsSplit {
cmdList->handleInOrderDependencyCounter(signalEvent, false, dualStreamCopyOffload);
if (aggregatedEventsMode && !useSignalEventForSubcopy) {
std::lock_guard<std::mutex> lock(events.mtx);
cmdList->assignInOrderExecInfoToEvent(this->events.marker[markerEventIndex]);
}

View File

@@ -9,14 +9,11 @@
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include <array>
namespace L0 {
namespace ult {
@@ -150,59 +147,5 @@ HWTEST_F(DeviceMtTest, givenMultiThreadsExecutingCmdListAndSynchronizingDeviceWh
}
commandQueue->destroy();
}
using AggregatedBcsSplitMtTests = AggregatedBcsSplitTests;

// Multi-threaded BCS split scenario: several command lists append memory copies
// concurrently, and afterwards every command list held by bcsSplit must report a
// CSR task count higher than the value sampled before the worker threads ran.
HWTEST2_F(AggregatedBcsSplitMtTests, givenBcsSplitEnabledWhenMultipleThreadsAccessingThenInternalResourcesUsedCorrectly, IsAtLeastXeHpcCore) {
    constexpr uint32_t numThreads = 8;
    constexpr uint32_t iterationCount = 5;

    std::array<DestroyableZeUniquePtr<L0::CommandList>, numThreads> workerCmdLists = {};
    std::array<std::thread, numThreads> workers = {};
    std::array<void *, numThreads> hostBuffers = {};
    std::vector<TaskCountType> baselineTaskCounts;

    // One command list and one buffer per worker; a first copy is appended here,
    // on the main thread, before any task count is sampled.
    for (uint32_t workerId = 0; workerId < numThreads; workerId++) {
        workerCmdLists[workerId] = createCmdList(true);
        hostBuffers[workerId] = allocHostMem();

        workerCmdLists[workerId]->appendMemoryCopy(hostBuffers[workerId], hostBuffers[workerId], copySize, nullptr, 0, nullptr, copyParams);
    }

    // Record the task count of every split command list as the comparison baseline.
    for (size_t splitIdx = 0; splitIdx < bcsSplit->cmdLists.size(); splitIdx++) {
        baselineTaskCounts.push_back(bcsSplit->cmdLists[splitIdx]->getCsr(false)->peekTaskCount());
    }

    // Workers spin on this flag so that they all begin appending at roughly the same time.
    std::atomic_bool startGate{false};

    auto copyWorker = [&](uint32_t cmdListId) {
        while (!startGate.load()) {
            std::this_thread::yield();
        }

        // Each worker operates on its own copy of the copy parameters.
        auto localCopyParams = copyParams;
        auto &ownCmdList = workerCmdLists[cmdListId];
        auto &ownBuffer = hostBuffers[cmdListId];

        // Counting starts at 1 — presumably because the first copy was already appended during setup.
        for (uint32_t iteration = 1; iteration < iterationCount; iteration++) {
            ownCmdList->appendMemoryCopy(ownBuffer, ownBuffer, copySize, nullptr, 0, nullptr, localCopyParams);
        }
    };

    uint32_t nextWorkerId = 0;
    for (auto &worker : workers) {
        worker = std::thread(copyWorker, nextWorkerId);
        nextWorkerId++;
    }

    startGate.store(true);

    for (uint32_t workerId = 0; workerId < numThreads; workerId++) {
        workers[workerId].join();
    }

    // Every split command list must have advanced past its baseline task count.
    size_t splitIdx = 0;
    for (auto &splitCmdList : bcsSplit->cmdLists) {
        EXPECT_TRUE(splitCmdList->getCsr(false)->peekTaskCount() > baselineTaskCounts[splitIdx]);
        splitIdx++;
    }

    for (uint32_t workerId = 0; workerId < numThreads; workerId++) {
        context->freeMem(hostBuffers[workerId]);
    }
}
} // namespace ult
} // namespace L0