fix: unregister CSR client for regular cmd lists on sync points

Related-To: NEO-8321

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-08-31 15:46:36 +00:00 committed by Compute-Runtime-Automation
parent fce659d8c8
commit 0cf60e1ac3
9 changed files with 195 additions and 0 deletions

View File

@ -344,6 +344,8 @@ struct CommandList : _ze_command_list_handle_t {
return kernelWithAssertAppended;
}
ze_event_handle_t getLatestUsedEvent() const { return latestUsedEvent; }
protected:
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
@ -376,6 +378,7 @@ struct CommandList : _ze_command_list_handle_t {
int64_t currentIndirectObjectBaseAddress = NEO::StreamProperty64::initValue;
int64_t currentBindingTablePoolBaseAddress = NEO::StreamProperty64::initValue;
ze_event_handle_t latestUsedEvent = nullptr;
ze_context_handle_t hContext = nullptr;
CommandQueue *cmdQImmediate = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;

View File

@ -142,6 +142,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
inOrderDependencyCounter = 0;
inOrderAllocationOffset = 0;
latestUsedEvent = nullptr;
return ZE_RESULT_SUCCESS;
}
@ -343,6 +345,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
callId);
}
this->latestUsedEvent = hEvent;
return res;
}
@ -370,6 +374,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
event, launchParams);
addToMappedEventList(event);
this->latestUsedEvent = hSignalEvent;
return ret;
}
@ -402,6 +409,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
addToMappedEventList(event);
appendSignalEventPostWalker(event);
this->latestUsedEvent = hEvent;
return ret;
}
@ -448,6 +457,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
addToMappedEventList(event);
appendSignalEventPostWalker(event);
this->latestUsedEvent = hEvent;
return ret;
}
@ -516,6 +527,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
appendSignalEventPostWalker(signalEvent);
addToMappedEventList(signalEvent);
this->latestUsedEvent = hSignalEvent;
return ZE_RESULT_SUCCESS;
}
@ -667,6 +680,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
addToMappedEventList(Event::fromHandle(hEvent));
this->latestUsedEvent = hEvent;
return status;
}
@ -826,6 +842,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
addFlushRequiredCommand(allocationStruct.needsFlush, event);
this->latestUsedEvent = hEvent;
return ret;
}
@ -969,6 +987,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
addToMappedEventList(event);
this->latestUsedEvent = hEvent;
return status;
}
@ -1046,8 +1067,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
memoryManager->setMemAdvise(alloc, flags, deviceImp->getRootDeviceIndex());
deviceImp->memAdviseSharedAllocations[allocData] = flags;
return ZE_RESULT_SUCCESS;
}
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@ -1427,6 +1450,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
callId);
}
this->latestUsedEvent = hSignalEvent;
return ret;
}
@ -1516,6 +1541,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
callId);
}
this->latestUsedEvent = hSignalEvent;
return ZE_RESULT_SUCCESS;
}
@ -1943,6 +1970,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
callId);
}
this->latestUsedEvent = hSignalEvent;
return res;
}
@ -2220,6 +2249,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
callId);
}
this->latestUsedEvent = hEvent;
return ZE_RESULT_SUCCESS;
}
@ -2486,6 +2517,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
addToMappedEventList(signalEvent);
this->latestUsedEvent = hSignalEvent;
return ZE_RESULT_SUCCESS;
}
@ -2599,6 +2632,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
addToMappedEventList(Event::fromHandle(hSignalEvent));
this->latestUsedEvent = hSignalEvent;
return ZE_RESULT_SUCCESS;
}
@ -2991,6 +3026,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
addToMappedEventList(signalEvent);
appendSignalEventPostWalker(signalEvent);
this->latestUsedEvent = hSignalEvent;
return ZE_RESULT_SUCCESS;
}
@ -3132,6 +3170,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
appendSignalEventPostWalker(signalEvent);
this->latestUsedEvent = signalEventHandle;
return ZE_RESULT_SUCCESS;
}

View File

@ -211,6 +211,8 @@ void CommandQueueImp::postSyncOperations(bool hangDetected) {
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());
}
unregisterCsrClient();
}
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,

View File

@ -234,6 +234,8 @@ struct CommandQueueHw : public CommandQueueImp {
CommandListRequiredStateChange &cmdListRequired);
inline void updateBaseAddressState(CommandList *lastCommandList);
void assignLatestUsedEvents(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists);
size_t alignedChildStreamPadding{};
};

View File

@ -39,6 +39,7 @@
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/core/source/helpers/error_code_helper_l0.h"
@ -97,6 +98,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence);
}
assignLatestUsedEvents(numCommandLists, phCommandLists);
if (NEO::DebugManager.flags.PauseOnEnqueue.get() != -1) {
neoDevice->debugExecutionCounter++;
}
@ -104,6 +107,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
return ret;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::assignLatestUsedEvents(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists) {
    // Walk the submitted command lists and, for each one that recorded a latest used
    // event, store this queue on that event as its latest used command queue.
    for (uint32_t listIndex = 0; listIndex < numCommandLists; listIndex++) {
        auto submittedList = CommandList::fromHandle(phCommandLists[listIndex]);
        auto latestEvent = Event::fromHandle(submittedList->getLatestUsedEvent());
        if (!latestEvent) {
            // This list has no latest used event recorded - nothing to update.
            continue;
        }
        latestEvent->setLatestUsedCmdQueue(this);
    }
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
CommandListExecutionContext &ctx,

View File

@ -241,6 +241,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isSyncModeQueue;
using BaseClass::isTbxMode;
using BaseClass::latestUsedEvent;
using BaseClass::minimalSizeForBcsSplit;
using BaseClass::nonImmediateLogicalStateHelper;
using BaseClass::partitionCount;

View File

@ -103,6 +103,7 @@ class MockEvent : public ::L0::Event {
using ::L0::Event::isCompleted;
using ::L0::Event::isFromIpcPool;
using ::L0::Event::l3FlushAppliedOnKernel;
using ::L0::Event::latestUsedCmdQueue;
using ::L0::Event::maxKernelCount;
using ::L0::Event::signalAllEventPackets;
using ::L0::Event::signalScope;

View File

@ -1542,6 +1542,91 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
driverHandle->releaseImportedPointer(dstPtr);
}
// Checks that each append call taking a signal event stores that handle as the command
// list's latest used event, and that an append without a signal event overwrites it with nullptr.
HWTEST2_F(CommandListCreate, givenCmdListWhenAppendingTheSetLatestUsedEvent, IsAtLeastXeHpCore) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    DestroyableZeUniquePtr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    DestroyableZeUniquePtr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    // The pool is dereferenced right below, so a failed creation has to stop the test here.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, eventPool);
    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
    DestroyableZeUniquePtr<L0::Event> eventObject(L0::Event::fromHandle(event));

    Mock<::L0::KernelImp> kernel;
    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    uint8_t srcPtr[64] = {};
    uint8_t dstPtr[64] = {};
    const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};

    driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);

    // Expects the call to succeed and to have recorded `event`, then clears the field
    // so the next append has to set it again on its own.
    auto verifyCall = [&whiteBoxCmdList, &event](ze_result_t result) {
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);
        EXPECT_EQ(event, whiteBoxCmdList->getLatestUsedEvent());
        whiteBoxCmdList->latestUsedEvent = nullptr;
    };

    verifyCall(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event, 0, nullptr, launchParams, false));
    verifyCall(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, event, 0, nullptr, false));
    verifyCall(commandList->appendBarrier(event, 0, nullptr, false));
    verifyCall(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, event, 0, nullptr, false, false));
    verifyCall(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, event, 0, nullptr, false, false));
    verifyCall(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, event, 0, nullptr, false));
    verifyCall(commandList->appendSignalEvent(event));
    verifyCall(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), event, 0, nullptr));

    if constexpr (FamilyType::supportsSampler) {
        // Named differently from the outer `kernel` mock to avoid shadowing it.
        auto builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion);
        auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(builtinKernel);
        mockBuiltinKernel->setArgRedescribedImageCallBase = false;

        auto image = makeZeUniquePtr<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
        ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
        ze_image_desc_t zeDesc = {};
        zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
        image->initialize(device, &zeDesc);

        verifyCall(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, event, 0, nullptr, false));
        verifyCall(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, event, 0, nullptr, false));
        verifyCall(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, event, 0, nullptr, false));
    }

    size_t rangeSizes = 1;
    const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
    verifyCall(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, event, 0, nullptr));

    verifyCall(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, event, 0, nullptr, false));

    // verifyCall leaves the field at nullptr, which would make the expectation below pass
    // even if the append never touched it. Prime it with a non-null handle so the check
    // proves that an append without a signal event really overwrites the stored value.
    whiteBoxCmdList->latestUsedEvent = event;
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false));
    EXPECT_EQ(nullptr, whiteBoxCmdList->getLatestUsedEvent());

    driverHandle->releaseImportedPointer(dstPtr);
}
HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrderingWithoutInputEventsThenCountPreviousEventAsWaitlist, IsAtLeastXeHpcCore) {
bool useImmediateFlushTask = getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();

View File

@ -19,6 +19,7 @@
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
namespace L0 {
@ -1550,6 +1551,53 @@ HWTEST_F(PrimaryBatchBufferCmdListTest, givenRegularCmdListWhenFlushingThenPassS
EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
}
// Checks that executing a command list stores the queue on the event used by the list's
// last append, and leaves the event untouched when the last append had no signal event.
HWTEST_F(PrimaryBatchBufferCmdListTest, givenLatestAppendWithEventThenRegisterCmdQueue) {
    MockEvent event;

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    // Case 1: the only append signals `event`, so execution must register the queue on it.
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event.toHandle(), 0, nullptr, launchParams, false));
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->close());

    auto cmdListHandle = commandList->toHandle();
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true));

    EXPECT_EQ(commandQueue, event.latestUsedCmdQueue);

    // Clear the registration so the second case starts from a known state.
    event.latestUsedCmdQueue = nullptr;

    // A failed reset would invalidate everything below, so its result is checked
    // rather than silently dropped.
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->reset());

    // Case 2: `event` is used, but the final append carries no event, so execution
    // must not register the queue on `event`.
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event.toHandle(), 0, nullptr, launchParams, false));
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false));
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->close());

    EXPECT_EQ(ZE_RESULT_SUCCESS, commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true));

    EXPECT_EQ(nullptr, event.latestUsedCmdQueue);
}
// Checks that executing a command list raises the CSR client count by one and that
// synchronizing the queue brings the count back to its previous value.
HWTEST_F(PrimaryBatchBufferCmdListTest, givenCmdListWhenCallingSynchronizeThenUnregisterCsrClient) {
    CmdListKernelLaunchParams kernelLaunchParams = {};
    ze_group_count_t dispatchSize{1, 1, 1};

    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendLaunchKernel(kernel->toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams, false));
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->close());

    // Snapshot the client count before submission; both expectations are relative to it.
    auto queueCsr = commandQueue->getCsr();
    auto clientsBeforeExecute = queueCsr->getNumClients();

    auto hCommandList = commandList->toHandle();
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandQueue->executeCommandLists(1, &hCommandList, nullptr, true));

    // Execution is expected to add exactly one client.
    EXPECT_EQ(clientsBeforeExecute + 1, queueCsr->getNumClients());

    commandQueue->synchronize(std::numeric_limits<uint64_t>::max());

    // After the synchronize call the count is expected to match the snapshot again.
    EXPECT_EQ(clientsBeforeExecute, queueCsr->getNumClients());
}
HWTEST_F(PrimaryBatchBufferCmdListTest, givenPrimaryBatchBufferWhenCopyCommandListAndQueueAreCreatedThenFirstDispatchCreatesGlobalInitPreambleAndLaterDispatchProvideCmdListBuffer) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;