feature: unregister CSR client on Event host synchronize
Related-To: NEO-7458
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
61fb19caab
commit
aea5f435db
|
@ -727,6 +727,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
|||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
if (inputRet == ZE_RESULT_SUCCESS) {
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
inOrderDependencyCounter++;
|
||||
|
@ -735,14 +737,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
|||
}
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
|
||||
signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate);
|
||||
}
|
||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
|
||||
} else {
|
||||
inputRet = executeCommandListImmediate(performMigration);
|
||||
}
|
||||
}
|
||||
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
if (signalEvent) {
|
||||
signalEvent->setCsr(this->csr);
|
||||
|
||||
|
|
|
@ -53,9 +53,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
|
|||
}
|
||||
|
||||
ze_result_t CommandQueueImp::destroy() {
|
||||
if (this->clientId != CommandQueue::clientNotRegistered) {
|
||||
this->csr->unregisterClient();
|
||||
}
|
||||
unregisterCsrClient();
|
||||
|
||||
if (commandStream.getCpuBase() != nullptr) {
|
||||
commandStream.replaceGraphicsAllocation(nullptr);
|
||||
|
@ -252,6 +250,13 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::
|
|||
return commandQueue;
|
||||
}
|
||||
|
||||
void CommandQueueImp::unregisterCsrClient() {
|
||||
if (getClientId() != CommandQueue::clientNotRegistered) {
|
||||
this->csr->unregisterClient();
|
||||
setClientId(CommandQueue::clientNotRegistered);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the `mode` field of this queue's descriptor (`desc`).
ze_command_queue_mode_t CommandQueueImp::getSynchronousMode() const {
    const ze_command_queue_mode_t mode = this->desc.mode;
    return mode;
}
|
||||
|
|
|
@ -60,8 +60,9 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
|||
|
||||
bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; }
|
||||
|
||||
uint32_t getClientId() { return this->clientId; }
|
||||
uint32_t getClientId() const { return this->clientId; }
|
||||
void setClientId(uint32_t value) { this->clientId = value; }
|
||||
virtual void unregisterCsrClient() = 0;
|
||||
|
||||
static constexpr uint32_t clientNotRegistered = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
|
|
|
@ -91,6 +91,7 @@ struct CommandQueueImp : public CommandQueue {
|
|||
void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override;
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
void checkAssert();
|
||||
void unregisterCsrClient() override;
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
|
||||
|
|
|
@ -388,6 +388,7 @@ void Event::setIsCompleted() {
|
|||
if (this->isCompleted.load() == STATE_CLEARED) {
|
||||
this->isCompleted = STATE_SIGNALED;
|
||||
}
|
||||
unsetCmdQueue(true);
|
||||
}
|
||||
|
||||
void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue) {
|
||||
|
@ -397,4 +398,15 @@ void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAl
|
|||
inOrderExecDataAllocation = &inOrderDependenciesAllocation;
|
||||
}
|
||||
|
||||
// Records `newCmdQ` as the command queue most recently associated with this
// event. Only the pointer is stored; any previously recorded queue is replaced.
void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) {
    latestUsedCmdQueue = newCmdQ;
}
|
||||
|
||||
// Forgets the recorded command queue.
// When `unregisterClient` is true and a queue is currently recorded,
// unregisterCsrClient() is invoked on it first. In every case
// latestUsedCmdQueue ends up nullptr, so a later call has nothing to unregister.
void Event::unsetCmdQueue(bool unregisterClient) {
    if (unregisterClient && latestUsedCmdQueue != nullptr) {
        latestUsedCmdQueue->unregisterCsrClient();
    }

    latestUsedCmdQueue = nullptr;
}
|
||||
|
||||
} // namespace L0
|
||||
|
|
|
@ -37,6 +37,7 @@ struct EventPool;
|
|||
struct MetricStreamer;
|
||||
struct ContextImp;
|
||||
struct Context;
|
||||
struct CommandQueue;
|
||||
struct DriverHandle;
|
||||
struct DriverHandleImp;
|
||||
struct Device;
|
||||
|
@ -214,6 +215,7 @@ struct Event : _ze_event_handle_t {
|
|||
bool isInOrderExecEvent() const { return inOrderExecEvent; }
|
||||
// Returns the stored inOrderExecDataAllocation pointer (nullptr when none is set).
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const {
    return this->inOrderExecDataAllocation;
}
|
||||
uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
|
||||
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
|
||||
void setReferenceTs(NEO::TimeStampData ×tamp) {
|
||||
referenceTs = timestamp;
|
||||
}
|
||||
|
@ -222,6 +224,8 @@ struct Event : _ze_event_handle_t {
|
|||
protected:
|
||||
// Stores the device, event pool and index passed in; the body is empty.
Event(EventPool *eventPool, int index, Device *device)
    : device(device),
      eventPool(eventPool),
      index(index) {
}
|
||||
|
||||
void unsetCmdQueue(bool unregisterClient);
|
||||
|
||||
uint64_t globalStartTS = 1;
|
||||
uint64_t globalEndTS = 1;
|
||||
uint64_t contextStartTS = 1;
|
||||
|
@ -253,6 +257,7 @@ struct Event : _ze_event_handle_t {
|
|||
EventPool *eventPool = nullptr;
|
||||
Kernel *kernelWithPrintf = nullptr;
|
||||
NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr;
|
||||
CommandQueue *latestUsedCmdQueue = nullptr;
|
||||
|
||||
uint32_t maxKernelCount = 0;
|
||||
uint32_t kernelCount = 1u;
|
||||
|
|
|
@ -163,6 +163,7 @@ void EventImp<TagSizeT>::handleSuccessfulHostSynchronization() {
|
|||
}
|
||||
}
|
||||
this->setIsCompleted();
|
||||
unsetCmdQueue(true);
|
||||
for (auto &csr : csrs) {
|
||||
csr->getInternalAllocationStorage()->cleanAllocationList(csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
}
|
||||
|
@ -396,6 +397,7 @@ ze_result_t EventImp<TagSizeT>::reset() {
|
|||
inOrderExecDataAllocation = nullptr;
|
||||
inOrderExecSignalValue = 0;
|
||||
}
|
||||
unsetCmdQueue(false);
|
||||
this->resetCompletionStatus();
|
||||
this->resetDeviceCompletionData(false);
|
||||
this->l3FlushAppliedOnKernel.reset();
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/relaxed_ordering_commands_helper.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
|
@ -1108,6 +1109,113 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp
|
|||
EXPECT_EQ(ultCsr->latestFlushedBatchBuffer.numCsrClients, ultCsr->getNumClients());
|
||||
}
|
||||
|
||||
// Checks how the CSR client count reacts to host synchronization on a signal
// event attached to an immediate command list. TrackNumCsrClientsOnSyncPoints
// is left at its default here, which the flush path treats as enabled (it only
// skips tracking when the flag equals 0).
HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());

    Mock<::L0::Kernel> kernel;
    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->csr);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 3;

    ze_event_desc_t eventDesc = {};

    ze_event_handle_t event1 = nullptr;
    ze_event_handle_t event2 = nullptr;
    ze_event_handle_t event3 = nullptr;

    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    // Fail cleanly instead of dereferencing a null pool below.
    ASSERT_NE(nullptr, eventPool);

    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event1));
    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event2));
    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event3));

    EXPECT_EQ(ultCsr->getNumClients(), 0u);

    // Scenario 1: the event is marked completed on the host before synchronizing.
    {
        commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event1, 0, nullptr, launchParams, false);
        EXPECT_EQ(ultCsr->getNumClients(), 1u);

        Event::fromHandle(event1)->setIsCompleted();

        zeEventHostSynchronize(event1, std::numeric_limits<uint64_t>::max());
        EXPECT_EQ(ultCsr->getNumClients(), 0u);
    }

    // Scenario 2: the signaled state is written straight into the event's host
    // address, so completion is discovered by the synchronize call itself.
    {
        commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event2, 0, nullptr, launchParams, false);
        EXPECT_EQ(ultCsr->getNumClients(), 1u);

        *reinterpret_cast<uint32_t *>(Event::fromHandle(event2)->getHostAddress()) = static_cast<uint32_t>(Event::STATE_SIGNALED);

        zeEventHostSynchronize(event2, std::numeric_limits<uint64_t>::max());
        EXPECT_EQ(ultCsr->getNumClients(), 0u);
    }

    // Scenario 3: the event is reset and then synchronized with a short timeout;
    // the client is expected to stay registered.
    {
        commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event3, 0, nullptr, launchParams, false);
        EXPECT_EQ(ultCsr->getNumClients(), 1u);

        zeEventHostReset(event3);

        zeEventHostSynchronize(event3, 1);
        EXPECT_EQ(ultCsr->getNumClients(), 1u);
    }

    zeEventDestroy(event1);
    zeEventDestroy(event2);
    zeEventDestroy(event3);
}
|
||||
|
||||
// With TrackNumCsrClientsOnSyncPoints forced to 0, completing and synchronizing
// the signal event must leave the CSR client count unchanged.
HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnregister) {
    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore restore;
    DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0);

    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());

    Mock<::L0::Kernel> kernel;
    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->csr);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};

    ze_event_handle_t event = nullptr;

    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    // Fail cleanly instead of dereferencing a null pool below.
    ASSERT_NE(nullptr, eventPool);

    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));

    EXPECT_EQ(ultCsr->getNumClients(), 0u);
    commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event, 0, nullptr, launchParams, false);
    EXPECT_EQ(ultCsr->getNumClients(), 1u);

    Event::fromHandle(event)->setIsCompleted();

    zeEventHostSynchronize(event, std::numeric_limits<uint64_t>::max());

    // Tracking is disabled, so the queue stays registered as a CSR client.
    EXPECT_EQ(ultCsr->getNumClients(), 1u);

    zeEventDestroy(event);
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||
|
|
|
@ -245,6 +245,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default
|
|||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDynamicPostSyncAllocLayout, -1, "-1: default, 0: Keep Timestamp size layout, 1: Use write immediate layout (qword) and switch dynamically to TS for profiling")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PrintTimestampPacketUsage, -1, "-1: default, 0: Disabled, 1: Print when TSP is allocated, initialized, returned to pool, etc.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, TrackNumCsrClientsOnSyncPoints, -1, "-1: default, 0: Disabled, 1: If set, synchronization points like zeEventHostSynchronize will unregister CmdQ from CSR clients")
|
||||
|
||||
/*LOGGING FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
|
||||
|
|
|
@ -536,5 +536,6 @@ VfBarResourceAllocationWa = 1
|
|||
EnableDynamicPostSyncAllocLayout = -1
|
||||
ForceNumberOfThreadsInGpgpuThreadGroup = -1
|
||||
PrintTimestampPacketUsage = -1
|
||||
TrackNumCsrClientsOnSyncPoints = -1
|
||||
CommandListTimestampRefreshIntervalInMilliSec = -1
|
||||
# Please don't edit below this line
|
||||
|
|
Loading…
Reference in New Issue