mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
fix: unregister CSR client on OCL sync points
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
998e0a5833
commit
3cf1f5c462
@@ -1293,7 +1293,7 @@ WaitStatus CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *pri
|
||||
|
||||
waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
|
||||
|
||||
tryReleaseDeferredNodes(false);
|
||||
handlePostCompletionOperations(false);
|
||||
|
||||
if (printfHandler) {
|
||||
if (!printfHandler->printEnqueueOutput()) {
|
||||
@@ -1381,13 +1381,15 @@ bool CommandQueue::migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpPara
|
||||
return migrationHandled;
|
||||
}
|
||||
|
||||
void CommandQueue::tryReleaseDeferredNodes(bool checkEventsState) {
|
||||
void CommandQueue::handlePostCompletionOperations(bool checkQueueCompletion) {
|
||||
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
|
||||
|
||||
if (checkEventsState && !isCompleted(this->taskCount, this->bcsStates)) {
|
||||
if (checkQueueCompletion && !isCompleted(this->taskCount, this->bcsStates)) {
|
||||
return;
|
||||
}
|
||||
|
||||
unregisterGpgpuAndBcsCsrClients();
|
||||
|
||||
TimestampPacketContainer nodesToRelease;
|
||||
if (deferredTimestampPackets) {
|
||||
deferredTimestampPackets->swapNodes(nodesToRelease);
|
||||
@@ -1398,4 +1400,51 @@ void CommandQueue::tryReleaseDeferredNodes(bool checkEventsState) {
|
||||
}
|
||||
}
|
||||
|
||||
void CommandQueue::registerGpgpuCsrClient() {
|
||||
if (!gpgpuCsrClientRegistered) {
|
||||
gpgpuCsrClientRegistered = true;
|
||||
|
||||
getGpgpuCommandStreamReceiver().registerClient();
|
||||
}
|
||||
}
|
||||
|
||||
// Registers this queue as a client of the given copy-engine (BCS) receiver.
// The per-engine state slot is selected by the engine type of the receiver's
// OS context; nothing happens when that slot is already marked as registered.
void CommandQueue::registerBcsCsrClient(CommandStreamReceiver &bcsCsr) {
    const auto bcsIndex = EngineHelpers::getBcsIndex(bcsCsr.getOsContext().getEngineType());
    auto &copyEngineState = bcsStates[bcsIndex];

    if (copyEngineState.csrClientRegistered) {
        return;
    }

    // The flag is raised before the receiver is notified.
    copyEngineState.csrClientRegistered = true;
    bcsCsr.registerClient();
}
|
||||
|
||||
void CommandQueue::unregisterGpgpuCsrClient() {
|
||||
if (gpgpuCsrClientRegistered) {
|
||||
gpgpuEngine->commandStreamReceiver->unregisterClient();
|
||||
gpgpuCsrClientRegistered = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Drops this queue's client registration on the given copy-engine (BCS) receiver.
// The per-engine state slot is selected by the engine type of the receiver's
// OS context; the call is a no-op unless that slot is valid and registered.
void CommandQueue::unregisterBcsCsrClient(CommandStreamReceiver &bcsCsr) {
    const auto bcsIndex = EngineHelpers::getBcsIndex(bcsCsr.getOsContext().getEngineType());
    auto &copyEngineState = bcsStates[bcsIndex];

    if (!copyEngineState.isValid() || !copyEngineState.csrClientRegistered) {
        return;
    }

    // The flag is cleared only after the receiver has been notified.
    bcsCsr.unregisterClient();
    copyEngineState.csrClientRegistered = false;
}
|
||||
|
||||
void CommandQueue::unregisterGpgpuAndBcsCsrClients() {
|
||||
unregisterGpgpuCsrClient();
|
||||
|
||||
for (auto &engine : this->bcsEngines) {
|
||||
if (engine) {
|
||||
unregisterBcsCsrClient(*engine->commandStreamReceiver);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -377,7 +377,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
const std::array<CopyEngineState, bcsInfoMaskSize> &peekActiveBcsStates() const { return bcsStates; }
|
||||
|
||||
void tryReleaseDeferredNodes(bool checkEventsState);
|
||||
void handlePostCompletionOperations(bool checkQueueCompletion);
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
@@ -414,6 +414,14 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
|
||||
void assignDataToOverwrittenBcsNode(TagNodeBase *node);
|
||||
|
||||
void registerGpgpuCsrClient();
|
||||
void registerBcsCsrClient(CommandStreamReceiver &bcsCsr);
|
||||
|
||||
void unregisterGpgpuCsrClient();
|
||||
void unregisterBcsCsrClient(CommandStreamReceiver &bcsCsr);
|
||||
|
||||
void unregisterGpgpuAndBcsCsrClients();
|
||||
|
||||
Context *context = nullptr;
|
||||
ClDevice *device = nullptr;
|
||||
mutable EngineControl *gpgpuEngine = nullptr;
|
||||
@@ -463,6 +471,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
bool stallingCommandsOnNextFlushRequired = false;
|
||||
bool dcFlushRequiredOnStallingCommandsOnNextFlush = false;
|
||||
bool splitBarrierRequired = false;
|
||||
bool gpgpuCsrClientRegistered = false;
|
||||
};
|
||||
|
||||
template <typename PtrType>
|
||||
|
||||
@@ -521,10 +521,5 @@ class CommandQueueHw : public CommandQueue {
|
||||
|
||||
bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo);
|
||||
bool relaxedOrderingForGpgpuAllowed(uint32_t numWaitEvents) const;
|
||||
|
||||
void registerGpgpuCsrClient();
|
||||
void registerBcsCsrClient(CommandStreamReceiver &bcsCsr);
|
||||
|
||||
bool gpgpuCsrClientRegistered = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -245,37 +245,8 @@ void CommandQueueHw<Family>::setupEvent(EventBuilder &eventBuilder, cl_event *ou
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::registerGpgpuCsrClient() {
|
||||
if (!gpgpuCsrClientRegistered) {
|
||||
gpgpuCsrClientRegistered = true;
|
||||
|
||||
getGpgpuCommandStreamReceiver().registerClient();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::registerBcsCsrClient(CommandStreamReceiver &bcsCsr) {
|
||||
auto engineType = bcsCsr.getOsContext().getEngineType();
|
||||
|
||||
auto &bcsState = bcsStates[EngineHelpers::getBcsIndex(engineType)];
|
||||
|
||||
if (!bcsState.csrClientRegistered) {
|
||||
bcsState.csrClientRegistered = true;
|
||||
bcsCsr.registerClient();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
CommandQueueHw<Family>::~CommandQueueHw() {
|
||||
if (gpgpuCsrClientRegistered) {
|
||||
gpgpuEngine->commandStreamReceiver->unregisterClient();
|
||||
}
|
||||
|
||||
for (auto ©Engine : bcsStates) {
|
||||
if (copyEngine.isValid() && copyEngine.csrClientRegistered) {
|
||||
bcsEngines[EngineHelpers::getBcsIndex(copyEngine.engineType)]->commandStreamReceiver->unregisterClient();
|
||||
}
|
||||
}
|
||||
unregisterGpgpuAndBcsCsrClients();
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -454,7 +454,7 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) {
|
||||
|
||||
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
||||
|
||||
cmdQueue->tryReleaseDeferredNodes(true);
|
||||
cmdQueue->handlePostCompletionOperations(true);
|
||||
|
||||
auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
||||
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
|
||||
|
||||
@@ -1099,6 +1099,47 @@ HWTEST_F(CommandQueueHwTest, givenCommandQueueWhenDispatchingWorkThenRegisterCsr
|
||||
EXPECT_EQ(baseNumClients, csr.getNumClients());
|
||||
}
|
||||
|
||||
// Checks that the queue drops its CSR client registration at sync points:
// after finish(), and after waiting on an event once the CSR tag has reached
// the queue's task count.
HWTEST_F(CommandQueueHwTest, givenCsrClientWhenCallingSyncPointsThenUnregister) {
    MockKernelWithInternals kernelWithInternals(*pClDevice);
    auto kernel = kernelWithInternals.mockKernel;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    size_t globalWorkSize = 1;

    auto initialClientCount = ultCsr.getNumClients();
    auto registeredClientCount = initialClientCount + 1;

    MockCommandQueueHw<FamilyType> queue{context, pClDevice, nullptr};

    // Dispatching work registers the queue as a client.
    EXPECT_EQ(CL_SUCCESS, queue.enqueueKernel(kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(registeredClientCount, ultCsr.getNumClients());

    // finish() synchronizes the whole queue, so the client is released.
    queue.finish();
    EXPECT_EQ(initialClientCount, ultCsr.getNumClients());

    cl_event firstEvent;
    cl_event secondEvent;

    EXPECT_EQ(CL_SUCCESS, queue.enqueueKernel(kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, &firstEvent));
    EXPECT_EQ(registeredClientCount, ultCsr.getNumClients());
    *ultCsr.tagAddress = queue.taskCount;

    EXPECT_EQ(CL_SUCCESS, queue.enqueueKernel(kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, &secondEvent));
    EXPECT_EQ(registeredClientCount, ultCsr.getNumClients());

    // The tag still trails the queue's task count, so the client stays registered.
    clWaitForEvents(1, &firstEvent);
    EXPECT_EQ(registeredClientCount, ultCsr.getNumClients());

    *ultCsr.tagAddress = queue.taskCount;

    // The tag now matches the queue's task count, so waiting releases the client.
    clWaitForEvents(1, &firstEvent);
    EXPECT_EQ(initialClientCount, ultCsr.getNumClients());

    clReleaseEvent(firstEvent);
    clReleaseEvent(secondEvent);
}
|
||||
|
||||
HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) {
|
||||
UserEvent userEvent(context);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
@@ -39,7 +39,7 @@ HWTEST_F(ClTbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessC
|
||||
EngineControl engineControl0{&tbxCsr0, &osContext0};
|
||||
|
||||
MockOsContext osContext1(1, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield()));
|
||||
tbxCsr1.setupContext(osContext0);
|
||||
tbxCsr1.setupContext(osContext1);
|
||||
EngineControl engineControl1{&tbxCsr1, &osContext1};
|
||||
|
||||
MockCommandQueueHw<FamilyType> cmdQ(&context, pClDevice, nullptr);
|
||||
@@ -47,6 +47,8 @@ HWTEST_F(ClTbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessC
|
||||
cmdQ.clearBcsEngines();
|
||||
cmdQ.bcsEngines[0] = &engineControl1;
|
||||
|
||||
cmdQ.bcsStates[0] = {aub_stream::ENGINE_BCS, 0, false};
|
||||
|
||||
cl_int error = CL_SUCCESS;
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(&context, 0, 1, nullptr, error));
|
||||
|
||||
|
||||
@@ -609,6 +609,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenAllBcsEnginesReadyWhenWaitingForEventThe
|
||||
}
|
||||
|
||||
clReleaseEvent(event);
|
||||
|
||||
commandQueue.reset();
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) {
|
||||
|
||||
@@ -272,6 +272,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
using BaseClass::relaxedOrderingForGpgpuAllowed;
|
||||
using BaseClass::requiresCacheFlushAfterWalker;
|
||||
using BaseClass::splitBarrierRequired;
|
||||
using BaseClass::taskCount;
|
||||
using BaseClass::throttle;
|
||||
using BaseClass::timestampPacketContainer;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user