From 8523747f910bd974128d95ecaa412447666180fd Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Fri, 18 Jun 2021 10:35:54 +0000 Subject: [PATCH] Clear Event dependencies for Subcapture Signed-off-by: Bartosz Dunajski --- .../source/command_queue/command_queue_hw.h | 1 + opencl/source/command_queue/enqueue_common.h | 15 ++++-- .../command_queue/enqueue_handler_tests.cpp | 49 +++++++++++++++++++ opencl/test/unit_test/mocks/mock_aub_csr.h | 11 +++++ 4 files changed, 72 insertions(+), 4 deletions(-) diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 4b1984401d..6688ee56a7 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -374,6 +374,7 @@ class CommandQueueHw : public CommandQueue { LinearStream &commandStream, size_t commandStreamStart, bool &blocking, + bool clearDependenciesForSubCapture, const MultiDispatchInfo &multiDispatchInfo, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies &timestampPacketDependencies, diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 4d42b2c747..60515f97cb 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -162,7 +162,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, Kernel *parentKernel = multiDispatchInfo.peekParentKernel(); auto devQueue = this->getContext().getDefaultDeviceQueue(); DeviceQueueHw *devQueueHw = castToObject>(devQueue); - auto clearAllDependencies = queueDependenciesClearRequired(); TagNodeBase *hwTimeStamps = nullptr; @@ -188,7 +187,10 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, enqueueHandlerHook(commandType, multiDispatchInfo); - aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo); + bool clearDependenciesForSubCapture = false; + aubCaptureHook(blocking, clearDependenciesForSubCapture, 
multiDispatchInfo); + + bool clearAllDependencies = (queueDependenciesClearRequired() || clearDependenciesForSubCapture); if (DebugManager.flags.MakeEachEnqueueBlocking.get()) { blocking = true; @@ -206,7 +208,10 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo); if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); + if (!clearDependenciesForSubCapture) { + eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); + } + auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); size_t nodesCount = 0u; @@ -298,6 +303,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, commandStream, commandStreamStart, blocking, + clearDependenciesForSubCapture, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, @@ -708,6 +714,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( LinearStream &commandStream, size_t commandStreamStart, bool &blocking, + bool clearDependenciesForSubCapture, const MultiDispatchInfo &multiDispatchInfo, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies &timestampPacketDependencies, @@ -855,7 +862,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode; - if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), 
CsrDependencies::DependenciesType::OutOfCsr); dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index e08e5e53e0..d4d4dadfd8 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -25,7 +25,9 @@ #include "opencl/test/unit_test/mocks/mock_internal_allocation_storage.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" +#include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" +#include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "test.h" @@ -176,6 +178,53 @@ HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCapt EXPECT_FALSE(cmdQ.timestampPacketDependenciesCleared); } +HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenInputEventsWhenDispatchingEnqueueWithSubCaptureThenClearDependencies) { + DebugManagerStateRestore stateRestore; + DebugManager.flags.AUBDumpSubCaptureMode.set(1); + DebugManager.flags.EnableTimestampPacket.set(true); + + auto defaultEngine = defaultHwInfo->capabilityTable.defaultEngineType; + + MockOsContext mockOsContext(0, 1, EngineTypeUsage{defaultEngine, EngineUsage::Regular}, PreemptionMode::Disabled, false); + + auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + + aubCsr->setupContext(mockOsContext); + aubCsr2->setupContext(mockOsContext); + + pDevice->resetCommandStreamReceiver(aubCsr); + + AubSubCaptureCommon subCaptureCommon; + 
subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; + subCaptureCommon.subCaptureFilter.dumpKernelName = ""; + subCaptureCommon.subCaptureFilter.dumpKernelStartIdx = 0; + subCaptureCommon.subCaptureFilter.dumpKernelEndIdx = 1; + auto subCaptureManagerMock = new AubSubCaptureManagerMock("file_name.aub", subCaptureCommon); + aubCsr->subCaptureManager.reset(subCaptureManagerMock); + + MockCmdQWithAubSubCapture cmdQ(context, pClDevice); + MockKernelWithInternals mockKernel(*pClDevice); + mockKernel.kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "kernelName"; + size_t gws[3] = {1, 0, 0}; + + MockTimestampPacketContainer onCsrTimestamp(*aubCsr->getTimestampPacketAllocator(), 1); + MockTimestampPacketContainer outOfCsrTimestamp(*aubCsr2->getTimestampPacketAllocator(), 1); + + Event event1(&cmdQ, 0, 0, 0); + Event event2(&cmdQ, 0, 0, 0); + event1.addTimestampPacketNodes(onCsrTimestamp); + event1.addTimestampPacketNodes(outOfCsrTimestamp); + + cl_event waitlist[] = {&event1, &event2}; + + cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); + EXPECT_TRUE(cmdQ.timestampPacketDependenciesCleared); + + CsrDependencies &outOfCsrDeps = aubCsr->recordedDispatchFlags.csrDependencies; + EXPECT_EQ(0u, outOfCsrDeps.timestampPacketContainer.size()); +} + template class MyCommandQueueHw : public CommandQueueHw { typedef CommandQueueHw BaseClass; diff --git a/opencl/test/unit_test/mocks/mock_aub_csr.h b/opencl/test/unit_test/mocks/mock_aub_csr.h index 595a99968f..6650977a4c 100644 --- a/opencl/test/unit_test/mocks/mock_aub_csr.h +++ b/opencl/test/unit_test/mocks/mock_aub_csr.h @@ -12,6 +12,7 @@ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/default_hw_info.h" +#include "shared/test/common/helpers/dispatch_flags_helper.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include 
"opencl/source/platform/platform.h" @@ -61,6 +62,14 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw { using AUBCommandStreamReceiverHw::writeMemory; using AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; + CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, + uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + recordedDispatchFlags = dispatchFlags; + + return AUBCommandStreamReceiverHw::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } + DispatchMode peekDispatchMode() const { return this->dispatchMode; } @@ -132,6 +141,8 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw { bool isMultiOsContextCapable() const override { return multiOsContextCapable; } + + DispatchFlags recordedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); bool multiOsContextCapable = false; bool flushBatchedSubmissionsCalled = false; bool initProgrammingFlagsCalled = false;