diff --git a/opencl/source/aub_mem_dump/aub_alloc_dump.h b/opencl/source/aub_mem_dump/aub_alloc_dump.h index 74c978fa84..f51e5fbdbc 100644 --- a/opencl/source/aub_mem_dump/aub_alloc_dump.h +++ b/opencl/source/aub_mem_dump/aub_alloc_dump.h @@ -34,7 +34,8 @@ inline bool isWritableBuffer(GraphicsAllocation &gfxAllocation) { gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED || gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY || gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR || - gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::MAP_ALLOCATION) && + gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::MAP_ALLOCATION || + gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::SVM_GPU) && gfxAllocation.isMemObjectsAllocationWithWritableFlags(); } diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 06ffd68a62..55624afa3b 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -90,6 +90,7 @@ class CommandQueueHw : public CommandQueue { MOCKABLE_VIRTUAL void notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead, bool notifyBcsCsr); MOCKABLE_VIRTUAL void notifyEnqueueReadImage(Image *image, bool blockingRead, bool notifyBcsCsr); + MOCKABLE_VIRTUAL void notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr); cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index d772b0bc3e..a0e55c1aed 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -45,6 +45,13 @@ void 
CommandQueueHw::notifyEnqueueReadImage(Image *image, bool blockingR } } +template +void CommandQueueHw::notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr) { /* Calls setAllocDumpable(blockingCopy, notifyBcsCsr) on gfxAllocation only while the AUBDumpAllocsOnEnqueueSVMMemcpyOnly debug flag is set; otherwise a no-op. gfxAllocation is dereferenced without a null check. */ + if (DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) { + gfxAllocation->setAllocDumpable(blockingCopy, notifyBcsCsr); + } +} + template cl_int CommandQueueHw::enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, diff --git a/opencl/source/command_queue/enqueue_svm.h b/opencl/source/command_queue/enqueue_svm.h index d1777ab890..c6fafedeca 100644 --- a/opencl/source/command_queue/enqueue_svm.h +++ b/opencl/source/command_queue/enqueue_svm.h @@ -349,6 +349,7 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(dstHostPtrSurf.getAllocation()->getGpuAddress()); + notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); /* passes the source SVM GPU allocation; !! turns the cl_bool into a bool */ } setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation()); surfaces[0] = &srcSvmSurf; diff --git a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl index dc8066a2c0..70989525e3 100644 --- a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl +++ b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl @@ -735,7 +735,7 @@ void AUBCommandStreamReceiverHw::dumpAllocation(GraphicsAllocation &g return; } - if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) { + if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get() || DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) { if (!gfxAllocation.isAllocDumpable()) { return; } diff 
--git a/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl b/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl index d04c6e85d4..c33282230f 100644 --- a/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl +++ b/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl @@ -565,7 +565,7 @@ void TbxCommandStreamReceiverHw::dumpAllocation(GraphicsAllocation &g return; } - if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) { + if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get() || DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) { if (!gfxAllocation.isAllocDumpable()) { return; } diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp index 58b69f6965..b0290513bc 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp @@ -5,8 +5,10 @@ * */ +#include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" @@ -15,6 +17,7 @@ #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" +#include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "test.h" using namespace NEO; @@ -302,6 +305,61 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBuffer alignedFree(dstHostPtr); } +HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThenSetAllocDumpable) { + if (!pDevice->isFullRangeSvm()) { + return; + } + 
+ DebugManagerStateRestore dbgRestore; + DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.set(true); + DebugManager.flags.AUBDumpBufferFormat.set("BIN"); + + auto dstHostPtr = alignedMalloc(256, 64); + + EXPECT_FALSE(srcSvmAlloc->isAllocDumpable()); + + auto retVal = pCmdQ->enqueueSVMMemcpy( + CL_TRUE, // cl_bool blocking_copy + dstHostPtr, // void *dst_ptr + srcSvmPtr, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_event *event_wait_list + nullptr // cl_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_TRUE(srcSvmAlloc->isAllocDumpable()); + + alignedFree(dstHostPtr); +} + +HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThenItCallsNotifyFunction) { /* Uses the mock queue to check that a blocking enqueueSVMMemcpy reaches notifyEnqueueSVMMemcpy and that the BCS flag recorded by the mock matches the engine type of the CSR returned for CL_COMMAND_SVM_MEMCPY. */ + if (!pDevice->isFullRangeSvm()) { + return; + } + + auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); + auto dstHostPtr = alignedMalloc(256, 64); + + auto retVal = mockCmdQ->enqueueSVMMemcpy( + CL_TRUE, // cl_bool blocking_copy + dstHostPtr, // void *dst_ptr + srcSvmPtr, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_event *event_wait_list + nullptr // cl_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_TRUE(mockCmdQ->notifyEnqueueSVMMemcpyCalled); + + auto &csr = mockCmdQ->getCommandStreamReceiverByCommandType(CL_COMMAND_SVM_MEMCPY); + EXPECT_EQ(EngineHelpers::isBcs(csr.getOsContext().getEngineType()), mockCmdQ->useBcsCsrOnNotifyEnabled); + + alignedFree(dstHostPtr); +} + struct EnqueueSvmMemCopyHw : public ::testing::Test { void SetUp() override { diff --git a/opencl/test/unit_test/command_stream/aub_command_stream_receiver_2_tests.cpp b/opencl/test/unit_test/command_stream/aub_command_stream_receiver_2_tests.cpp index 3c3718a40b..966f482f21 100644 --- a/opencl/test/unit_test/command_stream/aub_command_stream_receiver_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/aub_command_stream_receiver_2_tests.cpp @@ -1092,6 +1092,35 @@ 
HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationDumpableWhenDumpA memoryManager->freeGraphicsMemory(gfxAllocation); } +HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationWhenDumpAllocationIsCalledAndAUBDumpAllocsOnEnqueueSVMMemcpyOnlyIsSetThenDumpableFlagShouldBeRespected) { /* With AUBDumpAllocsOnEnqueueSVMMemcpyOnly enabled, the AUB CSR's dumpAllocation is expected to skip the allocation while it is not dumpable, and to dump it once setAllocDumpable(true, false) was called, after which isAllocDumpable() is expected to read false again. */ + DebugManagerStateRestore dbgRestore; + DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.set(true); + DebugManager.flags.AUBDumpBufferFormat.set("BIN"); + + MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); + MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); + aubCsr.setupContext(osContext); + + auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); + + auto memoryManager = pDevice->getMemoryManager(); + auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER, pDevice->getDeviceBitfield()}); + + gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); + gfxAllocation->setAllocDumpable(false, false); + + aubCsr.dumpAllocation(*gfxAllocation); + EXPECT_FALSE(mockHardwareContext->dumpSurfaceCalled); + EXPECT_FALSE(gfxAllocation->isAllocDumpable()); + + gfxAllocation->setAllocDumpable(true, false); + aubCsr.dumpAllocation(*gfxAllocation); + EXPECT_FALSE(gfxAllocation->isAllocDumpable()); + EXPECT_TRUE(mockHardwareContext->dumpSurfaceCalled); + + memoryManager->freeGraphicsMemory(gfxAllocation); +} + HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationWritableWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenPollForCompletionShouldBeCalledBeforeGraphicsAllocationIsDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); diff --git a/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp 
b/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp index b25f05b016..6a309077ab 100644 --- a/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp +++ b/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp @@ -1099,6 +1099,35 @@ HWTEST_F(TbxCommandStreamTests, givenGraphicsAllocationWhenDumpAllocationIsCalle memoryManager->freeGraphicsMemory(gfxAllocation); } +HWTEST_F(TbxCommandStreamTests, givenGraphicsAllocationWhenDumpAllocationIsCalledAndAUBDumpAllocsOnEnqueueSVMMemcpyOnlyIsSetThenDumpableFlagShouldBeRespected) { /* With AUBDumpAllocsOnEnqueueSVMMemcpyOnly enabled, the TBX CSR's dumpAllocation is expected to skip the allocation while it is not dumpable, and to dump it once setAllocDumpable(true, false) was called, after which isAllocDumpable() is expected to read false again. */ + DebugManagerStateRestore dbgRestore; + DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.set(true); + DebugManager.flags.AUBDumpBufferFormat.set("BIN"); + + MockTbxCsr tbxCsr(*pDevice->executionEnvironment); + MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); + tbxCsr.setupContext(osContext); + + auto mockHardwareContext = static_cast(tbxCsr.hardwareContextController->hardwareContexts[0].get()); + + auto memoryManager = pDevice->getMemoryManager(); + auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER, pDevice->getDeviceBitfield()}); + + gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); + gfxAllocation->setAllocDumpable(false, false); + + tbxCsr.dumpAllocation(*gfxAllocation); + EXPECT_FALSE(mockHardwareContext->dumpSurfaceCalled); + EXPECT_FALSE(gfxAllocation->isAllocDumpable()); + + gfxAllocation->setAllocDumpable(true, false); + tbxCsr.dumpAllocation(*gfxAllocation); + EXPECT_FALSE(gfxAllocation->isAllocDumpable()); + EXPECT_TRUE(mockHardwareContext->dumpSurfaceCalled); + + memoryManager->freeGraphicsMemory(gfxAllocation); +} + HWTEST_F(TbxCommandStreamTests, givenGraphicsAllocationWhenDumpAllocationIsCalledButUseAubStreamIsSetToFalseThenEarlyReturn) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.UseAubStream.set(false); diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 67a72bc206..21713c3b07 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -277,6 +277,10 @@ class MockCommandQueueHw : public CommandQueueHw { notifyEnqueueReadImageCalled = true; useBcsCsrOnNotifyEnabled = notifyBcsCsr; } + void notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr) override { /* Mock override: only records that the call happened and the notifyBcsCsr value; gfxAllocation and blockingCopy are unused and the CommandQueueHw implementation is not invoked. */ + notifyEnqueueSVMMemcpyCalled = true; + useBcsCsrOnNotifyEnabled = notifyBcsCsr; + } void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = gpgpuTaskCountToWait; @@ -300,6 +304,7 @@ class MockCommandQueueHw : public CommandQueueHw { bool storeMultiDispatchInfo = false; bool notifyEnqueueReadBufferCalled = false; bool notifyEnqueueReadImageCalled = false; + bool notifyEnqueueSVMMemcpyCalled = false; bool cpuDataTransferHandlerCalled = false; bool useBcsCsrOnNotifyEnabled = false; struct OverrideReturnValue { diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index de1b1e6128..9289c14344 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -24,6 +24,7 @@ FlattenBatchBufferForAUBDump = 0 AddPatchInfoCommentsForAUBDump = 0 UseAubStream = 1 AUBDumpAllocsOnEnqueueReadOnly = 0 +AUBDumpAllocsOnEnqueueSVMMemcpyOnly = 0 AUBDumpForceAllToLocalMemory = 0 ForceDeviceId = unk SchedulerSimulationReturnInstance = 0 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 03023e319c..3c585be27e 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl 
@@ -32,6 +32,7 @@ DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-le DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information") DECLARE_DEBUG_VARIABLE(bool, UseAubStream, true, "Use aub_stream for aub dumping") DECLARE_DEBUG_VARIABLE(bool, AUBDumpAllocsOnEnqueueReadOnly, false, "Force dumping buffers and images on clEnqueueReadBuffer/Image only (blocking calls)") +DECLARE_DEBUG_VARIABLE(bool, AUBDumpAllocsOnEnqueueSVMMemcpyOnly, false, "Force dumping allocations on clEnqueueSVMMemcpy only (blocking calls)") DECLARE_DEBUG_VARIABLE(bool, AUBDumpForceAllToLocalMemory, false, "Force placing every allocation in local memory address space") /*DEBUG FLAGS*/