diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index f0e55eb99d..8ddbdecb12 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -547,10 +547,16 @@ void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) { getGpgpuCommandStreamReceiver().releaseIndirectHeap(heapType); } -void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) { - auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); +void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, bool blitEnqueue) { + auto allocator = blitEnqueue ? getBcsCommandStreamReceiver()->getTimestampPacketAllocator() + : getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); previousNodes.swapNodes(*timestampPacketContainer); + + if ((previousNodes.peekNodes().size() > 0) && (previousNodes.peekNodes()[0]->getAllocator() != allocator)) { + clearAllDependencies = false; + } + previousNodes.resolveDependencies(clearAllDependencies); DEBUG_BREAK_IF(timestampPacketContainer->peekNodes().size() > 0); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 17c19293ae..908bebcbcf 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -330,7 +330,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue) const; - MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer 
&previousNodes, bool clearAllDependencies); + MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, bool blitEnqueue); void storeProperties(const cl_queue_properties *properties); void processProperties(const cl_queue_properties *properties); bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 0b8d432c77..8845fc5d11 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -225,7 +225,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } if (nodesCount > 0) { - obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies); + obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, blitEnqueue); csrDeps.push_back(&timestampPacketDependencies.previousEnqueueNodes); } } diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index fe583657cf..a744d84117 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -1345,4 +1345,29 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushR } } +HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenDontClearDependencies) { + auto mockCommandQueue = static_cast *>(commandQueue.get()); + const bool clearDependencies = true; + const bool blitEnqueue = true; + const bool nonBlitEnqueue = false; + + { + TimestampPacketContainer previousNodes; + mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, 
nonBlitEnqueue); // init + EXPECT_EQ(0u, previousNodes.peekNodes().size()); + } + + { + TimestampPacketContainer previousNodes; + mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, blitEnqueue); + EXPECT_EQ(1u, previousNodes.peekNodes().size()); + } + + { + TimestampPacketContainer previousNodes; + mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, blitEnqueue); + EXPECT_EQ(0u, previousNodes.peekNodes().size()); + } +} + } // namespace NEO diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 3463a2c873..58c84a3f7f 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -250,7 +250,7 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setBuiltinOpParams(builtinOpParams); - mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true); + mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, true); timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, @@ -292,7 +292,7 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowO MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setBuiltinOpParams(builtinOpParams); - mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true); + mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, true); 
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, eventsRequest, mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 2be1f3065e..f2adc9a348 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -110,9 +110,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); } - void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) override { + void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, bool blitEnqueue) override { timestampPacketDependenciesCleared = clearAllDependencies; - CommandQueueHw::obtainNewTimestampPacketNodes(numberOfNodes, previousNodes, clearAllDependencies); + CommandQueueHw::obtainNewTimestampPacketNodes(numberOfNodes, previousNodes, clearAllDependencies, blitEnqueue); } bool waitUntilCompleteCalled = false; diff --git a/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp index 8c6a8f2a17..b66918b4ad 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp @@ -1280,7 +1280,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingNonBlockedT auto cmdQ = std::make_unique>(context, device.get(), nullptr); TimestampPacketContainer previousNodes; - cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, 
false); + cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, false, false); auto firstNode = cmdQ->timestampPacketContainer->peekNodes().at(0); csr.storeMakeResidentAllocations = true; @@ -1302,7 +1302,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThen auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); TimestampPacketContainer previousNodes; - cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, false); + cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, false, false); auto firstNode = cmdQ->timestampPacketContainer->peekNodes().at(0); csr.storeMakeResidentAllocations = true; @@ -1325,7 +1325,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenDontKee MockCommandQueueHw cmdQ(context, device.get(), nullptr); TimestampPacketContainer previousNodes; - cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false); + cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false, false); auto firstNode = cmdQ.timestampPacketContainer->peekNodes().at(0); setTagToReadyState(firstNode); @@ -1357,7 +1357,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDep MockCommandQueueHw cmdQ(context, device.get(), nullptr); TimestampPacketContainer previousNodes; - cmdQ.obtainNewTimestampPacketNodes(2, previousNodes, false); + cmdQ.obtainNewTimestampPacketNodes(2, previousNodes, false, false); firstNode.add(cmdQ.timestampPacketContainer->peekNodes().at(0)); firstNode.add(cmdQ.timestampPacketContainer->peekNodes().at(1)); auto firstTag0 = firstNode.getNode(0); @@ -1393,7 +1393,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingToOoqThenDo cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockCommandQueueHw cmdQ(context, device.get(), properties); TimestampPacketContainer previousNodes; - cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false); + cmdQ.obtainNewTimestampPacketNodes(1, 
previousNodes, false, false); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); @@ -1424,7 +1424,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingWithOmitTim MockCommandQueueHw cmdQ(context, device.get(), nullptr); TimestampPacketContainer previousNodes; - cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false); + cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false, false); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); @@ -1580,7 +1580,7 @@ TEST_F(TimestampPacketTests, givenDispatchSizeWhenAskingForNewTimestampsThenObta EXPECT_EQ(0u, mockCmdQ->timestampPacketContainer->peekNodes().size()); TimestampPacketContainer previousNodes; - mockCmdQ->obtainNewTimestampPacketNodes(dispatchSize, previousNodes, false); + mockCmdQ->obtainNewTimestampPacketNodes(dispatchSize, previousNodes, false, false); EXPECT_EQ(dispatchSize, mockCmdQ->timestampPacketContainer->peekNodes().size()); } diff --git a/opencl/test/unit_test/mem_obj/buffer_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_tests.cpp index 7c86d0de7f..6f9e16ae7e 100644 --- a/opencl/test/unit_test/mem_obj/buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_tests.cpp @@ -1045,7 +1045,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMake bufferForBlt->forceDisallowCPUCopy = true; TimestampPacketContainer previousTimestampPackets; - mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPackets, false); + mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPackets, false, true); auto dependencyFromPreviousEnqueue = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto event = make_releaseable(mockCmdQ, CL_COMMAND_READ_BUFFER, 0, 0); @@ -1465,12 +1465,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenInputAndOutputTimestampPacketWhenBlitCal auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cl_int retVal = CL_SUCCESS; - 
auto &cmdQueueCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); - auto memoryManager = cmdQueueCsr.getMemoryManager(); - cmdQueueCsr.timestampPacketAllocator = std::make_unique>(device->getRootDeviceIndex(), memoryManager, 1, - MemoryConstants::cacheLineSize, - sizeof(TimestampPacketStorage), - false, device->getDeviceBitfield()); + auto memoryManager = bcsCsr->getMemoryManager(); + bcsCsr->timestampPacketAllocator = std::make_unique>(device->getRootDeviceIndex(), memoryManager, 1, + MemoryConstants::cacheLineSize, + sizeof(TimestampPacketStorage), + false, device->getDeviceBitfield()); auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; @@ -3129,4 +3128,4 @@ TEST_F(MultiRootDeviceBufferTest, givenNullptrGraphicsAllocationForRootDeviceInd address = buffer->getBufferAddress(0); EXPECT_EQ(reinterpret_cast(buffer->getHostPtr()), address); -} \ No newline at end of file +} diff --git a/shared/source/utilities/tag_allocator.h b/shared/source/utilities/tag_allocator.h index 4b6bc16396..f1471ea831 100644 --- a/shared/source/utilities/tag_allocator.h +++ b/shared/source/utilities/tag_allocator.h @@ -55,6 +55,8 @@ struct TagNode : public IDNode>, NonCopyableOrMovableClass { uint32_t getImplicitCpuDependenciesCount() const { return implicitCpuDependenciesCount.load(); } + const TagAllocator *getAllocator() const { return allocator; } + protected: TagAllocator *allocator = nullptr; GraphicsAllocation *gfxAllocation = nullptr;