From a42e88d55b343a69c24f771fdeb01c260fd9c1f6 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Fri, 11 Aug 2023 14:39:33 +0000 Subject: [PATCH] fix: stall RelaxedOrdering scheduler when programming OCL dependencies Signed-off-by: Dunajski, Bartosz --- opencl/source/command_queue/enqueue_common.h | 3 +- .../command_queue/enqueue_kernel_2_tests.cpp | 49 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index c768e869ca..12cd8c90b4 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -865,6 +865,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( } auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired); + bool hasStallingCmds = !relaxedOrderingEnabled && (eventsRequest.numEventsInWaitList > 0 || timestampPacketDependencies.previousEnqueueNodes.peekNodes().size() > 0); DispatchFlags dispatchFlags( {}, // csrDependencies @@ -895,7 +896,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext kernel->requiresMemoryMigration(), // memoryMigrationRequired isTextureCacheFlushNeeded(commandType), // textureCacheFlush - false, // hasStallingCmds + hasStallingCmds, // hasStallingCmds relaxedOrderingEnabled, // hasRelaxedOrderingDependencies false, // stateCacheInvalidation isStallingCommandsOnNextFlushRequired(), // isStallingCommandsOnNextFlushRequired diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 3d1e6abdf7..15772750a4 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -1208,6 +1208,55 @@ HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenEnqueueKernelWhenProgrammingDe clReleaseEvent(outEvent); } +HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenRelaxedOrderingDisabledWhenDispatchingWithDependencyThenMarkAsStallingCmd, IsAtLeastXeHpcCore) { + auto &ultCsr = pDevice->getUltCommandStreamReceiver(); + + MockKernelWithInternals mockKernel(*pClDevice); + + { + MockCommandQueueHw ioq{context, pClDevice, nullptr}; + + ioq.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + + // IOQ without dependency + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasStallingCmds); + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies); + + ioq.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + + // IOQ with implicit dependency + EXPECT_TRUE(ultCsr.recordedDispatchFlags.hasStallingCmds); + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies); + } + + { + MockCommandQueueHw ooq{context, pClDevice, nullptr}; + ooq.setOoqEnabled(); + + cl_event event; + + ooq.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + + // OOQ without dependency + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasStallingCmds); + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies); + + ooq.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); + + // OOQ without implicit dependency + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasStallingCmds); + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies); + + ooq.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &event, nullptr); + + // OOQ with explicit dependency + EXPECT_TRUE(ultCsr.recordedDispatchFlags.hasStallingCmds); + EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies); + + clReleaseEvent(event); + } +} + HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenBarrierWithDependenciesWhenFlushingThenAllowForRelaxedOrdering, IsAtLeastXeHpcCore) { using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;