From 19a6c9b1d3a1fec04223904aed5b446cb6d9bb19 Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Wed, 6 Oct 2021 15:19:50 +0000 Subject: [PATCH] Track separate task count for each BCS in OpenCL CommandQueue Related-To: NEO-6057 Signed-off-by: Maciej Dziuban --- opencl/source/command_queue/command_queue.cpp | 21 ++++++++++++------- opencl/source/command_queue/command_queue.h | 2 +- .../enqueue_command_without_kernel_tests.cpp | 11 +++++----- .../command_queue/enqueue_kernel_2_tests.cpp | 8 +++---- .../fixtures/dispatch_flags_fixture.h | 14 +++++++++++-- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 10 ++++----- .../test/unit_test/mocks/mock_command_queue.h | 7 ++++++- 7 files changed, 45 insertions(+), 28 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 1b98d8af4d..fa8c22d549 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -87,7 +87,6 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine(); auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage); bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular); - bcsState.engineType = bcsEngineType; } } @@ -630,13 +629,15 @@ cl_uint CommandQueue::getQueueFamilyIndex() const { } void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount) { - UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType); - this->bcsState.taskCount = newBcsTaskCount; + CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)]; + state.engineType = bcsEngineType; + state.taskCount = newBcsTaskCount; } uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const { - UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType); - return this->bcsState.taskCount; + const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)]; + DEBUG_BREAK_IF(!state.isValid()); + return state.taskCount; } IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) { @@ -887,7 +888,6 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage if (isEngineCopyOnly) { std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular); - bcsState.engineType = engineType; timestampPacketContainer = std::make_unique(); deferredTimestampPackets = std::make_unique(); isCopyOnly = true; @@ -927,8 +927,13 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan deferredTimestampPackets->swapNodes(nodesToRelease); } - Range states{&bcsState, bcsState.isValid() ? 1u : 0u}; - waitUntilComplete(taskCount, states, flushStamp->peekStamp(), false); + StackVec activeBcsStates{}; + for (CopyEngineState &state : this->bcsStates) { + if (state.isValid()) { + activeBcsStates.push_back(state); + } + } + waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false); if (printfHandler) { printfHandler->printEnqueueOutput(); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 57dea695d1..14491dd6fb 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -380,7 +380,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { QueueThrottle throttle = QueueThrottle::MEDIUM; EnqueueProperties::Operation latestSentEnqueueType = EnqueueProperties::Operation::None; uint64_t sliceCount = QueueSliceCount::defaultSliceCount; - CopyEngineState bcsState = {}; + std::array bcsStates = {}; bool perfCountersEnabled = false; diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 39a6966f41..4cb19e34f1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -261,17 +261,17 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectThrot EXPECT_EQ(mockCmdQ->throttle, mockCsr->passedDispatchFlags.throttle); } -HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) { +HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) { using CsrType = MockCsrHw2; DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); DebugManager.flags.EnableTimestampPacket.set(1); + SetUpImpl(); + REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo()); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; - mockCmdQ->clearBcsEngines(); - mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver; @@ -306,18 +306,17 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired); } -HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) { +HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) { using CsrType = MockCsrHw2; DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); SetUpImpl(); + REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo()); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; - mockCmdQ->clearBcsEngines(); - mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 96aedfe562..3bddfdfbb1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -932,13 +932,11 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto hwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo(); + hwInfo->capabilityTable.blitterOperationsSupported = true; + REQUIRE_BLITTER_OR_SKIP(hwInfo); MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); - cmdQ.clearBcsEngines(); - cmdQ.bcsEngines[0] = cmdQ.gpgpuEngine; - - hwInfo->capabilityTable.blitterOperationsSupported = true; size_t gws[3] = {1, 0, 0}; MockBuffer buffer; @@ -1045,4 +1043,4 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWait auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size, extendedCommandStreamSize); -} \ No newline at end of file +} diff --git a/opencl/test/unit_test/fixtures/dispatch_flags_fixture.h b/opencl/test/unit_test/fixtures/dispatch_flags_fixture.h index 720934c0c7..eb0ab156cd 100644 --- a/opencl/test/unit_test/fixtures/dispatch_flags_fixture.h +++ b/opencl/test/unit_test/fixtures/dispatch_flags_fixture.h @@ -11,16 +11,23 @@ #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" +#include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { -struct DispatchFlagsTests : public ::testing::Test { +template +struct DispatchFlagsTestsBase : public ::testing::Test { template void SetUpImpl() { + HardwareInfo hwInfo = *defaultHwInfo; + if (setupBlitter) { + hwInfo.capabilityTable.blitterOperationsSupported = true; + } + environmentWrapper.setCsrType(); - device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); context = std::make_unique(device.get()); } @@ -29,4 +36,7 @@ struct DispatchFlagsTests : public ::testing::Test { std::unique_ptr context; DebugManagerStateRestore restore; }; + +using DispatchFlagsTests = DispatchFlagsTestsBase; +using DispatchFlagsBlitTests = DispatchFlagsTestsBase; } // namespace NEO diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index c0755e2553..b459e68563 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -176,7 +176,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); mockCmdQueue->clearBcsEngines(); - mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES; + mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -192,7 +192,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); mockCmdQueue->clearBcsEngines(); - mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES; + mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -208,7 +208,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); mockCmdQueue->bcsEngines[0] = bcsEngine; - mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); + mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -225,7 +225,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(-1); mockCmdQueue->bcsEngines[0] = bcsEngine; - mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); + mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -244,7 +244,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); mockCmdQueue->bcsEngines[0] = bcsEngine; - mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); + mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(8u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index ae78dc064a..293cc1beb1 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -215,7 +215,7 @@ class MockCommandQueueHw : public CommandQueueHw { public: using BaseClass::bcsEngines; - using BaseClass::bcsState; + using BaseClass::bcsStates; using BaseClass::blitEnqueueAllowed; using BaseClass::commandQueueProperties; using BaseClass::commandStream; @@ -229,6 +229,11 @@ class MockCommandQueueHw : public CommandQueueHw { using BaseClass::throttle; using BaseClass::timestampPacketContainer; + void clearBcsStates() { + CopyEngineState unusedState{}; + std::fill(bcsStates.begin(), bcsStates.end(), unusedState); + } + MockCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false) {