From c04f8e5e5bc797586ca78593c46ccf14a7eee324 Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Thu, 9 Sep 2021 16:57:09 +0000 Subject: [PATCH] Pass copy engines to waitUntilComplete in OpenCL command queue Related-To: NEO-6057 Signed-off-by: Maciej Dziuban --- opencl/source/command_queue/command_queue.cpp | 13 +++++++------ opencl/source/command_queue/command_queue.h | 3 ++- opencl/source/command_queue/enqueue_common.h | 2 +- opencl/source/event/event.cpp | 3 ++- opencl/source/helpers/task_information.cpp | 9 ++++----- .../command_queue/blit_enqueue_tests.cpp | 3 ++- .../command_queue/command_queue_tests.cpp | 2 +- .../command_queue/enqueue_handler_tests.cpp | 4 ++-- .../command_queue/enqueue_kernel_2_tests.cpp | 4 ++-- .../command_queue/enqueue_read_image_tests.cpp | 2 +- .../unit_test/helpers/kmd_notify_tests.cpp | 18 +++++++++--------- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 5 +++++ .../test/unit_test/mocks/mock_command_queue.h | 8 ++++---- shared/source/utilities/range.h | 8 ++++++-- 14 files changed, 48 insertions(+), 36 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 00cfd4d85f..57227391c7 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -188,7 +188,7 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState return false; } -void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { +void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { WAIT_ENTER() DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait); @@ -206,10 +206,10 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcs gtpinNotifyTaskCompletion(gpgpuTaskCountToWait); } - if (bcsEngine) { 
- auto bcsCsr = getBcsCommandStreamReceiver(bcsEngine->getEngineType()); - bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false); - bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait); + for (const CopyEngineState &copyEngine : copyEnginesToWait) { + auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType); + bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false); + bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount); } getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait); @@ -919,7 +919,8 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan deferredTimestampPackets->swapNodes(nodesToRelease); } - waitUntilComplete(taskCount, this->bcsState.taskCount, flushStamp->peekStamp(), false); + Range states{&bcsState, bcsState.isValid() ? 1u : 0u}; + waitUntilComplete(taskCount, states, flushStamp->peekStamp(), false); if (printfHandler) { printfHandler->printEnqueueOutput(); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index f971b63dce..d037eedd12 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/engine_control.h" +#include "shared/source/utilities/range.h" #include "opencl/source/command_queue/copy_engine_state.h" #include "opencl/source/command_queue/csr_selection_args.h" @@ -216,7 +217,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL bool isQueueBlocked(); - MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep); + MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep); MOCKABLE_VIRTUAL 
void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler); static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel, diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 8c6114931d..649595547a 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -331,7 +331,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, getGpgpuCommandStreamReceiver().setMediaVFEStateDirty(true); if (devQueueHw->getSchedulerReturnInstance() > 0) { - waitUntilComplete(completionStamp.taskCount, this->bcsState.taskCount, completionStamp.flushStamp, false); + waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false); this->runSchedulerSimulation(*devQueueHw, *parentKernel); } } diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 8487fb5dde..7a67b5f007 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -408,7 +408,8 @@ inline bool Event::wait(bool blocking, bool useQuickKmdSleep) { } } - cmdQueue->waitUntilComplete(taskCount.load(), this->bcsState.taskCount, flushStamp->peekStamp(), useQuickKmdSleep); + Range states{&bcsState, bcsState.isValid() ? 
1u : 0u}; + cmdQueue->waitUntilComplete(taskCount.load(), states, flushStamp->peekStamp(), useQuickKmdSleep); updateExecutionStatus(); DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 5fde7a8e84..a1d90a8868 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -97,7 +97,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::DependencyResolveOnGpu); if (!memObj.isMemObjZeroCopy()) { - commandQueue.waitUntilComplete(completionStamp.taskCount, 0u, completionStamp.flushStamp, false); + commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false); if (operationType == MAP) { memObj.transferDataToHostPtr(copySize, copyOffset); } else if (!readOnly) { @@ -287,10 +287,9 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate dispatchFlags, commandQueue.getDevice()); - uint32_t bcsTaskCount = 0u; if (kernelOperation->blitPropertiesContainer.size() > 0) { - bcsTaskCount = bcsCsrForAuxTranslation->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice()); - commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), bcsTaskCount); + const auto newTaskCount = bcsCsrForAuxTranslation->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice()); + commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount); } commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::GpuKernel); @@ -299,7 +298,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate } if (printfHandler) { - 
commandQueue.waitUntilComplete(completionStamp.taskCount, bcsTaskCount, completionStamp.flushStamp, false); + commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false); printfHandler.get()->printEnqueueOutput(); } diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index 410e40967f..d844551bcf 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -1275,7 +1275,8 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenW uint32_t gpgpuTaskCount = 123; uint32_t bcsTaskCount = 123; - commandQueue->waitUntilComplete(gpgpuTaskCount, bcsTaskCount, 0, false); + CopyEngineState bcsState{bcsCsr->getOsContext().getEngineType(), bcsTaskCount}; + commandQueue->waitUntilComplete(gpgpuTaskCount, Range{&bcsState}, 0, false); EXPECT_EQ(gpgpuTaskCount, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(bcsTaskCount, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 9e60bcf64f..16d1cf182f 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -807,7 +807,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test { template struct MyCmdQueue : public CommandQueueHw { MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { 
requestedUseQuickKmdSleep = useQuickKmdSleep; waitUntilCompleteCounter++; } diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index a44d24c411..338f4dd470 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -109,9 +109,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { public: MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { waitUntilCompleteCalled = true; - CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); + CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) override { diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 2732b49a09..6337a3dfb8 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -801,9 +801,9 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest { auxTranslationDirection); } - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp 
flushStampToWait, bool useQuickKmdSleep) override { waitCalled++; - CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); + CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } std::vector auxTranslationDirections; diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index 018a97ca99..94bd428bfa 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -762,13 +762,13 @@ HWTEST_F(EnqueueReadImageTest, GivenImage1DThatIsZeroCopyWhenReadImageWithTheSam HWTEST_F(EnqueueReadImageTest, givenDeviceWithBlitterSupportWhenEnqueueReadImageThenBlitEnqueueImageAllowedReturnsCorrectResult) { DebugManagerStateRestore restorer; - DebugManager.flags.OverrideInvalidEngineWithDefault.set(1); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); hwInfo->capabilityTable.blitterOperationsSupported = true; + REQUIRE_BLITTER_OR_SKIP(hwInfo); size_t origin[] = {0, 0, 0}; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr image(Image2dHelper<>::create(context)); diff --git a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp index 774b744ac7..3c49ab6730 100644 --- a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp +++ b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp @@ -99,7 +99,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTr EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - 
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) { @@ -109,7 +109,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompleti EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); - cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) { @@ -122,7 +122,7 @@ HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThen EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(false)); //we have unrecoverable for this case, this will throw. 
- EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false), std::exception); + EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false), std::exception); } HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) { @@ -132,7 +132,7 @@ HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTry EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); - cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDisableQuickKmdSleep) { @@ -141,7 +141,7 @@ HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDi EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThenChangeDelayValue) { @@ -150,7 +150,7 @@ HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThen EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, true); } HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSleepRequestIsCalledThenUseBaseDelayValue) { @@ -160,7 +160,7 @@ HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSl EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, 
taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, true); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) { @@ -214,7 +214,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCal auto csr = createMockCsr(); EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); cmdQ->throttle = QueueThrottle::LOW; - cmdQ->waitUntilComplete(1, 0, 1, false); + cmdQ->waitUntilComplete(1, {}, 1, false); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) { @@ -223,7 +223,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButTher EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); cmdQ->throttle = QueueThrottle::LOW; - cmdQ->waitUntilComplete(1, 0, 0, false); + cmdQ->waitUntilComplete(1, {}, 0, false); } HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) { diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 013f5d3f9c..e7b56d9bdf 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -176,6 +176,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); mockCmdQueue->bcsEngine = nullptr; + mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, 
nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -191,6 +192,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); mockCmdQueue->bcsEngine = nullptr; + mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -206,6 +208,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); mockCmdQueue->bcsEngine = bcsEngine; + mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -222,6 +225,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(-1); mockCmdQueue->bcsEngine = bcsEngine; + mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); @@ -240,6 +244,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, 
givenBcsSupportedWhenEnqueueBufferOperationIs DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); mockCmdQueue->bcsEngine = bcsEngine; + mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(8u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index ee0bd790eb..7e9a63a1a6 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -70,9 +70,9 @@ class MockCommandQueue : public CommandQueue { return writeBufferRetValue; } - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = gpgpuTaskCountToWait; - return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); + return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t *srcOrigin, @@ -300,9 +300,9 @@ class MockCommandQueueHw : public CommandQueueHw { useBcsCsrOnNotifyEnabled = notifyBcsCsr; } - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = gpgpuTaskCountToWait; - return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, 
bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); + return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } bool isCacheFlushForBcsRequired() const override { diff --git a/shared/source/utilities/range.h b/shared/source/utilities/range.h index 6295a3e258..5da0af304f 100644 --- a/shared/source/utilities/range.h +++ b/shared/source/utilities/range.h @@ -18,7 +18,11 @@ struct Range { using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; - Range(DataType *base, size_t count) + Range() + : begIt(nullptr), endIt(nullptr) { + } + + explicit Range(DataType *base, size_t count = 1) : begIt(base), endIt(base + count) { } @@ -80,4 +84,4 @@ template inline Range CreateRange(T *base, size_t count) { return Range(base, count); } -} // namespace NEO \ No newline at end of file +} // namespace NEO