From 107f07eb083c38440d236e47e9521d6e5e2b71e9 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Fri, 26 Jun 2020 11:21:07 +0200 Subject: [PATCH] Optimize BCS flushing scheme [3/n] Change-Id: I806d642c869bccfe40a1eb0c58b6a2f53e071cd8 Signed-off-by: Bartosz Dunajski --- opencl/source/command_queue/command_queue.cpp | 24 ++-- opencl/source/command_queue/command_queue.h | 5 +- .../source/command_queue/command_queue_hw.h | 2 + .../command_queue/command_queue_hw_base.inl | 14 ++ opencl/source/command_queue/enqueue_common.h | 6 +- opencl/source/event/event.cpp | 2 +- opencl/source/event/event.h | 2 +- opencl/source/helpers/task_information.cpp | 9 ++ .../command_queue/blit_enqueue_tests.cpp | 121 +++++++++++++++++- .../enqueue_command_without_kernel_tests.cpp | 2 + .../test/unit_test/mem_obj/buffer_tests.cpp | 1 + 11 files changed, 166 insertions(+), 22 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 13e1090603..f0e55eb99d 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -141,10 +141,19 @@ volatile uint32_t *CommandQueue::getHwTagAddress() const { return getGpgpuCommandStreamReceiver().getTagAddress(); } -bool CommandQueue::isCompleted(uint32_t taskCount) const { - uint32_t tag = getHwTag(); - DEBUG_BREAK_IF(tag == CompletionStamp::notReady); - return tag >= taskCount; +bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) const { + uint32_t gpgpuHwTag = getHwTag(); + DEBUG_BREAK_IF(gpgpuHwTag == CompletionStamp::notReady); + + if (gpgpuHwTag >= gpgpuTaskCount) { + if (auto bcsCsr = getBcsCommandStreamReceiver()) { + return (*bcsCsr->getTagAddress()) >= bcsTaskCount; + } + + return true; + } + + return false; } void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { @@ -687,11 +696,4 @@ void 
CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co } } -bool CommandQueue::isGpgpuSubmissionForBcsRequired() const { - if (DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 0) { - return (latestSentEnqueueType != EnqueueProperties::Operation::Blit) && (latestSentEnqueueType != EnqueueProperties::Operation::None); - } - - return true; -} } // namespace NEO diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index a27f7b6d93..17c19293ae 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -211,7 +211,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { volatile uint32_t *getHwTagAddress() const; - bool isCompleted(uint32_t taskCount) const; + bool isCompleted(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) const; MOCKABLE_VIRTUAL bool isQueueBlocked(); @@ -301,6 +301,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void updateBcsTaskCount(uint32_t newBcsTaskCount) { this->bcsTaskCount = newBcsTaskCount; } uint32_t peekBcsTaskCount() const { return bcsTaskCount; } + void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; } + // taskCount of last task uint32_t taskCount = 0; @@ -338,7 +340,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool blitEnqueueAllowed(cl_command_type cmdType) const; void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; - bool isGpgpuSubmissionForBcsRequired() const; Context *context = nullptr; ClDevice *device = nullptr; diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index ca093c904e..d054fe7a78 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ 
b/opencl/source/command_queue/command_queue_hw.h @@ -474,5 +474,7 @@ class CommandQueueHw : public CommandQueue { CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, TimestampPacketDependencies &timestampPacketDependencies); + + bool isGpgpuSubmissionForBcsRequired(bool queueBlocked) const; }; } // namespace NEO diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 478e5c6033..4c039600bc 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -151,4 +151,18 @@ bool CommandQueueHw::obtainTimestampPacketForCacheFlush(bool isCacheFlus return isCacheFlushRequired; } +template +bool CommandQueueHw::isGpgpuSubmissionForBcsRequired(bool queueBlocked) const { + if (queueBlocked) { + return true; + } + + bool required = isCacheFlushForBcsRequired() && (latestSentEnqueueType != EnqueueProperties::Operation::Blit) && (latestSentEnqueueType != EnqueueProperties::Operation::None); + + if (DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 1) { + required = true; + } + + return required; +} } // namespace NEO diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index a1dbf91ed1..0b8d432c77 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -215,7 +215,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (isCacheFlushForBcsRequired()) { // Cache flush for aux translation is always required (if supported) - if ((blitEnqueue && isGpgpuSubmissionForBcsRequired()) || (enqueueWithBlitAuxTranslation)) { + if ((blitEnqueue && isGpgpuSubmissionForBcsRequired(blockQueue)) || (enqueueWithBlitAuxTranslation)) { timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag()); } } @@ -490,7 +490,7 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(const Mu auto 
currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0); blitProperties.outputTimestampPacket = currentTimestampPacketNode; - if (isGpgpuSubmissionForBcsRequired()) { + if (isGpgpuSubmissionForBcsRequired(queueBlocked)) { if (isCacheFlushForBcsRequired()) { auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]); PipeControlArgs args(true); @@ -956,7 +956,7 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( CompletionStamp completionStamp = {this->taskCount, this->taskLevel, this->flushStamp->peekStamp()}; bool flushGpgpuCsr = true; - if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && !isGpgpuSubmissionForBcsRequired()) { + if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && !isGpgpuSubmissionForBcsRequired(false)) { flushGpgpuCsr = false; } diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index a43cd11d44..ab9709e77f 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -408,7 +408,7 @@ void Event::updateExecutionStatus() { // Note : Intentional fallthrough (no return) to check for CL_COMPLETE } - if ((cmdQueue != nullptr) && (cmdQueue->isCompleted(getCompletionStamp()))) { + if ((cmdQueue != nullptr) && (cmdQueue->isCompleted(getCompletionStamp(), this->bcsTaskCount))) { transitionExecutionStatus(CL_COMPLETE); executeCallbacks(CL_COMPLETE); unblockEventsBlockedByThis(CL_COMPLETE); diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h index 0593b65c25..a332e878c6 100644 --- a/opencl/source/event/event.h +++ b/opencl/source/event/event.h @@ -88,7 +88,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { ~Event() override; - uint32_t getCompletionStamp(void) const; + uint32_t getCompletionStamp() const; void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp); 
cl_ulong getDelta(cl_ulong startTime, cl_ulong endTime); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 927f7a58eb..df768a3dec 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -87,6 +87,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { dispatchFlags, commandQueue.getDevice()); + commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::DependencyResolveOnGpu); + if (!memObj.isMemObjZeroCopy()) { commandQueue.waitUntilComplete(completionStamp.taskCount, commandQueue.peekBcsTaskCount(), completionStamp.flushStamp, false); if (operationType == MAP) { @@ -262,6 +264,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate auto bcsTaskCount = commandQueue.getBcsCommandStreamReceiver()->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled()); commandQueue.updateBcsTaskCount(bcsTaskCount); } + commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::GpuKernel); if (gtpinIsGTPinInitialized()) { gtpinNotifyFlushTask(completionStamp.taskCount); @@ -314,7 +317,11 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate auto lockCSR = commandStreamReceiver.obtainUniqueOwnership(); + auto enqueueOperationType = EnqueueProperties::Operation::DependencyResolveOnGpu; + if (kernelOperation->blitEnqueue) { + enqueueOperationType = EnqueueProperties::Operation::Blit; + if (commandStreamReceiver.isStallingPipeControlOnNextFlushRequired()) { timestampPacketDependencies->barrierNodes.add(commandStreamReceiver.getTimestampPacketAllocator()->getTag()); } @@ -364,6 +371,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate dispatchBlitOperation(); } + commandQueue.updateLatestSentEnqueueType(enqueueOperationType); + return completionStamp; } diff --git 
a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index d4c1f8cc7c..fe583657cf 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -63,6 +63,7 @@ struct BlitEnqueueTests : public ::testing::Test { DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); DebugManager.flags.ForceAuxTranslationMode.set(1); + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; @@ -1037,6 +1038,25 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenW EXPECT_EQ(bcsTaskCount, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); } +HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventWithNotreadyBcsTaskCountThenDontReportCompletion) { + const uint32_t gpgpuTaskCount = 123; + const uint32_t bcsTaskCount = 123; + + *gpgpuCsr->getTagAddress() = gpgpuTaskCount; + *bcsCsr->getTagAddress() = bcsTaskCount - 1; + commandQueue->updateBcsTaskCount(bcsTaskCount); + + Event event(commandQueue.get(), CL_COMMAND_WRITE_BUFFER, 1, gpgpuTaskCount); + event.updateCompletionStamp(gpgpuTaskCount, bcsTaskCount, 1, 0); + + event.updateExecutionStatus(); + EXPECT_EQ(static_cast(CL_SUBMITTED), event.peekExecutionStatus()); + + *bcsCsr->getTagAddress() = bcsTaskCount; + event.updateExecutionStatus(); + EXPECT_EQ(static_cast(CL_COMPLETE), event.peekExecutionStatus()); +} + HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { auto buffer = createBuffer(1, false); 
buffer->forceDisallowCPUCopy = true; @@ -1153,11 +1173,15 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventFromCpuCopyWhenWaitingFo using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>; -HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenDebugFlagSetWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) { - DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(0); +HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); + + mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; + mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true; + auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; @@ -1183,12 +1207,101 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenDebugFlagSe EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); } -HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenDebugFlagSetWhenDoingBcsCopyThatRequiresCacheFlushThenSubmitToGpgpu) { +HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyThenDontSubmitToGpgpu) { + auto mockCommandQueue = static_cast *>(commandQueue.get()); + EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); + + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); + + mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; + mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false; + + auto buffer = createBuffer(1, false); + buffer->forceDisallowCPUCopy = true; + int hostPtr = 0; + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, 
nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); + + commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); +} + +HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) { + auto mockCommandQueue = static_cast *>(commandQueue.get()); + EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); + + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); + + mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; + mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false; + + auto buffer = createBuffer(1, false); + buffer->forceDisallowCPUCopy = true; + int hostPtr = 0; + + UserEvent userEvent; + cl_event waitlist = &userEvent; + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); + + 
userEvent.setStatus(CL_COMPLETE); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); + + EXPECT_FALSE(commandQueue->isQueueBlocked()); +} + +HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) { + auto mockCommandQueue = static_cast *>(commandQueue.get()); + EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); + + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); + + mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; + mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true; + + auto buffer = createBuffer(1, false); + buffer->forceDisallowCPUCopy = true; + int hostPtr = 0; + + UserEvent userEvent; + cl_event waitlist = &userEvent; + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); + + userEvent.setStatus(CL_COMPLETE); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); + + EXPECT_FALSE(commandQueue->isQueueBlocked()); +} + +HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThatRequiresCacheFlushThenSubmitToGpgpu) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; - DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(0); + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); + auto mockCommandQueue = static_cast *>(commandQueue.get()); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true; diff --git 
a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index a86fc6d1e8..3463a2c873 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -229,6 +229,7 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectThrot HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) { using CsrType = MockCsrHw2; + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); DebugManager.flags.EnableTimestampPacket.set(1); SetUpImpl(); @@ -269,6 +270,7 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowOutOfOrderExecution) { using CsrType = MockCsrHw2; DebugManager.flags.EnableTimestampPacket.set(1); + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); SetUpImpl(); diff --git a/opencl/test/unit_test/mem_obj/buffer_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_tests.cpp index f259e28b14..5696aad26c 100644 --- a/opencl/test/unit_test/mem_obj/buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_tests.cpp @@ -745,6 +745,7 @@ struct BcsBufferTests : public ::testing::Test { REQUIRE_SVM_OR_SKIP(defaultHwInfo); DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; bool createBcsEngine = !capabilityTable.blitterOperationsSupported;