From c910a58aa0613d0829916d1efe9773c97e2c66ea Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Fri, 24 Sep 2021 16:32:20 +0000 Subject: [PATCH] Store bcsEngine in an std::array in OpenCL CommandQueue Related-To: NEO-6057 Signed-off-by: Maciej Dziuban --- opencl/source/command_queue/command_queue.cpp | 38 ++++++++++------- opencl/source/command_queue/command_queue.h | 3 +- .../source/command_queue/command_queue_hw.h | 6 +-- .../command_queue/blit_enqueue_tests.cpp | 2 +- .../command_queue/command_queue_tests.cpp | 21 +++++----- .../enqueue_command_without_kernel_tests.cpp | 12 +++--- .../command_queue/enqueue_kernel_2_tests.cpp | 11 ++--- .../tbx_command_stream_tests.cpp | 3 +- opencl/test/unit_test/event/event_tests.cpp | 4 +- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 41 +++++++++++-------- .../test/unit_test/mocks/mock_command_queue.h | 24 ++++++++++- shared/source/helpers/engine_node_helper.h | 1 + .../helpers/engine_node_helper_extra.cpp | 4 ++ shared/source/sku_info/definitions/sku_info.h | 3 +- 14 files changed, 111 insertions(+), 62 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 7e07297e87..1b98d8af4d 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -86,7 +86,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice(); auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine(); auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage); - bcsEngine = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular); + bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular); bcsState.engineType = bcsEngineType; } } @@ -113,9 +113,9 @@ CommandQueue::~CommandQueue() { 
device->getPerformanceCounters()->shutdown(); } - if (bcsEngine) { + if (auto mainBcs = bcsEngines[0]; mainBcs != nullptr) { auto &selectorCopyEngine = device->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); - EngineHelpers::releaseBcsEngineType(bcsEngine->getEngineType(), selectorCopyEngine); + EngineHelpers::releaseBcsEngineType(mainBcs->getEngineType(), selectorCopyEngine); } } @@ -133,18 +133,25 @@ CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const { } CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const { - if (bcsEngine) { - UNRECOVERABLE_IF(bcsEngine->getEngineType() != bcsEngineType); - return bcsEngine->commandStreamReceiver; + const EngineControl *engine = this->bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]; + if (engine == nullptr) { + return nullptr; + } else { + return engine->commandStreamReceiver; + } +} + +CommandStreamReceiver *CommandQueue::getAnyBcs() const { + for (const EngineControl *engine : this->bcsEngines) { + if (engine != nullptr) { + return engine->commandStreamReceiver; + } } return nullptr; } CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const { - if (bcsEngine) { - return bcsEngine->commandStreamReceiver; - } - return nullptr; + return getAnyBcs(); } CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const { @@ -154,7 +161,7 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec const bool blit = blitAllowed && (blitPreferred || blitRequired); if (blit) { - return *bcsEngine->commandStreamReceiver; + return *getAnyBcs(); } else { return getGpgpuCommandStreamReceiver(); } @@ -623,12 +630,12 @@ cl_uint CommandQueue::getQueueFamilyIndex() const { } void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount) { - UNRECOVERABLE_IF(bcsEngine->getEngineType() != bcsEngineType); + 
UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType); this->bcsState.taskCount = newBcsTaskCount; } uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const { - UNRECOVERABLE_IF(bcsEngine->getEngineType() != bcsEngineType); + UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType); return this->bcsState.taskCount; } @@ -733,7 +740,7 @@ bool CommandQueue::queueDependenciesClearRequired() const { } bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const { - if (bcsEngine == nullptr) { + if (getAnyBcs() == nullptr) { return false; } @@ -878,7 +885,8 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage const bool isEngineCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroupType); if (isEngineCopyOnly) { - bcsEngine = &device->getEngine(engineType, EngineUsage::Regular); + std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); + bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular); bcsState.engineType = engineType; timestampPacketContainer = std::make_unique(); deferredTimestampPackets = std::make_unique(); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index d037eedd12..57dea695d1 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -226,6 +226,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const; + CommandStreamReceiver *getAnyBcs() const; CommandStreamReceiver *getBcsForAuxTranslation() const; MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const; Device &getDevice() const noexcept; @@ -365,7 +366,7 @@ class CommandQueue : public 
BaseObject<_cl_command_queue> { Context *context = nullptr; ClDevice *device = nullptr; EngineControl *gpgpuEngine = nullptr; - EngineControl *bcsEngine = nullptr; + std::array bcsEngines = {}; cl_command_queue_properties commandQueueProperties = 0; std::vector propertiesVector; diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 2219144659..cfcd2216ff 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -92,9 +92,9 @@ class CommandQueueHw : public CommandQueue { gpgpuEngine->osContext->ensureContextInitialized(); gpgpuEngine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *gpgpuEngine->osContext); - if (bcsEngine) { - bcsEngine->osContext->ensureContextInitialized(); - bcsEngine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *bcsEngine->osContext); + if (const EngineControl *mainBcsEngine = bcsEngines[0]; mainBcsEngine != nullptr) { + mainBcsEngine->osContext->ensureContextInitialized(); + mainBcsEngine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *mainBcsEngine->osContext); } } diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index d844551bcf..62bcf2bbe5 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -104,7 +104,7 @@ struct BlitEnqueueTests : public ::testing::Test { mockProgram->setAllowNonUniform(true); gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver; - bcsCsr = mockCmdQueue->bcsEngine->commandStreamReceiver; + bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver; } template diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 658f29f290..90185030c7 100644 --- 
a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -230,13 +230,13 @@ TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngi struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam {}; -TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) { +TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterAnyBcsCsrs) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = false; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false); - EXPECT_EQ(nullptr, cmdQ.bcsEngine); + EXPECT_EQ(0u, cmdQ.countBcsEngines()); auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver; EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); @@ -1150,8 +1150,8 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue MockContext context{}; HardwareInfo *hwInfo = context.getDevice(0)->getRootDeviceEnvironment().getMutableHardwareInfo(); MockCommandQueue queue(&context, context.getDevice(0), 0, false); - if (!queue.bcsEngine) { - queue.bcsEngine = &context.getDevice(0)->getDefaultEngine(); + if (queue.countBcsEngines() == 0) { + queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } hwInfo->capabilityTable.blitterOperationsSupported = false; @@ -1173,8 +1173,8 @@ TEST(CommandQueue, givenSimpleClCommandWhenCallingBlitEnqueueAllowedThenReturnCo MockContext context{}; MockCommandQueue queue(&context, context.getDevice(0), 0, false); - if (!queue.bcsEngine) { - queue.bcsEngine = &context.getDevice(0)->getDefaultEngine(); + if (queue.countBcsEngines() == 0) { + queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } MultiGraphicsAllocation multiAlloc{1}; @@ -1205,8 +1205,8 @@ TEST(CommandQueue, 
givenImageTransferClCommandWhenCallingBlitEnqueueAllowedThenR MockContext context{}; MockCommandQueue queue(&context, context.getDevice(0), 0, false); - if (!queue.bcsEngine) { - queue.bcsEngine = &context.getDevice(0)->getDefaultEngine(); + if (queue.countBcsEngines() == 0) { + queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } MockImageBase image{}; @@ -1232,8 +1232,8 @@ TEST(CommandQueue, givenImageTransferClCommandWhenCallingBlitEnqueueAllowedThenR TEST(CommandQueue, givenImageToBufferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { MockContext context{}; MockCommandQueue queue(&context, context.getDevice(0), 0, false); - if (!queue.bcsEngine) { - queue.bcsEngine = &context.getDevice(0)->getDefaultEngine(); + if (queue.countBcsEngines() == 0) { + queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } MultiGraphicsAllocation multiAlloc{1}; @@ -1796,6 +1796,7 @@ struct CopyOnlyQueueTests : ::testing::Test { TEST_F(CopyOnlyQueueTests, givenBcsSelectedWhenCreatingCommandQueueThenItIsCopyOnly) { MockCommandQueue queue{context.get(), clDevice.get(), properties, false}; EXPECT_EQ(bcsEngine->commandStreamReceiver, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)); + EXPECT_EQ(1u, queue.countBcsEngines()); EXPECT_NE(nullptr, queue.timestampPacketContainer); EXPECT_TRUE(queue.isCopyOnly); } diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 51109bc07c..6900c58873 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -207,7 +207,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, 
blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest, - eventBuilder, std::unique_ptr(nullptr), mockCmdQ->bcsEngine->commandStreamReceiver); + eventBuilder, std::unique_ptr(nullptr), mockCmdQ->getAnyBcs()); EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue); EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation); EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation); @@ -270,10 +270,11 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; - mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine; + mockCmdQ->clearBcsEngines(); + mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); - auto &bcsCsr = *mockCmdQ->bcsEngine->commandStreamReceiver; + auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver; auto blocking = true; TimestampPacketDependencies timestampPacketDependencies; @@ -315,10 +316,11 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllow auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; - mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine; + mockCmdQ->clearBcsEngines(); + mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); - auto &bcsCsr = *mockCmdQ->bcsEngine->commandStreamReceiver; + auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver; TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, 
nullptr, nullptr); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 6337a3dfb8..f877425e55 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -783,12 +783,12 @@ typedef Test EnqueueKernelTest; struct EnqueueAuxKernelTests : public EnqueueKernelTest { template - class MyCmdQ : public CommandQueueHw { + class MyCmdQ : public MockCommandQueueHw { public: using CommandQueueHw::commandStream; using CommandQueueHw::gpgpuEngine; - using CommandQueueHw::bcsEngine; - MyCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} + using CommandQueueHw::bcsEngines; + MyCmdQ(Context *context, ClDevice *device) : MockCommandQueueHw(context, device, nullptr) {} void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) override { CommandQueueHw::dispatchAuxTranslationBuiltin(multiDispatchInfo, auxTranslationDirection); auxTranslationDirections.push_back(auxTranslationDirection); @@ -803,7 +803,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest { void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { waitCalled++; - CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); + MockCommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } std::vector auxTranslationDirections; @@ -935,7 +935,8 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); - cmdQ.bcsEngine = cmdQ.gpgpuEngine; + cmdQ.clearBcsEngines(); + cmdQ.bcsEngines[0] = cmdQ.gpgpuEngine; 
hwInfo->capabilityTable.blitterOperationsSupported = true; diff --git a/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp b/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp index 571720f6d2..ed0c45d0ec 100644 --- a/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp +++ b/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp @@ -899,7 +899,8 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessCor MockCommandQueueHw cmdQ(&context, pClDevice, nullptr); cmdQ.gpgpuEngine = &engineControl0; - cmdQ.bcsEngine = &engineControl1; + cmdQ.clearBcsEngines(); + cmdQ.bcsEngines[0] = &engineControl1; cl_int error = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, 0, 1, nullptr, error)); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index cc2e60d8a7..be1d4ab2cd 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -143,12 +143,12 @@ TEST(Event, givenBcsCsrSetInEventWhenPeekingBcsTaskCountThenReturnCorrectTaskCou new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfo)}}; MockContext context{device.get()}; MockCommandQueue queue{context}; - queue.updateBcsTaskCount(queue.bcsEngine->getEngineType(), 19); + queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19); Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0}; EXPECT_EQ(0u, event.peekBcsTaskCountFromCommandQueue()); - event.setupBcs(queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->getOsContext().getEngineType()); + event.setupBcs(queue.bcsEngines[0]->getEngineType()); EXPECT_EQ(19u, event.peekBcsTaskCountFromCommandQueue()); } diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index a211fa4f9c..c0755e2553 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ 
b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -113,7 +113,7 @@ struct BcsBufferTests : public ::testing::Test { bcsMockContext = std::make_unique(device.get()); commandQueue.reset(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - bcsCsr = static_cast *>(commandQueue.get())->bcsEngine->commandStreamReceiver; + bcsCsr = static_cast *>(commandQueue.get())->bcsEngines[0]->commandStreamReceiver; } template @@ -162,7 +162,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferWithNotDefaultRootDeviceIndexAndBc HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIsCalledThenUseBcsCsr) { DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); auto mockCmdQueue = static_cast *>(commandQueue.get()); - auto bcsEngine = mockCmdQueue->bcsEngine; + auto bcsEngine = mockCmdQueue->bcsEngines[0]; auto bcsCsr = static_cast *>(bcsEngine->commandStreamReceiver); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); @@ -175,7 +175,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs size_t region[] = {1, 2, 1}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); - mockCmdQueue->bcsEngine = nullptr; + mockCmdQueue->clearBcsEngines(); mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); @@ -191,7 +191,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs MemoryConstants::cacheLineSize, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); - mockCmdQueue->bcsEngine = nullptr; + mockCmdQueue->clearBcsEngines(); mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), 
CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); @@ -207,7 +207,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs MemoryConstants::cacheLineSize, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); - mockCmdQueue->bcsEngine = bcsEngine; + mockCmdQueue->bcsEngines[0] = bcsEngine; mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); @@ -224,7 +224,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs commandQueue->enqueueSVMMemcpy(CL_TRUE, bufferForBlt0.get(), bufferForBlt1.get(), 1, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterForEnqueueOperations.set(-1); - mockCmdQueue->bcsEngine = bcsEngine; + mockCmdQueue->bcsEngines[0] = bcsEngine; mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); @@ -243,7 +243,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs EXPECT_EQ(7u, bcsCsr->blitBufferCalled); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); - mockCmdQueue->bcsEngine = bcsEngine; + mockCmdQueue->bcsEngines[0] = bcsEngine; mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(8u, bcsCsr->blitBufferCalled); @@ -874,7 +874,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW EngineControl 
bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -913,7 +914,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -960,7 +962,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -1007,7 +1010,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -1046,7 +1050,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new 
MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -1079,7 +1084,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnq EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -1112,7 +1118,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -1166,7 +1173,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSV EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = &bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; @@ -1294,7 +1302,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForVal EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); - cmdQ->bcsEngine = 
&bcsEngineControl; + cmdQ->clearBcsEngines(); + cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 7e9a63a1a6..2b6afcbcfc 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -18,7 +18,7 @@ namespace NEO { class MockCommandQueue : public CommandQueue { public: - using CommandQueue::bcsEngine; + using CommandQueue::bcsEngines; using CommandQueue::blitEnqueueAllowed; using CommandQueue::blitEnqueueImageAllowed; using CommandQueue::blitEnqueuePreferred; @@ -36,6 +36,22 @@ class MockCommandQueue : public CommandQueue { using CommandQueue::throttle; using CommandQueue::timestampPacketContainer; + void clearBcsEngines() { + std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); + } + + void insertBcsEngine(aub_stream::EngineType bcsEngineType) { + const auto index = NEO::EngineHelpers::getBcsIndex(bcsEngineType); + const auto engine = &getDevice().getEngine(bcsEngineType, EngineUsage::Regular); + bcsEngines[index] = engine; + } + + size_t countBcsEngines() const { + return std::count_if(bcsEngines.begin(), bcsEngines.end(), [](const EngineControl *engine) { + return engine != nullptr; + }); + } + void setProfilingEnabled() { commandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; } @@ -198,7 +214,7 @@ class MockCommandQueueHw : public CommandQueueHw { using BaseClass = CommandQueueHw; public: - using BaseClass::bcsEngine; + using BaseClass::bcsEngines; using BaseClass::bcsState; using BaseClass::blitEnqueueAllowed; using BaseClass::commandQueueProperties; @@ -218,6 +234,10 @@ class MockCommandQueueHw : public CommandQueueHw { cl_queue_properties *properties) : BaseClass(context, device, properties, false) { } + void clearBcsEngines() { + std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); + } + cl_int 
flush() override { flushCalled = true; return BaseClass::flush(); diff --git a/shared/source/helpers/engine_node_helper.h b/shared/source/helpers/engine_node_helper.h index 3ae4e9e708..2be109f555 100644 --- a/shared/source/helpers/engine_node_helper.h +++ b/shared/source/helpers/engine_node_helper.h @@ -51,6 +51,7 @@ bool isBcs(aub_stream::EngineType engineType); aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, SelectorCopyEngine &selectorCopyEngine, bool internalUsage); void releaseBcsEngineType(aub_stream::EngineType engineType, SelectorCopyEngine &selectorCopyEngine); aub_stream::EngineType remapEngineTypeToHwSpecific(aub_stream::EngineType inputType, const HardwareInfo &hwInfo); +uint32_t getBcsIndex(aub_stream::EngineType engineType); std::string engineTypeToString(aub_stream::EngineType engineType); std::string engineTypeToStringAdditional(aub_stream::EngineType engineType); diff --git a/shared/source/helpers/engine_node_helper_extra.cpp b/shared/source/helpers/engine_node_helper_extra.cpp index 72baff2486..6c3b83bdd4 100644 --- a/shared/source/helpers/engine_node_helper_extra.cpp +++ b/shared/source/helpers/engine_node_helper_extra.cpp @@ -27,5 +27,9 @@ aub_stream::EngineType remapEngineTypeToHwSpecific(aub_stream::EngineType inputT return inputType; } +uint32_t getBcsIndex(aub_stream::EngineType engineType) { + return 0; +} + } // namespace EngineHelpers } // namespace NEO diff --git a/shared/source/sku_info/definitions/sku_info.h b/shared/source/sku_info/definitions/sku_info.h index 9fd4c04b0d..dfa270a0df 100644 --- a/shared/source/sku_info/definitions/sku_info.h +++ b/shared/source/sku_info/definitions/sku_info.h @@ -12,7 +12,8 @@ namespace NEO { -using BcsInfoMask = std::bitset<1>; +constexpr inline size_t bcsInfoMaskSize = 1u; +using BcsInfoMask = std::bitset; struct FeatureTable : FeatureTableBase { BcsInfoMask ftrBcsInfo = 0;