diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 7b8e623d18..5ed733202c 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -179,8 +179,17 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec case TransferDirection::HostToLocal: case TransferDirection::LocalToHost: { preferBcs = true; - preferredBcsEngineType = EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), - device->getSelectorCopyEngine(), false); + + auto preferredBCSType = true; + + if (DebugManager.flags.AssignBCSAtEnqueue.get() != -1) { + preferredBCSType = DebugManager.flags.AssignBCSAtEnqueue.get(); + } + + if (preferredBCSType) { + preferredBcsEngineType = EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), + device->getSelectorCopyEngine(), false); + } break; } default: @@ -189,7 +198,16 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec CommandStreamReceiver *selectedCsr = nullptr; if (preferBcs) { - selectedCsr = getBcsCommandStreamReceiver(preferredBcsEngineType); + auto assignBCS = true; + + if (DebugManager.flags.AssignBCSAtEnqueue.get() != -1) { + assignBCS = DebugManager.flags.AssignBCSAtEnqueue.get(); + } + + if (assignBCS) { + selectedCsr = getBcsCommandStreamReceiver(preferredBcsEngineType); + } + if (selectedCsr == nullptr && !bcsEngineTypes.empty()) { selectedCsr = getBcsCommandStreamReceiver(bcsEngineTypes[0]); } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index a9230efb9c..c703c6028d 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -1069,6 +1069,7 @@ template template void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) { auto commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); + auto bcsCommandStreamReceiverOwnership = bcsCsr.obtainUniqueOwnership(); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); EventBuilder eventBuilder; @@ -1154,6 +1155,7 @@ void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispat timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets); queueOwnership.unlock(); + bcsCommandStreamReceiverOwnership.unlock(); commandStreamReceiverOwnership.unlock(); if (blocking) { diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 052bb38885..8a58704518 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1780,6 +1780,27 @@ TEST_F(CsrSelectionCommandQueueWithBlitterTests, givenInvalidTransferDirectionWh EXPECT_ANY_THROW(queue->selectCsrForBuiltinOperation(args)); } +TEST_F(CsrSelectionCommandQueueWithBlitterTests, givenBlitterAndAssignBCSAtEnqueueSetToFalseWhenSelectCsrThenDefaultBcsReturned) { + DebugManagerStateRestore restore{}; + DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); + DebugManager.flags.AssignBCSAtEnqueue.set(0); + + BuiltinOpParams builtinOpParams{}; + MockGraphicsAllocation srcGraphicsAllocation{}; + MockGraphicsAllocation dstGraphicsAllocation{}; + MockBuffer srcMemObj{srcGraphicsAllocation}; + MockBuffer dstMemObj{dstGraphicsAllocation}; + builtinOpParams.srcMemObj = &srcMemObj; + builtinOpParams.dstMemObj = &dstMemObj; + + CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; + args.direction = TransferDirection::LocalToHost; + + auto &csr = queue->selectCsrForBuiltinOperation(args); + + EXPECT_EQ(&csr, queue->getBcsCommandStreamReceiver(queue->bcsEngineTypes[0])); +} + TEST_F(CsrSelectionCommandQueueWithQueueFamiliesBlitterTests, givenBlitterSelectedWithQueueFamiliesWhenSelectingBlitterThenSelectBlitter) { DebugManagerStateRestore restore{}; diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 594a44edf5..3151bde02e 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -376,6 +376,7 @@ ForceExtendedBufferSize = -1 ForceExtendedUSMBufferSize = -1 MakeIndirectAllocationsResidentAsPack = -1 MakeEachAllocationResident = -1 +AssignBCSAtEnqueue = -1 ReuseKernelBinaries = -1 EnableChipsetUniqueUUID = -1 ForceSimdMessageSizeInWalker = -1 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 1e0f37e403..e966205174 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -256,6 +256,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0 DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: default , 0: disabled, 1: enabled. If enabled, instead of programming semaphores, dependencies are resolved using task levels") DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.") DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush") +DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.") DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.") /*DIRECT SUBMISSION FLAGS*/