diff --git a/Jenkinsfile b/Jenkinsfile index 486f4f0a1f..ef3a4d7ee6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,5 @@ #!groovy dependenciesRevision='752539c0bf9f26785a1a99ebbb7be67263262272-1280' strategy='EQUAL' -allowedCD=260 +allowedCD=259 allowedF=5 diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 8069ba8e06..85295c20ae 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -71,6 +71,9 @@ CommandQueue::CommandQueue(Context *context, Device *deviceId, const cl_queue_pr if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { timestampPacketContainer = std::make_unique(); } + if (device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported) { + bcsEngine = &device->getEngine(aub_stream::EngineType::ENGINE_BCS, false); + } } processProperties(properties); @@ -107,6 +110,13 @@ CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const { return *gpgpuEngine->commandStreamReceiver; } +CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver() const { + if (bcsEngine) { + return bcsEngine->commandStreamReceiver; + } + return nullptr; +} + uint32_t CommandQueue::getHwTag() const { uint32_t tag = *getHwTagAddress(); return tag; diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index 0a4b84cae0..1b93437d94 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -335,6 +335,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { const cl_event *eventWaitList); CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; + CommandStreamReceiver *getBcsCommandStreamReceiver() const; Device &getDevice() const { return *device; } Context &getContext() const { return *context; } Context *getContextPtr() const { return context; } @@ -440,6 +441,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { Context *context = nullptr; Device *device = nullptr; EngineControl *gpgpuEngine = nullptr; + EngineControl *bcsEngine = nullptr; cl_command_queue_properties commandQueueProperties = 0; diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 4c2e21e9fd..4901b2c9ff 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -465,8 +465,7 @@ void CommandQueueHw::processDispatchForBlitEnqueue(const MultiDispatc bool blocking) { auto blitDirection = BlitProperties::obtainBlitDirection(commandType); - auto blitCommandStreamReceiver = BlitProperties::obtainBlitCommandStreamReceiver(*context, multiDispatchInfo.peekBuiltinOpParams(), - commandType); + auto blitCommandStreamReceiver = getBcsCommandStreamReceiver(); auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(blitDirection, *blitCommandStreamReceiver, multiDispatchInfo.peekBuiltinOpParams(), blocking); diff --git a/runtime/device/device.cpp b/runtime/device/device.cpp index 179056a891..48f3b75ee1 100644 --- a/runtime/device/device.cpp +++ b/runtime/device/device.cpp @@ -148,41 +148,50 @@ bool Device::createDeviceImpl() { } bool Device::createEngines() { - auto &hwInfo = getHardwareInfo(); - auto defaultEngineType = getChosenEngineType(hwInfo); auto &gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(); for (uint32_t deviceCsrIndex = 0; deviceCsrIndex < gpgpuEngines.size(); deviceCsrIndex++) { - if (!executionEnvironment->initializeCommandStreamReceiver(getDeviceIndex(), deviceCsrIndex)) { + if (!createEngine(getDeviceIndex(), deviceCsrIndex, gpgpuEngines[deviceCsrIndex])) { return false; } - - auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[getDeviceIndex()][deviceCsrIndex].get(); - - DeviceBitfield deviceBitfield; - deviceBitfield.set(getDeviceIndex()); - bool lowPriority = deviceCsrIndex == HwHelper::lowPriorityGpgpuEngineIndex; - auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver, gpgpuEngines[deviceCsrIndex], - deviceBitfield, preemptionMode, lowPriority); - commandStreamReceiver->setupContext(*osContext); - - if (!commandStreamReceiver->initializeTagAllocation()) { - return false; - } - if (gpgpuEngines[deviceCsrIndex] == defaultEngineType && !lowPriority) { - defaultEngineIndex = deviceCsrIndex; - } - - if ((preemptionMode == PreemptionMode::MidThread || isSourceLevelDebuggerActive()) && !commandStreamReceiver->createPreemptionAllocation()) { - return false; - } - - engines.push_back({commandStreamReceiver, osContext}); } return true; } +bool Device::createEngine(uint32_t deviceIndex, uint32_t deviceCsrIndex, aub_stream::EngineType engineType) { + auto &hwInfo = getHardwareInfo(); + auto defaultEngineType = getChosenEngineType(hwInfo); + + if (!executionEnvironment->initializeCommandStreamReceiver(deviceIndex, deviceCsrIndex)) { + return false; + } + + auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[deviceIndex][deviceCsrIndex].get(); + + DeviceBitfield deviceBitfield; + deviceBitfield.set(deviceIndex); + bool lowPriority = (deviceCsrIndex == HwHelper::lowPriorityGpgpuEngineIndex); + auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver, engineType, + deviceBitfield, preemptionMode, lowPriority); + commandStreamReceiver->setupContext(*osContext); + + if (!commandStreamReceiver->initializeTagAllocation()) { + return false; + } + if (engineType == defaultEngineType && !lowPriority) { + defaultEngineIndex = deviceCsrIndex; + } + + if ((preemptionMode == PreemptionMode::MidThread || isSourceLevelDebuggerActive()) && !commandStreamReceiver->createPreemptionAllocation()) { + return false; + } + + engines.push_back({commandStreamReceiver, osContext}); + + return true; +} + const HardwareInfo &Device::getHardwareInfo() const { return *executionEnvironment->getHardwareInfo(); } const WorkaroundTable *Device::getWaTable() const { return &getHardwareInfo().workaroundTable; } diff --git a/runtime/device/device.h b/runtime/device/device.h index b776d7c68b..2e005c9516 100644 --- a/runtime/device/device.h +++ b/runtime/device/device.h @@ -132,6 +132,7 @@ class Device : public BaseObject<_cl_device_id> { bool createDeviceImpl(); bool createEngines(); + bool createEngine(uint32_t deviceIndex, uint32_t deviceCsrIndex, aub_stream::EngineType engineType); MOCKABLE_VIRTUAL void initializeCaps(); void setupFp64Flags(); diff --git a/runtime/helpers/blit_commands_helper.cpp b/runtime/helpers/blit_commands_helper.cpp index bdeaddfef6..d5b3aa2a11 100644 --- a/runtime/helpers/blit_commands_helper.cpp +++ b/runtime/helpers/blit_commands_helper.cpp @@ -64,13 +64,4 @@ BlitterConstants::BlitDirection BlitProperties::obtainBlitDirection(uint32_t com : BlitterConstants::BlitDirection::BufferToHostPtr; } -CommandStreamReceiver *BlitProperties::obtainBlitCommandStreamReceiver(Context &context, const BuiltinOpParams &builtinOpParams, - uint32_t commandType) { - if (CL_COMMAND_WRITE_BUFFER == commandType) { - return context.getCommandStreamReceiverForBlitOperation(*builtinOpParams.dstMemObj); - } else { - return context.getCommandStreamReceiverForBlitOperation(*builtinOpParams.srcMemObj); - } -} - } // namespace NEO diff --git a/runtime/helpers/blit_commands_helper.h b/runtime/helpers/blit_commands_helper.h index 62c618cb36..dde6190c27 100644 --- a/runtime/helpers/blit_commands_helper.h +++ b/runtime/helpers/blit_commands_helper.h @@ -40,8 +40,6 @@ struct BlitProperties { GraphicsAllocation *allocation); static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType); - static CommandStreamReceiver *obtainBlitCommandStreamReceiver(Context &context, const BuiltinOpParams &builtinOpParams, - uint32_t commandType); TimestampPacketContainer *outputTimestampPacket = nullptr; BlitterConstants::BlitDirection blitDirection; diff --git a/runtime/memory_manager/definitions/engine_limits.h b/runtime/memory_manager/definitions/engine_limits.h index 4baf155ad8..802e83214f 100644 --- a/runtime/memory_manager/definitions/engine_limits.h +++ b/runtime/memory_manager/definitions/engine_limits.h @@ -10,7 +10,7 @@ namespace NEO { -constexpr uint32_t numGpgpuEngineInstances = 2u; +constexpr uint32_t numGpgpuEngineInstances = 3u; constexpr uint32_t maxOsContextCount = numGpgpuEngineInstances; constexpr uint32_t maxHandleCount = 1u; diff --git a/unit_tests/command_queue/command_queue_tests.cpp b/unit_tests/command_queue/command_queue_tests.cpp index fca4ad85d1..82915b51f8 100644 --- a/unit_tests/command_queue/command_queue_tests.cpp +++ b/unit_tests/command_queue/command_queue_tests.cpp @@ -207,6 +207,15 @@ TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngi EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); } +TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) { + HardwareInfo hwInfo = *platformDevices[0]; + hwInfo.capabilityTable.blitterOperationsSupported = false; + std::unique_ptr mockDevice(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + CommandQueue cmdQ(nullptr, mockDevice.get(), 0); + + EXPECT_EQ(nullptr, cmdQ.getBcsCommandStreamReceiver()); +} + TEST(CommandQueue, givenCmdQueueBlockedByReadyVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent) { std::unique_ptr mockDevice(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = new MockContext; diff --git a/unit_tests/mem_obj/buffer_tests.cpp b/unit_tests/mem_obj/buffer_tests.cpp index 6752cd30a7..4e244903fa 100644 --- a/unit_tests/mem_obj/buffer_tests.cpp +++ b/unit_tests/mem_obj/buffer_tests.cpp @@ -666,13 +666,22 @@ struct BcsBufferTests : public ::testing::Test { DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(true); device.reset(MockDevice::createWithNewExecutionEnvironment(nullptr)); - device->getExecutionEnvironment()->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; + auto &capabilityTable = device->getExecutionEnvironment()->getMutableHardwareInfo()->capabilityTable; + bool createBcsEngine = !capabilityTable.blitterOperationsSupported; + capabilityTable.blitterOperationsSupported = true; + + if (createBcsEngine) { + device->createEngine(0, static_cast(device->engines.size()), aub_stream::EngineType::ENGINE_BCS); + } + bcsMockContext = std::make_unique(device.get()); + commandQueue.reset(CommandQueue::create(bcsMockContext.get(), device.get(), nullptr, retVal)); } DebugManagerStateRestore restore; std::unique_ptr device; std::unique_ptr bcsMockContext; + std::unique_ptr commandQueue; uint32_t hostPtr = 0; cl_int retVal = CL_SUCCESS; }; @@ -690,11 +699,10 @@ HWTEST_F(BcsBufferTests, givenBufferWithInitializationDataAndBcsCsrWhenCreatingT HWTEST_F(BcsBufferTests, givenBcsSupportedWhenEnqueueReadWriteBufferIsCalledThenUseBcsCsr) { DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(false); - auto bcsCsr = static_cast *>(bcsMockContext->bcsCsr.get()); + auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt->forceDisallowCPUCopy = true; - auto commandQueue = std::unique_ptr(CommandQueue::create(bcsMockContext.get(), device.get(), nullptr, retVal)); auto *hwInfo = device->getExecutionEnvironment()->getMutableHardwareInfo(); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(false); @@ -723,11 +731,10 @@ HWTEST_F(BcsBufferTests, givenBcsSupportedWhenEnqueueReadWriteBufferIsCalledThen } HWTEST_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDontTakeBcsPath) { - auto bcsCsr = static_cast *>(bcsMockContext->bcsCsr.get()); + auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt->forceDisallowCPUCopy = true; - auto commandQueue = std::unique_ptr(CommandQueue::create(bcsMockContext.get(), device.get(), nullptr, retVal)); UserEvent userEvent(bcsMockContext.get()); cl_event waitlist = &userEvent; @@ -847,7 +854,7 @@ HWTEST_F(BcsBufferTests, givenReadOrWriteBufferOperationWithoutKernelWhenEstimat HWTEST_F(BcsBufferTests, givenOutputTimestampPacketWhenBlitCalledThenProgramMiFlushDwWithDataWrite) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; - auto csr = static_cast *>(bcsMockContext->bcsCsr.get()); + auto csr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cl_int retVal = CL_SUCCESS; @@ -882,7 +889,7 @@ HWTEST_F(BcsBufferTests, givenOutputTimestampPacketWhenBlitCalledThenProgramMiFl } HWTEST_F(BcsBufferTests, givenInputAndOutputTimestampPacketWhenBlitCalledThenMakeThemResident) { - auto bcsCsr = static_cast *>(bcsMockContext->bcsCsr.get()); + auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cl_int retVal = CL_SUCCESS; @@ -927,7 +934,9 @@ HWTEST_F(BcsBufferTests, givenBlockingEnqueueWhenUsingBcsThenCallWait) { myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); + EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); + cmdQ->bcsEngine = &bcsEngineControl; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); diff --git a/unit_tests/mocks/mock_command_queue.h b/unit_tests/mocks/mock_command_queue.h index 899fec4d2d..64214efa3a 100644 --- a/unit_tests/mocks/mock_command_queue.h +++ b/unit_tests/mocks/mock_command_queue.h @@ -73,6 +73,7 @@ class MockCommandQueueHw : public CommandQueueHw { typedef CommandQueueHw BaseClass; public: + using BaseClass::bcsEngine; using BaseClass::commandStream; using BaseClass::gpgpuEngine; using BaseClass::multiEngineQueue; diff --git a/unit_tests/mocks/mock_device.h b/unit_tests/mocks/mock_device.h index d5b77e67d9..531c65deab 100644 --- a/unit_tests/mocks/mock_device.h +++ b/unit_tests/mocks/mock_device.h @@ -20,6 +20,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio class MockDevice : public Device { public: + using Device::createEngine; using Device::enabledClVersion; using Device::engines; using Device::executionEnvironment;