From a3fe50c2e43cee898dd1dfc854c51b0c15c26702 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Wed, 4 May 2022 12:46:37 +0200 Subject: [PATCH] Revert "Assign BCS at first blit enqueue" This reverts commit 0469034acb1abcda3fdf7342e6f6a6cc3c89d3d1. Signed-off-by: Compute-Runtime-Validation --- opencl/source/command_queue/command_queue.cpp | 46 +++++------------ opencl/source/command_queue/command_queue.h | 11 ++-- .../command_queue/command_queue_tests.cpp | 2 +- .../command_queue_tests_pvc_and_later.cpp | 50 ++----------------- opencl/test/unit_test/event/event_tests.cpp | 1 - .../test/unit_test/mocks/mock_command_queue.h | 1 - .../test/unit_test/test_files/igdrcl.config | 1 - .../debug_settings/debug_variables_base.inl | 1 - 8 files changed, 22 insertions(+), 91 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 462907a720..9ce41b0dba 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -73,22 +73,19 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); - bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) && - hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS); + bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) && + hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS); if (bcsAllowed || device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) { timestampPacketContainer = std::make_unique(); deferredTimestampPackets = std::make_unique(); } - - auto deferCmdQBcsInitialization = true; - - if (DebugManager.flags.DeferCmdQBcsInitialization.get() != -1) { - deferCmdQBcsInitialization = DebugManager.flags.DeferCmdQBcsInitialization.get(); - } - - if (!deferCmdQBcsInitialization) { - this->initializeBcsEngine(internalUsage); + if (bcsAllowed) { + auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice(); + auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine(); + auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage); + bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular); + bcsEngineTypes.push_back(bcsEngineType); } } @@ -184,8 +181,7 @@ CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const { return *gpgpuEngine->commandStreamReceiver; } -CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) { - initializeBcsEngine(isSpecial()); +CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const { const EngineControl *engine = this->bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]; if (engine == nullptr) { return nullptr; @@ -194,8 +190,7 @@ CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::Eng } } -CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() { - initializeBcsEngine(isSpecial()); +CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const { for (const EngineControl *engine : this->bcsEngines) { if (engine != nullptr) { return engine->commandStreamReceiver; @@ -204,8 +199,7 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() { return nullptr; } -CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) { - initializeBcsEngine(isSpecial()); +CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const { if (isCopyOnly) { return *getBcsCommandStreamReceiver(bcsEngineTypes[0]); } @@ -273,21 +267,6 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec return *selectedCsr; } -void CommandQueue::initializeBcsEngine(bool internalUsage) { - if (bcsAllowed && !bcsInitialized) { - auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice(); - auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine(); - auto bcsEngineType = EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), selectorCopyEngine, internalUsage); - bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular); - bcsEngineTypes.push_back(bcsEngineType); - bcsInitialized = true; - if (bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]) { - bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->osContext->ensureContextInitialized(); - bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->commandStreamReceiver->initDirectSubmission(); - } - } -} - Device &CommandQueue::getDevice() const noexcept { return device->getDevice(); } @@ -301,7 +280,7 @@ volatile uint32_t *CommandQueue::getHwTagAddress() const { return getGpgpuCommandStreamReceiver().getTagAddress(); } -bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) { +bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const { DEBUG_BREAK_IF(getHwTag() == CompletionStamp::notReady); if (getGpgpuCommandStreamReceiver().testTaskCountReady(getHwTagAddress(), gpgpuTaskCount)) { @@ -1049,7 +1028,6 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage timestampPacketContainer = std::make_unique(); deferredTimestampPackets = std::make_unique(); isCopyOnly = true; - bcsInitialized = true; } else { gpgpuEngine = &device->getEngine(engineType, engineUsage); } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 26409b27a8..199ee50d53 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -202,7 +202,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { volatile uint32_t *getHwTagAddress() const; - bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState); + bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const; bool isWaitForTimestampsEnabled() const; virtual bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount) = 0; @@ -225,10 +225,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void initializeGpgpu() const; void initializeGpgpuInternals() const; MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; - MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType); - CommandStreamReceiver *getBcsForAuxTranslation(); - MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args); - void initializeBcsEngine(bool internalUsage); + MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const; + CommandStreamReceiver *getBcsForAuxTranslation() const; + MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const; Device &getDevice() const noexcept; ClDevice &getClDevice() const { return *device; } Context &getContext() const { return *context; } @@ -414,8 +413,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool perfCountersEnabled = false; bool isCopyOnly = false; - bool bcsAllowed = false; - bool bcsInitialized = false; LinearStream *commandStream = nullptr; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 78d4478bbd..3c5c22b8fa 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1067,7 +1067,7 @@ struct WaitUntilCompletionTests : public ::testing::Test { MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; - CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) override { + CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const override { return bcsCsrToReturn; } diff --git a/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp b/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp index b93aae7397..9f04352919 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp @@ -84,46 +84,6 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenAdditionalBcsWhenCreatingCommandQue EXPECT_EQ(1u, queue->countBcsEngines()); } -HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationEnabledWhenCreateCommandQueueThenBcsCountIsZero, IsAtLeastXeHpcCore) { - DebugManagerStateRestore restorer; - DebugManager.flags.DeferCmdQBcsInitialization.set(1u); - - HardwareInfo hwInfo = *defaultHwInfo; - hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); - hwInfo.capabilityTable.blitterOperationsSupported = true; - MockDevice *device = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); - MockClDevice clDevice{device}; - cl_device_id clDeviceId = static_cast(&clDevice); - ClDeviceVector clDevices{&clDeviceId, 1u}; - cl_int retVal{}; - auto context = std::unique_ptr{Context::create(nullptr, clDevices, nullptr, nullptr, retVal)}; - EXPECT_EQ(CL_SUCCESS, retVal); - - auto queue = std::make_unique(*context); - - EXPECT_EQ(0u, queue->countBcsEngines()); -} - -HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationDisabledWhenCreateCommandQueueThenBcsIsInitialized, IsAtLeastXeHpcCore) { - DebugManagerStateRestore restorer; - DebugManager.flags.DeferCmdQBcsInitialization.set(0u); - - HardwareInfo hwInfo = *defaultHwInfo; - hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); - hwInfo.capabilityTable.blitterOperationsSupported = true; - MockDevice *device = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); - MockClDevice clDevice{device}; - cl_device_id clDeviceId = static_cast(&clDevice); - ClDeviceVector clDevices{&clDeviceId, 1u}; - cl_int retVal{}; - auto context = std::unique_ptr{Context::create(nullptr, clDevices, nullptr, nullptr, retVal)}; - EXPECT_EQ(CL_SUCCESS, retVal); - - auto queue = std::make_unique(*context); - - EXPECT_NE(0u, queue->countBcsEngines()); -} - HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQueueIsCreatedThenMainBcsCanBeUsedAgain, IsAtLeastXeHpcCore) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); @@ -142,9 +102,9 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQu auto queue4 = std::make_unique(*context); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, queue1->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getOsContext().getEngineType()); - EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); - EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue3->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); - EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue4->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); + EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); + EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue3->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); + EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue4->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); // Releasing main BCS. Next creation should be able to grab it queue1.reset(); @@ -154,7 +114,7 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQu // Releasing link BCS. Shouldn't change anything queue2.reset(); queue2 = std::make_unique(*context); - EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); + EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); } HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhenCreatingCommandQueueThenCreateQueueWithCooperativeEngine, IsAtLeastXeHpcCore) { @@ -531,10 +491,10 @@ HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenMultipleEnginesInQueueWhenSelec aub_stream::ENGINE_BCS7, aub_stream::ENGINE_BCS8, }); - EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args)); + EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args)); } } diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index cad94f4dc6..5c8eef221f 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -144,7 +144,6 @@ TEST(Event, givenBcsCsrSetInEventWhenPeekingBcsTaskCountThenReturnCorrectTaskCou new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfo)}}; MockContext context{device.get()}; MockCommandQueue queue{context}; - queue.initializeBcsEngine(false); queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19); Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0}; diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index ac2ad190d2..da2b628973 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -257,7 +257,6 @@ class MockCommandQueueHw : public CommandQueueHw { MockCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false) { - this->initializeBcsEngine(false); } void clearBcsEngines() { diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 1f25ba0261..129c5a0488 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -383,7 +383,6 @@ MakeIndirectAllocationsResidentAsPack = -1 MakeEachAllocationResident = -1 AssignBCSAtEnqueue = -1 DeferCmdQGpgpuInitialization = -1 -DeferCmdQBcsInitialization = -1 ReuseKernelBinaries = -1 EnableChipsetUniqueUUID = -1 ForceSimdMessageSizeInWalker = -1 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 5f2dda82c0..aafb01d705 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -268,7 +268,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush") DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.") DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.") -DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.") DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.") /*DIRECT SUBMISSION FLAGS*/