From 44032a4386283621e0382e78a76970d082d7f6b1 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Fri, 27 Jan 2023 01:18:42 +0100 Subject: [PATCH] Revert "Flush task at device init" This reverts commit 73bd55ad40ec1b44be32bf1184fce0c635726370. Signed-off-by: Compute-Runtime-Validation --- .../linux/os_metric_ip_sampling_imp_linux.cpp | 4 ---- .../command_queue/blit_enqueue_1_tests.cpp | 11 ----------- opencl/test/unit_test/linux/main_linux_dll.cpp | 8 -------- .../command_stream_receiver_hw_base.inl | 13 ++++++------- shared/source/device/device.cpp | 17 +++++------------ shared/source/device/device.h | 3 --- shared/source/dll/device_dll.cpp | 4 ---- shared/source/helpers/completion_stamp.h | 2 +- shared/source/os_interface/hw_info_config.h | 2 -- shared/source/os_interface/hw_info_config.inl | 5 ----- .../pvc/os_agnostic_hw_info_config_pvc.inl | 5 ----- shared/test/common/base_ult_config_listener.cpp | 2 +- shared/test/common/helpers/ult_hw_config.h | 1 - shared/test/common/mocks/device_ult.cpp | 5 +---- .../test/unit_test/device/neo_device_tests.cpp | 10 ++-------- 15 files changed, 16 insertions(+), 76 deletions(-) diff --git a/level_zero/tools/source/metrics/linux/os_metric_ip_sampling_imp_linux.cpp b/level_zero/tools/source/metrics/linux/os_metric_ip_sampling_imp_linux.cpp index 2f01925973..cdd73aae63 100644 --- a/level_zero/tools/source/metrics/linux/os_metric_ip_sampling_imp_linux.cpp +++ b/level_zero/tools/source/metrics/linux/os_metric_ip_sampling_imp_linux.cpp @@ -5,7 +5,6 @@ * */ -#include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/hw_info_config.h" @@ -81,9 +80,6 @@ ze_result_t MetricIpSamplingLinuxImp::startMeasurement(uint32_t &notifyEveryNRep DeviceImp &deviceImp = static_cast<DeviceImp &>(device); - auto csr = deviceImp.getNEODevice()->getDefaultEngine().commandStreamReceiver; - 
csr->waitForTaskCountWithKmdNotifyFallback(csr->peekTaskCount(), 0, false, NEO::QueueThrottle::MEDIUM); - auto ioctlHelper = drm->getIoctlHelper(); uint32_t euStallFdParameter = ioctlHelper->getEuStallFdParameter(); std::array properties; diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index b1ad06cdf6..b20146b4c2 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -238,17 +238,6 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenFlushTagUpdateThenSetStallingCmd EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds); } -HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenInitializeDeviceWithFirstSubmissionThenMiFlushDwIsFlushed) { - using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; - - EXPECT_EQ(SubmissionStatus::SUCCESS, bcsCsr->initializeDeviceWithFirstSubmission()); - - auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); - - auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); - EXPECT_NE(cmdFound, cmdListBcs.end()); -} - HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; diff --git a/opencl/test/unit_test/linux/main_linux_dll.cpp b/opencl/test/unit_test/linux/main_linux_dll.cpp index ca7ddf81ef..d6bcd46b5d 100644 --- a/opencl/test/unit_test/linux/main_linux_dll.cpp +++ b/opencl/test/unit_test/linux/main_linux_dll.cpp @@ -884,14 +884,6 @@ TEST(DeviceTest, whenCheckBlitSplitEnabledThenReturnsTrue) { EXPECT_TRUE(Device::isBlitSplitEnabled()); } -TEST(DeviceTest, givenCsrHwWhenCheckIsInitDeviceWithFirstSubmissionEnabledThenReturnsTrue) { - EXPECT_TRUE(Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType::CSR_HW)); -} - -TEST(DeviceTest, 
givenCsrNonHwWhenCheckIsInitDeviceWithFirstSubmissionEnabledThenReturnsTrue) { - EXPECT_FALSE(Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType::CSR_TBX)); -} - TEST(PlatformsDestructor, whenGlobalPlatformsDestructorIsCalledThenGlobalPlatformsAreDestroyed) { EXPECT_NE(nullptr, platformsImpl); platformsDestructor(); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index f759f0c426..5f134d1236 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1349,13 +1349,9 @@ SubmissionStatus CommandStreamReceiverHw::flushPipeControl() { args.notifyEnable = isUsedNotifyEnableForPostSync(); args.workloadPartitionOffset = isMultiTileOperationEnabled(); - auto dispatchSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo, args.tlbInvalidation) + this->getCmdSizeForPrologue(); - - auto &commandStream = getCS(dispatchSize); + auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo, args.tlbInvalidation)); auto commandStreamStart = commandStream.getUsed(); - this->programEnginePrologue(commandStream); - MemorySynchronizationCommands::addBarrierWithPostSyncOperation(commandStream, PostSyncMode::ImmediateData, getTagAllocation()->getGpuAddress(), @@ -1364,7 +1360,6 @@ SubmissionStatus CommandStreamReceiverHw::flushPipeControl() { args); makeResident(*tagAllocation); - makeResident(*commandStream.getGraphicsAllocation()); auto submissionStatus = this->flushSmallTask(commandStream, commandStreamStart); this->latestFlushedTaskCount = taskCount.load(); @@ -1602,7 +1597,11 @@ void CommandStreamReceiverHw::createKernelArgsBufferAllocation() { template SubmissionStatus CommandStreamReceiverHw::initializeDeviceWithFirstSubmission() { - return flushTagUpdate(); + auto lock = obtainUniqueOwnership(); + + 
auto &commandStream = getCS(EncodeBatchBufferStartOrEnd::getBatchBufferEndSize()); + auto commandStreamStart = commandStream.getUsed(); + return this->flushSmallTask(commandStream, commandStreamStart); } template diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index cc682bedb6..26fc7c2888 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -381,18 +381,13 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa commandStreamReceiver->createKernelArgsBufferAllocation(); - if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) { - return false; - } - if (isDefaultEngine) { bool defaultEngineAlreadySet = (allEngines.size() > defaultEngineIndex) && (allEngines[defaultEngineIndex].getEngineType() == engineType); if (!defaultEngineAlreadySet) { defaultEngineIndex = deviceCsrIndex; - if (osContext->isDebuggableContext() || - this->isInitDeviceWithFirstSubmissionSupported(commandStreamReceiver->getType())) { + if (osContext->isDebuggableContext()) { if (SubmissionStatus::SUCCESS != commandStreamReceiver->initializeDeviceWithFirstSubmission()) { return false; } @@ -404,12 +399,15 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa defaultBcsEngineIndex = deviceCsrIndex; } + if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) { + return false; + } + EngineControl engine{commandStreamReceiver.get(), osContext}; allEngines.push_back(engine); if (engineUsage == EngineUsage::Regular) { addEngineToEngineGroup(engine); } - commandStreamReceivers.push_back(std::move(commandStreamReceiver)); return true; @@ -442,11 +440,6 @@ bool Device::isBcsSplitSupported() { return bcsSplit; } -bool Device::isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType) { - return getProductHelper().isInitDeviceWithFirstSubmissionRequired(getHardwareInfo()) && - 
Device::isInitDeviceWithFirstSubmissionEnabled(csrType); -} - double Device::getPlatformHostTimerResolution() const { if (getOSTime()) { return getOSTime()->getHostTimerResolution(); diff --git a/shared/source/device/device.h b/shared/source/device/device.h index ef2f9eabd6..8b0aba56be 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -10,7 +10,6 @@ #include "shared/source/helpers/engine_control.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/non_copyable_or_moveable.h" -#include "shared/source/helpers/options.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/performance_counters.h" #include "shared/source/utilities/reference_tracked_object.h" @@ -109,9 +108,7 @@ class Device : public ReferenceTrackedObject { RootDeviceEnvironment &getRootDeviceEnvironmentRef() const; bool isFullRangeSvm() const; static bool isBlitSplitEnabled(); - static bool isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType); bool isBcsSplitSupported(); - bool isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType); bool areSharedSystemAllocationsAllowed() const; template void setSpecializedDevice(SpecializedDeviceT *specializedDevice) { diff --git a/shared/source/dll/device_dll.cpp b/shared/source/dll/device_dll.cpp index 45879d086b..8dd274da89 100644 --- a/shared/source/dll/device_dll.cpp +++ b/shared/source/dll/device_dll.cpp @@ -13,8 +13,4 @@ bool Device::isBlitSplitEnabled() { return true; } -bool Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType) { - return csrType == CommandStreamReceiverType::CSR_HW; -} - } // namespace NEO diff --git a/shared/source/helpers/completion_stamp.h b/shared/source/helpers/completion_stamp.h index 9685e75b3d..c26fe7c96e 100644 --- a/shared/source/helpers/completion_stamp.h +++ b/shared/source/helpers/completion_stamp.h @@ -7,7 +7,6 @@ #pragma once -#include 
"shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/task_count_helper.h" #include @@ -15,6 +14,7 @@ namespace NEO { using FlushStamp = uint64_t; +enum class SubmissionStatus : uint32_t; struct CompletionStamp { static TaskCountType getTaskCountFromSubmissionStatusError(SubmissionStatus submissionStatus); diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index d218fda00c..b6449390c4 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -135,7 +135,6 @@ class ProductHelper { virtual bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const = 0; virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitSplitEnqueueWARequired(const HardwareInfo &hwInfo) const = 0; - virtual bool isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0; virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0; virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0; @@ -281,7 +280,6 @@ class ProductHelperHw : public ProductHelper { bool isTimestampWaitSupportedForEvents() const override; bool isTilePlacementResourceWaRequired(const HardwareInfo &hwInfo) const override; bool isBlitSplitEnqueueWARequired(const HardwareInfo &hwInfo) const override; - bool isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const override; bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const override; bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override; bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 
0c9f8665b5..0b6cd52549 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -469,11 +469,6 @@ bool ProductHelperHw::isBlitSplitEnqueueWARequired(const HardwareInf return false; } -template -bool ProductHelperHw::isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const { - return false; -} - template bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const { return allocation.isAllocatedInLocalMemoryPool() && diff --git a/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl b/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl index 0c892d09ef..1db347b0b7 100644 --- a/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl +++ b/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl @@ -173,11 +173,6 @@ bool ProductHelperHw::isBlitSplitEnqueueWARequired(const HardwareInf return true; } -template <> -bool ProductHelperHw::isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const { - return true; -} - template <> bool ProductHelperHw::isImplicitScalingSupported(const HardwareInfo &hwInfo) const { return getSteppingFromHwRevId(hwInfo) >= REVISION_B; diff --git a/shared/test/common/base_ult_config_listener.cpp b/shared/test/common/base_ult_config_listener.cpp index 9bd2cba169..ac4ed7413e 100644 --- a/shared/test/common/base_ult_config_listener.cpp +++ b/shared/test/common/base_ult_config_listener.cpp @@ -36,7 +36,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) { // Ensure that global state is restored UltHwConfig expectedState{}; - static_assert(sizeof(UltHwConfig) == 14 * sizeof(bool), ""); // Ensure that there is no internal padding + static_assert(sizeof(UltHwConfig) == 13 * sizeof(bool), ""); // Ensure that there is no internal padding EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig))); EXPECT_EQ(0, 
memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM))); diff --git a/shared/test/common/helpers/ult_hw_config.h b/shared/test/common/helpers/ult_hw_config.h index 60790d3b58..2316cc9f82 100644 --- a/shared/test/common/helpers/ult_hw_config.h +++ b/shared/test/common/helpers/ult_hw_config.h @@ -14,7 +14,6 @@ struct UltHwConfig { bool forceOsAgnosticMemoryManager = true; bool useWaitForTimestamps = false; bool useBlitSplit = false; - bool useFirstSubmissionInitDevice = false; bool csrFailInitDirectSubmission = false; bool csrBaseCallDirectSubmissionAvailable = false; diff --git a/shared/test/common/mocks/device_ult.cpp b/shared/test/common/mocks/device_ult.cpp index bc8fdf6f1b..bee5e6820c 100644 --- a/shared/test/common/mocks/device_ult.cpp +++ b/shared/test/common/mocks/device_ult.cpp @@ -7,14 +7,11 @@ #include "shared/source/device/device.h" #include "shared/test/common/helpers/ult_hw_config.h" + namespace NEO { bool Device::isBlitSplitEnabled() { return ultHwConfig.useBlitSplit; } -bool Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType) { - return ultHwConfig.useFirstSubmissionInitDevice; -} - } // namespace NEO diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 465e093b76..4caddd2583 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -673,9 +673,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenZexNumberOfCssEnvVariableSetAmbig } HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenDebuggableOsContextWhenDeviceCreatesEnginesThenDeviceIsInitializedWithFirstSubmission) { - VariableBackup backup(&ultHwConfig); - ultHwConfig.useFirstSubmissionInitDevice = true; - auto hwInfo = *defaultHwInfo; hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, 0); @@ -690,10 +687,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenDebuggableOsContextWhenDeviceCrea EXPECT_EQ(1u, 
csr->peekLatestSentTaskCount()); } -HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, whenDeviceCreatesEnginesThenDeviceIsInitializedWithFirstSubmission) { - VariableBackup backup(&ultHwConfig); - ultHwConfig.useFirstSubmissionInitDevice = true; - +HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenNonDebuggableOsContextWhenDeviceCreatesEnginesThenDeviceIsNotInitializedWithFirstSubmission) { auto hwInfo = *defaultHwInfo; hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, 0); @@ -704,7 +698,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, whenDeviceCreatesEnginesThenDeviceIsIn auto device = deviceFactory.rootDevices[0]; auto csr = device->allEngines[device->defaultEngineIndex].commandStreamReceiver; - EXPECT_EQ(device->isInitDeviceWithFirstSubmissionSupported(csr->getType()), csr->peekLatestSentTaskCount()); + EXPECT_EQ(0u, csr->peekLatestSentTaskCount()); } TEST(FailDeviceTest, GivenFailedDeviceWhenCreatingDeviceThenNullIsReturned) {