From abb366e3c06e828514b04a614193deba0751596e Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Tue, 24 Jan 2023 11:15:03 +0000 Subject: [PATCH] Flush task at device init Resolves: NEO-7642 Signed-off-by: Lukasz Jobczyk --- .../command_queue/blit_enqueue_1_tests.cpp | 11 +++++++++++ opencl/test/unit_test/linux/main_linux_dll.cpp | 8 ++++++++ .../command_stream_receiver_hw_base.inl | 13 +++++++------ shared/source/device/device.cpp | 17 ++++++++++++----- shared/source/device/device.h | 3 +++ shared/source/dll/device_dll.cpp | 4 ++++ shared/source/helpers/completion_stamp.h | 4 ++-- shared/source/os_interface/hw_info_config.h | 2 ++ shared/source/os_interface/hw_info_config.inl | 5 +++++ .../pvc/os_agnostic_hw_info_config_pvc.inl | 5 +++++ shared/test/common/base_ult_config_listener.cpp | 2 +- shared/test/common/helpers/ult_hw_config.h | 1 + shared/test/common/mocks/device_ult.cpp | 5 ++++- .../test/unit_test/device/neo_device_tests.cpp | 10 ++++++++-- 14 files changed, 73 insertions(+), 17 deletions(-) diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index b20146b4c2..b1ad06cdf6 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -238,6 +238,17 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenFlushTagUpdateThenSetStallingCmd EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds); } +HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenInitializeDeviceWithFirstSubmissionThenMiFlushDwIsFlushed) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + EXPECT_EQ(SubmissionStatus::SUCCESS, bcsCsr->initializeDeviceWithFirstSubmission()); + + auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); + + auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); + EXPECT_NE(cmdFound, cmdListBcs.end()); +} + HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; diff --git a/opencl/test/unit_test/linux/main_linux_dll.cpp b/opencl/test/unit_test/linux/main_linux_dll.cpp index d6bcd46b5d..ca7ddf81ef 100644 --- a/opencl/test/unit_test/linux/main_linux_dll.cpp +++ b/opencl/test/unit_test/linux/main_linux_dll.cpp @@ -884,6 +884,14 @@ TEST(DeviceTest, whenCheckBlitSplitEnabledThenReturnsTrue) { EXPECT_TRUE(Device::isBlitSplitEnabled()); } +TEST(DeviceTest, givenCsrHwWhenCheckIsInitDeviceWithFirstSubmissionEnabledThenReturnsTrue) { + EXPECT_TRUE(Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType::CSR_HW)); +} + +TEST(DeviceTest, givenCsrNonHwWhenCheckIsInitDeviceWithFirstSubmissionEnabledThenReturnsTrue) { + EXPECT_FALSE(Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType::CSR_TBX)); +} + TEST(PlatformsDestructor, whenGlobalPlatformsDestructorIsCalledThenGlobalPlatformsAreDestroyed) { EXPECT_NE(nullptr, platformsImpl); platformsDestructor(); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 482ab2ea8f..18b5779fe1 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1346,9 +1346,13 @@ SubmissionStatus CommandStreamReceiverHw::flushPipeControl() { args.notifyEnable = isUsedNotifyEnableForPostSync(); args.workloadPartitionOffset = isMultiTileOperationEnabled(); - auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo, args.tlbInvalidation)); + auto dispatchSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo, args.tlbInvalidation) + this->getCmdSizeForPrologue(); + + auto &commandStream = getCS(dispatchSize); auto commandStreamStart = commandStream.getUsed(); + this->programEnginePrologue(commandStream); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation(commandStream, PostSyncMode::ImmediateData, getTagAllocation()->getGpuAddress(), @@ -1357,6 +1361,7 @@ SubmissionStatus CommandStreamReceiverHw::flushPipeControl() { args); makeResident(*tagAllocation); + makeResident(*commandStream.getGraphicsAllocation()); auto submissionStatus = this->flushSmallTask(commandStream, commandStreamStart); this->latestFlushedTaskCount = taskCount.load(); @@ -1594,11 +1599,7 @@ void CommandStreamReceiverHw::createKernelArgsBufferAllocation() { template SubmissionStatus CommandStreamReceiverHw::initializeDeviceWithFirstSubmission() { - auto lock = obtainUniqueOwnership(); - - auto &commandStream = getCS(EncodeBatchBufferStartOrEnd::getBatchBufferEndSize()); - auto commandStreamStart = commandStream.getUsed(); - return this->flushSmallTask(commandStream, commandStreamStart); + return flushTagUpdate(); } template diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 5df3983052..27d104197b 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -382,13 +382,18 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa commandStreamReceiver->createKernelArgsBufferAllocation(); + if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) { + return false; + } + if (isDefaultEngine) { bool defaultEngineAlreadySet = (allEngines.size() > defaultEngineIndex) && (allEngines[defaultEngineIndex].getEngineType() == engineType); if (!defaultEngineAlreadySet) { defaultEngineIndex = deviceCsrIndex; - if (osContext->isDebuggableContext()) { + if (osContext->isDebuggableContext() || + this->isInitDeviceWithFirstSubmissionSupported(commandStreamReceiver->getType())) { if (SubmissionStatus::SUCCESS != commandStreamReceiver->initializeDeviceWithFirstSubmission()) { return false; } @@ -400,15 +405,12 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa defaultBcsEngineIndex = deviceCsrIndex; } - if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) { - return false; - } - EngineControl engine{commandStreamReceiver.get(), osContext}; allEngines.push_back(engine); if (engineUsage == EngineUsage::Regular) { addEngineToEngineGroup(engine); } + commandStreamReceivers.push_back(std::move(commandStreamReceiver)); return true; @@ -441,6 +443,11 @@ bool Device::isBcsSplitSupported() { return bcsSplit; } +bool Device::isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType) { + return getProductHelper().isInitDeviceWithFirstSubmissionRequired(getHardwareInfo()) && + Device::isInitDeviceWithFirstSubmissionEnabled(csrType); +} + double Device::getPlatformHostTimerResolution() const { if (getOSTime()) { return getOSTime()->getHostTimerResolution(); diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 8b0aba56be..ef2f9eabd6 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -10,6 +10,7 @@ #include "shared/source/helpers/engine_control.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/non_copyable_or_moveable.h" +#include "shared/source/helpers/options.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/performance_counters.h" #include "shared/source/utilities/reference_tracked_object.h" @@ -108,7 +109,9 @@ class Device : public ReferenceTrackedObject { RootDeviceEnvironment &getRootDeviceEnvironmentRef() const; bool isFullRangeSvm() const; static bool isBlitSplitEnabled(); + static bool isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType); bool isBcsSplitSupported(); + bool isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType); bool areSharedSystemAllocationsAllowed() const; template void setSpecializedDevice(SpecializedDeviceT *specializedDevice) { diff --git a/shared/source/dll/device_dll.cpp b/shared/source/dll/device_dll.cpp index 8dd274da89..45879d086b 100644 --- a/shared/source/dll/device_dll.cpp +++ b/shared/source/dll/device_dll.cpp @@ -13,4 +13,8 @@ bool Device::isBlitSplitEnabled() { return true; } +bool Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType) { + return csrType == CommandStreamReceiverType::CSR_HW; +} + } // namespace NEO diff --git a/shared/source/helpers/completion_stamp.h b/shared/source/helpers/completion_stamp.h index 1a9e6c15cf..9685e75b3d 100644 --- a/shared/source/helpers/completion_stamp.h +++ b/shared/source/helpers/completion_stamp.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/task_count_helper.h" #include @@ -14,7 +15,6 @@ namespace NEO { using FlushStamp = uint64_t; -enum class SubmissionStatus : uint32_t; struct CompletionStamp { static TaskCountType getTaskCountFromSubmissionStatusError(SubmissionStatus submissionStatus); diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index b6449390c4..d218fda00c 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -135,6 +135,7 @@ class ProductHelper { virtual bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const = 0; virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitSplitEnqueueWARequired(const HardwareInfo &hwInfo) const = 0; + virtual bool isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0; virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0; virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0; @@ -280,6 +281,7 @@ class ProductHelperHw : public ProductHelper { bool isTimestampWaitSupportedForEvents() const override; bool isTilePlacementResourceWaRequired(const HardwareInfo &hwInfo) const override; bool isBlitSplitEnqueueWARequired(const HardwareInfo &hwInfo) const override; + bool isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const override; bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const override; bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override; bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 0b6cd52549..0c9f8665b5 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -469,6 +469,11 @@ bool ProductHelperHw::isBlitSplitEnqueueWARequired(const HardwareInf return false; } +template +bool ProductHelperHw::isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const { + return false; +} + template bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const { return allocation.isAllocatedInLocalMemoryPool() && diff --git a/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl b/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl index 1db347b0b7..0c892d09ef 100644 --- a/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl +++ b/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl @@ -173,6 +173,11 @@ bool ProductHelperHw::isBlitSplitEnqueueWARequired(const HardwareInf return true; } +template <> +bool ProductHelperHw::isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const { + return true; +} + template <> bool ProductHelperHw::isImplicitScalingSupported(const HardwareInfo &hwInfo) const { return getSteppingFromHwRevId(hwInfo) >= REVISION_B; diff --git a/shared/test/common/base_ult_config_listener.cpp b/shared/test/common/base_ult_config_listener.cpp index ac4ed7413e..9bd2cba169 100644 --- a/shared/test/common/base_ult_config_listener.cpp +++ b/shared/test/common/base_ult_config_listener.cpp @@ -36,7 +36,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) { // Ensure that global state is restored UltHwConfig expectedState{}; - static_assert(sizeof(UltHwConfig) == 13 * sizeof(bool), ""); // Ensure that there is no internal padding + static_assert(sizeof(UltHwConfig) == 14 * sizeof(bool), ""); // Ensure that there is no internal padding EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig))); EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM))); diff --git a/shared/test/common/helpers/ult_hw_config.h b/shared/test/common/helpers/ult_hw_config.h index 2316cc9f82..60790d3b58 100644 --- a/shared/test/common/helpers/ult_hw_config.h +++ b/shared/test/common/helpers/ult_hw_config.h @@ -14,6 +14,7 @@ struct UltHwConfig { bool forceOsAgnosticMemoryManager = true; bool useWaitForTimestamps = false; bool useBlitSplit = false; + bool useFirstSubmissionInitDevice = false; bool csrFailInitDirectSubmission = false; bool csrBaseCallDirectSubmissionAvailable = false; diff --git a/shared/test/common/mocks/device_ult.cpp b/shared/test/common/mocks/device_ult.cpp index bee5e6820c..bc8fdf6f1b 100644 --- a/shared/test/common/mocks/device_ult.cpp +++ b/shared/test/common/mocks/device_ult.cpp @@ -7,11 +7,14 @@ #include "shared/source/device/device.h" #include "shared/test/common/helpers/ult_hw_config.h" - namespace NEO { bool Device::isBlitSplitEnabled() { return ultHwConfig.useBlitSplit; } +bool Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType) { + return ultHwConfig.useFirstSubmissionInitDevice; +} + } // namespace NEO diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 4caddd2583..465e093b76 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -673,6 +673,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenZexNumberOfCssEnvVariableSetAmbig } HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenDebuggableOsContextWhenDeviceCreatesEnginesThenDeviceIsInitializedWithFirstSubmission) { + VariableBackup backup(&ultHwConfig); + ultHwConfig.useFirstSubmissionInitDevice = true; + auto hwInfo = *defaultHwInfo; hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, 0); @@ -687,7 +690,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenDebuggableOsContextWhenDeviceCrea EXPECT_EQ(1u, csr->peekLatestSentTaskCount()); } -HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenNonDebuggableOsContextWhenDeviceCreatesEnginesThenDeviceIsNotInitializedWithFirstSubmission) { +HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, whenDeviceCreatesEnginesThenDeviceIsInitializedWithFirstSubmission) { + VariableBackup backup(&ultHwConfig); + ultHwConfig.useFirstSubmissionInitDevice = true; + auto hwInfo = *defaultHwInfo; hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, 0); @@ -698,7 +704,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenNonDebuggableOsContextWhenDeviceC auto device = deviceFactory.rootDevices[0]; auto csr = device->allEngines[device->defaultEngineIndex].commandStreamReceiver; - EXPECT_EQ(0u, csr->peekLatestSentTaskCount()); + EXPECT_EQ(device->isInitDeviceWithFirstSubmissionSupported(csr->getType()), csr->peekLatestSentTaskCount()); } TEST(FailDeviceTest, GivenFailedDeviceWhenCreatingDeviceThenNullIsReturned) {