mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Flush task at device init
Resolves: NEO-7642
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d2628babbc
commit
2abbd82195
@@ -53,6 +53,8 @@ void DriverImp::initialize(ze_result_t *result) {
|
||||
}
|
||||
}
|
||||
|
||||
executionEnvironment->setMetricsEnabled(envVariables.metrics);
|
||||
|
||||
executionEnvironment->incRefInternal();
|
||||
auto neoDevices = NEO::DeviceFactory::createDevices(*executionEnvironment);
|
||||
executionEnvironment->decRefInternal();
|
||||
|
||||
@@ -238,6 +238,17 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenFlushTagUpdateThenSetStallingCmd
|
||||
EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
|
||||
}
|
||||
|
||||
// Verifies that the first-submission device initialization on the blitter CSR
// succeeds and leaves an MI_FLUSH_DW command in its command stream.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenInitializeDeviceWithFirstSubmissionThenMiFlushDwIsFlushed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    const auto initStatus = bcsCsr->initializeDeviceWithFirstSubmission();
    EXPECT_EQ(SubmissionStatus::SUCCESS, initStatus);

    // Parse everything the blitter CSR has recorded, starting at offset 0.
    auto parsedBcsCommands = getCmdList<FamilyType>(bcsCsr->getCS(0), 0);

    auto flushDwIt = expectCommand<MI_FLUSH_DW>(parsedBcsCommands.begin(), parsedBcsCommands.end());
    EXPECT_NE(flushDwIt, parsedBcsCommands.end());
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBcsOutput) {
|
||||
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
|
||||
@@ -884,6 +884,14 @@ TEST(DeviceTest, whenCheckBlitSplitEnabledThenReturnsTrue) {
|
||||
EXPECT_TRUE(Device::isBlitSplitEnabled());
|
||||
}
|
||||
|
||||
// The first-submission init check is expected to be true for a hardware CSR.
TEST(DeviceTest, givenCsrHwWhenCheckIsInitDeviceWithFirstSubmissionEnabledThenReturnsTrue) {
    const bool enabledForHwCsr = Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType::CSR_HW);
    EXPECT_TRUE(enabledForHwCsr);
}
|
||||
|
||||
// The first-submission init check is expected to be false for a TBX CSR.
// The test name now states the asserted outcome ("ReturnsFalse"); the previous
// name claimed "ReturnsTrue" while the body asserted EXPECT_FALSE.
TEST(DeviceTest, givenCsrNonHwWhenCheckIsInitDeviceWithFirstSubmissionEnabledThenReturnsFalse) {
    EXPECT_FALSE(Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType::CSR_TBX));
}
|
||||
|
||||
TEST(PlatformsDestructor, whenGlobalPlatformsDestructorIsCalledThenGlobalPlatformsAreDestroyed) {
|
||||
EXPECT_NE(nullptr, platformsImpl);
|
||||
platformsDestructor();
|
||||
|
||||
@@ -1346,9 +1346,13 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.workloadPartitionOffset = isMultiTileOperationEnabled();
|
||||
|
||||
auto &commandStream = getCS(MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), args.tlbInvalidation));
|
||||
auto dispatchSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), args.tlbInvalidation) + this->getCmdSizeForPrologue();
|
||||
|
||||
auto &commandStream = getCS(dispatchSize);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
|
||||
this->programEnginePrologue(commandStream);
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(commandStream,
|
||||
PostSyncMode::ImmediateData,
|
||||
getTagAllocation()->getGpuAddress(),
|
||||
@@ -1357,6 +1361,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
|
||||
args);
|
||||
|
||||
makeResident(*tagAllocation);
|
||||
makeResident(*commandStream.getGraphicsAllocation());
|
||||
|
||||
auto submissionStatus = this->flushSmallTask(commandStream, commandStreamStart);
|
||||
this->latestFlushedTaskCount = taskCount.load();
|
||||
@@ -1594,11 +1599,7 @@ void CommandStreamReceiverHw<GfxFamily>::createKernelArgsBufferAllocation() {
|
||||
|
||||
template <typename GfxFamily>
|
||||
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
|
||||
auto lock = obtainUniqueOwnership();
|
||||
|
||||
auto &commandStream = getCS(EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferEndSize());
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
return this->flushSmallTask(commandStream, commandStreamStart);
|
||||
return flushTagUpdate();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -383,13 +383,18 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
|
||||
|
||||
commandStreamReceiver->createKernelArgsBufferAllocation();
|
||||
|
||||
if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isDefaultEngine) {
|
||||
bool defaultEngineAlreadySet = (allEngines.size() > defaultEngineIndex) && (allEngines[defaultEngineIndex].getEngineType() == engineType);
|
||||
|
||||
if (!defaultEngineAlreadySet) {
|
||||
defaultEngineIndex = deviceCsrIndex;
|
||||
|
||||
if (osContext->isDebuggableContext()) {
|
||||
if (osContext->isDebuggableContext() ||
|
||||
this->isInitDeviceWithFirstSubmissionSupported(commandStreamReceiver->getType())) {
|
||||
if (SubmissionStatus::SUCCESS != commandStreamReceiver->initializeDeviceWithFirstSubmission()) {
|
||||
return false;
|
||||
}
|
||||
@@ -401,15 +406,12 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
|
||||
defaultBcsEngineIndex = deviceCsrIndex;
|
||||
}
|
||||
|
||||
if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
EngineControl engine{commandStreamReceiver.get(), osContext};
|
||||
allEngines.push_back(engine);
|
||||
if (engineUsage == EngineUsage::Regular) {
|
||||
addEngineToEngineGroup(engine);
|
||||
}
|
||||
|
||||
commandStreamReceivers.push_back(std::move(commandStreamReceiver));
|
||||
|
||||
return true;
|
||||
@@ -442,6 +444,12 @@ bool Device::isBcsSplitSupported() {
|
||||
return bcsSplit;
|
||||
}
|
||||
|
||||
// Returns true only when all three conditions hold, checked in this order:
//   1. metrics are not enabled on the execution environment,
//   2. the product helper reports that first-submission init is required,
//   3. the feature is enabled for the given command stream receiver type.
bool Device::isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType) {
    if (this->executionEnvironment->areMetricsEnabled()) {
        return false;
    }

    if (!getProductHelper().isInitDeviceWithFirstSubmissionRequired(getHardwareInfo())) {
        return false;
    }

    return Device::isInitDeviceWithFirstSubmissionEnabled(csrType);
}
|
||||
|
||||
double Device::getPlatformHostTimerResolution() const {
|
||||
if (getOSTime()) {
|
||||
return getOSTime()->getHostTimerResolution();
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/helpers/engine_control.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
#include "shared/source/helpers/options.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/os_interface/performance_counters.h"
|
||||
#include "shared/source/utilities/reference_tracked_object.h"
|
||||
@@ -108,7 +109,9 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
RootDeviceEnvironment &getRootDeviceEnvironmentRef() const;
|
||||
bool isFullRangeSvm() const;
|
||||
static bool isBlitSplitEnabled();
|
||||
static bool isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType);
|
||||
bool isBcsSplitSupported();
|
||||
bool isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType);
|
||||
bool areSharedSystemAllocationsAllowed() const;
|
||||
template <typename SpecializedDeviceT>
|
||||
void setSpecializedDevice(SpecializedDeviceT *specializedDevice) {
|
||||
|
||||
@@ -13,4 +13,8 @@ bool Device::isBlitSplitEnabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// First-submission device init is enabled only for the CSR_HW receiver type.
bool Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType) {
    const bool isHardwareCsr = (CommandStreamReceiverType::CSR_HW == csrType);
    return isHardwareCsr;
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -38,6 +38,10 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
|
||||
debuggingEnabled = true;
|
||||
}
|
||||
// Returns the current value of the debuggingEnabled flag.
bool isDebuggingEnabled() { return debuggingEnabled; }
|
||||
void setMetricsEnabled(bool value) {
|
||||
this->metricsEnabled = value;
|
||||
}
|
||||
bool areMetricsEnabled() { return this->metricsEnabled; }
|
||||
DirectSubmissionController *initializeDirectSubmissionController();
|
||||
|
||||
std::unique_ptr<MemoryManager> memoryManager;
|
||||
@@ -51,6 +55,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
|
||||
void adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const;
|
||||
void configureNeoEnvironment();
|
||||
bool debuggingEnabled = false;
|
||||
bool metricsEnabled = false;
|
||||
std::unordered_map<uint32_t, uint32_t> rootDeviceNumCcsMap;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/submission_status.h"
|
||||
#include "shared/source/command_stream/task_count_helper.h"
|
||||
|
||||
#include <cstdint>
|
||||
@@ -14,7 +15,6 @@
|
||||
|
||||
namespace NEO {
|
||||
using FlushStamp = uint64_t;
|
||||
enum class SubmissionStatus : uint32_t;
|
||||
struct CompletionStamp {
|
||||
static TaskCountType getTaskCountFromSubmissionStatusError(SubmissionStatus submissionStatus);
|
||||
|
||||
|
||||
@@ -136,6 +136,7 @@ class ProductHelper {
|
||||
virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isBlitSplitEnqueueWARequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const = 0;
|
||||
virtual bool isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0;
|
||||
virtual bool isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const = 0;
|
||||
@@ -280,6 +281,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
bool isTimestampWaitSupportedForEvents() const override;
|
||||
bool isTilePlacementResourceWaRequired(const HardwareInfo &hwInfo) const override;
|
||||
bool isBlitSplitEnqueueWARequired(const HardwareInfo &hwInfo) const override;
|
||||
bool isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const override;
|
||||
bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const override;
|
||||
bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override;
|
||||
bool isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const override;
|
||||
|
||||
@@ -479,6 +479,11 @@ bool ProductHelperHw<gfxProduct>::isBlitCopyRequiredForLocalMemory(const RootDev
|
||||
!allocation.isAllocationLockable());
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isImplicitScalingSupported(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
|
||||
@@ -176,6 +176,11 @@ bool ProductHelperHw<gfxProduct>::isBlitSplitEnqueueWARequired(const HardwareInf
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ProductHelperHw<gfxProduct>::isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ProductHelperHw<gfxProduct>::isImplicitScalingSupported(const HardwareInfo &hwInfo) const {
|
||||
return getSteppingFromHwRevId(hwInfo) >= REVISION_B;
|
||||
|
||||
@@ -36,7 +36,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
|
||||
|
||||
// Ensure that global state is restored
|
||||
UltHwConfig expectedState{};
|
||||
static_assert(sizeof(UltHwConfig) == 13 * sizeof(bool), ""); // Ensure that there is no internal padding
|
||||
static_assert(sizeof(UltHwConfig) == 14 * sizeof(bool), ""); // Ensure that there is no internal padding
|
||||
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
|
||||
|
||||
EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));
|
||||
|
||||
@@ -14,6 +14,7 @@ struct UltHwConfig {
|
||||
bool forceOsAgnosticMemoryManager = true;
|
||||
bool useWaitForTimestamps = false;
|
||||
bool useBlitSplit = false;
|
||||
bool useFirstSubmissionInitDevice = false;
|
||||
|
||||
bool csrFailInitDirectSubmission = false;
|
||||
bool csrBaseCallDirectSubmissionAvailable = false;
|
||||
|
||||
@@ -7,11 +7,14 @@
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
bool Device::isBlitSplitEnabled() {
|
||||
return ultHwConfig.useBlitSplit;
|
||||
}
|
||||
|
||||
// Test-build variant: the answer is taken from
// ultHwConfig.useFirstSubmissionInitDevice; csrType is not consulted.
bool Device::isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType) {
    const bool enabledByUltConfig = ultHwConfig.useFirstSubmissionInitDevice;
    return enabledByUltConfig;
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -673,6 +673,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenZexNumberOfCssEnvVariableSetAmbig
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenDebuggableOsContextWhenDeviceCreatesEnginesThenDeviceIsInitializedWithFirstSubmission) {
|
||||
VariableBackup<UltHwConfig> backup(&ultHwConfig);
|
||||
ultHwConfig.useFirstSubmissionInitDevice = true;
|
||||
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, 0);
|
||||
|
||||
@@ -687,7 +690,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenDebuggableOsContextWhenDeviceCrea
|
||||
EXPECT_EQ(1u, csr->peekLatestSentTaskCount());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenNonDebuggableOsContextWhenDeviceCreatesEnginesThenDeviceIsNotInitializedWithFirstSubmission) {
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, whenDeviceCreatesEnginesThenDeviceIsInitializedWithFirstSubmission) {
|
||||
VariableBackup<UltHwConfig> backup(&ultHwConfig);
|
||||
ultHwConfig.useFirstSubmissionInitDevice = true;
|
||||
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, 0);
|
||||
|
||||
@@ -698,7 +704,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenNonDebuggableOsContextWhenDeviceC
|
||||
|
||||
auto device = deviceFactory.rootDevices[0];
|
||||
auto csr = device->allEngines[device->defaultEngineIndex].commandStreamReceiver;
|
||||
EXPECT_EQ(0u, csr->peekLatestSentTaskCount());
|
||||
EXPECT_EQ(device->isInitDeviceWithFirstSubmissionSupported(csr->getType()), csr->peekLatestSentTaskCount());
|
||||
}
|
||||
|
||||
TEST(FailDeviceTest, GivenFailedDeviceWhenCreatingDeviceThenNullIsReturned) {
|
||||
|
||||
Reference in New Issue
Block a user