mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
performance: add skeleton method to cmdlist immediate flush task
Related-To: NEO-7808 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a6fee8994d
commit
21823af419
@@ -323,6 +323,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void postInitComputeSetup();
|
||||
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
||||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
|
||||
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
|
||||
@@ -194,6 +194,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled);
|
||||
commandContainer.setNumIddPerBlock(1);
|
||||
this->setupFlushMethod(rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
if (this->immediateCmdListHeapSharing) {
|
||||
|
||||
@@ -153,6 +153,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
ze_result_t executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, CommandQueue *cmdQ);
|
||||
|
||||
NEO::CompletionStamp flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
|
||||
NEO::CompletionStamp flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
|
||||
NEO::CompletionStamp flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, NEO::CommandStreamReceiver *csr);
|
||||
|
||||
void checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies);
|
||||
@@ -183,6 +184,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
|
||||
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch);
|
||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||
|
||||
@@ -109,6 +109,11 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask
|
||||
return csr->flushBcsTask(cmdStreamTask, taskStartOffset, dispatchBcsFlags, this->device->getHwInfo());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
|
||||
return {NEO::CompletionStamp::getTaskCountFromSubmissionStatusError(NEO::SubmissionStatus::UNSUPPORTED)};
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
|
||||
NEO::DispatchFlags dispatchFlags(
|
||||
@@ -1155,4 +1160,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExe
|
||||
return status;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
if (L0GfxCoreHelper::useImmediateComputeFlushTask(rootDeviceEnvironment)) {
|
||||
this->computeFlushMethod = &CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -112,6 +112,14 @@ bool L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(const NEO::RootDeviceE
|
||||
return value && allowPrimary;
|
||||
}
|
||||
|
||||
// Decides whether the immediate compute flush task should be used.
// The UseImmediateFlushTask debug flag takes precedence: any value other than -1
// is returned as a boolean (non-zero means true). With the flag at -1 the answer
// comes from the platform helper registered in rootDeviceEnvironment.
bool L0GfxCoreHelper::useImmediateComputeFlushTask(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
    const auto debugOverride = NEO::DebugManager.flags.UseImmediateFlushTask.get();
    if (debugOverride == -1) {
        // No override requested: defer to the platform default.
        return rootDeviceEnvironment.getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();
    }
    return debugOverride != 0;
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
template <>
|
||||
|
||||
@@ -51,6 +51,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
|
||||
static bool useSignalAllEventPackets(const NEO::HardwareInfo &hwInfo);
|
||||
static NEO::HeapAddressModel getHeapAddressModel(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static bool dispatchCmdListBatchBufferAsPrimary(const NEO::RootDeviceEnvironment &rootDeviceEnvironment, bool allowPrimary);
|
||||
static bool useImmediateComputeFlushTask(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
|
||||
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
|
||||
@@ -76,6 +77,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
|
||||
virtual uint32_t getEventBaseMaxPacketCount(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
||||
virtual NEO::HeapAddressModel getPlatformHeapAddressModel() const = 0;
|
||||
virtual std::vector<uint32_t> getSupportedNumGrfs() const = 0;
|
||||
virtual bool platformSupportsImmediateComputeFlushTask() const = 0;
|
||||
|
||||
protected:
|
||||
L0GfxCoreHelper() = default;
|
||||
@@ -112,6 +114,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
|
||||
uint32_t getEventBaseMaxPacketCount(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
||||
NEO::HeapAddressModel getPlatformHeapAddressModel() const override;
|
||||
std::vector<uint32_t> getSupportedNumGrfs() const override;
|
||||
bool platformSupportsImmediateComputeFlushTask() const override;
|
||||
|
||||
protected:
|
||||
L0GfxCoreHelperHw() = default;
|
||||
|
||||
@@ -69,4 +69,9 @@ bool L0GfxCoreHelperHw<Family>::platformSupportsPrimaryBatchBufferCmdList() cons
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool L0GfxCoreHelperHw<Family>::platformSupportsImmediateComputeFlushTask() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -82,4 +82,9 @@ bool L0GfxCoreHelperHw<Family>::platformSupportsPrimaryBatchBufferCmdList() cons
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool L0GfxCoreHelperHw<Family>::platformSupportsImmediateComputeFlushTask() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -367,5 +367,11 @@ void PrimaryBatchBufferPreamblelessCmdListFixture::tearDown() {
|
||||
PrimaryBatchBufferCmdListFixture::tearDown();
|
||||
}
|
||||
|
||||
// Forces the UseImmediateFlushTask debug flag to 1 and then runs the base fixture setup.
// The flag is written before the base setUp() call, so it is already set while the
// base fixture runs.
// NOTE(review): no debug-flag restorer is visible in this block - presumably the base
// fixture restores DebugManager state on teardown; confirm.
void ImmediateFlushTaskCmdListFixture::setUp() {
    constexpr auto forceImmediateFlushTask = 1;
    DebugManager.flags.UseImmediateFlushTask.set(forceImmediateFlushTask);

    ModuleMutableCommandListFixture::setUp();
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -304,5 +304,9 @@ struct PrimaryBatchBufferPreamblelessCmdListFixture : public PrimaryBatchBufferC
|
||||
std::unique_ptr<L0::ult::CommandList> commandList3;
|
||||
};
|
||||
|
||||
// Test fixture derived from ModuleMutableCommandListFixture that declares its own
// setUp(); the definition sets the UseImmediateFlushTask debug flag to 1 before
// delegating to the base fixture's setUp().
struct ImmediateFlushTaskCmdListFixture : public ModuleMutableCommandListFixture {
|
||||
// Defined out of line.
void setUp();
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1888,5 +1888,17 @@ HWTEST2_F(RayTracingCmdListTest,
|
||||
ultCsr->isMadeResident(rtAllocation, residentCount);
|
||||
}
|
||||
|
||||
using ImmediateFlushTaskCmdListTests = Test<ImmediateFlushTaskCmdListFixture>;
|
||||
|
||||
// Appends a single 1x1x1 kernel launch to the fixture's immediate command list while the
// immediate flush task is selected (the fixture sets UseImmediateFlushTask to 1) and
// checks the returned status.
HWTEST2_F(ImmediateFlushTaskCmdListTests,
          givenInitialVersionOfImmediateFlushTaskWhenImmediateFlushTaskSelectedThenUnsupportedErrorReturned,
          IsAtLeastXeHpCore) {
    ze_group_count_t threadGroups = {1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams{};

    const auto returnValue = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &threadGroups, nullptr, 0, nullptr, kernelLaunchParams, false);

    // NOTE(review): the test name says "unsupported", yet the asserted code is
    // OUT_OF_DEVICE_MEMORY - presumably the unsupported task count is translated to this
    // result somewhere in the flush path; confirm against the error mapping.
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -889,5 +889,34 @@ TEST_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenGettingDefaultCmdlistPrimary
|
||||
EXPECT_EQ(l0GfxCoreHelper.platformSupportsPrimaryBatchBufferCmdList(), L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, true));
|
||||
}
|
||||
|
||||
// For cores matched by IsAtMostGen12lp, the platform helper must report that the
// immediate compute flush task is not supported.
HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperOnGenPlatformsWhenGettingPlatformUseImmediateFlushTaskThenReturnFalse, IsAtMostGen12lp) {
    MockExecutionEnvironment mockExecutionEnvironment;
    auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
    const auto &helper = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>();

    EXPECT_FALSE(helper.platformSupportsImmediateComputeFlushTask());
}
|
||||
|
||||
// With the debug flag untouched by this test, the static selector must return the same
// value as the platform helper obtained from the same root device environment.
TEST_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenGettingDefaultUseImmediateFlushTaskThenUsePlatformDefaultSetting) {
    MockExecutionEnvironment mockExecutionEnvironment;
    auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0].get();
    auto &helper = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>();

    const bool platformDefault = helper.platformSupportsImmediateComputeFlushTask();
    const bool selectedValue = L0GfxCoreHelper::useImmediateComputeFlushTask(rootDeviceEnvironment);

    EXPECT_EQ(platformDefault, selectedValue);
}
|
||||
|
||||
// The UseImmediateFlushTask debug flag overrides the platform default:
// 0 makes the selector return false, 1 makes it return true.
TEST_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperUsingOverrideDebugKeyWhenGettingUseImmediateFlushTaskThenUseDbgKeyValue) {
    // Declared first so the flag changes below are scoped to this test.
    DebugManagerStateRestore debugFlagsRestorer;
    MockExecutionEnvironment mockExecutionEnvironment;
    const auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0].get();

    // Override set to "regular flush task".
    DebugManager.flags.UseImmediateFlushTask.set(0);
    EXPECT_FALSE(L0GfxCoreHelper::useImmediateComputeFlushTask(rootDeviceEnvironment));

    // Override set to "immediate flush task".
    DebugManager.flags.UseImmediateFlushTask.set(1);
    EXPECT_TRUE(L0GfxCoreHelper::useImmediateComputeFlushTask(rootDeviceEnvironment));
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -69,5 +69,10 @@ XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForCmd
|
||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsPrimaryBatchBufferCmdList());
|
||||
}
|
||||
|
||||
// The XeHpc L0 helper must report the immediate compute flush task as not supported.
XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForPlatformSupportsImmediateFlushTaskThenReturnFalse) {
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const bool immediateFlushSupported = helper.platformSupportsImmediateComputeFlushTask();
    EXPECT_FALSE(immediateFlushSupported);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -71,5 +71,10 @@ XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForCmd
|
||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsPrimaryBatchBufferCmdList());
|
||||
}
|
||||
|
||||
// The XeHpg L0 helper must report the immediate compute flush task as not supported.
XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForPlatformSupportsImmediateFlushTaskThenReturnFalse) {
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const bool immediateFlushSupported = helper.platformSupportsImmediateComputeFlushTask();
    EXPECT_FALSE(immediateFlushSupported);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1032,6 +1032,8 @@ TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionS
|
||||
return CompletionStamp::outOfDeviceMemory;
|
||||
case SubmissionStatus::FAILED:
|
||||
return CompletionStamp::failed;
|
||||
case SubmissionStatus::UNSUPPORTED:
|
||||
return CompletionStamp::unsupported;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -337,6 +337,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disable
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SetAmountOfReusableAllocations, -1, "-1: default, 0:disabled, > 1: enabled. If enabled, driver will fill reusable allocation lists with given amount of command buffers and heaps at initialization of immediate command list.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, UseHighAlignmentForHeapExtended, -1, "-1: default, 0:disabled, > 1: enabled. If enabled, driver aligns HEAP_EXTENDED allocations to GPU VA that is next power of 2 for a given size, if disables GPU VA is using 2MB/64KB alignment.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DispatchCmdlistCmdBufferPrimary, -1, "-1: default, 0: dispatch command buffers as seconadry, 1: dispatch command buffers as primary and chain")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, UseImmediateFlushTask, -1, "-1: default, 0: use regular flush task, 1: use immediate flush task")
|
||||
|
||||
/*DIRECT SUBMISSION FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
|
||||
|
||||
@@ -23,6 +23,7 @@ struct CompletionStamp {
|
||||
FlushStamp flushStamp;
|
||||
|
||||
static constexpr TaskCountType notReady = std::numeric_limits<TaskCountType>::max() - 0xF;
|
||||
static constexpr TaskCountType unsupported = std::numeric_limits<TaskCountType>::max() - 0xE;
|
||||
static constexpr TaskCountType failed = std::numeric_limits<TaskCountType>::max() - 0x6;
|
||||
static constexpr TaskCountType gpuHang = std::numeric_limits<TaskCountType>::max() - 0x5;
|
||||
static constexpr TaskCountType outOfDeviceMemory = std::numeric_limits<TaskCountType>::max() - 0x4;
|
||||
|
||||
@@ -315,6 +315,7 @@ ForceMultiGpuAtomics = -1
|
||||
ForceBufferCompressionFormat = -1
|
||||
ExperimentalSetWalkerPartitionCount = 0
|
||||
EnableStatelessCompressionWithUnifiedMemory = 0
|
||||
UseImmediateFlushTask = -1
|
||||
EnableMultiGpuAtomicsOptimization = 1
|
||||
EnableHwGenerationLocalIds = -1
|
||||
WalkerPartitionPreferHighestDimension = -1
|
||||
|
||||
@@ -2672,6 +2672,7 @@ TEST(CommandStreamReceiverSimpleTest, whenTranslatingSubmissionStatusToTaskCount
|
||||
EXPECT_EQ(CompletionStamp::outOfHostMemory, CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus::OUT_OF_HOST_MEMORY));
|
||||
EXPECT_EQ(CompletionStamp::outOfDeviceMemory, CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus::OUT_OF_MEMORY));
|
||||
EXPECT_EQ(CompletionStamp::failed, CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus::FAILED));
|
||||
EXPECT_EQ(CompletionStamp::unsupported, CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus::UNSUPPORTED));
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverHwTest, givenFailureOnFlushWhenFlushingBcsTaskThenErrorIsPropagated) {
|
||||
|
||||
Reference in New Issue
Block a user