mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature: new multitile post sync layout for immediate write [2/n]
No functional changes in this commit. This is prework. Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
112bbec6e9
commit
3d49658f50
@@ -284,6 +284,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void appendComputeBarrierCommand();
|
||||
NEO::PipeControlArgs createBarrierFlags();
|
||||
void appendMultiTileBarrier(NEO::Device &neoDevice);
|
||||
void appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds);
|
||||
size_t estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||
|
||||
@@ -2320,6 +2320,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
}
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
bool workloadPartition = isTimestampEventForMultiTile(event);
|
||||
|
||||
appendDispatchOffsetRegister(workloadPartition, true);
|
||||
|
||||
if (beforeWalker) {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
@@ -2339,9 +2342,10 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, rootDeviceEnvironment);
|
||||
bool workloadPartition = isTimestampEventForMultiTile(event);
|
||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||
}
|
||||
|
||||
appendDispatchOffsetRegister(workloadPartition, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -293,4 +293,8 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
|
||||
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
|
||||
}
|
||||
|
||||
// No-op variant of appendDispatchOffsetRegister: the body is empty, so both
// arguments are ignored and nothing is emitted by this definition.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -481,4 +481,13 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Eve
|
||||
}
|
||||
}
|
||||
|
||||
// Calls ImplicitScalingDispatch::dispatchOffsetRegister on the command stream,
// but only when workloadPartitionEvent is true AND the dynamic post-sync
// allocation layout is enabled; in every other case this does nothing.
//
// workloadPartitionEvent - caller-computed flag (callers in this commit pass
//                          the result of isTimestampEventForMultiTile(event)).
// beforeProfilingCmds    - true selects getTimeStampPostSyncOffset(), false
//                          selects getImmediateWritePostSyncOffset().
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
|
||||
if (workloadPartitionEvent && NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
// Timestamp offset before the profiling commands, immediate-write offset after them.
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
|
||||
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -119,6 +119,9 @@ struct Event : _ze_event_handle_t {
|
||||
// Returns the currently stored singlePacketSize member.
size_t getSinglePacketSize() const {
|
||||
return singlePacketSize;
|
||||
}
|
||||
// Overwrites the stored singlePacketSize member with `size`; no validation is done.
void setSinglePacketSize(size_t size) {
|
||||
singlePacketSize = size;
|
||||
}
|
||||
// Returns the currently stored timestampSizeInDw member.
size_t getTimestampSizeInDw() const {
|
||||
return timestampSizeInDw;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
@@ -33,6 +34,10 @@ struct EventImp : public Event {
|
||||
globalEndOffset = NEO::TimestampPackets<TagSizeT>::getGlobalEndOffset();
|
||||
timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t));
|
||||
singlePacketSize = NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
singlePacketSize = sizeof(uint64_t);
|
||||
}
|
||||
}
|
||||
|
||||
~EventImp() override {}
|
||||
|
||||
@@ -30,6 +30,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
|
||||
if (eventPool->isEventPoolTimestampFlagSet()) {
|
||||
event->setEventTimestampFlag(true);
|
||||
event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
|
||||
}
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
|
||||
@@ -51,7 +52,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
event->kernelEventCompletionData =
|
||||
std::make_unique<KernelEventCompletionData<TagSizeT>[]>(event->maxKernelCount);
|
||||
|
||||
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet();
|
||||
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet() && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled();
|
||||
int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get();
|
||||
if (overrideUseContextEndOffset != -1) {
|
||||
useContextEndOffset = !!overrideUseContextEndOffset;
|
||||
|
||||
@@ -35,6 +35,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true only when the EnableDynamicPostSyncAllocLayout debug flag is
// exactly 1; any other value of the flag yields false.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
|
||||
return (NEO::DebugManager.flags.EnableDynamicPostSyncAllocLayout.get() == 1);
|
||||
}
|
||||
|
||||
// Reports the API type as ApiSpecificConfig::L0 unconditionally.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
|
||||
return ApiSpecificConfig::L0;
|
||||
}
|
||||
|
||||
@@ -1421,6 +1421,52 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
|
||||
EXPECT_EQ(4u, commandList->partitionCount);
|
||||
}
|
||||
|
||||
// With EnableDynamicPostSyncAllocLayout set to 1 and an event taken from a pool
// created with HOST_VISIBLE only (no KERNEL_TIMESTAMP flag), appending a kernel
// must leave the walker's post-sync operation as OPERATION_WRITE_IMMEDIATE_DATA
// while workload partitioning stays enabled.
// NOTE(review): no DebugManagerStateRestore is declared in this test body;
// presumably the fixture restores the debug flag - confirm.
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenDebugVariableSetWhenUsingNonTimestampEventThenDontOverridePostSyncMode) {
|
||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
|
||||
DebugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
|
||||
// HOST_VISIBLE only: the timestamp flag is deliberately not requested.
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC;
|
||||
eventDesc.index = 0;
|
||||
auto deviceHandle = device->toHandle();
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(device->getDriverHandle(), context, 1, &deviceHandle, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
std::unique_ptr<L0::Event> event(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
// With the flag set the event is expected not to use the context-end offset.
EXPECT_FALSE(event->isUsingContextEndOffset());
|
||||
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
|
||||
ze_group_count_t groupCount{256, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, hEventHandle, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(4u, event->getPacketsInUse());
|
||||
EXPECT_EQ(4u, commandList->partitionCount);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, commandList->getCmdContainer().getCommandStream()->getCpuBase(), commandList->getCmdContainer().getCommandStream()->getUsed()));
|
||||
|
||||
// NOTE(review): itorWalker is dereferenced without first being compared to
// cmdList.end(); only the genCmdCast result is null-checked below.
auto itorWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
|
||||
auto cmd = genCmdCast<WALKER_TYPE *>(*itorWalker);
|
||||
ASSERT_NE(nullptr, cmd);
|
||||
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
|
||||
|
||||
auto &postSync = cmd->getPostSync();
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenSetProperPartitionSize, IsAtLeastXeHpCore) {
|
||||
ze_group_count_t groupCount{16, 1, 1};
|
||||
|
||||
|
||||
@@ -827,6 +827,9 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
void testAppendSignalEventForProfiling() {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
bool dynamicAllocSize = (ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset() != ImplicitScalingDispatch<FamilyType>::getTimeStampPostSyncOffset());
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto engineType = copyOnly == 1 ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::Compute;
|
||||
@@ -860,6 +863,14 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
ptrOffset(cmdStream->getCpuBase(), sizeBefore),
|
||||
(sizeAfter - sizeBefore)));
|
||||
|
||||
if (dynamicAllocSize) {
|
||||
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*cmdList.begin());
|
||||
ASSERT_NE(nullptr, lriCmd);
|
||||
|
||||
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getTimeStampPostSyncOffset(), lriCmd->getDataDword());
|
||||
}
|
||||
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
if constexpr (limitEventPacketes == 1) {
|
||||
@@ -887,6 +898,14 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount);
|
||||
}
|
||||
}
|
||||
|
||||
if (dynamicAllocSize) {
|
||||
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*cmdList.rbegin());
|
||||
ASSERT_NE(nullptr, lriCmd);
|
||||
|
||||
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), lriCmd->getDataDword());
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -1304,6 +1323,19 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventW
|
||||
testAppendSignalEventForProfiling<gfxCoreFamily>();
|
||||
}
|
||||
|
||||
// Fixture that sets EnableDynamicPostSyncAllocLayout to 1 and then runs the
// base fixture's SetUp, so the flag is already in effect during base setup.
struct MultiTileCommandListSignalAllocLayoutTest : public MultiTileCommandListSignalAllEventPacketTest {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
MultiTileCommandListSignalAllEventPacketTest::SetUp();
|
||||
}
|
||||
};
|
||||
|
||||
// First asserts that the immediate-write and timestamp post-sync offsets differ
// under this fixture, then runs the shared testAppendSignalEventForProfiling
// scenario for the current core family.
HWTEST2_F(MultiTileCommandListSignalAllocLayoutTest, givenDynamicLayoutEnabledWhenAppendEventForProfilingCalledThenProgramOffsetMmio, IsAtLeastXeHpCore) {
|
||||
EXPECT_NE(ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), ImplicitScalingDispatch<FamilyType>::getTimeStampPostSyncOffset());
|
||||
|
||||
testAppendSignalEventForProfiling<gfxCoreFamily>();
|
||||
}
|
||||
|
||||
// Delegates to the shared testAppendSignalEventPostAppendCall scenario, passing 0.
HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
@@ -2790,6 +2790,39 @@ HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAn
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(EventTests, givenDebugFlagSetWhenCreatingNonTimestampEventsThenPacketsSizeIsQword) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
const ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, 0, 0, 0};
|
||||
|
||||
std::unique_ptr<L0::EventPool> timestampPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
|
||||
eventPoolDesc.flags = 0;
|
||||
std::unique_ptr<L0::EventPool> regularPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
|
||||
ze_event_handle_t timestampEventHandle = nullptr;
|
||||
ze_event_handle_t regularEventHandle = nullptr;
|
||||
|
||||
timestampPool->createEvent(&eventDesc, &timestampEventHandle);
|
||||
regularPool->createEvent(&eventDesc, &regularEventHandle);
|
||||
|
||||
auto timestampEvent = Event::fromHandle(timestampEventHandle);
|
||||
auto regularEvent = Event::fromHandle(regularEventHandle);
|
||||
|
||||
EXPECT_EQ(NEO::TimestampPackets<typename FamilyType::TimestampPacketType>::getSinglePacketSize(), timestampEvent->getSinglePacketSize());
|
||||
EXPECT_EQ(sizeof(uint64_t), regularEvent->getSinglePacketSize());
|
||||
|
||||
timestampEvent->destroy();
|
||||
regularEvent->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(EventTests,
|
||||
WhenHostEventSyncThenExpectDownloadEventAllocationWithEachQuery) {
|
||||
std::map<GraphicsAllocation *, uint32_t> downloadAllocationTrack;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include "level_zero/core/source/compiler_interface/l0_reg_path.h"
|
||||
|
||||
@@ -46,6 +47,16 @@ TEST(ApiSpecificConfigL0Tests, WhenCheckingIfDeviceAllocationCacheIsEnabledThenR
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
|
||||
}
|
||||
|
||||
// Expects isDynamicPostSyncAllocLayoutEnabled() to be false before the flag is
// touched and true once EnableDynamicPostSyncAllocLayout has been set to 1.
TEST(ApiSpecificConfigL0Tests, GivenDebugFlagSetWhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnTrue) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
|
||||
DebugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
EXPECT_TRUE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
}
|
||||
|
||||
// Expects the ImplicitScaling::apiSupport constant to be true in this build.
TEST(ImplicitScalingApiTests, givenLevelZeroApiUsedThenSupportEnabled) {
|
||||
EXPECT_TRUE(ImplicitScaling::apiSupport);
|
||||
}
|
||||
|
||||
@@ -35,6 +35,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Always returns false in this definition; the EnableDynamicPostSyncAllocLayout
// debug flag is not consulted here.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reports the API type as ApiSpecificConfig::OCL unconditionally.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
|
||||
return ApiSpecificConfig::OCL;
|
||||
}
|
||||
|
||||
@@ -49,6 +49,16 @@ TEST(ApiSpecificConfigOclTests, WhenCheckingIfDeviceAllocationCacheIsEnabledThen
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
|
||||
}
|
||||
|
||||
// Expects isDynamicPostSyncAllocLayoutEnabled() to stay false both before and
// after EnableDynamicPostSyncAllocLayout is set to 1.
TEST(ApiSpecificConfigOclTests, WhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnFalse) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
|
||||
DebugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
}
|
||||
|
||||
TEST(ApiSpecificConfigOclTests, givenEnableStatelessCompressionWhenProvidingSvmGpuAllocationThenPreferCompressedBuffer) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
|
||||
|
||||
@@ -313,7 +313,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
|
||||
if (args.partitionCount > 1 && !args.isInternal) {
|
||||
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
|
||||
if (args.eventAddress != 0) {
|
||||
if (args.eventAddress != 0 && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP);
|
||||
}
|
||||
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
@@ -235,6 +236,9 @@ inline void ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(LinearStr
|
||||
|
||||
// Returns the immediate-write post-sync offset as a uint32_t:
//  - sizeof(uint64_t) when ApiSpecificConfig reports the dynamic post-sync
//    allocation layout as enabled;
//  - otherwise GfxCoreHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw().
template <typename GfxFamily>
|
||||
inline uint32_t ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset() {
|
||||
if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
return static_cast<uint32_t>(sizeof(uint64_t));
|
||||
}
|
||||
return static_cast<uint32_t>(GfxCoreHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw());
|
||||
}
|
||||
|
||||
|
||||
@@ -242,6 +242,7 @@ DECLARE_DEBUG_VARIABLE(int64_t, OverrideEventSynchronizeTimeout, -1, "-1: defaul
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDynamicPostSyncAllocLayout, -1, "-1: default, 0: Keep Timestamp size layout, 1: Use write immediate layout (qword) and switch dynamically to TS for profiling")
|
||||
|
||||
/*LOGGING FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
|
||||
|
||||
@@ -18,6 +18,7 @@ struct ApiSpecificConfig {
|
||||
static bool getGlobalBindlessHeapConfiguration();
|
||||
static bool getBindlessMode();
|
||||
static bool isDeviceAllocationCacheEnabled();
|
||||
static bool isDynamicPostSyncAllocLayoutEnabled();
|
||||
static ApiType getApiType();
|
||||
static std::string getName();
|
||||
static uint64_t getReducedMaxAllocSize(uint64_t maxAllocSize);
|
||||
|
||||
@@ -533,4 +533,5 @@ OverrideHwIpVersion = -1
|
||||
PrintGlobalTimestampInNs = 0
|
||||
EnableDeviceStateVerification = -1
|
||||
VfBarResourceAllocationWa = 1
|
||||
EnableDynamicPostSyncAllocLayout = -1
|
||||
# Please don't edit below this line
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/walker_partition_interface.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/pipe_control_args.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
@@ -1563,3 +1564,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*bbStartList.begin());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
}
|
||||
|
||||
// Before the flag is set, the immediate-write offset must equal the single
// timestamp packet size. After setting EnableDynamicPostSyncAllocLayout to 1,
// the expected offset follows whatever ApiSpecificConfig reports: qword size
// when the layout is reported enabled, the timestamp packet size otherwise.
// NOTE(review): no DebugManagerStateRestore is declared in this test body;
// presumably the ImplicitScalingTests fixture restores the flag - confirm.
HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, givenDebugFlagSetWhenCheckingImmWriteOffsetThenReturnQwordSize) {
|
||||
EXPECT_EQ(static_cast<uint32_t>(GfxCoreHelperHw<FamilyType>::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
|
||||
|
||||
DebugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
EXPECT_EQ(static_cast<uint32_t>(sizeof(uint64_t)), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
|
||||
} else {
|
||||
EXPECT_EQ(static_cast<uint32_t>(GfxCoreHelperHw<FamilyType>::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
|
||||
}
|
||||
}
|
||||
@@ -55,6 +55,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true only when the EnableDynamicPostSyncAllocLayout debug flag is
// exactly 1; any other value of the flag yields false.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
|
||||
return (NEO::DebugManager.flags.EnableDynamicPostSyncAllocLayout.get() == 1);
|
||||
}
|
||||
|
||||
// Returns apiTypeForUlts, a variable defined outside this excerpt.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
|
||||
return apiTypeForUlts;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user