refactor: remove unused logic for checking dynamic postsync layout

Related-To: NEO-8210

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-12-22 13:31:50 +00:00 committed by Compute-Runtime-Automation
parent 2fe3804cc2
commit df66a0276f
26 changed files with 86 additions and 215 deletions

View File

@ -520,7 +520,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Eve
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
if (workloadPartitionEvent && NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset);

View File

@ -39,6 +39,9 @@ namespace L0 {
template Event *Event::create<uint64_t>(EventPool *, const ze_event_desc_t *, Device *);
template Event *Event::create<uint32_t>(EventPool *, const ze_event_desc_t *, Device *);
template struct EventImp<uint32_t>;
template struct EventImp<uint64_t>;
ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) {
this->context = static_cast<ContextImp *>(context);

View File

@ -26,19 +26,7 @@ class KernelEventCompletionData : public NEO::TimestampPackets<TagSizeT, NEO::Ti
template <typename TagSizeT>
struct EventImp : public Event {
EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode)
: Event(eventPool, index, device), tbxMode(tbxMode) {
contextStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextStartOffset();
contextEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextEndOffset();
globalStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalStartOffset();
globalEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalEndOffset();
timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t));
singlePacketSize = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize();
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
singlePacketSize = sizeof(uint64_t);
}
}
EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode);
~EventImp() override {}

View File

@ -58,7 +58,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->kernelEventCompletionData =
std::make_unique<KernelEventCompletionData<TagSizeT>[]>(event->maxKernelCount);
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet() && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled();
bool useContextEndOffset = false;
int32_t overrideUseContextEndOffset = NEO::debugManager.flags.UseContextEndOffsetForEventCompletion.get();
if (overrideUseContextEndOffset != -1) {
useContextEndOffset = !!overrideUseContextEndOffset;
@ -114,6 +114,17 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
return event;
}
// Out-of-line EventImp constructor: forwards eventPool, index and device to the
// Event base class and stores the tbxMode flag.
template <typename TagSizeT>
EventImp<TagSizeT>::EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode)
: Event(eventPool, index, device), tbxMode(tbxMode) {
// Cache the field offsets reported by NEO::TimestampPackets for this tag size.
contextStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextStartOffset();
contextEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextEndOffset();
globalStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalStartOffset();
globalEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalEndOffset();
// Size of one tag expressed in uint32_t units.
timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t));
// The single-packet size is taken from the device's L0 gfx core helper
// (its immediate-write post-sync offset).
singlePacketSize = device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset();
}
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
constexpr uint32_t skipL3EventPacketIndex = 2u;

View File

@ -86,6 +86,8 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
virtual bool platformSupportsImmediateComputeFlushTask() const = 0;
virtual zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const = 0;
virtual uint32_t getCmdListWaitOnMemoryDataSize() const = 0;
virtual bool hasUnifiedPostSyncAllocationLayout() const = 0;
virtual uint32_t getImmediateWritePostSyncOffset() const = 0;
protected:
L0GfxCoreHelper() = default;
@ -125,6 +127,8 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
bool platformSupportsImmediateComputeFlushTask() const override;
zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const override;
uint32_t getCmdListWaitOnMemoryDataSize() const override;
bool hasUnifiedPostSyncAllocationLayout() const override;
uint32_t getImmediateWritePostSyncOffset() const override;
protected:
L0GfxCoreHelperHw() = default;

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/gfx_core_helper.h"
@ -53,4 +54,14 @@ uint32_t L0GfxCoreHelperHw<Family>::getCmdListWaitOnMemoryDataSize() const {
}
}
// Reports whether this family uses a unified post-sync allocation layout.
// This generic implementation always answers false.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::hasUnifiedPostSyncAllocationLayout() const {
return false;
}
// Returns the immediate-write post-sync offset for this family by forwarding to
// NEO::ImplicitScalingDispatch<Family>::getImmediateWritePostSyncOffset().
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getImmediateWritePostSyncOffset() const {
return NEO::ImplicitScalingDispatch<Family>::getImmediateWritePostSyncOffset();
}
} // namespace L0

View File

@ -37,10 +37,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
return false;
}
// Returns true for every value of the EnableDynamicPostSyncAllocLayout debug
// flag except 0.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
return (NEO::debugManager.flags.EnableDynamicPostSyncAllocLayout.get() != 0);
}
// Identifies this configuration as the L0 API type.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
return ApiSpecificConfig::L0;
}

View File

@ -13,6 +13,7 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
@ -307,6 +308,8 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
using MI_MATH = typename FamilyType::MI_MATH;
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
auto &rootDeviceEnv = device->getNEODevice()->getRootDeviceEnvironment();
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
eventPoolDesc.count = 2;
@ -354,7 +357,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
}
size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(false) +
NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(device->getNEODevice()->getRootDeviceEnvironment());
NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnv);
size_t stopRegisters = timestampRegisters + postBarrierSynchronization;
auto useSizeBefore = cmdListStream->getUsed();
@ -363,8 +366,10 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse());
auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout();
size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
if (!unifiedPostSyncLayout) {
totaSizedBarrierWithTimestampEvent += 4 * sizeof(MI_LOAD_REGISTER_IMM);
}
@ -374,7 +379,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
GenCmdList cmdList;
auto registersSizeToParse = timestampRegisters;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
if (!unifiedPostSyncLayout) {
registersSizeToParse += sizeof(MI_LOAD_REGISTER_IMM);
}
@ -389,7 +394,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
true);
auto barrierOffset = timestampRegisters;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
if (!unifiedPostSyncLayout) {
barrierOffset += 2 * sizeof(MI_LOAD_REGISTER_IMM);
}

View File

@ -1517,8 +1517,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

View File

@ -14,6 +14,7 @@
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@ -264,8 +265,14 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
commandList->partitionCount = 2;
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle()));
size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false) +
(2 * sizeof(MI_LOAD_REGISTER_IMM));
size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false);
auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout();
if (!unifiedPostSyncLayout) {
expectedSize += (2 * sizeof(MI_LOAD_REGISTER_IMM));
}
size_t usedSize = cmdStream->getUsed() - offset;
EXPECT_EQ(expectedSize, usedSize);
@ -286,10 +293,15 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
auto endLriItor = cmdList.rbegin();
lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*endLriItor);
ASSERT_NE(nullptr, lriCmd);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), lriCmd->getDataDword());
if (unifiedPostSyncLayout) {
EXPECT_EQ(nullptr, lriCmd);
} else {
ASSERT_NE(nullptr, lriCmd);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), lriCmd->getDataDword());
}
}
event->setEventTimestampFlag(false);

View File

@ -1261,14 +1261,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
IsXeHpOrXeHpgCore) {
arg.expectedPacketsInUse = 8;
arg.expectedKernelCount = 3;
arg.expectedWalkerPostSyncOp = 3;
arg.expectedWalkerPostSyncOp = 1;
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
arg.expectedWalkerPostSyncOp = 1;
}
input.srcPtr = reinterpret_cast<void *>(0x1231);
input.dstPtr = reinterpret_cast<void *>(0x200002345);
input.size = 0x100002345;
@ -1307,14 +1303,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
IsXeHpOrXeHpgCore) {
arg.expectedPacketsInUse = 4;
arg.expectedKernelCount = 1;
arg.expectedWalkerPostSyncOp = 3;
arg.expectedWalkerPostSyncOp = 1;
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
arg.expectedWalkerPostSyncOp = 1;
}
input.srcPtr = reinterpret_cast<void *>(0x1000);
input.dstPtr = reinterpret_cast<void *>(0x20000000);
input.size = 0x100000000;
@ -1446,14 +1438,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket,
IsXeHpOrXeHpgCore) {
arg.expectedPacketsInUse = 4;
arg.expectedKernelCount = 1;
arg.expectedWalkerPostSyncOp = 3;
arg.expectedWalkerPostSyncOp = 1;
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
arg.expectedWalkerPostSyncOp = 1;
}
input.srcPtr = reinterpret_cast<void *>(0x1000);
input.dstPtr = reinterpret_cast<void *>(0x20000000);
input.size = 0x100000000;

View File

@ -935,10 +935,7 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
// two kernels and each kernel uses two packets (for two tiles), in total 4
arg.expectedPacketsInUse = 4;
arg.expectedKernelCount = 2;
arg.expectedWalkerPostSyncOp = 3;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
arg.expectedWalkerPostSyncOp = 1;
}
arg.expectedWalkerPostSyncOp = 1;
arg.expectedPostSyncPipeControls = 0;
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, input.device->getNEODevice()->getRootDeviceEnvironment())) {
@ -991,15 +988,11 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
// kernel uses 4 packets, in addition to kernel two packets, use 2 packets to two tile cache flush
arg.expectedPacketsInUse = 4;
arg.expectedKernelCount = 1;
arg.expectedWalkerPostSyncOp = 3;
arg.expectedWalkerPostSyncOp = 1;
// cache flush with event signal
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
arg.expectedWalkerPostSyncOp = 1;
}
input.eventPoolFlags = 0;
if (input.signalAllPackets) {
@ -1216,10 +1209,7 @@ HWTEST2_F(MultiTileAppendFillCompactL3EventTest,
} else {
arg.expectedPacketsInUse = 4;
arg.expectedKernelCount = 2;
arg.expectedWalkerPostSyncOp = 3;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
arg.expectedWalkerPostSyncOp = 1;
}
arg.expectedWalkerPostSyncOp = 1;
arg.expectedPostSyncPipeControls = 0;
arg.postSyncAddressZero = false;
input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize();

View File

@ -94,21 +94,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListTests, whenCommandListIsCreatedThenPCAnd
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
}
// Checks that, with EnableDynamicPostSyncAllocLayout set to 0, calling
// appendDispatchOffsetRegister(true, true) leaves the command stream usage unchanged.
HWTEST2_F(CommandListTests, givenDebugFlagSetWhenCallingRegisterOffsetThenDontProgramMmio, IsAtLeastXeHpCore) {
// presumably restores the debug flags when the test scope ends — TODO confirm
DebugManagerStateRestore restorer;
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
auto &commandContainer = pCommandList->getCmdContainer();
// Remember how much of the stream is used before the call under test.
auto offset = commandContainer.getCommandStream()->getUsed();
pCommandList->appendDispatchOffsetRegister(true, true);
EXPECT_EQ(offset, commandContainer.getCommandStream()->getUsed());
}
HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeControlPriorToNonPipelinedStateCommandIsEnabledThenPCAndStateBaseAddressCmdsAreAddedAndCorrectlyProgrammed, IsAtLeastXeHpCore) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@ -548,13 +533,9 @@ HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest,
arg.expectedKernelCount = 1;
arg.expectedPacketsInUse = 4;
arg.expectedPostSyncPipeControls = 1;
arg.expectedWalkerPostSyncOp = 3;
arg.expectedWalkerPostSyncOp = 1;
arg.postSyncAddressZero = false;
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
arg.expectedWalkerPostSyncOp = 1;
}
input.eventPoolFlags = 0;
testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
@ -664,7 +645,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
expectedWalkerPostSyncOp = 1;
}
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() && expectedWalkerPostSyncOp == 3 && eventPoolFlags == 0 && multiTile != 0) {
if (expectedWalkerPostSyncOp == 3 && eventPoolFlags == 0 && multiTile != 0) {
expectedWalkerPostSyncOp = 1;
}
@ -1356,7 +1337,6 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventW
struct MultiTileCommandListSignalAllocLayoutTest : public MultiTileCommandListSignalAllEventPacketTest {
void SetUp() override {
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
MultiTileCommandListSignalAllEventPacketTest::SetUp();
}
};

View File

@ -548,37 +548,6 @@ TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolWithDeviceAllocT
using EventPoolCreateMultiDevice = Test<MultiDeviceFixture>;
// Checks that, with EnableDynamicPostSyncAllocLayout set to 0, an event created
// from a one-entry pool reports a single packet size of four TimestampPacketType values.
HWTEST_F(EventPoolCreateMultiDevice, givenDebugFlagSetWhenCreatingEventThenUseTsPacketSize) {
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
ASSERT_NE(0u, driverHandle->devices.size());
// Use the first device of the driver handle for the pool.
auto device = driverHandle->devices[0];
auto deviceHandle = device->toHandle();
ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
eventPoolDesc.count = 1;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = L0::EventPool::create(device->getDriverHandle(), context, 1, &deviceHandle, &eventPoolDesc, result);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
ze_event_handle_t hEvent = nullptr;
result = eventPool->createEvent(&eventDesc, &hEvent);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto eventObj = Event::fromHandle(hEvent);
constexpr size_t timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType);
EXPECT_EQ(timestampPacketTypeSize * 4, eventObj->getSinglePacketSize());
// Release the event before the pool it was created from.
eventObj->destroy();
eventPool->destroy();
}
TEST_F(EventPoolCreateMultiDevice, whenGettingIpcHandleForEventPoolWhenHostShareableMemoryIsFalseThenUnsuportedIsReturned) {
uint32_t numEvents = 4;
ze_event_pool_desc_t eventPoolDesc = {
@ -3008,30 +2977,12 @@ HWTEST_F(EventSizeTests, whenCreatingEventPoolThenUseCorrectSizeAndAlignment) {
EXPECT_EQ(timestampPacketTypeSize * 2, eventObj0->getContextEndOffset());
EXPECT_EQ(timestampPacketTypeSize * 3, eventObj0->getGlobalEndOffset());
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
EXPECT_EQ(sizeof(uint64_t), eventObj0->getSinglePacketSize());
} else {
EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize());
}
EXPECT_EQ(l0GfxCoreHelper.getImmediateWritePostSyncOffset(), eventObj0->getSinglePacketSize());
auto hostPtrDiff = ptrDiff(eventObj1->getHostAddress(), eventObj0->getHostAddress());
EXPECT_EQ(expectedSize, hostPtrDiff);
}
// Checks that, with EnableDynamicPostSyncAllocLayout set to 0, eventObj0 reports a
// single packet size of four TimestampPacketType values.
HWTEST_F(EventSizeTests, givenDebugFlagSetWhenCreatingEventThenUseTsPacketSize) {
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
ze_result_t result = ZE_RESULT_SUCCESS;
// Recreate the fixture's pool after the flag change, then create the events from it.
eventPool.reset(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
createEvents();
constexpr size_t timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType);
EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize());
}
HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAndAlignment) {
auto &gfxCoreHelper = device->getGfxCoreHelper();
auto &hwInfo = device->getHwInfo();
@ -3094,9 +3045,8 @@ HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAn
}
}
HWTEST_F(EventTests, givenDebugFlagSetWhenCreatingNonTimestampEventsThenPacketsSizeIsQword) {
HWTEST_F(EventTests, whenCreatingNonTimestampEventsThenPacketsSizeIsQword) {
DebugManagerStateRestore restore;
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
ze_result_t result = ZE_RESULT_SUCCESS;
@ -3122,7 +3072,7 @@ HWTEST_F(EventTests, givenDebugFlagSetWhenCreatingNonTimestampEventsThenPacketsS
auto timestampSinglePacketSize = NEO::TimestampPackets<typename FamilyType::TimestampPacketType, FamilyType::timestampPacketCount>::getSinglePacketSize();
EXPECT_EQ(timestampSinglePacketSize, timestampEvent->getSinglePacketSize());
EXPECT_EQ(sizeof(uint64_t), regularEvent->getSinglePacketSize());
EXPECT_EQ(device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset(), regularEvent->getSinglePacketSize());
timestampEvent->destroy();
regularEvent->destroy();

View File

@ -43,16 +43,6 @@ TEST(ApiSpecificConfigL0Tests, WhenCheckingIfDeviceAllocationCacheIsEnabledThenR
EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
}
// Checks that isDynamicPostSyncAllocLayoutEnabled() is true before the flag is
// touched and becomes false once EnableDynamicPostSyncAllocLayout is set to 0.
TEST(ApiSpecificConfigL0Tests, GivenDebugFlagSetWhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnFalse) {
// presumably restores the debug flags when the test scope ends — TODO confirm
DebugManagerStateRestore restore;
EXPECT_TRUE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
}
TEST(ApiSpecificConfigL0Tests, GivenDebugFlagCombinationsGetCorrectSharedAllocPrefetchEnabled) {
DebugManagerStateRestore restore;

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/gfx_core_helper.h"
@ -887,6 +888,20 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperOnGenPlatformsWhenGettingPlat
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsImmediateComputeFlushTask());
}
// Checks that the L0GfxCoreHelper obtained from a mock execution environment
// reports no unified post-sync allocation layout.
HWTEST_F(L0GfxCoreHelperTest, whenAskingForUnifiedPostSyncAllocLayoutThenReturnFalse) {
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
EXPECT_FALSE(l0GfxCoreHelper.hasUnifiedPostSyncAllocationLayout());
}
// Checks that the helper's immediate-write post-sync offset equals the value
// returned by NEO::ImplicitScalingDispatch<FamilyType>.
HWTEST_F(L0GfxCoreHelperTest, whenAskingForImmediateWritePostSyncOffsetThenReturnValueFromImplicitScalingHelper) {
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), l0GfxCoreHelper.getImmediateWritePostSyncOffset());
}
TEST_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenGettingDefaultUseImmediateFlushTaskThenUsePlatformDefaultSetting) {
MockExecutionEnvironment executionEnvironment;
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0].get();

View File

@ -36,10 +36,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
return false;
}
// Always returns false in this configuration; no debug flag is consulted.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
return false;
}
// Identifies this configuration as the OCL API type.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
return ApiSpecificConfig::OCL;
}

View File

@ -45,16 +45,6 @@ TEST(ApiSpecificConfigOclTests, WhenCheckingIfDeviceAllocationCacheIsEnabledThen
EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
}
// Checks that isDynamicPostSyncAllocLayoutEnabled() stays false both before and
// after EnableDynamicPostSyncAllocLayout is set to 1.
TEST(ApiSpecificConfigOclTests, WhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnFalse) {
// presumably restores the debug flags when the test scope ends — TODO confirm
DebugManagerStateRestore restore;
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
}
TEST(ApiSpecificConfigOclTests, givenEnableStatelessCompressionWhenProvidingSvmGpuAllocationThenPreferCompressedBuffer) {
DebugManagerStateRestore dbgRestorer;
debugManager.flags.RenderCompressedBuffersEnabled.set(1);

View File

@ -393,9 +393,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
if (args.partitionCount > 1 && !args.isInternal) {
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
if (args.eventAddress != 0 && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
walkerCmd.getPostSync().setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP);
}
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
walkerCmd,
&args.outWalkerPtr,

View File

@ -241,10 +241,7 @@ inline void ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(LinearStr
template <typename GfxFamily>
inline uint32_t ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset() {
if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
return static_cast<uint32_t>(sizeof(uint64_t));
}
return static_cast<uint32_t>(GfxCoreHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw());
return static_cast<uint32_t>(sizeof(uint64_t));
}
template <typename GfxFamily>

View File

@ -254,7 +254,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush d
DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics")
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows")
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerificationAfterFailedSubmission, -1, "-1: default, 0: disable, 1: enable check of device state after failed submit on Windows")
DECLARE_DEBUG_VARIABLE(int32_t, EnableDynamicPostSyncAllocLayout, -1, "-1: default, 0: Keep Timestamp size layout, 1: Use write immediate layout (qword) and switch dynamically to TS for profiling")
DECLARE_DEBUG_VARIABLE(int32_t, PrintTimestampPacketUsage, -1, "-1: default, 0: Disabled, 1: Print when TSP is allocated, initialized, returned to pool, etc.")
DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeEventBeforeReset, -1, "-1: default, 0: Disabled, 1: Synchronize Event completion on host before calling reset. 2: Synchronize + print extra logs.")
DECLARE_DEBUG_VARIABLE(int32_t, TrackNumCsrClientsOnSyncPoints, -1, "-1: default, 0: Disabled, 1: If set, synchronization points like zeEventHostSynchronize will unregister CmdQ from CSR clients")

View File

@ -23,7 +23,6 @@ struct ApiSpecificConfig {
static bool getGlobalBindlessHeapConfiguration();
static bool getBindlessMode(const ReleaseHelper *);
static bool isDeviceAllocationCacheEnabled();
static bool isDynamicPostSyncAllocLayoutEnabled();
static ApiType getApiType();
static std::string getName();
static uint64_t getReducedMaxAllocSize(uint64_t maxAllocSize);

View File

@ -542,7 +542,6 @@ OverrideHwIpVersion = -1
PrintGlobalTimestampInNs = 0
EnableDeviceStateVerification = -1
VfBarResourceAllocationWa = 1
EnableDynamicPostSyncAllocLayout = -1
PrintTimestampPacketUsage = -1
TrackNumCsrClientsOnSyncPoints = -1
EventTimestampRefreshIntervalInMilliSec = -1

View File

@ -40,10 +40,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
return false;
}
// Returns true only when the EnableDynamicPostSyncAllocLayout debug flag equals 1.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
return (NEO::debugManager.flags.EnableDynamicPostSyncAllocLayout.get() == 1);
}
// Returns the API type held in apiTypeForUlts (declared outside this hunk).
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
return apiTypeForUlts;
}

View File

@ -1058,38 +1058,6 @@ struct CommandEncodeStatesImplicitScalingFixtureT : public CommandEncodeStatesFi
using CommandEncodeStatesImplicitScalingFixture = CommandEncodeStatesImplicitScalingFixtureT<false, false>;
using CommandEncodeStatesImplicitScaling = Test<CommandEncodeStatesImplicitScalingFixture>;
// Encodes a kernel dispatch with partitionCount 2 and a non-zero event address, then
// checks that the first walker found in the command stream has a WRITE_TIMESTAMP
// post-sync operation targeting that event address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling,
givenStaticPartitioningWhenNonTimestampEventProvidedThenExpectTimestampComputeWalkerPostSync) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
uint32_t dims[] = {16, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
bool requiresUncachedMocs = false;
uint64_t eventAddress = 0xFF112233000;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress;
dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
size_t usedBuffer = cmdContainer->getCommandStream()->getUsed();
// The partition count passed in must still be 2 after encoding.
EXPECT_EQ(2u, dispatchArgs.partitionCount);
// Parse everything written so far and locate the first walker command.
GenCmdList partitionedWalkerList;
CmdParse<FamilyType>::parseCommandBuffer(
partitionedWalkerList,
cmdContainer->getCommandStream()->getCpuBase(),
usedBuffer);
auto itor = find<DefaultWalkerType *>(partitionedWalkerList.begin(), partitionedWalkerList.end());
ASSERT_NE(itor, partitionedWalkerList.end());
auto partitionWalkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSync = partitionWalkerCmd->getPostSync();
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation());
EXPECT_EQ(eventAddress, postSync.getDestinationAddress());
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativeKernelWhenEncodingDispatchKernelThenExpectPartitionSizeEqualWorkgroupSize) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

View File

@ -1564,15 +1564,3 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*bbStartList.begin());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}
// Checks getImmediateWritePostSyncOffset(): before the flag is set it must equal the
// single timestamp packet size; after EnableDynamicPostSyncAllocLayout is set to 1 it
// must equal sizeof(uint64_t) if ApiSpecificConfig reports the layout enabled, and the
// timestamp packet size otherwise.
HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, givenDebugFlagSetWhenCheckingImmWriteOffsetThenReturnQwordSize) {
EXPECT_EQ(static_cast<uint32_t>(GfxCoreHelperHw<FamilyType>::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
EXPECT_EQ(static_cast<uint32_t>(sizeof(uint64_t)), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
} else {
EXPECT_EQ(static_cast<uint32_t>(GfxCoreHelperHw<FamilyType>::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
}
}