refactor: remove unused logic for checking dynamic postsync layout
Related-To: NEO-8210 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
2fe3804cc2
commit
df66a0276f
|
@ -520,7 +520,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Eve
|
|||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
|
||||
if (workloadPartitionEvent && NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
|
||||
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
|
||||
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset);
|
||||
|
|
|
@ -39,6 +39,9 @@ namespace L0 {
|
|||
template Event *Event::create<uint64_t>(EventPool *, const ze_event_desc_t *, Device *);
|
||||
template Event *Event::create<uint32_t>(EventPool *, const ze_event_desc_t *, Device *);
|
||||
|
||||
template struct EventImp<uint32_t>;
|
||||
template struct EventImp<uint64_t>;
|
||||
|
||||
ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) {
|
||||
this->context = static_cast<ContextImp *>(context);
|
||||
|
||||
|
|
|
@ -26,19 +26,7 @@ class KernelEventCompletionData : public NEO::TimestampPackets<TagSizeT, NEO::Ti
|
|||
template <typename TagSizeT>
|
||||
struct EventImp : public Event {
|
||||
|
||||
EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode)
|
||||
: Event(eventPool, index, device), tbxMode(tbxMode) {
|
||||
contextStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextStartOffset();
|
||||
contextEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextEndOffset();
|
||||
globalStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalStartOffset();
|
||||
globalEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalEndOffset();
|
||||
timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t));
|
||||
singlePacketSize = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize();
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
singlePacketSize = sizeof(uint64_t);
|
||||
}
|
||||
}
|
||||
EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode);
|
||||
|
||||
~EventImp() override {}
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
|||
event->kernelEventCompletionData =
|
||||
std::make_unique<KernelEventCompletionData<TagSizeT>[]>(event->maxKernelCount);
|
||||
|
||||
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet() && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled();
|
||||
bool useContextEndOffset = false;
|
||||
int32_t overrideUseContextEndOffset = NEO::debugManager.flags.UseContextEndOffsetForEventCompletion.get();
|
||||
if (overrideUseContextEndOffset != -1) {
|
||||
useContextEndOffset = !!overrideUseContextEndOffset;
|
||||
|
@ -114,6 +114,17 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
|||
return event;
|
||||
}
|
||||
|
||||
// Out-of-line constructor for EventImp<TagSizeT>.
// Forwards eventPool, index and device to the Event base class, stores tbxMode,
// and caches the packet-layout values used by this event.
// The four start/end offsets are queried from NEO::TimestampPackets instantiated
// with TagSizeT and the preferred packet count.
template <typename TagSizeT>
|
||||
EventImp<TagSizeT>::EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode)
|
||||
: Event(eventPool, index, device), tbxMode(tbxMode) {
|
||||
contextStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextStartOffset();
|
||||
contextEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getContextEndOffset();
|
||||
globalStartOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalStartOffset();
|
||||
globalEndOffset = NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getGlobalEndOffset();
|
||||
// Size of one TagSizeT value expressed in 32-bit units.
timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t));
|
||||
// Packet size is supplied by the device's L0GfxCoreHelper; the device pointer is
// dereferenced here without a null check.
singlePacketSize = device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset();
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
|
||||
constexpr uint32_t skipL3EventPacketIndex = 2u;
|
||||
|
|
|
@ -86,6 +86,8 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
|
|||
virtual bool platformSupportsImmediateComputeFlushTask() const = 0;
|
||||
virtual zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const = 0;
|
||||
virtual uint32_t getCmdListWaitOnMemoryDataSize() const = 0;
|
||||
virtual bool hasUnifiedPostSyncAllocationLayout() const = 0;
|
||||
virtual uint32_t getImmediateWritePostSyncOffset() const = 0;
|
||||
|
||||
protected:
|
||||
L0GfxCoreHelper() = default;
|
||||
|
@ -125,6 +127,8 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
|
|||
bool platformSupportsImmediateComputeFlushTask() const override;
|
||||
zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const override;
|
||||
uint32_t getCmdListWaitOnMemoryDataSize() const override;
|
||||
bool hasUnifiedPostSyncAllocationLayout() const override;
|
||||
uint32_t getImmediateWritePostSyncOffset() const override;
|
||||
|
||||
protected:
|
||||
L0GfxCoreHelperHw() = default;
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
|
||||
|
@ -53,4 +54,14 @@ uint32_t L0GfxCoreHelperHw<Family>::getCmdListWaitOnMemoryDataSize() const {
|
|||
}
|
||||
}
|
||||
|
||||
// Family-generic implementation of the L0GfxCoreHelper query.
// Always returns false, i.e. reports that the post-sync allocation layout is not unified.
template <typename Family>
|
||||
bool L0GfxCoreHelperHw<Family>::hasUnifiedPostSyncAllocationLayout() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Family-generic implementation of the L0GfxCoreHelper query.
// Returns the value of NEO::ImplicitScalingDispatch<Family>::getImmediateWritePostSyncOffset() unchanged.
template <typename Family>
|
||||
uint32_t L0GfxCoreHelperHw<Family>::getImmediateWritePostSyncOffset() const {
|
||||
return NEO::ImplicitScalingDispatch<Family>::getImmediateWritePostSyncOffset();
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
|
|
@ -37,10 +37,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Reads the EnableDynamicPostSyncAllocLayout debug flag.
// Returns false only when the flag equals 0; every other value yields true.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
|
||||
return (NEO::debugManager.flags.EnableDynamicPostSyncAllocLayout.get() != 0);
|
||||
}
|
||||
|
||||
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
|
||||
return ApiSpecificConfig::L0;
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
|
||||
|
@ -307,6 +308,8 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
|||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
auto &rootDeviceEnv = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
eventPoolDesc.count = 2;
|
||||
|
@ -354,7 +357,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
|||
}
|
||||
|
||||
size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(false) +
|
||||
NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(device->getNEODevice()->getRootDeviceEnvironment());
|
||||
NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnv);
|
||||
size_t stopRegisters = timestampRegisters + postBarrierSynchronization;
|
||||
|
||||
auto useSizeBefore = cmdListStream->getUsed();
|
||||
|
@ -363,8 +366,10 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
|||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse());
|
||||
|
||||
auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout();
|
||||
|
||||
size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters;
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
if (!unifiedPostSyncLayout) {
|
||||
totaSizedBarrierWithTimestampEvent += 4 * sizeof(MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
|
||||
|
@ -374,7 +379,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
|||
GenCmdList cmdList;
|
||||
|
||||
auto registersSizeToParse = timestampRegisters;
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
if (!unifiedPostSyncLayout) {
|
||||
registersSizeToParse += sizeof(MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
|
||||
|
@ -389,7 +394,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
|||
true);
|
||||
|
||||
auto barrierOffset = timestampRegisters;
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
if (!unifiedPostSyncLayout) {
|
||||
barrierOffset += 2 * sizeof(MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
|
||||
|
|
|
@ -1517,8 +1517,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
|
|||
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
|
@ -264,8 +265,14 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
|
|||
commandList->partitionCount = 2;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle()));
|
||||
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false) +
|
||||
(2 * sizeof(MI_LOAD_REGISTER_IMM));
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false);
|
||||
|
||||
auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout();
|
||||
|
||||
if (!unifiedPostSyncLayout) {
|
||||
expectedSize += (2 * sizeof(MI_LOAD_REGISTER_IMM));
|
||||
}
|
||||
|
||||
size_t usedSize = cmdStream->getUsed() - offset;
|
||||
EXPECT_EQ(expectedSize, usedSize);
|
||||
|
||||
|
@ -286,10 +293,15 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
|
|||
auto endLriItor = cmdList.rbegin();
|
||||
|
||||
lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*endLriItor);
|
||||
ASSERT_NE(nullptr, lriCmd);
|
||||
|
||||
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), lriCmd->getDataDword());
|
||||
if (unifiedPostSyncLayout) {
|
||||
EXPECT_EQ(nullptr, lriCmd);
|
||||
} else {
|
||||
ASSERT_NE(nullptr, lriCmd);
|
||||
|
||||
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), lriCmd->getDataDword());
|
||||
}
|
||||
}
|
||||
|
||||
event->setEventTimestampFlag(false);
|
||||
|
|
|
@ -1261,14 +1261,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
|
|||
IsXeHpOrXeHpgCore) {
|
||||
arg.expectedPacketsInUse = 8;
|
||||
arg.expectedKernelCount = 3;
|
||||
arg.expectedWalkerPostSyncOp = 3;
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
arg.expectedPostSyncPipeControls = 1;
|
||||
arg.postSyncAddressZero = false;
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
|
||||
input.srcPtr = reinterpret_cast<void *>(0x1231);
|
||||
input.dstPtr = reinterpret_cast<void *>(0x200002345);
|
||||
input.size = 0x100002345;
|
||||
|
@ -1307,14 +1303,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
|
|||
IsXeHpOrXeHpgCore) {
|
||||
arg.expectedPacketsInUse = 4;
|
||||
arg.expectedKernelCount = 1;
|
||||
arg.expectedWalkerPostSyncOp = 3;
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
arg.expectedPostSyncPipeControls = 1;
|
||||
arg.postSyncAddressZero = false;
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
|
||||
input.srcPtr = reinterpret_cast<void *>(0x1000);
|
||||
input.dstPtr = reinterpret_cast<void *>(0x20000000);
|
||||
input.size = 0x100000000;
|
||||
|
@ -1446,14 +1438,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket,
|
|||
IsXeHpOrXeHpgCore) {
|
||||
arg.expectedPacketsInUse = 4;
|
||||
arg.expectedKernelCount = 1;
|
||||
arg.expectedWalkerPostSyncOp = 3;
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
arg.expectedPostSyncPipeControls = 1;
|
||||
arg.postSyncAddressZero = false;
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
|
||||
input.srcPtr = reinterpret_cast<void *>(0x1000);
|
||||
input.dstPtr = reinterpret_cast<void *>(0x20000000);
|
||||
input.size = 0x100000000;
|
||||
|
|
|
@ -935,10 +935,7 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
|
|||
// two kernels and each kernel uses two packets (for two tiles), in total 4
|
||||
arg.expectedPacketsInUse = 4;
|
||||
arg.expectedKernelCount = 2;
|
||||
arg.expectedWalkerPostSyncOp = 3;
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
|
||||
arg.expectedPostSyncPipeControls = 0;
|
||||
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, input.device->getNEODevice()->getRootDeviceEnvironment())) {
|
||||
|
@ -991,15 +988,11 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
|
|||
// kernel uses 4 packets, in addition to kernel two packets, use 2 packets to two tile cache flush
|
||||
arg.expectedPacketsInUse = 4;
|
||||
arg.expectedKernelCount = 1;
|
||||
arg.expectedWalkerPostSyncOp = 3;
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
// cache flush with event signal
|
||||
arg.expectedPostSyncPipeControls = 1;
|
||||
arg.postSyncAddressZero = false;
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
|
||||
input.eventPoolFlags = 0;
|
||||
|
||||
if (input.signalAllPackets) {
|
||||
|
@ -1216,10 +1209,7 @@ HWTEST2_F(MultiTileAppendFillCompactL3EventTest,
|
|||
} else {
|
||||
arg.expectedPacketsInUse = 4;
|
||||
arg.expectedKernelCount = 2;
|
||||
arg.expectedWalkerPostSyncOp = 3;
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
arg.expectedPostSyncPipeControls = 0;
|
||||
arg.postSyncAddressZero = false;
|
||||
input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize();
|
||||
|
|
|
@ -94,21 +94,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListTests, whenCommandListIsCreatedThenPCAnd
|
|||
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
|
||||
}
|
||||
|
||||
// Checks that, with EnableDynamicPostSyncAllocLayout set to 0, calling
// appendDispatchOffsetRegister(true, true) leaves the command stream's used size unchanged.
HWTEST2_F(CommandListTests, givenDebugFlagSetWhenCallingRegisterOffsetThenDontProgramMmio, IsAtLeastXeHpCore) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
|
||||
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
auto &commandContainer = pCommandList->getCmdContainer();
|
||||
|
||||
// Snapshot of the stream usage before the call under test.
auto offset = commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
pCommandList->appendDispatchOffsetRegister(true, true);
|
||||
|
||||
EXPECT_EQ(offset, commandContainer.getCommandStream()->getUsed());
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeControlPriorToNonPipelinedStateCommandIsEnabledThenPCAndStateBaseAddressCmdsAreAddedAndCorrectlyProgrammed, IsAtLeastXeHpCore) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
@ -548,13 +533,9 @@ HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest,
|
|||
arg.expectedKernelCount = 1;
|
||||
arg.expectedPacketsInUse = 4;
|
||||
arg.expectedPostSyncPipeControls = 1;
|
||||
arg.expectedWalkerPostSyncOp = 3;
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
arg.postSyncAddressZero = false;
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
arg.expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
|
||||
input.eventPoolFlags = 0;
|
||||
|
||||
testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
|
||||
|
@ -664,7 +645,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
|||
expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() && expectedWalkerPostSyncOp == 3 && eventPoolFlags == 0 && multiTile != 0) {
|
||||
if (expectedWalkerPostSyncOp == 3 && eventPoolFlags == 0 && multiTile != 0) {
|
||||
expectedWalkerPostSyncOp = 1;
|
||||
}
|
||||
|
||||
|
@ -1356,7 +1337,6 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventW
|
|||
|
||||
// Test fixture derived from MultiTileCommandListSignalAllEventPacketTest.
// SetUp sets EnableDynamicPostSyncAllocLayout to 1 and then runs the base fixture's SetUp.
// NOTE(review): no flag restore is visible in this struct — presumably handled by the base fixture; confirm.
struct MultiTileCommandListSignalAllocLayoutTest : public MultiTileCommandListSignalAllEventPacketTest {
|
||||
void SetUp() override {
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
MultiTileCommandListSignalAllEventPacketTest::SetUp();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -548,37 +548,6 @@ TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolWithDeviceAllocT
|
|||
|
||||
using EventPoolCreateMultiDevice = Test<MultiDeviceFixture>;
|
||||
|
||||
// Checks that, with EnableDynamicPostSyncAllocLayout set to 0, an event created from a
// one-entry pool on the first device reports a single packet size of four
// FamilyType::TimestampPacketType elements.
// NOTE(review): the flag is changed without a DebugManagerStateRestore in this body —
// verify that the fixture restores it.
HWTEST_F(EventPoolCreateMultiDevice, givenDebugFlagSetWhenCreatingEventThenUseTsPacketSize) {
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
|
||||
|
||||
ASSERT_NE(0u, driverHandle->devices.size());
|
||||
auto device = driverHandle->devices[0];
|
||||
|
||||
auto deviceHandle = device->toHandle();
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto eventPool = L0::EventPool::create(device->getDriverHandle(), context, 1, &deviceHandle, &eventPoolDesc, result);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ze_event_desc_t eventDesc = {};
|
||||
ze_event_handle_t hEvent = nullptr;
|
||||
|
||||
result = eventPool->createEvent(&eventDesc, &hEvent);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto eventObj = Event::fromHandle(hEvent);
|
||||
|
||||
constexpr size_t timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType);
|
||||
|
||||
EXPECT_EQ(timestampPacketTypeSize * 4, eventObj->getSinglePacketSize());
|
||||
|
||||
// Explicit cleanup of the objects created above.
eventObj->destroy();
|
||||
|
||||
eventPool->destroy();
|
||||
}
|
||||
|
||||
TEST_F(EventPoolCreateMultiDevice, whenGettingIpcHandleForEventPoolWhenHostShareableMemoryIsFalseThenUnsuportedIsReturned) {
|
||||
uint32_t numEvents = 4;
|
||||
ze_event_pool_desc_t eventPoolDesc = {
|
||||
|
@ -3008,30 +2977,12 @@ HWTEST_F(EventSizeTests, whenCreatingEventPoolThenUseCorrectSizeAndAlignment) {
|
|||
EXPECT_EQ(timestampPacketTypeSize * 2, eventObj0->getContextEndOffset());
|
||||
EXPECT_EQ(timestampPacketTypeSize * 3, eventObj0->getGlobalEndOffset());
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
EXPECT_EQ(sizeof(uint64_t), eventObj0->getSinglePacketSize());
|
||||
} else {
|
||||
EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize());
|
||||
}
|
||||
EXPECT_EQ(l0GfxCoreHelper.getImmediateWritePostSyncOffset(), eventObj0->getSinglePacketSize());
|
||||
|
||||
auto hostPtrDiff = ptrDiff(eventObj1->getHostAddress(), eventObj0->getHostAddress());
|
||||
EXPECT_EQ(expectedSize, hostPtrDiff);
|
||||
}
|
||||
|
||||
// Checks that, with EnableDynamicPostSyncAllocLayout set to 0, eventObj0 reports a single
// packet size of four FamilyType::TimestampPacketType elements.
// NOTE(review): the flag is changed without a DebugManagerStateRestore in this body —
// verify that the fixture restores it.
HWTEST_F(EventSizeTests, givenDebugFlagSetWhenCreatingEventThenUseTsPacketSize) {
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
eventPool.reset(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
// Fixture helper; presumably populates eventObj0 used below — defined outside this view.
createEvents();
|
||||
|
||||
constexpr size_t timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType);
|
||||
|
||||
EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize());
|
||||
}
|
||||
|
||||
HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAndAlignment) {
|
||||
auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
|
@ -3094,9 +3045,8 @@ HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAn
|
|||
}
|
||||
}
|
||||
|
||||
HWTEST_F(EventTests, givenDebugFlagSetWhenCreatingNonTimestampEventsThenPacketsSizeIsQword) {
|
||||
HWTEST_F(EventTests, whenCreatingNonTimestampEventsThenPacketsSizeIsQword) {
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
|
@ -3122,7 +3072,7 @@ HWTEST_F(EventTests, givenDebugFlagSetWhenCreatingNonTimestampEventsThenPacketsS
|
|||
|
||||
auto timestampSinglePacketSize = NEO::TimestampPackets<typename FamilyType::TimestampPacketType, FamilyType::timestampPacketCount>::getSinglePacketSize();
|
||||
EXPECT_EQ(timestampSinglePacketSize, timestampEvent->getSinglePacketSize());
|
||||
EXPECT_EQ(sizeof(uint64_t), regularEvent->getSinglePacketSize());
|
||||
EXPECT_EQ(device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset(), regularEvent->getSinglePacketSize());
|
||||
|
||||
timestampEvent->destroy();
|
||||
regularEvent->destroy();
|
||||
|
|
|
@ -43,16 +43,6 @@ TEST(ApiSpecificConfigL0Tests, WhenCheckingIfDeviceAllocationCacheIsEnabledThenR
|
|||
EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
|
||||
}
|
||||
|
||||
// Checks ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled(): it is expected to be true
// before the flag is touched and false once EnableDynamicPostSyncAllocLayout is set to 0.
TEST(ApiSpecificConfigL0Tests, GivenDebugFlagSetWhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnFalse) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
EXPECT_TRUE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0);
|
||||
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
}
|
||||
|
||||
TEST(ApiSpecificConfigL0Tests, GivenDebugFlagCombinationsGetCorrectSharedAllocPrefetchEnabled) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
|
@ -887,6 +888,20 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperOnGenPlatformsWhenGettingPlat
|
|||
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsImmediateComputeFlushTask());
|
||||
}
|
||||
|
||||
// Checks that the L0GfxCoreHelper obtained from root device environment 0 of a
// MockExecutionEnvironment returns false from hasUnifiedPostSyncAllocationLayout().
HWTEST_F(L0GfxCoreHelperTest, whenAskingForUnifiedPostSyncAllocLayoutThenReturnFalse) {
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
|
||||
|
||||
EXPECT_FALSE(l0GfxCoreHelper.hasUnifiedPostSyncAllocationLayout());
|
||||
}
|
||||
|
||||
// Checks that L0GfxCoreHelper::getImmediateWritePostSyncOffset() returns the same value as
// NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset().
HWTEST_F(L0GfxCoreHelperTest, whenAskingForImmediateWritePostSyncOffsetThenReturnValueFromImplicitScalingHelper) {
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
|
||||
|
||||
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(), l0GfxCoreHelper.getImmediateWritePostSyncOffset());
|
||||
}
|
||||
|
||||
TEST_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenGettingDefaultUseImmediateFlushTaskThenUsePlatformDefaultSetting) {
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0].get();
|
||||
|
|
|
@ -36,10 +36,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// This variant ignores all debug flags and always reports the dynamic post-sync
// allocation layout as disabled.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
|
||||
return ApiSpecificConfig::OCL;
|
||||
}
|
||||
|
|
|
@ -45,16 +45,6 @@ TEST(ApiSpecificConfigOclTests, WhenCheckingIfDeviceAllocationCacheIsEnabledThen
|
|||
EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
|
||||
}
|
||||
|
||||
// Checks that ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() returns false both
// before and after EnableDynamicPostSyncAllocLayout is set to 1.
TEST(ApiSpecificConfigOclTests, WhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnFalse) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled());
|
||||
}
|
||||
|
||||
TEST(ApiSpecificConfigOclTests, givenEnableStatelessCompressionWhenProvidingSvmGpuAllocationThenPreferCompressedBuffer) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
debugManager.flags.RenderCompressedBuffersEnabled.set(1);
|
||||
|
|
|
@ -393,9 +393,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
|
||||
if (args.partitionCount > 1 && !args.isInternal) {
|
||||
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
|
||||
if (args.eventAddress != 0 && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
walkerCmd.getPostSync().setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP);
|
||||
}
|
||||
|
||||
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
|
||||
walkerCmd,
|
||||
&args.outWalkerPtr,
|
||||
|
|
|
@ -241,10 +241,7 @@ inline void ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(LinearStr
|
|||
|
||||
template <typename GfxFamily>
|
||||
inline uint32_t ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset() {
|
||||
if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
return static_cast<uint32_t>(sizeof(uint64_t));
|
||||
}
|
||||
return static_cast<uint32_t>(GfxCoreHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw());
|
||||
return static_cast<uint32_t>(sizeof(uint64_t));
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -254,7 +254,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush d
|
|||
DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerificationAfterFailedSubmission, -1, "-1: default, 0: disable, 1: enable check of device state after failed submit on Windows")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDynamicPostSyncAllocLayout, -1, "-1: default, 0: Keep Timestamp size layout, 1: Use write immediate layout (qword) and switch dynamically to TS for profiling")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PrintTimestampPacketUsage, -1, "-1: default, 0: Disabled, 1: Print when TSP is allocated, initialized, returned to pool, etc.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeEventBeforeReset, -1, "-1: default, 0: Disabled, 1: Synchronize Event completion on host before calling reset. 2: Synchronize + print extra logs.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, TrackNumCsrClientsOnSyncPoints, -1, "-1: default, 0: Disabled, 1: If set, synchronization points like zeEventHostSynchronize will unregister CmdQ from CSR clients")
|
||||
|
|
|
@ -23,7 +23,6 @@ struct ApiSpecificConfig {
|
|||
static bool getGlobalBindlessHeapConfiguration();
|
||||
static bool getBindlessMode(const ReleaseHelper *);
|
||||
static bool isDeviceAllocationCacheEnabled();
|
||||
static bool isDynamicPostSyncAllocLayoutEnabled();
|
||||
static ApiType getApiType();
|
||||
static std::string getName();
|
||||
static uint64_t getReducedMaxAllocSize(uint64_t maxAllocSize);
|
||||
|
|
|
@ -542,7 +542,6 @@ OverrideHwIpVersion = -1
|
|||
PrintGlobalTimestampInNs = 0
|
||||
EnableDeviceStateVerification = -1
|
||||
VfBarResourceAllocationWa = 1
|
||||
EnableDynamicPostSyncAllocLayout = -1
|
||||
PrintTimestampPacketUsage = -1
|
||||
TrackNumCsrClientsOnSyncPoints = -1
|
||||
EventTimestampRefreshIntervalInMilliSec = -1
|
||||
|
|
|
@ -40,10 +40,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Reads the EnableDynamicPostSyncAllocLayout debug flag.
// Returns true only when the flag is exactly 1; every other value yields false.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
|
||||
return (NEO::debugManager.flags.EnableDynamicPostSyncAllocLayout.get() == 1);
|
||||
}
|
||||
|
||||
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
|
||||
return apiTypeForUlts;
|
||||
}
|
||||
|
|
|
@ -1058,38 +1058,6 @@ struct CommandEncodeStatesImplicitScalingFixtureT : public CommandEncodeStatesFi
|
|||
using CommandEncodeStatesImplicitScalingFixture = CommandEncodeStatesImplicitScalingFixtureT<false, false>;
|
||||
using CommandEncodeStatesImplicitScaling = Test<CommandEncodeStatesImplicitScalingFixture>;
|
||||
|
||||
// Encodes a kernel dispatch with partitionCount = 2 and a non-zero event address, then
// parses the command stream and checks that the first walker's post-sync uses
// OPERATION_WRITE_TIMESTAMP and targets the event address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling,
|
||||
givenStaticPartitioningWhenNonTimestampEventProvidedThenExpectTimestampComputeWalkerPostSync) {
|
||||
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
|
||||
uint32_t dims[] = {16, 1, 1};
|
||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
||||
|
||||
bool requiresUncachedMocs = false;
|
||||
uint64_t eventAddress = 0xFF112233000;
|
||||
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
|
||||
dispatchArgs.eventAddress = eventAddress;
|
||||
dispatchArgs.partitionCount = 2;
|
||||
|
||||
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
|
||||
size_t usedBuffer = cmdContainer->getCommandStream()->getUsed();
|
||||
// The partition count passed in must still be 2 after encoding.
EXPECT_EQ(2u, dispatchArgs.partitionCount);
|
||||
|
||||
GenCmdList partitionedWalkerList;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(
|
||||
partitionedWalkerList,
|
||||
cmdContainer->getCommandStream()->getCpuBase(),
|
||||
usedBuffer);
|
||||
|
||||
// Locate the first walker command in the parsed stream.
auto itor = find<DefaultWalkerType *>(partitionedWalkerList.begin(), partitionedWalkerList.end());
|
||||
ASSERT_NE(itor, partitionedWalkerList.end());
|
||||
auto partitionWalkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||
auto &postSync = partitionWalkerCmd->getPostSync();
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation());
|
||||
EXPECT_EQ(eventAddress, postSync.getDestinationAddress());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativeKernelWhenEncodingDispatchKernelThenExpectPartitionSizeEqualWorkgroupSize) {
|
||||
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
|
||||
using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
|
|
|
@ -1564,15 +1564,3 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*bbStartList.begin());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
}
|
||||
|
||||
// Checks ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset(): it is expected
// to equal the single timestamp packet size before the flag is set, and sizeof(uint64_t) after
// EnableDynamicPostSyncAllocLayout is set to 1 if ApiSpecificConfig reports the layout as enabled.
// NOTE(review): the flag is changed without a DebugManagerStateRestore in this body —
// verify that the fixture restores it.
HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, givenDebugFlagSetWhenCheckingImmWriteOffsetThenReturnQwordSize) {
|
||||
EXPECT_EQ(static_cast<uint32_t>(GfxCoreHelperHw<FamilyType>::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
|
||||
|
||||
debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1);
|
||||
|
||||
// The expectation follows whatever ApiSpecificConfig reports after the flag change.
if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
EXPECT_EQ(static_cast<uint32_t>(sizeof(uint64_t)), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
|
||||
} else {
|
||||
EXPECT_EQ(static_cast<uint32_t>(GfxCoreHelperHw<FamilyType>::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue