Disable kernel timestamp when not using implicit scaling

Related-To: LOCI-2826

Signed-off-by: Lu, Wenbin <wenbin.lu@intel.com>
This commit is contained in:
Lu, Wenbin
2023-03-24 21:26:48 +00:00
committed by Compute-Runtime-Automation
parent 3e5101424d
commit c66546df73
7 changed files with 59 additions and 20 deletions

View File

@@ -971,10 +971,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWithPostSyncAddedOnce,
IsXeHpOrXeHpgCore) {
auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
arg.expectedPacketsInUse = 4;
arg.expectedKernelCount = 3;
arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1;
arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1;
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
@@ -1014,10 +1013,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce,
IsXeHpOrXeHpgCore) {
auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
arg.expectedPacketsInUse = 2;
arg.expectedKernelCount = 1;
arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1;
arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1;
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
@@ -1163,10 +1161,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket,
HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket,
givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce,
IsXeHpOrXeHpgCore) {
auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
arg.expectedPacketsInUse = 2;
arg.expectedKernelCount = 1;
arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1;
arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1;
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;

View File

@@ -458,11 +458,10 @@ HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest,
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest,
givenAppendKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
IsXeHpOrXeHpgCore) {
auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
arg.expectedKernelCount = 1;
arg.expectedPacketsInUse = 2;
arg.expectedPostSyncPipeControls = 1;
arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1;
arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1;
arg.postSyncAddressZero = false;
input.eventPoolFlags = 0;
@@ -595,8 +594,6 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
using OPERATION = typename POSTSYNC_DATA::OPERATION;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
auto &l0GfxCoreHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
@@ -634,7 +631,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
auto firstWalker = itorWalkers[0];
uint32_t expectedWalkerPostSyncOp = 3;
if (multiTile == 0 && eventPoolFlags == 0 && !l0GfxCoreHelper.multiTileCapablePlatform()) {
if (multiTile == 0 && eventPoolFlags == 0 && !eventPool->isImplicitScalingCapableFlagSet()) {
expectedWalkerPostSyncOp = 1;
}
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);

View File

@@ -408,6 +408,42 @@ TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenHandleAndIsH
driverHandle->setMemoryManager(curMemoryManager);
}
// Checks that the IPC handle payload filled in by EventPool::getIpcHandle
// carries an isImplicitScalingCapable value that agrees with both the device
// and the event pool the handle was obtained from.
TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenIsImplicitScalingCapableReturnedInHandle) {
    uint32_t eventCount = 2;

    // Describe an IPC-enabled pool holding eventCount events.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
    poolDesc.pNext = nullptr;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_IPC;
    poolDesc.count = eventCount;

    auto hDevice = device->toHandle();
    ze_result_t createStatus = ZE_RESULT_SUCCESS;

    // Swap in the IPC mock memory manager for the duration of the test; the
    // previous manager is put back at the end.
    auto originalMemoryManager = driverHandle->getMemoryManager();
    auto *ipcMockMemoryManager = new MemoryManagerEventPoolIpcMock(*neoDevice->executionEnvironment);
    driverHandle->setMemoryManager(ipcMockMemoryManager);

    auto eventPool = EventPool::create(driverHandle.get(), context, 1, &hDevice, &poolDesc, createStatus);
    EXPECT_EQ(ZE_RESULT_SUCCESS, createStatus);
    EXPECT_NE(nullptr, eventPool);

    ze_ipc_event_pool_handle_t poolIpcHandle = {};
    ze_result_t status = eventPool->getIpcHandle(&poolIpcHandle);
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);

    // The opaque handle bytes are interpreted as the IpcEventPoolData layout.
    auto *ipcData = reinterpret_cast<IpcEventPoolData *>(poolIpcHandle.data);

    // All-ones is the value the handle field must NOT hold after a successful export.
    constexpr uint64_t invalidHandle = ~uint64_t{0};
    EXPECT_NE(invalidHandle, ipcData->handle);
    EXPECT_EQ(ipcData->numEvents, 2u);
    EXPECT_EQ(ipcData->numDevices, 1u);

    // The flag stored in the handle has to match the device and the pool.
    EXPECT_EQ(ipcData->isImplicitScalingCapable, device->isImplicitScalingCapable());
    EXPECT_EQ(ipcData->isImplicitScalingCapable, eventPool->isImplicitScalingCapableFlagSet());

    status = eventPool->destroy();
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);

    // Release the mock, then restore the manager that was active on entry.
    delete ipcMockMemoryManager;
    driverHandle->setMemoryManager(originalMemoryManager);
}
TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenHandleAndNumDevicesReturnedInHandle) {
uint32_t numEvents = 4;
ze_event_pool_desc_t eventPoolDesc = {
@@ -1285,7 +1321,7 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO
auto event = std::unique_ptr<L0::Event>(l0GfxCoreHelper.createEvent(eventPool.get(), &eventDesc, device));
ASSERT_NE(nullptr, event);
if (l0GfxCoreHelper.multiTileCapablePlatform()) {
if (eventPool->isImplicitScalingCapableFlagSet()) {
EXPECT_TRUE(event->isUsingContextEndOffset());
} else {
EXPECT_FALSE(event->isUsingContextEndOffset());
@@ -1349,9 +1385,7 @@ HWTEST2_F(EventCreate, givenPlatformSupportMultTileWhenDebugKeyIsSetToNotUseCont
DebugManagerStateRestore restorer;
NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.set(0);
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform();
EXPECT_TRUE(useContextEndOffset);
EXPECT_TRUE(l0GfxCoreHelper.multiTileCapablePlatform());
ze_event_pool_desc_t eventPoolDesc = {
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
@@ -1387,8 +1421,7 @@ HWTEST2_F(EventCreate, givenPlatformNotSupportsMultTileWhenDebugKeyIsSetToUseCon
DebugManagerStateRestore restorer;
NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.set(1);
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform();
EXPECT_FALSE(useContextEndOffset);
EXPECT_FALSE(l0GfxCoreHelper.multiTileCapablePlatform());
ze_event_pool_desc_t eventPoolDesc = {
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,

View File

@@ -62,8 +62,11 @@ HWTEST2_F(EventPoolIPCHandleHpcCoreTests, whenGettingIpcHandleForEventPoolWithDe
EXPECT_NE(expectedHandle, ipcHandleData.handle);
EXPECT_EQ(numEvents, ipcHandleData.numEvents);
EXPECT_EQ(0u, ipcHandleData.rootDeviceIndex);
EXPECT_EQ(1u, ipcHandleData.numDevices);
EXPECT_TRUE(ipcHandleData.isDeviceEventPoolAllocation);
EXPECT_TRUE(ipcHandleData.isHostVisibleEventPoolAllocation);
EXPECT_EQ(ipcHandleData.isImplicitScalingCapable, device->isImplicitScalingCapable());
EXPECT_EQ(ipcHandleData.isImplicitScalingCapable, eventPool->isImplicitScalingCapableFlagSet());
res = eventPool->destroy();
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
@@ -114,4 +117,4 @@ HWTEST2_F(EventPoolIPCHandleHpcCoreTests, whenOpeningIpcHandleForEventPoolWithHo
}
} // namespace ult
} // namespace L0
} // namespace L0