From 6d7e2760dcea945db41f27a280b1ca3eaa41988c Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Tue, 3 Sep 2024 09:38:57 +0000 Subject: [PATCH] refactor: correct expectations in level zero tests if heapless enabled 3/n Related-To: NEO-10641 Signed-off-by: Kamil Kopryk --- .../fixtures/in_order_cmd_list_fixture.h | 37 ++++ .../sources/cmdlist/test_in_order_cmdlist.cpp | 159 ++++++++++-------- .../sources/cmdqueue/test_cmdqueue_1.cpp | 6 +- .../test_cmdqueue_enqueue_cmdlist.cpp | 22 ++- .../unit_tests/sources/event/test_event.cpp | 3 + .../unit_tests/sources/memory/test_memory.cpp | 8 + .../command_stream_receiver_hw.h | 2 +- .../libult/ult_command_stream_receiver.h | 7 + 8 files changed, 171 insertions(+), 73 deletions(-) diff --git a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h index 27887e1d39..12cc9b37d7 100644 --- a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h @@ -273,5 +273,42 @@ bool InOrderCmdListFixture::verifyInOrderDependency(GenCmdList::iterator &cmd, u cmd++; return true; } + +struct MultiTileInOrderCmdListFixture : public InOrderCmdListFixture { + void SetUp() override { + NEO::debugManager.flags.CreateMultipleSubDevices.set(partitionCount); + NEO::debugManager.flags.EnableImplicitScaling.set(4); + + InOrderCmdListFixture::SetUp(); + } + + template + DestroyableZeUniquePtr>> createMultiTileImmCmdList() { + auto cmdList = createImmCmdList(); + + cmdList->partitionCount = partitionCount; + + return cmdList; + } + + template + DestroyableZeUniquePtr>> createMultiTileRegularCmdList(bool copyOnly) { + auto cmdList = createRegularCmdList(copyOnly); + + cmdList->partitionCount = partitionCount; + + return cmdList; + } + + const uint32_t partitionCount = 2; +}; + +struct MultiTileSynchronizedDispatchFixture : public MultiTileInOrderCmdListFixture { + void SetUp() 
override { + NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1); + MultiTileInOrderCmdListFixture::SetUp(); + } +}; + } // namespace ult } // namespace L0 \ No newline at end of file diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp index 5a33240a86..126d6e1ee4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp @@ -1628,6 +1628,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto immCmdList = createImmCmdList(); + bool heaplessEnabled = immCmdList->isHeaplessModeEnabled(); auto eventPool = createEvents(1, false); events[0]->makeCounterBasedImplicitlyDisabled(); @@ -1656,21 +1657,21 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer offset = cmdStream->getUsed(); immCmdList->appendMemoryCopy(alloc, alloc, 1, eventHandle, 0, nullptr, false, false); - findSemaphores(numSemaphores); // implicit dependency + optional chaining + findSemaphores(heaplessEnabled ? 1 : numSemaphores); // implicit dependency + optional chaining numSemaphores = immCmdList->eventSignalPipeControl(false, immCmdList->getDcFlushRequired(events[0]->isSignalScope())) ? 1 : 0; offset = cmdStream->getUsed(); immCmdList->appendMemoryCopy(alloc, alloc, 1, nullptr, 0, nullptr, false, false); - findSemaphores(numSemaphores); // implicit dependency for Compact event or no semaphores for non-compact + findSemaphores(heaplessEnabled ? 
1 : numSemaphores); // implicit dependency for Compact event or no semaphores for non-compact offset = cmdStream->getUsed(); immCmdList->appendMemoryCopyRegion(alloc, &region, 1, 1, alloc, &region, 1, 1, eventHandle, 0, nullptr, false, false); - findSemaphores(2); // implicit dependency + chaining + findSemaphores(heaplessEnabled ? 1 : 2); // implicit dependency + chaining offset = cmdStream->getUsed(); immCmdList->appendMemoryCopyRegion(alloc, &region, 1, 1, alloc, &region, 1, 1, nullptr, 0, nullptr, false, false); - findSemaphores(0); // no implicit dependency + findSemaphores(heaplessEnabled ? 1 : 0); // no implicit dependency context->freeMem(alloc); } @@ -1678,6 +1679,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyAndFlushRequiredWhenDispatchingKernelThenProgramSemaphoreOnce, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto immCmdList = createImmCmdList(); + bool heaplessEnabled = immCmdList->isHeaplessModeEnabled(); auto eventPool = createEvents(1, false); events[0]->makeCounterBasedImplicitlyDisabled(); auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); @@ -1699,19 +1701,21 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyAndFlushRequiredWh offset = cmdStream->getUsed(); immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventHandle, 0, nullptr, false, false); - findSemaphores(dcFlushRequired ? 1 : 2); // implicit dependency + timestamp chaining + auto nSemaphores = heaplessEnabled ? 1 : (dcFlushRequired ? 1 : 2); + findSemaphores(nSemaphores); // implicit dependency + timestamp chaining offset = cmdStream->getUsed(); immCmdList->appendMemoryCopy(&copyData, &copyData, 1, nullptr, 0, nullptr, false, false); - findSemaphores(dcFlushRequired ? 1 : 0); // implicit dependency or already waited on previous call + nSemaphores = heaplessEnabled ? 1 : (dcFlushRequired ? 
1 : 0); + findSemaphores(nSemaphores); // implicit dependency or already waited on previous call offset = cmdStream->getUsed(); immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, eventHandle, 0, nullptr, false, false); - findSemaphores(2); // implicit dependency + chaining + findSemaphores(heaplessEnabled ? 1 : 2); // implicit dependency + chaining offset = cmdStream->getUsed(); immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, nullptr, 0, nullptr, false, false); - findSemaphores(0); // no implicit dependency + findSemaphores(heaplessEnabled ? 1 : 0); // no implicit dependency } HWTEST2_F(InOrderCmdListTests, givenEventWithRequiredPipeControlWhenDispatchingCopyThenSignalInOrderAllocation, IsAtLeastXeHpCore) { @@ -4155,10 +4159,15 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); auto eventPool = createEvents(1, false); - auto eventHandle = events[0]->toHandle(); - auto hostAddress = static_cast(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + auto inOrderExecInfo = immCmdList->inOrderExecInfo; + uint64_t *hostAddress = nullptr; + if (inOrderExecInfo->isHostStorageDuplicated()) { + hostAddress = inOrderExecInfo->getBaseHostAddress(); + } else { + hostAddress = static_cast(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + } *hostAddress = 0; const uint32_t failCounter = 3; @@ -4212,7 +4221,13 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDoingCpuCopyThenPassInfo auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 128, 128, &deviceAlloc); ASSERT_EQ(result, ZE_RESULT_SUCCESS); - auto hostAddress = static_cast(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + auto inOrderExecInfo = immCmdList->inOrderExecInfo; + uint64_t *hostAddress = nullptr; + if 
(inOrderExecInfo->isHostStorageDuplicated()) { + hostAddress = inOrderExecInfo->getBaseHostAddress(); + } else { + hostAddress = static_cast(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + } *hostAddress = 3; immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, eventHandle, 0, nullptr, false, false); @@ -4263,7 +4278,13 @@ HWTEST2_F(InOrderCmdListTests, givenProfilingEventWhenDoingCpuCopyThenSetProfili auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 128, 128, &deviceAlloc); ASSERT_EQ(result, ZE_RESULT_SUCCESS); - auto hostAddress = static_cast(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + auto inOrderExecInfo = immCmdList->inOrderExecInfo; + uint64_t *hostAddress = nullptr; + if (inOrderExecInfo->isHostStorageDuplicated()) { + hostAddress = inOrderExecInfo->getBaseHostAddress(); + } else { + hostAddress = static_cast(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + } *hostAddress = 3; immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, eventHandle0, 0, nullptr, false, false); @@ -4562,7 +4583,13 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - auto hostAddress = static_cast(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + auto inOrderExecInfo = immCmdList->inOrderExecInfo; + uint64_t *hostAddress = nullptr; + if (inOrderExecInfo->isHostStorageDuplicated()) { + hostAddress = inOrderExecInfo->getBaseHostAddress(); + } else { + hostAddress = static_cast(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + } *hostAddress = 0; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); @@ -4695,34 +4722,7 @@ HWTEST2_F(InOrderCmdListTests, givenImplicitScalingEnabledWhenAskingForExtension 
EXPECT_EQ(extensionProperties.end(), it); } -struct MultiTileInOrderCmdListTests : public InOrderCmdListTests { - void SetUp() override { - NEO::debugManager.flags.CreateMultipleSubDevices.set(partitionCount); - NEO::debugManager.flags.EnableImplicitScaling.set(4); - - InOrderCmdListTests::SetUp(); - } - - template - DestroyableZeUniquePtr>> createMultiTileImmCmdList() { - auto cmdList = createImmCmdList(); - - cmdList->partitionCount = partitionCount; - - return cmdList; - } - - template - DestroyableZeUniquePtr>> createMultiTileRegularCmdList(bool copyOnly) { - auto cmdList = createRegularCmdList(copyOnly); - - cmdList->partitionCount = partitionCount; - - return cmdList; - } - - const uint32_t partitionCount = 2; -}; +using MultiTileInOrderCmdListTests = MultiTileInOrderCmdListFixture; HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) { uint64_t counterValue = 2; @@ -5192,8 +5192,10 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0, false)); EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0)); - *hostAddress0 = 1; - *hostAddress1 = 1; + uint64_t waitvalue = immCmdList->inOrderExecInfo->getCounterValue(); + + *hostAddress0 = waitvalue; + *hostAddress1 = waitvalue; EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0, false)); EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(0)); @@ -5330,6 +5332,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa mockCmdQHw->initialize(true, false, false); auto regularCmdList = createRegularCmdList(false); regularCmdList->partitionCount = 2; + bool isHeaplessEnabled = regularCmdList->isHeaplessModeEnabled(); auto cmdStream = regularCmdList->getCmdContainer().getCommandStream(); @@ -5338,10 +5341,14 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa 
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - ASSERT_EQ(4u, regularCmdList->inOrderPatchCmds.size()); // Walker + 2x Semaphore + Walker + auto nSemaphores = regularCmdList->inOrderExecInfo->getNumDevicePartitionsToWait(); + auto nWalkers = 2u; + ASSERT_EQ(nWalkers + nSemaphores, regularCmdList->inOrderPatchCmds.size()); // Walker + N x Semaphore + Walker + + auto lastWalkerI = nWalkers + nSemaphores - 1; WalkerVariant walkerVariantFromContainer1 = NEO::UnitTestHelper::getWalkerVariant(regularCmdList->inOrderPatchCmds[0].cmd1); - WalkerVariant walkerVariantFromContainer2 = NEO::UnitTestHelper::getWalkerVariant(regularCmdList->inOrderPatchCmds[3].cmd1); + WalkerVariant walkerVariantFromContainer2 = NEO::UnitTestHelper::getWalkerVariant(regularCmdList->inOrderPatchCmds[lastWalkerI].cmd1); std::visit([](auto &&walker1, auto &&walker2) { ASSERT_NE(nullptr, walker1); ASSERT_NE(nullptr, walker2); @@ -5361,17 +5368,23 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa ASSERT_NE(cmdList.end(), itor); WalkerVariant walkerVariantFromParser2 = NEO::UnitTestHelper::getWalkerVariant(*itor); - EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->getCounterValue()); + EXPECT_EQ(isHeaplessEnabled ? 
4u : 2u, regularCmdList->inOrderExecInfo->getCounterValue()); std::visit([&](auto &&walkerFromParser1, auto &&walkerFromParser2, auto &&walkerFromContainer1, auto &&walkerFromContainer2) { auto verifyPatching = [&](uint64_t executionCounter) { auto appendValue = regularCmdList->inOrderExecInfo->getCounterValue() * executionCounter; - EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData()); - EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData()); - - EXPECT_EQ(2u + appendValue, walkerFromContainer2->getPostSync().getImmediateData()); - EXPECT_EQ(2u + appendValue, walkerFromParser2->getPostSync().getImmediateData()); + if (isHeaplessEnabled) { + EXPECT_EQ(0u, walkerFromContainer1->getPostSync().getImmediateData()); + EXPECT_EQ(0u, walkerFromParser1->getPostSync().getImmediateData()); + EXPECT_EQ(0u, walkerFromContainer2->getPostSync().getImmediateData()); + EXPECT_EQ(0u, walkerFromParser2->getPostSync().getImmediateData()); + } else { + EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData()); + EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData()); + EXPECT_EQ(2u + appendValue, walkerFromContainer2->getPostSync().getImmediateData()); + EXPECT_EQ(2u + appendValue, walkerFromParser2->getPostSync().getImmediateData()); + } }; regularCmdList->close(); @@ -6606,12 +6619,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeAndNoopWaitEventsAllowedWhenEvent using SynchronizedDispatchTests = InOrderCmdListFixture; -struct MultiTileSynchronizedDispatchTests : public MultiTileInOrderCmdListTests { - void SetUp() override { - NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1); - MultiTileInOrderCmdListTests::SetUp(); - } -}; +using MultiTileSynchronizedDispatchTests = MultiTileSynchronizedDispatchFixture; HWTEST2_F(MultiTileSynchronizedDispatchTests, givenSyncDispatchExtensionWhenCreatingRegularCmdListThenEnableSyncDispatchMode, IsAtLeastSkl) { 
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(-1); @@ -7078,7 +7086,8 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending auto itor = cmdList.begin(); if (hasDependencySemaphore) { - for (uint32_t i = 0; i < partitionCount; i++) { + auto nPartition = std::min(immCmdList->inOrderExecInfo->getNumDevicePartitionsToWait(), partitionCount); + for (uint32_t i = 0; i < nPartition; i++) { itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; @@ -7097,7 +7106,9 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending return false; } - auto miAtomic = reinterpret_cast(++miPredicate); + auto itAtomic = find(itor, cmdList.end()); + + auto miAtomic = reinterpret_cast(*itAtomic); EXPECT_EQ(MI_ATOMIC::DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1, miAtomic->getDwordLength()); EXPECT_EQ(1u, miAtomic->getInlineData()); @@ -7890,15 +7901,15 @@ HWTEST2_F(CopyOffloadInOrderTests, givenTbxModeWhenSyncCalledAlwaysDownloadAlloc if (immCmdList->inOrderExecInfo->isHostStorageDuplicated()) { uint64_t *hostAddress = immCmdList->inOrderExecInfo->getBaseHostAddress(); - *hostAddress = 2; + *hostAddress = 3; } else { auto deviceAlloc = immCmdList->inOrderExecInfo->getDeviceCounterAllocation(); auto hostAddress = static_cast(deviceAlloc->getUnderlyingBuffer()); - *hostAddress = 2; + *hostAddress = 3; } - *mainQueueCsr->getTagAddress() = 2; - *offloadCsr->getTagAddress() = 2; + *mainQueueCsr->getTagAddress() = 3; + *offloadCsr->getTagAddress() = 3; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); @@ -7924,8 +7935,8 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan auto mainQueueCsr = static_cast *>(immCmdList->getCsr(false)); auto offloadCsr = static_cast *>(immCmdList->getCsr(true)); - *mainQueueCsr->getTagAddress() = 2; - *offloadCsr->getTagAddress() = 2; + *mainQueueCsr->getTagAddress() = 3; + 
*offloadCsr->getTagAddress() = 3; auto mockAlloc = new MockGraphicsAllocation(); @@ -7936,8 +7947,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan auto eventPool = createEvents(1, false); - auto deviceAlloc = immCmdList->inOrderExecInfo->getDeviceCounterAllocation(); - auto hostAddress = static_cast(deviceAlloc->getUnderlyingBuffer()); + auto inOrderExecInfo = immCmdList->inOrderExecInfo; + uint64_t *hostAddress = nullptr; + if (inOrderExecInfo->isHostStorageDuplicated()) { + hostAddress = inOrderExecInfo->getBaseHostAddress(); + } else { + hostAddress = static_cast(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + } *hostAddress = 0; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); @@ -7968,8 +7984,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan EXPECT_NE(mainQueueCsr, offloadCsr); - auto deviceAlloc = immCmdList->inOrderExecInfo->getDeviceCounterAllocation(); - auto hostAddress = static_cast(deviceAlloc->getUnderlyingBuffer()); + auto inOrderExecInfo = immCmdList->inOrderExecInfo; + uint64_t *hostAddress = nullptr; + if (inOrderExecInfo->isHostStorageDuplicated()) { + hostAddress = inOrderExecInfo->getBaseHostAddress(); + } else { + hostAddress = static_cast(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer()); + } *hostAddress = 0; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index fb703799bd..0f9c2e2fce 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -1903,7 +1903,8 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledWhenExecutingCmdLi auto 
bbStartCmds = findAll(cmdList.begin(), cmdList.end()); - ASSERT_EQ(2u, bbStartCmds.size()); + auto heaplessStateInitEnabled = commandList->heaplessStateInitEnabled; + EXPECT_EQ(heaplessStateInitEnabled ? 1u : 2u, bbStartCmds.size()); for (auto &cmd : bbStartCmds) { auto bbStart = genCmdCast(*cmd); @@ -1948,7 +1949,8 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledAndDebugFlagSetWhe auto bbStartCmds = findAll(cmdList.begin(), cmdList.end()); - EXPECT_EQ(2u, bbStartCmds.size()); + auto heaplessStateInitEnabled = commandList->heaplessStateInitEnabled; + EXPECT_EQ(heaplessStateInitEnabled ? 1u : 2u, bbStartCmds.size()); for (auto &cmd : bbStartCmds) { auto bbStart = genCmdCast(*cmd); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp index e7a11d09dc..b5e23d81d7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp @@ -44,6 +44,9 @@ struct CommandQueueExecuteCommandListsFixture : DeviceFixture { commandLists[1] = CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)->toHandle(); ASSERT_NE(nullptr, commandLists[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto commandList = CommandList::fromHandle(commandLists[0]); + this->heaplessStateInit = commandList->isHeaplessStateInitEnabled(); } void tearDown() { @@ -65,6 +68,7 @@ struct CommandQueueExecuteCommandListsFixture : DeviceFixture { const static uint32_t numCommandLists = 2; ze_command_list_handle_t commandLists[numCommandLists]; + bool heaplessStateInit = false; }; using CommandQueueExecuteCommandLists = Test; @@ -908,14 +912,25 @@ void CommandQueueExecuteCommandListsFixture::twoCommandListCommandPreemptionTest } HWTEST2_F(CommandQueueExecuteCommandLists, 
GivenCmdListsWithDifferentPreemptionModesWhenExecutingThenQueuePreemptionIsSwitchedAndStateSipProgrammedOnce, IsAtLeastSkl) { + if (heaplessStateInit) { + GTEST_SKIP(); + } twoCommandListCommandPreemptionTest(false); } HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenNoCmdStreamPreemptionRequiredThenNoCmdStreamProgrammingAndStateSipProgrammedOnce, IsAtLeastSkl) { + if (heaplessStateInit) { + GTEST_SKIP(); + } + twoCommandListCommandPreemptionTest(true); } HWTEST_F(CommandQueueExecuteCommandLists, GivenCopyCommandQueueWhenExecutingCopyCommandListThenExpectNoPreemptionProgramming) { + if (heaplessStateInit) { + GTEST_SKIP(); + } + using STATE_SIP = typename FamilyType::STATE_SIP; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; @@ -1154,7 +1169,12 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun // 1st call then initialize registers GenCmdList cmdList; - ASSERT_TRUE(Parse::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream.getCpuBase(), usedSpaceBefore1stExecute), usedSpaceOn1stExecute)); + + if (csr->commandStreamHeaplessStateInit) { + ASSERT_TRUE(Parse::parseCommandBuffer(cmdList, ptrOffset(csr->commandStreamHeaplessStateInit->getCpuBase(), 0), csr->commandStreamHeaplessStateInit->getUsed())); + } else { + ASSERT_TRUE(Parse::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream.getCpuBase(), usedSpaceBefore1stExecute), usedSpaceOn1stExecute)); + } findPartitionRegister(cmdList, true); auto usedSpaceBefore2ndExecute = commandQueue->commandStream.getUsed(); diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 9930394fd4..74b58e3fcd 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -2820,7 +2820,10 @@ struct EventPoolCreateNegativeTest : public ::testing::Test { for 
(uint32_t i = 0; i < numRootDevices; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(NEO::defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->initGmm(); + UnitTestSetter::setRcsExposure(*executionEnvironment->rootDeviceEnvironments[i]); + UnitTestSetter::setCcsExposure(*executionEnvironment->rootDeviceEnvironments[i]); } + executionEnvironment->calculateMaxOsContextCount(); std::vector> devices; for (uint32_t i = 0; i < numRootDevices; i++) { diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index e6663284f4..efe812998d 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -5138,11 +5138,19 @@ TEST(MemoryBitfieldTests, givenDeviceWithValidBitfieldWhenAllocatingSharedMemory executionEnvironment->rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->initGmm(); executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique(); + + UnitTestSetter::setRcsExposure(*executionEnvironment->rootDeviceEnvironments[i]); + UnitTestSetter::setCcsExposure(*executionEnvironment->rootDeviceEnvironments[i]); } + executionEnvironment->calculateMaxOsContextCount(); + auto memoryManager = new NEO::MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); + NEO::Device *neoDevice0 = NEO::Device::create(executionEnvironment, 0u); debugManager.flags.CreateMultipleSubDevices.set(4); + + executionEnvironment->calculateMaxOsContextCount(); NEO::Device *neoDevice1 = NEO::Device::create(executionEnvironment, 1u); NEO::DeviceVector devices; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 5a9542310c..b3026618c8 100644 --- 
a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -195,7 +195,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { uint64_t getScratchPatchAddress(); SubmissionStatus programHeaplessProlog(Device &device); - void programHeaplessStateProlog(Device &device, LinearStream &commandStream); + MOCKABLE_VIRTUAL void programHeaplessStateProlog(Device &device, LinearStream &commandStream); void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream); void programComputeModeHeapless(Device &device, LinearStream &commandStream); void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, Device &device); diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 80c83babb2..d933d39e5d 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -254,6 +254,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ return BaseClass::initializeDeviceWithFirstSubmission(device); } + void programHeaplessStateProlog(Device &device, LinearStream &commandStream) override { + this->commandStreamHeaplessStateInit = &commandStream; + return BaseClass::programHeaplessStateProlog(device, commandStream); + } + bool writeMemory(GraphicsAllocation &gfxAllocation, bool isChunkCopy, uint64_t gpuVaChunkOffset, size_t chunkSize) override { writeMemoryParams.totalCallCount++; if (isChunkCopy) { @@ -541,6 +546,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ TaskCountType flushBcsTaskReturnValue{}; LinearStream *lastFlushedCommandStream = nullptr; + LinearStream *commandStreamHeaplessStateInit = nullptr; + const IndirectHeap *recordedSsh = nullptr; std::mutex mutex;