refactor: correct expectations in level zero tests if heapless enabled 3/n

Related-To: NEO-10641
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2024-09-03 09:38:57 +00:00 committed by Compute-Runtime-Automation
parent 7adfa344c0
commit 6d7e2760dc
8 changed files with 171 additions and 73 deletions

View File

@ -273,5 +273,42 @@ bool InOrderCmdListFixture::verifyInOrderDependency(GenCmdList::iterator &cmd, u
cmd++;
return true;
}
/// In-order command list fixture that configures sub-device creation and
/// implicit scaling through debug flags before the base fixture is set up,
/// and offers helpers that stamp `partitionCount` onto the command lists
/// they create.
struct MultiTileInOrderCmdListFixture : public InOrderCmdListFixture {
    /// Value written to CreateMultipleSubDevices and copied onto every
    /// command list returned by the helpers below.
    const uint32_t partitionCount = 2;

    /// Writes the debug flags first, then delegates to the base SetUp.
    void SetUp() override {
        auto &flags = NEO::debugManager.flags;
        flags.CreateMultipleSubDevices.set(this->partitionCount);
        // NOTE(review): the meaning of the value 4 is not visible here — confirm against the flag definition.
        flags.EnableImplicitScaling.set(4);
        InOrderCmdListFixture::SetUp();
    }

    /// Creates an immediate command list via createImmCmdList and overrides
    /// its partitionCount with this fixture's value.
    template <GFXCORE_FAMILY gfxCoreFamily>
    DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>> createMultiTileImmCmdList() {
        auto immCmdList = createImmCmdList<gfxCoreFamily>();
        immCmdList->partitionCount = this->partitionCount;
        return immCmdList;
    }

    /// Creates a regular command list via createRegularCmdList (forwarding
    /// `copyOnly`) and overrides its partitionCount with this fixture's value.
    template <GFXCORE_FAMILY gfxCoreFamily>
    DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamily<gfxCoreFamily>>> createMultiTileRegularCmdList(bool copyOnly) {
        auto regularCmdList = createRegularCmdList<gfxCoreFamily>(copyOnly);
        regularCmdList->partitionCount = this->partitionCount;
        return regularCmdList;
    }
};
// Multi-tile in-order fixture that additionally sets the
// ForceSynchronizedDispatchMode debug flag to 1.
struct MultiTileSynchronizedDispatchFixture : public MultiTileInOrderCmdListFixture {
void SetUp() override {
// The flag is written before the parent SetUp runs; keep this order.
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1);
MultiTileInOrderCmdListFixture::SetUp();
}
};
} // namespace ult
} // namespace L0

View File

@ -1628,6 +1628,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
bool heaplessEnabled = immCmdList->isHeaplessModeEnabled();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled();
@ -1656,21 +1657,21 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopy(alloc, alloc, 1, eventHandle, 0, nullptr, false, false);
findSemaphores(numSemaphores); // implicit dependency + optional chaining
findSemaphores(heaplessEnabled ? 1 : numSemaphores); // implicit dependency + optional chaining
numSemaphores = immCmdList->eventSignalPipeControl(false, immCmdList->getDcFlushRequired(events[0]->isSignalScope())) ? 1 : 0;
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopy(alloc, alloc, 1, nullptr, 0, nullptr, false, false);
findSemaphores(numSemaphores); // implicit dependency for Compact event or no semaphores for non-compact
findSemaphores(heaplessEnabled ? 1 : numSemaphores); // implicit dependency for Compact event or no semaphores for non-compact
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopyRegion(alloc, &region, 1, 1, alloc, &region, 1, 1, eventHandle, 0, nullptr, false, false);
findSemaphores(2); // implicit dependency + chaining
findSemaphores(heaplessEnabled ? 1 : 2); // implicit dependency + chaining
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopyRegion(alloc, &region, 1, 1, alloc, &region, 1, 1, nullptr, 0, nullptr, false, false);
findSemaphores(0); // no implicit dependency
findSemaphores(heaplessEnabled ? 1 : 0); // no implicit dependency
context->freeMem(alloc);
}
@ -1678,6 +1679,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer
HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyAndFlushRequiredWhenDispatchingKernelThenProgramSemaphoreOnce, IsAtLeastXeHpCore) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
bool heaplessEnabled = immCmdList->isHeaplessModeEnabled();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled();
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@ -1699,19 +1701,21 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyAndFlushRequiredWh
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventHandle, 0, nullptr, false, false);
findSemaphores(dcFlushRequired ? 1 : 2); // implicit dependency + timestamp chaining
auto nSemaphores = heaplessEnabled ? 1 : (dcFlushRequired ? 1 : 2);
findSemaphores(nSemaphores); // implicit dependency + timestamp chaining
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopy(&copyData, &copyData, 1, nullptr, 0, nullptr, false, false);
findSemaphores(dcFlushRequired ? 1 : 0); // implicit dependency or already waited on previous call
nSemaphores = heaplessEnabled ? 1 : (dcFlushRequired ? 1 : 0);
findSemaphores(nSemaphores); // implicit dependency or already waited on previous call
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, eventHandle, 0, nullptr, false, false);
findSemaphores(2); // implicit dependency + chaining
findSemaphores(heaplessEnabled ? 1 : 2); // implicit dependency + chaining
offset = cmdStream->getUsed();
immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, nullptr, 0, nullptr, false, false);
findSemaphores(0); // no implicit dependency
findSemaphores(heaplessEnabled ? 1 : 0); // no implicit dependency
}
HWTEST2_F(InOrderCmdListTests, givenEventWithRequiredPipeControlWhenDispatchingCopyThenSignalInOrderAllocation, IsAtLeastXeHpCore) {
@ -4155,10 +4159,15 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
auto eventPool = createEvents<FamilyType>(1, false);
auto eventHandle = events[0]->toHandle();
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t *hostAddress = nullptr;
if (inOrderExecInfo->isHostStorageDuplicated()) {
hostAddress = inOrderExecInfo->getBaseHostAddress();
} else {
hostAddress = static_cast<uint64_t *>(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
}
*hostAddress = 0;
const uint32_t failCounter = 3;
@ -4212,7 +4221,13 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDoingCpuCopyThenPassInfo
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 128, 128, &deviceAlloc);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t *hostAddress = nullptr;
if (inOrderExecInfo->isHostStorageDuplicated()) {
hostAddress = inOrderExecInfo->getBaseHostAddress();
} else {
hostAddress = static_cast<uint64_t *>(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
}
*hostAddress = 3;
immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, eventHandle, 0, nullptr, false, false);
@ -4263,7 +4278,13 @@ HWTEST2_F(InOrderCmdListTests, givenProfilingEventWhenDoingCpuCopyThenSetProfili
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 128, 128, &deviceAlloc);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t *hostAddress = nullptr;
if (inOrderExecInfo->isHostStorageDuplicated()) {
hostAddress = inOrderExecInfo->getBaseHostAddress();
} else {
hostAddress = static_cast<uint64_t *>(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
}
*hostAddress = 3;
immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, eventHandle0, 0, nullptr, false, false);
@ -4562,7 +4583,13 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t *hostAddress = nullptr;
if (inOrderExecInfo->isHostStorageDuplicated()) {
hostAddress = inOrderExecInfo->getBaseHostAddress();
} else {
hostAddress = static_cast<uint64_t *>(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
}
*hostAddress = 0;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
@ -4695,34 +4722,7 @@ HWTEST2_F(InOrderCmdListTests, givenImplicitScalingEnabledWhenAskingForExtension
EXPECT_EQ(extensionProperties.end(), it);
}
// In-order command list test fixture that sets the CreateMultipleSubDevices and
// EnableImplicitScaling debug flags before the base SetUp, and provides helpers
// that assign partitionCount to the command lists they create.
// NOTE(review): the line right after this struct declares the same name as an
// alias of MultiTileInOrderCmdListFixture, whose body matches this one; this
// struct is presumably the pre-change side of the diff — confirm before editing.
struct MultiTileInOrderCmdListTests : public InOrderCmdListTests {
void SetUp() override {
NEO::debugManager.flags.CreateMultipleSubDevices.set(partitionCount);
// NOTE(review): the meaning of the value 4 is not visible here — confirm.
NEO::debugManager.flags.EnableImplicitScaling.set(4);
InOrderCmdListTests::SetUp();
}
// Creates an immediate command list and overrides its partitionCount.
template <GFXCORE_FAMILY gfxCoreFamily>
DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>> createMultiTileImmCmdList() {
auto cmdList = createImmCmdList<gfxCoreFamily>();
cmdList->partitionCount = partitionCount;
return cmdList;
}
// Creates a regular command list (forwarding copyOnly) and overrides its partitionCount.
template <GFXCORE_FAMILY gfxCoreFamily>
DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamily<gfxCoreFamily>>> createMultiTileRegularCmdList(bool copyOnly) {
auto cmdList = createRegularCmdList<gfxCoreFamily>(copyOnly);
cmdList->partitionCount = partitionCount;
return cmdList;
}
// Written to CreateMultipleSubDevices in SetUp and copied onto created command lists.
const uint32_t partitionCount = 2;
};
using MultiTileInOrderCmdListTests = MultiTileInOrderCmdListFixture;
HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) {
uint64_t counterValue = 2;
@ -5192,8 +5192,10 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0, false));
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0));
*hostAddress0 = 1;
*hostAddress1 = 1;
uint64_t waitvalue = immCmdList->inOrderExecInfo->getCounterValue();
*hostAddress0 = waitvalue;
*hostAddress1 = waitvalue;
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0, false));
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(0));
@ -5330,6 +5332,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa
mockCmdQHw->initialize(true, false, false);
auto regularCmdList = createRegularCmdList<gfxCoreFamily>(false);
regularCmdList->partitionCount = 2;
bool isHeaplessEnabled = regularCmdList->isHeaplessModeEnabled();
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
@ -5338,10 +5341,14 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(4u, regularCmdList->inOrderPatchCmds.size()); // Walker + 2x Semaphore + Walker
auto nSemaphores = regularCmdList->inOrderExecInfo->getNumDevicePartitionsToWait();
auto nWalkers = 2u;
ASSERT_EQ(nWalkers + nSemaphores, regularCmdList->inOrderPatchCmds.size()); // Walker + N x Semaphore + Walker
auto lastWalkerI = nWalkers + nSemaphores - 1;
WalkerVariant walkerVariantFromContainer1 = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(regularCmdList->inOrderPatchCmds[0].cmd1);
WalkerVariant walkerVariantFromContainer2 = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(regularCmdList->inOrderPatchCmds[3].cmd1);
WalkerVariant walkerVariantFromContainer2 = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(regularCmdList->inOrderPatchCmds[lastWalkerI].cmd1);
std::visit([](auto &&walker1, auto &&walker2) {
ASSERT_NE(nullptr, walker1);
ASSERT_NE(nullptr, walker2);
@ -5361,17 +5368,23 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa
ASSERT_NE(cmdList.end(), itor);
WalkerVariant walkerVariantFromParser2 = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(*itor);
EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->getCounterValue());
EXPECT_EQ(isHeaplessEnabled ? 4u : 2u, regularCmdList->inOrderExecInfo->getCounterValue());
std::visit([&](auto &&walkerFromParser1, auto &&walkerFromParser2, auto &&walkerFromContainer1, auto &&walkerFromContainer2) {
auto verifyPatching = [&](uint64_t executionCounter) {
auto appendValue = regularCmdList->inOrderExecInfo->getCounterValue() * executionCounter;
EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData());
EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData());
EXPECT_EQ(2u + appendValue, walkerFromContainer2->getPostSync().getImmediateData());
EXPECT_EQ(2u + appendValue, walkerFromParser2->getPostSync().getImmediateData());
if (isHeaplessEnabled) {
EXPECT_EQ(0u, walkerFromContainer1->getPostSync().getImmediateData());
EXPECT_EQ(0u, walkerFromParser1->getPostSync().getImmediateData());
EXPECT_EQ(0u, walkerFromContainer2->getPostSync().getImmediateData());
EXPECT_EQ(0u, walkerFromParser2->getPostSync().getImmediateData());
} else {
EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData());
EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData());
EXPECT_EQ(2u + appendValue, walkerFromContainer2->getPostSync().getImmediateData());
EXPECT_EQ(2u + appendValue, walkerFromParser2->getPostSync().getImmediateData());
}
};
regularCmdList->close();
@ -6606,12 +6619,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeAndNoopWaitEventsAllowedWhenEvent
using SynchronizedDispatchTests = InOrderCmdListFixture;
// Multi-tile in-order test fixture that sets ForceSynchronizedDispatchMode to 1
// before running the parent SetUp.
// NOTE(review): the line right after this struct declares the same name as an
// alias of MultiTileSynchronizedDispatchFixture; this struct is presumably the
// pre-change side of the diff — confirm before editing.
struct MultiTileSynchronizedDispatchTests : public MultiTileInOrderCmdListTests {
void SetUp() override {
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1);
MultiTileInOrderCmdListTests::SetUp();
}
};
using MultiTileSynchronizedDispatchTests = MultiTileSynchronizedDispatchFixture;
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenSyncDispatchExtensionWhenCreatingRegularCmdListThenEnableSyncDispatchMode, IsAtLeastSkl) {
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(-1);
@ -7078,7 +7086,8 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
auto itor = cmdList.begin();
if (hasDependencySemaphore) {
for (uint32_t i = 0; i < partitionCount; i++) {
auto nPartition = std::min(immCmdList->inOrderExecInfo->getNumDevicePartitionsToWait(), partitionCount);
for (uint32_t i = 0; i < nPartition; i++) {
itor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
@ -7097,7 +7106,9 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
return false;
}
auto miAtomic = reinterpret_cast<MI_ATOMIC *>(++miPredicate);
auto itAtomic = find<MI_ATOMIC *>(itor, cmdList.end());
auto miAtomic = reinterpret_cast<MI_ATOMIC *>(*itAtomic);
EXPECT_EQ(MI_ATOMIC::DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1, miAtomic->getDwordLength());
EXPECT_EQ(1u, miAtomic->getInlineData());
@ -7890,15 +7901,15 @@ HWTEST2_F(CopyOffloadInOrderTests, givenTbxModeWhenSyncCalledAlwaysDownloadAlloc
if (immCmdList->inOrderExecInfo->isHostStorageDuplicated()) {
uint64_t *hostAddress = immCmdList->inOrderExecInfo->getBaseHostAddress();
*hostAddress = 2;
*hostAddress = 3;
} else {
auto deviceAlloc = immCmdList->inOrderExecInfo->getDeviceCounterAllocation();
auto hostAddress = static_cast<uint64_t *>(deviceAlloc->getUnderlyingBuffer());
*hostAddress = 2;
*hostAddress = 3;
}
*mainQueueCsr->getTagAddress() = 2;
*offloadCsr->getTagAddress() = 2;
*mainQueueCsr->getTagAddress() = 3;
*offloadCsr->getTagAddress() = 3;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
@ -7924,8 +7935,8 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan
auto mainQueueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(false));
auto offloadCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(true));
*mainQueueCsr->getTagAddress() = 2;
*offloadCsr->getTagAddress() = 2;
*mainQueueCsr->getTagAddress() = 3;
*offloadCsr->getTagAddress() = 3;
auto mockAlloc = new MockGraphicsAllocation();
@ -7936,8 +7947,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan
auto eventPool = createEvents<FamilyType>(1, false);
auto deviceAlloc = immCmdList->inOrderExecInfo->getDeviceCounterAllocation();
auto hostAddress = static_cast<uint64_t *>(deviceAlloc->getUnderlyingBuffer());
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t *hostAddress = nullptr;
if (inOrderExecInfo->isHostStorageDuplicated()) {
hostAddress = inOrderExecInfo->getBaseHostAddress();
} else {
hostAddress = static_cast<uint64_t *>(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
}
*hostAddress = 0;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
@ -7968,8 +7984,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan
EXPECT_NE(mainQueueCsr, offloadCsr);
auto deviceAlloc = immCmdList->inOrderExecInfo->getDeviceCounterAllocation();
auto hostAddress = static_cast<uint64_t *>(deviceAlloc->getUnderlyingBuffer());
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t *hostAddress = nullptr;
if (inOrderExecInfo->isHostStorageDuplicated()) {
hostAddress = inOrderExecInfo->getBaseHostAddress();
} else {
hostAddress = static_cast<uint64_t *>(inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
}
*hostAddress = 0;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);

View File

@ -1903,7 +1903,8 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledWhenExecutingCmdLi
auto bbStartCmds = findAll<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(2u, bbStartCmds.size());
auto heaplessStateInitEnabled = commandList->heaplessStateInitEnabled;
EXPECT_EQ(heaplessStateInitEnabled ? 1u : 2u, bbStartCmds.size());
for (auto &cmd : bbStartCmds) {
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*cmd);
@ -1948,7 +1949,8 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledAndDebugFlagSetWhe
auto bbStartCmds = findAll<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(2u, bbStartCmds.size());
auto heaplessStateInitEnabled = commandList->heaplessStateInitEnabled;
EXPECT_EQ(heaplessStateInitEnabled ? 1u : 2u, bbStartCmds.size());
for (auto &cmd : bbStartCmds) {
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*cmd);

View File

@ -44,6 +44,9 @@ struct CommandQueueExecuteCommandListsFixture : DeviceFixture {
commandLists[1] = CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)->toHandle();
ASSERT_NE(nullptr, commandLists[1]);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto commandList = CommandList::fromHandle(commandLists[0]);
this->heaplessStateInit = commandList->isHeaplessStateInitEnabled();
}
void tearDown() {
@ -65,6 +68,7 @@ struct CommandQueueExecuteCommandListsFixture : DeviceFixture {
const static uint32_t numCommandLists = 2;
ze_command_list_handle_t commandLists[numCommandLists];
bool heaplessStateInit = false;
};
using CommandQueueExecuteCommandLists = Test<CommandQueueExecuteCommandListsFixture>;
@ -908,14 +912,25 @@ void CommandQueueExecuteCommandListsFixture::twoCommandListCommandPreemptionTest
}
// Runs the shared twoCommandListCommandPreemptionTest scenario with its
// argument set to false.
HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenExecutingThenQueuePreemptionIsSwitchedAndStateSipProgrammedOnce, IsAtLeastSkl) {
// Skipped when the fixture recorded heaplessStateInit as true.
if (heaplessStateInit) {
GTEST_SKIP();
}
twoCommandListCommandPreemptionTest<FamilyType>(false);
}
// Runs the shared twoCommandListCommandPreemptionTest scenario with its
// argument set to true.
// NOTE(review): judging by the test name, true presumably selects the
// "no command stream preemption required" variant — confirm against the helper.
HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenNoCmdStreamPreemptionRequiredThenNoCmdStreamProgrammingAndStateSipProgrammedOnce, IsAtLeastSkl) {
// Skipped when the fixture recorded heaplessStateInit as true.
if (heaplessStateInit) {
GTEST_SKIP();
}
twoCommandListCommandPreemptionTest<FamilyType>(true);
}
HWTEST_F(CommandQueueExecuteCommandLists, GivenCopyCommandQueueWhenExecutingCopyCommandListThenExpectNoPreemptionProgramming) {
if (heaplessStateInit) {
GTEST_SKIP();
}
using STATE_SIP = typename FamilyType::STATE_SIP;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
@ -1154,7 +1169,12 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
// 1st call then initialize registers
GenCmdList cmdList;
ASSERT_TRUE(Parse::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream.getCpuBase(), usedSpaceBefore1stExecute), usedSpaceOn1stExecute));
if (csr->commandStreamHeaplessStateInit) {
ASSERT_TRUE(Parse::parseCommandBuffer(cmdList, ptrOffset(csr->commandStreamHeaplessStateInit->getCpuBase(), 0), csr->commandStreamHeaplessStateInit->getUsed()));
} else {
ASSERT_TRUE(Parse::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream.getCpuBase(), usedSpaceBefore1stExecute), usedSpaceOn1stExecute));
}
findPartitionRegister<FamilyType>(cmdList, true);
auto usedSpaceBefore2ndExecute = commandQueue->commandStream.getUsed();

View File

@ -2820,7 +2820,10 @@ struct EventPoolCreateNegativeTest : public ::testing::Test {
for (uint32_t i = 0; i < numRootDevices; i++) {
executionEnvironment->rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(NEO::defaultHwInfo.get());
executionEnvironment->rootDeviceEnvironments[i]->initGmm();
UnitTestSetter::setRcsExposure(*executionEnvironment->rootDeviceEnvironments[i]);
UnitTestSetter::setCcsExposure(*executionEnvironment->rootDeviceEnvironments[i]);
}
executionEnvironment->calculateMaxOsContextCount();
std::vector<std::unique_ptr<NEO::Device>> devices;
for (uint32_t i = 0; i < numRootDevices; i++) {

View File

@ -5138,11 +5138,19 @@ TEST(MemoryBitfieldTests, givenDeviceWithValidBitfieldWhenAllocatingSharedMemory
executionEnvironment->rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(defaultHwInfo.get());
executionEnvironment->rootDeviceEnvironments[i]->initGmm();
executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique<MockMemoryOperations>();
UnitTestSetter::setRcsExposure(*executionEnvironment->rootDeviceEnvironments[i]);
UnitTestSetter::setCcsExposure(*executionEnvironment->rootDeviceEnvironments[i]);
}
executionEnvironment->calculateMaxOsContextCount();
auto memoryManager = new NEO::MockMemoryManager(*executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
NEO::Device *neoDevice0 = NEO::Device::create<RootDevice>(executionEnvironment, 0u);
debugManager.flags.CreateMultipleSubDevices.set(4);
executionEnvironment->calculateMaxOsContextCount();
NEO::Device *neoDevice1 = NEO::Device::create<RootDevice>(executionEnvironment, 1u);
NEO::DeviceVector devices;

View File

@ -195,7 +195,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
uint64_t getScratchPatchAddress();
SubmissionStatus programHeaplessProlog(Device &device);
void programHeaplessStateProlog(Device &device, LinearStream &commandStream);
MOCKABLE_VIRTUAL void programHeaplessStateProlog(Device &device, LinearStream &commandStream);
void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream);
void programComputeModeHeapless(Device &device, LinearStream &commandStream);
void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, Device &device);

View File

@ -254,6 +254,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
return BaseClass::initializeDeviceWithFirstSubmission(device);
}
/// Test hook: remembers the address of the stream passed in, then forwards to
/// the base implementation so the prolog is programmed as usual.
/// The stored pointer is non-owning; it is only valid while `commandStream`
/// itself is alive.
void programHeaplessStateProlog(Device &device, LinearStream &commandStream) override {
    LinearStream *const prologStream = &commandStream;
    this->commandStreamHeaplessStateInit = prologStream;
    BaseClass::programHeaplessStateProlog(device, commandStream);
}
bool writeMemory(GraphicsAllocation &gfxAllocation, bool isChunkCopy, uint64_t gpuVaChunkOffset, size_t chunkSize) override {
writeMemoryParams.totalCallCount++;
if (isChunkCopy) {
@ -541,6 +546,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
TaskCountType flushBcsTaskReturnValue{};
LinearStream *lastFlushedCommandStream = nullptr;
LinearStream *commandStreamHeaplessStateInit = nullptr;
const IndirectHeap *recordedSsh = nullptr;
std::mutex mutex;