Unify memory layout for all multi-tile post-sync operations

Related-To: NEO-6262

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-11-30 14:41:26 +00:00
committed by Compute-Runtime-Automation
parent 63116e4100
commit 3e1023fa1a
38 changed files with 145 additions and 63 deletions

View File

@@ -99,7 +99,7 @@ HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledTh
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmd->getRegisterOffset()));
EXPECT_EQ(CommonConstants::partitionAddressOffset, static_cast<uint32_t>(lriCmd->getDataDword()));
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getPostSyncOffset(), static_cast<uint32_t>(lriCmd->getDataDword()));
EXPECT_EQ(true, lriCmd->getMmioRemapEnable());
auto result = commandList->close();

View File

@@ -1613,7 +1613,7 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
NEO::UltCommandStreamReceiver<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, forcePowerSavingMode);
}
static constexpr size_t tagSize = 64;
static constexpr size_t tagSize = 128;
static volatile uint32_t tagAddressData[tagSize];
uint32_t waitForComplitionCalledTimes = 0;
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
@@ -1697,16 +1697,21 @@ HWTEST_F(CommandQueueSynchronizeTest, givenDebugOverrideEnabledWhenCallToSynchro
L0::CommandQueue::fromHandle(commandQueue)->destroy();
}
HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenMultiplePartitionCountWhenCallingSynchronizeThenExpectTheSameNumberCsrSynchronizeCalls, IsWithinXeGfxFamily) {
HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenMultiplePartitionCountWhenCallingSynchronizeThenExpectTheSameNumberCsrSynchronizeCalls, IsAtLeastXeHpCore) {
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto csr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(neoDevice->getDefaultEngine().commandStreamReceiver);
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
csr->createPreemptionAllocation();
}
EXPECT_NE(0u, csr->getPostSyncWriteOffset());
volatile uint32_t *tagAddress = csr->getTagAddress();
for (uint32_t i = 0; i < 2; i++) {
*tagAddress = 0xFF;
tagAddress = ptrOffset(tagAddress, 8);
tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset());
}
csr->activePartitions = 2u;
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
@@ -1729,20 +1734,22 @@ HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenMultiplePartitionCountWhenC
uint64_t timeout = std::numeric_limits<uint64_t>::max();
commandQueue->synchronize(timeout);
EXPECT_EQ(2u, csr->activePartitions);
L0::CommandQueue::fromHandle(commandQueue)->destroy();
}
HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenCsrHasMultipleActivePartitionWhenExecutingCmdListOnNewCmdQueueThenExpectCmdPartitionCountMatchCsrActivePartitions, IsWithinXeGfxFamily) {
HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenCsrHasMultipleActivePartitionWhenExecutingCmdListOnNewCmdQueueThenExpectCmdPartitionCountMatchCsrActivePartitions, IsAtLeastXeHpCore) {
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto csr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(neoDevice->getDefaultEngine().commandStreamReceiver);
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
csr->createPreemptionAllocation();
}
EXPECT_NE(0u, csr->getPostSyncWriteOffset());
volatile uint32_t *tagAddress = csr->getTagAddress();
for (uint32_t i = 0; i < 2; i++) {
*tagAddress = 0xFF;
tagAddress = ptrOffset(tagAddress, 8);
tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset());
}
csr->activePartitions = 2u;
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,

View File

@@ -43,7 +43,8 @@ TEST_F(FenceTest, whenQueryingStatusThenCsrAllocationsAreDownloaded) {
}
TEST_F(FenceTest, whenQueryingStatusWithoutCsrAndFenceUnsignaledThenReturnsNotReady) {
Mock<CommandQueue> cmdQueue(device, nullptr);
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
Mock<CommandQueue> cmdQueue(device, csr.get());
auto fence = Fence::create(&cmdQueue, nullptr);
EXPECT_NE(nullptr, fence);
@@ -126,7 +127,7 @@ TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithTimeoutNonZeroAn
TEST_F(FenceSynchronizeTest, givenMultiplePartitionsWhenFenceIsResetThenAllPartitionFenceStatesAreReset) {
std::unique_ptr<MockCommandStreamReceiver> csr = nullptr;
csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr->postSyncWriteOffset = 16;
Mock<CommandQueue> cmdQueue(device, csr.get());
auto fence = whitebox_cast(Fence::create(&cmdQueue, nullptr));
@@ -136,13 +137,13 @@ TEST_F(FenceSynchronizeTest, givenMultiplePartitionsWhenFenceIsResetThenAllParti
for (uint32_t i = 0; i < 16; i++) {
EXPECT_EQ(Fence::STATE_CLEARED, *hostAddr);
hostAddr = ptrOffset(hostAddr, 8);
hostAddr = ptrOffset(hostAddr, 16);
}
hostAddr = static_cast<uint32_t *>(alloc->getUnderlyingBuffer());
fence->partitionCount = 2;
*hostAddr = Fence::STATE_SIGNALED;
hostAddr = ptrOffset(hostAddr, 8);
hostAddr = ptrOffset(hostAddr, 16);
*hostAddr = Fence::STATE_SIGNALED;
ze_result_t result = fence->reset();
@@ -151,7 +152,7 @@ TEST_F(FenceSynchronizeTest, givenMultiplePartitionsWhenFenceIsResetThenAllParti
hostAddr = static_cast<uint32_t *>(alloc->getUnderlyingBuffer());
for (uint32_t i = 0; i < 16; i++) {
EXPECT_EQ(Fence::STATE_CLEARED, *hostAddr);
hostAddr = ptrOffset(hostAddr, 8);
hostAddr = ptrOffset(hostAddr, 16);
}
EXPECT_EQ(1u, fence->partitionCount);