Enable multi-tile task count post-sync writes

Related-To: NEO-6244

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-09-23 18:13:37 +00:00
committed by Compute-Runtime-Automation
parent 91e9587a07
commit 6091861f3e
13 changed files with 321 additions and 33 deletions

View File

@ -87,8 +87,9 @@ inline void HardwareInterface<GfxFamily>::programWalker(
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
auto idd = &walkerCmd.getInterfaceDescriptor();
auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver();
if (currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (currentTimestampPacketNodes && queueCsr.peekTimestampPacketWriteEnabled()) {
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, commandQueue.getDevice().getRootDeviceEnvironment());
}
@ -123,7 +124,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd);
auto devices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getDeviceBitfield();
auto devices = queueCsr.getOsContext().getDeviceBitfield();
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred());
if (partitionWalker) {
@ -137,7 +138,9 @@ inline void HardwareInterface<GfxFamily>::programWalker(
false,
kernel.usesImages(),
workPartitionAllocationGpuVa);
if (queueCsr.isStaticWorkPartitioningEnabled()) {
queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), partitionCount));
}
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
timestampPacket->setPacketsUsed(partitionCount);
} else {

View File

@ -1410,6 +1410,72 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenOpenClWhe
EXPECT_EQ(0u, storeDataImmList.size());
}
struct XeHPAndLaterDispatchWalkerBasicTestDynamicPartition : public XeHPAndLaterDispatchWalkerBasicTest {
void SetUp() override {
DebugManager.flags.CreateMultipleSubDevices.set(2);
DebugManager.flags.EnableStaticPartitioning.set(0);
DebugManager.flags.EnableWalkerPartition.set(1u);
XeHPAndLaterDispatchWalkerBasicTest::SetUp();
}
void TearDown() override {
XeHPAndLaterDispatchWalkerBasicTest::TearDown();
}
};
// With static partitioning disabled by the fixture, enqueueing a kernel must
// leave the CSR's activePartitions at 1, while the parsed COMPUTE_WALKER is
// expected to carry partition type X with a partition size of 8.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTestDynamicPartition, givenDynamicPartitioningWhenEnqueueingKernelThenNoMultipleActivePartitionsSetInCsr) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    size_t globalWorkSize[] = {128, 1, 1};
    size_t localWorkSize[] = {8, 1, 1};

    // Partition count seen by the CSR before and after the enqueue.
    auto &csr = queue->getUltCommandStreamReceiver();
    EXPECT_EQ(1u, csr.activePartitions);
    queue->enqueueKernel(kernel->mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
    EXPECT_EQ(1u, csr.activePartitions);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(*queue);

    auto walker = reinterpret_cast<COMPUTE_WALKER *>(parser.cmdWalker);
    ASSERT_NE(nullptr, walker);
    EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker->getPartitionType());
    EXPECT_EQ(8u, walker->getPartitionSize());
}
struct XeHPAndLaterDispatchWalkerBasicTestStaticPartition : public XeHPAndLaterDispatchWalkerBasicTest {
void SetUp() override {
DebugManager.flags.CreateMultipleSubDevices.set(2);
DebugManager.flags.EnableStaticPartitioning.set(1);
DebugManager.flags.EnableWalkerPartition.set(1u);
XeHPAndLaterDispatchWalkerBasicTest::SetUp();
}
void TearDown() override {
XeHPAndLaterDispatchWalkerBasicTest::TearDown();
}
};
// With static partitioning enabled by the fixture, enqueueing a kernel is
// expected to raise the CSR's activePartitions from 1 to 2; the parsed
// COMPUTE_WALKER must carry partition type X with a partition size of 8.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTestStaticPartition, givenStaticPartitioningWhenEnqueueingKernelThenMultipleActivePartitionsAreSetInCsr) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    size_t globalWorkSize[] = {128, 1, 1};
    size_t localWorkSize[] = {8, 1, 1};

    // Partition count seen by the CSR before and after the enqueue.
    auto &csr = queue->getUltCommandStreamReceiver();
    EXPECT_EQ(1u, csr.activePartitions);
    queue->enqueueKernel(kernel->mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
    EXPECT_EQ(2u, csr.activePartitions);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(*queue);

    auto walker = reinterpret_cast<COMPUTE_WALKER *>(parser.cmdWalker);
    ASSERT_NE(nullptr, walker);
    EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker->getPartitionType());
    EXPECT_EQ(8u, walker->getPartitionSize());
}
using NonDefaultPlatformGpuWalkerTest = XeHPAndLaterDispatchWalkerBasicTest;
HWCMDTEST_F(IGFX_XE_HP_CORE, NonDefaultPlatformGpuWalkerTest, givenNonDefaultPlatformWhenSetupTimestampPacketThenGmmHelperIsTakenFromNonDefaultPlatform) {

View File

@ -684,23 +684,24 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiverStream.getGraphicsAllocation()));
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenMultipleActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOn) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.activePartitions = 2;
commandStreamReceiver.taskCount = 3;
flushTask(commandStreamReceiver, true);
parseCommands<FamilyType>(commandStream, 0);
struct CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests : public CommandStreamReceiverFlushTaskXeHPAndLaterTests {
void SetUp() override {
DebugManager.flags.CreateMultipleSubDevices.set(2);
parsePipeControl = true;
findHardwareCommands<FamilyType>();
CommandStreamReceiverFlushTaskXeHPAndLaterTests::SetUp();
}
template <typename GfxFamily>
void verifyPipeControl(UltCommandStreamReceiver<GfxFamily> &commandStreamReceiver, uint32_t expectedTaskCount, bool workLoadPartition) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
uint64_t gpuAddressTagAllocation = commandStreamReceiver.getTagAllocation()->getGpuAddress();
uint32_t gpuAddressLow = static_cast<uint32_t>(gpuAddressTagAllocation & 0x0000FFFFFFFFULL);
uint32_t gpuAddressHigh = static_cast<uint32_t>(gpuAddressTagAllocation >> 32);
bool pipeControlTagUpdate = false;
bool pipeControlWorkloadPartition = false;
auto itorPipeControl = pipeControlList.begin();
while (itorPipeControl != pipeControlList.end()) {
auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(*itorPipeControl);
@ -708,13 +709,161 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
pipeControlTagUpdate = true;
if (pipeControl->getWorkloadPartitionIdOffsetEnable()) {
pipeControlWorkloadPartition = true;
}
EXPECT_EQ(gpuAddressLow, pipeControl->getAddress());
EXPECT_EQ(gpuAddressHigh, pipeControl->getAddressHigh());
EXPECT_EQ(4u, pipeControl->getImmediateData());
EXPECT_EQ(expectedTaskCount, pipeControl->getImmediateData());
break;
}
itorPipeControl++;
}
}
EXPECT_TRUE(pipeControlTagUpdate);
if (workLoadPartition) {
EXPECT_TRUE(pipeControlWorkloadPartition);
} else {
EXPECT_FALSE(pipeControlWorkloadPartition);
}
}
// Looks through the parsed commands for the two register writes checked by
// the active-partition tests: an MI_LOAD_REGISTER_MEM whose register address
// is 0x221C and an MI_LOAD_REGISTER_IMM whose register offset is 0x23B4.
// When activePartitionExists is true both must be found (the MEM source must
// equal the CSR's work partition allocation GPU address and the IMM payload
// must be 8); when it is false neither command may be present.
template <typename GfxFamily>
void verifyActivePartitionConfig(UltCommandStreamReceiver<GfxFamily> &commandStreamReceiver, bool activePartitionExists) {
    using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    // The work partition address is only queried when a configuration is expected.
    uint64_t wparidSourceAddress = 0u;
    if (activePartitionExists) {
        wparidSourceAddress = commandStreamReceiver.getWorkPartitionAllocationGpuAddress();
    }
    uint32_t wparidRegister = 0x221C;
    uint32_t addressOffsetValue = 8;
    uint32_t addressOffsetRegister = 0x23B4;

    // Only the first matching MI_LOAD_REGISTER_MEM is inspected.
    bool wparidFound = false;
    auto loadRegisterMemCmds = getCommandsList<MI_LOAD_REGISTER_MEM>();
    for (auto cmd : loadRegisterMemCmds) {
        auto loadRegisterMem = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(cmd);
        if (loadRegisterMem->getRegisterAddress() != wparidRegister) {
            continue;
        }
        wparidFound = true;
        EXPECT_EQ(wparidSourceAddress, loadRegisterMem->getMemoryAddress());
        break;
    }

    // Only the first matching MI_LOAD_REGISTER_IMM is inspected.
    bool addressOffsetFound = false;
    for (auto cmd : lriList) {
        auto loadRegisterImm = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(cmd);
        if (loadRegisterImm->getRegisterOffset() != addressOffsetRegister) {
            continue;
        }
        addressOffsetFound = true;
        EXPECT_EQ(addressOffsetValue, loadRegisterImm->getDataDword());
        break;
    }

    if (!activePartitionExists) {
        EXPECT_FALSE(wparidFound);
        EXPECT_FALSE(addressOffsetFound);
        return;
    }
    EXPECT_TRUE(wparidFound);
    EXPECT_TRUE(addressOffsetFound);
}
// Drops the previously collected command, LRI and pipe-control lists, then
// parses parsedStream starting at offset and re-runs findHardwareCommands so
// that the lists describe only that stream region.
template <typename GfxFamily>
void prepareLinearStream(LinearStream &parsedStream, size_t offset) {
    this->pipeControlList.clear();
    this->lriList.clear();
    this->cmdList.clear();

    this->template parseCommands<GfxFamily>(parsedStream, offset);
    this->template findHardwareCommands<GfxFamily>();
}
DebugManagerStateRestore restorer;
};
// Two active partitions with static work partitioning enabled: after the
// flush activePartitionsConfig must be 2, the tag-update pipe control in the
// task stream must carry immediate data 4 with the workload-partition flag
// set, and the CSR stream must contain the active-partition register
// configuration.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenMultipleStaticActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Preconditions read from the freshly created CSR.
    EXPECT_EQ(1u, csr.activePartitionsConfig);
    EXPECT_TRUE(csr.staticWorkPartitioningEnabled);

    csr.activePartitions = 2;
    csr.taskCount = 3;
    flushTask(csr, true);
    EXPECT_EQ(2u, csr.activePartitionsConfig);

    // Task stream: tag update pipe control.
    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(csr, 4, true);

    // CSR stream: partition register configuration.
    prepareLinearStream<FamilyType>(csr.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(csr, true);
}
// Two active partitions but static work partitioning switched off on the
// CSR: activePartitionsConfig still follows activePartitions (2), yet the
// tag-update pipe control must not carry the workload-partition flag and no
// active-partition register configuration may be present in the CSR stream.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenMultipleDynamicActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    csr.staticWorkPartitioningEnabled = false;
    csr.activePartitions = 2;
    csr.taskCount = 3;
    flushTask(csr, true);
    EXPECT_EQ(2u, csr.activePartitionsConfig);

    // Task stream: tag update pipe control.
    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(csr, 4, false);

    // CSR stream: no partition register configuration expected.
    prepareLinearStream<FamilyType>(csr.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(csr, false);
}
// A single active partition: the tag-update pipe control (immediate data 4)
// must not carry the workload-partition flag and the CSR stream must not
// contain the active-partition register configuration.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.activePartitions = 1;
    commandStreamReceiver.taskCount = 3;
    flushTask(commandStreamReceiver, true);

    // No explicit parseCommands/findHardwareCommands here: prepareLinearStream
    // clears the collected lists and performs both steps itself, and the
    // fixture's SetUp already sets parsePipeControl, so a separate parse of the
    // same stream would only be discarded.
    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(commandStreamReceiver, 4, false);

    prepareLinearStream<FamilyType>(commandStreamReceiver.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
}
// Flushes twice with two static active partitions. The first flush must emit
// the partitioned tag update (immediate data 4) together with the
// active-partition register configuration; the second flush must emit the
// partitioned tag update again (immediate data 5) but no new register
// configuration in the region of the CSR stream written by that flush.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenMultipleStaticActivePartitionsWhenFlushingTaskTwiceThenExpectTagUpdatePipeControlWithPartitionFlagOnAndNoActivePartitionConfigAtSecondFlush) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Preconditions read from the freshly created CSR.
    EXPECT_EQ(1u, csr.activePartitionsConfig);
    EXPECT_TRUE(csr.staticWorkPartitioningEnabled);

    csr.activePartitions = 2;
    csr.taskCount = 3;

    // First flush.
    flushTask(csr, true);
    EXPECT_EQ(2u, csr.activePartitionsConfig);

    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(csr, 4, true);
    prepareLinearStream<FamilyType>(csr.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(csr, true);

    // Remember how much of each stream the first flush consumed so that only
    // the output of the second flush gets parsed below.
    size_t taskStreamOffset = commandStream.getUsed();
    size_t csrStreamOffset = csr.commandStream.getUsed();

    // Second flush.
    flushTask(csr, true);

    prepareLinearStream<FamilyType>(commandStream, taskStreamOffset);
    verifyPipeControl<FamilyType>(csr, 5, true);
    prepareLinearStream<FamilyType>(csr.commandStream, csrStreamOffset);
    verifyActivePartitionConfig<FamilyType>(csr, false);
}

View File

@ -1400,3 +1400,13 @@ INSTANTIATE_TEST_CASE_P(BcsDetaliedTest,
::testing::Combine(
::testing::ValuesIn(BlitterProperties),
::testing::Values(BlitterConstants::BlitDirection::HostPtrToBuffer, BlitterConstants::BlitDirection::BufferToHostPtr)));
// For this test's CSR the active-partition configuration is expected to be a
// no-op: the reported command size is zero and programming it leaves the used
// size of the CSR command stream unchanged.
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, WhenProgrammingActivePartitionsThenExpectNoAction) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    size_t noCommandsSize = 0;
    EXPECT_EQ(noCommandsSize, csr.getCmdSizeForActivePartitionConfig());

    // Compare the stream usage around the programming call.
    size_t usedBeforeProgramming = csr.commandStream.getUsed();
    csr.programActivePartitionConfig();
    size_t usedAfterProgramming = csr.commandStream.getUsed();

    EXPECT_EQ(usedBeforeProgramming, usedAfterProgramming);
}

View File

@ -44,6 +44,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::makeResident;
using BaseClass::perDssBackedBuffer;
using BaseClass::postInitFlagsSetup;
using BaseClass::programActivePartitionConfig;
using BaseClass::programEnginePrologue;
using BaseClass::programPerDssBackedBuffer;
using BaseClass::programPreamble;
@ -55,6 +56,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::staticWorkPartitioningEnabled;
using BaseClass::wasSubmittedToSingleSubdevice;
using BaseClass::CommandStreamReceiver::activePartitions;
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
using BaseClass::CommandStreamReceiver::canUse4GbHeaps;
using BaseClass::CommandStreamReceiver::checkForNewResources;

View File

@ -302,7 +302,11 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, Resi
if (subCaptureManager->isSubCaptureMode()) {
if (!subCaptureManager->isSubCaptureEnabled()) {
if (this->standalone) {
*this->tagAddress = this->peekLatestSentTaskCount();
volatile uint32_t *pollAddress = this->tagAddress;
for (uint32_t i = 0; i < this->activePartitions; i++) {
*pollAddress = this->peekLatestSentTaskCount();
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
}
}
return true;
}
@ -339,7 +343,11 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, Resi
submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
if (this->standalone) {
*this->tagAddress = this->peekLatestSentTaskCount();
volatile uint32_t *pollAddress = this->tagAddress;
for (uint32_t i = 0; i < this->activePartitions; i++) {
*pollAddress = this->peekLatestSentTaskCount();
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
}
}
if (subCaptureManager->isSubCaptureMode()) {

View File

@ -347,6 +347,7 @@ class CommandStreamReceiver {
KernelExecutionType lastKernelExecutionType = KernelExecutionType::Default;
MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::NotApplicable;
uint32_t activePartitions = 1;
uint32_t activePartitionsConfig = 1;
const uint32_t rootDeviceIndex;
const DeviceBitfield deviceBitfield;

View File

@ -70,6 +70,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo);
size_t getCmdSizeForActivePartitionConfig() const;
bool isComputeModeNeeded() const;
bool isPipelineSelectAlreadyProgrammed() const;
@ -147,6 +148,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programActivePartitionConfig();
void programEnginePrologue(LinearStream &csr);
size_t getCmdSizeForPrologue() const;

View File

@ -230,7 +230,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
PipeControlArgs args(dispatchFlags.dcFlush);
args.notifyEnable = isUsedNotifyEnableForPostSync();
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
args.workloadPartitionOffset = this->activePartitions > 1;
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
commandStreamTask,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
@ -280,6 +280,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
lastSentNumGrfRequired = dispatchFlags.numGrfRequired;
csrSizeRequestFlags.activePartitionsChanged = this->activePartitionsConfig != this->activePartitions;
this->activePartitionsConfig = this->activePartitions;
if (dispatchFlags.threadArbitrationPolicy != ThreadArbitrationPolicy::NotPresent) {
this->requiredThreadArbitrationPolicy = dispatchFlags.threadArbitrationPolicy;
}
@ -356,6 +359,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads);
programPreemption(commandStreamCSR, dispatchFlags);
programActivePartitionConfig();
bool dshDirty = dshState.updateAndCheck(&dsh);
bool iohDirty = iohState.updateAndCheck(&ioh);
@ -811,6 +815,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo());
size += getCmdSizeForEpilogue(dispatchFlags);
size += getCmdsSizeForHardwareContext();
size += getCmdSizeForActivePartitionConfig();
if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {

View File

@ -118,4 +118,13 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPerDssBackedBuffer(const
return 0;
}
// This variant emits no commands for the active partition configuration, so
// it contributes nothing to the required command stream size.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForActivePartitionConfig() const {
    constexpr size_t noCommandsRequired = 0u;
    return noCommandsRequired;
}
// Intentionally empty in this variant: no commands are written for the
// active partition configuration.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
}
} // namespace NEO

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver_hw_base.inl"
#include "shared/source/command_stream/device_command_stream.h"
#include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h"
@ -138,4 +139,27 @@ GraphicsAllocation *CommandStreamReceiverHw<GfxFamily>::getClearColorAllocation(
return nullptr;
}
// Returns the number of bytes programActivePartitionConfig() will need:
// one EncodeSetMMIO MEM command plus one IMM command when static work
// partitioning is enabled and the active partition count was flagged as
// changed, otherwise zero.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForActivePartitionConfig() const {
    const bool configRequired = this->staticWorkPartitioningEnabled && csrSizeRequestFlags.activePartitionsChanged;
    if (!configRequired) {
        return 0;
    }
    return EncodeSetMMIO<GfxFamily>::sizeMEM +
           EncodeSetMMIO<GfxFamily>::sizeIMM;
}
// Writes the partition register setup into the CSR's own commandStream, but
// only when static work partitioning is enabled and the active partition
// count was flagged as changed. Two commands are encoded, in this order:
//   1. a register load from memory: wparidCCSOffset <- work partition allocation GPU address
//   2. a register load immediate:   addressOffsetCCSOffset <- CommonConstants::partitionAddressOffset
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
    if (!this->staticWorkPartitioningEnabled || !csrSizeRequestFlags.activePartitionsChanged) {
        return;
    }

    uint64_t partitionAllocationAddress = getWorkPartitionAllocationGpuAddress();
    EncodeSetMMIO<GfxFamily>::encodeMEM(commandStream,
                                        PartitionRegisters<GfxFamily>::wparidCCSOffset,
                                        partitionAllocationAddress);

    // NOTE(review): the meaning of the trailing `true` argument is defined by
    // EncodeSetMMIO::encodeIMM, which is not visible here - confirm before changing.
    EncodeSetMMIO<GfxFamily>::encodeIMM(commandStream,
                                        PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
                                        CommonConstants::partitionAddressOffset,
                                        true);
}
} // namespace NEO

View File

@ -123,5 +123,6 @@ struct CsrSizeRequestFlags {
bool hasSharedHandles = false;
bool numGrfRequiredChanged = false;
bool specialPipelineSelectModeChanged = false;
bool activePartitionsChanged = false;
};
} // namespace NEO

View File

@ -468,9 +468,13 @@ template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations() {
this->flushBatchedSubmissions();
while (*this->getTagAddress() < this->latestFlushedTaskCount) {
volatile uint32_t *pollAddress = this->getTagAddress();
for (uint32_t i = 0; i < this->activePartitions; i++) {
while (*pollAddress < this->latestFlushedTaskCount) {
downloadAllocation(*this->getTagAllocation());
}
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
}
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
downloadAllocation(*graphicsAllocation);
@ -535,9 +539,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocation(GraphicsAllocatio
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations() {
while (*this->getTagAddress() < this->latestFlushedTaskCount) {
volatile uint32_t *pollAddress = this->getTagAddress();
for (uint32_t i = 0; i < this->activePartitions; i++) {
while (*pollAddress < this->latestFlushedTaskCount) {
downloadAllocation(*this->getTagAllocation());
}
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
}
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
downloadAllocation(*graphicsAllocation);
}