mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Enable multi-tile task count post-sync writes
Related-To: NEO-6244
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
91e9587a07
commit
6091861f3e
@ -87,8 +87,9 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
|
||||
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
|
||||
auto idd = &walkerCmd.getInterfaceDescriptor();
|
||||
auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver();
|
||||
|
||||
if (currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
if (currentTimestampPacketNodes && queueCsr.peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
|
||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
}
|
||||
@ -123,7 +124,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd);
|
||||
|
||||
auto devices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getDeviceBitfield();
|
||||
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
||||
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred());
|
||||
|
||||
if (partitionWalker) {
|
||||
@ -137,7 +138,9 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
false,
|
||||
kernel.usesImages(),
|
||||
workPartitionAllocationGpuVa);
|
||||
|
||||
if (queueCsr.isStaticWorkPartitioningEnabled()) {
|
||||
queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), partitionCount));
|
||||
}
|
||||
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
|
||||
timestampPacket->setPacketsUsed(partitionCount);
|
||||
} else {
|
||||
|
@ -1410,6 +1410,72 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenOpenClWhe
|
||||
EXPECT_EQ(0u, storeDataImmList.size());
|
||||
}
|
||||
|
||||
struct XeHPAndLaterDispatchWalkerBasicTestDynamicPartition : public XeHPAndLaterDispatchWalkerBasicTest {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
DebugManager.flags.EnableStaticPartitioning.set(0);
|
||||
DebugManager.flags.EnableWalkerPartition.set(1u);
|
||||
|
||||
XeHPAndLaterDispatchWalkerBasicTest::SetUp();
|
||||
}
|
||||
void TearDown() override {
|
||||
XeHPAndLaterDispatchWalkerBasicTest::TearDown();
|
||||
}
|
||||
};
|
||||
|
||||
// With static partitioning disabled, enqueueing a kernel must leave the CSR at
// one active partition while the parsed walker is still partitioned along X.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTestDynamicPartition, givenDynamicPartitioningWhenEnqueueingKernelThenNoMultipleActivePartitionsSetInCsr) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    size_t globalWorkSize[] = {128, 1, 1};
    size_t localWorkSize[] = {8, 1, 1};
    auto &csrUnderTest = mockCmdQ->getUltCommandStreamReceiver();

    // The partition count is checked on both sides of the enqueue.
    EXPECT_EQ(1u, csrUnderTest.activePartitions);
    mockCmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
    EXPECT_EQ(1u, csrUnderTest.activePartitions);

    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(*mockCmdQ);

    auto *walker = reinterpret_cast<COMPUTE_WALKER *>(hwParse.cmdWalker);
    ASSERT_NE(nullptr, walker);
    EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker->getPartitionType());
    EXPECT_EQ(8u, walker->getPartitionSize());
}
|
||||
|
||||
struct XeHPAndLaterDispatchWalkerBasicTestStaticPartition : public XeHPAndLaterDispatchWalkerBasicTest {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
DebugManager.flags.EnableStaticPartitioning.set(1);
|
||||
DebugManager.flags.EnableWalkerPartition.set(1u);
|
||||
|
||||
XeHPAndLaterDispatchWalkerBasicTest::SetUp();
|
||||
}
|
||||
void TearDown() override {
|
||||
XeHPAndLaterDispatchWalkerBasicTest::TearDown();
|
||||
}
|
||||
};
|
||||
|
||||
// With static partitioning enabled, enqueueing a kernel must raise the CSR's
// active partition count from 1 to 2; the parsed walker is partitioned along X.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTestStaticPartition, givenStaticPartitioningWhenEnqueueingKernelThenMultipleActivePartitionsAreSetInCsr) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    size_t globalWorkSize[] = {128, 1, 1};
    size_t localWorkSize[] = {8, 1, 1};
    auto &csrUnderTest = mockCmdQ->getUltCommandStreamReceiver();

    // The partition count is checked on both sides of the enqueue.
    EXPECT_EQ(1u, csrUnderTest.activePartitions);
    mockCmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
    EXPECT_EQ(2u, csrUnderTest.activePartitions);

    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(*mockCmdQ);

    auto *walker = reinterpret_cast<COMPUTE_WALKER *>(hwParse.cmdWalker);
    ASSERT_NE(nullptr, walker);
    EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, walker->getPartitionType());
    EXPECT_EQ(8u, walker->getPartitionSize());
}
|
||||
|
||||
using NonDefaultPlatformGpuWalkerTest = XeHPAndLaterDispatchWalkerBasicTest;
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, NonDefaultPlatformGpuWalkerTest, givenNonDefaultPlatformWhenSetupTimestampPacketThenGmmHelperIsTakenFromNonDefaultPlatform) {
|
||||
|
@ -684,23 +684,24 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
|
||||
EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiverStream.getGraphicsAllocation()));
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenMultipleActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOn) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.activePartitions = 2;
|
||||
commandStreamReceiver.taskCount = 3;
|
||||
flushTask(commandStreamReceiver, true);
|
||||
|
||||
parseCommands<FamilyType>(commandStream, 0);
|
||||
struct CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests : public CommandStreamReceiverFlushTaskXeHPAndLaterTests {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
parsePipeControl = true;
|
||||
findHardwareCommands<FamilyType>();
|
||||
CommandStreamReceiverFlushTaskXeHPAndLaterTests::SetUp();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void verifyPipeControl(UltCommandStreamReceiver<GfxFamily> &commandStreamReceiver, uint32_t expectedTaskCount, bool workLoadPartition) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
uint64_t gpuAddressTagAllocation = commandStreamReceiver.getTagAllocation()->getGpuAddress();
|
||||
uint32_t gpuAddressLow = static_cast<uint32_t>(gpuAddressTagAllocation & 0x0000FFFFFFFFULL);
|
||||
uint32_t gpuAddressHigh = static_cast<uint32_t>(gpuAddressTagAllocation >> 32);
|
||||
|
||||
bool pipeControlTagUpdate = false;
|
||||
bool pipeControlWorkloadPartition = false;
|
||||
|
||||
auto itorPipeControl = pipeControlList.begin();
|
||||
while (itorPipeControl != pipeControlList.end()) {
|
||||
auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(*itorPipeControl);
|
||||
@ -708,13 +709,161 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
|
||||
pipeControlTagUpdate = true;
|
||||
if (pipeControl->getWorkloadPartitionIdOffsetEnable()) {
|
||||
pipeControlWorkloadPartition = true;
|
||||
}
|
||||
EXPECT_EQ(gpuAddressLow, pipeControl->getAddress());
|
||||
EXPECT_EQ(gpuAddressHigh, pipeControl->getAddressHigh());
|
||||
EXPECT_EQ(4u, pipeControl->getImmediateData());
|
||||
EXPECT_EQ(expectedTaskCount, pipeControl->getImmediateData());
|
||||
break;
|
||||
}
|
||||
itorPipeControl++;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_TRUE(pipeControlTagUpdate);
|
||||
if (workLoadPartition) {
|
||||
EXPECT_TRUE(pipeControlWorkloadPartition);
|
||||
} else {
|
||||
EXPECT_FALSE(pipeControlWorkloadPartition);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void verifyActivePartitionConfig(UltCommandStreamReceiver<GfxFamily> &commandStreamReceiver, bool activePartitionExists) {
|
||||
using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
uint64_t expectedWparidData = 0u;
|
||||
if (activePartitionExists) {
|
||||
expectedWparidData = commandStreamReceiver.getWorkPartitionAllocationGpuAddress();
|
||||
}
|
||||
uint32_t expectedWparidRegister = 0x221C;
|
||||
uint32_t expectedAddressOffsetData = 8;
|
||||
uint32_t expectedAddressOffsetRegister = 0x23B4;
|
||||
|
||||
bool wparidConfiguration = false;
|
||||
bool addressOffsetConfiguration = false;
|
||||
|
||||
auto lrmList = getCommandsList<MI_LOAD_REGISTER_MEM>();
|
||||
auto itorWparidRegister = lrmList.begin();
|
||||
while (itorWparidRegister != lrmList.end()) {
|
||||
auto loadRegisterMem = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*itorWparidRegister);
|
||||
|
||||
if (loadRegisterMem->getRegisterAddress() == expectedWparidRegister) {
|
||||
wparidConfiguration = true;
|
||||
EXPECT_EQ(expectedWparidData, loadRegisterMem->getMemoryAddress());
|
||||
break;
|
||||
}
|
||||
itorWparidRegister++;
|
||||
}
|
||||
|
||||
auto itorAddressOffsetRegister = lriList.begin();
|
||||
while (itorAddressOffsetRegister != lriList.end()) {
|
||||
auto loadRegisterImm = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*itorAddressOffsetRegister);
|
||||
|
||||
if (loadRegisterImm->getRegisterOffset() == expectedAddressOffsetRegister) {
|
||||
addressOffsetConfiguration = true;
|
||||
EXPECT_EQ(expectedAddressOffsetData, loadRegisterImm->getDataDword());
|
||||
break;
|
||||
}
|
||||
itorAddressOffsetRegister++;
|
||||
}
|
||||
|
||||
if (activePartitionExists) {
|
||||
EXPECT_TRUE(wparidConfiguration);
|
||||
EXPECT_TRUE(addressOffsetConfiguration);
|
||||
} else {
|
||||
EXPECT_FALSE(wparidConfiguration);
|
||||
EXPECT_FALSE(addressOffsetConfiguration);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void prepareLinearStream(LinearStream &parsedStream, size_t offset) {
|
||||
cmdList.clear();
|
||||
lriList.clear();
|
||||
pipeControlList.clear();
|
||||
|
||||
parseCommands<GfxFamily>(parsedStream, offset);
|
||||
findHardwareCommands<GfxFamily>();
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
// Two active partitions with static partitioning on: the flush must update the
// recorded partition config to 2, emit a tag-update pipe control with the
// partition offset flag and immediate data 4, and program the partition config
// in the CSR stream.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenMultipleStaticActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) {
    auto &csrUnderTest = pDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_EQ(1u, csrUnderTest.activePartitionsConfig);
    csrUnderTest.activePartitions = 2;
    csrUnderTest.taskCount = 3;
    EXPECT_TRUE(csrUnderTest.staticWorkPartitioningEnabled);

    flushTask(csrUnderTest, true);
    EXPECT_EQ(2u, csrUnderTest.activePartitionsConfig);

    // Task stream: tag update pipe control.
    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(csrUnderTest, 4, true);

    // CSR stream: partition register programming.
    prepareLinearStream<FamilyType>(csrUnderTest.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(csrUnderTest, true);
}
|
||||
|
||||
// Two active partitions with static partitioning switched off: the recorded
// partition config still becomes 2, but the tag-update pipe control must not
// carry the partition offset flag and no partition config may be programmed.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenMultipleDynamicActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
    auto &csrUnderTest = pDevice->getUltCommandStreamReceiver<FamilyType>();

    csrUnderTest.staticWorkPartitioningEnabled = false;
    csrUnderTest.taskCount = 3;
    csrUnderTest.activePartitions = 2;

    flushTask(csrUnderTest, true);
    EXPECT_EQ(2u, csrUnderTest.activePartitionsConfig);

    // Task stream: tag update pipe control.
    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(csrUnderTest, 4, false);

    // CSR stream: partition register programming must be absent.
    prepareLinearStream<FamilyType>(csrUnderTest.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(csrUnderTest, false);
}
|
||||
|
||||
// A single active partition: the tag-update pipe control must not carry the
// partition offset flag and no partition config may be programmed.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
    auto &csrUnderTest = pDevice->getUltCommandStreamReceiver<FamilyType>();

    csrUnderTest.taskCount = 3;
    csrUnderTest.activePartitions = 1;
    flushTask(csrUnderTest, true);

    // NOTE(review): this explicit parse looks redundant with the
    // prepareLinearStream call right below, which clears and re-parses the
    // same stream - confirm whether these three statements can be dropped.
    parseCommands<FamilyType>(commandStream, 0);
    parsePipeControl = true;
    findHardwareCommands<FamilyType>();

    // Task stream: tag update pipe control.
    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(csrUnderTest, 4, false);

    // CSR stream: partition register programming must be absent.
    prepareLinearStream<FamilyType>(csrUnderTest.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(csrUnderTest, false);
}
|
||||
|
||||
// Two consecutive flushes with two static partitions: the first flush programs
// the partition config, the second must not program it again, while both tag
// updates keep the partition offset flag (immediate data 4, then 5).
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
            givenMultipleStaticActivePartitionsWhenFlushingTaskTwiceThenExpectTagUpdatePipeControlWithPartitionFlagOnAndNoActivePartitionConfigAtSecondFlush) {
    auto &csrUnderTest = pDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_EQ(1u, csrUnderTest.activePartitionsConfig);
    csrUnderTest.activePartitions = 2;
    csrUnderTest.taskCount = 3;
    EXPECT_TRUE(csrUnderTest.staticWorkPartitioningEnabled);

    // First flush: partition config is expected in the CSR stream.
    flushTask(csrUnderTest, true);
    EXPECT_EQ(2u, csrUnderTest.activePartitionsConfig);

    prepareLinearStream<FamilyType>(commandStream, 0);
    verifyPipeControl<FamilyType>(csrUnderTest, 4, true);

    prepareLinearStream<FamilyType>(csrUnderTest.commandStream, 0);
    verifyActivePartitionConfig<FamilyType>(csrUnderTest, true);

    // Remember where each stream ended so only second-flush commands are parsed.
    size_t taskStreamOffset = commandStream.getUsed();
    size_t csrStreamOffset = csrUnderTest.commandStream.getUsed();

    // Second flush: no repeated partition config.
    flushTask(csrUnderTest, true);

    prepareLinearStream<FamilyType>(commandStream, taskStreamOffset);
    verifyPipeControl<FamilyType>(csrUnderTest, 5, true);

    prepareLinearStream<FamilyType>(csrUnderTest.commandStream, csrStreamOffset);
    verifyActivePartitionConfig<FamilyType>(csrUnderTest, false);
}
|
||||
|
@ -1400,3 +1400,13 @@ INSTANTIATE_TEST_CASE_P(BcsDetaliedTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(BlitterProperties),
|
||||
::testing::Values(BlitterConstants::BlitDirection::HostPtrToBuffer, BlitterConstants::BlitDirection::BufferToHostPtr)));
|
||||
|
||||
// For this family the active-partition hooks must report a zero command size
// and leave the CSR command stream usage untouched.
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, WhenProgrammingActivePartitionsThenExpectNoAction) {
    auto &csrUnderTest = pDevice->getUltCommandStreamReceiver<FamilyType>();

    size_t noCommandsSize = 0;
    EXPECT_EQ(noCommandsSize, csrUnderTest.getCmdSizeForActivePartitionConfig());

    size_t usedBeforeProgramming = csrUnderTest.commandStream.getUsed();
    csrUnderTest.programActivePartitionConfig();
    size_t usedAfterProgramming = csrUnderTest.commandStream.getUsed();

    EXPECT_EQ(usedBeforeProgramming, usedAfterProgramming);
}
|
||||
|
@ -44,6 +44,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::makeResident;
|
||||
using BaseClass::perDssBackedBuffer;
|
||||
using BaseClass::postInitFlagsSetup;
|
||||
using BaseClass::programActivePartitionConfig;
|
||||
using BaseClass::programEnginePrologue;
|
||||
using BaseClass::programPerDssBackedBuffer;
|
||||
using BaseClass::programPreamble;
|
||||
@ -55,6 +56,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::staticWorkPartitioningEnabled;
|
||||
using BaseClass::wasSubmittedToSingleSubdevice;
|
||||
using BaseClass::CommandStreamReceiver::activePartitions;
|
||||
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
|
||||
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
|
||||
using BaseClass::CommandStreamReceiver::canUse4GbHeaps;
|
||||
using BaseClass::CommandStreamReceiver::checkForNewResources;
|
||||
|
@ -302,7 +302,11 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, Resi
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
if (!subCaptureManager->isSubCaptureEnabled()) {
|
||||
if (this->standalone) {
|
||||
*this->tagAddress = this->peekLatestSentTaskCount();
|
||||
volatile uint32_t *pollAddress = this->tagAddress;
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
*pollAddress = this->peekLatestSentTaskCount();
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -339,7 +343,11 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, Resi
|
||||
submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
|
||||
|
||||
if (this->standalone) {
|
||||
*this->tagAddress = this->peekLatestSentTaskCount();
|
||||
volatile uint32_t *pollAddress = this->tagAddress;
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
*pollAddress = this->peekLatestSentTaskCount();
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
}
|
||||
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
|
@ -347,6 +347,7 @@ class CommandStreamReceiver {
|
||||
KernelExecutionType lastKernelExecutionType = KernelExecutionType::Default;
|
||||
MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::NotApplicable;
|
||||
uint32_t activePartitions = 1;
|
||||
uint32_t activePartitionsConfig = 1;
|
||||
|
||||
const uint32_t rootDeviceIndex;
|
||||
const DeviceBitfield deviceBitfield;
|
||||
|
@ -70,6 +70,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
|
||||
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo);
|
||||
size_t getCmdSizeForActivePartitionConfig() const;
|
||||
|
||||
bool isComputeModeNeeded() const;
|
||||
bool isPipelineSelectAlreadyProgrammed() const;
|
||||
@ -147,6 +148,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
||||
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
void programActivePartitionConfig();
|
||||
|
||||
void programEnginePrologue(LinearStream &csr);
|
||||
size_t getCmdSizeForPrologue() const;
|
||||
|
@ -230,7 +230,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
PipeControlArgs args(dispatchFlags.dcFlush);
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
||||
args.workloadPartitionOffset = this->activePartitions > 1;
|
||||
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
commandStreamTask,
|
||||
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
@ -280,6 +280,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
|
||||
lastSentNumGrfRequired = dispatchFlags.numGrfRequired;
|
||||
|
||||
csrSizeRequestFlags.activePartitionsChanged = this->activePartitionsConfig != this->activePartitions;
|
||||
this->activePartitionsConfig = this->activePartitions;
|
||||
|
||||
if (dispatchFlags.threadArbitrationPolicy != ThreadArbitrationPolicy::NotPresent) {
|
||||
this->requiredThreadArbitrationPolicy = dispatchFlags.threadArbitrationPolicy;
|
||||
}
|
||||
@ -356,6 +359,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads);
|
||||
|
||||
programPreemption(commandStreamCSR, dispatchFlags);
|
||||
programActivePartitionConfig();
|
||||
|
||||
bool dshDirty = dshState.updateAndCheck(&dsh);
|
||||
bool iohDirty = iohState.updateAndCheck(&ioh);
|
||||
@ -811,6 +815,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo());
|
||||
size += getCmdSizeForEpilogue(dispatchFlags);
|
||||
size += getCmdsSizeForHardwareContext();
|
||||
size += getCmdSizeForActivePartitionConfig();
|
||||
|
||||
if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
|
||||
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
|
||||
|
@ -118,4 +118,13 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPerDssBackedBuffer(const
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForActivePartitionConfig() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_base.inl"
|
||||
#include "shared/source/command_stream/device_command_stream.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h"
|
||||
@ -138,4 +139,27 @@ GraphicsAllocation *CommandStreamReceiverHw<GfxFamily>::getClearColorAllocation(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForActivePartitionConfig() const {
|
||||
if (this->staticWorkPartitioningEnabled && csrSizeRequestFlags.activePartitionsChanged) {
|
||||
return EncodeSetMMIO<GfxFamily>::sizeMEM +
|
||||
EncodeSetMMIO<GfxFamily>::sizeIMM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
|
||||
if (this->staticWorkPartitioningEnabled && csrSizeRequestFlags.activePartitionsChanged) {
|
||||
uint64_t workPartitionAddress = getWorkPartitionAllocationGpuAddress();
|
||||
EncodeSetMMIO<GfxFamily>::encodeMEM(commandStream,
|
||||
PartitionRegisters<GfxFamily>::wparidCCSOffset,
|
||||
workPartitionAddress);
|
||||
EncodeSetMMIO<GfxFamily>::encodeIMM(commandStream,
|
||||
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
CommonConstants::partitionAddressOffset,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -123,5 +123,6 @@ struct CsrSizeRequestFlags {
|
||||
bool hasSharedHandles = false;
|
||||
bool numGrfRequiredChanged = false;
|
||||
bool specialPipelineSelectModeChanged = false;
|
||||
bool activePartitionsChanged = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
@ -468,9 +468,13 @@ template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations() {
|
||||
this->flushBatchedSubmissions();
|
||||
|
||||
while (*this->getTagAddress() < this->latestFlushedTaskCount) {
|
||||
volatile uint32_t *pollAddress = this->getTagAddress();
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
while (*pollAddress < this->latestFlushedTaskCount) {
|
||||
downloadAllocation(*this->getTagAllocation());
|
||||
}
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
|
||||
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
|
||||
downloadAllocation(*graphicsAllocation);
|
||||
@ -535,9 +539,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocation(GraphicsAllocatio
|
||||
|
||||
template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations() {
|
||||
while (*this->getTagAddress() < this->latestFlushedTaskCount) {
|
||||
volatile uint32_t *pollAddress = this->getTagAddress();
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
while (*pollAddress < this->latestFlushedTaskCount) {
|
||||
downloadAllocation(*this->getTagAllocation());
|
||||
}
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
|
||||
downloadAllocation(*graphicsAllocation);
|
||||
}
|
||||
|
Reference in New Issue
Block a user