mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Enable multi-tile task count post-sync writes
Related-To: NEO-6244
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
91e9587a07
commit
6091861f3e
@@ -302,7 +302,11 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, Resi
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
if (!subCaptureManager->isSubCaptureEnabled()) {
|
||||
if (this->standalone) {
|
||||
*this->tagAddress = this->peekLatestSentTaskCount();
|
||||
volatile uint32_t *pollAddress = this->tagAddress;
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
*pollAddress = this->peekLatestSentTaskCount();
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -339,7 +343,11 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, Resi
|
||||
submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
|
||||
|
||||
if (this->standalone) {
|
||||
*this->tagAddress = this->peekLatestSentTaskCount();
|
||||
volatile uint32_t *pollAddress = this->tagAddress;
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
*pollAddress = this->peekLatestSentTaskCount();
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
}
|
||||
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
|
||||
@@ -347,6 +347,7 @@ class CommandStreamReceiver {
|
||||
KernelExecutionType lastKernelExecutionType = KernelExecutionType::Default;
|
||||
MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::NotApplicable;
|
||||
uint32_t activePartitions = 1;
|
||||
uint32_t activePartitionsConfig = 1;
|
||||
|
||||
const uint32_t rootDeviceIndex;
|
||||
const DeviceBitfield deviceBitfield;
|
||||
|
||||
@@ -70,6 +70,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
|
||||
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo);
|
||||
size_t getCmdSizeForActivePartitionConfig() const;
|
||||
|
||||
bool isComputeModeNeeded() const;
|
||||
bool isPipelineSelectAlreadyProgrammed() const;
|
||||
@@ -147,6 +148,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
||||
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
void programActivePartitionConfig();
|
||||
|
||||
void programEnginePrologue(LinearStream &csr);
|
||||
size_t getCmdSizeForPrologue() const;
|
||||
|
||||
@@ -230,7 +230,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
PipeControlArgs args(dispatchFlags.dcFlush);
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
||||
args.workloadPartitionOffset = this->activePartitions > 1;
|
||||
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
commandStreamTask,
|
||||
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
@@ -280,6 +280,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
|
||||
lastSentNumGrfRequired = dispatchFlags.numGrfRequired;
|
||||
|
||||
csrSizeRequestFlags.activePartitionsChanged = this->activePartitionsConfig != this->activePartitions;
|
||||
this->activePartitionsConfig = this->activePartitions;
|
||||
|
||||
if (dispatchFlags.threadArbitrationPolicy != ThreadArbitrationPolicy::NotPresent) {
|
||||
this->requiredThreadArbitrationPolicy = dispatchFlags.threadArbitrationPolicy;
|
||||
}
|
||||
@@ -356,6 +359,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads);
|
||||
|
||||
programPreemption(commandStreamCSR, dispatchFlags);
|
||||
programActivePartitionConfig();
|
||||
|
||||
bool dshDirty = dshState.updateAndCheck(&dsh);
|
||||
bool iohDirty = iohState.updateAndCheck(&ioh);
|
||||
@@ -811,6 +815,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo());
|
||||
size += getCmdSizeForEpilogue(dispatchFlags);
|
||||
size += getCmdsSizeForHardwareContext();
|
||||
size += getCmdSizeForActivePartitionConfig();
|
||||
|
||||
if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
|
||||
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
|
||||
|
||||
@@ -118,4 +118,13 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPerDssBackedBuffer(const
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForActivePartitionConfig() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_base.inl"
|
||||
#include "shared/source/command_stream/device_command_stream.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h"
|
||||
@@ -138,4 +139,27 @@ GraphicsAllocation *CommandStreamReceiverHw<GfxFamily>::getClearColorAllocation(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForActivePartitionConfig() const {
|
||||
if (this->staticWorkPartitioningEnabled && csrSizeRequestFlags.activePartitionsChanged) {
|
||||
return EncodeSetMMIO<GfxFamily>::sizeMEM +
|
||||
EncodeSetMMIO<GfxFamily>::sizeIMM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
|
||||
if (this->staticWorkPartitioningEnabled && csrSizeRequestFlags.activePartitionsChanged) {
|
||||
uint64_t workPartitionAddress = getWorkPartitionAllocationGpuAddress();
|
||||
EncodeSetMMIO<GfxFamily>::encodeMEM(commandStream,
|
||||
PartitionRegisters<GfxFamily>::wparidCCSOffset,
|
||||
workPartitionAddress);
|
||||
EncodeSetMMIO<GfxFamily>::encodeIMM(commandStream,
|
||||
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
CommonConstants::partitionAddressOffset,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -123,5 +123,6 @@ struct CsrSizeRequestFlags {
|
||||
bool hasSharedHandles = false;
|
||||
bool numGrfRequiredChanged = false;
|
||||
bool specialPipelineSelectModeChanged = false;
|
||||
bool activePartitionsChanged = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -468,8 +468,12 @@ template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations() {
|
||||
this->flushBatchedSubmissions();
|
||||
|
||||
while (*this->getTagAddress() < this->latestFlushedTaskCount) {
|
||||
downloadAllocation(*this->getTagAllocation());
|
||||
volatile uint32_t *pollAddress = this->getTagAddress();
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
while (*pollAddress < this->latestFlushedTaskCount) {
|
||||
downloadAllocation(*this->getTagAllocation());
|
||||
}
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
|
||||
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
|
||||
@@ -535,8 +539,12 @@ void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocation(GraphicsAllocatio
|
||||
|
||||
template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations() {
|
||||
while (*this->getTagAddress() < this->latestFlushedTaskCount) {
|
||||
downloadAllocation(*this->getTagAllocation());
|
||||
volatile uint32_t *pollAddress = this->getTagAddress();
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
while (*pollAddress < this->latestFlushedTaskCount) {
|
||||
downloadAllocation(*this->getTagAllocation());
|
||||
}
|
||||
pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset);
|
||||
}
|
||||
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
|
||||
downloadAllocation(*graphicsAllocation);
|
||||
|
||||
Reference in New Issue
Block a user