mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
performance: use resource_barrier on Xe2 and PTL
Related-To: NEO-14943 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6b33e62d15
commit
0db5ce22a1
@@ -26,7 +26,7 @@ size_t EncodeComputeMode<Family>::getCmdSizeForComputeMode(const RootDeviceEnvir
|
||||
}
|
||||
size += sizeof(typename Family::STATE_COMPUTE_MODE);
|
||||
if (hasSharedHandles) {
|
||||
size += MemorySynchronizationCommands<Family>::getSizeForSingleBarrier();
|
||||
size += MemorySynchronizationCommands<Family>::getSizeForStallingBarrier();
|
||||
}
|
||||
if (productHelper.is3DPipelineSelectWARequired() && isRcs) {
|
||||
size += (2 * PreambleHelper<Family>::getCmdSizeForPipelineSelect(rootDeviceEnvironment));
|
||||
|
||||
@@ -831,7 +831,8 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
}
|
||||
|
||||
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
|
||||
size += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
size += MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
|
||||
size += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
}
|
||||
|
||||
return size;
|
||||
|
||||
@@ -101,6 +101,7 @@ struct Gen12LpFamily : public Gen12Lp {
|
||||
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
|
||||
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using TimestampPacketType = uint32_t;
|
||||
using StallingBarrierType = PIPE_CONTROL;
|
||||
static const GPGPU_WALKER cmdInitGpgpuWalker;
|
||||
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
|
||||
static const MEDIA_INTERFACE_DESCRIPTOR_LOAD cmdInitMediaInterfaceDescriptorLoad;
|
||||
|
||||
@@ -491,6 +491,7 @@ struct MemorySynchronizationCommands {
|
||||
static void setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args);
|
||||
static void addSingleBarrier(LinearStream &commandStream, PipeControlArgs &args);
|
||||
static void setSingleBarrier(void *commandsBuffer, PipeControlArgs &args);
|
||||
static void setStallingBarrier(void *commandsBuffer, PipeControlArgs &args);
|
||||
|
||||
static void addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args);
|
||||
static void setBarrierWithPostSyncOperation(void *&commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args);
|
||||
@@ -525,6 +526,7 @@ struct MemorySynchronizationCommands {
|
||||
static size_t getSizeForSingleAdditionalSynchronization(NEO::FenceType fenceType, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static size_t getSizeForAdditionalSynchronization(NEO::FenceType fenceType, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static size_t getSizeForInstructionCacheFlush();
|
||||
static size_t getSizeForStallingBarrier();
|
||||
|
||||
static bool isBarrierWaRequired(const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static bool isBarrierPriorToPipelineSelectWaRequired(const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
@@ -236,25 +236,40 @@ void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBu
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
|
||||
auto barrier = commandStream.getSpace(MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier());
|
||||
auto barrierSize = MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
if (args.csStallOnly) {
|
||||
barrierSize = MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
|
||||
}
|
||||
auto barrier = commandStream.getSpace(barrierSize);
|
||||
|
||||
setSingleBarrier(barrier, postSyncMode, gpuAddress, immediateData, args);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
|
||||
void MemorySynchronizationCommands<GfxFamily>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl;
|
||||
|
||||
pipeControl.setCommandStreamerStallEnable(true);
|
||||
setBarrierExtraProperties(&pipeControl, args);
|
||||
*reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = pipeControl;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
if (args.csStallOnly) {
|
||||
*reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = pipeControl;
|
||||
setStallingBarrier(commandsBuffer, args);
|
||||
return;
|
||||
}
|
||||
|
||||
PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl;
|
||||
|
||||
pipeControl.setCommandStreamerStallEnable(true);
|
||||
setBarrierExtraProperties(&pipeControl, args);
|
||||
|
||||
pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable);
|
||||
pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable);
|
||||
pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable);
|
||||
@@ -335,7 +350,7 @@ void MemorySynchronizationCommands<GfxFamily>::setBarrierWa(void *&commandsBuffe
|
||||
additionalArgs.csStallOnly = true;
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(commandsBuffer, additionalArgs);
|
||||
commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL));
|
||||
commandsBuffer = ptrOffset(commandsBuffer, getSizeForStallingBarrier());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -384,7 +399,7 @@ size_t MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWa(const RootD
|
||||
size = getSizeForSingleBarrier() +
|
||||
getSizeForSingleAdditionalSynchronization(NEO::FenceType::release, rootDeviceEnvironment);
|
||||
} else if (releaseHelper && postSyncMode == PostSyncMode::timestamp && releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()) {
|
||||
size = getSizeForSingleBarrier();
|
||||
size = getSizeForStallingBarrier();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@@ -851,6 +866,11 @@ bool GfxCoreHelperHw<Family>::isCacheFlushPriorImageReadRequired() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier() {
|
||||
return sizeof(typename GfxFamily::StallingBarrierType);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t GfxCoreHelperHw<Family>::getQueuePriorityLevels() const {
|
||||
return 2;
|
||||
|
||||
@@ -291,6 +291,17 @@ uint32_t GfxCoreHelperHw<Family>::getDeviceTimestampWidth() const {
|
||||
return 64u;
|
||||
};
|
||||
|
||||
template <>
|
||||
void MemorySynchronizationCommands<Family>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
|
||||
using RESOURCE_BARRIER = typename Family::RESOURCE_BARRIER;
|
||||
|
||||
auto resourceBarrier = Family::cmdInitResourceBarrier;
|
||||
resourceBarrier.setBarrierType(RESOURCE_BARRIER::BARRIER_TYPE::BARRIER_TYPE_IMMEDIATE);
|
||||
resourceBarrier.setWaitStage(RESOURCE_BARRIER::WAIT_STAGE::WAIT_STAGE_TOP);
|
||||
resourceBarrier.setSignalStage(RESOURCE_BARRIER::SIGNAL_STAGE::SIGNAL_STAGE_GPGPU);
|
||||
*reinterpret_cast<RESOURCE_BARRIER *>(commandsBuffer) = resourceBarrier;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -108,6 +108,7 @@ struct Xe2HpgCoreFamily : public Xe2HpgCore {
|
||||
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
|
||||
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using TimestampPacketType = uint64_t;
|
||||
using StallingBarrierType = RESOURCE_BARRIER;
|
||||
static const COMPUTE_WALKER cmdInitGpgpuWalker;
|
||||
static const CFE_STATE cmdInitCfeState;
|
||||
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
|
||||
|
||||
@@ -259,6 +259,18 @@ uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount
|
||||
const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, rootDeviceEnvironment);
|
||||
return (threadsPerThreadGroup * simd);
|
||||
}
|
||||
|
||||
template <>
|
||||
void MemorySynchronizationCommands<Family>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
|
||||
using RESOURCE_BARRIER = typename Family::RESOURCE_BARRIER;
|
||||
|
||||
auto resourceBarrier = Family::cmdInitResourceBarrier;
|
||||
resourceBarrier.setBarrierType(RESOURCE_BARRIER::BARRIER_TYPE::BARRIER_TYPE_IMMEDIATE);
|
||||
resourceBarrier.setWaitStage(RESOURCE_BARRIER::WAIT_STAGE::WAIT_STAGE_TOP);
|
||||
resourceBarrier.setSignalStage(RESOURCE_BARRIER::SIGNAL_STAGE::SIGNAL_STAGE_GPGPU);
|
||||
*reinterpret_cast<RESOURCE_BARRIER *>(commandsBuffer) = resourceBarrier;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -110,6 +110,7 @@ struct Xe3CoreFamily : public Xe3Core {
|
||||
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
|
||||
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using TimestampPacketType = uint64_t;
|
||||
using StallingBarrierType = RESOURCE_BARRIER;
|
||||
static const COMPUTE_WALKER cmdInitGpgpuWalker;
|
||||
static const CFE_STATE cmdInitCfeState;
|
||||
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
|
||||
|
||||
@@ -90,6 +90,7 @@ struct XeHpcCoreFamily : public XeHpcCore {
|
||||
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
|
||||
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using TimestampPacketType = uint32_t;
|
||||
using StallingBarrierType = PIPE_CONTROL;
|
||||
static const COMPUTE_WALKER cmdInitGpgpuWalker;
|
||||
static const CFE_STATE cmdInitCfeState;
|
||||
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
|
||||
|
||||
@@ -110,6 +110,7 @@ struct XeHpgCoreFamily : public XeHpgCore {
|
||||
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
|
||||
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using TimestampPacketType = uint32_t;
|
||||
using StallingBarrierType = PIPE_CONTROL;
|
||||
static const COMPUTE_WALKER cmdInitGpgpuWalker;
|
||||
static const CFE_STATE cmdInitCfeState;
|
||||
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
|
||||
|
||||
Reference in New Issue
Block a user