Refactor partitioning of dispatched kernels

Related-To: NEO-6589
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
Committed by: Compute-Runtime-Automation
Parent: 182042b04d
Commit: 9c4f05387b
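Taken together, the hunks below add a bool isPartitioned parameter to EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize and make the encode() paths derive their runtime checks from args.partitionCount instead of re-querying ImplicitScalingHelper::isImplicitScalingEnabled: one call site passes false, the other passes args.partitionCount > 1. As a rough, hypothetical illustration of the pattern only (simplified types and placeholder sizes, not the NEO sources), the caller computes the partitioning decision once and hands it to the size estimator:

// Hypothetical sketch: the flag is decided by the caller and threaded through,
// instead of the estimator querying device-wide state itself.
#include <cstddef>
#include <cstdint>

struct DispatchArgs {
    uint32_t partitionCount = 1; // > 1 means the walker is split across partitions
    bool isCooperative = false;
    bool isInternal = false;
};

size_t estimateDispatchCmdsSize(const DispatchArgs &args, bool isPartitioned) {
    size_t totalSize = 64; // placeholder for the base walker size
    if (isPartitioned && !args.isCooperative && !args.isInternal) {
        totalSize += 256; // placeholder for the implicit-scaling command overhead
    }
    return totalSize;
}

size_t encodeDispatch(const DispatchArgs &args) {
    const bool isPartitioned = args.partitionCount > 1; // mirrors the call sites below
    return estimateDispatchCmdsSize(args, isPartitioned);
}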
@@ -61,7 +61,8 @@ struct EncodeDispatchKernel {
     static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
 
     static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount,
-                                                        bool isInternal, bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface);
+                                                        bool isInternal, bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface,
+                                                        bool isPartitioned);
 
     static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
                                                     size_t *lws,
@@ -50,7 +50,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
     }
     size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(args.device, threadStartVec, threadDimsVec,
                                                                         args.isInternal, args.isCooperative, args.isIndirect,
-                                                                        args.dispatchInterface);
+                                                                        args.dispatchInterface, false);
     if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
         auto bbEnd = listCmdBufferStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
         *bbEnd = Family::cmdInitBatchBufferEnd;
@@ -333,7 +333,8 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTO
 template <typename Family>
 size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
                                                                           const Vec3<size_t> &groupCount, bool isInternal,
-                                                                          bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface) {
+                                                                          bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface,
+                                                                          bool isPartitioned) {
     using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
     using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
     using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;
@@ -59,7 +59,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
         threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
     }
     size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(args.device, threadStartVec, threadDimsVec,
-                                                                        args.isInternal, args.isCooperative, args.isIndirect, args.dispatchInterface);
+                                                                        args.isInternal, args.isCooperative, args.isIndirect, args.dispatchInterface,
+                                                                        args.partitionCount > 1);
     if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
         auto bbEnd = listCmdBufferStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
         *bbEnd = Family::cmdInitBatchBufferEnd;
@@ -199,7 +200,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
     }
 
     bool requiresGlobalAtomicsUpdate = false;
-    if (ImplicitScalingHelper::isImplicitScalingEnabled(container.getDevice()->getDeviceBitfield(), true)) {
+    if (args.partitionCount > 1) {
        requiresGlobalAtomicsUpdate = container.lastSentUseGlobalAtomics != args.useGlobalAtomics;
        container.lastSentUseGlobalAtomics = args.useGlobalAtomics;
     }
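The global-atomics bookkeeping above now keys on the caller-provided partition count alone. A minimal, hypothetical restatement of that branch as standalone code (names simplified, not the NEO types):

#include <cstdint>

struct ContainerState {
    bool lastSentUseGlobalAtomics = false; // previously sent value
};

// Returns true when the global-atomics setting changed for a partitioned dispatch.
bool updateGlobalAtomics(ContainerState &container, uint32_t partitionCount, bool useGlobalAtomics) {
    bool requiresGlobalAtomicsUpdate = false;
    if (partitionCount > 1) {
        requiresGlobalAtomicsUpdate = container.lastSentUseGlobalAtomics != useGlobalAtomics;
        container.lastSentUseGlobalAtomics = useGlobalAtomics;
    }
    return requiresGlobalAtomicsUpdate;
}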
@@ -269,7 +270,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
 
     PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
 
-    if (ImplicitScalingHelper::isImplicitScalingEnabled(args.device->getDeviceBitfield(), !args.isCooperative) &&
+    if ((args.partitionCount > 1 && !args.isCooperative) &&
         !args.isInternal) {
         const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
         if (args.eventAddress != 0) {
@@ -440,7 +441,8 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
 template <typename Family>
 size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
                                                                           const Vec3<size_t> &groupCount, bool isInternal,
-                                                                          bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface) {
+                                                                          bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface,
+                                                                          bool isPartitioned) {
     size_t totalSize = sizeof(WALKER_TYPE);
     totalSize += PreemptionHelper::getPreemptionWaCsSize<Family>(*device);
     totalSize += EncodeStates<Family>::getAdjustStateComputeModeSize();
@@ -457,7 +459,7 @@ size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device
         }
     }
 
-    if (ImplicitScalingHelper::isImplicitScalingEnabled(device->getDeviceBitfield(), !isCooperative) &&
+    if ((isPartitioned && !isCooperative) &&
         !isInternal) {
         const bool staticPartitioning = device->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled();
         totalSize += ImplicitScalingDispatch<Family>::getSize(true, staticPartitioning, device->getDeviceBitfield(), groupStart, groupCount);
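With these hunks, encode() and estimateEncodeDispatchKernelCmdsSize() gate the implicit-scaling commands on the same condition rather than one side re-evaluating ImplicitScalingHelper. A hypothetical helper (the name is illustrative and not part of the commit) that captures the shared predicate:

// Sketch only: both the dispatch path and the size estimate check the same thing.
inline bool requiresImplicitScalingCommands(bool isPartitioned, bool isCooperative, bool isInternal) {
    return isPartitioned && !isCooperative && !isInternal;
}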