refactor: pass device to encoder functions

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-11-25 12:02:39 +00:00
committed by Compute-Runtime-Automation
parent 5dc5c839a6
commit 1ddcb92f1d
15 changed files with 99 additions and 45 deletions

View File

@@ -188,7 +188,7 @@ struct EncodeDispatchKernel {
static void setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
template <typename WalkerType>
- static void setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
+ static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
template <typename WalkerType>
static void setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);

View File

@@ -684,7 +684,7 @@ void EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(InterfaceDescriptorT
template <typename Family>
template <typename WalkerType>
- void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {}
+ void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {}
template <typename Family>
template <typename WalkerType>

View File

@@ -29,7 +29,7 @@ template void NEO::EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy<Family
template uint64_t NEO::EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<false>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<false>(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd);
- template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount,
+ template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount,
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType &walkerCmd, Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor);
template void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);

View File

@@ -90,6 +90,6 @@ void EncodeDispatchKernel<Family>::adjustTimestampPacket(WalkerType &walkerCmd,
template <typename Family>
template <typename WalkerType>
- void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {}
+ void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {}
} // namespace NEO

View File

@@ -418,7 +418,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
ImplicitScalingDispatchCommandArgs implicitScalingArgs{
workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa
- &hwInfo, // hwInfo
+ args.device, // device
&args.outWalkerPtr, // outWalkerPtr
args.requiredPartitionDim, // requiredPartitionDim
args.partitionCount, // partitionCount
@@ -438,7 +438,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.partitionCount = implicitScalingArgs.partitionCount;
} else {
args.partitionCount = 1;
- EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder);
+ EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, *args.device, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder);
if (!args.makeCommandView) {
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();

View File

@@ -17,6 +17,7 @@ struct WalkerPartitionArgs;
namespace NEO {
struct HardwareInfo;
+ class Device;
class LinearStream;
struct PipeControlArgs;
struct RootDeviceEnvironment;
@@ -39,7 +40,7 @@ struct ImplicitScalingHelper {
struct ImplicitScalingDispatchCommandArgs {
uint64_t workPartitionAllocationGpuVa = 0;
- const HardwareInfo *hwInfo = nullptr;
+ const NEO::Device *device = nullptr;
void **outWalkerPtr = nullptr;
RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;

View File

@@ -126,7 +126,7 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
&walkerCmd,
totalProgrammedSize,
walkerPartitionArgs,
- *dispatchCommandArgs.hwInfo);
+ *dispatchCommandArgs.device);
} else {
if (debugManager.flags.ExperimentalSetWalkerPartitionCount.get()) {
dispatchCommandArgs.partitionCount = debugManager.flags.ExperimentalSetWalkerPartitionCount.get();
@@ -142,7 +142,7 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
&walkerCmd,
totalProgrammedSize,
walkerPartitionArgs,
- *dispatchCommandArgs.hwInfo);
+ *dispatchCommandArgs.device);
}
UNRECOVERABLE_IF(totalProgrammedSize != dispatchCommandsSize);
}

View File

@@ -495,7 +495,7 @@ template <typename GfxFamily, typename WalkerType>
void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed,
WalkerType *inputWalker,
WalkerPartitionArgs &args,
- const NEO::HardwareInfo &hwInfo) {
+ const NEO::Device &device) {
WalkerType *computeWalker = nullptr;
if (!args.blockDispatchToCommandBuffer) {
computeWalker = putCommand<WalkerType>(inputAddress, totalBytesProgrammed);
@@ -527,7 +527,7 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
}
NEO::EncodeDispatchKernel<GfxFamily>::setWalkerRegionSettings(*inputWalker,
- hwInfo,
+ device,
args.partitionCount,
args.workgroupSize,
args.maxWgCountPerTile,
@@ -580,7 +580,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
WalkerType *inputWalker,
uint32_t &totalBytesProgrammed,
WalkerPartitionArgs &args,
- const NEO::HardwareInfo &hwInfo) {
+ const NEO::Device &device) {
totalBytesProgrammed = 0u;
void *currentBatchBufferPointer = cpuPointer;
@@ -650,7 +650,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
args.secondaryBatchBuffer);
// Walker section
- auto walkerPtr = programPartitionedWalker<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, hwInfo);
+ auto walkerPtr = programPartitionedWalker<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, device);
if (outWalkerPtr) {
*outWalkerPtr = walkerPtr;
}
@@ -726,7 +726,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
WalkerType *inputWalker,
uint32_t &totalBytesProgrammed,
WalkerPartitionArgs &args,
- const NEO::HardwareInfo &hwInfo) {
+ const NEO::Device &device) {
totalBytesProgrammed = 0u;
void *currentBatchBufferPointer = cpuPointer;
@@ -747,7 +747,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
}
}
- auto walkerPtr = programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, hwInfo);
+ auto walkerPtr = programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, device);
if (!args.blockDispatchToCommandBuffer) {
if (outWalkerPtr) {