feature: add option to store walker command content in cpu memory

Related-To: NEO-10066

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-04-16 22:23:53 +00:00
committed by Compute-Runtime-Automation
parent 9ca2091725
commit 7c16278bee
10 changed files with 44 additions and 8 deletions

View File

@@ -54,6 +54,7 @@ struct EncodeDispatchKernelArgs {
IndirectHeap *dynamicStateHeap = nullptr;
const void *threadGroupDimensions = nullptr;
void *outWalkerPtr = nullptr;
void *cpuWalkerBuffer = nullptr;
std::list<void *> *additionalCommands = nullptr;
PreemptionMode preemptionMode = PreemptionMode::Initial;
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;

View File

@@ -413,6 +413,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
*buffer = walkerCmd;
}
if (args.cpuWalkerBuffer) {
*reinterpret_cast<WalkerType *>(args.cpuWalkerBuffer) = walkerCmd;
}
PreemptionHelper::applyPreemptionWaCmdsEnd<Family>(listCmdBufferStream, *args.device);
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {

View File

@@ -498,7 +498,6 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
uint32_t tileCount,
bool forceExecutionOnSingleTile) {
auto computeWalker = putCommand<WalkerType>(inputAddress, totalBytesProgrammed);
- WalkerType cmd = *inputWalker;
if (partitionCount > 1) {
auto partitionType = inputWalker->getPartitionType();
@@ -508,7 +507,7 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
assert(inputWalker->getThreadGroupIdStartingZ() == 0u);
assert(partitionType != WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
- cmd.setWorkloadPartitionEnable(true);
+ inputWalker->setWorkloadPartitionEnable(true);
auto workgroupCount = 0u;
if (partitionType == WalkerType::PARTITION_TYPE::PARTITION_TYPE_X) {
@@ -520,15 +519,15 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
}
if (forceExecutionOnSingleTile) {
- cmd.setPartitionSize(workgroupCount);
+ inputWalker->setPartitionSize(workgroupCount);
} else {
- cmd.setPartitionSize(Math::divideAndRoundUp(workgroupCount, partitionCount));
+ inputWalker->setPartitionSize(Math::divideAndRoundUp(workgroupCount, partitionCount));
}
}
- appendWalkerFields<GfxFamily, WalkerType>(cmd, tileCount);
+ appendWalkerFields<GfxFamily, WalkerType>(*inputWalker, tileCount);
- *computeWalker = cmd;
+ *computeWalker = *inputWalker;
return computeWalker;
}