diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index ab2b96ca8d..4c580e9641 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -43,6 +43,7 @@ struct PipelineSelectArgs; struct RootDeviceEnvironment; struct StateBaseAddressProperties; struct StateComputeModeProperties; +struct ImplicitArgs; struct EncodeDispatchKernelArgs { uint64_t eventAddress = 0; @@ -187,7 +188,7 @@ struct EncodeDispatchKernel { static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template - static void programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData); + static void programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr); static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount); static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo); @@ -201,6 +202,10 @@ struct EncodeDispatchKernel { template static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr); + template + static uint64_t getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args); + template + static void patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired); static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args); }; diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index 2c40fcea90..a7c38ce3d7 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ 
b/shared/source/command_container/command_encoder_enablers.inl @@ -24,8 +24,10 @@ template void NEO::EncodeDispatchKernel::adjustWalkOrder::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void NEO::EncodeDispatchKernel::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr); template void NEO::EncodeDispatchKernel::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr); -template void NEO::EncodeDispatchKernel::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData); +template void NEO::EncodeDispatchKernel::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr); template void NEO::EncodeDispatchKernel::encodeEuSchedulingPolicy(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy); +template uint64_t NEO::EncodeDispatchKernel::getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void NEO::EncodeDispatchKernel::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired); template struct NEO::EncodeStates; template struct NEO::EncodeMath; diff --git a/shared/source/command_container/command_encoder_heap_addressing.inl b/shared/source/command_container/command_encoder_heap_addressing.inl index 154f514820..21dcbbe9cd 100644 --- a/shared/source/command_container/command_encoder_heap_addressing.inl +++ b/shared/source/command_container/command_encoder_heap_addressing.inl @@ -10,7 +10,18 @@ 
namespace NEO { template template -void EncodeDispatchKernel::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData) { +void EncodeDispatchKernel::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr) { +} + +template +template +uint64_t EncodeDispatchKernel::getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args) { + return 0u; +} + +template +template +void EncodeDispatchKernel::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired) { } } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 3bbd851d96..8c7de7a692 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -239,6 +239,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis inlineDataProgramming = inlineDataProgrammingOffset != 0; } + auto scratchAddressForImmediatePatching = EncodeDispatchKernel::getScratchAddressForImmediatePatching(container, args); uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, !localIdsGenerationByRuntime, rootDeviceEnvironment); uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching + args.reserveExtraPayloadSpace; @@ -258,6 +259,8 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (pImplicitArgs) { offsetThreadData -= ImplicitArgs::getSize(); pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize; 
+ EncodeDispatchKernel::patchScratchAddressInImplicitArgs(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching); + ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment); } @@ -322,7 +325,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } uint8_t *inlineData = reinterpret_cast(walkerCmd.getInlineDataPointer()); - EncodeDispatchKernel::programInlineDataHeapless(inlineData, args, container, offsetThreadData); + EncodeDispatchKernel::programInlineDataHeapless(inlineData, args, container, offsetThreadData, scratchAddressForImmediatePatching); if constexpr (heaplessModeEnabled == false) { walkerCmd.setIndirectDataStartAddress(static_cast(offsetThreadData));