fix: add scratch ptr in implicit args patching for L0 immediate cmdlists
Related-To: NEO-11874 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
922286633b
commit
880aaee16c
|
@ -43,6 +43,7 @@ struct PipelineSelectArgs;
|
|||
struct RootDeviceEnvironment;
|
||||
struct StateBaseAddressProperties;
|
||||
struct StateComputeModeProperties;
|
||||
struct ImplicitArgs;
|
||||
|
||||
struct EncodeDispatchKernelArgs {
|
||||
uint64_t eventAddress = 0;
|
||||
|
@ -187,7 +188,7 @@ struct EncodeDispatchKernel {
|
|||
static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
template <bool heaplessModeEnabled>
|
||||
static void programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData);
|
||||
static void programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr);
|
||||
|
||||
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
|
||||
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
|
||||
|
@ -201,6 +202,10 @@ struct EncodeDispatchKernel {
|
|||
|
||||
template <bool isHeapless>
|
||||
static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
template <bool isHeapless>
|
||||
static uint64_t getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args);
|
||||
template <bool isHeapless>
|
||||
static void patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
|
||||
|
||||
static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args);
|
||||
};
|
||||
|
|
|
@ -24,8 +24,10 @@ template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::Default
|
|||
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData);
|
||||
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
|
||||
template uint64_t NEO::EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<false>(CommandContainer &container, EncodeDispatchKernelArgs &args);
|
||||
template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<false>(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
|
||||
|
||||
template struct NEO::EncodeStates<Family>;
|
||||
template struct NEO::EncodeMath<Family>;
|
||||
|
|
|
@ -10,7 +10,18 @@
|
|||
namespace NEO {
|
||||
template <typename Family>
|
||||
template <bool heaplessModeEnabled>
|
||||
void EncodeDispatchKernel<Family>::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData) {
|
||||
void EncodeDispatchKernel<Family>::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr) {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
template <bool heaplessModeEnabled>
|
||||
uint64_t EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args) {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
template <bool heaplessModeEnabled>
|
||||
void EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrRequired) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -239,6 +239,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
inlineDataProgramming = inlineDataProgrammingOffset != 0;
|
||||
}
|
||||
|
||||
auto scratchAddressForImmediatePatching = EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<heaplessModeEnabled>(container, args);
|
||||
uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
|
||||
uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, !localIdsGenerationByRuntime, rootDeviceEnvironment);
|
||||
uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching + args.reserveExtraPayloadSpace;
|
||||
|
@ -258,6 +259,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
if (pImplicitArgs) {
|
||||
offsetThreadData -= ImplicitArgs::getSize();
|
||||
pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
|
||||
EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<heaplessModeEnabled>(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching);
|
||||
|
||||
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
|
@ -322,7 +325,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
}
|
||||
|
||||
uint8_t *inlineData = reinterpret_cast<uint8_t *>(walkerCmd.getInlineDataPointer());
|
||||
EncodeDispatchKernel<Family>::programInlineDataHeapless<heaplessModeEnabled>(inlineData, args, container, offsetThreadData);
|
||||
EncodeDispatchKernel<Family>::programInlineDataHeapless<heaplessModeEnabled>(inlineData, args, container, offsetThreadData, scratchAddressForImmediatePatching);
|
||||
|
||||
if constexpr (heaplessModeEnabled == false) {
|
||||
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
||||
|
|
Loading…
Reference in New Issue