fix: add scratch ptr in implicit args patching for L0 immediate cmdlists
Related-To: NEO-11874 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
922286633b
commit
880aaee16c
|
@ -43,6 +43,7 @@ struct PipelineSelectArgs;
|
||||||
struct RootDeviceEnvironment;
|
struct RootDeviceEnvironment;
|
||||||
struct StateBaseAddressProperties;
|
struct StateBaseAddressProperties;
|
||||||
struct StateComputeModeProperties;
|
struct StateComputeModeProperties;
|
||||||
|
struct ImplicitArgs;
|
||||||
|
|
||||||
struct EncodeDispatchKernelArgs {
|
struct EncodeDispatchKernelArgs {
|
||||||
uint64_t eventAddress = 0;
|
uint64_t eventAddress = 0;
|
||||||
|
@ -187,7 +188,7 @@ struct EncodeDispatchKernel {
|
||||||
static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||||
|
|
||||||
template <bool heaplessModeEnabled>
|
template <bool heaplessModeEnabled>
|
||||||
static void programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData);
|
static void programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr);
|
||||||
|
|
||||||
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
|
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
|
||||||
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
|
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
|
||||||
|
@ -201,6 +202,10 @@ struct EncodeDispatchKernel {
|
||||||
|
|
||||||
template <bool isHeapless>
|
template <bool isHeapless>
|
||||||
static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||||
|
template <bool isHeapless>
|
||||||
|
static uint64_t getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args);
|
||||||
|
template <bool isHeapless>
|
||||||
|
static void patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
|
||||||
|
|
||||||
static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args);
|
static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args);
|
||||||
};
|
};
|
||||||
|
|
|
@ -24,8 +24,10 @@ template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::Default
|
||||||
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData);
|
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
|
template void NEO::EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
|
||||||
|
template uint64_t NEO::EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<false>(CommandContainer &container, EncodeDispatchKernelArgs &args);
|
||||||
|
template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<false>(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
|
||||||
|
|
||||||
template struct NEO::EncodeStates<Family>;
|
template struct NEO::EncodeStates<Family>;
|
||||||
template struct NEO::EncodeMath<Family>;
|
template struct NEO::EncodeMath<Family>;
|
||||||
|
|
|
@ -10,7 +10,18 @@
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
template <bool heaplessModeEnabled>
|
template <bool heaplessModeEnabled>
|
||||||
void EncodeDispatchKernel<Family>::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData) {
|
void EncodeDispatchKernel<Family>::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr) {
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Family>
|
||||||
|
template <bool heaplessModeEnabled>
|
||||||
|
uint64_t EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Family>
|
||||||
|
template <bool heaplessModeEnabled>
|
||||||
|
void EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrRequired) {
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -239,6 +239,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
inlineDataProgramming = inlineDataProgrammingOffset != 0;
|
inlineDataProgramming = inlineDataProgrammingOffset != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto scratchAddressForImmediatePatching = EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<heaplessModeEnabled>(container, args);
|
||||||
uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
|
uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
|
||||||
uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, !localIdsGenerationByRuntime, rootDeviceEnvironment);
|
uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, !localIdsGenerationByRuntime, rootDeviceEnvironment);
|
||||||
uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching + args.reserveExtraPayloadSpace;
|
uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching + args.reserveExtraPayloadSpace;
|
||||||
|
@ -258,6 +259,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
if (pImplicitArgs) {
|
if (pImplicitArgs) {
|
||||||
offsetThreadData -= ImplicitArgs::getSize();
|
offsetThreadData -= ImplicitArgs::getSize();
|
||||||
pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
|
pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
|
||||||
|
EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<heaplessModeEnabled>(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching);
|
||||||
|
|
||||||
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment);
|
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -322,7 +325,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t *inlineData = reinterpret_cast<uint8_t *>(walkerCmd.getInlineDataPointer());
|
uint8_t *inlineData = reinterpret_cast<uint8_t *>(walkerCmd.getInlineDataPointer());
|
||||||
EncodeDispatchKernel<Family>::programInlineDataHeapless<heaplessModeEnabled>(inlineData, args, container, offsetThreadData);
|
EncodeDispatchKernel<Family>::programInlineDataHeapless<heaplessModeEnabled>(inlineData, args, container, offsetThreadData, scratchAddressForImmediatePatching);
|
||||||
|
|
||||||
if constexpr (heaplessModeEnabled == false) {
|
if constexpr (heaplessModeEnabled == false) {
|
||||||
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
||||||
|
|
Loading…
Reference in New Issue