mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
fix: add scratch ptr in implicit args patching for L0 regular cmdlists
Related-To: NEO-11874
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
153cda9a9f
commit
4008ccea05
@@ -20,6 +20,9 @@
|
||||
// Forward declarations of NEO types. Only the names are introduced here; no
// definitions are provided in this block.
namespace NEO {
|
||||
enum class MemoryPool;
|
||||
enum class ImageType;
|
||||
struct EncodeDispatchKernelArgs;
|
||||
struct KernelDescriptor;
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
@@ -359,7 +362,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
|
||||
bool hasInOrderDependencies() const;
|
||||
void appendFullSynchronizedDispatchInit();
|
||||
|
||||
void addPatchScratchAddressInImplicitArgs(CommandsToPatch &commandsToPatch, NEO::EncodeDispatchKernelArgs &args, const NEO::KernelDescriptor &kernelDescriptor, bool kernelNeedsImplicitArgs);
|
||||
size_t addCmdForPatching(std::shared_ptr<NEO::InOrderExecInfo> *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType patchCmdType);
|
||||
uint64_t getInOrderIncrementValue() const;
|
||||
bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
|
||||
|
||||
@@ -4097,6 +4097,10 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
|
||||
NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
|
||||
}
|
||||
|
||||
// Generic definition of addPatchScratchAddressInImplicitArgs. The body is
// empty: none of the parameters (commandsToPatch, args, kernelDescriptor,
// kernelNeedsImplicitArgs) is read or modified, so calling this version has
// no effect.
// NOTE(review): presumably a family-specific definition elsewhere provides the
// real implicit-args scratch patching — confirm.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::addPatchScratchAddressInImplicitArgs(CommandsToPatch &commandsToPatch, NEO::EncodeDispatchKernelArgs &args, const NEO::KernelDescriptor &kernelDescriptor, bool kernelNeedsImplicitArgs) {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSynchronizedDispatchCleanupSection() {
|
||||
if (getSynchronizedDispatchMode() != NEO::SynchronizedDispatchMode::full) {
|
||||
|
||||
@@ -126,6 +126,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ,
|
||||
", SIMD: ", kernelInfo->getMaxSimdSize());
|
||||
|
||||
bool kernelNeedsImplicitArgs = kernel->getImplicitArgs() != nullptr;
|
||||
bool needScratchSpace = false;
|
||||
bool kernelNeedsScratchSpace = false;
|
||||
for (uint32_t slotId = 0u; slotId < 2; slotId++) {
|
||||
@@ -373,6 +374,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
scratchInlineData.baseAddress = ssh->getGpuBase();
|
||||
}
|
||||
commandsToPatch.push_back(scratchInlineData);
|
||||
|
||||
addPatchScratchAddressInImplicitArgs(commandsToPatch, dispatchKernelArgs, kernelDescriptor, kernelNeedsImplicitArgs);
|
||||
}
|
||||
|
||||
if (!this->isFlushTaskSubmissionEnabled) {
|
||||
|
||||
@@ -31,6 +31,7 @@ struct CommandToPatch {
|
||||
CbWaitEventSemaphoreWait,
|
||||
CbWaitEventLoadRegisterImm,
|
||||
ComputeWalkerInlineDataScratch,
|
||||
ComputeWalkerImplicitArgsScratch,
|
||||
Invalid
|
||||
};
|
||||
void *pDestination = nullptr;
|
||||
|
||||
@@ -173,11 +173,11 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
bool patchNewInlineScratchAddress = false;
|
||||
bool patchNewScratchAddress = false;
|
||||
if (this->heaplessModeEnabled &&
|
||||
(commandList.getCommandListPatchedPerThreadScratchSize(0) < perThreadScratchSpaceSlot0Size ||
|
||||
commandList.getCommandListPatchedPerThreadScratchSize(1) < perThreadScratchSpaceSlot1Size)) {
|
||||
patchNewInlineScratchAddress = true;
|
||||
patchNewScratchAddress = true;
|
||||
}
|
||||
|
||||
auto &commandsToPatch = commandList.getCommandsToPatch();
|
||||
@@ -241,7 +241,16 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
|
||||
break;
|
||||
}
|
||||
case CommandToPatch::ComputeWalkerInlineDataScratch: {
|
||||
if (!patchNewInlineScratchAddress) {
|
||||
if (!patchNewScratchAddress) {
|
||||
continue;
|
||||
}
|
||||
uint64_t fullScratchAddress = scratchAddress + commandToPatch.baseAddress;
|
||||
void *scratchAddressPatch = ptrOffset(commandToPatch.pDestination, commandToPatch.offset);
|
||||
std::memcpy(scratchAddressPatch, &fullScratchAddress, commandToPatch.patchSize);
|
||||
break;
|
||||
}
|
||||
case CommandToPatch::ComputeWalkerImplicitArgsScratch: {
|
||||
if (!patchNewScratchAddress) {
|
||||
continue;
|
||||
}
|
||||
uint64_t fullScratchAddress = scratchAddress + commandToPatch.baseAddress;
|
||||
@@ -254,7 +263,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
|
||||
}
|
||||
}
|
||||
|
||||
if (patchNewInlineScratchAddress) {
|
||||
if (patchNewScratchAddress) {
|
||||
commandList.setCommandListPatchedPerThreadScratchSize(0, perThreadScratchSpaceSlot0Size);
|
||||
commandList.setCommandListPatchedPerThreadScratchSize(1, perThreadScratchSpaceSlot1Size);
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
|
||||
using BaseClass::addCmdForPatching;
|
||||
using BaseClass::addFlushRequiredCommand;
|
||||
using BaseClass::addPatchScratchAddressInImplicitArgs;
|
||||
using BaseClass::allocateOrReuseKernelPrivateMemoryIfNeeded;
|
||||
using BaseClass::allowCbWaitEventsNoopDispatch;
|
||||
using BaseClass::appendBlitFill;
|
||||
|
||||
Reference in New Issue
Block a user