From 14b9198949ab74f0353c65bae6b16ba8afbdbe65 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 5 Aug 2025 17:38:26 +0000 Subject: [PATCH] feature: sum total required noop space of command list patchlist Related-To: NEO-15376 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist.h | 4 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 7 ++ .../source/mutable_cmdlist/mutable_cmdlist.h | 6 +- .../mutable_cmdlist/mutable_cmdlist_hw.h | 6 +- .../mutable_cmdlist/mutable_cmdlist_hw.inl | 29 ++++- .../core/source/mutable_cmdlist/variable.cpp | 12 +- .../core/source/mutable_cmdlist/variable.h | 6 +- .../mutable_cmdlist/variable_dispatch.cpp | 36 +++--- .../test_cmdlist_append_launch_kernel_3.cpp | 5 + .../tests/mutable_cmdlist_kernels_tests.cpp | 114 +++++++++++++----- 10 files changed, 163 insertions(+), 62 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 268e3f3165..86d3b03db6 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -471,6 +471,9 @@ struct CommandList : _ze_command_list_handle_t { uint32_t getFrontEndPatchListCount() const { return frontEndPatchListCount; } + size_t getTotalNoopSpace() const { + return totalNoopSpace; + } void forceDisableInOrderWaits() { inOrderWaitsDisabled = true; } @@ -520,6 +523,7 @@ struct CommandList : _ze_command_list_handle_t { size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte; size_t cmdListCurrentStartOffset = 0; size_t maxFillPatternSizeForCopyEngine = 0; + size_t totalNoopSpace = 0; uint32_t commandListPerThreadScratchSize[2]{}; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 3b433a9478..1a7e758757 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -162,6 +162,7 @@ ze_result_t CommandListCoreFamily::reset() { isWalkerWithProfilingEnqueued = 
false; this->inOrderPatchCmds.clear(); + this->totalNoopSpace = 0; return ZE_RESULT_SUCCESS; } @@ -3339,8 +3340,11 @@ ze_result_t CommandListCoreFamily::programSyncBuffer(Kernel &kern syncBufferSpace.offset = patchData.second; syncBufferSpace.pDestination = ptrOffset(patchData.first->getUnderlyingBuffer(), patchData.second); syncBufferSpace.patchSize = NEO::KernelHelper::getSyncBufferSize(requestedNumberOfWorkgroups); + syncBufferSpace.gpuAddress = patchData.first->getGpuAddressToPatch() + patchData.second; commandsToPatch.push_back(syncBufferSpace); + + this->totalNoopSpace += syncBufferSpace.patchSize; } return ZE_RESULT_SUCCESS; @@ -3363,8 +3367,11 @@ void CommandListCoreFamily::programRegionGroupBarrier(Kernel &ker regionBarrierSpace.offset = patchData.second; regionBarrierSpace.pDestination = ptrOffset(patchData.first->getUnderlyingBuffer(), patchData.second); regionBarrierSpace.patchSize = NEO::KernelHelper::getRegionGroupBarrierSize(threadGroupCount, localRegionSize); + regionBarrierSpace.gpuAddress = patchData.first->getGpuAddressToPatch() + patchData.second; commandsToPatch.push_back(regionBarrierSpace); + + this->totalNoopSpace += regionBarrierSpace.patchSize; } } diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist.h b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist.h index 5321b2de74..61d791fe51 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist.h +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist.h @@ -170,9 +170,9 @@ struct MutableCommandList { virtual void updateScratchAddress(size_t patchIndex, MutableComputeWalker &oldWalker, MutableComputeWalker &newWalker) = 0; virtual void updateCmdListScratchPatchCommand(size_t patchIndex, MutableComputeWalker &oldWalker, MutableComputeWalker &newWalker) = 0; virtual uint64_t getCurrentScratchPatchAddress(size_t scratchAddressPatchIndex) const = 0; - virtual void updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t 
newOffset) = 0; - virtual size_t createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset) = 0; - virtual void fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset) = 0; + virtual void updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress) = 0; + virtual size_t createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress) = 0; + virtual void fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset, uint64_t &gpuAddress) = 0; virtual void disableAddressNoopPatch(size_t noopPatchIndex) = 0; virtual void addVariableToCommitList(Variable *variable) = 0; diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h index fa9171852e..bd102adb70 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h @@ -136,9 +136,9 @@ struct MutableCommandListCoreFamily : public MutableCommandListImp, public Comma void updateScratchAddress(size_t patchIndex, MutableComputeWalker &oldWalker, MutableComputeWalker &newWalker) override; void updateCmdListScratchPatchCommand(size_t patchIndex, MutableComputeWalker &oldWalker, MutableComputeWalker &newWalker) override; uint64_t getCurrentScratchPatchAddress(size_t scratchAddressPatchIndex) const override; - void updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset) override; - size_t createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset) override; - void fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset) override; + void updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset, 
uint64_t newGpuAddress) override; + size_t createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress) override; + void fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset, uint64_t &gpuAddress) override; void disableAddressNoopPatch(size_t noopPatchIndex) override; void addKernelIsaMemoryPrefetchPadding(NEO::LinearStream &cmdStream, const Kernel &kernel, uint64_t cmdId) override; void addKernelIndirectDataMemoryPrefetchPadding(NEO::LinearStream &cmdStream, const Kernel &kernel, uint64_t cmdId) override; diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl index 0c81151efc..ac0a119a49 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl @@ -970,26 +970,43 @@ ze_result_t MutableCommandListCoreFamily::captureKernelGroupVaria } template -void MutableCommandListCoreFamily::updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset) { +void MutableCommandListCoreFamily::updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress) { auto &commandsToPatch = CommandListCoreFamily::commandsToPatch; + auto &totalNoopSpace = CommandListCoreFamily::totalNoopSpace; UNRECOVERABLE_IF(noopPatchIndex >= commandsToPatch.size()); auto &noopPatch = commandsToPatch[noopPatchIndex]; + if (noopPatch.pDestination == nullptr) { + totalNoopSpace += newPatchSize; + } else { + if (newPatchSize > noopPatch.patchSize) { + totalNoopSpace += (newPatchSize - noopPatch.patchSize); + } else { + totalNoopSpace -= (noopPatch.patchSize - newPatchSize); + } + } + noopPatch.pDestination = newCpuPtr; noopPatch.patchSize = newPatchSize; noopPatch.offset = newOffset; + noopPatch.gpuAddress = newGpuAddress; } template -size_t 
MutableCommandListCoreFamily::createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset) { +size_t MutableCommandListCoreFamily::createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress) { auto &commandsToPatch = CommandListCoreFamily::commandsToPatch; + auto &totalNoopSpace = CommandListCoreFamily::totalNoopSpace; + size_t noopPatchIndex = commandsToPatch.size(); + totalNoopSpace += newPatchSize; + CommandToPatch noopPatch; noopPatch.type = CommandToPatch::NoopSpace; noopPatch.offset = newOffset; noopPatch.pDestination = newCpuPtr; noopPatch.patchSize = newPatchSize; + noopPatch.gpuAddress = newGpuAddress; commandsToPatch.push_back(noopPatch); @@ -997,7 +1014,7 @@ size_t MutableCommandListCoreFamily::createNewCmdListNoopPatchDat } template -void MutableCommandListCoreFamily::fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset) { +void MutableCommandListCoreFamily::fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset, uint64_t &gpuAddress) { auto &commandsToPatch = CommandListCoreFamily::commandsToPatch; UNRECOVERABLE_IF(noopPatchIndex >= commandsToPatch.size()); auto &noopPatch = commandsToPatch[noopPatchIndex]; @@ -1005,15 +1022,21 @@ void MutableCommandListCoreFamily::fillCmdListNoopPatchData(size_ cpuPtr = noopPatch.pDestination; patchSize = noopPatch.patchSize; offset = noopPatch.offset; + gpuAddress = noopPatch.gpuAddress; } template void MutableCommandListCoreFamily::disableAddressNoopPatch(size_t noopPatchIndex) { auto &commandsToPatch = CommandListCoreFamily::commandsToPatch; + auto &totalNoopSpace = CommandListCoreFamily::totalNoopSpace; + UNRECOVERABLE_IF(noopPatchIndex >= commandsToPatch.size()); auto &noopPatch = commandsToPatch[noopPatchIndex]; noopPatch.pDestination = nullptr; + + UNRECOVERABLE_IF(totalNoopSpace < noopPatch.patchSize); + totalNoopSpace -= noopPatch.patchSize; } 
template diff --git a/level_zero/core/source/mutable_cmdlist/variable.cpp b/level_zero/core/source/mutable_cmdlist/variable.cpp index 1ea986318f..143cc0ea38 100644 --- a/level_zero/core/source/mutable_cmdlist/variable.cpp +++ b/level_zero/core/source/mutable_cmdlist/variable.cpp @@ -770,16 +770,16 @@ void Variable::updateAllocationResidency(NEO::GraphicsAllocation *oldAllocation, } } -void Variable::updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset) { - cmdList->updateCmdListNoopPatchData(noopPatchIndex, newCpuPtr, newPatchSize, newOffset); +void Variable::updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress) { + cmdList->updateCmdListNoopPatchData(noopPatchIndex, newCpuPtr, newPatchSize, newOffset, newGpuAddress); } -size_t Variable::createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset) { - return cmdList->createNewCmdListNoopPatchData(newCpuPtr, newPatchSize, newOffset); +size_t Variable::createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress) { + return cmdList->createNewCmdListNoopPatchData(newCpuPtr, newPatchSize, newOffset, newGpuAddress); } -void Variable::fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset) { - cmdList->fillCmdListNoopPatchData(noopPatchIndex, cpuPtr, patchSize, offset); +void Variable::fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset, uint64_t &gpuAddress) { + cmdList->fillCmdListNoopPatchData(noopPatchIndex, cpuPtr, patchSize, offset, gpuAddress); } bool Variable::isCooperativeVariable() const { diff --git a/level_zero/core/source/mutable_cmdlist/variable.h b/level_zero/core/source/mutable_cmdlist/variable.h index ea3a0d4770..7f0fa6e85c 100644 --- a/level_zero/core/source/mutable_cmdlist/variable.h +++ 
b/level_zero/core/source/mutable_cmdlist/variable.h @@ -177,9 +177,9 @@ struct Variable : public VariableHandle { void commitVariable(); void updateAllocationResidency(NEO::GraphicsAllocation *oldAllocation, NEO::GraphicsAllocation *newAllocation); - void updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset); - size_t createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset); - void fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset); + void updateCmdListNoopPatchData(size_t noopPatchIndex, void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress); + size_t createNewCmdListNoopPatchData(void *newCpuPtr, size_t newPatchSize, size_t newOffset, uint64_t newGpuAddress); + void fillCmdListNoopPatchData(size_t noopPatchIndex, void *&cpuPtr, size_t &patchSize, size_t &offset, uint64_t &gpuAddress); bool isCooperativeVariable() const; inline VariableDispatch *getInitialVariableDispatch() const { diff --git a/level_zero/core/source/mutable_cmdlist/variable_dispatch.cpp b/level_zero/core/source/mutable_cmdlist/variable_dispatch.cpp index fdccd69dba..e64a21d261 100644 --- a/level_zero/core/source/mutable_cmdlist/variable_dispatch.cpp +++ b/level_zero/core/source/mutable_cmdlist/variable_dispatch.cpp @@ -60,13 +60,15 @@ VariableDispatch::VariableDispatch(KernelDispatch *kernelDispatch, if (kernelDispatch->syncBufferNoopPatchIndex != undefined) { void *noopPtr = nullptr; size_t noopSize = 0; - groupCountVar->fillCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, noopPtr, noopSize, this->syncBufferOffset); + uint64_t gpuAddress = 0; + groupCountVar->fillCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, noopPtr, noopSize, this->syncBufferOffset, gpuAddress); UNRECOVERABLE_IF(noopSize != kernelDispatch->syncBufferSize); } if (kernelDispatch->regionBarrierNoopPatchIndex != undefined) { void *noopPtr = nullptr; 
size_t noopSize = 0; - groupCountVar->fillCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, noopPtr, noopSize, this->regionBarrierOffset); + uint64_t gpuAddress = 0; + groupCountVar->fillCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, noopPtr, noopSize, this->regionBarrierOffset, gpuAddress); UNRECOVERABLE_IF(noopSize != kernelDispatch->regionBarrierSize); } } @@ -251,10 +253,11 @@ void VariableDispatch::setGroupCount(const uint32_t groupCount[3], const NEO::De this->syncBufferOffset = syncBufferPair.second; void *newCpuPtr = ptrOffset(kernelDispatch->syncBuffer->getUnderlyingBuffer(), this->syncBufferOffset); + uint64_t newGpuAddress = kernelDispatch->syncBuffer->getGpuAddressToPatch() + this->syncBufferOffset; if (kernelDispatch->syncBufferNoopPatchIndex == undefined) { - kernelDispatch->syncBufferNoopPatchIndex = groupCountVar->createNewCmdListNoopPatchData(newCpuPtr, newSize, this->syncBufferOffset); + kernelDispatch->syncBufferNoopPatchIndex = groupCountVar->createNewCmdListNoopPatchData(newCpuPtr, newSize, this->syncBufferOffset, newGpuAddress); } else { - groupCountVar->updateCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, newCpuPtr, newSize, this->syncBufferOffset); + groupCountVar->updateCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, newCpuPtr, newSize, this->syncBufferOffset, newGpuAddress); } } else { // mutation of kernels - check noop patch needs update and repatch the sync buffer address @@ -265,17 +268,17 @@ void VariableDispatch::setGroupCount(const uint32_t groupCount[3], const NEO::De void *noopCpuPtr = nullptr; size_t noopSize = 0; size_t noopOffset = 0; - groupCountVar->fillCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, noopCpuPtr, noopSize, noopOffset); + uint64_t noopGpuAddress = 0; + groupCountVar->fillCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, noopCpuPtr, noopSize, noopOffset, noopGpuAddress); void *currentNoopPtr = 
ptrOffset(kernelDispatch->syncBuffer->getUnderlyingBuffer(), this->syncBufferOffset); - if (noopSize != kernelDispatch->syncBufferSize || noopOffset != this->syncBufferOffset || noopCpuPtr != currentNoopPtr) { - uint64_t newAddress = kernelDispatch->syncBuffer->getGpuAddressToPatch() + this->syncBufferOffset; + uint64_t currentKernelPatchGpuAddress = kernelDispatch->syncBuffer->getGpuAddressToPatch() + this->syncBufferOffset; indirectData->setAddress(kernelDispatch->kernelData->syncBufferAddressOffset, - newAddress, + currentKernelPatchGpuAddress, kernelDispatch->kernelData->syncBufferPointerSize); - groupCountVar->updateCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, currentNoopPtr, kernelDispatch->syncBufferSize, this->syncBufferOffset); + groupCountVar->updateCmdListNoopPatchData(kernelDispatch->syncBufferNoopPatchIndex, currentNoopPtr, kernelDispatch->syncBufferSize, this->syncBufferOffset, currentKernelPatchGpuAddress); } } } @@ -296,10 +299,11 @@ void VariableDispatch::setGroupCount(const uint32_t groupCount[3], const NEO::De this->regionBarrierOffset = regionBarrierPair.second; void *newCpuPtr = ptrOffset(kernelDispatch->regionBarrier->getUnderlyingBuffer(), this->regionBarrierOffset); + uint64_t newGpuAddress = kernelDispatch->regionBarrier->getGpuAddressToPatch() + this->regionBarrierOffset; if (kernelDispatch->regionBarrierNoopPatchIndex == undefined) { - kernelDispatch->regionBarrierNoopPatchIndex = groupCountVar->createNewCmdListNoopPatchData(newCpuPtr, newSize, this->regionBarrierOffset); + kernelDispatch->regionBarrierNoopPatchIndex = groupCountVar->createNewCmdListNoopPatchData(newCpuPtr, newSize, this->regionBarrierOffset, newGpuAddress); } else { - groupCountVar->updateCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, newCpuPtr, newSize, this->regionBarrierOffset); + groupCountVar->updateCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, newCpuPtr, newSize, this->regionBarrierOffset, newGpuAddress); } } 
else { // mutation of kernels - check noop patch needs update and repatch the region barrier address @@ -310,17 +314,17 @@ void VariableDispatch::setGroupCount(const uint32_t groupCount[3], const NEO::De void *noopCpuPtr = nullptr; size_t noopSize = 0; size_t noopOffset = 0; - groupCountVar->fillCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, noopCpuPtr, noopSize, noopOffset); + uint64_t noopGpuAddress = 0; + groupCountVar->fillCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, noopCpuPtr, noopSize, noopOffset, noopGpuAddress); void *currentNoopPtr = ptrOffset(kernelDispatch->regionBarrier->getUnderlyingBuffer(), this->regionBarrierOffset); - if (noopSize != kernelDispatch->regionBarrierSize || noopOffset != this->regionBarrierOffset || noopCpuPtr != currentNoopPtr) { - uint64_t newAddress = kernelDispatch->regionBarrier->getGpuAddressToPatch() + this->regionBarrierOffset; + uint64_t currentKernelPatchGpuAddress = kernelDispatch->regionBarrier->getGpuAddressToPatch() + this->regionBarrierOffset; indirectData->setAddress(kernelDispatch->kernelData->regionGroupBarrierBufferOffset, - newAddress, + currentKernelPatchGpuAddress, kernelDispatch->kernelData->regionGroupBarrierBufferPointerSize); - groupCountVar->updateCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, currentNoopPtr, kernelDispatch->regionBarrierSize, this->regionBarrierOffset); + groupCountVar->updateCmdListNoopPatchData(kernelDispatch->regionBarrierNoopPatchIndex, currentNoopPtr, kernelDispatch->regionBarrierSize, this->regionBarrierOffset, currentKernelPatchGpuAddress); } } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 44ccdc9a7d..edb4f0684e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -568,6 +568,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau auto noopParam = cmdsToPatch[cooperativeParams.syncBufferPatchIndex]; EXPECT_EQ(CommandToPatch::NoopSpace, noopParam.type); EXPECT_NE(0u, noopParam.patchSize); + EXPECT_EQ(noopParam.patchSize, commandList->getTotalNoopSpace()); commandList = std::make_unique>>(); commandList->initialize(device, engineGroupType, 0u); @@ -750,6 +751,10 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA auto noopParam = cmdsToPatch[launchParams.regionBarrierPatchIndex]; EXPECT_EQ(CommandToPatch::NoopSpace, noopParam.type); EXPECT_NE(0u, noopParam.patchSize); + EXPECT_EQ(noopParam.patchSize, cmdListRegular->getTotalNoopSpace()); + + cmdListRegular->reset(); + EXPECT_EQ(0u, cmdListRegular->getTotalNoopSpace()); } HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenAppendLaunchKernelWithMakeViewIsCalledThenNoPatchBuffer, IsAtLeastXeCore) { diff --git a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp index 0e77f5f5af..f76ccc7c7b 100644 --- a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp @@ -883,6 +883,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->appendLaunchKernel(kernelHandlePrivate, this->testGroupCount, nullptr, 0, nullptr, this->testLaunchParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); + EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), privateAllocation)); EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), 
kernelPrivateIsaAllocation)); @@ -921,6 +923,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t noopSpace = NEO::KernelHelper::getSyncBufferSize(4); + EXPECT_EQ(noopSpace, mutableCommandList->base->getTotalNoopSpace()); + auto syncBufferAllocation = mutationPrivateFirst.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->syncBuffer; EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), syncBufferAllocation)); @@ -940,6 +945,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); + EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), syncBufferAllocation)); EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), kernelSlmIsaAllocation)); EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), privateAllocation)); @@ -957,6 +964,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(noopSpace, mutableCommandList->base->getTotalNoopSpace()); + EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), syncBufferAllocation)); EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), kernelSlmIsaAllocation)); EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), privateAllocation)); @@ -966,6 +975,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->reset(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); + result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 2, specialKernelGroup, &commandId); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -976,6 +987,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->appendLaunchKernel(kernelHandle, this->testGroupCount, nullptr, 0, nullptr, this->testLaunchParams); EXPECT_EQ(ZE_RESULT_SUCCESS, 
result); + EXPECT_EQ(noopSpace, mutableCommandList->base->getTotalNoopSpace()); + syncBufferAllocation = mutationSlmFirst.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->syncBuffer; EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), syncBufferAllocation)); EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), kernelSlmIsaAllocation)); @@ -994,6 +1007,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); + EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), syncBufferAllocation)); EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), kernelSlmIsaAllocation)); EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), privateAllocation)); @@ -1011,6 +1026,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(noopSpace, mutableCommandList->base->getTotalNoopSpace()); + EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), syncBufferAllocation)); EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), kernelSlmIsaAllocation)); EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), privateAllocation)); @@ -1028,6 +1045,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); + EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), syncBufferAllocation)); EXPECT_FALSE(isAllocationInMutableResidency(mutableCommandList.get(), kernelSlmIsaAllocation)); EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), privateAllocation)); @@ -1068,10 +1087,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, 
patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); auto requiredSize = NEO::KernelHelper::getSyncBufferSize(4); EXPECT_EQ(requiredSize, patchSize); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); size_t oldOffset = offset; void *oldCpuPtr = cpuPtr; @@ -1091,10 +1112,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(requiredSize, patchSize); EXPECT_EQ(oldOffset, offset); EXPECT_EQ(oldCpuPtr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); mutatedGroupCount.groupCountX = 64; mutatedGroupCount.groupCountY = 1; @@ -1107,11 +1129,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EXPECT_EQ(ZE_RESULT_SUCCESS, result); oldOffset = patchSize; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); requiredSize = NEO::KernelHelper::getSyncBufferSize(64); EXPECT_EQ(requiredSize, patchSize); EXPECT_EQ(oldOffset, offset); EXPECT_NE(oldCpuPtr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); } HWCMDTEST_F(IGFX_XE_HP_CORE, @@ -1187,11 +1210,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); auto requiredSize = NEO::KernelHelper::getSyncBufferSize(4); EXPECT_EQ(requiredSize, patchSize); EXPECT_NE(nullptr, cpuPtr); + EXPECT_EQ(requiredSize, 
mutableCommandList->base->getTotalNoopSpace()); result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernel2Handle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -1227,7 +1252,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); + + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(nullptr, cpuPtr); // mutate back into kernel cooperative @@ -1250,7 +1277,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); + + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_NE(nullptr, cpuPtr); // mutate back into kernel regular @@ -1273,7 +1302,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(nullptr, cpuPtr); // mutate final into kernel cooperative @@ -1296,7 +1325,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_NE(nullptr, cpuPtr); } @@ -1325,6 +1354,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); + ASSERT_NE(0u, 
mutableCommandList->kernelMutations.size()); auto &mutation = mutableCommandList->kernelMutations[commandId - 1]; ASSERT_NE(nullptr, mutation.kernelGroup); @@ -1375,11 +1406,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); auto requiredSize = NEO::KernelHelper::getSyncBufferSize(4); EXPECT_EQ(requiredSize, patchSize); EXPECT_NE(nullptr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); // mutate back into regular kernel result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernel2Handle); @@ -1401,8 +1434,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(nullptr, cpuPtr); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); } HWCMDTEST_F(IGFX_XE_HP_CORE, @@ -1442,8 +1476,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(requiredSize, patchSize); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); size_t oldPatchSize = patchSize; @@ -1476,11 +1512,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, noopIndex = mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->syncBufferNoopPatchIndex; EXPECT_NE(undefined, noopIndex); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + 
mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); requiredSize = NEO::KernelHelper::getSyncBufferSize(8 * 4); EXPECT_EQ(requiredSize, patchSize); EXPECT_EQ(oldPatchSize, offset); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); size_t oldOffset = offset; oldPatchSize = patchSize; @@ -1515,10 +1552,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, noopIndex = mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->syncBufferNoopPatchIndex; EXPECT_NE(undefined, noopIndex); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); requiredSize = NEO::KernelHelper::getSyncBufferSize(8 * 8); EXPECT_EQ(requiredSize, patchSize); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); // new offset and new gpu sync address in cross-thread data memcpy(&syncBufferGpuPatchAddress, syncBufferAddress, sizeof(uint64_t)); @@ -1541,10 +1579,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EXPECT_EQ(ZE_RESULT_SUCCESS, result); noopIndex = mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->syncBufferNoopPatchIndex; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(oldPatchSize, patchSize); EXPECT_EQ(oldOffset, offset); + EXPECT_EQ(oldPatchSize, mutableCommandList->base->getTotalNoopSpace()); // old offset and old gpu sync address in cross-thread data memcpy(&syncBufferGpuPatchAddress, syncBufferAddress, sizeof(uint64_t)); @@ -1587,10 +1626,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, 
offset, gpuAddress); auto requiredSize = NEO::KernelHelper::getRegionGroupBarrierSize(4, this->testLaunchParams.localRegionSize); EXPECT_EQ(requiredSize, patchSize); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); size_t oldOffset = offset; void *oldCpuPtr = cpuPtr; @@ -1610,10 +1651,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(requiredSize, patchSize); EXPECT_EQ(oldOffset, offset); EXPECT_EQ(oldCpuPtr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); mutatedGroupCount.groupCountX = 64; mutatedGroupCount.groupCountY = 1; @@ -1626,11 +1668,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EXPECT_EQ(ZE_RESULT_SUCCESS, result); oldOffset = patchSize; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); requiredSize = NEO::KernelHelper::getRegionGroupBarrierSize(64, this->testLaunchParams.localRegionSize); EXPECT_EQ(requiredSize, patchSize); EXPECT_EQ(oldOffset, offset); EXPECT_NE(oldCpuPtr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); } HWCMDTEST_F(IGFX_XE_HP_CORE, @@ -1668,11 +1711,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); auto requiredSize = NEO::KernelHelper::getRegionGroupBarrierSize(4, this->testLaunchParams.localRegionSize); EXPECT_EQ(requiredSize, patchSize); EXPECT_NE(nullptr, cpuPtr); + EXPECT_EQ(requiredSize, 
mutableCommandList->base->getTotalNoopSpace()); result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernel2Handle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -1708,8 +1753,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(nullptr, cpuPtr); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); // mutate back into kernel region barrier result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernelHandle); @@ -1731,8 +1777,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_NE(nullptr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); // mutate back into kernel regular result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernel2Handle); @@ -1754,8 +1801,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(nullptr, cpuPtr); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); // mutate final into kernel region barrier result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernelHandle); @@ -1777,8 +1825,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, 
offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_NE(nullptr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); } HWCMDTEST_F(IGFX_XE_HP_CORE, @@ -1811,6 +1860,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EXPECT_FALSE(mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->kernelData->usesRegionGroupBarrier); auto noopIndex = mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->regionBarrierNoopPatchIndex; EXPECT_EQ(undefined, noopIndex); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); // mutate into region barrier result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernelHandle); @@ -1854,11 +1904,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); auto requiredSize = NEO::KernelHelper::getRegionGroupBarrierSize(4, this->testLaunchParams.localRegionSize); EXPECT_EQ(requiredSize, patchSize); EXPECT_NE(nullptr, cpuPtr); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); // mutate back into regular kernel result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernel2Handle); @@ -1880,8 +1932,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(nullptr, cpuPtr); + EXPECT_EQ(0u, mutableCommandList->base->getTotalNoopSpace()); } HWCMDTEST_F(IGFX_XE_HP_CORE, @@ -1919,8 +1972,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, void *cpuPtr = nullptr; size_t patchSize = 0; size_t offset = 0; 
- mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + uint64_t gpuAddress = 0; + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(requiredSize, patchSize); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); size_t oldPatchSize = patchSize; @@ -1953,11 +2008,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, noopIndex = mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->regionBarrierNoopPatchIndex; EXPECT_NE(undefined, noopIndex); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); requiredSize = NEO::KernelHelper::getRegionGroupBarrierSize(8 * 4, this->testLaunchParams.localRegionSize); EXPECT_EQ(requiredSize, patchSize); EXPECT_EQ(oldPatchSize, offset); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); size_t oldOffset = offset; oldPatchSize = patchSize; @@ -1992,10 +2048,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, noopIndex = mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->regionBarrierNoopPatchIndex; EXPECT_NE(undefined, noopIndex); - mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); requiredSize = NEO::KernelHelper::getRegionGroupBarrierSize(8 * 8, this->testLaunchParams.localRegionSize); EXPECT_EQ(requiredSize, patchSize); + EXPECT_EQ(requiredSize, mutableCommandList->base->getTotalNoopSpace()); // new offset and new gpu sync address in cross-thread data memcpy(&regionBarrierBufferGpuPatchAddress, regionBarrierBufferAddress, sizeof(uint64_t)); @@ -2018,10 +2075,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EXPECT_EQ(ZE_RESULT_SUCCESS, result); noopIndex = mutation.kernelGroup->getCurrentMutableKernel()->getKernelDispatch()->regionBarrierNoopPatchIndex; - 
mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset); + mutableCommandList->fillCmdListNoopPatchData(noopIndex, cpuPtr, patchSize, offset, gpuAddress); EXPECT_EQ(oldPatchSize, patchSize); EXPECT_EQ(oldOffset, offset); + EXPECT_EQ(oldPatchSize, mutableCommandList->base->getTotalNoopSpace()); // old offset and old gpu sync address in cross-thread data memcpy(&regionBarrierBufferGpuPatchAddress, regionBarrierBufferAddress, sizeof(uint64_t));