refactor: do not add bindless heaps to kernel residency

Related-To: NEO-11719

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2024-07-22 17:44:23 +00:00 committed by Compute-Runtime-Automation
parent a7fbc90ebd
commit f61ab615e7
3 changed files with 7 additions and 16 deletions

View File

@ -145,10 +145,6 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
}
auto ssInHeap = globalConstBuffer->getBindlessInfo();
if (ssInHeap.heapAllocation) {
this->residencyContainer.push_back(ssInHeap.heapAllocation);
}
patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef,
globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
*neoDevice, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor);
@ -172,10 +168,6 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
}
auto ssInHeap = globalVarBuffer->getBindlessInfo();
if (ssInHeap.heapAllocation) {
this->residencyContainer.push_back(ssInHeap.heapAllocation);
}
patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef,
globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
*neoDevice, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor);
@ -620,7 +612,6 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * NEO::BindlessImageSlot::redescribedImage), 0u);
isBindlessOffsetSet[argIndex] = true;
this->residencyContainer.push_back(ssInHeap->heapAllocation);
} else {
usingSurfaceStateHeap[argIndex] = true;
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
@ -824,7 +815,6 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
image->copyImplicitArgsSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u);
isBindlessOffsetSet[argIndex] = true;
this->residencyContainer.push_back(ssInHeap->heapAllocation);
} else {
usingSurfaceStateHeap[argIndex] = true;
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
@ -1240,7 +1230,6 @@ void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint3
auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
auto ssInHeap = alloc->getBindlessInfo();
this->residencyContainer.push_back(ssInHeap.heapAllocation);
auto patchLocation = ptrOffset(getCrossThreadData(), bindless);
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(ssInHeap.surfaceStateOffset));
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);

View File

@ -2221,7 +2221,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledTh
EXPECT_EQ(ssPtr, expectedSsInHeap.ssPtr);
EXPECT_TRUE(memcmp(const_cast<uint8_t *>(patchLocation), &patchValue, sizeof(patchValue)) == 0);
EXPECT_TRUE(std::find(kernel.getResidencyContainer().begin(), kernel.getResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getResidencyContainer().end());
EXPECT_FALSE(std::find(kernel.getResidencyContainer().begin(), kernel.getResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getResidencyContainer().end());
neoDevice->decRefInternal();
}
@ -2829,6 +2829,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfac
EXPECT_EQ(imageHW->passedSurfaceStateOffset, 0u);
EXPECT_TRUE(kernel->isBindlessOffsetSet[3]);
EXPECT_FALSE(kernel->usingSurfaceStateHeap[3]);
EXPECT_EQ(0, std::count(kernel->residencyContainer.begin(), kernel->residencyContainer.end(), expectedSsInHeap.heapAllocation));
}
HWTEST2_F(SetKernelArg, givenNoGlobalAllocatorAndBindlessKernelWhenSetArgImageThenBindlessOffsetIsNotSetAndSshIsUsed, ImageSupport) {
@ -2950,6 +2951,7 @@ HWTEST2_F(SetKernelArg, givenImageBindlessKernelAndGlobalBindlessHelperWhenSetAr
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateOffset, 0u);
EXPECT_TRUE(kernel->isBindlessOffsetSet[3]);
EXPECT_FALSE(kernel->usingSurfaceStateHeap[3]);
EXPECT_EQ(0, std::count(kernel->residencyContainer.begin(), kernel->residencyContainer.end(), expectedSsInHeap.heapAllocation));
}
HWTEST2_F(SetKernelArg, givenGlobalBindlessHelperAndImageViewWhenAllocatingBindlessSlotThenViewHasDifferentSlotThanParentImage, ImageSupport) {

View File

@ -710,9 +710,9 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl
EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());
auto &residencyContainer = kernelImmutableData->getResidencyContainer();
EXPECT_EQ(2u, residencyContainer.size());
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalConstBuffer));
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer.getBindlessInfo().heapAllocation));
EXPECT_EQ(0, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer.getBindlessInfo().heapAllocation));
auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate();
auto patchLocation = reinterpret_cast<const uint32_t *>(ptrOffset(crossThreadData, globalConstSurfaceAddressBindlessOffset));
@ -789,9 +789,9 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic
EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());
auto &residencyContainer = kernelImmutableData->getResidencyContainer();
EXPECT_EQ(2u, residencyContainer.size());
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalVarBuffer));
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalVarBuffer.getBindlessInfo().heapAllocation));
EXPECT_EQ(0, std::count(residencyContainer.begin(), residencyContainer.end(), globalVarBuffer.getBindlessInfo().heapAllocation));
auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate();
auto patchLocation = reinterpret_cast<const uint32_t *>(ptrOffset(crossThreadData, globalVariablesSurfaceAddressBindlessOffset));