refactor: Add ULT checking that the bindless surface state is patched correctly
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
7986e68dfc
commit
3be52fa1d0
|
@ -619,9 +619,13 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
|
||||||
auto ssInHeap = image->getBindlessSlot();
|
auto ssInHeap = image->getBindlessSlot();
|
||||||
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
||||||
// redescribed image's surface state is after image's implicit args and sampler
|
// redescribed image's surface state is after image's implicit args and sampler
|
||||||
auto bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * NEO::BindlessImageSlot::redescribedImage;
|
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * NEO::BindlessImageSlot::redescribedImage;
|
||||||
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
uint32_t patchSize = this->heaplessEnabled ? 8u : 4u;
|
||||||
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
|
uint64_t patchValue = this->heaplessEnabled
|
||||||
|
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
|
||||||
|
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
||||||
|
|
||||||
|
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);
|
||||||
|
|
||||||
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * NEO::BindlessImageSlot::redescribedImage), 0u);
|
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * NEO::BindlessImageSlot::redescribedImage), 0u);
|
||||||
isBindlessOffsetSet[argIndex] = true;
|
isBindlessOffsetSet[argIndex] = true;
|
||||||
|
@ -812,8 +816,12 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
|
||||||
auto ssInHeap = image->getBindlessSlot();
|
auto ssInHeap = image->getBindlessSlot();
|
||||||
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
||||||
auto bindlessSlotOffset = ssInHeap->surfaceStateOffset;
|
auto bindlessSlotOffset = ssInHeap->surfaceStateOffset;
|
||||||
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
uint32_t patchSize = this->heaplessEnabled ? 8u : 4u;
|
||||||
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
|
uint64_t patchValue = this->heaplessEnabled
|
||||||
|
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
|
||||||
|
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
||||||
|
|
||||||
|
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);
|
||||||
|
|
||||||
image->copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, isMediaBlockImage);
|
image->copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, isMediaBlockImage);
|
||||||
image->copyImplicitArgsSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u);
|
image->copyImplicitArgsSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u);
|
||||||
|
|
|
@ -3115,6 +3115,56 @@ HWTEST2_F(SetKernelArg, givenImageBindlessKernelAndGlobalBindlessHelperWhenSetAr
|
||||||
EXPECT_EQ(0, std::count(kernel->argumentsResidencyContainer.begin(), kernel->argumentsResidencyContainer.end(), expectedSsInHeap.heapAllocation));
|
EXPECT_EQ(0, std::count(kernel->argumentsResidencyContainer.begin(), kernel->argumentsResidencyContainer.end(), expectedSsInHeap.heapAllocation));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(SetKernelArg, givenHeaplessWhenPatchingImageWithBindlessEnabledCorrectSurfaceStateAddressIsPatchedInCrossThreadData, ImageSupport) {
|
||||||
|
|
||||||
|
for (auto heaplessEnabled : {false, true}) {
|
||||||
|
|
||||||
|
createKernel();
|
||||||
|
kernel->heaplessEnabled = heaplessEnabled;
|
||||||
|
|
||||||
|
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice,
|
||||||
|
neoDevice->getNumGenericSubDevices() > 1);
|
||||||
|
NEO::BindlessHeapsHelper *bindlessHeapsHelper = neoDevice->getBindlessHeapsHelper();
|
||||||
|
ASSERT_NE(nullptr, bindlessHeapsHelper);
|
||||||
|
|
||||||
|
auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
|
||||||
|
auto &addressingMode = kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode;
|
||||||
|
const_cast<NEO::KernelDescriptor::AddressingMode &>(addressingMode) = NEO::KernelDescriptor::Bindless;
|
||||||
|
imageArg.bindless = 0x8;
|
||||||
|
imageArg.bindful = undefined<SurfaceStateHeapOffset>;
|
||||||
|
ze_image_desc_t desc = {};
|
||||||
|
desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
|
||||||
|
|
||||||
|
auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
|
||||||
|
auto ret = imageHW->initialize(device, &desc);
|
||||||
|
auto handle = imageHW->toHandle();
|
||||||
|
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||||
|
|
||||||
|
ret = kernel->setArgRedescribedImage(3, handle);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||||
|
|
||||||
|
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
|
||||||
|
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||||
|
|
||||||
|
auto ctd = kernel->crossThreadData.get();
|
||||||
|
|
||||||
|
auto ssInHeap = imageHW->getBindlessSlot();
|
||||||
|
auto patchLocation = ptrOffset(ctd, imageArg.bindless);
|
||||||
|
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * NEO::BindlessImageSlot::redescribedImage;
|
||||||
|
uint64_t expectedPatchValue = kernel->heaplessEnabled
|
||||||
|
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
|
||||||
|
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
||||||
|
|
||||||
|
if (kernel->heaplessEnabled) {
|
||||||
|
uint64_t patchedValued = *(reinterpret_cast<uint64_t *>(patchLocation));
|
||||||
|
EXPECT_EQ(expectedPatchValue, patchedValued);
|
||||||
|
} else {
|
||||||
|
uint32_t patchedValued = *(reinterpret_cast<uint32_t *>(patchLocation));
|
||||||
|
EXPECT_EQ(static_cast<uint32_t>(expectedPatchValue), patchedValued);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST2_F(SetKernelArg, givenGlobalBindlessHelperAndImageViewWhenAllocatingBindlessSlotThenViewHasDifferentSlotThanParentImage, ImageSupport) {
|
HWTEST2_F(SetKernelArg, givenGlobalBindlessHelperAndImageViewWhenAllocatingBindlessSlotThenViewHasDifferentSlotThanParentImage, ImageSupport) {
|
||||||
createKernel();
|
createKernel();
|
||||||
|
|
||||||
|
|
|
@ -183,11 +183,9 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
|
|
||||||
uint32_t samplerCount = 0;
|
uint32_t samplerCount = 0;
|
||||||
|
|
||||||
if constexpr (Family::supportsSampler && heaplessModeEnabled == false) {
|
if constexpr (Family::supportsSampler) {
|
||||||
if (args.device->getDeviceInfo().imageSupport && !args.makeCommandView) {
|
if (args.device->getDeviceInfo().imageSupport && !args.makeCommandView) {
|
||||||
|
|
||||||
uint32_t samplerStateOffset = 0;
|
|
||||||
|
|
||||||
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
|
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
|
||||||
auto dsHeap = args.dynamicStateHeap;
|
auto dsHeap = args.dynamicStateHeap;
|
||||||
if (dsHeap == nullptr) {
|
if (dsHeap == nullptr) {
|
||||||
|
@ -199,22 +197,28 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
}
|
}
|
||||||
UNRECOVERABLE_IF(!dsHeap);
|
UNRECOVERABLE_IF(!dsHeap);
|
||||||
|
|
||||||
|
auto bindlessHeapsHelper = args.device->getBindlessHeapsHelper();
|
||||||
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
||||||
samplerStateOffset = EncodeStates<Family>::copySamplerState(
|
uint64_t samplerStateOffset = EncodeStates<Family>::copySamplerState(
|
||||||
dsHeap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
|
dsHeap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
|
||||||
kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor,
|
kernelDescriptor.payloadMappings.samplerTable.numSamplers,
|
||||||
|
kernelDescriptor.payloadMappings.samplerTable.borderColor,
|
||||||
args.dispatchInterface->getDynamicStateHeapData(),
|
args.dispatchInterface->getDynamicStateHeapData(),
|
||||||
args.device->getBindlessHeapsHelper(), rootDeviceEnvironment);
|
bindlessHeapsHelper, rootDeviceEnvironment);
|
||||||
|
|
||||||
if (args.device->getBindlessHeapsHelper() && !args.device->getBindlessHeapsHelper()->isGlobalDshSupported()) {
|
if (bindlessHeapsHelper && !bindlessHeapsHelper->isGlobalDshSupported()) {
|
||||||
// add offset of graphics allocation base address relative to heap base address
|
// add offset of graphics allocation base address relative to heap base address
|
||||||
samplerStateOffset += static_cast<uint32_t>(ptrDiff(dsHeap->getGpuBase(), args.device->getBindlessHeapsHelper()->getGlobalHeapsBase()));
|
samplerStateOffset += static_cast<uint32_t>(ptrDiff(dsHeap->getGpuBase(), bindlessHeapsHelper->getGlobalHeapsBase()));
|
||||||
|
}
|
||||||
|
if (heaplessModeEnabled && bindlessHeapsHelper) {
|
||||||
|
samplerStateOffset += bindlessHeapsHelper->getGlobalHeapsBase();
|
||||||
}
|
}
|
||||||
|
|
||||||
args.dispatchInterface->patchSamplerBindlessOffsetsInCrossThreadData(samplerStateOffset);
|
args.dispatchInterface->patchSamplerBindlessOffsetsInCrossThreadData(samplerStateOffset);
|
||||||
|
if constexpr (!heaplessModeEnabled) {
|
||||||
|
idd.setSamplerStatePointer(static_cast<uint32_t>(samplerStateOffset));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
idd.setSamplerStatePointer(samplerStateOffset);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,9 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
|
||||||
NEO::ImplicitArgs *getImplicitArgs() const override { return nullptr; }
|
NEO::ImplicitArgs *getImplicitArgs() const override { return nullptr; }
|
||||||
|
|
||||||
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override { return; };
|
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override { return; };
|
||||||
void patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerStateOffset) const override { return; };
|
void patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerStateOffset) const override {
|
||||||
|
samplerStateOffsetPassed = samplerStateOffset;
|
||||||
|
}
|
||||||
|
|
||||||
MockGraphicsAllocation mockAllocation{};
|
MockGraphicsAllocation mockAllocation{};
|
||||||
static constexpr uint32_t crossThreadSize = 0x40;
|
static constexpr uint32_t crossThreadSize = 0x40;
|
||||||
|
@ -41,6 +43,8 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
|
||||||
uint32_t requiredWalkGroupOrder = 0x0u;
|
uint32_t requiredWalkGroupOrder = 0x0u;
|
||||||
KernelDescriptor kernelDescriptor{};
|
KernelDescriptor kernelDescriptor{};
|
||||||
|
|
||||||
|
mutable uint64_t samplerStateOffsetPassed = 0u;
|
||||||
|
|
||||||
ADDMETHOD_CONST_NOBASE(getKernelDescriptor, const KernelDescriptor &, kernelDescriptor, ());
|
ADDMETHOD_CONST_NOBASE(getKernelDescriptor, const KernelDescriptor &, kernelDescriptor, ());
|
||||||
ADDMETHOD_CONST_NOBASE(getGroupSize, const uint32_t *, groupSizes, ());
|
ADDMETHOD_CONST_NOBASE(getGroupSize, const uint32_t *, groupSizes, ());
|
||||||
ADDMETHOD_CONST_NOBASE(getSlmTotalSize, uint32_t, 0u, ());
|
ADDMETHOD_CONST_NOBASE(getSlmTotalSize, uint32_t, 0u, ());
|
||||||
|
|
Loading…
Reference in New Issue