feature: add bindless samplers support to level zero
- samplers using bindless addressing require patching bindless offsets to sampler states in the kernel's cross-thread data

Related-To: NEO-10505
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
parent
420e1391b2
commit
f86d4220a5
|
@ -859,8 +859,16 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
|
||||||
ze_result_t KernelImp::setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal) {
|
ze_result_t KernelImp::setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal) {
|
||||||
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescSampler>();
|
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescSampler>();
|
||||||
const auto sampler = Sampler::fromHandle(*static_cast<const ze_sampler_handle_t *>(argVal));
|
const auto sampler = Sampler::fromHandle(*static_cast<const ze_sampler_handle_t *>(argVal));
|
||||||
sampler->copySamplerStateToDSH(dynamicStateHeapData.get(), dynamicStateHeapDataSize, arg.bindful);
|
if (NEO::isValidOffset(arg.bindful)) {
|
||||||
|
sampler->copySamplerStateToDSH(dynamicStateHeapData.get(), dynamicStateHeapDataSize, arg.bindful);
|
||||||
|
} else if (NEO::isValidOffset(arg.bindless)) {
|
||||||
|
const auto offset = kernelImmData->getDescriptor().payloadMappings.samplerTable.tableOffset;
|
||||||
|
auto &gfxCoreHelper = this->module->getDevice()->getNEODevice()->getRootDeviceEnvironmentRef().getHelper<NEO::GfxCoreHelper>();
|
||||||
|
const auto stateSize = gfxCoreHelper.getSamplerStateSize();
|
||||||
|
auto heapOffset = offset + static_cast<uint32_t>(stateSize) * arg.index;
|
||||||
|
|
||||||
|
sampler->copySamplerStateToDSH(dynamicStateHeapData.get(), dynamicStateHeapDataSize, heapOffset);
|
||||||
|
}
|
||||||
auto samplerDesc = sampler->getSamplerDesc();
|
auto samplerDesc = sampler->getSamplerDesc();
|
||||||
|
|
||||||
NEO::patchNonPointer<uint32_t, uint32_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.samplerSnapWa, (samplerDesc.addressMode == ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER && samplerDesc.filterMode == ZE_SAMPLER_FILTER_MODE_NEAREST) ? std::numeric_limits<uint32_t>::max() : 0u);
|
NEO::patchNonPointer<uint32_t, uint32_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.samplerSnapWa, (samplerDesc.addressMode == ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER && samplerDesc.filterMode == ZE_SAMPLER_FILTER_MODE_NEAREST) ? std::numeric_limits<uint32_t>::max() : 0u);
|
||||||
|
@ -1352,6 +1360,36 @@ void KernelImp::patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceSt
|
||||||
patchBindlessOffsetsForImplicitArgs(bindlessSurfaceStateBaseOffset);
|
patchBindlessOffsetsForImplicitArgs(bindlessSurfaceStateBaseOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void KernelImp::patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerStateOffset) const {
|
||||||
|
if (this->module == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
|
||||||
|
const auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();
|
||||||
|
auto crossThreadData = getCrossThreadData();
|
||||||
|
for (size_t index = 0; index < kernelImmData->getDescriptor().payloadMappings.explicitArgs.size(); index++) {
|
||||||
|
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[index];
|
||||||
|
|
||||||
|
auto crossThreadOffset = NEO::undefined<NEO::CrossThreadDataOffset>;
|
||||||
|
if (arg.type == NEO::ArgDescriptor::argTSampler) {
|
||||||
|
crossThreadOffset = arg.as<NEO::ArgDescSampler>().bindless;
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto samplerIndex = arg.as<NEO::ArgDescSampler>().index;
|
||||||
|
if (NEO::isValidOffset(crossThreadOffset)) {
|
||||||
|
auto patchLocation = ptrOffset(crossThreadData, crossThreadOffset);
|
||||||
|
|
||||||
|
if (samplerIndex < std::numeric_limits<uint8_t>::max()) {
|
||||||
|
auto surfaceStateOffset = static_cast<uint64_t>(samplerStateOffset + samplerIndex * samplerStateSize);
|
||||||
|
auto patchValue = surfaceStateOffset;
|
||||||
|
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), arg.as<NEO::ArgDescSampler>().size, patchValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {
|
uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {
|
||||||
const auto &iter = getKernelDescriptor().getBindlessOffsetToSurfaceState().find(bindlessOffset);
|
const auto &iter = getKernelDescriptor().getBindlessOffsetToSurfaceState().find(bindlessOffset);
|
||||||
if (iter != getKernelDescriptor().getBindlessOffsetToSurfaceState().end()) {
|
if (iter != getKernelDescriptor().getBindlessOffsetToSurfaceState().end()) {
|
||||||
|
|
|
@ -175,6 +175,7 @@ struct KernelImp : Kernel {
|
||||||
void patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) override;
|
void patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) override;
|
||||||
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override;
|
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override;
|
||||||
void patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const;
|
void patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const;
|
||||||
|
void patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerStateOffset) const override;
|
||||||
|
|
||||||
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() override {
|
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() override {
|
||||||
return privateMemoryGraphicsAllocation;
|
return privateMemoryGraphicsAllocation;
|
||||||
|
|
|
@ -1023,10 +1023,6 @@ int main(int argc, char *argv[]) {
|
||||||
i = testCase;
|
i = testCase;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (testCase == -1 && i == 2) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (i) {
|
switch (i) {
|
||||||
default:
|
default:
|
||||||
case 0:
|
case 0:
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -53,6 +53,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
|
||||||
using ::L0::KernelImp::numThreadsPerThreadGroup;
|
using ::L0::KernelImp::numThreadsPerThreadGroup;
|
||||||
using ::L0::KernelImp::patchBindlessOffsetsInCrossThreadData;
|
using ::L0::KernelImp::patchBindlessOffsetsInCrossThreadData;
|
||||||
using ::L0::KernelImp::patchBindlessSurfaceState;
|
using ::L0::KernelImp::patchBindlessSurfaceState;
|
||||||
|
using ::L0::KernelImp::patchSamplerBindlessOffsetsInCrossThreadData;
|
||||||
using ::L0::KernelImp::perThreadDataForWholeThreadGroup;
|
using ::L0::KernelImp::perThreadDataForWholeThreadGroup;
|
||||||
using ::L0::KernelImp::perThreadDataSize;
|
using ::L0::KernelImp::perThreadDataSize;
|
||||||
using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup;
|
using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup;
|
||||||
|
|
|
@ -3894,5 +3894,70 @@ TEST(KernelImmutableDataTest, givenBindlessKernelWhenInitializingImmDataThenSshT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
|
||||||
|
Mock<Module> mockModule(this->device, nullptr);
|
||||||
|
Mock<KernelImp> mockKernel;
|
||||||
|
mockKernel.module = &mockModule;
|
||||||
|
|
||||||
|
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||||
|
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||||
|
|
||||||
|
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
|
||||||
|
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
|
||||||
|
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||||
|
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
|
||||||
|
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||||
|
|
||||||
|
auto argDescriptorSampler = NEO::ArgDescriptor(NEO::ArgDescriptor::argTSampler);
|
||||||
|
argDescriptorSampler.as<NEO::ArgDescSampler>() = NEO::ArgDescSampler();
|
||||||
|
argDescriptorSampler.as<NEO::ArgDescSampler>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||||
|
argDescriptorSampler.as<NEO::ArgDescSampler>().bindless = sizeof(uint64_t);
|
||||||
|
argDescriptorSampler.as<NEO::ArgDescSampler>().size = sizeof(uint64_t);
|
||||||
|
argDescriptorSampler.as<NEO::ArgDescSampler>().index = 1;
|
||||||
|
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptorSampler);
|
||||||
|
|
||||||
|
auto argDescriptorSampler2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTSampler);
|
||||||
|
argDescriptorSampler2.as<NEO::ArgDescSampler>() = NEO::ArgDescSampler();
|
||||||
|
argDescriptorSampler2.as<NEO::ArgDescSampler>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||||
|
argDescriptorSampler2.as<NEO::ArgDescSampler>().bindless = 2 * sizeof(uint64_t);
|
||||||
|
argDescriptorSampler2.as<NEO::ArgDescSampler>().size = sizeof(uint64_t);
|
||||||
|
argDescriptorSampler2.as<NEO::ArgDescSampler>().index = undefined<uint8_t>;
|
||||||
|
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptorSampler2);
|
||||||
|
|
||||||
|
auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
|
||||||
|
argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
|
||||||
|
argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||||
|
argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
|
||||||
|
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);
|
||||||
|
|
||||||
|
mockKernel.descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 3 * sizeof(uint64_t);
|
||||||
|
mockKernel.descriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 4 * sizeof(uint64_t);
|
||||||
|
|
||||||
|
mockKernel.isBindlessOffsetSet.resize(2, 0);
|
||||||
|
mockKernel.usingSurfaceStateHeap.resize(2, 0);
|
||||||
|
|
||||||
|
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||||
|
|
||||||
|
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(5 * sizeof(uint64_t));
|
||||||
|
mockKernel.crossThreadDataSize = 5 * sizeof(uint64_t);
|
||||||
|
memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);
|
||||||
|
|
||||||
|
const uint64_t baseAddress = 0x1000;
|
||||||
|
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
|
||||||
|
auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();
|
||||||
|
|
||||||
|
auto patchValue1 = (static_cast<uint32_t>(baseAddress + 1 * samplerStateSize));
|
||||||
|
auto patchValue2 = 0u;
|
||||||
|
|
||||||
|
mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);
|
||||||
|
|
||||||
|
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
|
||||||
|
memcpy(crossThreadData.get(), mockKernel.crossThreadData.get(), mockKernel.crossThreadDataSize);
|
||||||
|
|
||||||
|
EXPECT_EQ(patchValue1, crossThreadData[1]);
|
||||||
|
EXPECT_EQ(0u, patchValue2);
|
||||||
|
EXPECT_EQ(0u, crossThreadData[2]);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -339,8 +339,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, whenFlushi
|
||||||
ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress);
|
ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress);
|
||||||
auto stateBaseAddress = static_cast<STATE_BASE_ADDRESS *>(hwParserCsr.cmdStateBaseAddress);
|
auto stateBaseAddress = static_cast<STATE_BASE_ADDRESS *>(hwParserCsr.cmdStateBaseAddress);
|
||||||
EXPECT_TRUE(stateBaseAddress->getBindlessSamplerStateBaseAddressModifyEnable());
|
EXPECT_TRUE(stateBaseAddress->getBindlessSamplerStateBaseAddressModifyEnable());
|
||||||
EXPECT_EQ(0u, stateBaseAddress->getBindlessSamplerStateBaseAddress());
|
EXPECT_EQ(dsh.getHeapGpuBase(), stateBaseAddress->getBindlessSamplerStateBaseAddress());
|
||||||
EXPECT_EQ(0u, stateBaseAddress->getBindlessSamplerStateBufferSize());
|
EXPECT_EQ(dsh.getHeapSizeInPages(), stateBaseAddress->getBindlessSamplerStateBufferSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenDebugKeysThatOverrideMultiGpuSettingWhenStateBaseAddressIsProgrammedThenValuesMatch) {
|
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenDebugKeysThatOverrideMultiGpuSettingWhenStateBaseAddressIsProgrammedThenValuesMatch) {
|
||||||
|
|
|
@ -217,6 +217,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||||
}
|
}
|
||||||
|
|
||||||
idd.setSamplerStatePointer(samplerStateOffset);
|
idd.setSamplerStatePointer(samplerStateOffset);
|
||||||
|
args.dispatchInterface->patchSamplerBindlessOffsetsInCrossThreadData(samplerStateOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1195,6 +1195,7 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
|
||||||
static constexpr auto maxIndirectSamplerStateSize = 64U;
|
static constexpr auto maxIndirectSamplerStateSize = 64U;
|
||||||
auto &sampler = dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescSampler>();
|
auto &sampler = dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescSampler>();
|
||||||
sampler.bindful = maxIndirectSamplerStateSize + maxSamplerStateSize * src.samplerIndex;
|
sampler.bindful = maxIndirectSamplerStateSize + maxSamplerStateSize * src.samplerIndex;
|
||||||
|
sampler.index = src.samplerIndex;
|
||||||
dst.payloadMappings.samplerTable.numSamplers = std::max<uint8_t>(dst.payloadMappings.samplerTable.numSamplers, static_cast<uint8_t>(src.samplerIndex + 1));
|
dst.payloadMappings.samplerTable.numSamplers = std::max<uint8_t>(dst.payloadMappings.samplerTable.numSamplers, static_cast<uint8_t>(src.samplerIndex + 1));
|
||||||
} else {
|
} else {
|
||||||
dst.kernelAttributes.numArgsStateful++;
|
dst.kernelAttributes.numArgsStateful++;
|
||||||
|
@ -1212,12 +1213,16 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
|
||||||
case Types::Kernel::PayloadArgument::memoryAddressingModeBindless:
|
case Types::Kernel::PayloadArgument::memoryAddressingModeBindless:
|
||||||
if (dst.payloadMappings.explicitArgs[src.argIndex].is<NEO::ArgDescriptor::argTPointer>()) {
|
if (dst.payloadMappings.explicitArgs[src.argIndex].is<NEO::ArgDescriptor::argTPointer>()) {
|
||||||
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescPointer>(false).bindless = src.offset;
|
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescPointer>(false).bindless = src.offset;
|
||||||
|
dst.kernelAttributes.numArgsStateful++;
|
||||||
} else if (dst.payloadMappings.explicitArgs[src.argIndex].is<NEO::ArgDescriptor::argTImage>()) {
|
} else if (dst.payloadMappings.explicitArgs[src.argIndex].is<NEO::ArgDescriptor::argTImage>()) {
|
||||||
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescImage>(false).bindless = src.offset;
|
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescImage>(false).bindless = src.offset;
|
||||||
|
dst.kernelAttributes.numArgsStateful++;
|
||||||
} else {
|
} else {
|
||||||
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescSampler>(false).bindless = src.offset;
|
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescSampler>(false).bindless = src.offset;
|
||||||
|
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescSampler>(false).index = src.samplerIndex;
|
||||||
|
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescSampler>(false).size = src.size;
|
||||||
|
dst.payloadMappings.samplerTable.numSamplers = std::max<uint8_t>(dst.payloadMappings.samplerTable.numSamplers, static_cast<uint8_t>(src.samplerIndex + 1));
|
||||||
}
|
}
|
||||||
dst.kernelAttributes.numArgsStateful++;
|
|
||||||
break;
|
break;
|
||||||
case Types::Kernel::PayloadArgument::memoryAddressingModeSharedLocalMemory:
|
case Types::Kernel::PayloadArgument::memoryAddressingModeSharedLocalMemory:
|
||||||
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescPointer>(false).slmOffset = src.offset;
|
dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescPointer>(false).slmOffset = src.offset;
|
||||||
|
|
|
@ -55,9 +55,17 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
||||||
const auto surfaceStateCount = args.ssh->getMaxAvailableSpace() / sizeof(RENDER_SURFACE_STATE);
|
const auto surfaceStateCount = args.ssh->getMaxAvailableSpace() / sizeof(RENDER_SURFACE_STATE);
|
||||||
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(static_cast<uint32_t>(surfaceStateCount - 1));
|
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(static_cast<uint32_t>(surfaceStateCount - 1));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddressModifyEnable(true);
|
if (args.dsh) {
|
||||||
|
args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddress(args.dsh->getHeapGpuBase());
|
||||||
|
args.stateBaseAddressCmd->setBindlessSamplerStateBufferSize(args.dsh->getHeapSizeInPages());
|
||||||
|
args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddressModifyEnable(true);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddressModifyEnable(true);
|
||||||
|
args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddress(args.globalHeapsBaseAddress);
|
||||||
|
args.stateBaseAddressCmd->setBindlessSamplerStateBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
|
||||||
|
}
|
||||||
|
|
||||||
auto &productHelper = args.gmmHelper->getRootDeviceEnvironment().template getHelper<ProductHelper>();
|
auto &productHelper = args.gmmHelper->getRootDeviceEnvironment().template getHelper<ProductHelper>();
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -48,5 +48,6 @@ struct DispatchKernelEncoderI {
|
||||||
|
|
||||||
virtual ImplicitArgs *getImplicitArgs() const = 0;
|
virtual ImplicitArgs *getImplicitArgs() const = 0;
|
||||||
virtual void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const = 0;
|
virtual void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const = 0;
|
||||||
|
virtual void patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerStateOffset) const = 0;
|
||||||
};
|
};
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -106,6 +106,8 @@ struct ArgDescSampler final {
|
||||||
CrossThreadDataOffset samplerAddressingMode = undefined<CrossThreadDataOffset>;
|
CrossThreadDataOffset samplerAddressingMode = undefined<CrossThreadDataOffset>;
|
||||||
CrossThreadDataOffset samplerNormalizedCoords = undefined<CrossThreadDataOffset>;
|
CrossThreadDataOffset samplerNormalizedCoords = undefined<CrossThreadDataOffset>;
|
||||||
} metadataPayload;
|
} metadataPayload;
|
||||||
|
uint8_t index = undefined<uint8_t>;
|
||||||
|
uint8_t size = undefined<uint8_t>;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ArgDescValue final {
|
struct ArgDescValue final {
|
||||||
|
|
|
@ -1757,6 +1757,37 @@ kernels:
|
||||||
EXPECT_EQ(1u, kernelDescriptor->kernelAttributes.numArgsStateful);
|
EXPECT_EQ(1u, kernelDescriptor->kernelAttributes.numArgsStateful);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(decodeZeInfoKernelEntryTest, GivenBindlessSamplerWhenDecodingZeInfoThenNumberOfStatefulArgsDoesNotCountSampler) {
|
||||||
|
ConstStringRef zeinfo = R"===(
|
||||||
|
kernels:
|
||||||
|
- name : some_kernel
|
||||||
|
execution_env:
|
||||||
|
simd_size: 8
|
||||||
|
payload_arguments:
|
||||||
|
- arg_type: arg_bypointer
|
||||||
|
offset: 0
|
||||||
|
size: 8
|
||||||
|
arg_index: 0
|
||||||
|
addrmode: bindless
|
||||||
|
addrspace: sampler
|
||||||
|
access_type: readwrite
|
||||||
|
sampler_index: 0
|
||||||
|
sampler_type: texture
|
||||||
|
- arg_type: arg_bypointer
|
||||||
|
offset: 8
|
||||||
|
size: 4
|
||||||
|
arg_index: 1
|
||||||
|
addrmode: bindless
|
||||||
|
addrspace: global
|
||||||
|
access_type: readwrite
|
||||||
|
...
|
||||||
|
)===";
|
||||||
|
auto err = decodeZeInfoKernelEntry(zeinfo);
|
||||||
|
EXPECT_EQ(NEO::DecodeError::success, err);
|
||||||
|
|
||||||
|
EXPECT_EQ(1u, kernelDescriptor->kernelAttributes.numArgsStateful);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(decodeZeInfoKernelEntryTest, GivenBindlessImageAddressingWhenDecodingZeInfoThenImageAddressingModeIsBindless) {
|
TEST_F(decodeZeInfoKernelEntryTest, GivenBindlessImageAddressingWhenDecodingZeInfoThenImageAddressingModeIsBindless) {
|
||||||
ConstStringRef zeinfo = R"===(
|
ConstStringRef zeinfo = R"===(
|
||||||
kernels:
|
kernels:
|
||||||
|
@ -5643,10 +5674,14 @@ TEST_F(decodeZeInfoKernelEntryTest, GivenValidSamplerArgumentWithMetadataThenPop
|
||||||
|
|
||||||
auto &sampler0 = args[0].as<ArgDescSampler>();
|
auto &sampler0 = args[0].as<ArgDescSampler>();
|
||||||
EXPECT_EQ(64U, sampler0.bindful);
|
EXPECT_EQ(64U, sampler0.bindful);
|
||||||
|
EXPECT_EQ(0U, sampler0.index);
|
||||||
|
EXPECT_EQ(undefined<uint8_t>, sampler0.size);
|
||||||
|
|
||||||
auto &sampler1 = args[1].as<ArgDescSampler>();
|
auto &sampler1 = args[1].as<ArgDescSampler>();
|
||||||
EXPECT_TRUE(args[1].getExtendedTypeInfo().isAccelerator);
|
EXPECT_TRUE(args[1].getExtendedTypeInfo().isAccelerator);
|
||||||
EXPECT_EQ(80U, sampler1.bindful);
|
EXPECT_EQ(80U, sampler1.bindful);
|
||||||
|
EXPECT_EQ(1U, sampler1.index);
|
||||||
|
EXPECT_EQ(undefined<uint8_t>, sampler1.size);
|
||||||
|
|
||||||
auto &sampler2 = args[2].as<ArgDescSampler>();
|
auto &sampler2 = args[2].as<ArgDescSampler>();
|
||||||
EXPECT_TRUE(args[2].getExtendedTypeInfo().isAccelerator);
|
EXPECT_TRUE(args[2].getExtendedTypeInfo().isAccelerator);
|
||||||
|
@ -5654,6 +5689,8 @@ TEST_F(decodeZeInfoKernelEntryTest, GivenValidSamplerArgumentWithMetadataThenPop
|
||||||
EXPECT_EQ(0U, sampler2.metadataPayload.samplerSnapWa);
|
EXPECT_EQ(0U, sampler2.metadataPayload.samplerSnapWa);
|
||||||
EXPECT_EQ(4U, sampler2.metadataPayload.samplerNormalizedCoords);
|
EXPECT_EQ(4U, sampler2.metadataPayload.samplerNormalizedCoords);
|
||||||
EXPECT_EQ(8U, sampler2.metadataPayload.samplerAddressingMode);
|
EXPECT_EQ(8U, sampler2.metadataPayload.samplerAddressingMode);
|
||||||
|
EXPECT_EQ(2U, sampler2.index);
|
||||||
|
EXPECT_EQ(undefined<uint8_t>, sampler2.size);
|
||||||
|
|
||||||
auto &sampler3 = args[3].as<ArgDescSampler>();
|
auto &sampler3 = args[3].as<ArgDescSampler>();
|
||||||
EXPECT_TRUE(args[3].getExtendedTypeInfo().isAccelerator);
|
EXPECT_TRUE(args[3].getExtendedTypeInfo().isAccelerator);
|
||||||
|
@ -5669,6 +5706,46 @@ TEST_F(decodeZeInfoKernelEntryTest, GivenValidSamplerArgumentWithMetadataThenPop
|
||||||
EXPECT_TRUE(kd.kernelAttributes.flags.usesVme);
|
EXPECT_TRUE(kd.kernelAttributes.flags.usesVme);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(decodeZeInfoKernelEntryTest, GivenBindlessSamplerArgumentWithMetadataThenKernelDescriptorIsPopulated) {
|
||||||
|
ConstStringRef zeinfo = R"===(
|
||||||
|
kernels:
|
||||||
|
- name : some_kernel
|
||||||
|
execution_env:
|
||||||
|
simd_size: 8
|
||||||
|
payload_arguments:
|
||||||
|
- arg_type: arg_bypointer
|
||||||
|
offset: 88
|
||||||
|
size: 8
|
||||||
|
arg_index: 0
|
||||||
|
addrmode: bindless
|
||||||
|
addrspace: sampler
|
||||||
|
access_type: readwrite
|
||||||
|
sampler_index: 3
|
||||||
|
sampler_type: texture
|
||||||
|
- arg_type: arg_bypointer
|
||||||
|
offset: 8
|
||||||
|
size: 4
|
||||||
|
arg_index: 1
|
||||||
|
addrmode: bindless
|
||||||
|
addrspace: global
|
||||||
|
access_type: readwrite
|
||||||
|
...
|
||||||
|
)===";
|
||||||
|
auto err = decodeZeInfoKernelEntry(zeinfo);
|
||||||
|
EXPECT_EQ(NEO::DecodeError::success, err);
|
||||||
|
|
||||||
|
EXPECT_EQ(1u, kernelDescriptor->kernelAttributes.numArgsStateful);
|
||||||
|
|
||||||
|
const auto &kd = *this->kernelDescriptor;
|
||||||
|
auto &args = kd.payloadMappings.explicitArgs;
|
||||||
|
|
||||||
|
auto &sampler0 = args[0].as<ArgDescSampler>();
|
||||||
|
EXPECT_EQ(undefined<DynamicStateHeapOffset>, sampler0.bindful);
|
||||||
|
EXPECT_EQ(88u, sampler0.bindless);
|
||||||
|
EXPECT_EQ(3U, sampler0.index);
|
||||||
|
EXPECT_EQ(8U, sampler0.size);
|
||||||
|
}
|
||||||
|
|
||||||
class IntelGTNotesFixture : public ::testing::Test {
|
class IntelGTNotesFixture : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -30,6 +30,7 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
|
||||||
NEO::ImplicitArgs *getImplicitArgs() const override { return nullptr; }
|
NEO::ImplicitArgs *getImplicitArgs() const override { return nullptr; }
|
||||||
|
|
||||||
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override { return; };
|
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override { return; };
|
||||||
|
void patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerStateOffset) const override { return; };
|
||||||
|
|
||||||
MockGraphicsAllocation mockAllocation{};
|
MockGraphicsAllocation mockAllocation{};
|
||||||
static constexpr uint32_t crossThreadSize = 0x40;
|
static constexpr uint32_t crossThreadSize = 0x40;
|
||||||
|
|
Loading…
Reference in New Issue