fix: global bindless offsets for implicit args

- do not patch bindless offsets for implicit args when the global bindless
allocator is in use; in that case the offsets are patched at kernel
initialization time and do not change afterwards

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-08-23 11:20:16 +00:00
committed by Compute-Runtime-Automation
parent 3e5dec27ef
commit c9e8931a18
4 changed files with 53 additions and 3 deletions

View File

@@ -1365,7 +1365,11 @@ void KernelImp::patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceSt
}
}
patchBindlessOffsetsForImplicitArgs(bindlessSurfaceStateBaseOffset);
const auto bindlessHeapsHelper = this->module && this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper();
if (!bindlessHeapsHelper) {
patchBindlessOffsetsForImplicitArgs(bindlessSurfaceStateBaseOffset);
}
}
void KernelImp::patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerStateOffset) const {

View File

@@ -73,7 +73,7 @@ std::vector<uint8_t> compileToNative(const std::string &src, const std::string &
std::vector<uint8_t> ret;
const char *mainFileName = "main.cl";
const char *argv[] = {"ocloc", "-q", "-device", deviceName.c_str(), "-revision_id", revisionId.c_str(), "-file", mainFileName, "-o", "output.bin", "", "", "", ""};
const char *argv[] = {"ocloc", "-v", "-device", deviceName.c_str(), "-revision_id", revisionId.c_str(), "-file", mainFileName, "-o", "output.bin", "", "", "", ""};
uint32_t numArgs = sizeof(argv) / sizeof(argv[0]) - 4;
int argIndex = 10;
if (options.size() > 0) {

View File

@@ -3956,6 +3956,48 @@ TEST_F(BindlessKernelTest, givenNoStatefulArgsWhenPatchingBindlessOffsetsInCross
EXPECT_EQ(0u, crossThreadData[0]);
}
TEST_F(BindlessKernelTest, givenGlobalBindlessAllocatorAndBindlessKernelWithImplicitArgsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice,
neoDevice->getNumGenericSubDevices() > 1);
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
mockKernel.descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 0x8;
mockKernel.descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless = NEO::undefined<NEO::SurfaceStateHeapOffset>;
mockKernel.isBindlessOffsetSet.resize(1, 1);
mockKernel.isBindlessOffsetSet[0] = true;
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);
const uint64_t baseAddress = 0x1000;
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.crossThreadData.get(), mockKernel.crossThreadDataSize);
EXPECT_EQ(0u, crossThreadData[0]);
EXPECT_EQ(0u, crossThreadData[1]);
EXPECT_EQ(0u, crossThreadData[2]);
EXPECT_EQ(0u, crossThreadData[3]);
}
TEST(KernelImmutableDataTest, givenBindlessKernelWhenInitializingImmDataThenSshTemplateIsAllocated) {
HardwareInfo hwInfo = *defaultHwInfo;