fix: allow kernel access across multiple virtual regions

Related to: NEO-8350
Signed-off-by: John Falkowski <john.falkowski@intel.com>
2026-01-03 23:03:02 +08:00 · 2024-04-23 07:14:31 +00:00
parent b499973658
commit b9c1ef65dd
3 changed files with 7 additions and 25 deletions
--- a/level_zero/core/source/kernel/kernel_hw.h
+++ b/level_zero/core/source/kernel/kernel_hw.h
@@ -39,22 +39,11 @@ struct KernelHw : public KernelImp {
        auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
        auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
        size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
-        // If the allocation is part of a mapped virtual range, then check to see if the buffer size needs to be extended to include more physical buffers.
+        // If the allocation is part of a mapped virtual range, then set size to maximum to allow for access across multiple virtual ranges.
        Device *device = module->getDevice();
        auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
        if (allocData && allocData->virtualReservationData) {
-            size_t calcBufferSizeForSsh = bufferSizeForSsh;
-            for (const auto &mappedAllocationData : allocData->virtualReservationData->mappedAllocations) {
-                // Add additional allocations buffer size to be programmed to allow full usage of the memory range if the allocation is after this starting address.
-                if (address != mappedAllocationData.second->ptr && mappedAllocationData.second->ptr > address) {
-                    calcBufferSizeForSsh += mappedAllocationData.second->mappedAllocation->allocation->getUnderlyingBufferSize();
-                    // Only allow for the surface state to be extended up to 4GB in size.
-                    bufferSizeForSsh = std::min(calcBufferSizeForSsh, MemoryConstants::gigaByte * 4);
-                    if (bufferSizeForSsh == MemoryConstants::gigaByte * 4) {
-                        break;
-                    }
-                }
-            }
+            bufferSizeForSsh = MemoryConstants::fullStatefulRegion;
        }
        auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
        bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize),
--- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp
+++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp
@@ -2510,23 +2510,19 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,

        bool phys1Resident = false;
        bool phys2Resident = false;
-        NEO::GraphicsAllocation *baseAlloc = nullptr;
-        NEO::GraphicsAllocation *offsetAlloc = nullptr;
        for (auto alloc : kernel->getResidencyContainer()) {
            if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
                phys1Resident = true;
-                baseAlloc = alloc;
            }
            if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAddress)) {
                phys2Resident = true;
-                offsetAlloc = alloc;
            }
        }
        auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
        auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
        auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
        SurfaceStateBufferLength length = {0};
-        length.length = static_cast<uint32_t>((baseAlloc->getUnderlyingBufferSize() + offsetAlloc->getUnderlyingBufferSize()) - 1);
+        length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
        EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
        EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
        EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));
@@ -2584,21 +2580,19 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,

        bool phys1Resident = false;
        bool phys2Resident = false;
-        NEO::GraphicsAllocation *offsetAlloc = nullptr;
        for (auto alloc : kernel->getResidencyContainer()) {
            if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
                phys1Resident = true;
            }
            if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAddress)) {
                phys2Resident = true;
-                offsetAlloc = alloc;
            }
        }
        auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
        auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
        auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
        SurfaceStateBufferLength length = {0};
-        length.length = static_cast<uint32_t>(offsetAlloc->getUnderlyingBufferSize() - 1);
+        length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
        EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
        EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
        EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));
@@ -2649,18 +2643,16 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,
        kernel->setArgBuffer(0, sizeof(ptr), &ptr);

        bool phys1Resident = false;
-        NEO::GraphicsAllocation *baseAlloc = nullptr;
        for (auto alloc : kernel->getResidencyContainer()) {
            if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
                phys1Resident = true;
-                baseAlloc = alloc;
            }
        }
        auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
        auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
        auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
        SurfaceStateBufferLength length = {0};
-        length.length = static_cast<uint32_t>(baseAlloc->getUnderlyingBufferSize() - 1);
+        length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
        EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
        EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
        EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));