refactor: move surface state programming method to base class

Related-To: NEO-15374
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
2025-12-26 23:33:20 +08:00 · 2025-07-22 14:10:29 +00:00
parent b90b77e4e3
commit 5893fb08fb
3 changed files with 82 additions and 85 deletions
--- a/level_zero/core/source/kernel/kernel_hw.h
+++ b/level_zero/core/source/kernel/kernel_hw.h
@@ -29,90 +29,6 @@ namespace L0 {
 template <GFXCORE_FAMILY gfxCoreFamily>
 struct KernelHw : public KernelImp {
    using KernelImp::KernelImp;
-    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
-
-    void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
-        uint64_t baseAddress = alloc->getGpuAddressToPatch();
-
-        // Remove misaligned bytes, accounted for in bufferOffset patch token
-        baseAddress &= this->surfaceStateAlignmentMask;
-        auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
-        auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
-        size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
-        // If the allocation is part of a mapped virtual range, then set size to maximum to allow for access across multiple virtual ranges.
-        Device *device = module->getDevice();
-        auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
-
-        auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
-        bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(getCrossThreadDataSpan(),
-                                                                         argInfo.bufferOffset, static_cast<uint32_t>(offset));
-        bool offsetedAddress = false;
-        if (false == offsetWasPatched) {
-            // fallback to handling offset in surface state
-            offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
-            baseAddress = reinterpret_cast<uintptr_t>(address);
-            bufferSizeForSsh -= offset;
-            DEBUG_BREAK_IF(baseAddress != (baseAddress & this->surfaceStateAlignmentMask));
-
-            offset = 0;
-        }
-        void *surfaceStateAddress = nullptr;
-        auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
-
-        if (NEO::isValidOffset(argInfo.bindful)) {
-            surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), argInfo.bindful);
-            surfaceState = *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress);
-
-        } else if (NEO::isValidOffset(argInfo.bindless)) {
-            state.isBindlessOffsetSet[argIndex] = false;
-            state.usingSurfaceStateHeap[argIndex] = false;
-            if (this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper() && !offsetedAddress) {
-                surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
-                state.isBindlessOffsetSet[argIndex] = true;
-            } else {
-                state.usingSurfaceStateHeap[argIndex] = true;
-                surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(argInfo.bindless) * sizeof(typename GfxFamily::RENDER_SURFACE_STATE));
-            }
-        }
-
-        uint64_t bufferAddressForSsh = baseAddress;
-        bufferSizeForSsh += misalignedSize;
-        bufferSizeForSsh = alignUp(bufferSizeForSsh, this->surfaceStateAlignment);
-
-        bool l3Enabled = true;
-        // Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching otherwise Heap corruption will occur coming from the KMD.
-        // Most commonly this issue will occur with Host Point Allocations from customers.
-        l3Enabled = isL3Capable(*alloc);
-
-        NEO::Device *neoDevice = device->getNEODevice();
-
-        if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
-            l3Enabled = false;
-        }
-
-        if (l3Enabled == false) {
-            this->state.kernelRequiresQueueUncachedMocsCount++;
-        }
-        auto isDebuggerActive = neoDevice->getDebugger() != nullptr;
-        NEO::EncodeSurfaceStateArgs args;
-        args.outMemory = &surfaceState;
-        args.graphicsAddress = bufferAddressForSsh;
-        if (allocData && allocData->virtualReservationData) {
-            bufferSizeForSsh = MemoryConstants::fullStatefulRegion;
-        }
-        args.size = bufferSizeForSsh;
-        args.mocs = device->getMOCS(l3Enabled, false);
-        args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
-        args.allocation = alloc;
-        args.gmmHelper = neoDevice->getGmmHelper();
-        args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
-        args.implicitScaling = device->isImplicitScalingCapable();
-        args.isDebuggerActive = isDebuggerActive;
-
-        NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
-        UNRECOVERABLE_IF(surfaceStateAddress == nullptr);
-        *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;
-    }
 };

 } // namespace L0
--- a/level_zero/core/source/kernel/kernel_imp.cpp
+++ b/level_zero/core/source/kernel/kernel_imp.cpp
@@ -1083,6 +1083,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
    this->walkerInlineDataSize = gfxHelper.getDefaultWalkerInlineDataSize();
    this->surfaceStateAlignmentMask = gfxHelper.getSurfaceBaseAddressAlignmentMask();
    this->surfaceStateAlignment = gfxHelper.getSurfaceBaseAddressAlignment();
+    this->renderSurfaceStateSize = gfxHelper.getRenderSurfaceStateSize();

    if (isaAllocation->getAllocationType() == NEO::AllocationType::kernelIsaInternal && this->kernelImmData->getIsaParentAllocation() == nullptr) {
        isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
@@ -1662,4 +1663,83 @@ void KernelImp::evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::Kerne
                                                      kernelDescriptor.kernelAttributes.simdSize);
 }

+// Programs the buffer surface state of pointer argument argIndex for alloc, using address as the base when the offset cannot be patched.
+// Trips UNRECOVERABLE_IF when the argument has neither a valid bindful nor bindless offset, instead of encoding through a null pointer.
+void KernelImp::setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) {
+    uint64_t baseAddress = alloc->getGpuAddressToPatch();
+
+    // Remove misaligned bytes, accounted for in bufferOffset patch token
+    baseAddress &= this->surfaceStateAlignmentMask;
+    auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
+    auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
+    size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
+    Device *device = module->getDevice();
+    NEO::Device *neoDevice = device->getNEODevice();
+    // SVM data (may be null) supplies the locally-uncached flag and the virtual reservation data checked below.
+    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
+
+    auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
+    bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(getCrossThreadDataSpan(), argInfo.bufferOffset, static_cast<uint32_t>(offset));
+    bool offsetedAddress = false;
+    if (false == offsetWasPatched) {
+        // fallback to handling offset in surface state
+        offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
+        baseAddress = reinterpret_cast<uintptr_t>(address);
+        bufferSizeForSsh -= offset;
+        DEBUG_BREAK_IF(baseAddress != (baseAddress & this->surfaceStateAlignmentMask));
+    }
+
+    NEO::EncodeSurfaceStateArgs args;
+    void *surfaceStateAddress = nullptr;
+
+    if (NEO::isValidOffset(argInfo.bindful)) {
+        surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), argInfo.bindful);
+        args.inTemplateMemory = surfaceStateAddress;
+    } else if (NEO::isValidOffset(argInfo.bindless)) {
+        state.isBindlessOffsetSet[argIndex] = false;
+        state.usingSurfaceStateHeap[argIndex] = false;
+        if (neoDevice->getBindlessHeapsHelper() && !offsetedAddress) {
+            surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
+            state.isBindlessOffsetSet[argIndex] = true;
+        } else {
+            // No bindless heaps helper, or the base was moved to address: fall back to a slot in state.surfaceStateHeapData.
+            state.usingSurfaceStateHeap[argIndex] = true;
+            surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(argInfo.bindless) * this->renderSurfaceStateSize);
+        }
+    }
+    // No bindful or bindless slot was resolved: stop here instead of handing a null outMemory to the encoder.
+    UNRECOVERABLE_IF(surfaceStateAddress == nullptr);
+    args.outMemory = surfaceStateAddress;
+
+    bufferSizeForSsh += misalignedSize;
+    bufferSizeForSsh = alignUp(bufferSizeForSsh, this->surfaceStateAlignment);
+
+    // Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching otherwise Heap corruption will occur coming from the KMD.
+    // Most commonly this issue will occur with Host Pointer Allocations from customers.
+    bool l3Enabled = isL3Capable(*alloc);
+    if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
+        l3Enabled = false;
+    }
+
+    if (l3Enabled == false) {
+        this->state.kernelRequiresQueueUncachedMocsCount++;
+    }
+
+    // If the allocation is part of a mapped virtual range, then set size to maximum to allow for access across multiple virtual ranges.
+    if (allocData && allocData->virtualReservationData) {
+        bufferSizeForSsh = MemoryConstants::fullStatefulRegion;
+    }
+    args.graphicsAddress = baseAddress;
+    args.size = bufferSizeForSsh;
+    args.mocs = device->getMOCS(l3Enabled, false);
+    args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
+    args.allocation = alloc;
+    args.gmmHelper = neoDevice->getGmmHelper();
+    args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
+    args.implicitScaling = device->isImplicitScalingCapable();
+    args.isDebuggerActive = neoDevice->getDebugger() != nullptr;
+
+    device->getGfxCoreHelper().encodeBufferSurfaceState(args);
+}
+
 } // namespace L0
--- a/level_zero/core/source/kernel/kernel_imp.h
+++ b/level_zero/core/source/kernel/kernel_imp.h
@@ -169,7 +169,7 @@ struct KernelImp : Kernel {

    ze_result_t setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal);

-    virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0;
+    MOCKABLE_VIRTUAL void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc);

    void setInlineSamplers();

@@ -325,6 +325,7 @@ struct KernelImp : Kernel {
    NEO::GraphicsAllocation *printfBuffer = nullptr;
    uintptr_t surfaceStateAlignmentMask = 0;
    uintptr_t surfaceStateAlignment = 0;
+    size_t renderSurfaceStateSize = 0;

    uint32_t implicitArgsVersion = 0;
    uint32_t walkerInlineDataSize = 0;