Added scratch size check

Related-To: NEO-7508
Signed-off-by: Andrzej Koska <andrzej.koska@intel.com>
2026-01-05 09:09:04 +08:00 · 2022-11-17 14:27:52 +00:00
parent fd691cd33c
commit 90034d4173
14 changed files with 183 additions and 7 deletions
--- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp
+++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp
@@ -158,7 +158,7 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
                                                                  bool &scratchSurfaceDirty,
                                                                  bool &vfeStateDirty) {
    uint32_t requiredPerThreadScratchSizeAlignedUp = alignUp(requiredPerThreadScratchSize, 64);
-    size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeAlignedUp * computeUnitsUsedForScratch;
+    size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch;
    scratchSurfaceDirty = false;
    auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
    if (scratchSizeBytes < requiredScratchSizeInBytes) {
@@ -174,7 +174,7 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
    }
    if (privateScratchSpaceSupported) {
        uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = alignUp(requiredPerThreadPrivateScratchSize, 64);
-        size_t requiredPrivateScratchSizeInBytes = requiredPerThreadPrivateScratchSizeAlignedUp * computeUnitsUsedForScratch;
+        size_t requiredPrivateScratchSizeInBytes = static_cast<size_t>(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch;
        if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
            if (privateScratchAllocation) {
                privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
--- a/shared/source/helpers/hw_helper.h
+++ b/shared/source/helpers/hw_helper.h
@@ -136,6 +136,7 @@ class HwHelper {
    virtual size_t getSamplerStateSize() const = 0;
    virtual bool preferInternalBcsEngine() const = 0;
    virtual bool isScratchSpaceSurfaceStateAccessible() const = 0;
+    virtual uint32_t getMaxScratchSize() const = 0;
    virtual uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const = 0;
    virtual uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const = 0;
    virtual size_t getMax3dImageWidthOrHeight() const = 0;
@@ -354,6 +355,7 @@ class HwHelperHw : public HwHelper {
    void adjustPreemptionSurfaceSize(size_t &csrSize) const override;

    bool isScratchSpaceSurfaceStateAccessible() const override;
+    uint32_t getMaxScratchSize() const override;
    bool preferInternalBcsEngine() const override;
    size_t getMax3dImageWidthOrHeight() const override;
    uint64_t getMaxMemAllocSize() const override;
--- a/shared/source/helpers/hw_helper_bdw_and_later.inl
+++ b/shared/source/helpers/hw_helper_bdw_and_later.inl
@@ -145,6 +145,11 @@ bool HwHelperHw<GfxFamily>::isScratchSpaceSurfaceStateAccessible() const {
    return false;
 }

+template <typename GfxFamily>
+uint32_t HwHelperHw<GfxFamily>::getMaxScratchSize() const { // Limit that KernelHelper::checkIfThereIsSpaceForScratchOrPrivate compares each perThreadScratchSize slot against.
+    return 2 * MB; // NOTE(review): presumably a byte count (MB is defined outside this hunk) — confirm.
+}
+
 template <typename GfxFamily>
 inline bool HwHelperHw<GfxFamily>::platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const {
    return false;
--- a/shared/source/helpers/hw_helper_xehp_and_later.inl
+++ b/shared/source/helpers/hw_helper_xehp_and_later.inl
@@ -202,6 +202,10 @@ template <typename GfxFamily>
 bool HwHelperHw<GfxFamily>::isScratchSpaceSurfaceStateAccessible() const {
    return true;
 }
+template <typename GfxFamily>
+uint32_t HwHelperHw<GfxFamily>::getMaxScratchSize() const { // Limit that KernelHelper::checkIfThereIsSpaceForScratchOrPrivate compares each perThreadScratchSize slot against.
+    return 256 * KB; // NOTE(review): presumably a byte count (KB is defined outside this hunk) — confirm.
+}

 template <typename GfxFamily>
 inline bool HwHelperHw<GfxFamily>::platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const {
--- a/shared/source/helpers/kernel_helpers.cpp
+++ b/shared/source/helpers/kernel_helpers.cpp
@@ -11,6 +11,7 @@
 #include "shared/source/device/device.h"
 #include "shared/source/helpers/basic_math.h"
 #include "shared/source/helpers/debug_helpers.h"
+#include "shared/source/helpers/hw_helper.h"

 #include <algorithm>

@@ -48,6 +49,11 @@ uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThr
 }

 KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device) {
+    auto &coreHelper = device->getRootDeviceEnvironment().getHelper<NEO::CoreHelper>();
+    uint32_t maxScratchSize = coreHelper.getMaxScratchSize();
+    if ((attributes.perThreadScratchSize[0] > maxScratchSize) || (attributes.perThreadScratchSize[1] > maxScratchSize)) {
+        return KernelHelper::ErrorCode::INVALID_KERNEL;
+    }
    auto globalMemorySize = device->getDeviceInfo().globalMemSize;
    uint32_t sizes[] = {attributes.perHwThreadPrivateMemorySize,
                        attributes.perThreadScratchSize[0],
--- a/shared/source/helpers/kernel_helpers.h
+++ b/shared/source/helpers/kernel_helpers.h
@@ -18,7 +18,8 @@ class Device;
 struct KernelHelper {
    enum class ErrorCode {
        SUCCESS = 0,
-        OUT_OF_DEVICE_MEMORY = 1
+        OUT_OF_DEVICE_MEMORY = 1,
+        INVALID_KERNEL = 2 // Returned by checkIfThereIsSpaceForScratchOrPrivate when a perThreadScratchSize entry exceeds getMaxScratchSize().
    };
    static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
                                         uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,