Added scratch size check

Related-To: NEO-7508
Signed-off-by: Andrzej Koska <andrzej.koska@intel.com>
This commit is contained in:
Andrzej Koska
2022-11-17 14:27:52 +00:00
committed by Compute-Runtime-Automation
parent fd691cd33c
commit 90034d4173
14 changed files with 183 additions and 7 deletions

View File

@@ -158,7 +158,7 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
bool &scratchSurfaceDirty,
bool &vfeStateDirty) {
uint32_t requiredPerThreadScratchSizeAlignedUp = alignUp(requiredPerThreadScratchSize, 64);
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeAlignedUp * computeUnitsUsedForScratch;
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch;
scratchSurfaceDirty = false;
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
if (scratchSizeBytes < requiredScratchSizeInBytes) {
@@ -174,7 +174,7 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
}
if (privateScratchSpaceSupported) {
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = alignUp(requiredPerThreadPrivateScratchSize, 64);
size_t requiredPrivateScratchSizeInBytes = requiredPerThreadPrivateScratchSizeAlignedUp * computeUnitsUsedForScratch;
size_t requiredPrivateScratchSizeInBytes = static_cast<size_t>(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch;
if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
if (privateScratchAllocation) {
privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());

View File

@@ -136,6 +136,7 @@ class HwHelper {
virtual size_t getSamplerStateSize() const = 0;
virtual bool preferInternalBcsEngine() const = 0;
virtual bool isScratchSpaceSurfaceStateAccessible() const = 0;
virtual uint32_t getMaxScratchSize() const = 0;
virtual uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const = 0;
virtual uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const = 0;
virtual size_t getMax3dImageWidthOrHeight() const = 0;
@@ -354,6 +355,7 @@ class HwHelperHw : public HwHelper {
void adjustPreemptionSurfaceSize(size_t &csrSize) const override;
bool isScratchSpaceSurfaceStateAccessible() const override;
uint32_t getMaxScratchSize() const override;
bool preferInternalBcsEngine() const override;
size_t getMax3dImageWidthOrHeight() const override;
uint64_t getMaxMemAllocSize() const override;

View File

@@ -145,6 +145,11 @@ bool HwHelperHw<GfxFamily>::isScratchSpaceSurfaceStateAccessible() const {
return false;
}
// Reports the maximum scratch size for this HwHelperHw implementation: 2 * MB.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMaxScratchSize() const {
    constexpr uint32_t maxScratchSize = 2 * MB;
    return maxScratchSize;
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const {
return false;

View File

@@ -202,6 +202,10 @@ template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isScratchSpaceSurfaceStateAccessible() const {
// This implementation unconditionally reports the scratch space surface state as accessible.
return true;
}
// Reports the maximum scratch size for this HwHelperHw implementation: 256 * KB.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMaxScratchSize() const {
    constexpr uint32_t maxScratchSize = 256 * KB;
    return maxScratchSize;
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const {

View File

@@ -11,6 +11,7 @@
#include "shared/source/device/device.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/hw_helper.h"
#include <algorithm>
@@ -48,6 +49,11 @@ uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThr
}
KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device) {
auto &coreHelper = device->getRootDeviceEnvironment().getHelper<NEO::CoreHelper>();
uint32_t maxScratchSize = coreHelper.getMaxScratchSize();
if ((attributes.perThreadScratchSize[0] > maxScratchSize) || (attributes.perThreadScratchSize[1] > maxScratchSize)) {
return KernelHelper::ErrorCode::INVALID_KERNEL;
}
auto globalMemorySize = device->getDeviceInfo().globalMemSize;
uint32_t sizes[] = {attributes.perHwThreadPrivateMemorySize,
attributes.perThreadScratchSize[0],

View File

@@ -18,7 +18,8 @@ class Device;
struct KernelHelper {
enum class ErrorCode {
SUCCESS = 0,
OUT_OF_DEVICE_MEMORY = 1
OUT_OF_DEVICE_MEMORY = 1,
INVALID_KERNEL = 2
};
static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,