Return error code when there is no space for scratch/private

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2022-10-28 13:58:31 +02:00
committed by Compute-Runtime-Automation
parent 1131a6a4c1
commit ff01b9361e
10 changed files with 90 additions and 6 deletions

View File

@ -802,7 +802,10 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
auto &hwInfo = neoDevice->getHardwareInfo();
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto &kernelDescriptor = kernelImmData->getDescriptor();
auto ret = NEO::KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(kernelDescriptor.kernelAttributes, neoDevice);
if (ret == NEO::KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap);
if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) {
@ -1074,5 +1077,5 @@ ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint
threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
}
return ZE_RESULT_SUCCESS;
} // namespace L0
}
} // namespace L0

View File

@ -93,6 +93,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
};
struct MockModule : public L0::ModuleImp {
using ModuleImp::allocatePrivateMemoryPerDispatch;
using ModuleImp::getKernelImmutableDataVector;
using ModuleImp::kernelImmDatas;
using ModuleImp::maxGroupSize;

View File

@ -62,6 +62,22 @@ TEST_F(KernelInitTest, givenKernelToInitWhenItHasUnknownArgThenUnknowKernelArgHa
EXPECT_EQ(mockKernelImmData->getDescriptor().payloadMappings.explicitArgs[0].type, NEO::ArgDescriptor::ArgTUnknown);
}
// Kernel initialization must report out-of-device-memory when the requested
// per-HW-thread private size, multiplied by the number of compute units used
// for scratch, is larger than the device global memory size.
TEST_F(KernelInitTest, givenKernelToInitWhenItHasTooBigPrivateSizeThenOutOfMemoryIsRetutned) {
    auto *testedNeoDevice = device->getNEODevice();
    const auto deviceBitfield = static_cast<uint32_t>(testedNeoDevice->getDeviceBitfield().to_ulong());
    const auto globalSize = testedNeoDevice->getRootDevice()->getGlobalMemorySize(deviceBitfield);
    const auto computeUnits = testedNeoDevice->getDeviceInfo().computeUnitsUsedForScratch;

    // One more than globalSize / computeUnits already overflows global memory; +100 adds margin.
    const uint32_t perHwThreadPrivateMemorySizeRequested = static_cast<uint32_t>((globalSize + computeUnits) / computeUnits) + 100;

    auto mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
    createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    mockKernelImmData->resizeExplicitArgs(1);

    EXPECT_EQ(kernel->initialize(&desc), ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
using KernelBaseAddressTests = Test<ModuleImmutableDataFixture>;
TEST_F(KernelBaseAddressTests, whenQueryingKernelBaseAddressThenCorrectAddressIsReturned) {
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
@ -774,7 +790,7 @@ TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenCont
EXPECT_EQ(sizeContainerWithoutPrivateMemory + 1u, sizeContainerWithPrivateMemory);
}
TEST_F(KernelImmutableDataTests, givenKernelWithPrivateMemoryBiggerThanGlobalMemoryThenPrivateMemoryIsNotAllocated) {
TEST_F(KernelImmutableDataTests, givenModuleWithPrivateMemoryBiggerThanGlobalMemoryThenPrivateMemoryIsNotAllocated) {
auto zebinData = std::make_unique<ZebinTestData::ZebinWithL0TestCommonModule>(device->getHwInfo());
const auto &src = zebinData->storage;
ze_module_desc_t moduleDesc = {};
@ -784,7 +800,7 @@ TEST_F(KernelImmutableDataTests, givenKernelWithPrivateMemoryBiggerThanGlobalMem
ModuleBuildLog *moduleBuildLog = nullptr;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
uint32_t perHwThreadPrivateMemorySizeRequested = std::numeric_limits<uint32_t>::max();
uint32_t perHwThreadPrivateMemorySizeRequested = 0x1000;
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
std::unique_ptr<MockModule> module = std::make_unique<MockModule>(device,
moduleBuildLog,
@ -792,6 +808,7 @@ TEST_F(KernelImmutableDataTests, givenKernelWithPrivateMemoryBiggerThanGlobalMem
perHwThreadPrivateMemorySizeRequested,
mockKernelImmData.get());
result = module->initialize(&moduleDesc, device->getNEODevice());
module->allocatePrivateMemoryPerDispatch = true;
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_TRUE(module->shouldAllocatePrivateMemoryPerDispatch());

View File

@ -157,6 +157,10 @@ cl_int Kernel::initialize() {
pImplicitArgs->structVersion = 0;
pImplicitArgs->simdWidth = maxSimdSize;
}
auto ret = KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(kernelDescriptor.kernelAttributes, &pClDevice->getDevice());
if (ret == NEO::KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY) {
return CL_OUT_OF_RESOURCES;
}
crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize;

View File

@ -782,7 +782,8 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe
pDevice->getMemoryManager()->setForce32BitAllocations(false);
if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0)
pDevice->deviceInfo.computeUnitsUsedForScratch = 120;
EXPECT_EQ(CL_OUT_OF_RESOURCES, kernel->initialize());
kernel->initialize();
EXPECT_EQ(CL_OUT_OF_RESOURCES, kernel->patchPrivateSurface());
}
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) {

View File

@ -122,6 +122,7 @@ class MockKernel : public Kernel {
using Kernel::numberOfBindingTableStates;
using Kernel::parentEventOffset;
using Kernel::patchBufferOffset;
using Kernel::patchPrivateSurface;
using Kernel::patchWithImplicitSurface;
using Kernel::pImplicitArgs;
using Kernel::preferredWkgMultipleOffset;

View File

@ -457,6 +457,7 @@ HWTEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageIsBeingCreat
mock->ioctl_expected.gemClose = 2;
mock->ioctl_expected.gemUserptr = 1;
mock->ioctl_expected.execbuffer2 = 0;
mock->ioctl_expected.contextGetParam = 5;
// builtins kernels
mock->ioctl_expected.gemUserptr += 5;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -8,6 +8,7 @@
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/debug_helpers.h"
@ -46,4 +47,17 @@ uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThr
return maxWorkGroupsCount;
}
// Checks whether the private memory and both scratch slots requested by a kernel
// can fit in the device global memory.
//
// For every non-zero per-thread size, the size is multiplied (in 64-bit arithmetic)
// by computeUnitsUsedForScratch and compared with the global memory size reported
// by the root device for this device's bitfield.
//
// Returns OUT_OF_DEVICE_MEMORY as soon as one of the sizes does not fit,
// SUCCESS otherwise (including when all sizes are zero).
KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device) {
    const auto deviceBitfield = static_cast<uint32_t>(device->getDeviceBitfield().to_ulong());
    const auto availableMemory = device->getRootDevice()->getGlobalMemorySize(deviceBitfield);

    const uint32_t requestedSizes[] = {attributes.perHwThreadPrivateMemorySize,
                                       attributes.perThreadScratchSize[0],
                                       attributes.perThreadScratchSize[1]};

    for (const auto perThreadSize : requestedSizes) {
        if (perThreadSize == 0) {
            // Nothing requested for this slot, nothing to validate.
            continue;
        }
        // Widen both operands before multiplying so the product cannot wrap at 32 bits.
        const auto computeUnits = static_cast<uint64_t>(device->getDeviceInfo().computeUnitsUsedForScratch);
        const auto totalSize = computeUnits * static_cast<uint64_t>(perThreadSize);
        if (totalSize > availableMemory) {
            return KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY;
        }
    }
    return KernelHelper::ErrorCode::SUCCESS;
}
} // namespace NEO

View File

@ -7,12 +7,19 @@
#pragma once
#include "shared/source/kernel/kernel_descriptor.h"
#include <cstddef>
#include <cstdint>
namespace NEO {
class Device;
struct KernelHelper {
// Status codes returned by KernelHelper checks.
enum class ErrorCode {
// The check passed.
SUCCESS = 0,
// The requested memory does not fit in device memory.
OUT_OF_DEVICE_MEMORY = 1
};
static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
const size_t *localWorkSize);
@ -20,6 +27,7 @@ struct KernelHelper {
// Returns the total private surface size: the per-HW-thread private memory size
// multiplied by the number of compute units used for scratch (64-bit result).
static uint64_t getPrivateSurfaceSize(uint64_t perHwThreadPrivateMemorySize, uint32_t computeUnitsUsedForScratch) {
    const uint64_t unitCount = computeUnitsUsedForScratch;
    return unitCount * perHwThreadPrivateMemorySize;
}
static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device);
};
} // namespace NEO

View File

@ -8,6 +8,7 @@
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/test.h"
@ -79,3 +80,36 @@ TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkG
usedSlm = availableSlm;
EXPECT_EQ(1u, getMaxWorkGroupCount());
}
using KernelHelperTest = Test<DeviceFixture>;
// A per-HW-thread private size that, multiplied by the compute units used for
// scratch, exceeds global memory must be rejected.
TEST_F(KernelHelperTest, GivenStatelessPrivateSizeGreaterThanGlobalSizeWhenCheckingIfThereIsEnaughSpaceThenOutOfMemReturned) {
    const auto deviceBitfield = static_cast<uint32_t>(pDevice->getDeviceBitfield().to_ulong());
    const auto globalSize = pDevice->getRootDevice()->getGlobalMemorySize(deviceBitfield);
    const auto computeUnits = pDevice->getDeviceInfo().computeUnitsUsedForScratch;

    KernelDescriptor::KernelAttributes attributes = {};
    attributes.perHwThreadPrivateMemorySize = static_cast<uint32_t>((globalSize + computeUnits) / computeUnits) + 100;

    const auto result = KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(attributes, pDevice);
    EXPECT_EQ(result, KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY);
}
// A size in scratch slot 0 that, multiplied by the compute units used for
// scratch, exceeds global memory must be rejected.
TEST_F(KernelHelperTest, GivenScratchSizeGreaterThanGlobalSizeWhenCheckingIfThereIsEnaughSpaceThenOutOfMemReturned) {
    const auto deviceBitfield = static_cast<uint32_t>(pDevice->getDeviceBitfield().to_ulong());
    const auto globalSize = pDevice->getRootDevice()->getGlobalMemorySize(deviceBitfield);
    const auto computeUnits = pDevice->getDeviceInfo().computeUnitsUsedForScratch;

    KernelDescriptor::KernelAttributes attributes = {};
    attributes.perThreadScratchSize[0] = static_cast<uint32_t>((globalSize + computeUnits) / computeUnits) + 100;

    const auto result = KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(attributes, pDevice);
    EXPECT_EQ(result, KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY);
}
// A size in scratch slot 1 that, multiplied by the compute units used for
// scratch, exceeds global memory must be rejected.
TEST_F(KernelHelperTest, GivenScratchPrivateSizeGreaterThanGlobalSizeWhenCheckingIfThereIsEnaughSpaceThenOutOfMemReturned) {
    const auto deviceBitfield = static_cast<uint32_t>(pDevice->getDeviceBitfield().to_ulong());
    const auto globalSize = pDevice->getRootDevice()->getGlobalMemorySize(deviceBitfield);
    const auto computeUnits = pDevice->getDeviceInfo().computeUnitsUsedForScratch;

    KernelDescriptor::KernelAttributes attributes = {};
    attributes.perThreadScratchSize[1] = static_cast<uint32_t>((globalSize + computeUnits) / computeUnits) + 100;

    const auto result = KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(attributes, pDevice);
    EXPECT_EQ(result, KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY);
}
// When the private size and both scratch slots each stay below the global
// memory limit, the check must succeed.
TEST_F(KernelHelperTest, GivenScratchAndPrivateSizeLessThanGlobalSizeWhenCheckingIfThereIsEnaughSpaceThenSuccessReturned) {
    const auto deviceBitfield = static_cast<uint32_t>(pDevice->getDeviceBitfield().to_ulong());
    const auto globalSize = pDevice->getRootDevice()->getGlobalMemorySize(deviceBitfield);
    const auto computeUnits = pDevice->getDeviceInfo().computeUnitsUsedForScratch;

    // 100 below the first per-thread size that would no longer fit.
    const auto fittingSize = static_cast<uint32_t>((globalSize + computeUnits) / computeUnits) - 100;

    KernelDescriptor::KernelAttributes attributes = {};
    attributes.perHwThreadPrivateMemorySize = fittingSize;
    attributes.perThreadScratchSize[0] = fittingSize;
    attributes.perThreadScratchSize[1] = fittingSize;

    const auto result = KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(attributes, pDevice);
    EXPECT_EQ(result, KernelHelper::ErrorCode::SUCCESS);
}