diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp
index 6e500a82c3..f9079b27cd 100644
--- a/runtime/kernel/kernel.cpp
+++ b/runtime/kernel/kernel.cpp
@@ -260,8 +260,12 @@ cl_int Kernel::initialize() {
         if (privateSurfaceSize) {
             privateSurfaceSize *= device.getDeviceInfo().computeUnitsUsedForScratch * getKernelInfo().getMaxSimdSize();
             DEBUG_BREAK_IF(privateSurfaceSize == 0);
+            if ((is32Bit() || device.getMemoryManager()->peekForce32BitAllocations()) && (privateSurfaceSize > std::numeric_limits<uint32_t>::max())) {
+                retVal = CL_OUT_OF_RESOURCES;
+                break;
+            }
 
-            privateSurface = device.getMemoryManager()->createGraphicsAllocationWithRequiredBitness(privateSurfaceSize, nullptr);
+            privateSurface = device.getMemoryManager()->createGraphicsAllocationWithRequiredBitness(static_cast<size_t>(privateSurfaceSize), nullptr);
             if (privateSurface == nullptr) {
                 retVal = CL_OUT_OF_RESOURCES;
                 break;
diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h
index 098e59cecd..60720cbe5a 100644
--- a/runtime/kernel/kernel.h
+++ b/runtime/kernel/kernel.h
@@ -481,7 +481,7 @@ class Kernel : public BaseObject<_cl_kernel> {
     uint32_t crossThreadDataSize;
 
     GraphicsAllocation *privateSurface;
-    uint32_t privateSurfaceSize;
+    uint64_t privateSurfaceSize;
 
     GraphicsAllocation *kernelReflectionSurface;
 
diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp
index d9ce885985..0e48825e4a 100644
--- a/unit_tests/kernel/kernel_tests.cpp
+++ b/unit_tests/kernel/kernel_tests.cpp
@@ -741,6 +741,60 @@ TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamGetConstantBuffe
     delete pKernelInfo;
 }
 
+TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) {
+    auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
+    pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
+    auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
+    executionEnvironment->CompiledSIMD32 = 32;
+    std::unique_ptr<KernelInfo> pKernelInfo(KernelInfo::create());
+    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
+    pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
+    MockContext context;
+    MockProgram program(&context, false);
+    std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, *pKernelInfo, *pDevice));
+    pKernelInfo->gpuPointerSize = 4;
+    pDevice->getMemoryManager()->setForce32BitAllocations(false);
+    if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0)
+        pDevice->getDeviceInfoToModify()->computeUnitsUsedForScratch = 120;
+    EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize());
+}
+
+TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) {
+    auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
+    pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
+    auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
+    executionEnvironment->CompiledSIMD32 = 32;
+    std::unique_ptr<KernelInfo> pKernelInfo(KernelInfo::create());
+    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
+    pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
+    MockContext context;
+    MockProgram program(&context, false);
+    std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, *pKernelInfo, *pDevice));
+    pKernelInfo->gpuPointerSize = 4;
+    pDevice->getMemoryManager()->setForce32BitAllocations(true);
+    if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0)
+        pDevice->getDeviceInfoToModify()->computeUnitsUsedForScratch = 120;
+    EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize());
+}
+
+TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) {
+    auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
+    pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
+    auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
+    executionEnvironment->CompiledSIMD32 = 32;
+    std::unique_ptr<KernelInfo> pKernelInfo(KernelInfo::create());
+    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
+    pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
+    MockContext context;
+    MockProgram program(&context, false);
+    std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, *pKernelInfo, *pDevice));
+    pKernelInfo->gpuPointerSize = 8;
+    pDevice->getMemoryManager()->setForce32BitAllocations(true);
+    if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0)
+        pDevice->getDeviceInfoToModify()->computeUnitsUsedForScratch = 120;
+    EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize());
+}
+
 TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithCpuAddress) {
 
     // define kernel info