// Patch overview (text ahead of the first "diff --git" header is commentary only).
//
// public/cl_ext_private.h
//   Adds CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, defined as (1 << 23).
//
// runtime/mem_obj/buffer.cpp -- Buffer::validateInputAndCreateBuffer
//   The MemObjHelper::validateMemoryPropertiesForBuffer check (CL_INVALID_VALUE) is moved
//   ahead of the size check. The size check still rejects size == 0, but a size above
//   getHardwareCapabilities().maxMemAllocSize of device 0 is rejected with
//   CL_INVALID_BUFFER_SIZE only when the new bit is set in neither properties.flags nor
//   properties.flags_intel.
//
// runtime/mem_obj/mem_obj_helper.h -- MemObjHelper
//   checkUsedFlagsForBuffer now also calls the new addBufferMemoryProperties, which ORs
//   the new bit into both flags and flags_intel of the accepted-properties mask.
//
// runtime/program/compile.cpp -- Program::compile
//   The single "-cl-intel-gtpin-rera" special case becomes a loop over two option strings.
//   For each string found in `options`, the first match is erased from `options` and the
//   string plus a trailing space is appended to `internalOptions`.
//
// runtime/program/internal_options.cpp
//   Program::internalOptionsToExtract gains "-cl-intel-greater-than-4GB-buffer-required".
//
// unit_tests/api/cl_create_buffer_tests.cpp
//   Two new tests request a buffer of GB * 5 bytes with the new flag, once through
//   clCreateBuffer and once through clCreateBufferWithPropertiesINTEL. They expect
//   CL_SUCCESS and are skipped when peekForce32BitAllocations() or is32bit is true.
//
// unit_tests/program/program_tests.cpp
//   CompileProgramWithReraFlag is renamed CompileProgramWithInternalFlags and additionally
//   checks for the new option string in the internal build options.
//
// NOTE(review): `std::vector optionsToExtract`, `std::vector Program::internalOptionsToExtract`,
//   `static_cast(bigSize)`, `static_cast(pDevice->getMemoryManager())` and the bare `#include`
//   appear here without template arguments / a header name. Presumably the angle-bracket
//   contents were lost in this copy of the patch -- confirm against the real commit.
// NOTE(review): compile.cpp spells out the same two strings that internal_options.cpp puts
//   in Program::internalOptionsToExtract; consider reusing that list -- confirm whether the
//   duplication is intentional.
// NOTE(review): std::string::find matches substrings, so an option that merely starts with
//   one of these strings would also be cut, and only the first occurrence is moved. The
//   removed code had the same behaviour for "-cl-intel-gtpin-rera".
// NOTE(review): the new test names contain typos ("Unrestircted", "Successs").
// NOTE(review): in the program test, s3 is filled by the same getBuildInternalOptions call
//   as s2, so it looks redundant -- both searches could run on s2.
diff --git a/public/cl_ext_private.h b/public/cl_ext_private.h index 56ed39f9a8..b08461a43c 100644 --- a/public/cl_ext_private.h +++ b/public/cl_ext_private.h @@ -64,6 +64,9 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield; #define CL_MEM_ALLOCATION_HANDLE_INTEL 0x10050 +//Used with createBuffer +#define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL (1 << 23) + /****************************** * UNIFIED MEMORY * *******************************/ diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index e09533013c..053c127386 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -89,14 +89,17 @@ void Buffer::validateInputAndCreateBuffer(cl_context &context, return; } - auto pDevice = pContext->getDevice(0); - if (size == 0 || size > pDevice->getHardwareCapabilities().maxMemAllocSize) { - retVal = CL_INVALID_BUFFER_SIZE; + if (!MemObjHelper::validateMemoryPropertiesForBuffer(properties)) { + retVal = CL_INVALID_VALUE; return; } - if (!MemObjHelper::validateMemoryPropertiesForBuffer(properties)) { - retVal = CL_INVALID_VALUE; + auto pDevice = pContext->getDevice(0); + bool allowCreateBuffersWithUnrestrictedSize = isValueSet(properties.flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) || + isValueSet(properties.flags_intel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL); + + if (size == 0 || (size > pDevice->getHardwareCapabilities().maxMemAllocSize && !allowCreateBuffersWithUnrestrictedSize)) { + retVal = CL_INVALID_BUFFER_SIZE; return; } diff --git a/runtime/mem_obj/mem_obj_helper.h b/runtime/mem_obj/mem_obj_helper.h index 8a571806ad..a4928e3aa8 100644 --- a/runtime/mem_obj/mem_obj_helper.h +++ b/runtime/mem_obj/mem_obj_helper.h @@ -138,6 +138,7 @@ class MemObjHelper { static bool checkUsedFlagsForBuffer(const MemoryProperties &properties) { MemoryProperties acceptedProperties; addCommonMemoryProperties(acceptedProperties); + addBufferMemoryProperties(acceptedProperties); addExtraMemoryProperties(acceptedProperties); return 
(isFieldValid(properties.flags, acceptedProperties.flags) && @@ -166,6 +167,11 @@ class MemObjHelper { properties.flags |= CL_MEM_NO_ACCESS_INTEL | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL; } + static inline void addBufferMemoryProperties(MemoryProperties &properties) { + properties.flags |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; + properties.flags_intel |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; + } + static void addExtraMemoryProperties(MemoryProperties &properties); static bool validateExtraMemoryProperties(const MemoryProperties &properties); }; diff --git a/runtime/program/compile.cpp b/runtime/program/compile.cpp index 39c34765f5..0dc4b7a228 100644 --- a/runtime/program/compile.cpp +++ b/runtime/program/compile.cpp @@ -72,14 +72,16 @@ cl_int Program::compile( buildStatus = CL_BUILD_IN_PROGRESS; options = (buildOptions != nullptr) ? buildOptions : ""; - std::string reraStr = "-cl-intel-gtpin-rera"; - size_t pos = options.find(reraStr); - if (pos != std::string::npos) { - // compile option "-cl-intel-gtpin-rera" is present, move it to internalOptions - size_t reraLen = reraStr.length(); - options.erase(pos, reraLen); - internalOptions.append(reraStr); - internalOptions.append(" "); + + const std::vector optionsToExtract{"-cl-intel-gtpin-rera", "-cl-intel-greater-than-4GB-buffer-required"}; + + for (const auto &optionString : optionsToExtract) { + size_t pos = options.find(optionString); + if (pos != std::string::npos) { + options.erase(pos, optionString.length()); + internalOptions.append(optionString); + internalOptions.append(" "); + } } // create ELF writer to process all sources to be compiled diff --git a/runtime/program/internal_options.cpp b/runtime/program/internal_options.cpp index a312186e25..01d899585d 100644 --- a/runtime/program/internal_options.cpp +++ b/runtime/program/internal_options.cpp @@ -10,5 +10,5 @@ #include namespace NEO { -const std::vector Program::internalOptionsToExtract = {"-cl-intel-gtpin-rera"}; +const std::vector 
Program::internalOptionsToExtract = {"-cl-intel-gtpin-rera", "-cl-intel-greater-than-4GB-buffer-required"}; }; diff --git a/unit_tests/api/cl_create_buffer_tests.cpp b/unit_tests/api/cl_create_buffer_tests.cpp index 1c9623405f..8518cbedee 100644 --- a/unit_tests/api/cl_create_buffer_tests.cpp +++ b/unit_tests/api/cl_create_buffer_tests.cpp @@ -226,6 +226,47 @@ TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeWhenCreateBufferWi EXPECT_EQ(nullptr, buffer); } +TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeAndClMemAllowUnrestirctedSizeFlagWhenCreatingBufferThenClSuccessIsReturned) { + auto pDevice = pContext->getDevice(0); + uint64_t bigSize = GB * 5; + size_t size = static_cast(bigSize); + cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; + auto memoryManager = static_cast(pDevice->getMemoryManager()); + memoryManager->turnOnFakingBigAllocations(); + + if (memoryManager->peekForce32BitAllocations() || is32bit) { + GTEST_SKIP(); + } + + auto buffer = clCreateBuffer(pContext, flags, size, nullptr, &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, buffer); + + retVal = clReleaseMemObject(buffer); + EXPECT_EQ(CL_SUCCESS, retVal); +} + +TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeAndClMemAllowUnrestirctedSizeFlagWhenCreatingBufferWithPropertiesINTELThenClSuccesssIsReturned) { + auto pDevice = pContext->getDevice(0); + uint64_t bigSize = GB * 5; + size_t size = static_cast(bigSize); + cl_mem_properties_intel properties[] = {CL_MEM_FLAGS_INTEL, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, 0}; + + auto memoryManager = static_cast(pDevice->getMemoryManager()); + memoryManager->turnOnFakingBigAllocations(); + + if (memoryManager->peekForce32BitAllocations() || is32bit) { + GTEST_SKIP(); + } + + auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, size, nullptr, &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, buffer); + + retVal = clReleaseMemObject(buffer); + 
EXPECT_EQ(CL_SUCCESS, retVal); +} + TEST_F(clCreateBufferTests, GivenNullHostPointerAndMemCopyHostPtrFlagWhenCreatingBufferThenNullIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; diff --git a/unit_tests/program/program_tests.cpp b/unit_tests/program/program_tests.cpp index 8518b43272..de6c18eaf0 100644 --- a/unit_tests/program/program_tests.cpp +++ b/unit_tests/program/program_tests.cpp @@ -1014,7 +1014,7 @@ TEST_P(ProgramFromSourceTest, CreateWithSource_Compile) { delete[](char *) pSourceBuffer; } -TEST_P(ProgramFromSourceTest, CompileProgramWithReraFlag) { +TEST_P(ProgramFromSourceTest, CompileProgramWithInternalFlags) { class MyCompilerInterface : public CompilerInterface { public: MyCompilerInterface() { buildOptions[0] = buildInternalOptions[0] = '\0'; }; @@ -1048,16 +1048,21 @@ TEST_P(ProgramFromSourceTest, CompileProgramWithReraFlag) { // Check default build options std::string s1; std::string s2; + std::string s3; cip->getBuildOptions(s1); size_t pos = s1.find("-cl-fast-relaxed-math"); EXPECT_EQ(pos, std::string::npos); cip->getBuildInternalOptions(s2); pos = s2.find("-cl-intel-gtpin-rera"); EXPECT_EQ(pos, std::string::npos); + cip->getBuildInternalOptions(s3); + pos = s3.find("-cl-intel-greater-than-4GB-buffer-required"); + EXPECT_EQ(pos, std::string::npos); - // Ask to build created program without "-cl-intel-gtpin-rera" flag. + // Ask to build created program without "-cl-intel-gtpin-rera" and "-cl-intel-greater-than-4GB-buffer-required" flags. 
s1.assign(""); s2.assign(""); + s3.assign(""); cl_int retVal = program->compile(0, nullptr, "-cl-fast-relaxed-math", 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); @@ -1068,11 +1073,16 @@ TEST_P(ProgramFromSourceTest, CompileProgramWithReraFlag) { cip->getBuildInternalOptions(s2); pos = s2.find("-cl-intel-gtpin-rera"); EXPECT_EQ(pos, std::string::npos); + cip->getBuildInternalOptions(s3); + pos = s3.find("-cl-intel-greater-than-4GB-buffer-required"); + EXPECT_EQ(pos, std::string::npos); - // Ask to build created program with "-cl-intel-gtpin-rera" flag. + // Ask to build created program with "-cl-intel-gtpin-rera" and "-cl-intel-greater-than-4GB-buffer-required" flags. s1.assign(""); s2.assign(""); - retVal = program->compile(0, nullptr, "-cl-intel-gtpin-rera -cl-finite-math-only", 0, nullptr, nullptr, nullptr, nullptr); + s3.assign(""); + retVal = program->compile(0, nullptr, "-cl-intel-greater-than-4GB-buffer-required -cl-intel-gtpin-rera -cl-finite-math-only", + 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied @@ -1084,6 +1094,9 @@ TEST_P(ProgramFromSourceTest, CompileProgramWithReraFlag) { cip->getBuildInternalOptions(s2); pos = s2.find("-cl-intel-gtpin-rera"); EXPECT_NE(pos, std::string::npos); + cip->getBuildInternalOptions(s3); + pos = s3.find("-cl-intel-greater-than-4GB-buffer-required"); + EXPECT_NE(pos, std::string::npos); } TEST_P(ProgramFromSourceTest, CreateWithSourceAdvanced) {