Review notes (free-form preamble, not part of any hunk):
- Line structure is restored; the patch text had its newlines collapsed.
- Template argument lists were missing (`std::make_unique(`, `static_cast(`). The casts are
  restored as <uint64_t>/<double>; <MockClDevice>/<MockDevice> in the tests are an
  assumption - TODO confirm against the test headers.
- Leading whitespace of context lines is reconstructed; applying may need whitespace to be ignored.
- The `index` lines are carried over unchanged, so their post-image hashes no longer match.
- Device::getPercentOfGlobalMemoryAvailable() now reads the debug flag once, treats every
  negative value like the -1 default and caps values above 100 at 100 percent.
- The tests read CL_DEVICE_GLOBAL_MEM_SIZE into cl_ulong instead of the non-portable `ulong`,
  and a second test covers out-of-range flag values.

diff --git a/opencl/test/unit_test/api/cl_get_device_info_tests.inl b/opencl/test/unit_test/api/cl_get_device_info_tests.inl
index 25778919ee..93c74711d2 100644
--- a/opencl/test/unit_test/api/cl_get_device_info_tests.inl
+++ b/opencl/test/unit_test/api/cl_get_device_info_tests.inl
@@ -112,6 +112,63 @@ TEST_F(clGetDeviceInfoTests, givenOpenCLDeviceWhenAskedForSupportedSvmTypeThenCo
     EXPECT_EQ(svmCaps, expectedCaps);
 }
 
+TEST(clGetDeviceGlobalMemSizeTests, givenDebugFlagForGlobalMemSizePercentWhenAskedForGlobalMemSizeThenAdjustedGlobalMemSizeIsReturned) {
+    DebugManagerStateRestore restorer;
+    // Baseline: report 100 percent of the memory as available.
+    DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.set(100u);
+    cl_ulong globalMemSize100percent = 0u;
+
+    auto hwInfo = *defaultHwInfo;
+
+    auto pDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
+
+    auto retVal = clGetDeviceInfo(
+        pDevice.get(),
+        CL_DEVICE_GLOBAL_MEM_SIZE,
+        sizeof(cl_ulong),
+        &globalMemSize100percent,
+        nullptr);
+    EXPECT_EQ(retVal, CL_SUCCESS);
+    EXPECT_NE(globalMemSize100percent, 0u);
+
+    // A freshly created device with the flag at 50 percent must report half of the baseline.
+    DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.set(50u);
+    cl_ulong globalMemSize50percent = 0u;
+
+    hwInfo = *defaultHwInfo;
+
+    pDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
+
+    retVal = clGetDeviceInfo(
+        pDevice.get(),
+        CL_DEVICE_GLOBAL_MEM_SIZE,
+        sizeof(cl_ulong),
+        &globalMemSize50percent,
+        nullptr);
+    EXPECT_EQ(retVal, CL_SUCCESS);
+    EXPECT_NE(globalMemSize50percent, 0u);
+
+    EXPECT_EQ(globalMemSize100percent / 2u, globalMemSize50percent);
+}
+
+TEST(clGetDeviceGlobalMemSizeTests, givenOutOfRangeDebugFlagForGlobalMemSizePercentWhenAskedForGlobalMemSizeThenFlagIsSanitized) {
+    DebugManagerStateRestore restorer;
+
+    auto queryGlobalMemSize = [](int32_t percent) {
+        DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.set(percent);
+        auto hwInfo = *defaultHwInfo;
+        auto pDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
+        cl_ulong globalMemSize = 0u;
+        EXPECT_EQ(CL_SUCCESS, clGetDeviceInfo(pDevice.get(), CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &globalMemSize, nullptr));
+        return globalMemSize;
+    };
+
+    // Values above 100 are capped at 100 percent.
+    EXPECT_EQ(queryGlobalMemSize(100), queryGlobalMemSize(150));
+    // Any negative value behaves like the -1 default.
+    EXPECT_EQ(queryGlobalMemSize(-1), queryGlobalMemSize(-50));
+}
+
 TEST(clGetDeviceFineGrainedTests, givenDebugFlagForFineGrainedOverrideWhenItIsUsedWithZeroThenNoFineGrainSupport) {
     DebugManagerStateRestore restorer;
     DebugManager.flags.ForceFineGrainedSVMSupport.set(0);
diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config
index 5712baae96..84d5911d1b 100644
--- a/opencl/test/unit_test/test_files/igdrcl.config
+++ b/opencl/test/unit_test/test_files/igdrcl.config
@@ -240,4 +240,5 @@ DeferOsContextInitialization = -1
 DebuggerOptDisable = -1
 AlignLocalMemoryVaTo2MB = -1
 EngineInstancedSubDevices = 0
-OverrideTimestampPacketSize = -1
\ No newline at end of file
+OverrideTimestampPacketSize = -1
+ClDeviceGlobalMemSizeAvailablePercent = -1
\ No newline at end of file
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index bd4759ef78..968a1a730b 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -25,6 +25,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, AUBDumpFilterKernelEndIdx, -1, "End index of ker
 DECLARE_DEBUG_VARIABLE(int32_t, AUBDumpToggleCaptureOnOff, 0, "Toggle AUB capture on/off")
 DECLARE_DEBUG_VARIABLE(int32_t, AubDumpOverrideMmioRegister, 0, "Override mmio offset from list with new value from AubDumpOverrideMmioRegisterValue")
 DECLARE_DEBUG_VARIABLE(int32_t, AubDumpOverrideMmioRegisterValue, 0, "Value to override mmio offset from AubDumpOverrideMmioRegister")
+DECLARE_DEBUG_VARIABLE(int32_t, ClDeviceGlobalMemSizeAvailablePercent, -1, "Percent of total GPU memory available; CL_DEVICE_GLOBAL_MEM_SIZE")
 DECLARE_DEBUG_VARIABLE(int32_t, SetCommandStreamReceiver, -1, "Set command stream receiver to: 0 - HW, 1 - AUB, 2 - TBX, 3 - HW & AUB, 4 - TBX & AUB")
 DECLARE_DEBUG_VARIABLE(int32_t, TbxPort, 4321, "TCP-IP port of TBX server")
 DECLARE_DEBUG_VARIABLE(bool, TbxFrontdoorMode, false, "Set TBX frontdoor mode for read and write memory accesses (the default mode is via backdoor)")
diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp
index 3a6df152a4..d17b623e0c 100644
--- a/shared/source/device/device.cpp
+++ b/shared/source/device/device.cpp
@@ -517,10 +517,29 @@ uint64_t Device::getGlobalMemorySize(uint32_t deviceBitfield) const {
                                 ? getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex(), deviceBitfield)
                                 : getMemoryManager()->getSystemSharedMemory(this->getRootDeviceIndex());
     globalMemorySize = std::min(globalMemorySize, getMemoryManager()->getMaxApplicationAddress() + 1);
-    globalMemorySize = static_cast<uint64_t>(static_cast<double>(globalMemorySize) * 0.8);
+    double percentOfGlobalMemoryAvailable = getPercentOfGlobalMemoryAvailable();
+    globalMemorySize = static_cast<uint64_t>(static_cast<double>(globalMemorySize) * percentOfGlobalMemoryAvailable);
+
     return globalMemorySize;
 }
 
+// Fraction (0.0 - 1.0) of the device memory that getGlobalMemorySize() and the
+// 32-bit cap in initializeCaps() report as available.
+// ClDeviceGlobalMemSizeAvailablePercent overrides the default of 0.8: any negative
+// value (including the -1 default) keeps 0.8, values above 100 are treated as 100,
+// so the result never goes negative (a negative double cast to uint64_t is undefined
+// behavior) and never exceeds the whole memory.
+double Device::getPercentOfGlobalMemoryAvailable() const {
+    constexpr double defaultFraction = 0.8;
+    constexpr int32_t maxPercent = 100;
+
+    const int32_t percentOverride = DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get();
+    if (percentOverride < 0) {
+        return defaultFraction;
+    }
+    return 0.01 * static_cast<double>(std::min(percentOverride, maxPercent));
+}
+
 NEO::SourceLevelDebugger *Device::getSourceLevelDebugger() {
     auto debugger = getDebugger();
     if (debugger) {
diff --git a/shared/source/device/device.h b/shared/source/device/device.h
index bb3c08e097..951a844feb 100644
--- a/shared/source/device/device.h
+++ b/shared/source/device/device.h
@@ -137,6 +137,8 @@ class Device : public ReferenceTrackedObject {
     MOCKABLE_VIRTUAL SubDevice *createSubDevice(uint32_t subDeviceIndex);
     MOCKABLE_VIRTUAL SubDevice *createEngineInstancedSubDevice(uint32_t subDeviceIndex, aub_stream::EngineType engineType);
     virtual uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const;
+    // Fraction of global memory reported as available: 0.8 unless overridden by ClDeviceGlobalMemSizeAvailablePercent.
+    double getPercentOfGlobalMemoryAvailable() const;
     virtual void createBindlessHeapsHelper() {}
     bool createSubDevices();
     bool createGenericSubDevices();
diff --git a/shared/source/device/device_caps.cpp b/shared/source/device/device_caps.cpp
index 8f712971af..9cef7fea75 100644
--- a/shared/source/device/device_caps.cpp
+++ b/shared/source/device/device_caps.cpp
@@ -57,7 +57,8 @@ void Device::initializeCaps() {
     deviceInfo.maxMemAllocSize = getGlobalMemorySize(singleSubDeviceMask); // Allocation can be placed only on one SubDevice
 
     if (DebugManager.flags.Force32bitAddressing.get() || addressing32bitAllowed || is32bit) {
-        deviceInfo.globalMemSize = std::min(deviceInfo.globalMemSize, static_cast<uint64_t>(4 * GB * 0.8));
+        double percentOfGlobalMemoryAvailable = getPercentOfGlobalMemoryAvailable();
+        deviceInfo.globalMemSize = std::min(deviceInfo.globalMemSize, static_cast<uint64_t>(4 * GB * percentOfGlobalMemoryAvailable));
         deviceInfo.addressBits = 32;
         deviceInfo.force32BitAddressess = is64bit;
     }