mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Improve returning global and max alloc memory size
Change-Id: I2f22481412184f01652b7e49bc30a57c56e6204b
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
b6d21cc636
commit
d35806fdd8
@ -74,7 +74,7 @@ class OsAgnosticMemoryManager : public MemoryManager {
|
||||
void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override;
|
||||
|
||||
uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override;
|
||||
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) override;
|
||||
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override;
|
||||
|
||||
void turnOnFakingBigAllocations();
|
||||
|
||||
|
@ -41,7 +41,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint64_t OsAgnosticMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
|
||||
uint64_t OsAgnosticMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
|
||||
return 0 * GB;
|
||||
}
|
||||
|
||||
|
@ -473,7 +473,7 @@ TEST_F(DeviceGetCapsTest, givenDeviceCapsWhenLocalMemoryIsEnabledThenCalculateGl
|
||||
auto enabledOcl21Features = device->areOcl21FeaturesEnabled();
|
||||
bool addressing32Bit = is32bit || (is64bit && (enabledOcl21Features == false)) || DebugManager.flags.Force32bitAddressing.get();
|
||||
|
||||
auto localMem = pMemManager->getLocalMemorySize(0u);
|
||||
auto localMem = pMemManager->getLocalMemorySize(0u, static_cast<uint32_t>(device->getDeviceBitfield().to_ulong()));
|
||||
auto maxAppAddrSpace = pMemManager->getMaxApplicationAddress() + 1;
|
||||
auto memSize = std::min(localMem, maxAppAddrSpace);
|
||||
memSize = static_cast<cl_ulong>(memSize * 0.8);
|
||||
|
@ -255,11 +255,15 @@ TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWhenGettingGlobalMemorySizeThe
|
||||
const uint32_t numSubDevices = 2u;
|
||||
UltDeviceFactory deviceFactory{1, numSubDevices};
|
||||
|
||||
auto totalGlobalMemorySize = deviceFactory.rootDevices[0]->getGlobalMemorySize();
|
||||
auto rootDevice = deviceFactory.rootDevices[0];
|
||||
|
||||
auto totalGlobalMemorySize = rootDevice->getGlobalMemorySize(static_cast<uint32_t>(rootDevice->getDeviceBitfield().to_ulong()));
|
||||
auto expectedGlobalMemorySize = totalGlobalMemorySize / numSubDevices;
|
||||
|
||||
for (const auto &subDevice : deviceFactory.subDevices) {
|
||||
EXPECT_EQ(expectedGlobalMemorySize, static_cast<MockSubDevice *>(subDevice)->getGlobalMemorySize());
|
||||
auto mockSubDevice = static_cast<MockSubDevice *>(subDevice);
|
||||
auto subDeviceBitfield = static_cast<uint32_t>(mockSubDevice->getDeviceBitfield().to_ulong());
|
||||
EXPECT_EQ(expectedGlobalMemorySize, mockSubDevice->getGlobalMemorySize(subDeviceBitfield));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -89,5 +89,5 @@ TEST(MemoryManagerTest, givenOsAgnosticMemoryManagerWhenGetLocalMemoryIsCalledTh
|
||||
MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
|
||||
MockMemoryManager memoryManager(false, false, executionEnvironment);
|
||||
|
||||
EXPECT_EQ(0 * GB, memoryManager.getLocalMemorySize(0u));
|
||||
EXPECT_EQ(0 * GB, memoryManager.getLocalMemorySize(0u, 0xF));
|
||||
}
|
||||
|
@ -102,7 +102,7 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationThen
|
||||
}
|
||||
|
||||
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) {
|
||||
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex));
|
||||
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex, 0xF));
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMakeAllBuffersResidentSetWhenFlushThenDrmMemoryOperationHandlerIsLocked) {
|
||||
|
@ -105,7 +105,7 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationThen
|
||||
}
|
||||
|
||||
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) {
|
||||
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex));
|
||||
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex, 0xF));
|
||||
}
|
||||
|
||||
namespace NEO {
|
||||
@ -1204,7 +1204,7 @@ TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCal
|
||||
|
||||
auto memoryInfo = static_cast<MemoryInfoImpl *>(drm->getMemoryInfo());
|
||||
ASSERT_NE(nullptr, memoryInfo);
|
||||
EXPECT_EQ(memoryInfo->getMemoryRegionSize(MemoryBanks::Bank0), memoryManager.getLocalMemorySize(0u));
|
||||
EXPECT_EQ(memoryInfo->getMemoryRegionSize(MemoryBanks::Bank0), memoryManager.getLocalMemorySize(0u, 0xF));
|
||||
}
|
||||
|
||||
TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCalledButMemoryInfoIsNotAvailableThenSizeZeroIsReturned) {
|
||||
@ -1214,7 +1214,7 @@ TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCal
|
||||
executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(drm);
|
||||
TestedDrmMemoryManager memoryManager(executionEnvironment);
|
||||
|
||||
EXPECT_EQ(0u, memoryManager.getLocalMemorySize(0u));
|
||||
EXPECT_EQ(0u, memoryManager.getLocalMemorySize(0u, 0xF));
|
||||
}
|
||||
|
||||
TEST_F(DrmMemoryManagerLocalMemoryTest, givenGraphicsAllocationInDevicePoolIsAllocatedForImage1DWhenTheSizeReturnedFromGmmIsUnalignedThenCreateBufferObjectWithSizeAlignedTo64KB) {
|
||||
|
@ -8,5 +8,5 @@
|
||||
#include "opencl/test/unit_test/os_interface/windows/wddm_memory_manager_allocate_in_device_pool_tests.inl"
|
||||
|
||||
TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) {
|
||||
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(0u));
|
||||
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(0u, 0xF));
|
||||
}
|
||||
|
@ -253,10 +253,9 @@ GmmClientContext *Device::getGmmClientContext() const {
|
||||
return getGmmHelper()->getClientContext();
|
||||
}
|
||||
|
||||
uint64_t Device::getGlobalMemorySize() const {
|
||||
|
||||
uint64_t Device::getGlobalMemorySize(uint32_t deviceBitfield) const {
|
||||
auto globalMemorySize = getMemoryManager()->isLocalMemorySupported(this->getRootDeviceIndex())
|
||||
? getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex())
|
||||
? getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex(), deviceBitfield)
|
||||
: getMemoryManager()->getSystemSharedMemory(this->getRootDeviceIndex());
|
||||
globalMemorySize = std::min(globalMemorySize, getMemoryManager()->getMaxApplicationAddress() + 1);
|
||||
globalMemorySize = static_cast<uint64_t>(static_cast<double>(globalMemorySize) * 0.8);
|
||||
|
@ -115,7 +115,7 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
virtual bool createEngines();
|
||||
bool createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsage);
|
||||
MOCKABLE_VIRTUAL std::unique_ptr<CommandStreamReceiver> createCommandStreamReceiver() const;
|
||||
virtual uint64_t getGlobalMemorySize() const;
|
||||
virtual uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const;
|
||||
|
||||
DeviceInfo deviceInfo = {};
|
||||
|
||||
|
@ -50,7 +50,11 @@ void Device::initializeCaps() {
|
||||
|
||||
deviceInfo.globalMemCachelineSize = 64;
|
||||
|
||||
deviceInfo.globalMemSize = getGlobalMemorySize();
|
||||
uint32_t allSubDevicesMask = static_cast<uint32_t>(getDeviceBitfield().to_ulong());
|
||||
constexpr uint32_t singleSubDeviceMask = 1;
|
||||
|
||||
deviceInfo.globalMemSize = getGlobalMemorySize(allSubDevicesMask);
|
||||
deviceInfo.maxMemAllocSize = getGlobalMemorySize(singleSubDeviceMask); // Allocation can be placed only on one SubDevice
|
||||
|
||||
if (DebugManager.flags.Force32bitAddressing.get() || addressing32bitAllowed || is32bit) {
|
||||
deviceInfo.globalMemSize = std::min(deviceInfo.globalMemSize, static_cast<uint64_t>(4 * GB * 0.8));
|
||||
@ -59,14 +63,15 @@ void Device::initializeCaps() {
|
||||
}
|
||||
|
||||
deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize);
|
||||
deviceInfo.maxMemAllocSize = std::min(deviceInfo.globalMemSize, deviceInfo.maxMemAllocSize); // if globalMemSize was reduced for 32b
|
||||
|
||||
// OpenCL 1.2 requires 128MB minimum
|
||||
deviceInfo.maxMemAllocSize = std::min(std::max(deviceInfo.maxMemAllocSize / 2, static_cast<uint64_t>(128llu * MB)), this->hardwareCapabilities.maxMemAllocSize);
|
||||
|
||||
deviceInfo.profilingTimerResolution = getProfilingTimerResolution();
|
||||
deviceInfo.outProfilingTimerResolution = static_cast<size_t>(deviceInfo.profilingTimerResolution);
|
||||
|
||||
// OpenCL 1.2 requires 128MB minimum
|
||||
deviceInfo.maxMemAllocSize = std::min(std::max(deviceInfo.globalMemSize / 2, static_cast<uint64_t>(128llu * MB)), this->hardwareCapabilities.maxMemAllocSize);
|
||||
|
||||
static const int maxPixelSize = 16;
|
||||
constexpr uint64_t maxPixelSize = 16;
|
||||
deviceInfo.imageMaxBufferSize = static_cast<size_t>(deviceInfo.maxMemAllocSize / maxPixelSize);
|
||||
|
||||
deviceInfo.maxNumEUsPerSubSlice = 0;
|
||||
|
@ -47,8 +47,8 @@ Device *SubDevice::getParentDevice() const {
|
||||
return &rootDevice;
|
||||
}
|
||||
|
||||
uint64_t SubDevice::getGlobalMemorySize() const {
|
||||
auto globalMemorySize = Device::getGlobalMemorySize();
|
||||
uint64_t SubDevice::getGlobalMemorySize(uint32_t deviceBitfield) const {
|
||||
auto globalMemorySize = Device::getGlobalMemorySize(static_cast<uint32_t>(maxNBitValue(rootDevice.getNumSubDevices())));
|
||||
return globalMemorySize / rootDevice.getNumAvailableDevices();
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ class SubDevice : public Device {
|
||||
|
||||
protected:
|
||||
DeviceBitfield getDeviceBitfield() const override;
|
||||
uint64_t getGlobalMemorySize() const override;
|
||||
uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const override;
|
||||
const uint32_t subDeviceIndex;
|
||||
RootDevice &rootDevice;
|
||||
};
|
||||
|
@ -114,7 +114,7 @@ class MemoryManager {
|
||||
void checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation);
|
||||
|
||||
virtual uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) = 0;
|
||||
virtual uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) = 0;
|
||||
virtual uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) = 0;
|
||||
|
||||
uint64_t getMaxApplicationAddress() { return is64bit ? MemoryConstants::max64BitAppAddress : MemoryConstants::max32BitAppAddress; };
|
||||
MOCKABLE_VIRTUAL uint64_t getInternalHeapBaseAddress(uint32_t rootDeviceIndex, bool useLocalMemory) { return getGfxPartition(rootDeviceIndex)->getHeapBase(selectInternalHeap(useLocalMemory)); }
|
||||
|
@ -40,7 +40,7 @@ class DrmMemoryManager : public MemoryManager {
|
||||
GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) override { return nullptr; }
|
||||
|
||||
uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override;
|
||||
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) override;
|
||||
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override;
|
||||
|
||||
AllocationStatus populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override;
|
||||
void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override;
|
||||
|
@ -41,7 +41,7 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca
|
||||
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
|
||||
}
|
||||
|
||||
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
|
||||
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
|
||||
return 0 * GB;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
@ -295,7 +295,7 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
|
||||
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
|
||||
auto memoryInfo = static_cast<MemoryInfoImpl *>(getDrm(rootDeviceIndex).getMemoryInfo());
|
||||
if (!memoryInfo) {
|
||||
return 0;
|
||||
|
@ -48,7 +48,7 @@ class WddmMemoryManager : public MemoryManager {
|
||||
void obtainGpuAddressFromFragments(WddmAllocation *allocation, OsHandleStorage &handleStorage);
|
||||
|
||||
uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override;
|
||||
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) override;
|
||||
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override;
|
||||
|
||||
bool tryDeferDeletions(const D3DKMT_HANDLE *handles, uint32_t allocationCount, D3DKMT_HANDLE resourceHandle, uint32_t rootDeviceIndex);
|
||||
|
||||
|
@ -23,7 +23,7 @@ bool WddmMemoryManager::mapGpuVirtualAddress(WddmAllocation *allocation, const v
|
||||
return mapGpuVaForOneHandleAllocation(allocation, requiredPtr);
|
||||
}
|
||||
|
||||
uint64_t WddmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
|
||||
uint64_t WddmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
|
||||
return 0 * GB;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
@ -28,6 +28,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio
|
||||
const DeviceBitfield deviceBitfield);
|
||||
|
||||
struct MockSubDevice : public SubDevice {
|
||||
using SubDevice::getDeviceBitfield;
|
||||
using SubDevice::getGlobalMemorySize;
|
||||
using SubDevice::SubDevice;
|
||||
|
||||
|
Reference in New Issue
Block a user