Improve returning global and max alloc memory size

Change-Id: I2f22481412184f01652b7e49bc30a57c56e6204b
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-10-30 10:27:48 +01:00
committed by sys_ocldev
parent b6d21cc636
commit d35806fdd8
20 changed files with 39 additions and 30 deletions

View File

@ -74,7 +74,7 @@ class OsAgnosticMemoryManager : public MemoryManager {
void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override;
uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override;
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) override;
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override;
void turnOnFakingBigAllocations();

View File

@ -41,7 +41,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(
return nullptr;
}
// Reports the local memory size for the OS-agnostic memory manager.
// The value is the constant 0 * GB; the result does not depend on the arguments passed in.
uint64_t OsAgnosticMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
uint64_t OsAgnosticMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
return 0 * GB;
}

View File

@ -473,7 +473,7 @@ TEST_F(DeviceGetCapsTest, givenDeviceCapsWhenLocalMemoryIsEnabledThenCalculateGl
auto enabledOcl21Features = device->areOcl21FeaturesEnabled();
bool addressing32Bit = is32bit || (is64bit && (enabledOcl21Features == false)) || DebugManager.flags.Force32bitAddressing.get();
auto localMem = pMemManager->getLocalMemorySize(0u);
auto localMem = pMemManager->getLocalMemorySize(0u, static_cast<uint32_t>(device->getDeviceBitfield().to_ulong()));
auto maxAppAddrSpace = pMemManager->getMaxApplicationAddress() + 1;
auto memSize = std::min(localMem, maxAppAddrSpace);
memSize = static_cast<cl_ulong>(memSize * 0.8);

View File

@ -255,11 +255,15 @@ TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWhenGettingGlobalMemorySizeThe
const uint32_t numSubDevices = 2u;
UltDeviceFactory deviceFactory{1, numSubDevices};
auto totalGlobalMemorySize = deviceFactory.rootDevices[0]->getGlobalMemorySize();
auto rootDevice = deviceFactory.rootDevices[0];
auto totalGlobalMemorySize = rootDevice->getGlobalMemorySize(static_cast<uint32_t>(rootDevice->getDeviceBitfield().to_ulong()));
auto expectedGlobalMemorySize = totalGlobalMemorySize / numSubDevices;
for (const auto &subDevice : deviceFactory.subDevices) {
EXPECT_EQ(expectedGlobalMemorySize, static_cast<MockSubDevice *>(subDevice)->getGlobalMemorySize());
auto mockSubDevice = static_cast<MockSubDevice *>(subDevice);
auto subDeviceBitfield = static_cast<uint32_t>(mockSubDevice->getDeviceBitfield().to_ulong());
EXPECT_EQ(expectedGlobalMemorySize, mockSubDevice->getGlobalMemorySize(subDeviceBitfield));
}
}

View File

@ -89,5 +89,5 @@ TEST(MemoryManagerTest, givenOsAgnosticMemoryManagerWhenGetLocalMemoryIsCalledTh
MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
MockMemoryManager memoryManager(false, false, executionEnvironment);
EXPECT_EQ(0 * GB, memoryManager.getLocalMemorySize(0u));
EXPECT_EQ(0 * GB, memoryManager.getLocalMemorySize(0u, 0xF));
}

View File

@ -102,7 +102,7 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationThen
}
// Expects the fixture's memoryManager to report a local memory size of 0 * GB for rootDeviceIndex.
// The two-argument call supplies 0xF as the device bitfield.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) {
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex));
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex, 0xF));
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMakeAllBuffersResidentSetWhenFlushThenDrmMemoryOperationHandlerIsLocked) {

View File

@ -105,7 +105,7 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationThen
}
// Expects the fixture's memoryManager to report a local memory size of 0 * GB for rootDeviceIndex.
// The two-argument call supplies 0xF as the device bitfield.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) {
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex));
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex, 0xF));
}
namespace NEO {
@ -1204,7 +1204,7 @@ TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCal
auto memoryInfo = static_cast<MemoryInfoImpl *>(drm->getMemoryInfo());
ASSERT_NE(nullptr, memoryInfo);
EXPECT_EQ(memoryInfo->getMemoryRegionSize(MemoryBanks::Bank0), memoryManager.getLocalMemorySize(0u));
EXPECT_EQ(memoryInfo->getMemoryRegionSize(MemoryBanks::Bank0), memoryManager.getLocalMemorySize(0u, 0xF));
}
TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCalledButMemoryInfoIsNotAvailableThenSizeZeroIsReturned) {
@ -1214,7 +1214,7 @@ TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCal
executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(drm);
TestedDrmMemoryManager memoryManager(executionEnvironment);
EXPECT_EQ(0u, memoryManager.getLocalMemorySize(0u));
EXPECT_EQ(0u, memoryManager.getLocalMemorySize(0u, 0xF));
}
TEST_F(DrmMemoryManagerLocalMemoryTest, givenGraphicsAllocationInDevicePoolIsAllocatedForImage1DWhenTheSizeReturnedFromGmmIsUnalignedThenCreateBufferObjectWithSizeAlignedTo64KB) {

View File

@ -8,5 +8,5 @@
#include "opencl/test/unit_test/os_interface/windows/wddm_memory_manager_allocate_in_device_pool_tests.inl"
// Expects the fixture's memoryManager to report a local memory size of 0 * GB for root device index 0.
// The two-argument call supplies 0xF as the device bitfield.
TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) {
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(0u));
EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(0u, 0xF));
}

View File

@ -253,10 +253,9 @@ GmmClientContext *Device::getGmmClientContext() const {
return getGmmHelper()->getClientContext();
}
uint64_t Device::getGlobalMemorySize() const {
uint64_t Device::getGlobalMemorySize(uint32_t deviceBitfield) const {
auto globalMemorySize = getMemoryManager()->isLocalMemorySupported(this->getRootDeviceIndex())
? getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex())
? getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex(), deviceBitfield)
: getMemoryManager()->getSystemSharedMemory(this->getRootDeviceIndex());
globalMemorySize = std::min(globalMemorySize, getMemoryManager()->getMaxApplicationAddress() + 1);
globalMemorySize = static_cast<uint64_t>(static_cast<double>(globalMemorySize) * 0.8);

View File

@ -115,7 +115,7 @@ class Device : public ReferenceTrackedObject<Device> {
virtual bool createEngines();
bool createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsage);
MOCKABLE_VIRTUAL std::unique_ptr<CommandStreamReceiver> createCommandStreamReceiver() const;
virtual uint64_t getGlobalMemorySize() const;
virtual uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const;
DeviceInfo deviceInfo = {};

View File

@ -50,7 +50,11 @@ void Device::initializeCaps() {
deviceInfo.globalMemCachelineSize = 64;
deviceInfo.globalMemSize = getGlobalMemorySize();
uint32_t allSubDevicesMask = static_cast<uint32_t>(getDeviceBitfield().to_ulong());
constexpr uint32_t singleSubDeviceMask = 1;
deviceInfo.globalMemSize = getGlobalMemorySize(allSubDevicesMask);
deviceInfo.maxMemAllocSize = getGlobalMemorySize(singleSubDeviceMask); // Allocation can be placed only on one SubDevice
if (DebugManager.flags.Force32bitAddressing.get() || addressing32bitAllowed || is32bit) {
deviceInfo.globalMemSize = std::min(deviceInfo.globalMemSize, static_cast<uint64_t>(4 * GB * 0.8));
@ -59,14 +63,15 @@ void Device::initializeCaps() {
}
deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize);
deviceInfo.maxMemAllocSize = std::min(deviceInfo.globalMemSize, deviceInfo.maxMemAllocSize); // if globalMemSize was reduced for 32b
// OpenCL 1.2 requires 128MB minimum
deviceInfo.maxMemAllocSize = std::min(std::max(deviceInfo.maxMemAllocSize / 2, static_cast<uint64_t>(128llu * MB)), this->hardwareCapabilities.maxMemAllocSize);
deviceInfo.profilingTimerResolution = getProfilingTimerResolution();
deviceInfo.outProfilingTimerResolution = static_cast<size_t>(deviceInfo.profilingTimerResolution);
// OpenCL 1.2 requires 128MB minimum
deviceInfo.maxMemAllocSize = std::min(std::max(deviceInfo.globalMemSize / 2, static_cast<uint64_t>(128llu * MB)), this->hardwareCapabilities.maxMemAllocSize);
static const int maxPixelSize = 16;
constexpr uint64_t maxPixelSize = 16;
deviceInfo.imageMaxBufferSize = static_cast<size_t>(deviceInfo.maxMemAllocSize / maxPixelSize);
deviceInfo.maxNumEUsPerSubSlice = 0;

View File

@ -47,8 +47,8 @@ Device *SubDevice::getParentDevice() const {
return &rootDevice;
}
// Returns this sub-device's share of global memory: the size obtained from the base-class
// Device::getGlobalMemorySize divided by rootDevice.getNumAvailableDevices().
// In the form taking deviceBitfield, the incoming argument is not read; the base-class query is made
// with maxNBitValue(rootDevice.getNumSubDevices()) instead.
// NOTE(review): presumably that value is a mask with one bit per sub-device — confirm against maxNBitValue.
// NOTE(review): the mask is built from getNumSubDevices() while the divisor is getNumAvailableDevices();
// confirm the two counts always agree for a root device that has sub-devices.
uint64_t SubDevice::getGlobalMemorySize() const {
auto globalMemorySize = Device::getGlobalMemorySize();
uint64_t SubDevice::getGlobalMemorySize(uint32_t deviceBitfield) const {
auto globalMemorySize = Device::getGlobalMemorySize(static_cast<uint32_t>(maxNBitValue(rootDevice.getNumSubDevices())));
return globalMemorySize / rootDevice.getNumAvailableDevices();
}

View File

@ -25,7 +25,7 @@ class SubDevice : public Device {
protected:
DeviceBitfield getDeviceBitfield() const override;
uint64_t getGlobalMemorySize() const override;
uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const override;
const uint32_t subDeviceIndex;
RootDevice &rootDevice;
};

View File

@ -114,7 +114,7 @@ class MemoryManager {
void checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation);
virtual uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) = 0;
virtual uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) = 0;
virtual uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) = 0;
uint64_t getMaxApplicationAddress() { return is64bit ? MemoryConstants::max64BitAppAddress : MemoryConstants::max32BitAppAddress; };
MOCKABLE_VIRTUAL uint64_t getInternalHeapBaseAddress(uint32_t rootDeviceIndex, bool useLocalMemory) { return getGfxPartition(rootDeviceIndex)->getHeapBase(selectInternalHeap(useLocalMemory)); }

View File

@ -40,7 +40,7 @@ class DrmMemoryManager : public MemoryManager {
GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) override { return nullptr; }
uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override;
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) override;
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override;
AllocationStatus populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override;
void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override;

View File

@ -41,7 +41,7 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
}
// This definition of DrmMemoryManager::getLocalMemorySize reports the constant 0 * GB;
// the result does not depend on the arguments passed in.
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
return 0 * GB;
}
} // namespace NEO

View File

@ -295,7 +295,7 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca
return true;
}
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
auto memoryInfo = static_cast<MemoryInfoImpl *>(getDrm(rootDeviceIndex).getMemoryInfo());
if (!memoryInfo) {
return 0;

View File

@ -48,7 +48,7 @@ class WddmMemoryManager : public MemoryManager {
void obtainGpuAddressFromFragments(WddmAllocation *allocation, OsHandleStorage &handleStorage);
uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override;
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) override;
uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override;
bool tryDeferDeletions(const D3DKMT_HANDLE *handles, uint32_t allocationCount, D3DKMT_HANDLE resourceHandle, uint32_t rootDeviceIndex);

View File

@ -23,7 +23,7 @@ bool WddmMemoryManager::mapGpuVirtualAddress(WddmAllocation *allocation, const v
return mapGpuVaForOneHandleAllocation(allocation, requiredPtr);
}
// This definition of WddmMemoryManager::getLocalMemorySize reports the constant 0 * GB;
// the result does not depend on the arguments passed in.
uint64_t WddmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
uint64_t WddmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
return 0 * GB;
}
} // namespace NEO

View File

@ -28,6 +28,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio
const DeviceBitfield deviceBitfield);
struct MockSubDevice : public SubDevice {
using SubDevice::getDeviceBitfield;
using SubDevice::getGlobalMemorySize;
using SubDevice::SubDevice;