diff --git a/opencl/test/unit_test/device/device_caps_tests.cpp b/opencl/test/unit_test/device/device_caps_tests.cpp index 930594c789..92fe1c9e0b 100644 --- a/opencl/test/unit_test/device/device_caps_tests.cpp +++ b/opencl/test/unit_test/device/device_caps_tests.cpp @@ -529,6 +529,31 @@ TEST_F(DeviceGetCapsTest, givenGlobalMemSizeAndSharedSystemAllocationsSupportedW EXPECT_EQ(caps.maxMemAllocSize, expectedSize); } +TEST_F(DeviceGetCapsTest, whenDriverModelHasLimitationForMaxMemoryAllocationSizeThenTakeItIntoAccount) { + // Minimal driver model whose getMaxMemAllocSize() reports the limit passed to its constructor + struct MockDriverModel : NEO::DriverModel { + size_t maxAllocSize; + + explicit MockDriverModel(size_t maxAllocSize) : NEO::DriverModel(NEO::DriverModelType::UNKNOWN), maxAllocSize(maxAllocSize) {} + + void setGmmInputArgs(void *args) override {} + uint32_t getDeviceHandle() const override { return {}; } + PhysicalDevicePciBusInfo getPciBusInfo() const override { return {}; } + size_t getMaxMemAllocSize() const override { + return maxAllocSize; + } + }; + + DebugManagerStateRestore dbgRestorer; + size_t maxAllocSizeTestValue = 512; + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + device->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); + device->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(maxAllocSizeTestValue)); + device->initializeCaps(); + const auto &caps = device->getDeviceInfo(); + EXPECT_EQ(maxAllocSizeTestValue, caps.maxMemAllocSize); +} + TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenExtensionsStringEndsWithSpace) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); diff --git a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index 5e7b248ae8..c27d4a9051 100644 --- 
a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -1606,22 +1606,26 @@ TEST_F(MockWddmMemoryManagerTest, givenAllocateGraphicsMemoryForBufferAndRequest wddm->mapGpuVaStatus = true; VariableBackup restorer{&wddm->callBaseMapGpuVa, false}; - DebugManager.flags.Enable64kbpages.set(true); - MemoryManagerCreate memoryManager(true, false, *executionEnvironment); - if (memoryManager.isLimitedGPU(0)) { - GTEST_SKIP(); + // Run the same scenario with the Enable64kbpages flag set to true and to false; ASSERT_NE stops before wddmAlloc is dereferenced + for (bool enable64KBpages : {true, false}) { + wddm->createAllocationResult.called = 0U; + DebugManager.flags.Enable64kbpages.set(enable64KBpages); + MemoryManagerCreate memoryManager(true, false, *executionEnvironment); + if (memoryManager.isLimitedGPU(0)) { + GTEST_SKIP(); + } + EXPECT_EQ(0, wddm->createAllocationResult.called); + + memoryManager.hugeGfxMemoryChunkSize = MemoryConstants::pageSize64k - MemoryConstants::pageSize; + + WddmAllocation *wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize64k * 3, GraphicsAllocation::AllocationType::BUFFER, mockDeviceBitfield})); + ASSERT_NE(nullptr, wddmAlloc); + EXPECT_EQ(4, wddmAlloc->getNumGmms()); + EXPECT_EQ(4, wddm->createAllocationResult.called); + EXPECT_EQ(wddmAlloc->getGpuAddressToModify(), GmmHelper::canonize(wddmAlloc->reservedGpuVirtualAddress)); + + memoryManager.freeGraphicsMemory(wddmAlloc); } - EXPECT_EQ(0, wddm->createAllocationResult.called); - - memoryManager.hugeGfxMemoryChunkSize = MemoryConstants::pageSize64k - MemoryConstants::pageSize; - - WddmAllocation *wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize64k * 3, GraphicsAllocation::AllocationType::BUFFER, mockDeviceBitfield})); - EXPECT_NE(nullptr, wddmAlloc); - EXPECT_EQ(4, wddmAlloc->getNumGmms()); - EXPECT_EQ(4, wddm->createAllocationResult.called); - EXPECT_EQ(wddmAlloc->getGpuAddressToModify(), 
GmmHelper::canonize(wddmAlloc->reservedGpuVirtualAddress)); - - memoryManager.freeGraphicsMemory(wddmAlloc); } TEST_F(MockWddmMemoryManagerTest, givenDefaultMemoryManagerWhenItIsCreatedThenCorrectHugeGfxMemoryChunkIsSet) { diff --git a/shared/source/device/device_caps.cpp b/shared/source/device/device_caps.cpp index 6969a67af9..188335aa97 100644 --- a/shared/source/device/device_caps.cpp +++ b/shared/source/device/device_caps.cpp @@ -78,6 +78,16 @@ void Device::initializeCaps() { deviceInfo.maxMemAllocSize = std::min(deviceInfo.maxMemAllocSize, this->hardwareCapabilities.maxMemAllocSize); } + // Some specific driver model configurations may impose additional limitations. + // The driver model is attached to the OSInterface in a separate step, so both pointers are checked before use. + // NOTE(review): root device index 0 is hard-coded here - confirm this is intended for multi-root-device setups. + auto driverModelMaxMemAlloc = std::numeric_limits::max(); + const auto &osInterface = this->executionEnvironment->rootDeviceEnvironments[0]->osInterface; + if (osInterface && osInterface->getDriverModel()) { + driverModelMaxMemAlloc = osInterface->getDriverModel()->getMaxMemAllocSize(); + } + deviceInfo.maxMemAllocSize = std::min(driverModelMaxMemAlloc, deviceInfo.maxMemAllocSize); + deviceInfo.profilingTimerResolution = getProfilingTimerResolution(); if (DebugManager.flags.OverrideProfilingTimerResolution.get() != -1) { deviceInfo.profilingTimerResolution = static_cast(DebugManager.flags.OverrideProfilingTimerResolution.get()); diff --git a/shared/source/os_interface/os_interface.h b/shared/source/os_interface/os_interface.h index 4d89f46a17..5738c62a47 100644 --- a/shared/source/os_interface/os_interface.h +++ b/shared/source/os_interface/os_interface.h @@ -11,6 +11,7 @@ #include "shared/source/os_interface/driver_info.h" #include +#include #include #include @@ -78,6 +79,10 @@ class DriverModel : public NonCopyableClass { virtual PhysicalDevicePciBusInfo getPciBusInfo() const = 0; + virtual size_t getMaxMemAllocSize() const { + return std::numeric_limits::max(); + } + protected: DriverModelType driverModelType; }; diff --git a/shared/source/os_interface/windows/CMakeLists.txt 
b/shared/source/os_interface/windows/CMakeLists.txt index 1b209ddd92..4554ea9f86 100644 --- a/shared/source/os_interface/windows/CMakeLists.txt +++ b/shared/source/os_interface/windows/CMakeLists.txt @@ -60,6 +60,7 @@ set(NEO_CORE_OS_INTERFACE_WDDM ${CMAKE_CURRENT_SOURCE_DIR}/init_wddm_os_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kmdaf_listener${KMDAF_FILE_SUFFIX}.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kmdaf_listener.h + ${CMAKE_CURRENT_SOURCE_DIR}/max_chunk_size_${DRIVER_MODEL}.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_context_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_context_win.h ${CMAKE_CURRENT_SOURCE_DIR}/os_environment_win.cpp @@ -75,6 +76,7 @@ set(NEO_CORE_OS_INTERFACE_WDDM ${CMAKE_CURRENT_SOURCE_DIR}/wddm/adapter_factory_dxcore.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/configure_device_address_space_${DRIVER_MODEL}.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm/set_gmm_input_args_${DRIVER_MODEL}.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/wddm/max_mem_alloc_size_${DRIVER_MODEL}.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_defs.h diff --git a/shared/source/os_interface/windows/max_chunk_size_drm_or_wddm.cpp b/shared/source/os_interface/windows/max_chunk_size_drm_or_wddm.cpp new file mode 100644 index 0000000000..7270c5a695 --- /dev/null +++ b/shared/source/os_interface/windows/max_chunk_size_drm_or_wddm.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/windows/wddm_memory_manager.h" + +namespace NEO { + +size_t WddmMemoryManager::getHugeGfxMemoryChunkSize() const { + return 31 * MemoryConstants::megaByte; +} + +} // namespace NEO diff --git a/shared/source/os_interface/windows/max_chunk_size_wddm.cpp b/shared/source/os_interface/windows/max_chunk_size_wddm.cpp new file mode 100644 index 0000000000..443cde1960 --- /dev/null +++ b/shared/source/os_interface/windows/max_chunk_size_wddm.cpp @@ -0,0 +1,16 @@ +/* + * Copyright 
(C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/windows/wddm_memory_manager.h" + +namespace NEO { + +size_t WddmMemoryManager::getHugeGfxMemoryChunkSize() const { + return 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k; +} + +} // namespace NEO diff --git a/shared/source/os_interface/windows/wddm/max_mem_alloc_size_drm_or_wddm.cpp b/shared/source/os_interface/windows/wddm/max_mem_alloc_size_drm_or_wddm.cpp new file mode 100644 index 0000000000..6aa3a2da85 --- /dev/null +++ b/shared/source/os_interface/windows/wddm/max_mem_alloc_size_drm_or_wddm.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/windows/wddm/wddm.h" + +namespace NEO { + +size_t Wddm::getMaxMemAllocSize() const { + return MemoryConstants::gigaByte; +} + +} // namespace NEO diff --git a/shared/source/os_interface/windows/wddm/max_mem_alloc_size_wddm.cpp b/shared/source/os_interface/windows/wddm/max_mem_alloc_size_wddm.cpp new file mode 100644 index 0000000000..7b48b07d26 --- /dev/null +++ b/shared/source/os_interface/windows/wddm/max_mem_alloc_size_wddm.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/windows/wddm/wddm.h" + +namespace NEO { + +size_t Wddm::getMaxMemAllocSize() const { + return std::numeric_limits::max(); +} + +} // namespace NEO diff --git a/shared/source/os_interface/windows/wddm/wddm.h b/shared/source/os_interface/windows/wddm/wddm.h index 34fa14c77c..6d503407b6 100644 --- a/shared/source/os_interface/windows/wddm/wddm.h +++ b/shared/source/os_interface/windows/wddm/wddm.h @@ -185,6 +185,8 @@ class Wddm : public DriverModel { PhysicalDevicePciBusInfo getPciBusInfo() const override; + size_t getMaxMemAllocSize() const override; + static std::vector> discoverDevices(ExecutionEnvironment 
&executionEnvironment); protected: diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 7ee0c3eaed..cadd1d023f 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -97,7 +97,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForImageImpl(const GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemory64kb(const AllocationData &allocationData) { size_t sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k); if (sizeAligned > getHugeGfxMemoryChunkSize()) { - return allocateHugeGraphicsMemory(allocationData); + return allocateHugeGraphicsMemory(allocationData, false); } auto wddmAllocation = std::make_unique(allocationData.rootDeviceIndex, @@ -129,7 +129,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemory64kb(const Allocati return wddmAllocation.release(); } -GraphicsAllocation *WddmMemoryManager::allocateHugeGraphicsMemory(const AllocationData &allocationData) { +GraphicsAllocation *WddmMemoryManager::allocateHugeGraphicsMemory(const AllocationData &allocationData, bool sharedVirtualAddress) { void *hostPtr = nullptr, *alignedPtr = nullptr; size_t alignedSize = 0; bool uncacheable = allocationData.flags.uncacheable; @@ -175,7 +175,7 @@ GraphicsAllocation *WddmMemoryManager::allocateHugeGraphicsMemory(const Allocati wddmAllocation->storageInfo.multiStorage = true; - if (!createWddmAllocation(wddmAllocation.get(), nullptr)) { + if (!createWddmAllocation(wddmAllocation.get(), sharedVirtualAddress ? 
hostPtr : nullptr)) { for (auto gmmId = 0u; gmmId < wddmAllocation->getNumGmms(); ++gmmId) { delete wddmAllocation->getGmm(gmmId); } @@ -193,6 +193,9 @@ GraphicsAllocation *WddmMemoryManager::allocateUSMHostGraphicsMemory(const Alloc GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) { size_t newAlignment = allocationData.alignment ? alignUp(allocationData.alignment, MemoryConstants::pageSize) : MemoryConstants::pageSize; size_t sizeAligned = allocationData.size ? alignUp(allocationData.size, MemoryConstants::pageSize) : MemoryConstants::pageSize; + if (sizeAligned > getHugeGfxMemoryChunkSize()) { + return allocateHugeGraphicsMemory(allocationData, true); + } void *pSysMem = allocateSystemMemory(sizeAligned, newAlignment); Gmm *gmm = nullptr; @@ -238,7 +241,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryWithAlignment(const GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) { auto alignedSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size); if (alignedSize > getHugeGfxMemoryChunkSize()) { - return allocateHugeGraphicsMemory(allocationData); + return allocateHugeGraphicsMemory(allocationData, false); } auto wddmAllocation = std::make_unique(allocationData.rootDeviceIndex, @@ -266,7 +269,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(co GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData) { if (allocationData.size > getHugeGfxMemoryChunkSize()) { - return allocateHugeGraphicsMemory(allocationData); + return allocateHugeGraphicsMemory(allocationData, false); } if (mallocRestrictions.minAddress > reinterpret_cast(allocationData.hostPtr)) { diff --git a/shared/source/os_interface/windows/wddm_memory_manager.h b/shared/source/os_interface/windows/wddm_memory_manager.h index a2374554eb..a0f8fd8c57 100644 --- 
a/shared/source/os_interface/windows/wddm_memory_manager.h +++ b/shared/source/os_interface/windows/wddm_memory_manager.h @@ -82,8 +82,8 @@ class WddmMemoryManager : public MemoryManager { GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData, bool useLocalMemory) override; GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override; - MOCKABLE_VIRTUAL size_t getHugeGfxMemoryChunkSize() const { return 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k; } - GraphicsAllocation *allocateHugeGraphicsMemory(const AllocationData &allocationData); + MOCKABLE_VIRTUAL size_t getHugeGfxMemoryChunkSize() const; + GraphicsAllocation *allocateHugeGraphicsMemory(const AllocationData &allocationData, bool sharedVirtualAddress); GraphicsAllocation *createAllocationFromHandle(osHandle handle, bool requireSpecificBitness, bool ntHandle, GraphicsAllocation::AllocationType allocationType, uint32_t rootDeviceIndex); static bool validateAllocation(WddmAllocation *alloc); diff --git a/shared/wsl_compute_helper/source/wsl_compute_helper_types_demarshall.h b/shared/wsl_compute_helper/source/wsl_compute_helper_types_demarshall.h index 1ecc3c68a8..259482ea29 100644 --- a/shared/wsl_compute_helper/source/wsl_compute_helper_types_demarshall.h +++ b/shared/wsl_compute_helper/source/wsl_compute_helper_types_demarshall.h @@ -1018,6 +1018,10 @@ struct Demarshaller { uint32_t arrayElementIdSliceInfo = varLen->arrayElementId; const TokenHeader *tokSliceInfo = varLen->getValue(); const TokenHeader *tokSliceInfoEnd = varLen->getValue() + varLen->valueLengthInBytes / sizeof(TokenHeader); + static constexpr auto maxDstSlicesInfo = sizeof(dst.SystemInfo.SliceInfo) / sizeof(dst.SystemInfo.SliceInfo[0]); + if (arrayElementIdSliceInfo >= maxDstSlicesInfo) { + tokSliceInfo = tokSliceInfoEnd; + } while (tokSliceInfo < tokSliceInfoEnd) { if (false == tokSliceInfo->flags.flag4IsVariableLength) { 
switch (tokSliceInfo->id) { @@ -3783,6 +3787,10 @@ struct Demarshaller { uint32_t arrayElementIdSliceInfo = varLen->arrayElementId; const TokenHeader *tokSliceInfo = varLen->getValue(); const TokenHeader *tokSliceInfoEnd = varLen->getValue() + varLen->valueLengthInBytes / sizeof(TokenHeader); + static constexpr auto maxDstSlicesInfo = sizeof(dst.SystemInfo.SliceInfo) / sizeof(dst.SystemInfo.SliceInfo[0]); + if (arrayElementIdSliceInfo >= maxDstSlicesInfo) { + tokSliceInfo = tokSliceInfoEnd; + } while (tokSliceInfo < tokSliceInfoEnd) { if (false == tokSliceInfo->flags.flag4IsVariableLength) { switch (tokSliceInfo->id) {