diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index dea8ddf0f4..3232486c2d 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1750,7 +1750,8 @@ NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer NEO::AllocationProperties properties = {getRootDeviceIndex(), false, size, NEO::AllocationType::externalHostPtr, false, neoDevice->getDeviceBitfield()}; - properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true; + // L3 must be flushed only if host memory is a transfer destination. + properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = !hostCopyAllowed; auto allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties, buffer); if (allocation == nullptr && hostCopyAllowed) { diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index 092058b52c..945e3443d8 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -917,6 +917,7 @@ TEST_F(DeviceHostPointerTest, givenHostPointerNotAcceptedByKernelThenNewAllocati EXPECT_NE(allocation->getUnderlyingBuffer(), reinterpret_cast(buffer)); EXPECT_EQ(alignUp(size, MemoryConstants::pageSize), allocation->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(buffer, allocation->getUnderlyingBuffer(), size)); + EXPECT_FALSE(allocation->isFlushL3Required()); neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); delete[] buffer; diff --git a/shared/source/gmm_helper/cache_settings_helper.cpp b/shared/source/gmm_helper/cache_settings_helper.cpp index 7bc7b8b3e5..98658cc54b 100644 --- a/shared/source/gmm_helper/cache_settings_helper.cpp +++ b/shared/source/gmm_helper/cache_settings_helper.cpp @@ -126,8 +126,8 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching } // Set 2-way coherency for allocations which are not aligned to cacheline -GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getGmmUsageTypeForUserPtr(const void *userPtr, size_t size, const ProductHelper &productHelper) { - if (!isL3Capable(userPtr, size) && productHelper.isMisalignedUserPtr2WayCoherent()) { +GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getGmmUsageTypeForUserPtr(bool isCacheFlushRequired, const void *userPtr, size_t size, const ProductHelper &productHelper) { + if (isCacheFlushRequired && !isL3Capable(userPtr, size) && productHelper.isMisalignedUserPtr2WayCoherent()) { return GMM_RESOURCE_USAGE_HW_CONTEXT; } else { return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER; diff --git a/shared/source/gmm_helper/cache_settings_helper.h b/shared/source/gmm_helper/cache_settings_helper.h index d9c34a375a..e4ab8142fa 100644 --- a/shared/source/gmm_helper/cache_settings_helper.h +++ b/shared/source/gmm_helper/cache_settings_helper.h @@ -20,7 +20,7 @@ struct RootDeviceEnvironment; struct CacheSettingsHelper { static GMM_RESOURCE_USAGE_TYPE_ENUM getGmmUsageType(AllocationType allocationType, bool forceUncached, const ProductHelper &productHelper); - static GMM_RESOURCE_USAGE_TYPE_ENUM getGmmUsageTypeForUserPtr(const void *userPtr, size_t size, const ProductHelper &productHelper); + static GMM_RESOURCE_USAGE_TYPE_ENUM getGmmUsageTypeForUserPtr(bool isCacheFlushRequired, const void *userPtr, size_t size, const ProductHelper &productHelper); static constexpr bool isUncachedType(GMM_RESOURCE_USAGE_TYPE_ENUM gmmResourceUsageType) { return ((gmmResourceUsageType == GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC) || diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 6b28f3620f..fa9b4c7e52 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -633,7 +633,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(con bo->setAddress(gpuVirtualAddress); - auto usageType = CacheSettingsHelper::getGmmUsageTypeForUserPtr(allocationData.hostPtr, allocationData.size, productHelper); + auto usageType = CacheSettingsHelper::getGmmUsageTypeForUserPtr(allocationData.flags.flushL3, allocationData.hostPtr, allocationData.size, productHelper); auto patIndex = rootDeviceEnvironment->getGmmClientContext()->cachePolicyGetPATIndex(nullptr, usageType, false, true); bo->setPatIndex(patIndex); diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 031603b947..649ee02c58 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -474,7 +474,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(co } auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmHelper(), alignedPtr, alignedSize, 0u, - CacheSettingsHelper::getGmmUsageTypeForUserPtr(allocationData.hostPtr, allocationData.size, productHelper), {}, gmmRequirements); + CacheSettingsHelper::getGmmUsageTypeForUserPtr(allocationData.flags.flushL3, allocationData.hostPtr, allocationData.size, productHelper), {}, gmmRequirements); wddmAllocation->setDefaultGmm(gmm); diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 73a13429a4..f95d6e287d 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -3803,7 +3803,7 @@ TEST_F(DrmMemoryManagerBasic, givenSpecificAddressSpaceWhenInitializingMemoryMan EXPECT_EQ(maxNBitValue(48 - 1), limit); } -TEST_F(DrmMemoryManagerBasic, givenUnalignedHostPtrWhenAllocateGraphicsMemoryThenSetCorrectPatIndex) { +TEST_F(DrmMemoryManagerBasic, givenUnalignedHostPtrWithFlushL3RequiredWhenAllocateGraphicsMemoryThenSetCorrectPatIndex) { AllocationData allocationData; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); @@ -3812,6 +3812,7 @@ TEST_F(DrmMemoryManagerBasic, givenUnalignedHostPtrWhenAllocateGraphicsMemoryThe allocationData.size = 13; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = rootDeviceIndex; + allocationData.flags.flushL3 = true; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); EXPECT_NE(nullptr, allocation); @@ -3828,6 +3829,23 @@ TEST_F(DrmMemoryManagerBasic, givenUnalignedHostPtrWhenAllocateGraphicsMemoryThe memoryManager->freeGraphicsMemory(allocation); } +TEST_F(DrmMemoryManagerBasic, givenUnalignedHostPtrWithFlushL3NotRequiredWhenAllocateGraphicsMemoryThenSetCorrectPatIndex) { + AllocationData allocationData; + std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); + + memoryManager->forceLimitedRangeAllocator(MemoryConstants::max48BitAddress); + + allocationData.size = 13; + allocationData.hostPtr = reinterpret_cast(0x5001); + allocationData.rootDeviceIndex = rootDeviceIndex; + allocationData.flags.flushL3 = false; + auto allocation = static_cast(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); + EXPECT_NE(nullptr, allocation); + + EXPECT_EQ(MockGmmClientContextBase::MockPatIndex::cached, allocation->getBO()->peekPatIndex()); + memoryManager->freeGraphicsMemory(allocation); +} + TEST_F(DrmMemoryManagerBasic, givenAlignedHostPtrWhenAllocateGraphicsMemoryThenSetCorrectPatIndex) { AllocationData allocationData; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); @@ -3837,6 +3855,7 @@ TEST_F(DrmMemoryManagerBasic, givenAlignedHostPtrWhenAllocateGraphicsMemoryThenS allocationData.size = MemoryConstants::cacheLineSize; allocationData.hostPtr = reinterpret_cast(MemoryConstants::pageSize); allocationData.rootDeviceIndex = rootDeviceIndex; + allocationData.flags.flushL3 = true; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); EXPECT_NE(nullptr, allocation); diff --git a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index 63bde8387d..cd8bb07ae8 100644 --- a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -1050,7 +1050,7 @@ TEST_F(WddmMemoryManagerSimpleTest, whenCreateAllocationFromHandleAndMapCallFail EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryImplCalled); } -TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedThenAlignedGraphicsAllocationIsCreatedWithCorrectGmmResource) { +TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedAndFlushL3RequiredThenSetCorrectGmmResource) { memoryManager.reset(new MockWddmMemoryManager(false, false, executionEnvironment)); auto size = 13u; auto hostPtr = reinterpret_cast(0x10001); @@ -1058,6 +1058,7 @@ TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrI AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = hostPtr; + allocationData.flags.flushL3 = true; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(hostPtr, allocation->getUnderlyingBuffer()); @@ -1074,6 +1075,22 @@ TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrI memoryManager->freeGraphicsMemory(allocation); } +TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedAndFlushL3NotRequiredThenSetCorrectGmmResource) { + memoryManager.reset(new MockWddmMemoryManager(false, false, executionEnvironment)); + auto size = 13u; + auto hostPtr = reinterpret_cast(0x10001); + + AllocationData allocationData; + allocationData.size = size; + allocationData.hostPtr = hostPtr; + allocationData.flags.flushL3 = false; + auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); + EXPECT_NE(nullptr, allocation); + + EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER, allocation->getGmm(0)->resourceParams.Usage); + memoryManager->freeGraphicsMemory(allocation); +} + TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedAndImportedAllocationIsFalseThenAlignedGraphicsAllocationIsFreed) { memoryManager.reset(new MockWddmMemoryManager(false, false, executionEnvironment)); auto size = 13u; @@ -1082,6 +1099,7 @@ TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrI AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = hostPtr; + allocationData.flags.flushL3 = true; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(hostPtr, allocation->getUnderlyingBuffer());