Allow null USM allocation descriptors by falling back to default descriptors.

ContextImp::allocHostMem, ContextImp::allocDeviceMem and
ContextImp::allocSharedMem now pick DefaultDescriptors::hostMemDesc or
DefaultDescriptors::deviceMemDesc when the caller passes a null descriptor
pointer, and read pNext / flags through that local pointer instead of
dereferencing the argument directly.

Defaults added in default_descriptors.cpp:
  deviceMemDesc: pNext = nullptr, flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED,
                 ordinal = 0
  hostMemDesc:   pNext = nullptr, flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED |
                 ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT

Because the default host descriptor carries BIAS_INITIAL_PLACEMENT, a shared
allocation made with two null descriptors sets usmInitialPlacementCpu; the
third new test pins that (Cpu == 1, Gpu == 0, locallyUncachedResource == 0).

NOTE(review): this patch text was rebuilt from a copy that had lost its line
breaks, indentation and the <...> arguments of every cast.
  - Cast target types were restored from the declared variable / field types;
    static_cast<uint32_t>(lookupTable.exportMemory) is the least certain one,
    confirm against the tree.
  - Blank context lines were placed so each hunk matches its @@ line counts;
    the blank between the namespace closers in default_descriptors.cpp is
    assumed. Use a fuzzy apply or re-diff if a context line does not match.
  - The duplicated "Then" in two new test names was removed.

diff --git a/level_zero/core/source/context/context_imp.cpp b/level_zero/core/source/context/context_imp.cpp
index 72c297e676..0d3e61f176 100644
--- a/level_zero/core/source/context/context_imp.cpp
+++ b/level_zero/core/source/context/context_imp.cpp
@@ -23,6 +23,7 @@
 #include "level_zero/core/source/driver/driver_handle_imp.h"
 #include "level_zero/core/source/event/event.h"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
+#include "level_zero/core/source/helpers/default_descriptors.h"
 #include "level_zero/core/source/helpers/properties_parser.h"
 #include "level_zero/core/source/image/image.h"
 #include "level_zero/core/source/memory/memory_operations_helper.h"
@@ -72,13 +73,16 @@ ze_result_t ContextImp::allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc,
                                      size_t size,
                                      size_t alignment,
                                      void **ptr) {
+
+    auto hostMemDesc = hostDesc ? hostDesc : &DefaultDescriptors::hostMemDesc;
+
     if (NEO::debugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) {
         size += (MemoryConstants::pageSize * NEO::debugManager.flags.ForceExtendedUSMBufferSize.get());
     }
 
     bool relaxedSizeAllowed = NEO::debugManager.flags.AllowUnrestrictedSize.get();
-    if (hostDesc->pNext) {
-        const ze_base_desc_t *extendedDesc = reinterpret_cast<const ze_base_desc_t *>(hostDesc->pNext);
+    if (hostMemDesc->pNext) {
+        const ze_base_desc_t *extendedDesc = reinterpret_cast<const ze_base_desc_t *>(hostMemDesc->pNext);
         if (extendedDesc->stype == ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC) {
             const ze_relaxed_allocation_limits_exp_desc_t *relaxedLimitsDesc =
                 reinterpret_cast<const ze_relaxed_allocation_limits_exp_desc_t *>(extendedDesc);
@@ -98,7 +102,7 @@ ze_result_t ContextImp::allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc,
     StructuresLookupTable lookupTable = {};
 
     lookupTable.relaxedSizeAllowed = NEO::debugManager.flags.AllowUnrestrictedSize.get();
-    auto parseResult = prepareL0StructuresLookupTable(lookupTable, hostDesc->pNext);
+    auto parseResult = prepareL0StructuresLookupTable(lookupTable, hostMemDesc->pNext);
 
     if (parseResult != ZE_RESULT_SUCCESS) {
         return parseResult;
@@ -131,11 +135,11 @@ ze_result_t ContextImp::allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc,
                                                                            this->rootDeviceIndices,
                                                                            this->deviceBitfields);
 
-    if (hostDesc->flags & ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
+    if (hostMemDesc->flags & ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
         unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1;
     }
 
-    if (hostDesc->flags & ZEX_HOST_MEM_ALLOC_FLAG_USE_HOST_PTR) {
+    if (hostMemDesc->flags & ZEX_HOST_MEM_ALLOC_FLAG_USE_HOST_PTR) {
         unifiedMemoryProperties.allocationFlags.hostptr = reinterpret_cast<uintptr_t>(*ptr);
     }
 
@@ -208,6 +212,9 @@ ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice,
                                        const ze_device_mem_alloc_desc_t *deviceDesc,
                                        size_t size,
                                        size_t alignment, void **ptr) {
+
+    auto deviceMemDesc = deviceDesc ? deviceDesc : &DefaultDescriptors::deviceMemDesc;
+
     if (NEO::debugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) {
         size += (MemoryConstants::pageSize * NEO::debugManager.flags.ForceExtendedUSMBufferSize.get());
     }
@@ -220,7 +227,7 @@ ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice,
     StructuresLookupTable lookupTable = {};
 
     lookupTable.relaxedSizeAllowed = NEO::debugManager.flags.AllowUnrestrictedSize.get();
-    auto parseResult = prepareL0StructuresLookupTable(lookupTable, deviceDesc->pNext);
+    auto parseResult = prepareL0StructuresLookupTable(lookupTable, deviceMemDesc->pNext);
 
     if (parseResult != ZE_RESULT_SUCCESS) {
         return parseResult;
@@ -261,11 +268,11 @@ ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice,
     deviceBitfields[rootDeviceIndex] = neoDevice->getDeviceBitfield();
 
     NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, alignment, this->driverHandle->rootDeviceIndices, deviceBitfields);
-    unifiedMemoryProperties.allocationFlags.flags.shareable = isShareableMemory(deviceDesc->pNext, static_cast<uint32_t>(lookupTable.exportMemory), neoDevice);
+    unifiedMemoryProperties.allocationFlags.flags.shareable = isShareableMemory(deviceMemDesc->pNext, static_cast<uint32_t>(lookupTable.exportMemory), neoDevice);
     unifiedMemoryProperties.device = neoDevice;
     unifiedMemoryProperties.allocationFlags.flags.compressedHint = isAllocationSuitableForCompression(lookupTable, *device, size);
 
-    if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
+    if (deviceMemDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
         unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1;
     }
 
@@ -316,6 +323,10 @@ ze_result_t ContextImp::allocSharedMem(ze_device_handle_t hDevice,
                                        size_t size,
                                        size_t alignment,
                                        void **ptr) {
+
+    auto deviceMemDesc = deviceDesc ? deviceDesc : &DefaultDescriptors::deviceMemDesc;
+    auto hostMemDesc = hostDesc ? hostDesc : &DefaultDescriptors::hostMemDesc;
+
     if (NEO::debugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) {
         size += (MemoryConstants::pageSize * NEO::debugManager.flags.ForceExtendedUSMBufferSize.get());
     }
@@ -329,7 +340,7 @@ ze_result_t ContextImp::allocSharedMem(ze_device_handle_t hDevice,
     StructuresLookupTable lookupTable = {};
 
     lookupTable.relaxedSizeAllowed = NEO::debugManager.flags.AllowUnrestrictedSize.get();
-    auto parseResult = prepareL0StructuresLookupTable(lookupTable, deviceDesc->pNext);
+    auto parseResult = prepareL0StructuresLookupTable(lookupTable, deviceMemDesc->pNext);
 
     if (parseResult != ZE_RESULT_SUCCESS) {
         return parseResult;
@@ -360,15 +371,15 @@ ze_result_t ContextImp::allocSharedMem(ze_device_handle_t hDevice,
                                                                            deviceBitfields);
     unifiedMemoryProperties.device = unifiedMemoryPropertiesDevice;
 
-    if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
+    if (deviceMemDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
         unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1;
     }
 
-    if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) {
+    if (deviceMemDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) {
         unifiedMemoryProperties.allocationFlags.allocFlags.usmInitialPlacementGpu = 1;
     }
 
-    if (hostDesc->flags & ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) {
+    if (hostMemDesc->flags & ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) {
         unifiedMemoryProperties.allocationFlags.allocFlags.usmInitialPlacementCpu = 1;
     }
 
@@ -377,7 +388,7 @@ ze_result_t ContextImp::allocSharedMem(ze_device_handle_t hDevice,
         unifiedMemoryProperties.allocationFlags.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
     }
 
-    if (hostDesc->flags & ZEX_HOST_MEM_ALLOC_FLAG_USE_HOST_PTR) {
+    if (hostMemDesc->flags & ZEX_HOST_MEM_ALLOC_FLAG_USE_HOST_PTR) {
         unifiedMemoryProperties.allocationFlags.hostptr = reinterpret_cast<uintptr_t>(*ptr);
     }
 
diff --git a/level_zero/core/source/helpers/default_descriptors.cpp b/level_zero/core/source/helpers/default_descriptors.cpp
index 804da11588..6a4e9a8c90 100644
--- a/level_zero/core/source/helpers/default_descriptors.cpp
+++ b/level_zero/core/source/helpers/default_descriptors.cpp
@@ -26,6 +26,18 @@ const ze_command_queue_desc_t commandQueueDesc = {
     .mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
     .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
 };
+
+const ze_device_mem_alloc_desc_t deviceMemDesc = {
+    .stype = ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
+    .pNext = nullptr,
+    .flags = static_cast<ze_device_mem_alloc_flags_t>(ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED),
+    .ordinal = 0};
+
+const ze_host_mem_alloc_desc_t hostMemDesc = {
+    .stype = ze_structure_type_t::ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC,
+    .pNext = nullptr,
+    .flags = static_cast<ze_host_mem_alloc_flags_t>(ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED | ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT)};
+
 } // namespace DefaultDescriptors
 
 } // namespace L0
\ No newline at end of file
diff --git a/level_zero/core/source/helpers/default_descriptors.h b/level_zero/core/source/helpers/default_descriptors.h
index 831337f063..57d2ea6941 100644
--- a/level_zero/core/source/helpers/default_descriptors.h
+++ b/level_zero/core/source/helpers/default_descriptors.h
@@ -12,5 +12,7 @@
 namespace L0 {
 namespace DefaultDescriptors {
 extern const ze_command_queue_desc_t commandQueueDesc;
-}
+extern const ze_device_mem_alloc_desc_t deviceMemDesc;
+extern const ze_host_mem_alloc_desc_t hostMemDesc;
+} // namespace DefaultDescriptors
 } // namespace L0
\ No newline at end of file
diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp
index 48bf6bd585..d19518d58d 100644
--- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp
+++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp
@@ -725,6 +725,64 @@ TEST_F(MemoryTest, whenAllocatingSharedMemoryWithUncachedFlagThenLocallyUncached
     ASSERT_EQ(result, ZE_RESULT_SUCCESS);
 }
 
+TEST_F(MemoryTest, whenAllocatingHostMemoryWithoutDescriptorThenCachedResourceIsCreated) {
+    size_t size = 10;
+    size_t alignment = 1u;
+    void *ptr = nullptr;
+
+    ze_result_t result = context->allocHostMem(nullptr, size, alignment, &ptr);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
+    EXPECT_NE(nullptr, ptr);
+
+    auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
+    EXPECT_NE(nullptr, allocData);
+    EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 0u);
+
+    result = context->freeMem(ptr);
+    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
+}
+
+TEST_F(MemoryTest, whenAllocatingDeviceMemoryWithoutDescriptorThenCachedResourceIsCreated) {
+    size_t size = 10;
+    size_t alignment = 1u;
+    void *ptr = nullptr;
+
+    ze_result_t result = context->allocDeviceMem(device->toHandle(),
+                                                 nullptr,
+                                                 size, alignment, &ptr);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
+    EXPECT_NE(nullptr, ptr);
+
+    auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
+    EXPECT_NE(nullptr, allocData);
+    EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 0u);
+
+    result = context->freeMem(ptr);
+    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
+}
+
+TEST_F(MemoryTest, whenAllocatingSharedMemoryWithoutDescriptorThenCachedResourceWithCpuInitialPlacementIsCreated) {
+    size_t size = 10;
+    size_t alignment = 1u;
+    void *ptr = nullptr;
+
+    ze_result_t result = context->allocSharedMem(device->toHandle(),
+                                                 nullptr,
+                                                 nullptr,
+                                                 size, alignment, &ptr);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
+    EXPECT_NE(nullptr, ptr);
+
+    auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
+    EXPECT_NE(nullptr, allocData);
+    EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 0u);
+    EXPECT_EQ(allocData->allocationFlagsProperty.allocFlags.usmInitialPlacementCpu, 1u);
+    EXPECT_EQ(allocData->allocationFlagsProperty.allocFlags.usmInitialPlacementGpu, 0u);
+
+    result = context->freeMem(ptr);
+    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
+}
+
 TEST_F(MemoryTest, whenAllocatingSharedMemoryWithUseHostPtrFlagThenExternalHostPtrIsSet) {
     size_t size = 10;
     size_t alignment = 1u;