From 9a73fa195b1ac1265454dff3150091809f409e94 Mon Sep 17 00:00:00 2001 From: "Warchulski, Jaroslaw" Date: Tue, 2 May 2023 12:27:55 +0000 Subject: [PATCH] feature: Add support for CL_DEVICE_HANDLE_LIST_KHR Related-To: NEO-6757 Signed-off-by: Warchulski, Jaroslaw --- .../helpers/cl_memory_properties_helpers.cpp | 13 ++++ opencl/source/mem_obj/buffer.cpp | 71 +++++++++++-------- opencl/source/mem_obj/image.cpp | 36 +++++++--- .../cl_memory_properties_helpers_tests.cpp | 40 +++++++++++ .../test/unit_test/mem_obj/buffer_tests.cpp | 34 +++++++++ opencl/test/unit_test/mem_obj/image_tests.cpp | 71 +++++++++++++++++++ .../memory_properties_flags.h | 3 +- 7 files changed, 229 insertions(+), 39 deletions(-) diff --git a/opencl/source/helpers/cl_memory_properties_helpers.cpp b/opencl/source/helpers/cl_memory_properties_helpers.cpp index 1587af0c96..1039dd7d1c 100644 --- a/opencl/source/helpers/cl_memory_properties_helpers.cpp +++ b/opencl/source/helpers/cl_memory_properties_helpers.cpp @@ -23,6 +23,7 @@ bool ClMemoryPropertiesHelper::parseMemoryProperties(const cl_mem_properties_int uint64_t handle = 0; uint64_t handleType = 0; uintptr_t hostptr = 0; + std::vector<Device *> devices; if (properties != nullptr) { for (int i = 0; properties[i] != 0; i += 2) { @@ -60,6 +61,17 @@ bool ClMemoryPropertiesHelper::parseMemoryProperties(const cl_mem_properties_int handle = static_cast<uint64_t>(properties[i + 1]); handleType = static_cast<uint64_t>(UnifiedSharingHandleType::Win32Nt); break; + case CL_DEVICE_HANDLE_LIST_KHR: + while (properties[i + 1] != CL_DEVICE_HANDLE_LIST_END_KHR) { + cl_device_id deviceId = reinterpret_cast<cl_device_id>(properties[i + 1]); + auto pClDevice = NEO::castToObject<ClDevice>(deviceId); + if ((pClDevice == nullptr) || (!context.isDeviceAssociated(*pClDevice))) { + return false; + } + devices.push_back(&pClDevice->getDevice()); + i++; + } + break; default: return false; } @@ -70,6 +82,7 @@ bool ClMemoryPropertiesHelper::parseMemoryProperties(const cl_mem_properties_int memoryProperties.handleType = 
handleType; memoryProperties.handle = handle; memoryProperties.hostptr = hostptr; + memoryProperties.associatedDevices = devices; switch (objectType) { case ClMemoryPropertiesHelper::ObjType::BUFFER: diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp index 67968df38d..91999f5073 100644 --- a/opencl/source/mem_obj/buffer.cpp +++ b/opencl/source/mem_obj/buffer.cpp @@ -264,13 +264,34 @@ Buffer *Buffer::create(Context *context, cl_int &errcodeRet) { errcodeRet = CL_SUCCESS; + + RootDeviceIndicesContainer rootDeviceIndices; + const RootDeviceIndicesContainer *pRootDeviceIndices; + uint32_t defaultRootDeviceIndex; + Device *defaultDevice; + + if (memoryProperties.associatedDevices.empty()) { + defaultDevice = &context->getDevice(0)->getDevice(); + defaultRootDeviceIndex = defaultDevice->getRootDeviceIndex(); + pRootDeviceIndices = &context->getRootDeviceIndices(); + } else { + for (const auto &device : memoryProperties.associatedDevices) { + rootDeviceIndices.push_back(device->getRootDeviceIndex()); + } + defaultDevice = memoryProperties.associatedDevices[0]; + defaultRootDeviceIndex = rootDeviceIndices[0]; + rootDeviceIndices.remove_duplicates(); + pRootDeviceIndices = &rootDeviceIndices; + } + Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator(); - const bool implicitScalingEnabled = ImplicitScalingHelper::isImplicitScalingEnabled(context->getDevice(0u)->getDeviceBitfield(), true); + const bool implicitScalingEnabled = ImplicitScalingHelper::isImplicitScalingEnabled(defaultDevice->getDeviceBitfield(), true); const bool useHostPtr = memoryProperties.flags.useHostPtr; const bool copyHostPtr = memoryProperties.flags.copyHostPtr; if (implicitScalingEnabled == false && useHostPtr == false && - memoryProperties.flags.forceHostMemory == false) { + memoryProperties.flags.forceHostMemory == false && + memoryProperties.associatedDevices.empty()) { cl_int poolAllocRet = CL_SUCCESS; auto bufferFromPool = 
bufferPoolAllocator.allocateBufferFromPool(memoryProperties, flags, @@ -281,16 +302,12 @@ Buffer *Buffer::create(Context *context, if (CL_SUCCESS == poolAllocRet) { const bool needsCopy = copyHostPtr; if (needsCopy) { - for (auto &clDevice : context->getDevices()) { - if (copyHostPointer(bufferFromPool, - clDevice->getDevice(), - size, - hostPtr, - implicitScalingEnabled, - poolAllocRet)) { - break; - } - } + copyHostPointer(bufferFromPool, + *defaultDevice, + size, + hostPtr, + implicitScalingEnabled, + poolAllocRet); } if (!needsCopy || poolAllocRet == CL_SUCCESS) { return bufferFromPool; @@ -312,7 +329,7 @@ Buffer *Buffer::create(Context *context, void *allocationCpuPtr = nullptr; bool forceCopyHostPtr = false; - for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { + for (auto &rootDeviceIndex : *pRootDeviceIndices) { allocationInfos[rootDeviceIndex] = {}; auto &allocationInfo = allocationInfos[rootDeviceIndex]; @@ -465,11 +482,10 @@ Buffer *Buffer::create(Context *context, } } - auto rootDeviceIndex = context->getDevice(0u)->getRootDeviceIndex(); - auto &allocationInfo = allocationInfos[rootDeviceIndex]; + auto &allocationInfo = allocationInfos[defaultRootDeviceIndex]; auto allocation = allocationInfo.memory; auto memoryStorage = allocation->getUnderlyingBuffer(); - if (context->getRootDeviceIndices().size() > 1) { + if (pRootDeviceIndices->size() > 1) { multiGraphicsAllocation.setMultiStorage(!MemoryPoolHelper::isSystemMemoryPool(allocation->getMemoryPool())); } @@ -497,7 +513,7 @@ Buffer *Buffer::create(Context *context, ", GPU address: ", allocationInfo.memory->getGpuAddress(), ", memoryPool: ", getMemoryPoolString(allocationInfo.memory)); - for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { + for (auto &rootDeviceIndex : *pRootDeviceIndices) { auto &allocationInfo = allocationInfos[rootDeviceIndex]; if (useHostPtr) { if (!allocationInfo.zeroCopyAllowed && !allocationInfo.isHostPtrSVM) { @@ -520,18 +536,15 @@ Buffer 
*Buffer::create(Context *context, pBuffer->setHostPtrMinSize(size); } if (allocationInfo.copyMemoryFromHostPtr) { - for (auto &clDevice : context->getDevices()) { - if (copyHostPointer(pBuffer, - clDevice->getDevice(), - size, - hostPtr, - implicitScalingEnabled, - errcodeRet)) { - auto migrationSyncData = pBuffer->getMultiGraphicsAllocation().getMigrationSyncData(); - if (migrationSyncData) { - migrationSyncData->setCurrentLocation(clDevice->getRootDeviceIndex()); - } - break; + if (copyHostPointer(pBuffer, + *defaultDevice, + size, + hostPtr, + implicitScalingEnabled, + errcodeRet)) { + auto migrationSyncData = pBuffer->getMultiGraphicsAllocation().getMigrationSyncData(); + if (migrationSyncData) { + migrationSyncData->setCurrentLocation(defaultRootDeviceIndex); } } } diff --git a/opencl/source/mem_obj/image.cpp b/opencl/source/mem_obj/image.cpp index 0bd989cb70..5a2338db4c 100644 --- a/opencl/source/mem_obj/image.cpp +++ b/opencl/source/mem_obj/image.cpp @@ -8,6 +8,7 @@ #include "opencl/source/mem_obj/image.h" #include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/device/device.h" #include "shared/source/device/device_info.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" @@ -131,6 +132,25 @@ Image *Image::create(Context *context, cl_int &errcodeRet) { UNRECOVERABLE_IF(surfaceFormat == nullptr); + RootDeviceIndicesContainer rootDeviceIndices; + const RootDeviceIndicesContainer *pRootDeviceIndices; + uint32_t defaultRootDeviceIndex; + Device *defaultDevice; + + if (memoryProperties.associatedDevices.empty()) { + defaultDevice = &context->getDevice(0)->getDevice(); + defaultRootDeviceIndex = defaultDevice->getRootDeviceIndex(); + pRootDeviceIndices = &context->getRootDeviceIndices(); + } else { + for (const auto &device : memoryProperties.associatedDevices) { + rootDeviceIndices.push_back(device->getRootDeviceIndex()); + } + 
defaultDevice = memoryProperties.associatedDevices[0]; + defaultRootDeviceIndex = rootDeviceIndices[0]; + rootDeviceIndices.remove_duplicates(); + pRootDeviceIndices = &rootDeviceIndices; + } + size_t imageWidth = imageDesc->image_width; size_t imageHeight = getImageHeight(*imageDesc); size_t imageDepth = getImageDepth(*imageDesc); @@ -153,19 +173,17 @@ Image *Image::create(Context *context, : imageWidth * surfaceFormat->surfaceFormat.imageElementSizeInBytes; const auto hostPtrSlicePitch = getHostPtrSlicePitch(*imageDesc, hostPtrRowPitch, imageHeight); - auto defaultClDevice = context->getDevice(0); - auto defaultRootDeviceIndex = defaultClDevice->getRootDeviceIndex(); - auto &defaultProductHelper = defaultClDevice->getProductHelper(); + auto &defaultProductHelper = defaultDevice->getProductHelper(); imgInfo.linearStorage = defaultProductHelper.isLinearStoragePreferred(context->isSharedContext, Image::isImage1d(*imageDesc), memoryProperties.flags.forceLinearStorage); // if device doesn't support images, it can create only linear images - if (!context->getDevice(0)->getSharedDeviceInfo().imageSupport && !imgInfo.linearStorage) { + if (!defaultDevice->getDeviceInfo().imageSupport && !imgInfo.linearStorage) { errcodeRet = CL_INVALID_OPERATION; return nullptr; } - auto &clGfxCoreHelper = defaultClDevice->getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>(); + auto &clGfxCoreHelper = defaultDevice->getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>(); bool preferCompression = MemObjHelper::isSuitableForCompression(!imgInfo.linearStorage, memoryProperties, *context, true); preferCompression &= clGfxCoreHelper.allowImageCompression(surfaceFormat->oclImageFormat); @@ -185,7 +203,7 @@ Image *Image::create(Context *context, auto imageFromBuffer = isImageFromBuffer(*imageDesc, parentBuffer); // get allocation for image - for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { + for (auto &rootDeviceIndex : *pRootDeviceIndices) { allocationInfos[rootDeviceIndex] = {}; auto &allocationInfo = 
allocationInfos[rootDeviceIndex]; allocationInfo.zeroCopyAllowed = false; @@ -250,7 +268,7 @@ Image *Image::create(Context *context, multiGraphicsAllocation.addAllocation(allocationInfo.memory); } - if (context->getRootDeviceIndices().size() > 1) { + if (pRootDeviceIndices->size() > 1) { multiGraphicsAllocation.setMultiStorage(!MemoryPoolHelper::isSystemMemoryPool(allocationInfos[defaultRootDeviceIndex].memory->getMemoryPool())); } @@ -260,7 +278,7 @@ Image *Image::create(Context *context, setImageProperties(image, *imageDesc, imgInfo, parentImage, parentBuffer, hostPtrRowPitch, hostPtrSlicePitch, imageCount, hostPtrMinSize); errcodeRet = CL_SUCCESS; - auto &defaultHwInfo = defaultClDevice->getHardwareInfo(); + auto &defaultHwInfo = defaultDevice->getHardwareInfo(); if (context->isProvidingPerformanceHints()) { auto &allocationInfo = allocationInfos[defaultRootDeviceIndex]; @@ -281,7 +299,7 @@ Image *Image::create(Context *context, copyRegion = {imageWidth, imageCount, 1}; } - auto &defaultGfxCoreHelper = defaultClDevice->getGfxCoreHelper(); + auto &defaultGfxCoreHelper = defaultDevice->getGfxCoreHelper(); auto allocationInSystemMemory = MemoryPoolHelper::isSystemMemoryPool(memory->getMemoryPool()); bool isCpuTransferPreferred = imgInfo.linearStorage && defaultGfxCoreHelper.isCpuImageTransferPreferred(defaultHwInfo); bool isCpuTransferPreferredInSystemMemory = imgInfo.linearStorage && allocationInSystemMemory; diff --git a/opencl/test/unit_test/helpers/cl_memory_properties_helpers_tests.cpp b/opencl/test/unit_test/helpers/cl_memory_properties_helpers_tests.cpp index ae3e5c5003..1c6255b7e6 100644 --- a/opencl/test/unit_test/helpers/cl_memory_properties_helpers_tests.cpp +++ b/opencl/test/unit_test/helpers/cl_memory_properties_helpers_tests.cpp @@ -428,6 +428,46 @@ TEST_F(MemoryPropertiesHelperTests, givenDmaBufWhenParsePropertiesThenHandleIsSe EXPECT_EQ(memoryProperties.handle, 0x1234u); } +TEST_F(MemoryPropertiesHelperTests, 
givenDeviceHandleListWhenParsePropertiesThenAssociatedDevicesAreSet) { + auto clDevice = context.getDevice(0); + auto clDevice2 = context.getDevice(1); + cl_device_id deviceId = clDevice; + cl_device_id deviceId2 = clDevice2; + + cl_mem_properties_intel properties[] = { + CL_DEVICE_HANDLE_LIST_KHR, + reinterpret_cast<cl_mem_properties_intel>(deviceId), + reinterpret_cast<cl_mem_properties_intel>(deviceId2), + CL_DEVICE_HANDLE_LIST_END_KHR, + 0}; + + EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, + ClMemoryPropertiesHelper::ObjType::BUFFER, context)); + + EXPECT_EQ(memoryProperties.associatedDevices[0], &clDevice->getDevice()); + EXPECT_EQ(memoryProperties.associatedDevices[1], &clDevice2->getDevice()); +} + +TEST_F(MemoryPropertiesHelperTests, givenDeviceHandleListWhenParsePropertiesThenAssociatedDevicesAreNotSet) { + cl_mem_properties_intel properties[] = { + CL_DEVICE_HANDLE_LIST_KHR, + reinterpret_cast<cl_mem_properties_intel>(&context), + CL_DEVICE_HANDLE_LIST_END_KHR, + 0}; + + EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, + ClMemoryPropertiesHelper::ObjType::BUFFER, context)); + + HardwareInfo hwInfo = *defaultHwInfo; + auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo)); + cl_device_id deviceId = clDevice.get(); + + properties[1] = reinterpret_cast<cl_mem_properties_intel>(deviceId); + + EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, + ClMemoryPropertiesHelper::ObjType::BUFFER, context)); +} + TEST_F(MemoryPropertiesHelperTests, WhenAdjustingDeviceBitfieldThenCorrectBitfieldIsReturned) { UltClDeviceFactory deviceFactory{2, 4}; auto memoryPropertiesRootDevice0 = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &deviceFactory.rootDevices[0]->getDevice()); diff --git a/opencl/test/unit_test/mem_obj/buffer_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_tests.cpp index 7429bce235..ca19e2bdd5 
100644 --- a/opencl/test/unit_test/mem_obj/buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_tests.cpp @@ -619,6 +619,40 @@ TEST(Buffer, givenClMemCopyHostPointerPassedToBufferCreateWhenAllocationIsNotInS } } +TEST(Buffer, givenPropertiesWithClDeviceHandleListKHRWhenCreateBufferThenCorrectBufferIsSet) { + MockDefaultContext context; + auto clDevice = context.getDevice(1); + auto clDevice2 = context.getDevice(2); + cl_device_id deviceId = clDevice; + cl_device_id deviceId2 = clDevice2; + + cl_mem_properties_intel properties[] = { + CL_DEVICE_HANDLE_LIST_KHR, + reinterpret_cast<cl_mem_properties_intel>(deviceId), + reinterpret_cast<cl_mem_properties_intel>(deviceId2), + CL_DEVICE_HANDLE_LIST_END_KHR, + 0}; + + cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; + cl_int retVal = CL_INVALID_VALUE; + MemoryProperties memoryProperties{}; + cl_mem_flags_intel flagsIntel = 0; + cl_mem_alloc_flags_intel allocflags = 0; + uint8_t data; + + ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, + ClMemoryPropertiesHelper::ObjType::BUFFER, context); + + Buffer *buffer = Buffer::create(&context, memoryProperties, flags, flagsIntel, 1, &data, retVal); + + EXPECT_EQ(retVal, CL_SUCCESS); + EXPECT_EQ(buffer->getGraphicsAllocation(0), nullptr); + EXPECT_NE(buffer->getGraphicsAllocation(1), nullptr); + EXPECT_NE(buffer->getGraphicsAllocation(2), nullptr); + + clReleaseMemObject(buffer); +} + struct CompressedBuffersTests : public ::testing::Test { void SetUp() override { ExecutionEnvironment *executionEnvironment = MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u); diff --git a/opencl/test/unit_test/mem_obj/image_tests.cpp b/opencl/test/unit_test/mem_obj/image_tests.cpp index e4bf82510c..1db6d76119 100644 --- a/opencl/test/unit_test/mem_obj/image_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_tests.cpp @@ -1803,6 +1803,77 @@ TEST(ImageTest, givenMultiDeviceEnvironmentWhenReleaseImageFromBufferThenMainBuf buffer->release(); } +TEST(ImageTest, 
givenPropertiesWithClDeviceHandleListKHRWhenCreateImageThenCorrectImageIsSet) { + MockDefaultContext context(1); + auto clDevice = context.getDevice(1); + auto clDevice2 = context.getDevice(2); + cl_device_id deviceId = clDevice; + cl_device_id deviceId2 = clDevice2; + + cl_mem_properties_intel properties[] = { + CL_DEVICE_HANDLE_LIST_KHR, + reinterpret_cast<cl_mem_properties_intel>(deviceId), + reinterpret_cast<cl_mem_properties_intel>(deviceId2), + CL_DEVICE_HANDLE_LIST_END_KHR, + 0}; + + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.ForceLinearImages.set(true); + + cl_image_format imageFormat; + cl_image_desc imageDesc; + cl_int retVal; + + char hostPtr[elementSize * 2 + 64]{}; + + imageFormat.image_channel_data_type = channelType; + imageFormat.image_channel_order = channelOrder; + + imageDesc.num_mip_levels = 0; + imageDesc.num_samples = 0; + imageDesc.mem_object = NULL; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; + imageDesc.image_width = 1; + imageDesc.image_height = 0; + imageDesc.image_depth = 0; + imageDesc.image_array_size = 0; + imageDesc.image_row_pitch = 0; + imageDesc.image_slice_pitch = 0; + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; + cl_mem_flags_intel flagsIntel = 0; + cl_mem_alloc_flags_intel allocflags = 0; + MemoryProperties memoryProperties{}; + + ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, + ClMemoryPropertiesHelper::ObjType::IMAGE, context); + + auto surfaceFormat = Image::getSurfaceFormatFromTable( + flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); + + auto image = Image::create( + &context, + memoryProperties, + flags, + 0, + surfaceFormat, + &imageDesc, + hostPtr, + retVal); + + EXPECT_EQ(retVal, CL_SUCCESS); + ASSERT_NE(nullptr, image); + + EXPECT_EQ(image->getGraphicsAllocation(0), nullptr); + EXPECT_NE(image->getGraphicsAllocation(1), nullptr); + EXPECT_NE(image->getGraphicsAllocation(2), nullptr); + + 
EXPECT_EQ(static_cast<size_t>(elementSize), image->getHostPtrRowPitch()); + EXPECT_EQ(0u, image->getHostPtrSlicePitch()); + + delete image; +} + using MultiRootDeviceImageTest = ::testing::Test; HWTEST2_F(MultiRootDeviceImageTest, givenHostPtrToCopyWhenImageIsCreatedWithMultiStorageThenMemoryIsPutInFirstDeviceInContext, IsAtLeastGen12lp) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); diff --git a/shared/source/memory_properties/memory_properties_flags.h b/shared/source/memory_properties/memory_properties_flags.h index e231c12218..dff828f36e 100644 --- a/shared/source/memory_properties/memory_properties_flags.h +++ b/shared/source/memory_properties/memory_properties_flags.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -17,6 +17,7 @@ struct MemoryProperties { uint64_t handleType = 0; uintptr_t hostptr = 0; const Device *pDevice = nullptr; + std::vector<Device *> associatedDevices; uint32_t memCacheClos = 0; union { MemoryFlags flags;