fix OCL: set correct memory location after initial transfer to image

When a multi-storage image is initialized from host memory, we need to track
which root device holds the actual (up-to-date) memory after the initial transfer.

Related-To: NEO-5735
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2023-02-02 17:22:51 +00:00 committed by Compute-Runtime-Automation
parent d0c0c60205
commit 078224d400
2 changed files with 110 additions and 49 deletions

View File

@ -21,6 +21,7 @@
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/migration_sync_data.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/cl_device/cl_device_get_cap.inl"
@ -152,7 +153,9 @@ Image *Image::create(Context *context,
: imageWidth * surfaceFormat->surfaceFormat.ImageElementSizeInBytes;
const auto hostPtrSlicePitch = getHostPtrSlicePitch(*imageDesc, hostPtrRowPitch, imageHeight);
auto &defaultGfxCoreHelper = context->getDevice(0)->getGfxCoreHelper();
auto defaultClDevice = context->getDevice(0);
auto defaultRootDeviceIndex = defaultClDevice->getRootDeviceIndex();
auto &defaultGfxCoreHelper = defaultClDevice->getGfxCoreHelper();
imgInfo.linearStorage = defaultGfxCoreHelper.isLinearStoragePreferred(context->isSharedContext, Image::isImage1d(*imageDesc),
memoryProperties.flags.forceLinearStorage);
@ -162,7 +165,7 @@ Image *Image::create(Context *context,
return nullptr;
}
auto &clGfxCoreHelper = context->getDevice(0)->getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>();
auto &clGfxCoreHelper = defaultClDevice->getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>();
bool preferCompression = MemObjHelper::isSuitableForCompression(!imgInfo.linearStorage, memoryProperties,
*context, true);
preferCompression &= clGfxCoreHelper.allowImageCompression(surfaceFormat->OCLImageFormat);
@ -247,7 +250,6 @@ Image *Image::create(Context *context,
multiGraphicsAllocation.addAllocation(allocationInfo.memory);
}
auto defaultRootDeviceIndex = context->getDevice(0u)->getRootDeviceIndex();
multiGraphicsAllocation.setMultiStorage(context->getRootDeviceIndices().size() > 1);
Image *image = createImageHw(context, memoryProperties, flags, flagsIntel, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat,
@ -255,59 +257,60 @@ Image *Image::create(Context *context,
setImageProperties(image, *imageDesc, imgInfo, parentImage, parentBuffer, hostPtrRowPitch, hostPtrSlicePitch, imageCount, hostPtrMinSize);
// transfer Memory if needed
bool isMemoryTransferred = false;
errcodeRet = CL_SUCCESS;
auto &defaultHwInfo = defaultClDevice->getHardwareInfo();
if (context->isProvidingPerformanceHints()) {
for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
errcodeRet = CL_SUCCESS;
auto &allocationInfo = allocationInfos[rootDeviceIndex];
auto &hwInfo = *memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
auto &allocationInfo = allocationInfos[defaultRootDeviceIndex];
if (context->isProvidingPerformanceHints()) {
providePerformanceHintForCreateImage(image, hwInfo, allocationInfo, context);
}
auto isMemoryTransferNeeded = !isMemoryTransferred && allocationInfo.transferNeeded;
if (isMemoryTransferNeeded) {
std::array<size_t, 3> copyOrigin = {{0, 0, 0}};
std::array<size_t, 3> copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}};
if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
copyRegion = {imageWidth, imageCount, 1};
}
auto allocationInSystemMemory = MemoryPoolHelper::isSystemMemoryPool(allocationInfo.memory->getMemoryPool());
bool isCpuTransferPreferred = imgInfo.linearStorage && defaultGfxCoreHelper.isCpuImageTransferPreferred(hwInfo);
bool isCpuTransferPreferredInSystemMemory = imgInfo.linearStorage && allocationInSystemMemory;
if (isCpuTransferPreferredInSystemMemory) {
void *pDestinationAddress = allocationInfo.memory->getUnderlyingBuffer();
image->transferData(pDestinationAddress, imgInfo.rowPitch, imgInfo.slicePitch,
const_cast<void *>(hostPtr), hostPtrRowPitch, hostPtrSlicePitch,
copyRegion, copyOrigin);
} else if (isCpuTransferPreferred) {
void *pDestinationAddress = context->getMemoryManager()->lockResource(allocationInfo.memory);
image->transferData(pDestinationAddress, imgInfo.rowPitch, imgInfo.slicePitch,
const_cast<void *>(hostPtr), hostPtrRowPitch, hostPtrSlicePitch,
copyRegion, copyOrigin);
context->getMemoryManager()->unlockResource(allocationInfo.memory);
} else {
auto cmdQ = context->getSpecialQueue(rootDeviceIndex);
if (isNV12Image(&image->getImageFormat())) {
errcodeRet = image->writeNV12Planes(hostPtr, hostPtrRowPitch, rootDeviceIndex);
} else {
errcodeRet = cmdQ->enqueueWriteImage(image, CL_TRUE, &copyOrigin[0], &copyRegion[0],
hostPtrRowPitch, hostPtrSlicePitch,
hostPtr, allocationInfo.mapAllocation, 0, nullptr, nullptr);
}
}
isMemoryTransferred = true;
}
providePerformanceHintForCreateImage(image, defaultHwInfo, allocationInfo, context);
}
for (auto &allocationInfo : allocationInfos) {
if (allocationInfo.mapAllocation) {
image->mapAllocations.addAllocation(allocationInfo.mapAllocation);
}
}
if (allocationInfos[defaultRootDeviceIndex].transferNeeded) {
auto memory = image->getGraphicsAllocation(defaultRootDeviceIndex);
std::array<size_t, 3> copyOrigin = {{0, 0, 0}};
std::array<size_t, 3> copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}};
if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
copyRegion = {imageWidth, imageCount, 1};
}
auto allocationInSystemMemory = MemoryPoolHelper::isSystemMemoryPool(memory->getMemoryPool());
bool isCpuTransferPreferred = imgInfo.linearStorage && defaultGfxCoreHelper.isCpuImageTransferPreferred(defaultHwInfo);
bool isCpuTransferPreferredInSystemMemory = imgInfo.linearStorage && allocationInSystemMemory;
if (isCpuTransferPreferredInSystemMemory) {
void *pDestinationAddress = memory->getUnderlyingBuffer();
image->transferData(pDestinationAddress, imgInfo.rowPitch, imgInfo.slicePitch,
const_cast<void *>(hostPtr), hostPtrRowPitch, hostPtrSlicePitch,
copyRegion, copyOrigin);
} else if (isCpuTransferPreferred) {
void *pDestinationAddress = context->getMemoryManager()->lockResource(memory);
image->transferData(pDestinationAddress, imgInfo.rowPitch, imgInfo.slicePitch,
const_cast<void *>(hostPtr), hostPtrRowPitch, hostPtrSlicePitch,
copyRegion, copyOrigin);
context->getMemoryManager()->unlockResource(memory);
} else {
auto cmdQ = context->getSpecialQueue(defaultRootDeviceIndex);
if (isNV12Image(&image->getImageFormat())) {
errcodeRet = image->writeNV12Planes(hostPtr, hostPtrRowPitch, defaultRootDeviceIndex);
} else {
errcodeRet = cmdQ->enqueueWriteImage(image, CL_TRUE, &copyOrigin[0], &copyRegion[0],
hostPtrRowPitch, hostPtrSlicePitch,
hostPtr, image->getMapAllocation(defaultRootDeviceIndex), 0, nullptr, nullptr);
}
}
auto migrationSyncData = image->getMultiGraphicsAllocation().getMigrationSyncData();
if (migrationSyncData) {
migrationSyncData->setCurrentLocation(defaultRootDeviceIndex);
}
}
if (imageFromBuffer) {
parentBuffer->incRefInternal();

View File

@ -10,6 +10,7 @@
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/image/image_surface_state.h"
#include "shared/source/memory_manager/migration_sync_data.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/fixtures/memory_management_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@ -1801,3 +1802,60 @@ TEST(ImageTest, givenMultiDeviceEnvironmentWhenReleaseImageFromBufferThenMainBuf
buffer->release();
}
// Creates a 1D image with CL_MEM_COPY_HOST_PTR in a context that spans two root devices and checks
// that the image's migration sync data reports the first device of the context as the current
// location of the memory. The scenario is run for both device orderings.
TEST(ImageTest, givenHostPtrToCopyWhenImageIsCreatedWithMultiStorageThenMemoryIsPutInFirstDeviceInContext) {
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;

    cl_image_desc imageDesc{};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 4;
    imageDesc.image_height = 1;
    imageDesc.image_row_pitch = 4;

    cl_image_format imageFormat = {};
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    imageFormat.image_channel_order = CL_R;

    UltClDeviceFactory deviceFactory{2, 0};

    // NOTE(review): as in the original expectations (0u / 1u), this assumes that
    // deviceFactory.rootDevices[i] has root device index i - confirm against UltClDeviceFactory.
    for (uint32_t firstDevice = 0u; firstDevice < 2u; firstDevice++) {
        const uint32_t secondDevice = 1u - firstDevice;
        cl_device_id deviceIds[] = {
            deviceFactory.rootDevices[firstDevice],
            deviceFactory.rootDevices[secondDevice]};

        MockContext context{nullptr, nullptr};
        context.initializeWithDevices(ClDeviceVector{deviceIds, 2}, false);

        // Host data copied into the image: 4 texels of a single 8-bit channel.
        uint32_t data{};
        // Start from a failure code so that the CL_SUCCESS check below is meaningful.
        cl_int retVal = CL_INVALID_VALUE;

        auto surfaceFormat = Image::getSurfaceFormatFromTable(
            flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);

        std::unique_ptr<Image> image(
            Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                          flags, 0, surfaceFormat, &imageDesc, &data, retVal));

        // Fatal assertion: the image is dereferenced below, so a failed creation must stop the test
        // here instead of crashing the whole test binary.
        ASSERT_NE(nullptr, image) << "first device in context: " << firstDevice;
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(2u, context.getRootDeviceIndices().size());

        auto migrationSyncData = image->getMultiGraphicsAllocation().getMigrationSyncData();
        // Fatal assertion for the same reason: the pointer is dereferenced right after.
        ASSERT_NE(nullptr, migrationSyncData) << "first device in context: " << firstDevice;
        EXPECT_EQ(firstDevice, migrationSyncData->getCurrentLocation());
    }
}