Fixes for misaligned hostPtr enqueueReadWrite

- use getGpuAddress for BuiltinOpParams
- fix read/writeImage

Change-Id: I2e6e9a1d91871fa9f22851f31eb5a7b337b5aecc
This commit is contained in:
Hoppe, Mateusz
2019-01-10 15:38:56 +01:00
committed by sys_ocldev
parent 3c59bae5a4
commit 64ff9d30b7
11 changed files with 275 additions and 25 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -476,6 +476,7 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToImage3d> : public BuiltinDisp
// Determine size of host ptr surface for residency purposes
size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
hostPtrSize += operationParams.srcOffset.x;
// Set-up kernel
auto bytesExponent = Math::log2(bytesPerPixel);
@@ -562,6 +563,7 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDisp
// Determine size of host ptr surface for residency purposes
size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
hostPtrSize += operationParams.dstOffset.x;
// Set-up ISA
auto bytesExponent = Math::log2(bytesPerPixel);

View File

@@ -88,13 +88,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
BuiltInOwnershipWrapper builtInLock(builder, this->context);
void *dstPtr = ptr;
void *alignedDstPtr = dstPtr;
size_t dstPtrOffset = 0;
if (!isAligned<4>(dstPtr)) {
alignedDstPtr = alignDown(dstPtr, 4);
dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
}
MemObjSurface bufferSurf(buffer);
HostPtrSurface hostPtrSurf(dstPtr, size);
@@ -105,8 +98,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedDstPtr = alignDown(dstPtr, 4);
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
dc.dstPtr = alignedDstPtr;
dc.dstOffset = {dstPtrOffset, 0, 0};

View File

@@ -80,7 +80,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedDstPtr = alignDown(dstPtr, 4);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -86,12 +86,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedDstPtr = alignDown(dstPtr, 4);
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
dc.srcMemObj = srcImage;
dc.dstPtr = dstPtr;
dc.dstPtr = alignedDstPtr;
dc.dstOffset.x = dstPtrOffset;
dc.srcOffset = origin;
dc.size = region;
dc.srcRowPitch = inputRowPitch;

View File

@@ -89,13 +89,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
BuiltInOwnershipWrapper builtInLock(builder, this->context);
void *srcPtr = const_cast<void *>(ptr);
void *alignedSrcPtr = srcPtr;
size_t srcPtrOffset = 0;
if (!isAligned<4>(srcPtr)) {
alignedSrcPtr = alignDown(srcPtr, 4);
srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
}
HostPtrSurface hostPtrSurf(srcPtr, size, true);
MemObjSurface bufferSurf(buffer);
@@ -106,8 +99,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedSrcPtr = alignDown(srcPtr, 4);
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
dc.srcPtr = alignedSrcPtr;
dc.srcOffset = {srcPtrOffset, 0, 0};

View File

@@ -79,7 +79,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedSrcPtr = alignDown(srcPtr, 4);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -80,11 +80,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedSrcPtr = alignDown(srcPtr, 4);
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
dc.srcPtr = srcPtr;
dc.srcPtr = alignedSrcPtr;
dc.srcOffset.x = srcPtrOffset;
dc.dstMemObj = dstImage;
dc.dstOffset = origin;
dc.size = region;

View File

@@ -18,6 +18,7 @@ target_sources(igdrcl_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_image_aub_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_buffer_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_image_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_with_timestamp_packet_aub_tests.cpp

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -12,6 +12,7 @@
#include "runtime/mem_obj/image.h"
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
#include "unit_tests/aub_tests/command_queue/enqueue_read_write_image_aub_fixture.h"
#include "unit_tests/mocks/mock_context.h"
#include "test.h"
@@ -222,3 +223,19 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values( // channels
CL_R, CL_RG, CL_RGBA),
::testing::ValuesIn(readImageParams)));
using AUBReadImageUnaligned = AUBImageUnaligned;

// Runs the unaligned read-image scenario for every combination of pixel size,
// host-pointer byte offset and region width (in pixels) listed below.
HWTEST_F(AUBReadImageUnaligned, misalignedHostPtr) {
    const size_t bytesPerPixelCases[] = {1, 2, 4};
    const size_t hostPtrOffsetCases[] = {0, 1, 2, 3};
    const size_t regionWidthCases[] = {3, 2, 1};

    for (const size_t bytesPerPixel : bytesPerPixelCases) {
        for (const size_t hostPtrOffset : hostPtrOffsetCases) {
            for (const size_t regionWidth : regionWidthCases) {
                testReadImageUnaligned<FamilyType>(hostPtrOffset, regionWidth, bytesPerPixel);
            }
        }
    }
}

View File

@@ -0,0 +1,211 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/mem_obj/image.h"
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/mocks/mock_context.h"
#include "test.h"
using namespace OCLRT;
struct AUBImageUnaligned
: public CommandEnqueueAUBFixture,
public ::testing::Test {
void SetUp() override {
CommandEnqueueAUBFixture::SetUp();
}
void TearDown() override {
CommandEnqueueAUBFixture::TearDown();
}
template <typename FamilyType>
void testReadImageUnaligned(size_t offset, size_t size, size_t pixelSize) {
MockContext context(&pCmdQ->getDevice());
char srcMemory[] = "_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnoprstuwxyz";
const auto bufferSize = sizeof(srcMemory) - 1;
char *imageMemory = &srcMemory[1]; //ensure non cacheline-aligned hostPtr to create non-zerocopy image
void *dstMemory = alignedMalloc(bufferSize, MemoryConstants::pageSize);
memset(dstMemory, 0, bufferSize);
char referenceMemory[bufferSize] = {0};
const size_t testWidth = bufferSize / 4 / pixelSize;
const size_t testHeight = 4;
const size_t testDepth = 1;
cl_image_format imageFormat;
cl_image_desc imageDesc;
imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
switch (pixelSize) {
case 1:
imageFormat.image_channel_order = CL_R;
break;
case 2:
imageFormat.image_channel_order = CL_RG;
break;
case 3:
ASSERT_TRUE(false);
break;
case 4:
imageFormat.image_channel_order = CL_RGBA;
break;
}
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
imageDesc.image_width = testWidth;
imageDesc.image_height = testHeight;
imageDesc.image_depth = testDepth;
imageDesc.image_array_size = 1;
imageDesc.image_row_pitch = 0;
imageDesc.image_slice_pitch = 0;
imageDesc.num_mip_levels = 0;
imageDesc.num_samples = 0;
imageDesc.mem_object = NULL;
auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize);
auto dstMemoryGPUPtr = reinterpret_cast<char *>(graphicsAllocation->getGpuAddress());
cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE;
auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);
auto retVal = CL_INVALID_VALUE;
auto image = std::unique_ptr<Image>(Image::create(
&context,
flags,
surfaceFormat,
&imageDesc,
imageMemory,
retVal));
ASSERT_NE(nullptr, image);
EXPECT_FALSE(image->isMemObjZeroCopy());
const size_t origin[3] = {0, 1, 0};
const size_t region[3] = {size, 1, 1};
size_t inputRowPitch = testWidth;
size_t inputSlicePitch = inputRowPitch * testHeight;
retVal = pCmdQ->enqueueReadImage(
image.get(),
CL_TRUE,
origin,
region,
inputRowPitch,
inputSlicePitch,
ptrOffset(dstMemory, offset),
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
pCmdQ->finish(true);
AUBCommandStreamFixture::expectMemory<FamilyType>(dstMemoryGPUPtr, referenceMemory, offset);
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, offset), &imageMemory[inputRowPitch * origin[1] * pixelSize], size * pixelSize);
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, size * pixelSize + offset), referenceMemory, bufferSize - offset - size * pixelSize);
alignedFree(dstMemory);
}
template <typename FamilyType>
void testWriteImageUnaligned(size_t offset, size_t size, size_t pixelSize) {
DebugManagerStateRestore restorer;
DebugManager.flags.ForceLinearImages.set(true);
MockContext context(&pCmdQ->getDevice());
char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnoprstuwxyz";
const auto bufferSize = sizeof(srcMemory);
char dstMemory[bufferSize + 1] = {0};
char *imageMemory = &dstMemory[1]; //ensure non cacheline-aligned hostPtr to create non-zerocopy image
char referenceMemory[bufferSize] = {0};
const size_t testWidth = bufferSize / 4 / pixelSize;
const size_t testHeight = 4;
const size_t testDepth = 1;
cl_image_format imageFormat;
cl_image_desc imageDesc;
imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
switch (pixelSize) {
case 1:
imageFormat.image_channel_order = CL_R;
break;
case 2:
imageFormat.image_channel_order = CL_RG;
break;
case 3:
ASSERT_TRUE(false);
break;
case 4:
imageFormat.image_channel_order = CL_RGBA;
break;
}
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
imageDesc.image_width = testWidth;
imageDesc.image_height = testHeight;
imageDesc.image_depth = testDepth;
imageDesc.image_array_size = 1;
imageDesc.image_row_pitch = 0;
imageDesc.image_slice_pitch = 0;
imageDesc.num_mip_levels = 0;
imageDesc.num_samples = 0;
imageDesc.mem_object = NULL;
cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE;
auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);
auto retVal = CL_INVALID_VALUE;
auto image = std::unique_ptr<Image>(Image::create(
&context,
flags,
surfaceFormat,
&imageDesc,
imageMemory,
retVal));
ASSERT_NE(nullptr, image);
EXPECT_FALSE(image->isMemObjZeroCopy());
auto dstMemoryGPUPtr = reinterpret_cast<char *>(image->getGraphicsAllocation()->getGpuAddress());
const size_t origin[3] = {0, 1, 0}; // write first row
const size_t region[3] = {size, 1, 1}; // write only "size" number of pixels
size_t inputRowPitch = testWidth;
size_t inputSlicePitch = inputRowPitch * testHeight;
retVal = pCmdQ->enqueueWriteImage(
image.get(),
CL_TRUE,
origin,
region,
inputRowPitch,
inputSlicePitch,
ptrOffset(srcMemory, offset),
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
pCmdQ->finish(true);
auto imageRowPitch = image->getImageDesc().image_row_pitch;
AUBCommandStreamFixture::expectMemory<FamilyType>(dstMemoryGPUPtr, referenceMemory, inputRowPitch * pixelSize); // validate zero row is not written
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, imageRowPitch), &srcMemory[offset], size * pixelSize); // validate first row is written,
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, imageRowPitch + size * pixelSize), referenceMemory, (inputRowPitch - size) * pixelSize); // only size number of pixels, with correct data
for (uint32_t row = 2; row < testHeight; row++) {
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, row * imageRowPitch), referenceMemory, inputRowPitch * pixelSize); // next image rows shouldn;t be modified
}
}
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -11,6 +11,7 @@
#include "runtime/mem_obj/image.h"
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
#include "unit_tests/aub_tests/command_queue/enqueue_read_write_image_aub_fixture.h"
#include "unit_tests/mocks/mock_context.h"
#include "test.h"
@@ -214,3 +215,19 @@ INSTANTIATE_TEST_CASE_P(AUBWriteImage_simple, AUBWriteImage,
::testing::Values( // channels
CL_R, CL_RG, CL_RGBA),
::testing::ValuesIn(writeImageParams)));
using AUBWriteImageUnaligned = AUBImageUnaligned;

// Runs the unaligned write-image scenario for every combination of pixel size,
// host-pointer byte offset and region width (in pixels) listed below.
HWTEST_F(AUBWriteImageUnaligned, misalignedHostPtr) {
    const size_t bytesPerPixelCases[] = {1, 2, 4};
    const size_t hostPtrOffsetCases[] = {0, 1, 2, 3};
    const size_t regionWidthCases[] = {3, 2, 1};

    for (const size_t bytesPerPixel : bytesPerPixelCases) {
        for (const size_t hostPtrOffset : hostPtrOffsetCases) {
            for (const size_t regionWidth : regionWidthCases) {
                testWriteImageUnaligned<FamilyType>(hostPtrOffset, regionWidth, bytesPerPixel);
            }
        }
    }
}