mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Fixes for misaligned hostPtr enqueueReadWrite
- use getGpuAddress for BuiltinOpParams - fix read/writeImage Change-Id: I2e6e9a1d91871fa9f22851f31eb5a7b337b5aecc
This commit is contained in:
committed by
sys_ocldev
parent
3c59bae5a4
commit
64ff9d30b7
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -476,6 +476,7 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToImage3d> : public BuiltinDisp
|
||||
|
||||
// Determine size of host ptr surface for residency purposes
|
||||
size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
|
||||
hostPtrSize += operationParams.srcOffset.x;
|
||||
|
||||
// Set-up kernel
|
||||
auto bytesExponent = Math::log2(bytesPerPixel);
|
||||
@@ -562,6 +563,7 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDisp
|
||||
|
||||
// Determine size of host ptr surface for residency purposes
|
||||
size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
|
||||
hostPtrSize += operationParams.dstOffset.x;
|
||||
|
||||
// Set-up ISA
|
||||
auto bytesExponent = Math::log2(bytesPerPixel);
|
||||
|
||||
@@ -88,13 +88,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
||||
void *dstPtr = ptr;
|
||||
void *alignedDstPtr = dstPtr;
|
||||
size_t dstPtrOffset = 0;
|
||||
|
||||
if (!isAligned<4>(dstPtr)) {
|
||||
alignedDstPtr = alignDown(dstPtr, 4);
|
||||
dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
|
||||
}
|
||||
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(dstPtr, size);
|
||||
@@ -105,8 +98,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedDstPtr = alignDown(dstPtr, 4);
|
||||
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.dstPtr = alignedDstPtr;
|
||||
dc.dstOffset = {dstPtrOffset, 0, 0};
|
||||
|
||||
@@ -80,7 +80,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
|
||||
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedDstPtr = alignDown(dstPtr, 4);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -86,12 +86,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
|
||||
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedDstPtr = alignDown(dstPtr, 4);
|
||||
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcImage;
|
||||
dc.dstPtr = dstPtr;
|
||||
dc.dstPtr = alignedDstPtr;
|
||||
dc.dstOffset.x = dstPtrOffset;
|
||||
dc.srcOffset = origin;
|
||||
dc.size = region;
|
||||
dc.srcRowPitch = inputRowPitch;
|
||||
|
||||
@@ -89,13 +89,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
||||
void *srcPtr = const_cast<void *>(ptr);
|
||||
void *alignedSrcPtr = srcPtr;
|
||||
size_t srcPtrOffset = 0;
|
||||
|
||||
if (!isAligned<4>(srcPtr)) {
|
||||
alignedSrcPtr = alignDown(srcPtr, 4);
|
||||
srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
|
||||
}
|
||||
|
||||
HostPtrSurface hostPtrSurf(srcPtr, size, true);
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
@@ -106,8 +99,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedSrcPtr = alignDown(srcPtr, 4);
|
||||
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcPtr = alignedSrcPtr;
|
||||
dc.srcOffset = {srcPtrOffset, 0, 0};
|
||||
|
||||
@@ -79,7 +79,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
|
||||
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedSrcPtr = alignDown(srcPtr, 4);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -80,11 +80,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
|
||||
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedSrcPtr = alignDown(srcPtr, 4);
|
||||
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcPtr = srcPtr;
|
||||
dc.srcPtr = alignedSrcPtr;
|
||||
dc.srcOffset.x = srcPtrOffset;
|
||||
dc.dstMemObj = dstImage;
|
||||
dc.dstOffset = origin;
|
||||
dc.size = region;
|
||||
|
||||
@@ -18,6 +18,7 @@ target_sources(igdrcl_aub_tests PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_aub_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_aub_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_aub_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_image_aub_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_buffer_aub_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_image_aub_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_with_timestamp_packet_aub_tests.cpp
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
|
||||
#include "unit_tests/aub_tests/command_queue/enqueue_read_write_image_aub_fixture.h"
|
||||
#include "unit_tests/mocks/mock_context.h"
|
||||
#include "test.h"
|
||||
|
||||
@@ -222,3 +223,19 @@ INSTANTIATE_TEST_CASE_P(
|
||||
::testing::Values( // channels
|
||||
CL_R, CL_RG, CL_RGBA),
|
||||
::testing::ValuesIn(readImageParams)));
|
||||
|
||||
using AUBReadImageUnaligned = AUBImageUnaligned;
|
||||
|
||||
// Runs testReadImageUnaligned for every combination of pixel size, host pointer
// byte offset and region width listed below (pixel size outermost, width innermost).
HWTEST_F(AUBReadImageUnaligned, misalignedHostPtr) {
    const size_t testedPixelSizes[] = {1, 2, 4};
    const size_t hostPtrOffsets[] = {0, 1, 2, 3};
    const size_t regionWidths[] = {3, 2, 1};

    for (const size_t bytesPerPixel : testedPixelSizes) {
        for (const size_t hostPtrOffset : hostPtrOffsets) {
            for (const size_t regionWidth : regionWidths) {
                testReadImageUnaligned<FamilyType>(hostPtrOffset, regionWidth, bytesPerPixel);
            }
        }
    }
}
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/mocks/mock_context.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
struct AUBImageUnaligned
|
||||
: public CommandEnqueueAUBFixture,
|
||||
public ::testing::Test {
|
||||
|
||||
void SetUp() override {
|
||||
CommandEnqueueAUBFixture::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
CommandEnqueueAUBFixture::TearDown();
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void testReadImageUnaligned(size_t offset, size_t size, size_t pixelSize) {
|
||||
MockContext context(&pCmdQ->getDevice());
|
||||
|
||||
char srcMemory[] = "_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnoprstuwxyz";
|
||||
const auto bufferSize = sizeof(srcMemory) - 1;
|
||||
char *imageMemory = &srcMemory[1]; //ensure non cacheline-aligned hostPtr to create non-zerocopy image
|
||||
void *dstMemory = alignedMalloc(bufferSize, MemoryConstants::pageSize);
|
||||
memset(dstMemory, 0, bufferSize);
|
||||
char referenceMemory[bufferSize] = {0};
|
||||
|
||||
const size_t testWidth = bufferSize / 4 / pixelSize;
|
||||
const size_t testHeight = 4;
|
||||
const size_t testDepth = 1;
|
||||
|
||||
cl_image_format imageFormat;
|
||||
cl_image_desc imageDesc;
|
||||
|
||||
imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
switch (pixelSize) {
|
||||
case 1:
|
||||
imageFormat.image_channel_order = CL_R;
|
||||
break;
|
||||
case 2:
|
||||
imageFormat.image_channel_order = CL_RG;
|
||||
break;
|
||||
case 3:
|
||||
ASSERT_TRUE(false);
|
||||
break;
|
||||
case 4:
|
||||
imageFormat.image_channel_order = CL_RGBA;
|
||||
break;
|
||||
}
|
||||
|
||||
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
imageDesc.image_width = testWidth;
|
||||
imageDesc.image_height = testHeight;
|
||||
imageDesc.image_depth = testDepth;
|
||||
imageDesc.image_array_size = 1;
|
||||
imageDesc.image_row_pitch = 0;
|
||||
imageDesc.image_slice_pitch = 0;
|
||||
imageDesc.num_mip_levels = 0;
|
||||
imageDesc.num_samples = 0;
|
||||
imageDesc.mem_object = NULL;
|
||||
|
||||
auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize);
|
||||
auto dstMemoryGPUPtr = reinterpret_cast<char *>(graphicsAllocation->getGpuAddress());
|
||||
|
||||
cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE;
|
||||
auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);
|
||||
auto retVal = CL_INVALID_VALUE;
|
||||
|
||||
auto image = std::unique_ptr<Image>(Image::create(
|
||||
&context,
|
||||
flags,
|
||||
surfaceFormat,
|
||||
&imageDesc,
|
||||
imageMemory,
|
||||
retVal));
|
||||
ASSERT_NE(nullptr, image);
|
||||
EXPECT_FALSE(image->isMemObjZeroCopy());
|
||||
|
||||
const size_t origin[3] = {0, 1, 0};
|
||||
const size_t region[3] = {size, 1, 1};
|
||||
|
||||
size_t inputRowPitch = testWidth;
|
||||
size_t inputSlicePitch = inputRowPitch * testHeight;
|
||||
|
||||
retVal = pCmdQ->enqueueReadImage(
|
||||
image.get(),
|
||||
CL_TRUE,
|
||||
origin,
|
||||
region,
|
||||
inputRowPitch,
|
||||
inputSlicePitch,
|
||||
ptrOffset(dstMemory, offset),
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
pCmdQ->finish(true);
|
||||
|
||||
AUBCommandStreamFixture::expectMemory<FamilyType>(dstMemoryGPUPtr, referenceMemory, offset);
|
||||
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, offset), &imageMemory[inputRowPitch * origin[1] * pixelSize], size * pixelSize);
|
||||
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, size * pixelSize + offset), referenceMemory, bufferSize - offset - size * pixelSize);
|
||||
alignedFree(dstMemory);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void testWriteImageUnaligned(size_t offset, size_t size, size_t pixelSize) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.ForceLinearImages.set(true);
|
||||
MockContext context(&pCmdQ->getDevice());
|
||||
|
||||
char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnoprstuwxyz";
|
||||
const auto bufferSize = sizeof(srcMemory);
|
||||
char dstMemory[bufferSize + 1] = {0};
|
||||
char *imageMemory = &dstMemory[1]; //ensure non cacheline-aligned hostPtr to create non-zerocopy image
|
||||
char referenceMemory[bufferSize] = {0};
|
||||
|
||||
const size_t testWidth = bufferSize / 4 / pixelSize;
|
||||
const size_t testHeight = 4;
|
||||
const size_t testDepth = 1;
|
||||
|
||||
cl_image_format imageFormat;
|
||||
cl_image_desc imageDesc;
|
||||
|
||||
imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
switch (pixelSize) {
|
||||
case 1:
|
||||
imageFormat.image_channel_order = CL_R;
|
||||
break;
|
||||
case 2:
|
||||
imageFormat.image_channel_order = CL_RG;
|
||||
break;
|
||||
case 3:
|
||||
ASSERT_TRUE(false);
|
||||
break;
|
||||
case 4:
|
||||
imageFormat.image_channel_order = CL_RGBA;
|
||||
break;
|
||||
}
|
||||
|
||||
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
imageDesc.image_width = testWidth;
|
||||
imageDesc.image_height = testHeight;
|
||||
imageDesc.image_depth = testDepth;
|
||||
imageDesc.image_array_size = 1;
|
||||
imageDesc.image_row_pitch = 0;
|
||||
imageDesc.image_slice_pitch = 0;
|
||||
imageDesc.num_mip_levels = 0;
|
||||
imageDesc.num_samples = 0;
|
||||
imageDesc.mem_object = NULL;
|
||||
|
||||
cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE;
|
||||
auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);
|
||||
auto retVal = CL_INVALID_VALUE;
|
||||
|
||||
auto image = std::unique_ptr<Image>(Image::create(
|
||||
&context,
|
||||
flags,
|
||||
surfaceFormat,
|
||||
&imageDesc,
|
||||
imageMemory,
|
||||
retVal));
|
||||
ASSERT_NE(nullptr, image);
|
||||
EXPECT_FALSE(image->isMemObjZeroCopy());
|
||||
|
||||
auto dstMemoryGPUPtr = reinterpret_cast<char *>(image->getGraphicsAllocation()->getGpuAddress());
|
||||
|
||||
const size_t origin[3] = {0, 1, 0}; // write first row
|
||||
const size_t region[3] = {size, 1, 1}; // write only "size" number of pixels
|
||||
|
||||
size_t inputRowPitch = testWidth;
|
||||
size_t inputSlicePitch = inputRowPitch * testHeight;
|
||||
|
||||
retVal = pCmdQ->enqueueWriteImage(
|
||||
image.get(),
|
||||
CL_TRUE,
|
||||
origin,
|
||||
region,
|
||||
inputRowPitch,
|
||||
inputSlicePitch,
|
||||
ptrOffset(srcMemory, offset),
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
pCmdQ->finish(true);
|
||||
|
||||
auto imageRowPitch = image->getImageDesc().image_row_pitch;
|
||||
|
||||
AUBCommandStreamFixture::expectMemory<FamilyType>(dstMemoryGPUPtr, referenceMemory, inputRowPitch * pixelSize); // validate zero row is not written
|
||||
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, imageRowPitch), &srcMemory[offset], size * pixelSize); // validate first row is written,
|
||||
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, imageRowPitch + size * pixelSize), referenceMemory, (inputRowPitch - size) * pixelSize); // only size number of pixels, with correct data
|
||||
for (uint32_t row = 2; row < testHeight; row++) {
|
||||
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, row * imageRowPitch), referenceMemory, inputRowPitch * pixelSize); // next image rows shouldn;t be modified
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
|
||||
#include "unit_tests/aub_tests/command_queue/enqueue_read_write_image_aub_fixture.h"
|
||||
#include "unit_tests/mocks/mock_context.h"
|
||||
#include "test.h"
|
||||
|
||||
@@ -214,3 +215,19 @@ INSTANTIATE_TEST_CASE_P(AUBWriteImage_simple, AUBWriteImage,
|
||||
::testing::Values( // channels
|
||||
CL_R, CL_RG, CL_RGBA),
|
||||
::testing::ValuesIn(writeImageParams)));
|
||||
|
||||
using AUBWriteImageUnaligned = AUBImageUnaligned;
|
||||
|
||||
// Runs testWriteImageUnaligned for every combination of pixel size, host pointer
// byte offset and region width listed below (pixel size outermost, width innermost).
HWTEST_F(AUBWriteImageUnaligned, misalignedHostPtr) {
    const size_t testedPixelSizes[] = {1, 2, 4};
    const size_t hostPtrOffsets[] = {0, 1, 2, 3};
    const size_t regionWidths[] = {3, 2, 1};

    for (const size_t bytesPerPixel : testedPixelSizes) {
        for (const size_t hostPtrOffset : hostPtrOffsets) {
            for (const size_t regionWidth : regionWidths) {
                testWriteImageUnaligned<FamilyType>(hostPtrOffset, regionWidth, bytesPerPixel);
            }
        }
    }
}
|
||||
|
||||
Reference in New Issue
Block a user