Do not align down pointer passed to hostPtr allocation

- do not align up hostPtr allocation size
- align BaseAddress programmed in SurfaceState to DWORD

Change-Id: Ic6d02e53fd13dda881f8eb845a131bffe4deb45c
This commit is contained in:
Hoppe, Mateusz
2019-01-03 16:18:53 +01:00
committed by sys_ocldev
parent acc5e87b40
commit cbc4d349a8
9 changed files with 172 additions and 67 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -97,7 +97,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
}
MemObjSurface bufferSurf(buffer);
HostPtrSurface hostPtrSurf(alignedDstPtr, size + dstPtrOffset);
HostPtrSurface hostPtrSurf(dstPtr, size);
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
if (size != 0) {
@@ -105,8 +105,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
hostPtrSurf.getAllocation()->allocationOffset += dstPtrOffset;
}
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -97,7 +97,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
}
HostPtrSurface hostPtrSurf(alignedSrcPtr, size + srcPtrOffset, true);
HostPtrSurface hostPtrSurf(srcPtr, size, true);
MemObjSurface bufferSurf(buffer);
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
@@ -106,8 +106,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
if (!status) {
return CL_OUT_OF_RESOURCES;
}
hostPtrSurf.getAllocation()->allocationOffset += srcPtrOffset;
}
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -33,7 +33,15 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
auto gmmHelper = executionEnvironment->getGmmHelper();
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
auto surfaceSize = alignUp(getSize(), 4);
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast<uint64_t>(getHostPtr());
bufferAddress += this->offset;
auto bufferAddressAligned = alignDown(bufferAddress, 4);
auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned);
auto surfaceSize = alignUp(getSize() + bufferOffset, 4);
SURFACE_STATE_BUFFER_LENGTH Length = {0};
Length.Length = static_cast<uint32_t>(surfaceSize - 1);
@@ -42,10 +50,6 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
surfaceState->setHeight(Length.SurfaceState.Height + 1);
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast<uint64_t>(getHostPtr());
bufferAddress += this->offset;
auto bufferSize = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getUnderlyingBufferSize() : getSize();
if (bufferAddress != 0) {
@@ -67,7 +71,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
}
surfaceState->setSurfaceBaseAddress(bufferAddress);
surfaceState->setSurfaceBaseAddress(bufferAddressAligned);
Gmm *gmm = graphicsAllocation ? graphicsAllocation->gmm : nullptr;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -327,30 +327,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
EXPECT_FALSE(csr.disableL3Cache);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
void *ptr = (void *)0x1039;
cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
CL_FALSE,
0,
MemoryConstants::cacheLineSize,
ptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto allocation = csr.getTemporaryAllocations().peekHead();
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
allocation = allocation->next;
}
ASSERT_NE(allocation, nullptr);
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -434,27 +434,3 @@ HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocatio
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
}
HWTEST_F(EnqueueWriteBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenWriteBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
void *ptr = (void *)0x1039;
cl_int retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(),
CL_FALSE,
0,
MemoryConstants::cacheLineSize,
ptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto allocation = csr.getTemporaryAllocations().peekHead();
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
allocation = allocation->next;
}
ASSERT_NE(allocation, nullptr);
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
}

View File

@@ -1345,6 +1345,29 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt
EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT == surfaceState.getCoherencyType());
}
// Verifies that a surface state programmed for a non-DWORD-aligned pointer gets
// a base address aligned down to a DWORD and a length that is a DWORD multiple
// covering the bytes skipped by aligning the base address down.
HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgrammedThenBaseAddressAndLengthAreAlignedToDword) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    constexpr auto dwordSize = 4u;
    constexpr auto svmSize = 5u;
    // 0x1005 sits one byte past the DWORD boundary at 0x1004.
    constexpr auto addressMisalignment = 1u;

    RENDER_SURFACE_STATE surfaceState = {};
    // NOTE(review): not referenced below; kept because its construction may
    // have side effects the test relies on - TODO confirm and remove if not.
    MockContext context;
    void *svmPtr = reinterpret_cast<void *>(0x1005);

    Buffer::setSurfaceState(device.get(),
                            &surfaceState,
                            svmSize,
                            svmPtr,
                            nullptr,
                            0);

    EXPECT_EQ(0x1004u, surfaceState.getSurfaceBaseAddress());

    // Rebuild the encoded buffer length from the Width/Height/Depth fields,
    // each of which is programmed as (value - 1).
    SURFACE_STATE_BUFFER_LENGTH length = {};
    length.SurfaceState.Width = surfaceState.getWidth() - 1;
    length.SurfaceState.Height = surfaceState.getHeight() - 1;
    length.SurfaceState.Depth = surfaceState.getDepth() - 1;

    // The surface has to span the requested size plus the bytes between the
    // aligned base address and the original pointer, rounded up to a DWORD.
    EXPECT_EQ(alignUp(svmSize + addressMisalignment, dwordSize), length.Length + 1);
}
struct BufferUnmapTest : public DeviceFixture, public ::testing::Test {
void SetUp() override {
DeviceFixture::SetUp();

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2017-2018 Intel Corporation
# Copyright (C) 2017-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -10,3 +10,4 @@ set(IGDRCL_SRCS_tests_scenarios
${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_with_callback_scenario_tests.cpp
)
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios})
add_subdirectories()

View File

@@ -0,0 +1,13 @@
#
# Copyright (C) 2017-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Source list for the Windows-specific scenario tests. This CMakeLists.txt is
# listed alongside the test source file.
set(IGDRCL_SRCS_tests_scenarios_windows
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_buffer_scenarios_windows_tests.cpp
)
# The list is always defined, but it is only added to the igdrcl_tests target
# when configuring for a Windows build.
if(WIN32)
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios_windows})
endif()

View File

@@ -0,0 +1,116 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/os_interface/windows/os_interface.h"
#include "runtime/os_interface/windows/wddm_device_command_stream.h"
#include "unit_tests/fixtures/buffer_fixture.h"
#include "unit_tests/helpers/hw_info_helper.h"
#include "unit_tests/helpers/execution_environment_helper.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_device.h"
#include "unit_tests/os_interface/windows/mock_wddm_memory_manager.h"
#include "test.h"
using namespace OCLRT;
struct EnqueueBufferWindowsTest : public HardwareParse,
public ::testing::Test {
EnqueueBufferWindowsTest(void)
: buffer(nullptr) {
}
void SetUp() override {
executionEnvironment = getExecutionEnvironmentImpl(hwInfo);
}
void TearDown() override {
buffer.reset(nullptr);
}
template <typename FamilyType>
void initializeFixture() {
auto wddmCsr = new WddmCommandStreamReceiver<FamilyType>(*hwInfo, *executionEnvironment);
executionEnvironment->commandStreamReceivers.resize(1);
executionEnvironment->commandStreamReceivers[0][0].reset(wddmCsr);
memoryManager = new MockWddmMemoryManager(executionEnvironment->osInterface->get()->getWddm(), *executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(hwInfo, executionEnvironment, 0));
context = std::make_unique<MockContext>(device.get());
const size_t bufferMisalignment = 1;
const size_t bufferSize = 16;
bufferMemory = std::make_unique<uint32_t[]>(alignUp(bufferSize + bufferMisalignment, sizeof(uint32_t)));
cl_int retVal = 0;
buffer.reset(Buffer::create(context.get(),
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
bufferSize,
reinterpret_cast<char *>(bufferMemory.get()) + bufferMisalignment,
retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
}
protected:
HwInfoHelper hwInfoHelper;
HardwareInfo *hwInfo = nullptr;
ExecutionEnvironment *executionEnvironment;
cl_queue_properties properties = {};
std::unique_ptr<uint32_t[]> bufferMemory;
std::unique_ptr<MockDevice> device;
std::unique_ptr<MockContext> context;
std::unique_ptr<Buffer> buffer;
MockWddmMemoryManager *memoryManager = nullptr;
};
// Enqueues a blocking read into a host pointer that is one byte off DWORD
// alignment and checks the dispatched kernel:
//  - on 48-bit GPU address space, kernel argument 1 holds the pointer aligned
//    down to a DWORD and equals the base address of surface state 1,
//  - kernel argument 3 holds the byte distance between the original pointer
//    and its aligned-down value.
HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCalledThenStateBaseAddressAddressIsAlignedAndMatchesKernelDispatchInfoParams) {
    initializeFixture<FamilyType>();
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), &properties);

    uint32_t memory[2] = {};
    char *misalignedPtr = reinterpret_cast<char *>(memory) + 1;

    // Prevent the CPU-copy path so that a kernel is actually enqueued.
    buffer->forceDisallowCPUCopy = true;
    auto retVal = cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 4, misalignedPtr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Unsigned literal: size() is unsigned, so a plain 0 would be a
    // signed/unsigned comparison inside the gtest macro.
    ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size());
    Kernel *kernel = cmdQ->lastEnqueuedKernels[0];

    parseCommands<FamilyType>(*cmdQ);

    const auto &kernelInfo = kernel->getKernelInfo();
    const auto alignedPtrValue = reinterpret_cast<uint64_t>(alignDown(misalignedPtr, 4));

    if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) {
        const auto &surfaceStateDst = getSurfaceState<FamilyType>(1);
        const auto &dstPtrPatchInfo = kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0];

        // The destination pointer is patched as either a 64-bit or a 32-bit
        // value, depending on the patch size reported for the argument.
        if (dstPtrPatchInfo.size == sizeof(uint64_t)) {
            auto pKernelArg = reinterpret_cast<const uint64_t *>(kernel->getCrossThreadData() +
                                                                 dstPtrPatchInfo.crossthreadOffset);
            EXPECT_EQ(alignedPtrValue, *pKernelArg);
            EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress());
        } else if (dstPtrPatchInfo.size == sizeof(uint32_t)) {
            auto pKernelArg = reinterpret_cast<const uint32_t *>(kernel->getCrossThreadData() +
                                                                 dstPtrPatchInfo.crossthreadOffset);
            EXPECT_EQ(alignedPtrValue, static_cast<uint64_t>(*pKernelArg));
            EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress());
        }
    }

    const auto &dstOffsetPatchInfo = kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0];
    if (dstOffsetPatchInfo.size == sizeof(uint32_t)) {
        auto dstOffset = reinterpret_cast<const uint32_t *>(kernel->getCrossThreadData() +
                                                            dstOffsetPatchInfo.crossthreadOffset);
        EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset);
    } else {
        // Non-fatal failure, same as the former EXPECT_TRUE(false), but with a
        // message that says what went wrong.
        ADD_FAILURE() << "kernel argument 3 is expected to be patched as a 32-bit value";
    }
}