mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Do not align down pointer passed to hostPtr allocation
- do not align up hostPtr allocation size
- align BaseAddress programmed in SurfaceState to DWORD

Change-Id: Ic6d02e53fd13dda881f8eb845a131bffe4deb45c
This commit is contained in:
committed by
sys_ocldev
parent
acc5e87b40
commit
cbc4d349a8
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -97,7 +97,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
}
|
||||
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(alignedDstPtr, size + dstPtrOffset);
|
||||
HostPtrSurface hostPtrSurf(dstPtr, size);
|
||||
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
|
||||
|
||||
if (size != 0) {
|
||||
@@ -105,8 +105,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
hostPtrSurf.getAllocation()->allocationOffset += dstPtrOffset;
|
||||
}
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -97,7 +97,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
|
||||
}
|
||||
|
||||
HostPtrSurface hostPtrSurf(alignedSrcPtr, size + srcPtrOffset, true);
|
||||
HostPtrSurface hostPtrSurf(srcPtr, size, true);
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
|
||||
|
||||
@@ -106,8 +106,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
hostPtrSurf.getAllocation()->allocationOffset += srcPtrOffset;
|
||||
}
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -33,7 +33,15 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
|
||||
|
||||
auto gmmHelper = executionEnvironment->getGmmHelper();
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
|
||||
auto surfaceSize = alignUp(getSize(), 4);
|
||||
|
||||
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
|
||||
auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast<uint64_t>(getHostPtr());
|
||||
bufferAddress += this->offset;
|
||||
|
||||
auto bufferAddressAligned = alignDown(bufferAddress, 4);
|
||||
auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned);
|
||||
|
||||
auto surfaceSize = alignUp(getSize() + bufferOffset, 4);
|
||||
|
||||
SURFACE_STATE_BUFFER_LENGTH Length = {0};
|
||||
Length.Length = static_cast<uint32_t>(surfaceSize - 1);
|
||||
@@ -42,10 +50,6 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
|
||||
surfaceState->setHeight(Length.SurfaceState.Height + 1);
|
||||
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
|
||||
|
||||
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
|
||||
auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast<uint64_t>(getHostPtr());
|
||||
bufferAddress += this->offset;
|
||||
|
||||
auto bufferSize = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getUnderlyingBufferSize() : getSize();
|
||||
|
||||
if (bufferAddress != 0) {
|
||||
@@ -67,7 +71,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
|
||||
}
|
||||
|
||||
surfaceState->setSurfaceBaseAddress(bufferAddress);
|
||||
surfaceState->setSurfaceBaseAddress(bufferAddressAligned);
|
||||
|
||||
Gmm *gmm = graphicsAllocation ? graphicsAllocation->gmm : nullptr;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -327,30 +327,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
|
||||
EXPECT_FALSE(csr.disableL3Cache);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
|
||||
void *ptr = (void *)0x1039;
|
||||
|
||||
cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
|
||||
CL_FALSE,
|
||||
0,
|
||||
MemoryConstants::cacheLineSize,
|
||||
ptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
auto allocation = csr.getTemporaryAllocations().peekHead();
|
||||
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
|
||||
allocation = allocation->next;
|
||||
}
|
||||
|
||||
ASSERT_NE(allocation, nullptr);
|
||||
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -434,27 +434,3 @@ HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocatio
|
||||
|
||||
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueWriteBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenWriteBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
|
||||
void *ptr = (void *)0x1039;
|
||||
|
||||
cl_int retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(),
|
||||
CL_FALSE,
|
||||
0,
|
||||
MemoryConstants::cacheLineSize,
|
||||
ptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
auto allocation = csr.getTemporaryAllocations().peekHead();
|
||||
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
|
||||
allocation = allocation->next;
|
||||
}
|
||||
|
||||
ASSERT_NE(allocation, nullptr);
|
||||
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
|
||||
}
|
||||
|
||||
@@ -1345,6 +1345,29 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt
|
||||
EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT == surfaceState.getCoherencyType());
|
||||
}
|
||||
|
||||
HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgrammedThenBaseAddressAndLengthAreAlignedToDword) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
MockContext context;
|
||||
void *svmPtr = reinterpret_cast<void *>(0x1005);
|
||||
|
||||
Buffer::setSurfaceState(device.get(),
|
||||
&surfaceState,
|
||||
5,
|
||||
svmPtr,
|
||||
nullptr,
|
||||
0);
|
||||
|
||||
EXPECT_EQ(0x1004u, surfaceState.getSurfaceBaseAddress());
|
||||
SURFACE_STATE_BUFFER_LENGTH length = {};
|
||||
length.SurfaceState.Width = surfaceState.getWidth() - 1;
|
||||
length.SurfaceState.Height = surfaceState.getHeight() - 1;
|
||||
length.SurfaceState.Depth = surfaceState.getDepth() - 1;
|
||||
EXPECT_EQ(alignUp(5u, 4u), length.Length + 1);
|
||||
}
|
||||
|
||||
struct BufferUnmapTest : public DeviceFixture, public ::testing::Test {
|
||||
void SetUp() override {
|
||||
DeviceFixture::SetUp();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2017-2018 Intel Corporation
|
||||
# Copyright (C) 2017-2019 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -10,3 +10,4 @@ set(IGDRCL_SRCS_tests_scenarios
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_with_callback_scenario_tests.cpp
|
||||
)
|
||||
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios})
|
||||
add_subdirectories()
|
||||
13
unit_tests/scenarios/windows/CMakeLists.txt
Normal file
13
unit_tests/scenarios/windows/CMakeLists.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
#
|
||||
# Copyright (C) 2017-2019 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Source list for the Windows-specific scenario tests (this CMakeLists.txt
# plus the enqueue read/write buffer scenario test file).
set(IGDRCL_SRCS_tests_scenarios_windows
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_buffer_scenarios_windows_tests.cpp
|
||||
)
|
||||
# The list is defined on every platform, but it is only attached to the
# igdrcl_tests target when building for Windows.
if(WIN32)
|
||||
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios_windows})
|
||||
endif()
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/os_interface/windows/os_interface.h"
|
||||
#include "runtime/os_interface/windows/wddm_device_command_stream.h"
|
||||
|
||||
#include "unit_tests/fixtures/buffer_fixture.h"
|
||||
#include "unit_tests/helpers/hw_info_helper.h"
|
||||
#include "unit_tests/helpers/execution_environment_helper.h"
|
||||
#include "unit_tests/helpers/hw_parse.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
#include "unit_tests/mocks/mock_device.h"
|
||||
#include "unit_tests/os_interface/windows/mock_wddm_memory_manager.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
struct EnqueueBufferWindowsTest : public HardwareParse,
|
||||
public ::testing::Test {
|
||||
EnqueueBufferWindowsTest(void)
|
||||
: buffer(nullptr) {
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
executionEnvironment = getExecutionEnvironmentImpl(hwInfo);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
buffer.reset(nullptr);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void initializeFixture() {
|
||||
auto wddmCsr = new WddmCommandStreamReceiver<FamilyType>(*hwInfo, *executionEnvironment);
|
||||
|
||||
executionEnvironment->commandStreamReceivers.resize(1);
|
||||
executionEnvironment->commandStreamReceivers[0][0].reset(wddmCsr);
|
||||
|
||||
memoryManager = new MockWddmMemoryManager(executionEnvironment->osInterface->get()->getWddm(), *executionEnvironment);
|
||||
executionEnvironment->memoryManager.reset(memoryManager);
|
||||
|
||||
device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(hwInfo, executionEnvironment, 0));
|
||||
|
||||
context = std::make_unique<MockContext>(device.get());
|
||||
|
||||
const size_t bufferMisalignment = 1;
|
||||
const size_t bufferSize = 16;
|
||||
bufferMemory = std::make_unique<uint32_t[]>(alignUp(bufferSize + bufferMisalignment, sizeof(uint32_t)));
|
||||
cl_int retVal = 0;
|
||||
|
||||
buffer.reset(Buffer::create(context.get(),
|
||||
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
bufferSize,
|
||||
reinterpret_cast<char *>(bufferMemory.get()) + bufferMisalignment,
|
||||
retVal));
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
protected:
|
||||
HwInfoHelper hwInfoHelper;
|
||||
HardwareInfo *hwInfo = nullptr;
|
||||
ExecutionEnvironment *executionEnvironment;
|
||||
cl_queue_properties properties = {};
|
||||
std::unique_ptr<uint32_t[]> bufferMemory;
|
||||
std::unique_ptr<MockDevice> device;
|
||||
std::unique_ptr<MockContext> context;
|
||||
std::unique_ptr<Buffer> buffer;
|
||||
|
||||
MockWddmMemoryManager *memoryManager = nullptr;
|
||||
};
|
||||
|
||||
// Performs a blocking 4-byte enqueueReadBuffer into a host pointer that is one
// byte past a DWORD boundary (CPU copy is disallowed on the buffer) and checks,
// on the first enqueued kernel:
//  - with a 48-bit GPU address space: kernel argument 1 holds the DWORD-aligned
//    host pointer and equals the base address of surface state 1;
//  - always: kernel argument 3 is a 32-bit value equal to the distance between
//    the misaligned pointer and its DWORD-aligned base.
HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCalledThenStateBaseAddressAddressIsAlignedAndMatchesKernelDispatchInfoParams) {
    initializeFixture<FamilyType>();
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), &properties);
    uint32_t memory[2] = {};
    char *misalignedPtr = reinterpret_cast<char *>(memory) + 1;
    const auto alignedPtr = alignDown(misalignedPtr, 4);

    buffer->forceDisallowCPUCopy = true;
    auto retVal = cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 4, misalignedPtr, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    // Unsigned literal: size() is unsigned, avoids a signed/unsigned comparison.
    ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size());
    Kernel *kernel = cmdQ->lastEnqueuedKernels[0];
    const auto &kernelInfo = kernel->getKernelInfo();

    parseCommands<FamilyType>(*cmdQ);

    if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) {
        const auto &surfaceStateDst = getSurfaceState<FamilyType>(1);
        const auto &dstPtrPatch = kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0];
        const auto expectedAddress = reinterpret_cast<uint64_t>(alignedPtr);

        // The destination pointer may be patched as a 64-bit or a 32-bit value.
        if (dstPtrPatch.size == sizeof(uint64_t)) {
            auto pKernelArg = reinterpret_cast<const uint64_t *>(kernel->getCrossThreadData() + dstPtrPatch.crossthreadOffset);
            EXPECT_EQ(expectedAddress, *pKernelArg);
            EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress());
        } else if (dstPtrPatch.size == sizeof(uint32_t)) {
            auto pKernelArg = reinterpret_cast<const uint32_t *>(kernel->getCrossThreadData() + dstPtrPatch.crossthreadOffset);
            EXPECT_EQ(expectedAddress, static_cast<uint64_t>(*pKernelArg));
            EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress());
        }
    }

    const auto &dstOffsetPatch = kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0];
    if (dstOffsetPatch.size == sizeof(uint32_t)) {
        auto dstOffset = reinterpret_cast<const uint32_t *>(kernel->getCrossThreadData() + dstOffsetPatch.crossthreadOffset);
        EXPECT_EQ(ptrDiff(misalignedPtr, alignedPtr), *dstOffset);
    } else {
        ADD_FAILURE() << "destination offset argument is expected to be patched as a 32-bit value";
    }
}
|
||||
Reference in New Issue
Block a user